From dce4a407a24b04eebc6a376f8e62b41aaa7b071f Mon Sep 17 00:00:00 2001
From: Stephen Hines
Date: Thu, 29 May 2014 02:49:00 -0700
Subject: Update LLVM for 3.5 rebase (r209712).

Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
---
 lib/Analysis/AliasAnalysis.cpp | 2 +-
 lib/Analysis/AliasAnalysisCounter.cpp | 4 +-
 lib/Analysis/AliasSetTracker.cpp | 18 +-
 lib/Analysis/Analysis.cpp | 5 +-
 lib/Analysis/Android.mk | 1 +
 lib/Analysis/BasicAliasAnalysis.cpp | 43 +-
 lib/Analysis/BlockFrequencyInfo.cpp | 13 +-
 lib/Analysis/BlockFrequencyInfoImpl.cpp | 995 ++
 lib/Analysis/BranchProbabilityInfo.cpp | 18 +-
 lib/Analysis/CFG.cpp | 8 +-
 lib/Analysis/CFGPrinter.cpp | 13 +-
 lib/Analysis/CGSCCPassManager.cpp | 167 +
 lib/Analysis/CMakeLists.txt | 2 +
 lib/Analysis/ConstantFolding.cpp | 160 +-
 lib/Analysis/CostModel.cpp | 40 +-
 lib/Analysis/Delinearization.cpp | 32 +-
 lib/Analysis/DependenceAnalysis.cpp | 164 +-
 lib/Analysis/DominanceFrontier.cpp | 4 +-
 lib/Analysis/IPA/CallGraph.cpp | 12 +-
 lib/Analysis/IPA/CallGraphSCCPass.cpp | 27 +-
 lib/Analysis/IPA/GlobalsModRef.cpp | 21 +-
 lib/Analysis/IPA/InlineCost.cpp | 64 +-
 lib/Analysis/IVUsers.cpp | 13 +-
 lib/Analysis/InstCount.cpp | 5 +-
 lib/Analysis/InstructionSimplify.cpp | 203 +-
 lib/Analysis/IntervalPartition.cpp | 2 +-
 lib/Analysis/LazyCallGraph.cpp | 673 +-
 lib/Analysis/LazyValueInfo.cpp | 21 +-
 lib/Analysis/LibCallAliasAnalysis.cpp | 2 +-
 lib/Analysis/LibCallSemantics.cpp | 4 +-
 lib/Analysis/Lint.cpp | 40 +-
 lib/Analysis/Loads.cpp | 10 +-
 lib/Analysis/LoopInfo.cpp | 30 +-
 lib/Analysis/LoopPass.cpp | 9 +-
 lib/Analysis/MemDepPrinter.cpp | 14 +-
 lib/Analysis/MemoryBuiltins.cpp | 57 +-
 lib/Analysis/MemoryDependenceAnalysis.cpp | 31 +-
 lib/Analysis/NoAliasAnalysis.cpp | 2 +-
 lib/Analysis/PHITransAddr.cpp | 44 +-
 lib/Analysis/PostDominators.cpp | 4 +-
 lib/Analysis/RegionInfo.cpp | 85 +-
 lib/Analysis/RegionPass.cpp | 18 +-
 lib/Analysis/RegionPrinter.cpp | 22 +-
 lib/Analysis/ScalarEvolution.cpp | 1167 +-
 lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 8 +-
 lib/Analysis/ScalarEvolutionExpander.cpp | 51 +-
 lib/Analysis/ScalarEvolutionNormalization.cpp | 2 +-
 lib/Analysis/SparsePropagation.cpp | 7 +-
 lib/Analysis/TargetTransformInfo.cpp | 15 +-
 lib/Analysis/TypeBasedAliasAnalysis.cpp | 30 +-
 lib/Analysis/ValueTracking.cpp | 264 +-
 lib/AsmParser/LLLexer.cpp | 19 +-
 lib/AsmParser/LLLexer.h | 4 +
 lib/AsmParser/LLParser.cpp | 302 +-
 lib/AsmParser/LLParser.h | 10 +-
 lib/AsmParser/LLToken.h | 6 +-
 lib/AsmParser/Parser.cpp | 8 +-
 lib/AsmParser/module.modulemap | 1 +
 lib/Bitcode/Reader/BitReader.cpp | 4 +-
 lib/Bitcode/Reader/BitcodeReader.cpp | 222 +-
 lib/Bitcode/Reader/BitcodeReader.h | 20 +-
 lib/Bitcode/Reader/BitstreamReader.cpp | 2 +-
 lib/Bitcode/Writer/BitWriter.cpp | 1 +
 lib/Bitcode/Writer/BitcodeWriter.cpp | 118 +-
 lib/Bitcode/module.modulemap | 1 +
 lib/CodeGen/AggressiveAntiDepBreaker.cpp | 52 +-
 lib/CodeGen/AggressiveAntiDepBreaker.h | 3 +-
 lib/CodeGen/AllocationOrder.cpp | 3 +-
 lib/CodeGen/Analysis.cpp | 6 +-
 lib/CodeGen/Android.mk | 1 +
 lib/CodeGen/AsmPrinter/ARMException.cpp | 23 +-
 lib/CodeGen/AsmPrinter/AddressPool.cpp | 45 +
 lib/CodeGen/AsmPrinter/AddressPool.h | 52 +
 lib/CodeGen/AsmPrinter/Android.mk | 12 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 287 +-
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 74 +-
 lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 25 +-
 lib/CodeGen/AsmPrinter/CMakeLists.txt | 4 +
 lib/CodeGen/AsmPrinter/DIE.cpp | 21 +-
 lib/CodeGen/AsmPrinter/DIE.h | 37 +-
 lib/CodeGen/AsmPrinter/DIEHash.cpp | 18 +-
 lib/CodeGen/AsmPrinter/DIEHash.h | 2 +-
 .../AsmPrinter/DbgValueHistoryCalculator.cpp | 175 +
 lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h | 54 +
 lib/CodeGen/AsmPrinter/DebugLocEntry.h | 138 +-
 lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 39 +-
 lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 29 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1609 +--
 lib/CodeGen/AsmPrinter/DwarfDebug.h | 211 +-
 lib/CodeGen/AsmPrinter/DwarfException.cpp | 28 +-
 lib/CodeGen/AsmPrinter/DwarfFile.cpp | 156 +
 lib/CodeGen/AsmPrinter/DwarfFile.h | 84 +
 lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 74 +
 lib/CodeGen/AsmPrinter/DwarfStringPool.h | 55 +
 lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 838 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.h | 174 +-
 lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 27 +-
 lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h | 4 +-
 lib/CodeGen/AtomicExpandLoadLinkedPass.cpp | 337 +
 lib/CodeGen/BasicTargetTransformInfo.cpp | 90 +-
 lib/CodeGen/BranchFolding.cpp | 74 +-
 lib/CodeGen/CMakeLists.txt | 1 +
 lib/CodeGen/CalcSpillWeights.cpp | 14 +-
 lib/CodeGen/CallingConvLower.cpp | 12 +-
 lib/CodeGen/CodeGen.cpp | 1 +
 lib/CodeGen/CodeGenPrepare.cpp | 457 +-
 lib/CodeGen/CriticalAntiDepBreaker.cpp | 34 +-
 lib/CodeGen/DFAPacketizer.cpp | 4 +-
 lib/CodeGen/DeadMachineInstructionElim.cpp | 5 +-
 lib/CodeGen/DwarfEHPrepare.cpp | 11 +-
 lib/CodeGen/EarlyIfConversion.cpp | 19 +-
 lib/CodeGen/EdgeBundles.cpp | 32 +-
 lib/CodeGen/ExecutionDepsFix.cpp | 17 +-
 lib/CodeGen/ExpandISelPseudos.cpp | 3 +-
 lib/CodeGen/ExpandPostRAPseudos.cpp | 3 +-
 lib/CodeGen/GCMetadata.cpp | 17 +-
 lib/CodeGen/GCStrategy.cpp | 14 +-
 lib/CodeGen/IfConversion.cpp | 44 +-
 lib/CodeGen/InlineSpiller.cpp | 17 +-
 lib/CodeGen/InterferenceCache.cpp | 3 +-
 lib/CodeGen/InterferenceCache.h | 20 +-
 lib/CodeGen/IntrinsicLowering.cpp | 6 +-
 lib/CodeGen/LLVMTargetMachine.cpp | 63 +-
 lib/CodeGen/LatencyPriorityQueue.cpp | 11 +-
 lib/CodeGen/LexicalScopes.cpp | 142 +-
 lib/CodeGen/LiveDebugVariables.cpp | 37 +-
 lib/CodeGen/LiveInterval.cpp | 12 +-
 lib/CodeGen/LiveIntervalAnalysis.cpp | 15 +-
 lib/CodeGen/LiveIntervalUnion.cpp | 7 +-
 lib/CodeGen/LiveRangeCalc.cpp | 11 +-
 lib/CodeGen/LiveRangeCalc.h | 7 +-
 lib/CodeGen/LiveRangeEdit.cpp | 11 +-
 lib/CodeGen/LiveRegMatrix.cpp | 3 +-
 lib/CodeGen/LiveStackAnalysis.cpp | 3 +-
 lib/CodeGen/LiveVariables.cpp | 43 +-
 lib/CodeGen/LocalStackSlotAllocation.cpp | 3 +-
 lib/CodeGen/MachineBasicBlock.cpp | 52 +-
 lib/CodeGen/MachineBlockFrequencyInfo.cpp | 17 +-
 lib/CodeGen/MachineBlockPlacement.cpp | 37 +-
 lib/CodeGen/MachineBranchProbabilityInfo.cpp | 4 +-
 lib/CodeGen/MachineCSE.cpp | 3 +-
 lib/CodeGen/MachineCopyPropagation.cpp | 3 +-
 lib/CodeGen/MachineFunction.cpp | 35 +-
 lib/CodeGen/MachineFunctionAnalysis.cpp | 4 +-
 lib/CodeGen/MachineInstr.cpp | 79 +-
 lib/CodeGen/MachineLICM.cpp | 42 +-
 lib/CodeGen/MachineModuleInfo.cpp | 40 +-
 lib/CodeGen/MachinePassRegistry.cpp | 2 +-
 lib/CodeGen/MachineRegisterInfo.cpp | 18 +-
 lib/CodeGen/MachineSSAUpdater.cpp | 10 +-
 lib/CodeGen/MachineScheduler.cpp | 82 +-
 lib/CodeGen/MachineSink.cpp | 37 +-
 lib/CodeGen/MachineTraceMetrics.cpp | 78 +-
 lib/CodeGen/MachineVerifier.cpp | 126 +-
 lib/CodeGen/OptimizePHIs.cpp | 3 +-
 lib/CodeGen/PHIElimination.cpp | 24 +-
 lib/CodeGen/Passes.cpp | 24 +-
 lib/CodeGen/PeepholeOptimizer.cpp | 15 +-
 lib/CodeGen/PostRASchedulerList.cpp | 23 +-
 lib/CodeGen/ProcessImplicitDefs.cpp | 4 +-
 lib/CodeGen/PrologEpilogInserter.cpp | 20 +-
 lib/CodeGen/PseudoSourceValue.cpp | 10 +-
 lib/CodeGen/RegAllocBase.cpp | 5 +-
 lib/CodeGen/RegAllocBase.h | 3 +-
 lib/CodeGen/RegAllocBasic.cpp | 7 +-
 lib/CodeGen/RegAllocFast.cpp | 7 +-
 lib/CodeGen/RegAllocGreedy.cpp | 100 +-
 lib/CodeGen/RegAllocPBQP.cpp | 25 +-
 lib/CodeGen/RegisterClassInfo.cpp | 9 +-
 lib/CodeGen/RegisterCoalescer.cpp | 50 +-
 lib/CodeGen/RegisterCoalescer.h | 4 +-
 lib/CodeGen/RegisterPressure.cpp | 4 +-
 lib/CodeGen/RegisterScavenging.cpp | 15 +-
 lib/CodeGen/ScheduleDAG.cpp | 5 +-
 lib/CodeGen/ScheduleDAGInstrs.cpp | 134 +-
 lib/CodeGen/ScoreboardHazardRecognizer.cpp | 5 +-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 612 +-
 lib/CodeGen/SelectionDAG/FastISel.cpp | 78 +-
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 7 +-
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 40 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 137 +-
 lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 14 +-
 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 162 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 35 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +-
 lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 9 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 47 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 120 +-
 lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 13 +-
 lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 16 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 33 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 75 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 30 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 11 +-
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 819 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 510 +-
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 31 +-
 lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 8 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 133 +-
 lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 10 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 236 +-
 lib/CodeGen/ShadowStackGC.cpp | 13 +-
 lib/CodeGen/SjLjEHPrepare.cpp | 11 +-
 lib/CodeGen/SlotIndexes.cpp | 12 +-
 lib/CodeGen/SpillPlacement.cpp | 33 +-
 lib/CodeGen/SpillPlacement.h | 2 +-
 lib/CodeGen/Spiller.cpp | 4 +-
 lib/CodeGen/SplitKit.cpp | 17 +-
 lib/CodeGen/SplitKit.h | 4 +-
 lib/CodeGen/StackColoring.cpp | 32 +-
 lib/CodeGen/StackMapLivenessAnalysis.cpp | 5 +-
 lib/CodeGen/StackMaps.cpp | 289 +-
 lib/CodeGen/StackProtector.cpp | 23 +-
 lib/CodeGen/StackSlotColoring.cpp | 20 +-
 lib/CodeGen/TailDuplication.cpp | 29 +-
 lib/CodeGen/TargetInstrInfo.cpp | 46 +-
 lib/CodeGen/TargetLoweringBase.cpp | 38 +-
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 56 +-
 lib/CodeGen/TargetRegisterInfo.cpp | 14 +-
 lib/CodeGen/TwoAddressInstructionPass.cpp | 23 +-
 lib/CodeGen/VirtRegMap.cpp | 3 +-
 lib/CodeGen/module.modulemap | 1 +
 lib/DebugInfo/DWARFCompileUnit.h | 6 +-
 lib/DebugInfo/DWARFContext.cpp | 246 +-
 lib/DebugInfo/DWARFContext.h | 2 +-
 lib/DebugInfo/DWARFDebugAbbrev.cpp | 112 +-
 lib/DebugInfo/DWARFDebugAbbrev.h | 50 +-
 lib/DebugInfo/DWARFDebugArangeSet.h | 1 -
 lib/DebugInfo/DWARFDebugAranges.cpp | 36 +-
 lib/DebugInfo/DWARFDebugAranges.h | 17 +-
 lib/DebugInfo/DWARFDebugFrame.cpp | 72 +-
 lib/DebugInfo/DWARFDebugFrame.h | 6 +-
 lib/DebugInfo/DWARFDebugInfoEntry.cpp | 95 +-
 lib/DebugInfo/DWARFDebugInfoEntry.h | 57 +-
 lib/DebugInfo/DWARFDebugLine.cpp | 348 +-
 lib/DebugInfo/DWARFDebugLine.h | 109 +-
 lib/DebugInfo/DWARFDebugRangeList.cpp | 15 +-
 lib/DebugInfo/DWARFDebugRangeList.h | 15 +-
 lib/DebugInfo/DWARFFormValue.cpp | 10 +-
 lib/DebugInfo/DWARFTypeUnit.h | 10 +-
 lib/DebugInfo/DWARFUnit.cpp | 145 +-
 lib/DebugInfo/DWARFUnit.h | 24 +-
 lib/DebugInfo/module.modulemap | 1 +
 lib/ExecutionEngine/ExecutionEngine.cpp | 146 +-
 lib/ExecutionEngine/ExecutionEngineBindings.cpp | 15 +-
 .../IntelJITEvents/IntelJITEventListener.cpp | 10 +-
 lib/ExecutionEngine/Interpreter/Execution.cpp | 67 +-
 .../Interpreter/ExternalFunctions.cpp | 6 +-
 lib/ExecutionEngine/Interpreter/Interpreter.cpp | 2 +-
 lib/ExecutionEngine/Interpreter/Interpreter.h | 4 +-
 lib/ExecutionEngine/JIT/JIT.cpp | 26 +-
 lib/ExecutionEngine/JIT/JIT.h | 2 +-
 lib/ExecutionEngine/JIT/JITEmitter.cpp | 36 +-
 lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 57 +-
 lib/ExecutionEngine/MCJIT/LLVMBuild.txt | 2 +-
 lib/ExecutionEngine/MCJIT/MCJIT.cpp | 35 +-
 lib/ExecutionEngine/MCJIT/MCJIT.h | 4 +-
 lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp | 2 +-
 .../OProfileJIT/OProfileJITEventListener.cpp | 17 +-
 .../OProfileJIT/OProfileWrapper.cpp | 3 +-
 lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp | 13 +-
 .../RuntimeDyld/ObjectImageCommon.h | 21 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 97 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 107 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 8 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 27 +-
 .../RuntimeDyld/RuntimeDyldMachO.cpp | 483 +-
 lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 60 +-
 lib/ExecutionEngine/TargetSelect.cpp | 8 +-
 lib/IR/Android.mk | 1 +
 lib/IR/AsmWriter.cpp | 100 +-
 lib/IR/Attributes.cpp | 22 +-
 lib/IR/AutoUpgrade.cpp | 69 +-
 lib/IR/BasicBlock.cpp | 20 +-
 lib/IR/CMakeLists.txt | 5 +-
 lib/IR/ConstantFold.cpp | 108 +-
 lib/IR/Constants.cpp | 65 +-
 lib/IR/ConstantsContext.h | 7 +-
 lib/IR/Core.cpp | 140 +-
 lib/IR/DIBuilder.cpp | 204 +-
 lib/IR/DataLayout.cpp | 14 +-
 lib/IR/DebugInfo.cpp | 69 +-
 lib/IR/DebugLoc.cpp | 59 +-
 lib/IR/DiagnosticInfo.cpp | 125 +
 lib/IR/Function.cpp | 24 +-
 lib/IR/GCOV.cpp | 183 +-
 lib/IR/Globals.cpp | 155 +-
 lib/IR/IRPrintingPasses.cpp | 2 +-
 lib/IR/InlineAsm.cpp | 2 +-
 lib/IR/Instruction.cpp | 147 +-
 lib/IR/Instructions.cpp | 91 +-
 lib/IR/IntrinsicInst.cpp | 4 +-
 lib/IR/LLVMContext.cpp | 36 +-
 lib/IR/LLVMContextImpl.cpp | 27 +-
 lib/IR/LLVMContextImpl.h | 10 +-
 lib/IR/LeaksContext.h | 16 +-
 lib/IR/LegacyPassManager.cpp | 67 +-
 lib/IR/MDBuilder.cpp | 139 +
 lib/IR/Mangler.cpp | 2 +-
 lib/IR/Metadata.cpp | 45 +-
 lib/IR/Module.cpp | 20 +-
 lib/IR/Pass.cpp | 12 +-
 lib/IR/PassManager.cpp | 11 +-
 lib/IR/PassRegistry.cpp | 22 +-
 lib/IR/SymbolTableListTraitsImpl.h | 4 +-
 lib/IR/Type.cpp | 28 +-
 lib/IR/Use.cpp | 6 +-
 lib/IR/Value.cpp | 86 +-
 lib/IR/ValueSymbolTable.cpp | 9 +-
 lib/IR/Verifier.cpp | 442 +-
 lib/IR/module.modulemap | 1 +
 lib/IRReader/IRReader.cpp | 14 +-
 lib/LTO/LTOCodeGenerator.cpp | 60 +-
 lib/LTO/LTOModule.cpp | 54 +-
 lib/Linker/LinkModules.cpp | 282 +-
 lib/MC/Android.mk | 4 +-
 lib/MC/CMakeLists.txt | 2 +-
 lib/MC/ELFObjectWriter.cpp | 446 +-
 lib/MC/MCAsmInfo.cpp | 6 +-
 lib/MC/MCAsmStreamer.cpp | 193 +-
 lib/MC/MCAssembler.cpp | 196 +-
 lib/MC/MCContext.cpp | 102 +-
 lib/MC/MCDisassembler.cpp | 14 -
 lib/MC/MCDisassembler/Disassembler.cpp | 27 +-
 lib/MC/MCDwarf.cpp | 111 +-
 lib/MC/MCELFStreamer.cpp | 11 +-
 lib/MC/MCExpr.cpp | 69 +-
 lib/MC/MCExternalSymbolizer.cpp | 14 +-
 lib/MC/MCFixup.cpp | 37 -
 lib/MC/MCFunction.cpp | 15 +-
 lib/MC/MCInst.cpp | 4 +-
 lib/MC/MCMachOStreamer.cpp | 27 +-
 lib/MC/MCModule.cpp | 16 +-
 lib/MC/MCModuleYAML.cpp | 25 +-
 lib/MC/MCNullStreamer.cpp | 10 +-
 lib/MC/MCObjectDisassembler.cpp | 16 +-
 lib/MC/MCObjectFileInfo.cpp | 111 +-
 lib/MC/MCObjectStreamer.cpp | 40 +-
 lib/MC/MCObjectSymbolizer.cpp | 42 +-
 lib/MC/MCParser/AsmLexer.cpp | 8 +-
 lib/MC/MCParser/AsmParser.cpp | 82 +-
 lib/MC/MCParser/COFFAsmParser.cpp | 6 +-
 lib/MC/MCParser/DarwinAsmParser.cpp | 9 +-
 lib/MC/MCParser/ELFAsmParser.cpp | 8 +-
 lib/MC/MCParser/MCAsmLexer.cpp | 2 +-
 lib/MC/MCParser/MCAsmParser.cpp | 2 +-
 lib/MC/MCRelocationInfo.cpp | 4 +-
 lib/MC/MCSectionCOFF.cpp | 5 +-
 lib/MC/MCSectionMachO.cpp | 16 +-
 lib/MC/MCStreamer.cpp | 38 +-
 lib/MC/MCSubtargetInfo.cpp | 17 +-
 lib/MC/MCTargetOptions.cpp | 19 +
 lib/MC/MCValue.cpp | 20 +-
 lib/MC/MachObjectWriter.cpp | 34 +-
 lib/MC/SubtargetFeature.cpp | 154 +-
 lib/MC/WinCOFFObjectWriter.cpp | 294 +-
 lib/MC/WinCOFFStreamer.cpp | 313 +-
 lib/Object/Android.mk | 1 +
 lib/Object/Archive.cpp | 21 +-
 lib/Object/CMakeLists.txt | 1 +
 lib/Object/COFFObjectFile.cpp | 55 +-
 lib/Object/COFFYAML.cpp | 33 +-
 lib/Object/ELF.cpp | 10 +
 lib/Object/ELFYAML.cpp | 429 +-
 lib/Object/LLVMBuild.txt | 2 +-
 lib/Object/MachOObjectFile.cpp | 135 +-
 lib/Object/MachOUniversal.cpp | 22 +-
 lib/Object/Object.cpp | 9 +-
 lib/Object/StringTableBuilder.cpp | 51 +
 lib/Option/ArgList.cpp | 87 +-
 lib/Option/OptTable.cpp | 10 +-
 lib/Option/Option.cpp | 20 +-
 lib/ProfileData/Android.mk | 33 +
 lib/ProfileData/InstrProf.cpp | 4 +-
 lib/ProfileData/InstrProfIndexed.h | 55 +
 lib/ProfileData/InstrProfReader.cpp | 165 +-
 lib/ProfileData/InstrProfWriter.cpp | 90 +-
 lib/Support/APFloat.cpp | 18 +-
 lib/Support/APInt.cpp | 15 +-
 lib/Support/Allocator.cpp | 33 +-
 lib/Support/Atomic.cpp | 1 +
 lib/Support/BlockFrequency.cpp | 95 +-
 lib/Support/BranchProbability.cpp | 55 +-
 lib/Support/CommandLine.cpp | 75 +-
 lib/Support/Compression.cpp | 35 +-
 lib/Support/CrashRecoveryContext.cpp | 25 +-
 lib/Support/DAGDeltaAlgorithm.cpp | 2 +
 lib/Support/DataExtractor.cpp | 4 +-
 lib/Support/DataStream.cpp | 5 +-
 lib/Support/Debug.cpp | 2 +-
 lib/Support/Dwarf.cpp | 44 +-
 lib/Support/DynamicLibrary.cpp | 12 +-
 lib/Support/ErrorHandling.cpp | 6 +-
 lib/Support/FileOutputBuffer.cpp | 13 +-
 lib/Support/FoldingSet.cpp | 22 +-
 lib/Support/FormattedStream.cpp | 2 +-
 lib/Support/GraphWriter.cpp | 6 +-
 lib/Support/Host.cpp | 45 +-
 lib/Support/IntervalMap.cpp | 2 +-
 lib/Support/LineIterator.cpp | 7 +-
 lib/Support/LockFileManager.cpp | 18 +-
 lib/Support/ManagedStatic.cpp | 17 +-
 lib/Support/MemoryBuffer.cpp | 120 +-
 lib/Support/Mutex.cpp | 10 +-
 lib/Support/Path.cpp | 8 +-
 lib/Support/PrettyStackTrace.cpp | 4 +-
 lib/Support/RWMutex.cpp | 14 +-
 lib/Support/Regex.cpp | 2 +-
 lib/Support/SearchForAddressOfSpecialSymbol.cpp | 2 +-
 lib/Support/SmallPtrSet.cpp | 2 +-
 lib/Support/SourceMgr.cpp | 7 +-
 lib/Support/StringMap.cpp | 10 +-
 lib/Support/StringRef.cpp | 4 +-
 lib/Support/TargetRegistry.cpp | 20 +-
 lib/Support/ThreadLocal.cpp | 4 +-
 lib/Support/Threading.cpp | 6 +-
 lib/Support/Timer.cpp | 21 +-
 lib/Support/Triple.cpp | 16 +-
 lib/Support/Unix/Memory.inc | 14 +-
 lib/Support/Unix/Path.inc | 22 +-
 lib/Support/Unix/Process.inc | 4 +-
 lib/Support/Unix/Program.inc | 23 +-
 lib/Support/Unix/Signals.inc | 32 +-
 lib/Support/Unix/TimeValue.inc | 10 +-
 lib/Support/Windows/DynamicLibrary.inc | 2 +-
 lib/Support/Windows/Process.inc | 14 +-
 lib/Support/Windows/TimeValue.inc | 19 +-
 lib/Support/YAMLParser.cpp | 44 +-
 lib/Support/YAMLTraits.cpp | 58 +-
 lib/Support/raw_ostream.cpp | 6 +-
 lib/Support/regengine.inc | 2 +-
 lib/TableGen/Main.cpp | 1 +
 lib/TableGen/Record.cpp | 190 +-
 lib/TableGen/TGLexer.cpp | 11 +-
 lib/TableGen/TGLexer.h | 2 +-
 lib/TableGen/TGParser.cpp | 398 +-
 lib/TableGen/TGParser.h | 12 +-
 lib/TableGen/module.modulemap | 1 +
 lib/Target/AArch64/AArch64.h | 45 +-
 lib/Target/AArch64/AArch64.td | 93 +-
 lib/Target/AArch64/AArch64AddressTypePromotion.cpp | 492 +
 lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp | 387 +
 lib/Target/AArch64/AArch64AsmPrinter.cpp | 652 +-
 lib/Target/AArch64/AArch64AsmPrinter.h | 76 -
 lib/Target/AArch64/AArch64BranchFixupPass.cpp | 600 -
 lib/Target/AArch64/AArch64BranchRelaxation.cpp | 510 +
 lib/Target/AArch64/AArch64CallingConv.td | 197 -
 lib/Target/AArch64/AArch64CallingConvention.td | 240 +
 .../AArch64/AArch64CleanupLocalDynamicTLSPass.cpp | 147 +
 lib/Target/AArch64/AArch64CollectLOH.cpp | 1117 ++
 lib/Target/AArch64/AArch64ConditionalCompares.cpp | 919 ++
 .../AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 134 +
 lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 749 ++
 lib/Target/AArch64/AArch64FastISel.cpp | 1981 ++++
 lib/Target/AArch64/AArch64FrameLowering.cpp | 1295 ++-
 lib/Target/AArch64/AArch64FrameLowering.h | 125 +-
 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 4074 ++++---
 lib/Target/AArch64/AArch64ISelLowering.cpp | 11375 ++++++++++++-------
 lib/Target/AArch64/AArch64ISelLowering.h | 701 +-
 lib/Target/AArch64/AArch64InstrAtomics.td | 364 +
 lib/Target/AArch64/AArch64InstrFormats.td | 9605 ++++++++++++--
 lib/Target/AArch64/AArch64InstrInfo.cpp | 2646 +++--
 lib/Target/AArch64/AArch64InstrInfo.h | 236 +-
 lib/Target/AArch64/AArch64InstrInfo.td | 10158 ++++++++---------
 lib/Target/AArch64/AArch64InstrNEON.td | 9476 ---------------
 lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 942 ++
 lib/Target/AArch64/AArch64MCInstLower.cpp | 243 +-
 lib/Target/AArch64/AArch64MCInstLower.h | 52 +
 lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 18 -
 lib/Target/AArch64/AArch64MachineFunctionInfo.h | 188 +-
 lib/Target/AArch64/AArch64PerfectShuffle.h | 6586 +++++++++++
 lib/Target/AArch64/AArch64PromoteConstant.cpp | 578 +
 lib/Target/AArch64/AArch64RegisterInfo.cpp | 452 +-
 lib/Target/AArch64/AArch64RegisterInfo.h | 104 +-
 lib/Target/AArch64/AArch64RegisterInfo.td | 733 +-
 lib/Target/AArch64/AArch64SchedA53.td | 291 +
 lib/Target/AArch64/AArch64SchedCyclone.td | 865 ++
 lib/Target/AArch64/AArch64Schedule.td | 168 +-
 lib/Target/AArch64/AArch64ScheduleA53.td | 144 -
 lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 48 +-
 lib/Target/AArch64/AArch64SelectionDAGInfo.h | 17 +-
 lib/Target/AArch64/AArch64StorePairSuppress.cpp | 168 +
 lib/Target/AArch64/AArch64Subtarget.cpp | 117 +-
 lib/Target/AArch64/AArch64Subtarget.h | 91 +-
 lib/Target/AArch64/AArch64TargetMachine.cpp | 170 +-
 lib/Target/AArch64/AArch64TargetMachine.h | 74 +-
 lib/Target/AArch64/AArch64TargetObjectFile.cpp | 46 +-
 lib/Target/AArch64/AArch64TargetObjectFile.h | 35 +-
 lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 385 +-
 lib/Target/AArch64/Android.mk | 34 +-
 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 5357 +++++----
 lib/Target/AArch64/AsmParser/CMakeLists.txt | 3 +
 lib/Target/AArch64/AsmParser/LLVMBuild.txt | 2 +-
 lib/Target/AArch64/AsmParser/Makefile | 2 +-
 lib/Target/AArch64/CMakeLists.txt | 32 +-
 .../AArch64/Disassembler/AArch64Disassembler.cpp | 2556 +++--
 .../AArch64/Disassembler/AArch64Disassembler.h | 40 +
 .../Disassembler/AArch64ExternalSymbolizer.cpp | 221 +
 .../Disassembler/AArch64ExternalSymbolizer.h | 38 +
 lib/Target/AArch64/Disassembler/Android.mk | 3 +-
 lib/Target/AArch64/Disassembler/CMakeLists.txt | 11 +
 lib/Target/AArch64/Disassembler/LLVMBuild.txt | 2 +-
 lib/Target/AArch64/Disassembler/Makefile | 2 +-
 .../AArch64/InstPrinter/AArch64InstPrinter.cpp | 1567 ++-
 .../AArch64/InstPrinter/AArch64InstPrinter.h | 214 +-
 lib/Target/AArch64/InstPrinter/Android.mk | 1 +
 lib/Target/AArch64/InstPrinter/CMakeLists.txt | 4 +
 lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 2 +-
 lib/Target/AArch64/InstPrinter/Makefile | 2 +-
 lib/Target/AArch64/LLVMBuild.txt | 4 +-
 .../AArch64/MCTargetDesc/AArch64AddressingModes.h | 738 ++
 .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 1009 +-
 .../MCTargetDesc/AArch64ELFObjectWriter.cpp | 428 +-
 .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 49 +-
 .../AArch64/MCTargetDesc/AArch64ELFStreamer.h | 7 +-
 .../AArch64/MCTargetDesc/AArch64FixupKinds.h | 161 +-
 .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 67 +-
 lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 23 +-
 .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 914 +-
 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 212 +-
 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 237 +-
 .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 196 +-
 .../AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 38 +-
 .../MCTargetDesc/AArch64MachObjectWriter.cpp | 396 +
 lib/Target/AArch64/MCTargetDesc/Android.mk | 1 +
 lib/Target/AArch64/MCTargetDesc/CMakeLists.txt | 7 +-
 lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 2 +-
 lib/Target/AArch64/Makefile | 21 +-
 lib/Target/AArch64/README.txt | 2 -
 .../AArch64/TargetInfo/AArch64TargetInfo.cpp | 30 +-
 lib/Target/AArch64/TargetInfo/CMakeLists.txt | 4 +
 lib/Target/AArch64/TargetInfo/LLVMBuild.txt | 2 +-
 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 424 +-
 lib/Target/AArch64/Utils/AArch64BaseInfo.h | 592 +-
 lib/Target/AArch64/Utils/Android.mk | 15 +
 lib/Target/AArch64/Utils/LLVMBuild.txt | 2 +-
 lib/Target/AArch64/Utils/Makefile | 5 +-
 lib/Target/ARM/A15SDOptimizer.cpp | 17 +-
 lib/Target/ARM/ARM.h | 2 -
 lib/Target/ARM/ARMAsmPrinter.cpp | 163 +-
 lib/Target/ARM/ARMAsmPrinter.h | 9 +-
 lib/Target/ARM/ARMAtomicExpandPass.cpp | 406 -
 lib/Target/ARM/ARMBaseInstrInfo.cpp | 73 +-
 lib/Target/ARM/ARMBaseInstrInfo.h | 2 +-
 lib/Target/ARM/ARMBaseRegisterInfo.cpp | 18 +-
 lib/Target/ARM/ARMBaseRegisterInfo.h | 6 +-
 lib/Target/ARM/ARMCallingConv.h | 113 +-
 lib/Target/ARM/ARMCallingConv.td | 3 +
 lib/Target/ARM/ARMCodeEmitter.cpp | 9 +-
 lib/Target/ARM/ARMConstantIslandPass.cpp | 21 +-
 lib/Target/ARM/ARMExpandPseudoInsts.cpp | 62 +-
 lib/Target/ARM/ARMFastISel.cpp | 32 +-
 lib/Target/ARM/ARMFeatures.h | 6 +-
 lib/Target/ARM/ARMFrameLowering.cpp | 129 +-
 lib/Target/ARM/ARMFrameLowering.h | 2 +-
 lib/Target/ARM/ARMHazardRecognizer.cpp | 4 +-
 lib/Target/ARM/ARMHazardRecognizer.h | 2 +-
 lib/Target/ARM/ARMISelDAGToDAG.cpp | 126 +-
 lib/Target/ARM/ARMISelLowering.cpp | 888 +-
 lib/Target/ARM/ARMISelLowering.h | 28 +-
 lib/Target/ARM/ARMInstrFormats.td | 5 +-
 lib/Target/ARM/ARMInstrInfo.td | 178 +-
 lib/Target/ARM/ARMInstrNEON.td | 1326 ++-
 lib/Target/ARM/ARMInstrThumb.td | 25 +-
 lib/Target/ARM/ARMInstrThumb2.td | 30 +-
 lib/Target/ARM/ARMJITInfo.cpp | 7 +-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 305 +-
 lib/Target/ARM/ARMOptimizeBarriersPass.cpp | 8 +-
 lib/Target/ARM/ARMRegisterInfo.td | 8 +-
 lib/Target/ARM/ARMScheduleV6.td | 4 +-
 lib/Target/ARM/ARMSelectionDAGInfo.cpp | 51 +-
 lib/Target/ARM/ARMSubtarget.cpp | 6 +-
 lib/Target/ARM/ARMSubtarget.h | 8 +-
 lib/Target/ARM/ARMTargetMachine.cpp | 13 +-
 lib/Target/ARM/ARMTargetMachine.h | 17 +-
 lib/Target/ARM/ARMTargetObjectFile.cpp | 7 +-
 lib/Target/ARM/ARMTargetObjectFile.h | 2 +-
 lib/Target/ARM/ARMTargetTransformInfo.cpp | 7 +-
 lib/Target/ARM/Android.mk | 1 -
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 485 +-
 lib/Target/ARM/CMakeLists.txt | 1 -
 lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 24 +-
 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 3 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 129 +-
 lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 8 +-
 lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 92 +-
 lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 14 +-
 lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 4 +-
 lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 14 +-
 lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 3 +-
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 29 +-
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 10 +-
 .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 14 +-
 lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 6 +-
 .../ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 82 +
 lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 46 +
 lib/Target/ARM/MCTargetDesc/Android.mk | 4 +-
 lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 8 +-
 lib/Target/ARM/MLxExpansionPass.cpp | 9 +-
 lib/Target/ARM/README-Thumb.txt | 4 -
 lib/Target/ARM/Thumb1FrameLowering.cpp | 4 +-
 lib/Target/ARM/Thumb1RegisterInfo.h | 2 +-
 lib/Target/ARM/Thumb2ITBlockPass.cpp | 3 +-
 lib/Target/ARM/Thumb2SizeReduction.cpp | 14 +-
 lib/Target/ARM64/ARM64.h | 48 -
 lib/Target/ARM64/ARM64.td | 95 -
 lib/Target/ARM64/ARM64AddressTypePromotion.cpp | 496 -
 lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp | 392 -
 lib/Target/ARM64/ARM64AsmPrinter.cpp | 563 -
 lib/Target/ARM64/ARM64BranchRelaxation.cpp | 505 -
 lib/Target/ARM64/ARM64CallingConv.h | 94 -
 lib/Target/ARM64/ARM64CallingConvention.td | 210 -
 .../ARM64/ARM64CleanupLocalDynamicTLSPass.cpp | 147 -
 lib/Target/ARM64/ARM64CollectLOH.cpp | 1157 --
 lib/Target/ARM64/ARM64ConditionalCompares.cpp | 918 --
 .../ARM64/ARM64DeadRegisterDefinitionsPass.cpp | 104 -
 lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp | 737 --
 lib/Target/ARM64/ARM64FastISel.cpp | 1929 ----
 lib/Target/ARM64/ARM64FrameLowering.cpp | 816 --
 lib/Target/ARM64/ARM64FrameLowering.h | 75 -
 lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 2381 ----
 lib/Target/ARM64/ARM64ISelLowering.cpp | 7551 ------------
 lib/Target/ARM64/ARM64ISelLowering.h | 422 -
 lib/Target/ARM64/ARM64InstrAtomics.td | 293 -
 lib/Target/ARM64/ARM64InstrFormats.td | 8193 -------------
 lib/Target/ARM64/ARM64InstrInfo.cpp | 1864 ---
 lib/Target/ARM64/ARM64InstrInfo.h | 219 -
 lib/Target/ARM64/ARM64InstrInfo.td | 4458 --------
 lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp | 947 --
 lib/Target/ARM64/ARM64MCInstLower.cpp | 201 -
 lib/Target/ARM64/ARM64MCInstLower.h | 52 -
 lib/Target/ARM64/ARM64MachineFunctionInfo.h | 139 -
 lib/Target/ARM64/ARM64PerfectShuffle.h | 6586 -----------
 lib/Target/ARM64/ARM64PromoteConstant.cpp | 585 -
 lib/Target/ARM64/ARM64RegisterInfo.cpp | 400 -
 lib/Target/ARM64/ARM64RegisterInfo.h | 101 -
 lib/Target/ARM64/ARM64RegisterInfo.td | 561 -
 lib/Target/ARM64/ARM64SchedCyclone.td | 852 --
 lib/Target/ARM64/ARM64Schedule.td | 92 -
 lib/Target/ARM64/ARM64SelectionDAGInfo.cpp | 57 -
 lib/Target/ARM64/ARM64SelectionDAGInfo.h | 37 -
 lib/Target/ARM64/ARM64StorePairSuppress.cpp | 167 -
 lib/Target/ARM64/ARM64Subtarget.cpp | 100 -
 lib/Target/ARM64/ARM64Subtarget.h | 87 -
 lib/Target/ARM64/ARM64TargetMachine.cpp | 157 -
 lib/Target/ARM64/ARM64TargetMachine.h | 69 -
 lib/Target/ARM64/ARM64TargetObjectFile.cpp | 52 -
 lib/Target/ARM64/ARM64TargetObjectFile.h | 40 -
 lib/Target/ARM64/ARM64TargetTransformInfo.cpp | 326 -
 lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp | 4832 --------
 lib/Target/ARM64/AsmParser/CMakeLists.txt | 6 -
 lib/Target/ARM64/AsmParser/LLVMBuild.txt | 24 -
 lib/Target/ARM64/AsmParser/Makefile | 15 -
 lib/Target/ARM64/CMakeLists.txt | 50 -
 .../ARM64/Disassembler/ARM64Disassembler.cpp | 2142 ----
 lib/Target/ARM64/Disassembler/ARM64Disassembler.h | 54 -
 lib/Target/ARM64/Disassembler/CMakeLists.txt | 13 -
 lib/Target/ARM64/Disassembler/LLVMBuild.txt | 24 -
 lib/Target/ARM64/Disassembler/Makefile | 16 -
 lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp | 1428 ---
 lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h | 157 -
 lib/Target/ARM64/InstPrinter/CMakeLists.txt | 7 -
 lib/Target/ARM64/InstPrinter/LLVMBuild.txt | 24 -
 lib/Target/ARM64/InstPrinter/Makefile | 15 -
 lib/Target/ARM64/LLVMBuild.txt | 36 -
 .../ARM64/MCTargetDesc/ARM64AddressingModes.h | 758 --
 lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp | 533 -
 lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h | 998 --
 .../ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp | 237 -
 lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp | 158 -
 lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h | 26 -
 lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h | 72 -
 lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp | 92 -
 lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h | 36 -
 .../ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp | 563 -
 lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp | 168 -
 lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h | 162 -
 .../ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp | 167 -
 lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h | 62 -
 .../ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp | 396 -
 lib/Target/ARM64/MCTargetDesc/CMakeLists.txt | 14 -
 lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt | 24 -
 lib/Target/ARM64/MCTargetDesc/Makefile | 16 -
 lib/Target/ARM64/Makefile | 25 -
 lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp | 21 -
 lib/Target/ARM64/TargetInfo/CMakeLists.txt | 7 -
 lib/Target/ARM64/TargetInfo/LLVMBuild.txt | 24 -
 lib/Target/ARM64/TargetInfo/Makefile | 15 -
 lib/Target/CppBackend/CPPBackend.cpp | 28 +-
 lib/Target/CppBackend/CPPTargetMachine.h | 14 +-
 lib/Target/Hexagon/Hexagon.td | 2 -
 lib/Target/Hexagon/HexagonAsmPrinter.cpp | 5 +-
 lib/Target/Hexagon/HexagonAsmPrinter.h | 11 +-
 lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 15 +-
 lib/Target/Hexagon/HexagonCopyToCombine.cpp | 18 +-
 lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 6 +-
 lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 8 +-
 lib/Target/Hexagon/HexagonFrameLowering.cpp | 4 +-
 lib/Target/Hexagon/HexagonFrameLowering.h | 32 +-
 lib/Target/Hexagon/HexagonHardwareLoops.cpp | 107 +-
 lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 19 +-
 lib/Target/Hexagon/HexagonISelLowering.cpp | 29 +-
 lib/Target/Hexagon/HexagonISelLowering.h | 43 +-
 lib/Target/Hexagon/HexagonInstrFormats.td | 167 +-
 lib/Target/Hexagon/HexagonInstrFormatsV4.td | 31 +-
 lib/Target/Hexagon/HexagonInstrInfo.cpp | 19 +-
 lib/Target/Hexagon/HexagonInstrInfo.h | 197 +-
 lib/Target/Hexagon/HexagonInstrInfo.td | 14 +-
 lib/Target/Hexagon/HexagonInstrInfoV4.td | 35 +-
 lib/Target/Hexagon/HexagonMachineScheduler.cpp | 20 +-
 lib/Target/Hexagon/HexagonMachineScheduler.h | 19 +-
 lib/Target/Hexagon/HexagonNewValueJump.cpp | 13 +-
 lib/Target/Hexagon/HexagonPeephole.cpp | 9 +-
 lib/Target/Hexagon/HexagonRegisterInfo.cpp | 9 +-
 lib/Target/Hexagon/HexagonRegisterInfo.h | 17 +-
 lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 6 +-
 lib/Target/Hexagon/HexagonSchedule.td | 51 -
 lib/Target/Hexagon/HexagonScheduleV4.td | 165 +-
 lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp | 3 +-
 lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 3 +-
 .../Hexagon/HexagonSplitConst32AndConst64.cpp | 24 +-
 lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 10 +-
 lib/Target/Hexagon/HexagonSubtarget.cpp | 2 +
 lib/Target/Hexagon/HexagonTargetMachine.cpp | 38 +-
 lib/Target/Hexagon/HexagonTargetMachine.h | 20 +-
 lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 28 +-
 .../Hexagon/InstPrinter/HexagonInstPrinter.cpp | 3 +-
 .../Hexagon/InstPrinter/HexagonInstPrinter.h | 2 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 72 +-
 .../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 2 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 2 +-
 lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 2 +-
 .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 6 +-
 lib/Target/LLVMBuild.txt | 2 +-
 .../MSP430/InstPrinter/MSP430InstPrinter.cpp | 5 +-
 lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 6 +-
 lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 2 +-
 .../MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 6 +-
 lib/Target/MSP430/MSP430AsmPrinter.cpp | 13 +-
 lib/Target/MSP430/MSP430BranchSelector.cpp | 7 +-
 lib/Target/MSP430/MSP430FrameLowering.cpp | 2 +-
 lib/Target/MSP430/MSP430FrameLowering.h | 22 +-
 lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 39 +-
 lib/Target/MSP430/MSP430ISelLowering.cpp | 23 +-
 lib/Target/MSP430/MSP430ISelLowering.h | 56 +-
 lib/Target/MSP430/MSP430InstrInfo.cpp | 8 +-
 lib/Target/MSP430/MSP430InstrInfo.h | 39 +-
 lib/Target/MSP430/MSP430RegisterInfo.cpp | 18 +-
 lib/Target/MSP430/MSP430RegisterInfo.h | 12 +-
 lib/Target/MSP430/MSP430SelectionDAGInfo.cpp | 3 +-
 lib/Target/MSP430/MSP430Subtarget.cpp | 6 +-
 lib/Target/MSP430/MSP430TargetMachine.cpp | 4 +-
 lib/Target/MSP430/MSP430TargetMachine.h | 16 +-
 lib/Target/Mips/Android.mk | 1 +
 lib/Target/Mips/AsmParser/LLVMBuild.txt | 2 +-
 lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 273 +-
 lib/Target/Mips/CMakeLists.txt | 2 +
 lib/Target/Mips/Disassembler/LLVMBuild.txt | 2 +-
 lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 370 +-
 lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 5 +-
 lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 6 +-
 lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 92 +-
 lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 18 +-
 .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 12 +
 lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 12 +
 lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 2 +-
 lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 85 +-
 lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h | 27 +-
 lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 3 +-
 lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 10 +-
 lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 2 +-
 lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 13 +-
 .../Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 7 +-
 .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 153 +
 lib/Target/Mips/Makefile | 2 +-
 lib/Target/Mips/MicroMipsInstrFPU.td | 14 +-
 lib/Target/Mips/MicroMipsInstrInfo.td | 23 +-
 lib/Target/Mips/Mips.td | 101 +-
 lib/Target/Mips/Mips16FrameLowering.cpp | 2 +-
 lib/Target/Mips/Mips16FrameLowering.h | 20 +-
 lib/Target/Mips/Mips16HardFloat.cpp | 14 +-
 lib/Target/Mips/Mips16HardFloat.h | 4 +-
 lib/Target/Mips/Mips16HardFloatInfo.cpp | 4 +-
 lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 19 +-
 lib/Target/Mips/Mips16ISelDAGToDAG.h | 10 +-
 lib/Target/Mips/Mips16ISelLowering.cpp | 34 +-
 lib/Target/Mips/Mips16ISelLowering.h | 21 +-
 lib/Target/Mips/Mips16InstrInfo.cpp | 1 +
 lib/Target/Mips/Mips16InstrInfo.h | 50 +-
 lib/Target/Mips/Mips16RegisterInfo.cpp | 2 +
 lib/Target/Mips/Mips16RegisterInfo.h | 16 +-
 lib/Target/Mips/Mips32r6InstrFormats.td | 386 +
 lib/Target/Mips/Mips32r6InstrInfo.td | 583 +
 lib/Target/Mips/Mips64InstrInfo.td | 257 +-
 lib/Target/Mips/Mips64r6InstrInfo.td | 88 +
 lib/Target/Mips/MipsAsmPrinter.cpp | 40 +-
 lib/Target/Mips/MipsAsmPrinter.h | 32 +-
 lib/Target/Mips/MipsCallingConv.td | 4 +-
 lib/Target/Mips/MipsCodeEmitter.cpp | 46 +-
 lib/Target/Mips/MipsCondMov.td | 138 +-
 lib/Target/Mips/MipsConstantIslandPass.cpp | 20 +-
 lib/Target/Mips/MipsDelaySlotFiller.cpp | 78 +-
 lib/Target/Mips/MipsFastISel.cpp | 283 +
 lib/Target/Mips/MipsFrameLowering.cpp | 2 +-
 lib/Target/Mips/MipsFrameLowering.h | 2 +-
 lib/Target/Mips/MipsISelDAGToDAG.cpp | 12 +-
 lib/Target/Mips/MipsISelDAGToDAG.h | 12 +-
 lib/Target/Mips/MipsISelLowering.cpp | 296 +-
 lib/Target/Mips/MipsISelLowering.h | 103 +-
 lib/Target/Mips/MipsInstrFPU.td | 356 +-
 lib/Target/Mips/MipsInstrFormats.td | 33 +-
 lib/Target/Mips/MipsInstrInfo.cpp | 8 +-
 lib/Target/Mips/MipsInstrInfo.h | 50 +-
 lib/Target/Mips/MipsInstrInfo.td | 412 +-
 lib/Target/Mips/MipsJITInfo.cpp | 3 +-
 lib/Target/Mips/MipsJITInfo.h | 14 +-
 lib/Target/Mips/MipsLongBranch.cpp | 96 +-
 lib/Target/Mips/MipsMCInstLower.cpp | 68 +
 lib/Target/Mips/MipsMCInstLower.h | 8 +
 lib/Target/Mips/MipsMSAInstrInfo.td | 90 +-
 lib/Target/Mips/MipsMachineFunction.cpp | 7 +-
 lib/Target/Mips/MipsMachineFunction.h | 8 +-
 lib/Target/Mips/MipsModuleISelDAGToDAG.cpp | 2 +
 lib/Target/Mips/MipsModuleISelDAGToDAG.h | 8 +-
 lib/Target/Mips/MipsOptimizePICCall.cpp | 46 +-
 lib/Target/Mips/MipsOs16.cpp | 3 +-
 lib/Target/Mips/MipsOs16.h | 4 +-
 lib/Target/Mips/MipsRegisterInfo.cpp | 16 +-
 lib/Target/Mips/MipsRegisterInfo.h | 21 +-
 lib/Target/Mips/MipsRegisterInfo.td | 16 +
 lib/Target/Mips/MipsSEFrameLowering.cpp | 3 +-
 lib/Target/Mips/MipsSEFrameLowering.h | 14 +-
 lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 15 +-
 lib/Target/Mips/MipsSEISelDAGToDAG.h | 66 +-
 lib/Target/Mips/MipsSEISelLowering.cpp | 185 +-
 lib/Target/Mips/MipsSEISelLowering.h | 36 +-
 lib/Target/Mips/MipsSEInstrInfo.cpp | 2 +-
 lib/Target/Mips/MipsSEInstrInfo.h | 48 +-
 lib/Target/Mips/MipsSERegisterInfo.cpp | 4 +-
 lib/Target/Mips/MipsSERegisterInfo.h | 12 +-
 lib/Target/Mips/MipsSelectionDAGInfo.cpp | 3 +-
 lib/Target/Mips/MipsSubtarget.cpp | 57 +-
 lib/Target/Mips/MipsSubtarget.h | 61 +-
 lib/Target/Mips/MipsTargetMachine.cpp | 12 +-
 lib/Target/Mips/MipsTargetMachine.h | 46 +-
 lib/Target/Mips/MipsTargetStreamer.h | 134 +-
 lib/Target/NVPTX/CMakeLists.txt | 3 +
 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 3 +-
 lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h | 14 +-
 lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 6 +-
 .../NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 6 +-
 lib/Target/NVPTX/NVPTX.h | 3 +
 lib/Target/NVPTX/NVPTXAllocaHoisting.h | 6 +-
 lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 382 +-
 lib/Target/NVPTX/NVPTXAsmPrinter.h | 66 +-
 lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp | 2 +-
 .../NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 195 +
 lib/Target/NVPTX/NVPTXFrameLowering.h | 10 +-
 lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 10 +-
 lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 826 +-
 lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 15 +-
 lib/Target/NVPTX/NVPTXISelLowering.cpp | 552 +-
 lib/Target/NVPTX/NVPTXISelLowering.h | 139 +-
 lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 178 +
 lib/Target/NVPTX/NVPTXInstrInfo.cpp | 7 +-
 lib/Target/NVPTX/NVPTXInstrInfo.h | 16 +-
 lib/Target/NVPTX/NVPTXIntrinsics.td | 1823 +++
 lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 6 +-
 lib/Target/NVPTX/NVPTXMCExpr.cpp | 3 +-
 lib/Target/NVPTX/NVPTXMCExpr.h | 12 +-
 lib/Target/NVPTX/NVPTXMachineFunctionInfo.h | 46 +
 lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 6 +-
 lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 21 +-
 lib/Target/NVPTX/NVPTXRegisterInfo.h | 23 +-
 lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 357 +
 lib/Target/NVPTX/NVPTXSection.h | 16 +-
 lib/Target/NVPTX/NVPTXSubtarget.cpp | 8 +-
 lib/Target/NVPTX/NVPTXSubtarget.h | 7 +-
 lib/Target/NVPTX/NVPTXTargetMachine.cpp | 47 +-
 lib/Target/NVPTX/NVPTXTargetMachine.h | 24 +-
 lib/Target/NVPTX/NVPTXTargetObjectFile.h | 38 +-
 lib/Target/NVPTX/NVPTXUtilities.cpp | 67 +-
 lib/Target/NVPTX/NVPTXUtilities.h | 4 +
 lib/Target/NVPTX/NVVMReflect.cpp | 14 +-
 lib/Target/PowerPC/AsmParser/LLVMBuild.txt | 4 +-
 lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 52 +-
 lib/Target/PowerPC/Disassembler/LLVMBuild.txt | 2 +-
 .../PowerPC/Disassembler/PPCDisassembler.cpp | 11 +-
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 3 +-
 lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 6 +-
 lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 20 +-
 .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 7 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 4 +-
 .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 5 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 3 +-
 lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 10 +-
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 25 +-
 .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 8 +-
 lib/Target/PowerPC/PPCAsmPrinter.cpp | 82 +-
 lib/Target/PowerPC/PPCBranchSelector.cpp | 9 +-
 lib/Target/PowerPC/PPCCTRLoops.cpp | 30 +-
 lib/Target/PowerPC/PPCCodeEmitter.cpp | 10 +-
 lib/Target/PowerPC/PPCFastISel.cpp | 71 +-
 lib/Target/PowerPC/PPCFrameLowering.cpp | 102 +-
 lib/Target/PowerPC/PPCFrameLowering.h | 28 +-
 lib/Target/PowerPC/PPCHazardRecognizers.cpp | 5 +-
 lib/Target/PowerPC/PPCHazardRecognizers.h | 24 +-
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 164 +-
 lib/Target/PowerPC/PPCISelLowering.cpp | 308 +-
 lib/Target/PowerPC/PPCISelLowering.h | 120 +-
 lib/Target/PowerPC/PPCInstrAltivec.td | 2 +-
 lib/Target/PowerPC/PPCInstrInfo.cpp | 43 +-
 lib/Target/PowerPC/PPCInstrInfo.h | 169 +-
 lib/Target/PowerPC/PPCInstrInfo.td | 8 +-
 lib/Target/PowerPC/PPCInstrVSX.td | 2 +-
 lib/Target/PowerPC/PPCJITInfo.cpp | 3 +-
 lib/Target/PowerPC/PPCJITInfo.h | 16 +-
 lib/Target/PowerPC/PPCMCInstLower.cpp | 2 +-
 lib/Target/PowerPC/PPCRegisterInfo.cpp | 9 +-
 lib/Target/PowerPC/PPCRegisterInfo.h | 40 +-
 lib/Target/PowerPC/PPCRegisterInfo.td | 12 +
 lib/Target/PowerPC/PPCSelectionDAGInfo.cpp | 3 +-
 lib/Target/PowerPC/PPCSubtarget.cpp | 32 +-
 lib/Target/PowerPC/PPCSubtarget.h | 15 +-
 lib/Target/PowerPC/PPCTargetMachine.cpp | 36 +-
 lib/Target/PowerPC/PPCTargetMachine.h | 26 +-
 lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 187 +-
 lib/Target/R600/AMDGPU.h | 11 +-
 lib/Target/R600/AMDGPU.td | 11 +
 lib/Target/R600/AMDGPUAsmPrinter.cpp | 25 +-
 lib/Target/R600/AMDGPUAsmPrinter.h | 13 +-
 lib/Target/R600/AMDGPUCallingConv.td | 2 +-
 lib/Target/R600/AMDGPUConvertToISA.cpp | 4 +-
 lib/Target/R600/AMDGPUFrameLowering.cpp | 2 +-
 lib/Target/R600/AMDGPUFrameLowering.h | 13 +-
 lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 291 +-
 lib/Target/R600/AMDGPUISelLowering.cpp | 744 +-
 lib/Target/R600/AMDGPUISelLowering.h | 91 +-
 lib/Target/R600/AMDGPUInstrInfo.cpp | 22 +-
 lib/Target/R600/AMDGPUInstrInfo.h | 73 +-
 lib/Target/R600/AMDGPUInstrInfo.td | 15 +
 lib/Target/R600/AMDGPUInstructions.td | 49 +-
 lib/Target/R600/AMDGPUIntrinsics.td | 4 +
 lib/Target/R600/AMDGPUMCInstLower.cpp | 28 +-
 lib/Target/R600/AMDGPUMCInstLower.h | 16 +-
 lib/Target/R600/AMDGPURegisterInfo.cpp | 8 +-
 lib/Target/R600/AMDGPURegisterInfo.h | 14 +-
 lib/Target/R600/AMDGPUSubtarget.cpp | 13 +-
 lib/Target/R600/AMDGPUSubtarget.h | 15 +-
 lib/Target/R600/AMDGPUTargetMachine.cpp | 23 +-
 lib/Target/R600/AMDGPUTargetMachine.h | 27 +-
 lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 26 +-
 lib/Target/R600/AMDILCFGStructurizer.cpp | 58 +-
 lib/Target/R600/AMDILISelLowering.cpp | 92 +-
 lib/Target/R600/AMDILIntrinsicInfo.cpp | 2 +-
 lib/Target/R600/AMDILIntrinsicInfo.h | 12 +-
 lib/Target/R600/AMDILIntrinsics.td | 4 -
 lib/Target/R600/CMakeLists.txt | 1 +
 lib/Target/R600/CaymanInstructions.td | 7 +-
 lib/Target/R600/EvergreenInstructions.td | 18 +-
 lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 118 +-
 lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 13 +-
 lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 26 +-
 lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 4 +-
 lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 2 +-
 .../R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 4 +-
 lib/Target/R600/MCTargetDesc/LLVMBuild.txt | 4 +-
 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 10 +-
 lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 10 +-
 lib/Target/R600/Processors.td | 2 +
 lib/Target/R600/R600ClauseMergePass.cpp | 7 +-
 lib/Target/R600/R600ControlFlowFinalizer.cpp | 15 +-
 lib/Target/R600/R600EmitClauseMarkers.cpp | 6 +-
 lib/Target/R600/R600ExpandSpecialInstrs.cpp | 6 +-
 lib/Target/R600/R600ISelLowering.cpp | 150 +-
 lib/Target/R600/R600ISelLowering.h | 32 +-
 lib/Target/R600/R600InstrInfo.cpp | 8 +-
 lib/Target/R600/R600InstrInfo.h | 76 +-
 lib/Target/R600/R600Instructions.td | 6 +
 lib/Target/R600/R600MachineFunctionInfo.h | 2 +-
 lib/Target/R600/R600MachineScheduler.cpp | 14 +-
 lib/Target/R600/R600MachineScheduler.h | 15 +-
 lib/Target/R600/R600OptimizeVectorRegisters.cpp | 11 +-
 lib/Target/R600/R600Packetizer.cpp | 24 +-
 lib/Target/R600/R600RegisterInfo.h | 15 +-
 lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 8 +-
 lib/Target/R600/SIAnnotateControlFlow.cpp | 28 +-
 lib/Target/R600/SIFixSGPRCopies.cpp | 23 +-
 lib/Target/R600/SIISelLowering.cpp | 307 +-
 lib/Target/R600/SIISelLowering.h | 36 +-
 lib/Target/R600/SIInsertWaits.cpp | 8 +-
 lib/Target/R600/SIInstrFormats.td | 23 +-
 lib/Target/R600/SIInstrInfo.cpp | 379 +-
 lib/Target/R600/SIInstrInfo.h | 66 +-
 lib/Target/R600/SIInstrInfo.td | 146 +-
 lib/Target/R600/SIInstructions.td | 1255 +-
 lib/Target/R600/SILowerControlFlow.cpp | 10 +-
 lib/Target/R600/SILowerI1Copies.cpp | 148 +
 lib/Target/R600/SIMachineFunctionInfo.cpp | 57 +-
 lib/Target/R600/SIMachineFunctionInfo.h | 9 +-
 lib/Target/R600/SIRegisterInfo.cpp | 18 +-
 lib/Target/R600/SIRegisterInfo.h | 20 +-
 lib/Target/R600/SIRegisterInfo.td | 14 +-
 lib/Target/R600/SITypeRewriter.cpp | 24 +-
 lib/Target/Sparc/AsmParser/LLVMBuild.txt | 2 +-
 lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 51 +-
 lib/Target/Sparc/DelaySlotFiller.cpp | 7 +-
 lib/Target/Sparc/Disassembler/LLVMBuild.txt | 2 +-
 .../Sparc/Disassembler/SparcDisassembler.cpp | 29 +-
 lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp | 4 +-
 lib/Target/Sparc/InstPrinter/SparcInstPrinter.h | 8 +-
 lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 16 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 2 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h | 14 +-
 .../Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 5 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 3 +-
 lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 10 +-
 .../Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 16 +-
 lib/Target/Sparc/SparcAsmPrinter.cpp | 17 +-
 lib/Target/Sparc/SparcCodeEmitter.cpp | 13 +-
 lib/Target/Sparc/SparcFrameLowering.cpp | 9 +-
 lib/Target/Sparc/SparcFrameLowering.h | 17 +-
 lib/Target/Sparc/SparcISelDAGToDAG.cpp | 12 +-
 lib/Target/Sparc/SparcISelLowering.cpp | 110 +-
 lib/Target/Sparc/SparcISelLowering.h | 52 +-
 lib/Target/Sparc/SparcInstr64Bit.td | 4 +-
 lib/Target/Sparc/SparcInstrAliases.td | 8 +-
 lib/Target/Sparc/SparcInstrInfo.cpp | 15 +-
 lib/Target/Sparc/SparcInstrInfo.h | 68 +-
 lib/Target/Sparc/SparcJITInfo.cpp | 3 +-
 lib/Target/Sparc/SparcJITInfo.h | 14 +-
 lib/Target/Sparc/SparcMCInstLower.cpp | 2 +-
 lib/Target/Sparc/SparcRegisterInfo.cpp | 8 +-
 lib/Target/Sparc/SparcRegisterInfo.h | 15 +-
 lib/Target/Sparc/SparcSelectionDAGInfo.cpp | 3 +-
 lib/Target/Sparc/SparcSubtarget.cpp | 6 +-
 lib/Target/Sparc/SparcTargetMachine.cpp | 4 +-
 lib/Target/Sparc/SparcTargetMachine.h | 20 +-
 lib/Target/Sparc/SparcTargetObjectFile.cpp | 2 +-
 lib/Target/Sparc/SparcTargetStreamer.h | 8 +-
 lib/Target/SystemZ/AsmParser/LLVMBuild.txt | 2 +-
 lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 11 +-
 .../SystemZ/Disassembler/SystemZDisassembler.cpp | 11 +-
 .../SystemZ/InstPrinter/SystemZInstPrinter.cpp | 4 +-
 lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt | 2 +-
 .../SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 3 +-
 .../SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp | 2 +-
 .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 7 +-
 lib/Target/SystemZ/SystemZElimCompare.cpp | 8 +-
 lib/Target/SystemZ/SystemZFrameLowering.cpp | 2 +-
 lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 32 +-
 lib/Target/SystemZ/SystemZISelLowering.cpp | 54 +-
 lib/Target/SystemZ/SystemZInstrFormats.td | 2 +-
 lib/Target/SystemZ/SystemZInstrInfo.cpp | 24 +-
 lib/Target/SystemZ/SystemZInstrInfo.h | 2 +-
 lib/Target/SystemZ/SystemZLongBranch.cpp | 13 +-
 lib/Target/SystemZ/SystemZRegisterInfo.cpp | 8 +-
 lib/Target/SystemZ/SystemZRegisterInfo.h | 2 +-
 lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 5 +-
 lib/Target/SystemZ/SystemZShortenInst.cpp | 8 +-
 lib/Target/SystemZ/SystemZSubtarget.cpp | 6 +-
 lib/Target/Target.cpp | 8 -
 lib/Target/TargetLoweringObjectFile.cpp | 12 +-
 lib/Target/TargetMachine.cpp | 69 +-
 lib/Target/TargetMachineC.cpp | 22 +-
 lib/Target/TargetSubtargetInfo.cpp | 11 +-
 lib/Target/X86/Android.mk | 1 -
 lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 122 +-
 lib/Target/X86/AsmParser/X86AsmInstrumentation.h | 17 +-
 lib/Target/X86/AsmParser/X86AsmParser.cpp | 93 +-
 lib/Target/X86/AsmParser/X86Operand.h | 6 +-
 lib/Target/X86/CMakeLists.txt | 1 -
 lib/Target/X86/Disassembler/Android.mk | 3 +-
 lib/Target/X86/Disassembler/CMakeLists.txt | 2 +-
 lib/Target/X86/Disassembler/Makefile | 4 +-
 lib/Target/X86/Disassembler/X86Disassembler.cpp | 51 +-
 lib/Target/X86/Disassembler/X86Disassembler.h | 17 +-
 .../X86/Disassembler/X86DisassemblerDecoder.c | 1821 ---
 .../X86/Disassembler/X86DisassemblerDecoder.cpp | 1838 +++
 .../X86/Disassembler/X86DisassemblerDecoder.h | 362 +-
 .../Disassembler/X86DisassemblerDecoderCommon.h | 221 +-
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 3 +-
 lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 2 +
 lib/Target/X86/InstPrinter/X86InstComments.cpp | 4 +-
 lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 3 +-
 lib/Target/X86/MCTargetDesc/Android.mk | 3 +-
 lib/Target/X86/MCTargetDesc/CMakeLists.txt | 1 +
 lib/Target/X86/MCTargetDesc/LLVMBuild.txt | 2 +-
 lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 16 +-
 lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 9 +-
 lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 8 +-
 .../X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 2 +-
 lib/Target/X86/MCTargetDesc/X86FixupKinds.h | 1 +
 lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 8 +-
 lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 19 +-
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 42 +-
 lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 9 +
 .../X86/MCTargetDesc/X86MachORelocationInfo.cpp | 2 +-
 .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 23 +-
 .../X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 72 +-
 lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 51 +
 lib/Target/X86/X86.h | 4 +-
 lib/Target/X86/X86.td | 27 +-
 lib/Target/X86/X86AsmPrinter.cpp | 192 +-
 lib/Target/X86/X86AsmPrinter.h | 4 +-
 lib/Target/X86/X86COFFMachineModuleInfo.cpp | 19 -
 lib/Target/X86/X86COFFMachineModuleInfo.h | 46 -
 lib/Target/X86/X86CallingConv.h | 27 -
 lib/Target/X86/X86CallingConv.td | 10 -
 lib/Target/X86/X86CodeEmitter.cpp | 9 +-
 lib/Target/X86/X86FastISel.cpp | 36 +-
 lib/Target/X86/X86FixupLEAs.cpp | 107 +-
 lib/Target/X86/X86FloatingPoint.cpp | 5 +-
 lib/Target/X86/X86FrameLowering.cpp | 91 +-
 lib/Target/X86/X86FrameLowering.h | 2 +-
 lib/Target/X86/X86ISelDAGToDAG.cpp | 98 +-
 lib/Target/X86/X86ISelLowering.cpp | 2193 +++-
 lib/Target/X86/X86ISelLowering.h | 42 +-
 lib/Target/X86/X86InstrAVX512.td | 206 +-
 lib/Target/X86/X86InstrBuilder.h | 3 +-
 lib/Target/X86/X86InstrCompiler.td | 4 +-
 lib/Target/X86/X86InstrFMA.td | 46 +-
 lib/Target/X86/X86InstrFragmentsSIMD.td | 5 +-
 lib/Target/X86/X86InstrInfo.cpp | 262 +-
 lib/Target/X86/X86InstrInfo.h | 9 +-
 lib/Target/X86/X86InstrInfo.td | 200 +-
 lib/Target/X86/X86InstrMMX.td | 8 +-
 lib/Target/X86/X86InstrSSE.td | 147 +-
 lib/Target/X86/X86InstrSystem.td | 2 +-
 lib/Target/X86/X86JITInfo.cpp | 10 +-
 lib/Target/X86/X86MCInstLower.cpp | 8 +-
 lib/Target/X86/X86PadShortFunction.cpp | 9 +-
 lib/Target/X86/X86RegisterInfo.cpp | 8 +-
 lib/Target/X86/X86RegisterInfo.h | 4 +-
 lib/Target/X86/X86SchedHaswell.td | 3 +
 lib/Target/X86/X86SchedSandyBridge.td | 3 +
 lib/Target/X86/X86ScheduleAtom.td | 4 +
 lib/Target/X86/X86ScheduleSLM.td | 849 +-
 lib/Target/X86/X86SelectionDAGInfo.cpp | 36 +-
 lib/Target/X86/X86Subtarget.cpp | 364 +-
 lib/Target/X86/X86Subtarget.h | 11 +-
 lib/Target/X86/X86TargetMachine.cpp | 33 +-
 lib/Target/X86/X86TargetObjectFile.cpp | 14 +-
 lib/Target/X86/X86TargetTransformInfo.cpp | 159 +-
 lib/Target/X86/X86VZeroUpper.cpp | 6 +-
 .../XCore/Disassembler/XCoreDisassembler.cpp | 19 +-
 lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp | 3 +-
 lib/Target/XCore/InstPrinter/XCoreInstPrinter.h | 4 +-
 lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp | 2 +-
 lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h | 2 +-
 .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 13 +-
 lib/Target/XCore/XCoreAsmPrinter.cpp | 17 +-
 lib/Target/XCore/XCoreFrameLowering.cpp | 19 +-
 lib/Target/XCore/XCoreFrameLowering.h | 27 +-
 lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp | 4 +-
 lib/Target/XCore/XCoreISelDAGToDAG.cpp | 17 +-
 lib/Target/XCore/XCoreISelLowering.cpp | 125 +-
 lib/Target/XCore/XCoreISelLowering.h | 51 +-
 lib/Target/XCore/XCoreInstrInfo.cpp | 17 +-
 lib/Target/XCore/XCoreInstrInfo.h | 76 +-
 lib/Target/XCore/XCoreLowerThreadLocal.cpp | 7 +-
 lib/Target/XCore/XCoreRegisterInfo.cpp | 12 +-
 lib/Target/XCore/XCoreRegisterInfo.h | 15 +-
 lib/Target/XCore/XCoreSelectionDAGInfo.cpp | 19 +-
 lib/Target/XCore/XCoreSelectionDAGInfo.h | 4 +-
 lib/Target/XCore/XCoreSubtarget.cpp | 6 +-
 lib/Target/XCore/XCoreTargetMachine.cpp | 6 +-
 lib/Target/XCore/XCoreTargetMachine.h | 18 +-
 lib/Target/XCore/XCoreTargetObjectFile.h | 2 +-
 lib/Target/XCore/XCoreTargetTransformInfo.cpp | 5 +-
 lib/Transforms/Hello/Hello.cpp | 3 +-
 lib/Transforms/IPO/ArgumentPromotion.cpp | 17 +-
 lib/Transforms/IPO/ConstantMerge.cpp | 9 +-
 lib/Transforms/IPO/DeadArgumentElimination.cpp | 8 +-
 lib/Transforms/IPO/ExtractGV.cpp | 9 +-
 lib/Transforms/IPO/FunctionAttrs.cpp | 28 +-
 lib/Transforms/IPO/GlobalDCE.cpp | 23 +-
 lib/Transforms/IPO/GlobalOpt.cpp | 269 +-
 lib/Transforms/IPO/IPConstantPropagation.cpp | 11 +-
 lib/Transforms/IPO/InlineAlways.cpp | 11 +-
 lib/Transforms/IPO/InlineSimple.cpp | 7 +-
 lib/Transforms/IPO/Inliner.cpp | 70 +-
 lib/Transforms/IPO/Internalize.cpp | 10 +-
 lib/Transforms/IPO/LoopExtractor.cpp | 7 +-
 lib/Transforms/IPO/MergeFunctions.cpp | 734 +-
 lib/Transforms/IPO/PartialInlining.cpp | 11 +-
 lib/Transforms/IPO/PassManagerBuilder.cpp | 28 +-
 lib/Transforms/IPO/PruneEH.cpp | 5 +-
 lib/Transforms/IPO/StripDeadPrototypes.cpp | 3 +-
 lib/Transforms/IPO/StripSymbols.cpp | 2 +-
 lib/Transforms/InstCombine/InstCombine.h | 107 +-
 lib/Transforms/InstCombine/InstCombineAddSub.cpp | 102 +-
 lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 149 +-
 lib/Transforms/InstCombine/InstCombineCalls.cpp | 305 +-
 lib/Transforms/InstCombine/InstCombineCasts.cpp | 89 +-
 lib/Transforms/InstCombine/InstCombineCompares.cpp | 390 +-
 .../InstCombine/InstCombineLoadStoreAlloca.cpp | 51 +-
 .../InstCombine/InstCombineMulDivRem.cpp | 128 +-
 lib/Transforms/InstCombine/InstCombinePHI.cpp | 80 +-
 lib/Transforms/InstCombine/InstCombineSelect.cpp | 92 +-
 lib/Transforms/InstCombine/InstCombineShifts.cpp | 91 +-
 .../InstCombine/InstCombineSimplifyDemanded.cpp | 85 +-
 .../InstCombine/InstCombineVectorOps.cpp | 115 +-
 lib/Transforms/InstCombine/InstCombineWorklist.h | 7 +-
 .../InstCombine/InstructionCombining.cpp | 246 +-
 .../Instrumentation/AddressSanitizer.cpp | 310 +-
 lib/Transforms/Instrumentation/BoundsChecking.cpp | 9 +-
 .../Instrumentation/DataFlowSanitizer.cpp | 32 +-
 lib/Transforms/Instrumentation/DebugIR.cpp | 30 +-
 lib/Transforms/Instrumentation/DebugIR.h | 2 +-
 lib/Transforms/Instrumentation/GCOVProfiling.cpp | 45 +-
 lib/Transforms/Instrumentation/MemorySanitizer.cpp | 226 +-
 lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 14 +-
 lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 40 +-
 lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 5 +-
 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 13 +-
 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 3 +-
 lib/Transforms/ObjCARC/ObjCARCContract.cpp | 9 +-
 lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 4 +-
 lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 49 +-
 lib/Transforms/Scalar/ADCE.cpp | 3 +-
 lib/Transforms/Scalar/Android.mk | 6 +-
 lib/Transforms/Scalar/CMakeLists.txt | 11 +-
 lib/Transforms/Scalar/ConstantHoisting.cpp | 24 +-
 lib/Transforms/Scalar/ConstantProp.cpp | 5 +-
 .../Scalar/CorrelatedValuePropagation.cpp | 5 +-
 lib/Transforms/Scalar/DCE.cpp | 3 +-
 lib/Transforms/Scalar/DeadStoreElimination.cpp | 33 +-
 lib/Transforms/Scalar/EarlyCSE.cpp | 23 +-
 lib/Transforms/Scalar/FlattenCFGPass.cpp | 3 +-
 lib/Transforms/Scalar/GVN.cpp | 115 +-
 lib/Transforms/Scalar/GlobalMerge.cpp | 8 +-
 lib/Transforms/Scalar/IndVarSimplify.cpp | 80 +-
 lib/Transforms/Scalar/JumpThreading.cpp | 39 +-
 lib/Transforms/Scalar/LICM.cpp | 15 +-
 lib/Transforms/Scalar/LoopDeletion.cpp | 3 +-
 lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 65 +-
 lib/Transforms/Scalar/LoopInstSimplify.cpp | 17 +-
 lib/Transforms/Scalar/LoopRerollPass.cpp | 7 +-
 lib/Transforms/Scalar/LoopRotation.cpp | 36 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp | 683 +-
 lib/Transforms/Scalar/LoopUnrollPass.cpp | 12 +-
 lib/Transforms/Scalar/LoopUnswitch.cpp | 62 +-
 lib/Transforms/Scalar/LowerAtomic.cpp | 5 +-
 lib/Transforms/Scalar/MemCpyOptimizer.cpp | 41 +-
 lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp | 3 +-
 lib/Transforms/Scalar/Reassociate.cpp | 43 +-
 lib/Transforms/Scalar/Reg2Mem.cpp | 3 +-
 lib/Transforms/Scalar/SCCP.cpp | 33 +-
 lib/Transforms/Scalar/SROA.cpp | 106 +-
 lib/Transforms/Scalar/SampleProfile.cpp | 10 +-
 lib/Transforms/Scalar/Scalar.cpp | 2 +
 lib/Transforms/Scalar/ScalarReplAggregates.cpp | 79 +-
 lib/Transforms/Scalar/Scalarizer.cpp | 13 +-
 .../Scalar/SeparateConstOffsetFromGEP.cpp | 623 +
 lib/Transforms/Scalar/SimplifyCFGPass.cpp | 13 +-
 lib/Transforms/Scalar/Sink.cpp | 13 +-
 lib/Transforms/Scalar/StructurizeCFG.cpp | 27 +-
 lib/Transforms/Scalar/TailRecursionElimination.cpp | 380 +-
 lib/Transforms/Utils/AddDiscriminators.cpp | 15 +-
 lib/Transforms/Utils/Android.mk | 1 +
 lib/Transforms/Utils/BasicBlockUtils.cpp | 117 +-
 lib/Transforms/Utils/BreakCriticalEdges.cpp | 15 +-
 lib/Transforms/Utils/BuildLibCalls.cpp | 31 +-
 lib/Transforms/Utils/BypassSlowDivision.cpp | 7 +-
 lib/Transforms/Utils/CMakeLists.txt | 1 +
 lib/Transforms/Utils/CloneFunction.cpp | 8 +-
 lib/Transforms/Utils/CloneModule.cpp | 14 +-
 lib/Transforms/Utils/CmpInstAnalysis.cpp | 2 +-
 lib/Transforms/Utils/CodeExtractor.cpp | 20 +-
 lib/Transforms/Utils/CtorUtils.cpp | 183 +
 lib/Transforms/Utils/DemoteRegToStack.cpp | 14 +-
 lib/Transforms/Utils/FlattenCFG.cpp | 14 +-
 lib/Transforms/Utils/GlobalStatus.cpp | 4 +-
 lib/Transforms/Utils/InlineFunction.cpp | 253 +-
 lib/Transforms/Utils/IntegerDivision.cpp | 3 +-
 lib/Transforms/Utils/LCSSA.cpp | 5 +-
 lib/Transforms/Utils/Local.cpp | 84 +-
 lib/Transforms/Utils/LoopSimplify.cpp | 42 +-
 lib/Transforms/Utils/LoopUnroll.cpp | 28 +-
 lib/Transforms/Utils/LoopUnrollRuntime.cpp | 13 +-
 lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 3 +-
 lib/Transforms/Utils/LowerInvoke.cpp | 3 +-
 lib/Transforms/Utils/LowerSwitch.cpp | 7 +-
 lib/Transforms/Utils/Mem2Reg.cpp | 3 +-
 lib/Transforms/Utils/ModuleUtils.cpp | 29 +-
 lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 29 +-
 lib/Transforms/Utils/SSAUpdater.cpp | 21 +-
 lib/Transforms/Utils/SimplifyCFG.cpp | 153 +-
 lib/Transforms/Utils/SimplifyIndVar.cpp | 30 +-
 lib/Transforms/Utils/SimplifyInstructions.cpp | 24 +-
 lib/Transforms/Utils/SimplifyLibCalls.cpp | 316 +-
 lib/Transforms/Utils/SpecialCaseList.cpp | 6 +-
 lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 8 +-
 lib/Transforms/Utils/ValueMapper.cpp | 20 +-
 lib/Transforms/Vectorize/BBVectorize.cpp | 94 +-
 lib/Transforms/Vectorize/LoopVectorize.cpp | 443 +-
 lib/Transforms/Vectorize/SLPVectorizer.cpp | 257 +-
 1300 files changed, 100623 insertions(+), 116881 deletions(-)
 create mode 100644 lib/Analysis/BlockFrequencyInfoImpl.cpp
 create mode 100644 lib/Analysis/CGSCCPassManager.cpp
 create mode 100644 lib/AsmParser/module.modulemap
 create mode 100644 lib/Bitcode/module.modulemap
 create mode 100644 lib/CodeGen/AsmPrinter/AddressPool.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/AddressPool.h
 create mode 100644 lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfFile.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfFile.h
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
 create mode 100644 lib/CodeGen/AsmPrinter/DwarfStringPool.h
 create mode 100644 lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
 create mode 100644 lib/CodeGen/module.modulemap
 create mode 100644 lib/DebugInfo/module.modulemap
 create mode 100644 lib/IR/MDBuilder.cpp
 create mode 100644 lib/IR/module.modulemap
 delete mode 100644 lib/MC/MCFixup.cpp
 create mode 100644 lib/MC/MCTargetOptions.cpp
 create mode 100644 lib/Object/StringTableBuilder.cpp
 create mode 100644 lib/ProfileData/Android.mk
100644 lib/ProfileData/InstrProfIndexed.h create mode 100644 lib/TableGen/module.modulemap create mode 100644 lib/Target/AArch64/AArch64AddressTypePromotion.cpp create mode 100644 lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp delete mode 100644 lib/Target/AArch64/AArch64AsmPrinter.h delete mode 100644 lib/Target/AArch64/AArch64BranchFixupPass.cpp create mode 100644 lib/Target/AArch64/AArch64BranchRelaxation.cpp delete mode 100644 lib/Target/AArch64/AArch64CallingConv.td create mode 100644 lib/Target/AArch64/AArch64CallingConvention.td create mode 100644 lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp create mode 100644 lib/Target/AArch64/AArch64CollectLOH.cpp create mode 100644 lib/Target/AArch64/AArch64ConditionalCompares.cpp create mode 100644 lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp create mode 100644 lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp create mode 100644 lib/Target/AArch64/AArch64FastISel.cpp create mode 100644 lib/Target/AArch64/AArch64InstrAtomics.td delete mode 100644 lib/Target/AArch64/AArch64InstrNEON.td create mode 100644 lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp create mode 100644 lib/Target/AArch64/AArch64MCInstLower.h delete mode 100644 lib/Target/AArch64/AArch64MachineFunctionInfo.cpp create mode 100644 lib/Target/AArch64/AArch64PerfectShuffle.h create mode 100644 lib/Target/AArch64/AArch64PromoteConstant.cpp create mode 100644 lib/Target/AArch64/AArch64SchedA53.td create mode 100644 lib/Target/AArch64/AArch64SchedCyclone.td delete mode 100644 lib/Target/AArch64/AArch64ScheduleA53.td create mode 100644 lib/Target/AArch64/AArch64StorePairSuppress.cpp create mode 100644 lib/Target/AArch64/Disassembler/AArch64Disassembler.h create mode 100644 lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp create mode 100644 lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp delete mode 100644 lib/Target/AArch64/README.txt delete mode 100644 lib/Target/ARM/ARMAtomicExpandPass.cpp create mode 100644 lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp create mode 100644 lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp delete mode 100644 lib/Target/ARM64/ARM64.h delete mode 100644 lib/Target/ARM64/ARM64.td delete mode 100644 lib/Target/ARM64/ARM64AddressTypePromotion.cpp delete mode 100644 lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp delete mode 100644 lib/Target/ARM64/ARM64AsmPrinter.cpp delete mode 100644 lib/Target/ARM64/ARM64BranchRelaxation.cpp delete mode 100644 lib/Target/ARM64/ARM64CallingConv.h delete mode 100644 lib/Target/ARM64/ARM64CallingConvention.td delete mode 100644 lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp delete mode 100644 lib/Target/ARM64/ARM64CollectLOH.cpp delete mode 100644 lib/Target/ARM64/ARM64ConditionalCompares.cpp delete mode 100644 lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp delete mode 100644 lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp delete mode 100644 lib/Target/ARM64/ARM64FastISel.cpp delete mode 100644 lib/Target/ARM64/ARM64FrameLowering.cpp delete mode 100644 lib/Target/ARM64/ARM64FrameLowering.h delete mode 100644 lib/Target/ARM64/ARM64ISelDAGToDAG.cpp delete mode 100644 lib/Target/ARM64/ARM64ISelLowering.cpp delete mode 100644 lib/Target/ARM64/ARM64ISelLowering.h delete mode 100644 lib/Target/ARM64/ARM64InstrAtomics.td delete mode 100644 lib/Target/ARM64/ARM64InstrFormats.td delete mode 100644 
lib/Target/ARM64/ARM64InstrInfo.cpp delete mode 100644 lib/Target/ARM64/ARM64InstrInfo.h delete mode 100644 lib/Target/ARM64/ARM64InstrInfo.td delete mode 100644 lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp delete mode 100644 lib/Target/ARM64/ARM64MCInstLower.cpp delete mode 100644 lib/Target/ARM64/ARM64MCInstLower.h delete mode 100644 lib/Target/ARM64/ARM64MachineFunctionInfo.h delete mode 100644 lib/Target/ARM64/ARM64PerfectShuffle.h delete mode 100644 lib/Target/ARM64/ARM64PromoteConstant.cpp delete mode 100644 lib/Target/ARM64/ARM64RegisterInfo.cpp delete mode 100644 lib/Target/ARM64/ARM64RegisterInfo.h delete mode 100644 lib/Target/ARM64/ARM64RegisterInfo.td delete mode 100644 lib/Target/ARM64/ARM64SchedCyclone.td delete mode 100644 lib/Target/ARM64/ARM64Schedule.td delete mode 100644 lib/Target/ARM64/ARM64SelectionDAGInfo.cpp delete mode 100644 lib/Target/ARM64/ARM64SelectionDAGInfo.h delete mode 100644 lib/Target/ARM64/ARM64StorePairSuppress.cpp delete mode 100644 lib/Target/ARM64/ARM64Subtarget.cpp delete mode 100644 lib/Target/ARM64/ARM64Subtarget.h delete mode 100644 lib/Target/ARM64/ARM64TargetMachine.cpp delete mode 100644 lib/Target/ARM64/ARM64TargetMachine.h delete mode 100644 lib/Target/ARM64/ARM64TargetObjectFile.cpp delete mode 100644 lib/Target/ARM64/ARM64TargetObjectFile.h delete mode 100644 lib/Target/ARM64/ARM64TargetTransformInfo.cpp delete mode 100644 lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp delete mode 100644 lib/Target/ARM64/AsmParser/CMakeLists.txt delete mode 100644 lib/Target/ARM64/AsmParser/LLVMBuild.txt delete mode 100644 lib/Target/ARM64/AsmParser/Makefile delete mode 100644 lib/Target/ARM64/CMakeLists.txt delete mode 100644 lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp delete mode 100644 lib/Target/ARM64/Disassembler/ARM64Disassembler.h delete mode 100644 lib/Target/ARM64/Disassembler/CMakeLists.txt delete mode 100644 lib/Target/ARM64/Disassembler/LLVMBuild.txt delete mode 100644 lib/Target/ARM64/Disassembler/Makefile delete mode 100644 lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp delete mode 100644 lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h delete mode 100644 lib/Target/ARM64/InstPrinter/CMakeLists.txt delete mode 100644 lib/Target/ARM64/InstPrinter/LLVMBuild.txt delete mode 100644 lib/Target/ARM64/InstPrinter/Makefile delete mode 100644 lib/Target/ARM64/LLVMBuild.txt delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h delete mode 100644 lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp delete mode 100644 lib/Target/ARM64/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt delete mode 100644 
lib/Target/ARM64/MCTargetDesc/Makefile delete mode 100644 lib/Target/ARM64/Makefile delete mode 100644 lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp delete mode 100644 lib/Target/ARM64/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/ARM64/TargetInfo/LLVMBuild.txt delete mode 100644 lib/Target/ARM64/TargetInfo/Makefile create mode 100644 lib/Target/Mips/Mips32r6InstrFormats.td create mode 100644 lib/Target/Mips/Mips32r6InstrInfo.td create mode 100644 lib/Target/Mips/Mips64r6InstrInfo.td create mode 100644 lib/Target/Mips/MipsFastISel.cpp create mode 100644 lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp create mode 100644 lib/Target/NVPTX/NVPTXImageOptimizer.cpp create mode 100644 lib/Target/NVPTX/NVPTXMachineFunctionInfo.h create mode 100644 lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp create mode 100644 lib/Target/R600/SILowerI1Copies.cpp delete mode 100644 lib/Target/X86/Disassembler/X86DisassemblerDecoder.c create mode 100644 lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp create mode 100644 lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp delete mode 100644 lib/Target/X86/X86COFFMachineModuleInfo.cpp delete mode 100644 lib/Target/X86/X86COFFMachineModuleInfo.h create mode 100644 lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp create mode 100644 lib/Transforms/Utils/CtorUtils.cpp (limited to 'lib') diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 9583bbe..57237e5 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -473,7 +473,7 @@ AliasAnalysis::~AliasAnalysis() {} /// void AliasAnalysis::InitializeAliasAnalysis(Pass *P) { DataLayoutPass *DLP = P->getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = P->getAnalysisIfAvailable(); AA = &P->getAnalysis(); } diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 2e3bc55..b860914 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -126,7 +126,7 @@ AliasAnalysis::AliasResult AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) { AliasResult R = getAnalysis().alias(LocA, LocB); - const char *AliasString = 0; + const char *AliasString = nullptr; switch (R) { case NoAlias: No++; AliasString = "No alias"; break; case MayAlias: May++; AliasString = "May alias"; break; @@ -152,7 +152,7 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { ModRefResult R = getAnalysis().getModRefInfo(CS, Loc); - const char *MRString = 0; + const char *MRString = nullptr; switch (R) { case NoModRef: NoMR++; MRString = "NoModRef"; break; case Ref: JustRef++; MRString = "JustRef"; break; diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index ab1005e..a45fe23 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -72,16 +72,16 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) { AS.PtrList->setPrevInList(PtrListEnd); PtrListEnd = AS.PtrListEnd; - AS.PtrList = 0; + AS.PtrList = nullptr; AS.PtrListEnd = &AS.PtrList; - assert(*AS.PtrListEnd == 0 && "End of list is not null?"); + assert(*AS.PtrListEnd == nullptr && "End of list is not null?"); } } void AliasSetTracker::removeAliasSet(AliasSet *AS) { if (AliasSet *Fwd = AS->Forward) { Fwd->dropRef(*this); - AS->Forward = 0; + AS->Forward = nullptr; } AliasSets.erase(AS); } @@ -115,10 +115,10 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec 
&Entry, Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); // Add it to the end of the list... - assert(*PtrListEnd == 0 && "End of list is not null?"); + assert(*PtrListEnd == nullptr && "End of list is not null?"); *PtrListEnd = &Entry; PtrListEnd = Entry.setPrevInList(PtrListEnd); - assert(*PtrListEnd == 0 && "End of list is not null?"); + assert(*PtrListEnd == nullptr && "End of list is not null?"); addRef(); // Entry points to alias set. } @@ -217,11 +217,11 @@ void AliasSetTracker::clear() { AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { - AliasSet *FoundSet = 0; + AliasSet *FoundSet = nullptr; for (iterator I = begin(), E = end(); I != E; ++I) { if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue; - if (FoundSet == 0) { // If this is the first alias set ptr can go into. + if (!FoundSet) { // If this is the first alias set ptr can go into. FoundSet = I; // Remember it. } else { // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*I, *this); // Merge in contents. @@ -245,12 +245,12 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { - AliasSet *FoundSet = 0; + AliasSet *FoundSet = nullptr; for (iterator I = begin(), E = end(); I != E; ++I) { if (I->Forward || !I->aliasesUnknownInst(Inst, AA)) continue; - if (FoundSet == 0) // If this is the first alias set ptr can go into. + if (!FoundSet) // If this is the first alias set ptr can go into. FoundSet = I; // Remember it. else if (!I->Forward) // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*I, *this); // Merge in contents. diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index c960123..01c1c7e 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -73,7 +73,7 @@ void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, char **OutMessages) { - raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : 0; + raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr; std::string Messages; raw_string_ostream MsgsOS(Messages); @@ -94,7 +94,8 @@ LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) { LLVMBool Result = verifyFunction( - *unwrap(Fn), Action != LLVMReturnStatusAction ? &errs() : 0); + *unwrap(Fn), Action != LLVMReturnStatusAction ? &errs() + : nullptr); if (Action == LLVMAbortProcessAction && Result) report_fatal_error("Broken function found, compilation aborted!"); diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk index 76eee74..a8fef77 100644 --- a/lib/Analysis/Android.mk +++ b/lib/Analysis/Android.mk @@ -9,6 +9,7 @@ analysis_SRC_FILES := \ Analysis.cpp \ BasicAliasAnalysis.cpp \ BlockFrequencyInfo.cpp \ + BlockFrequencyInfoImpl.cpp \ BranchProbabilityInfo.cpp \ CFG.cpp \ CFGPrinter.cpp \ diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index e267374..fe90b84 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -298,7 +298,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, do { // See if this is a bitcast or GEP. const Operator *Op = dyn_cast(V); - if (Op == 0) { + if (!Op) { // The only non-operator case we can handle are GlobalAliases. 
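Most of the mechanical churn in these hunks is the C++11 null-pointer cleanup: the literals 0 and NULL become nullptr. A minimal sketch of why the distinction matters, using two hypothetical overloads that are not part of this patch:

    void take(int) {}
    void take(int *) {}

    void demo() {
      take(0);       // 0 is an integer literal first, so this binds to take(int)
      take(nullptr); // std::nullptr_t converts only to pointer types: take(int *)
    }

Because nullptr can never silently become arithmetic, the rewrite is behavior-preserving wherever the old code meant "null pointer".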
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (!GA->mayBeOverridden()) { @@ -315,7 +315,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, } const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op); - if (GEPOp == 0) { + if (!GEPOp) { // If it's not a GEP, hand it off to SimplifyInstruction to see if it // can come up with something. This matches what GetUnderlyingObject does. if (const Instruction *I = dyn_cast<Instruction>(V)) @@ -336,7 +336,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, // If we are lacking DataLayout information, we can't compute the offsets of // elements computed by GEPs. However, we can handle bitcast equivalent // GEPs. - if (DL == 0) { + if (!DL) { if (!GEPOp->hasAllZeroIndices()) return V; V = GEPOp->getOperand(0); @@ -433,7 +433,7 @@ static const Function *getParent(const Value *V) { if (const Argument *arg = dyn_cast<Argument>(V)) return arg->getParent(); - return NULL; + return nullptr; } static bool notDifferentParent(const Value *O1, const Value *O2) { @@ -753,7 +753,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // Finally, handle specific knowledge of intrinsics. const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); - if (II != 0) + if (II != nullptr) switch (II->getIntrinsicID()) { default: break; case Intrinsic::memcpy: @@ -868,21 +868,6 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); } -static bool areVarIndicesEqual(SmallVectorImpl &Indices1, - SmallVectorImpl &Indices2) { - unsigned Size1 = Indices1.size(); - unsigned Size2 = Indices2.size(); - - if (Size1 != Size2) - return false; - - for (unsigned I = 0; I != Size1; ++I) - if (Indices1[I] != Indices2[I]) - return false; - - return true; -} - /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction /// against another pointer. We know that V1 is a GEP, but we don't know /// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL), @@ -904,8 +889,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // derived pointer. if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) { // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, - UnderlyingV2, UnknownSize, 0); + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, nullptr, + UnderlyingV2, UnknownSize, nullptr); // Check for geps of non-aliasing underlying pointers where the offsets are // identical. @@ -929,8 +914,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { - assert(DL == 0 && - "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + assert(!DL && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } // If the max search depth is reached the result is undefined @@ -939,7 +924,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // Same offsets. if (GEP1BaseOffset == GEP2BaseOffset && - areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices)) + GEP1VariableIndices == GEP2VariableIndices) return NoAlias; GEP1VariableIndices.clear(); } @@ -966,7 +951,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout.
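Dropping areVarIndicesEqual() above works because llvm::SmallVector already provides element-wise comparison: operator== checks the sizes first and then compares elements in order, exactly what the deleted loop did, so the caller can write GEP1VariableIndices == GEP2VariableIndices directly. A minimal sketch of the equivalence, using int elements rather than the patch's GEP index type:

    #include "llvm/ADT/SmallVector.h"
    #include <cassert>
    #include <initializer_list>

    void equalityDemo() {
      llvm::SmallVector<int, 4> A, B;
      for (int V : {1, 2, 3}) {
        A.push_back(V);
        B.push_back(V);
      }
      assert(A == B);    // size check, then element-by-element comparison
      B.push_back(4);
      assert(!(A == B)); // differing sizes compare unequal immediately
    }

The only requirement is that the element type define its own operator==.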
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { - assert(DL == 0 && + assert(!DL && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } @@ -988,7 +973,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (V1Size == UnknownSize && V2Size == UnknownSize) return MayAlias; - AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0, + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, nullptr, V2, V2Size, V2TBAAInfo); if (R != MustAlias) // If V2 may alias GEP base pointer, conservatively returns MayAlias. @@ -1005,7 +990,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // DecomposeGEPExpression and GetUnderlyingObject should return the // same result except when DecomposeGEPExpression has no DataLayout. if (GEP1BasePtr != UnderlyingV1) { - assert(DL == 0 && + assert(!DL && "DecomposeGEPExpression and GetUnderlyingObject disagree!"); return MayAlias; } @@ -1371,7 +1356,7 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, // Use dominance or loop info if available. DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = getAnalysisIfAvailable(); // Make sure that the visited phis cannot reach the Value. This ensures that diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 63049a5..8ed8e3e 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===// +//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===// // // The LLVM Compiler Infrastructure // @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" @@ -24,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "block-freq" + #ifndef NDEBUG enum GVDAGType { GVDT_None, @@ -106,6 +108,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis", true, true) @@ -120,14 +123,16 @@ BlockFrequencyInfo::~BlockFrequencyInfo() {} void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } bool BlockFrequencyInfo::runOnFunction(Function &F) { BranchProbabilityInfo &BPI = getAnalysis(); + LoopInfo &LI = getAnalysis(); if (!BFI) BFI.reset(new ImplType); - BFI->doFunction(&F, &BPI); + BFI->doFunction(&F, &BPI, &LI); #ifndef NDEBUG if (ViewBlockFreqPropagationDAG != GVDT_None) view(); @@ -158,7 +163,7 @@ void BlockFrequencyInfo::view() const { } const Function *BlockFrequencyInfo::getFunction() const { - return BFI ? BFI->Fn : nullptr; + return BFI ? 
BFI->getFunction() : nullptr; } raw_ostream &BlockFrequencyInfo:: diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp new file mode 100644 index 0000000..87d93a4 --- /dev/null +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -0,0 +1,995 @@ +//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using namespace llvm::bfi_detail; + +#define DEBUG_TYPE "block-freq" + +//===----------------------------------------------------------------------===// +// +// UnsignedFloat implementation. +// +//===----------------------------------------------------------------------===// +#ifndef _MSC_VER +const int32_t UnsignedFloatBase::MaxExponent; +const int32_t UnsignedFloatBase::MinExponent; +#endif + +static void appendDigit(std::string &Str, unsigned D) { + assert(D < 10); + Str += '0' + D % 10; +} + +static void appendNumber(std::string &Str, uint64_t N) { + while (N) { + appendDigit(Str, N % 10); + N /= 10; + } +} + +static bool doesRoundUp(char Digit) { + switch (Digit) { + case '5': + case '6': + case '7': + case '8': + case '9': + return true; + default: + return false; + } +} + +static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) { + assert(E >= UnsignedFloatBase::MinExponent); + assert(E <= UnsignedFloatBase::MaxExponent); + + // Find a new E, but don't let it increase past MaxExponent. + int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D); + int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros); + int Shift = 63 - (NewE - E); + assert(Shift <= LeadingZeros); + assert(Shift == LeadingZeros || NewE == UnsignedFloatBase::MaxExponent); + D <<= Shift; + E = NewE; + + // Check for a denormal. + unsigned AdjustedE = E + 16383; + if (!(D >> 63)) { + assert(E == UnsignedFloatBase::MaxExponent); + AdjustedE = 0; + } + + // Build the float and print it. + uint64_t RawBits[2] = {D, AdjustedE}; + APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits)); + SmallVector Chars; + Float.toString(Chars, Precision, 0); + return std::string(Chars.begin(), Chars.end()); +} + +static std::string stripTrailingZeros(const std::string &Float) { + size_t NonZero = Float.find_last_not_of('0'); + assert(NonZero != std::string::npos && "no . in floating point string"); + + if (Float[NonZero] == '.') + ++NonZero; + + return Float.substr(0, NonZero + 1); +} + +std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width, + unsigned Precision) { + if (!D) + return "0.0"; + + // Canonicalize exponent and digits.
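// (Worked example for the canonicalization below, values invented: D = 3,
// E = 2 encodes 3 * 2^2 = 12. The E > 0 branch shifts D left by
// min(countLeadingZeros64(3), 2) = 2 bits, leaving D = 12, E = 0, so the
// integer part Above0 becomes 12, the fraction Below0 stays empty, and the
// digits "12" come straight out of appendNumber().)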
+ uint64_t Above0 = 0; + uint64_t Below0 = 0; + uint64_t Extra = 0; + int ExtraShift = 0; + if (E == 0) { + Above0 = D; + } else if (E > 0) { + if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) { + D <<= Shift; + E -= Shift; + + if (!E) + Above0 = D; + } + } else if (E > -64) { + Above0 = D >> -E; + Below0 = D << (64 + E); + } else if (E > -120) { + Below0 = D >> (-E - 64); + Extra = D << (128 + E); + ExtraShift = -64 - E; + } + + // Fall back on APFloat for very small and very large numbers. + if (!Above0 && !Below0) + return toStringAPFloat(D, E, Precision); + + // Append the digits before the decimal. + std::string Str; + size_t DigitsOut = 0; + if (Above0) { + appendNumber(Str, Above0); + DigitsOut = Str.size(); + } else + appendDigit(Str, 0); + std::reverse(Str.begin(), Str.end()); + + // Return early if there's nothing after the decimal. + if (!Below0) + return Str + ".0"; + + // Append the decimal and beyond. + Str += '.'; + uint64_t Error = UINT64_C(1) << (64 - Width); + + // We need to shift Below0 to the right to make space for calculating + // digits. Save the precision we're losing in Extra. + Extra = (Below0 & 0xf) << 56 | (Extra >> 8); + Below0 >>= 4; + size_t SinceDot = 0; + size_t AfterDot = Str.size(); + do { + if (ExtraShift) { + --ExtraShift; + Error *= 5; + } else + Error *= 10; + + Below0 *= 10; + Extra *= 10; + Below0 += (Extra >> 60); + Extra = Extra & (UINT64_MAX >> 4); + appendDigit(Str, Below0 >> 60); + Below0 = Below0 & (UINT64_MAX >> 4); + if (DigitsOut || Str.back() != '0') + ++DigitsOut; + ++SinceDot; + } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 && + (!Precision || DigitsOut <= Precision || SinceDot < 2)); + + // Return early for maximum precision. + if (!Precision || DigitsOut <= Precision) + return stripTrailingZeros(Str); + + // Find where to truncate. + size_t Truncate = + std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1); + + // Check if there's anything to truncate. + if (Truncate >= Str.size()) + return stripTrailingZeros(Str); + + bool Carry = doesRoundUp(Str[Truncate]); + if (!Carry) + return stripTrailingZeros(Str.substr(0, Truncate)); + + // Round with the first truncated digit. + for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend(); + I != E; ++I) { + if (*I == '.') + continue; + if (*I == '9') { + *I = '0'; + continue; + } + + ++*I; + Carry = false; + break; + } + + // Add "1" in front if we still need to carry. + return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate)); +} + +raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E, + int Width, unsigned Precision) { + return OS << toString(D, E, Width, Precision); +} + +void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) { + print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E + << "]"; +} + +static std::pair +getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) { + if (ShouldRound) + if (!++N) + // Rounding caused an overflow. + return std::make_pair(UINT64_C(1), Shift + 64); + return std::make_pair(N, Shift); +} + +std::pair UnsignedFloatBase::divide64(uint64_t Dividend, + uint64_t Divisor) { + // Input should be sanitized. + assert(Divisor); + assert(Dividend); + + // Minimize size of divisor. + int16_t Shift = 0; + if (int Zeros = countTrailingZeros(Divisor)) { + Shift -= Zeros; + Divisor >>= Zeros; + } + + // Check for powers of two. + if (Divisor == 1) + return std::make_pair(Dividend, Shift); + + // Maximize size of dividend. 
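// (Worked example with invented inputs for divide64() above: for
// Dividend = 6, Divisor = 4, the divisor's two trailing zero bits fold into
// the exponent (Shift = -2, Divisor = 1), so the power-of-two check already
// returned (6, -2), i.e. 6 * 2^-2 = 1.5 = 6/4, with no long division needed.)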
+ if (int Zeros = countLeadingZeros64(Dividend)) { + Shift -= Zeros; + Dividend <<= Zeros; + } + + // Start with the result of a divide. + uint64_t Quotient = Dividend / Divisor; + Dividend %= Divisor; + + // Continue building the quotient with long division. + // + // TODO: continue with larger digits. + while (!(Quotient >> 63) && Dividend) { + // Shift Dividend, and check for overflow. + bool IsOverflow = Dividend >> 63; + Dividend <<= 1; + --Shift; + + // Divide. + bool DoesDivide = IsOverflow || Divisor <= Dividend; + Quotient = (Quotient << 1) | uint64_t(DoesDivide); + Dividend -= DoesDivide ? Divisor : 0; + } + + // Round. + if (Dividend >= getHalf(Divisor)) + if (!++Quotient) + // Rounding caused an overflow in Quotient. + return std::make_pair(UINT64_C(1), Shift + 64); + + return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift); +} + +std::pair UnsignedFloatBase::multiply64(uint64_t L, + uint64_t R) { + // Separate into two 32-bit digits (U.L). + uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX; + + // Compute cross products. + uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR; + + // Sum into two 64-bit digits. + uint64_t Upper = P1, Lower = P4; + auto addWithCarry = [&](uint64_t N) { + uint64_t NewLower = Lower + (N << 32); + Upper += (N >> 32) + (NewLower < Lower); + Lower = NewLower; + }; + addWithCarry(P2); + addWithCarry(P3); + + // Check whether the upper digit is empty. + if (!Upper) + return std::make_pair(Lower, 0); + + // Shift as little as possible to maximize precision. + unsigned LeadingZeros = countLeadingZeros64(Upper); + int16_t Shift = 64 - LeadingZeros; + if (LeadingZeros) + Upper = Upper << LeadingZeros | Lower >> Shift; + bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1)); + return getRoundedFloat(Upper, ShouldRound, Shift); +} + +//===----------------------------------------------------------------------===// +// +// BlockMass implementation. +// +//===----------------------------------------------------------------------===// +UnsignedFloat BlockMass::toFloat() const { + if (isFull()) + return UnsignedFloat(1, 0); + return UnsignedFloat(getMass() + 1, -64); +} + +void BlockMass::dump() const { print(dbgs()); } + +static char getHexDigit(int N) { + assert(N < 16); + if (N < 10) + return '0' + N; + return 'a' + N - 10; +} +raw_ostream &BlockMass::print(raw_ostream &OS) const { + for (int Digits = 0; Digits < 16; ++Digits) + OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf); + return OS; +} + +//===----------------------------------------------------------------------===// +// +// BlockFrequencyInfoImpl implementation. +// +//===----------------------------------------------------------------------===// +namespace { + +typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; +typedef BlockFrequencyInfoImplBase::Distribution Distribution; +typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList; +typedef BlockFrequencyInfoImplBase::Float Float; +typedef BlockFrequencyInfoImplBase::LoopData LoopData; +typedef BlockFrequencyInfoImplBase::Weight Weight; +typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData; + +/// \brief Dithering mass distributer. +/// +/// This class splits up a single mass into portions by weight, dithering to +/// spread out error. No mass is lost. The dithering precision depends on the +/// precision of the product of \a BlockMass and \a BranchProbability. +/// +/// The distribution algorithm follows. +/// +/// 1.
Initialize by saving the sum of the weights in \a RemWeight and the +/// mass to distribute in \a RemMass. +/// +/// 2. For each portion: +/// +/// 1. Construct a branch probability, P, as the portion's weight divided +/// by the current value of \a RemWeight. +/// 2. Calculate the portion's mass as \a RemMass times P. +/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting +/// the current portion's weight and mass. +struct DitheringDistributer { + uint32_t RemWeight; + BlockMass RemMass; + + DitheringDistributer(Distribution &Dist, const BlockMass &Mass); + + BlockMass takeMass(uint32_t Weight); +}; +} + +DitheringDistributer::DitheringDistributer(Distribution &Dist, + const BlockMass &Mass) { + Dist.normalize(); + RemWeight = Dist.Total; + RemMass = Mass; +} + +BlockMass DitheringDistributer::takeMass(uint32_t Weight) { + assert(Weight && "invalid weight"); + assert(Weight <= RemWeight); + BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight); + + // Decrement totals (dither). + RemWeight -= Weight; + RemMass -= Mass; + return Mass; +} + +void Distribution::add(const BlockNode &Node, uint64_t Amount, + Weight::DistType Type) { + assert(Amount && "invalid weight of 0"); + uint64_t NewTotal = Total + Amount; + + // Check for overflow. It should be impossible to overflow twice. + bool IsOverflow = NewTotal < Total; + assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow"); + DidOverflow |= IsOverflow; + + // Update the total. + Total = NewTotal; + + // Save the weight. + Weight W; + W.TargetNode = Node; + W.Amount = Amount; + W.Type = Type; + Weights.push_back(W); +} + +static void combineWeight(Weight &W, const Weight &OtherW) { + assert(OtherW.TargetNode.isValid()); + if (!W.Amount) { + W = OtherW; + return; + } + assert(W.Type == OtherW.Type); + assert(W.TargetNode == OtherW.TargetNode); + assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow"); + W.Amount += OtherW.Amount; +} +static void combineWeightsBySorting(WeightList &Weights) { + // Sort so edges to the same node are adjacent. + std::sort(Weights.begin(), Weights.end(), + [](const Weight &L, + const Weight &R) { return L.TargetNode < R.TargetNode; }); + + // Combine adjacent edges. + WeightList::iterator O = Weights.begin(); + for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E; + ++O, (I = L)) { + *O = *I; + + // Find the adjacent weights to the same node. + for (++L; L != E && I->TargetNode == L->TargetNode; ++L) + combineWeight(*O, *L); + } + + // Erase extra entries. + Weights.erase(O, Weights.end()); + return; +} +static void combineWeightsByHashing(WeightList &Weights) { + // Collect weights into a DenseMap. + typedef DenseMap HashTable; + HashTable Combined(NextPowerOf2(2 * Weights.size())); + for (const Weight &W : Weights) + combineWeight(Combined[W.TargetNode.Index], W); + + // Check whether anything changed. + if (Weights.size() == Combined.size()) + return; + + // Fill in the new weights. + Weights.clear(); + Weights.reserve(Combined.size()); + for (const auto &I : Combined) + Weights.push_back(I.second); +} +static void combineWeights(WeightList &Weights) { + // Use a hash table for many successors to keep this linear. 
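// (The 128 cutoff below is a constant-factor tradeoff, not an asymptotic
// requirement: combineWeightsBySorting() costs O(N log N) while the
// DenseMap-based combineWeightsByHashing() is expected O(N), so hashing is
// assumed to win only once the successor count is large enough to offset its
// per-element overhead.)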
+ if (Weights.size() > 128) { + combineWeightsByHashing(Weights); + return; + } + + combineWeightsBySorting(Weights); +} +static uint64_t shiftRightAndRound(uint64_t N, int Shift) { + assert(Shift >= 0); + assert(Shift < 64); + if (!Shift) + return N; + return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1)); +} +void Distribution::normalize() { + // Early exit for termination nodes. + if (Weights.empty()) + return; + + // Only bother if there are multiple successors. + if (Weights.size() > 1) + combineWeights(Weights); + + // Early exit when combined into a single successor. + if (Weights.size() == 1) { + Total = 1; + Weights.front().Amount = 1; + return; + } + + // Determine how much to shift right so that the total fits into 32-bits. + // + // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1 + // for each weight can cause a 32-bit overflow. + int Shift = 0; + if (DidOverflow) + Shift = 33; + else if (Total > UINT32_MAX) + Shift = 33 - countLeadingZeros(Total); + + // Early exit if nothing needs to be scaled. + if (!Shift) + return; + + // Recompute the total through accumulation (rather than shifting it) so that + // it's accurate after shifting. + Total = 0; + + // Sum the weights to each node and shift right if necessary. + for (Weight &W : Weights) { + // Scale down below UINT32_MAX. Since Shift is larger than necessary, we + // can round here without concern about overflow. + assert(W.TargetNode.isValid()); + W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift)); + assert(W.Amount <= UINT32_MAX); + + // Update the total. + Total += W.Amount; + } + assert(Total <= UINT32_MAX); +} + +void BlockFrequencyInfoImplBase::clear() { + // Swap with a default-constructed std::vector, since std::vector<>::clear() + // does not actually clear heap storage. + std::vector().swap(Freqs); + std::vector().swap(Working); + Loops.clear(); +} + +/// \brief Clear all memory not needed downstream. +/// +/// Releases all memory not used downstream. In particular, saves Freqs. +static void cleanup(BlockFrequencyInfoImplBase &BFI) { + std::vector SavedFreqs(std::move(BFI.Freqs)); + BFI.clear(); + BFI.Freqs = std::move(SavedFreqs); +} + +bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, + const LoopData *OuterLoop, + const BlockNode &Pred, + const BlockNode &Succ, + uint64_t Weight) { + if (!Weight) + Weight = 1; + + auto isLoopHeader = [&OuterLoop](const BlockNode &Node) { + return OuterLoop && OuterLoop->isHeader(Node); + }; + + BlockNode Resolved = Working[Succ.Index].getResolvedNode(); + +#ifndef NDEBUG + auto debugSuccessor = [&](const char *Type) { + dbgs() << " =>" + << " [" << Type << "] weight = " << Weight; + if (!isLoopHeader(Resolved)) + dbgs() << ", succ = " << getBlockName(Succ); + if (Resolved != Succ) + dbgs() << ", resolved = " << getBlockName(Resolved); + dbgs() << "\n"; + }; + (void)debugSuccessor; +#endif + + if (isLoopHeader(Resolved)) { + DEBUG(debugSuccessor("backedge")); + Dist.addBackedge(OuterLoop->getHeader(), Weight); + return true; + } + + if (Working[Resolved.Index].getContainingLoop() != OuterLoop) { + DEBUG(debugSuccessor(" exit ")); + Dist.addExit(Resolved, Weight); + return true; + } + + if (Resolved < Pred) { + if (!isLoopHeader(Pred)) { + // If OuterLoop is an irreducible loop, we can't actually handle this. + assert((!OuterLoop || !OuterLoop->isIrreducible()) && + "unhandled irreducible control flow"); + + // Irreducible backedge. Abort. 
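// (Bit-level check of shiftRightAndRound() above: N = 14, Shift = 2 yields
// (14 >> 2) + ((14 >> 1) & 1) = 3 + 1 = 4, matching 14/4 = 3.5 rounded up,
// while N = 13 yields 3 + 0 = 3 -- the added term is just the highest bit
// discarded by the shift, which is what makes it round-to-nearest.)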
+ DEBUG(debugSuccessor("abort!!!")); + return false; + } + + // If "Pred" is a loop header, then this isn't really a backedge; rather, + // OuterLoop must be irreducible. These false backedges can come only from + // secondary loop headers. + assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) && + "unhandled irreducible control flow"); + } + + DEBUG(debugSuccessor(" local ")); + Dist.addLocal(Resolved, Weight); + return true; +} + +bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( + const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) { + // Copy the exit map into Dist. + for (const auto &I : Loop.Exits) + if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, + I.second.getMass())) + // Irreducible backedge. + return false; + + return true; +} + +/// \brief Get the maximum allowed loop scale. +/// +/// Gives the maximum number of estimated iterations allowed for a loop. Very +/// large numbers cause problems downstream (even within 64-bits). +static Float getMaxLoopScale() { return Float(1, 12); } + +/// \brief Compute the loop scale for a loop. +void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { + // Compute loop scale. + DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); + + // LoopScale == 1 / ExitMass + // ExitMass == HeadMass - BackedgeMass + BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass; + + // Block scale stores the inverse of the scale. + Loop.Scale = ExitMass.toFloat().inverse(); + + DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() + << " - " << Loop.BackedgeMass << ")\n" + << " - scale = " << Loop.Scale << "\n"); + + if (Loop.Scale > getMaxLoopScale()) { + Loop.Scale = getMaxLoopScale(); + DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n"); + } +} + +/// \brief Package up a loop. +void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) { + DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n"); + + // Clear the subloop exits to prevent quadratic memory usage. + for (const BlockNode &M : Loop.Nodes) { + if (auto *Loop = Working[M.Index].getPackagedLoop()) + Loop->Exits.clear(); + DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n"); + } + Loop.IsPackaged = true; +} + +void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, + LoopData *OuterLoop, + Distribution &Dist) { + BlockMass Mass = Working[Source.Index].getMass(); + DEBUG(dbgs() << " => mass: " << Mass << "\n"); + + // Distribute mass to successors as laid out in Dist. + DitheringDistributer D(Dist, Mass); + +#ifndef NDEBUG + auto debugAssign = [&](const BlockNode &T, const BlockMass &M, + const char *Desc) { + dbgs() << " => assign " << M << " (" << D.RemMass << ")"; + if (Desc) + dbgs() << " [" << Desc << "]"; + if (T.isValid()) + dbgs() << " to " << getBlockName(T); + dbgs() << "\n"; + }; + (void)debugAssign; +#endif + + for (const Weight &W : Dist.Weights) { + // Check for a local edge (non-backedge and non-exit). + BlockMass Taken = D.takeMass(W.Amount); + if (W.Type == Weight::Local) { + Working[W.TargetNode.Index].getMass() += Taken; + DEBUG(debugAssign(W.TargetNode, Taken, nullptr)); + continue; + } + + // Backedges and exits only make sense if we're processing a loop. + assert(OuterLoop && "backedge or exit outside of loop"); + + // Check for a backedge. + if (W.Type == Weight::Backedge) { + OuterLoop->BackedgeMass += Taken; + DEBUG(debugAssign(BlockNode(), Taken, "back")); + continue; + } + + // This must be an exit. 
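// (Worked example of a full distribution, masses invented: a loop header
// holding full mass with Dist = {backedge: weight 3, exit: weight 1}.
// takeMass(3) yields 3/4 of the mass, which feeds BackedgeMass above and
// leaves RemWeight = 1, RemMass = 1/4; the exit case below then receives
// takeMass(1), i.e. the entire remainder, so the portions sum back to
// exactly the original mass -- computeLoopScale() later turns that 3/4
// BackedgeMass into Scale = 1 / (1 - 3/4) = 4.)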
+ assert(W.Type == Weight::Exit); + OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken)); + DEBUG(debugAssign(W.TargetNode, Taken, "exit")); + } +} + +static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, + const Float &Min, const Float &Max) { + // Scale the Factor to a size that creates integers. Ideally, integers would + // be scaled so that Max == UINT64_MAX so that they can be best + // differentiated. However, the register allocator currently deals poorly + // with large numbers. Instead, push Min up a little from 1 to give some + // room to differentiate small, unequal numbers. + // + // TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max. + Float ScalingFactor = Min.inverse(); + if ((Max / Min).lg() < 60) + ScalingFactor <<= 3; + + // Translate the floats to integers. + DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max + << ", factor = " << ScalingFactor << "\n"); + for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) { + Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor; + BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt()); + DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = " + << BFI.Freqs[Index].Floating << ", scaled = " << Scaled + << ", int = " << BFI.Freqs[Index].Integer << "\n"); + } +} + +/// \brief Unwrap a loop package. +/// +/// Visits all the members of a loop, adjusting their BlockData according to +/// the loop's pseudo-node. +static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { + DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop) + << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale + << "\n"); + Loop.Scale *= Loop.Mass.toFloat(); + Loop.IsPackaged = false; + DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n"); + + // Propagate the head scale through the loop. Since members are visited in + // RPO, the head scale will be updated by the loop scale first, and then the + // final head scale will be used for updating the rest of the members. + for (const BlockNode &N : Loop.Nodes) { + const auto &Working = BFI.Working[N.Index]; + Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale + : BFI.Freqs[N.Index].Floating; + Float New = Loop.Scale * F; + DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New + << "\n"); + F = New; + } +} + +void BlockFrequencyInfoImplBase::unwrapLoops() { + // Set initial frequencies from loop-local masses. + for (size_t Index = 0; Index < Working.size(); ++Index) + Freqs[Index].Floating = Working[Index].Mass.toFloat(); + + for (LoopData &Loop : Loops) + unwrapLoop(*this, Loop); +} + +void BlockFrequencyInfoImplBase::finalizeMetrics() { + // Unwrap loop packages in reverse post-order, tracking min and max + // frequencies. + auto Min = Float::getLargest(); + auto Max = Float::getZero(); + for (size_t Index = 0; Index < Working.size(); ++Index) { + // Update min/max scale. + Min = std::min(Min, Freqs[Index].Floating); + Max = std::max(Max, Freqs[Index].Floating); + } + + // Convert to integers. + convertFloatingToInteger(*this, Min, Max); + + // Clean up data structures. + cleanup(*this); + + // Print out the final stats.
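// (Scaling example for convertFloatingToInteger() above, frequencies
// invented: with Min = 2^-3 and Max = 2^10, (Max / Min).lg() = 13 < 60, so
// ScalingFactor = Min.inverse() << 3 = 2^6 and the coldest block maps to
// integer 2^3 = 8 rather than 1, leaving headroom to distinguish
// nearly-equal cold blocks.)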
+ DEBUG(dump()); +} + +BlockFrequency +BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { + if (!Node.isValid()) + return 0; + return Freqs[Node.Index].Integer; +} +Float +BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { + if (!Node.isValid()) + return Float::getZero(); + return Freqs[Node.Index].Floating; +} + +std::string +BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { + return std::string(); +} +std::string +BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const { + return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*"); +} + +raw_ostream & +BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, + const BlockNode &Node) const { + return OS << getFloatingBlockFreq(Node); +} + +raw_ostream & +BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, + const BlockFrequency &Freq) const { + Float Block(Freq.getFrequency(), 0); + Float Entry(getEntryFreq(), 0); + + return OS << Block / Entry; +} + +void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) { + Start = OuterLoop.getHeader(); + Nodes.reserve(OuterLoop.Nodes.size()); + for (auto N : OuterLoop.Nodes) + addNode(N); + indexNodes(); +} +void IrreducibleGraph::addNodesInFunction() { + Start = 0; + for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index) + if (!BFI.Working[Index].isPackaged()) + addNode(Index); + indexNodes(); +} +void IrreducibleGraph::indexNodes() { + for (auto &I : Nodes) + Lookup[I.Node.Index] = &I; +} +void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ, + const BFIBase::LoopData *OuterLoop) { + if (OuterLoop && OuterLoop->isHeader(Succ)) + return; + auto L = Lookup.find(Succ.Index); + if (L == Lookup.end()) + return; + IrrNode &SuccIrr = *L->second; + Irr.Edges.push_back(&SuccIrr); + SuccIrr.Edges.push_front(&Irr); + ++SuccIrr.NumIn; +} + +namespace llvm { +template <> struct GraphTraits { + typedef bfi_detail::IrreducibleGraph GraphT; + + typedef const GraphT::IrrNode NodeType; + typedef GraphT::IrrNode::iterator ChildIteratorType; + + static const NodeType *getEntryNode(const GraphT &G) { + return G.StartIrr; + } + static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); } +}; +} + +/// \brief Find extra irreducible headers. +/// +/// Find entry blocks and other blocks with backedges, which exist when \c G +/// contains irreducible sub-SCCs. +static void findIrreducibleHeaders( + const BlockFrequencyInfoImplBase &BFI, + const IrreducibleGraph &G, + const std::vector &SCC, + LoopData::NodeList &Headers, LoopData::NodeList &Others) { + // Map from nodes in the SCC to whether it's an entry block. + SmallDenseMap InSCC; + + // InSCC also acts the set of nodes in the graph. Seed it. + for (const auto *I : SCC) + InSCC[I] = false; + + for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) { + auto &Irr = *I->first; + for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { + if (InSCC.count(P)) + continue; + + // This is an entry block. + I->second = true; + Headers.push_back(Irr.Node); + DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n"); + break; + } + } + assert(Headers.size() >= 2 && "Should be irreducible"); + if (Headers.size() == InSCC.size()) { + // Every block is a header. + std::sort(Headers.begin(), Headers.end()); + return; + } + + // Look for extra headers from irreducible sub-SCCs. 
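// (Example for the scan below, nodes invented: in an SCC {2, 3, 4} whose
// only entry is node 2, the predecessor edge 4 -> 3 is not a forward edge
// (4 >= 3 in RPO numbering) and node 4 is not an entry block, so node 3 gets
// recorded as an extra header of an irreducible sub-SCC even though nothing
// outside the SCC branches to it.)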
+ for (const auto &I : InSCC) { + // Entry blocks are already headers. + if (I.second) + continue; + + auto &Irr = *I.first; + for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { + // Skip forward edges. + if (P->Node < Irr.Node) + continue; + + // Skip predecessors from entry blocks. These can have inverted + // ordering. + if (InSCC.lookup(P)) + continue; + + // Store the extra header. + Headers.push_back(Irr.Node); + DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n"); + break; + } + if (Headers.back() == Irr.Node) + // Added this as a header. + continue; + + // This is not a header. + Others.push_back(Irr.Node); + DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n"); + } + std::sort(Headers.begin(), Headers.end()); + std::sort(Others.begin(), Others.end()); +} + +static void createIrreducibleLoop( + BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, + LoopData *OuterLoop, std::list::iterator Insert, + const std::vector &SCC) { + // Translate the SCC into RPO. + DEBUG(dbgs() << " - found-scc\n"); + + LoopData::NodeList Headers; + LoopData::NodeList Others; + findIrreducibleHeaders(BFI, G, SCC, Headers, Others); + + auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(), + Headers.end(), Others.begin(), Others.end()); + + // Update loop hierarchy. + for (const auto &N : Loop->Nodes) + if (BFI.Working[N.Index].isLoopHeader()) + BFI.Working[N.Index].Loop->Parent = &*Loop; + else + BFI.Working[N.Index].Loop = &*Loop; +} + +iterator_range::iterator> +BlockFrequencyInfoImplBase::analyzeIrreducible( + const IrreducibleGraph &G, LoopData *OuterLoop, + std::list::iterator Insert) { + assert((OuterLoop == nullptr) == (Insert == Loops.begin())); + auto Prev = OuterLoop ? std::prev(Insert) : Loops.end(); + + for (auto I = scc_begin(G); !I.isAtEnd(); ++I) { + if (I->size() < 2) + continue; + + // Translate the SCC into RPO. 
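// (Concretely, with an invented graph: if blocks B and C branch to each
// other and both are entered from A, scc_begin(G) yields the two-node SCC
// {B, C}; the I->size() < 2 guard above skips every trivial single-block
// SCC, so exactly one irreducible LoopData with headers {B, C} is created
// here.)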
+ createIrreducibleLoop(*this, G, OuterLoop, Insert, *I); + } + + if (OuterLoop) + return make_range(std::next(Prev), Insert); + return make_range(Loops.begin(), Insert); +} + +void +BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) { + OuterLoop.Exits.clear(); + OuterLoop.BackedgeMass = BlockMass::getEmpty(); + auto O = OuterLoop.Nodes.begin() + 1; + for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I) + if (!Working[I->Index].isPackaged()) + *O++ = *I; + OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end()); +} diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index b901c54..bbd8750 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "branch-prob" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/LoopInfo.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "branch-prob" + INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfo) @@ -322,6 +323,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { InEdges.push_back(I.getSuccessorIndex()); } + if (BackEdges.empty() && ExitingEdges.empty()) + return false; + if (uint32_t numBackEdges = BackEdges.size()) { uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; if (backWeight < NORMAL_WEIGHT) @@ -557,7 +561,7 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { uint32_t Sum = 0; uint32_t MaxWeight = 0; - BasicBlock *MaxSucc = 0; + BasicBlock *MaxSucc = nullptr; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; @@ -577,7 +581,7 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) return MaxSucc; - return 0; + return nullptr; } /// Get the raw edge weight for the edge. If can't find it, return @@ -594,11 +598,9 @@ getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { return DEFAULT_WEIGHT; } -uint32_t -BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, succ_const_iterator Dst) const { - size_t index = std::distance(succ_begin(Src), Dst); - return getEdgeWeight(Src, index); +uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src, + succ_const_iterator Dst) const { + return getEdgeWeight(Src, Dst.getSuccessorIndex()); } /// Get the raw edge weight calculated for the block pair. This returns the sum diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index 6963760..8ef5302 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -123,7 +123,7 @@ static bool loopContainsBoth(const LoopInfo *LI, const BasicBlock *BB1, const BasicBlock *BB2) { const Loop *L1 = getOutermostLoop(LI, BB1); const Loop *L2 = getOutermostLoop(LI, BB2); - return L1 != NULL && L1 == L2; + return L1 != nullptr && L1 == L2; } static bool isPotentiallyReachableInner(SmallVectorImpl &Worklist, @@ -133,7 +133,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl &Worklist, // When the stop block is unreachable, it's dominated from everywhere, // regardless of whether there's a path between the two blocks. 
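// (In dominator-tree terms: an unreachable block has no path from the entry,
// and dominance treats such a block as dominated by every block, so a
// "StopBB is dominated by BB" answer proves nothing about an actual path
// from BB -- hence the code below discards DT instead of trusting it.)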
if (DT && !DT->isReachableFromEntry(StopBB)) - DT = 0; + DT = nullptr; // Limit the number of blocks we visit. The goal is to avoid run-away compile // times on large CFGs without hampering sensible code. Arbitrarily chosen. @@ -156,7 +156,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl &Worklist, return true; } - if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) { + if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) { // All blocks in a single loop are reachable from all other blocks. From // any of these blocks, we can skip directly to the exits of the loop, // ignoring any other blocks inside the loop body. @@ -200,7 +200,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, // If the block is in a loop then we can reach any instruction in the block // from any other instruction in the block by going around a backedge. - if (LI && LI->getLoopFor(BB) != 0) + if (LI && LI->getLoopFor(BB) != nullptr) return true; // Linear scan, start at 'A', see whether we hit 'B' or the end first. diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 537d6d1..c2c19d6 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Pass.h" +#include "llvm/Support/FileSystem.h" using namespace llvm; namespace { @@ -33,7 +34,7 @@ namespace { return false; } - void print(raw_ostream &OS, const Module* = 0) const override {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -56,7 +57,7 @@ namespace { return false; } - void print(raw_ostream &OS, const Module* = 0) const override {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -90,7 +91,7 @@ namespace { return false; } - void print(raw_ostream &OS, const Module* = 0) const override {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -123,7 +124,7 @@ namespace { errs() << "\n"; return false; } - void print(raw_ostream &OS, const Module* = 0) const override {} + void print(raw_ostream &OS, const Module* = nullptr) const override {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -147,8 +148,8 @@ void Function::viewCFG() const { /// viewCFGOnly - This function is meant for use from the debugger. It works /// just like viewCFG, but it does not include the contents of basic blocks -/// into the nodes, just the label. If you are only interested in the CFG t -/// his can make the graph smaller. +/// into the nodes, just the label. If you are only interested in the CFG +/// this can make the graph smaller. /// void Function::viewCFGOnly() const { ViewGraph(this, "cfg" + getName(), true); diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp new file mode 100644 index 0000000..5d1d8a9 --- /dev/null +++ b/lib/Analysis/CGSCCPassManager.cpp @@ -0,0 +1,167 @@ +//===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +static cl::opt +DebugPM("debug-cgscc-pass-manager", cl::Hidden, + cl::desc("Print CGSCC pass management debugging information")); + +PreservedAnalyses CGSCCPassManager::run(LazyCallGraph::SCC *C, + CGSCCAnalysisManager *AM) { + PreservedAnalyses PA = PreservedAnalyses::all(); + + if (DebugPM) + dbgs() << "Starting CGSCC pass manager run.\n"; + + for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) { + if (DebugPM) + dbgs() << "Running CGSCC pass: " << Passes[Idx]->name() << "\n"; + + PreservedAnalyses PassPA = Passes[Idx]->run(C, AM); + if (AM) + AM->invalidate(C, PassPA); + PA.intersect(std::move(PassPA)); + } + + if (DebugPM) + dbgs() << "Finished CGSCC pass manager run.\n"; + + return PA; +} + +bool CGSCCAnalysisManager::empty() const { + assert(CGSCCAnalysisResults.empty() == CGSCCAnalysisResultLists.empty() && + "The storage and index of analysis results disagree on how many there " + "are!"); + return CGSCCAnalysisResults.empty(); +} + +void CGSCCAnalysisManager::clear() { + CGSCCAnalysisResults.clear(); + CGSCCAnalysisResultLists.clear(); +} + +CGSCCAnalysisManager::ResultConceptT & +CGSCCAnalysisManager::getResultImpl(void *PassID, LazyCallGraph::SCC *C) { + CGSCCAnalysisResultMapT::iterator RI; + bool Inserted; + std::tie(RI, Inserted) = CGSCCAnalysisResults.insert(std::make_pair( + std::make_pair(PassID, C), CGSCCAnalysisResultListT::iterator())); + + // If we don't have a cached result for this function, look up the pass and + // run it to produce a result, which we then add to the cache. + if (Inserted) { + CGSCCAnalysisResultListT &ResultList = CGSCCAnalysisResultLists[C]; + ResultList.emplace_back(PassID, lookupPass(PassID).run(C, this)); + RI->second = std::prev(ResultList.end()); + } + + return *RI->second->second; +} + +CGSCCAnalysisManager::ResultConceptT * +CGSCCAnalysisManager::getCachedResultImpl(void *PassID, + LazyCallGraph::SCC *C) const { + CGSCCAnalysisResultMapT::const_iterator RI = + CGSCCAnalysisResults.find(std::make_pair(PassID, C)); + return RI == CGSCCAnalysisResults.end() ? nullptr : &*RI->second->second; +} + +void CGSCCAnalysisManager::invalidateImpl(void *PassID, LazyCallGraph::SCC *C) { + CGSCCAnalysisResultMapT::iterator RI = + CGSCCAnalysisResults.find(std::make_pair(PassID, C)); + if (RI == CGSCCAnalysisResults.end()) + return; + + CGSCCAnalysisResultLists[C].erase(RI->second); +} + +void CGSCCAnalysisManager::invalidateImpl(LazyCallGraph::SCC *C, + const PreservedAnalyses &PA) { + // Clear all the invalidated results associated specifically with this + // function. 
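CGSCCPassManager::run above shows the new pass-manager loop: start from PreservedAnalyses::all(), run each pass, let the analysis manager invalidate what the pass did not preserve, and intersect each pass's preserved set into the running total. A rough standard-library analogue of that control flow, under the assumption that a preserved set can be modeled as a set of analysis names (all types and names here are invented for the sketch):

#include <functional>
#include <iostream>
#include <iterator>
#include <set>
#include <string>
#include <utility>
#include <vector>

using Preserved = std::set<std::string>;  // which analyses survive a pass

// Intersect the aggregate preserved set with what one pass preserved.
static void intersect(Preserved &Agg, const Preserved &P) {
  for (auto It = Agg.begin(); It != Agg.end();)
    It = P.count(*It) ? std::next(It) : Agg.erase(It);
}

int main() {
  std::vector<std::pair<std::string, std::function<Preserved()>>> Passes = {
      {"simplify", [] { return Preserved{"loops", "domtree"}; }},
      {"vectorize", [] { return Preserved{"domtree"}; }},
  };

  Preserved Agg{"loops", "domtree", "aliasing"};  // "all", for this toy setup
  for (auto &P : Passes) {
    std::cout << "Running pass: " << P.first << "\n";
    intersect(Agg, P.second());  // run the pass, fold its preserved set in
  }
  for (const auto &A : Agg)
    std::cout << "still valid: " << A << "\n";  // only "domtree" survives
}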
+ SmallVector InvalidatedPassIDs; + CGSCCAnalysisResultListT &ResultsList = CGSCCAnalysisResultLists[C]; + for (CGSCCAnalysisResultListT::iterator I = ResultsList.begin(), + E = ResultsList.end(); + I != E;) + if (I->second->invalidate(C, PA)) { + InvalidatedPassIDs.push_back(I->first); + I = ResultsList.erase(I); + } else { + ++I; + } + while (!InvalidatedPassIDs.empty()) + CGSCCAnalysisResults.erase( + std::make_pair(InvalidatedPassIDs.pop_back_val(), C)); + CGSCCAnalysisResultLists.erase(C); +} + +char CGSCCAnalysisManagerModuleProxy::PassID; + +CGSCCAnalysisManagerModuleProxy::Result +CGSCCAnalysisManagerModuleProxy::run(Module *M) { + assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!"); + return Result(*CGAM); +} + +CGSCCAnalysisManagerModuleProxy::Result::~Result() { + // Clear out the analysis manager if we're being destroyed -- it means we + // didn't even see an invalidate call when we got invalidated. + CGAM->clear(); +} + +bool CGSCCAnalysisManagerModuleProxy::Result::invalidate( + Module *M, const PreservedAnalyses &PA) { + // If this proxy isn't marked as preserved, then we can't even invalidate + // individual CGSCC analyses, there may be an invalid set of SCC objects in + // the cache making it impossible to incrementally preserve them. + // Just clear the entire manager. + if (!PA.preserved(ID())) + CGAM->clear(); + + // Return false to indicate that this result is still a valid proxy. + return false; +} + +char ModuleAnalysisManagerCGSCCProxy::PassID; + +char FunctionAnalysisManagerCGSCCProxy::PassID; + +FunctionAnalysisManagerCGSCCProxy::Result +FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC *C) { + assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!"); + return Result(*FAM); +} + +FunctionAnalysisManagerCGSCCProxy::Result::~Result() { + // Clear out the analysis manager if we're being destroyed -- it means we + // didn't even see an invalidate call when we got invalidated. + FAM->clear(); +} + +bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate( + LazyCallGraph::SCC *C, const PreservedAnalyses &PA) { + // If this proxy isn't marked as preserved, then we can't even invalidate + // individual function analyses, there may be an invalid set of Function + // objects in the cache making it impossible to incrementally preserve them. + // Just clear the entire manager. + if (!PA.preserved(ID())) + FAM->clear(); + + // Return false to indicate that this result is still a valid proxy. + return false; +} + +char CGSCCAnalysisManagerFunctionProxy::PassID; diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index c6d4573..b546789 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -7,9 +7,11 @@ add_llvm_library(LLVMAnalysis Analysis.cpp BasicAliasAnalysis.cpp BlockFrequencyInfo.cpp + BlockFrequencyInfoImpl.cpp BranchProbabilityInfo.cpp CFG.cpp CFGPrinter.cpp + CGSCCPassManager.cpp CaptureTracking.cpp CostModel.cpp CodeMetrics.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 782acfa..0ac1cb5 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -56,7 +56,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle a vector->integer cast. 
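The manager above pairs a per-SCC list that owns the results with a flat map from (pass, SCC) to an iterator into that list; list iterators stay valid while other elements are inserted or erased, which is what makes caching them safe. A compact sketch of the same two-level cache using standard containers (strings stand in for passes and results; not the LLVM data structures):

#include <iostream>
#include <iterator>
#include <list>
#include <map>
#include <string>
#include <utility>

int main() {
  using Key = std::pair<std::string, int>;  // (pass id, unit id)
  std::map<int, std::list<std::pair<std::string, std::string>>> Lists;
  std::map<Key, std::list<std::pair<std::string, std::string>>::iterator> Index;

  auto cache = [&](const std::string &Pass, int Unit, const std::string &R) {
    auto &L = Lists[Unit];
    L.emplace_back(Pass, R);
    Index[{Pass, Unit}] = std::prev(L.end());  // list iterators stay valid
  };

  cache("domtree", 1, "result A");
  cache("loops", 1, "result B");

  // Invalidate one (pass, unit) result: erase from the owning list through
  // the cached iterator, then drop the index entry.
  auto It = Index.find({"domtree", 1});
  if (It != Index.end()) {
    Lists[1].erase(It->second);
    Index.erase(It);
  }
  std::cout << "unit 1 now holds " << Lists[1].size() << " result(s)\n";  // 1
}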
if (IntegerType *IT = dyn_cast(DestTy)) { VectorType *VTy = dyn_cast(C->getType()); - if (VTy == 0) + if (!VTy) return ConstantExpr::getBitCast(C, DestTy); unsigned NumSrcElts = VTy->getNumElements(); @@ -73,7 +73,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, } ConstantDataVector *CDV = dyn_cast(C); - if (CDV == 0) + if (!CDV) return ConstantExpr::getBitCast(C, DestTy); // Now that we know that the input value is a vector of integers, just shift @@ -93,7 +93,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // The code below only handles casts to vectors currently. VectorType *DestVTy = dyn_cast(DestTy); - if (DestVTy == 0) + if (!DestVTy) return ConstantExpr::getBitCast(C, DestTy); // If this is a scalar -> vector cast, convert the input into a <1 x scalar> @@ -411,32 +411,32 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, TD.getTypeAllocSizeInBits(LoadTy), AS); } else - return 0; + return nullptr; C = FoldBitCast(C, MapTy, TD); if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) return FoldBitCast(Res, LoadTy, TD); - return 0; + return nullptr; } unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; if (BytesLoaded > 32 || BytesLoaded == 0) - return 0; + return nullptr; GlobalValue *GVal; APInt Offset; if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) - return 0; + return nullptr; GlobalVariable *GV = dyn_cast(GVal); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || !GV->getInitializer()->getType()->isSized()) - return 0; + return nullptr; // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. if (Offset.isNegative()) - return 0; + return nullptr; // If we're not accessing anything in this constant, the result is undefined. if (Offset.getZExtValue() >= @@ -446,7 +446,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, unsigned char RawBytes[32] = {0}; if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, BytesLoaded, TD)) - return 0; + return nullptr; APInt ResultVal = APInt(IntType->getBitWidth(), 0); if (TD.isLittleEndian()) { @@ -466,6 +466,52 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, return ConstantInt::get(IntType->getContext(), ResultVal); } +static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE, + const DataLayout *DL) { + if (!DL) + return nullptr; + auto *DestPtrTy = dyn_cast(CE->getType()); + if (!DestPtrTy) + return nullptr; + Type *DestTy = DestPtrTy->getElementType(); + + Constant *C = ConstantFoldLoadFromConstPtr(CE->getOperand(0), DL); + if (!C) + return nullptr; + + do { + Type *SrcTy = C->getType(); + + // If the type sizes are the same and a cast is legal, just directly + // cast the constant. + if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) { + Instruction::CastOps Cast = Instruction::BitCast; + // If we are going from a pointer to int or vice versa, we spell the cast + // differently. + if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) + Cast = Instruction::IntToPtr; + else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) + Cast = Instruction::PtrToInt; + + if (CastInst::castIsValid(Cast, C, DestTy)) + return ConstantExpr::getCast(Cast, C, DestTy); + } + + // If this isn't an aggregate type, there is nothing we can do to drill down + // and find a bitcastable constant. 
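ConstantFoldLoadThroughBitcast, added above, repeatedly steps into element 0 of an aggregate until it reaches a leaf that can be cast directly, modeling a load at offset zero through a retyped pointer. The same walk over a toy aggregate (Node is invented for the example):

#include <iostream>
#include <vector>

// Toy stand-in for a constant aggregate: a node either holds a scalar or a
// list of children; element 0 is where a load at offset 0 would land.
struct Node {
  int Scalar = 0;
  std::vector<Node> Elems;  // empty => this is a leaf
};

// Walk down through element 0 until a leaf is found (or nothing is left),
// mirroring the do/while over C->getAggregateElement(0u).
static const Node *drillToLeaf(const Node *N) {
  while (N && !N->Elems.empty())
    N = &N->Elems[0];
  return N;
}

int main() {
  Node Root{0, {Node{0, {Node{42, {}}}}, Node{7, {}}}};
  if (const Node *Leaf = drillToLeaf(&Root))
    std::cout << "leaf at offset 0: " << Leaf->Scalar << "\n";  // 42
}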
+ if (!SrcTy->isAggregateType()) + return nullptr; + + // We're simulating a load through a pointer that was bitcast to point to + // a different type, so we can try to walk down through the initial + // elements of an aggregate to see if some part of the aggregate is + // castable to implement the "load" semantic model. + C = C->getAggregateElement(0u); + } while (C); + + return nullptr; +} + /// ConstantFoldLoadFromConstPtr - Return the value that a load from C would /// produce if it is constant and determinable. If this is not determinable, /// return null. @@ -479,7 +525,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If the loaded value isn't a constant expr, we can't handle it. ConstantExpr *CE = dyn_cast<ConstantExpr>(C); if (!CE) - return 0; + return nullptr; if (CE->getOpcode() == Instruction::GetElementPtr) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) { @@ -491,6 +537,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, } } + if (CE->getOpcode() == Instruction::BitCast) + if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD)) + return LoadedC; + // Instead of loading constant c string, use corresponding integer value // directly if string length is small enough. StringRef Str; @@ -542,16 +592,16 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // Try hard to fold loads from bitcasted strange and non-type-safe things. if (TD) return FoldReinterpretLoadFromConstPtr(CE, *TD); - return 0; + return nullptr; } static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ - if (LI->isVolatile()) return 0; + if (LI->isVolatile()) return nullptr; if (Constant *C = dyn_cast<Constant>(LI->getOperand(0))) return ConstantFoldLoadFromConstPtr(C, TD); - return 0; + return nullptr; } /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. @@ -571,8 +621,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType()); APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); - ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); - ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + computeKnownBits(Op0, KnownZero0, KnownOne0, DL); + computeKnownBits(Op1, KnownZero1, KnownOne1, DL); if ((KnownOne1 | KnownZero0).isAllOnesValue()) { // All the bits of Op0 that the 'and' could be masking are already zero.
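The known-bits fold above (which concludes by returning Op0) is sound because for a = x & y, a bit known one in y passes x's bit through unchanged, and a bit known zero in x is already zero; if every bit position is covered by one of those two cases, the AND is a no-op. A concrete check with plain integers (masks chosen arbitrarily for the example):

#include <cassert>
#include <cstdint>

int main() {
  // Suppose analysis proved: every bit of A above bit 3 is zero
  // (KnownZero0 covers bits 4..31) and every bit of B below bit 8 is one
  // (KnownOne1 covers bits 0..7).
  uint32_t KnownZero0 = 0xFFFFFFF0u;
  uint32_t KnownOne1 = 0x000000FFu;
  assert((KnownOne1 | KnownZero0) == 0xFFFFFFFFu);  // the fold's trigger

  // Then A & B == A for every A, B consistent with those facts.
  for (uint32_t A = 0; A <= 0xF; ++A) {  // A honors KnownZero0
    uint32_t B = 0xFFu | (A << 16);      // B honors KnownOne1
    assert((A & B) == A);
  }
  return 0;
}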
return Op0; @@ -608,7 +658,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, } } - return 0; + return nullptr; } /// CastGEPIndices - If array indices are not pointer-sized integers, @@ -618,7 +668,7 @@ static Constant *CastGEPIndices(ArrayRef Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TD) - return 0; + return nullptr; Type *IntPtrTy = TD->getIntPtrType(ResultTy); @@ -641,7 +691,7 @@ static Constant *CastGEPIndices(ArrayRef Ops, } if (!Any) - return 0; + return nullptr; Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); if (ConstantExpr *CE = dyn_cast(C)) { @@ -676,7 +726,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, Constant *Ptr = Ops[0]; if (!TD || !Ptr->getType()->getPointerElementType()->isSized() || !Ptr->getType()->isPointerTy()) - return 0; + return nullptr; Type *IntPtrTy = TD->getIntPtrType(Ptr->getType()); Type *ResultElementTy = ResultTy->getPointerElementType(); @@ -690,7 +740,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // "inttoptr (sub (ptrtoint Ptr), V)" if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) { ConstantExpr *CE = dyn_cast(Ops[1]); - assert((CE == 0 || CE->getType() == IntPtrTy) && + assert((!CE || CE->getType() == IntPtrTy) && "CastGEPIndices didn't canonicalize index types!"); if (CE && CE->getOpcode() == Instruction::Sub && CE->getOperand(0)->isNullValue()) { @@ -702,7 +752,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, return Res; } } - return 0; + return nullptr; } unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); @@ -765,7 +815,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // Only handle pointers to sized types, not pointers to functions. if (!ATy->getElementType()->isSized()) - return 0; + return nullptr; } // Determine which element of the array the offset points into. @@ -810,7 +860,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // type, then the offset is pointing into the middle of an indivisible // member, so we can't simplify it. if (Offset != 0) - return 0; + return nullptr; // Create a GEP. Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); @@ -841,7 +891,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (PHINode *PN = dyn_cast(I)) { - Constant *CommonValue = 0; + Constant *CommonValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PN->getIncomingValue(i); @@ -854,14 +904,14 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, // If the incoming value is not a constant, then give up. Constant *C = dyn_cast(Incoming); if (!C) - return 0; + return nullptr; // Fold the PHI's operands. if (ConstantExpr *NewC = dyn_cast(C)) C = ConstantFoldConstantExpression(NewC, TD, TLI); // If the incoming value is a different constant to // the one we saw previously, then give up. if (CommonValue && C != CommonValue) - return 0; + return nullptr; CommonValue = C; } @@ -876,7 +926,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) { Constant *Op = dyn_cast(*i); if (!Op) - return 0; // All operands not constant! + return nullptr; // All operands not constant! // Fold the Instruction's operands. 
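The PHI case of ConstantFoldInstruction above folds a phi only when every incoming value reduces to one and the same constant, giving up at the first non-constant or disagreeing entry. The same control flow in standard C++, with std::optional standing in for "constant or not":

#include <iostream>
#include <optional>
#include <vector>

// Fold a phi to a constant iff all incoming values agree; std::nullopt
// models an incoming value that did not fold to a constant.
static std::optional<int> foldPhi(const std::vector<std::optional<int>> &In) {
  std::optional<int> Common;
  for (const auto &V : In) {
    if (!V)
      return std::nullopt;  // a non-constant input: give up
    if (Common && *Common != *V)
      return std::nullopt;  // two different constants: give up
    Common = V;
  }
  return Common;
}

int main() {
  std::cout << foldPhi({{3}, {3}, {3}}).value_or(-1) << "\n";   // 3
  std::cout << foldPhi({{3}, {4}}).has_value() << "\n";         // 0 (no fold)
}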
if (ConstantExpr *NewCE = dyn_cast(Op)) @@ -966,14 +1016,14 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, } switch (Opcode) { - default: return 0; + default: return nullptr; case Instruction::ICmp: case Instruction::FCmp: llvm_unreachable("Invalid for compares"); case Instruction::Call: if (Function *F = dyn_cast(Ops.back())) if (canConstantFoldCallTo(F)) return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI); - return 0; + return nullptr; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing // the width of a pointer, so it can't be done in ConstantExpr::getCast. @@ -1142,14 +1192,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE) { if (!CE->getOperand(1)->isNullValue()) - return 0; // Do not allow stepping over the value! + return nullptr; // Do not allow stepping over the value! // Loop over all of the operands, tracking down which value we are // addressing. for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { C = C->getAggregateElement(CE->getOperand(i)); - if (C == 0) - return 0; + if (!C) + return nullptr; } return C; } @@ -1164,8 +1214,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // addressing. for (unsigned i = 0, e = Indices.size(); i != e; ++i) { C = C->getAggregateElement(Indices[i]); - if (C == 0) - return 0; + if (!C) + return nullptr; } return C; } @@ -1270,7 +1320,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, V = NativeFP(V); if (sys::llvm_fenv_testexcept()) { sys::llvm_fenv_clearexcept(); - return 0; + return nullptr; } return GetConstantFoldFPValue(V, Ty); @@ -1282,7 +1332,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), V = NativeFP(V, W); if (sys::llvm_fenv_testexcept()) { sys::llvm_fenv_clearexcept(); - return 0; + return nullptr; } return GetConstantFoldFPValue(V, Ty); @@ -1311,7 +1361,7 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val, /*isSigned=*/true, mode, &isExact); if (status != APFloat::opOK && status != APFloat::opInexact) - return 0; + return nullptr; return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true); } @@ -1345,7 +1395,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) - return 0; + return nullptr; if (IntrinsicID == Intrinsic::round) { APFloat V = Op->getValueAPF(); @@ -1357,7 +1407,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity()) - return 0; + return nullptr; /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. 
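ConstantFoldFP and ConstantFoldBinaryFP above bracket each host libm call with llvm_fenv_clearexcept/llvm_fenv_testexcept so that a call which raises a floating-point exception is not folded. A standalone approximation with <cfenv> (it assumes, matching LLVM's helper, that FE_INEXACT is deliberately ignored; strictly conforming code would also want #pragma STDC FENV_ACCESS ON):

#include <cfenv>
#include <cmath>
#include <iostream>

// Clear the FP exception flags, evaluate the host function, and refuse to
// fold if anything other than "inexact" was raised.
static bool tryFold(double (*NativeFP)(double), double V, double &Out) {
  std::feclearexcept(FE_ALL_EXCEPT);
  Out = NativeFP(V);
  if (std::fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT)) {
    std::feclearexcept(FE_ALL_EXCEPT);
    return false;  // leave the call to run at execution time
  }
  return true;
}

int main() {
  double R;
  std::cout << tryFold([](double X) { return std::sqrt(X); }, 2.0, R)
            << "\n";  // 1: only inexact is raised, safe to fold
  std::cout << tryFold([](double X) { return std::log(X); }, 0.0, R)
            << "\n";  // 0: raises divide-by-zero, do not fold
}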
Float versions are not called @@ -1396,7 +1446,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } if (!TLI) - return 0; + return nullptr; switch (Name[0]) { case 'a': @@ -1467,7 +1517,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, default: break; } - return 0; + return nullptr; } if (ConstantInt *Op = dyn_cast(Operands[0])) { @@ -1491,7 +1541,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFP::get(Ty->getContext(), Val); } default: - return 0; + return nullptr; } } @@ -1523,21 +1573,21 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, if (isa(Operands[0])) { if (IntrinsicID == Intrinsic::bswap) return Operands[0]; - return 0; + return nullptr; } - return 0; + return nullptr; } if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast(Operands[0])) { if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) - return 0; + return nullptr; double Op1V = getValueAsDouble(Op1); if (ConstantFP *Op2 = dyn_cast(Operands[1])) { if (Op2->getType() != Op1->getType()) - return 0; + return nullptr; double Op2V = getValueAsDouble(Op2); if (IntrinsicID == Intrinsic::pow) { @@ -1550,7 +1600,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFP::get(Ty->getContext(), V1); } if (!TLI) - return 0; + return nullptr; if (Name == "pow" && TLI->has(LibFunc::pow)) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); if (Name == "fmod" && TLI->has(LibFunc::fmod)) @@ -1571,7 +1621,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, APFloat((double)std::pow((double)Op1V, (int)Op2C->getZExtValue()))); } - return 0; + return nullptr; } if (ConstantInt *Op1 = dyn_cast(Operands[0])) { @@ -1624,13 +1674,13 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } } - return 0; + return nullptr; } - return 0; + return nullptr; } if (Operands.size() != 3) - return 0; + return nullptr; if (const ConstantFP *Op1 = dyn_cast(Operands[0])) { if (const ConstantFP *Op2 = dyn_cast(Operands[1])) { @@ -1646,14 +1696,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, if (s != APFloat::opInvalidOp) return ConstantFP::get(Ty->getContext(), V); - return 0; + return nullptr; } } } } } - return 0; + return nullptr; } static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID, @@ -1690,7 +1740,7 @@ Constant * llvm::ConstantFoldCall(Function *F, ArrayRef Operands, const TargetLibraryInfo *TLI) { if (!F->hasName()) - return 0; + return nullptr; StringRef Name = F->getName(); Type *Ty = F->getReturnType(); diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index b49211d..780b1aa 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -17,8 +17,6 @@ // //===----------------------------------------------------------------------===// -#define CM_NAME "cost-model" -#define DEBUG_TYPE CM_NAME #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -32,6 +30,9 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define CM_NAME "cost-model" +#define DEBUG_TYPE CM_NAME + static cl::opt EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns.")); @@ -41,7 +42,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - CostModelAnalysis() : FunctionPass(ID), 
F(0), TTI(0) { + CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -101,24 +102,13 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { // Check for a splat of a constant or for a non uniform vector of constants. if (isa(V) || isa(V)) { OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; - if (cast(V)->getSplatValue() != NULL) + if (cast(V)->getSplatValue() != nullptr) OpInfo = TargetTransformInfo::OK_UniformConstantValue; } return OpInfo; } -static bool matchMask(SmallVectorImpl &M1, SmallVectorImpl &M2) { - if (M1.size() != M2.size()) - return false; - - for (unsigned i = 0, e = M1.size(); i != e; ++i) - if (M1[i] != M2[i]) - return false; - - return true; -} - static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level) { // We don't need a shuffle if we just want to have element 0 in position 0 of @@ -136,7 +126,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, Mask[i] = val; SmallVector ActualMask = SI->getShuffleMask(); - if (!matchMask(Mask, ActualMask)) + if (Mask != ActualMask) return false; return true; @@ -150,7 +140,7 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, // <4 x i32> // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 - if (BinOp == 0) + if (BinOp == nullptr) return false; assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); @@ -171,9 +161,9 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, return false; // Shuffle inputs must match. - Value *NextLevelOpL = LS ? LS->getOperand(0) : 0; - Value *NextLevelOpR = RS ? RS->getOperand(0) : 0; - Value *NextLevelOp = 0; + Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; + Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; + Value *NextLevelOp = nullptr; if (NextLevelOpR && NextLevelOpL) { // If we have two shuffles their operands must match. if (NextLevelOpL != NextLevelOpR) @@ -198,7 +188,7 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, // Check that the next levels binary operation exists and matches with the // current one. - BinaryOperator *NextLevelBinOp = 0; + BinaryOperator *NextLevelBinOp = nullptr; if (Level + 1 != NumLevels) { if (!(NextLevelBinOp = dyn_cast(NextLevelOp))) return false; @@ -277,7 +267,7 @@ getShuffleAndOtherOprd(BinaryOperator *B) { Value *L = B->getOperand(0); Value *R = B->getOperand(1); - ShuffleVectorInst *S = 0; + ShuffleVectorInst *S = nullptr; if ((S = dyn_cast(L))) return std::make_pair(R, S); @@ -337,7 +327,7 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); // Check the current reduction operation and the shuffle use the same value. 
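The deleted matchMask helper above re-implemented what the container already supplies: SmallVector, like the standard containers, defines operator== as a size check plus elementwise comparison. The standard-library equivalent:

#include <cassert>
#include <vector>

int main() {
  std::vector<int> M1 = {0, 2, 4, 6};
  std::vector<int> M2 = {0, 2, 4, 6};
  std::vector<int> M3 = {0, 2, 4};

  // operator== already performs the size check plus elementwise compare
  // that the removed matchMask loop spelled out by hand.
  assert(M1 == M2);
  assert(!(M1 == M3));  // different lengths never compare equal
  return 0;
}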
- if (Shuffle == 0) + if (Shuffle == nullptr) return false; if (Shuffle->getOperand(0) != NextRdxOp) return false; @@ -349,7 +339,7 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); SmallVector Mask = Shuffle->getShuffleMask(); - if (!matchMask(ShuffleMask, Mask)) + if (ShuffleMask != Mask) return false; RdxOp = NextRdxOp; @@ -478,7 +468,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, - 0); + nullptr); return -1; } case Instruction::Call: diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp index fd4a2f0..9334ceb 100644 --- a/lib/Analysis/Delinearization.cpp +++ b/lib/Analysis/Delinearization.cpp @@ -14,8 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DL_NAME "delinearize" -#define DEBUG_TYPE DL_NAME #include "llvm/IR/Constants.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" @@ -34,6 +32,9 @@ using namespace llvm; +#define DL_NAME "delinearize" +#define DEBUG_TYPE DL_NAME + namespace { class Delinearization : public FunctionPass { @@ -51,7 +52,7 @@ public: } bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override; - void print(raw_ostream &O, const Module *M = 0) const override; + void print(raw_ostream &O, const Module *M = nullptr) const override; }; } // end anonymous namespace @@ -76,7 +77,7 @@ static Value *getPointerOperand(Instruction &Inst) { return Store->getPointerOperand(); else if (GetElementPtrInst *Gep = dyn_cast(&Inst)) return Gep->getPointerOperand(); - return NULL; + return nullptr; } void Delinearization::print(raw_ostream &O, const Module *) const { @@ -92,25 +93,38 @@ void Delinearization::print(raw_ostream &O, const Module *) const { const BasicBlock *BB = Inst->getParent(); // Delinearize the memory access as analyzed in all the surrounding loops. // Do not analyze memory accesses outside loops. - for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) { + for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) { const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + + const SCEVUnknown *BasePointer = + dyn_cast(SE->getPointerBase(AccessFn)); + // Do not delinearize if we cannot find the base pointer. + if (!BasePointer) + break; + AccessFn = SE->getMinusSCEV(AccessFn, BasePointer); const SCEVAddRecExpr *AR = dyn_cast(AccessFn); // Do not try to delinearize memory accesses that are not AddRecs. 
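Delinearization, which this printer and the DependenceAnalysis changes further below rely on, recovers per-dimension subscripts from a flat offset once the array sizes are known: divide by each size from the innermost dimension outward and keep the remainders. A worked standalone version (the array shape is chosen arbitrarily for the example):

#include <cassert>
#include <cstddef>
#include <vector>

// Given a linear offset into a row-major array with dimensions Sizes,
// recover the subscript in each dimension by div/mod, innermost first.
static std::vector<std::size_t>
delinearize(std::size_t Offset, const std::vector<std::size_t> &Sizes) {
  std::vector<std::size_t> Subs(Sizes.size());
  for (std::size_t d = Sizes.size(); d-- > 0;) {
    Subs[d] = Offset % Sizes[d];
    Offset /= Sizes[d];
  }
  return Subs;
}

int main() {
  // A[4][5][6]: the offset of A[i][j][k] is (i*5 + j)*6 + k.
  std::size_t i = 2, j = 3, k = 4;
  std::size_t Offset = (i * 5 + j) * 6 + k;  // 82
  auto Subs = delinearize(Offset, {4, 5, 6});
  assert(Subs[0] == i && Subs[1] == j && Subs[2] == k);
  return 0;
}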
if (!AR) break; + + O << "\n"; + O << "Inst:" << *Inst << "\n"; + O << "In Loop with Header: " << L->getHeader()->getName() << "\n"; O << "AddRec: " << *AR << "\n"; SmallVector Subscripts, Sizes; - const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes); - int Size = Subscripts.size(); - if (Res == AR || Size == 0) { + AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst)); + if (Subscripts.size() == 0 || Sizes.size() == 0 || + Subscripts.size() != Sizes.size()) { O << "failed to delinearize\n"; continue; } - O << "Base offset: " << *Res << "\n"; + + O << "Base offset: " << *BasePointer << "\n"; O << "ArrayDecl[UnknownSize]"; + int Size = Subscripts.size(); for (int i = 0; i < Size - 1; i++) O << "[" << *Sizes[i] << "]"; O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index ff98611..d0784f1 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -51,8 +51,6 @@ // // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "da" - #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -69,6 +67,8 @@ using namespace llvm; +#define DEBUG_TYPE "da" + //===----------------------------------------------------------------------===// // statistics @@ -234,7 +234,7 @@ FullDependence::FullDependence(Instruction *Source, Levels(CommonLevels), LoopIndependent(PossiblyLoopIndependent) { Consistent = true; - DV = CommonLevels ? new DVEntry[CommonLevels] : NULL; + DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; } // The rest are simple getters that hide the implementation. @@ -658,7 +658,7 @@ Value *getPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast(I)) return SI->getPointerOperand(); llvm_unreachable("Value is not load or store instruction"); - return 0; + return nullptr; } @@ -932,7 +932,7 @@ const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L, const SCEV *UB = SE->getBackedgeTakenCount(L); return SE->getNoopOrZeroExtend(UB, T); } - return NULL; + return nullptr; } @@ -943,7 +943,7 @@ const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L, ) const { if (const SCEV *UB = collectUpperBound(L, T)) return dyn_cast(UB); - return NULL; + return nullptr; } @@ -2194,7 +2194,7 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { if (const SCEVConstant *Constant = dyn_cast(Product->getOperand(Op))) return Constant; } - return NULL; + return nullptr; } @@ -2646,8 +2646,8 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart), @@ -2687,8 +2687,8 @@ void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity. 
+ Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff); const SCEV *NegativePart = getNegativePart(Delta); @@ -2729,8 +2729,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Iter_1 = SE->getMinusSCEV(Bound[K].Iterations, @@ -2776,8 +2776,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, BoundInfo *Bound, unsigned K) const { - Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity. - Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity. + Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. + Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { const SCEV *Iter_1 = SE->getMinusSCEV(Bound[K].Iterations, @@ -2829,7 +2829,7 @@ DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, CI[K].Coeff = Zero; CI[K].PosPart = Zero; CI[K].NegPart = Zero; - CI[K].Iterations = NULL; + CI[K].Iterations = nullptr; } while (const SCEVAddRecExpr *AddRec = dyn_cast(Subscript)) { const Loop *L = AddRec->getLoop(); @@ -2872,7 +2872,7 @@ const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const { if (Bound[K].Lower[Bound[K].Direction]) Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]); else - Sum = NULL; + Sum = nullptr; } return Sum; } @@ -2888,7 +2888,7 @@ const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const { if (Bound[K].Upper[Bound[K].Direction]) Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]); else - Sum = NULL; + Sum = nullptr; } return Sum; } @@ -3148,12 +3148,12 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, } else if (CurConstraint.isLine()) { Level.Scalar = false; - Level.Distance = NULL; + Level.Distance = nullptr; // direction should be accurate } else if (CurConstraint.isPoint()) { Level.Scalar = false; - Level.Distance = NULL; + Level.Distance = nullptr; unsigned NewDirection = Dependence::DVEntry::NONE; if (!isKnownPredicate(CmpInst::ICMP_NE, CurConstraint.getY(), @@ -3180,59 +3180,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, /// source and destination array references are recurrences on a nested loop, /// this function flattens the nested recurrences into separate recurrences /// for each loop level. 
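In the Banerjee bounds above, a null SCEV encodes an infinite bound, and getLowerBound/getUpperBound propagate it: a single unbounded level makes the whole sum unbounded. The same convention written with std::optional (toy integer bounds, not the SCEV API):

#include <iostream>
#include <optional>
#include <vector>

using Bound = std::optional<long>;  // nullopt plays the role of +/- infinity

// Sum per-level bounds; any unbounded level makes the total unbounded,
// mirroring how getLowerBound/getUpperBound null out the running Sum.
static Bound sumBounds(const std::vector<Bound> &Levels) {
  long Total = 0;
  for (const Bound &B : Levels) {
    if (!B)
      return std::nullopt;
    Total += *B;
  }
  return Total;
}

int main() {
  std::cout << sumBounds({{2}, {3}, {5}}).value_or(-1) << "\n";    // 10
  std::cout << sumBounds({{2}, std::nullopt}).has_value() << "\n"; // 0
}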
-bool -DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, - SmallVectorImpl &Pair) const { +bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, + const SCEV *DstSCEV, + SmallVectorImpl &Pair, + const SCEV *ElementSize) const { + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(SrcSCEV)); + const SCEVUnknown *DstBase = + dyn_cast(SE->getPointerBase(DstSCEV)); + + if (!SrcBase || !DstBase || SrcBase != DstBase) + return false; + + SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); + DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); + const SCEVAddRecExpr *SrcAR = dyn_cast(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast(DstSCEV); if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) return false; - SmallVector SrcSubscripts, DstSubscripts, SrcSizes, DstSizes; - const SCEV *RemainderS = SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes); - const SCEV *RemainderD = DstAR->delinearize(*SE, DstSubscripts, DstSizes); + // First step: collect parametric terms in both array references. + SmallVector Terms; + SrcAR->collectParametricTerms(*SE, Terms); + DstAR->collectParametricTerms(*SE, Terms); - int size = SrcSubscripts.size(); - // Fail when there is only a subscript: that's a linearized access function. - if (size < 2) - return false; - - int dstSize = DstSubscripts.size(); - // Fail when the number of subscripts in Src and Dst differ. - if (size != dstSize) - return false; + // Second step: find subscript sizes. + SmallVector Sizes; + SE->findArrayDimensions(Terms, Sizes, ElementSize); - // Fail when the size of any of the subscripts in Src and Dst differs: the - // dependence analysis assumes that elements in the same array have same size. - // SCEV delinearization does not have a context based on which it would decide - // globally the size of subscripts that would best fit all the array accesses. - for (int i = 0; i < size; ++i) - if (SrcSizes[i] != DstSizes[i]) - return false; + // Third step: compute the access functions for each subscript. + SmallVector SrcSubscripts, DstSubscripts; + SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes); + DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes); - // When the difference in remainders is different than a constant it might be - // that the base address of the arrays is not the same. - const SCEV *DiffRemainders = SE->getMinusSCEV(RemainderS, RemainderD); - if (!isa(DiffRemainders)) + // Fail when there is only a subscript: that's a linearized access function. + if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || + SrcSubscripts.size() != DstSubscripts.size()) return false; - // Normalize the last dimension: integrate the size of the "scalar dimension" - // and the remainder of the delinearization. 
- DstSubscripts[size-1] = SE->getMulExpr(DstSubscripts[size-1], - DstSizes[size-1]); - SrcSubscripts[size-1] = SE->getMulExpr(SrcSubscripts[size-1], - SrcSizes[size-1]); - SrcSubscripts[size-1] = SE->getAddExpr(SrcSubscripts[size-1], RemainderS); - DstSubscripts[size-1] = SE->getAddExpr(DstSubscripts[size-1], RemainderD); + int size = SrcSubscripts.size(); -#ifndef NDEBUG - DEBUG(errs() << "\nSrcSubscripts: "); - for (int i = 0; i < size; i++) - DEBUG(errs() << *SrcSubscripts[i]); - DEBUG(errs() << "\nDstSubscripts: "); - for (int i = 0; i < size; i++) - DEBUG(errs() << *DstSubscripts[i]); -#endif + DEBUG({ + dbgs() << "\nSrcSubscripts: "; + for (int i = 0; i < size; i++) + dbgs() << *SrcSubscripts[i]; + dbgs() << "\nDstSubscripts: "; + for (int i = 0; i < size; i++) + dbgs() << *DstSubscripts[i]; + }); // The delinearization transforms a single-subscript MIV dependence test into // a multi-subscript SIV dependence test that is easier to compute. So we @@ -3290,7 +3286,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) // if both instructions don't reference memory, there's no dependence - return NULL; + return nullptr; if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. @@ -3310,7 +3306,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case AliasAnalysis::NoAlias: // If the objects noalias, they are distinct, accesses are independent. DEBUG(dbgs() << "no alias\n"); - return NULL; + return nullptr; case AliasAnalysis::MustAlias: break; // The underlying objects alias; test accesses for dependence. } @@ -3363,7 +3359,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, } if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { DEBUG(dbgs() << " delinerized GEP\n"); Pairs = Pair.size(); } @@ -3505,26 +3501,26 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case Subscript::ZIV: DEBUG(dbgs() << ", ZIV\n"); if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) - return NULL; + return nullptr; break; case Subscript::SIV: { DEBUG(dbgs() << ", SIV\n"); unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, SplitIter)) - return NULL; + return nullptr; break; } case Subscript::RDIV: DEBUG(dbgs() << ", RDIV\n"); if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) - return NULL; + return nullptr; break; case Subscript::MIV: DEBUG(dbgs() << ", MIV\n"); if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) - return NULL; + return nullptr; break; default: llvm_unreachable("subscript has unexpected classification"); @@ -3558,16 +3554,16 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); // SJ is an SIV subscript that's part of the current coupled group unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; DEBUG(dbgs() << "SIV\n"); if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, SplitIter)) - return NULL; + return nullptr; ConstrainedLevels.set(Level); if (intersectConstraints(&Constraints[Level], &NewConstraint)) { if (Constraints[Level].isEmpty()) { ++DeltaIndependence; - return NULL; + return nullptr; } Changed = true; } @@ 
-3593,7 +3589,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, case Subscript::ZIV: DEBUG(dbgs() << "ZIV\n"); if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) - return NULL; + return nullptr; Mivs.reset(SJ); break; case Subscript::SIV: @@ -3616,7 +3612,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if (Pair[SJ].Classification == Subscript::RDIV) { DEBUG(dbgs() << "RDIV test\n"); if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) - return NULL; + return nullptr; // I don't yet understand how to propagate RDIV results Mivs.reset(SJ); } @@ -3629,7 +3625,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, if (Pair[SJ].Classification == Subscript::MIV) { DEBUG(dbgs() << "MIV test\n"); if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) - return NULL; + return nullptr; } else llvm_unreachable("expected only MIV subscripts at this point"); @@ -3641,7 +3637,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) { updateDirection(Result.DV[SJ - 1], Constraints[SJ]); if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) - return NULL; + return nullptr; } } } @@ -3676,11 +3672,11 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, } } if (AllEqual) - return NULL; + return nullptr; } FullDependence *Final = new FullDependence(Result); - Result.DV = NULL; + Result.DV = nullptr; return Final; } @@ -3787,7 +3783,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, } if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { DEBUG(dbgs() << " delinerized GEP\n"); Pairs = Pair.size(); } @@ -3853,11 +3849,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, switch (Pair[SI].Classification) { case Subscript::SIV: { unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, SplitIter); if (Level == SplitLevel) { - assert(SplitIter != NULL); + assert(SplitIter != nullptr); return SplitIter; } break; @@ -3892,7 +3888,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) { // SJ is an SIV subscript that's part of the current coupled group unsigned Level; - const SCEV *SplitIter = NULL; + const SCEV *SplitIter = nullptr; (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, SplitIter); if (Level == SplitLevel && SplitIter) @@ -3933,5 +3929,5 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, } } llvm_unreachable("somehow reached end of routine"); - return NULL; + return nullptr; } diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index f0787f1..74594f8 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -40,12 +40,12 @@ const DominanceFrontier::DomSetType & DominanceFrontier::calculate(const DominatorTree &DT, const DomTreeNode *Node) { BasicBlock *BB = Node->getBlock(); - DomSetType *Result = NULL; + DomSetType *Result = nullptr; std::vector workList; SmallPtrSet visited; - workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL)); + workList.push_back(DFCalculateWorkObject(BB, nullptr, Node, nullptr)); do { DFCalculateWorkObject *currentW = &workList.back(); assert (currentW && "Missing work object."); diff 
--git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index f43675b..caec253 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -21,14 +21,14 @@ using namespace llvm; // CallGraph::CallGraph(Module &M) - : M(M), Root(0), ExternalCallingNode(getOrInsertFunction(0)), - CallsExternalNode(new CallGraphNode(0)) { + : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), + CallsExternalNode(new CallGraphNode(nullptr)) { // Add every function to the call graph. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) addToCallGraph(I); // If we didn't find a main function, use the external call graph node - if (Root == 0) + if (!Root) Root = ExternalCallingNode; } @@ -210,7 +210,7 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) { for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) { assert(I != CalledFunctions.end() && "Cannot find callee to remove!"); CallRecord &CR = *I; - if (CR.second == Callee && CR.first == 0) { + if (CR.second == Callee && CR.first == nullptr) { Callee->DropRef(); *I = CalledFunctions.back(); CalledFunctions.pop_back(); @@ -267,7 +267,7 @@ INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction", char CallGraphWrapperPass::ID = 0; -void CallGraphWrapperPass::releaseMemory() { G.reset(0); } +void CallGraphWrapperPass::releaseMemory() { G.reset(nullptr); } void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { if (!G) { @@ -280,7 +280,7 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void CallGraphWrapperPass::dump() const { print(dbgs(), 0); } +void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } #endif // Enuse that users of CallGraph.h also link with this file diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index aafc085..bfab744 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "cgscc-passmgr" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" @@ -23,12 +22,15 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManagers.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "cgscc-passmgr" + static cl::opt MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4)); @@ -112,7 +114,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, bool Changed = false; PMDataManager *PM = P->getAsPMDataManager(); - if (PM == 0) { + if (!PM) { CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; if (!CallGraphUpToDate) { DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); @@ -144,8 +146,11 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, I != E; ++I) { if (Function *F = (*I)->getFunction()) { dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); - TimeRegion PassTimer(getPassTimer(FPP)); - Changed |= FPP->runOnFunction(*F); + { + TimeRegion PassTimer(getPassTimer(FPP)); + Changed |= FPP->runOnFunction(*F); + } + F->getContext().yield(); } } @@ -190,7 +195,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, 
SCCIdx != E; ++SCCIdx, ++FunctionNo) { CallGraphNode *CGN = *SCCIdx; Function *F = CGN->getFunction(); - if (F == 0 || F->isDeclaration()) continue; + if (!F || F->isDeclaration()) continue; // Walk the function body looking for call sites. Sync up the call sites in // CGN with those actually in the function. @@ -203,7 +208,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { // If this call site is null, then the function pass deleted the call // entirely and the WeakVH nulled it out. - if (I->first == 0 || + if (!I->first || // If we've already seen this call site, then the FunctionPass RAUW'd // one call with another, which resulted in two "uses" in the edge // list of the same call. @@ -217,7 +222,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, "CallGraphSCCPass did not update the CallGraph correctly!"); // If this was an indirect call site, count it. - if (I->second->getFunction() == 0) + if (!I->second->getFunction()) ++NumIndirectRemoved; else ++NumDirectRemoved; @@ -273,7 +278,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, // site could be turned direct), don't reject it in checking mode, and // don't tweak it to be more precise. if (CheckingMode && CS.getCalledFunction() && - ExistingNode->getFunction() == 0) + ExistingNode->getFunction() == nullptr) continue; assert(!CheckingMode && @@ -286,7 +291,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, CalleeNode = CG.getOrInsertFunction(Callee); // Keep track of whether we turned an indirect call into a direct // one. - if (ExistingNode->getFunction() == 0) { + if (!ExistingNode->getFunction()) { DevirtualizedCall = true; DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" << Callee->getName() << "'\n"); @@ -434,8 +439,8 @@ bool CGPassManager::runOnModule(Module &M) { while (!CGI.isAtEnd()) { // Copy the current SCC and increment past it so that the pass can hack // on the SCC if it wants to without invalidating our iterator. 
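The comment above explains why the SCC iterator is copied and incremented before the passes run: mutation during processing must not invalidate the iterator that is kept. The same idiom with a standard list:

#include <iostream>
#include <list>

int main() {
  std::list<int> Work = {1, 2, 3, 4};

  // Advance the iterator *before* working on the current element, so the
  // work (which may erase that element) cannot invalidate the iterator we
  // keep -- the same reason CGPassManager does ++CGI before running passes
  // on the copied SCC.
  for (auto It = Work.begin(); It != Work.end();) {
    auto Cur = It;
    ++It;             // step past the element first
    if (*Cur % 2 == 0)
      Work.erase(Cur);  // safe: It no longer refers to *Cur
  }
  for (int V : Work)
    std::cout << V << ' ';  // prints: 1 3
  std::cout << '\n';
}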
- std::vector &NodeVec = *CGI; - CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size()); + const std::vector &NodeVec = *CGI; + CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size()); ++CGI; // At the top level, we run all the passes in this pass manager on the diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index f4097e4..607c068 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globalsmodref-aa" #include "llvm/Analysis/Passes.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" @@ -33,6 +32,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "globalsmodref-aa" + STATISTIC(NumNonAddrTakenGlobalVars, "Number of global vars without address taken"); STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); @@ -177,14 +178,14 @@ namespace { FunctionInfo.find(F); if (I != FunctionInfo.end()) return &I->second; - return 0; + return nullptr; } void AnalyzeGlobals(Module &M); void AnalyzeCallGraph(CallGraph &CG, Module &M); bool AnalyzeUsesOfPointer(Value *V, std::vector &Readers, std::vector &Writers, - GlobalValue *OkayStoreDest = 0); + GlobalValue *OkayStoreDest = nullptr); bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); }; } @@ -358,7 +359,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { // We do a bottom-up SCC traversal of the call graph. In other words, we // visit all callees before callers (leaf-first). for (scc_iterator I = scc_begin(&CG); !I.isAtEnd(); ++I) { - std::vector &SCC = *I; + const std::vector &SCC = *I; assert(!SCC.empty() && "SCC with no functions?"); if (!SCC[0]->getFunction()) { @@ -410,10 +411,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { FunctionEffect |= CalleeFR->FunctionEffect; // Incorporate callee's effects on globals into our info. - for (std::map::iterator GI = - CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); - GI != E; ++GI) - FR.GlobalInfo[GI->first] |= GI->second; + for (const auto &G : CalleeFR->GlobalInfo) + FR.GlobalInfo[G.first] |= G.second; FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; } else { // Can't say anything about it. However, if it is inside our SCC, @@ -492,8 +491,8 @@ GlobalsModRef::alias(const Location &LocA, if (GV1 || GV2) { // If the global's address is taken, pretend we don't know it's a pointer to // the global. - if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0; - if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0; + if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr; // If the two pointers are derived from two different non-addr-taken // globals, or if one is and the other isn't, we know these can't alias. @@ -507,7 +506,7 @@ GlobalsModRef::alias(const Location &LocA, // These pointers may be based on the memory owned by an indirect global. If // so, we may be able to handle this. First check to see if the base pointer // is a direct load from an indirect global. 
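The GlobalInfo merge above is another modernization: a range-based for over the callee's map replaces the spelled-out iterator loop, and operator[] on the caller's map value-initializes missing entries to zero before the |=. Standard-map equivalent:

#include <iostream>
#include <map>
#include <string>

int main() {
  std::map<std::string, unsigned> CallerInfo = {{"g1", 0x1}};
  const std::map<std::string, unsigned> CalleeInfo = {{"g1", 0x2},
                                                      {"g2", 0x4}};

  // operator[] value-initializes absent keys to 0, so |= works uniformly
  // whether or not the caller already tracked the global.
  for (const auto &G : CalleeInfo)
    CallerInfo[G.first] |= G.second;

  for (const auto &G : CallerInfo)
    std::cout << G.first << " -> " << G.second << "\n";  // g1 -> 3, g2 -> 4
}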
- GV1 = GV2 = 0; + GV1 = GV2 = nullptr; if (const LoadInst *LI = dyn_cast(UV1)) if (GlobalVariable *GV = dyn_cast(LI->getOperand(0))) if (IndirectGlobals.count(GV)) diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp index 8dafc1c..66f3f8e 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline-cost" #include "llvm/Analysis/InlineCost.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -34,6 +33,8 @@ using namespace llvm; +#define DEBUG_TYPE "inline-cost" + STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); namespace { @@ -97,9 +98,6 @@ class CallAnalyzer : public InstVisitor { void disableSROA(Value *V); void accumulateSROACost(DenseMap::iterator CostIt, int InstructionCost); - bool handleSROACandidate(bool IsSROAValid, - DenseMap::iterator CostIt, - int InstructionCost); bool isGEPOffsetConstant(GetElementPtrInst &GEP); bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); bool simplifyCallSite(Function *F, CallSite CS); @@ -225,21 +223,6 @@ void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, SROACostSavings += InstructionCost; } -/// \brief Helper for the common pattern of handling a SROA candidate. -/// Either accumulates the cost savings if the SROA remains valid, or disables -/// SROA for the candidate. -bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, - DenseMap::iterator CostIt, - int InstructionCost) { - if (IsSROAValid) { - accumulateSROACost(CostIt, InstructionCost); - return true; - } - - disableSROA(CostIt); - return false; -} - /// \brief Check whether a GEP's indices are all constant. /// /// Respects any simplified values known during the analysis of this callsite. @@ -287,8 +270,17 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { } bool CallAnalyzer::visitAlloca(AllocaInst &I) { - // FIXME: Check whether inlining will turn a dynamic alloca into a static + // Check whether inlining will turn a dynamic alloca into a static // alloca, and handle that case. + if (I.isArrayAllocation()) { + if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) { + ConstantInt *AllocSize = dyn_cast(Size); + assert(AllocSize && "Allocation size not a constant int?"); + Type *Ty = I.getAllocatedType(); + AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue(); + return Base::visitAlloca(I); + } + } // Accumulate the allocated size. if (I.isStaticAlloca()) { @@ -816,9 +808,29 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) { bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // We model unconditional switches as free, see the comments on handling // branches. - return isa(SI.getCondition()) || - dyn_cast_or_null( - SimplifiedValues.lookup(SI.getCondition())); + if (isa(SI.getCondition())) + return true; + if (Value *V = SimplifiedValues.lookup(SI.getCondition())) + if (isa(V)) + return true; + + // Otherwise, we need to accumulate a cost proportional to the number of + // distinct successor blocks. This fan-out in the CFG cannot be represented + // for free even if we can represent the core switch as a jumptable that + // takes a single instruction. + // + // NB: We convert large switches which are just used to initialize large phi + // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent + // inlining those. 
It will prevent inlining in cases where the optimization + // does not (yet) fire. + SmallPtrSet SuccessorBlocks; + SuccessorBlocks.insert(SI.getDefaultDest()); + for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I) + SuccessorBlocks.insert(I.getCaseSuccessor()); + // Add cost corresponding to the number of distinct destinations. The first + // we model as free because of fallthrough. + Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost; + return false; } bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { @@ -934,7 +946,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { /// no constant offsets applied. ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!DL || !V->getType()->isPointerTy()) - return 0; + return nullptr; unsigned IntPtrWidth = DL->getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); @@ -946,7 +958,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { do { if (GEPOperator *GEP = dyn_cast(V)) { if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) - return 0; + return nullptr; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); @@ -1247,7 +1259,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, // Calls to functions with always-inline attributes should be inlined // whenever possible. - if (Callee->hasFnAttribute(Attribute::AlwaysInline)) { + if (CS.hasFnAttr(Attribute::AlwaysInline)) { if (isInlineViable(*Callee)) return llvm::InlineCost::getAlways(); return llvm::InlineCost::getNever(); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 5317a47..c819bd3 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "iv-users" #include "llvm/Analysis/IVUsers.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopPass.h" @@ -29,6 +28,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "iv-users" + char IVUsers::ID = 0; INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", "Induction Variable Users", false, true) @@ -84,7 +85,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT, const LoopInfo *LI, SmallPtrSet &SimpleLoopNests) { - Loop *NearestLoop = 0; + Loop *NearestLoop = nullptr; for (DomTreeNode *Rung = DT->getNode(BB); Rung; Rung = Rung->getIDom()) { BasicBlock *DomBB = Rung->getBlock(); @@ -253,7 +254,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { DT = &getAnalysis().getDomTree(); SE = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all uses of induction variables in this loop, and categorize // them by stride. 
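The visitSwitchInst rewrite above charges a switch for its fan-out: collect the distinct successor blocks in a set (default destination included), treat the first as free fall-through, and add one instruction's cost for each of the rest. The counting, reduced to standard containers (the block ids and cost unit are invented for the example):

#include <iostream>
#include <set>

int main() {
  const int InstrCost = 5;  // illustrative cost unit, not the LLVM constant

  // A switch with a default and four cases, two of which share a target.
  std::set<int> SuccessorBlocks;
  SuccessorBlocks.insert(100);               // default destination
  for (int CaseDest : {101, 102, 101, 103})  // case destinations
    SuccessorBlocks.insert(CaseDest);

  // The first distinct destination is modeled as free fall-through.
  int Cost = (static_cast<int>(SuccessorBlocks.size()) - 1) * InstrCost;
  std::cout << "distinct successors: " << SuccessorBlocks.size()
            << ", added cost: " << Cost << "\n";  // 4 distinct, cost 15
}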
Start by finding all of the PHI nodes in the header for @@ -329,16 +330,16 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) { I != E; ++I) if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L)) return AR; - return 0; + return nullptr; } - return 0; + return nullptr; } const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const { if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L)) return AR->getStepRecurrence(*SE); - return 0; + return nullptr; } void IVStrideUse::transformToPostInc(const Loop *L) { diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 3d05556..de2b9c0 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instcount" #include "llvm/Analysis/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Function.h" @@ -22,6 +21,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "instcount" + STATISTIC(TotalInsts , "Number of instructions (of all types)"); STATISTIC(TotalBlocks, "Number of basic blocks"); STATISTIC(TotalFuncs , "Number of non-external functions"); @@ -47,7 +48,7 @@ namespace { void visitInstruction(Instruction &I) { errs() << "Instruction Count does not know about " << I; - llvm_unreachable(0); + llvm_unreachable(nullptr); } public: static char ID; // Pass identification, replacement for typeid diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index d8d8a09..3684fda 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instsimplify" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instsimplify" + enum { RecursionLimit = 3 }; STATISTIC(NumExpand, "Number of expansions"); @@ -131,7 +132,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Check whether the expression has the form "(A op' B) op C". if (BinaryOperator *Op0 = dyn_cast(LHS)) @@ -179,7 +180,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term @@ -192,14 +193,14 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract; // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); if (!Op0 || Op0->getOpcode() != OpcodeToExtract || !Op1 || Op1->getOpcode() != OpcodeToExtract) - return 0; + return nullptr; // The expression has the form "(A op' B) op (C op' D)". 
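FactorizeBinOp, shown in this hunk, extracts a common operand from "(A op' B) op (C op' D)". One concrete instance is the and/or distributive law; a hypothetical brute-force check (not part of the patch) that the rewrite is value-preserving for all 8-bit operands:

#include <cstdint>

// Verify (A & B) | (A & C) == A & (B | C) exhaustively over 8 bits.
bool checkAndOrFactorization() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned C = 0; C < 256; ++C)
        if (((A & B) | (A & C)) != (A & (B | C)))
          return false;
  return true;
}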
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); @@ -251,7 +252,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// SimplifyAssociativeBinOp - Generic simplifications for associative binary @@ -263,7 +264,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; BinaryOperator *Op0 = dyn_cast(LHS); BinaryOperator *Op1 = dyn_cast(RHS); @@ -308,7 +309,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, // The remaining transforms require commutativity as well as associativity. if (!Instruction::isCommutative(Opcode)) - return 0; + return nullptr; // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely. if (Op0 && Op0->getOpcode() == Opcode) { @@ -348,7 +349,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// ThreadBinOpOverSelect - In the case of a binary operation with a select @@ -359,7 +360,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; SelectInst *SI; if (isa(LHS)) { @@ -420,7 +421,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS, } } - return 0; + return nullptr; } /// ThreadCmpOverSelect - In the case of a comparison with a select instruction, @@ -432,7 +433,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Make sure the select is on the LHS. if (!isa(LHS)) { @@ -456,7 +457,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // It didn't simplify. However if "cmp TV, RHS" is equal to the select // condition then we can replace it with 'true'. Otherwise give up. if (!isSameCompare(Cond, Pred, TV, RHS)) - return 0; + return nullptr; TCmp = getTrue(Cond->getType()); } @@ -470,7 +471,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // It didn't simplify. However if "cmp FV, RHS" is equal to the select // condition then we can replace it with 'false'. Otherwise give up. if (!isSameCompare(Cond, Pred, FV, RHS)) - return 0; + return nullptr; FCmp = getFalse(Cond->getType()); } @@ -482,7 +483,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, // The remaining cases only make sense if the select condition has the same // type as the result of the comparison, so bail out if this is not so. if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy()) - return 0; + return nullptr; // If the false value simplified to false, then the result of the compare // is equal to "Cond && TCmp". This also catches the case when the false // value simplified to false and the true value to true, returning "Cond". 
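ThreadCmpOverSelect evaluates the comparison against both select arms and only folds when the two answers are compatible. A scalar model with plain ints standing in for Value* (illustrative sketch, not the patch's code):

// cmp(select(Cond, TV, FV), RHS): if both arms give the same answer the
// select is irrelevant; otherwise the result tracks the condition itself.
bool threadCmpOverSelect(bool Cond, int TV, int FV, int RHS,
                         bool (*Cmp)(int, int)) {
  bool TCmp = Cmp(TV, RHS);
  bool FCmp = Cmp(FV, RHS);
  if (TCmp == FCmp)
    return TCmp;
  return TCmp ? Cond : !Cond;
}

The real code must also prove each arm's comparison folds to a constant (or equals the select condition) before it can rewrite anything.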
@@ -502,7 +503,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// ThreadBinOpOverPHI - In the case of a binary operation with an operand that @@ -513,24 +514,24 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; PHINode *PI; if (isa(LHS)) { PI = cast(LHS); // Bail out if RHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(RHS, PI, Q.DT)) - return 0; + return nullptr; } else { assert(isa(RHS) && "No PHI instruction operand!"); PI = cast(RHS); // Bail out if LHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(LHS, PI, Q.DT)) - return 0; + return nullptr; } // Evaluate the BinOp on the incoming phi values. - Value *CommonValue = 0; + Value *CommonValue = nullptr; for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. @@ -541,7 +542,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS, // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) - return 0; + return nullptr; CommonValue = V; } @@ -556,7 +557,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Query &Q, unsigned MaxRecurse) { // Recursion is always used, so bail out at once if we already hit the limit. if (!MaxRecurse--) - return 0; + return nullptr; // Make sure the phi is on the LHS. if (!isa(LHS)) { @@ -568,10 +569,10 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // Bail out if RHS and the phi may be mutually interdependent due to a loop. if (!ValueDominatesPHI(RHS, PI, Q.DT)) - return 0; + return nullptr; // Evaluate the BinOp on the incoming phi values. - Value *CommonValue = 0; + Value *CommonValue = nullptr; for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PI->getIncomingValue(i); // If the incoming value is the phi node itself, it can safely be skipped. @@ -580,7 +581,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) - return 0; + return nullptr; CommonValue = V; } @@ -613,7 +614,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // X + (Y - X) -> Y // (Y - X) + X -> Y // Eg: X + -X -> 0 - Value *Y = 0; + Value *Y = nullptr; if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) || match(Op0, m_Sub(m_Value(Y), m_Specific(Op1)))) return Y; @@ -647,7 +648,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -720,7 +721,7 @@ static Constant *computePointerDifference(const DataLayout *DL, // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. 
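ThreadBinOpOverPHI's CommonValue loop amounts to: apply the operation to each incoming value and fold only on unanimity. A std-only sketch under invented names:

#include <optional>
#include <vector>

// Fold op(phi(Incoming...), RHS) only when every incoming value yields
// one common result; mirrors the CommonValue accumulation above.
std::optional<int> threadOverPhi(const std::vector<int> &Incoming, int RHS,
                                 int (*Op)(int, int)) {
  std::optional<int> Common;
  for (int V : Incoming) {
    int R = Op(V, RHS);
    if (Common && *Common != R)
      return std::nullopt;  // disagreement between predecessors: give up
    Common = R;
  }
  return Common;
}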
if (LHS != RHS) - return 0; + return nullptr; // Otherwise, the difference of LHS - RHS can be computed as: // LHS - RHS @@ -755,14 +756,14 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // (X*2) - X -> X // (X<<1) - X -> X - Value *X = 0; + Value *X = nullptr; if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) || match(Op0, m_Shl(m_Specific(Op1), m_One()))) return Op1; // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies. // For example, (X + Y) - Y -> X; (Y + X) - Y -> X - Value *Y = 0, *Z = Op1; + Value *Y = nullptr, *Z = Op1; if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z // See if "V === Y - Z" simplifies. if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1)) @@ -853,7 +854,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, // "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -890,7 +891,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 // where nnan and ninf have to occur at least once somewhere in this // expression - Value *SubOp = 0; + Value *SubOp = nullptr; if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) SubOp = Op1; else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) @@ -902,7 +903,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, return Constant::getNullValue(Op0->getType()); } - return 0; + return nullptr; } /// Given operands for an FSub, see if we can fold the result. If not, this @@ -939,7 +940,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1) return Constant::getNullValue(Op0->getType()); - return 0; + return nullptr; } /// Given the operands for an FMul, see if we can fold the result @@ -966,7 +967,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1, if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) return Op1; - return 0; + return nullptr; } /// SimplifyMulInst - Given operands for a Mul, see if we can @@ -997,7 +998,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, return Op0; // (X / Y) * Y -> X if the division is exact. - Value *X = 0; + Value *X = nullptr; if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y) return X; @@ -1031,7 +1032,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, @@ -1098,7 +1099,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, return ConstantInt::get(Op0->getType(), 1); // (X * Y) / Y -> X if the multiplication does not overflow. 
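The "(X * Y) / Y -> X if the multiplication does not overflow" fold above needs its guard: once X * Y wraps, dividing by Y no longer recovers X. A hypothetical widened-multiply model of when the fold is sound (uint32_t chosen arbitrarily):

#include <cstdint>
#include <optional>

// Replace (X * Y) / Y with X only when the product does not wrap.
std::optional<uint32_t> foldMulDiv(uint32_t X, uint32_t Y) {
  if (Y == 0)
    return std::nullopt;  // division by zero: no fold
  if (static_cast<uint64_t>(X) * Y > UINT32_MAX)
    return std::nullopt;  // product wraps: fold would change the result
  return X;
}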
- Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 OverflowingBinaryOperator *Mul = cast(Op0); @@ -1129,7 +1130,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifySDivInst - Given operands for an SDiv, see if we can @@ -1139,7 +1140,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1155,7 +1156,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1174,7 +1175,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q, if (match(Op1, m_Undef())) return Op1; - return 0; + return nullptr; } Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1234,7 +1235,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifySRemInst - Given operands for an SRem, see if we can @@ -1244,7 +1245,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1260,7 +1261,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1279,7 +1280,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &, if (match(Op1, m_Undef())) return Op1; - return 0; + return nullptr; } Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1350,7 +1351,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } /// SimplifyShlInst - Given operands for an Shl, see if we can @@ -1368,7 +1369,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, Value *X; if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1))))) return X; - return 0; + return nullptr; } Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, @@ -1399,7 +1400,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, cast(Op0)->hasNoUnsignedWrap()) return X; - return 0; + return nullptr; } Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, @@ -1435,7 +1436,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, cast(Op0)->hasNoSignedWrap()) return X; - return 0; + return nullptr; } Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, @@ -1483,7 +1484,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, return 
Constant::getNullValue(Op0->getType()); // (A | ?) & A = A - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(Op0, m_Or(m_Value(A), m_Value(B))) && (A == Op1 || B == Op1)) return Op1; @@ -1536,7 +1537,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1582,7 +1583,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, return Constant::getAllOnesValue(Op0->getType()); // (A & ?) | A = A - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(B))) && (A == Op1 || B == Op1)) return Op1; @@ -1630,7 +1631,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q, if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1690,7 +1691,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q, // "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly // for threading over phi nodes. - return 0; + return nullptr; } Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL, @@ -1710,17 +1711,17 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { SelectInst *SI = dyn_cast(V); if (!SI) - return 0; + return nullptr; CmpInst *Cmp = dyn_cast(SI->getCondition()); if (!Cmp) - return 0; + return nullptr; Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1); if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS) return Cmp; if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) && LHS == CmpRHS && RHS == CmpLHS) return Cmp; - return 0; + return nullptr; } // A significant optimization not implemented here is assuming that alloca @@ -1768,7 +1769,7 @@ static Constant *computePointerICmp(const DataLayout *DL, // We can only fold certain predicates on pointer comparisons. switch (Pred) { default: - return 0; + return nullptr; // Equality comparisons are easy to fold. case CmpInst::ICMP_EQ: @@ -1874,7 +1875,7 @@ static Constant *computePointerICmp(const DataLayout *DL, } // Otherwise, fail. - return 0; + return nullptr; } /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can @@ -2000,7 +2001,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Many binary operators with constant RHS have easy to compute constant // range. Use them to check whether the comparison is a tautology. - uint32_t Width = CI->getBitWidth(); + unsigned Width = CI->getBitWidth(); APInt Lower = APInt(Width, 0); APInt Upper = APInt(Width, 0); ConstantInt *CI2; @@ -2019,6 +2020,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, APInt NegOne = APInt::getAllOnesValue(Width); if (!CI2->isZero()) Upper = NegOne.udiv(CI2->getValue()) + 1; + } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) { + // 'sdiv CI2, x' produces [-|CI2|, |CI2|]. + Upper = CI2->getValue().abs() + 1; + Lower = (-Upper) + 1; } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
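The new 'sdiv CI2, x' range above holds because dividing a fixed dividend by any x with |x| >= 1 can only shrink its magnitude. An 8-bit brute-force check of the interval (illustrative helper; the UB divisors are skipped):

#include <cstdint>
#include <cstdlib>

// For fixed C, C / x lies in [-|C|, |C|] for every legal nonzero x.
bool checkSDivRange(int8_t C) {
  for (int x = -128; x <= 127; ++x) {
    if (x == 0 || (C == INT8_MIN && x == -1))
      continue;  // undefined division cases
    int q = C / x;
    int bound = std::abs(static_cast<int>(C));
    if (q < -bound || q > bound)
      return false;
  }
  return true;
}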
APInt IntMin = APInt::getSignedMinValue(Width); @@ -2033,6 +2038,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, APInt NegOne = APInt::getAllOnesValue(Width); if (CI2->getValue().ult(Width)) Upper = NegOne.lshr(CI2->getValue()) + 1; + } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) { + // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2]. + unsigned ShiftAmount = Width - 1; + if (!CI2->isZero() && cast(LHS)->isExact()) + ShiftAmount = CI2->getValue().countTrailingZeros(); + Lower = CI2->getValue().lshr(ShiftAmount); + Upper = CI2->getValue() + 1; } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) { // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2]. APInt IntMin = APInt::getSignedMinValue(Width); @@ -2041,6 +2053,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Lower = IntMin.ashr(CI2->getValue()); Upper = IntMax.ashr(CI2->getValue()) + 1; } + } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) { + unsigned ShiftAmount = Width - 1; + if (!CI2->isZero() && cast(LHS)->isExact()) + ShiftAmount = CI2->getValue().countTrailingZeros(); + if (CI2->isNegative()) { + // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)] + Lower = CI2->getValue(); + Upper = CI2->getValue().ashr(ShiftAmount) + 1; + } else { + // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2] + Lower = CI2->getValue().ashr(ShiftAmount); + Upper = CI2->getValue() + 1; + } } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) { // 'or x, CI2' produces [CI2, UINT_MAX]. Lower = CI2->getValue(); @@ -2221,7 +2246,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, BinaryOperator *RBO = dyn_cast(RHS); if (MaxRecurse && (LBO || RBO)) { // Analyze the case when either LHS or RHS is an add instruction. - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null). 
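The shifted-constant ranges added in this hunk rest on right shifts being monotonically decreasing in the shift amount (for a non-negative value). A small 8-bit check of the lshr bound (hypothetical helper, not patch code):

#include <cstdint>

// For fixed C, C >> x (logical) lies in [C >> (Width-1), C], Width = 8.
bool checkLShrRange(uint8_t C) {
  for (int x = 0; x <= 7; ++x) {
    int q = C >> x;
    if (q < (C >> 7) || q > C)
      return false;
  }
  return true;
}

The ashr cases mirror this, with the negative-dividend interval flipped because arithmetic shifting drags the value toward -1 instead of 0.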
bool NoLHSWrapProblem = false, NoRHSWrapProblem = false; if (LBO && LBO->getOpcode() == Instruction::Add) { @@ -2279,6 +2304,28 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // 0 - (zext X) pred C + if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) { + if (ConstantInt *RHSC = dyn_cast(RHS)) { + if (RHSC->getValue().isStrictlyPositive()) { + if (Pred == ICmpInst::ICMP_SLT) + return ConstantInt::getTrue(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_SGE) + return ConstantInt::getFalse(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_EQ) + return ConstantInt::getFalse(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_NE) + return ConstantInt::getTrue(RHSC->getContext()); + } + if (RHSC->getValue().isNonNegative()) { + if (Pred == ICmpInst::ICMP_SLE) + return ConstantInt::getTrue(RHSC->getContext()); + if (Pred == ICmpInst::ICMP_SGT) + return ConstantInt::getFalse(RHSC->getContext()); + } + } + } + // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { bool KnownNonNegative, KnownNegative; @@ -2605,7 +2652,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -2702,7 +2749,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, @@ -2741,7 +2788,7 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal, if (isa(FalseVal)) // select C, X, undef -> X return TrueVal; - return 0; + return nullptr; } Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, @@ -2786,7 +2833,7 @@ static Value *SimplifyGEPInst(ArrayRef Ops, const Query &Q, unsigned) { // Check to see if this is constant foldable. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (!isa(Ops[i])) - return 0; + return nullptr; return ConstantExpr::getGetElementPtr(cast(Ops[0]), Ops.slice(1)); } @@ -2823,7 +2870,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val, return Agg; } - return 0; + return nullptr; } Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, @@ -2839,7 +2886,7 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val, static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. - Value *CommonValue = 0; + Value *CommonValue = nullptr; bool HasUndefInput = false; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *Incoming = PN->getIncomingValue(i); @@ -2851,7 +2898,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { continue; } if (CommonValue && Incoming != CommonValue) - return 0; // Not the same, bail out. + return nullptr; // Not the same, bail out. CommonValue = Incoming; } @@ -2864,7 +2911,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) { // instruction, we cannot return X as the result of the PHI node unless it // dominates the PHI block. if (HasUndefInput) - return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0; + return ValueDominatesPHI(CommonValue, PN, Q.DT) ? 
CommonValue : nullptr; return CommonValue; } @@ -2873,7 +2920,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) { if (Constant *C = dyn_cast(Op)) return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.DL, Q.TLI); - return 0; + return nullptr; } Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL, @@ -2945,7 +2992,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse)) return V; - return 0; + return nullptr; } } @@ -2992,7 +3039,7 @@ static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEn const Query &Q, unsigned MaxRecurse) { // Perform idempotent optimizations if (!IsIdempotent(IID)) - return 0; + return nullptr; // Unary Ops if (std::distance(ArgBegin, ArgEnd) == 1) @@ -3000,7 +3047,7 @@ static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEn if (II->getIntrinsicID() == IID) return II; - return 0; + return nullptr; } template @@ -3017,7 +3064,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, Function *F = dyn_cast(V); if (!F) - return 0; + return nullptr; if (unsigned IID = F->getIntrinsicID()) if (Value *Ret = @@ -3025,14 +3072,14 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, return Ret; if (!canConstantFoldCallTo(F)) - return 0; + return nullptr; SmallVector ConstantArgs; ConstantArgs.reserve(ArgEnd - ArgBegin); for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) { Constant *C = dyn_cast(*I); if (!C) - return 0; + return nullptr; ConstantArgs.push_back(C); } @@ -3247,7 +3294,7 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I, const DataLayout *DL, const TargetLibraryInfo *TLI, const DominatorTree *DT) { - return replaceAndRecursivelySimplifyImpl(I, 0, DL, TLI, DT); + return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT); } bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp index 2e259b1..a0583e8 100644 --- a/lib/Analysis/IntervalPartition.cpp +++ b/lib/Analysis/IntervalPartition.cpp @@ -29,7 +29,7 @@ void IntervalPartition::releaseMemory() { delete Intervals[i]; IntervalMap.clear(); Intervals.clear(); - RootInterval = 0; + RootInterval = nullptr; } void IntervalPartition::print(raw_ostream &O, const Module*) const { diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index ea213f2..e073616 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -8,19 +8,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LazyCallGraph.h" -#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "lcg" + static void findCallees( SmallVectorImpl &Worklist, SmallPtrSetImpl &Visited, SmallVectorImpl> &Callees, - SmallPtrSetImpl &CalleeSet) { + DenseMap &CalleeIndexMap) { while (!Worklist.empty()) { Constant *C = Worklist.pop_back_val(); @@ -35,8 +38,12 @@ static void findCallees( // alias. Then a test of the address of the weak function against the new // strong definition's address would be an effective way to determine the // safety of optimizing a direct call edge. 
- if (!F->isDeclaration() && CalleeSet.insert(F)) + if (!F->isDeclaration() && + CalleeIndexMap.insert(std::make_pair(F, Callees.size())).second) { + DEBUG(dbgs() << " Added callable function: " << F->getName() + << "\n"); Callees.push_back(F); + } continue; } @@ -46,7 +53,11 @@ static void findCallees( } } -LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) : G(G), F(F) { +LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) + : G(&G), F(F), DFSNumber(0), LowLink(0) { + DEBUG(dbgs() << " Adding functions called by '" << F.getName() + << "' to the graph.\n"); + SmallVector Worklist; SmallPtrSet Visited; // Find all the potential callees in this function. First walk the @@ -61,36 +72,41 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) : G(G), F(F) { // We've collected all the constant (and thus potentially function or // function containing) operands to all of the instructions in the function. // Process them (recursively) collecting every function found. - findCallees(Worklist, Visited, Callees, CalleeSet); + findCallees(Worklist, Visited, Callees, CalleeIndexMap); } -LazyCallGraph::Node::Node(LazyCallGraph &G, const Node &OtherN) - : G(G), F(OtherN.F), CalleeSet(OtherN.CalleeSet) { - // Loop over the other node's callees, adding the Function*s to our list - // directly, and recursing to add the Node*s. - Callees.reserve(OtherN.Callees.size()); - for (const auto &OtherCallee : OtherN.Callees) - if (Function *Callee = OtherCallee.dyn_cast()) - Callees.push_back(Callee); - else - Callees.push_back(G.copyInto(*OtherCallee.get())); +void LazyCallGraph::Node::insertEdgeInternal(Function &Callee) { + if (Node *N = G->lookup(Callee)) + return insertEdgeInternal(*N); + + CalleeIndexMap.insert(std::make_pair(&Callee, Callees.size())); + Callees.push_back(&Callee); } -LazyCallGraph::Node::Node(LazyCallGraph &G, Node &&OtherN) - : G(G), F(OtherN.F), Callees(std::move(OtherN.Callees)), - CalleeSet(std::move(OtherN.CalleeSet)) { - // Loop over our Callees. They've been moved from another node, but we need - // to move the Node*s to live under our bump ptr allocator. - for (auto &Callee : Callees) - if (Node *ChildN = Callee.dyn_cast()) - Callee = G.moveInto(std::move(*ChildN)); +void LazyCallGraph::Node::insertEdgeInternal(Node &CalleeN) { + CalleeIndexMap.insert(std::make_pair(&CalleeN.getFunction(), Callees.size())); + Callees.push_back(&CalleeN); } -LazyCallGraph::LazyCallGraph(Module &M) : M(M) { +void LazyCallGraph::Node::removeEdgeInternal(Function &Callee) { + auto IndexMapI = CalleeIndexMap.find(&Callee); + assert(IndexMapI != CalleeIndexMap.end() && + "Callee not in the callee set for this caller?"); + + Callees[IndexMapI->second] = nullptr; + CalleeIndexMap.erase(IndexMapI); +} + +LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) { + DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier() + << "\n"); for (Function &F : M) if (!F.isDeclaration() && !F.hasLocalLinkage()) - if (EntryNodeSet.insert(&F)) + if (EntryIndexMap.insert(std::make_pair(&F, EntryNodes.size())).second) { + DEBUG(dbgs() << " Adding '" << F.getName() + << "' to entry set of the graph.\n"); EntryNodes.push_back(&F); + } // Now add entry nodes for functions reachable via initializers to globals. 
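The switch from CalleeSet to CalleeIndexMap in this Node code keeps callee edges in insertion order while making membership tests and removals O(1); removal tombstones the vector slot so the recorded indices of the remaining edges stay valid. A std-only model of the pattern, with unordered_map standing in for DenseMap and an opaque Function type:

#include <cstddef>
#include <unordered_map>
#include <vector>

struct Function;  // opaque stand-in

struct EdgeList {
  std::vector<Function *> Edges;
  std::unordered_map<Function *, std::size_t> Index;

  bool insert(Function *F) {
    if (!Index.insert({F, Edges.size()}).second)
      return false;  // duplicate edge: rejected, order unchanged
    Edges.push_back(F);
    return true;
  }

  void remove(Function *F) {
    auto It = Index.find(F);
    if (It == Index.end())
      return;
    Edges[It->second] = nullptr;  // tombstone keeps other indices stable
    Index.erase(It);
  }
};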
SmallVector Worklist; @@ -100,51 +116,568 @@ LazyCallGraph::LazyCallGraph(Module &M) : M(M) { if (Visited.insert(GV.getInitializer())) Worklist.push_back(GV.getInitializer()); - findCallees(Worklist, Visited, EntryNodes, EntryNodeSet); -} + DEBUG(dbgs() << " Adding functions referenced by global initializers to the " + "entry set.\n"); + findCallees(Worklist, Visited, EntryNodes, EntryIndexMap); -LazyCallGraph::LazyCallGraph(const LazyCallGraph &G) - : M(G.M), EntryNodeSet(G.EntryNodeSet) { - EntryNodes.reserve(G.EntryNodes.size()); - for (const auto &EntryNode : G.EntryNodes) - if (Function *Callee = EntryNode.dyn_cast()) - EntryNodes.push_back(Callee); + for (auto &Entry : EntryNodes) { + assert(!Entry.isNull() && + "We can't have removed edges before we finish the constructor!"); + if (Function *F = Entry.dyn_cast()) + SCCEntryNodes.push_back(F); else - EntryNodes.push_back(copyInto(*EntryNode.get())); + SCCEntryNodes.push_back(&Entry.get()->getFunction()); + } } -// FIXME: This would be crazy simpler if BumpPtrAllocator were movable without -// invalidating any of the allocated memory. We should make that be the case at -// some point and delete this. LazyCallGraph::LazyCallGraph(LazyCallGraph &&G) - : M(G.M), EntryNodes(std::move(G.EntryNodes)), - EntryNodeSet(std::move(G.EntryNodeSet)) { - // Loop over our EntryNodes. They've been moved from another graph, so we - // need to move the Node*s to live under our bump ptr allocator. We can just - // do this in-place. - for (auto &Entry : EntryNodes) - if (Node *EntryN = Entry.dyn_cast()) - Entry = moveInto(std::move(*EntryN)); + : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)), + EntryNodes(std::move(G.EntryNodes)), + EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)), + SCCMap(std::move(G.SCCMap)), LeafSCCs(std::move(G.LeafSCCs)), + DFSStack(std::move(G.DFSStack)), + SCCEntryNodes(std::move(G.SCCEntryNodes)), + NextDFSNumber(G.NextDFSNumber) { + updateGraphPtrs(); +} + +LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) { + BPA = std::move(G.BPA); + NodeMap = std::move(G.NodeMap); + EntryNodes = std::move(G.EntryNodes); + EntryIndexMap = std::move(G.EntryIndexMap); + SCCBPA = std::move(G.SCCBPA); + SCCMap = std::move(G.SCCMap); + LeafSCCs = std::move(G.LeafSCCs); + DFSStack = std::move(G.DFSStack); + SCCEntryNodes = std::move(G.SCCEntryNodes); + NextDFSNumber = G.NextDFSNumber; + updateGraphPtrs(); + return *this; } -LazyCallGraph::Node *LazyCallGraph::insertInto(Function &F, Node *&MappedN) { - return new (MappedN = BPA.Allocate()) Node(*this, F); +void LazyCallGraph::SCC::insert(Node &N) { + N.DFSNumber = N.LowLink = -1; + Nodes.push_back(&N); + G->SCCMap[&N] = this; } -LazyCallGraph::Node *LazyCallGraph::copyInto(const Node &OtherN) { - Node *&N = NodeMap[&OtherN.F]; - if (N) - return N; +bool LazyCallGraph::SCC::isDescendantOf(const SCC &C) const { + // Walk up the parents of this SCC and verify that we eventually find C. 
+ SmallVector AncestorWorklist; + AncestorWorklist.push_back(this); + do { + const SCC *AncestorC = AncestorWorklist.pop_back_val(); + if (AncestorC->isChildOf(C)) + return true; + for (const SCC *ParentC : AncestorC->ParentSCCs) + AncestorWorklist.push_back(ParentC); + } while (!AncestorWorklist.empty()); - return new (N = BPA.Allocate()) Node(*this, OtherN); + return false; } -LazyCallGraph::Node *LazyCallGraph::moveInto(Node &&OtherN) { - Node *&N = NodeMap[&OtherN.F]; - if (N) - return N; +void LazyCallGraph::SCC::insertIntraSCCEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC."); + assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC."); - return new (N = BPA.Allocate()) Node(*this, std::move(OtherN)); + // Nothing changes about this SCC or any other. +} + +void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC."); + + SCC &CalleeC = *G->SCCMap.lookup(&CalleeN); + assert(&CalleeC != this && "Callee must not be in this SCC."); + assert(CalleeC.isDescendantOf(*this) && + "Callee must be a descendant of the Caller."); + + // The only change required is to add this SCC to the parent set of the callee. + CalleeC.ParentSCCs.insert(this); +} + +SmallVector +LazyCallGraph::SCC::insertIncomingEdge(Node &CallerN, Node &CalleeN) { + // First insert it into the caller. + CallerN.insertEdgeInternal(CalleeN); + + assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC."); + + SCC &CallerC = *G->SCCMap.lookup(&CallerN); + assert(&CallerC != this && "Caller must not be in this SCC."); + assert(CallerC.isDescendantOf(*this) && + "Caller must be a descendant of the Callee."); + + // The algorithm we use for merging SCCs based on the cycle introduced here + // is to walk the SCC inverted DAG formed by the parent SCC sets. The inverse + // graph has the same cycle properties as the actual DAG of the SCCs, and + // when forming SCCs lazily by a DFS, the bottom of the graph won't exist in + // many cases which should prune the search space. + // + // FIXME: We can get this pruning behavior even after the incremental SCC + // formation by leaving behind (conservative) DFS numberings in the nodes, + // and pruning the search with them. These would need to be cleverly updated + // during the removal of intra-SCC edges, but could be preserved + // conservatively. + + // The set of SCCs that are connected to the caller, and thus will + // participate in the merged connected component. + SmallPtrSet ConnectedSCCs; + ConnectedSCCs.insert(this); + ConnectedSCCs.insert(&CallerC); + + // We build up a DFS stack of the parents chains. + SmallVector, 8> DFSSCCs; + SmallPtrSet VisitedSCCs; + int ConnectedDepth = -1; + SCC *C = this; + parent_iterator I = parent_begin(), E = parent_end(); + for (;;) { + while (I != E) { + SCC &ParentSCC = *I++; + + // If we have already processed this parent SCC, skip it, and remember + // whether it was connected so we don't have to check the rest of the + // stack. This also handles when we reach a child of the 'this' SCC (the + // callee) which terminates the search. 
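isDescendantOf above is a worklist DFS over the parent links of the SCC DAG. The same traversal reduced to integer ids and std containers (a sketch, not the patch's code; the Visited guard avoids re-expanding shared ancestors):

#include <vector>

// True if Target is reachable from Start by following parent edges.
bool isDescendantOf(int Start, int Target,
                    const std::vector<std::vector<int>> &Parents) {
  std::vector<int> Worklist = {Start};
  std::vector<bool> Visited(Parents.size(), false);
  while (!Worklist.empty()) {
    int C = Worklist.back();
    Worklist.pop_back();
    if (Visited[C])
      continue;
    Visited[C] = true;
    for (int P : Parents[C]) {
      if (P == Target)
        return true;
      Worklist.push_back(P);
    }
  }
  return false;
}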
+ if (ConnectedSCCs.count(&ParentSCC)) { + ConnectedDepth = std::max(ConnectedDepth, DFSSCCs.size()); + continue; + } + if (VisitedSCCs.count(&ParentSCC)) + continue; + + // We fully explore the depth-first space, adding nodes to the connected + // set only as we pop them off, so "recurse" by rotating to the parent. + DFSSCCs.push_back(std::make_pair(C, I)); + C = &ParentSCC; + I = ParentSCC.parent_begin(); + E = ParentSCC.parent_end(); + } + + // If we've found a connection anywhere below this point on the stack (and + // thus up the parent graph from the caller), the current node needs to be + // added to the connected set now that we've processed all of its parents. + if ((int)DFSSCCs.size() == ConnectedDepth) { + --ConnectedDepth; // We're finished with this connection. + ConnectedSCCs.insert(C); + } else { + // Otherwise remember that its parents don't ever connect. + assert(ConnectedDepth < (int)DFSSCCs.size() && + "Cannot have a connected depth greater than the DFS depth!"); + VisitedSCCs.insert(C); + } + + if (DFSSCCs.empty()) + break; // We've walked all the parents of the caller transitively. + + // Pop off the prior node and position to unwind the depth first recursion. + std::tie(C, I) = DFSSCCs.pop_back_val(); + E = C->parent_end(); + } + + // Now that we have identified all of the SCCs which need to be merged into + // a connected set with the inserted edge, merge all of them into this SCC. + // FIXME: This operation currently creates ordering stability problems + // because we don't use stably ordered containers for the parent SCCs or the + // connected SCCs. + unsigned NewNodeBeginIdx = Nodes.size(); + for (SCC *C : ConnectedSCCs) { + if (C == this) + continue; + for (SCC *ParentC : C->ParentSCCs) + if (!ConnectedSCCs.count(ParentC)) + ParentSCCs.insert(ParentC); + C->ParentSCCs.clear(); + + for (Node *N : *C) { + for (Node &ChildN : *N) { + SCC &ChildC = *G->SCCMap.lookup(&ChildN); + if (&ChildC != C) + ChildC.ParentSCCs.erase(C); + } + G->SCCMap[N] = this; + Nodes.push_back(N); + } + C->Nodes.clear(); + } + for (auto I = Nodes.begin() + NewNodeBeginIdx, E = Nodes.end(); I != E; ++I) + for (Node &ChildN : **I) { + SCC &ChildC = *G->SCCMap.lookup(&ChildN); + if (&ChildC != this) + ChildC.ParentSCCs.insert(this); + } + + // We return the list of SCCs which were merged so that callers can + // invalidate any data they have associated with those SCCs. Note that these + // SCCs are no longer in an interesting state (they are totally empty) but + // the pointers will remain stable for the life of the graph itself. + return SmallVector(ConnectedSCCs.begin(), ConnectedSCCs.end()); +} + +void LazyCallGraph::SCC::removeInterSCCEdge(Node &CallerN, Node &CalleeN) { + // First remove it from the node. 
+ CallerN.removeEdgeInternal(CalleeN.getFunction()); + + assert(G->SCCMap.lookup(&CallerN) == this && + "The caller must be a member of this SCC."); + + SCC &CalleeC = *G->SCCMap.lookup(&CalleeN); + assert(&CalleeC != this && + "This API only supports the removal of inter-SCC edges."); + + assert(std::find(G->LeafSCCs.begin(), G->LeafSCCs.end(), this) == + G->LeafSCCs.end() && + "Cannot have a leaf SCC caller with a different SCC callee."); + + bool HasOtherCallToCalleeC = false; + bool HasOtherCallOutsideSCC = false; + for (Node *N : *this) { + for (Node &OtherCalleeN : *N) { + SCC &OtherCalleeC = *G->SCCMap.lookup(&OtherCalleeN); + if (&OtherCalleeC == &CalleeC) { + HasOtherCallToCalleeC = true; + break; + } + if (&OtherCalleeC != this) + HasOtherCallOutsideSCC = true; + } + if (HasOtherCallToCalleeC) + break; + } + // Because the SCCs form a DAG, deleting such an edge cannot change the set + // of SCCs in the graph. However, it may cut an edge of the SCC DAG, making + // the caller no longer a parent of the callee. Walk the other call edges + // in the caller to tell. + if (!HasOtherCallToCalleeC) { + bool Removed = CalleeC.ParentSCCs.erase(this); + (void)Removed; + assert(Removed && + "Did not find the caller SCC in the callee SCC's parent list!"); + + // It may orphan an SCC if it is the last edge reaching it, but that does + // not violate any invariants of the graph. + if (CalleeC.ParentSCCs.empty()) + DEBUG(dbgs() << "LCG: Update removing " << CallerN.getFunction().getName() + << " -> " << CalleeN.getFunction().getName() + << " edge orphaned the callee's SCC!\n"); + } + + // It may make the Caller SCC a leaf SCC. + if (!HasOtherCallOutsideSCC) + G->LeafSCCs.push_back(this); +} + +void LazyCallGraph::SCC::internalDFS( + SmallVectorImpl> &DFSStack, + SmallVectorImpl &PendingSCCStack, Node *N, + SmallVectorImpl &ResultSCCs) { + Node::iterator I = N->begin(); + N->LowLink = N->DFSNumber = 1; + int NextDFSNumber = 2; + for (;;) { + assert(N->DFSNumber != 0 && "We should always assign a DFS number " + "before processing a node."); + + // We simulate recursion by popping out of the nested loop and continuing. + Node::iterator E = N->end(); + while (I != E) { + Node &ChildN = *I; + if (SCC *ChildSCC = G->SCCMap.lookup(&ChildN)) { + // Check if we have reached a node in the new (known connected) set of + // this SCC. If so, the entire stack is necessarily in that set and we + // can re-start. + if (ChildSCC == this) { + insert(*N); + while (!PendingSCCStack.empty()) + insert(*PendingSCCStack.pop_back_val()); + while (!DFSStack.empty()) + insert(*DFSStack.pop_back_val().first); + return; + } + + // If this child isn't currently in this SCC, no need to process it. + // However, we do need to remove this SCC from its SCC's parent set. + ChildSCC->ParentSCCs.erase(this); + ++I; + continue; + } + + if (ChildN.DFSNumber == 0) { + // Mark that we should start at this child when next this node is the + // top of the stack. We don't start at the next child to ensure this + // child's lowlink is reflected. + DFSStack.push_back(std::make_pair(N, I)); + + // Continue, resetting to the child node. + ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++; + N = &ChildN; + I = ChildN.begin(); + E = ChildN.end(); + continue; + } + + // Track the lowest link of the children, if any are still in the stack. + // Any child not on the stack will have a LowLink of -1.
+ assert(ChildN.LowLink != 0 && + "Low-link must not be zero with a non-zero DFS number."); + if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink) + N->LowLink = ChildN.LowLink; + ++I; + } + + if (N->LowLink == N->DFSNumber) { + ResultSCCs.push_back(G->formSCC(N, PendingSCCStack)); + if (DFSStack.empty()) + return; + } else { + // At this point we know that N cannot ever be an SCC root. Its low-link + // is not its dfs-number, and we've processed all of its children. It is + // just sitting here waiting until some node further down the stack gets + // low-link == dfs-number and pops it off as well. Move it to the pending + // stack which is pulled into the next SCC to be formed. + PendingSCCStack.push_back(N); + + assert(!DFSStack.empty() && "We shouldn't have an empty stack!"); + } + + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } +} + +SmallVector +LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, + Node &CalleeN) { + // First remove it from the node. + CallerN.removeEdgeInternal(CalleeN.getFunction()); + + // We return a list of the resulting *new* SCCs in postorder. + SmallVector ResultSCCs; + + // Direct recursion doesn't impact the SCC graph at all. + if (&CallerN == &CalleeN) + return ResultSCCs; + + // The worklist is every node in the original SCC. + SmallVector Worklist; + Worklist.swap(Nodes); + for (Node *N : Worklist) { + // The nodes formerly in this SCC are no longer in any SCC. + N->DFSNumber = 0; + N->LowLink = 0; + G->SCCMap.erase(N); + } + assert(Worklist.size() > 1 && "We have to have at least two nodes to have an " + "edge between them that is within the SCC."); + + // The callee can already reach every node in this SCC (by definition). It is + // the only node we know will stay inside this SCC. Everything which + // transitively reaches Callee will also remain in the SCC. To model this we + // incrementally add any chain of nodes which reaches something in the new + // node set to the new node set. This short circuits one side of the Tarjan's + // walk. + insert(CalleeN); + + // We're going to do a full mini-Tarjan's walk using a local stack here. + SmallVector, 4> DFSStack; + SmallVector PendingSCCStack; + do { + Node *N = Worklist.pop_back_val(); + if (N->DFSNumber == 0) + internalDFS(DFSStack, PendingSCCStack, N, ResultSCCs); + + assert(DFSStack.empty() && "Didn't flush the entire DFS stack!"); + assert(PendingSCCStack.empty() && "Didn't flush all pending SCC nodes!"); + } while (!Worklist.empty()); + + // Now we need to reconnect the current SCC to the graph. + bool IsLeafSCC = true; + for (Node *N : Nodes) { + for (Node &ChildN : *N) { + SCC &ChildSCC = *G->SCCMap.lookup(&ChildN); + if (&ChildSCC == this) + continue; + ChildSCC.ParentSCCs.insert(this); + IsLeafSCC = false; + } + } +#ifndef NDEBUG + if (!ResultSCCs.empty()) + assert(!IsLeafSCC && "This SCC cannot be a leaf as we have split out new " + "SCCs by removing this edge."); + if (!std::any_of(G->LeafSCCs.begin(), G->LeafSCCs.end(), + [&](SCC *C) { return C == this; })) + assert(!IsLeafSCC && "This SCC cannot be a leaf as it already had child " + "SCCs before we removed this edge."); +#endif + // If this SCC stopped being a leaf through this edge removal, remove it from + // the leaf SCC list. + if (!IsLeafSCC && !ResultSCCs.empty()) + G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this), + G->LeafSCCs.end()); + + // Return the new list of SCCs. 
+ return ResultSCCs; +} + +void LazyCallGraph::insertEdge(Node &CallerN, Function &Callee) { + assert(SCCMap.empty() && DFSStack.empty() && + "This method cannot be called after SCCs have been formed!"); + + return CallerN.insertEdgeInternal(Callee); +} + +void LazyCallGraph::removeEdge(Node &CallerN, Function &Callee) { + assert(SCCMap.empty() && DFSStack.empty() && + "This method cannot be called after SCCs have been formed!"); + + return CallerN.removeEdgeInternal(Callee); +} + +LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) { + return *new (MappedN = BPA.Allocate()) Node(*this, F); +} + +void LazyCallGraph::updateGraphPtrs() { + // Process all nodes updating the graph pointers. + { + SmallVector Worklist; + for (auto &Entry : EntryNodes) + if (Node *EntryN = Entry.dyn_cast()) + Worklist.push_back(EntryN); + + while (!Worklist.empty()) { + Node *N = Worklist.pop_back_val(); + N->G = this; + for (auto &Callee : N->Callees) + if (!Callee.isNull()) + if (Node *CalleeN = Callee.dyn_cast()) + Worklist.push_back(CalleeN); + } + } + + // Process all SCCs updating the graph pointers. + { + SmallVector Worklist(LeafSCCs.begin(), LeafSCCs.end()); + + while (!Worklist.empty()) { + SCC *C = Worklist.pop_back_val(); + C->G = this; + Worklist.insert(Worklist.end(), C->ParentSCCs.begin(), + C->ParentSCCs.end()); + } + } +} + +LazyCallGraph::SCC *LazyCallGraph::formSCC(Node *RootN, + SmallVectorImpl &NodeStack) { + // The tail of the stack is the new SCC. Allocate the SCC and pop the stack + // into it. + SCC *NewSCC = new (SCCBPA.Allocate()) SCC(*this); + + while (!NodeStack.empty() && NodeStack.back()->DFSNumber > RootN->DFSNumber) { + assert(NodeStack.back()->LowLink >= RootN->LowLink && + "We cannot have a low link in an SCC lower than its root on the " + "stack!"); + NewSCC->insert(*NodeStack.pop_back_val()); + } + NewSCC->insert(*RootN); + + // A final pass over all edges in the SCC (this remains linear as we only + // do this once when we build the SCC) to connect it to the parent sets of + // its children. + bool IsLeafSCC = true; + for (Node *SCCN : NewSCC->Nodes) + for (Node &SCCChildN : *SCCN) { + SCC &ChildSCC = *SCCMap.lookup(&SCCChildN); + if (&ChildSCC == NewSCC) + continue; + ChildSCC.ParentSCCs.insert(NewSCC); + IsLeafSCC = false; + } + + // For the SCCs where we find no child SCCs, add them to the leaf list. + if (IsLeafSCC) + LeafSCCs.push_back(NewSCC); + + return NewSCC; +} + +LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() { + Node *N; + Node::iterator I; + if (!DFSStack.empty()) { + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } else { + // If we've handled all candidate entry nodes to the SCC forest, we're done. + do { + if (SCCEntryNodes.empty()) + return nullptr; + + N = &get(*SCCEntryNodes.pop_back_val()); + } while (N->DFSNumber != 0); + I = N->begin(); + N->LowLink = N->DFSNumber = 1; + NextDFSNumber = 2; + } + + for (;;) { + assert(N->DFSNumber != 0 && "We should always assign a DFS number " + "before placing a node onto the stack."); + + Node::iterator E = N->end(); + while (I != E) { + Node &ChildN = *I; + if (ChildN.DFSNumber == 0) { + // Mark that we should start at this child when next this node is the + // top of the stack. We don't start at the next child to ensure this + // child's lowlink is reflected. + DFSStack.push_back(std::make_pair(N, N->begin())); + + // Recurse onto this node via a tail call.
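internalDFS and getNextSCCInPostOrder both run Tarjan's SCC algorithm with the recursion made explicit: DFS numbers, low-links, and a stack that is sliced off whenever a root (LowLink == DFSNumber) retires. For orientation, a self-contained recursive reference version of the same algorithm (std-only sketch; the patch's loops simulate this recursion with DFSStack):

#include <algorithm>
#include <vector>

struct Tarjan {
  const std::vector<std::vector<int>> &Adj;
  std::vector<int> DFSNumber, LowLink;
  std::vector<bool> OnStack;
  std::vector<int> Stack;
  std::vector<std::vector<int>> SCCs;  // filled in postorder
  int NextDFS = 1;

  explicit Tarjan(const std::vector<std::vector<int>> &A)
      : Adj(A), DFSNumber(A.size(), 0), LowLink(A.size(), 0),
        OnStack(A.size(), false) {}

  void visit(int N) {
    DFSNumber[N] = LowLink[N] = NextDFS++;
    Stack.push_back(N);
    OnStack[N] = true;
    for (int C : Adj[N]) {
      if (DFSNumber[C] == 0) {
        visit(C);  // the patch replaces this call with an explicit stack
        LowLink[N] = std::min(LowLink[N], LowLink[C]);
      } else if (OnStack[C]) {
        LowLink[N] = std::min(LowLink[N], DFSNumber[C]);
      }
    }
    if (LowLink[N] == DFSNumber[N]) {  // N roots an SCC: pop its members
      std::vector<int> SCC;
      int M;
      do {
        M = Stack.back();
        Stack.pop_back();
        OnStack[M] = false;
        SCC.push_back(M);
      } while (M != N);
      SCCs.push_back(SCC);
    }
  }
};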
+ assert(!SCCMap.count(&ChildN) && + "Found a node with 0 DFS number but already in an SCC!"); + ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++; + N = &ChildN; + I = ChildN.begin(); + E = ChildN.end(); + continue; + } + + // Track the lowest link of the children, if any are still in the stack. + assert(ChildN.LowLink != 0 && + "Low-link must not be zero with a non-zero DFS number."); + if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink) + N->LowLink = ChildN.LowLink; + ++I; + } + + if (N->LowLink == N->DFSNumber) + // Form the new SCC out of the top of the DFS stack. + return formSCC(N, PendingSCCStack); + + // At this point we know that N cannot ever be an SCC root. Its low-link + // is not its dfs-number, and we've processed all of its children. It is + // just sitting here waiting until some node further down the stack gets + // low-link == dfs-number and pops it off as well. Move it to the pending + // stack which is pulled into the next SCC to be formed. + PendingSCCStack.push_back(N); + + assert(!DFSStack.empty() && "We never found a viable root!"); + N = DFSStack.back().first; + I = DFSStack.back().second; + DFSStack.pop_back(); + } } char LazyCallGraphAnalysis::PassID; @@ -154,9 +687,9 @@ LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {} static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N, SmallPtrSetImpl &Printed) { // Recurse depth first through the nodes. - for (LazyCallGraph::Node *ChildN : N) - if (Printed.insert(ChildN)) - printNodes(OS, *ChildN, Printed); + for (LazyCallGraph::Node &ChildN : N) + if (Printed.insert(&ChildN)) + printNodes(OS, ChildN, Printed); OS << " Call edges in function: " << N.getFunction().getName() << "\n"; for (LazyCallGraph::iterator I = N.begin(), E = N.end(); I != E; ++I) @@ -165,6 +698,16 @@ static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N, OS << "\n"; } +static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) { + ptrdiff_t SCCSize = std::distance(SCC.begin(), SCC.end()); + OS << " SCC with " << SCCSize << " functions:\n"; + + for (LazyCallGraph::Node *N : SCC) + OS << " " << N->getFunction().getName() << "\n"; + + OS << "\n"; +} + PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M, ModuleAnalysisManager *AM) { LazyCallGraph &G = AM->getResult(M); @@ -173,9 +716,13 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M, << "\n\n"; SmallPtrSet Printed; - for (LazyCallGraph::Node *N : G) - if (Printed.insert(N)) - printNodes(OS, *N, Printed); + for (LazyCallGraph::Node &N : G) + if (Printed.insert(&N)) + printNodes(OS, N, Printed); + + for (LazyCallGraph::SCC &SCC : G.postorder_sccs()) + printSCC(OS, SCC); return PreservedAnalyses::all(); + } diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 3d6c583..9f919f7 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lazy-value-info" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" @@ -34,6 +33,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "lazy-value-info" + char LazyValueInfo::ID = 0; INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info", "Lazy Value Information Analysis", false, true) @@ -82,7 +83,7 @@ class LVILatticeVal { ConstantRange Range; public: - LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {} + LVILatticeVal() : Tag(undefined), 
Val(nullptr), Range(1, true) {} static LVILatticeVal get(Constant *C) { LVILatticeVal Res; @@ -516,7 +517,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { BBLV.markOverdefined(); Instruction *BBI = dyn_cast(Val); - if (BBI == 0 || BBI->getParent() != BB) { + if (!BBI || BBI->getParent() != BB) { return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB)); } @@ -595,7 +596,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *UnderlyingVal = GetUnderlyingObject(Val); // If 'GetUnderlyingObject' didn't converge, skip it. It won't converge // inside InstructionDereferencesPointer either. - if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) { + if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) { for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { if (InstructionDereferencesPointer(BI, UnderlyingVal)) { @@ -813,7 +814,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, // Recognize the range checking idiom that InstCombine produces. // (X-C1) u< C2 --> [C1, C1+C2) - ConstantInt *NegOffset = 0; + ConstantInt *NegOffset = nullptr; if (ICI->getPredicate() == ICmpInst::ICMP_ULT) match(ICI->getOperand(0), m_Add(m_Specific(Val), m_ConstantInt(NegOffset))); @@ -1014,7 +1015,7 @@ bool LazyValueInfo::runOnFunction(Function &F) { getCache(PImpl).clear(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); // Fully lazy. @@ -1030,7 +1031,7 @@ void LazyValueInfo::releaseMemory() { // If the cache was allocated, free it. if (PImpl) { delete &getCache(PImpl); - PImpl = 0; + PImpl = nullptr; } } @@ -1044,7 +1045,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) { if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); } - return 0; + return nullptr; } /// getConstantOnEdge - Determine whether the specified value is known to be a @@ -1060,7 +1061,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, if (const APInt *SingleVal = CR.getSingleElement()) return ConstantInt::get(V->getContext(), *SingleVal); } - return 0; + return nullptr; } /// getPredicateOnEdge - Determine whether the specified value comparison @@ -1072,7 +1073,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB); // If we know the value is a constant, evaluate the conditional. - Constant *Res = 0; + Constant *Res = nullptr; if (Result.isConstant()) { Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL, TLI); diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index fefa516..016f8c5 100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -54,7 +54,7 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, // if we have detailed info and if 'P' is any of the locations we know // about. 
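Earlier in this hunk, getEdgeValueLocal recognizes the InstCombine idiom "(X - C1) u< C2", which encodes membership in the half-open interval [C1, C1 + C2). A one-line demonstration of why a single unsigned compare performs both bound checks (modular wrap does the work):

#include <cstdint>

// C1 <= X < C1 + C2, computed mod 2^32, as one unsigned comparison.
bool inRange(uint32_t X, uint32_t C1, uint32_t C2) {
  return (X - C1) < C2;  // X below C1 wraps to a huge value and fails
}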
const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; - if (Details == 0) + if (Details == nullptr) return MRInfo; // If the details array is of the 'DoesNot' kind, we only know something if diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 0592ccb..7d4e254 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -46,11 +46,11 @@ LibCallInfo::getFunctionInfo(const Function *F) const { /// If this is the first time we are querying for this info, lazily construct /// the StringMap to index it. - if (Map == 0) { + if (!Map) { Impl = Map = new StringMap(); const LibCallFunctionInfo *Array = getFunctionInfoArray(); - if (Array == 0) return 0; + if (!Array) return nullptr; // We now have the array of entries. Populate the StringMap. for (unsigned i = 0; Array[i].Name; ++i) diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index b2182b1..b14f329 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -137,8 +137,8 @@ namespace { // that failed. This provides a nice place to put a breakpoint if you want // to see why something is not correct. void CheckFailed(const Twine &Message, - const Value *V1 = 0, const Value *V2 = 0, - const Value *V3 = 0, const Value *V4 = 0) { + const Value *V1 = nullptr, const Value *V2 = nullptr, + const Value *V3 = nullptr, const Value *V4 = nullptr) { MessagesStr << Message.str() << "\n"; WriteValue(V1); WriteValue(V2); @@ -177,7 +177,7 @@ bool Lint::runOnFunction(Function &F) { AA = &getAnalysis(); DT = &getAnalysis().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); visit(F); dbgs() << MessagesStr.str(); @@ -199,7 +199,7 @@ void Lint::visitCallSite(CallSite CS) { Value *Callee = CS.getCalledValue(); visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize, - 0, 0, MemRef::Callee); + 0, nullptr, MemRef::Callee); if (Function *F = dyn_cast(findValue(Callee, /*OffsetOk=*/false))) { Assert1(CS.getCallingConv() == F->getCallingConv(), @@ -275,10 +275,10 @@ void Lint::visitCallSite(CallSite CS) { MemCpyInst *MCI = cast(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize, - MCI->getAlignment(), 0, + MCI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize, - MCI->getAlignment(), 0, + MCI->getAlignment(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API @@ -299,10 +299,10 @@ void Lint::visitCallSite(CallSite CS) { MemMoveInst *MMI = cast(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize, - MMI->getAlignment(), 0, + MMI->getAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize, - MMI->getAlignment(), 0, + MMI->getAlignment(), nullptr, MemRef::Read); break; } @@ -310,7 +310,7 @@ void Lint::visitCallSite(CallSite CS) { MemSetInst *MSI = cast(&I); // TODO: If the size is known, use it. 
visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize, - MSI->getAlignment(), 0, + MSI->getAlignment(), nullptr, MemRef::Write); break; } @@ -321,17 +321,17 @@ void Lint::visitCallSite(CallSite CS) { &I); visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Write); + 0, nullptr, MemRef::Write); visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read); + 0, nullptr, MemRef::Read); break; case Intrinsic::vaend: visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; case Intrinsic::stackrestore: @@ -339,7 +339,7 @@ void Lint::visitCallSite(CallSite CS) { // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize, - 0, 0, MemRef::Read | MemRef::Write); + 0, nullptr, MemRef::Read | MemRef::Write); break; } } @@ -513,7 +513,7 @@ static bool isZero(Value *V, const DataLayout *DL) { if (!VecTy) { unsigned BitWidth = V->getType()->getIntegerBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, DL); + computeKnownBits(V, KnownZero, KnownOne, DL); return KnownZero.isAllOnesValue(); } @@ -534,7 +534,7 @@ static bool isZero(Value *V, const DataLayout *DL) { return true; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Elem, KnownZero, KnownOne, DL); + computeKnownBits(Elem, KnownZero, KnownOne, DL); if (KnownZero.isAllOnesValue()) return true; } @@ -572,13 +572,13 @@ void Lint::visitAllocaInst(AllocaInst &I) { } void Lint::visitVAArgInst(VAArgInst &I) { - visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0, - MemRef::Read | MemRef::Write); + visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, + nullptr, MemRef::Read | MemRef::Write); } void Lint::visitIndirectBrInst(IndirectBrInst &I) { - visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0, - MemRef::Branchee); + visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, + nullptr, MemRef::Branchee); Assert1(I.getNumDestinations() != 0, "Undefined behavior: indirectbr with no destinations", &I); diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 0902a39..005d309 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -62,7 +62,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, if (ByteOffset < 0) // out of bounds return false; - Type *BaseType = 0; + Type *BaseType = nullptr; unsigned BaseAlign = 0; if (const AllocaInst *AI = dyn_cast(Base)) { // An alloca is safe to load from as load as it is suitably aligned. @@ -161,7 +161,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, ScanFrom++; // Don't scan huge blocks. - if (MaxInstsToScan-- == 0) return 0; + if (MaxInstsToScan-- == 0) return nullptr; --ScanFrom; // If this is a load of Ptr, the loaded value is available. @@ -198,7 +198,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // Otherwise the store that may or may not alias the pointer, bail out. 
++ScanFrom; - return 0; + return nullptr; } // If this is some other instruction that may clobber Ptr, bail out. @@ -211,11 +211,11 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, // May modify the pointer, bail out. ++ScanFrom; - return 0; + return nullptr; } } // Got to the start of the block, we didn't find it, but are done for this // block. - return 0; + return nullptr; } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index b38672e..46c0eaa 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -141,21 +141,21 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, PHINode *Loop::getCanonicalInductionVariable() const { BasicBlock *H = getHeader(); - BasicBlock *Incoming = 0, *Backedge = 0; + BasicBlock *Incoming = nullptr, *Backedge = nullptr; pred_iterator PI = pred_begin(H); assert(PI != pred_end(H) && "Loop must have at least one backedge!"); Backedge = *PI++; - if (PI == pred_end(H)) return 0; // dead loop + if (PI == pred_end(H)) return nullptr; // dead loop Incoming = *PI++; - if (PI != pred_end(H)) return 0; // multiple backedges? + if (PI != pred_end(H)) return nullptr; // multiple backedges? if (contains(Incoming)) { if (contains(Backedge)) - return 0; + return nullptr; std::swap(Incoming, Backedge); } else if (!contains(Backedge)) - return 0; + return nullptr; // Loop over all of the PHI nodes, looking for a canonical indvar. for (BasicBlock::iterator I = H->begin(); isa(I); ++I) { @@ -171,7 +171,7 @@ PHINode *Loop::getCanonicalInductionVariable() const { if (CI->equalsInt(1)) return PN; } - return 0; + return nullptr; } /// isLCSSAForm - Return true if the Loop is in LCSSA form @@ -232,7 +232,7 @@ bool Loop::isSafeToClone() const { } MDNode *Loop::getLoopID() const { - MDNode *LoopID = 0; + MDNode *LoopID = nullptr; if (isLoopSimplifyForm()) { LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName); } else { @@ -241,7 +241,7 @@ MDNode *Loop::getLoopID() const { BasicBlock *H = getHeader(); for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { TerminatorInst *TI = (*I)->getTerminator(); - MDNode *MD = 0; + MDNode *MD = nullptr; // Check if this terminator branches to the loop header. for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { @@ -251,17 +251,17 @@ MDNode *Loop::getLoopID() const { } } if (!MD) - return 0; + return nullptr; if (!LoopID) LoopID = MD; else if (MD != LoopID) - return 0; + return nullptr; } } if (!LoopID || LoopID->getNumOperands() == 0 || LoopID->getOperand(0) != LoopID) - return 0; + return nullptr; return LoopID; } @@ -402,7 +402,7 @@ BasicBlock *Loop::getUniqueExitBlock() const { getUniqueExitBlocks(UniqueExitBlocks); if (UniqueExitBlocks.size() == 1) return UniqueExitBlocks[0]; - return 0; + return nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -548,7 +548,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { // is considered uninitialized. Loop *NearLoop = BBLoop; - Loop *Subloop = 0; + Loop *Subloop = nullptr; if (NearLoop != Unloop && Unloop->contains(NearLoop)) { Subloop = NearLoop; // Find the subloop ancestor that is directly contained within Unloop. @@ -564,7 +564,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) { succ_iterator I = succ_begin(BB), E = succ_end(BB); if (I == E) { assert(!Subloop && "subloop blocks must have a successor"); - NearLoop = 0; // unloop blocks may now exit the function. + NearLoop = nullptr; // unloop blocks may now exit the function. 
} for (; I != E; ++I) { if (*I == BB) @@ -637,7 +637,7 @@ void LoopInfo::updateUnloop(Loop *Unloop) { // Blocks no longer have a parent but are still referenced by Unloop until // the Unloop object is deleted. - LI.changeLoopFor(*I, 0); + LI.changeLoopFor(*I, nullptr); } // Remove the loop from the top-level LoopInfo object. diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 38e753f..8df18e7 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -15,10 +15,13 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" using namespace llvm; +#define DEBUG_TYPE "loop-pass-manager" + namespace { /// PrintLoopPass - Print a Function corresponding to a Loop. @@ -61,8 +64,8 @@ LPPassManager::LPPassManager() : FunctionPass(ID), PMDataManager() { skipThisLoop = false; redoThisLoop = false; - LI = NULL; - CurrentLoop = NULL; + LI = nullptr; + CurrentLoop = nullptr; } /// Delete loop from the loop queue and loop hierarchy (LoopInfo). @@ -251,6 +254,8 @@ bool LPPassManager::runOnFunction(Function &F) { // Then call the regular verifyAnalysis functions. verifyPreservedAnalysis(P); + + F.getContext().yield(); } removeNotPreservedAnalysis(P); diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index bc1dc69..10da3d5 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -46,7 +46,7 @@ namespace { bool runOnFunction(Function &F) override; - void print(raw_ostream &OS, const Module * = 0) const override; + void print(raw_ostream &OS, const Module * = nullptr) const override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredTransitive(); @@ -56,7 +56,7 @@ namespace { void releaseMemory() override { Deps.clear(); - F = 0; + F = nullptr; } private: @@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) { MemDepResult Res = MDA.getDependency(Inst); if (!Res.isNonLocal()) { Deps[Inst].insert(std::make_pair(getInstTypePair(Res), - static_cast(0))); + static_cast(nullptr))); } else if (CallSite CS = cast(Inst)) { const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI = MDA.getNonLocalCallDependency(CS); @@ -122,8 +122,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { if (LoadInst *LI = dyn_cast(Inst)) { if (!LI->isUnordered()) { // FIXME: Handle atomic/volatile loads. - Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), - static_cast(0))); + Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown), + static_cast(nullptr))); continue; } AliasAnalysis::Location Loc = AA.getLocation(LI); @@ -131,8 +131,8 @@ bool MemDepPrinter::runOnFunction(Function &F) { } else if (StoreInst *SI = dyn_cast(Inst)) { if (!SI->isUnordered()) { // FIXME: Handle atomic/volatile stores. 
- Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown), - static_cast(0))); + Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown), + static_cast(nullptr))); continue; } AliasAnalysis::Location Loc = AA.getLocation(SI); diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 1dba323..64d339f 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memory-builtins" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -30,6 +29,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "memory-builtins" + enum AllocType { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1 | OpNewLike, // allocates; may return null @@ -76,14 +77,14 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { CallSite CS(const_cast(V)); if (!CS.getInstruction()) - return 0; + return nullptr; if (CS.isNoBuiltin()) - return 0; + return nullptr; Function *Callee = CS.getCalledFunction(); if (!Callee || !Callee->isDeclaration()) - return 0; + return nullptr; return Callee; } @@ -94,17 +95,17 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, bool LookThroughBitCast = false) { // Skip intrinsics if (isa(V)) - return 0; + return nullptr; Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) - return 0; + return nullptr; // Make sure that the function is available. StringRef FnName = Callee->getName(); LibFunc::Func TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) - return 0; + return nullptr; unsigned i = 0; bool found = false; @@ -115,11 +116,11 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, } } if (!found) - return 0; + return nullptr; const AllocFnsTy *FnData = &AllocationFnData[i]; if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) - return 0; + return nullptr; // Check function prototype. int FstParam = FnData->FstParam; @@ -135,7 +136,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, FTy->getParamType(SndParam)->isIntegerTy(32) || FTy->getParamType(SndParam)->isIntegerTy(64))) return FnData; - return 0; + return nullptr; } static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { @@ -202,19 +203,19 @@ bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, /// ignore InvokeInst here. const CallInst *llvm::extractMallocCall(const Value *I, const TargetLibraryInfo *TLI) { - return isMallocLikeFn(I, TLI) ? dyn_cast(I) : 0; + return isMallocLikeFn(I, TLI) ? dyn_cast(I) : nullptr; } static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) - return 0; + return nullptr; // The size of the malloc's result type must be known to determine array size. Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !DL) - return 0; + return nullptr; unsigned ElementSize = DL->getTypeAllocSize(T); if (StructType *ST = dyn_cast(T)) @@ -223,12 +224,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. 
Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = 0; + Value *Multiple = nullptr; if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) return Multiple; - return 0; + return nullptr; } /// isArrayMalloc - Returns the corresponding CallInst if the instruction @@ -245,7 +246,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, return CI; // CI is a non-array malloc or we can't figure out that it is an array malloc. - return 0; + return nullptr; } /// getMallocType - Returns the PointerType resulting from the malloc call. @@ -257,7 +258,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI) { assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - PointerType *MallocType = 0; + PointerType *MallocType = nullptr; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. @@ -277,7 +278,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, return cast(CI->getType()); // Type could not be determined. - return 0; + return nullptr; } /// getMallocAllocatedType - Returns the Type allocated by malloc call. @@ -288,7 +289,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, Type *llvm::getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI) { PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : 0; + return PT ? PT->getElementType() : nullptr; } /// getMallocArraySize - Returns the array size of a malloc call. If the @@ -308,7 +309,7 @@ Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL, /// is a calloc call. const CallInst *llvm::extractCallocCall(const Value *I, const TargetLibraryInfo *TLI) { - return isCallocLikeFn(I, TLI) ? cast(I) : 0; + return isCallocLikeFn(I, TLI) ? cast(I) : nullptr; } @@ -316,15 +317,15 @@ const CallInst *llvm::extractCallocCall(const Value *I, const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast(I); if (!CI || isa(CI)) - return 0; + return nullptr; Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration()) - return 0; + if (Callee == nullptr || !Callee->isDeclaration()) + return nullptr; StringRef FnName = Callee->getName(); LibFunc::Func TLIFn; if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) - return 0; + return nullptr; unsigned ExpectedNumParams; if (TLIFn == LibFunc::free || @@ -335,18 +336,18 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) ExpectedNumParams = 2; else - return 0; + return nullptr; // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. 
FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) - return 0; + return nullptr; if (FTy->getNumParams() != ExpectedNumParams) - return 0; + return nullptr; if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) - return 0; + return nullptr; return CI; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 015ded1..9eaf109 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -33,6 +32,8 @@ #include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "memdep" + STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses"); STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses"); @@ -88,10 +89,10 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; if (!PredCache) PredCache.reset(new PredIteratorCache()); return false; @@ -261,10 +262,10 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const LoadInst *LI, const DataLayout *DL) { // If we have no target data, we can't do this. - if (DL == 0) return false; + if (!DL) return false; // If we haven't already computed the base/offset of MemLoc, do so now. 
- if (MemLocBase == 0) + if (!MemLocBase) MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL); unsigned Size = MemoryDependenceAnalysis:: @@ -362,13 +363,13 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { - const Value *MemLocBase = 0; + const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; bool isInvariantLoad = false; if (isLoad && QueryInst) { LoadInst *LI = dyn_cast(QueryInst); - if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) + if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr) isInvariantLoad = true; } @@ -696,7 +697,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB) --Entry; - NonLocalDepEntry *ExistingResult = 0; + NonLocalDepEntry *ExistingResult = nullptr; if (Entry != Cache.begin()+NumSortedEntries && Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block @@ -807,7 +808,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - NonLocalDepEntry *ExistingResult = 0; + NonLocalDepEntry *ExistingResult = nullptr; if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; @@ -960,7 +961,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, if (CacheInfo->TBAATag != Loc.TBAATag) { if (CacheInfo->TBAATag) { CacheInfo->Pair = BBSkipFirstBlockPair(); - CacheInfo->TBAATag = 0; + CacheInfo->TBAATag = nullptr; for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(), DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI) if (Instruction *Inst = DI->getResult().getInst()) @@ -1116,7 +1117,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SortNonLocalDepInfoCache(*Cache, NumSortedEntries); NumSortedEntries = Cache->size(); } - Cache = 0; + Cache = nullptr; PredList.clear(); for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { @@ -1126,7 +1127,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the PHI translated pointer in this predecessor. This can fail if // not translatable, in which case the getAddr() returns null. PHITransAddr &PredPointer = PredList.back().second; - PredPointer.PHITranslateValue(BB, Pred, 0); + PredPointer.PHITranslateValue(BB, Pred, nullptr); Value *PredPtrVal = PredPointer.getAddr(); @@ -1175,7 +1176,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // predecessor, then we have to assume that the pointer is clobbered in // that predecessor. We can still do PRE of the load, which would insert // a computation of the pointer in this predecessor. - if (PredPtrVal == 0) + if (!PredPtrVal) CanTranslate = false; // FIXME: it is entirely possible that PHI translating will end up with @@ -1224,7 +1225,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // for the given block. It assumes that we haven't modified any of // our datastructures while processing the current block. - if (Cache == 0) { + if (!Cache) { // Refresh the CacheInfo/Cache pointer if it got invalidated. 
CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; @@ -1279,7 +1280,7 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { Instruction *Target = PInfo[i].getResult().getInst(); - if (Target == 0) continue; // Ignore non-local dep results. + if (!Target) continue; // Ignore non-local dep results. assert(Target->getParent() == PInfo[i].getBB()); // Eliminating the dirty entry from 'Cache', so update the reverse info. diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 0c119d6..4e11e50 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -36,7 +36,7 @@ namespace { // Note: NoAA does not call InitializeAliasAnalysis because it's // special and does not support chaining. DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; } AliasResult alias(const Location &LocA, const Location &LocB) override { diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index ad3685a..bfe8642 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -43,7 +43,7 @@ static bool CanPHITrans(Instruction *Inst) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void PHITransAddr::dump() const { - if (Addr == 0) { + if (!Addr) { dbgs() << "PHITransAddr: null\n"; return; } @@ -58,7 +58,7 @@ static bool VerifySubExpr(Value *Expr, SmallVectorImpl &InstInputs) { // If this is a non-instruction value, there is nothing to do. Instruction *I = dyn_cast(Expr); - if (I == 0) return true; + if (!I) return true; // If it's an instruction, it is either in Tmp or its operands recursively // are. @@ -90,7 +90,7 @@ static bool VerifySubExpr(Value *Expr, /// structure is valid, it returns true. If invalid, it prints errors and /// returns false. bool PHITransAddr::Verify() const { - if (Addr == 0) return true; + if (!Addr) return true; SmallVector Tmp(InstInputs.begin(), InstInputs.end()); @@ -116,14 +116,14 @@ bool PHITransAddr::IsPotentiallyPHITranslatable() const { // If the input value is not an instruction, or if it is not defined in CurBB, // then we don't need to phi translate it. Instruction *Inst = dyn_cast(Addr); - return Inst == 0 || CanPHITrans(Inst); + return !Inst || CanPHITrans(Inst); } static void RemoveInstInputs(Value *V, SmallVectorImpl &InstInputs) { Instruction *I = dyn_cast(V); - if (I == 0) return; + if (!I) return; // If the instruction is in the InstInputs list, remove it. SmallVectorImpl::iterator Entry = @@ -147,7 +147,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, const DominatorTree *DT) { // If this is a non-instruction value, it can't require PHI translation. Instruction *Inst = dyn_cast(V); - if (Inst == 0) return V; + if (!Inst) return V; // Determine whether 'Inst' is an input to our PHI translatable expression. bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst); @@ -173,7 +173,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // If this is a non-phi value, and it is analyzable, we can incorporate it // into the expression by making all instruction operands be inputs. if (!CanPHITrans(Inst)) - return 0; + return nullptr; // All instruction operands are now inputs (and of course, they may also be // defined in this block, so they may need to be phi translated themselves. 
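The recursion these PHITransAddr hunks modernize is hard to follow in diff form. The sketch below shows the core idea of PHI translation, rewriting a value used in CurBB in terms of a predecessor PredBB, under simplified assumptions: translateAcrossEdge is a hypothetical helper that only handles PHIs and gives up on everything else, whereas the real PHITranslateSubExpr also rebuilds casts, GEPs, and adds, consults a DominatorTree, and maintains InstInputs.

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical, simplified analogue of PHITranslateSubExpr.
static Value *translateAcrossEdge(Value *V, BasicBlock *CurBB,
                                  BasicBlock *PredBB) {
  Instruction *Inst = dyn_cast<Instruction>(V);
  if (!Inst)
    return V; // Constants and arguments are available in any block.

  // A PHI defined in CurBB becomes its incoming value from PredBB.
  if (PHINode *PN = dyn_cast<PHINode>(Inst))
    if (PN->getParent() == CurBB)
      return PN->getIncomingValueForBlock(PredBB);

  // The real code recursively translates the operands and rebuilds or
  // reuses the instruction; this sketch reports failure instead.
  return nullptr;
}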
@@ -187,9 +187,9 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, // operands need to be phi translated, and if so, reconstruct it. if (CastInst *Cast = dyn_cast(Inst)) { - if (!isSafeToSpeculativelyExecute(Cast)) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return nullptr; Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT); - if (PHIIn == 0) return 0; + if (!PHIIn) return nullptr; if (PHIIn == Cast->getOperand(0)) return Cast; @@ -209,7 +209,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, (!DT || DT->dominates(CastI->getParent(), PredBB))) return CastI; } - return 0; + return nullptr; } // Handle getelementptr with at least one PHI translatable operand. @@ -218,7 +218,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, bool AnyChanged = false; for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); - if (GEPOp == 0) return 0; + if (!GEPOp) return nullptr; AnyChanged |= GEPOp != GEP->getOperand(i); GEPOps.push_back(GEPOp); @@ -253,7 +253,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return GEPI; } } - return 0; + return nullptr; } // Handle add with a constant RHS. @@ -265,7 +265,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, bool isNUW = cast(Inst)->hasNoUnsignedWrap(); Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); - if (LHS == 0) return 0; + if (!LHS) return nullptr; // If the PHI translated LHS is an add of a constant, fold the immediates. if (BinaryOperator *BOp = dyn_cast(LHS)) @@ -304,11 +304,11 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB, return BO; } - return 0; + return nullptr; } // Otherwise, we failed. - return 0; + return nullptr; } @@ -326,10 +326,10 @@ bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, // Make sure the value is live in the predecessor. if (Instruction *Inst = dyn_cast_or_null(Addr)) if (!DT->dominates(Inst->getParent(), PredBB)) - Addr = 0; + Addr = nullptr; } - return Addr == 0; + return Addr == nullptr; } /// PHITranslateWithInsertion - PHI translate this value into the specified @@ -354,7 +354,7 @@ PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, // If not, destroy any intermediate instructions inserted. while (NewInsts.size() != NISize) NewInsts.pop_back_val()->eraseFromParent(); - return 0; + return nullptr; } @@ -379,10 +379,10 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, // Handle cast of PHI translatable value. if (CastInst *Cast = dyn_cast(Inst)) { - if (!isSafeToSpeculativelyExecute(Cast)) return 0; + if (!isSafeToSpeculativelyExecute(Cast)) return nullptr; Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), CurBB, PredBB, DT, NewInsts); - if (OpVal == 0) return 0; + if (!OpVal) return nullptr; // Otherwise insert a cast at the end of PredBB. 
CastInst *New = CastInst::Create(Cast->getOpcode(), @@ -400,7 +400,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i), CurBB, PredBB, DT, NewInsts); - if (OpVal == 0) return 0; + if (!OpVal) return nullptr; GEPOps.push_back(OpVal); } @@ -436,5 +436,5 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, } #endif - return 0; + return nullptr; } diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index f23833a..6d92909 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "postdomtree" - #include "llvm/Analysis/PostDominators.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" @@ -22,6 +20,8 @@ #include "llvm/Support/GenericDomTreeConstruction.h" using namespace llvm; +#define DEBUG_TYPE "postdomtree" + //===----------------------------------------------------------------------===// // PostDominatorTree Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index f4da598..7f88ae1 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -9,7 +9,6 @@ // Detects single entry single exit regions in the control flow graph. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "region" #include "llvm/Analysis/RegionInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" @@ -19,10 +18,13 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include +#include #include using namespace llvm; +#define DEBUG_TYPE "region" + // Always verify if expensive checking is enabled. #ifdef XDEBUG static bool VerifyRegionInfo = true; @@ -62,9 +64,6 @@ Region::~Region() { // Only clean the cache for this Region. Caches of child Regions will be // cleaned when the child Regions are deleted. BBNodeMap.clear(); - - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; } void Region::replaceEntry(BasicBlock *BB) { @@ -88,7 +87,7 @@ void Region::replaceEntryRecursive(BasicBlock *NewEntry) { R->replaceEntry(NewEntry); for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) if ((*RI)->getEntry() == OldEntry) - RegionQueue.push_back(*RI); + RegionQueue.push_back(RI->get()); } } @@ -104,7 +103,7 @@ void Region::replaceExitRecursive(BasicBlock *NewExit) { R->replaceExit(NewExit); for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) if ((*RI)->getExit() == OldExit) - RegionQueue.push_back(*RI); + RegionQueue.push_back(RI->get()); } } @@ -128,8 +127,8 @@ bool Region::contains(const Loop *L) const { // BBs that are not part of any loop are element of the Loop // described by the NULL pointer. This loop is not part of any region, // except if the region describes the whole function. 
- if (L == 0) - return getExit() == 0; + if (!L) + return getExit() == nullptr; if (!contains(L->getHeader())) return false; @@ -147,7 +146,7 @@ bool Region::contains(const Loop *L) const { Loop *Region::outermostLoopInRegion(Loop *L) const { if (!contains(L)) - return 0; + return nullptr; while (L && contains(L->getParentLoop())) { L = L->getParentLoop(); @@ -165,14 +164,14 @@ Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { BasicBlock *Region::getEnteringBlock() const { BasicBlock *entry = getEntry(); BasicBlock *Pred; - BasicBlock *enteringBlock = 0; + BasicBlock *enteringBlock = nullptr; for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; ++PI) { Pred = *PI; if (DT->getNode(Pred) && !contains(Pred)) { if (enteringBlock) - return 0; + return nullptr; enteringBlock = Pred; } @@ -184,17 +183,17 @@ BasicBlock *Region::getEnteringBlock() const { BasicBlock *Region::getExitingBlock() const { BasicBlock *exit = getExit(); BasicBlock *Pred; - BasicBlock *exitingBlock = 0; + BasicBlock *exitingBlock = nullptr; if (!exit) - return 0; + return nullptr; for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; ++PI) { Pred = *PI; if (contains(Pred)) { if (exitingBlock) - return 0; + return nullptr; exitingBlock = Pred; } @@ -295,7 +294,7 @@ Region* Region::getSubRegionNode(BasicBlock *BB) const { Region *R = RI->getRegionFor(BB); if (!R || R == this) - return 0; + return nullptr; // If we pass the BB out of this region, that means our code is broken. assert(contains(R) && "BB not in current region!"); @@ -304,7 +303,7 @@ Region* Region::getSubRegionNode(BasicBlock *BB) const { R = R->getParent(); if (R->getEntry() != BB) - return 0; + return nullptr; return R; } @@ -333,18 +332,20 @@ RegionNode* Region::getNode(BasicBlock *BB) const { void Region::transferChildrenTo(Region *To) { for (iterator I = begin(), E = end(); I != E; ++I) { (*I)->parent = To; - To->children.push_back(*I); + To->children.push_back(std::move(*I)); } children.clear(); } void Region::addSubRegion(Region *SubRegion, bool moveChildren) { - assert(SubRegion->parent == 0 && "SubRegion already has a parent!"); - assert(std::find(begin(), end(), SubRegion) == children.end() - && "Subregion already exists!"); + assert(!SubRegion->parent && "SubRegion already has a parent!"); + assert(std::find_if(begin(), end(), [&](const std::unique_ptr &R) { + return R.get() == SubRegion; + }) == children.end() && + "Subregion already exists!"); SubRegion->parent = this; - children.push_back(SubRegion); + children.push_back(std::unique_ptr(SubRegion)); if (!moveChildren) return; @@ -360,23 +361,27 @@ void Region::addSubRegion(Region *SubRegion, bool moveChildren) { RI->setRegionFor(BB, SubRegion); } - std::vector Keep; + std::vector> Keep; for (iterator I = begin(), E = end(); I != E; ++I) - if (SubRegion->contains(*I) && *I != SubRegion) { - SubRegion->children.push_back(*I); + if (SubRegion->contains(I->get()) && I->get() != SubRegion) { (*I)->parent = SubRegion; + SubRegion->children.push_back(std::move(*I)); } else - Keep.push_back(*I); + Keep.push_back(std::move(*I)); children.clear(); - children.insert(children.begin(), Keep.begin(), Keep.end()); + children.insert(children.begin(), + std::move_iterator(Keep.begin()), + std::move_iterator(Keep.end())); } Region *Region::removeSubRegion(Region *Child) { assert(Child->parent == this && "Child is not a child of this region!"); - Child->parent = 0; - RegionSet::iterator I = std::find(children.begin(), children.end(), Child); + 
Child->parent = nullptr; + RegionSet::iterator I = std::find_if( + children.begin(), children.end(), + [&](const std::unique_ptr &R) { return R.get() == Child; }); assert(I != children.end() && "Region does not exit. Unable to remove."); children.erase(children.begin()+(I-begin())); return Child; @@ -385,7 +390,7 @@ Region *Region::removeSubRegion(Region *Child) { unsigned Region::getDepth() const { unsigned Depth = 0; - for (Region *R = parent; R != 0; R = R->parent) + for (Region *R = parent; R != nullptr; R = R->parent) ++Depth; return Depth; @@ -395,12 +400,12 @@ Region *Region::getExpandedRegion() const { unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors(); if (NumSuccessors == 0) - return NULL; + return nullptr; for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); PI != PE; ++PI) if (!DT->dominates(getEntry(), *PI)) - return NULL; + return nullptr; Region *R = RI->getRegionFor(exit); @@ -408,7 +413,7 @@ Region *Region::getExpandedRegion() const { if (exit->getTerminator()->getNumSuccessors() == 1) return new Region(getEntry(), *succ_begin(exit), RI, DT); else - return NULL; + return nullptr; } while (R->getParent() && R->getParent()->getEntry() == exit) @@ -418,7 +423,7 @@ Region *Region::getExpandedRegion() const { for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); PI != PE; ++PI) if (!DT->dominates(R->getExit(), *PI)) - return NULL; + return nullptr; return new Region(getEntry(), R->getExit(), RI, DT); } @@ -577,7 +582,7 @@ Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { assert(entry && exit && "entry and exit must not be null!"); if (isTrivialRegion(entry, exit)) - return 0; + return nullptr; Region *region = new Region(entry, exit, this, DT); BBtoRegion.insert(std::make_pair(entry, region)); @@ -600,7 +605,7 @@ void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { if (!N) return; - Region *lastRegion= 0; + Region *lastRegion= nullptr; BasicBlock *lastExit = entry; // As only a BasicBlock that postdominates entry can finish a region, walk the @@ -680,12 +685,12 @@ void RegionInfo::releaseMemory() { BBtoRegion.clear(); if (TopLevelRegion) delete TopLevelRegion; - TopLevelRegion = 0; + TopLevelRegion = nullptr; } RegionInfo::RegionInfo() : FunctionPass(ID) { initializeRegionInfoPass(*PassRegistry::getPassRegistry()); - TopLevelRegion = 0; + TopLevelRegion = nullptr; } RegionInfo::~RegionInfo() { @@ -710,7 +715,7 @@ bool RegionInfo::runOnFunction(Function &F) { PDT = &getAnalysis(); DF = &getAnalysis(); - TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0); + TopLevelRegion = new Region(&F.getEntryBlock(), nullptr, this, DT, nullptr); updateStatistics(TopLevelRegion); Calculate(F); @@ -744,7 +749,7 @@ void RegionInfo::verifyAnalysis() const { Region *RegionInfo::getRegionFor(BasicBlock *BB) const { BBtoRegionMap::const_iterator I= BBtoRegion.find(BB); - return I != BBtoRegion.end() ? I->second : 0; + return I != BBtoRegion.end() ? I->second : nullptr; } void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) { @@ -756,7 +761,7 @@ Region *RegionInfo::operator[](BasicBlock *BB) const { } BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const { - BasicBlock *Exit = NULL; + BasicBlock *Exit = nullptr; while (true) { // Get largest region that starts at BB. 
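The RegionInfo.cpp changes above all serve one refactoring: the child list now owns its sub-regions through std::unique_ptr, which is why the destructor's manual delete loop is gone, push_back sites take std::move, and raw-pointer searches become std::find_if over R.get(). A condensed, self-contained sketch of that ownership pattern follows (hypothetical Node type, not the real Region API):

#include <algorithm>
#include <cassert>
#include <memory>
#include <vector>

struct Node {
  Node *parent = nullptr;
  std::vector<std::unique_ptr<Node>> children; // owning container

  // No manual delete loop in a destructor: unique_ptr frees children.

  void addChild(Node *C) {
    assert(!C->parent && "child already has a parent");
    C->parent = this;
    children.push_back(std::unique_ptr<Node>(C)); // take ownership
  }

  Node *removeChild(Node *C) {
    auto I = std::find_if(children.begin(), children.end(),
                          [&](const std::unique_ptr<Node> &P) {
                            return P.get() == C; // compare stored pointer
                          });
    assert(I != children.end() && "not a child of this node");
    I->release(); // hand ownership back to the caller before erasing
    children.erase(I);
    C->parent = nullptr;
    return C;
  }
};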
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp index 12d7ca3..3c7798f 100644 --- a/lib/Analysis/RegionPass.cpp +++ b/lib/Analysis/RegionPass.cpp @@ -17,10 +17,11 @@ #include "llvm/Analysis/RegionIterator.h" #include "llvm/Support/Timer.h" -#define DEBUG_TYPE "regionpassmgr" #include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "regionpassmgr" + //===----------------------------------------------------------------------===// // RGPassManager // @@ -31,15 +32,15 @@ RGPassManager::RGPassManager() : FunctionPass(ID), PMDataManager() { skipThisRegion = false; redoThisRegion = false; - RI = NULL; - CurrentRegion = NULL; + RI = nullptr; + CurrentRegion = nullptr; } // Recurse through all subregions and all regions into RQ. -static void addRegionIntoQueue(Region *R, std::deque &RQ) { - RQ.push_back(R); - for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I) - addRegionIntoQueue(*I, RQ); +static void addRegionIntoQueue(Region &R, std::deque &RQ) { + RQ.push_back(&R); + for (const auto &E : R) + addRegionIntoQueue(*E, RQ); } /// Pass Manager itself does not invalidate any analysis info. @@ -57,7 +58,7 @@ bool RGPassManager::runOnFunction(Function &F) { // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); - addRegionIntoQueue(RI->getTopLevelRegion(), RQ); + addRegionIntoQueue(*RI->getTopLevelRegion(), RQ); if (RQ.empty()) // No regions, skip calling finalizers return false; @@ -185,7 +186,6 @@ private: public: static char ID; - PrintRegionPass() : RegionPass(ID), Out(dbgs()) {} PrintRegionPass(const std::string &B, raw_ostream &o) : RegionPass(ID), Banner(B), Out(o) {} diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 6467f47..893210a 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -98,31 +98,31 @@ struct DOTGraphTraits : public DOTGraphTraits { // Print the cluster of the subregions. This groups the single basic blocks // and adds a different background color for each group. 
- static void printRegionCluster(const Region *R, GraphWriter &GW, + static void printRegionCluster(const Region &R, GraphWriter &GW, unsigned depth = 0) { raw_ostream &O = GW.getOStream(); - O.indent(2 * depth) << "subgraph cluster_" << static_cast(R) + O.indent(2 * depth) << "subgraph cluster_" << static_cast(&R) << " {\n"; O.indent(2 * (depth + 1)) << "label = \"\";\n"; - if (!onlySimpleRegions || R->isSimple()) { + if (!onlySimpleRegions || R.isSimple()) { O.indent(2 * (depth + 1)) << "style = filled;\n"; O.indent(2 * (depth + 1)) << "color = " - << ((R->getDepth() * 2 % 12) + 1) << "\n"; + << ((R.getDepth() * 2 % 12) + 1) << "\n"; } else { O.indent(2 * (depth + 1)) << "style = solid;\n"; O.indent(2 * (depth + 1)) << "color = " - << ((R->getDepth() * 2 % 12) + 2) << "\n"; + << ((R.getDepth() * 2 % 12) + 2) << "\n"; } - for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) - printRegionCluster(*RI, GW, depth + 1); + for (Region::const_iterator RI = R.begin(), RE = R.end(); RI != RE; ++RI) + printRegionCluster(**RI, GW, depth + 1); - RegionInfo *RI = R->getRegionInfo(); + RegionInfo *RI = R.getRegionInfo(); - for (const auto &BB : R->blocks()) - if (RI->getRegionFor(BB) == R) + for (const auto &BB : R.blocks()) + if (RI->getRegionFor(BB) == &R) O.indent(2 * (depth + 1)) << "Node" << static_cast(RI->getTopLevelRegion()->getBBNode(BB)) << ";\n"; @@ -134,7 +134,7 @@ struct DOTGraphTraits : public DOTGraphTraits { GraphWriter &GW) { raw_ostream &O = GW.getOStream(); O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(RI->getTopLevelRegion(), GW, 4); + printRegionCluster(*RI->getTopLevelRegion(), GW, 4); } }; } //end namespace llvm diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 08de621..42a7aa2 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -58,7 +58,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalar-evolution" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -89,6 +88,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "scalar-evolution" + STATISTIC(NumArrayLenItCounts, "Number of trip counts computed with array length"); STATISTIC(NumTripCountsComputed, @@ -182,7 +183,7 @@ void SCEV::print(raw_ostream &OS) const { case scUMaxExpr: case scSMaxExpr: { const SCEVNAryExpr *NAry = cast(this); - const char *OpStr = 0; + const char *OpStr = nullptr; switch (NAry->getSCEVType()) { case scAddExpr: OpStr = " + "; break; case scMulExpr: OpStr = " * "; break; @@ -312,7 +313,7 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { FoldingSetNodeID ID; ID.AddInteger(scConstant); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); @@ -365,7 +366,7 @@ void SCEVUnknown::deleted() { SE->UniqueSCEVs.RemoveNode(this); // Release the value. - setValPtr(0); + setValPtr(nullptr); } void SCEVUnknown::allUsesReplacedWith(Value *New) { @@ -829,7 +830,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, ID.AddInteger(scTruncate); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // Fold if the operand is constant. 
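Most of the ScalarEvolution hunks in this file touch the same FoldingSet uniquing idiom while switching the probe cursor from 0 to nullptr. The fragment below restates that idiom once, as it appears in getConstant and getTruncateExpr above; it is a schematic excerpt, with Kind, Op, and the truncate node standing in for whichever expression is being built:

FoldingSetNodeID ID;
ID.AddInteger(Kind);           // e.g. scTruncate, scAddExpr, ...
ID.AddPointer(Op);             // the operands/type identifying the node
void *IP = nullptr;            // insertion hint filled in by the probe
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
  return S;                    // a structurally identical SCEV exists
SCEV *S = new (SCEVAllocator)
    SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
UniqueSCEVs.InsertNode(S, IP); // insert at the position the probe found
return S;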
@@ -919,7 +920,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, ID.AddInteger(scZeroExtend); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // zext(trunc(x)) --> zext(x) or x or trunc(x) @@ -1072,7 +1073,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step, return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - SE->getSignedRange(Step).getSignedMin()); } - return 0; + return nullptr; } // The recurrence AR has been shown to have no signed wrap. Typically, if we can @@ -1091,19 +1092,18 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, // Check for a simple looking step prior to loop entry. const SCEVAddExpr *SA = dyn_cast(Start); if (!SA) - return 0; + return nullptr; // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV // subtraction is expensive. For this purpose, perform a quick and dirty // difference, by checking for Step in the operand list. SmallVector DiffOps; - for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end(); - I != E; ++I) { - if (*I != Step) - DiffOps.push_back(*I); - } + for (const SCEV *Op : SA->operands()) + if (Op != Step) + DiffOps.push_back(Op); + if (DiffOps.size() == SA->getNumOperands()) - return 0; + return nullptr; // This is a postinc AR. Check for overflow on the preinc recurrence using the // same three conditions that getSignExtendedExpr checks. @@ -1139,7 +1139,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR, SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { return PreStart; } - return 0; + return nullptr; } // Get the normalized sign-extended expression for this AddRec's Start. @@ -1181,7 +1181,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, ID.AddInteger(scSignExtend); ID.AddPointer(Op); ID.AddPointer(Ty); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; // If the input value is provably positive, build a zext instead. @@ -1201,6 +1201,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, return getTruncateOrSignExtend(X, Ty); } + // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 + if (auto SA = dyn_cast(Op)) { + if (SA->getNumOperands() == 2) { + auto SC1 = dyn_cast(SA->getOperand(0)); + auto SMul = dyn_cast(SA->getOperand(1)); + if (SMul && SC1) { + if (auto SC2 = dyn_cast(SMul->getOperand(0))) { + const APInt &C1 = SC1->getValue()->getValue(); + const APInt &C2 = SC2->getValue()->getValue(); + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && + C2.ugt(C1) && C2.isPowerOf2()) + return getAddExpr(getSignExtendExpr(SC1, Ty), + getSignExtendExpr(SMul, Ty)); + } + } + } + } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the // operands (often constants). 
This allows analysis of something like @@ -1292,6 +1309,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, L, AR->getNoWrapFlags()); } } + // If Start and Step are constants, check if we can apply this + // transformation: + // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 + auto SC1 = dyn_cast(Start); + auto SC2 = dyn_cast(Step); + if (SC1 && SC2) { + const APInt &C1 = SC1->getValue()->getValue(); + const APInt &C2 = SC2->getValue()->getValue(); + if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && + C2.isPowerOf2()) { + Start = getSignExtendExpr(Start, Ty); + const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, + L, AR->getNoWrapFlags()); + return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); + } + } } // The cast wasn't folded; create an explicit cast node. @@ -1340,9 +1373,8 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, // Force the cast to be folded into the operands of an addrec. if (const SCEVAddRecExpr *AR = dyn_cast(Op)) { SmallVector Ops; - for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); - I != E; ++I) - Ops.push_back(getAnyExtendExpr(*I, Ty)); + for (const SCEV *Op : AR->operands()) + Ops.push_back(getAnyExtendExpr(Op, Ty)); return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } @@ -1811,7 +1843,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, ID.AddInteger(scAddExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; SCEVAddExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2105,7 +2137,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, ID.AddInteger(scMulExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; SCEVMulExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2230,7 +2262,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, ID.AddInteger(scUDivExpr); ID.AddPointer(LHS); ID.AddPointer(RHS); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), LHS, RHS); @@ -2425,7 +2457,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, for (unsigned i = 0, e = Operands.size(); i != e; ++i) ID.AddPointer(Operands[i]); ID.AddPointer(L); - void *IP = 0; + void *IP = nullptr; SCEVAddRecExpr *S = static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { @@ -2533,7 +2565,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { ID.AddInteger(scSMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); @@ -2637,7 +2669,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { ID.AddInteger(scUMaxExpr); for (unsigned i = 0, e = Ops.size(); i != e; ++i) ID.AddPointer(Ops[i]); - void *IP = 0; + void *IP = nullptr; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; const SCEV **O = SCEVAllocator.Allocate(Ops.size()); std::uninitialized_copy(Ops.begin(), Ops.end(), O); @@ -2704,7 +2736,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { FoldingSetNodeID ID; ID.AddInteger(scUnknown); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (SCEV *S = 
UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { assert(cast(S)->getValue() == V && "Stale SCEVUnknown in uniquing map!"); @@ -3010,7 +3042,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { return getPointerBase(Cast->getOperand()); } else if (const SCEVNAryExpr *NAry = dyn_cast(V)) { - const SCEV *PtrOp = 0; + const SCEV *PtrOp = nullptr; for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); I != E; ++I) { if ((*I)->getType()->isPointerTy()) { @@ -3090,20 +3122,20 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // The loop may have multiple entrances or multiple exits; we can analyze // this phi as an addrec if it has a unique entry value and a unique // backedge value. - Value *BEValueV = 0, *StartValueV = 0; + Value *BEValueV = nullptr, *StartValueV = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = PN->getIncomingValue(i); if (L->contains(PN->getIncomingBlock(i))) { if (!BEValueV) { BEValueV = V; } else if (BEValueV != V) { - BEValueV = 0; + BEValueV = nullptr; break; } } else if (!StartValueV) { StartValueV = V; } else if (StartValueV != V) { - StartValueV = 0; + StartValueV = nullptr; break; } } @@ -3363,7 +3395,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Zeros, Ones); + computeKnownBits(U->getValue(), Zeros, Ones); return Zeros.countTrailingOnes(); } @@ -3502,7 +3534,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - ComputeMaskedBits(U->getValue(), Zeros, Ones, DL); + computeKnownBits(U->getValue(), Zeros, Ones, DL); if (Ones == ~Zeros + 1) return setUnsignedRange(U, ConservativeResult); return setUnsignedRange(U, @@ -3755,13 +3787,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // Instcombine's ShrinkDemandedConstant may strip bits out of // constants, obscuring what would otherwise be a low-bits mask. - // Use ComputeMaskedBits to compute what ShrinkDemandedConstant + // Use computeKnownBits to compute what ShrinkDemandedConstant // knew about to reconstruct a low-bits mask value. 
unsigned LZ = A.countLeadingZeros(); unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, DL); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4316,9 +4348,9 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute(); assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info"); - const SCEV *BECount = 0; + const SCEV *BECount = nullptr; for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV"); @@ -4336,7 +4368,7 @@ const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const { for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { if (ENT->ExitingBlock == ExitingBlock) return ENT->ExactNotTaken; @@ -4359,7 +4391,7 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, return false; for (const ExitNotTakenInfo *ENT = &ExitNotTaken; - ENT != 0; ENT = ENT->getNextExit()) { + ENT != nullptr; ENT = ENT->getNextExit()) { if (ENT->ExactNotTaken != SE->getCouldNotCompute() && SE->hasOperand(ENT->ExactNotTaken, S)) { @@ -4398,8 +4430,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( /// clear - Invalidate this result and free the ExitNotTakenInfo array. void ScalarEvolution::BackedgeTakenInfo::clear() { - ExitNotTaken.ExitingBlock = 0; - ExitNotTaken.ExactNotTaken = 0; + ExitNotTaken.ExitingBlock = nullptr; + ExitNotTaken.ExactNotTaken = nullptr; delete[] ExitNotTaken.getNextExit(); } @@ -4410,38 +4442,63 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - // Examine all exits and pick the most conservative values. - const SCEV *MaxBECount = getCouldNotCompute(); + SmallVector, 4> ExitCounts; bool CouldComputeBECount = true; BasicBlock *Latch = L->getLoopLatch(); // may be NULL. - const SCEV *LatchMaxCount = 0; - SmallVector, 4> ExitCounts; + const SCEV *MustExitMaxBECount = nullptr; + const SCEV *MayExitMaxBECount = nullptr; + + // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts + // and compute maxBECount. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]); + BasicBlock *ExitBB = ExitingBlocks[i]; + ExitLimit EL = ComputeExitLimit(L, ExitBB); + + // 1. For each exit that can be computed, add an entry to ExitCounts. + // CouldComputeBECount is true only if all exits can be computed. if (EL.Exact == getCouldNotCompute()) // We couldn't compute an exact value for this exit, so // we won't be able to compute an exact value for the loop. CouldComputeBECount = false; else - ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact)); - - if (MaxBECount == getCouldNotCompute()) - MaxBECount = EL.Max; - else if (EL.Max != getCouldNotCompute()) { - // We cannot take the "min" MaxBECount, because non-unit stride loops may - // skip some loop tests. Taking the max over the exits is sufficiently - // conservative. TODO: We could do better taking into consideration - // non-latch exits that dominate the latch. 
- if (EL.MustExit && ExitingBlocks[i] == Latch) - LatchMaxCount = EL.Max; - else - MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max); + ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact)); + + // 2. Derive the loop's MaxBECount from each exit's max number of + // non-exiting iterations. Partition the loop exits into two kinds: + // LoopMustExits and LoopMayExits. + // + // A LoopMustExit meets two requirements: + // + // (a) Its ExitLimit.MustExit flag must be set which indicates that the exit + // test condition cannot be skipped (the tested variable has unit stride or + // the test is less-than or greater-than, rather than a strict inequality). + // + // (b) It must dominate the loop latch, hence must be tested on every loop + // iteration. + // + // If any computable LoopMustExit is found, then MaxBECount is the minimum + // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is + // conservatively the maximum EL.Max, where CouldNotCompute is considered + // greater than any computable EL.Max. + if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch && + DT->dominates(ExitBB, Latch)) { + if (!MustExitMaxBECount) + MustExitMaxBECount = EL.Max; + else { + MustExitMaxBECount = + getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max); + } + } else if (MayExitMaxBECount != getCouldNotCompute()) { + if (!MayExitMaxBECount || EL.Max == getCouldNotCompute()) + MayExitMaxBECount = EL.Max; + else { + MayExitMaxBECount = + getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max); + } } } - // Be more precise in the easy case of a loop latch that must exit. - if (LatchMaxCount) { - MaxBECount = getUMinFromMismatchedTypes(MaxBECount, LatchMaxCount); - } + const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount : + (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute()); return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } @@ -4454,7 +4511,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { // exit at this block and remember the exit block and whether all other targets // lead to the loop header. bool MustExecuteLoopHeader = true; - BasicBlock *Exit = 0; + BasicBlock *Exit = nullptr; for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock); SI != SE; ++SI) if (!L->contains(*SI)) { @@ -4800,7 +4857,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( return getCouldNotCompute(); // Okay, we allow one non-constant index into the GEP instruction. - Value *VarIdx = 0; + Value *VarIdx = nullptr; std::vector Indexes; unsigned VarIdxNum = 0; for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) @@ -4810,7 +4867,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. VarIdx = GEP->getOperand(i); VarIdxNum = i-2; - Indexes.push_back(0); + Indexes.push_back(nullptr); } // Loop-invariant loads may be a byproduct of loop optimization. Skip them. @@ -4841,7 +4898,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), Indexes); - if (Result == 0) break; // Cannot compute! + if (!Result) break; // Cannot compute! // Evaluate the condition for this iteration. Result = ConstantExpr::getICmp(predicate, Result, RHS); @@ -4902,14 +4959,14 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. 
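// For instance (hypothetical IR shape):
//   %x = phi i32 [ 1, %entry ], [ %x.next, %loop ]
//   %x.next = mul i32 %x, 3   ; possibly followed by add i32 ..., 2, etc.
// Every operand of %x.next is a constant or traces back to the single header
// PHI %x, so the chain qualifies for the brute-force evaluation below.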
-  PHINode *PHI = 0;
+  PHINode *PHI = nullptr;
   for (Instruction::op_iterator OpI = UseInst->op_begin(),
          OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
     if (isa<Constant>(*OpI)) continue;
 
     Instruction *OpInst = dyn_cast<Instruction>(*OpI);
-    if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+    if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
 
     PHINode *P = dyn_cast<PHINode>(OpInst);
     if (!P)
@@ -4923,8 +4980,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
       P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
       PHIMap[OpInst] = P;
     }
-    if (P == 0) return 0;  // Not evolving from PHI
-    if (PHI && PHI != P) return 0;  // Evolving from multiple different PHIs.
+    if (!P)
+      return nullptr;  // Not evolving from PHI
+    if (PHI && PHI != P)
+      return nullptr;  // Evolving from multiple different PHIs.
     PHI = P;
   }
   // This is an expression evolving from a constant PHI!
@@ -4938,7 +4997,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
 /// constraints, return null.
 static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
   Instruction *I = dyn_cast<Instruction>(V);
-  if (I == 0 || !canConstantEvolve(I, L)) return 0;
+  if (!I || !canConstantEvolve(I, L)) return nullptr;
 
   if (PHINode *PN = dyn_cast<PHINode>(I)) {
     return PN;
@@ -4960,18 +5019,18 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
   // Convenient constant check, but redundant for recursive calls.
   if (Constant *C = dyn_cast<Constant>(V)) return C;
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return 0;
+  if (!I) return nullptr;
 
   if (Constant *C = Vals.lookup(I)) return C;
 
   // An instruction inside the loop depends on a value outside the loop that we
   // weren't given a mapping for, or a value such as a call inside the loop.
-  if (!canConstantEvolve(I, L)) return 0;
+  if (!canConstantEvolve(I, L)) return nullptr;
 
   // An unmapped PHI can be due to a branch or another loop inside this loop,
   // or due to this not being the initial iteration through a loop where we
   // couldn't compute the evolution of this particular PHI last time.
-  if (isa<PHINode>(I)) return 0;
+  if (isa<PHINode>(I)) return nullptr;
 
   std::vector<Constant*> Operands(I->getNumOperands());
 
@@ -4979,12 +5038,12 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
     Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
     if (!Operand) {
       Operands[i] = dyn_cast<Constant>(I->getOperand(i));
-      if (!Operands[i]) return 0;
+      if (!Operands[i]) return nullptr;
       continue;
     }
     Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
     Vals[Operand] = C;
-    if (!C) return 0;
+    if (!C) return nullptr;
     Operands[i] = C;
   }
 
@@ -5013,7 +5072,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
     return I->second;
 
   if (BEs.ugt(MaxBruteForceIterations))
-    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
+    return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.
 
   Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
 
@@ -5025,22 +5084,22 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
   // entry must be a constant (coming in from outside of the loop), and the
   // second must be derived from the same PHI.
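// Concretely, the supported shape is the canonical two-entry PHI
// (hypothetical IR):
//   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
// where the constant 0 flows in from outside the loop and %iv.next is
// recomputed from %iv on the backedge.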
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (BasicBlock::iterator I = Header->begin(); (PHI = dyn_cast(I)); ++I) { Constant *StartCST = dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; + if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) - return RetVal = 0; + return RetVal = nullptr; Value *BEValue = PN->getIncomingValue(SecondIsBackedge); // Execute the loop symbolically to determine the exit value. if (BEs.getActiveBits() >= 32) - return RetVal = 0; // More than 2^32-1 iterations?? Not doing it! + return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it! unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; @@ -5053,8 +5112,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, DenseMap NextIterVals; Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); - if (NextPHI == 0) - return 0; // Couldn't evaluate! + if (!NextPHI) + return nullptr; // Couldn't evaluate! NextIterVals[PN] = NextPHI; bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; @@ -5101,7 +5160,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); - if (PN == 0) return getCouldNotCompute(); + if (!PN) return getCouldNotCompute(); // If the loop is canonicalized, the PHI will have exactly two entries. // That's the only form we support here. @@ -5114,12 +5173,12 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; + PHINode *PHI = nullptr; for (BasicBlock::iterator I = Header->begin(); (PHI = dyn_cast(I)); ++I) { Constant *StartCST = dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; + if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) @@ -5189,7 +5248,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { if (Values[u].first == L) return Values[u].second ? Values[u].second : V; } - Values.push_back(std::make_pair(L, static_cast(0))); + Values.push_back(std::make_pair(L, static_cast(nullptr))); // Otherwise compute it. const SCEV *C = computeSCEVAtScope(V, L); SmallVector, 2> &Values2 = ValuesAtScopes[V]; @@ -5243,7 +5302,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { } for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); - if (!C2) return 0; + if (!C2) return nullptr; // First pointer! if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { @@ -5258,7 +5317,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { // Don't bother trying to sum two pointers. We probably can't // statically compute a load that results from it anyway. if (C2->getType()->isPointerTy()) - return 0; + return nullptr; if (PointerType *PTy = dyn_cast(C->getType())) { if (PTy->getElementType()->isStructTy()) @@ -5276,10 +5335,10 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { const SCEVMulExpr *SM = cast(V); if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { // Don't bother with pointers at all. 
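// E.g. a product of integer constants such as (3 * 5) still folds to i32 15
// at this point, but a pointer-typed factor has no meaningful constant
// product, so the cases below return nullptr instead (sketch of the intent).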
- if (C->getType()->isPointerTy()) return 0; + if (C->getType()->isPointerTy()) return nullptr; for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); - if (!C2 || C2->getType()->isPointerTy()) return 0; + if (!C2 || C2->getType()->isPointerTy()) return nullptr; C = ConstantExpr::getMul(C, C2); } return C; @@ -5298,7 +5357,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { case scUMaxExpr: break; // TODO: smax, umax. } - return 0; + return nullptr; } const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { @@ -5365,7 +5424,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if getSCEVAtScope actually made an improvement. if (MadeImprovement) { - Constant *C = 0; + Constant *C = nullptr; if (const CmpInst *CI = dyn_cast(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], DL, @@ -5697,7 +5756,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) { // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast(Step); - if (StepC == 0 || StepC->getValue()->equalsInt(0)) + if (!StepC || StepC->getValue()->equalsInt(0)) return getCouldNotCompute(); // For positive steps (counting up until unsigned overflow): @@ -6136,18 +6195,30 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, // If LHS or RHS is an addrec, check to see if the condition is true in // every iteration of the loop. - if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) - if (isLoopEntryGuardedByCond( - AR->getLoop(), Pred, AR->getStart(), RHS) && - isLoopBackedgeGuardedByCond( - AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS)) - return true; - if (const SCEVAddRecExpr *AR = dyn_cast(RHS)) - if (isLoopEntryGuardedByCond( - AR->getLoop(), Pred, LHS, AR->getStart()) && - isLoopBackedgeGuardedByCond( - AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this))) - return true; + // If LHS and RHS are both addrec, both conditions must be true in + // every iteration of the loop. + const SCEVAddRecExpr *LAR = dyn_cast(LHS); + const SCEVAddRecExpr *RAR = dyn_cast(RHS); + bool LeftGuarded = false; + bool RightGuarded = false; + if (LAR) { + const Loop *L = LAR->getLoop(); + if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) && + isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) { + if (!RAR) return true; + LeftGuarded = true; + } + } + if (RAR) { + const Loop *L = RAR->getLoop(); + if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) && + isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) { + if (!LAR) return true; + RightGuarded = true; + } + } + if (LeftGuarded && RightGuarded) + return true; // Otherwise see what can be done with known constant ranges. return isKnownPredicateWithRanges(Pred, LHS, RHS); @@ -6814,6 +6885,105 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, return SE.getCouldNotCompute(); } +namespace { +struct FindUndefs { + bool Found; + FindUndefs() : Found(false) {} + + bool follow(const SCEV *S) { + if (const SCEVUnknown *C = dyn_cast(S)) { + if (isa(C->getValue())) + Found = true; + } else if (const SCEVConstant *C = dyn_cast(S)) { + if (isa(C->getValue())) + Found = true; + } + + // Keep looking if we haven't found it yet. + return !Found; + } + bool isDone() const { + // Stop recursion if we have found an undef. 
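// (The follow/isDone protocol seen here drives every SCEVTraversal client.
// A minimal hypothetical clone that searches for any constant instead:
//   struct FindConstant {
//     bool Found;
//     FindConstant() : Found(false) {}
//     bool follow(const SCEV *S) {
//       if (isa<SCEVConstant>(S)) Found = true;
//       return !Found;                  // prune the walk once found
//     }
//     bool isDone() const { return Found; }
//   };
//   FindConstant F; SCEVTraversal<FindConstant> ST(F); ST.visitAll(S);
// mirrors containsUndefs below.)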
+ return Found; + } +}; +} + +// Return true when S contains at least an undef value. +static inline bool +containsUndefs(const SCEV *S) { + FindUndefs F; + SCEVTraversal ST(F); + ST.visitAll(S); + + return F.Found; +} + +namespace { +// Collect all steps of SCEV expressions. +struct SCEVCollectStrides { + ScalarEvolution &SE; + SmallVectorImpl &Strides; + + SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl &S) + : SE(SE), Strides(S) {} + + bool follow(const SCEV *S) { + if (const SCEVAddRecExpr *AR = dyn_cast(S)) + Strides.push_back(AR->getStepRecurrence(SE)); + return true; + } + bool isDone() const { return false; } +}; + +// Collect all SCEVUnknown and SCEVMulExpr expressions. +struct SCEVCollectTerms { + SmallVectorImpl &Terms; + + SCEVCollectTerms(SmallVectorImpl &T) + : Terms(T) {} + + bool follow(const SCEV *S) { + if (isa(S) || isa(S)) { + if (!containsUndefs(S)) + Terms.push_back(S); + + // Stop recursion: once we collected a term, do not walk its operands. + return false; + } + + // Keep looking. + return true; + } + bool isDone() const { return false; } +}; +} + +/// Find parametric terms in this SCEVAddRecExpr. +void SCEVAddRecExpr::collectParametricTerms( + ScalarEvolution &SE, SmallVectorImpl &Terms) const { + SmallVector Strides; + SCEVCollectStrides StrideCollector(SE, Strides); + visitAll(this, StrideCollector); + + DEBUG({ + dbgs() << "Strides:\n"; + for (const SCEV *S : Strides) + dbgs() << *S << "\n"; + }); + + for (const SCEV *S : Strides) { + SCEVCollectTerms TermCollector(Terms); + visitAll(S, TermCollector); + } + + DEBUG({ + dbgs() << "Terms:\n"; + for (const SCEV *T : Terms) + dbgs() << *T << "\n"; + }); +} + static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { APInt A = C1->getValue()->getValue(); APInt B = C2->getValue()->getValue(); @@ -6843,351 +7013,479 @@ static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { } namespace { -struct SCEVGCD : public SCEVVisitor { -public: - // Pattern match Step into Start. When Step is a multiply expression, find - // the largest subexpression of Step that appears in Start. When Start is an - // add expression, try to match Step in the subexpressions of Start, non - // matching subexpressions are returned under Remainder. - static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start, - const SCEV *Step, const SCEV **Remainder) { - assert(Remainder && "Remainder should not be NULL"); - SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0)); - const SCEV *Res = R.visit(Start); - *Remainder = R.Remainder; - return Res; - } +struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} - SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R) - : SE(S), GCD(G), Remainder(R) { - Zero = SE.getConstant(GCD->getType(), 0); - One = SE.getConstant(GCD->getType(), 1); + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; } + bool isDone() const { + return false; + } +}; +} - const SCEV *visitConstant(const SCEVConstant *Constant) { - if (GCD == Constant || Constant == Zero) - return GCD; +// Returns the size of the SCEV S. 
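// For instance, sizeOfSCEV(((%n + 2) * %m)) == 5: the multiply, the add and
// the three leaves %n, 2 and %m are each visited once.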
+static inline int sizeOfSCEV(const SCEV *S) { + FindSCEVSize F; + SCEVTraversal ST(F); + ST.visitAll(S); + return F.Size; +} - if (const SCEVConstant *CGCD = dyn_cast(GCD)) { - const SCEV *Res = SE.getConstant(gcd(Constant, CGCD)); - if (Res != One) - return Res; +namespace { - Remainder = SE.getConstant(srem(Constant, CGCD)); - Constant = cast(SE.getMinusSCEV(Constant, Remainder)); - Res = SE.getConstant(gcd(Constant, CGCD)); - return Res; +struct SCEVDivision : public SCEVVisitor { +public: + // Computes the Quotient and Remainder of the division of Numerator by + // Denominator. + static void divide(ScalarEvolution &SE, const SCEV *Numerator, + const SCEV *Denominator, const SCEV **Quotient, + const SCEV **Remainder) { + assert(Numerator && Denominator && "Uninitialized SCEV"); + + SCEVDivision D(SE, Numerator, Denominator); + + // Check for the trivial case here to avoid having to check for it in the + // rest of the code. + if (Numerator == Denominator) { + *Quotient = D.One; + *Remainder = D.Zero; + return; } - // When GCD is not a constant, it could be that the GCD is an Add, Mul, - // AddRec, etc., in which case we want to find out how many times the - // Constant divides the GCD: we then return that as the new GCD. - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, GCD, Constant, &Rem); + if (Numerator->isZero()) { + *Quotient = D.Zero; + *Remainder = D.Zero; + return; + } - if (Res == One || Rem != Zero) { - Remainder = Constant; - return One; + // Split the Denominator when it is a product. + if (const SCEVMulExpr *T = dyn_cast(Denominator)) { + const SCEV *Q, *R; + *Quotient = Numerator; + for (const SCEV *Op : T->operands()) { + divide(SE, *Quotient, Op, &Q, &R); + *Quotient = Q; + + // Bail out when the Numerator is not divisible by one of the terms of + // the Denominator. + if (!R->isZero()) { + *Quotient = D.Zero; + *Remainder = Numerator; + return; + } + } + *Remainder = D.Zero; + return; } - assert(isa(Res) && "Res should be a constant"); - Remainder = SE.getConstant(srem(Constant, cast(Res))); - return Res; + D.visit(Numerator); + *Quotient = D.Quotient; + *Remainder = D.Remainder; + } + + SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) + : SE(S), Denominator(Denominator) { + Zero = SE.getConstant(Denominator->getType(), 0); + One = SE.getConstant(Denominator->getType(), 1); + + // By default, we don't know how to divide Expr by Denominator. + // Providing the default here simplifies the rest of the code. + Quotient = Zero; + Remainder = Numerator; + } + + // Except in the trivial case described above, we do not know how to divide + // Expr by Denominator for the following functions with empty implementation. 
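// As a sketch of the expected results: dividing {0,+,(4 * %n)} by 4 yields
// Quotient {0,+,%n} and Remainder 0 through visitAddRecExpr/visitMulExpr,
// while dividing (5 + (4 * %n)) by 4 yields Quotient (1 + %n) and
// Remainder 1 through visitAddExpr and visitConstant below.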
+ void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} + void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} + void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} + void visitUDivExpr(const SCEVUDivExpr *Numerator) {} + void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} + void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} + void visitUnknown(const SCEVUnknown *Numerator) {} + void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} + + void visitConstant(const SCEVConstant *Numerator) { + if (const SCEVConstant *D = dyn_cast(Denominator)) { + Quotient = SE.getConstant(sdiv(Numerator, D)); + Remainder = SE.getConstant(srem(Numerator, D)); + return; + } } - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; + void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { + const SCEV *StartQ, *StartR, *StepQ, *StepR; + assert(Numerator->isAffine() && "Numerator should be affine"); + divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); + divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); + Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), + Numerator->getNoWrapFlags()); + Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), + Numerator->getNoWrapFlags()); } - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + void visitAddExpr(const SCEVAddExpr *Numerator) { + SmallVector Qs, Rs; + Type *Ty = Denominator->getType(); - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + for (const SCEV *Op : Numerator->operands()) { + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - if (GCD == Expr) - return GCD; + // Bail out if types do not match. + if (Ty != Q->getType() || Ty != R->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem); + Qs.push_back(Q); + Rs.push_back(R); + } - // FIXME: There may be ambiguous situations: for instance, - // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m). - // The order in which the AddExpr is traversed computes a different GCD - // and Remainder. - if (Res != One) - GCD = Res; - if (Rem != Zero) - Remainder = SE.getAddExpr(Remainder, Rem); + if (Qs.size() == 1) { + Quotient = Qs[0]; + Remainder = Rs[0]; + return; } - return GCD; + Quotient = SE.getAddExpr(Qs); + Remainder = SE.getAddExpr(Rs); } - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - if (GCD == Expr) - return GCD; + void visitMulExpr(const SCEVMulExpr *Numerator) { + SmallVector Qs; + Type *Ty = Denominator->getType(); - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - if (Expr->getOperand(i) == GCD) - return GCD; - } + bool FoundDenominatorTerm = false; + for (const SCEV *Op : Numerator->operands()) { + // Bail out if types do not match. + if (Ty != Op->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + if (FoundDenominatorTerm) { + Qs.push_back(Op); + continue; + } - // If we have not returned yet, it means that GCD is not part of Expr. 
- const SCEV *PartialGCD = One; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem); - if (Rem != Zero) - // GCD does not divide Expr->getOperand(i). + // Check whether Denominator divides one of the product operands. + const SCEV *Q, *R; + divide(SE, Op, Denominator, &Q, &R); + if (!R->isZero()) { + Qs.push_back(Op); continue; + } - if (Res == GCD) - return GCD; - PartialGCD = SE.getMulExpr(PartialGCD, Res); - if (PartialGCD == GCD) - return GCD; - } - - if (PartialGCD != One) - return PartialGCD; - - Remainder = Expr; - const SCEVMulExpr *Mul = dyn_cast(GCD); - if (!Mul) - return PartialGCD; - - // When the GCD is a multiply expression, try to decompose it: - // this occurs when Step does not divide the Start expression - // as in: {(-4 + (3 * %m)),+,(2 * %m)} - for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) { - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem); - if (Rem == Zero) { - Remainder = Rem; - return Res; + // Bail out if types do not match. + if (Ty != Q->getType()) { + Quotient = Zero; + Remainder = Numerator; + return; } + + FoundDenominatorTerm = true; + Qs.push_back(Q); } - return PartialGCD; - } + if (FoundDenominatorTerm) { + Remainder = Zero; + if (Qs.size() == 1) + Quotient = Qs[0]; + else + Quotient = SE.getMulExpr(Qs); + return; + } - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; + if (!isa(Denominator)) { + Quotient = Zero; + Remainder = Numerator; + return; + } + + // The Remainder is obtained by replacing Denominator by 0 in Numerator. + ValueToValueMap RewriteMap; + RewriteMap[cast(Denominator)->getValue()] = + cast(Zero)->getValue(); + Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); + + // Quotient is (Numerator - Remainder) divided by Denominator. + const SCEV *Q, *R; + const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); + if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { + // This SCEV does not seem to simplify: fail the division here. + Quotient = Zero; + Remainder = Numerator; + return; + } + divide(SE, Diff, Denominator, &Q, &R); + assert(R == Zero && + "(Numerator - Remainder) should evenly divide Denominator"); + Quotient = Q; } - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - if (GCD == Expr) - return GCD; +private: + ScalarEvolution &SE; + const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; +}; +} - if (!Expr->isAffine()) { - Remainder = Expr; - return GCD; - } +static bool findArrayDimensionsRec(ScalarEvolution &SE, + SmallVectorImpl &Terms, + SmallVectorImpl &Sizes) { + int Last = Terms.size() - 1; + const SCEV *Step = Terms[Last]; - const SCEV *Rem = Zero; - const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem); - if (Rem != Zero) - Remainder = SE.getAddExpr(Remainder, Rem); + // End of recursion. + if (Last == 0) { + if (const SCEVMulExpr *M = dyn_cast(Step)) { + SmallVector Qs; + for (const SCEV *Op : M->operands()) + if (!isa(Op)) + Qs.push_back(Op); - Rem = Zero; - Res = findGCD(SE, Expr->getOperand(1), Res, &Rem); - if (Rem != Zero) { - Remainder = Expr; - return GCD; + Step = SE.getMulExpr(Qs); } - return Res; + Sizes.push_back(Step); + return true; } - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + for (const SCEV *&Term : Terms) { + // Normalize the terms before the next call to findArrayDimensionsRec. 
+ const SCEV *Q, *R; + SCEVDivision::divide(SE, Term, Step, &Q, &R); - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; - } + // Bail out when GCD does not evenly divide one of the terms. + if (!R->isZero()) + return false; - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (GCD != Expr) - Remainder = Expr; - return GCD; + Term = Q; } - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return One; - } + // Remove all SCEVConstants. + Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) { + return isa(E); + }), + Terms.end()); -private: - ScalarEvolution &SE; - const SCEV *GCD, *Remainder, *Zero, *One; -}; + if (Terms.size() > 0) + if (!findArrayDimensionsRec(SE, Terms, Sizes)) + return false; -struct SCEVDivision : public SCEVVisitor { -public: - // Remove from Start all multiples of Step. - static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start, - const SCEV *Step) { - SCEVDivision D(SE, Step); - const SCEV *Rem = D.Zero; - (void)Rem; - // The division is guaranteed to succeed: Step should divide Start with no - // remainder. - assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero && - "Step should divide Start with no remainder."); - return D.visit(Start); - } + Sizes.push_back(Step); + return true; +} + +namespace { +struct FindParameter { + bool FoundParameter; + FindParameter() : FoundParameter(false) {} - SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) { - Zero = SE.getConstant(GCD->getType(), 0); - One = SE.getConstant(GCD->getType(), 1); + bool follow(const SCEV *S) { + if (isa(S)) { + FoundParameter = true; + // Stop recursion: we found a parameter. + return false; + } + // Keep looking. + return true; } + bool isDone() const { + // Stop recursion if we have found a parameter. + return FoundParameter; + } +}; +} - const SCEV *visitConstant(const SCEVConstant *Constant) { - if (GCD == Constant) - return One; +// Returns true when S contains at least a SCEVUnknown parameter. +static inline bool +containsParameters(const SCEV *S) { + FindParameter F; + SCEVTraversal ST(F); + ST.visitAll(S); - if (const SCEVConstant *CGCD = dyn_cast(GCD)) - return SE.getConstant(sdiv(Constant, CGCD)); - return Constant; - } + return F.FoundParameter; +} - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } +// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. +static inline bool +containsParameters(SmallVectorImpl &Terms) { + for (const SCEV *T : Terms) + if (containsParameters(T)) + return true; + return false; +} - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } +// Return the number of product terms in S. 
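// E.g. numberOfTerms((%n * %m * 8)) == 3, while a non-multiply expression
// such as (%n + %m) counts as a single term.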
+static inline int numberOfTerms(const SCEV *S) {
+  if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
+    return Expr->getNumOperands();
+  return 1;
+}
 
-  const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
+static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
+  if (isa<SCEVConstant>(T))
+    return nullptr;
 
-  const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
-    if (GCD == Expr)
-      return One;
+  if (isa<SCEVUnknown>(T))
+    return T;
 
-    SmallVector<const SCEV*, 2> Operands;
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
-      Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
+    SmallVector<const SCEV *, 2> Factors;
+    for (const SCEV *Op : M->operands())
+      if (!isa<SCEVConstant>(Op))
+        Factors.push_back(Op);
 
-    if (Operands.size() == 1)
-      return Operands[0];
-    return SE.getAddExpr(Operands);
+    return SE.getMulExpr(Factors);
   }
 
-  const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
-    if (GCD == Expr)
-      return One;
+  return T;
+}
 
-    bool FoundGCDTerm = false;
-    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
-      if (Expr->getOperand(i) == GCD)
-        FoundGCDTerm = true;
+/// Return the size of an element read or written by Inst.
+const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
+  Type *Ty;
+  if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+    Ty = Store->getValueOperand()->getType();
+  else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+    Ty = Load->getPointerOperand()->getType();
+  else
+    return nullptr;
 
-    SmallVector<const SCEV*, 2> Operands;
-    if (FoundGCDTerm) {
-      FoundGCDTerm = false;
-      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-        if (FoundGCDTerm)
-          Operands.push_back(Expr->getOperand(i));
-        else if (Expr->getOperand(i) == GCD)
-          FoundGCDTerm = true;
-        else
-          Operands.push_back(Expr->getOperand(i));
-      }
-    } else {
-      const SCEV *PartialGCD = One;
-      for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
-        if (PartialGCD == GCD) {
-          Operands.push_back(Expr->getOperand(i));
-          continue;
-        }
+  Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
+  return getSizeOfExpr(ETy, Ty);
+}
 
-        const SCEV *Rem = Zero;
-        const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
-        if (Rem == Zero) {
-          PartialGCD = SE.getMulExpr(PartialGCD, Res);
-          Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
-        } else {
-          Operands.push_back(Expr->getOperand(i));
-        }
-      }
-    }
+/// Second step of delinearization: compute the array dimensions Sizes from the
+/// set of Terms extracted from the memory access function of this SCEVAddRec.
+void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
+                                          SmallVectorImpl<const SCEV *> &Sizes,
+                                          const SCEV *ElementSize) const {
 
-    if (Operands.size() == 1)
-      return Operands[0];
-    return SE.getMulExpr(Operands);
-  }
+  if (Terms.size() < 1)
+    return;
 
-  const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
-    if (GCD == Expr)
-      return One;
-    return Expr;
-  }
+  // Early return when Terms do not contain parameters: we do not delinearize
+  // non-parametric SCEVs.
+  if (!containsParameters(Terms))
+    return;
 
-  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
-    if (GCD == Expr)
-      return One;
+  DEBUG({
+      dbgs() << "Terms:\n";
+      for (const SCEV *T : Terms)
+        dbgs() << *T << "\n";
+    });
 
-    assert(Expr->isAffine() && "Expr should be affine");
+  // Remove duplicates.
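// (SCEVs are uniqued by ScalarEvolution, so equal expressions are
// pointer-identical; sorting by pointer groups the duplicates for
// std::unique below.)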
+ std::sort(Terms.begin(), Terms.end()); + Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end()); - const SCEV *Start = divide(SE, Expr->getStart(), GCD); - const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD); + // Put larger terms first. + std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) { + return numberOfTerms(LHS) > numberOfTerms(RHS); + }); - return SE.getAddRecExpr(Start, Step, Expr->getLoop(), - Expr->getNoWrapFlags()); - } + ScalarEvolution &SE = *const_cast(this); - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; + // Divide all terms by the element size. + for (const SCEV *&Term : Terms) { + const SCEV *Q, *R; + SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); + Term = Q; } - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - if (GCD == Expr) - return One; - return Expr; - } + SmallVector NewTerms; - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - if (GCD == Expr) - return One; - return Expr; + // Remove constant factors. + for (const SCEV *T : Terms) + if (const SCEV *NewT = removeConstantFactors(SE, T)) + NewTerms.push_back(NewT); + + DEBUG({ + dbgs() << "Terms after sorting:\n"; + for (const SCEV *T : NewTerms) + dbgs() << *T << "\n"; + }); + + if (NewTerms.empty() || + !findArrayDimensionsRec(SE, NewTerms, Sizes)) { + Sizes.clear(); + return; } - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return Expr; + // The last element to be pushed into Sizes is the size of an element. + Sizes.push_back(ElementSize); + + DEBUG({ + dbgs() << "Sizes:\n"; + for (const SCEV *S : Sizes) + dbgs() << *S << "\n"; + }); +} + +/// Third step of delinearization: compute the access functions for the +/// Subscripts based on the dimensions in Sizes. +void SCEVAddRecExpr::computeAccessFunctions( + ScalarEvolution &SE, SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes) const { + + // Early exit in case this SCEV is not an affine multivariate function. + if (Sizes.empty() || !this->isAffine()) + return; + + const SCEV *Res = this; + int Last = Sizes.size() - 1; + for (int i = Last; i >= 0; i--) { + const SCEV *Q, *R; + SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R); + + DEBUG({ + dbgs() << "Res: " << *Res << "\n"; + dbgs() << "Sizes[i]: " << *Sizes[i] << "\n"; + dbgs() << "Res divided by Sizes[i]:\n"; + dbgs() << "Quotient: " << *Q << "\n"; + dbgs() << "Remainder: " << *R << "\n"; + }); + + Res = Q; + + // Do not record the last subscript corresponding to the size of elements in + // the array. + if (i == Last) { + + // Bail out if the remainder is too complex. + if (isa(R)) { + Subscripts.clear(); + Sizes.clear(); + return; + } + + continue; + } + + // Record the access function for the current subscript. + Subscripts.push_back(R); } -private: - ScalarEvolution &SE; - const SCEV *GCD, *Zero, *One; -}; + // Also push in last position the remainder of the last division: it will be + // the access function of the innermost dimension. + Subscripts.push_back(Res); + + std::reverse(Subscripts.begin(), Subscripts.end()); + + DEBUG({ + dbgs() << "Subscripts:\n"; + for (const SCEV *S : Subscripts) + dbgs() << *S << "\n"; + }); } /// Splits the SCEV into two vectors of SCEVs representing the subscripts and @@ -7239,84 +7537,40 @@ private: /// asking for the SCEV of the memory access with respect to all enclosing /// loops, calling SCEV->delinearize on that and printing the results. 
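// A hypothetical end-to-end illustration: for a nest writing
//   double A[n][m][o];  ...  A[i][j][k] = ...;
// the delinearization of the access function is expected to report
// Sizes = {%m, %o, 8} (the outermost dimension stays unknown) and one
// affine subscript per loop, i.e. Subscripts = {i, j, k}.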
-const SCEV * -SCEVAddRecExpr::delinearize(ScalarEvolution &SE, - SmallVectorImpl &Subscripts, - SmallVectorImpl &Sizes) const { - // Early exit in case this SCEV is not an affine multivariate function. - if (!this->isAffine()) - return this; - - const SCEV *Start = this->getStart(); - const SCEV *Step = this->getStepRecurrence(SE); - - // Build the SCEV representation of the canonical induction variable in the - // loop of this SCEV. - const SCEV *Zero = SE.getConstant(this->getType(), 0); - const SCEV *One = SE.getConstant(this->getType(), 1); - const SCEV *IV = - SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags()); - - DEBUG(dbgs() << "(delinearize: " << *this << "\n"); - - // When the stride of this SCEV is 1, do not compute the GCD: the size of this - // subscript is 1, and this same SCEV for the access function. - const SCEV *Remainder = Zero; - const SCEV *GCD = One; - - // Find the GCD and Remainder of the Start and Step coefficients of this SCEV. - if (Step != One && !Step->isAllOnesValue()) - GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder); - - DEBUG(dbgs() << "GCD: " << *GCD << "\n"); - DEBUG(dbgs() << "Remainder: " << *Remainder << "\n"); - - const SCEV *Quotient = Start; - if (GCD != One && !GCD->isAllOnesValue()) - // As findGCD computed Remainder, GCD divides "Start - Remainder." The - // Quotient is then this SCEV without Remainder, scaled down by the GCD. The - // Quotient is what will be used in the next subscript delinearization. - Quotient = SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD); - - DEBUG(dbgs() << "Quotient: " << *Quotient << "\n"); - - const SCEV *Rem = Quotient; - if (const SCEVAddRecExpr *AR = dyn_cast(Quotient)) - // Recursively call delinearize on the Quotient until there are no more - // multiples that can be recognized. - Rem = AR->delinearize(SE, Subscripts, Sizes); - - // Scale up the canonical induction variable IV by whatever remains from the - // Step after division by the GCD: the GCD is the size of all the sub-array. - if (Step != One && !Step->isAllOnesValue() && GCD != One && - !GCD->isAllOnesValue() && Step != GCD) { - Step = SCEVDivision::divide(SE, Step, GCD); - IV = SE.getMulExpr(IV, Step); - } - // The access function in the current subscript is computed as the canonical - // induction variable IV (potentially scaled up by the step) and offset by - // Rem, the offset of delinearization in the sub-array. - const SCEV *Index = SE.getAddExpr(IV, Rem); - - // Record the access function and the size of the current subscript. - Subscripts.push_back(Index); - Sizes.push_back(GCD); +void SCEVAddRecExpr::delinearize(ScalarEvolution &SE, + SmallVectorImpl &Subscripts, + SmallVectorImpl &Sizes, + const SCEV *ElementSize) const { + // First step: collect parametric terms. + SmallVector Terms; + collectParametricTerms(SE, Terms); -#ifndef NDEBUG - int Size = Sizes.size(); - DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n"); - DEBUG(dbgs() << "ArrayDecl[UnknownSize]"); - for (int i = 0; i < Size - 1; i++) - DEBUG(dbgs() << "[" << *Sizes[i] << "]"); - DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n"); - - DEBUG(dbgs() << "ArrayRef"); - for (int i = 0; i < Size; i++) - DEBUG(dbgs() << "[" << *Subscripts[i] << "]"); - DEBUG(dbgs() << "\n)\n"); -#endif + if (Terms.empty()) + return; + + // Second step: find subscript sizes. + SE.findArrayDimensions(Terms, Sizes, ElementSize); + + if (Sizes.empty()) + return; + + // Third step: compute the access functions for each subscript. 
+ computeAccessFunctions(SE, Subscripts, Sizes); + + if (Subscripts.empty()) + return; + + DEBUG({ + dbgs() << "succeeded to delinearize " << *this << "\n"; + dbgs() << "ArrayDecl[UnknownSize]"; + for (const SCEV *S : Sizes) + dbgs() << "[" << *S << "]"; - return Remainder; + dbgs() << "\nArrayRef"; + for (const SCEV *S : Subscripts) + dbgs() << "[" << *S << "]"; + dbgs() << "\n"; + }); } //===----------------------------------------------------------------------===// @@ -7368,7 +7622,8 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) //===----------------------------------------------------------------------===// ScalarEvolution::ScalarEvolution() - : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) { + : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), + BlockDispositions(64), FirstUnknown(nullptr) { initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); } @@ -7376,7 +7631,7 @@ bool ScalarEvolution::runOnFunction(Function &F) { this->F = &F; LI = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); DT = &getAnalysis().getDomTree(); return false; @@ -7387,7 +7642,7 @@ void ScalarEvolution::releaseMemory() { // destructors, so that they release their references to their values. for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) U->~SCEVUnknown(); - FirstUnknown = 0; + FirstUnknown = nullptr; ValueExprMap.clear(); diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 7be6aca..6933f74 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -34,7 +34,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) { + ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) { initializeScalarEvolutionAliasAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -102,7 +102,7 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { return U->getValue(); } // No Identified object found. - return 0; + return nullptr; } AliasAnalysis::AliasResult @@ -162,10 +162,10 @@ ScalarEvolutionAliasAnalysis::alias(const Location &LocA, if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr)) if (alias(Location(AO ? AO : LocA.Ptr, AO ? +UnknownSize : LocA.Size, - AO ? 0 : LocA.TBAATag), + AO ? nullptr : LocA.TBAATag), Location(BO ? BO : LocB.Ptr, BO ? +UnknownSize : LocB.Size, - BO ? 0 : LocB.TBAATag)) == NoAlias) + BO ? nullptr : LocB.TBAATag)) == NoAlias) return NoAlias; // Forward the query to the next analysis. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index fb3d595..b507043 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -44,7 +44,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, // not allowed to move it. BasicBlock::iterator BIP = Builder.GetInsertPoint(); - Instruction *Ret = NULL; + Instruction *Ret = nullptr; // Check to see if there is already a cast! for (User *U : V->users()) @@ -627,21 +627,21 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { // Test whether we've already computed the most relevant loop for this SCEV. 
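// (The "relevant" loop of an expression is the innermost loop its expansion
// must live in: none for constants, the parent block's loop for
// instructions, and the deepest operand loop for n-ary expressions, as
// PickMostRelevantLoop encodes.)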
std::pair::iterator, bool> Pair = - RelevantLoops.insert(std::make_pair(S, static_cast(0))); + RelevantLoops.insert(std::make_pair(S, nullptr)); if (!Pair.second) return Pair.first->second; if (isa(S)) // A constant has no relevant loops. - return 0; + return nullptr; if (const SCEVUnknown *U = dyn_cast(S)) { if (const Instruction *I = dyn_cast(U->getValue())) return Pair.first->second = SE.LI->getLoopFor(I->getParent()); // A non-instruction has no relevant loops. - return 0; + return nullptr; } if (const SCEVNAryExpr *N = dyn_cast(S)) { - const Loop *L = 0; + const Loop *L = nullptr; if (const SCEVAddRecExpr *AR = dyn_cast(S)) L = AR->getLoop(); for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); @@ -716,7 +716,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Emit instructions to add all the operands. Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. - Value *Sum = 0; + Value *Sum = nullptr; for (SmallVectorImpl >::iterator I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { const Loop *CurLoop = I->first; @@ -784,7 +784,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. - Value *Prod = 0; + Value *Prod = nullptr; for (SmallVectorImpl >::iterator I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { const SCEV *Op = I->second; @@ -892,18 +892,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, Instruction *InsertPos, bool allowScale) { if (IncV == InsertPos) - return NULL; + return nullptr; switch (IncV->getOpcode()) { default: - return NULL; + return nullptr; // Check for a simple Add/Sub or GEP of a loop invariant step. case Instruction::Add: case Instruction::Sub: { Instruction *OInst = dyn_cast(IncV->getOperand(1)); if (!OInst || SE.DT->dominates(OInst, InsertPos)) return dyn_cast(IncV->getOperand(0)); - return NULL; + return nullptr; } case Instruction::BitCast: return dyn_cast(IncV->getOperand(0)); @@ -914,7 +914,7 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, continue; if (Instruction *OInst = dyn_cast(*I)) { if (!SE.DT->dominates(OInst, InsertPos)) - return NULL; + return nullptr; } if (allowScale) { // allow any kind of GEP as long as it can be hoisted. @@ -925,11 +925,11 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, // have 2 operands. i1* is used by the expander to represent an // address-size element. if (IncV->getNumOperands() != 2) - return NULL; + return nullptr; unsigned AS = cast(IncV->getType())->getAddressSpace(); if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) - return NULL; + return nullptr; break; } return dyn_cast(IncV->getOperand(0)); @@ -1077,9 +1077,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Reuse a previously-inserted PHI, if present. BasicBlock *LatchBlock = L->getLoopLatch(); if (LatchBlock) { - PHINode *AddRecPhiMatch = 0; - Instruction *IncV = 0; - TruncTy = 0; + PHINode *AddRecPhiMatch = nullptr; + Instruction *IncV = nullptr; + TruncTy = nullptr; InvertStep = false; // Only try partially matching scevs that need truncation and/or @@ -1120,7 +1120,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Stop if we have found an exact match SCEV. 
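// (A partial match records TruncTy and/or InvertStep so the caller can
// expand the reused PHI and then truncate or negate it to recover S; a
// sketch of the surrounding logic, not new behavior.)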
if (IsMatchingSCEV) { IncV = TempIncV; - TruncTy = 0; + TruncTy = nullptr; InvertStep = false; AddRecPhiMatch = PN; break; @@ -1243,13 +1243,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { PostIncLoopSet Loops; Loops.insert(L); Normalized = - cast(TransformForPostIncUse(Normalize, S, 0, 0, - Loops, SE, *SE.DT)); + cast(TransformForPostIncUse(Normalize, S, nullptr, + nullptr, Loops, SE, *SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. const SCEV *Start = Normalized->getStart(); - const SCEV *PostLoopOffset = 0; + const SCEV *PostLoopOffset = nullptr; if (!SE.properlyDominates(Start, L->getHeader())) { PostLoopOffset = Start; Start = SE.getConstant(Normalized->getType(), 0); @@ -1261,7 +1261,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Strip off any non-loop-dominating component from the addrec step. const SCEV *Step = Normalized->getStepRecurrence(SE); - const SCEV *PostLoopScale = 0; + const SCEV *PostLoopScale = nullptr; if (!SE.dominates(Step, L->getHeader())) { PostLoopScale = Step; Step = SE.getConstant(Normalized->getType(), 1); @@ -1276,7 +1276,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Type *ExpandTy = PostLoopScale ? IntTy : STy; // In some cases, we decide to reuse an existing phi node but need to truncate // it and/or invert the step. - Type *TruncTy = 0; + Type *TruncTy = nullptr; bool InvertStep = false; PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy, TruncTy, InvertStep); @@ -1372,7 +1372,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { const Loop *L = S->getLoop(); // First check for an existing canonical IV in a suitable type. - PHINode *CanonicalIV = 0; + PHINode *CanonicalIV = nullptr; if (PHINode *PN = L->getCanonicalInductionVariable()) if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) CanonicalIV = PN; @@ -1393,7 +1393,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { while (isa(NewInsertPt) || isa(NewInsertPt) || isa(NewInsertPt)) ++NewInsertPt; - V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, NewInsertPt); return V; } @@ -1666,7 +1666,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, // Emit code for it. BuilderType::InsertPointGuard Guard(Builder); - PHINode *V = cast(expandCodeFor(H, 0, L->getHeader()->begin())); + PHINode *V = cast(expandCodeFor(H, nullptr, + L->getHeader()->begin())); return V; } diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 1e4c0bd..e9db295 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -113,7 +113,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { // Transform each operand. for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) { - Operands.push_back(TransformSubExpr(*I, LUser, 0)); + Operands.push_back(TransformSubExpr(*I, LUser, nullptr)); } // Conservatively use AnyWrap until/unless we need FlagNW. 
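// (Post-increment normalization rewrites a use of {S,+,X} as if it read the
// pre-incremented value; the shifted recurrence need not inherit the
// original no-wrap guarantees, hence the conservative FlagAnyWrap.)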
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index 87a4fa4..edd82f5 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sparseprop" #include "llvm/Analysis/SparsePropagation.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -21,6 +20,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "sparseprop" + //===----------------------------------------------------------------------===// // AbstractLatticeFunction Implementation //===----------------------------------------------------------------------===// @@ -147,7 +148,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, return; Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this); - if (C == 0 || !isa(C)) { + if (!C || !isa(C)) { // Non-constant values can go either way. Succs[0] = Succs[1] = true; return; @@ -189,7 +190,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI, return; Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this); - if (C == 0 || !isa(C)) { + if (!C || !isa(C)) { // All destinations are executable! Succs.assign(TI.getNumSuccessors(), true); return; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 04d09f1..cdb0b79 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tti" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" @@ -19,6 +18,8 @@ using namespace llvm; +#define DEBUG_TYPE "tti" + // Setup the analysis group to manage the TargetTransformInfo passes. INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI) char TargetTransformInfo::ID = 0; @@ -234,7 +235,7 @@ namespace { struct NoTTI final : ImmutablePass, TargetTransformInfo { const DataLayout *DL; - NoTTI() : ImmutablePass(ID), DL(0) { + NoTTI() : ImmutablePass(ID), DL(nullptr) { initializeNoTTIPass(*PassRegistry::getPassRegistry()); } @@ -242,9 +243,9 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { // Note that this subclass is special, and must *not* call initializeTTI as // it does not chain. TopTTI = this; - PrevTTI = 0; + PrevTTI = nullptr; DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; } virtual void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -443,7 +444,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { // Otherwise delegate to the fully generic implementations. return getOperationCost(Operator::getOpcode(U), U->getType(), U->getNumOperands() == 1 ? 
- U->getOperand(0)->getType() : 0); + U->getOperand(0)->getType() : nullptr); } bool hasBranchDivergence() const override { return false; } @@ -567,7 +568,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { } unsigned getShuffleCost(ShuffleKind Kind, Type *Ty, - int Index = 0, Type *SubTp = 0) const override { + int Index = 0, Type *SubTp = nullptr) const override { return 1; } @@ -581,7 +582,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo { } unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = 0) const override { + Type *CondTy = nullptr) const override { return 1; } diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 05daf18..f36f6f8 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -144,7 +144,7 @@ namespace { const MDNode *Node; public: - TBAANode() : Node(0) {} + TBAANode() : Node(nullptr) {} explicit TBAANode(const MDNode *N) : Node(N) {} /// getNode - Get the MDNode for this TBAANode. @@ -182,7 +182,6 @@ namespace { const MDNode *Node; public: - TBAAStructTagNode() : Node(0) {} explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} /// Get the MDNode for this TBAAStructTagNode. @@ -218,7 +217,7 @@ namespace { const MDNode *Node; public: - TBAAStructTypeNode() : Node(0) {} + TBAAStructTypeNode() : Node(nullptr) {} explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} /// Get the MDNode for this TBAAStructTypeNode. @@ -340,7 +339,8 @@ static bool isStructPathTBAA(const MDNode *MD) { bool TypeBasedAliasAnalysis::Aliases(const MDNode *A, const MDNode *B) const { - if (isStructPathTBAA(A)) + // Make sure that both MDNodes are struct-path aware. + if (isStructPathTBAA(A) && isStructPathTBAA(B)) return PathAliases(A, B); // Keep track of the root node for A and B. @@ -386,6 +386,10 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A, bool TypeBasedAliasAnalysis::PathAliases(const MDNode *A, const MDNode *B) const { + // Verify that both input nodes are struct-path aware. + assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); + assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); + // Keep track of the root node for A and B. TBAAStructTypeNode RootA, RootB; TBAAStructTagNode TagA(A), TagB(B); @@ -555,38 +559,40 @@ bool MDNode::isTBAAVtableAccess() const { MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { if (!A || !B) - return NULL; + return nullptr; if (A == B) return A; // For struct-path aware TBAA, we use the access type of the tag. - bool StructPath = isStructPathTBAA(A); + bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); if (StructPath) { A = cast_or_null(A->getOperand(1)); - if (!A) return 0; + if (!A) return nullptr; B = cast_or_null(B->getOperand(1)); - if (!B) return 0; + if (!B) return nullptr; } SmallVector PathA; MDNode *T = A; while (T) { PathA.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null(T->getOperand(1)) : 0; + T = T->getNumOperands() >= 2 ? cast_or_null(T->getOperand(1)) + : nullptr; } SmallVector PathB; T = B; while (T) { PathB.push_back(T); - T = T->getNumOperands() >= 2 ? cast_or_null(T->getOperand(1)) : 0; + T = T->getNumOperands() >= 2 ? 
cast_or_null(T->getOperand(1)) + : nullptr; } int IA = PathA.size() - 1; int IB = PathB.size() - 1; - MDNode *Ret = 0; + MDNode *Ret = nullptr; while (IA >= 0 && IB >=0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; @@ -599,7 +605,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { return Ret; if (!Ret) - return 0; + return nullptr; // We need to convert from a type node to a tag node. Type *Int64 = IntegerType::get(A->getContext(), 64); Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 72617a0..4f48753 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -44,10 +45,10 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { return TD ? TD->getPointerTypeSizeInBits(Ty) : 0; } -static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth) { +static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout *TD, unsigned Depth) { if (!Add) { if (ConstantInt *CLHS = dyn_cast(Op0)) { // We know that the top bits of C-X are clear if X contains less bits @@ -58,7 +59,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); + llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -79,13 +80,10 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // result. For an add, this works with either operand. For a subtract, // this only works if the known zeros are in the right operand. 
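// Worked example (hypothetical): if Op0 has its low 3 bits known zero and
// Op1 its low 5 bits, no carry can arise below bit 3, so the sum or
// difference keeps min(3, 5) = 3 known-zero low bits.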
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); - assert((LHSKnownZero & LHSKnownOne) == 0 && - "Bits known to be one AND zero?"); + llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1); unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); - llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); // Determine which operand has more trailing zeros, and use that @@ -130,15 +128,13 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, } } -static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2, - const DataLayout *TD, unsigned Depth) { +static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2, + const DataLayout *TD, unsigned Depth) { unsigned BitWidth = KnownZero.getBitWidth(); - ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1); bool isKnownNegative = false; bool isKnownNonNegative = false; @@ -192,7 +188,7 @@ static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW, KnownOne.setBit(BitWidth - 1); } -void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { +void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) { unsigned BitWidth = KnownZero.getBitWidth(); unsigned NumRanges = Ranges.getNumOperands() / 2; assert(NumRanges >= 1); @@ -211,8 +207,9 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); } -/// ComputeMaskedBits - Determine which of the bits are known to be either zero -/// or one and return them in the KnownZero/KnownOne bit sets. + +/// Determine which bits of V are known to be either zero or one and return +/// them in the KnownZero/KnownOne bit sets. /// /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that /// we cannot optimize based on the assumption that it is zero without changing @@ -226,8 +223,8 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) { /// where V is a vector, known zero, and known one values are the /// same width as the vector element, and the bit is set only if it is true /// for all of the elements in the vector. 
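/// For example, for an i8 value V = (X & 0xF0) | 0x01 with X unknown, this
/// computes KnownZero = 0b00001110 and KnownOne = 0b00000001: bits 1-3 are
/// always clear, bit 0 is always set, and the top four bits stay unknown
/// (clear in both sets).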
-void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, - const DataLayout *TD, unsigned Depth) { +void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + const DataLayout *TD, unsigned Depth) { assert(V && "No Value?"); assert(Depth <= MaxDepth && "Limit Search Depth"); unsigned BitWidth = KnownZero.getBitWidth(); @@ -241,7 +238,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, V->getType()->getScalarSizeInBits() == BitWidth) && KnownZero.getBitWidth() == BitWidth && KnownOne.getBitWidth() == BitWidth && - "V, Mask, KnownOne and KnownZero should have same BitWidth"); + "V, KnownOne and KnownZero should have same BitWidth"); if (ConstantInt *CI = dyn_cast(V)) { // We know all of the bits for a constant! @@ -303,7 +300,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (GA->mayBeOverridden()) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); } else { - ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1); } return; } @@ -341,49 +338,43 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeMaskedBitsLoad(*MD, KnownZero); - return; + computeKnownBitsLoad(*MD, KnownZero); + break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; - return; + break; } case Instruction::Or: { - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; // Output known-1 are known to be set if set in either the LHS | RHS. KnownOne |= KnownOne2; - return; + break; } case Instruction::Xor: { - ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. 
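// For example, a bit known one in both operands xors to a known zero bit; a
// bit known one on one side and known zero on the other xors to a known one
// bit; a bit unknown on either side stays unknown in the result.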
KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); KnownZero = KnownZeroOut; - return; + break; } case Instruction::Mul: { bool NSW = cast(I)->hasNoSignedWrap(); - ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; } @@ -391,42 +382,40 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // For the purposes of computing leading zeros we can conservatively // treat a udiv as a logical right shift by the power of 2 known to // be less than the denominator. - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); unsigned LeadZ = KnownZero2.countLeadingOnes(); KnownOne2.clearAllBits(); KnownZero2.clearAllBits(); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); if (RHSUnknownLeadingOnes != BitWidth) LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); - return; + break; } case Instruction::Select: - ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; KnownZero &= KnownZero2; - return; + break; case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::SIToFP: case Instruction::UIToFP: - return; // Can't work with floating point. + break; // Can't work with floating point. case Instruction::PtrToInt: case Instruction::IntToPtr: // We can't handle these if we don't know the pointer size. - if (!TD) return; + if (!TD) break; // FALL THROUGH and handle them the same as zext/trunc. case Instruction::ZExt: case Instruction::Trunc: { @@ -439,19 +428,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); } else { SrcBitWidth = SrcTy->getScalarSizeInBits(); - if (!SrcBitWidth) return; + if (!SrcBitWidth) break; } assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zextOrTrunc(BitWidth); KnownOne = KnownOne.zextOrTrunc(BitWidth); // Any top bits are known to be zero. 
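// For example, for zext i8 %x to i32, SrcBitWidth is 8 and BitWidth is 32,
// so the 24 high bits are added to KnownZero here whatever %x turns out to
// be.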
if (BitWidth > SrcBitWidth) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - return; + break; } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); @@ -459,8 +448,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - return; + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + break; } break; } @@ -470,8 +459,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -481,18 +469,17 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); - return; + break; } case Instruction::Shl: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - return; + break; } break; case Instruction::LShr: @@ -502,13 +489,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); // Unsigned shift right. - ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - return; + break; } break; case Instruction::AShr: @@ -518,8 +504,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -528,19 +513,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero |= HighBits; else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. 
KnownOne |= HighBits; - return; + break; } break; case Instruction::Sub: { bool NSW = cast(I)->hasNoSignedWrap(); - ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; } case Instruction::Add: { bool NSW = cast(I)->hasNoSignedWrap(); - ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, + computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth); break; @@ -550,7 +535,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -574,8 +559,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // remainder is zero. if (KnownZero.isNonNegative()) { APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, - Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD, + Depth+1); // If it's known zero, our sign bit is also zero. if (LHSKnownZero.isNegative()) KnownZero.setBit(BitWidth - 1); @@ -587,9 +572,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, APInt RA = Rem->getValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, - Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, + Depth+1); KnownZero |= ~LowBits; KnownOne &= LowBits; break; @@ -598,8 +582,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); unsigned Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); @@ -622,8 +606,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Analyze all of the subscripts of this getelementptr instruction // to determine if we can prove known low zero bits. APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0); - ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, - Depth+1); + computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD, + Depth+1); unsigned TrailZ = LocalKnownZero.countTrailingOnes(); gep_type_iterator GTI = gep_type_begin(I); @@ -631,8 +615,10 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, Value *Index = I->getOperand(i); if (StructType *STy = dyn_cast(*GTI)) { // Handle struct member offset arithmetic. 
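// Field offsets come from DataLayout's struct layout; e.g. indexing field 1
// of { i8, i64 } typically yields offset 8 and hence three more known
// trailing zero bits. Without TD the offset is unknown, so the running
// TrailZ count has to be dropped rather than kept.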
- if (!TD) - return; + if (!TD) { + TrailZ = 0; + break; + } // Handle case when index is vector zeroinitializer Constant *CIndex = cast(Index); @@ -650,11 +636,14 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } else { // Handle array index arithmetic. Type *IndexedTy = GTI.getIndexedType(); - if (!IndexedTy->isSized()) return; + if (!IndexedTy->isSized()) { + TrailZ = 0; + break; + } unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits(); uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1; LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); - ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); + computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); TrailZ = std::min(TrailZ, unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); @@ -696,11 +685,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, break; // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1); + computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1); // We need to take the minimum number of known bits APInt KnownZero3(KnownZero), KnownOne3(KnownOne); - ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1); + computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1); KnownZero = APInt::getLowBitsSet(BitWidth, std::min(KnownZero2.countTrailingOnes(), @@ -712,7 +701,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // Unreachable blocks may have zero-operand PHI nodes. if (P->getNumIncomingValues() == 0) - return; + break; // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. @@ -731,8 +720,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownOne2 = APInt(BitWidth, 0); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. 
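// For example, for %p = phi [ %a, %bb1 ], [ %b, %bb2 ], only bits that agree
// in the known sets of %a and %b survive the intersection below, and each
// incoming value is examined with the depth capped so cyclic phis terminate.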
- ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, - MaxDepth-1); + computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD, + MaxDepth-1); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -776,30 +765,32 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, default: break; case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: - ComputeMaskedBitsAddSub(true, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsAddSub(true, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: - ComputeMaskedBitsAddSub(false, II->getArgOperand(0), - II->getArgOperand(1), false, KnownZero, - KnownOne, KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsAddSub(false, II->getArgOperand(0), + II->getArgOperand(1), false, KnownZero, + KnownOne, KnownZero2, KnownOne2, TD, Depth); break; case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: - ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1), - false, KnownZero, KnownOne, - KnownZero2, KnownOne2, TD, Depth); + computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), + false, KnownZero, KnownOne, + KnownZero2, KnownOne2, TD, Depth); break; } } } } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } /// ComputeSignBit - Determine whether the sign bit is known to be zero or -/// one. Convenience wrapper around ComputeMaskedBits. +/// one. Convenience wrapper around computeKnownBits. void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, const DataLayout *TD, unsigned Depth) { unsigned BitWidth = getBitWidth(V->getType(), TD); @@ -810,7 +801,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, } APInt ZeroBits(BitWidth, 0); APInt OneBits(BitWidth, 0); - ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth); + computeKnownBits(V, ZeroBits, OneBits, TD, Depth); KnownOne = OneBits[BitWidth - 1]; KnownZero = ZeroBits[BitWidth - 1]; } @@ -842,7 +833,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { if (Depth++ == MaxDepth) return false; - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; // A shift of a power of two is a power of two or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) @@ -882,10 +873,10 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { unsigned BitWidth = V->getType()->getScalarSizeInBits(); APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); - ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth); + computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth); APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); - ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth); + computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth); // If i8 V is a power of two or zero: // ZeroBits: 1 1 1 0 1 1 1 1 // ~ZeroBits: 0 0 0 1 0 0 0 0 @@ -1005,7 +996,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD); // X | Y != 0 if X != 0 or Y != 0. 
- Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (match(V, m_Or(m_Value(X), m_Value(Y)))) return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth); @@ -1023,7 +1014,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); + computeKnownBits(X, KnownZero, KnownOne, TD, Depth); if (KnownOne[0]) return true; } @@ -1065,12 +1056,12 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { APInt Mask = APInt::getSignedMaxValue(BitWidth); // The sign bit of X is set. If some other bit is set then X is not equal // to INT_MIN. - ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth); + computeKnownBits(X, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; // The sign bit of Y is set. If some other bit is set then Y is not equal // to INT_MIN. - ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth); + computeKnownBits(Y, KnownZero, KnownOne, TD, Depth); if ((KnownOne & Mask) != 0) return true; } @@ -1100,7 +1091,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); return KnownOne != 0; } @@ -1116,8 +1107,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); return (KnownZero & Mask) == Mask; } @@ -1142,7 +1132,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; - // Note that ConstantInt is handled by the general ComputeMaskedBits case + // Note that ConstantInt is handled by the general computeKnownBits case // below. if (Depth == 6) @@ -1187,7 +1177,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses - // ComputeMaskedBits, and pick whichever answer is better. + // computeKnownBits, and pick whichever answer is better. } break; @@ -1207,7 +1197,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (ConstantInt *CRHS = dyn_cast(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -1232,7 +1222,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (ConstantInt *CLHS = dyn_cast(U->getOperand(0))) if (CLHS->isNullValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); - ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); + computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. 
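// For example, with i32 %x known to be 0 or 1, 0 - %x is either 0 or -1
// (0xFFFFFFFF); both values consist entirely of sign bits, so all TyBits
// bits can be reported as sign bits.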
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) @@ -1278,7 +1268,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); APInt Mask; - ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); + computeKnownBits(V, KnownZero, KnownOne, TD, Depth); if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; @@ -1364,7 +1354,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, Op1 = ConstantInt::get(V->getContext(), API); } - Value *Mul0 = NULL; + Value *Mul0 = nullptr; if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { if (Constant *Op1C = dyn_cast(Op1)) if (Constant *MulC = dyn_cast(Mul0)) { @@ -1388,7 +1378,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, } } - Value *Mul1 = NULL; + Value *Mul1 = nullptr; if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { if (Constant *Op0C = dyn_cast(Op0)) if (Constant *MulC = dyn_cast(Mul1)) { @@ -1432,7 +1422,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return 1; // Limit search depth. const Operator *I = dyn_cast(V); - if (I == 0) return false; + if (!I) return false; // Check if the nsz fast-math flag is set if (const FPMathOperator *FPO = dyn_cast(I)) @@ -1513,7 +1503,7 @@ Value *llvm::isBytewiseValue(Value *V) { // If the top/bottom halves aren't the same, reject it. if (Val != Val2) - return 0; + return nullptr; } return ConstantInt::get(V->getContext(), Val); } @@ -1525,11 +1515,11 @@ Value *llvm::isBytewiseValue(Value *V) { Value *Elt = CA->getElementAsConstant(0); Value *Val = isBytewiseValue(Elt); if (!Val) - return 0; + return nullptr; for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) if (CA->getElementAsConstant(I) != Elt) - return 0; + return nullptr; return Val; } @@ -1540,7 +1530,7 @@ Value *llvm::isBytewiseValue(Value *V) { // %c = or i16 %a, %b // but until there is an example that actually needs this, it doesn't seem // worth worrying about. - return 0; + return nullptr; } @@ -1590,7 +1580,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, Value *V = FindInsertedValue(From, Idxs); if (!V) - return NULL; + return nullptr; // Insert the value in the new (sub) aggregrate return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), @@ -1641,7 +1631,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, if (Constant *C = dyn_cast(V)) { C = C->getAggregateElement(idx_range[0]); - if (C == 0) return 0; + if (!C) return nullptr; return FindInsertedValue(C, idx_range.slice(1), InsertBefore); } @@ -1654,7 +1644,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, if (req_idx == idx_range.end()) { // We can't handle this without inserting insertvalues if (!InsertBefore) - return 0; + return nullptr; // The requested index identifies a part of a nested aggregate. Handle // this specially. For example, @@ -1708,7 +1698,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, } // Otherwise, we don't know (such as, extracting from a function return value // or load instruction) - return 0; + return nullptr; } /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if @@ -1769,13 +1759,13 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Make sure the index-ee is a pointer to array of i8. 
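// The expected shape is the classic constant-string access, e.g.
//   getelementptr inbounds ([6 x i8]* @str, i32 0, i32 0)
// with @str a constant array of i8; other forms are rejected below.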
PointerType *PT = cast(GEP->getOperand(0)->getType()); ArrayType *AT = dyn_cast(PT->getElementType()); - if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) + if (!AT || !AT->getElementType()->isIntegerTy(8)) return false; // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. const ConstantInt *FirstIdx = dyn_cast(GEP->getOperand(1)); - if (FirstIdx == 0 || !FirstIdx->isZero()) + if (!FirstIdx || !FirstIdx->isZero()) return false; // If the second index isn't a ConstantInt, then this is a variable index @@ -1807,7 +1797,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Must be a Constant Array const ConstantDataArray *Array = dyn_cast(GV->getInitializer()); - if (Array == 0 || !Array->isString()) + if (!Array || !Array->isString()) return false; // Get the number of elements in the array @@ -1913,7 +1903,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast(V)) // TODO: Acquire a DominatorTree and use it. - if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { + if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) { V = Simplified; continue; } @@ -2001,7 +1991,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(Op, KnownZero, KnownOne, TD); + computeKnownBits(Op, KnownZero, KnownOne, TD); return !!KnownZero; } case Instruction::Load: { @@ -2076,14 +2066,18 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { // Alloca never returns null, malloc might. if (isa(V)) return true; - // A byval or inalloca argument is never null. + // A byval, inalloca, or nonnull argument is never null. if (const Argument *A = dyn_cast(V)) - return A->hasByValOrInAllocaAttr(); + return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); // Global values are not null unless extern weak. if (const GlobalValue *GV = dyn_cast(V)) return !GV->hasExternalWeakLinkage(); + if (ImmutableCallSite CS = V) + if (CS.paramHasAttr(0, Attribute::NonNull)) + return true; + // operator new never returns null. if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true)) return true; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 1a5eec3..44a3412 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -34,6 +34,10 @@ bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { return true; } +void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const { + SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg); +} + //===----------------------------------------------------------------------===// // Helper functions. //===----------------------------------------------------------------------===// @@ -146,7 +150,7 @@ static bool isLabelChar(char C) { static const char *isLabelTail(const char *CurPtr) { while (1) { if (CurPtr[0] == ':') return CurPtr+1; - if (!isLabelChar(CurPtr[0])) return 0; + if (!isLabelChar(CurPtr[0])) return nullptr; ++CurPtr; } } @@ -431,8 +435,8 @@ lltok::Kind LLLexer::LexHash() { /// HexIntConstant [us]0x[0-9A-Fa-f]+ lltok::Kind LLLexer::LexIdentifier() { const char *StartChar = CurPtr; - const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; - const char *KeywordEnd = 0; + const char *IntEnd = CurPtr[-1] == 'i' ? 
nullptr : StartChar; + const char *KeywordEnd = nullptr; for (; isLabelChar(*CurPtr); ++CurPtr) { // If we decide this is an integer, remember the end of the sequence. @@ -451,7 +455,7 @@ lltok::Kind LLLexer::LexIdentifier() { // Otherwise, this wasn't a label. If this was valid as an integer type, // return it. - if (IntEnd == 0) IntEnd = CurPtr; + if (!IntEnd) IntEnd = CurPtr; if (IntEnd != StartChar) { CurPtr = IntEnd; uint64_t NumBits = atoull(StartChar, CurPtr); @@ -465,7 +469,7 @@ lltok::Kind LLLexer::LexIdentifier() { } // Otherwise, this was a letter sequence. See which keyword this is. - if (KeywordEnd == 0) KeywordEnd = CurPtr; + if (!KeywordEnd) KeywordEnd = CurPtr; CurPtr = KeywordEnd; --StartChar; unsigned Len = CurPtr-StartChar; @@ -481,6 +485,8 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(internal); + KEYWORD(linker_private); // NOTE: deprecated, for parser compatibility + KEYWORD(linker_private_weak); // NOTE: deprecated, for parser compatibility KEYWORD(available_externally); KEYWORD(linkonce); KEYWORD(linkonce_odr); @@ -506,6 +512,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(null); KEYWORD(to); KEYWORD(tail); + KEYWORD(musttail); KEYWORD(target); KEYWORD(triple); KEYWORD(unwind); @@ -548,7 +555,6 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x86_stdcallcc); KEYWORD(x86_fastcallcc); KEYWORD(x86_thiscallcc); - KEYWORD(x86_cdeclmethodcc); KEYWORD(arm_apcscc); KEYWORD(arm_aapcscc); KEYWORD(arm_aapcs_vfpcc); @@ -587,6 +593,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(noimplicitfloat); KEYWORD(noinline); KEYWORD(nonlazybind); + KEYWORD(nonnull); KEYWORD(noredzone); KEYWORD(noreturn); KEYWORD(nounwind); diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index 85703c7..ad11d49 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -63,6 +63,10 @@ namespace llvm { bool Error(LocTy L, const Twine &Msg) const; bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); } + + void Warning(LocTy WarningLoc, const Twine &Msg) const; + void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); } + std::string getFilename() const; private: diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 37151e6..3282e8a 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -57,7 +57,8 @@ bool LLParser::ValidateEndOfModule() { for (unsigned i = 0, e = MDList.size(); i != e; ++i) { unsigned SlotNo = MDList[i].MDSlot; - if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0) + if (SlotNo >= NumberedMetadata.size() || + NumberedMetadata[SlotNo] == nullptr) return Error(MDList[i].Loc, "use of undefined metadata '!" + Twine(SlotNo) + "'"); Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]); @@ -132,20 +133,20 @@ bool LLParser::ValidateEndOfModule() { // references after the function was defined. Resolve those now. while (!ForwardRefBlockAddresses.empty()) { // Okay, we are referencing an already-parsed function, resolve them now. - Function *TheFn = 0; + Function *TheFn = nullptr; const ValID &Fn = ForwardRefBlockAddresses.begin()->first; if (Fn.Kind == ValID::t_GlobalName) TheFn = M->getFunction(Fn.StrVal); else if (Fn.UIntVal < NumberedVals.size()) TheFn = dyn_cast(NumberedVals[Fn.UIntVal]); - if (TheFn == 0) + if (!TheFn) return Error(Fn.Loc, "unknown function referenced by blockaddress"); // Resolve all these references. 
if (ResolveForwardRefBlockAddresses(TheFn, ForwardRefBlockAddresses.begin()->second, - 0)) + nullptr)) return true; ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin()); @@ -206,7 +207,7 @@ bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn, TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal)); } - if (Res == 0) + if (!Res) return Error(Refs[i].first.Loc, "referenced value is not a basic block"); @@ -247,6 +248,8 @@ bool LLParser::ParseTopLevelEntities() { // ('constant'|'global') ... case lltok::kw_private: // OptionalLinkage case lltok::kw_internal: // OptionalLinkage + case lltok::kw_linker_private: // Obsolete OptionalLinkage + case lltok::kw_linker_private_weak: // Obsolete OptionalLinkage case lltok::kw_weak: // OptionalLinkage case lltok::kw_weak_odr: // OptionalLinkage case lltok::kw_linkonce: // OptionalLinkage @@ -362,7 +365,7 @@ bool LLParser::ParseUnnamedType() { if (TypeID >= NumberedTypes.size()) NumberedTypes.resize(TypeID+1); - Type *Result = 0; + Type *Result = nullptr; if (ParseStructDefinition(TypeLoc, "", NumberedTypes[TypeID], Result)) return true; @@ -389,7 +392,7 @@ bool LLParser::ParseNamedType() { ParseToken(lltok::kw_type, "expected 'type' after name")) return true; - Type *Result = 0; + Type *Result = nullptr; if (ParseStructDefinition(NameLoc, Name, NamedTypes[Name], Result)) return true; @@ -521,10 +524,10 @@ bool LLParser::ParseMDNodeID(MDNode *&Result, unsigned &SlotNo) { if (ParseUInt32(SlotNo)) return true; // Check existing MDNode. - if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != 0) + if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != nullptr) Result = NumberedMetadata[SlotNo]; else - Result = 0; + Result = nullptr; return false; } @@ -565,7 +568,7 @@ bool LLParser::ParseNamedMetadata() { if (ParseToken(lltok::exclaim, "Expected '!' here")) return true; - MDNode *N = 0; + MDNode *N = nullptr; if (ParseMDNodeID(N)) return true; NMD->addOperand(N); } while (EatIfPresent(lltok::comma)); @@ -584,14 +587,14 @@ bool LLParser::ParseStandaloneMetadata() { unsigned MetadataID = 0; LocTy TyLoc; - Type *Ty = 0; + Type *Ty = nullptr; SmallVector Elts; if (ParseUInt32(MetadataID) || ParseToken(lltok::equal, "expected '=' here") || ParseType(Ty, TyLoc) || ParseToken(lltok::exclaim, "Expected '!' here") || ParseToken(lltok::lbrace, "Expected '{' here") || - ParseMDNodeVector(Elts, NULL) || + ParseMDNodeVector(Elts, nullptr) || ParseToken(lltok::rbrace, "expected end of metadata node")) return true; @@ -611,7 +614,7 @@ bool LLParser::ParseStandaloneMetadata() { if (MetadataID >= NumberedMetadata.size()) NumberedMetadata.resize(MetadataID+1); - if (NumberedMetadata[MetadataID] != 0) + if (NumberedMetadata[MetadataID] != nullptr) return TokError("Metadata id is already used"); NumberedMetadata[MetadataID] = Init; } @@ -619,13 +622,19 @@ bool LLParser::ParseStandaloneMetadata() { return false; } +static bool isValidVisibilityForLinkage(unsigned V, unsigned L) { + return !GlobalValue::isLocalLinkage((GlobalValue::LinkageTypes)L) || + (GlobalValue::VisibilityTypes)V == GlobalValue::DefaultVisibility; +} + /// ParseAlias: /// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass 'alias' /// OptionalLinkage Aliasee +/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass 'alias' +/// OptionalLinkage OptionalAddrSpace Type, Aliasee +/// /// Aliasee /// ::= TypeAndValue -/// ::= 'bitcast' '(' TypeAndValue 'to' Type ')' -/// ::= 'getelementptr' 'inbounds'? '(' ... 
')' /// /// Everything through DLL storage class has already been parsed. /// @@ -643,27 +652,53 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, if(!GlobalAlias::isValidLinkage(Linkage)) return Error(LinkageLoc, "invalid linkage type for alias"); - Constant *Aliasee; - LocTy AliaseeLoc = Lex.getLoc(); - if (Lex.getKind() != lltok::kw_bitcast && - Lex.getKind() != lltok::kw_getelementptr) { - if (ParseGlobalTypeAndValue(Aliasee)) return true; + if (!isValidVisibilityForLinkage(Visibility, L)) + return Error(LinkageLoc, + "symbol with local linkage must have default visibility"); + + bool HasAddrSpace = Lex.getKind() == lltok::kw_addrspace; + unsigned AddrSpace; + LocTy AddrSpaceLoc = Lex.getLoc(); + if (ParseOptionalAddrSpace(AddrSpace)) + return true; + + LocTy TyLoc = Lex.getLoc(); + Type *Ty = nullptr; + if (ParseType(Ty)) + return true; + + bool DifferentType = EatIfPresent(lltok::comma); + if (HasAddrSpace && !DifferentType) + return Error(AddrSpaceLoc, "A type is required if addrspace is given"); + + Type *AliaseeType = nullptr; + if (DifferentType) { + if (ParseType(AliaseeType)) + return true; } else { - // The bitcast dest type is not present, it is implied by the dest type. - ValID ID; - if (ParseValID(ID)) return true; - if (ID.Kind != ValID::t_Constant) - return Error(AliaseeLoc, "invalid aliasee"); - Aliasee = ID.ConstantVal; + AliaseeType = Ty; + auto *PTy = dyn_cast(Ty); + if (!PTy) + return Error(TyLoc, "An alias must have pointer type"); + Ty = PTy->getElementType(); + AddrSpace = PTy->getAddressSpace(); } - if (!Aliasee->getType()->isPointerTy()) - return Error(AliaseeLoc, "alias must have pointer type"); + LocTy AliaseeLoc = Lex.getLoc(); + Constant *C; + if (ParseGlobalValue(AliaseeType, C)) + return true; + + auto *Aliasee = dyn_cast(C); + if (!Aliasee) + return Error(AliaseeLoc, "Alias must point to function or variable"); + + assert(Aliasee->getType()->isPointerTy()); // Okay, create the alias but do not insert it into the module yet. - GlobalAlias* GA = new GlobalAlias(Aliasee->getType(), - (GlobalValue::LinkageTypes)Linkage, Name, - Aliasee); + std::unique_ptr GA( + GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage, + Name, Aliasee, /*Parent*/ nullptr)); GA->setVisibility((GlobalValue::VisibilityTypes)Visibility); GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); @@ -685,15 +720,23 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, // If they agree, just RAUW the old value with the alias and remove the // forward ref info. - Val->replaceAllUsesWith(GA); + for (auto *User : Val->users()) { + if (auto *GA = dyn_cast(User)) + return Error(NameLoc, "Alias is pointed by alias " + GA->getName()); + } + + Val->replaceAllUsesWith(GA.get()); Val->eraseFromParent(); ForwardRefVals.erase(I); } // Insert into the module, we know its name won't collide now. 
- M->getAliasList().push_back(GA); + M->getAliasList().push_back(GA.get()); assert(GA->getName() == Name && "Should not be a name conflict!"); + // The module owns this now + GA.release(); + return false; } @@ -711,6 +754,10 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage, bool HasLinkage, unsigned Visibility, unsigned DLLStorageClass) { + if (!isValidVisibilityForLinkage(Visibility, Linkage)) + return Error(NameLoc, + "symbol with local linkage must have default visibility"); + unsigned AddrSpace; bool IsConstant, UnnamedAddr, IsExternallyInitialized; GlobalVariable::ThreadLocalMode TLM; @@ -718,7 +765,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, LocTy IsExternallyInitializedLoc; LocTy TyLoc; - Type *Ty = 0; + Type *Ty = nullptr; if (ParseOptionalThreadLocal(TLM) || ParseOptionalAddrSpace(AddrSpace) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, @@ -732,7 +779,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, // If the linkage is specified and is external, then no initializer is // present. - Constant *Init = 0; + Constant *Init = nullptr; if (!HasLinkage || (Linkage != GlobalValue::ExternalWeakLinkage && Linkage != GlobalValue::ExternalLinkage)) { if (ParseGlobalValue(Ty, Init)) @@ -742,7 +789,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (Ty->isFunctionTy() || Ty->isLabelTy()) return Error(TyLoc, "invalid type for global variable"); - GlobalVariable *GV = 0; + GlobalVariable *GV = nullptr; // See if the global was forward referenced, if so, use the global. if (!Name.empty()) { @@ -760,9 +807,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, } } - if (GV == 0) { - GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0, - Name, 0, GlobalVariable::NotThreadLocal, + if (!GV) { + GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, nullptr, + Name, nullptr, GlobalVariable::NotThreadLocal, AddrSpace); } else { if (GV->getType()->getElementType() != Ty) @@ -956,6 +1003,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_nest: case lltok::kw_noalias: case lltok::kw_nocapture: + case lltok::kw_nonnull: case lltok::kw_returned: case lltok::kw_sret: HaveError |= @@ -978,9 +1026,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, LocTy Loc) { PointerType *PTy = dyn_cast(Ty); - if (PTy == 0) { + if (!PTy) { Error(Loc, "global variable reference must have pointer type"); - return 0; + return nullptr; } // Look this name up in the normal function symbol table. @@ -989,7 +1037,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, // If this is a forward reference for the value, see if we already created a // forward ref record. - if (Val == 0) { + if (!Val) { std::map >::iterator I = ForwardRefVals.find(Name); if (I != ForwardRefVals.end()) @@ -1001,7 +1049,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, if (Val->getType() == Ty) return Val; Error(Loc, "'@" + Name + "' defined with type '" + getTypeString(Val->getType()) + "'"); - return 0; + return nullptr; } // Otherwise, create a new forward reference for this value and remember it. 
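A note on the pattern these LLParser hunks keep exercising: a use of a symbol with no definition yet gets a placeholder recorded in a forward-reference map; when the definition arrives, the placeholder's uses are rewired to it (replaceAllUsesWith in the real parser) and the map entry is erased; anything still in the map at end of module is an undefined-reference error. The sketch below is a minimal stand-alone illustration of that idea; the names Global, getGlobal, and defineGlobal are invented here, while LLParser's own GetGlobalVal and ForwardRefVals work on real GlobalValues.

    #include <cassert>
    #include <map>
    #include <string>

    // Minimal stand-in for a parsed entity; a real placeholder carries a
    // type and a source location for the eventual diagnostic.
    struct Global {
      std::string Name;
      bool IsPlaceholder;
    };

    class Parser {
      std::map<std::string, Global *> Symbols;     // defined globals
      std::map<std::string, Global *> ForwardRefs; // placeholders, by name

    public:
      // A use of @Name: return the definition if present, else a (possibly
      // freshly created) placeholder patched in when @Name is defined.
      Global *getGlobal(const std::string &Name) {
        std::map<std::string, Global *>::iterator I = Symbols.find(Name);
        if (I != Symbols.end())
          return I->second;
        I = ForwardRefs.find(Name);
        if (I != ForwardRefs.end())
          return I->second;
        Global *Placeholder = new Global();
        Placeholder->Name = Name;
        Placeholder->IsPlaceholder = true;
        ForwardRefs[Name] = Placeholder;
        return Placeholder;
      }

      // A definition of @Name: if a placeholder exists, splice the real
      // global into its place and drop the placeholder.
      void defineGlobal(const std::string &Name, Global *G) {
        assert(Symbols.find(Name) == Symbols.end() && "duplicate definition");
        std::map<std::string, Global *>::iterator I = ForwardRefs.find(Name);
        if (I != ForwardRefs.end()) {
          // ...replace all uses of I->second with G here...
          delete I->second;
          ForwardRefs.erase(I);
        }
        Symbols[Name] = G;
      }

      // End of module: anything still pending was never defined.
      bool hasUnresolvedRefs() const { return !ForwardRefs.empty(); }
    };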
@@ -1010,8 +1058,8 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); else FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, 0, Name, - 0, GlobalVariable::NotThreadLocal, + GlobalValue::ExternalWeakLinkage, nullptr, Name, + nullptr, GlobalVariable::NotThreadLocal, PTy->getAddressSpace()); ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); @@ -1020,16 +1068,16 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { PointerType *PTy = dyn_cast(Ty); - if (PTy == 0) { + if (!PTy) { Error(Loc, "global variable reference must have pointer type"); - return 0; + return nullptr; } - GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0; + GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr; // If this is a forward reference for the value, see if we already created a // forward ref record. - if (Val == 0) { + if (!Val) { std::map >::iterator I = ForwardRefValIDs.find(ID); if (I != ForwardRefValIDs.end()) @@ -1041,7 +1089,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { if (Val->getType() == Ty) return Val; Error(Loc, "'@" + Twine(ID) + "' defined with type '" + getTypeString(Val->getType()) + "'"); - return 0; + return nullptr; } // Otherwise, create a new forward reference for this value and remember it. @@ -1050,7 +1098,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M); else FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, 0, ""); + GlobalValue::ExternalWeakLinkage, nullptr, ""); ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); return FwdVal; @@ -1170,6 +1218,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; + case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; case lltok::kw_returned: B.addAttribute(Attribute::Returned); break; @@ -1222,6 +1271,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { return HaveError; case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; + case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; @@ -1286,6 +1336,10 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { /// ::= 'common' /// ::= 'extern_weak' /// ::= 'external' +/// +/// Deprecated Values: +/// ::= 'linker_private' +/// ::= 'linker_private_weak' bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { HasLinkage = false; switch (Lex.getKind()) { @@ -1303,6 +1357,15 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { case lltok::kw_common: Res = GlobalValue::CommonLinkage; break; case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break; case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break; + + case 
lltok::kw_linker_private: + case lltok::kw_linker_private_weak: + Lex.Warning("'" + Lex.getStrVal() + "' is deprecated, treating as" + " PrivateLinkage"); + Lex.Lex(); + // treat linker_private and linker_private_weak as PrivateLinkage + Res = GlobalValue::PrivateLinkage; + return false; } Lex.Lex(); HasLinkage = true; @@ -1350,7 +1413,6 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'x86_stdcallcc' /// ::= 'x86_fastcallcc' /// ::= 'x86_thiscallcc' -/// ::= 'x86_cdeclmethodcc' /// ::= 'arm_apcscc' /// ::= 'arm_aapcscc' /// ::= 'arm_aapcs_vfpcc' @@ -1376,7 +1438,6 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break; case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break; case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break; - case lltok::kw_x86_cdeclmethodcc:CC = CallingConv::X86_CDeclMethod; break; case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break; case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break; case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break; @@ -1623,7 +1684,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) { // If the type hasn't been defined yet, create a forward definition and // remember where that forward def'n was seen (in case it never is defined). - if (Entry.first == 0) { + if (!Entry.first) { Entry.first = StructType::create(Context, Lex.getStrVal()); Entry.second = Lex.getLoc(); } @@ -1640,7 +1701,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) { // If the type hasn't been defined yet, create a forward definition and // remember where that forward def'n was seen (in case it never is defined). - if (Entry.first == 0) { + if (!Entry.first) { Entry.first = StructType::create(Context); Entry.second = Lex.getLoc(); } @@ -1716,7 +1777,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, // Parse the argument. LocTy ArgLoc; - Type *ArgTy = 0; + Type *ArgTy = nullptr; AttrBuilder ArgAttrs; Value *V; if (ParseType(ArgTy, ArgLoc)) @@ -1758,7 +1819,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, Lex.Lex(); } else { LocTy TypeLoc = Lex.getLoc(); - Type *ArgTy = 0; + Type *ArgTy = nullptr; AttrBuilder Attrs; std::string Name; @@ -1870,7 +1931,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name, Entry.second = SMLoc(); // If this type number has never been uttered, create it. - if (Entry.first == 0) + if (!Entry.first) Entry.first = StructType::create(Context, Name); ResultTy = Entry.first; return false; @@ -1886,7 +1947,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name, if (Entry.first) return Error(TypeLoc, "forward references to non-struct type"); - ResultTy = 0; + ResultTy = nullptr; if (isPacked) return ParseArrayVectorType(ResultTy, true); return ParseType(ResultTy); @@ -1896,7 +1957,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name, Entry.second = SMLoc(); // If this type number has never been uttered, create it. 
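// For example, %outer = type { %inner* } may be parsed before %inner is
// defined; the forward definition created here is an opaque named struct
// that a later "%inner = type { ... }" line fills in with setBody.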
- if (Entry.first == 0) + if (!Entry.first) Entry.first = StructType::create(Context, Name); StructType *STy = cast(Entry.first); @@ -1927,7 +1988,7 @@ bool LLParser::ParseStructBody(SmallVectorImpl &Body) { return false; LocTy EltTyLoc = Lex.getLoc(); - Type *Ty = 0; + Type *Ty = nullptr; if (ParseType(Ty)) return true; Body.push_back(Ty); @@ -1965,7 +2026,7 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) { return true; LocTy TypeLoc = Lex.getLoc(); - Type *EltTy = 0; + Type *EltTy = nullptr; if (ParseType(EltTy)) return true; if (ParseToken(isVector ? lltok::greater : lltok::rsquare, @@ -2011,7 +2072,7 @@ LLParser::PerFunctionState::~PerFunctionState() { I->second.first->replaceAllUsesWith( UndefValue::get(I->second.first->getType())); delete I->second.first; - I->second.first = 0; + I->second.first = nullptr; } for (std::map >::iterator @@ -2020,7 +2081,7 @@ LLParser::PerFunctionState::~PerFunctionState() { I->second.first->replaceAllUsesWith( UndefValue::get(I->second.first->getType())); delete I->second.first; - I->second.first = 0; + I->second.first = nullptr; } } @@ -2069,7 +2130,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, // If this is a forward reference for the value, see if we already created a // forward ref record. - if (Val == 0) { + if (!Val) { std::map >::iterator I = ForwardRefVals.find(Name); if (I != ForwardRefVals.end()) @@ -2084,13 +2145,13 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, else P.Error(Loc, "'%" + Name + "' defined with type '" + getTypeString(Val->getType()) + "'"); - return 0; + return nullptr; } // Don't make placeholders with invalid type. if (!Ty->isFirstClassType() && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); - return 0; + return nullptr; } // Otherwise, create a new forward reference for this value and remember it. @@ -2107,11 +2168,11 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc) { // Look this name up in the normal function symbol table. - Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0; + Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr; // If this is a forward reference for the value, see if we already created a // forward ref record. - if (Val == 0) { + if (!Val) { std::map >::iterator I = ForwardRefValIDs.find(ID); if (I != ForwardRefValIDs.end()) @@ -2126,12 +2187,12 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, else P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" + getTypeString(Val->getType()) + "'"); - return 0; + return nullptr; } if (!Ty->isFirstClassType() && !Ty->isLabelTy()) { P.Error(Loc, "invalid use of a non-first-class type"); - return 0; + return nullptr; } // Otherwise, create a new forward reference for this value and remember it. @@ -2227,7 +2288,7 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name, BB = GetBB(NumberedVals.size(), Loc); else BB = GetBB(Name, Loc); - if (BB == 0) return 0; // Already diagnosed error. + if (!BB) return nullptr; // Already diagnosed error. // Move the block to the end of the function. Forward ref'd blocks are // inserted wherever they happen to be referenced. @@ -2435,7 +2496,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { // Make a global variable as a placeholder for this reference. 
GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), false, GlobalValue::InternalLinkage, - 0, ""); + nullptr, ""); ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef)); ID.ConstantVal = FwdRef; ID.Kind = ValID::t_Constant; @@ -2456,7 +2517,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { case lltok::kw_inttoptr: case lltok::kw_ptrtoint: { unsigned Opc = Lex.getUIntVal(); - Type *DestTy = 0; + Type *DestTy = nullptr; Constant *SrcVal; Lex.Lex(); if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") || @@ -2720,18 +2781,18 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { /// ParseGlobalValue - Parse a global value with the specified type. bool LLParser::ParseGlobalValue(Type *Ty, Constant *&C) { - C = 0; + C = nullptr; ValID ID; - Value *V = NULL; + Value *V = nullptr; bool Parsed = ParseValID(ID) || - ConvertValIDToValue(Ty, ID, V, NULL); + ConvertValIDToValue(Ty, ID, V, nullptr); if (V && !(C = dyn_cast(V))) return Error(ID.Loc, "global values must be constants"); return Parsed; } bool LLParser::ParseGlobalTypeAndValue(Constant *&V) { - Type *Ty = 0; + Type *Ty = nullptr; return ParseType(Ty) || ParseGlobalValue(Ty, V); } @@ -2815,15 +2876,15 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, case ValID::t_LocalID: if (!PFS) return Error(ID.Loc, "invalid use of function-local name"); V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc); - return (V == 0); + return V == nullptr; case ValID::t_LocalName: if (!PFS) return Error(ID.Loc, "invalid use of function-local name"); V = PFS->GetVal(ID.StrVal, Ty, ID.Loc); - return (V == 0); + return V == nullptr; case ValID::t_InlineAsm: { PointerType *PTy = dyn_cast(Ty); FunctionType *FTy = - PTy ? dyn_cast(PTy->getElementType()) : 0; + PTy ? 
dyn_cast(PTy->getElementType()) : nullptr; if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) return Error(ID.Loc, "invalid type for inline asm constraint string"); V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, @@ -2842,10 +2903,10 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, return false; case ValID::t_GlobalName: V = GetGlobalVal(ID.StrVal, Ty, ID.Loc); - return V == 0; + return V == nullptr; case ValID::t_GlobalID: V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc); - return V == 0; + return V == nullptr; case ValID::t_APSInt: if (!Ty->isIntegerTy()) return Error(ID.Loc, "integer constant must have integer type"); @@ -2928,14 +2989,14 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, } bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) { - V = 0; + V = nullptr; ValID ID; return ParseValID(ID, PFS) || ConvertValIDToValue(Ty, ID, V, PFS); } bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) { - Type *Ty = 0; + Type *Ty = nullptr; return ParseType(Ty) || ParseValue(Ty, V, PFS); } @@ -2965,7 +3026,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { unsigned DLLStorageClass; AttrBuilder RetAttrs; CallingConv::ID CC; - Type *RetType = 0; + Type *RetType = nullptr; LocTy RetTypeLoc = Lex.getLoc(); if (ParseOptionalLinkage(Linkage) || ParseOptionalVisibility(Visibility) || @@ -2998,6 +3059,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { return Error(LinkageLoc, "invalid function linkage type"); } + if (!isValidVisibilityForLinkage(Visibility, Linkage)) + return Error(LinkageLoc, + "symbol with local linkage must have default visibility"); + if (!FunctionType::isValidReturnType(RetType)) return Error(RetTypeLoc, "invalid function return type"); @@ -3031,7 +3096,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { std::string GC; bool UnnamedAddr; LocTy UnnamedAddrLoc; - Constant *Prefix = 0; + Constant *Prefix = nullptr; if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, @@ -3088,7 +3153,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { FunctionType::get(RetType, ParamTypeList, isVarArg); PointerType *PFT = PointerType::getUnqual(FT); - Fn = 0; + Fn = nullptr; if (!FunctionName.empty()) { // If this was a definition of a forward reference, remove the definition // from the forward reference table and fill in the forward ref. @@ -3126,7 +3191,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { } } - if (Fn == 0) + if (!Fn) Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M); else // Move the forward-reference to the correct spot in the module. M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn); @@ -3203,7 +3268,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { } BasicBlock *BB = PFS.DefineBB(Name, NameLoc); - if (BB == 0) return true; + if (!BB) return true; std::string NameStr; @@ -3351,8 +3416,10 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS); case lltok::kw_phi: return ParsePHI(Inst, PFS); case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS); - case lltok::kw_call: return ParseCall(Inst, PFS, false); - case lltok::kw_tail: return ParseCall(Inst, PFS, true); + // Call. 
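The function-header path above now rejects local-linkage symbols that carry a non-default visibility, matching the rule that internal and private symbols never appear in the dynamic symbol table. A hedged sketch of the predicate being enforced (enum values are illustrative; the real ones live on GlobalValue):

    enum Linkage { External, Internal, Private };
    enum Visibility { Default, Hidden, Protected };

    static bool isLocalLinkage(Linkage L) {
      return L == Internal || L == Private;
    }

    // Local symbols are invisible outside the module anyway, so a visibility
    // other than the default would be meaningless; the parser diagnoses it.
    static bool isValidVisibilityForLinkage(Visibility V, Linkage L) {
      return !isLocalLinkage(L) || V == Default;
    }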
+ case lltok::kw_call: return ParseCall(Inst, PFS, CallInst::TCK_None); + case lltok::kw_tail: return ParseCall(Inst, PFS, CallInst::TCK_Tail); + case lltok::kw_musttail: return ParseCall(Inst, PFS, CallInst::TCK_MustTail); // Memory. case lltok::kw_alloca: return ParseAlloc(Inst, PFS); case lltok::kw_load: return ParseLoad(Inst, PFS); @@ -3417,7 +3484,7 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) { bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS) { SMLoc TypeLoc = Lex.getLoc(); - Type *Ty = 0; + Type *Ty = nullptr; if (ParseType(Ty, true /*void allowed*/)) return true; Type *ResType = PFS.getFunction().getReturnType(); @@ -3567,7 +3634,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { std::vector FwdRefAttrGrps; LocTy NoBuiltinLoc; CallingConv::ID CC; - Type *RetType = 0; + Type *RetType = nullptr; LocTy RetTypeLoc; ValID CalleeID; SmallVector ArgList; @@ -3589,8 +3656,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { // If RetType is a non-function pointer type, then this is the short syntax // for the call, which means that RetType is just the return type. Infer the // rest of the function argument types from the arguments that are present. - PointerType *PFTy = 0; - FunctionType *Ty = 0; + PointerType *PFTy = nullptr; + FunctionType *Ty = nullptr; if (!(PFTy = dyn_cast(RetType)) || !(Ty = dyn_cast(PFTy->getElementType()))) { // Pull out the types of all of the arguments... @@ -3623,7 +3690,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { FunctionType::param_iterator I = Ty->param_begin(); FunctionType::param_iterator E = Ty->param_end(); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { - Type *ExpectedTy = 0; + Type *ExpectedTy = nullptr; if (I != E) { ExpectedTy = *I++; } else if (!Ty->isVarArg()) { @@ -3764,7 +3831,7 @@ bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc) { LocTy Loc; Value *Op; - Type *DestTy = 0; + Type *DestTy = nullptr; if (ParseTypeAndValue(Op, Loc, PFS) || ParseToken(lltok::kw_to, "expected 'to' after cast value") || ParseType(DestTy)) @@ -3803,7 +3870,7 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'va_arg' TypeAndValue ',' Type bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) { Value *Op; - Type *EltTy = 0; + Type *EltTy = nullptr; LocTy TypeLoc; if (ParseTypeAndValue(Op, PFS) || ParseToken(lltok::comma, "expected ',' after vaarg operand") || @@ -3875,7 +3942,7 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) { /// ParsePHI /// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')* int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { - Type *Ty = 0; LocTy TypeLoc; + Type *Ty = nullptr; LocTy TypeLoc; Value *Op0, *Op1; if (ParseType(Ty, TypeLoc) || @@ -3924,7 +3991,7 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { /// ::= 'filter' /// ::= 'filter' TypeAndValue ( ',' TypeAndValue )* bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { - Type *Ty = 0; LocTy TyLoc; + Type *Ty = nullptr; LocTy TyLoc; Value *PersFn; LocTy PersFnLoc; if (ParseType(Ty, TyLoc) || @@ -3968,21 +4035,26 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { } /// ParseCall -/// ::= 'tail'? 
'call' OptionalCallingConv OptionalAttrs Type Value +/// ::= 'call' OptionalCallingConv OptionalAttrs Type Value +/// ParameterList OptionalAttrs +/// ::= 'tail' 'call' OptionalCallingConv OptionalAttrs Type Value +/// ParameterList OptionalAttrs +/// ::= 'musttail' 'call' OptionalCallingConv OptionalAttrs Type Value /// ParameterList OptionalAttrs bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, - bool isTail) { + CallInst::TailCallKind TCK) { AttrBuilder RetAttrs, FnAttrs; std::vector FwdRefAttrGrps; LocTy BuiltinLoc; CallingConv::ID CC; - Type *RetType = 0; + Type *RetType = nullptr; LocTy RetTypeLoc; ValID CalleeID; SmallVector ArgList; LocTy CallLoc = Lex.getLoc(); - if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) || + if ((TCK != CallInst::TCK_None && + ParseToken(lltok::kw_call, "expected 'tail call'")) || ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) || ParseType(RetType, RetTypeLoc, true /*void allowed*/) || @@ -3995,8 +4067,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, // If RetType is a non-function pointer type, then this is the short syntax // for the call, which means that RetType is just the return type. Infer the // rest of the function argument types from the arguments that are present. - PointerType *PFTy = 0; - FunctionType *Ty = 0; + PointerType *PFTy = nullptr; + FunctionType *Ty = nullptr; if (!(PFTy = dyn_cast(RetType)) || !(Ty = dyn_cast(PFTy->getElementType()))) { // Pull out the types of all of the arguments... @@ -4029,7 +4101,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, FunctionType::param_iterator I = Ty->param_begin(); FunctionType::param_iterator E = Ty->param_end(); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { - Type *ExpectedTy = 0; + Type *ExpectedTy = nullptr; if (I != E) { ExpectedTy = *I++; } else if (!Ty->isVarArg()) { @@ -4058,7 +4130,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, AttributeSet PAL = AttributeSet::get(Context, Attrs); CallInst *CI = CallInst::Create(Callee, Args); - CI->setTailCall(isTail); + CI->setTailCallKind(TCK); CI->setCallingConv(CC); CI->setAttributes(PAL); ForwardRefAttrGroups[CI] = FwdRefAttrGrps; @@ -4073,10 +4145,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, /// ParseAlloc /// ::= 'alloca' 'inalloca'? Type (',' TypeAndValue)? (',' 'align' i32)? int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { - Value *Size = 0; + Value *Size = nullptr; LocTy SizeLoc; unsigned Alignment = 0; - Type *Ty = 0; + Type *Ty = nullptr; bool IsInAlloca = EatIfPresent(lltok::kw_inalloca); @@ -4315,8 +4387,8 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) { /// ParseGetElementPtr /// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)* int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { - Value *Ptr = 0; - Value *Val = 0; + Value *Ptr = nullptr; + Value *Val = nullptr; LocTy Loc, EltLoc; bool InBounds = EatIfPresent(lltok::kw_inbounds); @@ -4418,11 +4490,11 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl &Elts, do { // Null is a special case since it is typeless. 
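ParseCall above replaces its old bool isTail flag with a three-valued kind, so plain `call`, `tail call`, and the new `musttail call` share one path and the kind lands on the instruction via setTailCallKind. A standalone sketch of the keyword-to-kind mapping (token names are modeled on the lexer's, simplified):

    enum TokKind { kw_call, kw_tail, kw_musttail };
    enum TailCallKind { TCK_None, TCK_Tail, TCK_MustTail };

    static TailCallKind kindForToken(TokKind T) {
      switch (T) {
      case kw_call:     return TCK_None;     // plain call
      case kw_tail:     return TCK_Tail;     // optimization hint only
      case kw_musttail: return TCK_MustTail; // tail call is mandatory
      }
      return TCK_None;                       // unreachable for valid tokens
    }

The design point is that `tail` stays a hint while `musttail` obliges codegen to actually emit a tail call, so the two cannot be folded into one boolean.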
if (EatIfPresent(lltok::kw_null)) { - Elts.push_back(0); + Elts.push_back(nullptr); continue; } - Value *V = 0; + Value *V = nullptr; if (ParseTypeAndValue(V, PFS)) return true; Elts.push_back(V); } while (EatIfPresent(lltok::comma)); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index 294a1e1..e2bf462 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -176,7 +176,8 @@ namespace llvm { return FMF; } - bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) { + bool ParseOptionalToken(lltok::Kind T, bool &Present, + LocTy *Loc = nullptr) { if (Lex.getKind() != T) { Present = false; } else { @@ -348,7 +349,7 @@ namespace llvm { PerFunctionState &PFS); // Constant Parsing. - bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL); + bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr); bool ParseGlobalValue(Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); bool ParseGlobalValueVector(SmallVectorImpl &Elts); @@ -371,6 +372,8 @@ namespace llvm { bool ParseFunctionBody(Function &Fn); bool ParseBasicBlock(PerFunctionState &PFS); + enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail }; + // Instruction Parsing. Each instruction parsing routine can return with a // normal result, an error result, or return having eaten an extra comma. enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 }; @@ -397,7 +400,8 @@ namespace llvm { bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); int ParsePHI(Instruction *&I, PerFunctionState &PFS); bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS); - bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); + bool ParseCall(Instruction *&I, PerFunctionState &PFS, + CallInst::TailCallKind IsTail); int ParseAlloc(Instruction *&I, PerFunctionState &PFS); int ParseLoad(Instruction *&I, PerFunctionState &PFS); int ParseStore(Instruction *&I, PerFunctionState &PFS); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 532e896..b6b7d82 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -39,6 +39,8 @@ namespace lltok { kw_private, kw_internal, + kw_linker_private, // NOTE: deprecated, for parser compatibility + kw_linker_private_weak, // NOTE: deprecated, for parser compatibility kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, kw_available_externally, @@ -52,6 +54,7 @@ namespace lltok { kw_undef, kw_null, kw_to, kw_tail, + kw_musttail, kw_target, kw_triple, kw_unwind, @@ -85,7 +88,7 @@ namespace lltok { kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_intel_ocl_bicc, - kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_x86_cdeclmethodcc, + kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, kw_msp430_intrcc, kw_ptx_kernel, kw_ptx_device, @@ -114,6 +117,7 @@ namespace lltok { kw_noimplicitfloat, kw_noinline, kw_nonlazybind, + kw_nonnull, kw_noredzone, kw_noreturn, kw_nounwind, diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index a1da5e1..2606bc2 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -30,12 +30,12 @@ Module *llvm::ParseAssembly(MemoryBuffer *F, // If we are parsing into an existing module, do it. if (M) - return LLParser(F, SM, Err, M).Run() ? 0 : M; + return LLParser(F, SM, Err, M).Run() ? nullptr : M; // Otherwise create a new module. 
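Most of the remaining churn in this patch, here and below, is the mechanical 0/NULL-to-nullptr migration. Beyond style, nullptr has its own type (std::nullptr_t), which keeps overloads unambiguous where a literal 0 would first be an int; a tiny self-contained illustration:

    #include <cstdio>

    static void f(int)         { std::puts("f(int)"); }
    static void f(const char*) { std::puts("f(const char*)"); }

    int main() {
      f(0);        // binds to f(int): 0 is an integer literal first
      f(nullptr);  // binds to f(const char*): nullptr is only a pointer
      return 0;
    }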
std::unique_ptr M2(new Module(F->getBufferIdentifier(), Context)); if (LLParser(F, SM, Err, M2.get()).Run()) - return 0; + return nullptr; return M2.release(); } @@ -45,10 +45,10 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); - return 0; + return nullptr; } - return ParseAssembly(File.release(), 0, Err, Context); + return ParseAssembly(File.release(), nullptr, Err, Context); } Module *llvm::ParseAssemblyString(const char *AsmString, Module *M, diff --git a/lib/AsmParser/module.modulemap b/lib/AsmParser/module.modulemap new file mode 100644 index 0000000..cc300060 --- /dev/null +++ b/lib/AsmParser/module.modulemap @@ -0,0 +1 @@ +module AsmParser { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 3e360a8..716299f 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -35,7 +35,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, if (error_code EC = ModuleOrErr.getError()) { if (OutMessage) *OutMessage = strdup(EC.message().c_str()); - *OutModule = wrap((Module*)0); + *OutModule = wrap((Module*)nullptr); return 1; } @@ -55,7 +55,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef)); if (error_code EC = ModuleOrErr.getError()) { - *OutM = wrap((Module *)NULL); + *OutM = wrap((Module *)nullptr); if (OutMessage) *OutMessage = strdup(EC.message().c_str()); return 1; diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index f712d9d..4170f98 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -41,7 +41,7 @@ void BitcodeReader::materializeForwardReferencedFunctions() { void BitcodeReader::FreeState() { if (BufferOwned) delete Buffer; - Buffer = 0; + Buffer = nullptr; std::vector().swap(TypeList); ValueList.clear(); MDValueList.clear(); @@ -258,7 +258,7 @@ void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) { resize(Idx+1); WeakVH &OldV = ValuePtrs[Idx]; - if (OldV == 0) { + if (!OldV) { OldV = V; return; } @@ -298,12 +298,12 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) { resize(Idx + 1); if (Value *V = ValuePtrs[Idx]) { - assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!"); + assert((!Ty || Ty == V->getType()) && "Type mismatch in value table!"); return V; } // No type specified, must be invalid reference. - if (Ty == 0) return 0; + if (!Ty) return nullptr; // Create and return a placeholder, which will later be RAUW'd. Value *V = new Argument(Ty); @@ -403,7 +403,7 @@ void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) { resize(Idx+1); WeakVH &OldV = MDValuePtrs[Idx]; - if (OldV == 0) { + if (!OldV) { OldV = V; return; } @@ -435,7 +435,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { Type *BitcodeReader::getTypeByID(unsigned ID) { // The type table size is always specified correctly. 
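getValueFwdRef/AssignValue in BitcodeReaderValueList (above) let a record use a value slot before the record defining it has been read: the first forward use materializes a placeholder, and the defining record later swaps in the real value. The bookkeeping, modeled standalone (the real reader calls replaceAllUsesWith on the placeholder before deleting it):

    #include <vector>

    struct Val { bool IsPlaceholder; };

    struct ValueList {
      std::vector<Val*> Slots;

      Val *getValueFwdRef(unsigned Idx) {
        if (Idx >= Slots.size())
          Slots.resize(Idx + 1, nullptr);
        if (!Slots[Idx])
          Slots[Idx] = new Val{true};   // forward use: hand out a placeholder
        return Slots[Idx];
      }

      void assignValue(Val *V, unsigned Idx) {
        if (Idx >= Slots.size())
          Slots.resize(Idx + 1, nullptr);
        Val *&Old = Slots[Idx];
        if (!Old) { Old = V; return; }  // no earlier uses: just record it
        if (Old->IsPlaceholder) {       // earlier uses exist: LLVM redirects
          delete Old;                   // them (RAUW) before freeing the
          Old = V;                      // placeholder; this sketch just swaps
        }
      }
    };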
if (ID >= TypeList.size()) - return 0; + return nullptr; if (Type *Ty = TypeList[ID]) return Ty; @@ -569,6 +569,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { return Attribute::NoInline; case bitc::ATTR_KIND_NON_LAZY_BIND: return Attribute::NonLazyBind; + case bitc::ATTR_KIND_NON_NULL: + return Attribute::NonNull; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: @@ -737,7 +739,7 @@ error_code BitcodeReader::ParseTypeTableBody() { // Read a record. Record.clear(); - Type *ResultTy = 0; + Type *ResultTy = nullptr; switch (Stream.readRecord(Entry.ID, Record)) { default: return Error(InvalidValue); @@ -792,7 +794,7 @@ error_code BitcodeReader::ParseTypeTableBody() { if (Record.size() == 2) AddressSpace = Record[1]; ResultTy = getTypeByID(Record[0]); - if (ResultTy == 0) + if (!ResultTy) return Error(InvalidType); ResultTy = PointerType::get(ResultTy, AddressSpace); break; @@ -811,7 +813,7 @@ error_code BitcodeReader::ParseTypeTableBody() { } ResultTy = getTypeByID(Record[2]); - if (ResultTy == 0 || ArgTys.size() < Record.size()-3) + if (!ResultTy || ArgTys.size() < Record.size()-3) return Error(InvalidType); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); @@ -830,7 +832,7 @@ error_code BitcodeReader::ParseTypeTableBody() { } ResultTy = getTypeByID(Record[1]); - if (ResultTy == 0 || ArgTys.size() < Record.size()-2) + if (!ResultTy || ArgTys.size() < Record.size()-2) return Error(InvalidType); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); @@ -867,7 +869,7 @@ error_code BitcodeReader::ParseTypeTableBody() { StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { Res->setName(TypeName); - TypeList[NumRecords] = 0; + TypeList[NumRecords] = nullptr; } else // Otherwise, create a new struct. Res = StructType::create(Context, TypeName); TypeName.clear(); @@ -896,7 +898,7 @@ error_code BitcodeReader::ParseTypeTableBody() { StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { Res->setName(TypeName); - TypeList[NumRecords] = 0; + TypeList[NumRecords] = nullptr; } else // Otherwise, create a new struct with no body. 
Res = StructType::create(Context, TypeName); TypeName.clear(); @@ -924,7 +926,7 @@ error_code BitcodeReader::ParseTypeTableBody() { if (NumRecords >= TypeList.size()) return Error(InvalidTYPETable); assert(ResultTy && "Didn't read a type?"); - assert(TypeList[NumRecords] == 0 && "Already read type?"); + assert(!TypeList[NumRecords] && "Already read type?"); TypeList[NumRecords++] = ResultTy; } } @@ -972,7 +974,7 @@ error_code BitcodeReader::ParseValueSymbolTable() { if (ConvertToString(Record, 1, ValueName)) return Error(InvalidRecord); BasicBlock *BB = getBasicBlock(Record[0]); - if (BB == 0) + if (!BB) return Error(InvalidRecord); BB->setName(StringRef(ValueName.data(), ValueName.size())); @@ -1028,7 +1030,7 @@ error_code BitcodeReader::ParseMetadata() { NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name); for (unsigned i = 0; i != Size; ++i) { MDNode *MD = dyn_cast_or_null(MDValueList.getValueFwdRef(Record[i])); - if (MD == 0) + if (!MD) return Error(InvalidRecord); NMD->addOperand(MD); } @@ -1052,7 +1054,7 @@ error_code BitcodeReader::ParseMetadata() { else if (!Ty->isVoidTy()) Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty)); else - Elts.push_back(NULL); + Elts.push_back(nullptr); } Value *V = MDNode::getWhenValsUnresolved(Context, Elts, IsFunctionLocal); IsFunctionLocal = false; @@ -1092,6 +1094,28 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { return 1ULL << 63; } +// FIXME: Delete this in LLVM 4.0 and just assert that the aliasee is a +// GlobalObject. +static GlobalObject & +getGlobalObjectInExpr(const DenseMap &Map, + Constant &C) { + auto *GO = dyn_cast(&C); + if (GO) + return *GO; + + auto *GA = dyn_cast(&C); + if (GA) + return getGlobalObjectInExpr(Map, *Map.find(GA)->second); + + auto &CE = cast(C); + assert(CE.getOpcode() == Instruction::BitCast || + CE.getOpcode() == Instruction::GetElementPtr || + CE.getOpcode() == Instruction::AddrSpaceCast); + if (CE.getOpcode() == Instruction::GetElementPtr) + assert(cast(CE).hasAllZeroIndices()); + return getGlobalObjectInExpr(Map, *CE.getOperand(0)); +} + /// ResolveGlobalAndAliasInits - Resolve all of the initializers for global /// values and aliases that we can. error_code BitcodeReader::ResolveGlobalAndAliasInits() { @@ -1117,19 +1141,30 @@ error_code BitcodeReader::ResolveGlobalAndAliasInits() { GlobalInitWorklist.pop_back(); } + // FIXME: Delete this in LLVM 4.0 + // Older versions of llvm could write an alias pointing to another. We cannot + // construct those aliases, so we first collect an alias to aliasee expression + // and then compute the actual aliasee. + DenseMap AliasInit; + while (!AliasInitWorklist.empty()) { unsigned ValID = AliasInitWorklist.back().second; if (ValID >= ValueList.size()) { AliasInits.push_back(AliasInitWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) - AliasInitWorklist.back().first->setAliasee(C); + AliasInit.insert(std::make_pair(AliasInitWorklist.back().first, C)); else return Error(ExpectedConstant); } AliasInitWorklist.pop_back(); } + for (auto &Pair : AliasInit) { + auto &GO = getGlobalObjectInExpr(AliasInit, *Pair.second); + Pair.first->setAliasee(&GO); + } + while (!FunctionPrefixWorklist.empty()) { unsigned ValID = FunctionPrefixWorklist.back().second; if (ValID >= ValueList.size()) { @@ -1185,7 +1220,7 @@ error_code BitcodeReader::ParseConstants() { // Read a record. 
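getGlobalObjectInExpr above exists purely for backward compatibility: older bitcode could make an alias point at another alias, or at a no-op cast of one, so the reader first records every alias-to-aliasee pair and only then chases each chain down to a real global object. The chase, modeled standalone:

    #include <cassert>
    #include <map>

    struct Node {
      enum Kind { Object, Alias, NoopCast } K;
      Node *Operand;                     // cast operand when K == NoopCast
    };

    // AliasInit maps each alias to its recorded aliasee expression, as in
    // the reader; recursion ends at the first real object.
    static Node &findObject(const std::map<Node*, Node*> &AliasInit, Node &N) {
      if (N.K == Node::Object)
        return N;
      if (N.K == Node::Alias)
        return findObject(AliasInit, *AliasInit.find(&N)->second);
      assert(N.K == Node::NoopCast && N.Operand);
      return findObject(AliasInit, *N.Operand); // look through the cast
    }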
Record.clear(); - Value *V = 0; + Value *V = nullptr; unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: unknown constant @@ -1418,34 +1453,52 @@ error_code BitcodeReader::ParseConstants() { ValueList.getConstantFwdRef(Record[2],CurTy)); break; } - case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval] + case bitc::CST_CODE_CE_EXTRACTELT + : { // CE_EXTRACTELT: [opty, opval, opty, opval] if (Record.size() < 3) return Error(InvalidRecord); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); - if (OpTy == 0) + if (!OpTy) return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); - Constant *Op1 = ValueList.getConstantFwdRef(Record[2], - Type::getInt32Ty(Context)); + Constant *Op1 = nullptr; + if (Record.size() == 4) { + Type *IdxTy = getTypeByID(Record[2]); + if (!IdxTy) + return Error(InvalidRecord); + Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy); + } else // TODO: Remove with llvm 4.0 + Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); + if (!Op1) + return Error(InvalidRecord); V = ConstantExpr::getExtractElement(Op0, Op1); break; } - case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval] + case bitc::CST_CODE_CE_INSERTELT + : { // CE_INSERTELT: [opval, opval, opty, opval] VectorType *OpTy = dyn_cast(CurTy); - if (Record.size() < 3 || OpTy == 0) + if (Record.size() < 3 || !OpTy) return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy->getElementType()); - Constant *Op2 = ValueList.getConstantFwdRef(Record[2], - Type::getInt32Ty(Context)); + Constant *Op2 = nullptr; + if (Record.size() == 4) { + Type *IdxTy = getTypeByID(Record[2]); + if (!IdxTy) + return Error(InvalidRecord); + Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy); + } else // TODO: Remove with llvm 4.0 + Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); + if (!Op2) + return Error(InvalidRecord); V = ConstantExpr::getInsertElement(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] VectorType *OpTy = dyn_cast(CurTy); - if (Record.size() < 3 || OpTy == 0) + if (Record.size() < 3 || !OpTy) return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); @@ -1459,7 +1512,7 @@ error_code BitcodeReader::ParseConstants() { VectorType *RTy = dyn_cast(CurTy); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); - if (Record.size() < 4 || RTy == 0 || OpTy == 0) + if (Record.size() < 4 || !RTy || !OpTy) return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); @@ -1473,7 +1526,7 @@ error_code BitcodeReader::ParseConstants() { if (Record.size() < 4) return Error(InvalidRecord); Type *OpTy = getTypeByID(Record[0]); - if (OpTy == 0) + if (!OpTy) return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); @@ -1538,11 +1591,11 @@ error_code BitcodeReader::ParseConstants() { if (Record.size() < 3) return Error(InvalidRecord); Type *FnTy = getTypeByID(Record[0]); - if (FnTy == 0) + if (!FnTy) return Error(InvalidRecord); Function *Fn = dyn_cast_or_null(ValueList.getConstantFwdRef(Record[1],FnTy)); - if (Fn == 0) + if (!Fn) return 
Error(InvalidRecord); // If the function is already parsed we can insert the block address right @@ -1561,7 +1614,7 @@ error_code BitcodeReader::ParseConstants() { GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(), Type::getInt8Ty(Context), false, GlobalValue::InternalLinkage, - 0, ""); + nullptr, ""); BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef)); V = FwdRef; } @@ -1649,8 +1702,11 @@ error_code BitcodeReader::GlobalCleanup() { // Look for global variables which need to be renamed. for (Module::global_iterator GI = TheModule->global_begin(), GE = TheModule->global_end(); - GI != GE; ++GI) - UpgradeGlobalVariable(GI); + GI != GE;) { + GlobalVariable *GV = GI++; + UpgradeGlobalVariable(GV); + } + // Force deallocation of memory for these vectors to favor the client that // want lazy deserialization. std::vector >().swap(GlobalInits); @@ -1838,7 +1894,9 @@ error_code BitcodeReader::ParseModule(bool Resume) { Section = SectionTable[Record[5]-1]; } GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; - if (Record.size() > 6) + // Local linkage must have default visibility. + if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage)) + // FIXME: Change to an error if non-default in 4.0. Visibility = GetDecodedVisibility(Record[6]); GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal; @@ -1854,7 +1912,7 @@ error_code BitcodeReader::ParseModule(bool Resume) { ExternallyInitialized = Record[9]; GlobalVariable *NewGV = - new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0, + new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "", nullptr, TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) @@ -1904,7 +1962,10 @@ error_code BitcodeReader::ParseModule(bool Resume) { return Error(InvalidID); Func->setSection(SectionTable[Record[6]-1]); } - Func->setVisibility(GetDecodedVisibility(Record[7])); + // Local linkage must have default visibility. + if (!Func->hasLocalLinkage()) + // FIXME: Change to an error if non-default in 4.0. + Func->setVisibility(GetDecodedVisibility(Record[7])); if (Record.size() > 8 && Record[8]) { if (Record[8]-1 > GCTable.size()) return Error(InvalidID); @@ -1940,13 +2001,17 @@ error_code BitcodeReader::ParseModule(bool Resume) { Type *Ty = getTypeByID(Record[0]); if (!Ty) return Error(InvalidRecord); - if (!Ty->isPointerTy()) + auto *PTy = dyn_cast(Ty); + if (!PTy) return Error(InvalidTypeForValue); - GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]), - "", 0, TheModule); + auto *NewGA = + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + GetDecodedLinkage(Record[2]), "", TheModule); // Old bitcode files didn't have visibility field. - if (Record.size() > 3) + // Local linkage must have default visibility. + if (Record.size() > 3 && !NewGA->hasLocalLinkage()) + // FIXME: Change to an error if non-default in 4.0. 
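The CE_EXTRACTELT/CE_INSERTELT handling above is a format upgrade: new bitcode writes an explicit type for the index operand ([opty, opval, opty, opval]), while three-field records from older producers keep the historical implied-i32 index. A standalone sketch of that versioned decode (the zero sentinel for "use i32" is this sketch's own convention, not the format's):

    #include <cstdint>
    #include <vector>

    struct IndexOperand { uint64_t TypeID, ValueID; bool ImpliedI32; };

    static bool decodeIndexOperand(const std::vector<uint64_t> &Record,
                                   IndexOperand &Out) {
      if (Record.size() == 4) {     // new form: [.., .., idx type, idx value]
        Out.TypeID = Record[2];
        Out.ValueID = Record[3];
        Out.ImpliedI32 = false;
        return true;
      }
      if (Record.size() == 3) {     // legacy form: index value only, type i32
        Out.TypeID = 0;
        Out.ValueID = Record[2];
        Out.ImpliedI32 = true;
        return true;
      }
      return false;                 // malformed record: reject it
    }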
NewGA->setVisibility(GetDecodedVisibility(Record[3])); if (Record.size() > 4) NewGA->setDLLStorageClass(GetDecodedDLLStorageClass(Record[4])); @@ -1969,7 +2034,7 @@ error_code BitcodeReader::ParseModule(bool Resume) { } error_code BitcodeReader::ParseBitcodeInto(Module *M) { - TheModule = 0; + TheModule = nullptr; if (error_code EC = InitStream()) return EC; @@ -2173,7 +2238,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { ValueList.push_back(I); unsigned NextValueNo = ValueList.size(); - BasicBlock *CurBB = 0; + BasicBlock *CurBB = nullptr; unsigned CurBBNo = 0; DebugLoc LastLoc; @@ -2222,7 +2287,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { // Read a record. Record.clear(); - Instruction *I = 0; + Instruction *I = nullptr; unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject @@ -2240,7 +2305,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same // location as the previous instruction with a location. - I = 0; + I = nullptr; // Get the last instruction emitted. if (CurBB && !CurBB->empty()) @@ -2249,31 +2314,31 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { !FunctionBBs[CurBBNo-1]->empty()) I = &FunctionBBs[CurBBNo-1]->back(); - if (I == 0) + if (!I) return Error(InvalidRecord); I->setDebugLoc(LastLoc); - I = 0; + I = nullptr; continue; case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia] - I = 0; // Get the last instruction emitted. + I = nullptr; // Get the last instruction emitted. if (CurBB && !CurBB->empty()) I = &CurBB->back(); else if (CurBBNo && FunctionBBs[CurBBNo-1] && !FunctionBBs[CurBBNo-1]->empty()) I = &FunctionBBs[CurBBNo-1]->back(); - if (I == 0 || Record.size() < 4) + if (!I || Record.size() < 4) return Error(InvalidRecord); unsigned Line = Record[0], Col = Record[1]; unsigned ScopeID = Record[2], IAID = Record[3]; - MDNode *Scope = 0, *IA = 0; + MDNode *Scope = nullptr, *IA = nullptr; if (ScopeID) Scope = cast(MDValueList.getValueFwdRef(ScopeID-1)); if (IAID) IA = cast(MDValueList.getValueFwdRef(IAID-1)); LastLoc = DebugLoc::get(Line, Col, Scope, IA); I->setDebugLoc(LastLoc); - I = 0; + I = nullptr; continue; } @@ -2333,9 +2398,9 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { Type *ResTy = getTypeByID(Record[OpNum]); int Opc = GetDecodedCastOpcode(Record[OpNum+1]); - if (Opc == -1 || ResTy == 0) + if (Opc == -1 || !ResTy) return Error(InvalidRecord); - Instruction *Temp = 0; + Instruction *Temp = nullptr; if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) { if (Temp) { InstructionList.push_back(Temp); @@ -2460,7 +2525,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || - popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx)) + getValueTypePair(Record, OpNum, NextValueNo, Idx)) return Error(InvalidRecord); I = ExtractElementInst::Create(Vec, Idx); InstructionList.push_back(I); @@ -2473,7 +2538,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || popValue(Record, OpNum, NextValueNo, cast(Vec->getType())->getElementType(), Elt) || - popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx)) + getValueTypePair(Record, OpNum, NextValueNo, Idx)) return Error(InvalidRecord); I = InsertElementInst::Create(Vec, 
Elt, Idx); InstructionList.push_back(I); @@ -2526,7 +2591,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { } unsigned OpNum = 0; - Value *Op = NULL; + Value *Op = nullptr; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return Error(InvalidRecord); if (OpNum != Record.size()) @@ -2540,7 +2605,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { if (Record.size() != 1 && Record.size() != 3) return Error(InvalidRecord); BasicBlock *TrueDest = getBasicBlock(Record[0]); - if (TrueDest == 0) + if (!TrueDest) return Error(InvalidRecord); if (Record.size() == 1) { @@ -2551,7 +2616,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { BasicBlock *FalseDest = getBasicBlock(Record[1]); Value *Cond = getValue(Record, 2, NextValueNo, Type::getInt1Ty(Context)); - if (FalseDest == 0 || Cond == 0) + if (!FalseDest || !Cond) return Error(InvalidRecord); I = BranchInst::Create(TrueDest, FalseDest, Cond); InstructionList.push_back(I); @@ -2571,7 +2636,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { Value *Cond = getValue(Record, 2, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[3]); - if (OpTy == 0 || Cond == 0 || Default == 0) + if (!OpTy || !Cond || !Default) return Error(InvalidRecord); unsigned NumCases = Record[4]; @@ -2628,7 +2693,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { Type *OpTy = getTypeByID(Record[0]); Value *Cond = getValue(Record, 1, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[2]); - if (OpTy == 0 || Cond == 0 || Default == 0) + if (!OpTy || !Cond || !Default) return Error(InvalidRecord); unsigned NumCases = (Record.size()-3)/2; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); @@ -2637,7 +2702,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { ConstantInt *CaseVal = dyn_cast_or_null(getFnValueByID(Record[3+i*2], OpTy)); BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); - if (CaseVal == 0 || DestBB == 0) { + if (!CaseVal || !DestBB) { delete SI; return Error(InvalidRecord); } @@ -2651,7 +2716,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { return Error(InvalidRecord); Type *OpTy = getTypeByID(Record[0]); Value *Address = getValue(Record, 1, NextValueNo, OpTy); - if (OpTy == 0 || Address == 0) + if (!OpTy || !Address) return Error(InvalidRecord); unsigned NumDests = Record.size()-2; IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests); @@ -2683,11 +2748,11 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { return Error(InvalidRecord); PointerType *CalleeTy = dyn_cast(Callee->getType()); - FunctionType *FTy = !CalleeTy ? 0 : + FunctionType *FTy = !CalleeTy ? nullptr : dyn_cast(CalleeTy->getElementType()); // Check that the right number of fixed parameters are here. 
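The terminator decoding above shows the reader's defensive style: every basic-block and value lookup is re-checked because bitcode is untrusted input. For the older switch form, the case list is inferred from the record length as repeating (value, destination) pairs; sketched standalone (the parity check is explicit here where the original expresses it through its size arithmetic):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Case { uint64_t CaseVal; unsigned DestBB; };

    // Layout as in the older switch form: [opty, cond, defaultbb, (v, bb)*]
    static bool decodeSwitch(const std::vector<uint64_t> &Record,
                             unsigned &DefaultBB, std::vector<Case> &Cases) {
      if (Record.size() < 3 || (Record.size() - 3) % 2 != 0)
        return false;
      DefaultBB = static_cast<unsigned>(Record[2]);
      for (std::size_t i = 3; i + 1 < Record.size(); i += 2)
        Cases.push_back(Case{Record[i], static_cast<unsigned>(Record[i + 1])});
      return true;
    }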
- if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 || + if (!FTy || !NormalBB || !UnwindBB || Record.size() < OpNum+FTy->getNumParams()) return Error(InvalidRecord); @@ -2695,7 +2760,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { Ops.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); - if (Ops.back() == 0) + if (!Ops.back()) return Error(InvalidRecord); } @@ -2721,7 +2786,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval] unsigned Idx = 0; - Value *Val = 0; + Value *Val = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, Val)) return Error(InvalidRecord); I = ResumeInst::Create(Val); @@ -2768,7 +2833,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { Type *Ty = getTypeByID(Record[Idx++]); if (!Ty) return Error(InvalidRecord); - Value *PersFn = 0; + Value *PersFn = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, PersFn)) return Error(InvalidRecord); @@ -2961,7 +3026,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { return Error(InvalidRecord); PointerType *OpTy = dyn_cast(Callee->getType()); - FunctionType *FTy = 0; + FunctionType *FTy = nullptr; if (OpTy) FTy = dyn_cast(OpTy->getElementType()); if (!FTy || Record.size() < FTy->getNumParams()+OpNum) return Error(InvalidRecord); @@ -2974,7 +3039,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { else Args.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); - if (Args.back() == 0) + if (!Args.back()) return Error(InvalidRecord); } @@ -2994,8 +3059,13 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { I = CallInst::Create(Callee, Args); InstructionList.push_back(I); cast(I)->setCallingConv( - static_cast(CCInfo>>1)); - cast(I)->setTailCall(CCInfo & 1); + static_cast((~(1U << 14) & CCInfo) >> 1)); + CallInst::TailCallKind TCK = CallInst::TCK_None; + if (CCInfo & 1) + TCK = CallInst::TCK_Tail; + if (CCInfo & (1 << 14)) + TCK = CallInst::TCK_MustTail; + cast(I)->setTailCallKind(TCK); cast(I)->setAttributes(PAL); break; } @@ -3015,7 +3085,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { // Add instruction to end of current BB. If there is no current BB, reject // this file. - if (CurBB == 0) { + if (!CurBB) { delete I; return Error(InvalidInstructionWithNoBB); } @@ -3024,7 +3094,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { // If this was a terminator instruction, move to the next block. if (isa(I)) { ++CurBBNo; - CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0; + CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : nullptr; } // Non-void values get registered in the value table for future use. @@ -3036,10 +3106,10 @@ OutOfRecordLoop: // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { - if (A->getParent() == 0) { + if (!A->getParent()) { // We found at least one unresolved value. Nuke them all to avoid leaks. for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){ - if ((A = dyn_cast_or_null(ValueList[i])) && A->getParent() == 0) { + if ((A = dyn_cast_or_null(ValueList[i])) && !A->getParent()) { A->replaceAllUsesWith(UndefValue::get(A->getType())); delete A; } @@ -3348,7 +3418,7 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name, if (ErrMsg) *ErrMsg = EC.message(); delete M; // Also deletes R. 
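The call decoding above widens CCInfo from "calling convention plus a tail bit" into a small bitfield: bit 0 is the plain tail flag, bit 14 marks musttail, and the calling convention sits above bit 0, hence the mask-then-shift when reading. A self-contained round trip of that packing (the writer side appears later in BitcodeWriter.cpp; bit 0 mirrors isTailCall(), which also holds for musttail calls):

    #include <cassert>
    #include <cstdint>

    enum TailCallKind { TCK_None, TCK_Tail, TCK_MustTail };

    static uint64_t encodeCCInfo(unsigned CC, TailCallKind TCK) {
      return (uint64_t(CC) << 1)                     // conv above bit 0
           | uint64_t(TCK != TCK_None)               // bit 0: any tail marker
           | (uint64_t(TCK == TCK_MustTail) << 14);  // bit 14: musttail
    }

    static void decodeCCInfo(uint64_t CCInfo, unsigned &CC, TailCallKind &TCK) {
      CC = unsigned((~(1ULL << 14) & CCInfo) >> 1);  // strip bit 14, shift
      TCK = TCK_None;
      if (CCInfo & 1)            TCK = TCK_Tail;
      if (CCInfo & (1ULL << 14)) TCK = TCK_MustTail; // musttail wins
    }

    int main() {
      unsigned CC; TailCallKind TCK;
      decodeCCInfo(encodeCCInfo(8, TCK_MustTail), CC, TCK);
      assert(CC == 8 && TCK == TCK_MustTail);
      return 0;
    }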
- return 0; + return nullptr; } R->setBufferOwned(false); // no buffer to delete return M; diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 15be31f..593d8f9 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -224,13 +224,13 @@ public: } explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) - : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), - LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false), + : Context(C), TheModule(nullptr), Buffer(buffer), BufferOwned(false), + LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) { } explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) - : Context(C), TheModule(0), Buffer(0), BufferOwned(false), + : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false), LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) { @@ -271,7 +271,7 @@ private: return ValueList.getValueFwdRef(ID, Ty); } BasicBlock *getBasicBlock(unsigned ID) const { - if (ID >= FunctionBBs.size()) return 0; // Invalid ID + if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID return FunctionBBs[ID]; } AttributeSet getAttributes(unsigned i) const { @@ -293,15 +293,15 @@ private: if (ValNo < InstNum) { // If this is not a forward reference, just return the value we already // have. - ResVal = getFnValueByID(ValNo, 0); - return ResVal == 0; + ResVal = getFnValueByID(ValNo, nullptr); + return ResVal == nullptr; } else if (Slot == Record.size()) { return true; } unsigned TypeNo = (unsigned)Record[Slot++]; ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo)); - return ResVal == 0; + return ResVal == nullptr; } /// popValue - Read a value out of the specified record from slot 'Slot'. @@ -320,14 +320,14 @@ private: bool getValue(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty, Value *&ResVal) { ResVal = getValue(Record, Slot, InstNum, Ty); - return ResVal == 0; + return ResVal == nullptr; } /// getValue -- Version of getValue that returns ResVal directly, /// or 0 if there is an error. Value *getValue(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { - if (Slot == Record.size()) return 0; + if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)Record[Slot]; // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) @@ -338,7 +338,7 @@ private: /// getValueSigned -- Like getValue, but decodes signed VBRs. Value *getValueSigned(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { - if (Slot == Record.size()) return 0; + if (Slot == Record.size()) return nullptr; unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); // Adjust the ValNo, if it was encoded relative to the InstNum. if (UseRelativeIDs) diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index 1fd9abd..f31e1fa 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -315,7 +315,7 @@ bool BitstreamCursor::ReadBlockInfoBlock() { if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; SmallVector Record; - BitstreamReader::BlockInfo *CurBlockInfo = 0; + BitstreamReader::BlockInfo *CurBlockInfo = nullptr; // Read all the records for this module. 
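The getValue/getValueSigned helpers above carry the other decoding subtlety in this file: when UseRelativeIDs is set (newer bitcode), operand IDs are stored as a distance back from the current instruction rather than as absolute value-table indexes, which keeps the common nearby-operand case small in the VBR encoding; phi operands use the signed variant so they can also reach forward. The adjustment, standalone:

    // Decode an operand ID for instruction number InstNum.
    static unsigned decodeValueID(unsigned ValNo, unsigned InstNum,
                                  bool UseRelativeIDs) {
      if (!UseRelativeIDs)
        return ValNo;           // absolute index into the value table
      return InstNum - ValNo;   // distance back from the current instruction
    }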
while (1) { diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 0275f96..3747122 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -10,6 +10,7 @@ #include "llvm-c/BitWriter.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/Module.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5d1dac1..cc73b84 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -197,6 +197,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NO_INLINE; case Attribute::NonLazyBind: return bitc::ATTR_KIND_NON_LAZY_BIND; + case Attribute::NonNull: + return bitc::ATTR_KIND_NON_NULL; case Attribute::NoRedZone: return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: @@ -474,8 +476,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } -static unsigned getEncodedLinkage(const GlobalValue *GV) { - switch (GV->getLinkage()) { +static unsigned getEncodedLinkage(const GlobalValue &GV) { + switch (GV.getLinkage()) { case GlobalValue::ExternalLinkage: return 0; case GlobalValue::WeakAnyLinkage: return 1; case GlobalValue::AppendingLinkage: return 2; @@ -491,8 +493,8 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { llvm_unreachable("Invalid linkage"); } -static unsigned getEncodedVisibility(const GlobalValue *GV) { - switch (GV->getVisibility()) { +static unsigned getEncodedVisibility(const GlobalValue &GV) { + switch (GV.getVisibility()) { case GlobalValue::DefaultVisibility: return 0; case GlobalValue::HiddenVisibility: return 1; case GlobalValue::ProtectedVisibility: return 2; @@ -500,8 +502,8 @@ static unsigned getEncodedVisibility(const GlobalValue *GV) { llvm_unreachable("Invalid visibility"); } -static unsigned getEncodedDLLStorageClass(const GlobalValue *GV) { - switch (GV->getDLLStorageClass()) { +static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) { + switch (GV.getDLLStorageClass()) { case GlobalValue::DefaultStorageClass: return 0; case GlobalValue::DLLImportStorageClass: return 1; case GlobalValue::DLLExportStorageClass: return 2; @@ -509,8 +511,8 @@ static unsigned getEncodedDLLStorageClass(const GlobalValue *GV) { llvm_unreachable("Invalid DLL storage class"); } -static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) { - switch (GV->getThreadLocalMode()) { +static unsigned getEncodedThreadLocalMode(const GlobalVariable &GV) { + switch (GV.getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: return 0; case GlobalVariable::GeneralDynamicTLSModel: return 1; case GlobalVariable::LocalDynamicTLSModel: return 2; @@ -541,36 +543,35 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, std::map GCMap; unsigned MaxAlignment = 0; unsigned MaxGlobalType = 0; - for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end(); - GV != E; ++GV) { - MaxAlignment = std::max(MaxAlignment, GV->getAlignment()); - MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType())); - if (GV->hasSection()) { + for (const GlobalValue &GV : M->globals()) { + MaxAlignment = std::max(MaxAlignment, GV.getAlignment()); + MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getType())); + if (GV.hasSection()) { // Give section names unique ID's. 
- unsigned &Entry = SectionMap[GV->getSection()]; + unsigned &Entry = SectionMap[GV.getSection()]; if (!Entry) { - WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(), + WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(), 0/*TODO*/, Stream); Entry = SectionMap.size(); } } } - for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { - MaxAlignment = std::max(MaxAlignment, F->getAlignment()); - if (F->hasSection()) { + for (const Function &F : *M) { + MaxAlignment = std::max(MaxAlignment, F.getAlignment()); + if (F.hasSection()) { // Give section names unique ID's. - unsigned &Entry = SectionMap[F->getSection()]; + unsigned &Entry = SectionMap[F.getSection()]; if (!Entry) { - WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(), + WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(), 0/*TODO*/, Stream); Entry = SectionMap.size(); } } - if (F->hasGC()) { + if (F.hasGC()) { // Same for GC names. - unsigned &Entry = GCMap[F->getGC()]; + unsigned &Entry = GCMap[F.getGC()]; if (!Entry) { - WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(), + WriteStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0/*TODO*/, Stream); Entry = GCMap.size(); } @@ -606,28 +607,27 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the global variable information. SmallVector Vals; - for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end(); - GV != E; ++GV) { + for (const GlobalVariable &GV : M->globals()) { unsigned AbbrevToUse = 0; // GLOBALVAR: [type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass] - Vals.push_back(VE.getTypeID(GV->getType())); - Vals.push_back(GV->isConstant()); - Vals.push_back(GV->isDeclaration() ? 0 : - (VE.getValueID(GV->getInitializer()) + 1)); + Vals.push_back(VE.getTypeID(GV.getType())); + Vals.push_back(GV.isConstant()); + Vals.push_back(GV.isDeclaration() ? 0 : + (VE.getValueID(GV.getInitializer()) + 1)); Vals.push_back(getEncodedLinkage(GV)); - Vals.push_back(Log2_32(GV->getAlignment())+1); - Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0); - if (GV->isThreadLocal() || - GV->getVisibility() != GlobalValue::DefaultVisibility || - GV->hasUnnamedAddr() || GV->isExternallyInitialized() || - GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) { + Vals.push_back(Log2_32(GV.getAlignment())+1); + Vals.push_back(GV.hasSection() ? SectionMap[GV.getSection()] : 0); + if (GV.isThreadLocal() || + GV.getVisibility() != GlobalValue::DefaultVisibility || + GV.hasUnnamedAddr() || GV.isExternallyInitialized() || + GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); - Vals.push_back(GV->hasUnnamedAddr()); - Vals.push_back(GV->isExternallyInitialized()); + Vals.push_back(GV.hasUnnamedAddr()); + Vals.push_back(GV.isExternallyInitialized()); Vals.push_back(getEncodedDLLStorageClass(GV)); } else { AbbrevToUse = SimpleGVarAbbrev; @@ -638,20 +638,20 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, } // Emit the function proto information. 
- for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { + for (const Function &F : *M) { // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, // section, visibility, gc, unnamed_addr, prefix] - Vals.push_back(VE.getTypeID(F->getType())); - Vals.push_back(F->getCallingConv()); - Vals.push_back(F->isDeclaration()); + Vals.push_back(VE.getTypeID(F.getType())); + Vals.push_back(F.getCallingConv()); + Vals.push_back(F.isDeclaration()); Vals.push_back(getEncodedLinkage(F)); - Vals.push_back(VE.getAttributeID(F->getAttributes())); - Vals.push_back(Log2_32(F->getAlignment())+1); - Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0); + Vals.push_back(VE.getAttributeID(F.getAttributes())); + Vals.push_back(Log2_32(F.getAlignment())+1); + Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0); Vals.push_back(getEncodedVisibility(F)); - Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0); - Vals.push_back(F->hasUnnamedAddr()); - Vals.push_back(F->hasPrefixData() ? (VE.getValueID(F->getPrefixData()) + 1) + Vals.push_back(F.hasGC() ? GCMap[F.getGC()] : 0); + Vals.push_back(F.hasUnnamedAddr()); + Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1) : 0); Vals.push_back(getEncodedDLLStorageClass(F)); @@ -661,14 +661,13 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, } // Emit the alias information. - for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end(); - AI != E; ++AI) { + for (const GlobalAlias &A : M->aliases()) { // ALIAS: [alias type, aliasee val#, linkage, visibility] - Vals.push_back(VE.getTypeID(AI->getType())); - Vals.push_back(VE.getValueID(AI->getAliasee())); - Vals.push_back(getEncodedLinkage(AI)); - Vals.push_back(getEncodedVisibility(AI)); - Vals.push_back(getEncodedDLLStorageClass(AI)); + Vals.push_back(VE.getTypeID(A.getType())); + Vals.push_back(VE.getValueID(A.getAliasee())); + Vals.push_back(getEncodedLinkage(A)); + Vals.push_back(getEncodedVisibility(A)); + Vals.push_back(getEncodedDLLStorageClass(A)); unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse); Vals.clear(); @@ -917,7 +916,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, SmallVector Record; const ValueEnumerator::ValueList &Vals = VE.getValues(); - Type *LastTy = 0; + Type *LastTy = nullptr; for (unsigned i = FirstVal; i != LastVal; ++i) { const Value *V = Vals[i].first; // If we need to switch types, do so now. 
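The WriteModuleInfo rewrite above is the same change applied three times: explicit const_iterator loops over the module's globals, functions, and aliases become C++11 range-based for over references, after which every `GV->` turns into `GV.`, matching the getEncoded* helpers that now take const references. The shape of the rewrite on a stand-in container:

    #include <algorithm>
    #include <vector>

    struct Global { unsigned Alignment; };
    struct Mod {
      std::vector<Global> Globals;
      const std::vector<Global> &globals() const { return Globals; }
    };

    static unsigned maxAlignment(const Mod &M) {
      unsigned Max = 0;
      // Before: for (Mod::const_iterator GV = M.globals().begin(),
      //              E = M.globals().end(); GV != E; ++GV)
      //           Max = std::max(Max, GV->Alignment);
      for (const Global &GV : M.globals())   // after: a reference, not an
        Max = std::max(Max, GV.Alignment);   // iterator, so '.' not '->'
      return Max;
    }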
@@ -1087,12 +1086,14 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, Code = bitc::CST_CODE_CE_EXTRACTELT; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); + Record.push_back(VE.getTypeID(C->getOperand(1)->getType())); Record.push_back(VE.getValueID(C->getOperand(1))); break; case Instruction::InsertElement: Code = bitc::CST_CODE_CE_INSERTELT; Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); + Record.push_back(VE.getTypeID(C->getOperand(2)->getType())); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ShuffleVector: @@ -1253,13 +1254,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::ExtractElement: Code = bitc::FUNC_CODE_INST_EXTRACTELT; PushValueAndType(I.getOperand(0), InstID, Vals, VE); - pushValue(I.getOperand(1), InstID, Vals, VE); + PushValueAndType(I.getOperand(1), InstID, Vals, VE); break; case Instruction::InsertElement: Code = bitc::FUNC_CODE_INST_INSERTELT; PushValueAndType(I.getOperand(0), InstID, Vals, VE); pushValue(I.getOperand(1), InstID, Vals, VE); - pushValue(I.getOperand(2), InstID, Vals, VE); + PushValueAndType(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ShuffleVector: Code = bitc::FUNC_CODE_INST_SHUFFLEVEC; @@ -1469,7 +1470,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Code = bitc::FUNC_CODE_INST_CALL; Vals.push_back(VE.getAttributeID(CI.getAttributes())); - Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall())); + Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()) | + unsigned(CI.isMustTailCall()) << 14); PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee // Emit value #'s for the fixed parameters. diff --git a/lib/Bitcode/module.modulemap b/lib/Bitcode/module.modulemap new file mode 100644 index 0000000..7df1a0a --- /dev/null +++ b/lib/Bitcode/module.modulemap @@ -0,0 +1 @@ +module Bitcode { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 25c438c..0f38c64 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "post-RA-sched" #include "AggressiveAntiDepBreaker.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -29,6 +28,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "post-RA-sched" + // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod static cl::opt DebugDiv("agg-antidep-debugdiv", @@ -121,7 +122,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), RegClassInfo(RCI), - State(NULL) { + State(nullptr) { /* Collect a bitset of all registers that are only broken if they are on the critical path. 
*/ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { @@ -144,7 +145,7 @@ AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { } void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { - assert(State == NULL); + assert(!State); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); @@ -169,7 +170,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { unsigned Reg = *I; if (!IsReturnBlock && !Pristine.test(Reg)) continue; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -183,7 +184,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { void AggressiveAntiDepBreaker::FinishBlock() { delete State; - State = NULL; + State = nullptr; } void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, @@ -230,13 +231,13 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, if (Reg == 0) return false; - MachineOperand *Op = NULL; + MachineOperand *Op = nullptr; if (MO.isDef()) Op = MI->findRegisterUseOperand(Reg, true); else Op = MI->findRegisterDefOperand(Reg); - return((Op != NULL) && Op->isImplicit()); + return(Op && Op->isImplicit()); } void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, @@ -273,10 +274,10 @@ static void AntiDepEdges(const SUnit *SU, std::vector& Edges) { /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. static const SUnit *CriticalPathStep(const SUnit *SU) { - const SDep *Next = 0; + const SDep *Next = nullptr; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. - if (SU != 0) { + if (SU) { for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { const SUnit *PredSU = P->getSUnit(); @@ -292,7 +293,7 @@ static const SUnit *CriticalPathStep(const SUnit *SU) { } } - return (Next) ? Next->getSUnit() : 0; + return (Next) ? Next->getSUnit() : nullptr; } void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, @@ -309,8 +310,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DefIndices[Reg] = ~0u; RegRefs.erase(Reg); State->LeaveGroup(Reg); - DEBUG(if (header != NULL) { - dbgs() << header << TRI->getName(Reg); header = NULL; }); + DEBUG(if (header) { + dbgs() << header << TRI->getName(Reg); header = nullptr; }); DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); } // Repeat for subregisters. @@ -321,14 +322,14 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DefIndices[SubregReg] = ~0u; RegRefs.erase(SubregReg); State->LeaveGroup(SubregReg); - DEBUG(if (header != NULL) { - dbgs() << header << TRI->getName(Reg); header = NULL; }); + DEBUG(if (header) { + dbgs() << header << TRI->getName(Reg); header = nullptr; }); DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << State->GetGroup(SubregReg) << tag); } } - DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer); + DEBUG(if (!header && footer) dbgs() << footer); } void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, @@ -382,7 +383,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, } // Note register reference... 
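CriticalPathStep above advances one hop along the bottom-up critical path by keeping whichever predecessor edge reaches deepest; the breaker uses that walk to know when the instruction it is scanning lies on the critical path. The scan, modeled on a bare DAG (edge latency is folded into Depth here for brevity):

    #include <vector>

    struct Unit;
    struct Edge { Unit *Pred; unsigned Depth; };
    struct Unit { std::vector<Edge> Preds; };

    static const Unit *criticalPathStep(const Unit *U) {
      const Edge *Next = nullptr;
      unsigned NextDepth = 0;
      if (U)
        for (const Edge &P : U->Preds)     // keep the deepest predecessor
          if (P.Depth > NextDepth) {
            NextDepth = P.Depth;
            Next = &P;
          }
      return Next ? Next->Pred : nullptr;  // nullptr ends the walk
    }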
- const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; if (i < MI->getDesc().getNumOperands()) RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; @@ -466,7 +467,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, } // Note register reference... - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; if (i < MI->getDesc().getNumOperands()) RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; @@ -516,7 +517,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { const TargetRegisterClass *RC = Q->second.RC; - if (RC == NULL) continue; + if (!RC) continue; BitVector RCBV = TRI->getAllocatableSet(MF, RC); if (first) { @@ -734,8 +735,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Track progress along the critical path through the SUnit graph as // we walk the instructions. This is needed for regclasses that only // break critical-path anti-dependencies. - const SUnit *CriticalPathSU = 0; - MachineInstr *CriticalPathMI = 0; + const SUnit *CriticalPathSU = nullptr; + MachineInstr *CriticalPathMI = nullptr; if (CriticalPathSet.any()) { for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; @@ -788,10 +789,10 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // If MI is not on the critical path, then we don't rename // registers in the CriticalPathSet. - BitVector *ExcludeRegs = NULL; + BitVector *ExcludeRegs = nullptr; if (MI == CriticalPathMI) { CriticalPathSU = CriticalPathStep(CriticalPathSU); - CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; + CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr; } else if (CriticalPathSet.any()) { ExcludeRegs = &CriticalPathSet; } @@ -815,7 +816,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Don't break anti-dependencies on non-allocatable registers. 
DEBUG(dbgs() << " (non-allocatable)\n"); continue; - } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) { + } else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) { // Don't break anti-dependencies for critical path registers // if not on the critical path DEBUG(dbgs() << " (not critical-path)\n"); @@ -829,9 +830,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( } else { // No anti-dep breaking for implicit deps MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg); - assert(AntiDepOp != NULL && - "Can't find index for defined register operand"); - if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) { + assert(AntiDepOp && "Can't find index for defined register operand"); + if (!AntiDepOp || AntiDepOp->isImplicit()) { DEBUG(dbgs() << " (implicit)\n"); continue; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 29b6a10..2ab9d89 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -170,7 +170,8 @@ class RegisterClassInfo; void GetPassthruRegs(MachineInstr *MI, std::set& PassthruRegs); void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag, - const char *header =NULL, const char *footer =NULL); + const char *header = nullptr, + const char *footer = nullptr); void PrescanInstruction(MachineInstr *MI, unsigned Count, std::set& PassthruRegs); diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 3fa1f8f..dc9bcff 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "AllocationOrder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + // Compare VirtRegMap::getRegAllocPref(). AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 6ac5de2..6fc83a2 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -46,7 +46,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, EI != EE; ++EI) { if (Indices && *Indices == unsigned(EI - EB)) return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex); + CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex); } return CurIndex; } @@ -56,7 +56,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty, for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { if (Indices && *Indices == i) return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex); + CurIndex = ComputeLinearIndex(EltTy, nullptr, nullptr, CurIndex); } return CurIndex; } @@ -228,7 +228,7 @@ static const Value *getNoopInput(const Value *V, // through. 
const Instruction *I = dyn_cast(V); if (!I || I->getNumOperands() == 0) return V; - const Value *NoopInput = 0; + const Value *NoopInput = nullptr; Value *Op = I->getOperand(0); if (isa(I)) { diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk index 26f04d0..7feb42c 100644 --- a/lib/CodeGen/Android.mk +++ b/lib/CodeGen/Android.mk @@ -4,6 +4,7 @@ codegen_SRC_FILES := \ AggressiveAntiDepBreaker.cpp \ AllocationOrder.cpp \ Analysis.cpp \ + AtomicExpandLoadLinkedPass.cpp \ BasicTargetTransformInfo.cpp \ BranchFolding.cpp \ CalcSpillWeights.cpp \ diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 403feb4..1cb0159 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -57,10 +57,10 @@ void ARMException::endModule() { /// beginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void ARMException::beginFunction(const MachineFunction *MF) { - getTargetStreamer().emitFnStart(); - if (Asm->MF->getFunction()->needsUnwindTableEntry()) - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + getTargetStreamer().emitFnStart(); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); // See if we need call frame info. AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); assert(MoveType != AsmPrinter::CFI_M_EH && @@ -77,16 +77,16 @@ void ARMException::endFunction(const MachineFunction *) { if (shouldEmitCFI) Asm->OutStreamer.EmitCFIEndProc(); + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + ARMTargetStreamer &ATS = getTargetStreamer(); - if (!Asm->MF->getFunction()->needsUnwindTableEntry()) + if (!Asm->MF->getFunction()->needsUnwindTableEntry() && + MMI->getLandingPads().empty()) ATS.emitCantUnwind(); else { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(); - if (!MMI->getLandingPads().empty()) { // Emit references to personality. if (const Function * Personality = @@ -104,7 +104,8 @@ void ARMException::endFunction(const MachineFunction *) { } } - ATS.emitFnEnd(); + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + ATS.emitFnEnd(); } void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { @@ -144,7 +145,7 @@ void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); } - Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]), + Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]), TTypeEncoding); } } diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp new file mode 100644 index 0000000..8dab5e5 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -0,0 +1,45 @@ +//===-- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddressPool.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+class MCExpr;
+
+unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
+  HasBeenUsed = true;
+  auto IterBool =
+      Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS)));
+  return IterBool.first->second.Number;
+}
+
+// Emit addresses into the section given.
+void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) {
+  if (Pool.empty())
+    return;
+
+  // Start the dwarf addr section.
+  Asm.OutStreamer.SwitchSection(AddrSection);
+
+  // Order the address pool entries by ID
+  SmallVector<const MCExpr *, 64> Entries(Pool.size());
+
+  for (const auto &I : Pool)
+    Entries[I.second.Number] =
+        I.second.TLS
+            ? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first)
+            : MCSymbolRefExpr::Create(I.first, Asm.OutContext);
+
+  for (const MCExpr *Entry : Entries)
+    Asm.OutStreamer.EmitValue(Entry, Asm.getDataLayout().getPointerSize());
+}
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
new file mode 100644
index 0000000..42757d7
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -0,0 +1,52 @@
+//===-- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
+#define CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+class MCSection;
+class MCSymbol;
+class AsmPrinter;
+// Collection of addresses for this unit and assorted labels.
+// A Symbol->unsigned mapping of addresses used by indirect
+// references.
+class AddressPool {
+  struct AddressPoolEntry {
+    unsigned Number;
+    bool TLS;
+    AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {}
+  };
+  DenseMap<const MCSymbol *, AddressPoolEntry> Pool;
+
+  /// Record whether the AddressPool has been queried for an address index since
+  /// the last "resetUsedFlag" call. Used to implement type unit fallback - a
+  /// type that references addresses cannot be placed in a type unit when using
+  /// fission.
+  bool HasBeenUsed;
+
+public:
+  AddressPool() : HasBeenUsed(false) {}
+
+  /// \brief Returns the index into the address pool with the given
+  /// label/symbol.
+ unsigned getIndex(const MCSymbol *Sym, bool TLS = false); + + void emit(AsmPrinter &Asm, const MCSection *AddrSection); + + bool isEmpty() { return Pool.empty(); } + + bool hasBeenUsed() const { return HasBeenUsed; } + + void resetUsedFlag() { HasBeenUsed = false; } +}; +} +#endif diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk index a725fba..f56eb6e 100644 --- a/lib/CodeGen/AsmPrinter/Android.mk +++ b/lib/CodeGen/AsmPrinter/Android.mk @@ -8,17 +8,21 @@ codegen_asmprinter_SRC_FILES := \ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ + AddressPool.cpp \ AsmPrinter.cpp \ AsmPrinterDwarf.cpp \ AsmPrinterInlineAsm.cpp \ ARMException.cpp \ + DbgValueHistoryCalculator.cpp \ DIE.cpp \ DIEHash.cpp \ DwarfAccelTable.cpp \ DwarfCFIException.cpp \ DwarfDebug.cpp \ DwarfException.cpp \ - DwarfUnit.cpp \ + DwarfFile.cpp \ + DwarfStringPool.cpp \ + DwarfUnit.cpp \ ErlangGCPrinter.cpp \ OcamlGCPrinter.cpp \ Win64Exception.cpp \ @@ -38,17 +42,21 @@ ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) include $(CLEAR_VARS) LOCAL_SRC_FILES := \ + AddressPool.cpp \ AsmPrinter.cpp \ AsmPrinterDwarf.cpp \ AsmPrinterInlineAsm.cpp \ ARMException.cpp \ + DbgValueHistoryCalculator.cpp \ DIE.cpp \ DIEHash.cpp \ DwarfAccelTable.cpp \ DwarfCFIException.cpp \ DwarfDebug.cpp \ DwarfException.cpp \ - DwarfUnit.cpp \ + DwarfFile.cpp \ + DwarfStringPool.cpp \ + DwarfUnit.cpp \ ErlangGCPrinter.cpp \ OcamlGCPrinter.cpp \ Win64Exception.cpp \ diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index c3afc8b..7de9c6d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" #include "DwarfDebug.h" #include "DwarfException.h" @@ -53,6 +52,8 @@ #include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "Debug Info Emission"; static const char *const EHTimerName = "DWARF Exception Writer"; @@ -62,9 +63,9 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed"); char AsmPrinter::ID = 0; -typedef DenseMap gcp_map_type; +typedef DenseMap> gcp_map_type; static gcp_map_type &getGCMap(void *&P) { - if (P == 0) + if (!P) P = new gcp_map_type(); return *(gcp_map_type*)P; } @@ -101,23 +102,21 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()), OutContext(Streamer.getContext()), OutStreamer(Streamer), - LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - DD = 0; MMI = 0; LI = 0; MF = 0; - CurrentFnSym = CurrentFnSymForSize = 0; - GCMetadataPrinters = 0; + LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) { + DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr; + CurrentFnSym = CurrentFnSymForSize = nullptr; + GCMetadataPrinters = nullptr; VerboseAsm = Streamer.isVerboseAsm(); } AsmPrinter::~AsmPrinter() { - assert(DD == 0 && Handlers.empty() && "Debug/EH info didn't get finalized"); + assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized"); - if (GCMetadataPrinters != 0) { + if (GCMetadataPrinters) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I) - delete I->second; delete &GCMap; - GCMetadataPrinters = 0; + GCMetadataPrinters = nullptr; } 
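// (Editor's sketch, not part of the patch.) Typical use of the AddressPool
// introduced above: a DWARF emitter interns each referenced symbol to get a
// stable index (e.g. for DW_FORM_GNU_addr_index forms) and flushes the pool
// once at the end of the module. The function shape and the section lookup
// here are assumptions about the caller, not code from this patch:
static void emitAddressPool(AsmPrinter &Printer, AddressPool &Pool,
                            const MCSymbol *Sym, const MCSymbol *TlsSym) {
  unsigned Idx = Pool.getIndex(Sym);                    // deduplicated by symbol
  unsigned TlsIdx = Pool.getIndex(TlsSym, /*TLS=*/true); // TLS variant
  (void)Idx; (void)TlsIdx; // the indices would be emitted as ULEB128 values
  Pool.emit(Printer, Printer.getObjFileLowering().getDwarfAddrSection());
}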
delete &OutStreamer; @@ -209,7 +208,7 @@ bool AsmPrinter::doInitialization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); - for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I) + for (auto &I : *MI) if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) MP->beginAssembly(*this); @@ -233,7 +232,7 @@ bool AsmPrinter::doInitialization(Module &M) { } } - DwarfException *DE = 0; + DwarfException *DE = nullptr; switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: break; @@ -370,10 +369,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // sections and expected to be contiguous (e.g. ObjC metadata). unsigned AlignLog = getGVAlignmentLog2(GV, *DL); - for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { - const HandlerInfo &OI = Handlers[I]; - NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); - OI.Handler->setSymbolSize(GVSym, Size); + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->setSymbolSize(GVSym, Size); } // Handle common and BSS local symbols (.lcomm). @@ -545,10 +543,9 @@ void AsmPrinter::EmitFunctionHeader() { } // Emit pre-function debug and/or EH information. - for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { - const HandlerInfo &OI = Handlers[I]; - NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); - OI.Handler->beginFunction(MF); + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->beginFunction(MF); } // Emit the prefix data. @@ -746,69 +743,65 @@ void AsmPrinter::EmitFunctionBody() { // Print out code for the function. bool HasAnyRealCode = false; - const MachineInstr *LastMI = 0; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { + const MachineInstr *LastMI = nullptr; + for (auto &MBB : *MF) { // Print a label for the basic block. - EmitBasicBlockStart(I); - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - LastMI = II; + EmitBasicBlockStart(MBB); + for (auto &MI : MBB) { + LastMI = &MI; // Print the assembly for the instruction. 
- if (!II->isPosition() && !II->isImplicitDef() && !II->isKill() && - !II->isDebugValue()) { + if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && + !MI.isDebugValue()) { HasAnyRealCode = true; ++EmittedInsts; } if (ShouldPrintDebugScopes) { - for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) { - const HandlerInfo &OI = Handlers[III]; - NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); - OI.Handler->beginInstruction(II); + HI.Handler->beginInstruction(&MI); } } if (isVerbose()) - emitComments(*II, OutStreamer.GetCommentOS()); + emitComments(MI, OutStreamer.GetCommentOS()); - switch (II->getOpcode()) { + switch (MI.getOpcode()) { case TargetOpcode::CFI_INSTRUCTION: - emitCFIInstruction(*II); + emitCFIInstruction(MI); break; case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: - OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); + OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol()); break; case TargetOpcode::INLINEASM: - EmitInlineAsm(II); + EmitInlineAsm(&MI); break; case TargetOpcode::DBG_VALUE: if (isVerbose()) { - if (!emitDebugValueComment(II, *this)) - EmitInstruction(II); + if (!emitDebugValueComment(&MI, *this)) + EmitInstruction(&MI); } break; case TargetOpcode::IMPLICIT_DEF: - if (isVerbose()) emitImplicitDef(II); + if (isVerbose()) emitImplicitDef(&MI); break; case TargetOpcode::KILL: - if (isVerbose()) emitKill(II, *this); + if (isVerbose()) emitKill(&MI, *this); break; default: - EmitInstruction(II); + EmitInstruction(&MI); break; } if (ShouldPrintDebugScopes) { - for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) { - const HandlerInfo &OI = Handlers[III]; - NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); - OI.Handler->endInstruction(); + HI.Handler->endInstruction(); } } } @@ -835,11 +828,10 @@ void AsmPrinter::EmitFunctionBody() { } const Function *F = MF->getFunction(); - for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) { - const BasicBlock *BB = i; - if (!BB->hasAddressTaken()) + for (const auto &BB : *F) { + if (!BB.hasAddressTaken()) continue; - MCSymbol *Sym = GetBlockAddressSymbol(BB); + MCSymbol *Sym = GetBlockAddressSymbol(&BB); if (Sym->isDefined()) continue; OutStreamer.AddComment("Address of block that was removed by CodeGen"); @@ -866,10 +858,9 @@ void AsmPrinter::EmitFunctionBody() { } // Emit post-function debug and/or EH information. - for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { - const HandlerInfo &OI = Handlers[I]; - NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled); - OI.Handler->endFunction(MF); + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->endFunction(MF); } MMI->EndFunction(); @@ -881,13 +872,11 @@ void AsmPrinter::EmitFunctionBody() { bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. 
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - EmitGlobalVariable(I); + for (const auto &G : M.globals()) + EmitGlobalVariable(&G); // Emit visibility info for declarations - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { - const Function &F = *I; + for (const Function &F : M) { if (!F.isDeclaration()) continue; GlobalValue::VisibilityTypes V = F.getVisibility(); @@ -908,15 +897,14 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer.Flush(); // Finalize debug and EH information. - for (unsigned I = 0, E = Handlers.size(); I != E; ++I) { - const HandlerInfo &OI = Handlers[I]; - NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); - OI.Handler->endModule(); - delete OI.Handler; + HI.Handler->endModule(); + delete HI.Handler; } Handlers.clear(); - DD = 0; + DD = nullptr; // If the target wants to know about weak references, print them all. if (MAI->getWeakRefDirective()) { @@ -926,36 +914,36 @@ bool AsmPrinter::doFinalization(Module &M) { // happen with the MC stuff eventually. // Print out module-level global variables here. - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); + for (const auto &G : M.globals()) { + if (!G.hasExternalWeakLinkage()) + continue; + OutStreamer.EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference); } - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); + for (const auto &F : M) { + if (!F.hasExternalWeakLinkage()) + continue; + OutStreamer.EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference); } } if (MAI->hasSetDirective()) { OutStreamer.AddBlankLine(); - for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) { - MCSymbol *Name = getSymbol(I); + for (const auto &Alias : M.aliases()) { + MCSymbol *Name = getSymbol(&Alias); - const GlobalValue *GV = I->getAliasedGlobal(); + const GlobalValue *GV = Alias.getAliasee(); assert(!GV->isDeclaration()); MCSymbol *Target = getSymbol(GV); - if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) + if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); - else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) + else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); else - assert(I->hasLocalLinkage() && "Invalid alias linkage"); + assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); - EmitVisibility(Name, I->getVisibility()); + EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: OutStreamer.EmitAssignment(Name, @@ -966,7 +954,7 @@ bool AsmPrinter::doFinalization(Module &M) { GCModuleInfo *MI = getAnalysisIfAvailable(); assert(MI && "AsmPrinter didn't require GCModuleInfo?"); for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) - if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I)) MP->finishAssembly(*this); // Emit llvm.ident metadata in an '.ident' directive. 
@@ -983,8 +971,8 @@ bool AsmPrinter::doFinalization(Module &M) { // after everything else has gone out. EmitEndOfAsmFile(M); - delete Mang; Mang = 0; - MMI = 0; + delete Mang; Mang = nullptr; + MMI = nullptr; OutStreamer.Finish(); OutStreamer.reset(); @@ -1100,7 +1088,7 @@ void AsmPrinter::EmitConstantPool() { void AsmPrinter::EmitJumpTableInfo() { const DataLayout *DL = MF->getTarget().getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (MJTI == 0) return; + if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; const std::vector &JT = MJTI->getJumpTables(); if (JT.empty()) return; @@ -1185,7 +1173,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned UID) const { assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); - const MCExpr *Value = 0; + const MCExpr *Value = nullptr; switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: llvm_unreachable("Cannot emit EK_Inline jump table entry"); @@ -1308,6 +1296,15 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { } } +namespace { +struct Structor { + Structor() : Priority(0), Func(nullptr), ComdatKey(nullptr) {} + int Priority; + llvm::Constant *Func; + llvm::GlobalValue *ComdatKey; +}; +} // end namespace + /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { @@ -1319,37 +1316,52 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { const ConstantArray *InitList = dyn_cast(List); if (!InitList) return; // Not an array! StructType *ETy = dyn_cast(InitList->getType()->getElementType()); - if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs! + // FIXME: Only allow the 3-field form in LLVM 4.0. + if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3) + return; // Not an array of two or three elements! if (!isa(ETy->getTypeAtIndex(0U)) || !isa(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr). + if (ETy->getNumElements() == 3 && !isa(ETy->getTypeAtIndex(2U))) + return; // Not (int, ptr, ptr). // Gather the structors in a form that's convenient for sorting by priority. - typedef std::pair Structor; SmallVector Structors; - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - ConstantStruct *CS = dyn_cast(InitList->getOperand(i)); + for (Value *O : InitList->operands()) { + ConstantStruct *CS = dyn_cast(O); if (!CS) continue; // Malformed. if (CS->getOperand(1)->isNullValue()) break; // Found a null terminator, skip the rest. ConstantInt *Priority = dyn_cast(CS->getOperand(0)); if (!Priority) continue; // Malformed. 
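// (Editor's note, not part of the patch.) The "two or three elements" check
// above corresponds to the two accepted shapes of llvm.global_ctors and
// llvm.global_dtors; for example (identifiers illustrative):
//
//   ; legacy 2-field form: { priority, function }
//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
//       [{ i32, void ()* } { i32 65535, void ()* @init }]
//
//   ; new 3-field form: { priority, function, comdat key }
//   @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
//       [{ i32, void ()*, i8* } { i32 65535, void ()* @init,
//                                 i8* bitcast (void ()* @init to i8*) }]
//
// A null third field means "no comdat key"; a non-null one is stripped of
// pointer casts below and routed to a key-specific ctor/dtor section.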
- Structors.push_back(std::make_pair(Priority->getLimitedValue(65535), - CS->getOperand(1))); + Structors.push_back(Structor()); + Structor &S = Structors.back(); + S.Priority = Priority->getLimitedValue(65535); + S.Func = CS->getOperand(1); + if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue()) + S.ComdatKey = dyn_cast(CS->getOperand(2)->stripPointerCasts()); } // Emit the function pointers in the target-specific order const DataLayout *DL = TM.getDataLayout(); unsigned Align = Log2_32(DL->getPointerPrefAlignment()); - std::stable_sort(Structors.begin(), Structors.end(), less_first()); - for (unsigned i = 0, e = Structors.size(); i != e; ++i) { + std::stable_sort(Structors.begin(), Structors.end(), + [](const Structor &L, + const Structor &R) { return L.Priority < R.Priority; }); + for (Structor &S : Structors) { + const TargetLoweringObjectFile &Obj = getObjFileLowering(); + const MCSymbol *KeySym = nullptr; + const MCSection *KeySec = nullptr; + if (S.ComdatKey) { + KeySym = getSymbol(S.ComdatKey); + KeySec = getObjFileLowering().SectionForGlobal(S.ComdatKey, *Mang, TM); + } const MCSection *OutputSection = - (isCtor ? - getObjFileLowering().getStaticCtorSection(Structors[i].first) : - getObjFileLowering().getStaticDtorSection(Structors[i].first)); + (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym, KeySec) + : Obj.getStaticDtorSection(S.Priority, KeySym, KeySec)); OutStreamer.SwitchSection(OutputSection); if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) EmitAlignment(Align); - EmitXXStructor(Structors[i].second); + EmitXXStructor(S.Func); } } @@ -1470,7 +1482,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // an explicit alignment requested, it will override the alignment request // if required for correctness. // -void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1503,7 +1515,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); const ConstantExpr *CE = dyn_cast(CV); - if (CE == 0) { + if (!CE) { llvm_unreachable("Unknown constant value to lower!"); } @@ -1528,7 +1540,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; CE->printAsOperand(OS, /*PrintType=*/false, - !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { @@ -2055,7 +2067,7 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { /// PrintParentLoopComment - Print comments about parent loops of this one. 
static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, unsigned FunctionNumber) { - if (Loop == 0) return; + if (!Loop) return; PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber); OS.indent(Loop->getLoopDepth()*2) << "Parent Loop BB" << FunctionNumber << "_" @@ -2069,12 +2081,12 @@ static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, unsigned FunctionNumber) { // Add child loop information - for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){ - OS.indent((*CL)->getLoopDepth()*2) + for (const MachineLoop *CL : *Loop) { + OS.indent(CL->getLoopDepth()*2) << "Child Loop BB" << FunctionNumber << "_" - << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth() + << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth() << '\n'; - PrintChildLoopComment(OS, *CL, FunctionNumber); + PrintChildLoopComment(OS, CL, FunctionNumber); } } @@ -2084,7 +2096,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, const AsmPrinter &AP) { // Add loop depth information const MachineLoop *Loop = LI->getLoopFor(&MBB); - if (Loop == 0) return; + if (!Loop) return; MachineBasicBlock *Header = Loop->getHeader(); assert(Header && "No header for loop"); @@ -2120,42 +2132,41 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. -void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { +void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { // Emit an alignment directive for this block, if needed. - if (unsigned Align = MBB->getAlignment()) + if (unsigned Align = MBB.getAlignment()) EmitAlignment(Align); // If the block has its address taken, emit any labels that were used to // reference the block. It is possible that there is more than one label // here, because multiple LLVM BB's may have been RAUW'd to this block after // the references were generated. - if (MBB->hasAddressTaken()) { - const BasicBlock *BB = MBB->getBasicBlock(); + if (MBB.hasAddressTaken()) { + const BasicBlock *BB = MBB.getBasicBlock(); if (isVerbose()) OutStreamer.AddComment("Block address taken"); - std::vector Syms = MMI->getAddrLabelSymbolToEmit(BB); - - for (unsigned i = 0, e = Syms.size(); i != e; ++i) - OutStreamer.EmitLabel(Syms[i]); + std::vector Symbols = MMI->getAddrLabelSymbolToEmit(BB); + for (auto *Sym : Symbols) + OutStreamer.EmitLabel(Sym); } // Print some verbose block comments. if (isVerbose()) { - if (const BasicBlock *BB = MBB->getBasicBlock()) + if (const BasicBlock *BB = MBB.getBasicBlock()) if (BB->hasName()) OutStreamer.AddComment("%" + BB->getName()); - emitBasicBlockLoopComments(*MBB, LI, *this); + emitBasicBlockLoopComments(MBB, LI, *this); } // Print the main label for the block. - if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) { + if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. 
- OutStreamer.emitRawComment(" BB#" + Twine(MBB->getNumber()) + ":", false); + OutStreamer.emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); } } else { - OutStreamer.EmitLabel(MBB->getSymbol()); + OutStreamer.EmitLabel(MBB.getSymbol()); } } @@ -2191,14 +2202,11 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return false; // If there isn't exactly one predecessor, it can't be a fall through. - MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; - ++PI2; - if (PI2 != MBB->pred_end()) + if (MBB->pred_size() > 1) return false; // The predecessor has to be immediately before this block. - MachineBasicBlock *Pred = *PI; - + MachineBasicBlock *Pred = *MBB->pred_begin(); if (!Pred->isLayoutSuccessor(MBB)) return false; @@ -2207,10 +2215,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { return true; // Check the terminators in the previous blocks - for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(), - IE = Pred->end(); II != IE; ++II) { - MachineInstr &MI = *II; - + for (const auto &MI : Pred->terminators()) { // If it is not a simple branch, we are in a table somewhere. if (!MI.isBranch() || MI.isIndirectBranch()) return false; @@ -2231,25 +2236,25 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { -GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { - if (!S->usesMetadata()) - return 0; +GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { + if (!S.usesMetadata()) + return nullptr; gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - gcp_map_type::iterator GCPI = GCMap.find(S); + gcp_map_type::iterator GCPI = GCMap.find(&S); if (GCPI != GCMap.end()) - return GCPI->second; + return GCPI->second.get(); - const char *Name = S->getName().c_str(); + const char *Name = S.getName().c_str(); for (GCMetadataPrinterRegistry::iterator I = GCMetadataPrinterRegistry::begin(), E = GCMetadataPrinterRegistry::end(); I != E; ++I) if (strcmp(Name, I->getName()) == 0) { - GCMetadataPrinter *GMP = I->instantiate(); - GMP->S = S; - GCMap.insert(std::make_pair(S, GMP)); - return GMP; + std::unique_ptr GMP = I->instantiate(); + GMP->S = &S; + auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP))); + return IterBool.first->second.get(); } report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index b696069..02cd12b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "ByteStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/ADT/SmallBitVector.h" @@ -30,6 +29,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + //===----------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===----------------------------------------------------------------------===// @@ -216,30 +217,48 @@ static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset, /// Emit a dwarf register operation for describing /// - a small value occupying only part of a register or /// - a small register representing only part of a value. 
-static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned Size, - unsigned Offset) { - assert(Size > 0); - if (Offset > 0) { +static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits, + unsigned OffsetInBits) { + assert(SizeInBits > 0 && "zero-sized piece"); + unsigned SizeOfByte = 8; + if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece"); - Streamer.EmitULEB128(Size, Twine(Size)); - Streamer.EmitULEB128(Offset, Twine(Offset)); + Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits)); + Streamer.EmitULEB128(OffsetInBits, Twine(OffsetInBits)); } else { Streamer.EmitInt8(dwarf::DW_OP_piece, "DW_OP_piece"); - unsigned ByteSize = Size / 8; // Assuming 8 bits per byte. + unsigned ByteSize = SizeInBits / SizeOfByte; Streamer.EmitULEB128(ByteSize, Twine(ByteSize)); } } -/// Some targets do not provide a DWARF register number for every -/// register. This function attempts to emit a dwarf register by -/// emitting a piece of a super-register or by piecing together -/// multiple subregisters that alias the register. -static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP, - const MachineLocation &MLoc) { - assert(!MLoc.isIndirect()); - const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); +/// Emit a shift-right dwarf expression. +static void emitDwarfOpShr(ByteStreamer &Streamer, + unsigned ShiftBy) { + Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); + Streamer.EmitULEB128(ShiftBy); + Streamer.EmitInt8(dwarf::DW_OP_shr, "DW_OP_shr"); +} + +// Some targets do not provide a DWARF register number for every +// register. This function attempts to emit a DWARF register by +// emitting a piece of a super-register or by piecing together +// multiple subregisters that alias the register. +void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer, + const MachineLocation &MLoc, + unsigned PieceSizeInBits, + unsigned PieceOffsetInBits) const { + assert(MLoc.isReg() && "MLoc must be a register"); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + // If this is a valid register number, emit it. + if (Reg >= 0) { + emitDwarfRegOp(Streamer, Reg); + emitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits); + return; + } + // Walk up the super-register chain until we find a valid number. // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) { @@ -248,9 +267,19 @@ static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP, unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg()); unsigned Size = TRI->getSubRegIdxSize(Idx); unsigned Offset = TRI->getSubRegIdxOffset(Idx); - AP.OutStreamer.AddComment("super-register"); + OutStreamer.AddComment("super-register"); emitDwarfRegOp(Streamer, Reg); - emitDwarfOpPiece(Streamer, Size, Offset); + if (PieceOffsetInBits == Offset) { + emitDwarfOpPiece(Streamer, Size, Offset); + } else { + // If this is part of a variable in a sub-register at a + // non-zero offset, we need to manually shift the value into + // place, since the DW_OP_piece describes the part of the + // variable, not the position of the subregister. 
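// (Editor's example, not part of the patch.) Concretely, for a 32-bit value
// that lives in the upper half (bit offset 32) of a 64-bit super-register,
// with PieceOffsetInBits == 0, the code below emits, in this order:
//
//   DW_OP_regN / DW_OP_regx N   ; the super-register holding the value
//   DW_OP_piece 4               ; the 4-byte piece of the variable...
//   DW_OP_constu 32, DW_OP_shr  ; ...shifted down from bit offset 32
//
// whereas a sub-register whose offset matches the piece offset takes the
// simple branch and needs no shift.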
+ emitDwarfOpPiece(Streamer, Size, PieceOffsetInBits); + if (Offset) + emitDwarfOpShr(Streamer, Offset); + } return; } } @@ -260,7 +289,7 @@ static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP, // // Keep track of the current position so we can emit the more // efficient DW_OP_piece. - unsigned CurPos = 0; + unsigned CurPos = PieceOffsetInBits; // The size of the register in bits, assuming 8 bits per byte. unsigned RegSize = TRI->getMinimalPhysRegClass(MLoc.getReg())->getSize() * 8; // Keep track of the bits in the register we already emitted, so we @@ -281,7 +310,7 @@ static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP, // If this sub-register has a DWARF number and we haven't covered // its range, emit a DWARF piece for it. if (Reg >= 0 && Intersection.any()) { - AP.OutStreamer.AddComment("sub-register"); + OutStreamer.AddComment("sub-register"); emitDwarfRegOp(Streamer, Reg); emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset); CurPos = Offset + Size; @@ -291,7 +320,7 @@ static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP, } } - if (CurPos == 0) { + if (CurPos == PieceOffsetInBits) { // FIXME: We have no reasonable way of handling errors in here. Streamer.EmitInt8(dwarf::DW_OP_nop, "nop (could not find a dwarf register number)"); @@ -317,8 +346,7 @@ void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, } // Attempt to find a valid super- or sub-register. - if (!Indirect && !MLoc.isIndirect()) - return EmitDwarfRegOpPiece(Streamer, *this, MLoc); + return EmitDwarfRegOpPiece(Streamer, MLoc); } if (MLoc.isIndirect()) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 567b6e3..46ee0c8 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -36,6 +36,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + namespace { struct SrcMgrDiagInfo { const MDNode *LocInfo; @@ -88,7 +90,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, if (!MCAI->useIntegratedAssembler() && !OutStreamer.isIntegratedAssemblerRequired()) { OutStreamer.EmitRawText(Str); - emitInlineAsmEnd(TM.getSubtarget(), 0); + emitInlineAsmEnd(TM.getSubtarget(), nullptr); return; } @@ -98,7 +100,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // If the current LLVMContext has an inline asm handler, set it in SourceMgr. LLVMContext &LLVMCtx = MMI->getModule()->getContext(); bool HasDiagHandler = false; - if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) { + if (LLVMCtx.getInlineAsmDiagnosticHandler() != nullptr) { // If the source manager has an issue, we arrange for srcMgrDiagHandler // to be invoked, getting DiagInfo passed into it. DiagInfo.LocInfo = LocMDNode; @@ -134,8 +136,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, // emitInlineAsmEnd(). 
MCSubtargetInfo STIOrig = *STI; + MCTargetOptions MCOptions; + if (MF) + MCOptions = MF->getTarget().Options.MCOptions; std::unique_ptr TAP( - TM.getTarget().createMCAsmParser(*STI, *Parser, *MII)); + TM.getTarget().createMCAsmParser(*STI, *Parser, *MII, MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -229,10 +234,10 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, - /*Modifier*/ 0, OS); + /*Modifier*/ nullptr, OS); } else { Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, - /*Modifier*/ 0, OS); + /*Modifier*/ nullptr, OS); } } if (Error) { @@ -324,7 +329,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, ++LastEmitted; const char *StrStart = LastEmitted; const char *StrEnd = strchr(StrStart, '}'); - if (StrEnd == 0) + if (!StrEnd) report_fatal_error("Unterminated ${:foo} operand in inline asm" " string: '" + Twine(AsmStr) + "'"); @@ -399,11 +404,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, else { if (InlineAsm::isMemKind(OpFlags)) { Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, - Modifier[0] ? Modifier : 0, + Modifier[0] ? Modifier : nullptr, OS); } else { Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, - Modifier[0] ? Modifier : 0, OS); + Modifier[0] ? Modifier : nullptr, OS); } } } @@ -452,7 +457,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Get the !srcloc metadata node if we have it, and decode the loc cookie from // it. unsigned LocCookie = 0; - const MDNode *LocMD = 0; + const MDNode *LocMD = nullptr; for (unsigned i = MI->getNumOperands(); i != 0; --i) { if (MI->getOperand(i-1).isMetadata() && (LocMD = MI->getOperand(i-1).getMetadata()) && diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index b3eddac..b4ef185 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -1,14 +1,18 @@ add_llvm_library(LLVMAsmPrinter + AddressPool.cpp ARMException.cpp AsmPrinter.cpp AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp + DbgValueHistoryCalculator.cpp DIE.cpp DIEHash.cpp DwarfAccelTable.cpp DwarfCFIException.cpp DwarfDebug.cpp DwarfException.cpp + DwarfFile.cpp + DwarfStringPool.cpp DwarfUnit.cpp ErlangGCPrinter.cpp OcamlGCPrinter.cpp diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 26e8f2d..c3dcd9c 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -104,15 +104,6 @@ void DIEAbbrev::print(raw_ostream &O) { void DIEAbbrev::dump() { print(dbgs()); } #endif -//===----------------------------------------------------------------------===// -// DIE Implementation -//===----------------------------------------------------------------------===// - -DIE::~DIE() { - for (unsigned i = 0, N = Children.size(); i < N; ++i) - delete Children[i]; -} - /// Climb up the parent chain to get the unit DIE to which this DIE /// belongs. 
const DIE *DIE::getUnit() const { @@ -131,7 +122,7 @@ const DIE *DIE::getUnitOrNull() const { return p; p = p->getParent(); } - return NULL; + return nullptr; } DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const { @@ -143,7 +134,7 @@ DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const { for (size_t i = 0; i < Values.size(); ++i) if (Abbrevs.getData()[i].getAttribute() == Attribute) return Values[i]; - return NULL; + return nullptr; } #ifndef NDEBUG @@ -385,12 +376,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { if (Form == dwarf::DW_FORM_ref_addr) { const DwarfDebug *DD = AP->getDwarfDebug(); - unsigned Addr = Entry->getOffset(); + unsigned Addr = Entry.getOffset(); assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations."); // For DW_FORM_ref_addr, output the offset from beginning of debug info // section. Entry->getOffset() returns the offset from start of the // compile unit. - DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit()); + DwarfCompileUnit *CU = DD->lookupUnit(Entry.getUnit()); assert(CU && "CUDie should belong to a CU."); Addr += CU->getDebugInfoOffset(); if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) @@ -401,7 +392,7 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const { CU->getSectionSym(), DIEEntry::getRefAddrSize(AP)); } else - AP->EmitInt32(Entry->getOffset()); + AP->EmitInt32(Entry.getOffset()); } unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { @@ -418,7 +409,7 @@ unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) { #ifndef NDEBUG void DIEEntry::print(raw_ostream &O) const { - O << format("Die: 0x%lx", (long)(intptr_t)Entry); + O << format("Die: 0x%lx", (long)(intptr_t)&Entry); } #endif diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 7fefd4f..ef05f17 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -124,7 +124,13 @@ protected: /// Children DIEs. /// - std::vector Children; + // This can't be a vector because pointer validity is requirent for the + // Parent pointer and DIEEntry. + // It can't be a list because some clients need pointer validity before + // the object has been added to any child list + // (eg: DwarfUnit::constructVariableDIE). These aren't insurmountable, but may + // be more convoluted than beneficial. + std::vector> Children; DIE *Parent; @@ -132,11 +138,15 @@ protected: /// SmallVector Values; +protected: + DIE() + : Offset(0), Size(0), Abbrev((dwarf::Tag)0, dwarf::DW_CHILDREN_no), + Parent(nullptr) {} + public: - explicit DIE(unsigned Tag) + explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no), - Parent(0) {} - ~DIE(); + Parent(nullptr) {} // Accessors. DIEAbbrev &getAbbrev() { return Abbrev; } @@ -145,7 +155,9 @@ public: dwarf::Tag getTag() const { return Abbrev.getTag(); } unsigned getOffset() const { return Offset; } unsigned getSize() const { return Size; } - const std::vector &getChildren() const { return Children; } + const std::vector> &getChildren() const { + return Children; + } const SmallVectorImpl &getValues() const { return Values; } DIE *getParent() const { return Parent; } /// Climb up the parent chain to get the compile or type unit DIE this DIE @@ -166,11 +178,11 @@ public: /// addChild - Add a child to the DIE. 
/// - void addChild(DIE *Child) { + void addChild(std::unique_ptr Child) { assert(!Child->getParent()); Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); - Children.push_back(Child); Child->Parent = this; + Children.push_back(std::move(Child)); } /// findAttribute - Find a value in the DIE with the attribute given, @@ -399,14 +411,13 @@ public: /// this class can also be used as a proxy for a debug information entry not /// yet defined (ie. types.) class DIEEntry : public DIEValue { - DIE *const Entry; + DIE &Entry; public: - explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) { - assert(E && "Cannot construct a DIEEntry with a null DIE"); + explicit DIEEntry(DIE &E) : DIEValue(isEntry), Entry(E) { } - DIE *getEntry() const { return Entry; } + DIE &getEntry() const { return Entry; } /// EmitValue - Emit debug information entry offset. /// @@ -464,7 +475,7 @@ public: class DIELoc : public DIEValue, public DIE { mutable unsigned Size; // Size in bytes excluding size header. public: - DIELoc() : DIEValue(isLoc), DIE(0), Size(0) {} + DIELoc() : DIEValue(isLoc), Size(0) {} /// ComputeSize - Calculate the size of the location expression. /// @@ -507,7 +518,7 @@ public: class DIEBlock : public DIEValue, public DIE { mutable unsigned Size; // Size in bytes excluding size header. public: - DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {} + DIEBlock() : DIEValue(isBlock), Size(0) {} /// ComputeSize - Calculate the size of the location expression. /// diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp index 74beec1..c2fad59 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dwarfdebug" - #include "ByteStreamer.h" #include "DIEHash.h" #include "DIE.h" @@ -28,6 +26,8 @@ using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + /// \brief Grabs the string in whichever attribute is passed in and returns /// a reference to it. static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { @@ -309,7 +309,7 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) { // ... An attribute that refers to another type entry T is processed as // follows: case DIEValue::isEntry: - hashDIEEntry(Attribute, Tag, *cast(Value)->getEntry()); + hashDIEEntry(Attribute, Tag, cast(Value)->getEntry()); break; case DIEValue::isInteger: { addULEB128('A'); @@ -463,20 +463,18 @@ void DIEHash::computeHash(const DIE &Die) { addAttributes(Die); // Then hash each of the children of the DIE. - for (std::vector::const_iterator I = Die.getChildren().begin(), - E = Die.getChildren().end(); - I != E; ++I) { + for (auto &C : Die.getChildren()) { // 7.27 Step 7 // If C is a nested type entry or a member function entry, ... - if (isType((*I)->getTag()) || (*I)->getTag() == dwarf::DW_TAG_subprogram) { - StringRef Name = getDIEStringAttr(**I, dwarf::DW_AT_name); + if (isType(C->getTag()) || C->getTag() == dwarf::DW_TAG_subprogram) { + StringRef Name = getDIEStringAttr(*C, dwarf::DW_AT_name); // ... and has a DW_AT_name attribute if (!Name.empty()) { - hashNestedType(**I, Name); + hashNestedType(*C, Name); continue; } } - computeHash(**I); + computeHash(*C); } // Following the last (or if there are no children), append a zero byte. 
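// (Editor's sketch, not part of the patch.) With DIE children now owned
// through std::unique_ptr, a caller hands over ownership explicitly and
// keeps a raw reference for later attribute setup; the reference stays
// valid because, as the comment in DIE.h above notes, children are stored
// as pointers. llvm::make_unique and the tag choice are illustrative:
static DIE &addChildDie(DIE &Parent, dwarf::Tag Tag) {
  auto Child = llvm::make_unique<DIE>(Tag);
  DIE &Ref = *Child;                  // grab a reference before moving
  Parent.addChild(std::move(Child));  // Parent now owns the child
  return Ref;
}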
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h index 48f1601..175d660 100644 --- a/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/lib/CodeGen/AsmPrinter/DIEHash.h @@ -89,7 +89,7 @@ class DIEHash { }; public: - DIEHash(AsmPrinter *A = NULL) : AP(A) {} + DIEHash(AsmPrinter *A = nullptr) : AP(A) {} /// \brief Computes the ODR signature. uint64_t computeDIEODRSignature(const DIE &Die); diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp new file mode 100644 index 0000000..6103254 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -0,0 +1,175 @@ +//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DbgValueHistoryCalculator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +#include + +#define DEBUG_TYPE "dwarfdebug" + +namespace llvm { + +// \brief If @MI is a DBG_VALUE with debug value described by a +// defined register, returns the number of this register. +// In the other case, returns 0. +static unsigned isDescribedByReg(const MachineInstr &MI) { + assert(MI.isDebugValue()); + assert(MI.getNumOperands() == 3); + // If location of variable is described using a register (directly or + // indirecltly), this register is always a first operand. + return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; +} + +void DbgValueHistoryMap::startInstrRange(const MDNode *Var, + const MachineInstr &MI) { + // Instruction range should start with a DBG_VALUE instruction for the + // variable. + assert(MI.isDebugValue() && MI.getDebugVariable() == Var); + auto &Ranges = VarInstrRanges[Var]; + if (!Ranges.empty() && Ranges.back().second == nullptr && + Ranges.back().first->isIdenticalTo(&MI)) { + DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" + << "\t" << Ranges.back().first << "\t" << MI << "\n"); + return; + } + Ranges.push_back(std::make_pair(&MI, nullptr)); +} + +void DbgValueHistoryMap::endInstrRange(const MDNode *Var, + const MachineInstr &MI) { + auto &Ranges = VarInstrRanges[Var]; + // Verify that the current instruction range is not yet closed. + assert(!Ranges.empty() && Ranges.back().second == nullptr); + // For now, instruction ranges are not allowed to cross basic block + // boundaries. + assert(Ranges.back().first->getParent() == MI.getParent()); + Ranges.back().second = &MI; +} + +unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const { + const auto &I = VarInstrRanges.find(Var); + if (I == VarInstrRanges.end()) + return 0; + const auto &Ranges = I->second; + if (Ranges.empty() || Ranges.back().second != nullptr) + return 0; + return isDescribedByReg(*Ranges.back().first); +} + +namespace { +// Maps physreg numbers to the variables they describe. +typedef std::map> RegDescribedVarsMap; +} + +// \brief Claim that @Var is not described by @RegNo anymore. 
+static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, + unsigned RegNo, const MDNode *Var) { + const auto &I = RegVars.find(RegNo); + assert(RegNo != 0U && I != RegVars.end()); + auto &VarSet = I->second; + const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var); + assert(VarPos != VarSet.end()); + VarSet.erase(VarPos); + // Don't keep empty sets in a map to keep it as small as possible. + if (VarSet.empty()) + RegVars.erase(I); +} + +// \brief Claim that @Var is now described by @RegNo. +static void addRegDescribedVar(RegDescribedVarsMap &RegVars, + unsigned RegNo, const MDNode *Var) { + assert(RegNo != 0U); + auto &VarSet = RegVars[RegNo]; + assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); + VarSet.push_back(Var); +} + +// \brief Terminate the location range for variables described by register +// @RegNo by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + const auto &I = RegVars.find(RegNo); + if (I == RegVars.end()) + return; + // Iterate over all variables described by this register and add this + // instruction to their history, clobbering it. + for (const auto &Var : I->second) + HistMap.endInstrRange(Var, ClobberingInstr); + RegVars.erase(I); +} + +// \brief Terminate location ranges for all variables, described by registers +// clobbered by @MI. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, + const MachineInstr &MI, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &HistMap) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg()) + continue; + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); + ++AI) { + unsigned RegNo = *AI; + clobberRegisterUses(RegVars, RegNo, HistMap, MI); + } + } +} + +// \brief Terminate the location range for all register-described variables +// by inserting @ClobberingInstr to their history. +static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + for (const auto &I : RegVars) + for (const auto &Var : I.second) + HistMap.endInstrRange(Var, ClobberingInstr); + RegVars.clear(); +} + +void calculateDbgValueHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &Result) { + RegDescribedVarsMap RegVars; + + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isDebugValue()) { + // Not a DBG_VALUE instruction. It may clobber registers which describe + // some variables. + clobberRegisterUses(RegVars, MI, TRI, Result); + continue; + } + + assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); + const MDNode *Var = MI.getDebugVariable(); + + if (unsigned PrevReg = Result.getRegisterForVar(Var)) + dropRegDescribedVar(RegVars, PrevReg, Var); + + Result.startInstrRange(Var, MI); + + if (unsigned NewReg = isDescribedByReg(MI)) + addRegDescribedVar(RegVars, NewReg, Var); + } + + // Make sure locations for register-described variables are valid only + // until the end of the basic block (unless it's the last basic block, in + // which case let their liveness run off to the end of the function). 
+    if (!MBB.empty() && &MBB != &MF->back())
+      clobberAllRegistersUses(RegVars, Result, MBB.back());
+  }
+}
+
+}
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
new file mode 100644
index 0000000..b9177f0
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_
+#define CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+class MDNode;
+class TargetRegisterInfo;
+
+// For each user variable, keep a list of instruction ranges where this
+// variable is accessible. The variables are listed in order of appearance.
+class DbgValueHistoryMap {
+  // Each instruction range starts with a DBG_VALUE instruction, specifying the
+  // location of a variable, which is assumed to be valid until the end of the
+  // range. If end is not specified, location is valid until the start
+  // instruction of the next instruction range, or until the end of the
+  // function.
+  typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
+  typedef SmallVector<InstrRange, 4> InstrRanges;
+  typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap;
+  InstrRangesMap VarInstrRanges;
+
+public:
+  void startInstrRange(const MDNode *Var, const MachineInstr &MI);
+  void endInstrRange(const MDNode *Var, const MachineInstr &MI);
+  // Returns register currently describing @Var. If @Var is currently
+  // inaccessible or is not described by a register, returns 0.
+  unsigned getRegisterForVar(const MDNode *Var) const;
+
+  bool empty() const { return VarInstrRanges.empty(); }
+  void clear() { VarInstrRanges.clear(); }
+  InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }
+  InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); }
+};
+
+void calculateDbgValueHistory(const MachineFunction *MF,
+                              const TargetRegisterInfo *TRI,
+                              DbgValueHistoryMap &Result);
+}
+
+#endif
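The new calculator reduces per-variable DBG_VALUE bookkeeping to a map of open and closed instruction ranges: a range opens at a DBG_VALUE and closes when the describing register is clobbered or the basic block ends. A minimal standalone sketch of that bookkeeping, with plain ints standing in for the MDNode* and MachineInstr* keys used by the real classes:

    // Sketch only: Var/Instr are illustrative stand-ins, not the LLVM types.
    #include <cassert>
    #include <map>
    #include <utility>
    #include <vector>

    using Var = int;
    using Instr = int;
    // (start, end); end == -1 marks a still-open range.
    using InstrRange = std::pair<Instr, Instr>;

    struct HistoryMap {
      std::map<Var, std::vector<InstrRange>> Ranges;
      void startRange(Var V, Instr I) { Ranges[V].push_back({I, -1}); }
      void endRange(Var V, Instr I) {
        auto &R = Ranges[V];
        assert(!R.empty() && R.back().second == -1 && "no open range");
        R.back().second = I; // close at the clobbering instruction
      }
    };

A range whose end stays open (nullptr in the real code) is treated as valid until the next range for the same variable, or until the end of the function.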
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 470453f..3beb799 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -23,75 +23,82 @@ class DebugLocEntry {
   const MCSymbol *Begin;
   const MCSymbol *End;
 
-  // Type of entry that this represents.
-  enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
-  enum EntryType EntryKind;
+public:
+  /// A single location or constant.
+  struct Value {
+    Value(const MDNode *Var, int64_t i)
+        : Variable(Var), EntryKind(E_Integer) {
+      Constant.Int = i;
+    }
+    Value(const MDNode *Var, const ConstantFP *CFP)
+        : Variable(Var), EntryKind(E_ConstantFP) {
+      Constant.CFP = CFP;
+    }
+    Value(const MDNode *Var, const ConstantInt *CIP)
+        : Variable(Var), EntryKind(E_ConstantInt) {
+      Constant.CIP = CIP;
+    }
+    Value(const MDNode *Var, MachineLocation Loc)
+        : Variable(Var), EntryKind(E_Location), Loc(Loc) {
+    }
 
-  union {
-    int64_t Int;
-    const ConstantFP *CFP;
-    const ConstantInt *CIP;
-  } Constants;
+    // The variable to which this location entry corresponds.
+    const MDNode *Variable;
 
-  // The location in the machine frame.
-  MachineLocation Loc;
+    // Type of entry that this represents.
+    enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+    enum EntryType EntryKind;
 
-  // The variable to which this location entry corresponds.
-  const MDNode *Variable;
+    // Either a constant,
+    union {
+      int64_t Int;
+      const ConstantFP *CFP;
+      const ConstantInt *CIP;
+    } Constant;
 
-  // The compile unit to which this location entry is referenced by.
-  const DwarfCompileUnit *Unit;
+    // Or a location in the machine frame.
+    MachineLocation Loc;
 
-  bool hasSameValueOrLocation(const DebugLocEntry &Next) {
-    if (EntryKind != Next.EntryKind)
-      return false;
+    bool operator==(const Value &other) const {
+      if (EntryKind != other.EntryKind)
+        return false;
 
-    bool EqualValues;
-    switch (EntryKind) {
-    case E_Location:
-      EqualValues = Loc == Next.Loc;
-      break;
-    case E_Integer:
-      EqualValues = Constants.Int == Next.Constants.Int;
-      break;
-    case E_ConstantFP:
-      EqualValues = Constants.CFP == Next.Constants.CFP;
-      break;
-    case E_ConstantInt:
-      EqualValues = Constants.CIP == Next.Constants.CIP;
-      break;
+      switch (EntryKind) {
+      case E_Location:
+        return Loc == other.Loc;
+      case E_Integer:
+        return Constant.Int == other.Constant.Int;
+      case E_ConstantFP:
+        return Constant.CFP == other.Constant.CFP;
+      case E_ConstantInt:
+        return Constant.CIP == other.Constant.CIP;
+      }
+      llvm_unreachable("unhandled EntryKind");
     }
-    return EqualValues;
-  }
 
+    bool isLocation() const { return EntryKind == E_Location; }
+    bool isInt() const { return EntryKind == E_Integer; }
+    bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+    bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+    int64_t getInt() const { return Constant.Int; }
+    const ConstantFP *getConstantFP() const { return Constant.CFP; }
+    const ConstantInt *getConstantInt() const { return Constant.CIP; }
+    MachineLocation getLoc() const { return Loc; }
+    const MDNode *getVariable() const { return Variable; }
+  };
+
+private:
+  /// A list of locations/constants belonging to this entry.
+  SmallVector<Value, 1> Values;
+
+  /// The compile unit that this location entry is referenced by.
+  const DwarfCompileUnit *Unit;
 
 public:
-  DebugLocEntry() : Begin(0), End(0), Variable(0), Unit(0) {
-    Constants.Int = 0;
-  }
-  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
-                const MDNode *V, const DwarfCompileUnit *U)
-      : Begin(B), End(E), Loc(L), Variable(V), Unit(U) {
-    Constants.Int = 0;
-    EntryKind = E_Location;
-  }
-  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i,
-                const DwarfCompileUnit *U)
-      : Begin(B), End(E), Variable(0), Unit(U) {
-    Constants.Int = i;
-    EntryKind = E_Integer;
-  }
-  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr,
-                const DwarfCompileUnit *U)
-      : Begin(B), End(E), Variable(0), Unit(U) {
-    Constants.CFP = FPtr;
-    EntryKind = E_ConstantFP;
-  }
-  DebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr,
-                const DwarfCompileUnit *U)
-      : Begin(B), End(E), Variable(0), Unit(U) {
-    Constants.CIP = IPtr;
-    EntryKind = E_ConstantInt;
+  DebugLocEntry() : Begin(nullptr), End(nullptr), Unit(nullptr) {}
+  DebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+                Value Val, const DwarfCompileUnit *U)
+      : Begin(B), End(E), Unit(U) {
+    Values.push_back(std::move(Val));
   }
 
   /// \brief Attempt to merge this DebugLocEntry with Next and return
@@ -99,24 +106,17 @@ public:
   /// share the same Loc/Constant and if Next immediately follows this
   /// Entry.
   bool Merge(const DebugLocEntry &Next) {
-    if (End == Next.Begin && hasSameValueOrLocation(Next)) {
+    if (End == Next.Begin && Values == Next.Values) {
       End = Next.End;
       return true;
     }
     return false;
   }
 
-  bool isLocation() const { return EntryKind == E_Location; }
-  bool isInt() const { return EntryKind == E_Integer; }
-  bool isConstantFP() const { return EntryKind == E_ConstantFP; }
-  bool isConstantInt() const { return EntryKind == E_ConstantInt; }
-  int64_t getInt() const { return Constants.Int; }
-  const ConstantFP *getConstantFP() const { return Constants.CFP; }
-  const ConstantInt *getConstantInt() const { return Constants.CIP; }
-  const MDNode *getVariable() const { return Variable; }
   const MCSymbol *getBeginSym() const { return Begin; }
   const MCSymbol *getEndSym() const { return End; }
   const DwarfCompileUnit *getCU() const { return Unit; }
-  MachineLocation getLoc() const { return Loc; }
+  ArrayRef<Value> getValues() const { return Values; }
+  void addValue(Value Val) { Values.push_back(Val); }
 };
 
 }
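With values now held in a list, two entries fuse exactly when they are adjacent (End == Next.Begin) and carry identical value lists; vector equality of Values, via Value's operator==, is what decides whether a location list entry can be extended. A simplified sketch of that merge rule, with ints standing in for the MCSymbol* labels and the Value payloads:

    #include <vector>

    struct Entry {
      int Begin, End;          // stand-ins for the begin/end MCSymbol*
      std::vector<int> Values; // stand-in for the SmallVector of Values
      bool merge(const Entry &Next) {
        if (End != Next.Begin || Values != Next.Values)
          return false;
        End = Next.End; // extend this entry instead of emitting a second one
        return true;
      }
    };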
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index bcbb6c8..e9527c4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -29,14 +29,15 @@
 DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
     : Header(8 + (atomList.size() * 4)), HeaderData(atomList),
       Entries(Allocator) {}
 
-DwarfAccelTable::~DwarfAccelTable() {}
-
-void DwarfAccelTable::AddName(StringRef Name, const DIE *die, char Flags) {
+void DwarfAccelTable::AddName(StringRef Name, MCSymbol *StrSym, const DIE *die,
+                              char Flags) {
   assert(Data.empty() && "Already finalized!");
   // If the string is in the list already then add this die to the list
   // otherwise add a new one.
   DataArray &DIEs = Entries[Name];
-  DIEs.push_back(new (Allocator) HashDataContents(die, Flags));
+  assert(!DIEs.StrSym || DIEs.StrSym == StrSym);
+  DIEs.StrSym = StrSym;
+  DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
 }
 
 void DwarfAccelTable::ComputeBucketCount(void) {
@@ -72,9 +73,10 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
        EI != EE; ++EI) {
 
     // Unique the entries.
-    std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs);
-    EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
-                     EI->second.end());
+    std::stable_sort(EI->second.Values.begin(), EI->second.Values.end(),
+                     compareDIEs);
+    EI->second.Values.erase(
+        std::unique(EI->second.Values.begin(), EI->second.Values.end()),
+        EI->second.Values.end());
 
     HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
     Data.push_back(Entry);
@@ -181,21 +183,18 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
       // Remember to emit the label for our offset.
       Asm->OutStreamer.EmitLabel((*HI)->Sym);
       Asm->OutStreamer.AddComment((*HI)->Str);
-      Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
-                             D->getStringPoolSym());
+      Asm->EmitSectionOffset((*HI)->Data.StrSym,
+                             D->getStringPool().getSectionSymbol());
       Asm->OutStreamer.AddComment("Num DIEs");
-      Asm->EmitInt32((*HI)->Data.size());
-      for (ArrayRef<HashDataContents *>::const_iterator
-               DI = (*HI)->Data.begin(),
-               DE = (*HI)->Data.end();
-           DI != DE; ++DI) {
+      Asm->EmitInt32((*HI)->Data.Values.size());
+      for (HashDataContents *HD : (*HI)->Data.Values) {
         // Emit the DIE offset
-        Asm->EmitInt32((*DI)->Die->getOffset());
+        Asm->EmitInt32(HD->Die->getOffset());
         // If we have multiple Atoms emit that info too.
         // FIXME: A bit of a hack, we either emit only one atom or all info.
         if (HeaderData.Atoms.size() > 1) {
-          Asm->EmitInt16((*DI)->Die->getTag());
-          Asm->EmitInt8((*DI)->Flags);
+          Asm->EmitInt16(HD->Die->getTag());
+          Asm->EmitInt8(HD->Flags);
         }
       }
       // Emit a 0 to terminate the data unless we have a hash collision.
@@ -235,10 +234,8 @@ void DwarfAccelTable::print(raw_ostream &O) {
        EE = Entries.end();
        EI != EE; ++EI) {
     O << "Name: " << EI->getKeyData() << "\n";
-    for (DataArray::const_iterator DI = EI->second.begin(),
-                                   DE = EI->second.end();
-         DI != DE; ++DI)
-      (*DI)->print(O);
+    for (HashDataContents *HD : EI->second.Values)
+      HD->print(O);
   }
 
   O << "Buckets and Hashes: \n";
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 4a14497..a3cc95f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
@@ -125,7 +126,8 @@ public:
     uint16_t type; // enum AtomType
     uint16_t form; // DWARF DW_FORM_ defines
 
-    Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
+    LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form)
+        : type(type), form(form) {}
 #ifndef NDEBUG
     void print(raw_ostream &O) {
       O << "Type: " << dwarf::AtomTypeString(type) << "\n"
@@ -177,12 +179,19 @@ public:
   };
 
 private:
+  // String Data
+  struct DataArray {
+    MCSymbol *StrSym;
+    std::vector<HashDataContents *> Values;
+    DataArray() : StrSym(nullptr) {}
+  };
+  friend struct HashData;
+
   struct HashData {
     StringRef Str;
     uint32_t HashValue;
     MCSymbol *Sym;
-    ArrayRef<HashDataContents *> Data; // offsets
-    HashData(StringRef S, ArrayRef<HashDataContents *> Data)
+    DwarfAccelTable::DataArray &Data; // offsets
+    HashData(StringRef S, DwarfAccelTable::DataArray &Data)
         : Str(S), Data(Data) {
       HashValue = DwarfAccelTable::HashDJB(S);
     }
@@ -196,10 +205,10 @@ private:
       else
        O << "<none>";
      O << "\n";
-      for (size_t i = 0; i < Data.size(); i++) {
-        O << "  Offset: " << Data[i]->Die->getOffset() << "\n";
-        O << "  Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n";
-        O << "  Flags: " << Data[i]->Flags << "\n";
+      for (HashDataContents *C : Data.Values) {
+        O << "  Offset: " << C->Die->getOffset() << "\n";
+        O << "  Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
+        O << "  Flags: " << C->Flags << "\n";
       }
     }
     void dump() { print(dbgs()); }
@@ -224,8 +233,6 @@ private:
   TableHeaderData HeaderData;
   std::vector<HashData *> Data;
 
-  // String Data
-  typedef std::vector<HashDataContents *> DataArray;
   typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries;
   StringEntries Entries;
 
@@ -238,8 +245,8 @@ private:
   // Public Implementation
 public:
   DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
-  ~DwarfAccelTable();
-  void AddName(StringRef, const DIE *, char = 0);
+  void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
+               char Flags = 0);
   void FinalizeTable(AsmPrinter *, StringRef);
   void Emit(AsmPrinter *, MCSymbol *, DwarfFile *);
 #ifndef NDEBUG
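AddName now records the string-pool symbol alongside the DIEs for a name and asserts that every call for the same name passes the same symbol, so emission can reference it directly instead of re-querying the string pool. A rough sketch of that invariant, with simplified stand-in types rather than the LLVM classes:

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    struct DataArray {
      const void *StrSym = nullptr;     // stand-in for MCSymbol*
      std::vector<const void *> Values; // stand-in for HashDataContents*
    };

    void addName(std::map<std::string, DataArray> &Entries,
                 const std::string &Name, const void *StrSym,
                 const void *Die) {
      DataArray &DIEs = Entries[Name];
      assert(!DIEs.StrSym || DIEs.StrSym == StrSym); // one symbol per name
      DIEs.StrSym = StrSym;
      DIEs.Values.push_back(Die);
    }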
"llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, cl::desc("Disable debug info printing")); @@ -107,8 +107,6 @@ static const char *const DbgTimerName = "DWARF Debug Writer"; //===----------------------------------------------------------------------===// -namespace llvm { - /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. template T DbgVariable::resolve(DIRef Ref) const { @@ -120,7 +118,6 @@ bool DbgVariable::isBlockByrefVariable() const { return Var.isBlockByrefVariable(DD->getTypeIdentifierMap()); } - DIType DbgVariable::getType() const { DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap()); // FIXME: isBlockByrefVariable should be reformulated in terms of complex @@ -166,29 +163,32 @@ DIType DbgVariable::getType() const { return Ty; } -} // end llvm namespace - -/// Return Dwarf Version by checking module flags. -static unsigned getDwarfVersionFromModule(const Module *M) { - Value *Val = M->getModuleFlag("Dwarf Version"); - if (!Val) - return dwarf::DWARF_VERSION; - return cast(Val)->getZExtValue(); -} +static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) - : Asm(A), MMI(Asm->MMI), FirstCU(0), PrevLabel(NULL), GlobalRangeCount(0), - InfoHolder(A, "info_string", DIEValueAllocator), + : Asm(A), MMI(Asm->MMI), FirstCU(nullptr), PrevLabel(nullptr), + GlobalRangeCount(0), InfoHolder(A, "info_string", DIEValueAllocator), UsedNonDefaultText(false), - SkeletonHolder(A, "skel_string", DIEValueAllocator) { - - DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = 0; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; - DwarfAddrSectionSym = 0; - DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; - FunctionBeginSym = FunctionEndSym = 0; - CurFn = 0; - CurMI = 0; + SkeletonHolder(A, "skel_string", DIEValueAllocator), + AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelTypes(TypeAtoms) { + + DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr; + DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr; + DwarfLineSectionSym = nullptr; + DwarfAddrSectionSym = nullptr; + DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr; + FunctionBeginSym = FunctionEndSym = nullptr; + CurFn = nullptr; + CurMI = nullptr; // Turn on accelerator tables for Darwin by default, pubnames by // default for non-Darwin, and handle split dwarf. @@ -209,9 +209,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; - DwarfVersion = DwarfVersionNumber - ? DwarfVersionNumber - : getDwarfVersionFromModule(MMI->getModule()); + DwarfVersion = DwarfVersionNumber ? 
 
   {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
@@ -219,76 +218,22 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   }
 }
 
+// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
+DwarfDebug::~DwarfDebug() { }
+
 // Switch to the specified MCSection and emit an assembler
 // temporary label to it if SymbolStem is specified.
 static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
-                                const char *SymbolStem = 0) {
+                                const char *SymbolStem = nullptr) {
   Asm->OutStreamer.SwitchSection(Section);
   if (!SymbolStem)
-    return 0;
+    return nullptr;
 
   MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
   Asm->OutStreamer.EmitLabel(TmpSym);
   return TmpSym;
 }
 
-DwarfFile::~DwarfFile() {
-  for (DwarfUnit *DU : CUs)
-    delete DU;
-}
-
-MCSymbol *DwarfFile::getStringPoolSym() {
-  return Asm->GetTempSymbol(StringPref);
-}
-
-MCSymbol *DwarfFile::getStringPoolEntry(StringRef Str) {
-  std::pair<MCSymbol *, unsigned> &Entry =
-      StringPool.GetOrCreateValue(Str).getValue();
-  if (Entry.first)
-    return Entry.first;
-
-  Entry.second = NextStringPoolNumber++;
-  return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
-}
-
-unsigned DwarfFile::getStringPoolIndex(StringRef Str) {
-  std::pair<MCSymbol *, unsigned> &Entry =
-      StringPool.GetOrCreateValue(Str).getValue();
-  if (Entry.first)
-    return Entry.second;
-
-  Entry.second = NextStringPoolNumber++;
-  Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
-  return Entry.second;
-}
-
-unsigned DwarfFile::getAddrPoolIndex(const MCSymbol *Sym, bool TLS) {
-  std::pair<AddressPoolMap::iterator, bool> P = AddressPool.insert(
-      std::make_pair(Sym, AddressPoolEntry(NextAddrPoolNumber, TLS)));
-  if (P.second)
-    ++NextAddrPoolNumber;
-  return P.first->second.Number;
-}
-
-// Define a unique number for the abbreviation.
-//
-void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) {
-  // Check the set for priors.
-  DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
-
-  // If it's newly added.
-  if (InSet == &Abbrev) {
-    // Add to abbreviation list.
-    Abbreviations.push_back(&Abbrev);
-
-    // Assign the vector position + 1 as its number.
-    Abbrev.setNumber(Abbreviations.size());
-  } else {
-    // Assign existing abbreviation number.
-    Abbrev.setNumber(InSet->getNumber());
-  }
-}
-
 static bool isObjCClass(StringRef Name) {
   return Name.startswith("+") || Name.startswith("-");
 }
@@ -328,26 +273,26 @@ static bool SectionSort(const MCSection *A, const MCSection *B) {
 // TODO: Determine whether or not we should add names for programs
 // that do not have a DW_AT_name or DW_AT_linkage_name field - this
 // is only slightly different than the lookup of non-standard ObjC names.
-static void addSubprogramNames(DwarfUnit *TheU, DISubprogram SP, DIE *Die) {
+void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) {
   if (!SP.isDefinition())
     return;
-  TheU->addAccelName(SP.getName(), Die);
+  addAccelName(SP.getName(), Die);
 
   // If the linkage name is different than the name, go ahead and output
   // that as well into the name table.
   if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
-    TheU->addAccelName(SP.getLinkageName(), Die);
+    addAccelName(SP.getLinkageName(), Die);
 
   // If this is an Objective-C selector name add it to the ObjC accelerator
   // too.
   if (isObjCClass(SP.getName())) {
     StringRef Class, Category;
     getObjCClassCategory(SP.getName(), Class, Category);
-    TheU->addAccelObjC(Class, Die);
+    addAccelObjC(Class, Die);
     if (Category != "")
-      TheU->addAccelObjC(Category, Die);
+      addAccelObjC(Category, Die);
     // Also add the base method name to the name table.
-    TheU->addAccelName(getObjCMethodName(SP.getName()), Die);
+    addAccelName(getObjCMethodName(SP.getName()), Die);
   }
 }
 
@@ -367,58 +312,21 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
 
 // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
 // and DW_AT_high_pc attributes. If there are global variables in this
 // scope then create and insert DIEs for these variables.
-DIE *DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit *SPCU,
+DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU,
                                           DISubprogram SP) {
-  DIE *SPDie = SPCU->getDIE(SP);
-
-  assert(SPDie && "Unable to find subprogram DIE!");
-
-  // If we're updating an abstract DIE, then we will be adding the children and
-  // object pointer later on. But what we don't want to do is process the
-  // concrete DIE twice.
-  if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) {
-    // Pick up abstract subprogram DIE.
-    SPDie =
-        SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getUnitDie());
-    SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE);
-  } else {
-    DISubprogram SPDecl = SP.getFunctionDeclaration();
-    if (!SPDecl.isSubprogram()) {
-      // There is not any need to generate specification DIE for a function
-      // defined at compile unit level. If a function is defined inside another
-      // function then gdb prefers the definition at top level and but does not
-      // expect specification DIE in parent function. So avoid creating
-      // specification DIE for a function defined inside a function.
-      DIScope SPContext = resolve(SP.getContext());
-      if (SP.isDefinition() && !SPContext.isCompileUnit() &&
-          !SPContext.isFile() && !isSubprogramContext(SPContext)) {
-        SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
-
-        // Add arguments.
-        DICompositeType SPTy = SP.getType();
-        DIArray Args = SPTy.getTypeArray();
-        uint16_t SPTag = SPTy.getTag();
-        if (SPTag == dwarf::DW_TAG_subroutine_type)
-          SPCU->constructSubprogramArguments(*SPDie, Args);
-        DIE *SPDeclDie = SPDie;
-        SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram,
-                                      *SPCU->getUnitDie());
-        SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie);
-      }
-    }
-  }
+  DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP);
 
-  attachLowHighPC(SPCU, SPDie, FunctionBeginSym, FunctionEndSym);
+  attachLowHighPC(SPCU, *SPDie, FunctionBeginSym, FunctionEndSym);
 
   const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
   MachineLocation Location(RI->getFrameRegister(*Asm->MF));
-  SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+  SPCU.addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
 
   // Add name to the name table, we do this here because we're guaranteed
   // to have concrete versions of our DW_TAG_subprogram nodes.
-  addSubprogramNames(SPCU, SP, SPDie);
+  addSubprogramNames(SP, *SPDie);
 
-  return SPDie;
+  return *SPDie;
 }
 
 /// Check whether we should create a DIE for the given Scope, return true
@@ -442,16 +350,16 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
   return !End;
 }
 
-static void addSectionLabel(AsmPrinter *Asm, DwarfUnit *U, DIE *D,
+static void addSectionLabel(AsmPrinter &Asm, DwarfUnit &U, DIE &D,
                             dwarf::Attribute A, const MCSymbol *L,
                             const MCSymbol *Sec) {
-  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
-    U->addSectionLabel(D, A, L);
+  if (Asm.MAI->doesDwarfUseRelocationsAcrossSections())
+    U.addSectionLabel(D, A, L);
   else
-    U->addSectionDelta(D, A, L, Sec);
+    U.addSectionDelta(D, A, L, Sec);
 }
 
-void DwarfDebug::addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE,
+void DwarfDebug::addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE,
                                    const SmallVectorImpl<InsnRange> &Range) {
   // Emit offset in .debug_range as a relocatable label. emitDIE will handle
   // emitting it appropriately.
@@ -460,10 +368,10 @@
   // Under fission, ranges are specified by constant offsets relative to the
   // CU's DW_AT_GNU_ranges_base.
   if (useSplitDwarf())
-    TheCU->addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
-                           DwarfDebugRangeSectionSym);
+    TheCU.addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
+                          DwarfDebugRangeSectionSym);
   else
-    addSectionLabel(Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
+    addSectionLabel(*Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
                     DwarfDebugRangeSectionSym);
 
   RangeSpanList List(RangeSym);
@@ -473,227 +381,256 @@
   }
 
   // Add the range list to the set of ranges to be emitted.
-  TheCU->addRangeList(std::move(List));
+  TheCU.addRangeList(std::move(List));
+}
+
+void DwarfDebug::attachRangesOrLowHighPC(DwarfCompileUnit &TheCU, DIE &Die,
+                                         const SmallVectorImpl<InsnRange> &Ranges) {
+  assert(!Ranges.empty());
+  if (Ranges.size() == 1)
+    attachLowHighPC(TheCU, Die, getLabelBeforeInsn(Ranges.front().first),
+                    getLabelAfterInsn(Ranges.front().second));
+  else
+    addScopeRangeList(TheCU, Die, Ranges);
 }
 
 // Construct new DW_TAG_lexical_block for this scope and attach
 // DW_AT_low_pc/DW_AT_high_pc labels.
-DIE *DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit *TheCU,
-                                          LexicalScope *Scope) {
+std::unique_ptr<DIE>
+DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit &TheCU,
                                      LexicalScope *Scope) {
   if (isLexicalScopeDIENull(Scope))
-    return 0;
+    return nullptr;
 
-  DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+  auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block);
 
   if (Scope->isAbstractScope())
     return ScopeDIE;
 
-  const SmallVectorImpl<InsnRange> &ScopeRanges = Scope->getRanges();
-
-  // If we have multiple ranges, emit them into the range section.
-  if (ScopeRanges.size() > 1) {
-    addScopeRangeList(TheCU, ScopeDIE, ScopeRanges);
-    return ScopeDIE;
-  }
-
-  // Construct the address range for this DIE.
-  SmallVectorImpl<InsnRange>::const_iterator RI = ScopeRanges.begin();
-  MCSymbol *Start = getLabelBeforeInsn(RI->first);
-  MCSymbol *End = getLabelAfterInsn(RI->second);
-  assert(End && "End label should not be null!");
-
-  assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
-  assert(End->isDefined() && "Invalid end label for an inlined scope!");
-
-  attachLowHighPC(TheCU, ScopeDIE, Start, End);
+  attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges());
 
   return ScopeDIE;
 }
 
 // This scope represents inlined body of a function. Construct DIE to
 // represent this concrete inlined copy of the function.
-DIE *DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit *TheCU,
-                                          LexicalScope *Scope) {
-  const SmallVectorImpl<InsnRange> &ScopeRanges = Scope->getRanges();
-  assert(!ScopeRanges.empty() &&
-         "LexicalScope does not have instruction markers!");
-
-  if (!Scope->getScopeNode())
-    return NULL;
+std::unique_ptr<DIE>
+DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU,
+                                     LexicalScope *Scope) {
+  assert(Scope->getScopeNode());
   DIScope DS(Scope->getScopeNode());
   DISubprogram InlinedSP = getDISubprogram(DS);
-  DIE *OriginDIE = TheCU->getDIE(InlinedSP);
-  if (!OriginDIE) {
-    DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram.");
-    return NULL;
-  }
-
-  DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
-  TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE);
+  // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+  // was inlined from another compile unit.
+  DIE *OriginDIE = AbstractSPDies[InlinedSP];
+  assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
 
-  // If we have multiple ranges, emit them into the range section.
-  if (ScopeRanges.size() > 1)
-    addScopeRangeList(TheCU, ScopeDIE, ScopeRanges);
-  else {
-    SmallVectorImpl<InsnRange>::const_iterator RI = ScopeRanges.begin();
-    MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
-    MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+  auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine);
+  TheCU.addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
 
-    if (StartLabel == 0 || EndLabel == 0)
-      llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
-
-    assert(StartLabel->isDefined() &&
-           "Invalid starting label for an inlined scope!");
-    assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!");
-
-    attachLowHighPC(TheCU, ScopeDIE, StartLabel, EndLabel);
-  }
+  attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges());
 
   InlinedSubprogramDIEs.insert(OriginDIE);
 
   // Add the call site information to the DIE.
   DILocation DL(Scope->getInlinedAt());
-  TheCU->addUInt(
-      ScopeDIE, dwarf::DW_AT_call_file, None,
-      TheCU->getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
-  TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
+  TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
+                TheCU.getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+  TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
 
   // Add name to the name table, we do this here because we're guaranteed
   // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
-  addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+  addSubprogramNames(InlinedSP, *ScopeDIE);
 
   return ScopeDIE;
 }
 
-DIE *DwarfDebug::createScopeChildrenDIE(DwarfCompileUnit *TheCU,
-                                        LexicalScope *Scope,
-                                        SmallVectorImpl<DIE *> &Children) {
-  DIE *ObjectPointer = NULL;
+static std::unique_ptr<DIE> constructVariableDIE(DwarfCompileUnit &TheCU,
+                                                 DbgVariable &DV,
+                                                 const LexicalScope &Scope,
+                                                 DIE *&ObjectPointer) {
+  auto Var = TheCU.constructVariableDIE(DV, Scope.isAbstractScope());
+  if (DV.isObjectPointer())
+    ObjectPointer = Var.get();
+  return Var;
+}
+
+DIE *DwarfDebug::createScopeChildrenDIE(
+    DwarfCompileUnit &TheCU, LexicalScope *Scope,
+    SmallVectorImpl<std::unique_ptr<DIE>> &Children) {
+  DIE *ObjectPointer = nullptr;
 
   // Collect arguments for current function.
   if (LScopes.isCurrentFunctionScope(Scope)) {
     for (DbgVariable *ArgDV : CurrentFnArguments)
       if (ArgDV)
-        if (DIE *Arg =
-                TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) {
-          Children.push_back(Arg);
-          if (ArgDV->isObjectPointer())
-            ObjectPointer = Arg;
-        }
+        Children.push_back(
+            constructVariableDIE(TheCU, *ArgDV, *Scope, ObjectPointer));
 
     // If this is a variadic function, add an unspecified parameter.
     DISubprogram SP(Scope->getScopeNode());
     DIArray FnArgs = SP.getType().getTypeArray();
     if (FnArgs.getElement(FnArgs.getNumElements() - 1)
             .isUnspecifiedParameter()) {
-      DIE *Ellipsis = new DIE(dwarf::DW_TAG_unspecified_parameters);
-      Children.push_back(Ellipsis);
+      Children.push_back(
+          make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
    }
  }
 
   // Collect lexical scope children first.
   for (DbgVariable *DV : ScopeVariables.lookup(Scope))
-    if (DIE *Variable = TheCU->constructVariableDIE(*DV,
-                                                    Scope->isAbstractScope())) {
-      Children.push_back(Variable);
-      if (DV->isObjectPointer())
-        ObjectPointer = Variable;
-    }
+    Children.push_back(constructVariableDIE(TheCU, *DV, *Scope, ObjectPointer));
+
   for (LexicalScope *LS : Scope->getChildren())
-    if (DIE *Nested = constructScopeDIE(TheCU, LS))
-      Children.push_back(Nested);
+    if (std::unique_ptr<DIE> Nested = constructScopeDIE(TheCU, LS))
+      Children.push_back(std::move(Nested));
   return ObjectPointer;
 }
 
+void DwarfDebug::createAndAddScopeChildren(DwarfCompileUnit &TheCU,
+                                           LexicalScope *Scope, DIE &ScopeDIE) {
+  // We create children when the scope DIE is not null.
+  SmallVector<std::unique_ptr<DIE>, 8> Children;
+  if (DIE *ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children))
+    TheCU.addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+
+  // Add children
+  for (auto &I : Children)
+    ScopeDIE.addChild(std::move(I));
+}
+
+void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
+                                                     LexicalScope *Scope) {
+  assert(Scope && Scope->getScopeNode());
+  assert(Scope->isAbstractScope());
+  assert(!Scope->getInlinedAt());
+
+  DISubprogram SP(Scope->getScopeNode());
+
+  ProcessedSPNodes.insert(SP);
+
+  DIE *&AbsDef = AbstractSPDies[SP];
+  if (AbsDef)
+    return;
+
+  // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+  // was inlined from another compile unit.
+  DwarfCompileUnit &SPCU = *SPMap[SP];
+  DIE *ContextDIE;
+
+  // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
+  // the important distinction that the DIDescriptor is not associated with the
+  // DIE (since the DIDescriptor will be associated with the concrete DIE, if
+  // any). It could be refactored to some common utility function.
+  if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+    ContextDIE = &SPCU.getUnitDie();
+    SPCU.getOrCreateSubprogramDIE(SPDecl);
+  } else
+    ContextDIE = SPCU.getOrCreateContextDIE(resolve(SP.getContext()));
+
+  // Passing null as the associated DIDescriptor because the abstract definition
+  // shouldn't be found by lookup.
+  AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE,
+                                 DIDescriptor());
+  SPCU.applySubprogramAttributes(SP, *AbsDef);
+  SPCU.addGlobalName(SP.getName(), *AbsDef, resolve(SP.getContext()));
+
+  SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+  createAndAddScopeChildren(SPCU, Scope, *AbsDef);
+}
+
+DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU,
+                                             LexicalScope *Scope) {
+  assert(Scope && Scope->getScopeNode());
+  assert(!Scope->getInlinedAt());
+  assert(!Scope->isAbstractScope());
+  DISubprogram Sub(Scope->getScopeNode());
+
+  assert(Sub.isSubprogram());
+
+  ProcessedSPNodes.insert(Sub);
+
+  DIE &ScopeDIE = updateSubprogramScopeDIE(TheCU, Sub);
+
+  createAndAddScopeChildren(TheCU, Scope, ScopeDIE);
+
+  return ScopeDIE;
+}
+
 // Construct a DIE for this scope.
-DIE *DwarfDebug::constructScopeDIE(DwarfCompileUnit *TheCU,
-                                   LexicalScope *Scope) {
+std::unique_ptr<DIE> DwarfDebug::constructScopeDIE(DwarfCompileUnit &TheCU,
+                                                   LexicalScope *Scope) {
   if (!Scope || !Scope->getScopeNode())
-    return NULL;
+    return nullptr;
 
   DIScope DS(Scope->getScopeNode());
 
-  SmallVector<DIE *, 8> Children;
-  DIE *ObjectPointer = NULL;
-  bool ChildrenCreated = false;
+  assert((Scope->getInlinedAt() || !DS.isSubprogram()) &&
+         "Only handle inlined subprograms here, use "
+         "constructSubprogramScopeDIE for non-inlined "
+         "subprograms");
+
+  SmallVector<std::unique_ptr<DIE>, 8> Children;
 
   // We try to create the scope DIE first, then the children DIEs. This will
   // avoid creating un-used children then removing them later when we find out
   // the scope DIE is null.
-  DIE *ScopeDIE = NULL;
-  if (Scope->getInlinedAt())
+  std::unique_ptr<DIE> ScopeDIE;
+  if (Scope->getParent() && DS.isSubprogram()) {
     ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
-  else if (DS.isSubprogram()) {
-    ProcessedSPNodes.insert(DS);
-    if (Scope->isAbstractScope()) {
-      ScopeDIE = TheCU->getDIE(DS);
-      // Note down abstract DIE.
-      if (ScopeDIE)
-        AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
-    } else
-      ScopeDIE = updateSubprogramScopeDIE(TheCU, DISubprogram(DS));
+    if (!ScopeDIE)
+      return nullptr;
+    // We create children when the scope DIE is not null.
+    createScopeChildrenDIE(TheCU, Scope, Children);
   } else {
     // Early exit when we know the scope DIE is going to be null.
     if (isLexicalScopeDIENull(Scope))
-      return NULL;
+      return nullptr;
 
     // We create children here when we know the scope DIE is not going to be
     // null and the children will be added to the scope DIE.
-    ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children);
-    ChildrenCreated = true;
+    createScopeChildrenDIE(TheCU, Scope, Children);
 
     // There is no need to emit empty lexical block DIE.
     std::pair<ImportedEntityMap::const_iterator,
               ImportedEntityMap::const_iterator> Range =
-        std::equal_range(
-            ScopesWithImportedEntities.begin(),
-            ScopesWithImportedEntities.end(),
-            std::pair<const MDNode *, const MDNode *>(DS, (const MDNode *)0),
-            less_first());
+        std::equal_range(ScopesWithImportedEntities.begin(),
+                         ScopesWithImportedEntities.end(),
+                         std::pair<const MDNode *, const MDNode *>(DS, nullptr),
+                         less_first());
     if (Children.empty() && Range.first == Range.second)
-      return NULL;
+      return nullptr;
     ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
     assert(ScopeDIE && "Scope DIE should not be null.");
     for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second;
          ++i)
-      constructImportedEntityDIE(TheCU, i->second, ScopeDIE);
+      constructImportedEntityDIE(TheCU, i->second, *ScopeDIE);
   }
 
-  if (!ScopeDIE) {
-    assert(Children.empty() &&
-           "We create children only when the scope DIE is not null.");
-    return NULL;
-  }
-  if (!ChildrenCreated)
-    // We create children when the scope DIE is not null.
-    ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children);
 
   // Add children
-  for (DIE *I : Children)
-    ScopeDIE->addChild(I);
-
-  if (DS.isSubprogram() && ObjectPointer != NULL)
-    TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer);
+  for (auto &I : Children)
+    ScopeDIE->addChild(std::move(I));
 
   return ScopeDIE;
 }
 
-void DwarfDebug::addGnuPubAttributes(DwarfUnit *U, DIE *D) const {
+void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
   if (!GenerateGnuPubSections)
     return;
 
-  U->addFlag(D, dwarf::DW_AT_GNU_pubnames);
+  U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
 }
 
 // Create new DwarfCompileUnit for the given metadata node with tag
 // DW_TAG_compile_unit.
-DwarfCompileUnit *DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
+DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
   StringRef FN = DIUnit.getFilename();
   CompilationDir = DIUnit.getDirectory();
 
-  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  DwarfCompileUnit *NewCU = new DwarfCompileUnit(
-      InfoHolder.getUnits().size(), Die, DIUnit, Asm, this, &InfoHolder);
-  InfoHolder.addUnit(NewCU);
+  auto OwnedUnit = make_unique<DwarfCompileUnit>(
+      InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
+  DwarfCompileUnit &NewCU = *OwnedUnit;
+  DIE &Die = NewCU.getUnitDie();
+  InfoHolder.addUnit(std::move(OwnedUnit));
 
   // LTO with assembly output shares a single line table amongst multiple CUs.
   // To avoid the compilation directory being ambiguous, let the line table
@@ -701,116 +638,89 @@ DwarfCompileUnit *DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
   // compilation directory.
   if (!Asm->OutStreamer.hasRawTextSupport() || SingleCU)
     Asm->OutStreamer.getContext().setMCLineTableCompilationDir(
-        NewCU->getUniqueID(), CompilationDir);
+        NewCU.getUniqueID(), CompilationDir);
 
-  NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
-  NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
-                 DIUnit.getLanguage());
-  NewCU->addString(Die, dwarf::DW_AT_name, FN);
+  NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
+  NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+                DIUnit.getLanguage());
+  NewCU.addString(Die, dwarf::DW_AT_name, FN);
 
   if (!useSplitDwarf()) {
-    NewCU->initStmtList(DwarfLineSectionSym);
+    NewCU.initStmtList(DwarfLineSectionSym);
 
     // If we're using split dwarf the compilation dir is going to be in the
     // skeleton CU and so we don't need to duplicate it here.
     if (!CompilationDir.empty())
-      NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+      NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
 
     addGnuPubAttributes(NewCU, Die);
   }
 
   if (DIUnit.isOptimized())
-    NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
+    NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
 
   StringRef Flags = DIUnit.getFlags();
   if (!Flags.empty())
-    NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
+    NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
 
   if (unsigned RVer = DIUnit.getRunTimeVersion())
-    NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
-                   dwarf::DW_FORM_data1, RVer);
+    NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+                  dwarf::DW_FORM_data1, RVer);
 
   if (!FirstCU)
-    FirstCU = NewCU;
+    FirstCU = &NewCU;
 
   if (useSplitDwarf()) {
-    NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
-                       DwarfInfoDWOSectionSym);
-    NewCU->setSkeleton(constructSkeletonCU(NewCU));
+    NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
+                      DwarfInfoDWOSectionSym);
+    NewCU.setSkeleton(constructSkeletonCU(NewCU));
   } else
-    NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
-                       DwarfInfoSectionSym);
+    NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
+                      DwarfInfoSectionSym);
 
-  CUMap.insert(std::make_pair(DIUnit, NewCU));
-  CUDieMap.insert(std::make_pair(Die, NewCU));
+  CUMap.insert(std::make_pair(DIUnit, &NewCU));
+  CUDieMap.insert(std::make_pair(&Die, &NewCU));
   return NewCU;
 }
 
-// Construct subprogram DIE.
-void DwarfDebug::constructSubprogramDIE(DwarfCompileUnit *TheCU,
-                                        const MDNode *N) {
-  // FIXME: We should only call this routine once, however, during LTO if a
-  // program is defined in multiple CUs we could end up calling it out of
-  // beginModule as we walk the CUs.
-
-  DwarfCompileUnit *&CURef = SPMap[N];
-  if (CURef)
-    return;
-  CURef = TheCU;
-
-  DISubprogram SP(N);
-  if (!SP.isDefinition())
-    // This is a method declaration which will be handled while constructing
-    // class type.
-    return;
-
-  DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP);
-
-  // Expose as a global name.
-  TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext()));
-}
-
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
                                             const MDNode *N) {
   DIImportedEntity Module(N);
   assert(Module.Verify());
-  if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext()))
-    constructImportedEntityDIE(TheCU, Module, D);
+  if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext()))
+    constructImportedEntityDIE(TheCU, Module, *D);
 }
 
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
-                                            const MDNode *N, DIE *Context) {
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
+                                            const MDNode *N, DIE &Context) {
   DIImportedEntity Module(N);
   assert(Module.Verify());
   return constructImportedEntityDIE(TheCU, Module, Context);
 }
 
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
                                             const DIImportedEntity &Module,
-                                            DIE *Context) {
+                                            DIE &Context) {
   assert(Module.Verify() &&
          "Use one of the MDNode * overloads to handle invalid metadata");
-  assert(Context && "Should always have a context for an imported_module");
-  DIE *IMDie = new DIE(Module.getTag());
-  TheCU->insertDIE(Module, IMDie);
+  DIE &IMDie = TheCU.createAndAddDIE(Module.getTag(), Context, Module);
   DIE *EntityDie;
   DIDescriptor Entity = resolve(Module.getEntity());
   if (Entity.isNameSpace())
-    EntityDie = TheCU->getOrCreateNameSpace(DINameSpace(Entity));
+    EntityDie = TheCU.getOrCreateNameSpace(DINameSpace(Entity));
   else if (Entity.isSubprogram())
-    EntityDie = TheCU->getOrCreateSubprogramDIE(DISubprogram(Entity));
+    EntityDie = TheCU.getOrCreateSubprogramDIE(DISubprogram(Entity));
   else if (Entity.isType())
-    EntityDie = TheCU->getOrCreateTypeDIE(DIType(Entity));
+    EntityDie = TheCU.getOrCreateTypeDIE(DIType(Entity));
   else
-    EntityDie = TheCU->getDIE(Entity);
-  TheCU->addSourceLine(IMDie, Module.getLineNumber(),
-                       Module.getContext().getFilename(),
-                       Module.getContext().getDirectory());
-  TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie);
+    EntityDie = TheCU.getDIE(Entity);
+  TheCU.addSourceLine(IMDie, Module.getLineNumber(),
+                      Module.getContext().getFilename(),
+                      Module.getContext().getDirectory());
+  TheCU.addDIEEntry(IMDie, dwarf::DW_AT_import, *EntityDie);
   StringRef Name = Module.getName();
   if (!Name.empty())
-    TheCU->addString(IMDie, dwarf::DW_AT_name, Name);
-  Context->addChild(IMDie);
+    TheCU.addString(IMDie, dwarf::DW_AT_name, Name);
 }
 
 // Emit all Dwarf sections that should come prior to the content. Create
@@ -836,7 +746,7 @@ void DwarfDebug::beginModule() {
 
   for (MDNode *N : CU_Nodes->operands()) {
     DICompileUnit CUNode(N);
-    DwarfCompileUnit *CU = constructDwarfCompileUnit(CUNode);
+    DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
     DIArray ImportedEntities = CUNode.getImportedEntities();
     for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
       ScopesWithImportedEntities.push_back(std::make_pair(
@@ -846,20 +756,20 @@ void DwarfDebug::beginModule() {
               ScopesWithImportedEntities.end(), less_first());
     DIArray GVs = CUNode.getGlobalVariables();
     for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
-      CU->createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
+      CU.createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
     DIArray SPs = CUNode.getSubprograms();
     for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
-      constructSubprogramDIE(CU, SPs.getElement(i));
+      SPMap.insert(std::make_pair(SPs.getElement(i), &CU));
     DIArray EnumTypes = CUNode.getEnumTypes();
     for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
-      CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+      CU.getOrCreateTypeDIE(EnumTypes.getElement(i));
     DIArray RetainedTypes = CUNode.getRetainedTypes();
     for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) {
       DIType Ty(RetainedTypes.getElement(i));
       // The retained types array by design contains pointers to
       // MDNodes rather than DIRefs. Unique them here.
       DIType UniqueTy(resolve(Ty.getRef()));
-      CU->getOrCreateTypeDIE(UniqueTy);
+      CU.getOrCreateTypeDIE(UniqueTy);
     }
     // Emit imported_modules last so that the relevant context is already
     // available.
@@ -874,20 +784,41 @@ void DwarfDebug::beginModule() {
   SectionMap[Asm->getObjFileLowering().getTextSection()];
 }
 
-// Attach DW_AT_inline attribute with inlined subprogram DIEs.
-void DwarfDebug::computeInlinedDIEs() {
-  // Attach DW_AT_inline attribute with inlined subprogram DIEs.
-  for (DIE *ISP : InlinedSubprogramDIEs)
-    FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+void DwarfDebug::finishSubprogramDefinitions() {
+  const Module *M = MMI->getModule();
 
-  for (const auto &AI : AbstractSPDies) {
-    DIE *ISP = AI.second;
-    if (InlinedSubprogramDIEs.count(ISP))
-      continue;
-    FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+  NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+  for (MDNode *N : CU_Nodes->operands()) {
+    DICompileUnit TheCU(N);
+    // Construct subprogram DIE and add variables DIEs.
+    DwarfCompileUnit *SPCU =
+        static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
+    DIArray Subprograms = TheCU.getSubprograms();
+    for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
+      DISubprogram SP(Subprograms.getElement(i));
+      // Perhaps the subprogram is in another CU (such as due to comdat
+      // folding, etc), in which case ignore it here.
+      if (SPMap[SP] != SPCU)
+        continue;
+      DIE *D = SPCU->getDIE(SP);
+      if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) {
+        if (D)
+          // If this subprogram has an abstract definition, reference that
+          SPCU->addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
+      } else {
+        if (!D)
+          // Lazily construct the subprogram if we didn't see either concrete or
+          // inlined versions during codegen.
+          D = SPCU->getOrCreateSubprogramDIE(SP);
+        // And attach the attributes
+        SPCU->applySubprogramAttributes(SP, *D);
+        SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext()));
+      }
+    }
   }
 }
 
 // Collect info for variables that were optimized out.
 void DwarfDebug::collectDeadVariables() {
   const Module *M = MMI->getModule();
 
@@ -895,34 +826,32 @@ void DwarfDebug::collectDeadVariables() {
   if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
     for (MDNode *N : CU_Nodes->operands()) {
       DICompileUnit TheCU(N);
+      // Construct subprogram DIE and add variables DIEs.
+      DwarfCompileUnit *SPCU =
+          static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
+      assert(SPCU && "Unable to find Compile Unit!");
       DIArray Subprograms = TheCU.getSubprograms();
       for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
         DISubprogram SP(Subprograms.getElement(i));
         if (ProcessedSPNodes.count(SP) != 0)
           continue;
-        if (!SP.isSubprogram())
-          continue;
-        if (!SP.isDefinition())
-          continue;
+        assert(SP.isSubprogram() &&
+               "CU's subprogram list contains a non-subprogram");
+        assert(SP.isDefinition() &&
+               "CU's subprogram list contains a subprogram declaration");
         DIArray Variables = SP.getVariables();
         if (Variables.getNumElements() == 0)
          continue;
 
-        // Construct subprogram DIE and add variables DIEs.
-        DwarfCompileUnit *SPCU =
-            static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
-        assert(SPCU && "Unable to find Compile Unit!");
-        // FIXME: See the comment in constructSubprogramDIE about duplicate
-        // subprogram DIEs.
-        constructSubprogramDIE(SPCU, SP);
-        DIE *SPDIE = SPCU->getDIE(SP);
+        DIE *SPDIE = AbstractSPDies.lookup(SP);
+        if (!SPDIE)
+          SPDIE = SPCU->getDIE(SP);
+        assert(SPDIE);
        for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
          DIVariable DV(Variables.getElement(vi));
-          if (!DV.isVariable())
-            continue;
-          DbgVariable NewVar(DV, NULL, this);
-          if (DIE *VariableDIE = SPCU->constructVariableDIE(NewVar, false))
-            SPDIE->addChild(VariableDIE);
+          assert(DV.isVariable());
+          DbgVariable NewVar(DV, nullptr, this);
+          SPDIE->addChild(SPCU->constructVariableDIE(NewVar));
        }
      }
    }
@@ -930,28 +859,27 @@ void DwarfDebug::collectDeadVariables() {
   }
 }
 
 void DwarfDebug::finalizeModuleInfo() {
+  finishSubprogramDefinitions();
+
   // Collect info for variables that were optimized out.
   collectDeadVariables();
 
-  // Attach DW_AT_inline attribute with inlined subprogram DIEs.
-  computeInlinedDIEs();
-
   // Handle anything that needs to be done on a per-unit basis after
   // all other generation.
-  for (DwarfUnit *TheU : getUnits()) {
+  for (const auto &TheU : getUnits()) {
     // Emit DW_AT_containing_type attribute to connect types with their
     // vtable holding type.
     TheU->constructContainingTypeDIEs();
 
     // Add CU specific attributes if we need to add any.
-    if (TheU->getUnitDie()->getTag() == dwarf::DW_TAG_compile_unit) {
+    if (TheU->getUnitDie().getTag() == dwarf::DW_TAG_compile_unit) {
       // If we're splitting the dwarf out now that we've got the entire
       // CU then add the dwo id to it.
       DwarfCompileUnit *SkCU =
          static_cast<DwarfCompileUnit *>(TheU->getSkeleton());
      if (useSplitDwarf()) {
        // Emit a unique identifier for this CU.
-        uint64_t ID = DIEHash(Asm).computeCUSignature(*TheU->getUnitDie());
+        uint64_t ID = DIEHash(Asm).computeCUSignature(TheU->getUnitDie());
        TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
                      dwarf::DW_FORM_data8, ID);
        SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
@@ -959,12 +887,12 @@ void DwarfDebug::finalizeModuleInfo() {
 
         // We don't keep track of which addresses are used in which CU so this
         // is a bit pessimistic under LTO.
-        if (!InfoHolder.getAddrPool()->empty())
-          addSectionLabel(Asm, SkCU, SkCU->getUnitDie(),
+        if (!AddrPool.isEmpty())
+          addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
                           dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym,
                           DwarfAddrSectionSym);
         if (!TheU->getRangeLists().empty())
-          addSectionLabel(Asm, SkCU, SkCU->getUnitDie(),
+          addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
                           dwarf::DW_AT_GNU_ranges_base,
                           DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym);
       }
 
@@ -975,26 +903,27 @@ void DwarfDebug::finalizeModuleInfo() {
       // FIXME: We should use ranges allow reordering of code ala
       // .subsections_via_symbols in mach-o. This would mean turning on
       // ranges for all subprogram DIEs for mach-o.
-      DwarfCompileUnit *U = SkCU ? SkCU : static_cast<DwarfCompileUnit *>(TheU);
+      DwarfCompileUnit &U =
+          SkCU ? *SkCU : static_cast<DwarfCompileUnit &>(*TheU);
       unsigned NumRanges = TheU->getRanges().size();
       if (NumRanges) {
         if (NumRanges > 1) {
-          addSectionLabel(Asm, U, U->getUnitDie(), dwarf::DW_AT_ranges,
-                          Asm->GetTempSymbol("cu_ranges", U->getUniqueID()),
+          addSectionLabel(*Asm, U, U.getUnitDie(), dwarf::DW_AT_ranges,
+                          Asm->GetTempSymbol("cu_ranges", U.getUniqueID()),
                           DwarfDebugRangeSectionSym);
 
           // A DW_AT_low_pc attribute may also be specified in combination with
           // DW_AT_ranges to specify the default base address for use in
           // location lists (see Section 2.6.2) and range lists (see Section
           // 2.17.3).
-          U->addUInt(U->getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
-                     0);
+          U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+                    0);
         } else {
           RangeSpan &Range = TheU->getRanges().back();
-          U->addLocalLabelAddress(U->getUnitDie(), dwarf::DW_AT_low_pc,
-                                  Range.getStart());
-          U->addLabelDelta(U->getUnitDie(), dwarf::DW_AT_high_pc,
-                           Range.getEnd(), Range.getStart());
+          U.addLocalLabelAddress(U.getUnitDie(), dwarf::DW_AT_low_pc,
+                                 Range.getStart());
+          U.addLabelDelta(U.getUnitDie(), dwarf::DW_AT_high_pc, Range.getEnd(),
+                          Range.getStart());
         }
       }
     }
@@ -1018,7 +947,7 @@ void DwarfDebug::endSections() {
     // Some symbols (e.g. common/bss on mach-o) can have no section but still
     // appear in the output. This sucks as we rely on sections to build
     // arange spans. We can do it without, but it's icky.
-    SectionMap[NULL].push_back(SCU);
+    SectionMap[nullptr].push_back(SCU);
   }
 }
 
@@ -1036,7 +965,7 @@ void DwarfDebug::endSections() {
   // Add terminating symbols for each section.
   for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) {
     const MCSection *Section = Sections[ID];
-    MCSymbol *Sym = NULL;
+    MCSymbol *Sym = nullptr;
 
     if (Section) {
       // We can't call MCSection::getLabelEndName, as it's only safe to do so
@@ -1049,14 +978,14 @@ void DwarfDebug::endSections() {
     }
 
     // Insert a final terminator.
-    SectionMap[Section].push_back(SymbolCU(NULL, Sym));
+    SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
   }
 }
 
 // Emit all Dwarf sections that should come after the content.
 void DwarfDebug::endModule() {
-  assert(CurFn == 0);
-  assert(CurMI == 0);
+  assert(CurFn == nullptr);
+  assert(CurMI == nullptr);
 
   if (!FirstCU)
     return;
@@ -1089,7 +1018,7 @@ void DwarfDebug::endModule() {
     emitDebugAbbrevDWO();
     emitDebugLineDWO();
     // Emit DWO addresses.
-    InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
+    AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
    emitDebugLocDWO();
  } else
    // Emit info into a debug loc section.
@@ -1111,29 +1040,34 @@ void DwarfDebug::endModule() {
 
   // clean up.
   SPMap.clear();
+  AbstractVariables.clear();
 
   // Reset these for the next Module if we have one.
-  FirstCU = NULL;
+  FirstCU = nullptr;
 }
 
 // Find abstract variable, if any, associated with Var.
 DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
                                               DebugLoc ScopeLoc) {
+  return findAbstractVariable(DV, ScopeLoc.getScope(DV->getContext()));
+}
+
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
+                                              const MDNode *ScopeNode) {
   LLVMContext &Ctx = DV->getContext();
   // More than one inlined variable corresponds to one abstract variable.
   DIVariable Var = cleanseInlinedVariable(DV, Ctx);
-  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
-  if (AbsDbgVariable)
-    return AbsDbgVariable;
+  auto I = AbstractVariables.find(Var);
+  if (I != AbstractVariables.end())
+    return I->second.get();
 
-  LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx));
+  LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode);
   if (!Scope)
-    return NULL;
+    return nullptr;
 
-  AbsDbgVariable = new DbgVariable(Var, NULL, this);
-  addScopeVariable(Scope, AbsDbgVariable);
-  AbstractVariables[Var] = AbsDbgVariable;
-  return AbsDbgVariable;
+  auto AbsDbgVariable = make_unique<DbgVariable>(Var, nullptr, this);
+  addScopeVariable(Scope, AbsDbgVariable.get());
+  return (AbstractVariables[Var] = std::move(AbsDbgVariable)).get();
 }
 
 // If Var is a current function argument then add it to CurrentFnArguments list.
@@ -1169,7 +1103,7 @@ void DwarfDebug::collectVariableInfoFromMMITable(
     LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
 
     // If variable scope is not found then skip this variable.
-    if (Scope == 0)
+    if (!Scope)
      continue;
 
    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc);
@@ -1177,28 +1111,12 @@ void DwarfDebug::collectVariableInfoFromMMITable(
     RegVar->setFrameIndex(VI.Slot);
     if (!addCurrentFnArgument(RegVar, Scope))
       addScopeVariable(Scope, RegVar);
-    if (AbsDbgVariable)
-      AbsDbgVariable->setFrameIndex(VI.Slot);
   }
 }
 
-// Return true if debug value, encoded by DBG_VALUE instruction, is in a
-// defined reg.
-static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
-  assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
-  return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() &&
-         MI->getOperand(0).getReg() &&
-         (MI->getOperand(1).isImm() ||
-          (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U));
-}
-
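findAbstractVariable now keeps the abstract DbgVariable objects alive through an owning map and hands out raw pointers, which is why the AbstractVariables.clear() added in endModule above is all the cleanup that is needed between modules. A minimal sketch of the lookup-or-create pattern, with a trivial stand-in for DbgVariable:

    #include <map>
    #include <memory>

    struct Var { int Id; };
    using VarMap = std::map<int, std::unique_ptr<Var>>;

    Var *findOrCreate(VarMap &M, int Key) {
      auto I = M.find(Key);
      if (I != M.end())
        return I->second.get(); // already created
      auto NewVar = std::make_unique<Var>(Var{Key});
      return (M[Key] = std::move(NewVar)).get();
    }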
-static DebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
-                                      const MCSymbol *FLabel,
-                                      const MCSymbol *SLabel,
-                                      const MachineInstr *MI,
-                                      DwarfCompileUnit *Unit) {
-  const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
+  const MDNode *Var = MI->getDebugVariable();
 
   assert(MI->getNumOperands() == 3);
   if (MI->getOperand(0).isReg()) {
@@ -1209,14 +1127,14 @@ static DebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
       MLoc.set(MI->getOperand(0).getReg());
     else
       MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
-    return DebugLocEntry(FLabel, SLabel, MLoc, Var, Unit);
+    return DebugLocEntry::Value(Var, MLoc);
  }
  if (MI->getOperand(0).isImm())
-    return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm(), Unit);
+    return DebugLocEntry::Value(Var, MI->getOperand(0).getImm());
  if (MI->getOperand(0).isFPImm())
-    return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm(), Unit);
+    return DebugLocEntry::Value(Var, MI->getOperand(0).getFPImm());
  if (MI->getOperand(0).isCImm())
-    return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm(), Unit);
+    return DebugLocEntry::Value(Var, MI->getOperand(0).getCImm());
 
  llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
 }
@@ -1224,35 +1142,38 @@ static DebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
 // Find variables for each lexical scope.
 void
 DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
+  LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+  DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
   // Grab the variable info that was squirreled away in the MMI side-table.
   collectVariableInfoFromMMITable(Processed);
 
-  for (const MDNode *Var : UserVariables) {
-    if (Processed.count(Var))
+  for (const auto &I : DbgValues) {
+    DIVariable DV(I.first);
+    if (Processed.count(DV))
       continue;
 
-    // History contains relevant DBG_VALUE instructions for Var and instructions
-    // clobbering it.
-    SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var];
-    if (History.empty())
+    // Instruction ranges, specifying where DV is accessible.
+    const auto &Ranges = I.second;
+    if (Ranges.empty())
       continue;
 
-    const MachineInstr *MInsn = History.front();
-
-    DIVariable DV(Var);
-    LexicalScope *Scope = NULL;
+    LexicalScope *Scope = nullptr;
     if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
         DISubprogram(DV.getContext()).describes(CurFn->getFunction()))
       Scope = LScopes.getCurrentFunctionScope();
-    else if (MDNode *IA = DV.getInlinedAt())
-      Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
-    else
-      Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
+    else if (MDNode *IA = DV.getInlinedAt()) {
+      DebugLoc DL = DebugLoc::getFromDILocation(IA);
+      Scope = LScopes.findInlinedScope(DebugLoc::get(
+          DL.getLine(), DL.getCol(), DV.getContext(), IA));
+    } else
+      Scope = LScopes.findLexicalScope(DV.getContext());
    // If variable scope is not found then skip this variable.
    if (!Scope)
      continue;
 
    Processed.insert(DV);
+    const MachineInstr *MInsn = Ranges.front().first;
    assert(MInsn->isDebugValue() && "History must begin with debug value");
    DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
    DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
@@ -1261,9 +1182,8 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
    if (AbsVar)
      AbsVar->setMInsn(MInsn);
 
-    // Simplify ranges that are fully coalesced.
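The getDebugLocValue() refactor above is the heart of this change: what used to be one DebugLocEntry carrying labels, value, and unit is split so that a DebugLocEntry owns its begin/end labels while a nested Value describes the variable's contents. A sketch of the Value side, consistent with the accessors used later in this file (isInt/getInt, isLocation/getLoc); field and enumerator names are assumptions:

```cpp
// Sketch: the tagged payload behind DebugLocEntry::Value.
class Value {
  const MDNode *Variable; // The DIVariable described by this value.
  enum { E_Location, E_Integer, E_ConstantFP, E_ConstantInt } EntryKind;
  union {
    int64_t Int;
    const ConstantFP *CFP;
    const ConstantInt *CIP;
  } Constants;           // Meaningful for the constant kinds only.
  MachineLocation Loc;   // Register, or register + offset (indirect).

public:
  Value(const MDNode *Var, int64_t i)
      : Variable(Var), EntryKind(E_Integer) { Constants.Int = i; }
  Value(const MDNode *Var, MachineLocation L)
      : Variable(Var), EntryKind(E_Location), Loc(L) {}
  bool isInt() const { return EntryKind == E_Integer; }
  bool isLocation() const { return EntryKind == E_Location; }
  int64_t getInt() const { return Constants.Int; }
  MachineLocation getLoc() const { return Loc; }
  const MDNode *getVariable() const { return Variable; }
};
```

The loop that follows stitches these values to label ranges, one .debug_loc entry per range.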
-    if (History.size() <= 1 ||
-        (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) {
+    // Check if the first DBG_VALUE is valid for the rest of the function.
+    if (Ranges.size() == 1 && Ranges.front().second == nullptr) {
      RegVar->setMInsn(MInsn);
      continue;
    }
@@ -1276,58 +1196,48 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
    LocList.Label =
        Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
    SmallVector<DebugLocEntry, 4> &DebugLoc = LocList.List;
-    for (SmallVectorImpl<const MachineInstr *>::const_iterator
-             HI = History.begin(),
-             HE = History.end();
-         HI != HE; ++HI) {
-      const MachineInstr *Begin = *HI;
+    for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+      const MachineInstr *Begin = I->first;
+      const MachineInstr *End = I->second;
      assert(Begin->isDebugValue() && "Invalid History entry");
 
-      // Check if DBG_VALUE is truncating a range.
+      // Check if a variable is inaccessible in this range.
      if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
          !Begin->getOperand(0).getReg())
        continue;
 
-      // Compute the range for a register location.
-      const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
-      const MCSymbol *SLabel = 0;
-
-      if (HI + 1 == HE)
-        // If Begin is the last instruction in History then its value is valid
-        // until the end of the function.
-        SLabel = FunctionEndSym;
-      else {
-        const MachineInstr *End = HI[1];
-        DEBUG(dbgs() << "DotDebugLoc Pair:\n"
-                     << "\t" << *Begin << "\t" << *End << "\n");
-        if (End->isDebugValue())
-          SLabel = getLabelBeforeInsn(End);
-        else {
-          // End is a normal instruction clobbering the range.
-          SLabel = getLabelAfterInsn(End);
-          assert(SLabel && "Forgot label after clobber instruction");
-          ++HI;
-        }
-      }
+      const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
+      assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+
+      const MCSymbol *EndLabel;
+      if (End != nullptr)
+        EndLabel = getLabelAfterInsn(End);
+      else if (std::next(I) == Ranges.end())
+        EndLabel = FunctionEndSym;
+      else
+        EndLabel = getLabelBeforeInsn(std::next(I)->first);
+      assert(EndLabel && "Forgot label after instruction ending a range!");
 
-      // The value is valid until the next DBG_VALUE or clobber.
-      LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
-      DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
-      DebugLocEntry Loc = getDebugLocEntry(Asm, FLabel, SLabel, Begin, TheCU);
+      DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+                   << "\t" << *Begin << "\t" << *End << "\n");
+      DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU);
      if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
        DebugLoc.push_back(std::move(Loc));
    }
  }
 
  // Collect info for variables that were optimized out.
-  LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
  DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables();
  for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
    DIVariable DV(Variables.getElement(i));
-    if (!DV || !DV.isVariable() || !Processed.insert(DV))
+    assert(DV.isVariable());
+    if (!Processed.insert(DV))
      continue;
    if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
-      addScopeVariable(Scope, new DbgVariable(DV, NULL, this));
+      addScopeVariable(
+          Scope,
+          new DbgVariable(DV, findAbstractVariable(DV, Scope->getScopeNode()),
+                          this));
  }
}
@@ -1345,7 +1255,7 @@ MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
 
 // Process beginning of an instruction.
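The DebugLoc.back().Merge(Loc) call above is what keeps .debug_loc lists compact: two consecutive entries collapse when the first ends exactly where the second begins and both describe the same value. A plausible shape for that predicate (member names are illustrative, not the actual DebugLocEntry layout):

```cpp
// Sketch: coalescing adjacent location-list entries.
bool DebugLocEntry::Merge(const DebugLocEntry &Next) {
  // Mergeable only if the label ranges touch and the payloads match.
  if (End != Next.Begin || Values != Next.Values)
    return false;
  End = Next.End; // Absorb the next range into this entry.
  return true;
}
```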
void DwarfDebug::beginInstruction(const MachineInstr *MI) { - assert(CurMI == 0); + assert(CurMI == nullptr); CurMI = MI; // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { @@ -1364,7 +1274,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); } else - recordSourceLine(0, 0, 0, 0); + recordSourceLine(0, 0, nullptr, 0); } } @@ -1389,15 +1299,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { // Process end of an instruction. void DwarfDebug::endInstruction() { - assert(CurMI != 0); + assert(CurMI != nullptr); // Don't create a new label after DBG_VALUE instructions. // They don't generate code. if (!CurMI->isDebugValue()) - PrevLabel = 0; + PrevLabel = nullptr; DenseMap::iterator I = LabelsAfterInsn.find(CurMI); - CurMI = 0; + CurMI = nullptr; // No label needed. if (I == LabelsAfterInsn.end()) @@ -1441,6 +1351,17 @@ void DwarfDebug::identifyScopeMarkers() { } } +static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { + // First known non-DBG_VALUE and non-frame setup location marks + // the beginning of the function body. + for (const auto &MBB : *MF) + for (const auto &MI : MBB) + if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + !MI.getDebugLoc().isUnknown()) + return MI.getDebugLoc(); + return DebugLoc(); +} + // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { @@ -1456,7 +1377,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (LScopes.empty()) return; - assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + assert(DbgValues.empty() && "DbgValues map wasn't cleaned!"); // Make sure that each lexical scope will have a begin/end label. identifyScopeMarkers(); @@ -1478,144 +1399,26 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionBeginSym); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - // LiveUserVar - Map physreg numbers to the MDNode they contain. - std::vector LiveUserVar(TRI->getNumRegs()); - - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) { - bool AtBlockEntry = true; - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MI = II; - - if (MI->isDebugValue()) { - assert(MI->getNumOperands() > 1 && "Invalid machine instruction!"); - - // Keep track of user variables. - const MDNode *Var = - MI->getOperand(MI->getNumOperands() - 1).getMetadata(); - - // Variable is in a register, we need to check for clobbers. - if (isDbgValueInDefinedReg(MI)) - LiveUserVar[MI->getOperand(0).getReg()] = Var; - - // Check the history of this variable. - SmallVectorImpl &History = DbgValues[Var]; - if (History.empty()) { - UserVariables.push_back(Var); - // The first mention of a function argument gets the FunctionBeginSym - // label, so arguments are visible when breaking at function entry. - DIVariable DV(Var); - if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && - getDISubprogram(DV.getContext()).describes(MF->getFunction())) - LabelsBeforeInsn[MI] = FunctionBeginSym; - } else { - // We have seen this variable before. Try to coalesce DBG_VALUEs. 
- const MachineInstr *Prev = History.back(); - if (Prev->isDebugValue()) { - // Coalesce identical entries at the end of History. - if (History.size() >= 2 && - Prev->isIdenticalTo(History[History.size() - 2])) { - DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << *Prev << "\t" - << *History[History.size() - 2] << "\n"); - History.pop_back(); - } - - // Terminate old register assignments that don't reach MI; - MachineFunction::const_iterator PrevMBB = Prev->getParent(); - if (PrevMBB != I && (!AtBlockEntry || std::next(PrevMBB) != I) && - isDbgValueInDefinedReg(Prev)) { - // Previous register assignment needs to terminate at the end of - // its basic block. - MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); - if (LastMI == PrevMBB->end()) { - // Drop DBG_VALUE for empty range. - DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" - << "\t" << *Prev << "\n"); - History.pop_back(); - } else if (std::next(PrevMBB) != PrevMBB->getParent()->end()) - // Terminate after LastMI. - History.push_back(LastMI); - } - } - } - History.push_back(MI); - } else { - // Not a DBG_VALUE instruction. - if (!MI->isPosition()) - AtBlockEntry = false; - - // First known non-DBG_VALUE and non-frame setup location marks - // the beginning of the function body. - if (!MI->getFlag(MachineInstr::FrameSetup) && - (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown())) - PrologEndLoc = MI->getDebugLoc(); - - // Check if the instruction clobbers any registers with debug vars. - for (const MachineOperand &MO : MI->operands()) { - if (!MO.isReg() || !MO.isDef() || !MO.getReg()) - continue; - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); - ++AI) { - unsigned Reg = *AI; - const MDNode *Var = LiveUserVar[Reg]; - if (!Var) - continue; - // Reg is now clobbered. - LiveUserVar[Reg] = 0; - - // Was MD last defined by a DBG_VALUE referring to Reg? - DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); - if (HistI == DbgValues.end()) - continue; - SmallVectorImpl &History = HistI->second; - if (History.empty()) - continue; - const MachineInstr *Prev = History.back(); - // Sanity-check: Register assignments are terminated at the end of - // their block. - if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent()) - continue; - // Is the variable still in Reg? - if (!isDbgValueInDefinedReg(Prev) || - Prev->getOperand(0).getReg() != Reg) - continue; - // Var is clobbered. Make sure the next instruction gets a label. - History.push_back(MI); - } - } - } - } - } + // Calculate history for local variables. + calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues); - for (auto &I : DbgValues) { - SmallVectorImpl &History = I.second; - if (History.empty()) + // Request labels for the full history. + for (const auto &I : DbgValues) { + const auto &Ranges = I.second; + if (Ranges.empty()) continue; - // Make sure the final register assignments are terminated. - const MachineInstr *Prev = History.back(); - if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { - const MachineBasicBlock *PrevMBB = Prev->getParent(); - MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); - if (LastMI == PrevMBB->end()) - // Drop DBG_VALUE for empty range. - History.pop_back(); - else if (PrevMBB != &PrevMBB->getParent()->back()) { - // Terminate after LastMI. - History.push_back(LastMI); - } - } - // Request labels for the full history. 
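All of the hand-rolled history tracking being removed here moves into the new calculateDbgValueHistory() (DbgValueHistoryCalculator.cpp, added by this patch). The map it fills, as this file consumes it, looks roughly like the following; the exact typedef names are assumptions:

```cpp
// Sketch of DbgValueHistoryMap as used by beginFunction() and
// collectVariableInfo(): per-variable lists of (Begin, End) instruction
// ranges. Begin is always a DBG_VALUE; End is the instruction that closes
// the range (a clobber or the next DBG_VALUE for the variable), or nullptr
// if the value survives to the end of the function.
class DbgValueHistoryMap {
public:
  typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
  typedef SmallVector<InstrRange, 4> InstrRanges;
  typedef DenseMap<const MDNode *, InstrRanges> InstrRangesMap;

private:
  InstrRangesMap VarInstrRanges;

public:
  bool empty() const { return VarInstrRanges.empty(); }
  void clear() { VarInstrRanges.clear(); }
  // Iteration yields (variable, ranges) pairs, in order of appearance.
  InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }
  InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); }
};
```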
- for (const MachineInstr *MI : History) { - if (MI->isDebugValue()) - requestLabelBeforeInsn(MI); - else - requestLabelAfterInsn(MI); + // The first mention of a function argument gets the FunctionBeginSym + // label, so arguments are visible when breaking at function entry. + DIVariable DV(I.first); + if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DV.getContext()).describes(MF->getFunction())) + LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym; + + for (const auto &Range : Ranges) { + requestLabelBeforeInsn(Range.first); + if (Range.second) + requestLabelAfterInsn(Range.second); } } @@ -1623,6 +1426,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { PrevLabel = FunctionBeginSym; // Record beginning of function. + PrologEndLoc = findPrologueEndLoc(MF); if (!PrologEndLoc.isUnknown()) { DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext()); @@ -1671,11 +1475,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Every beginFunction(MF) call should be followed by an endFunction(MF) call, // though the beginFunction may not be called at all. // We should handle both cases. - if (CurFn == 0) + if (!CurFn) CurFn = MF; else assert(CurFn == MF); - assert(CurFn != 0); + assert(CurFn != nullptr); if (!MMI->hasDebugInfo() || LScopes.empty()) { // If we don't have a lexical scope for this function then there will @@ -1683,7 +1487,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // previously used section to nullptr. PrevSection = nullptr; PrevCU = nullptr; - CurFn = 0; + CurFn = nullptr; return; } @@ -1699,55 +1503,50 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { collectVariableInfo(ProcessedVars); LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); - assert(TheCU && "Unable to find compile unit!"); + DwarfCompileUnit &TheCU = *SPMap.lookup(FnScope->getScopeNode()); // Construct abstract scopes. for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { DISubprogram SP(AScope->getScopeNode()); - if (SP.isSubprogram()) { - // Collect info for variables that were optimized out. - DIArray Variables = SP.getVariables(); - for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { - DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.isVariable() || !ProcessedVars.insert(DV)) - continue; - // Check that DbgVariable for DV wasn't created earlier, when - // findAbstractVariable() was called for inlined instance of DV. - LLVMContext &Ctx = DV->getContext(); - DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx); - if (AbstractVariables.lookup(CleanDV)) - continue; - if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); - } + if (!SP.isSubprogram()) + continue; + // Collect info for variables that were optimized out. 
+ DIArray Variables = SP.getVariables(); + for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { + DIVariable DV(Variables.getElement(i)); + assert(DV && DV.isVariable()); + if (!ProcessedVars.insert(DV)) + continue; + findAbstractVariable(DV, DV.getContext()); } - if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) - constructScopeDIE(TheCU, AScope); + constructAbstractSubprogramScopeDIE(TheCU, AScope); } - DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); + DIE &CurFnDIE = constructSubprogramScopeDIE(TheCU, FnScope); if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn)) - TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); + TheCU.addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); // Add the range of this function to the list of ranges for the CU. RangeSpan Span(FunctionBeginSym, FunctionEndSym); - TheCU->addRange(std::move(Span)); + TheCU.addRange(std::move(Span)); PrevSection = Asm->getCurrentSection(); - PrevCU = TheCU; + PrevCU = &TheCU; // Clear debug info - for (auto &I : ScopeVariables) - DeleteContainerPointers(I.second); + // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the + // DbgVariables except those that are also in AbstractVariables (since they + // can be used cross-function) + for (const auto &I : ScopeVariables) + for (const auto *Var : I.second) + if (!AbstractVariables.count(Var->getVariable()) || Var->getAbstractVariable()) + delete Var; ScopeVariables.clear(); DeleteContainerPointers(CurrentFnArguments); - UserVariables.clear(); DbgValues.clear(); - AbstractVariables.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); - PrevLabel = NULL; - CurFn = 0; + PrevLabel = nullptr; + CurFn = nullptr; } // Register a source line with debug info. Returns the unique label that was @@ -1758,36 +1557,16 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, StringRef Dir; unsigned Src = 1; unsigned Discriminator = 0; - if (S) { - DIDescriptor Scope(S); - - if (Scope.isCompileUnit()) { - DICompileUnit CU(S); - Fn = CU.getFilename(); - Dir = CU.getDirectory(); - } else if (Scope.isFile()) { - DIFile F(S); - Fn = F.getFilename(); - Dir = F.getDirectory(); - } else if (Scope.isSubprogram()) { - DISubprogram SP(S); - Fn = SP.getFilename(); - Dir = SP.getDirectory(); - } else if (Scope.isLexicalBlockFile()) { - DILexicalBlockFile DBF(S); - Fn = DBF.getFilename(); - Dir = DBF.getDirectory(); - } else if (Scope.isLexicalBlock()) { - DILexicalBlock DB(S); - Fn = DB.getFilename(); - Dir = DB.getDirectory(); - Discriminator = DB.getDiscriminator(); - } else - llvm_unreachable("Unexpected scope info"); + if (DIScope Scope = DIScope(S)) { + assert(Scope.isScope()); + Fn = Scope.getFilename(); + Dir = Scope.getDirectory(); + if (Scope.isLexicalBlock()) + Discriminator = DILexicalBlock(S).getDiscriminator(); unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID(); - Src = static_cast(InfoHolder.getUnits()[CUID]) - ->getOrCreateSourceID(Fn, Dir); + Src = static_cast(*InfoHolder.getUnits()[CUID]) + .getOrCreateSourceID(Fn, Dir); } Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, Discriminator, Fn); @@ -1797,68 +1576,6 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Emit Methods //===----------------------------------------------------------------------===// -// Compute the size and offset of a DIE. The offset is relative to start of the -// CU. It returns the offset after laying out the DIE. 
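The deletion filter in the endFunction() cleanup loop above is the crux of the new ownership story and is worth restating: a DbgVariable that appears in AbstractVariables and has no abstract companion of its own is the abstract instance itself, owned by that map and shared across functions; everything else in ScopeVariables is function-local and safe to delete. As a predicate (the map typedef here is hypothetical):

```cpp
// Sketch: may endFunction() delete this ScopeVariables entry?
typedef DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVarMap;
static bool isFunctionLocal(const DbgVariable *Var,
                            const AbstractVarMap &AbstractVariables) {
  // Not in the abstract map at all -> a plain concrete variable: delete it.
  // In the map but holding an abstract companion -> the concrete (inlined)
  // instance: also function-local. Only the abstract instance itself (in
  // the map, no companion) must outlive the function.
  return !AbstractVariables.count(Var->getVariable()) ||
         Var->getAbstractVariable() != nullptr;
}
```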
-unsigned DwarfFile::computeSizeAndOffset(DIE *Die, unsigned Offset) { - // Record the abbreviation. - assignAbbrevNumber(Die->getAbbrev()); - - // Get the abbreviation for this DIE. - const DIEAbbrev &Abbrev = Die->getAbbrev(); - - // Set DIE offset - Die->setOffset(Offset); - - // Start the size with the size of abbreviation code. - Offset += getULEB128Size(Die->getAbbrevNumber()); - - const SmallVectorImpl &Values = Die->getValues(); - const SmallVectorImpl &AbbrevData = Abbrev.getData(); - - // Size the DIE attribute values. - for (unsigned i = 0, N = Values.size(); i < N; ++i) - // Size attribute value. - Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm()); - - // Get the children. - const std::vector &Children = Die->getChildren(); - - // Size the DIE children if any. - if (!Children.empty()) { - assert(Abbrev.hasChildren() && "Children flag not set"); - - for (DIE *Child : Children) - Offset = computeSizeAndOffset(Child, Offset); - - // End of children marker. - Offset += sizeof(int8_t); - } - - Die->setSize(Offset - Die->getOffset()); - return Offset; -} - -// Compute the size and offset for each DIE. -void DwarfFile::computeSizeAndOffsets() { - // Offset from the first CU in the debug info section is 0 initially. - unsigned SecOffset = 0; - - // Iterate over each compile unit and set the size and offsets for each - // DIE within each compile unit. All offsets are CU relative. - for (DwarfUnit *TheU : CUs) { - TheU->setDebugInfoOffset(SecOffset); - - // CU-relative offset is reset to 0 here. - unsigned Offset = sizeof(int32_t) + // Length of Unit Info - TheU->getHeaderSize(); // Unit-specific headers - - // EndOffset here is CU-relative, after laying out - // all of the CU DIE. - unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset); - SecOffset += EndOffset; - } -} - // Emit initial Dwarf sections with a label at the start of each one. void DwarfDebug::emitSectionLabels() { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); @@ -1906,19 +1623,19 @@ void DwarfDebug::emitSectionLabels() { } // Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE *Die) { +void DwarfDebug::emitDIE(DIE &Die) { // Get the abbreviation for this DIE. - const DIEAbbrev &Abbrev = Die->getAbbrev(); + const DIEAbbrev &Abbrev = Die.getAbbrev(); // Emit the code (index) for the abbreviation. if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) + - "] 0x" + Twine::utohexstr(Die->getOffset()) + - ":0x" + Twine::utohexstr(Die->getSize()) + " " + + "] 0x" + Twine::utohexstr(Die.getOffset()) + + ":0x" + Twine::utohexstr(Die.getSize()) + " " + dwarf::TagString(Abbrev.getTag())); Asm->EmitULEB128(Abbrev.getNumber()); - const SmallVectorImpl &Values = Die->getValues(); + const SmallVectorImpl &Values = Die.getValues(); const SmallVectorImpl &AbbrevData = Abbrev.getData(); // Emit the DIE attribute values. @@ -1940,38 +1657,14 @@ void DwarfDebug::emitDIE(DIE *Die) { // Emit the DIE children if any. if (Abbrev.hasChildren()) { - const std::vector &Children = Die->getChildren(); - - for (DIE *Child : Children) - emitDIE(Child); + for (auto &Child : Die.getChildren()) + emitDIE(*Child); Asm->OutStreamer.AddComment("End Of Children Mark"); Asm->EmitInt8(0); } } -// Emit the various dwarf units to the unit section USection with -// the abbreviations going into ASection. 
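The layout arithmetic in the computeSizeAndOffset() routine being moved out of this file deserves a concrete example: a DIE with abbreviation code 3 (one ULEB128 byte), one DW_FORM_data4 and one DW_FORM_strp attribute (four bytes each in DWARF32), and no children occupies 1 + 4 + 4 = 9 bytes; a DIE with children additionally pays one zero byte for the end-of-children marker. Restated compactly (this mirrors the removed code and assumes abbreviation numbers were already assigned):

```cpp
// Sketch: CU-relative DIE layout. Returns the offset just past this DIE
// and, recursively, all of its children.
static unsigned layoutDIE(AsmPrinter *Asm, DIE &Die, unsigned Offset) {
  Die.setOffset(Offset);
  Offset += getULEB128Size(Die.getAbbrevNumber()); // abbreviation code
  const DIEAbbrev &Abbrev = Die.getAbbrev();
  // Each attribute value's size is determined by its DWARF form.
  for (unsigned i = 0, N = Die.getValues().size(); i != N; ++i)
    Offset += Die.getValues()[i]->SizeOf(Asm, Abbrev.getData()[i].getForm());
  if (Abbrev.hasChildren()) {
    for (DIE *Child : Die.getChildren())
      Offset = layoutDIE(Asm, *Child, Offset);
    Offset += sizeof(int8_t); // end-of-children marker: a single 0 byte
  }
  Die.setSize(Offset - Die.getOffset());
  return Offset;
}
```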
-void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) { - for (DwarfUnit *TheU : CUs) { - DIE *Die = TheU->getUnitDie(); - const MCSection *USection = TheU->getSection(); - Asm->OutStreamer.SwitchSection(USection); - - // Emit the compile units header. - Asm->OutStreamer.EmitLabel(TheU->getLabelBegin()); - - // Emit size of content not including length itself - Asm->OutStreamer.AddComment("Length of Unit"); - Asm->EmitInt32(TheU->getHeaderSize() + Die->getSize()); - - TheU->emitHeader(ASectionSym); - - DD->emitDIE(Die); - Asm->OutStreamer.EmitLabel(TheU->getLabelEnd()); - } -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; @@ -1986,26 +1679,6 @@ void DwarfDebug::emitAbbreviations() { Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); } -void DwarfFile::emitAbbrevs(const MCSection *Section) { - // Check to see if it is worth the effort. - if (!Abbreviations.empty()) { - // Start the debug abbrev section. - Asm->OutStreamer.SwitchSection(Section); - - // For each abbrevation. - for (const DIEAbbrev *Abbrev : Abbreviations) { - // Emit the abbrevations code (base 1 index.) - Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); - - // Emit the abbreviations data. - Abbrev->Emit(Asm); - } - - // Mark end of abbreviations. - Asm->EmitULEB128(0, "EOM(3)"); - } -} - // Emit the last address of the section and the end of the line matrix. void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Define last address of section. @@ -2032,97 +1705,52 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { - DwarfAccelTable AT( - DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); - for (DwarfUnit *TheU : getUnits()) { - for (const auto &GI : TheU->getAccelNames()) { - StringRef Name = GI.getKey(); - for (const DIE *D : GI.second) - AT.AddName(Name, D); - } - } - - AT.FinalizeTable(Asm, "Names"); + AccelNames.FinalizeTable(Asm, "Names"); Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfAccelNamesSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, &InfoHolder); + AccelNames.Emit(Asm, SectionBegin, &InfoHolder); } // Emit objective C classes and categories into a hashed accelerator table // section. void DwarfDebug::emitAccelObjC() { - DwarfAccelTable AT( - DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); - for (DwarfUnit *TheU : getUnits()) { - for (const auto &GI : TheU->getAccelObjC()) { - StringRef Name = GI.getKey(); - for (const DIE *D : GI.second) - AT.AddName(Name, D); - } - } - - AT.FinalizeTable(Asm, "ObjC"); + AccelObjC.FinalizeTable(Asm, "ObjC"); Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfAccelObjCSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, &InfoHolder); + AccelObjC.Emit(Asm, SectionBegin, &InfoHolder); } // Emit namespace dies into a hashed accelerator table. 
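With the accelerator tables now plain members filled through the addAccel* mutators (added at the end of this file), all four emitters reduce to the same four steps. A hypothetical helper makes the shared shape explicit; the individual calls match the code above and below:

```cpp
// Sketch: the common shape of emitAccelNames/ObjC/Namespaces/Types.
static void emitAccelTable(AsmPrinter *Asm, DwarfAccelTable &Table,
                           const MCSection *Section, StringRef TableName,
                           MCSymbol *SectionBegin, DwarfFile &Holder) {
  Table.FinalizeTable(Asm, TableName);     // hash the names into buckets
  Asm->OutStreamer.SwitchSection(Section); // e.g. the accel-names section
  Asm->OutStreamer.EmitLabel(SectionBegin);
  Table.Emit(Asm, SectionBegin, &Holder);  // header, buckets, offsets, data
}
```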
void DwarfDebug::emitAccelNamespaces() { - DwarfAccelTable AT( - DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); - for (DwarfUnit *TheU : getUnits()) { - for (const auto &GI : TheU->getAccelNamespace()) { - StringRef Name = GI.getKey(); - for (const DIE *D : GI.second) - AT.AddName(Name, D); - } - } - - AT.FinalizeTable(Asm, "namespac"); + AccelNamespace.FinalizeTable(Asm, "namespac"); Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfAccelNamespaceSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, &InfoHolder); + AccelNamespace.Emit(Asm, SectionBegin, &InfoHolder); } // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { - std::vector Atoms; - Atoms.push_back( - DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); - Atoms.push_back( - DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2)); - Atoms.push_back( - DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)); - DwarfAccelTable AT(Atoms); - for (DwarfUnit *TheU : getUnits()) { - for (const auto &GI : TheU->getAccelTypes()) { - StringRef Name = GI.getKey(); - for (const auto &DI : GI.second) - AT.AddName(Name, DI.first, DI.second); - } - } - AT.FinalizeTable(Asm, "types"); + AccelTypes.FinalizeTable(Asm, "types"); Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfAccelTypesSection()); MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, &InfoHolder); + AccelTypes.Emit(Asm, SectionBegin, &InfoHolder); } // Public name handling. @@ -2148,8 +1776,8 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, // look for that now. DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification); if (SpecVal) { - DIE *SpecDIE = cast(SpecVal)->getEntry(); - if (SpecDIE->findAttribute(dwarf::DW_AT_external)) + DIE &SpecDIE = cast(SpecVal)->getEntry(); + if (SpecDIE.findAttribute(dwarf::DW_AT_external)) Linkage = dwarf::GIEL_EXTERNAL; } else if (Die->findAttribute(dwarf::DW_AT_external)) Linkage = dwarf::GIEL_EXTERNAL; @@ -2261,69 +1889,6 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes); } -// Emit strings into a string section. -void DwarfFile::emitStrings(const MCSection *StrSection, - const MCSection *OffsetSection = NULL, - const MCSymbol *StrSecSym = NULL) { - - if (StringPool.empty()) - return; - - // Start the dwarf str section. - Asm->OutStreamer.SwitchSection(StrSection); - - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. - SmallVector, 64 > Entries; - - for (const auto &I : StringPool) - Entries.push_back(std::make_pair(I.second.second, &I)); - - array_pod_sort(Entries.begin(), Entries.end()); - - for (const auto &Entry : Entries) { - // Emit a label for reference from debug information entries. - Asm->OutStreamer.EmitLabel(Entry.second->getValue().first); - - // Emit the string itself with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Entry.second->getKeyData(), - Entry.second->getKeyLength() + 1)); - } - - // If we've got an offset section go ahead and emit that now as well. - if (OffsetSection) { - Asm->OutStreamer.SwitchSection(OffsetSection); - unsigned offset = 0; - unsigned size = 4; // FIXME: DWARF64 is 8. 
- for (const auto &Entry : Entries) { - Asm->OutStreamer.EmitIntValue(offset, size); - offset += Entry.second->getKeyLength() + 1; - } - } -} - -// Emit addresses into the section given. -void DwarfFile::emitAddresses(const MCSection *AddrSection) { - - if (AddressPool.empty()) - return; - - // Start the dwarf addr section. - Asm->OutStreamer.SwitchSection(AddrSection); - - // Order the address pool entries by ID - SmallVector Entries(AddressPool.size()); - - for (const auto &I : AddressPool) - Entries[I.second.Number] = - I.second.TLS - ? Asm->getObjFileLowering().getDebugThreadLocalSymbol(I.first) - : MCSymbolRefExpr::Create(I.first, Asm->OutContext); - - for (const MCExpr *Entry : Entries) - Asm->OutStreamer.EmitValue(Entry, Asm->getDataLayout().getPointerSize()); -} - // Emit visible names into a debug str section. void DwarfDebug::emitDebugStr() { DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; @@ -2332,19 +1897,22 @@ void DwarfDebug::emitDebugStr() { void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, const DebugLocEntry &Entry) { - DIVariable DV(Entry.getVariable()); - if (Entry.isInt()) { + assert(Entry.getValues().size() == 1 && + "multi-value entries are not supported yet."); + const DebugLocEntry::Value Value = Entry.getValues()[0]; + DIVariable DV(Value.getVariable()); + if (Value.isInt()) { DIBasicType BTy(resolve(DV.getType())); if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts"); - Streamer.EmitSLEB128(Entry.getInt()); + Streamer.EmitSLEB128(Value.getInt()); } else { Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu"); - Streamer.EmitULEB128(Entry.getInt()); + Streamer.EmitULEB128(Value.getInt()); } - } else if (Entry.isLocation()) { - MachineLocation Loc = Entry.getLoc(); + } else if (Value.isLocation()) { + MachineLocation Loc = Value.getLoc(); if (!DV.hasComplexAddress()) // Regular entry. Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect()); @@ -2443,7 +2011,7 @@ void DwarfDebug::emitDebugLocDWO() { // address we know we've emitted elsewhere (the start of the function? // The start of the CU or CU subrange that encloses this range?) Asm->EmitInt8(dwarf::DW_LLE_start_length_entry); - unsigned idx = InfoHolder.getAddrPoolIndex(Entry.getBeginSym()); + unsigned idx = AddrPool.getIndex(Entry.getBeginSym()); Asm->EmitULEB128(idx); Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4); @@ -2464,7 +2032,7 @@ void DwarfDebug::emitDebugARanges() { Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfARangesSection()); - typedef DenseMap > SpansType; + typedef DenseMap> SpansType; SpansType Spans; @@ -2502,11 +2070,11 @@ void DwarfDebug::emitDebugARanges() { // If we have no section (e.g. common), just write out // individual spans for each symbol. - if (Section == NULL) { + if (!Section) { for (const SymbolCU &Cur : List) { ArangeSpan Span; Span.Start = Cur.Sym; - Span.End = NULL; + Span.End = nullptr; if (Cur.CU) Spans[Cur.CU].push_back(Span); } @@ -2613,9 +2181,6 @@ void DwarfDebug::emitDebugRanges() { for (const auto &I : CUMap) { DwarfCompileUnit *TheCU = I.second; - // Emit a symbol so we can find the beginning of our ranges. - Asm->OutStreamer.EmitLabel(TheCU->getLabelRange()); - // Iterate over the misc ranges for the compile units in the module. for (const RangeSpanList &List : TheCU->getRangeLists()) { // Emit our symbol so we can find the beginning of the range. 
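A note on the constant-encoding choice in emitDebugLocEntry() above: the opcode is picked by the signedness of the variable's base type. A signed `int` holding -2 encodes as DW_OP_consts followed by a single SLEB128 byte (0x7e), where DW_OP_constu would need ten ULEB128 bytes for the sign-extended 64-bit pattern. The decision in isolation, mirroring the code above (Streamer and BTy as used there):

```cpp
// Sketch: opcode choice for constant-valued variables in .debug_loc.
static void emitConstValue(ByteStreamer &Streamer, DIBasicType BTy,
                           int64_t V) {
  bool IsSigned = BTy.Verify() &&
                  (BTy.getEncoding() == dwarf::DW_ATE_signed ||
                   BTy.getEncoding() == dwarf::DW_ATE_signed_char);
  if (IsSigned) {
    Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts");
    Streamer.EmitSLEB128(V); // e.g. -2 -> the single byte 0x7e
  } else {
    Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu");
    Streamer.EmitULEB128(V); // bit pattern interpreted as unsigned
  }
}
```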
@@ -2626,8 +2191,15 @@ void DwarfDebug::emitDebugRanges() { const MCSymbol *End = Range.getEnd(); assert(Begin && "Range without a begin symbol?"); assert(End && "Range without an end symbol?"); - Asm->OutStreamer.EmitSymbolValue(Begin, Size); - Asm->OutStreamer.EmitSymbolValue(End, Size); + if (TheCU->getRanges().size() == 1) { + // Grab the begin symbol from the first range as our base. + const MCSymbol *Base = TheCU->getRanges()[0].getStart(); + Asm->EmitLabelDifference(Begin, Base, Size); + Asm->EmitLabelDifference(End, Base, Size); + } else { + Asm->OutStreamer.EmitSymbolValue(Begin, Size); + Asm->OutStreamer.EmitSymbolValue(End, Size); + } } // And terminate the list with two 0 values. @@ -2656,52 +2228,52 @@ void DwarfDebug::emitDebugRanges() { // DWARF5 Experimental Separate Dwarf emitters. -void DwarfDebug::initSkeletonUnit(const DwarfUnit *U, DIE *Die, - DwarfUnit *NewU) { +void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, + std::unique_ptr NewU) { NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - U->getCUNode().getSplitDebugFilename()); + U.getCUNode().getSplitDebugFilename()); if (!CompilationDir.empty()) NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); - addGnuPubAttributes(NewU, Die); + addGnuPubAttributes(*NewU, Die); - SkeletonHolder.addUnit(NewU); + SkeletonHolder.addUnit(std::move(NewU)); } // This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, // DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, // DW_AT_addr_base, DW_AT_ranges_base. -DwarfCompileUnit *DwarfDebug::constructSkeletonCU(const DwarfCompileUnit *CU) { +DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { - DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - DwarfCompileUnit *NewCU = new DwarfCompileUnit( - CU->getUniqueID(), Die, CU->getCUNode(), Asm, this, &SkeletonHolder); - NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(), - DwarfInfoSectionSym); + auto OwnedUnit = make_unique( + CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); + DwarfCompileUnit &NewCU = *OwnedUnit; + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(), + DwarfInfoSectionSym); - NewCU->initStmtList(DwarfLineSectionSym); + NewCU.initStmtList(DwarfLineSectionSym); - initSkeletonUnit(CU, Die, NewCU); + initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); return NewCU; } // This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name, // DW_AT_addr_base. 
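Stepping back to the emitDebugRanges() change at the top of this hunk: when a CU has exactly one code range, entries are now emitted as label differences from that range's start symbol rather than as absolute symbol values, saving a pair of relocations per entry (the CU's DW_AT_low_pc supplies the base address). The two emission modes side by side (the helper packaging is hypothetical; the calls match the code above):

```cpp
// Sketch: one .debug_ranges entry, base-relative or absolute.
static void emitRangeEntry(AsmPrinter *Asm, const MCSymbol *Base,
                           const MCSymbol *Begin, const MCSymbol *End,
                           unsigned Size) {
  if (Base) {
    Asm->EmitLabelDifference(Begin, Base, Size); // offsets from the base,
    Asm->EmitLabelDifference(End, Base, Size);   // no relocations needed
  } else {
    Asm->OutStreamer.EmitSymbolValue(Begin, Size); // relocated addresses
    Asm->OutStreamer.EmitSymbolValue(End, Size);
  }
}
```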
-DwarfTypeUnit *DwarfDebug::constructSkeletonTU(DwarfTypeUnit *TU) {
+DwarfTypeUnit &DwarfDebug::constructSkeletonTU(DwarfTypeUnit &TU) {
   DwarfCompileUnit &CU = static_cast<DwarfCompileUnit &>(
-      *SkeletonHolder.getUnits()[TU->getCU().getUniqueID()]);
+      *SkeletonHolder.getUnits()[TU.getCU().getUniqueID()]);
 
-  DIE *Die = new DIE(dwarf::DW_TAG_type_unit);
-  DwarfTypeUnit *NewTU =
-      new DwarfTypeUnit(TU->getUniqueID(), Die, CU, Asm, this, &SkeletonHolder);
-  NewTU->setTypeSignature(TU->getTypeSignature());
-  NewTU->setType(NULL);
-  NewTU->initSection(
-      Asm->getObjFileLowering().getDwarfTypesSection(TU->getTypeSignature()));
+  auto OwnedUnit = make_unique<DwarfTypeUnit>(TU.getUniqueID(), CU, Asm, this,
+                                              &SkeletonHolder);
+  DwarfTypeUnit &NewTU = *OwnedUnit;
+  NewTU.setTypeSignature(TU.getTypeSignature());
+  NewTU.setType(nullptr);
+  NewTU.initSection(
+      Asm->getObjFileLowering().getDwarfTypesSection(TU.getTypeSignature()));
 
-  initSkeletonUnit(TU, Die, NewTU);
+  initSkeletonUnit(TU, NewTU.getUnitDie(), std::move(OwnedUnit));
   return NewTU;
 }
 
@@ -2711,7 +2283,7 @@ void DwarfDebug::emitDebugInfoDWO() {
   assert(useSplitDwarf() && "No split dwarf debug info?");
   // Don't pass an abbrev symbol, using a constant zero instead so as not to
   // emit relocations into the dwo file.
-  InfoHolder.emitUnits(this, /* AbbrevSymbol */nullptr);
+  InfoHolder.emitUnits(this, /* AbbrevSymbol */ nullptr);
 }
 
 // Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
@@ -2748,14 +2320,25 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
   return &SplitTypeUnitFileTable;
 }
 
+static uint64_t makeTypeSignature(StringRef Identifier) {
+  MD5 Hash;
+  Hash.update(Identifier);
+  // ... take the least significant 8 bytes and return those. Our MD5
+  // implementation always returns its results in little endian, so swap
+  // bytes appropriately.
+  MD5::MD5Result Result;
+  Hash.final(Result);
+  return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+}
+
 void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
-                                      StringRef Identifier, DIE *RefDie,
+                                      StringRef Identifier, DIE &RefDie,
                                       DICompositeType CTy) {
-  // Flag the type unit reference as a declaration so that if it contains
-  // members (implicit special members, static data member definitions, member
-  // declarations for definitions in this CU, etc) consumers don't get confused
-  // and think this is a full definition.
-  CU.addFlag(RefDie, dwarf::DW_AT_declaration);
+  // Fast path: if we're building some type units and one has already used the
+  // address pool, we know we're going to throw away all this work anyway, so
+  // don't bother building dependent types.
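makeTypeSignature() above pins down the DWARF4 type-unit signature: the low-order 8 bytes of an MD5 over the type's mangled identifier, read little-endian. Because the signature is a pure function of the identifier, identical types emitted from different compile units collide by design, which is what lets consumers and tools like DWP deduplicate type units. For illustration (the identifier value is hypothetical):

```cpp
#include <cassert>
#include <cstdint>
// Same identifier in, same 8-byte signature out -- in any TU, any module.
static void typeSignatureIsStable() {
  uint64_t A = makeTypeSignature("_ZTS3Foo");
  uint64_t B = makeTypeSignature("_ZTS3Foo");
  assert(A == B && "signature depends only on the identifier");
}
```

Back in addDwarfTypeUnitType(), the fast path below bails out as soon as a pending type-unit batch is already known to be tainted by an address-pool reference.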
+ if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) + return; const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy]; if (TU) { @@ -2763,45 +2346,111 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, return; } - DIE *UnitDie = new DIE(dwarf::DW_TAG_type_unit); - DwarfTypeUnit *NewTU = - new DwarfTypeUnit(InfoHolder.getUnits().size(), UnitDie, CU, Asm, this, - &InfoHolder, getDwoLineTable(CU)); - TU = NewTU; - InfoHolder.addUnit(NewTU); + bool TopLevelType = TypeUnitsUnderConstruction.empty(); + AddrPool.resetUsedFlag(); - NewTU->addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - CU.getLanguage()); + auto OwnedUnit = + make_unique(InfoHolder.getUnits().size(), CU, Asm, this, + &InfoHolder, getDwoLineTable(CU)); + DwarfTypeUnit &NewTU = *OwnedUnit; + DIE &UnitDie = NewTU.getUnitDie(); + TU = &NewTU; + TypeUnitsUnderConstruction.push_back( + std::make_pair(std::move(OwnedUnit), CTy)); - MD5 Hash; - Hash.update(Identifier); - // ... take the least significant 8 bytes and return those. Our MD5 - // implementation always returns its results in little endian, swap bytes - // appropriately. - MD5::MD5Result Result; - Hash.final(Result); - uint64_t Signature = *reinterpret_cast(Result + 8); - NewTU->setTypeSignature(Signature); - if (useSplitDwarf()) - NewTU->setSkeleton(constructSkeletonTU(NewTU)); - else - CU.applyStmtList(*UnitDie); + NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + CU.getLanguage()); - NewTU->setType(NewTU->createTypeDIE(CTy)); + uint64_t Signature = makeTypeSignature(Identifier); + NewTU.setTypeSignature(Signature); - NewTU->initSection( + if (!useSplitDwarf()) + CU.applyStmtList(UnitDie); + + // FIXME: Skip using COMDAT groups for type units in the .dwo file once tools + // such as DWP ( http://gcc.gnu.org/wiki/DebugFissionDWP ) can cope with it. + NewTU.initSection( useSplitDwarf() ? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature) : Asm->getObjFileLowering().getDwarfTypesSection(Signature)); - CU.addDIETypeSignature(RefDie, *NewTU); + NewTU.setType(NewTU.createTypeDIE(CTy)); + + if (TopLevelType) { + auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction); + TypeUnitsUnderConstruction.clear(); + + // Types referencing entries in the address table cannot be placed in type + // units. + if (AddrPool.hasBeenUsed()) { + + // Remove all the types built while building this type. + // This is pessimistic as some of these types might not be dependent on + // the type that used an address. + for (const auto &TU : TypeUnitsToAdd) + DwarfTypeUnits.erase(TU.second); + + // Construct this type in the CU directly. + // This is inefficient because all the dependent types will be rebuilt + // from scratch, including building them in type units, discovering that + // they depend on addresses, throwing them out and rebuilding them. + CU.constructTypeDIE(RefDie, CTy); + return; + } + + // If the type wasn't dependent on fission addresses, finish adding the type + // and all its dependent types. 
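Taken together, TypeUnitsUnderConstruction, resetUsedFlag(), and hasBeenUsed() implement a small transaction: dependent type units are staged, and the top-level type either commits the whole batch or rolls it back and rebuilds in the CU. The pattern in isolation, stripped of the DWARF specifics (all names here are illustrative):

```cpp
#include <memory>
#include <utility>
#include <vector>

// Sketch: stage units of work, watch a dirty flag, commit or abort as one.
template <typename Unit, typename Key> class StagedBuilder {
  std::vector<std::pair<std::unique_ptr<Unit>, Key>> Staged;
  bool Dirty = false; // plays the role of AddrPool.hasBeenUsed()

public:
  bool building() const { return !Staged.empty(); }
  void reset() { Dirty = false; }     // AddrPool.resetUsedFlag()
  void markDirty() { Dirty = true; }  // an address-pool reference was made
  void stage(std::unique_ptr<Unit> U, Key K) {
    Staged.emplace_back(std::move(U), K);
  }
  // Called once, for the outermost (top-level) unit.
  template <typename CommitFn, typename AbortFn>
  void finish(CommitFn commit, AbortFn abort) {
    auto Batch = std::move(Staged);
    Staged.clear();
    if (Dirty) {
      abort(Batch); // discard the batch; rebuild in the CU instead
      return;
    }
    for (auto &W : Batch)
      commit(std::move(W.first)); // e.g. InfoHolder.addUnit(...)
  }
};
```

The rollback path is deliberately pessimistic, as the comment above notes: some discarded units may not actually depend on the offending type, but rebuilding them in the CU is always correct.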
+ for (auto &TU : TypeUnitsToAdd) { + if (useSplitDwarf()) + TU.first->setSkeleton(constructSkeletonTU(*TU.first)); + InfoHolder.addUnit(std::move(TU.first)); + } + } + CU.addDIETypeSignature(RefDie, NewTU); } -void DwarfDebug::attachLowHighPC(DwarfCompileUnit *Unit, DIE *D, +void DwarfDebug::attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, MCSymbol *Begin, MCSymbol *End) { - Unit->addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); + assert(Begin && "Begin label should not be null!"); + assert(End && "End label should not be null!"); + assert(Begin->isDefined() && "Invalid starting label"); + assert(End->isDefined() && "Invalid end label"); + + Unit.addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); if (DwarfVersion < 4) - Unit->addLabelAddress(D, dwarf::DW_AT_high_pc, End); + Unit.addLabelAddress(D, dwarf::DW_AT_high_pc, End); else - Unit->addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); + Unit.addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); +} + +// Accelerator table mutators - add each name along with its companion +// DIE to the proper table while ensuring that the name that we're going +// to reference is in the string table. We do this since the names we +// add may not only be identical to the names in the DIE. +void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelNames.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), + &Die); +} + +void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelObjC.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), + &Die); +} + +void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelNamespace.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), + &Die); +} + +void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { + if (!useDwarfAccelTables()) + return; + AccelTypes.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name), + &Die); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index da708f5..2f5abc8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -14,10 +14,13 @@ #ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ +#include "DwarfFile.h" #include "AsmPrinterHandler.h" #include "DIE.h" +#include "DbgValueHistoryCalculator.h" #include "DebugLocEntry.h" #include "DebugLocList.h" +#include "DwarfAccelTable.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -30,6 +33,8 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/Support/Allocator.h" +#include + namespace llvm { class AsmPrinter; @@ -74,12 +79,12 @@ class DbgVariable { public: // AbsVar may be NULL. DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD) - : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), - FrameIndex(~0), DD(DD) {} + : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), AbsVar(AV), + MInsn(nullptr), FrameIndex(~0), DD(DD) {} // Accessors. 
DIVariable getVariable() const { return Var; } - void setDIE(DIE *D) { TheDIE = D; } + void setDIE(DIE &D) { TheDIE = &D; } DIE *getDIE() const { return TheDIE; } void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } @@ -90,7 +95,7 @@ public: int getFrameIndex() const { return FrameIndex; } void setFrameIndex(int FI) { FrameIndex = FI; } // Translate tag to proper Dwarf tag. - uint16_t getTag() const { + dwarf::Tag getTag() const { if (Var.getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; @@ -131,99 +136,6 @@ private: template T resolve(DIRef Ref) const; }; -/// \brief Collects and handles information specific to a particular -/// collection of units. This collection represents all of the units -/// that will be ultimately output into a single object file. -class DwarfFile { - // Target of Dwarf emission, used for sizing of abbreviations. - AsmPrinter *Asm; - - // Used to uniquely define abbreviations. - FoldingSet AbbreviationsSet; - - // A list of all the unique abbreviations in use. - std::vector Abbreviations; - - // A pointer to all units in the section. - SmallVector CUs; - - // Collection of strings for this unit and assorted symbols. - // A String->Symbol mapping of strings used by indirect - // references. - typedef StringMap, BumpPtrAllocator &> - StrPool; - StrPool StringPool; - unsigned NextStringPoolNumber; - std::string StringPref; - - struct AddressPoolEntry { - unsigned Number; - bool TLS; - AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {} - }; - // Collection of addresses for this unit and assorted labels. - // A Symbol->unsigned mapping of addresses used by indirect - // references. - typedef DenseMap AddrPool; - AddrPool AddressPool; - unsigned NextAddrPoolNumber; - -public: - DwarfFile(AsmPrinter *AP, const char *Pref, BumpPtrAllocator &DA) - : Asm(AP), StringPool(DA), NextStringPoolNumber(0), StringPref(Pref), - AddressPool(), NextAddrPoolNumber(0) {} - - ~DwarfFile(); - - const SmallVectorImpl &getUnits() { return CUs; } - - /// \brief Compute the size and offset of a DIE given an incoming Offset. - unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); - - /// \brief Compute the size and offset of all the DIEs. - void computeSizeAndOffsets(); - - /// \brief Define a unique number for the abbreviation. - void assignAbbrevNumber(DIEAbbrev &Abbrev); - - /// \brief Add a unit to the list of CUs. - void addUnit(DwarfUnit *CU) { CUs.push_back(CU); } - - /// \brief Emit all of the units to the section listed with the given - /// abbreviation section. - void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym); - - /// \brief Emit a set of abbreviations to the specific section. - void emitAbbrevs(const MCSection *); - - /// \brief Emit all of the strings to the section given. - void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection, - const MCSymbol *StrSecSym); - - /// \brief Emit all of the addresses to the section given. - void emitAddresses(const MCSection *AddrSection); - - /// \brief Returns the entry into the start of the pool. - MCSymbol *getStringPoolSym(); - - /// \brief Returns an entry into the string pool with the given - /// string text. - MCSymbol *getStringPoolEntry(StringRef Str); - - /// \brief Returns the index into the string pool with the given - /// string text. - unsigned getStringPoolIndex(StringRef Str); - - /// \brief Returns the string pool. 
- StrPool *getStringPool() { return &StringPool; } - - /// \brief Returns the index into the address pool with the given - /// label/symbol. - unsigned getAddrPoolIndex(const MCSymbol *Sym, bool TLS = false); - - /// \brief Returns the address pool. - AddrPool *getAddrPool() { return &AddressPool; } -}; /// \brief Helper used to pair up a symbol and its DWARF compile unit. struct SymbolCU { @@ -287,7 +199,7 @@ class DwarfDebug : public AsmPrinterHandler { ScopeVariablesMap ScopeVariables; // Collection of abstract variables. - DenseMap AbstractVariables; + DenseMap> AbstractVariables; // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists // can refer to them in spite of insertions into this list. @@ -307,15 +219,8 @@ class DwarfDebug : public AsmPrinterHandler { // Maps instruction with label emitted after instruction. DenseMap LabelsAfterInsn; - // Every user variable mentioned by a DBG_VALUE instruction in order of - // appearance. - SmallVector UserVariables; - - // For each user variable, keep a list of DBG_VALUE instructions in order. - // The list can also contain normal instructions that clobber the previous - // DBG_VALUE. - typedef DenseMap > - DbgValueHistoryMap; + // History of DBG_VALUE and clobber instructions for each user variable. + // Variables are listed in order of appearance. DbgValueHistoryMap DbgValues; // Previous instruction's location information. This is used to determine @@ -373,6 +278,8 @@ class DwarfDebug : public AsmPrinterHandler { // them. DenseMap DwarfTypeUnits; + SmallVector, DICompositeType>, 1> TypeUnitsUnderConstruction; + // Whether to emit the pubnames/pubtypes sections. bool HasDwarfPubSections; @@ -411,22 +318,30 @@ class DwarfDebug : public AsmPrinterHandler { // True iff there are multiple CUs in this module. bool SingleCU; + AddressPool AddrPool; + + DwarfAccelTable AccelNames; + DwarfAccelTable AccelObjC; + DwarfAccelTable AccelNamespace; + DwarfAccelTable AccelTypes; + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); void addScopeVariable(LexicalScope *LS, DbgVariable *Var); - const SmallVectorImpl &getUnits() { + const SmallVectorImpl> &getUnits() { return InfoHolder.getUnits(); } /// \brief Find abstract variable associated with Var. DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); + DbgVariable *findAbstractVariable(DIVariable &Var, const MDNode *Scope); /// \brief Find DIE for the given subprogram and attach appropriate /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. - DIE *updateSubprogramScopeDIE(DwarfCompileUnit *SPCU, DISubprogram SP); + DIE &updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP); /// \brief A helper function to check whether the DIE for a given Scope is /// going to be null. @@ -434,22 +349,33 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief A helper function to construct a RangeSpanList for a given /// lexical scope. - void addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE, + void addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE, const SmallVectorImpl &Range); /// \brief Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. - DIE *constructLexicalScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); + std::unique_ptr constructLexicalScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); /// \brief This scope represents inlined body of a function. 
Construct /// DIE to represent this concrete inlined copy of the function. - DIE *constructInlinedScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); + std::unique_ptr constructInlinedScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); /// \brief Construct a DIE for this scope. - DIE *constructScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope); + std::unique_ptr constructScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); + void createAndAddScopeChildren(DwarfCompileUnit &TheCU, LexicalScope *Scope, + DIE &ScopeDIE); + /// \brief Construct a DIE for this abstract scope. + void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); + /// \brief Construct a DIE for this subprogram scope. + DIE &constructSubprogramScopeDIE(DwarfCompileUnit &TheCU, + LexicalScope *Scope); /// A helper function to create children of a Scope DIE. - DIE *createScopeChildrenDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope, - SmallVectorImpl &Children); + DIE *createScopeChildrenDIE(DwarfCompileUnit &TheCU, LexicalScope *Scope, + SmallVectorImpl> &Children); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -460,12 +386,11 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Compute the size and offset of all the DIEs. void computeSizeAndOffsets(); - /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs. - void computeInlinedDIEs(); - /// \brief Collect info for variables that were optimized out. void collectDeadVariables(); + void finishSubprogramDefinitions(); + /// \brief Finish off debug information after all functions have been /// processed. void finalizeModuleInfo(); @@ -535,15 +460,16 @@ class DwarfDebug : public AsmPrinterHandler { /// DWARF 5 Experimental Split Dwarf Emitters /// \brief Initialize common features of skeleton units. - void initSkeletonUnit(const DwarfUnit *U, DIE *Die, DwarfUnit *NewU); + void initSkeletonUnit(const DwarfUnit &U, DIE &Die, + std::unique_ptr NewU); /// \brief Construct the split debug info compile unit for the debug info /// section. - DwarfCompileUnit *constructSkeletonCU(const DwarfCompileUnit *CU); + DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU); /// \brief Construct the split debug info compile unit for the debug info /// section. - DwarfTypeUnit *constructSkeletonTU(DwarfTypeUnit *TU); + DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU); /// \brief Emit the debug info dwo section. void emitDebugInfoDWO(); @@ -559,25 +485,22 @@ class DwarfDebug : public AsmPrinterHandler { /// Flags to let the linker know we have emitted new style pubnames. Only /// emit it here if we don't have a skeleton CU for split dwarf. - void addGnuPubAttributes(DwarfUnit *U, DIE *D) const; + void addGnuPubAttributes(DwarfUnit &U, DIE &D) const; /// \brief Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - DwarfCompileUnit *constructDwarfCompileUnit(DICompileUnit DIUnit); - - /// \brief Construct subprogram DIE. - void constructSubprogramDIE(DwarfCompileUnit *TheCU, const MDNode *N); + DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit); /// \brief Construct imported_module or imported_declaration DIE. - void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N); + void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N); /// \brief Construct import_module DIE. 
- void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N, - DIE *Context); + void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N, + DIE &Context); /// \brief Construct import_module DIE. - void constructImportedEntityDIE(DwarfCompileUnit *TheCU, - const DIImportedEntity &Module, DIE *Context); + void constructImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity &Module, DIE &Context); /// \brief Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the @@ -602,7 +525,7 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { - LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol *)0)); + LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); } /// \brief Return Label preceding the instruction. @@ -610,13 +533,15 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { - LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol *)0)); + LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); } /// \brief Return Label immediately following the instruction. MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - void attachLowHighPC(DwarfCompileUnit *Unit, DIE *D, MCSymbol *Begin, + void attachRangesOrLowHighPC(DwarfCompileUnit &Unit, DIE &D, + const SmallVectorImpl &Ranges); + void attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, MCSymbol *Begin, MCSymbol *End); public: @@ -625,6 +550,8 @@ public: // DwarfDebug(AsmPrinter *A, Module *M); + ~DwarfDebug() override; + void insertDIE(const MDNode *TypeMD, DIE *Die) { MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); } @@ -654,7 +581,7 @@ public: /// \brief Add a DIE to the set of types that we're going to pull into /// type units. void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, - DIE *Die, DICompositeType CTy); + DIE &Die, DICompositeType CTy); /// \brief Add a label so that arange data can be generated for it. void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } @@ -666,7 +593,7 @@ public: } /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE *Die); + void emitDIE(DIE &Die); // Experimental DWARF5 features. @@ -720,6 +647,18 @@ public: /// isSubprogramContext - Return true if Context is either a subprogram /// or another context nested inside a subprogram. 
 bool isSubprogramContext(const MDNode *Context);
+
+  void addSubprogramNames(DISubprogram SP, DIE &Die);
+
+  AddressPool &getAddressPool() { return AddrPool; }
+
+  void addAccelName(StringRef Name, const DIE &Die);
+
+  void addAccelObjC(StringRef Name, const DIE &Die);
+
+  void addAccelNamespace(StringRef Name, const DIE &Die);
+
+  void addAccelType(StringRef Name, const DIE &Die, char Flags);
 };
 } // End of namespace llvm

diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 113a9e4..3a12c73 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -103,7 +103,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
   int FirstAction = 0;
   unsigned SizeActions = 0;
-  const LandingPadInfo *PrevLPI = 0;
+  const LandingPadInfo *PrevLPI = nullptr;

   for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
            I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
@@ -181,7 +181,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
     if (!MO.isGlobal()) continue;

     const Function *F = dyn_cast<Function>(MO.getGlobal());
-    if (F == 0) continue;
+    if (!F) continue;

     if (SawFunc) {
       // Be conservative. If we have more than one function operand for this
@@ -214,7 +214,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
                      const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
                      const SmallVectorImpl<unsigned> &FirstActions) {
   // The end label of the previous invoke or nounwind try-range.
-  MCSymbol *LastLabel = 0;
+  MCSymbol *LastLabel = nullptr;

   // Whether there is a potentially throwing instruction (currently this means
   // an ordinary call) between the end of the previous try-range and now.
@@ -224,18 +224,16 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
   bool PreviousIsInvoke = false;

   // Visit all instructions in order of address.
-  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
-       I != E; ++I) {
-    for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
-         MI != E; ++MI) {
-      if (!MI->isEHLabel()) {
-        if (MI->isCall())
-          SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+  for (const auto &MBB : *Asm->MF) {
+    for (const auto &MI : MBB) {
+      if (!MI.isEHLabel()) {
+        if (MI.isCall())
+          SawPotentiallyThrowing |= !CallToNoUnwindFunction(&MI);
         continue;
       }

       // End of the previous try-range?
-      MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+      MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
       if (BeginLabel == LastLabel)
         SawPotentiallyThrowing = false;

@@ -255,7 +253,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
       // create a call-site entry with no landing pad for the region between the
       // try-ranges.
       if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
-        CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+        CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
         CallSites.push_back(Site);
         PreviousIsInvoke = false;
       }
@@ -305,7 +303,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
   // function may throw, create a call-site entry with no landing pad for the
   // region following the try-range.
   if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
-    CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+    CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
     CallSites.push_back(Site);
   }
 }
@@ -571,10 +569,10 @@ void DwarfException::EmitExceptionTable() {
       Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());

     MCSymbol *BeginLabel = S.BeginLabel;
-    if (BeginLabel == 0)
+    if (!BeginLabel)
       BeginLabel = EHFuncBeginSym;

     MCSymbol *EndLabel = S.EndLabel;
-    if (EndLabel == 0)
+    if (!EndLabel)
       EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());

diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
new file mode 100644
index 0000000..737ee54
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -0,0 +1,156 @@
+//===-- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfFile.h"
+
+#include "DwarfDebug.h"
+#include "DwarfUnit.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
+    : Asm(AP), StrPool(DA, *Asm, Pref) {}
+
+DwarfFile::~DwarfFile() {}
+
+// Define a unique number for the abbreviation.
+//
+void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+  // Check the set for priors.
+  DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+  // If it's newly added.
+  if (InSet == &Abbrev) {
+    // Add to abbreviation list.
+    Abbreviations.push_back(&Abbrev);
+
+    // Assign the vector position + 1 as its number.
+    Abbrev.setNumber(Abbreviations.size());
+  } else {
+    // Assign existing abbreviation number.
+    Abbrev.setNumber(InSet->getNumber());
+  }
+}
+
+void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
+  CUs.push_back(std::move(U));
+}
+
+// Emit the various dwarf units to the unit section USection with
+// the abbreviations going into ASection.
+void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) {
+  for (const auto &TheU : CUs) {
+    DIE &Die = TheU->getUnitDie();
+    const MCSection *USection = TheU->getSection();
+    Asm->OutStreamer.SwitchSection(USection);
+
+    // Emit the compile unit's header.
+    Asm->OutStreamer.EmitLabel(TheU->getLabelBegin());
+
+    // Emit size of content not including length itself.
+    Asm->OutStreamer.AddComment("Length of Unit");
+    Asm->EmitInt32(TheU->getHeaderSize() + Die.getSize());
+
+    TheU->emitHeader(ASectionSym);
+
+    DD->emitDIE(Die);
+    Asm->OutStreamer.EmitLabel(TheU->getLabelEnd());
+  }
+}
+// Compute the size and offset for each DIE.
+void DwarfFile::computeSizeAndOffsets() {
+  // Offset from the first CU in the debug info section is 0 initially.
+  unsigned SecOffset = 0;
+
+  // Iterate over each compile unit and set the size and offsets for each
+  // DIE within each compile unit. All offsets are CU relative.
+  for (const auto &TheU : CUs) {
+    TheU->setDebugInfoOffset(SecOffset);

+    // CU-relative offset is reset to 0 here.
+    unsigned Offset = sizeof(int32_t) +    // Length of Unit Info
+                      TheU->getHeaderSize(); // Unit-specific headers

+    // EndOffset here is CU-relative, after laying out
+    // all of the CU DIE.
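For reference, the arithmetic the loop below completes is a running section offset: every unit contributes a 4-byte length field (excluded from the emitted length value), its unit header, and its laid-out DIE tree. A minimal standalone sketch of that bookkeeping, where HeaderSize and DieTreeSize are hypothetical stand-ins for TheU->getHeaderSize() and the recursive DIE sizing:

    #include <cstdint>
    #include <vector>

    struct UnitSize { uint32_t HeaderSize; uint32_t DieTreeSize; };

    // Assign each unit its starting offset in the debug info section; the
    // 4-byte length field precedes the header and is excluded from the
    // length value the emitter writes.
    std::vector<uint32_t> assignUnitOffsets(const std::vector<UnitSize> &Units) {
      std::vector<uint32_t> Offsets;
      uint32_t SecOffset = 0;
      for (const UnitSize &U : Units) {
        Offsets.push_back(SecOffset);
        SecOffset += sizeof(uint32_t) + U.HeaderSize + U.DieTreeSize;
      }
      return Offsets;
    }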
+    unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset);
+    SecOffset += EndOffset;
+  }
+}
+// Compute the size and offset of a DIE. The offset is relative to the start of
+// the CU. It returns the offset after laying out the DIE.
+unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
+  // Record the abbreviation.
+  assignAbbrevNumber(Die.getAbbrev());
+
+  // Get the abbreviation for this DIE.
+  const DIEAbbrev &Abbrev = Die.getAbbrev();
+
+  // Set DIE offset.
+  Die.setOffset(Offset);
+
+  // Start the size with the size of abbreviation code.
+  Offset += getULEB128Size(Die.getAbbrevNumber());
+
+  const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+  const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+  // Size the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i)
+    // Size attribute value.
+    Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+  // Get the children.
+  const auto &Children = Die.getChildren();
+
+  // Size the DIE children if any.
+  if (!Children.empty()) {
+    assert(Abbrev.hasChildren() && "Children flag not set");
+
+    for (auto &Child : Children)
+      Offset = computeSizeAndOffset(*Child, Offset);
+
+    // End of children marker.
+    Offset += sizeof(int8_t);
+  }
+
+  Die.setSize(Offset - Die.getOffset());
+  return Offset;
+}
+void DwarfFile::emitAbbrevs(const MCSection *Section) {
+  // Check to see if it is worth the effort.
+  if (!Abbreviations.empty()) {
+    // Start the debug abbrev section.
+    Asm->OutStreamer.SwitchSection(Section);
+
+    // For each abbreviation.
+    for (const DIEAbbrev *Abbrev : Abbreviations) {
+      // Emit the abbreviation code (base 1 index).
+      Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+      // Emit the abbreviations data.
+      Abbrev->Emit(Asm);
+    }
+
+    // Mark end of abbreviations.
+    Asm->EmitULEB128(0, "EOM(3)");
+  }
+}
+
+// Emit strings into a string section.
+void DwarfFile::emitStrings(const MCSection *StrSection,
+                            const MCSection *OffsetSection,
+                            const MCSymbol *StrSecSym) {
+  StrPool.emit(*Asm, StrSection, OffsetSection, StrSecSym);
+}
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
new file mode 100644
index 0000000..3985eb2
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -0,0 +1,84 @@
+//===-- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework -------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFFILE_H__
+#define CODEGEN_ASMPRINTER_DWARFFILE_H__
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Allocator.h"
+#include "AddressPool.h"
+#include "DwarfStringPool.h"
+
+#include <vector>
+#include <string>
+#include <memory>
+
+namespace llvm {
+class AsmPrinter;
+class DwarfUnit;
+class DIEAbbrev;
+class MCSymbol;
+class DIE;
+class StringRef;
+class DwarfDebug;
+class MCSection;
+class DwarfFile {
+  // Target of Dwarf emission, used for sizing of abbreviations.
+  AsmPrinter *Asm;
+
+  // Used to uniquely define abbreviations.
+  FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+  // A list of all the unique abbreviations in use.
+  std::vector<DIEAbbrev *> Abbreviations;
+
+  // A pointer to all units in the section.
+  SmallVector<std::unique_ptr<DwarfUnit>, 1> CUs;
+
+  DwarfStringPool StrPool;
+
+public:
+  DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
+
+  ~DwarfFile();
+
+  const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { return CUs; }
+
+  /// \brief Compute the size and offset of a DIE given an incoming Offset.
+  unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
+
+  /// \brief Compute the size and offset of all the DIEs.
+  void computeSizeAndOffsets();
+
+  /// \brief Define a unique number for the abbreviation.
+  void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+  /// \brief Add a unit to the list of CUs.
+  void addUnit(std::unique_ptr<DwarfUnit> U);
+
+  /// \brief Emit all of the units to the section listed with the given
+  /// abbreviation section.
+  void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym);
+
+  /// \brief Emit a set of abbreviations to the specific section.
+  void emitAbbrevs(const MCSection *);
+
+  /// \brief Emit all of the strings to the section given.
+  void emitStrings(const MCSection *StrSection,
+                   const MCSection *OffsetSection = nullptr,
+                   const MCSymbol *StrSecSym = nullptr);
+
+  /// \brief Returns the string pool.
+  DwarfStringPool &getStringPool() { return StrPool; }
+};
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
new file mode 100644
index 0000000..72cab60
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -0,0 +1,74 @@
+//===-- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfStringPool.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+MCSymbol *DwarfStringPool::getSectionSymbol() { return SectionSymbol; }
+
+static std::pair<MCSymbol *, unsigned> &
+getEntry(AsmPrinter &Asm,
+         StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool,
+         StringRef Prefix, StringRef Str) {
+  std::pair<MCSymbol *, unsigned> &Entry =
+      Pool.GetOrCreateValue(Str).getValue();
+  if (!Entry.first) {
+    Entry.second = Pool.size() - 1;
+    Entry.first = Asm.GetTempSymbol(Prefix, Entry.second);
+  }
+  return Entry;
+}
+
+MCSymbol *DwarfStringPool::getSymbol(AsmPrinter &Asm, StringRef Str) {
+  return getEntry(Asm, Pool, Prefix, Str).first;
+}
+
+unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) {
+  return getEntry(Asm, Pool, Prefix, Str).second;
+}
+
+void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection,
+                           const MCSection *OffsetSection,
+                           const MCSymbol *StrSecSym) {
+  if (Pool.empty())
+    return;
+
+  // Start the dwarf str section.
+  Asm.OutStreamer.SwitchSection(StrSection);
+
+  // Get all of the string pool entries and put them in an array by their ID so
+  // we can sort them.
+  SmallVector<const StringMapEntry<std::pair<MCSymbol *, unsigned>> *, 64>
+      Entries(Pool.size());
+
+  for (const auto &E : Pool)
+    Entries[E.getValue().second] = &E;
+
+  for (const auto &Entry : Entries) {
+    // Emit a label for reference from debug information entries.
+    Asm.OutStreamer.EmitLabel(Entry->getValue().first);
+
+    // Emit the string itself with a terminating null byte.
+    Asm.OutStreamer.EmitBytes(
+        StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1));
+  }
+
+  // If we've got an offset section go ahead and emit that now as well.
+  if (OffsetSection) {
+    Asm.OutStreamer.SwitchSection(OffsetSection);
+    unsigned offset = 0;
+    unsigned size = 4; // FIXME: DWARF64 is 8.
+    for (const auto &Entry : Entries) {
+      Asm.OutStreamer.EmitIntValue(offset, size);
+      offset += Entry->getKeyLength() + 1;
+    }
+  }
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
new file mode 100644
index 0000000..c1615fb
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -0,0 +1,55 @@
+//===-- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework -*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_STRINGPOOL_H__
+#define CODEGEN_ASMPRINTER_STRINGPOOL_H__
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Allocator.h"
+
+#include <utility>
+
+namespace llvm {
+
+class MCSymbol;
+class MCSection;
+class StringRef;
+
+// Collection of strings for this unit and assorted symbols.
+// A String->Symbol mapping of strings used by indirect
+// references.
+class DwarfStringPool {
+  StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool;
+  StringRef Prefix;
+  MCSymbol *SectionSymbol;
+
+public:
+  DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix)
+      : Pool(A), Prefix(Prefix), SectionSymbol(Asm.GetTempSymbol(Prefix)) {}
+
+  void emit(AsmPrinter &Asm, const MCSection *StrSection,
+            const MCSection *OffsetSection = nullptr,
+            const MCSymbol *StrSecSym = nullptr);
+
+  /// \brief Returns the entry into the start of the pool.
+  MCSymbol *getSectionSymbol();
+
+  /// \brief Returns an entry into the string pool with the given
+  /// string text.
+  MCSymbol *getSymbol(AsmPrinter &Asm, StringRef Str);
+
+  /// \brief Returns the index into the string pool with the given
+  /// string text.
+  unsigned getIndex(AsmPrinter &Asm, StringRef Str);
+
+  bool empty() const { return Pool.empty(); }
+};
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 82e9bb0..a70c0f7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -11,8 +11,6 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "dwarfdebug"
-
 #include "DwarfUnit.h"
 #include "DwarfAccelTable.h"
 #include "DwarfDebug.h"
@@ -35,33 +33,38 @@
 using namespace llvm;

+#define DEBUG_TYPE "dwarfdebug"
+
 static cl::opt<bool>
 GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
                        cl::desc("Generate DWARF4 type units."),
                        cl::init(false));

 /// Unit - Unit constructor.
-DwarfUnit::DwarfUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A,
-                     DwarfDebug *DW, DwarfFile *DWU)
-    : UniqueID(UID), CUNode(Node), UnitDie(D), DebugInfoOffset(0), Asm(A),
-      DD(DW), DU(DWU), IndexTyDie(0), Section(0), Skeleton(0) {
+DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node,
+                     AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
+    : UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A),
+      DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr),
+      Skeleton(nullptr) {
+  assert(UnitTag == dwarf::DW_TAG_compile_unit ||
+         UnitTag == dwarf::DW_TAG_type_unit);
   DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
 }

-DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node,
+DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
                                    AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
-    : DwarfUnit(UID, D, Node, A, DW, DWU) {
-  insertDIE(Node, D);
+    : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU) {
+  insertDIE(Node, &getUnitDie());
 }

-DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DIE *D, DwarfCompileUnit &CU,
-                             AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU,
+DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
+                             DwarfDebug *DW, DwarfFile *DWU,
                              MCDwarfDwoLineTable *SplitLineTable)
-    : DwarfUnit(UID, D, CU.getCUNode(), A, DW, DWU), CU(CU),
-      SplitLineTable(SplitLineTable) {
+    : DwarfUnit(UID, dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU),
+      CU(CU), SplitLineTable(SplitLineTable) {
   if (SplitLineTable)
-    addSectionOffset(UnitDie.get(), dwarf::DW_AT_stmt_list, 0);
+    addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0);
 }

 /// ~Unit - Destructor for compile unit.
@@ -74,7 +77,7 @@ DwarfUnit::~DwarfUnit() {

 /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
 /// information entry.
-DIEEntry *DwarfUnit::createDIEEntry(DIE *Entry) {
+DIEEntry *DwarfUnit::createDIEEntry(DIE &Entry) {
   DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
   return Value;
 }
@@ -159,39 +162,39 @@ void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) {
 }

 /// addFlag - Add a flag that is true.
-void DwarfUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) {
+void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
   if (DD->getDwarfVersion() >= 4)
-    Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne);
+    Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne);
   else
-    Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne);
+    Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne);
 }

 /// addUInt - Add an unsigned integer attribute data and value.
 ///
-void DwarfUnit::addUInt(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
                         Optional<dwarf::Form> Form, uint64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(false, Integer);
   DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator)
                         DIEInteger(Integer);
-  Die->addValue(Attribute, *Form, Value);
+  Die.addValue(Attribute, *Form, Value);
 }

-void DwarfUnit::addUInt(DIE *Block, dwarf::Form Form, uint64_t Integer) {
+void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
   addUInt(Block, (dwarf::Attribute)0, Form, Integer);
 }

 /// addSInt - Add a signed integer attribute data and value.
 ///
-void DwarfUnit::addSInt(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
                         Optional<dwarf::Form> Form, int64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(true, Integer);
   DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
-  Die->addValue(Attribute, *Form, Value);
+  Die.addValue(Attribute, *Form, Value);
 }

-void DwarfUnit::addSInt(DIELoc *Die, Optional<dwarf::Form> Form,
+void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
                         int64_t Integer) {
   addSInt(Die, (dwarf::Attribute)0, Form, Integer);
 }
@@ -201,66 +204,66 @@ void DwarfUnit::addSInt(DIELoc *Die, Optional<dwarf::Form> Form,
 /// more predictable sizes. In the case of split dwarf we emit an index
 /// into another table which gets us the static offset into the string
 /// table.
-void DwarfUnit::addString(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
                           StringRef String) {
   if (!DD->useSplitDwarf())
     return addLocalString(Die, Attribute, String);

-  unsigned idx = DU->getStringPoolIndex(String);
+  unsigned idx = DU->getStringPool().getIndex(*Asm, String);
   DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
   DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
-  Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str);
+  Die.addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str);
 }

 /// addLocalString - Add a string attribute data and value. This is guaranteed
 /// to be in the local string pool instead of indirected.
-void DwarfUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
                                StringRef String) {
-  MCSymbol *Symb = DU->getStringPoolEntry(String);
+  MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String);
   DIEValue *Value;
   if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
     Value = new (DIEValueAllocator) DIELabel(Symb);
   else {
-    MCSymbol *StringPool = DU->getStringPoolSym();
+    MCSymbol *StringPool = DU->getStringPool().getSectionSymbol();
     Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
   }
   DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
-  Die->addValue(Attribute, dwarf::DW_FORM_strp, Str);
+  Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
 }

 /// addExpr - Add a Dwarf expression attribute data and value.
 ///
-void DwarfUnit::addExpr(DIELoc *Die, dwarf::Form Form, const MCExpr *Expr) {
+void DwarfUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) {
   DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
-  Die->addValue((dwarf::Attribute)0, Form, Value);
+  Die.addValue((dwarf::Attribute)0, Form, Value);
 }

 /// addLocationList - Add a Dwarf loclistptr attribute data and value.
 ///
-void DwarfUnit::addLocationList(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
                                 unsigned Index) {
   DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
   dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
                                                 : dwarf::DW_FORM_data4;
-  Die->addValue(Attribute, Form, Value);
+  Die.addValue(Attribute, Form, Value);
 }

 /// addLabel - Add a Dwarf label attribute data and value.
/// -void DwarfUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, +void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, Form, Value); + Die.addValue(Attribute, Form, Value); } -void DwarfUnit::addLabel(DIELoc *Die, dwarf::Form Form, const MCSymbol *Label) { +void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { addLabel(Die, (dwarf::Attribute)0, Form, Label); } /// addSectionLabel - Add a Dwarf section label attribute data and value. /// -void DwarfUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, +void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { if (DD->getDwarfVersion() >= 4) addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); @@ -270,7 +273,7 @@ void DwarfUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, /// addSectionOffset - Add an offset into a section attribute data and value. /// -void DwarfUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, +void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer) { if (DD->getDwarfVersion() >= 4) addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); @@ -281,7 +284,7 @@ void DwarfUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, /// addLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void DwarfCompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, +void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { if (!DD->useSplitDwarf()) @@ -290,24 +293,20 @@ void DwarfCompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, if (Label) DD->addArangeLabel(SymbolCU(this, Label)); - unsigned idx = DU->getAddrPoolIndex(Label); + unsigned idx = DD->getAddressPool().getIndex(Label); DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); + Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); } -void DwarfCompileUnit::addLocalLabelAddress(DIE *Die, +void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label) { if (Label) DD->addArangeLabel(SymbolCU(this, Label)); - if (Label) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); - } else { - DIEValue *Value = new (DIEValueAllocator) DIEInteger(0); - Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); - } + Die.addValue(Attribute, dwarf::DW_FORM_addr, + Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label) + : new (DIEValueAllocator) DIEInteger(0)); } unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { @@ -329,86 +328,94 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirNam /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. 
/// -void DwarfUnit::addOpAddress(DIELoc *Die, const MCSymbol *Sym) { +void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { if (!DD->useSplitDwarf()) { addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Die, dwarf::DW_FORM_udata, Sym); } else { addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, + DD->getAddressPool().getIndex(Sym)); } } /// addSectionDelta - Add a section label delta attribute data and value. /// -void DwarfUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute, +void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - if (DD->getDwarfVersion() >= 4) - Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value); - else - Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); + Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Value); } -void DwarfUnit::addLabelDelta(DIE *Die, dwarf::Attribute Attribute, +void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); + Die.addValue(Attribute, dwarf::DW_FORM_data4, Value); } /// addDIEEntry - Add a DIE attribute data and value. /// -void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) { +void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) { addDIEEntry(Die, Attribute, createDIEEntry(Entry)); } -void DwarfUnit::addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type) { - Die->addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8, - new (DIEValueAllocator) DIETypeSignature(Type)); +void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) { + // Flag the type unit reference as a declaration so that if it contains + // members (implicit special members, static data member definitions, member + // declarations for definitions in this CU, etc) consumers don't get confused + // and think this is a full definition. + addFlag(Die, dwarf::DW_AT_declaration); + + Die.addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8, + new (DIEValueAllocator) DIETypeSignature(Type)); } -void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, +void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry) { - const DIE *DieCU = Die->getUnitOrNull(); - const DIE *EntryCU = Entry->getEntry()->getUnitOrNull(); + const DIE *DieCU = Die.getUnitOrNull(); + const DIE *EntryCU = Entry->getEntry().getUnitOrNull(); if (!DieCU) // We assume that Die belongs to this CU, if it is not linked to any CU yet. - DieCU = getUnitDie(); + DieCU = &getUnitDie(); if (!EntryCU) - EntryCU = getUnitDie(); - Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4 - : dwarf::DW_FORM_ref_addr, - Entry); + EntryCU = &getUnitDie(); + Die.addValue(Attribute, + EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + Entry); } /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. 
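The doc comment above introduces createAndAddDIE; the rewrite in the next hunk moves DIE children to parent ownership: the parent stores unique_ptrs and creation hands back a reference whose lifetime is tied to the parent. The same pattern in miniature, where Node is a hypothetical stand-in for DIE:

    #include <memory>
    #include <vector>

    struct Node {
      std::vector<std::unique_ptr<Node>> Children;

      // Create a child owned by this node and return a stable reference;
      // the reference lives exactly as long as the parent does, so callers
      // can neither leak the child nor double-own it.
      Node &addChild() {
        Children.push_back(std::unique_ptr<Node>(new Node()));
        return *Children.back();
      }
    };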
-DIE *DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
-  DIE *Die = new DIE(Tag);
-  Parent.addChild(Die);
+DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
+  assert(Tag != dwarf::DW_TAG_auto_variable &&
+         Tag != dwarf::DW_TAG_arg_variable);
+  Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag));
+  DIE &Die = *Parent.getChildren().back();
   if (N)
-    insertDIE(N, Die);
+    insertDIE(N, &Die);
   return Die;
 }

 /// addBlock - Add block data.
 ///
-void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, DIELoc *Loc) {
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
   Loc->ComputeSize(Asm);
   DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
-  Die->addValue(Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
+  Die.addValue(Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
 }

-void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
                          DIEBlock *Block) {
   Block->ComputeSize(Asm);
   DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
-  Die->addValue(Attribute, Block->BestForm(), Block);
+  Die.addValue(Attribute, Block->BestForm(), Block);
 }

 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfUnit::addSourceLine(DIE *Die, unsigned Line, StringRef File,
+void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
                               StringRef Directory) {
   if (Line == 0)
     return;
@@ -421,7 +428,7 @@ void DwarfUnit::addSourceLine(DIE *Die, unsigned Line, StringRef File,

 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DIVariable V) {
+void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) {
   assert(V.isVariable());

   addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(),
@@ -430,7 +437,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DIVariable V) {

 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) {
   assert(G.isGlobalVariable());

   addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory());
@@ -438,7 +445,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {

 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) {
   assert(SP.isSubprogram());

   addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory());
@@ -446,7 +453,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DISubprogram SP) {

 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DIType Ty) {
+void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) {
   assert(Ty.isType());

   addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory());
@@ -454,7 +461,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DIType Ty) {

 /// addSourceLine - Add location information to specified debug information
 /// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { +void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) { assert(Ty.isObjCProperty()); DIFile File = Ty.getFile(); @@ -464,7 +471,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { /// addSourceLine - Add location information to specified debug information /// entry. -void DwarfUnit::addSourceLine(DIE *Die, DINameSpace NS) { +void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) { assert(NS.Verify()); addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory()); @@ -472,7 +479,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DINameSpace NS) { /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, +void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, MachineLocation Location) { if (DV.variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); @@ -484,7 +491,7 @@ void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, } /// addRegisterOp - Add register operand. -void DwarfUnit::addRegisterOp(DIELoc *TheDie, unsigned Reg) { +void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); int DWReg = RI->getDwarfRegNum(Reg, false); bool isSubRegister = DWReg < 0; @@ -529,7 +536,7 @@ void DwarfUnit::addRegisterOp(DIELoc *TheDie, unsigned Reg) { } /// addRegisterOffset - Add register offset. -void DwarfUnit::addRegisterOffset(DIELoc *TheDie, unsigned Reg, +void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); @@ -548,16 +555,16 @@ void DwarfUnit::addRegisterOffset(DIELoc *TheDie, unsigned Reg, /// addAddress - Add an address attribute to a die based on the location /// provided. -void DwarfUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, +void DwarfUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location, bool Indirect) { DIELoc *Loc = new (DIEValueAllocator) DIELoc(); if (Location.isReg() && !Indirect) - addRegisterOp(Loc, Location.getReg()); + addRegisterOp(*Loc, Location.getReg()); else { - addRegisterOffset(Loc, Location.getReg(), Location.getOffset()); + addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); if (Indirect && !Location.isReg()) { - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } } @@ -570,7 +577,7 @@ void DwarfUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, /// given the extra address information encoded in the DbgVariable, starting /// from the starting location. Add the DWARF information to the die. /// -void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, +void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { DIELoc *Loc = new (DIEValueAllocator) DIELoc(); @@ -580,21 +587,21 @@ void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
- addRegisterOffset(Loc, Location.getReg(), DV.getAddrElement(1)); + addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1)); i = 2; } else - addRegisterOp(Loc, Location.getReg()); + addRegisterOp(*Loc, Location.getReg()); } else - addRegisterOffset(Loc, Location.getReg(), Location.getOffset()); + addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); for (; i < N; ++i) { uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } else llvm_unreachable("unknown DIBuilder Opcode"); } @@ -663,7 +670,7 @@ void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. /// -void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, +void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location) { DIType Ty = DV.getType(); @@ -705,68 +712,78 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, DIELoc *Loc = new (DIEValueAllocator) DIELoc(); if (Location.isReg()) - addRegisterOp(Loc, Location.getReg()); + addRegisterOp(*Loc, Location.getReg()); else - addRegisterOffset(Loc, Location.getReg(), Location.getOffset()); + addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Loc, dwarf::DW_FORM_udata, forwardingFieldOffset); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Loc, dwarf::DW_FORM_udata, varFieldOffset); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset); } // Now attach the location information to the DIE. addBlock(Die, Attribute, Loc); } -/// isTypeSigned - Return true if the type is signed. 
-static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) { - if (Ty.isDerivedType()) - return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()), - SizeInBits); - if (Ty.isBasicType()) - if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed || - DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { - *SizeInBits = Ty.getSizeInBits(); - return true; - } - return false; -} - /// Return true if type encoding is unsigned. static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { DIDerivedType DTy(Ty); - if (DTy.isDerivedType()) - return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom())); - - DIBasicType BTy(Ty); - if (BTy.isBasicType()) { - unsigned Encoding = BTy.getEncoding(); - if (Encoding == dwarf::DW_ATE_unsigned || - Encoding == dwarf::DW_ATE_unsigned_char || - Encoding == dwarf::DW_ATE_boolean) + if (DTy.isDerivedType()) { + dwarf::Tag T = (dwarf::Tag)Ty.getTag(); + // Encode pointer constants as unsigned bytes. This is used at least for + // null pointer constant emission. + // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed + // here, but accept them for now due to a bug in SROA producing bogus + // dbg.values. + if (T == dwarf::DW_TAG_pointer_type || + T == dwarf::DW_TAG_ptr_to_member_type || + T == dwarf::DW_TAG_reference_type || + T == dwarf::DW_TAG_rvalue_reference_type) return true; + assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || + T == dwarf::DW_TAG_volatile_type || + T == dwarf::DW_TAG_restrict_type || + T == dwarf::DW_TAG_enumeration_type); + if (DITypeRef Deriv = DTy.getTypeDerivedFrom()) + return isUnsignedDIType(DD, DD->resolve(Deriv)); + // FIXME: Enums without a fixed underlying type have unknown signedness + // here, leading to incorrectly emitted constants. + assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type); + return false; } - return false; + + DIBasicType BTy(Ty); + assert(BTy.isBasicType()); + unsigned Encoding = BTy.getEncoding(); + assert((Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_signed || + Encoding == dwarf::DW_ATE_signed_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean) && + "Unsupported encoding"); + return (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean); } /// If this type is derived from a base type then return base type size. @@ -798,47 +815,8 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { return BaseType.getSizeInBits(); } -/// addConstantValue - Add constant value entry in variable DIE. -void DwarfUnit::addConstantValue(DIE *Die, const MachineOperand &MO, - DIType Ty) { - // FIXME: This is a bit conservative/simple - it emits negative values at - // their maximum bit width which is a bit unfortunate (& doesn't prefer - // udata/sdata over dataN as suggested by the DWARF spec) - assert(MO.isImm() && "Invalid machine operand!"); - int SizeInBits = -1; - bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits); - dwarf::Form Form; - - // If we're a signed constant definitely use sdata. - if (SignedConstant) { - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm()); - return; - } - - // Else use data for now unless it's larger than we can deal with. 
- switch (SizeInBits) { - case 8: - Form = dwarf::DW_FORM_data1; - break; - case 16: - Form = dwarf::DW_FORM_data2; - break; - case 32: - Form = dwarf::DW_FORM_data4; - break; - case 64: - Form = dwarf::DW_FORM_data8; - break; - default: - Form = dwarf::DW_FORM_udata; - addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); - return; - } - addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); -} - /// addConstantFPValue - Add constant value entry in variable DIE. -void DwarfUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { +void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -855,55 +833,47 @@ void DwarfUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { // Output the constant to DWARF one byte at a time. for (; Start != Stop; Start += Incr) - addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); + addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addConstantFPValue - Add constant value entry in variable DIE. -void DwarfUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { +void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) { // Pass this down to addConstantValue as an unsigned bag of bits. addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } /// addConstantValue - Add constant value entry in variable DIE. -void DwarfUnit::addConstantValue(DIE *Die, const ConstantInt *CI, - bool Unsigned) { - addConstantValue(Die, CI->getValue(), Unsigned); +void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty) { + addConstantValue(Die, CI->getValue(), Ty); +} + +/// addConstantValue - Add constant value entry in variable DIE. +void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO, + DIType Ty) { + assert(MO.isImm() && "Invalid machine operand!"); + + addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm()); +} + +void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { + // FIXME: This is a bit conservative/simple - it emits negative values always + // sign extended to 64 bits rather than minimizing the number of bytes. + addUInt(Die, dwarf::DW_AT_const_value, + Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val); +} + +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, DIType Ty) { + addConstantValue(Die, Val, isUnsignedDIType(DD, Ty)); } // addConstantValue - Add constant value entry in variable DIE. -void DwarfUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { - // If we're a signed constant definitely use sdata. - if (!Unsigned) { - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - Val.getSExtValue()); - return; - } - - // Else use data for now unless it's larger than we can deal with. 
- dwarf::Form Form; - switch (CIBitWidth) { - case 8: - Form = dwarf::DW_FORM_data1; - break; - case 16: - Form = dwarf::DW_FORM_data2; - break; - case 32: - Form = dwarf::DW_FORM_data4; - break; - case 64: - Form = dwarf::DW_FORM_data8; - break; - default: - addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - Val.getZExtValue()); - return; - } - addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue()); + addConstantValue(Die, Unsigned, + Unsigned ? Val.getZExtValue() : Val.getSExtValue()); return; } @@ -922,7 +892,7 @@ void DwarfUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { c = Ptr64[i / 8] >> (8 * (i & 7)); else c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); - addUInt(Block, dwarf::DW_FORM_data1, c); + addUInt(*Block, dwarf::DW_FORM_data1, c); } addBlock(Die, dwarf::DW_AT_const_value, Block); @@ -945,7 +915,7 @@ void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { /// getOrCreateContextDIE - Get context owner's DIE. DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) { if (!Context || Context.isFile()) - return getUnitDie(); + return &getUnitDie(); if (Context.isType()) return getOrCreateTypeDIE(DIType(Context)); if (Context.isNameSpace()) @@ -959,66 +929,68 @@ DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) { DIScope Context = resolve(Ty.getContext()); DIE *ContextDIE = getOrCreateContextDIE(Context); - DIE *TyDIE = getDIE(Ty); - if (TyDIE) + if (DIE *TyDIE = getDIE(Ty)) return TyDIE; // Create new type. - TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); - constructTypeDIE(*TyDIE, Ty); + constructTypeDIE(TyDIE, Ty); updateAcceleratorTables(Context, Ty, TyDIE); - return TyDIE; + return &TyDIE; } /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { if (!TyNode) - return NULL; + return nullptr; DIType Ty(TyNode); assert(Ty.isType()); assert(Ty == resolve(Ty.getRef()) && "type was not uniqued, possible ODR violation."); + // DW_TAG_restrict_type is not supported in DWARF2 + if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) + return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom())); + // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. DIScope Context = resolve(Ty.getContext()); DIE *ContextDIE = getOrCreateContextDIE(Context); assert(ContextDIE); - DIE *TyDIE = getDIE(Ty); - if (TyDIE) + if (DIE *TyDIE = getDIE(Ty)) return TyDIE; // Create new type. - TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); updateAcceleratorTables(Context, Ty, TyDIE); if (Ty.isBasicType()) - constructTypeDIE(*TyDIE, DIBasicType(Ty)); + constructTypeDIE(TyDIE, DIBasicType(Ty)); else if (Ty.isCompositeType()) { DICompositeType CTy(Ty); if (GenerateDwarfTypeUnits && !Ty.isForwardDecl()) if (MDString *TypeId = CTy.getIdentifier()) { DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); // Skip updating the accelerator tables since this is not the full type. 
-        return TyDIE;
+        return &TyDIE;
       }
-    constructTypeDIE(*TyDIE, CTy);
+    constructTypeDIE(TyDIE, CTy);
   } else {
     assert(Ty.isDerivedType() && "Unknown kind of DIType");
-    constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+    constructTypeDIE(TyDIE, DIDerivedType(Ty));
   }

-  return TyDIE;
+  return &TyDIE;
 }

 void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
-                                        const DIE *TyDIE) {
+                                        const DIE &TyDIE) {
   if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
     bool IsImplementation = 0;
     if (Ty.isCompositeType()) {
@@ -1028,17 +1000,18 @@ void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
       IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete();
     }
     unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
-    addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+    DD->addAccelType(Ty.getName(), TyDIE, Flags);

     if ((!Context || Context.isCompileUnit() || Context.isFile() ||
          Context.isNameSpace()) &&
         getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly)
-      GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = TyDIE;
+      GlobalTypes[getParentContextString(Context) + Ty.getName().str()] =
+          &TyDIE;
   }
 }

 /// addType - Add a new type attribute to the specified entity.
-void DwarfUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) {
+void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) {
   assert(Ty && "Trying to add a type that doesn't exist?");

   // Check for pre-existence.
@@ -1053,54 +1026,17 @@ void DwarfUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) {
   DIE *Buffer = getOrCreateTypeDIE(Ty);

   // Set up proxy.
-  Entry = createDIEEntry(Buffer);
+  Entry = createDIEEntry(*Buffer);
   insertDIEEntry(Ty, Entry);
   addDIEEntry(Entity, Attribute, Entry);
 }

-// Accelerator table mutators - add each name along with its companion
-// DIE to the proper table while ensuring that the name that we're going
-// to reference is in the string table. We do this since the names we
-// add may not only be identical to the names in the DIE.
-void DwarfUnit::addAccelName(StringRef Name, const DIE *Die) {
-  if (!DD->useDwarfAccelTables())
-    return;
-  DU->getStringPoolEntry(Name);
-  std::vector<const DIE *> &DIEs = AccelNames[Name];
-  DIEs.push_back(Die);
-}
-
-void DwarfUnit::addAccelObjC(StringRef Name, const DIE *Die) {
-  if (!DD->useDwarfAccelTables())
-    return;
-  DU->getStringPoolEntry(Name);
-  std::vector<const DIE *> &DIEs = AccelObjC[Name];
-  DIEs.push_back(Die);
-}
-
-void DwarfUnit::addAccelNamespace(StringRef Name, const DIE *Die) {
-  if (!DD->useDwarfAccelTables())
-    return;
-  DU->getStringPoolEntry(Name);
-  std::vector<const DIE *> &DIEs = AccelNamespace[Name];
-  DIEs.push_back(Die);
-}
-
-void DwarfUnit::addAccelType(StringRef Name,
-                             std::pair<const DIE *, unsigned> Die) {
-  if (!DD->useDwarfAccelTables())
-    return;
-  DU->getStringPoolEntry(Name);
-  std::vector<std::pair<const DIE *, unsigned> > &DIEs = AccelTypes[Name];
-  DIEs.push_back(Die);
-}
-
 /// addGlobalName - Add a new global name to the compile unit.
-void DwarfUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { +void DwarfUnit::addGlobalName(StringRef Name, DIE &Die, DIScope Context) { if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly) return; std::string FullName = getParentContextString(Context) + Name.str(); - GlobalNames[FullName] = Die; + GlobalNames[FullName] = &Die; } /// getParentContextString - Walks the metadata parent chain in a language @@ -1149,17 +1085,17 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { StringRef Name = BTy.getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, Name); + addString(Buffer, dwarf::DW_AT_name, Name); // An unspecified type only has a name attribute. if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) return; - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); } /// constructTypeDIE - Construct derived type die from DIDerivedType. @@ -1172,22 +1108,22 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Map to main type, void will not have a type. DIType FromTy = resolve(DTy.getTypeDerivedFrom()); if (FromTy) - addType(&Buffer, FromTy); + addType(Buffer, FromTy); // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, Name); + addString(Buffer, dwarf::DW_AT_name, Name); // Add size if non-zero (derived types might be zero-sized.) if (Size && Tag != dwarf::DW_TAG_pointer_type) - addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, - getOrCreateTypeDIE(resolve(DTy.getClassType()))); + addDIEEntry(Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(resolve(DTy.getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. if (!DTy.isForwardDecl()) - addSourceLine(&Buffer, DTy); + addSourceLine(Buffer, DTy); } /// constructSubprogramArguments - Construct function argument DIEs. 
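The addGlobalName hunk above keys the pubnames-style tables by a scope-qualified string, so identically named entities in different scopes stay distinct. A minimal sketch of that keying, where ScopePrefix and DIEHandle are hypothetical stand-ins for getParentContextString(Context) and the stored DIE pointer:

    #include <map>
    #include <string>

    // Index entities by "scope prefix + name", mirroring the
    // GlobalNames[getParentContextString(Context) + Name] pattern above.
    using DIEHandle = const void *;

    void addGlobalName(std::map<std::string, DIEHandle> &GlobalNames,
                       const std::string &ScopePrefix, // e.g. "ns::Outer::"
                       const std::string &Name, DIEHandle Die) {
      GlobalNames[ScopePrefix + Name] = Die;
    }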
@@ -1198,7 +1134,7 @@ void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) { assert(i == N-1 && "Unspecified parameter must be the last argument"); createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); } else { - DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); + DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, DIType(Ty)); if (DIType(Ty).isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); @@ -1226,7 +1162,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIArray Elements = CTy.getTypeArray(); DIType RTy(Elements.getElement(0)); if (RTy) - addType(&Buffer, RTy); + addType(Buffer, RTy); bool isPrototyped = true; if (Elements.getNumElements() == 2 && @@ -1241,13 +1177,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (isPrototyped && (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) - addFlag(&Buffer, dwarf::DW_AT_prototyped); + addFlag(Buffer, dwarf::DW_AT_prototyped); if (CTy.isLValueReference()) - addFlag(&Buffer, dwarf::DW_AT_reference); + addFlag(Buffer, dwarf::DW_AT_reference); if (CTy.isRValueReference()) - addFlag(&Buffer, dwarf::DW_AT_rvalue_reference); + addFlag(Buffer, dwarf::DW_AT_rvalue_reference); } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: @@ -1256,13 +1192,12 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { DIArray Elements = CTy.getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); - DIE *ElemDie = NULL; if (Element.isSubprogram()) - ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); + getOrCreateSubprogramDIE(DISubprogram(Element)); else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { - ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); + DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), dwarf::DW_AT_friend); } else if (DDTy.isStaticMember()) { @@ -1272,7 +1207,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); - ElemDie = createAndAddDIE(Property.getTag(), Buffer); + DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); if (Property.getType()) @@ -1311,15 +1246,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } if (CTy.isAppleBlockExtension()) - addFlag(&Buffer, dwarf::DW_AT_APPLE_block); + addFlag(Buffer, dwarf::DW_AT_APPLE_block); DICompositeType ContainingType(resolve(CTy.getContainingType())); if (ContainingType) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, - getOrCreateTypeDIE(ContainingType)); + addDIEEntry(Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(ContainingType)); if (CTy.isObjcClassComplete()) - addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); + addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. @@ -1335,7 +1270,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add name if not anonymous or intermediate type. 
   if (!Name.empty())
-    addString(&Buffer, dwarf::DW_AT_name, Name);
+    addString(Buffer, dwarf::DW_AT_name, Name);
   if (Tag == dwarf::DW_TAG_enumeration_type ||
       Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
@@ -1343,23 +1278,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
     // Add size if non-zero (derived types might be zero-sized.)
     // TODO: Do we care about size for enum forward declarations?
     if (Size)
-      addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
+      addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
     else if (!CTy.isForwardDecl())
       // Add zero size if it is not a forward declaration.
-      addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0);
+      addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0);
     // If we're a forward decl, say so.
     if (CTy.isForwardDecl())
-      addFlag(&Buffer, dwarf::DW_AT_declaration);
+      addFlag(Buffer, dwarf::DW_AT_declaration);
     // Add source line info if available.
     if (!CTy.isForwardDecl())
-      addSourceLine(&Buffer, CTy);
+      addSourceLine(Buffer, CTy);
     // No harm in adding the runtime language to the declaration.
     unsigned RLang = CTy.getRunTimeLang();
     if (RLang)
-      addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
+      addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
               RLang);
   }
 }
@@ -1368,7 +1303,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
 /// DITemplateTypeParameter.
 void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
                                                   DITemplateTypeParameter TP) {
-  DIE *ParamDIE =
+  DIE &ParamDIE =
       createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
   // Add the type if it exists, it could be void and therefore no type.
   if (TP.getType())
@@ -1382,7 +1317,7 @@ void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
 void
 DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
                                               DITemplateValueParameter VP) {
-  DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
+  DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
   // Add the type if there is one, template template and template parameter
   // packs will not have a type.
@@ -1392,16 +1327,15 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
     addString(ParamDIE, dwarf::DW_AT_name, VP.getName());
   if (Value *Val = VP.getValue()) {
     if (ConstantInt *CI = dyn_cast<ConstantInt>(Val))
-      addConstantValue(ParamDIE, CI,
-                       isUnsignedDIType(DD, resolve(VP.getType())));
+      addConstantValue(ParamDIE, CI, resolve(VP.getType()));
     else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) {
       // For declaration non-type template parameters (such as global values and
       // functions)
       DIELoc *Loc = new (DIEValueAllocator) DIELoc();
-      addOpAddress(Loc, Asm->getSymbol(GV));
+      addOpAddress(*Loc, Asm->getSymbol(GV));
       // Emit DW_OP_stack_value to use the address as the immediate value of the
       // parameter, rather than a pointer to it.
-      addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+      addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
       addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
     } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
       assert(isa<MDString>(Val));
@@ -1410,7 +1344,7 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
     } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
       assert(isa<MDNode>(Val));
       DIArray A(cast<MDNode>(Val));
-      addTemplateParams(*ParamDIE, A);
+      addTemplateParams(ParamDIE, A);
     }
   }
 }
@@ -1421,19 +1355,18 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
   // such construction creates the DIE.
   DIE *ContextDIE = getOrCreateContextDIE(NS.getContext());
-  DIE *NDie = getDIE(NS);
-  if (NDie)
+  if (DIE *NDie = getDIE(NS))
     return NDie;
-  NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
+  DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
   if (!NS.getName().empty()) {
     addString(NDie, dwarf::DW_AT_name, NS.getName());
-    addAccelNamespace(NS.getName(), NDie);
+    DD->addAccelNamespace(NS.getName(), NDie);
     addGlobalName(NS.getName(), NDie, NS.getContext());
   } else
-    addAccelNamespace("(anonymous namespace)", NDie);
+    DD->addAccelNamespace("(anonymous namespace)", NDie);
   addSourceLine(NDie, NS);
-  return NDie;
+  return &NDie;
 }
 /// getOrCreateSubprogramDIE - Create new DIE using SP.
@@ -1441,47 +1374,58 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   // Construct the context before querying for the existence of the DIE in case
   // such construction creates the DIE (as is the case for member function
   // declarations).
-  DIScope Context = resolve(SP.getContext());
-  DIE *ContextDIE = getOrCreateContextDIE(Context);
+  DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
-  // Unique declarations based on the ODR, where applicable.
-  SP = DISubprogram(DD->resolve(SP.getRef()));
-  assert(SP.Verify());
-
-  DIE *SPDie = getDIE(SP);
-  if (SPDie)
+  if (DIE *SPDie = getDIE(SP))
     return SPDie;
-  DISubprogram SPDecl = SP.getFunctionDeclaration();
-  if (SPDecl.isSubprogram())
+  if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
     // Add subprogram definitions to the CU die directly.
-    ContextDIE = UnitDie.get();
+    ContextDIE = &getUnitDie();
+    // Build the decl now to ensure it precedes the definition.
+    getOrCreateSubprogramDIE(SPDecl);
+  }
   // DW_TAG_inlined_subroutine may refer to this DIE.
-  SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
-
-  DIE *DeclDie = NULL;
-  if (SPDecl.isSubprogram())
-    DeclDie = getOrCreateSubprogramDIE(SPDecl);
+  DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
-  // Add function template parameters.
-  addTemplateParams(*SPDie, SP.getTemplateParams());
+  // Abort here and fill this in later, depending on whether this
+  // subprogram turns out to have inlined instances or not.
+  if (SP.isDefinition())
+    return &SPDie;
-  // If this DIE is going to refer declaration info using AT_specification
-  // then there is no need to add other attributes.
-  if (DeclDie) {
-    // Refer function declaration directly.
-    addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie);
+  applySubprogramAttributes(SP, SPDie);
+  return &SPDie;
+}
-    return SPDie;
+void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
+  DIE *DeclDie = nullptr;
+  StringRef DeclLinkageName;
+  if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+    DeclDie = getDIE(SPDecl);
+    assert(DeclDie);
+    DeclLinkageName = SPDecl.getLinkageName();
   }
-  // Add the linkage name if we have one.
+  // Add function template parameters.
+  addTemplateParams(SPDie, SP.getTemplateParams());
+
+  // Add the linkage name if we have one and it isn't in the Decl.
   StringRef LinkageName = SP.getLinkageName();
-  if (!LinkageName.empty())
+  assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
+          LinkageName == DeclLinkageName) &&
+         "decl has a linkage name and it is different");
+  if (!LinkageName.empty() && DeclLinkageName.empty())
     addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
               GlobalValue::getRealLinkageName(LinkageName));
+  if (DeclDie) {
+    // Refer to the function declaration where all the other attributes will be
+    // found.
+    addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
+    return;
+  }
   // Constructors and operators for anonymous aggregates do not have names.
   if (!SP.getName().empty())
     addString(SPDie, dwarf::DW_AT_name, SP.getName());
@@ -1510,11 +1454,11 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   if (VK) {
     addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
     DIELoc *Block = getDIELoc();
-    addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
-    addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+    addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
     addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
     ContainingTypeMap.insert(
-        std::make_pair(SPDie, resolve(SP.getContainingType())));
+        std::make_pair(&SPDie, resolve(SP.getContainingType())));
   }
   if (!SP.isDefinition()) {
@@ -1522,7 +1466,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
     // Add arguments. Do not add arguments for subprogram definition. They will
     // be handled while processing variables.
-    constructSubprogramArguments(*SPDie, Args);
+    constructSubprogramArguments(SPDie, Args);
   }
   if (SP.isArtificial())
@@ -1556,8 +1500,6 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
   if (SP.isExplicit())
     addFlag(SPDie, dwarf::DW_AT_explicit);
-
-  return SPDie;
 }
 // Return const expression if value is a GEP to access merged global
@@ -1567,22 +1509,22 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
   const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
   if (!CE || CE->getNumOperands() != 3 ||
       CE->getOpcode() != Instruction::GetElementPtr)
-    return NULL;
+    return nullptr;
   // First operand points to a global struct.
   Value *Ptr = CE->getOperand(0);
   if (!isa<GlobalValue>(Ptr) ||
       !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
-    return NULL;
+    return nullptr;
   // Second operand is zero.
   const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
   if (!CI || !CI->isZero())
-    return NULL;
+    return nullptr;
   // Third operand is offset.
   if (!isa<ConstantInt>(CE->getOperand(2)))
-    return NULL;
+    return nullptr;
   return CE;
 }
@@ -1600,7 +1542,7 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
   // If this is a static data member definition, some attributes belong
   // to the declaration DIE.
-  DIE *VariableDIE = NULL;
+  DIE *VariableDIE = nullptr;
   bool IsStaticMember = false;
   DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
   if (SDMDecl.Verify()) {
@@ -1618,24 +1560,24 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
     DIE *ContextDIE = getOrCreateContextDIE(GVContext);
     // Add to map.
-    VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV);
+    VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV);
     // Add name and type.
-    addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
-    addType(VariableDIE, GTy);
+    addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+    addType(*VariableDIE, GTy);
     // Add scoping info.
if (!GV.isLocalToUnit()) - addFlag(VariableDIE, dwarf::DW_AT_external); + addFlag(*VariableDIE, dwarf::DW_AT_external); // Add line number info. - addSourceLine(VariableDIE, GV); + addSourceLine(*VariableDIE, GV); } // Add location. bool addToAccelTable = false; - DIE *VariableSpecDIE = NULL; - bool isGlobalVariable = GV.getGlobal() != NULL; + DIE *VariableSpecDIE = nullptr; + bool isGlobalVariable = GV.getGlobal() != nullptr; if (isGlobalVariable) { addToAccelTable = true; DIELoc *Loc = new (DIEValueAllocator) DIELoc(); @@ -1648,36 +1590,36 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { // Based on GCC's support for TLS: if (!DD->useSplitDwarf()) { // 1) Start with a constNu of the appropriate pointer size - addUInt(Loc, dwarf::DW_FORM_data1, + addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); // 2) containing the (relocated) offset of the TLS variable // within the module's TLS block. - addExpr(Loc, dwarf::DW_FORM_udata, + addExpr(*Loc, dwarf::DW_FORM_udata, Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); } else { - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); - addUInt(Loc, dwarf::DW_FORM_udata, - DU->getAddrPoolIndex(Sym, /* TLS */ true)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); } // 3) followed by a custom OP to make the debugger do a TLS lookup. - addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); } else { DD->addArangeLabel(SymbolCU(this, Sym)); - addOpAddress(Loc, Sym); + addOpAddress(*Loc, Sym); } // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { // Create specification DIE. - VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *UnitDie); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, Loc); + VariableSpecDIE = &createAndAddDIE(dwarf::DW_TAG_variable, UnitDie); + addDIEEntry(*VariableSpecDIE, dwarf::DW_AT_specification, *VariableDIE); + addBlock(*VariableSpecDIE, dwarf::DW_AT_location, Loc); // A static member's declaration is already flagged as such. if (!SDMDecl.Verify()) - addFlag(VariableDIE, dwarf::DW_AT_declaration); + addFlag(*VariableDIE, dwarf::DW_AT_declaration); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, Loc); + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); } // Add the linkage name. StringRef LinkageName = GV.getLinkageName(); @@ -1685,8 +1627,8 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and // TAG_variable. - addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE - : VariableDIE, + addString(IsStaticMember && VariableSpecDIE ? *VariableSpecDIE + : *VariableDIE, DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name : dwarf::DW_AT_MIPS_linkage_name, GlobalValue::getRealLinkageName(LinkageName)); @@ -1696,7 +1638,7 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { // emitting AT_const_value multiple times, we only add AT_const_value when // it is not a static member. 
     if (!IsStaticMember)
-      addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy));
+      addConstantValue(*VariableDIE, CI, GTy);
   } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) {
     addToAccelTable = true;
     // GV is a merged global.
@@ -1704,34 +1646,35 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
     Value *Ptr = CE->getOperand(0);
     MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
     DD->addArangeLabel(SymbolCU(this, Sym));
-    addOpAddress(Loc, Sym);
-    addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addOpAddress(*Loc, Sym);
+    addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
     SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
-    addUInt(Loc, dwarf::DW_FORM_udata,
+    addUInt(*Loc, dwarf::DW_FORM_udata,
             Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
-    addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
-    addBlock(VariableDIE, dwarf::DW_AT_location, Loc);
+    addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+    addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
   }
   if (addToAccelTable) {
-    DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
-    addAccelName(GV.getName(), AddrDIE);
+    DIE &AddrDIE = VariableSpecDIE ? *VariableSpecDIE : *VariableDIE;
+    DD->addAccelName(GV.getName(), AddrDIE);
     // If the linkage name is different than the name, go ahead and output
     // that as well into the name table.
     if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
-      addAccelName(GV.getLinkageName(), AddrDIE);
+      DD->addAccelName(GV.getLinkageName(), AddrDIE);
   }
   if (!GV.isLocalToUnit())
-    addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE,
+    addGlobalName(GV.getName(),
+                  VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
                   GV.getContext());
 }
 /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
 void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
-  DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
-  addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy);
+  DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
+  addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy);
   // The LowerBound value defines the lower bounds which is typically zero for
   // C/C++. The Count value is the number of elements. Values are 64 bit. If
@@ -1756,10 +1699,10 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
 /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
 void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
   if (CTy.isVector())
-    addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
+    addFlag(Buffer, dwarf::DW_AT_GNU_vector);
   // Emit the element type.
-  addType(&Buffer, resolve(CTy.getTypeDerivedFrom()));
+  addType(Buffer, resolve(CTy.getTypeDerivedFrom()));
   // Get an anonymous type for index type.
   // FIXME: This type should be passed down from the front end
@@ -1767,10 +1710,10 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
   DIE *IdxTy = getIndexTyDie();
   if (!IdxTy) {
     // Construct an integer type to use for indexes.
-    IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *UnitDie);
-    addString(IdxTy, dwarf::DW_AT_name, "sizetype");
-    addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
-    addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+    IdxTy = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
+    addString(*IdxTy, dwarf::DW_AT_name, "sizetype");
+    addUInt(*IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
+    addUInt(*IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
             dwarf::DW_ATE_unsigned);
     setIndexTyDie(IdxTy);
   }
@@ -1792,7 +1735,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
   for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
     DIEnumerator Enum(Elements.getElement(i));
     if (Enum.isEnumerator()) {
-      DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
+      DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
       StringRef Name = Enum.getName();
       addString(Enumerator, dwarf::DW_AT_name, Name);
       int64_t Value = Enum.getEnumValue();
@@ -1802,8 +1745,8 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
   }
   DIType DTy = resolve(CTy.getTypeDerivedFrom());
   if (DTy) {
-    addType(&Buffer, DTy);
-    addFlag(&Buffer, dwarf::DW_AT_enum_class);
+    addType(Buffer, DTy);
+    addFlag(Buffer, dwarf::DW_AT_enum_class);
   }
 }
@@ -1813,48 +1756,51 @@ void DwarfUnit::constructContainingTypeDIEs() {
   for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
          CE = ContainingTypeMap.end(); CI != CE; ++CI) {
-    DIE *SPDie = CI->first;
+    DIE &SPDie = *CI->first;
     DIDescriptor D(CI->second);
     if (!D)
       continue;
     DIE *NDie = getDIE(D);
     if (!NDie)
       continue;
-    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie);
+    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie);
   }
 }
 /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-DIE *DwarfUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) {
+std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV,
+                                                     bool Abstract) {
+  auto D = constructVariableDIEImpl(DV, Abstract);
+  DV.setDIE(*D);
+  return D;
+}
+
+std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV,
+                                                         bool Abstract) {
   StringRef Name = DV.getName();
   // Define variable debug information entry.
-  DIE *VariableDie = new DIE(DV.getTag());
+  auto VariableDie = make_unique<DIE>(DV.getTag());
   DbgVariable *AbsVar = DV.getAbstractVariable();
-  DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL;
-  if (AbsDIE)
-    addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE);
+  if (AbsVar && AbsVar->getDIE())
+    addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE());
   else {
     if (!Name.empty())
-      addString(VariableDie, dwarf::DW_AT_name, Name);
-    addSourceLine(VariableDie, DV.getVariable());
-    addType(VariableDie, DV.getType());
+      addString(*VariableDie, dwarf::DW_AT_name, Name);
+    addSourceLine(*VariableDie, DV.getVariable());
+    addType(*VariableDie, DV.getType());
+    if (DV.isArtificial())
+      addFlag(*VariableDie, dwarf::DW_AT_artificial);
   }
-  if (DV.isArtificial())
-    addFlag(VariableDie, dwarf::DW_AT_artificial);
-
-  if (isScopeAbstract) {
-    DV.setDIE(VariableDie);
+  if (Abstract)
     return VariableDie;
-  }
   // Add variable address.
unsigned Offset = DV.getDotDebugLocOffset(); if (Offset != ~0U) { - addLocationList(VariableDie, dwarf::DW_AT_location, Offset); - DV.setDIE(VariableDie); + addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); return VariableDie; } @@ -1867,38 +1813,36 @@ DIE *DwarfUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { if (DVInsn->getOperand(1).isImm()) { MachineLocation Location(RegOp.getReg(), DVInsn->getOperand(1).getImm()); - addVariableAddress(DV, VariableDie, Location); + addVariableAddress(DV, *VariableDie, Location); } else if (RegOp.getReg()) - addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg())); + addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg())); } else if (DVInsn->getOperand(0).isImm()) - addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType()); + addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); else if (DVInsn->getOperand(0).isFPImm()) - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + addConstantFPValue(*VariableDie, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isCImm()) - addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), - isUnsignedDIType(DD, DV.getType())); + addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(), + DV.getType()); - DV.setDIE(VariableDie); return VariableDie; - } else { - // .. else use frame index. - int FI = DV.getFrameIndex(); - if (FI != ~0) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); - } } - DV.setDIE(VariableDie); + // .. else use frame index. + int FI = DV.getFrameIndex(); + if (FI != ~0) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + addVariableAddress(DV, *VariableDie, Location); + } + return VariableDie; } /// constructMemberDIE - Construct member DIE from DIDerivedType. 
void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { - DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer); + DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); @@ -1914,13 +1858,13 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { // BaseAddr = ObAddr + *((*ObAddr) - Offset) DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc(); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); } else { @@ -1953,8 +1897,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { if (DD->getDwarfVersion() <= 2) { DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc(); - addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); } else addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, @@ -1978,8 +1922,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { // Objective-C properties. if (MDNode *PNode = DT.getObjCProperty()) if (DIEEntry *PropertyDie = getDIEEntry(PNode)) - MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, - PropertyDie); + MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, + PropertyDie); if (DT.isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); @@ -1988,7 +1932,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { /// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { if (!DT.Verify()) - return NULL; + return nullptr; // Construct the context before querying for the existence of the DIE in case // such construction creates the DIE. 
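The virtual-base case in constructMemberDIE above encodes BaseAddr = ObAddr + *((*ObAddr) - Offset) as a DWARF expression, which is easiest to audit as a stack trace. The annotation below is a reading aid added here, not text from the patch; it shows the evaluator's stack after each operator (top of stack on the right), assuming the consumer pushes the object address ObAddr before evaluation:

    // DW_OP_dup            stack: ObAddr, ObAddr
    // DW_OP_deref          stack: ObAddr, *ObAddr
    // DW_OP_constu Offset  stack: ObAddr, *ObAddr, Offset
    // DW_OP_minus          stack: ObAddr, (*ObAddr - Offset)
    // DW_OP_deref          stack: ObAddr, *(*ObAddr - Offset)
    // DW_OP_plus           stack: ObAddr + *((*ObAddr) - Offset)

The final stack entry is the virtual-base address the debugger needs, matching the BaseAddr comment in the hunk.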
@@ -1996,11 +1940,10 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { assert(dwarf::isType(ContextDIE->getTag()) && "Static member should belong to a type."); - DIE *StaticMemberDIE = getDIE(DT); - if (StaticMemberDIE) + if (DIE *StaticMemberDIE = getDIE(DT)) return StaticMemberDIE; - StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); DIType Ty = resolve(DT.getTypeDerivedFrom()); @@ -2023,11 +1966,11 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { dwarf::DW_ACCESS_public); if (const ConstantInt *CI = dyn_cast_or_null(DT.getConstant())) - addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty)); + addConstantValue(StaticMemberDIE, CI, Ty); if (const ConstantFP *CFP = dyn_cast_or_null(DT.getConstant())) addConstantFPValue(StaticMemberDIE, CFP); - return StaticMemberDIE; + return &StaticMemberDIE; } void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const { @@ -2072,7 +2015,7 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { MCSymbol *LineTableStartSym = Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID()); - stmtListIndex = UnitDie->getValues().size(); + stmtListIndex = UnitDie.getValues().size(); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. For split dwarf this is @@ -2080,16 +2023,16 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) { // The line table entries are not always emitted in assembly, so it // is not okay to use line_table_start here. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - addSectionLabel(UnitDie.get(), dwarf::DW_AT_stmt_list, LineTableStartSym); + addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym); else - addSectionDelta(UnitDie.get(), dwarf::DW_AT_stmt_list, LineTableStartSym, + addSectionDelta(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, DwarfLineSectionSym); } void DwarfCompileUnit::applyStmtList(DIE &D) { D.addValue(dwarf::DW_AT_stmt_list, - UnitDie->getAbbrev().getData()[stmtListIndex].getForm(), - UnitDie->getValues()[stmtListIndex]); + UnitDie.getAbbrev().getData()[stmtListIndex].getForm(), + UnitDie.getValues()[stmtListIndex]); } void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const { @@ -2114,5 +2057,4 @@ void DwarfTypeUnit::initSection(const MCSection *Section) { Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); this->LabelEnd = Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); - this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID()); } diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index ef713f7..acb7528 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -73,7 +73,7 @@ protected: DICompileUnit CUNode; /// Unit debug information entry. - const std::unique_ptr UnitDie; + DIE UnitDie; /// Offset of the UnitDie from beginning of debug info section. unsigned DebugInfoOffset; @@ -102,18 +102,6 @@ protected: /// GlobalTypes - A map of globally visible types for this unit. StringMap GlobalTypes; - /// AccelNames - A map of names for the name accelerator table. - StringMap > AccelNames; - - /// AccelObjC - A map of objc spec for the objc accelerator table. - StringMap > AccelObjC; - - /// AccelNamespace - A map of names for the namespace accelerator table. - StringMap > AccelNamespace; - - /// AccelTypes - A map of names for the type accelerator table. 
- StringMap > > AccelTypes; - /// DIEBlocks - A list of all the DIEBlocks in use. std::vector DIEBlocks; @@ -150,20 +138,17 @@ protected: /// The end of the unit within its section. MCSymbol *LabelEnd; - /// The label for the start of the range sets for the elements of this unit. - MCSymbol *LabelRange; - /// Skeleton unit associated with this unit. DwarfUnit *Skeleton; - DwarfUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A, + DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); public: virtual ~DwarfUnit(); /// Set the skeleton unit associated with this unit. - void setSkeleton(DwarfUnit *Skel) { Skeleton = Skel; } + void setSkeleton(DwarfUnit &Skel) { Skeleton = &Skel; } /// Get the skeleton unit associated with this unit. DwarfUnit *getSkeleton() const { return Skeleton; } @@ -179,7 +164,6 @@ public: Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID()); this->LabelEnd = Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID()); - this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID()); } const MCSection *getSection() const { @@ -218,38 +202,19 @@ public: return LabelEnd; } - MCSymbol *getLabelRange() const { - assert(Section); - return LabelRange; - } - // Accessors. unsigned getUniqueID() const { return UniqueID; } uint16_t getLanguage() const { return CUNode.getLanguage(); } DICompileUnit getCUNode() const { return CUNode; } - DIE *getUnitDie() const { return UnitDie.get(); } + DIE &getUnitDie() { return UnitDie; } const StringMap &getGlobalNames() const { return GlobalNames; } const StringMap &getGlobalTypes() const { return GlobalTypes; } - const StringMap > &getAccelNames() const { - return AccelNames; - } - const StringMap > &getAccelObjC() const { - return AccelObjC; - } - const StringMap > &getAccelNamespace() const { - return AccelNamespace; - } - const StringMap > > & - getAccelTypes() const { - return AccelTypes; - } - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } /// hasContent - Return true if this compile unit has something to write out. - bool hasContent() const { return !UnitDie->getChildren().empty(); } + bool hasContent() const { return !UnitDie.getChildren().empty(); } /// addRange - Add an address range to the list of ranges for this unit. void addRange(RangeSpan Range); @@ -273,19 +238,10 @@ public: /// addGlobalName - Add a new global entity to the compile unit. /// - void addGlobalName(StringRef Name, DIE *Die, DIScope Context); - - /// addAccelName - Add a new name to the name accelerator table. - void addAccelName(StringRef Name, const DIE *Die); - - /// addAccelObjC - Add a new name to the ObjC accelerator table. - void addAccelObjC(StringRef Name, const DIE *Die); + void addGlobalName(StringRef Name, DIE &Die, DIScope Context); /// addAccelNamespace - Add a new name to the namespace accelerator table. - void addAccelNamespace(StringRef Name, const DIE *Die); - - /// addAccelType - Add a new type to the type accelerator table. - void addAccelType(StringRef Name, std::pair Die); + void addAccelNamespace(StringRef Name, const DIE &Die); /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. We delegate the request to DwarfDebug @@ -303,118 +259,116 @@ public: /// kept in DwarfDebug. void insertDIE(DIDescriptor Desc, DIE *D); - /// addDie - Adds or interns the DIE to the compile unit. 
- /// - void addDie(DIE *Buffer) { UnitDie->addChild(Buffer); } - /// addFlag - Add a flag that is true to the DIE. - void addFlag(DIE *Die, dwarf::Attribute Attribute); + void addFlag(DIE &Die, dwarf::Attribute Attribute); /// addUInt - Add an unsigned integer attribute data and value. - void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional Form, + void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional Form, uint64_t Integer); - void addUInt(DIE *Block, dwarf::Form Form, uint64_t Integer); + void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer); /// addSInt - Add an signed integer attribute data and value. - void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional Form, + void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional Form, int64_t Integer); - void addSInt(DIELoc *Die, Optional Form, int64_t Integer); + void addSInt(DIELoc &Die, Optional Form, int64_t Integer); /// addString - Add a string attribute data and value. - void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); + void addString(DIE &Die, dwarf::Attribute Attribute, const StringRef Str); /// addLocalString - Add a string attribute data and value. - void addLocalString(DIE *Die, dwarf::Attribute Attribute, + void addLocalString(DIE &Die, dwarf::Attribute Attribute, const StringRef Str); /// addExpr - Add a Dwarf expression attribute data and value. - void addExpr(DIELoc *Die, dwarf::Form Form, const MCExpr *Expr); + void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); /// addLabel - Add a Dwarf label attribute data and value. - void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, + void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); - void addLabel(DIELoc *Die, dwarf::Form Form, const MCSymbol *Label); + void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); /// addLocationList - Add a Dwarf loclistptr attribute data and value. - void addLocationList(DIE *Die, dwarf::Attribute Attribute, unsigned Index); + void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index); /// addSectionLabel - Add a Dwarf section label attribute data and value. /// - void addSectionLabel(DIE *Die, dwarf::Attribute Attribute, + void addSectionLabel(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label); /// addSectionOffset - Add an offset into a section attribute data and value. /// - void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer); + void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. - void addOpAddress(DIELoc *Die, const MCSymbol *Label); + void addOpAddress(DIELoc &Die, const MCSymbol *Label); /// addSectionDelta - Add a label delta attribute data and value. - void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo); /// addLabelDelta - Add a label delta attribute data and value. - void addLabelDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, const MCSymbol *Lo); /// addDIEEntry - Add a DIE attribute data and value. 
- void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry); + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); /// addDIEEntry - Add a DIE attribute data and value. - void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry); + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry); - void addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type); + void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); /// addBlock - Add block data. - void addBlock(DIE *Die, dwarf::Attribute Attribute, DIELoc *Block); + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); /// addBlock - Add block data. - void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block); + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); /// addSourceLine - Add location information to specified debug information /// entry. - void addSourceLine(DIE *Die, unsigned Line, StringRef File, + void addSourceLine(DIE &Die, unsigned Line, StringRef File, StringRef Directory); - void addSourceLine(DIE *Die, DIVariable V); - void addSourceLine(DIE *Die, DIGlobalVariable G); - void addSourceLine(DIE *Die, DISubprogram SP); - void addSourceLine(DIE *Die, DIType Ty); - void addSourceLine(DIE *Die, DINameSpace NS); - void addSourceLine(DIE *Die, DIObjCProperty Ty); + void addSourceLine(DIE &Die, DIVariable V); + void addSourceLine(DIE &Die, DIGlobalVariable G); + void addSourceLine(DIE &Die, DISubprogram SP); + void addSourceLine(DIE &Die, DIType Ty); + void addSourceLine(DIE &Die, DINameSpace NS); + void addSourceLine(DIE &Die, DIObjCProperty Ty); /// addAddress - Add an address attribute to a die based on the location /// provided. - void addAddress(DIE *Die, dwarf::Attribute Attribute, + void addAddress(DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location, bool Indirect = false); /// addConstantValue - Add constant value entry in variable DIE. - void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); - void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); - void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty); + void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty); + void addConstantValue(DIE &Die, const APInt &Val, DIType Ty); + void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); /// addConstantFPValue - Add constant value entry in variable DIE. - void addConstantFPValue(DIE *Die, const MachineOperand &MO); - void addConstantFPValue(DIE *Die, const ConstantFP *CFP); + void addConstantFPValue(DIE &Die, const MachineOperand &MO); + void addConstantFPValue(DIE &Die, const ConstantFP *CFP); /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); /// addRegisterOp - Add register operand. - void addRegisterOp(DIELoc *TheDie, unsigned Reg); + void addRegisterOp(DIELoc &TheDie, unsigned Reg); /// addRegisterOffset - Add register offset. - void addRegisterOffset(DIELoc *TheDie, unsigned Reg, int64_t Offset); + void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on /// the starting location. 
Add the DWARF information to the die. - void addComplexAddress(const DbgVariable &DV, DIE *Die, + void addComplexAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location); @@ -424,19 +378,19 @@ public: /// actual Block variable (navigating the Block struct) based on the /// starting location. Add the DWARF information to the die. Obsolete, /// please use addComplexAddress instead. - void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, + void addBlockByrefAddress(const DbgVariable &DV, DIE &Die, dwarf::Attribute Attribute, const MachineLocation &Location); /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. - void addVariableAddress(const DbgVariable &DV, DIE *Die, + void addVariableAddress(const DbgVariable &DV, DIE &Die, MachineLocation Location); /// addType - Add a new type attribute to the specified entity. This takes /// and attribute parameter because DW_AT_friend attributes are also /// type references. - void addType(DIE *Entity, DIType Ty, + void addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. @@ -445,6 +399,8 @@ public: /// getOrCreateSubprogramDIE - Create new DIE using SP. DIE *getOrCreateSubprogramDIE(DISubprogram SP); + void applySubprogramAttributes(DISubprogram SP, DIE &SPDie); + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *getOrCreateTypeDIE(const MDNode *N); @@ -460,14 +416,15 @@ public: void constructContainingTypeDIEs(); /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract); + std::unique_ptr constructVariableDIE(DbgVariable &DV, + bool Abstract = false); /// constructSubprogramArguments - Construct function argument DIEs. void constructSubprogramArguments(DIE &Buffer, DIArray Args); /// Create a DIE with the given Tag, add the DIE to its parent, and /// call insertDIE if MD is not null. - DIE *createAndAddDIE(unsigned Tag, DIE &Parent, + DIE &createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor()); /// Compute the size of a header for this unit, not including the initial @@ -483,6 +440,9 @@ public: virtual DwarfCompileUnit &getCU() = 0; + /// constructTypeDIE - Construct type DIE from DICompositeType. + void constructTypeDIE(DIE &Buffer, DICompositeType CTy); + protected: /// getOrCreateStaticMemberDIE - Create new static data member DIE. DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); @@ -492,15 +452,17 @@ protected: virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; private: + /// \brief Construct a DIE for the given DbgVariable without initializing the + /// DbgVariable's DIE reference. + std::unique_ptr constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract); + /// constructTypeDIE - Construct basic type die from DIBasicType. void constructTypeDIE(DIE &Buffer, DIBasicType BTy); /// constructTypeDIE - Construct derived type die from DIDerivedType. void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); - /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, DICompositeType CTy); - /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); @@ -547,7 +509,7 @@ private: /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug /// information entry. 
- DIEEntry *createDIEEntry(DIE *Entry); + DIEEntry *createDIEEntry(DIE &Entry); /// resolve - Look in the DwarfDebug map for the MDNode that /// corresponds to the reference. @@ -557,7 +519,7 @@ private: /// If this is a named finished type then include it in the list of types for /// the accelerator tables. - void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE *TyDIE); + void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE); }; class DwarfCompileUnit : public DwarfUnit { @@ -566,7 +528,7 @@ class DwarfCompileUnit : public DwarfUnit { unsigned stmtListIndex; public: - DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A, + DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU); void initStmtList(MCSymbol *DwarfLineSectionSym); @@ -579,12 +541,12 @@ public: /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. - void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, + void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label); /// addLocalLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr only. - void addLocalLabelAddress(DIE *Die, dwarf::Attribute Attribute, + void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Label); DwarfCompileUnit &getCU() override { return *this; } @@ -600,7 +562,7 @@ private: MCDwarfDwoLineTable *SplitLineTable; public: - DwarfTypeUnit(unsigned UID, DIE *D, DwarfCompileUnit &CU, AsmPrinter *A, + DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable = nullptr); diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 50b2ca8..2212941 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -29,7 +29,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { StringRef Dir = Scope.getDirectory(), Filename = Scope.getFilename(); char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; - if (Result != 0) + if (Result) return Result; // Clang emits directory and relative filename info into the IR, but CodeView @@ -102,7 +102,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, } WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) - : Asm(0), CurFn(0) { + : Asm(nullptr), CurFn(nullptr) { MachineModuleInfo *MMI = AP->MMI; // If module doesn't have named metadata anchors or COFF debug section @@ -171,7 +171,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { EmitLabelDiff(Asm->OutStreamer, Fn, FI.End); // PC-to-linenumber lookup table: - MCSymbol *FileSegmentEnd = 0; + MCSymbol *FileSegmentEnd = nullptr; for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) { MCSymbol *Instr = FI.Instrs[J]; assert(InstrInfo.count(Instr)); @@ -216,7 +216,7 @@ void WinCodeViewLineTables::endModule() { if (FnDebugInfo.empty()) return; - assert(Asm != 0); + assert(Asm != nullptr); Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); @@ -277,20 +277,19 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) { // for the first instruction of the function, not the last of the prolog? 
DebugLoc PrologEndLoc; bool EmptyPrologue = true; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E && PrologEndLoc.isUnknown(); ++I) { - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MI = II; - if (MI->isDebugValue()) + for (const auto &MBB : *MF) { + if (!PrologEndLoc.isUnknown()) + break; + for (const auto &MI : MBB) { + if (MI.isDebugValue()) continue; // First known non-DBG_VALUE and non-frame setup location marks // the beginning of the function body. // FIXME: do we need the first subcondition? - if (!MI->getFlag(MachineInstr::FrameSetup) && - (!MI->getDebugLoc().isUnknown())) { - PrologEndLoc = MI->getDebugLoc(); + if (!MI.getFlag(MachineInstr::FrameSetup) && + (!MI.getDebugLoc().isUnknown())) { + PrologEndLoc = MI.getDebugLoc(); break; } EmptyPrologue = false; @@ -321,7 +320,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { Asm->OutStreamer.EmitLabel(FunctionEndSym); CurFn->End = FunctionEndSym; } - CurFn = 0; + CurFn = nullptr; } void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index a7a6205..0734d97 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -38,7 +38,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler { struct FunctionInfo { SmallVector Instrs; MCSymbol *End; - FunctionInfo() : End(0) {} + FunctionInfo() : End(nullptr) {} } *CurFn; typedef DenseMap FnDebugInfoTy; @@ -104,7 +104,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler { void maybeRecordLocation(DebugLoc DL, const MachineFunction *MF); void clear() { - assert(CurFn == 0); + assert(CurFn == nullptr); FileNameRegistry.clear(); InstrInfo.clear(); } diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp new file mode 100644 index 0000000..d995333 --- /dev/null +++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -0,0 +1,337 @@ +//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass (at IR level) to replace atomic instructions with +// appropriate (intrinsic-based) ldrex/strex loops. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "arm-atomic-expand" + +namespace { + class AtomicExpandLoadLinked : public FunctionPass { + const TargetLowering *TLI; + public: + static char ID; // Pass identification, replacement for typeid + explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TLI(TM ? 
TM->getTargetLowering() : nullptr) {
+      initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnFunction(Function &F) override;
+    bool expandAtomicInsts(Function &F);
+
+    bool expandAtomicLoad(LoadInst *LI);
+    bool expandAtomicStore(StoreInst *LI);
+    bool expandAtomicRMW(AtomicRMWInst *AI);
+    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
+
+    AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
+    void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
+  };
+}
+
+char AtomicExpandLoadLinked::ID = 0;
+char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
+
+static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) {
+  PassInfo *PI = new PassInfo(
+      "Expand Atomic calls in terms of load-linked & store-conditional",
+      "atomic-ll-sc", &AtomicExpandLoadLinked::ID,
+      PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false,
+      false, PassInfo::TargetMachineCtor_t(
+                 callTargetMachineCtor<AtomicExpandLoadLinked>));
+  Registry.registerPass(*PI, true);
+  return PI;
+}
+
+void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) {
+  CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce)
+}
+
+
+FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
+  return new AtomicExpandLoadLinked(TM);
+}
+
+bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
+  if (!TLI)
+    return false;
+
+  SmallVector<Instruction *, 1> AtomicInsts;
+
+  // Changing control-flow while iterating through it is a bad idea, so gather a
+  // list of all atomic instructions before we start.
+  for (BasicBlock &BB : F)
+    for (Instruction &Inst : BB) {
+      if (isa<AtomicRMWInst>(&Inst) || isa<AtomicCmpXchgInst>(&Inst) ||
+          (isa<LoadInst>(&Inst) && cast<LoadInst>(&Inst)->isAtomic()) ||
+          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
+        AtomicInsts.push_back(&Inst);
+    }
+
+  bool MadeChange = false;
+  for (Instruction *Inst : AtomicInsts) {
+    if (!TLI->shouldExpandAtomicInIR(Inst))
+      continue;
+
+    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
+      MadeChange |= expandAtomicRMW(AI);
+    else if (AtomicCmpXchgInst *CI = dyn_cast<AtomicCmpXchgInst>(Inst))
+      MadeChange |= expandAtomicCmpXchg(CI);
+    else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+      MadeChange |= expandAtomicLoad(LI);
+    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+      MadeChange |= expandAtomicStore(SI);
+    else
+      llvm_unreachable("Unknown atomic instruction");
+  }
+
+  return MadeChange;
+}
+
+bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
+  // Load instructions don't actually need a leading fence, even in the
+  // SequentiallyConsistent case.
+  AtomicOrdering MemOpOrder =
+      TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering();
+
+  // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
+  // an ldrexd (A3.5.3).
+  IRBuilder<> Builder(LI);
+  Value *Val =
+      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
+
+  insertTrailingFence(Builder, LI->getOrdering());
+
+  LI->replaceAllUsesWith(Val);
+  LI->eraseFromParent();
+
+  return true;
+}
+
+bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
+  // The only atomic 64-bit store on ARM is an strexd that succeeds, which means
+  // we need a loop and the entire instruction is essentially an "atomicrmw
+  // xchg" that ignores the value loaded.
+  IRBuilder<> Builder(SI);
+  AtomicRMWInst *AI =
+      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
+                              SI->getValueOperand(), SI->getOrdering());
+  SI->eraseFromParent();
+
+  // Now we have an appropriate swap instruction, lower it as usual.
+ return expandAtomicRMW(AI); +} + +bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { + AtomicOrdering Order = AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // fence? + // atomicrmw.start: + // %loaded = @load.linked(%addr) + // %new = some_op iN %loaded, %incr + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp i32 ne %stored, 0 + // br i1 %try_again, label %loop, label %atomicrmw.end + // atomicrmw.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + + Value *NewVal; + switch (AI->getOperation()) { + case AtomicRMWInst::Xchg: + NewVal = AI->getValOperand(); + break; + case AtomicRMWInst::Add: + NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Sub: + NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::And: + NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Nand: + NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()), + "new"); + break; + case AtomicRMWInst::Or: + NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Xor: + NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); + NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); + break; + default: + llvm_unreachable("Unknown atomic op"); + } + + Value *StoreSuccess = + TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + insertTrailingFence(Builder, Order); + + AI->replaceAllUsesWith(Loaded); + AI->eraseFromParent(); + + return true; +} + +bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); + AtomicOrdering 
FailureOrder = CI->getFailureOrdering();
+  Value *Addr = CI->getPointerOperand();
+  BasicBlock *BB = CI->getParent();
+  Function *F = BB->getParent();
+  LLVMContext &Ctx = F->getContext();
+
+  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
+  //
+  // The full expansion we produce is:
+  //     [...]
+  //     fence?
+  // cmpxchg.start:
+  //     %loaded = @load.linked(%addr)
+  //     %should_store = icmp eq %loaded, %desired
+  //     br i1 %should_store, label %cmpxchg.trystore,
+  //                          label %cmpxchg.end/%cmpxchg.barrier
+  // cmpxchg.trystore:
+  //     %stored = @store_conditional(%new, %addr)
+  //     %try_again = icmp i32 ne %stored, 0
+  //     br i1 %try_again, label %loop, label %cmpxchg.barrier
+  // cmpxchg.barrier:
+  //     fence?
+  //     br label %cmpxchg.end
+  // cmpxchg.end:
+  //     [...]
+  BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
+  auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
+  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
+  auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
+
+  // This grabs the DebugLoc from CI
+  IRBuilder<> Builder(CI);
+
+  // The split call above "helpfully" added a branch at the end of BB (to the
+  // wrong place), but we might want a fence too. It's easiest to just remove
+  // the branch entirely.
+  std::prev(BB->end())->eraseFromParent();
+  Builder.SetInsertPoint(BB);
+  AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder);
+  Builder.CreateBr(LoopBB);
+
+  // Start the main loop block now that we've taken care of the preliminaries.
+  Builder.SetInsertPoint(LoopBB);
+  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
+  Value *ShouldStore =
+      Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
+
+  // If the cmpxchg doesn't actually need any ordering when it fails, we can
+  // jump straight past that fence instruction (if it exists).
+  BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB;
+  Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
+
+  Builder.SetInsertPoint(TryStoreBB);
+  Value *StoreSuccess = TLI->emitStoreConditional(
+      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+  Value *TryAgain = Builder.CreateICmpNE(
+      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
+  Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
+
+  // Finally, make sure later instructions don't get reordered with a fence if
+  // necessary.
+  Builder.SetInsertPoint(BarrierBB);
+  insertTrailingFence(Builder, SuccessOrder);
+  Builder.CreateBr(ExitBB);
+
+  CI->replaceAllUsesWith(Loaded);
+  CI->eraseFromParent();
+
+  return true;
+}
+
+AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
+                                                          AtomicOrdering Ord) {
+  if (!TLI->getInsertFencesForAtomic())
+    return Ord;
+
+  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    Builder.CreateFence(Release);
+
+  // The exclusive operations don't need any barrier if we're adding separate
+  // fences.
+ return Monotonic; +} + +void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord) { + if (!TLI->getInsertFencesForAtomic()) + return; + + if (Ord == Acquire || Ord == AcquireRelease) + Builder.CreateFence(Acquire); + else if (Ord == SequentiallyConsistent) + Builder.CreateFence(SequentiallyConsistent); +} diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index c6654ec2..7f31b1a 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -15,13 +15,21 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "basictti" #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; +static cl::opt +PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), + cl::desc("Threshold for partial unrolling"), cl::Hidden); + +#define DEBUG_TYPE "basictti" + namespace { class BasicTTI final : public ImmutablePass, public TargetTransformInfo { @@ -34,7 +42,7 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } public: - BasicTTI() : ImmutablePass(ID), TM(0) { + BasicTTI() : ImmutablePass(ID), TM(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } @@ -186,7 +194,61 @@ bool BasicTTI::haveFastSqrt(Type *Ty) const { return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); } -void BasicTTI::getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } +void BasicTTI::getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const { + // This unrolling functionality is target independent, but to provide some + // motivation for its intended use, for x86: + + // According to the Intel 64 and IA-32 Architectures Optimization Reference + // Manual, Intel Core models and later have a loop stream detector + // (and associated uop queue) that can benefit from partial unrolling. + // The relevant requirements are: + // - The loop must have no more than 4 (8 for Nehalem and later) branches + // taken, and none of them may be calls. + // - The loop can have no more than 18 (28 for Nehalem and later) uops. + + // According to the Software Optimization Guide for AMD Family 15h Processors, + // models 30h-4fh (Steamroller and later) have a loop predictor and loop + // buffer which can benefit from partial unrolling. + // The relevant requirements are: + // - The loop must have fewer than 16 branches + // - The loop must have less than 40 uops in all executed loop branches + + // The number of taken branches in a loop is hard to estimate here, and + // benchmarking has revealed that it is better not to be conservative when + // estimating the branch count. As a result, we'll ignore the branch limits + // until someone finds a case where it matters in practice. + + unsigned MaxOps; + const TargetSubtargetInfo *ST = &TM->getSubtarget(); + if (PartialUnrollingThreshold.getNumOccurrences() > 0) + MaxOps = PartialUnrollingThreshold; + else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0) + MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize; + else + return; + + // Scan the loop: don't unroll loops with calls. 
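Condensed into a single function, the policy described above (together with the call check performed by the loop scan that follows) reads roughly like this. The names and types here are illustrative only, not LLVM API:

// Reduced model of the partial-unrolling policy: enable partial/runtime
// unrolling only when the target reports a loop micro-op buffer, and cap
// the unrolled body so it still fits the loop stream detector.
struct UnrollPrefsModel {
  bool Partial = false, Runtime = false;
  unsigned PartialThreshold = 0;
};

void setUnrollPrefs(unsigned ThresholdFlag, unsigned LoopMicroOpBufferSize,
                    bool LoopContainsCalls, UnrollPrefsModel &UP) {
  unsigned MaxOps = ThresholdFlag ? ThresholdFlag : LoopMicroOpBufferSize;
  if (MaxOps == 0 || LoopContainsCalls)
    return;                        // no buffer, or a call defeats the LSD: bail
  UP.Partial = UP.Runtime = true;  // allow both partial and runtime unrolling
  UP.PartialThreshold = MaxOps;    // cap the unrolled loop at the buffer size
}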
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) { + BasicBlock *BB = *I; + + for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) + if (isa(J) || isa(J)) { + ImmutableCallSite CS(J); + if (const Function *F = CS.getCalledFunction()) { + if (!TopTTI->isLoweredToCall(F)) + continue; + } + + return; + } + } + + // Enable runtime and partial unrolling up to the specified size. + UP.Partial = UP.Runtime = true; + UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps; +} //===----------------------------------------------------------------------===// // @@ -424,12 +486,14 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, // This is a vector load that legalizes to a larger type than the vector // itself. Unless the corresponding extending load or truncating store is // legal, then this will scalarize. - TargetLowering::LegalizeAction LA; - MVT MemVT = getTLI()->getSimpleValueType(Src, true); - if (Opcode == Instruction::Store) - LA = getTLI()->getTruncStoreAction(LT.second, MemVT); - else - LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT); + TargetLowering::LegalizeAction LA = TargetLowering::Expand; + EVT MemVT = getTLI()->getValueType(Src, true); + if (MemVT.isSimple() && MemVT != MVT::Other) { + if (Opcode == Instruction::Store) + LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); + else + LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT()); + } if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { // This is a vector load/store for some illegal type that is scalarized. @@ -484,7 +548,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::round: ISD = ISD::FROUND; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; - case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + case Intrinsic::fmuladd: ISD = ISD::FMA; break; case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: return 0; @@ -509,6 +573,12 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, return LT.first * 2; } + // If we can't lower fmuladd into an FMA estimate the cost as a floating + // point mul followed by an add. + if (IID == Intrinsic::fmuladd) + return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + + TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); + // Else, assume that we need to scalarize this intrinsic. For math builtins // this will emit a costly libcall, adding call overhead and spills. Make it // very expensive. diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index b39777e..f623a48 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "branchfolding" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -38,6 +37,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "branchfolding" + STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); @@ -189,7 +190,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TII = tii; TRI = tri; MMI = mmi; - RS = NULL; + RS = nullptr; // Use a RegScavenger to help update liveness when required. 
MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -201,7 +202,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { - MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0; + MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); @@ -220,7 +221,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // See if any jump tables have become dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); - if (JTI == 0) { + if (!JTI) { delete RS; return MadeChange; } @@ -416,7 +417,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1, const BasicBlock *BB) { if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) - return 0; + return nullptr; MachineFunction &MF = *CurMBB.getParent(); @@ -466,7 +467,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, const TargetInstrInfo *TII) { MachineFunction *MF = CurMBB->getParent(); MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB)); - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && @@ -475,12 +476,12 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); + TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, NULL, + TII->InsertBranch(*CurMBB, SuccBB, nullptr, SmallVector(), dl); } @@ -849,7 +850,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // See if we can do any tail merging on those. if (MergePotentials.size() >= 2) - MadeChange |= TryTailMergeBlocks(NULL, NULL); + MadeChange |= TryTailMergeBlocks(nullptr, nullptr); // Look at blocks (IBB) with multiple predecessors (PBB). // We change each predecessor to a canonical form, by @@ -896,7 +897,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (PBB->getLandingPadSuccessor()) continue; - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { // Failing case: IBB is the target of a cbr, and we cannot reverse the @@ -915,10 +916,10 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // a bit in the edge so we didn't have to do all this. if (IBB->isLandingPad()) { MachineFunction::iterator IP = PBB; IP++; - MachineBasicBlock *PredNextBB = NULL; + MachineBasicBlock *PredNextBB = nullptr; if (IP != MF.end()) PredNextBB = IP; - if (TBB == NULL) { + if (!TBB) { if (IBB != PredNextBB) // fallthrough continue; } else if (FBB) { @@ -939,7 +940,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { TII->RemoveBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, + NewCond, dl); } MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); @@ -1099,7 +1101,7 @@ ReoptimizeBlock: // one. 
MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB)); - MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector PriorCond; bool PriorUnAnalyzable = TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); @@ -1116,7 +1118,7 @@ ReoptimizeBlock: TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1160,7 +1162,7 @@ ReoptimizeBlock: // If the previous branch *only* branches to *this* block (conditional or // not) remove the branch. - if (PriorTBB == MBB && PriorFBB == 0) { + if (PriorTBB == MBB && !PriorFBB) { TII->RemoveBranch(PrevBB); MadeChange = true; ++NumBranchOpts; @@ -1172,7 +1174,7 @@ ReoptimizeBlock: if (PriorFBB == MBB) { DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1186,7 +1188,7 @@ ReoptimizeBlock: if (!TII->ReverseBranchCondition(NewPriorCond)) { DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); + TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1201,7 +1203,7 @@ ReoptimizeBlock: // We consider it more likely that execution will stay in the function (e.g. // due to loops) than it is to exit it. This asserts in loops etc, moving // the assert condition out of the loop body. - if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 && + if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB && MachineFunction::iterator(PriorTBB) == FallThrough && !MBB->canFallThrough()) { bool DoTransform = true; @@ -1224,7 +1226,7 @@ ReoptimizeBlock: DebugLoc dl = getBranchDebugLoc(PrevBB); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); + TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(--MF.end()); @@ -1237,7 +1239,7 @@ ReoptimizeBlock: } // Analyze the branch in the current block. - MachineBasicBlock *CurTBB = 0, *CurFBB = 0; + MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; SmallVector CurCond; bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); if (!CurUnAnalyzable) { @@ -1263,7 +1265,7 @@ ReoptimizeBlock: // If this branch is the only thing in its block, see if we can forward // other blocks across it. - if (CurTBB && CurCond.empty() && CurFBB == 0 && + if (CurTBB && CurCond.empty() && !CurFBB && IsBranchOnlyBlock(MBB) && CurTBB != MBB && !MBB->hasAddressTaken()) { DebugLoc dl = getBranchDebugLoc(*MBB); @@ -1301,12 +1303,12 @@ ReoptimizeBlock: // explicit branch to us to make updates simpler. 
if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && PriorTBB != MBB && PriorFBB != MBB) { - if (PriorTBB == 0) { - assert(PriorCond.empty() && PriorFBB == 0 && + if (!PriorTBB) { + assert(PriorCond.empty() && !PriorFBB && "Bad branch analysis"); PriorTBB = MBB; } else { - assert(PriorFBB == 0 && "Machine CFG out of date!"); + assert(!PriorFBB && "Machine CFG out of date!"); PriorFBB = MBB; } DebugLoc pdl = getBranchDebugLoc(PrevBB); @@ -1330,7 +1332,7 @@ ReoptimizeBlock: // If this change resulted in PMBB ending in a conditional // branch where both conditions go to the same destination, // change this to an unconditional branch (and fix the CFG). - MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0; + MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr; SmallVector NewCurCond; bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true); @@ -1338,10 +1340,10 @@ ReoptimizeBlock: DebugLoc pdl = getBranchDebugLoc(*PMBB); TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl); + TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); MadeChange = true; ++NumBranchOpts; - PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); + PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false); } } } @@ -1358,7 +1360,7 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. - TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl); + TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl); } } @@ -1379,7 +1381,7 @@ ReoptimizeBlock: // Analyze the branch at the end of the pred. MachineBasicBlock *PredBB = *PI; MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; - MachineBasicBlock *PredTBB = 0, *PredFBB = 0; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) @@ -1399,7 +1401,7 @@ ReoptimizeBlock: MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc()); + TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); } MBB->moveAfter(PredBB); MadeChange = true; @@ -1432,7 +1434,7 @@ ReoptimizeBlock: // Okay, there is no really great place to put this block. If, however, // the block before this one would be a fall-through if this block were // removed, move this block to the end of the function. - MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0; + MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector PrevCond; if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && @@ -1473,7 +1475,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, if (SuccBB != TrueBB) return SuccBB; } - return NULL; + return nullptr; } /// findHoistingInsertPosAndDeps - Find the location to move common instructions @@ -1547,7 +1549,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Also avoid moving code above predicated instruction since it's hard to // reason about register liveness with predicated instruction. 
bool DontMoveAcrossStore = true; - if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) || + if (!PI->isSafeToMove(TII, nullptr, DontMoveAcrossStore) || TII->isPredicated(PI)) return MBB->end(); @@ -1581,7 +1583,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, /// sequence at the start of the function, move the instructions before MBB /// terminator if it's legal. bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty()) return false; @@ -1686,7 +1688,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; bool DontMoveAcrossStore = true; - if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore)) + if (!TIB->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) break; // Remove kills from LocalDefsSet, these registers had short live ranges. diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 8943cb1..0b492a9 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp AllocationOrder.cpp Analysis.cpp + AtomicExpandLoadLinkedPass.cpp BasicTargetTransformInfo.cpp BranchFolding.cpp CalcSpillWeights.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 4833731..bc033f9 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "calcspillweights" - #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -22,6 +20,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "calcspillweights" + void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, const MachineLoopInfo &MLI, @@ -96,8 +96,8 @@ void VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); - MachineBasicBlock *mbb = 0; - MachineLoop *loop = 0; + MachineBasicBlock *mbb = nullptr; + MachineLoop *loop = nullptr; bool isExiting = false; float totalWeight = 0; SmallPtrSet visited; @@ -149,7 +149,11 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; - float hweight = Hint[hint] += weight; + // Force hweight onto the stack so that x86 doesn't add hidden precision, + // making the comparison incorrectly pass (i.e., 1 > 1 == true??). + // + // FIXME: we probably shouldn't use floats at all. 
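The volatile in the line that follows defends against x87 excess precision. The standalone program below is only an illustration of that hazard (it is not from the patch): a comparison can succeed while the value lives in a wide register, yet fail once the value is rounded to float.

#include <cstdio>

int main() {
  // 2^24 is the last point where consecutive integers are all exactly
  // representable as float; 2^24 + 1 rounds back down to 2^24.
  float x = 16777216.0f;
  long double wide = x + 1.0L; // kept wide, as in an x87 register
  float narrowed = x + 1.0f;   // assignment forces rounding to float
  std::printf("%d %d\n", wide > x, narrowed > x); // prints "1 0"
}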
+ volatile float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { if (hweight > bestPhys && mri.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index fcfc9dc..add861a 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -76,7 +76,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, dbgs() << "Formal argument #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -108,7 +108,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, dbgs() << "Return operand #" << i << " has unhandled type " << EVT(VT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -126,7 +126,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl &Outs, dbgs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -145,7 +145,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, dbgs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -162,7 +162,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, dbgs() << "Call result #" << i << " has unhandled type " << EVT(VT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -175,6 +175,6 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { dbgs() << "Call result has unhandled type " << EVT(VT).getEVTString() << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 17402f0..b3beac3 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -20,6 +20,7 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
 void llvm::initializeCodeGen(PassRegistry &Registry) {
+  initializeAtomicExpandLoadLinkedPass(Registry);
   initializeBasicTTIPass(Registry);
   initializeBranchFolderPassPass(Registry);
   initializeCodeGenPreparePass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index e82a306..6aa60c6 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -13,7 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "codegenprepare"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
@@ -39,6 +38,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/BypassSlowDivision.h"
@@ -46,6 +46,8 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+#define DEBUG_TYPE "codegenprepare"
+
 STATISTIC(NumBlocksElim, "Number of blocks eliminated");
 STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
 STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
@@ -70,6 +72,10 @@ static cl::opt<bool> DisableSelectToBranch(
   "disable-cgp-select2branch", cl::Hidden, cl::init(false),
   cl::desc("Disable select to branch conversion."));
 
+static cl::opt<bool> AddrSinkUsingGEPs(
+  "addr-sink-using-gep", cl::Hidden, cl::init(false),
+  cl::desc("Address sinking in CGP using GEPs."));
+
 static cl::opt<bool> EnableAndCmpSinking(
    "enable-andcmp-sinking", cl::Hidden, cl::init(true),
    cl::desc("Enable sinking and/cmp into branches."));
@@ -111,8 +117,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
 
   public:
     static char ID; // Pass identification, replacement for typeid
-    explicit CodeGenPrepare(const TargetMachine *TM = 0)
-      : FunctionPass(ID), TM(TM), TLI(0) {
+    explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
+      : FunctionPass(ID), TM(TM), TLI(nullptr) {
         initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
       }
     bool runOnFunction(Function &F) override;
@@ -177,7 +183,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   TLInfo = &getAnalysis<TargetLibraryInfo>();
   DominatorTreeWrapperPass *DTWP =
       getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DT = DTWP ? &DTWP->getDomTree() : 0;
+  DT = DTWP ? &DTWP->getDomTree() : nullptr;
   OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                            Attribute::OptimizeForSize);
@@ -623,6 +629,187 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
   return MadeChange;
 }
 
+/// isExtractBitsCandidateUse - Check if the candidates could
+/// be combined with shift instruction, which includes:
+/// 1. Truncate instruction
+/// 2. And instruction and the imm is a mask of the low bits:
+///    imm & (imm+1) == 0 (e.g. imm = 0x00ff, since 0x00ff & 0x0100 == 0)
+static bool isExtractBitsCandidateUse(Instruction *User) {
+  if (!isa<TruncInst>(User)) {
+    if (User->getOpcode() != Instruction::And ||
+        !isa<ConstantInt>(User->getOperand(1)))
+      return false;
+
+    const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
+
+    if ((Cimm & (Cimm + 1)).getBoolValue())
+      return false;
+  }
+  return true;
+}
+
+/// SinkShiftAndTruncate - sink both shift and truncate instruction
+/// to the use of truncate's BB.
+static bool
+SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
+                     DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
+                     const TargetLowering &TLI) {
+  BasicBlock *UserBB = User->getParent();
+  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
+  TruncInst *TruncI = dyn_cast<TruncInst>(User);
+  bool MadeChange = false;
+
+  for (Value::user_iterator TruncUI = TruncI->user_begin(),
+                            TruncE = TruncI->user_end();
+       TruncUI != TruncE;) {
+
+    Use &TruncTheUse = TruncUI.getUse();
+    Instruction *TruncUser = cast<Instruction>(*TruncUI);
+    // Preincrement use iterator so we don't invalidate it.
+
+    ++TruncUI;
+
+    int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
+    if (!ISDOpcode)
+      continue;
+
+    // If the use is actually a legal node, there will not be an implicit
+    // truncate.
+    if (TLI.isOperationLegalOrCustom(ISDOpcode,
+                                     EVT::getEVT(TruncUser->getType())))
+      continue;
+
+    // Don't bother for PHI nodes.
+    if (isa<PHINode>(TruncUser))
+      continue;
+
+    BasicBlock *TruncUserBB = TruncUser->getParent();
+
+    if (UserBB == TruncUserBB)
+      continue;
+
+    BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
+    CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
+
+    if (!InsertedShift && !InsertedTrunc) {
+      BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+      // Sink the shift
+      if (ShiftI->getOpcode() == Instruction::AShr)
+        InsertedShift =
+            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+      else
+        InsertedShift =
+            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+      // Sink the trunc
+      BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
+      TruncInsertPt++;
+
+      InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
+                                       TruncI->getType(), "", TruncInsertPt);
+
+      MadeChange = true;
+
+      TruncTheUse = InsertedTrunc;
+    }
+  }
+  return MadeChange;
+}
+
+/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if
+/// the uses could potentially be combined with this shift instruction and
+/// generate BitExtract instruction. It will only be applied if the architecture
+/// supports a BitExtract instruction. Here is an example:
+/// BB1:
+///   %x.extract.shift = lshr i64 %arg1, 32
+/// BB2:
+///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
+/// ==>
+///
+/// BB2:
+///   %x.extract.shift.1 = lshr i64 %arg1, 32
+///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
+///
+/// CodeGen will recognize the pattern in BB2 and generate a BitExtract
+/// instruction.
+/// Return true if any changes are made.
+static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
+                                const TargetLowering &TLI) {
+  BasicBlock *DefBB = ShiftI->getParent();
+
+  /// Only insert instructions in each block once.
+  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
+
+  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
+
+  bool MadeChange = false;
+  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
+       UI != E;) {
+    Use &TheUse = UI.getUse();
+    Instruction *User = cast<Instruction>(*UI);
+    // Preincrement use iterator so we don't invalidate it.
+    ++UI;
+
+    // Don't bother for PHI nodes.
+    if (isa<PHINode>(User))
+      continue;
+
+    if (!isExtractBitsCandidateUse(User))
+      continue;
+
+    BasicBlock *UserBB = User->getParent();
+
+    if (UserBB == DefBB) {
+      // If the shift and truncate instruction are in the same BB. The use of
+      // the truncate(TruncUse) may still introduce another truncate if not
+      // legal. In this case, we would like to sink both shift and truncate
+      // instruction to the BB of TruncUse.
+      // for example:
+      // BB1:
+      //   i64 shift.result = lshr i64 opnd, imm
+      //   trunc.result = trunc shift.result to i16
+      //
+      // BB2:
+      //   ----> We will have an implicit truncate here if the architecture does
+      //   not have i16 compare.
+      //   cmp i16 trunc.result, opnd2
+      //
+      if (isa<TruncInst>(User) && shiftIsLegal
+          // If the type of the truncate is legal, no truncate will be
+          // introduced in other basic blocks.
+          && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
+        MadeChange =
+            SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
+
+      continue;
+    }
+    // If we have already inserted a shift into this block, use it.
+    BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
+
+    if (!InsertedShift) {
+      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+
+      if (ShiftI->getOpcode() == Instruction::AShr)
+        InsertedShift =
+            BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+      else
+        InsertedShift =
+            BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+      MadeChange = true;
+    }
+
+    // Replace a use of the shift with a use of the new shift.
+    TheUse = InsertedShift;
+  }
+
+  // If we removed all uses, nuke the shift.
+  if (ShiftI->use_empty())
+    ShiftI->eraseFromParent();
+
+  return MadeChange;
+}
+
 namespace {
 class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
 protected:
@@ -671,8 +858,9 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
       // happens.
       WeakVH IterHandle(CurInstIterator);
 
-      replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
-                                    TLInfo, ModifiedDT ? 0 : DT);
+      replaceAndRecursivelySimplify(CI, RetVal,
+                                    TLI ? TLI->getDataLayout() : nullptr,
+                                    TLInfo, ModifiedDT ? nullptr : DT);
 
       // If the iterator instruction was recursively deleted, start over at the
       // start of the block.
@@ -693,10 +881,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
   }
 
   // From here on out we're working with named functions.
-  if (CI->getCalledFunction() == 0) return false;
+  if (!CI->getCalledFunction()) return false;
 
   // We'll need DataLayout from here on out.
-  const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
+  const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
   if (!TD) return false;
 
   // Lower all default uses of _chk calls.  This is very similar
@@ -746,8 +934,8 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
   if (!RI)
     return false;
 
-  PHINode *PN = 0;
-  BitCastInst *BCI = 0;
+  PHINode *PN = nullptr;
+  BitCastInst *BCI = nullptr;
   Value *V = RI->getReturnValue();
   if (V) {
     BCI = dyn_cast<BitCastInst>(V);
@@ -862,7 +1050,7 @@ namespace {
 struct ExtAddrMode : public TargetLowering::AddrMode {
   Value *BaseReg;
   Value *ScaledReg;
-  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
+  ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
   void print(raw_ostream &OS) const;
   void dump() const;
@@ -1189,10 +1377,10 @@ class TypePromotionTransaction {
   public:
     /// \brief Remove all reference of \p Inst and optionally replace all its
     /// uses with New.
-    /// \pre If !Inst->use_empty(), then New != NULL
-    InstructionRemover(Instruction *Inst, Value *New = NULL)
+    /// \pre If !Inst->use_empty(), then New != nullptr
+    InstructionRemover(Instruction *Inst, Value *New = nullptr)
         : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
-          Replacer(NULL) {
+          Replacer(nullptr) {
       if (New)
         Replacer = new UsesReplacer(Inst, New);
       DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
@@ -1232,7 +1420,7 @@ public:
     /// Same as Instruction::setOperand.
     void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
     /// Same as Instruction::eraseFromParent.
-    void eraseInstruction(Instruction *Inst, Value *NewVal = NULL);
+    void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
     /// Same as Value::replaceAllUsesWith.
     void replaceAllUsesWith(Instruction *Inst, Value *New);
     /// Same as Value::mutateType.
@@ -1245,84 +1433,75 @@ public:
     void moveBefore(Instruction *Inst, Instruction *Before);
     /// @}
 
-    ~TypePromotionTransaction();
-
   private:
     /// The ordered list of actions made so far.
-    SmallVector<TypePromotionAction *, 16> Actions;
-    typedef SmallVectorImpl<TypePromotionAction *>::iterator CommitPt;
+    SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
+    typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator
+        CommitPt;
   };
 
 void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
                                           Value *NewVal) {
   Actions.push_back(
-      new TypePromotionTransaction::OperandSetter(Inst, Idx, NewVal));
+      make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
 }
 
 void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
                                                 Value *NewVal) {
   Actions.push_back(
-      new TypePromotionTransaction::InstructionRemover(Inst, NewVal));
+      make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
 }
 
 void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
                                                   Value *New) {
-  Actions.push_back(new TypePromotionTransaction::UsesReplacer(Inst, New));
+  Actions.push_back(
+      make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
 }
 
 void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
-  Actions.push_back(new TypePromotionTransaction::TypeMutator(Inst, NewTy));
+  Actions.push_back(
+      make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
 }
 
 Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd,
                                                    Type *Ty) {
-  TruncBuilder *TB = new TruncBuilder(Opnd, Ty);
-  Actions.push_back(TB);
-  return TB->getBuiltInstruction();
+  std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
+  Instruction *I = Ptr->getBuiltInstruction();
+  Actions.push_back(std::move(Ptr));
+  return I;
 }
 
 Instruction *TypePromotionTransaction::createSExt(Instruction *Inst,
                                                   Value *Opnd, Type *Ty) {
-  SExtBuilder *SB = new SExtBuilder(Inst, Opnd, Ty);
-  Actions.push_back(SB);
-  return SB->getBuiltInstruction();
+  std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
+  Instruction *I = Ptr->getBuiltInstruction();
+  Actions.push_back(std::move(Ptr));
+  return I;
 }
 
 void TypePromotionTransaction::moveBefore(Instruction *Inst,
                                           Instruction *Before) {
   Actions.push_back(
-      new TypePromotionTransaction::InstructionMoveBefore(Inst, Before));
+      make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst,
+                                                                   Before));
 }
 
 TypePromotionTransaction::ConstRestorationPt
 TypePromotionTransaction::getRestorationPoint() const {
-  return Actions.rbegin() != Actions.rend() ? *Actions.rbegin() : NULL;
+  return !Actions.empty() ? Actions.back().get() : nullptr;
 }
 
 void TypePromotionTransaction::commit() {
   for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
-       ++It) {
+       ++It)
     (*It)->commit();
-    delete *It;
-  }
   Actions.clear();
 }
 
 void TypePromotionTransaction::rollback(
     TypePromotionTransaction::ConstRestorationPt Point) {
-  while (!Actions.empty() && Point != (*Actions.rbegin())) {
-    TypePromotionAction *Curr = Actions.pop_back_val();
+  while (!Actions.empty() && Point != Actions.back().get()) {
+    std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
     Curr->undo();
-    delete Curr;
   }
 }
 
-TypePromotionTransaction::~TypePromotionTransaction() {
-  for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; ++It)
-    delete *It;
-  Actions.clear();
-}
-
 /// \brief A helper class for matching addressing modes.
 ///
 /// This encapsulates the logic for matching the target-legal addressing modes.
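Concretely, the matcher decomposes every candidate address into the ExtAddrMode form introduced earlier: BaseGV + BaseReg + Scale*ScaledReg + BaseOffs. A reduced stand-alone model of that algebra, with pointers shown as integers and purely for illustration:

#include <cstdint>

// Reduced model of ExtAddrMode: each component is optional, and the matcher's
// job is to fold as much of the address computation as the target's
// addressing modes can legally absorb.
struct ExtAddrModeModel {
  uint64_t BaseGV = 0;    // address of a global, if any
  uint64_t BaseReg = 0;   // plain register component
  uint64_t ScaledReg = 0; // register multiplied by Scale
  int64_t Scale = 0;
  int64_t BaseOffs = 0;   // constant displacement

  uint64_t materialize() const {
    return BaseGV + BaseReg +
           static_cast<uint64_t>(Scale) * ScaledReg +
           static_cast<uint64_t>(BaseOffs);
  }
};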
@@ -1390,7 +1569,7 @@ private: bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); bool MatchAddr(Value *V, unsigned Depth); bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, - bool *MovedAway = NULL); + bool *MovedAway = nullptr); bool IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); @@ -1435,7 +1614,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now // to see if ScaleReg is actually X+C. If so, we can turn this into adding // X*Scale + C*Scale to addr mode. - ConstantInt *CI = 0; Value *AddLHS = 0; + ConstantInt *CI = nullptr; Value *AddLHS = nullptr; if (isa(ScaleReg) && // not a constant expr. match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { TestAddrMode.ScaledReg = AddLHS; @@ -1461,6 +1640,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, static bool MightBeFoldableInst(Instruction *I) { switch (I->getOpcode()) { case Instruction::BitCast: + case Instruction::AddrSpaceCast: // Don't touch identity bitcasts. if (I->getType() == I->getOperand(0)->getType()) return false; @@ -1612,13 +1792,13 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( // get through. // If it, check we can get through. if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts)) - return NULL; + return nullptr; // Do not promote if the operand has been added by codegenprepare. // Otherwise, it means we are undoing an optimization that is likely to be // redone, thus causing potential infinite loop. if (isa(SExtOpnd) && InsertedTruncs.count(SExtOpnd)) - return NULL; + return nullptr; // SExt or Trunc instructions. // Return the related handler. @@ -1629,7 +1809,7 @@ TypePromotionHelper::Action TypePromotionHelper::getAction( // Abort early if we will have to insert non-free instructions. if (!SExtOpnd->hasOneUse() && !TLI.isTruncateFree(SExtTy, SExtOpnd->getType())) - return NULL; + return nullptr; return promoteOperandForOther; } @@ -1740,7 +1920,7 @@ TypePromotionHelper::promoteOperandForOther(Instruction *SExt, TPT.moveBefore(SExtForOpnd, SExtOpnd); TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd); // If more sext are required, new instructions will have to be created. - SExtForOpnd = NULL; + SExtForOpnd = nullptr; } if (SExtForOpnd == SExt) { DEBUG(dbgs() << "Sign extension is useless now\n"); @@ -1815,6 +1995,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return MatchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::BitCast: + case Instruction::AddrSpaceCast: // BitCast is always a noop, and we can handle it as long as it is // int->int or pointer->pointer (we don't want int<->fp or something). if ((AddrInst->getOperand(0)->getType()->isPointerTy() || @@ -2022,11 +2203,11 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { AddrMode.BaseOffs -= CI->getSExtValue(); } else if (GlobalValue *GV = dyn_cast(Addr)) { // If this is a global variable, try to fold it into the addressing mode. 
- if (AddrMode.BaseGV == 0) { + if (!AddrMode.BaseGV) { AddrMode.BaseGV = GV; if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) return true; - AddrMode.BaseGV = 0; + AddrMode.BaseGV = nullptr; } } else if (Instruction *I = dyn_cast(Addr)) { ExtAddrMode BackupAddrMode = AddrMode; @@ -2071,7 +2252,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) return true; AddrMode.HasBaseReg = false; - AddrMode.BaseReg = 0; + AddrMode.BaseReg = nullptr; } // If the base register is already taken, see if we can do [r+r]. @@ -2081,7 +2262,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) return true; AddrMode.Scale = 0; - AddrMode.ScaledReg = 0; + AddrMode.ScaledReg = nullptr; } // Couldn't match. TPT.rollback(LastKnownGood); @@ -2166,7 +2347,7 @@ static bool FindAllMemoryUses(Instruction *I, bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, Value *KnownLive2) { // If Val is either of the known-live values, we know it is live! - if (Val == 0 || Val == KnownLive1 || Val == KnownLive2) + if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) return true; // All values other than instructions and arguments (e.g. constants) are live. @@ -2225,13 +2406,13 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // If the BaseReg or ScaledReg was referenced by the previous addrmode, their // lifetime wasn't extended by adding this instruction. if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) - BaseReg = 0; + BaseReg = nullptr; if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) - ScaledReg = 0; + ScaledReg = nullptr; // If folding this instruction (and it's subexprs) didn't extend any live // ranges, we're ok with it. - if (BaseReg == 0 && ScaledReg == 0) + if (!BaseReg && !ScaledReg) return true; // If all uses of this instruction are ultimately load/store/inlineasm's, @@ -2320,7 +2501,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Use a worklist to iteratively look through PHI nodes, and ensure that // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. - Value *Consensus = 0; + Value *Consensus = nullptr; unsigned NumUsesConsensus = 0; bool IsNumUsesConsensusValid = false; SmallVector AddrModeInsts; @@ -2334,7 +2515,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // Break use-def graph loops. if (!Visited.insert(V)) { - Consensus = 0; + Consensus = nullptr; break; } @@ -2380,7 +2561,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, continue; } - Consensus = 0; + Consensus = nullptr; break; } @@ -2420,14 +2601,135 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Value *&SunkAddr = SunkAddrs[Addr]; if (SunkAddr) { DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " - << *MemoryInst); + << *MemoryInst << "\n"); if (SunkAddr->getType() != Addr->getType()) SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && + TM && TM->getSubtarget().useAA())) { + // By default, we use the GEP-based method when AA is used later. This + // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. 
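The code that follows builds the sunk address in exactly that GEP form. Stripped of its special cases, the core shape is captured by this hypothetical helper, written against the IRBuilder API as an illustration rather than taken from the patch:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical condensation of the GEP-based path below: Base is the pointer
// part of the matched addressing mode, Index the combined integer part.
// Expressing the arithmetic as a GEP over i8* keeps pointer provenance
// visible to alias analysis, where ptrtoint/add/inttoptr would hide it.
static Value *buildSunkAddr(IRBuilder<> &Builder, Value *Base, Value *Index,
                            Type *OrigPtrTy) {
  unsigned AS = cast<PointerType>(OrigPtrTy)->getAddressSpace();
  Value *Ptr = Builder.CreateBitCast(Base, Builder.getInt8PtrTy(AS));
  Value *Addr = Index ? Builder.CreateGEP(Ptr, Index, "sunkaddr") : Ptr;
  return Builder.CreateBitCast(Addr, OrigPtrTy);
}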
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst << "\n"); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType()); + Value *ResultPtr = nullptr, *ResultIndex = nullptr; + + // First, find the pointer. + if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { + ResultPtr = AddrMode.BaseReg; + AddrMode.BaseReg = nullptr; + } + + if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { + // We can't add more than one pointer together, nor can we scale a + // pointer (both of which seem meaningless). + if (ResultPtr || AddrMode.Scale != 1) + return false; + + ResultPtr = AddrMode.ScaledReg; + AddrMode.Scale = 0; + } + + if (AddrMode.BaseGV) { + if (ResultPtr) + return false; + + ResultPtr = AddrMode.BaseGV; + } + + // If the real base value actually came from an inttoptr, then the matcher + // will look through it and provide only the integer value. In that case, + // use it here. + if (!ResultPtr && AddrMode.BaseReg) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); + AddrMode.BaseReg = nullptr; + } else if (!ResultPtr && AddrMode.Scale == 1) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); + AddrMode.Scale = 0; + } + + if (!ResultPtr && + !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { + SunkAddr = Constant::getNullValue(Addr->getType()); + } else if (!ResultPtr) { + return false; + } else { + Type *I8PtrTy = + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + + // Start with the base register. Do this first so that subsequent address + // matching finds it last, which will prevent it from trying to match it + // as the scaled value in case it happens to be a mul. That would be + // problematic if we've sunk a different mul for the scale, because then + // we'd end up sinking both muls. + if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType() != IntPtrTy) + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); + + ResultIndex = V; + } + + // Add the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (cast(IntPtrTy)->getBitWidth() < + cast(V->getType())->getBitWidth()) { + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); + } else { + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + Instruction *I = dyn_cast_or_null(ResultIndex); + if (I && (ResultIndex != AddrMode.BaseReg)) + I->eraseFromParent(); + return false; + } + + if (AddrMode.Scale != 1) + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); + if (ResultIndex) + ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); + else + ResultIndex = V; + } + + // Add in the Base Offset if present. + if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (ResultIndex) { + // We need to add this separately from the scale above to help with + // SDAG consecutive load/store merging. 
+          if (ResultPtr->getType() != I8PtrTy)
+            ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+          ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+        }
+
+        ResultIndex = V;
+      }
+
+      if (!ResultIndex) {
+        SunkAddr = ResultPtr;
+      } else {
+        if (ResultPtr->getType() != I8PtrTy)
+          ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+        SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+      }
+
+      if (SunkAddr->getType() != Addr->getType())
+        SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+    }
   } else {
     DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
-                 << *MemoryInst);
+                 << *MemoryInst << "\n");
     Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
-    Value *Result = 0;
+    Value *Result = nullptr;
 
     // Start with the base register. Do this first so that subsequent address
     // matching finds it last, which will prevent it from trying to match it
@@ -2459,8 +2761,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
         // the original IR value was tossed in favor of a constant back when
         // the AddrMode was created we need to bail out gracefully if widths
        // do not match instead of extending it.
-        if (Result != AddrMode.BaseReg)
-          cast<Instruction>(Result)->eraseFromParent();
+        Instruction *I = dyn_cast_or_null<Instruction>(Result);
+        if (I && (Result != AddrMode.BaseReg))
+          I->eraseFromParent();
         return false;
       }
       if (AddrMode.Scale != 1)
@@ -2490,7 +2793,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
       Result = V;
     }
 
-    if (Result == 0)
+    if (!Result)
       SunkAddr = Constant::getNullValue(Addr->getType());
     else
       SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
@@ -2815,7 +3118,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
     // It is possible for very late stage optimizations (such as SimplifyCFG)
     // to introduce PHI nodes too late to be cleaned up.  If we detect such a
     // trivial PHI, go ahead and zap it here.
-    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
                                        TLInfo, DT)) {
       P->replaceAllUsesWith(V);
       P->eraseFromParent();
@@ -2870,6 +3173,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
     return false;
   }
 
+  BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+
+  if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
+                BinOp->getOpcode() == Instruction::LShr)) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+    if (TLI && CI && TLI->hasExtractBitsInsn())
+      return OptimizeExtractBits(BinOp, CI, *TLI);
+
+    return false;
+  }
+
   if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
     if (GEPI->hasAllZeroIndices()) {
       /// The GEP operand must be a pointer, so must its result -> BitCast
@@ -2918,11 +3232,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
 bool CodeGenPrepare::PlaceDbgValues(Function &F) {
   bool MadeChange = false;
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
-    Instruction *PrevNonDbgInst = NULL;
+    Instruction *PrevNonDbgInst = nullptr;
     for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
       Instruction *Insn = BI; ++BI;
       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
-      if (!DVI) {
+      // Leave dbg.values that refer to an alloca alone. These
+      // intrinsics describe the address of a variable (= the alloca)
+      // being taken. They should not be moved next to the alloca
+      // (and to the beginning of the scope), but rather stay close to
+      // where said address is used.
+ if (!DVI || (DVI->getValue() && isa(DVI->getValue()))) { PrevNonDbgInst = Insn; continue; } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 463eb86..822636f 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "post-RA-sched" #include "CriticalAntiDepBreaker.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "post-RA-sched" + CriticalAntiDepBreaker:: CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : AntiDepBreaker(), MF(MFi), @@ -33,7 +34,7 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) : TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), RegClassInfo(RCI), - Classes(TRI->getNumRegs(), static_cast(0)), + Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0), DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {} @@ -45,7 +46,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { const unsigned BBSize = BB->size(); for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { // Clear out the register class data. - Classes[i] = static_cast(0); + Classes[i] = nullptr; // Initialize the indices to indicate that no registers are live. KillIndices[i] = ~0u; @@ -75,7 +76,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // callee-saved register that is not saved in the prolog. const MachineFrameInfo *MFI = MF.getFrameInfo(); BitVector Pristine = MFI->getPristineRegs(BB); - for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { if (!IsReturnBlock && !Pristine.test(*I)) continue; for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; @@ -124,7 +125,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, /// CriticalPathStep - Return the next SUnit after SU on the bottom-up /// critical path. static const SDep *CriticalPathStep(const SUnit *SU) { - const SDep *Next = 0; + const SDep *Next = nullptr; unsigned NextDepth = 0; // Find the predecessor edge with the greatest depth. for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); @@ -171,7 +172,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - const TargetRegisterClass *NewRC = 0; + const TargetRegisterClass *NewRC = nullptr; if (i < MI->getDesc().getNumOperands()) NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); @@ -227,7 +228,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, DefIndices[i] = Count; KillIndices[i] = ~0u; KeepRegs.reset(i); - Classes[i] = 0; + Classes[i] = nullptr; RegRefs.erase(i); } @@ -244,7 +245,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, (DefIndices[Reg] == ~0u)) && "Kill and Def maps aren't consistent for Reg!"); KeepRegs.reset(Reg); - Classes[Reg] = 0; + Classes[Reg] = nullptr; RegRefs.erase(Reg); // Repeat, for all subregs. 
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { @@ -252,7 +253,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, DefIndices[SubregReg] = Count; KillIndices[SubregReg] = ~0u; KeepRegs.reset(SubregReg); - Classes[SubregReg] = 0; + Classes[SubregReg] = nullptr; RegRefs.erase(SubregReg); } // Conservatively mark super-registers as unusable. @@ -267,7 +268,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, if (Reg == 0) continue; if (!MO.isUse()) continue; - const TargetRegisterClass *NewRC = 0; + const TargetRegisterClass *NewRC = nullptr; if (i < MI->getDesc().getNumOperands()) NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); @@ -419,7 +420,7 @@ BreakAntiDependencies(const std::vector& SUnits, DenseMap MISUnitMap; // Find the node at the bottom of the critical path. - const SUnit *Max = 0; + const SUnit *Max = nullptr; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; MISUnitMap[SU->getInstr()] = SU; @@ -551,8 +552,8 @@ BreakAntiDependencies(const std::vector& SUnits, CriticalPathMI = CriticalPathSU->getInstr(); } else { // We've reached the end of the critical path. - CriticalPathSU = 0; - CriticalPathMI = 0; + CriticalPathSU = nullptr; + CriticalPathMI = nullptr; } } @@ -589,8 +590,9 @@ BreakAntiDependencies(const std::vector& SUnits, // Determine AntiDepReg's register class, if it is live and is // consistently used within a single class. - const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0; - assert((AntiDepReg == 0 || RC != NULL) && + const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] + : nullptr; + assert((AntiDepReg == 0 || RC != nullptr) && "Register should be live if it's causing an anti-dependence!"); if (RC == reinterpret_cast(-1)) AntiDepReg = 0; @@ -638,7 +640,7 @@ BreakAntiDependencies(const std::vector& SUnits, (DefIndices[NewReg] == ~0u)) && "Kill and Def maps aren't consistent for NewReg!"); - Classes[AntiDepReg] = 0; + Classes[AntiDepReg] = nullptr; DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; KillIndices[AntiDepReg] = ~0u; assert(((KillIndices[AntiDepReg] == ~0u) != diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 5b40ae1..bc6e9dc 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -121,7 +121,7 @@ DefaultVLIWScheduler::DefaultVLIWScheduler( void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. 
- buildSchedGraph(0); + buildSchedGraph(nullptr); } // VLIWPacketizerList Ctor @@ -129,7 +129,7 @@ VLIWPacketizerList::VLIWPacketizerList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, bool IsPostRA) : TM(MF.getTarget()), MF(MF) { TII = TM.getInstrInfo(); - ResourceTracker = TII->CreateTargetScheduleState(&TM, 0); + ResourceTracker = TII->CreateTargetScheduleState(&TM, nullptr); VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA); } diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index aa03e77..2b144d8 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "codegen-dce" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -23,6 +22,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "codegen-dce" + STATISTIC(NumDeletes, "Number of dead instructions deleted"); namespace { @@ -59,7 +60,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { // Don't delete instructions with side effects. bool SawStore = false; - if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI()) + if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI()) return false; // Examine each operand. diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index d543baf..a195586 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dwarfehprepare" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/CallSite.h" @@ -28,6 +27,8 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "dwarfehprepare" + STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { @@ -43,7 +44,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. DwarfEHPrepare(const TargetMachine *TM) - : FunctionPass(ID), TM(TM), RewindFunction(0) { + : FunctionPass(ID), TM(TM), RewindFunction(nullptr) { initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()); } @@ -68,10 +69,10 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { /// instructions, including the 'resume' instruction. Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { Value *V = RI->getOperand(0); - Value *ExnObj = 0; + Value *ExnObj = nullptr; InsertValueInst *SelIVI = dyn_cast(V); - LoadInst *SelLoad = 0; - InsertValueInst *ExcIVI = 0; + LoadInst *SelLoad = nullptr; + InsertValueInst *ExcIVI = nullptr; bool EraseIVIs = false; if (SelIVI) { diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index f8887ef..c470632 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "early-ifcvt" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -40,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "early-ifcvt" + // Absolute maximum number of instructions allowed per speculated block. // This bypasses all other heuristics, so it should be set fairly high. 
static cl::opt @@ -219,7 +220,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { // We never speculate stores, so an AA pointer isn't necessary. bool DontMoveAcrossStore = true; - if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { + if (!I->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) { DEBUG(dbgs() << "Can't speculate: " << *I); return false; } @@ -338,7 +339,7 @@ bool SSAIfConv::findInsertionPoint() { /// bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { Head = MBB; - TBB = FBB = Tail = 0; + TBB = FBB = Tail = nullptr; if (Head->succ_size() != 2) return false; @@ -463,7 +464,7 @@ void SSAIfConv::replacePHIInstrs() { TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); PI.PHI->eraseFromParent(); - PI.PHI = 0; + PI.PHI = nullptr; } } @@ -564,7 +565,7 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { // We need a branch to Tail, let code placement work it out later. DEBUG(dbgs() << "Converting to unconditional branch.\n"); SmallVector EmptyCond; - TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL); + TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); Head->addSuccessor(Tail); } DEBUG(dbgs() << *Head); @@ -775,6 +776,12 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" << "********** Function: " << MF.getName() << '\n'); + // Only run if conversion if the target wants it. + if (!MF.getTarget() + .getSubtarget() + .enableEarlyIfConversion()) + return false; + TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); SchedModel = @@ -783,7 +790,7 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { DomTree = &getAnalysis(); Loops = getAnalysisIfAvailable(); Traces = &getAnalysis(); - MinInstr = 0; + MinInstr = nullptr; bool Changed = false; IfConv.runOnMachineFunction(MF); diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index 3bb0465..aea7c31 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -41,9 +41,7 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { EC.clear(); EC.grow(2 * MF->getNumBlockIDs()); - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) { - const MachineBasicBlock &MBB = *I; + for (const auto &MBB : *MF) { unsigned OutE = 2 * MBB.getNumber() + 1; // Join the outgoing bundle with the ingoing bundles of all successors. for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), @@ -69,29 +67,31 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { return false; } -/// view - Visualize the annotated bipartite CFG with Graphviz. -void EdgeBundles::view() const { - ViewGraph(*this, "EdgeBundles"); -} - /// Specialize WriteGraph, the standard implementation won't work. 
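EarlyIfConverter::runOnMachineFunction above now returns early unless the subtarget opts in through enableEarlyIfConversion(), so the pass stays in every pipeline but is a no-op for targets that do not ask for it. A reduced sketch of that gating shape; the hook name follows the diff, while the surrounding types are illustrative stand-ins rather than the real TargetSubtargetInfo interface:

    #include <iostream>

    struct Subtarget {
      // Targets override this to opt in to early if-conversion.
      virtual bool enableEarlyIfConversion() const { return false; }
      virtual ~Subtarget() = default;
    };

    struct AggressiveSubtarget : Subtarget {
      bool enableEarlyIfConversion() const override { return true; }
    };

    static bool runEarlyIfConverter(const Subtarget &ST) {
      if (!ST.enableEarlyIfConversion())
        return false; // pass runs, but changes nothing on this target
      std::cout << "converting hammocks to selects\n";
      return true;
    }

    int main() {
      AggressiveSubtarget ST;
      runEarlyIfConverter(ST);
      return 0;
    }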
-raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, - bool ShortNames, - const Twine &Title) { +namespace llvm { +template<> +raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, + const Twine &Title) { const MachineFunction *MF = G.getMachineFunction(); O << "digraph {\n"; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - unsigned BB = I->getNumber(); + for (const auto &MBB : *MF) { + unsigned BB = MBB.getNumber(); O << "\t\"BB#" << BB << "\" [ shape=box ]\n" << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; - for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(), - SE = I->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() << "\" [ color=lightgray ]\n"; } O << "}\n"; return O; } +} + +/// view - Visualize the annotated bipartite CFG with Graphviz. +void EdgeBundles::view() const { + ViewGraph(*this, "EdgeBundles"); +} diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index a08eb6b..cf55b68 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/LivePhysRegs.h" @@ -33,6 +32,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "execution-fix" + /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track /// of execution domains. /// @@ -100,7 +101,7 @@ struct DomainValue { // Clear this DomainValue and point to next which has all its data. void clear() { AvailableDomains = 0; - Next = 0; + Next = nullptr; Instrs.clear(); } }; @@ -275,7 +276,7 @@ void ExeDepsFix::kill(int rx) { return; release(LiveRegs[rx].Value); - LiveRegs[rx].Value = 0; + LiveRegs[rx].Value = nullptr; } /// Force register rx into domain. @@ -360,7 +361,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Default values are 'nothing happened a long time ago'. for (unsigned rx = 0; rx != NumRegs; ++rx) { - LiveRegs[rx].Value = 0; + LiveRegs[rx].Value = nullptr; LiveRegs[rx].Def = -(1 << 20); } @@ -404,7 +405,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // We have a live DomainValue from more than one predecessor. if (LiveRegs[rx].Value->isCollapsed()) { - // We are already collapsed, but predecessor is not. Force him. + // We are already collapsed, but predecessor is not. Force it. unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) collapse(pdv, Domain); @@ -440,7 +441,7 @@ void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { release(LiveRegs[i].Value); delete[] LiveRegs; } - LiveRegs = 0; + LiveRegs = nullptr; } void ExeDepsFix::visitInstr(MachineInstr *MI) { @@ -664,7 +665,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // doms are now sorted in order of appearance. Try to merge them all, giving // priority to the latest ones. 
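The EdgeBundles change above replaces a plain overload of WriteGraph with an explicit specialization, and wraps the definition in namespace llvm instead of using a qualified llvm:: name. That matches the language rule of the era: an explicit specialization of a function template had to be declared inside the namespace that owns the primary template. A reduced sketch of the idiom with no LLVM types involved:

    #include <iostream>

    namespace gr { // stand-in namespace, not LLVM's
    template <typename T> void describe(const T &) {
      std::cout << "generic\n";
    }
    }

    // Explicit specialization: declared back inside the owning namespace.
    namespace gr {
    template <> void describe<int>(const int &) {
      std::cout << "int-specific\n";
    }
    }

    int main() {
      gr::describe(3.14); // generic
      gr::describe(42);   // int-specific
      return 0;
    }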
- DomainValue *dv = 0; + DomainValue *dv = nullptr; while (!Regs.empty()) { if (!dv) { dv = Regs.pop_back_val().Value; @@ -714,7 +715,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - LiveRegs = 0; + LiveRegs = nullptr; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index fb2e446..90b62b5 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "expand-isel-pseudos" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -23,6 +22,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "expand-isel-pseudos" + namespace { class ExpandISelPseudos : public MachineFunctionPass { public: diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index 1b0315a..8969bcc 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "postrapseudos" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -25,6 +24,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "postrapseudos" + namespace { struct ExpandPostRA : public MachineFunctionPass { private: diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 54b047b..c3e4f3e 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -61,10 +61,6 @@ GCModuleInfo::GCModuleInfo() initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); } -GCModuleInfo::~GCModuleInfo() { - clear(); -} - GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, const std::string &Name) { strategy_map_type::iterator NMI = StrategyMap.find(Name); @@ -74,17 +70,17 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, for (GCRegistry::iterator I = GCRegistry::begin(), E = GCRegistry::end(); I != E; ++I) { if (Name == I->getName()) { - GCStrategy *S = I->instantiate(); + std::unique_ptr S = I->instantiate(); S->M = M; S->Name = Name; - StrategyMap.GetOrCreateValue(Name).setValue(S); - StrategyList.push_back(S); - return S; + StrategyMap.GetOrCreateValue(Name).setValue(S.get()); + StrategyList.push_back(std::move(S)); + return StrategyList.back().get(); } } dbgs() << "unsupported GC: " << Name << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { @@ -104,9 +100,6 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { void GCModuleInfo::clear() { FInfoMap.clear(); StrategyMap.clear(); - - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; StrategyList.clear(); } diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index b31a0f2..1fdff6b 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -101,13 +101,6 @@ GCStrategy::GCStrategy() : UsesMetadata(false) {} -GCStrategy::~GCStrategy() { - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; - - Functions.clear(); -} - bool 
GCStrategy::initializeCustomLowering(Module &M) { return false; } bool GCStrategy::performCustomLowering(Function &F) { @@ -118,14 +111,13 @@ bool GCStrategy::performCustomLowering(Function &F) { bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { - GCFunctionInfo *FI = new GCFunctionInfo(F, *this); - Functions.push_back(FI); - return FI; + Functions.push_back(make_unique(F, *this)); + return Functions.back().get(); } // ----------------------------------------------------------------------------- diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 1a18b1a..1502d5f 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ifcvt" #include "llvm/CodeGen/Passes.h" #include "BranchFolding.h" #include "llvm/ADT/STLExtras.h" @@ -37,6 +36,8 @@ using namespace llvm; +#define DEBUG_TYPE "ifcvt" + // Hidden options for help debugging. static cl::opt IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); static cl::opt IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); @@ -127,7 +128,8 @@ namespace { IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), HasFallThrough(false), IsUnpredicable(false), CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {} + ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), + FalseBB(nullptr) {} }; /// IfcvtToken - Record information about pending if-conversions to attempt: @@ -205,7 +207,7 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl &Cond, - SmallSet *LaterRedefs = 0); + SmallSet *LaterRedefs = nullptr); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, bool IgnoreBr = false); @@ -230,7 +232,7 @@ namespace { // blockAlwaysFallThrough - Block ends without a terminator. bool blockAlwaysFallThrough(BBInfo &BBI) const { - return BBI.IsBrAnalyzable && BBI.TrueBB == NULL; + return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr; } // IfcvtTokenCmp - Used to sort if-conversion candidates. @@ -438,7 +440,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, if (SuccBB != TrueBB) return SuccBB; } - return NULL; + return nullptr; } /// ReverseBranchCondition - Reverse the condition of the end of the block @@ -460,7 +462,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { MachineFunction::iterator I = BB; MachineFunction::iterator E = BB->getParent()->end(); if (++I == E) - return NULL; + return nullptr; return I; } @@ -551,7 +553,7 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, FT = getNextBlock(FalseBBI.BB); if (TT != FT) return false; - if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) return false; if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) return false; @@ -641,11 +643,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool AlreadyPredicated = !BBI.Predicate.empty(); // First analyze the end of BB branches. 
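The GCModuleInfo and GCStrategy hunks above delete hand-written destructors because ownership moves into the containers: instantiate() now returns a std::unique_ptr, StrategyList and Functions store unique_ptrs, and callers receive a raw, non-owning pointer via .get(). A minimal sketch of that ownership shape; the names are illustrative, not LLVM's:

    #include <iostream>
    #include <memory>
    #include <string>
    #include <vector>

    struct Strategy {
      std::string Name;
      explicit Strategy(std::string N) : Name(std::move(N)) {}
    };

    class Registry {
      // The vector owns every Strategy; no manual delete loop is needed.
      std::vector<std::unique_ptr<Strategy>> List;

    public:
      Strategy *getOrCreate(const std::string &Name) {
        for (auto &S : List)
          if (S->Name == Name)
            return S.get();
        // std::make_unique is C++14; LLVM of this era used its own make_unique.
        List.push_back(std::make_unique<Strategy>(Name));
        return List.back().get(); // non-owning observer pointer
      }
    };

    int main() {
      Registry R;
      std::cout << R.getOrCreate("shadow-stack")->Name << "\n";
      return 0;
    }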
- BBI.TrueBB = BBI.FalseBB = NULL; + BBI.TrueBB = BBI.FalseBB = nullptr; BBI.BrCond.clear(); BBI.IsBrAnalyzable = !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); - BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL; + BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; if (BBI.BrCond.size()) { // No false branch. This BB must end with a conditional branch and a @@ -954,13 +956,13 @@ static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, const TargetInstrInfo *TII) { DebugLoc dl; // FIXME: this is nowhere SmallVector NoCond; - TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl); + TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl); } /// RemoveExtraEdges - Remove true / false edges if either / both are no longer /// successors. void IfConverter::RemoveExtraEdges(BBInfo &BBI) { - MachineBasicBlock *TBB = NULL, *FBB = NULL; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond)) BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); @@ -1179,7 +1181,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { DontKill.clear(); - bool HasEarlyExit = CvtBBI->FalseBB != NULL; + bool HasEarlyExit = CvtBBI->FalseBB != nullptr; uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; uint32_t WeightScale = 0; if (HasEarlyExit) { @@ -1215,7 +1217,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); // Update the edge weight for both CvtBBI->FalseBB and NextBBI. // New_Weight(BBI.BB, NextBBI->BB) = @@ -1453,8 +1455,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. 
- MergeBlocks(BBI, *BBI1, TailBB == 0); - MergeBlocks(BBI, *BBI2, TailBB == 0); + MergeBlocks(BBI, *BBI1, TailBB == nullptr); + MergeBlocks(BBI, *BBI2, TailBB == nullptr); // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1503,7 +1505,7 @@ static bool MaySpeculate(const MachineInstr *MI, SmallSet &LaterRedefs, const TargetInstrInfo *TII) { bool SawStore = true; - if (!MI->isSafeToMove(TII, 0, SawStore)) + if (!MI->isSafeToMove(TII, nullptr, SawStore)) return false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -1527,7 +1529,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, SmallVectorImpl &Cond, SmallSet *LaterRedefs) { bool AnyUnpred = false; - bool MaySpec = LaterRedefs != 0; + bool MaySpec = LaterRedefs != nullptr; for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { if (I->isDebugValue() || TII->isPredicated(I)) continue; @@ -1545,7 +1547,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } // If the predicated instruction now redefines a register as the result of @@ -1590,7 +1592,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -1607,7 +1609,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, std::vector Succs(FromBBI.BB->succ_begin(), FromBBI.BB->succ_end()); MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); - MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; for (unsigned i = 0, e = Succs.size(); i != e; ++i) { MachineBasicBlock *Succ = Succs[i]; @@ -1643,7 +1645,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { std::vector Succs(FromBBI.BB->succ_begin(), FromBBI.BB->succ_end()); MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); - MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; for (unsigned i = 0, e = Succs.size(); i != e; ++i) { MachineBasicBlock *Succ = Succs[i]; diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 0f7ba8e..f3c8d3d 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "Spiller.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" @@ -39,6 +38,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumSpilledRanges, "Number of spilled live ranges"); STATISTIC(NumSnippets, "Number of spilled snippets"); STATISTIC(NumSpills, "Number of spills inserted"); @@ -121,7 +122,7 @@ public: SibValueInfo(unsigned Reg, VNInfo *VNI) : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false), - SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {} + SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {} // Returns true when a def has been found. 
bool hasDef() const { return DefByOrigPHI || DefMI; } @@ -167,7 +168,7 @@ private: bool isSibling(unsigned Reg); MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); - void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0); + void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr); void analyzeSiblingValues(); bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); @@ -179,7 +180,7 @@ private: bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(ArrayRef >, - MachineInstr *LoadMI = 0); + MachineInstr *LoadMI = nullptr); void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); @@ -236,7 +237,7 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)) return false; - MachineInstr *UseMI = 0; + MachineInstr *UseMI = nullptr; // Check that all uses satisfy our criteria. for (MachineRegisterInfo::reg_instr_nodbg_iterator @@ -367,7 +368,7 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, do { SVI = WorkList.pop_back_val(); TinyPtrVector *Deps = VNI ? &FirstDeps : &SVI->second.Deps; - VNI = 0; + VNI = nullptr; SibValueInfo &SV = SVI->second; if (!SV.SpillMBB) @@ -659,7 +660,7 @@ void InlineSpiller::analyzeSiblingValues() { VNInfo *VNI = *VI; if (VNI->isUnused()) continue; - MachineInstr *DefMI = 0; + MachineInstr *DefMI = nullptr; if (!VNI->isPHIDef()) { DefMI = LIS.getInstructionFromIndex(VNI->def); assert(DefMI && "No defining instruction"); @@ -1359,7 +1360,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { // Share a stack slot among all descendants of Original. Original = VRM.getOriginal(edit.getReg()); StackSlot = VRM.getStackSlot(Original); - StackInt = 0; + StackInt = nullptr; DEBUG(dbgs() << "Inline spilling " << MRI.getRegClass(edit.getReg())->getName() diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 61d065a..187e015 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "InterferenceCache.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/Support/ErrorHandling.h" @@ -19,6 +18,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + // Static member used for null interference cursors. InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference; diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index d3482d0..91a1da9 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -77,7 +77,8 @@ class InterferenceCache { /// Iterator pointing into the fixed RegUnit interference. 
LiveInterval::iterator FixedI; - RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) { + RegUnitInfo(LiveIntervalUnion &LIU) + : VirtTag(LIU.getTag()), Fixed(nullptr) { VirtI.setMap(LIU.getMap()); } }; @@ -93,7 +94,7 @@ class InterferenceCache { void update(unsigned MBBNum); public: - Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {} + Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(nullptr), LIS(nullptr) {} void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) { assert(!hasRefs() && "Cannot clear cache entry with references"); @@ -148,8 +149,9 @@ class InterferenceCache { Entry *get(unsigned PhysReg); public: - InterferenceCache() : TRI(0), LIUArray(0), MF(0), PhysRegEntries(NULL), - PhysRegEntriesCount(0), RoundRobin(0) {} + InterferenceCache() + : TRI(nullptr), LIUArray(nullptr), MF(nullptr), PhysRegEntries(nullptr), + PhysRegEntriesCount(0), RoundRobin(0) {} ~InterferenceCache() { free(PhysRegEntries); @@ -172,7 +174,7 @@ public: static BlockInterference NoInterference; void setEntry(Entry *E) { - Current = 0; + Current = nullptr; // Update reference counts. Nothing happens when RefCount reaches 0, so // we don't have to check for E == CacheEntry etc. if (CacheEntry) @@ -184,10 +186,10 @@ public: public: /// Cursor - Create a dangling cursor. - Cursor() : CacheEntry(0), Current(0) {} - ~Cursor() { setEntry(0); } + Cursor() : CacheEntry(nullptr), Current(nullptr) {} + ~Cursor() { setEntry(nullptr); } - Cursor(const Cursor &O) : CacheEntry(0), Current(0) { + Cursor(const Cursor &O) : CacheEntry(nullptr), Current(nullptr) { setEntry(O.CacheEntry); } @@ -200,7 +202,7 @@ public: void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) { // Release reference before getting a new one. That guarantees we can // actually have CacheEntries live cursors. 
- setEntry(0); + setEntry(nullptr); if (PhysReg) setEntry(Cache.get(PhysReg)); } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 9977c6b..a8b8600 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -115,21 +115,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) { Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - DL.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), nullptr); break; case Intrinsic::memmove: M.getOrInsertFunction("memmove", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), - DL.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), nullptr); break; case Intrinsic::memset: M.getOrInsertFunction("memset", Type::getInt8PtrTy(Context), Type::getInt8PtrTy(Context), Type::getInt32Ty(M.getContext()), - DL.getIntPtrType(Context), (Type *)0); + DL.getIntPtrType(Context), nullptr); break; case Intrinsic::sqrt: EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 9c2718b..a5ac057 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -43,24 +43,6 @@ static cl::opt EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -static cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, - cl::desc("Show encoding in .s output")); -static cl::opt ShowMCInst("show-mc-inst", cl::Hidden, - cl::desc("Show instruction structure in .s output")); - -static cl::opt -AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), - cl::init(cl::BOU_UNSET)); - -static bool getVerboseAsm() { - switch (AsmVerbose) { - case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); - case cl::BOU_TRUE: return true; - case cl::BOU_FALSE: return false; - } - llvm_unreachable("Invalid verbose asm state"); -} - void LLVMTargetMachine::initAsmInfo() { MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); @@ -103,7 +85,8 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Add internal analysis passes from the target machine. TM->addAnalysisPasses(PM); - // Targets may override createPassConfig to provide a target-specific sublass. + // Targets may override createPassConfig to provide a target-specific + // subclass. TargetPassConfig *PassConfig = TM->createPassConfig(PM); PassConfig->setStartStopPasses(StartAfter, StopAfter); @@ -138,7 +121,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, // Ask the target for an isel. if (PassConfig->addInstSelector()) - return NULL; + return nullptr; PassConfig->addMachinePasses(); @@ -169,7 +152,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return false; } - if (hasMCSaveTempLabels()) + if (Options.MCOptions.MCSaveTempLabels) Context->setAllowTemporaryLabels(false); const MCAsmInfo &MAI = *getMCAsmInfo(); @@ -185,19 +168,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MII, MRI, STI); // Create a code emitter if asked to show the encoding. 
- MCCodeEmitter *MCE = 0; - if (ShowMCEncoding) + MCCodeEmitter *MCE = nullptr; + if (Options.MCOptions.ShowMCEncoding) MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); - MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, - getVerboseAsm(), - hasMCUseCFI(), - hasMCUseDwarfDirectory(), - InstPrinter, - MCE, MAB, - ShowMCInst); + MCStreamer *S = getTarget().createAsmStreamer( + *Context, Out, Options.MCOptions.AsmVerbose, + Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, + Options.MCOptions.ShowMCInst); AsmStreamer.reset(S); break; } @@ -208,12 +188,12 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, *Context); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); - if (MCE == 0 || MAB == 0) + if (!MCE || !MAB) return true; AsmStreamer.reset(getTarget().createMCObjectStreamer( - getTargetTriple(), *Context, *MAB, Out, MCE, STI, hasMCRelaxAll(), - hasMCNoExecStack())); + getTargetTriple(), *Context, *MAB, Out, MCE, STI, + Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack)); break; } case CGFT_Null: @@ -225,7 +205,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); - if (Printer == 0) + if (!Printer) return true; // If successful, createAsmPrinter took ownership of AsmStreamer. @@ -246,7 +226,8 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, JITCodeEmitter &JCE, bool DisableVerify) { // Add common CodeGen passes. - MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); + MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, + nullptr); if (!Context) return true; @@ -265,11 +246,11 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, raw_ostream &Out, bool DisableVerify) { // Add common CodeGen passes. - Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0); + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr); if (!Ctx) return true; - if (hasMCSaveTempLabels()) + if (Options.MCOptions.MCSaveTempLabels) Ctx->setAllowTemporaryLabels(false); // Create the code emitter for the target if it exists. If not, .o file @@ -280,17 +261,17 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, STI, *Ctx); MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); - if (MCE == 0 || MAB == 0) + if (!MCE || !MAB) return true; std::unique_ptr AsmStreamer; AsmStreamer.reset(getTarget().createMCObjectStreamer( - getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, hasMCRelaxAll(), - hasMCNoExecStack())); + getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, + Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); - if (Printer == 0) + if (!Printer) return true; // If successful, createAsmPrinter took ownership of AsmStreamer. 
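The LLVMTargetMachine hunks above retire the file-local cl::opt flags (show-mc-encoding, show-mc-inst, asm-verbose) and the hasMC*() accessors in favor of fields on Options.MCOptions, so every decision about streamer construction flows from one struct the embedding tool populates. A rough sketch of the refactor's shape; the field names follow the diff, while the surrounding types are simplified stand-ins:

    #include <iostream>

    // Simplified stand-in for llvm::MCTargetOptions.
    struct MCTargetOptions {
      bool ShowMCEncoding = false;
      bool ShowMCInst = false;
      bool AsmVerbose = false;
      bool MCRelaxAll = false;
    };

    struct TargetOptions {
      MCTargetOptions MCOptions;
    };

    // Instead of consulting global cl::opt flags, the streamer factory
    // reads everything from the options struct it is handed.
    static void createAsmStreamer(const TargetOptions &Options) {
      if (Options.MCOptions.ShowMCEncoding)
        std::cout << "attaching a code emitter for encoding comments\n";
      std::cout << "verbose asm: " << Options.MCOptions.AsmVerbose << "\n";
    }

    int main() {
      TargetOptions Opts;
      Opts.MCOptions.AsmVerbose = true;
      createAsmStreamer(Opts);
      return 0;
    }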
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index e88d537..cdf505e 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -13,12 +13,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "scheduler" + bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { // The isScheduleHigh flag allows nodes with wraparound dependencies that // cannot easily be modeled as edges with latencies to be scheduled as @@ -53,7 +54,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = 0; + SUnit *OnlyAvailablePred = nullptr; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { SUnit &Pred = *I->getSUnit(); @@ -61,7 +62,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return 0; + return nullptr; OnlyAvailablePred = &Pred; } } @@ -105,7 +106,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { if (SU->isAvailable) return; // All preds scheduled. SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); - if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; + if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return; // Okay, we found a single predecessor that is available, but not scheduled. // Since it is available, it must be in the priority queue. First remove it. @@ -117,7 +118,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { } SUnit *LatencyPriorityQueue::pop() { - if (empty()) return NULL; + if (empty()) return nullptr; std::vector::iterator Best = Queue.begin(); for (std::vector::iterator I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index c22ab11..d12c234 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lexicalscopes" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" @@ -25,15 +24,14 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; -/// ~LexicalScopes - final cleanup after ourselves. -LexicalScopes::~LexicalScopes() { reset(); } +#define DEBUG_TYPE "lexicalscopes" /// reset - Reset the instance so that it's prepared for another function. void LexicalScopes::reset() { - MF = NULL; - CurrentFnLexicalScope = NULL; - DeleteContainerSeconds(LexicalScopeMap); - DeleteContainerSeconds(AbstractScopeMap); + MF = nullptr; + CurrentFnLexicalScope = nullptr; + LexicalScopeMap.clear(); + AbstractScopeMap.clear(); InlinedLexicalScopeMap.clear(); AbstractScopesList.clear(); } @@ -58,30 +56,26 @@ void LexicalScopes::extractLexicalScopes( DenseMap &MI2ScopeMap) { // Scan each instruction and create scopes. 
First build working set of scopes. - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) { - const MachineInstr *RangeBeginMI = NULL; - const MachineInstr *PrevMI = NULL; + for (const auto &MBB : *MF) { + const MachineInstr *RangeBeginMI = nullptr; + const MachineInstr *PrevMI = nullptr; DebugLoc PrevDL; - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - + for (const auto &MInsn : MBB) { // Check if instruction has valid location information. - const DebugLoc MIDL = MInsn->getDebugLoc(); + const DebugLoc MIDL = MInsn.getDebugLoc(); if (MIDL.isUnknown()) { - PrevMI = MInsn; + PrevMI = &MInsn; continue; } // If scope has not changed then skip this instruction. if (MIDL == PrevDL) { - PrevMI = MInsn; + PrevMI = &MInsn; continue; } // Ignore DBG_VALUE. It does not contribute to any instruction in output. - if (MInsn->isDebugValue()) + if (MInsn.isDebugValue()) continue; if (RangeBeginMI) { @@ -94,10 +88,10 @@ void LexicalScopes::extractLexicalScopes( } // This is a beginning of a new instruction range. - RangeBeginMI = MInsn; + RangeBeginMI = &MInsn; // Reset previous markers. - PrevMI = MInsn; + PrevMI = &MInsn; PrevDL = MIDL; } @@ -110,14 +104,22 @@ void LexicalScopes::extractLexicalScopes( } } +LexicalScope *LexicalScopes::findInlinedScope(DebugLoc DL) { + MDNode *Scope = nullptr; + MDNode *IA = nullptr; + DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); + auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); + return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; +} + /// findLexicalScope - Find lexical scope, either regular or inlined, for the /// given DebugLoc. Return NULL if not found. LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { - MDNode *Scope = NULL; - MDNode *IA = NULL; + MDNode *Scope = nullptr; + MDNode *IA = nullptr; DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext()); if (!Scope) - return NULL; + return nullptr; // The scope that we were created with could have an extra file - which // isn't what we care about in this case. @@ -125,16 +127,18 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) { if (D.isLexicalBlockFile()) Scope = DILexicalBlockFile(Scope).getScope(); - if (IA) - return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA)); - return LexicalScopeMap.lookup(Scope); + if (IA) { + auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); + return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; + } + return findLexicalScope(Scope); } /// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If /// not available then create new lexical scope. 
LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) { - MDNode *Scope = NULL; - MDNode *InlinedAt = NULL; + MDNode *Scope = nullptr; + MDNode *InlinedAt = nullptr; DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext()); if (InlinedAt) { @@ -155,35 +159,48 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) { D = DIDescriptor(Scope); } - LexicalScope *WScope = LexicalScopeMap.lookup(Scope); - if (WScope) - return WScope; + auto I = LexicalScopeMap.find(Scope); + if (I != LexicalScopeMap.end()) + return &I->second; - LexicalScope *Parent = NULL; + LexicalScope *Parent = nullptr; if (D.isLexicalBlock()) Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope)); - WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false); - LexicalScopeMap.insert(std::make_pair(Scope, WScope)); + // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 + // compatibility is no longer required. + I = LexicalScopeMap.emplace(std::piecewise_construct, std::make_tuple(Scope), + std::make_tuple(Parent, DIDescriptor(Scope), + nullptr, false)).first; + if (!Parent && DIDescriptor(Scope).isSubprogram() && DISubprogram(Scope).describes(MF->getFunction())) - CurrentFnLexicalScope = WScope; + CurrentFnLexicalScope = &I->second; - return WScope; + return &I->second; } /// getOrCreateInlinedScope - Find or create an inlined lexical scope. -LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope, +LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode, MDNode *InlinedAt) { - LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt); - if (InlinedScope) - return InlinedScope; - - DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt); - InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc), - DIDescriptor(Scope), InlinedAt, false); - InlinedLexicalScopeMap[InlinedLoc] = InlinedScope; - LexicalScopeMap[InlinedAt] = InlinedScope; - return InlinedScope; + std::pair P(ScopeNode, InlinedAt); + auto I = InlinedLexicalScopeMap.find(P); + if (I != InlinedLexicalScopeMap.end()) + return &I->second; + + LexicalScope *Parent; + DILexicalBlock Scope(ScopeNode); + if (Scope.isSubprogram()) + Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt)); + else + Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt); + + // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012 + // compatibility is no longer required. + I = InlinedLexicalScopeMap.emplace(std::piecewise_construct, + std::make_tuple(P), + std::make_tuple(Parent, Scope, InlinedAt, + false)).first; + return &I->second; } /// getOrCreateAbstractScope - Find or create an abstract lexical scope. 
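getOrCreateRegularScope and getOrCreateInlinedScope above stop new-ing LexicalScope objects and instead construct them in place in the map with emplace(std::piecewise_construct, ...), which is why reset() can drop its DeleteContainerSeconds calls. Returning &I->second is only sound if the map never relocates its values; that holds for node-based containers in the std::unordered_map mold, which the FIXME about MSVC 2012 tuple support suggests these maps now are. A small self-contained version of the idiom:

    #include <iostream>
    #include <string>
    #include <tuple>
    #include <unordered_map>

    struct Scope {
      Scope *Parent;
      std::string Name;
      Scope(Scope *P, std::string N) : Parent(P), Name(std::move(N)) {}
      Scope(const Scope &) = delete; // in-place construction only
    };

    int main() {
      std::unordered_map<int, Scope> Map;
      // piecewise_construct forwards one tuple to the key constructor and
      // one to the value constructor, so Scope is never copied or moved.
      auto I = Map.emplace(std::piecewise_construct,
                           std::forward_as_tuple(1),
                           std::forward_as_tuple(nullptr, "root")).first;
      Scope *S = &I->second; // stable: unordered_map never relocates values
      std::cout << S->Name << "\n";
      return 0;
    }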
@@ -193,21 +210,23 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { DIDescriptor Scope(N); if (Scope.isLexicalBlockFile()) Scope = DILexicalBlockFile(Scope).getScope(); - LexicalScope *AScope = AbstractScopeMap.lookup(N); - if (AScope) - return AScope; + auto I = AbstractScopeMap.find(Scope); + if (I != AbstractScopeMap.end()) + return &I->second; - LexicalScope *Parent = NULL; + LexicalScope *Parent = nullptr; if (Scope.isLexicalBlock()) { - DILexicalBlock DB(N); + DILexicalBlock DB(Scope); DIDescriptor ParentDesc = DB.getContext(); Parent = getOrCreateAbstractScope(ParentDesc); } - AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true); - AbstractScopeMap[N] = AScope; - if (DIDescriptor(N).isSubprogram()) - AbstractScopesList.push_back(AScope); - return AScope; + I = AbstractScopeMap.emplace(std::piecewise_construct, + std::forward_as_tuple(Scope), + std::forward_as_tuple(Parent, Scope, + nullptr, true)).first; + if (Scope.isSubprogram()) + AbstractScopesList.push_back(&I->second); + return &I->second; } /// constructScopeNest @@ -244,7 +263,7 @@ void LexicalScopes::assignInstructionRanges( SmallVectorImpl &MIRanges, DenseMap &MI2ScopeMap) { - LexicalScope *PrevLexicalScope = NULL; + LexicalScope *PrevLexicalScope = nullptr; for (SmallVectorImpl::const_iterator RI = MIRanges.begin(), RE = MIRanges.end(); RI != RE; ++RI) { @@ -273,9 +292,8 @@ void LexicalScopes::getMachineBasicBlocks( return; if (Scope == CurrentFnLexicalScope) { - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) - MBBs.insert(I); + for (const auto &MBB : *MF) + MBBs.insert(&MBB); return; } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index bef4156..388f58f 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "livedebug" #include "LiveDebugVariables.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/Statistic.h" @@ -41,8 +40,12 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include + using namespace llvm; +#define DEBUG_TYPE "livedebug" + static cl::opt EnableLDV("live-debug-variables", cl::init(true), cl::desc("Enable the live debug variables pass"), cl::Hidden); @@ -64,7 +67,7 @@ void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { +LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullptr) { initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); } @@ -139,7 +142,7 @@ public: UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, LocMap::Allocator &alloc) : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), - next(0), locInts(alloc) + next(nullptr), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. @@ -154,8 +157,8 @@ public: UserValue *getNext() const { return next; } /// match - Does this UserValue match the parameters? - bool match(const MDNode *Var, unsigned Offset) const { - return Var == variable && Offset == offset; + bool match(const MDNode *Var, unsigned Offset, bool indirect) const { + return Var == variable && Offset == offset && indirect == IsIndirect; } /// merge - Merge equivalence classes. 
@@ -292,7 +295,7 @@ class LDVImpl { bool ModifiedMF; /// userValues - All allocated UserValue instances. - SmallVector userValues; + SmallVector, 8> userValues; /// Map virtual register to eq class leader. typedef DenseMap VRMap; @@ -332,7 +335,6 @@ public: /// clear - Release all memory. void clear() { - DeleteContainerPointers(userValues); userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); @@ -425,12 +427,13 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, UserValue *UV = Leader->getLeader(); Leader = UV; for (; UV; UV = UV->getNext()) - if (UV->match(Var, Offset)) + if (UV->match(Var, Offset, IsIndirect)) return UV; } - UserValue *UV = new UserValue(Var, Offset, IsIndirect, DL, allocator); - userValues.push_back(UV); + userValues.push_back( + make_unique(Var, Offset, IsIndirect, DL, allocator)); + UserValue *UV = userValues.back().get(); Leader = UserValue::merge(Leader, UV); return UV; } @@ -444,7 +447,7 @@ void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) { UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { if (UserValue *UV = virtRegToEqClass.lookup(VirtReg)) return UV->getLeader(); - return 0; + return nullptr; } bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { @@ -646,14 +649,14 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, const MachineOperand &Loc = locations[LocNo]; if (!Loc.isReg()) { - extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS); + extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS); continue; } // Register locations are constrained to where the register value is live. if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { - LiveInterval *LI = 0; - const VNInfo *VNI = 0; + LiveInterval *LI = nullptr; + const VNInfo *VNI = nullptr; if (LIS.hasInterval(Loc.getReg())) { LI = &LIS.getInterval(Loc.getReg()); VNI = LI->getVNInfoAt(Idx); @@ -670,7 +673,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, LiveRange *LR = &LIS.getRegUnit(Unit); const VNInfo *VNI = LR->getVNInfoAt(Idx); // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LR, VNI, 0, LIS, MDT, UVS); + extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS); } // Finally, erase all the undefs. @@ -733,7 +736,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef NewRegs, LiveIntervals& LIS) { DEBUG({ dbgs() << "Splitting Loc" << OldLocNo << '\t'; - print(dbgs(), 0); + print(dbgs(), nullptr); }); bool DidChange = false; LocMap::iterator LocMapI; @@ -823,7 +826,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef NewRegs, } } - DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);}); + DEBUG({dbgs() << "Split result: \t"; print(dbgs(), nullptr);}); return DidChange; } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 3a7ac11..ce8ce96 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -331,13 +331,13 @@ LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { /// the value. If there is no live range before Kill, return NULL. 
VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (empty()) - return 0; + return nullptr; iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); if (I == begin()) - return 0; + return nullptr; --I; if (I->end <= StartIdx) - return 0; + return nullptr; if (I->end < Kill) extendSegmentEndTo(I, Kill); return I->valno; @@ -435,7 +435,7 @@ void LiveRange::join(LiveRange &Other, OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; for (iterator I = std::next(OutIt), E = end(); I != E; ++I) { VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; - assert(nextValNo != 0 && "Huh?"); + assert(nextValNo && "Huh?"); // If this live range has the same value # as its immediate predecessor, // and if they are neighbors, remove one Segment. This happens when we @@ -638,7 +638,7 @@ void LiveRange::verify() const { assert(I->start.isValid()); assert(I->end.isValid()); assert(I->start < I->end); - assert(I->valno != 0); + assert(I->valno != nullptr); assert(I->valno->id < valnos.size()); assert(I->valno == valnos[I->valno->id]); if (std::next(I) != E) { @@ -857,7 +857,7 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { EqClass.clear(); EqClass.grow(LI->getNumValNums()); - const VNInfo *used = 0, *unused = 0; + const VNInfo *used = nullptr, *unused = nullptr; // Determine connections. for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end(); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index fdc673f..3563f8e 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "LiveRangeCalc.h" #include "llvm/ADT/DenseSet.h" @@ -42,6 +41,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "regalloc" + char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", @@ -79,7 +80,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { } LiveIntervals::LiveIntervals() : MachineFunctionPass(ID), - DomTree(0), LRCalc(0) { + DomTree(nullptr), LRCalc(nullptr) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); } @@ -572,9 +573,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { break; } if (CancelKill) - MI->clearRegisterKills(Reg, NULL); + MI->clearRegisterKills(Reg, nullptr); else - MI->addRegisterKilled(Reg, NULL); + MI->addRegisterKilled(Reg, nullptr); } } } @@ -590,17 +591,17 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { SlotIndex Start = LI.beginIndex(); if (Start.isBlock()) - return NULL; + return nullptr; SlotIndex Stop = LI.endIndex(); if (Stop.isBlock()) - return NULL; + return nullptr; // getMBBFromIndex doesn't need to search the MBB table when both indexes // belong to proper instructions. MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start); MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop); - return MBB1 == MBB2 ? MBB1 : NULL; + return MBB1 == MBB2 ? 
MBB1 : nullptr; } bool diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index d5a81a3..d81221b 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/Support/Debug.h" @@ -23,6 +22,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + // Merge a LiveInterval's segments. Guarantee no overlaps. void LiveIntervalUnion::unify(LiveInterval &VirtReg) { @@ -138,7 +139,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { } LiveInterval::iterator VirtRegEnd = VirtReg->end(); - LiveInterval *RecentReg = 0; + LiveInterval *RecentReg = nullptr; while (LiveUnionI.valid()) { assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg"); @@ -200,5 +201,5 @@ void LiveIntervalUnion::Array::clear() { LIUs[i].~LiveIntervalUnion(); free(LIUs); Size = 0; - LIUs = 0; + LIUs = nullptr; } diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index ecd75b4..a558e14 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "LiveRangeCalc.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "regalloc" + void LiveRangeCalc::reset(const MachineFunction *mf, SlotIndexes *SI, MachineDominatorTree *MDT, @@ -121,7 +122,7 @@ void LiveRangeCalc::updateLiveIns() { // The value is live-through, update LiveOut as well. // Defer the Domtree lookup until it is needed. assert(Seen.test(MBB->getNumber())); - LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); + LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)nullptr); } Updater.setDest(&I->LR); Updater.add(Start, End, I->Value); @@ -174,7 +175,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, // Remember if we have seen more than one value. bool UniqueVNI = true; - VNInfo *TheVNI = 0; + VNInfo *TheVNI = nullptr; // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { @@ -251,7 +252,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, End = Kill; else LiveOut[MF->getBlockNumbered(*I)] = - LiveOutPair(TheVNI, (MachineDomTreeNode *)0); + LiveOutPair(TheVNI, nullptr); Updater.add(Start, End, TheVNI); } return true; @@ -345,7 +346,7 @@ void LiveRangeCalc::updateSSA() { VNInfo *VNI = LR.getNextValue(Start, *Alloc); I->Value = VNI; // This block is done, we know the final value. - I->DomNode = 0; + I->DomNode = nullptr; // Add liveness since updateLiveIns now skips this node. 
if (I->Kill.isValid()) diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index a3a3fbb..67ab559 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -92,7 +92,7 @@ class LiveRangeCalc { VNInfo *Value; LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) - : LR(LR), DomNode(node), Kill(kill), Value(0) {} + : LR(LR), DomNode(node), Kill(kill), Value(nullptr) {} }; /// LiveIn - Work list of blocks where the live-in value has yet to be @@ -125,7 +125,8 @@ class LiveRangeCalc { void updateLiveIns(); public: - LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {} + LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr), + DomTree(nullptr), Alloc(nullptr) {} //===--------------------------------------------------------------------===// // High-level interface. @@ -203,7 +204,7 @@ public: /// addLiveInBlock(). void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { Seen.set(MBB->getNumber()); - LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0); + LiveOut[MBB] = LiveOutPair(VNI, nullptr); } /// addLiveInBlock - Add a block with an unknown live-in value. This diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 891eaab..431241f 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -11,7 +11,6 @@ // is spilled or split. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE"); STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE"); STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); @@ -164,7 +165,7 @@ void LiveRangeEdit::eraseVirtReg(unsigned Reg) { bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, SmallVectorImpl &Dead) { - MachineInstr *DefMI = 0, *UseMI = 0; + MachineInstr *DefMI = nullptr, *UseMI = nullptr; // Check that there is a single def and a single use. for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) { @@ -197,7 +198,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, // We also need to make sure it is safe to move the load. // Assume there are stores between DefMI and UseMI. bool SawStore = true; - if (!DefMI->isSafeToMove(&TII, 0, SawStore)) + if (!DefMI->isSafeToMove(&TII, nullptr, SawStore)) return false; DEBUG(dbgs() << "Try to fold single def: " << *DefMI @@ -213,7 +214,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, DEBUG(dbgs() << " folded: " << *FoldMI); LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI); UseMI->eraseFromParent(); - DefMI->addRegisterDead(LI->reg, 0); + DefMI->addRegisterDead(LI->reg, nullptr); Dead.push_back(DefMI); ++NumDCEFoldedLoads; return true; @@ -236,7 +237,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { // Use the same criteria as DeadMachineInstructionElim. 
bool SawStore = false; - if (!MI->isSafeToMove(&TII, 0, SawStore)) { + if (!MI->isSafeToMove(&TII, nullptr, SawStore)) { DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); return; } diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 7f797be..de2ce22 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRegMatrix.h" #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumAssigned , "Number of registers assigned"); STATISTIC(NumUnassigned , "Number of registers unassigned"); diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index be11a8f..b3161a4 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "livestacks" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -24,6 +23,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "livestacks" + char LiveStacks::ID = 0; INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", "Live Stack Slot Analysis", false, false) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index ed55d7a..758b216 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -61,7 +61,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { for (unsigned i = 0, e = Kills.size(); i != e; ++i) if (Kills[i]->getParent() == MBB) return Kills[i]; - return NULL; + return nullptr; } void LiveVariables::VarInfo::dump() const { @@ -193,7 +193,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, SmallSet &PartDefRegs) { unsigned LastDefReg = 0; unsigned LastDefDist = 0; - MachineInstr *LastDef = NULL; + MachineInstr *LastDef = nullptr; for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; MachineInstr *Def = PhysRegDef[SubReg]; @@ -208,7 +208,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, } if (!LastDef) - return 0; + return nullptr; PartDefRegs.insert(LastDefReg); for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) { @@ -282,7 +282,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastDef = PhysRegDef[Reg]; MachineInstr *LastUse = PhysRegUse[Reg]; if (!LastDef && !LastUse) - return 0; + return nullptr; MachineInstr *LastRefOrPartRef = LastUse ? 
LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; @@ -333,7 +333,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { // AX = AL // = AL // AX = - MachineInstr *LastPartDef = 0; + MachineInstr *LastPartDef = nullptr; unsigned LastPartDefDist = 0; SmallSet<unsigned, 8> PartUses; for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { @@ -436,7 +436,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR)) Super = *SR; - HandlePhysRegKill(Super, 0); + HandlePhysRegKill(Super, nullptr); } } @@ -492,7 +492,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; PhysRegDef[SubReg] = MI; - PhysRegUse[SubReg] = NULL; + PhysRegUse[SubReg] = nullptr; } } } @@ -506,8 +506,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { PhysRegDef = new MachineInstr*[NumRegs]; PhysRegUse = new MachineInstr*[NumRegs]; PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()]; - std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); - std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); + std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); PHIJoins.clear(); // FIXME: LiveIntervals will be updated to remove its dependence on @@ -536,7 +536,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { EE = MBB->livein_end(); II != EE; ++II) { assert(TargetRegisterInfo::isPhysicalRegister(*II) && "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, 0, Defs); + HandlePhysRegDef(*II, nullptr, Defs); } // Loop over all of the instructions, processing them. @@ -639,10 +639,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // available at the end of the basic block. for (unsigned i = 0; i != NumRegs; ++i) if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) - HandlePhysRegDef(i, 0, Defs); + HandlePhysRegDef(i, nullptr, Defs); - std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0); - std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0); + std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr); + std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr); } // Convert and transfer the dead / killed information we have gathered into @@ -701,14 +701,15 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) { /// which is used in a PHI node. We map that to the BB the vreg is coming from.
/// void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { - for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) - for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) - if (BBI->getOperand(i).readsReg()) - PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()] - .push_back(BBI->getOperand(i).getReg()); + for (const auto &MBB : Fn) + for (const auto &BBI : MBB) { + if (!BBI.isPHI()) + break; + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) + if (BBI.getOperand(i).readsReg()) + PHIVarInfo[BBI.getOperand(i + 1).getMBB()->getNumber()] + .push_back(BBI.getOperand(i).getReg()); + } } bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 122d467..36885e8 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "localstackalloc" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -40,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "localstackalloc" + STATISTIC(NumAllocations, "Number of frame indices allocated into local block"); STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated"); STATISTIC(NumReplacements, "Number of frame indices references replaced"); diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 888c20e..0ec5c33 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -35,9 +35,11 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "codegen" + MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), - AddressTaken(false), CachedMCSymbol(NULL) { + AddressTaken(false), CachedMCSymbol(nullptr) { Insts.Parent = this; } @@ -98,7 +100,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { /// list, we update its parent pointer and add its operands from reg use/def /// lists if appropriate. void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { - assert(N->getParent() == 0 && "machine instruction already in a basic block"); + assert(!N->getParent() && "machine instruction already in a basic block"); N->setParent(Parent); // Add the instruction's register operands to their corresponding @@ -113,13 +115,13 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { /// list, we update its parent pointer and remove its operands from reg use/def /// lists if appropriate. void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { - assert(N->getParent() != 0 && "machine instruction not in a basic block"); + assert(N->getParent() && "machine instruction not in a basic block"); // Remove from the use/def lists. if (MachineFunction *MF = N->getParent()->getParent()) N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); - N->setParent(0); + N->setParent(nullptr); LeakDetector::addGarbageObject(N); } @@ -229,11 +231,11 @@ MachineBasicBlock::getLastNonDebugInstr() const { const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { // A block with a landing pad successor only has one other successor.
if (succ_size() > 2) - return 0; + return nullptr; for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) if ((*I)->isLandingPad()) return *I; - return 0; + return nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -392,7 +394,7 @@ void MachineBasicBlock::updateTerminator() { // A block with no successors has no concerns with fall-through edges. if (this->succ_empty()) return; - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; DebugLoc dl; // FIXME: this is nowhere bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); @@ -423,7 +425,7 @@ // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, 0, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, dl); } } else { if (FBB) { @@ -434,16 +436,16 @@ if (TII->ReverseBranchCondition(Cond)) return; TII->RemoveBranch(*this); - TII->InsertBranch(*this, FBB, 0, Cond, dl); + TII->InsertBranch(*this, FBB, nullptr, Cond, dl); } else if (isLayoutSuccessor(FBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, 0, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, dl); } } else { // Walk through the successors and find the successor which is not // a landing pad and is not the conditional branch destination (in TBB) // as the fallthrough successor. - MachineBasicBlock *FallthroughBB = 0; + MachineBasicBlock *FallthroughBB = nullptr; for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { if ((*SI)->isLandingPad() || *SI == TBB) continue; @@ -461,7 +463,7 @@ // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, 0, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, dl); return; } @@ -470,11 +472,11 @@ if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); } else if (!isLayoutSuccessor(FallthroughBB)) { TII->RemoveBranch(*this); TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl); @@ -641,7 +643,7 @@ bool MachineBasicBlock::canFallThrough() { return false; // Analyze the branches, if any, at the end of the block. - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { @@ -654,7 +656,7 @@ } // If there is no branch, control always falls through. - if (TBB == 0) return true; + if (!TBB) return true; // If there is some explicit branch to the fallthrough block, it can obviously // reach, even though the branch should get folded to fall through implicitly. @@ -668,7 +670,7 @@ // Otherwise, if it is conditional and has no explicit false block, it falls // through.
- return FBB == 0; + return FBB == nullptr; } MachineBasicBlock * @@ -676,7 +678,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. if (Succ->isLandingPad()) - return NULL; + return nullptr; MachineFunction *MF = getParent(); DebugLoc dl; // FIXME: this is nowhere @@ -684,15 +686,15 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Performance might be harmed on HW that implements branching using exec mask // where both sides of the branches are always executed. if (MF->getTarget().requiresStructuredCFG()) - return NULL; + return nullptr; // We may need to update this's terminator, but we can't do that if // AnalyzeBranch fails. If this uses a jump table, we won't touch it. const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) - return NULL; + return nullptr; // Avoid bugpoint weirdness: A block may end with a conditional branch but // jumps to the same MBB is either case. We have duplicate CFG edges in that @@ -701,7 +703,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (TBB && TBB == FBB) { DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" << getNumber() << '\n'); - return NULL; + return nullptr; } MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); @@ -793,7 +795,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); - MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl); + MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, dl); if (Indexes) { for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); @@ -1065,11 +1067,11 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineFunction::iterator FallThru = std::next(MachineFunction::iterator(this)); - if (DestA == 0 && DestB == 0) { + if (!DestA && !DestB) { // Block falls through to successor. DestA = FallThru; DestB = FallThru; - } else if (DestA != 0 && DestB == 0) { + } else if (DestA && !DestB) { if (isCond) // Block ends in conditional jump that falls through to successor.
DestB = FallThru; diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 13203d5..9151d99 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//====------ MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis ------====// +//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===// // // The LLVM Compiler Infrastructure // @@ -12,8 +12,10 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" @@ -22,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "block-freq" + #ifndef NDEBUG enum GVDAGType { GVDT_None, @@ -112,6 +116,7 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo*> : INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", "Machine Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", "Machine Block Frequency Analysis", true, true) @@ -127,16 +132,18 @@ MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {} void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { MachineBranchProbabilityInfo &MBPI = - getAnalysis<MachineBranchProbabilityInfo>(); + getAnalysis<MachineBranchProbabilityInfo>(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); if (!MBFI) MBFI.reset(new ImplType); - MBFI->doFunction(&F, &MBPI); + MBFI->doFunction(&F, &MBPI, &MLI); #ifndef NDEBUG if (ViewMachineBlockFreqPropagationDAG != GVDT_None) { view(); @@ -166,7 +173,7 @@ getBlockFreq(const MachineBasicBlock *MBB) const { } const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { - return MBFI ? MBFI->Fn : nullptr; + return MBFI ?
MBFI->getFunction() : nullptr; } raw_ostream & diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 771e7ce..74af1e2 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -25,7 +25,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "block-placement2" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -46,6 +45,8 @@ #include <algorithm> using namespace llvm; +#define DEBUG_TYPE "block-placement2" + STATISTIC(NumCondBranches, "Number of conditional branches"); STATISTIC(NumUncondBranches, "Number of uncondittional branches"); STATISTIC(CondBranchTakenFreq, @@ -206,7 +207,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, - const BlockFilterSet *BlockFilter = 0); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); @@ -220,7 +221,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, - const BlockFilterSet *BlockFilter = 0); + const BlockFilterSet *BlockFilter = nullptr); MachineBasicBlock *findBestLoopTop(MachineLoop &L, const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineFunction &F, @@ -334,7 +335,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( const BlockFilterSet *BlockFilter) { const BranchProbability HotProb(4, 5); // 80% - MachineBasicBlock *BestSucc = 0; + MachineBasicBlock *BestSucc = nullptr; // FIXME: Due to the performance of the probability and weight routines in // the MBPI analysis, we manually compute probabilities using the edge // weights. This is suboptimal as it means that the somewhat subtle @@ -432,7 +433,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( }), WorkList.end()); - MachineBasicBlock *BestBlock = 0; + MachineBasicBlock *BestBlock = nullptr; BlockFrequency BestFreq; for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(), WBE = WorkList.end(); @@ -479,7 +480,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( return *BlockToChain[I]->begin(); } } - return 0; + return nullptr; } void MachineBlockPlacement::buildChain( @@ -560,7 +561,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L, << getBlockName(L.getHeader()) << "\n"); BlockFrequency BestPredFreq; - MachineBasicBlock *BestPred = 0; + MachineBasicBlock *BestPred = nullptr; for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(), PE = L.getHeader()->pred_end(); PI != PE; ++PI) { @@ -616,11 +617,11 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // header and only rotate if safe. BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; if (!LoopBlockSet.count(*HeaderChain.begin())) - return 0; + return nullptr; BlockFrequency BestExitEdgeFreq; unsigned BestExitLoopDepth = 0; - MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *ExitingBB = nullptr; // If there are exits to outer loops, loop rotation can severely limit // fallthrough opportunites unless it selects such an exit. Keep a set of // blocks where rotating to exit with that block will reach an outer loop.
@@ -709,14 +710,14 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, // Without a candidate exiting block or with only a single block in the // loop, just use the loop header to layout the loop. if (!ExitingBB || L.getNumBlocks() == 1) - return 0; + return nullptr; // Also, if we have exit blocks which lead to outer loops but didn't select // one of them as the exiting block we are rotating toward, disable loop // rotation altogether. if (!BlocksExitingToOuterLoop.empty() && !BlocksExitingToOuterLoop.count(ExitingBB)) - return 0; + return nullptr; DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); return ExitingBB; @@ -795,7 +796,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. - MachineBasicBlock *ExitingBB = 0; + MachineBasicBlock *ExitingBB = nullptr; if (LoopTop == L.getHeader()) ExitingBB = findBestLoopExit(F, L, LoopBlockSet); @@ -883,7 +884,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // the exact fallthrough behavior for. for (;;) { Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; @@ -895,7 +896,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " << getBlockName(BB) << " -> " << getBlockName(NextBB) << "\n"); - Chain->merge(NextBB, 0); + Chain->merge(NextBB, nullptr); FI = NextFI; BB = NextBB; } @@ -987,7 +988,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // than assert when the branch cannot be analyzed in order to remove this // boiler plate. Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { // The "PrevBB" is not yet updated to reflect current code layout, so, // o. it may fall-through to a block without explict "goto" instruction @@ -1004,10 +1005,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { PrevBB->updateTerminator(); needUpdateBr = false; Cond.clear(); - TBB = FBB = 0; + TBB = FBB = nullptr; if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { // FIXME: This should never take place. - TBB = FBB = 0; + TBB = FBB = nullptr; } } @@ -1032,7 +1033,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Fixup the last block. Cond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. 
if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) F.back().updateTerminator(); diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 1d6879b..6fbc2be 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -88,7 +88,7 @@ MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src, MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { uint32_t MaxWeight = 0; - MachineBasicBlock *MaxSucc = 0; + MachineBasicBlock *MaxSucc = nullptr; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { uint32_t Weight = getEdgeWeight(MBB, I); @@ -101,7 +101,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) return MaxSucc; - return 0; + return nullptr; } BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 9c3bcc4..7da439c 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-cse" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-cse" + STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); STATISTIC(NumPhysCSEs, diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 7e1970c..3119a35 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "codegen-cp" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "codegen-cp" + STATISTIC(NumDeletes, "Number of dead copies deleted"); namespace { diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 061efdb..eb3d71f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -38,6 +38,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "codegen" + //===----------------------------------------------------------------------===// // MachineFunction implementation //===----------------------------------------------------------------------===// @@ -56,9 +58,9 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, if (TM.getRegisterInfo()) RegInfo = new (Allocator) MachineRegisterInfo(TM); else - RegInfo = 0; + RegInfo = nullptr; - MFInfo = 0; + MFInfo = nullptr; FrameInfo = new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); @@ -77,7 +79,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; - JumpTableInfo = 0; + JumpTableInfo = nullptr; } MachineFunction::~MachineFunction() { @@ -123,6 +125,11 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { return JumpTableInfo; } +/// Should we be emitting segmented stack stuff for the 
function +bool MachineFunction::shouldSplitStack() { + return getFunction()->hasFnAttribute("split-stack"); +} + /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and /// recomputes them. This guarantees that the MBB numbers are sequential, /// dense, and match the ordering of the blocks within the function. If a @@ -131,7 +138,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (empty()) { MBBNumbering.clear(); return; } MachineFunction::iterator MBBI, E = end(); - if (MBB == 0) + if (MBB == nullptr) MBBI = begin(); else MBBI = MBB; @@ -147,7 +154,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (MBBI->getNumber() != -1) { assert(MBBNumbering[MBBI->getNumber()] == &*MBBI && "MBB number mismatch!"); - MBBNumbering[MBBI->getNumber()] = 0; + MBBNumbering[MBBI->getNumber()] = nullptr; } // If BlockNo is already taken, set that block's number to -1. @@ -231,11 +238,17 @@ MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { + if (MMO->getValue()) + return new (Allocator) + MachineMemOperand(MachinePointerInfo(MMO->getValue(), + MMO->getOffset()+Offset), + MMO->getFlags(), Size, + MMO->getBaseAlignment(), nullptr); return new (Allocator) - MachineMemOperand(MachinePointerInfo(MMO->getValue(), + MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), MMO->getOffset()+Offset), MMO->getFlags(), Size, - MMO->getBaseAlignment(), 0); + MMO->getBaseAlignment(), nullptr); } MachineInstr::mmo_iterator @@ -352,9 +365,9 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { OS << '\n'; } - for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { + for (const auto &BB : *this) { OS << '\n'; - BB->print(OS, Indexes); + BB.print(OS, Indexes); } OS << "\n# End machine code for function " << getName() << ".\n\n"; @@ -564,7 +577,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, - /*Alloca*/ 0)); + /*Alloca*/ nullptr)); return -++NumFixedObjects; } @@ -583,7 +596,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { if (!isCalleeSavedInfoValid()) return BV; - for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) + for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR) BV.set(*CSR); // The entry MBB always has all CSRs pristine. 
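Context for the getMachineMemOperand hunk above: in this rebase MachinePointerInfo::V is widened from a plain const Value* into a two-way pointer union (IR Value vs. PseudoSourceValue), which is why cloning a memory operand at an offset now re-dispatches on whichever side is set, and why later hunks switch between getValue() and getPseudoValue(). Below is a minimal standalone sketch of that dispatch using std::variant in place of LLVM's PointerUnion; every type and function name in it is illustrative, not LLVM API.

    #include <cstdint>
    #include <variant>

    struct IRValue {};      // stand-in for an IR-level llvm::Value
    struct PseudoSource {}; // stand-in for a PseudoSourceValue (stack slot, GOT, ...)

    // Two-way pointer plus offset, mirroring the MachinePointerInfo shape.
    struct PtrInfo {
      std::variant<const IRValue *, const PseudoSource *> V;
      int64_t Offset = 0;
    };

    // Clone with an extra offset, preserving whichever pointer kind is active,
    // the same branch the getMachineMemOperand(MMO, Offset, Size) hunk performs.
    PtrInfo cloneWithOffset(const PtrInfo &Old, int64_t Extra) {
      if (auto *V = std::get_if<const IRValue *>(&Old.V))
        return PtrInfo{*V, Old.Offset + Extra};   // IR-value-backed operand
      return PtrInfo{std::get<const PseudoSource *>(Old.V),
                     Old.Offset + Extra};         // pseudo-source-backed operand
    }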
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp index 35591e1..46cd60a 100644 --- a/lib/CodeGen/MachineFunctionAnalysis.cpp +++ b/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -20,7 +20,7 @@ using namespace llvm; char MachineFunctionAnalysis::ID = 0; MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) : - FunctionPass(ID), TM(tm), MF(0) { + FunctionPass(ID), TM(tm), MF(nullptr) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } @@ -53,5 +53,5 @@ bool MachineFunctionAnalysis::runOnFunction(Function &F) { void MachineFunctionAnalysis::releaseMemory() { delete MF; - MF = 0; + MF = nullptr; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index d102794..5122165 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -128,7 +128,7 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) { void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, bool isKill, bool isDead, bool isUndef, bool isDebug) { - MachineRegisterInfo *RegInfo = 0; + MachineRegisterInfo *RegInfo = nullptr; if (MachineInstr *MI = getParent()) if (MachineBasicBlock *MBB = MI->getParent()) if (MachineFunction *MF = MBB->getParent()) @@ -152,7 +152,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, IsEarlyClobber = false; IsDebug = isDebug; // Ensure isOnRegUseList() returns false. - Contents.Reg.Prev = 0; + Contents.Reg.Prev = nullptr; // Preserve the tie when the operand was already a register. if (!WasReg) TiedTo = 0; @@ -265,7 +265,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { if (const MachineBasicBlock *MBB = MI->getParent()) if (const MachineFunction *MF = MBB->getParent()) TM = &MF->getTarget(); - const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0; + const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr; switch (getType()) { case MachineOperand::MO_Register: @@ -399,8 +399,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { /// getAddrSpace - Return the LLVM IR address space number that this pointer /// points into. unsigned MachinePointerInfo::getAddrSpace() const { - if (V == 0) return 0; - return cast<PointerType>(V->getType())->getAddressSpace(); + if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0; + return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace(); } /// getConstantPool - Return a MachinePointerInfo record that refers to the @@ -434,7 +434,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, : PtrInfo(ptrinfo), Size(s), Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), TBAAInfo(TBAAInfo), Ranges(Ranges) { - assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) && + assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() || + isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) && "invalid pointer value"); assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); assert((isLoad() || isStore()) && "Not a load/store!"); @@ -445,7 +446,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { ID.AddInteger(getOffset()); ID.AddInteger(Size); - ID.AddPointer(getValue()); + ID.AddPointer(getOpaqueValue()); ID.AddInteger(Flags); } @@ -486,10 +487,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { // Print the address information.
OS << "["; - if (!MMO.getValue()) - OS << ""; + if (const Value *V = MMO.getValue()) + V->printAsOperand(OS, /*PrintType=*/false); + else if (const PseudoSourceValue *PSV = MMO.getPseudoValue()) + PSV->printCustom(OS); else - MMO.getValue()->printAsOperand(OS, /*PrintType=*/false); + OS << ""; unsigned AS = MMO.getAddrSpace(); if (AS != 0) @@ -545,9 +548,9 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { /// the MCInstrDesc. MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, const DebugLoc dl, bool NoImp) - : MCID(&tid), Parent(0), Operands(0), NumOperands(0), + : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), debugLoc(dl) { + NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) { // Reserve space for the expected number of operands. if (unsigned NumOps = MCID->getNumOperands() + MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) { @@ -562,7 +565,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0), + : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) { @@ -583,7 +586,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) MachineRegisterInfo *MachineInstr::getRegInfo() { if (MachineBasicBlock *MBB = getParent()) return &MBB->getParent()->getRegInfo(); - return 0; + return nullptr; } /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in @@ -702,7 +705,7 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { // When adding a register operand, tell MRI about it. if (NewMO->isReg()) { // Ensure isOnRegUseList() returns false, regardless of Op's status. - NewMO->Contents.Reg.Prev = 0; + NewMO->Contents.Reg.Prev = nullptr; // Ignore existing ties. This is not a property that can be copied. NewMO->TiedTo = 0; // Add the new operand to MRI, but only for instructions in an MBB. @@ -974,7 +977,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return TII->getRegClass(getDesc(), OpIdx, TRI, MF); if (!getOperand(OpIdx).isReg()) - return NULL; + return nullptr; // For tied uses on inline asm, get the constraint from the def. unsigned DefIdx; @@ -984,7 +987,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, // Inline asm stores register class constraints in the flag word. int FlagIdx = findInlineAsmFlagIdx(OpIdx); if (FlagIdx < 0) - return NULL; + return nullptr; unsigned Flag = getOperand(FlagIdx).getImm(); unsigned RCID; @@ -995,7 +998,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem) return TRI->getPointerRegClass(MF); - return NULL; + return nullptr; } const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( @@ -1366,11 +1369,13 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { if ((*I)->isStore()) return false; if ((*I)->isInvariant()) return true; + + // A load from a constant PseudoSourceValue is invariant. + if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) + if (PSV->isConstant(MFI)) + continue; + if (const Value *V = (*I)->getValue()) { - // A load from a constant PseudoSourceValue is invariant. 
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) - if (PSV->isConstant(MFI)) - continue; // If we have an AliasAnalysis, ask it whether the memory is constant. if (AA && AA->pointsToConstantMemory( AliasAnalysis::Location(V, (*I)->getSize(), @@ -1448,32 +1453,14 @@ void MachineInstr::dump() const { static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, raw_ostream &CommentOS) { const LLVMContext &Ctx = MF->getFunction()->getContext(); - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - assert((!Scope || Scope.isScope()) && - "Scope of a DebugLoc should be null or a DIScope."); - // Omit the directory, because it's likely to be long and uninteresting. - if (Scope) - CommentOS << Scope.getFilename(); - else - CommentOS << "<unknown>"; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << " @[ "; - printDebugLoc(InlinedAtDL, MF, CommentOS); - CommentOS << " ]"; - } - } + DL.print(Ctx, CommentOS); } void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, bool SkipOpers) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. - const MachineFunction *MF = 0; - const MachineRegisterInfo *MRI = 0; + const MachineFunction *MF = nullptr; + const MachineRegisterInfo *MRI = nullptr; if (const MachineBasicBlock *MBB = getParent()) { MF = MBB->getParent(); if (!TM && MF) @@ -1679,7 +1666,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, OS << " line no:" << DV.getLineNumber(); if (MDNode *InlinedAt = DV.getInlinedAt()) { DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt); - if (!InlinedAtDL.isUnknown()) { + if (!InlinedAtDL.isUnknown() && MF) { OS << " inlined @[ "; printDebugLoc(InlinedAtDL, MF, OS); OS << " ]"; @@ -1756,7 +1743,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg, void MachineInstr::clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo) { if (!TargetRegisterInfo::isPhysicalRegister(Reg)) - RegInfo = 0; + RegInfo = nullptr; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isUse() || !MO.isKill()) @@ -1889,7 +1876,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { void MachineInstr::emitError(StringRef Msg) const { // Find the source location cookie.
unsigned LocCookie = 0; - const MDNode *LocMD = 0; + const MDNode *LocMD = nullptr; for (unsigned i = getNumOperands(); i != 0; --i) { if (getOperand(i-1).isMetadata() && (LocMD = getOperand(i-1).getMetadata()) && diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index d3a1ee7..68d2efd 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" @@ -42,6 +41,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-licm" + static cl::opt<bool> AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), @@ -358,7 +359,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); while (!Worklist.empty()) { CurLoop = Worklist.pop_back_val(); - CurPreheader = 0; + CurPreheader = nullptr; ExitBlocks.clear(); // If this is done before regalloc, only visit outer-most preheader-sporting @@ -390,10 +391,10 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); o != oe; ++o) { - if (!(*o)->isStore() || !(*o)->getValue()) + if (!(*o)->isStore() || !(*o)->getPseudoValue()) continue; if (const FixedStackPseudoSourceValue *Value = - dyn_cast<FixedStackPseudoSourceValue>((*o)->getValue())) { + dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) { if (Value->getFrameIndex() == FI) return true; } @@ -700,7 +701,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { WorkList.push_back(HeaderN); do { MachineDomTreeNode *Node = WorkList.pop_back_val(); - assert(Node != 0 && "Null dominator tree node?"); + assert(Node && "Null dominator tree node?"); MachineBasicBlock *BB = Node->getBlock(); // If the header of the loop containing this basic block is a landing pad, @@ -804,7 +805,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { // defs as well. This happens whenever the preheader is created by splitting // the critical edge from the loop predecessor to the loop header. if (BB->pred_size() == 1) { - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) InitRegPressure(*BB->pred_begin()); @@ -882,10 +883,9 @@ static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), E = MI.memoperands_end(); I != E; ++I) { - if (const Value *V = (*I)->getValue()) { - if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) - if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) - return true; + if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { + if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) + return true; } } return false; @@ -1241,13 +1241,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) - return 0; + return nullptr; // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location.
if (!MI->isInvariantLoad(AA)) - return 0; + return nullptr; // Next determine the register class for a temporary register. unsigned LoadRegIndex; @@ -1256,9 +1256,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { /*UnfoldLoad=*/true, /*UnfoldStore=*/false, &LoadRegIndex); - if (NewOpc == 0) return 0; + if (NewOpc == 0) return nullptr; const MCInstrDesc &MID = TII->get(NewOpc); - if (MID.getNumDefs() != 1) return 0; + if (MID.getNumDefs() != 1) return nullptr; MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); // Ok, we're unfolding. Create a temporary register and do the unfold. @@ -1284,7 +1284,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { NewMIs[0]->eraseFromParent(); NewMIs[1]->eraseFromParent(); - return 0; + return nullptr; } // Update register pressure for the unfolded instruction. @@ -1316,10 +1316,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr*> &PrevMIs) { for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { const MachineInstr *PrevMI = PrevMIs[i]; - if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0))) + if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr))) return PrevMI; } - return 0; + return nullptr; } bool MachineLICM::EliminateCSE(MachineInstr *MI, @@ -1390,7 +1390,7 @@ bool MachineLICM::MayCSE(MachineInstr *MI) { if (CI == CSEMap.end() || MI->isImplicitDef()) return false; - return LookForDuplicate(MI, CI->second) != 0; + return LookForDuplicate(MI, CI->second) != nullptr; } /// Hoist - When an instruction is found to use only loop invariant operands @@ -1466,7 +1466,7 @@ MachineBasicBlock *MachineLICM::getCurPreheader() { // If we've tried to get a preheader and failed, don't try again. if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) - return 0; + return nullptr; if (!CurPreheader) { CurPreheader = CurLoop->getLoopPreheader(); @@ -1474,13 +1474,13 @@ MachineBasicBlock *MachineLICM::getCurPreheader() { MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); if (!Pred) { CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); - return 0; + return nullptr; } CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); if (!CurPreheader) { CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); - return 0; + return nullptr; } } } diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 7181025..4976e35 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -36,8 +36,8 @@ namespace llvm { class MMIAddrLabelMapCallbackPtr : CallbackVH { MMIAddrLabelMap *Map; public: - MMIAddrLabelMapCallbackPtr() : Map(0) {} - MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {} + MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} + MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {} void setPtr(BasicBlock *BB) { ValueHandleBase::operator=(BB); @@ -163,9 +163,9 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { AddrLabelSymEntry Entry = AddrLabelSymbols[BB]; AddrLabelSymbols.erase(BB); assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?"); - BBCallbacks[Entry.Index] = 0; // Clear the callback. + BBCallbacks[Entry.Index] = nullptr; // Clear the callback.
- assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) && + assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && "Block/parent mismatch"); // Handle both the single and the multiple symbols cases. @@ -213,7 +213,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { return; } - BBCallbacks[OldEntry.Index] = 0; // Update the callback. + BBCallbacks[OldEntry.Index] = nullptr; // Update the callback. // Otherwise, we need to add the old symbol to the new block's set. If it is // just a single entry, upgrade it to a symbol list. @@ -253,12 +253,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, 0, false) { + : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), Context(0, 0, 0) { + : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) { llvm_unreachable("This MachineModuleInfo constructor should never be called, " "MMI should always be explicitly constructed by " "LLVMTargetMachine"); @@ -269,16 +269,16 @@ MachineModuleInfo::~MachineModuleInfo() { bool MachineModuleInfo::doInitialization(Module &M) { - ObjFileMMI = 0; + ObjFileMMI = nullptr; CompactUnwindEncoding = 0; CurCallSite = 0; CallsEHReturn = 0; CallsUnwindInit = 0; DbgInfoAvailable = UsesVAFloatArgument = false; // Always emit some info, by default "no personality" info. - Personalities.push_back(NULL); - AddrLabelSymbols = 0; - TheModule = 0; + Personalities.push_back(nullptr); + AddrLabelSymbols = nullptr; + TheModule = nullptr; return false; } @@ -288,12 +288,12 @@ bool MachineModuleInfo::doFinalization(Module &M) { Personalities.clear(); delete AddrLabelSymbols; - AddrLabelSymbols = 0; + AddrLabelSymbols = nullptr; Context.reset(); delete ObjFileMMI; - ObjFileMMI = 0; + ObjFileMMI = nullptr; return false; } @@ -341,7 +341,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) { /// because the block may be accessed outside its containing function. MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) { // Lazily create AddrLabelSymbols. - if (AddrLabelSymbols == 0) + if (!AddrLabelSymbols) AddrLabelSymbols = new MMIAddrLabelMap(Context); return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB)); } @@ -352,7 +352,7 @@ MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) { std::vector<MCSymbol*> MachineModuleInfo:: getAddrLabelSymbolToEmit(const BasicBlock *BB) { // Lazily create AddrLabelSymbols. - if (AddrLabelSymbols == 0) + if (!AddrLabelSymbols) AddrLabelSymbols = new MMIAddrLabelMap(Context); return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); } @@ -366,7 +366,7 @@ void MachineModuleInfo:: takeDeletedSymbolsForFunction(const Function *F, std::vector<MCSymbol*> &Result) { // If no blocks have had their addresses taken, we're done. - if (AddrLabelSymbols == 0) return; + if (!AddrLabelSymbols) return; return AddrLabelSymbols-> takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result); } @@ -419,7 +419,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, // If this is the first personality we're adding go // ahead and add it at the beginning.
- if (Personalities[0] == NULL) + if (!Personalities[0]) Personalities[0] = Personality; else Personalities.push_back(Personality); @@ -462,7 +462,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { if (LandingPad.LandingPadLabel && !LandingPad.LandingPadLabel->isDefined() && (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) - LandingPad.LandingPadLabel = 0; + LandingPad.LandingPadLabel = nullptr; // Special case: we *should* emit LPs with null LP MBB. This indicates // "nounwind" case. @@ -550,13 +550,13 @@ try_next:; const Function *MachineModuleInfo::getPersonality() const { // FIXME: Until PR1414 will be fixed, we're using 1 personality function per // function - return !LandingPads.empty() ? LandingPads[0].Personality : NULL; + return !LandingPads.empty() ? LandingPads[0].Personality : nullptr; } /// getPersonalityIndex - Return unique index for current personality /// function. NULL/first personality function should always get zero index. unsigned MachineModuleInfo::getPersonalityIndex() const { - const Function* Personality = NULL; + const Function* Personality = nullptr; // Scan landing pads. If there is at least one non-NULL personality - use it. for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp index cb204fd..3ee3e40 100644 --- a/lib/CodeGen/MachinePassRegistry.cpp +++ b/lib/CodeGen/MachinePassRegistry.cpp @@ -20,7 +20,7 @@ void MachinePassRegistryListener::anchor() { } /// setDefault - Set the default constructor by name. void MachinePassRegistry::setDefault(StringRef Name) { - MachinePassCtor Ctor = 0; + MachinePassCtor Ctor = nullptr; for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) { if (R->getName() == Name) { Ctor = R->getCtor(); diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index db3eec3..f560259 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -23,7 +23,7 @@ using namespace llvm; void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) - : TM(TM), TheDelegate(0), IsSSA(true), TracksLiveness(true) { + : TM(TM), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); @@ -60,7 +60,7 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) - return 0; + return nullptr; setRegClass(Reg, NewRC); return NewRC; } @@ -182,7 +182,7 @@ void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { // Head is NULL for an empty list. if (!Head) { MO->Contents.Reg.Prev = MO; - MO->Contents.Reg.Next = 0; + MO->Contents.Reg.Next = nullptr; HeadRef = MO; return; } @@ -203,7 +203,7 @@ void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { HeadRef = MO; } else { // Insert use at the end. - MO->Contents.Reg.Next = 0; + MO->Contents.Reg.Next = nullptr; Last->Contents.Reg.Next = MO; } } @@ -227,8 +227,8 @@ void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { (Next ? Next : Head)->Contents.Reg.Prev = Prev; - MO->Contents.Reg.Prev = 0; - MO->Contents.Reg.Next = 0; + MO->Contents.Reg.Prev = nullptr; + MO->Contents.Reg.Next = nullptr; } /// Move NumOps operands from Src to Dst, updating use-def lists as needed.
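One mechanical change repeats in nearly every file of this section: #define DEBUG_TYPE moves from above the #include block to just below it. The likely motivation (hedging here, since the patch itself states no rationale) is that headers in this LLVM version begin referencing DEBUG_TYPE themselves, so a macro defined before the includes leaks into, or collides with, every header the file pulls in. Schematically, with a made-up pass name:

    // Before: the macro is already defined while headers are preprocessed.
    #define DEBUG_TYPE "mypass"        // hypothetical pass name
    #include "llvm/Support/Debug.h"    // header sees the stray definition

    // After: headers first, then a definition local to this translation unit.
    #include "llvm/Support/Debug.h"
    #define DEBUG_TYPE "mypass"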
@@ -303,17 +303,17 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { def_instr_iterator I = def_instr_begin(Reg); assert((I.atEnd() || std::next(I) == def_instr_end()) && "getVRegDef assumes a single definition or no definition"); - return !I.atEnd() ? &*I : 0; + return !I.atEnd() ? &*I : nullptr; } /// getUniqueVRegDef - Return the unique machine instr that defines the /// specified virtual register or null if none is found. If there are /// multiple definitions or no definition, return null. MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const { - if (def_empty(Reg)) return 0; + if (def_empty(Reg)) return nullptr; def_instr_iterator I = def_instr_begin(Reg); if (std::next(I) != def_instr_end()) - return 0; + return nullptr; return &*I; } diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 77496ad..d9173a2 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -29,6 +29,8 @@ #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; +#define DEBUG_TYPE "machine-ssaupdater" + typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast<AvailableValsTy*>(AV); } MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, SmallVectorImpl<MachineInstr*> *NewPHI) - : AV(0), InsertedPHIs(NewPHI) { + : AV(nullptr), InsertedPHIs(NewPHI) { TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); } @@ -48,7 +50,7 @@ MachineSSAUpdater::~MachineSSAUpdater() { /// Initialize - Reset this object to get ready for a new set of SSA /// updates. ProtoValue is the value used to name PHI nodes. void MachineSSAUpdater::Initialize(unsigned V) { - if (AV == 0) + if (!AV) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); @@ -313,7 +315,7 @@ public: static MachineInstr *InstrIsPHI(MachineInstr *I) { if (I && I->isPHI()) return I; - return 0; + return nullptr; } /// ValueIsPHI - Check if the instruction that defines the specified register @@ -328,7 +330,7 @@ public: MachineInstr *PHI = ValueIsPHI(Val, Updater); if (PHI && PHI->getNumOperands() <= 1) return PHI; - return 0; + return nullptr; } /// GetPHIValue - For the specified PHI instruction, return the register diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index d90cd23..23847d6 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -12,8 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "misched" - #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -35,6 +33,8 @@ using namespace llvm; +#define DEBUG_TYPE "misched" + namespace llvm { cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, cl::desc("Force top-down list scheduling")); @@ -85,7 +85,7 @@ void ScheduleDAGMutation::anchor() {} //===----------------------------------------------------------------------===// MachineSchedContext::MachineSchedContext(): - MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) { + MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) { RegClassInfo = new RegisterClassInfo(); } @@ -100,7 +100,7 @@ class MachineSchedulerBase : public MachineSchedContext, public: MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {} - void print(raw_ostream &O, const Module* = 0) const override; + void print(raw_ostream &O, const Module* =
nullptr) const override; protected: void scheduleRegions(ScheduleDAGInstrs &Scheduler); @@ -192,7 +192,7 @@ MachinePassRegistry MachineSchedRegistry::Registry; /// A dummy default scheduler factory indicates whether the scheduler /// is overridden on the command line. static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { - return 0; + return nullptr; } /// MachineSchedOpt allows command line selection of the scheduler. @@ -487,9 +487,8 @@ void ReadyQueue::dump() { // virtual registers. // ===----------------------------------------------------------------------===/ +// Provide a vtable anchor. ScheduleDAGMI::~ScheduleDAGMI() { - DeleteContainerPointers(Mutations); - delete SchedImpl; } bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { @@ -527,7 +526,7 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { dbgs() << "*** Scheduling failed! ***\n"; SuccSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --SuccSU->NumPredsLeft; @@ -561,7 +560,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --PredSU->NumSuccsLeft; @@ -723,8 +722,8 @@ findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots) { - NextClusterSucc = NULL; - NextClusterPred = NULL; + NextClusterSucc = nullptr; + NextClusterPred = nullptr; // Release all DAG roots for scheduling, not including EntrySU/ExitSU. // @@ -782,7 +781,7 @@ void ScheduleDAGMI::placeDebugValues() { RegionEnd = DbgValue; } DbgValues.clear(); - FirstDbgValue = NULL; + FirstDbgValue = nullptr; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1549,7 +1548,7 @@ void SchedBoundary::reset() { // invalid, placeholder HazardRecs. if (HazardRec && HazardRec->isEnabled()) { delete HazardRec; - HazardRec = 0; + HazardRec = nullptr; } Available.clear(); Pending.clear(); @@ -1679,7 +1678,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) { // Find the unscheduled node in ReadySUs with the highest latency.
unsigned SchedBoundary:: findMaxLatency(ArrayRef<SUnit*> ReadySUs) { - SUnit *LateSU = 0; + SUnit *LateSU = nullptr; unsigned RemLatency = 0; for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); I != E; ++I) { @@ -2057,7 +2056,7 @@ SUnit *SchedBoundary::pickOnlyChoice() { } if (Available.size() == 1) return *Available.begin(); - return NULL; + return nullptr; } #ifndef NDEBUG @@ -2157,7 +2156,7 @@ public: SchedResourceDelta ResDelta; SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} + : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {} bool isValid() const { return SU; } @@ -2185,7 +2184,7 @@ protected: SchedRemainder Rem; protected: GenericSchedulerBase(const MachineSchedContext *C): - Context(C), SchedModel(0), TRI(0) {} + Context(C), SchedModel(nullptr), TRI(nullptr) {} void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, SchedBoundary *OtherZone); @@ -2444,7 +2443,7 @@ class GenericScheduler : public GenericSchedulerBase { MachineSchedPolicy RegionPolicy; public: GenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"), + GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"), Bot(SchedBoundary::BotQID, "BotQ") {} void initPolicy(MachineBasicBlock::iterator Begin, @@ -2910,7 +2909,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); - return NULL; + return nullptr; } SUnit *SU; do { @@ -3002,17 +3001,17 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { - ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C)); + ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. - DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI)); + DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI)); if (EnableLoadCluster && DAG->TII->enableClusterLoads()) - DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI)); if (EnableMacroFusion) - DAG->addMutation(new MacroFusion(DAG->TII)); + DAG->addMutation(make_unique<MacroFusion>(DAG->TII)); return DAG; } @@ -3164,7 +3163,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); - return NULL; + return nullptr; } SUnit *SU; do { @@ -3174,7 +3173,7 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { SchedCandidate TopCand(NoPolicy); // Set the top-down policy based on the state of the current top zone and // the instructions outside the zone, including the bottom zone.
- setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL); + setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); pickNodeFromQueue(TopCand); assert(TopCand.Reason != NoCand && "failed to find a candidate"); tracePick(TopCand, true); @@ -3198,7 +3197,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create a generic scheduler with no vreg liveness or DAG mutation passes. static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { - return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true); + return new ScheduleDAGMI(C, make_unique(C), /*IsPostRA=*/true); } //===----------------------------------------------------------------------===// @@ -3212,7 +3211,8 @@ struct ILPOrder { const BitVector *ScheduledTrees; bool MaximizeILP; - ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {} + ILPOrder(bool MaxILP) + : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {} /// \brief Apply a less-than relation on node priority. /// @@ -3246,7 +3246,7 @@ class ILPScheduler : public MachineSchedStrategy { std::vector ReadyQ; public: - ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} + ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {} void initialize(ScheduleDAGMI *dag) override { assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness"); @@ -3267,7 +3267,7 @@ public: /// Callback to select the highest priority node from the ready Q. SUnit *pickNode(bool &IsTopNode) override { - if (ReadyQ.empty()) return NULL; + if (ReadyQ.empty()) return nullptr; std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); @@ -3302,10 +3302,10 @@ public: } // namespace static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, new ILPScheduler(true)); + return new ScheduleDAGMILive(C, make_unique(true)); } static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, new ILPScheduler(false)); + return new ScheduleDAGMILive(C, make_unique(false)); } static MachineSchedRegistry ILPMaxRegistry( "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); @@ -3347,7 +3347,7 @@ public: InstructionShuffler(bool alternate, bool topdown) : IsAlternating(alternate), IsTopDown(topdown) {} - virtual void initialize(ScheduleDAGMI*) { + void initialize(ScheduleDAGMI*) override { TopQ.clear(); BottomQ.clear(); } @@ -3355,11 +3355,11 @@ public: /// Implement MachineSchedStrategy interface. 
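Note: the factory hunks above stop handing raw owning pointers to the DAG: the strategy and each mutation are created through make_unique (LLVM's own helper at this point, since C++14's std::make_unique was not yet assumed), which is why the ScheduleDAGMI destructor earlier in this file could drop its DeleteContainerPointers/delete calls. A simplified sketch of the ownership shape, with invented names, written against std::make_unique:

#include <memory>
#include <vector>

struct Strategy { virtual ~Strategy() = default; };
struct Mutation { virtual ~Mutation() = default; };

class Dag {
  std::unique_ptr<Strategy> Impl;                   // owned; freed implicitly
  std::vector<std::unique_ptr<Mutation>> Mutations; // owned; no manual delete loop
public:
  explicit Dag(std::unique_ptr<Strategy> S) : Impl(std::move(S)) {}
  void addMutation(std::unique_ptr<Mutation> M) {
    Mutations.push_back(std::move(M));
  }
  // ~Dag() needs no hand-written cleanup: unique_ptr handles it.
};

struct GreedyStrategy : Strategy {};
struct ClusterLoads  : Mutation {};

Dag *createDag() {
  Dag *D = new Dag(std::make_unique<GreedyStrategy>());
  D->addMutation(std::make_unique<ClusterLoads>());
  return D; // the Dag itself is still caller-owned, as in createGenericSchedLive
}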
/// ----------------------------------------- - virtual SUnit *pickNode(bool &IsTopNode) { + SUnit *pickNode(bool &IsTopNode) override { SUnit *SU; if (IsTopDown) { do { - if (TopQ.empty()) return NULL; + if (TopQ.empty()) return nullptr; SU = TopQ.top(); TopQ.pop(); } while (SU->isScheduled); @@ -3367,7 +3367,7 @@ public: } else { do { - if (BottomQ.empty()) return NULL; + if (BottomQ.empty()) return nullptr; SU = BottomQ.top(); BottomQ.pop(); } while (SU->isScheduled); @@ -3378,12 +3378,12 @@ public: return SU; } - virtual void schedNode(SUnit *SU, bool IsTopNode) {} + void schedNode(SUnit *SU, bool IsTopNode) override {} - virtual void releaseTopNode(SUnit *SU) { + void releaseTopNode(SUnit *SU) override { TopQ.push(SU); } - virtual void releaseBottomNode(SUnit *SU) { + void releaseBottomNode(SUnit *SU) override { BottomQ.push(SU); } }; @@ -3394,7 +3394,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { bool TopDown = !ForceBottomUp; assert((TopDown || !ForceTopDown) && "-misched-topdown incompatible with -misched-bottomup"); - return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown)); + return new ScheduleDAGMILive(C, make_unique(Alternate, TopDown)); } static MachineSchedRegistry ShufflerRegistry( "shuffle", "Shuffle machine instructions alternating directions", @@ -3450,7 +3450,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { raw_string_ostream SS(Str); const ScheduleDAGMI *DAG = static_cast(G); const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? - static_cast(G)->getDFSResult() : 0; + static_cast(G)->getDFSResult() : nullptr; SS << "SU:" << SU->NodeNum; if (DFS) SS << " I:" << DFS->getNumInstrs(SU); @@ -3464,7 +3464,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { std::string Str("shape=Mrecord"); const ScheduleDAGMI *DAG = static_cast(G); const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? - static_cast(G)->getDFSResult() : 0; + static_cast(G)->getDFSResult() : nullptr; if (DFS) { Str += ",style=filled,fillcolor=\"#"; Str += DOT::getColorString(DFS->getSubtreeID(N)); diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index dbff1f6..f44e4d1 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-sink" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -32,6 +31,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "machine-sink" + static cl::opt SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), @@ -332,16 +333,16 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, MachineBasicBlock *ToBB, bool BreakPHIEdge) { if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) - return 0; + return nullptr; // Avoid breaking back edge. From == To means backedge for single BB loop. if (!SplitEdges || FromBB == ToBB) - return 0; + return nullptr; // Check for backedges of more "complex" loops. if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && LI->isLoopHeader(ToBB)) - return 0; + return nullptr; // It's not always legal to break critical edges and sink the computation // to the edge. 
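Note: the InstructionShuffler hunks swap leading virtual for a trailing override on every overrider. The payoff is that a later signature drift in the base class becomes a compile error instead of silently introducing an unrelated virtual. A sketch with invented names:

struct SchedStrategy {
  virtual ~SchedStrategy() = default;
  virtual void schedNode(int *su, bool topNode) = 0;
};

struct Shuffler : SchedStrategy {
  // OK: matches the base signature exactly.
  void schedNode(int *su, bool topNode) override {}

  // With 'override' this would be rejected at compile time; with plain
  // 'virtual' it would quietly declare a new virtual that is never called.
  // void schedNode(int *su) override;   // error: does not override
};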
@@ -388,7 +389,7 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI, if (*PI == FromBB) continue; if (!DT->dominates(ToBB, *PI)) - return 0; + return nullptr; } } @@ -484,7 +485,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. - MachineBasicBlock *SuccToSinkTo = 0; + MachineBasicBlock *SuccToSinkTo = nullptr; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. @@ -498,10 +499,10 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) - return NULL; + return nullptr; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. - return NULL; + return nullptr; } } else { // Virtual register uses are always safe to sink. @@ -509,7 +510,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) - return NULL; + return nullptr; // FIXME: This picks a successor to sink into based on having one // successor that dominates all the uses. However, there are cases where @@ -532,7 +533,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, bool LocalUse = false; if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) - return NULL; + return nullptr; continue; } @@ -558,26 +559,26 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, } if (LocalUse) // Def is used locally, it's never safe to move this def. - return NULL; + return nullptr; } // If we couldn't find a block to sink to, ignore this instruction. - if (SuccToSinkTo == 0) - return NULL; - else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) - return NULL; + if (!SuccToSinkTo) + return nullptr; + if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo)) + return nullptr; } } // It is not possible to sink an instruction into its own block. This can // happen with loops. if (MBB == SuccToSinkTo) - return NULL; + return nullptr; // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) - return NULL; + return nullptr; return SuccToSinkTo; } @@ -607,7 +608,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge); // If there are no outputs, it must have side-effects. 
- if (SuccToSinkTo == 0) + if (!SuccToSinkTo) return false; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index d07178e..1bbf0ad 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "machine-trace-metrics" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SparseSet.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "machine-trace-metrics" + char MachineTraceMetrics::ID = 0; char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; @@ -37,8 +38,9 @@ INITIALIZE_PASS_END(MachineTraceMetrics, "machine-trace-metrics", "Machine Trace Metrics", false, true) MachineTraceMetrics::MachineTraceMetrics() - : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) { - std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0); + : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr), + MRI(nullptr), Loops(nullptr) { + std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr); } void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { @@ -64,11 +66,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { } void MachineTraceMetrics::releaseMemory() { - MF = 0; + MF = nullptr; BlockInfo.clear(); for (unsigned i = 0; i != TS_NumStrategies; ++i) { delete Ensembles[i]; - Ensembles[i] = 0; + Ensembles[i] = nullptr; } } @@ -95,19 +97,17 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { unsigned PRKinds = SchedModel.getNumProcResourceKinds(); SmallVector PRCycles(PRKinds); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *MI = I; - if (MI->isTransient()) + for (const auto &MI : *MBB) { + if (MI.isTransient()) continue; ++InstrCount; - if (MI->isCall()) + if (MI.isCall()) FBI->HasCalls = true; // Count processor resources used. if (!SchedModel.hasInstrSchedModel()) continue; - const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI); + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI); if (!SC->isValid()) continue; @@ -233,7 +233,7 @@ const MachineTraceMetrics::TraceBlockInfo* MachineTraceMetrics::Ensemble:: getDepthResources(const MachineBasicBlock *MBB) const { const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; - return TBI->hasValidDepth() ? TBI : 0; + return TBI->hasValidDepth() ? TBI : nullptr; } // Check if height resources for MBB are valid and return the TBI. @@ -242,7 +242,7 @@ const MachineTraceMetrics::TraceBlockInfo* MachineTraceMetrics::Ensemble:: getHeightResources(const MachineBasicBlock *MBB) const { const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; - return TBI->hasValidHeight() ? TBI : 0; + return TBI->hasValidHeight() ? TBI : nullptr; } /// Get an array of processor resource depths for MBB. Indexed by processor @@ -316,13 +316,13 @@ public: const MachineBasicBlock* MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { if (MBB->pred_empty()) - return 0; + return nullptr; const MachineLoop *CurLoop = getLoopFor(MBB); // Don't leave loops, and never follow back-edges. 
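Note: the MachineTraceMetrics constructor above replaces the LLVM-local array_endof helper with std::begin/std::end, which deduce a raw array's bounds and keep working if the element type or length changes; the (Ensemble*)0 cast disappears along with the 0. A standalone sketch:

#include <algorithm>
#include <iterator>

struct Ensemble;

struct TraceMetrics {
  static const unsigned TS_NumStrategies = 2;
  Ensemble *Ensembles[TS_NumStrategies];

  TraceMetrics() {
    // Bounds come from the array type itself; no hand-written end pointer.
    std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
  }
};

int main() { TraceMetrics TM; (void)TM; return 0; }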
if (CurLoop && MBB == CurLoop->getHeader()) - return 0; + return nullptr; unsigned CurCount = MTM.getResources(MBB)->InstrCount; - const MachineBasicBlock *Best = 0; + const MachineBasicBlock *Best = nullptr; unsigned BestDepth = 0; for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) { @@ -344,9 +344,9 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { const MachineBasicBlock* MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { if (MBB->pred_empty()) - return 0; + return nullptr; const MachineLoop *CurLoop = getLoopFor(MBB); - const MachineBasicBlock *Best = 0; + const MachineBasicBlock *Best = nullptr; unsigned BestHeight = 0; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { @@ -568,9 +568,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { // invalidated, but their instructions will stay the same, so there is no // need to erase the Cycle entries. They will be overwritten when we // recompute. - for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end(); - I != E; ++I) - Cycles.erase(I); + for (const auto &I : *BadMBB) + Cycles.erase(&I); } void MachineTraceMetrics::Ensemble::verify() const { @@ -690,7 +689,7 @@ struct LiveRegUnit { unsigned getSparseSetIndex() const { return RegUnit; } - LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {} + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {} }; } @@ -828,16 +827,13 @@ computeInstrDepths(const MachineBasicBlock *MBB) { if (TBI.HasValidInstrHeights) TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - const MachineInstr *UseMI = I; - + for (const auto &UseMI : *MBB) { // Collect all data dependencies. Deps.clear(); - if (UseMI->isPHI()) - getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI); - else if (getDataDeps(UseMI, Deps, MTM.MRI)) - updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI); + if (UseMI.isPHI()) + getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(&UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI); // Filter and process dependencies, computing the earliest issue cycle. unsigned Cycle = 0; @@ -853,20 +849,20 @@ computeInstrDepths(const MachineBasicBlock *MBB) { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp); Cycle = std::max(Cycle, DepCycle); } // Remember the instruction depth. - InstrCycles &MICycles = Cycles[UseMI]; + InstrCycles &MICycles = Cycles[&UseMI]; MICycles.Depth = Cycle; if (!TBI.HasValidInstrHeights) { - DEBUG(dbgs() << Cycle << '\t' << *UseMI); + DEBUG(dbgs() << Cycle << '\t' << UseMI); continue; } // Update critical path length. 
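Note: pickTracePred and pickTraceSucc above share one skeleton: Best starts as nullptr and the loop folds "no candidate yet" and "better candidate" into a single condition, so no sentinel depth value is needed. Reduced to that skeleton (Block and its fields are invented):

#include <cstdio>
#include <vector>

struct Block { int Id; unsigned Depth; };

// Pick the candidate with the smallest depth; nullptr if there are none.
const Block *pickBest(const std::vector<Block> &Preds) {
  const Block *Best = nullptr;
  unsigned BestDepth = 0;
  for (const Block &B : Preds) {
    if (!Best || B.Depth < BestDepth) { // first hit and improvement, unified
      Best = &B;
      BestDepth = B.Depth;
    }
  }
  return Best;
}

int main() {
  std::vector<Block> Preds{{1, 5}, {2, 3}, {3, 9}};
  if (const Block *B = pickBest(Preds))
    std::printf("best: block %d (depth %u)\n", B->Id, B->Depth);
  return 0;
}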
TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); - DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); } } } @@ -1055,16 +1051,16 @@ computeInstrHeights(const MachineBasicBlock *MBB) { Succ = Loop->getHeader(); if (Succ) { - for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end(); - I != E && I->isPHI(); ++I) { - const MachineInstr *PHI = I; + for (const auto &PHI : *Succ) { + if (!PHI.isPHI()) + break; Deps.clear(); - getPHIDeps(PHI, Deps, MBB, MTM.MRI); + getPHIDeps(&PHI, Deps, MBB, MTM.MRI); if (!Deps.empty()) { // Loop header PHI heights are all 0. - unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0; - DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI); - if (pushDepHeight(Deps.front(), PHI, Height, + unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI); + if (pushDepHeight(Deps.front(), &PHI, Height, Heights, MTM.SchedModel, MTM.TII)) addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 1bd75f7..8515b0f 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -33,7 +33,6 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/BasicBlock.h" @@ -42,6 +41,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -241,7 +241,7 @@ namespace { static char ID; // Pass ID, replacement for typeid const char *const Banner; - MachineVerifierPass(const char *b = 0) + MachineVerifierPass(const char *b = nullptr) : MachineFunctionPass(ID), Banner(b) { initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); } @@ -273,7 +273,7 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { - raw_ostream *OutFile = 0; + raw_ostream *OutFile = nullptr; if (OutFileName) { std::string ErrorInfo; OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, @@ -296,10 +296,10 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { TRI = TM->getRegisterInfo(); MRI = &MF.getRegInfo(); - LiveVars = NULL; - LiveInts = NULL; - LiveStks = NULL; - Indexes = NULL; + LiveVars = nullptr; + LiveInts = nullptr; + LiveStks = nullptr; + Indexes = nullptr; if (PASS) { LiveInts = PASS->getAnalysisIfAvailable(); // We don't want to verify LiveVariables if LiveIntervals is available. @@ -314,7 +314,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { MFI!=MFE; ++MFI) { visitMachineBasicBlockBefore(MFI); // Keep track of the current bundle header. - const MachineInstr *CurBundle = 0; + const MachineInstr *CurBundle = nullptr; // Do we expect the next instruction to be part of the same bundle? bool InBundle = false; @@ -469,18 +469,17 @@ void MachineVerifier::visitMachineFunctionBefore() { // Build a set of the basic blocks in the function. 
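Note: several loops in this file (getResources, invalidate, computeInstrDepths, computeInstrHeights) collapse from explicit const_iterator triples into range-based for over the block, taking &MI only at call sites that still want a pointer. The rewrite pattern in isolation, assuming a pointer-taking legacy API:

#include <cstdio>
#include <list>

struct Instr {
  bool Transient;
  bool isTransient() const { return Transient; }
};

// Legacy-style callee that still takes a pointer, like resolveSchedClass(&MI).
void resolve(const Instr *MI) { std::printf("resolving %p\n", (const void *)MI); }

int main() {
  std::list<Instr> Block{{true}, {false}, {false}};

  // The element is now a reference: members use '.', and pointer-taking
  // callees get an explicit '&' at the call site.
  for (const auto &MI : Block) {
    if (MI.isTransient())
      continue;
    resolve(&MI);
  }
  return 0;
}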
FunctionBlocks.clear(); - for (MachineFunction::const_iterator - I = MF->begin(), E = MF->end(); I != E; ++I) { - FunctionBlocks.insert(I); - BBInfo &MInfo = MBBInfoMap[I]; - - MInfo.Preds.insert(I->pred_begin(), I->pred_end()); - if (MInfo.Preds.size() != I->pred_size()) - report("MBB has duplicate entries in its predecessor list.", I); - - MInfo.Succs.insert(I->succ_begin(), I->succ_end()); - if (MInfo.Succs.size() != I->succ_size()) - report("MBB has duplicate entries in its successor list.", I); + for (const auto &MBB : *MF) { + FunctionBlocks.insert(&MBB); + BBInfo &MInfo = MBBInfoMap[&MBB]; + + MInfo.Preds.insert(MBB.pred_begin(), MBB.pred_end()); + if (MInfo.Preds.size() != MBB.pred_size()) + report("MBB has duplicate entries in its predecessor list.", &MBB); + + MInfo.Succs.insert(MBB.succ_begin(), MBB.succ_end()); + if (MInfo.Succs.size() != MBB.succ_size()) + report("MBB has duplicate entries in its successor list.", &MBB); } // Check that the register use lists are sane. @@ -501,7 +500,7 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i, void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { - FirstTerminator = 0; + FirstTerminator = nullptr; if (MRI->isSSA()) { // If this block has allocatable physical registers live-in, check that @@ -553,7 +552,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; if (!TII->AnalyzeBranch(*const_cast(MBB), TBB, FBB, Cond)) { @@ -578,8 +577,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() && - !TII->isPredicated(getBundleStart(&MBB->back()))) { + if (!MBB->empty() && MBB->back().isBarrier() && + !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } @@ -599,10 +598,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via unconditional branch but doesn't contain " "any instructions!", MBB); - } else if (!getBundleStart(&MBB->back())->isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via unconditional branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via unconditional branch but the branch isn't a " "terminator instruction!", MBB); } @@ -630,10 +629,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/fall-through but doesn't " "contain any instructions!", MBB); - } else if (getBundleStart(&MBB->back())->isBarrier()) { + } else if (MBB->back().isBarrier()) { report("MBB exits via conditional branch/fall-through but ends with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/fall-through but the branch " "isn't a terminator instruction!", MBB); } @@ -658,10 +657,10 @@ 
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MBB->empty()) { report("MBB exits via conditional branch/branch but doesn't " "contain any instructions!", MBB); - } else if (!getBundleStart(&MBB->back())->isBarrier()) { + } else if (!MBB->back().isBarrier()) { report("MBB exits via conditional branch/branch but doesn't end with a " "barrier instruction!", MBB); - } else if (!getBundleStart(&MBB->back())->isTerminator()) { + } else if (!MBB->back().isTerminator()) { report("MBB exits via conditional branch/branch but the branch " "isn't a terminator instruction!", MBB); } @@ -1158,9 +1157,7 @@ void MachineVerifier::calcRegsPassed() { // First push live-out regs to successors' vregsPassed. Remember the MBBs that // have any vregsPassed. SmallPtrSet todo; - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - const MachineBasicBlock &MBB(*MFI); + for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; if (!MInfo.reachable) continue; @@ -1195,9 +1192,7 @@ void MachineVerifier::calcRegsPassed() { void MachineVerifier::calcRegsRequired() { // First push live-in regs to predecessors' vregsRequired. SmallPtrSet todo; - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - const MachineBasicBlock &MBB(*MFI); + for (const auto &MBB : *MF) { BBInfo &MInfo = MBBInfoMap[&MBB]; for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(), PrE = MBB.pred_end(); PrI != PrE; ++PrI) { @@ -1228,27 +1223,28 @@ void MachineVerifier::calcRegsRequired() { // calcRegsPassed has been run so BBInfo::isLiveOut is valid. void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { SmallPtrSet seen; - for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { + for (const auto &BBI : *MBB) { + if (!BBI.isPHI()) + break; seen.clear(); - for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { - unsigned Reg = BBI->getOperand(i).getReg(); - const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB(); + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI.getOperand(i).getReg(); + const MachineBasicBlock *Pre = BBI.getOperand(i + 1).getMBB(); if (!Pre->isSuccessor(MBB)) continue; seen.insert(Pre); BBInfo &PrInfo = MBBInfoMap[Pre]; if (PrInfo.reachable && !PrInfo.isLiveOut(Reg)) report("PHI operand is not live-out from predecessor", - &BBI->getOperand(i), i); + &BBI.getOperand(i), i); } // Did we see all predecessors? for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), PrE = MBB->pred_end(); PrI != PrE; ++PrI) { if (!seen.count(*PrI)) { - report("Missing PHI operand", BBI); + report("Missing PHI operand", &BBI); *OS << "BB#" << (*PrI)->getNumber() << " is a predecessor according to the CFG.\n"; } @@ -1259,29 +1255,27 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { void MachineVerifier::visitMachineFunctionAfter() { calcRegsPassed(); - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - BBInfo &MInfo = MBBInfoMap[MFI]; + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; // Skip unreachable MBBs. if (!MInfo.reachable) continue; - checkPHIOps(MFI); + checkPHIOps(&MBB); } // Now check liveness info if available calcRegsRequired(); // Check for killed virtual registers that should be live out. 
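Note: checkPHIOps above (and analyzePHINodes further down) now iterate the whole block and bail at the first non-PHI, which is safe because MIR keeps all PHIs grouped at the head of a block; the stride-2 operand walk reflects PHI operands arriving as (register, predecessor) pairs after the def. The loop shape, reduced to plain containers:

#include <cstdio>
#include <vector>

struct MI {
  bool IsPHI;
  std::vector<int> Ops;  // Ops[0] = def; then (reg, pred) pairs
  bool isPHI() const { return IsPHI; }
};

int main() {
  std::vector<MI> Block{
      {true, {100, 7, 1, 8, 2}}, // %100 = PHI [%7, bb1], [%8, bb2]
      {false, {42}},             // first non-PHI: scanning can stop here
  };

  for (const auto &I : Block) {
    if (!I.isPHI())
      break;                     // PHIs are grouped at the block head
    for (size_t i = 1, e = I.Ops.size(); i != e; i += 2)
      std::printf("reg %d from pred bb%d\n", I.Ops[i], I.Ops[i + 1]);
  }
  return 0;
}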
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - BBInfo &MInfo = MBBInfoMap[MFI]; + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; for (RegSet::iterator I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; ++I) if (MInfo.regsKilled.count(*I)) { - report("Virtual register killed in block, but needed live out.", MFI); + report("Virtual register killed in block, but needed live out.", &MBB); *OS << "Virtual register " << PrintReg(*I) << " is used after the block.\n"; } @@ -1307,20 +1301,19 @@ void MachineVerifier::verifyLiveVariables() { for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); - for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); - MFI != MFE; ++MFI) { - BBInfo &MInfo = MBBInfoMap[MFI]; + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; // Our vregsRequired should be identical to LiveVariables' AliveBlocks if (MInfo.vregsRequired.count(Reg)) { - if (!VI.AliveBlocks.test(MFI->getNumber())) { - report("LiveVariables: Block missing from AliveBlocks", MFI); + if (!VI.AliveBlocks.test(MBB.getNumber())) { + report("LiveVariables: Block missing from AliveBlocks", &MBB); *OS << "Virtual register " << PrintReg(Reg) << " must be live through the block.\n"; } } else { - if (VI.AliveBlocks.test(MFI->getNumber())) { - report("LiveVariables: Block should not be in AliveBlocks", MFI); + if (VI.AliveBlocks.test(MBB.getNumber())) { + report("LiveVariables: Block should not be in AliveBlocks", &MBB); *OS << "Virtual register " << PrintReg(Reg) << " is not needed live through the block.\n"; } @@ -1675,32 +1668,31 @@ void MachineVerifier::verifyStackFrame() { } // Update stack state by checking contents of MBB. - for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) { - if (I->getOpcode() == FrameSetupOpcode) { + for (const auto &I : *MBB) { + if (I.getOpcode() == FrameSetupOpcode) { // The first operand of a FrameOpcode should be i32. - int Size = I->getOperand(0).getImm(); + int Size = I.getOperand(0).getImm(); assert(Size >= 0 && "Value should be non-negative in FrameSetup and FrameDestroy.\n"); if (BBState.ExitIsSetup) - report("FrameSetup is after another FrameSetup", I); + report("FrameSetup is after another FrameSetup", &I); BBState.ExitValue -= Size; BBState.ExitIsSetup = true; } - if (I->getOpcode() == FrameDestroyOpcode) { + if (I.getOpcode() == FrameDestroyOpcode) { // The first operand of a FrameOpcode should be i32. - int Size = I->getOperand(0).getImm(); + int Size = I.getOperand(0).getImm(); assert(Size >= 0 && "Value should be non-negative in FrameSetup and FrameDestroy.\n"); if (!BBState.ExitIsSetup) - report("FrameDestroy is not after a FrameSetup", I); + report("FrameDestroy is not after a FrameSetup", &I); int AbsSPAdj = BBState.ExitValue < 0 ? 
-BBState.ExitValue : BBState.ExitValue; if (BBState.ExitIsSetup && AbsSPAdj != Size) { - report("FrameDestroy is after FrameSetup ", I); + report("FrameDestroy is after FrameSetup ", &I); *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" << AbsSPAdj << ">.\n"; } diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 56cb673..95a2934 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "phi-opt" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -23,6 +22,8 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; +#define DEBUG_TYPE "phi-opt" + STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 0e9df58..c8d0819 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "phielim" #include "llvm/CodeGen/Passes.h" #include "PHIEliminationUtils.h" #include "llvm/ADT/STLExtras.h" @@ -35,6 +34,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "phielim" + static cl::opt DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), cl::Hidden, cl::desc("Disable critical edge splitting " @@ -377,7 +378,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. - MachineInstr *NewSrcInstr = 0; + MachineInstr *NewSrcInstr = nullptr; if (!reusedIncoming && IncomingReg) { if (SrcUndef) { // The source register is undefined, so there is no need for a real @@ -531,13 +532,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, /// used later to determine when the vreg is killed in the BB. /// void PHIElimination::analyzePHINodes(const MachineFunction& MF) { - for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) - for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) - ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(), - BBI->getOperand(i).getReg())]; + for (const auto &MBB : MF) + for (const auto &BBI : MBB) { + if (!BBI.isPHI()) + break; + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) + ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(), + BBI.getOperand(i).getReg())]; + } } bool PHIElimination::SplitPHIEdges(MachineFunction &MF, @@ -546,7 +548,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. - const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0; + const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr; bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader(); bool Changed = false; @@ -563,7 +565,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // out-of-line blocks into the loop which is very bad for code placement. if (PreMBB == &MBB && !SplitAllCriticalEdges) continue; - const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0; + const MachineLoop *PreLoop = MLI ? 
MLI->getLoopFor(PreMBB) : nullptr; if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges) continue; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 080b20d..b3f7198 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -84,7 +84,7 @@ static cl::opt PrintGCInfo("print-gc", cl::Hidden, cl::desc("Dump garbage collector data")); static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, cl::desc("Verify generated machine code"), - cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); + cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=nullptr)); static cl::opt PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), @@ -126,7 +126,7 @@ static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID, case cl::BOU_TRUE: if (TargetID.isValid()) return TargetID; - if (StandardID == 0) + if (StandardID == nullptr) report_fatal_error("Target cannot enable pass"); return StandardID; case cl::BOU_FALSE: @@ -232,8 +232,8 @@ TargetPassConfig::~TargetPassConfig() { // Out of line constructor provides default values for pass options and // registers all common codegen passes. TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) - : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0), - Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false), + : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr), + Started(true), Stopped(false), TM(tm), Impl(nullptr), Initialized(false), DisableVerify(false), EnableTailMerge(true) { @@ -274,7 +274,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetPassConfig::TargetPassConfig() - : ImmutablePass(ID), PM(0) { + : ImmutablePass(ID), PM(nullptr) { llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); } @@ -332,7 +332,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { IdentifyingPassPtr TargetID = getPassSubstitution(PassID); IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); if (!FinalPtr.isValid()) - return 0; + return nullptr; Pass *P; if (FinalPtr.isInstance()) @@ -384,8 +384,10 @@ void TargetPassConfig::addIRPasses() { // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. - if (!DisableVerify) + if (!DisableVerify) { addPass(createVerifierPass()); + addPass(createDebugInfoVerifierPass()); + } // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { @@ -443,6 +445,12 @@ void TargetPassConfig::addCodeGenPrepare() { void TargetPassConfig::addISelPrepare() { addPreISel(); + // Need to verify DebugInfo *before* creating the stack protector analysis. + // It's a function pass, and verifying between it and its users causes a + // crash. + if (!DisableVerify) + addPass(createDebugInfoVerifierPass()); + addPass(createStackProtectorPass(TM)); if (PrintISelInput) @@ -620,7 +628,7 @@ MachinePassRegistry RegisterRegAlloc::Registry; /// A dummy default pass factory indicates whether the register allocator is /// overridden on the command line. 
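Note: the recurring #define DEBUG_TYPE moves in the files above and below all follow one rule: define the macro after the last #include, so the definition cannot leak into, or collide with, a header that also touches DEBUG_TYPE. An illustrative layout (not a file from this patch):

// SomePass.cpp
#include "llvm/CodeGen/Passes.h"   // headers first; they must not see our macro
#include "llvm/Support/Debug.h"
using namespace llvm;

// Defined only now, scoped to this translation unit's own code.
#define DEBUG_TYPE "some-pass"

void runExample() {
  DEBUG(dbgs() << "printed under -debug-only=" DEBUG_TYPE "\n");
}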
-static FunctionPass *useDefaultRegisterAllocator() { return 0; } +static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } static RegisterRegAlloc defaultRegAlloc("default", "pick register allocator based on -O option", diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index e18d9635..eeee93a 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -66,7 +66,6 @@ // C = copy A <-- same-bank copy //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "peephole-opt" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -81,6 +80,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "peephole-opt" + // Optimize Extensions static cl::opt Aggressive("aggressive-ext-opt", cl::Hidden, @@ -183,7 +184,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of // SrcReg:SubIdx should be replaced. bool UseSrcSubIdx = TM->getRegisterInfo()-> - getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0; + getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; // The source has other uses. See if we can replace the other uses with use of // the result of the extension. @@ -358,7 +359,7 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, unsigned SrcIdx, DefIdx; if (SrcSubReg && DefSubReg) return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, - SrcIdx, DefIdx) != NULL; + SrcIdx, DefIdx) != nullptr; // At most one of the register is a sub register, make it Src to avoid // duplicating the test. if (!SrcSubReg) { @@ -368,9 +369,9 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, // One of the register is a sub register, check if we can get a superclass. if (SrcSubReg) - return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != NULL; + return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; // Plain copy. - return TRI.getCommonSubClass(DefRC, SrcRC) != NULL; + return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; } /// \brief Get the index of the definition and source for \p Copy @@ -568,7 +569,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { TM = &MF.getTarget(); TII = TM->getInstrInfo(); MRI = &MF.getRegInfo(); - DT = Aggressive ? &getAnalysis() : 0; + DT = Aggressive ? &getAnalysis() : nullptr; bool Changed = false; @@ -643,7 +644,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and // we need it for markUsesInDebugValueAsUndef(). 
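Note: flags like Aggressive and SplitEdges in this patch are cl::opt<bool> globals; the command-line machinery registers them at static-initialization time, so declaring one is all it takes to add a flag. The shape of such a declaration, with an invented flag name:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical flag, same shape as the real ones above.
static cl::opt<bool>
ExampleAggressive("example-aggressive-opt", cl::Hidden,
                  cl::desc("Aggressively run the example optimization"),
                  cl::init(false));

bool shouldRunAggressively() { return ExampleAggressive; }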
unsigned FoldedReg = FoldAsLoadDefReg; - MachineInstr *DefMI = 0; + MachineInstr *DefMI = nullptr; MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, FoldAsLoadDefReg, DefMI); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index a13e51f..db3933e 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "post-RA-sched" #include "llvm/CodeGen/Passes.h" #include "AggressiveAntiDepBreaker.h" #include "AntiDepBreaker.h" @@ -47,6 +46,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "post-RA-sched" + STATISTIC(NumNoops, "Number of noops inserted"); STATISTIC(NumStalls, "Number of pipeline stalls"); STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies"); @@ -205,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList( ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL)); + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr)); } SchedulePostRATDList::~SchedulePostRATDList() { @@ -355,7 +356,7 @@ void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { // Reset the hazard recognizer and anti-dep breaker. HazardRec->Reset(); - if (AntiDepBreak != NULL) + if (AntiDepBreak) AntiDepBreak->StartBlock(BB); } @@ -365,7 +366,7 @@ void SchedulePostRATDList::schedule() { // Build the scheduling graph. buildSchedGraph(AA); - if (AntiDepBreak != NULL) { + if (AntiDepBreak) { unsigned Broken = AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, EndIndex, DbgValues); @@ -397,14 +398,14 @@ void SchedulePostRATDList::schedule() { /// instruction, which will not be scheduled. /// void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { - if (AntiDepBreak != NULL) + if (AntiDepBreak) AntiDepBreak->Observe(MI, Count, EndIndex); } /// FinishBlock - Clean up register live-range state. /// void SchedulePostRATDList::finishBlock() { - if (AntiDepBreak != NULL) + if (AntiDepBreak) AntiDepBreak->FinishBlock(); // Call the superclass. @@ -429,7 +430,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { dbgs() << "*** Scheduling failed! 
***\n"; SuccSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --SuccSU->NumPredsLeft; @@ -480,7 +481,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { void SchedulePostRATDList::emitNoop(unsigned CurCycle) { DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop + Sequence.push_back(nullptr); // NULL here means noop ++NumNoops; } @@ -532,7 +533,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); - SUnit *FoundSUnit = 0, *NotPreferredSUnit = 0; + SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr; bool HasNoopHazards = false; while (!AvailableQueue.empty()) { SUnit *CurSUnit = AvailableQueue.pop(); @@ -572,7 +573,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { AvailableQueue.push(NotPreferredSUnit); } - NotPreferredSUnit = 0; + NotPreferredSUnit = nullptr; } // Add the nodes that aren't ready back onto the available list. @@ -662,5 +663,5 @@ void SchedulePostRATDList::EmitSchedule() { BB->splice(++OrigPrivMI, BB, DbgValue); } DbgValues.clear(); - FirstDbgValue = NULL; + FirstDbgValue = nullptr; } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 360e8d7..3129927 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "processimplicitdefs" - #include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -21,6 +19,8 @@ using namespace llvm; +#define DEBUG_TYPE "processimplicitdefs" + namespace { /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def /// for each use. Add isUndef marker to implicit_def defs and their uses. diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 136b1ed..c74a42f 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pei" #include "PrologEpilogInserter.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/STLExtras.h" @@ -46,6 +45,8 @@ using namespace llvm; +#define DEBUG_TYPE "pei" + char PEI::ID = 0; char &llvm::PrologEpilogCodeInserterID = PEI::ID; @@ -114,7 +115,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); - RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr; FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); // Calculate the MaxCallFrameSize and AdjustsStack variables for the @@ -243,14 +244,14 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { MachineFrameInfo *MFI = F.getFrameInfo(); // Get the callee saved register list... - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; MaxCSFrameIndex = 0; // Early exit for targets which have no callee saved registers. 
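Note: emitNoop above records a pipeline bubble by pushing a null SUnit* into Sequence (the "NULL here means noop" comment), so EmitSchedule must treat null entries as "insert a target noop" rather than dereference them. The sentinel pattern, reduced:

#include <cstdio>
#include <vector>

struct SUnit { int Num; };

void emitNoopInstr() { std::printf("  <noop>\n"); }

int main() {
  std::vector<SUnit *> Sequence;
  SUnit A{0}, B{1};

  Sequence.push_back(&A);
  Sequence.push_back(nullptr); // sentinel: a stall cycle, not an instruction
  Sequence.push_back(&B);

  for (SUnit *SU : Sequence) {
    if (!SU) {                 // every consumer must check before dereferencing
      emitNoopInstr();
      continue;
    }
    std::printf("  SU(%d)\n", SU->Num);
  }
  return 0;
}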
- if (CSRegs == 0 || CSRegs[0] == 0) + if (!CSRegs || CSRegs[0] == 0) return; // In Naked functions we aren't going to save any registers. @@ -680,7 +681,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (Fn.getTarget().Options.EnableSegmentedStacks) + if (Fn.shouldSplitStack()) TFI.adjustForSegmentedStacks(Fn); // Emit additional code that is required to explicitly handle the stack in @@ -805,7 +806,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, // use that target machine register info object to eliminate // it. TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? NULL : RS); + FrameIndexVirtualScavenging ? nullptr : RS); // Reset the iterator if we were at the beginning of the BB. if (AtBeginning) { @@ -813,7 +814,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, DoIncr = false; } - MI = 0; + MI = nullptr; break; } @@ -845,13 +846,14 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // We might end up here again with a NULL iterator if we scavenged a // register for which we inserted spill code for definition by what was // originally the first instruction in BB. - if (I == MachineBasicBlock::iterator(NULL)) + if (I == MachineBasicBlock::iterator(nullptr)) I = BB->begin(); MachineInstr *MI = I; MachineBasicBlock::iterator J = std::next(I); MachineBasicBlock::iterator P = - I == BB->begin() ? MachineBasicBlock::iterator(NULL) : std::prev(I); + I == BB->begin() ? MachineBasicBlock::iterator(nullptr) + : std::prev(I); // RS should process this instruction before we might scavenge at this // location. This is because we might be replacing a virtual register diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 8564911..12b2c90 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -58,13 +58,9 @@ static const char *const PSVNames[] = { "ConstantPool" }; -// FIXME: THIS IS A HACK!!!! -// Eventually these should be uniqued on LLVMContext rather than in a managed -// static. For now, we can safely use the global context for the time being to -// squeak by. -PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : - Value(Type::getInt8PtrTy(getGlobalContext()), - Subclass) {} +PseudoSourceValue::PseudoSourceValue(bool isFixed) : isFixed(isFixed) {} + +PseudoSourceValue::~PseudoSourceValue() {} void PseudoSourceValue::printCustom(raw_ostream &O) const { O << PSVNames[this - PSVGlobals->PSVs]; diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 33584f8..894aee7 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" #include "Spiller.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumNewQueued , "Number of new live ranges queued"); // Temporary verification option until we can put verification inside @@ -110,7 +111,7 @@ void RegAllocBase::allocatePhysRegs() { if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! // Probably caused by an inline asm. 
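Note: getCalleeSavedRegs returns a zero-terminated array (register number 0 is the invalid register), so the rewritten early exit above covers both a null pointer and an empty list, and the usual walk simply runs until it hits the terminating 0. Reduced:

#include <cstdint>
#include <cstdio>

typedef uint16_t MCPhysReg; // register 0 is reserved as "no register"

void trackCalleeSaves(const MCPhysReg *CSRegs) {
  // Early exit for a missing or empty list, mirroring the PEI check.
  if (!CSRegs || CSRegs[0] == 0)
    return;
  for (unsigned i = 0; CSRegs[i] != 0; ++i)
    std::printf("callee-saved reg %u\n", (unsigned)CSRegs[i]);
}

int main() {
  static const MCPhysReg SavedRegs[] = {19, 20, 21, 0}; // 0 terminates
  trackCalleeSaves(SavedRegs);
  trackCalleeSaves(nullptr); // also fine
  return 0;
}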
- MachineInstr *MI = 0; + MachineInstr *MI = nullptr; for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end(); I != E; ) { diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 68bd4b5..b333c36 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -65,7 +65,8 @@ protected: LiveRegMatrix *Matrix; RegisterClassInfo RegClassInfo; - RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {} + RegAllocBase() + : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {} virtual ~RegAllocBase() {} diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index b8c04fc..b722098 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "LiveDebugVariables.h" @@ -41,6 +40,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", createBasicRegisterAllocator); @@ -93,7 +94,7 @@ public: LiveInterval *dequeue() override { if (Queue.empty()) - return 0; + return nullptr; LiveInterval *LI = Queue.top(); Queue.pop(); return LI; @@ -156,7 +157,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { } void RABasic::releaseMemory() { - SpillerInstance.reset(0); + SpillerInstance.reset(nullptr); } diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 8dc44f5..97b9f76 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" @@ -38,6 +37,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumStores, "Number of stores added"); STATISTIC(NumLoads , "Number of loads added"); STATISTIC(NumCopies, "Number of copies coalesced"); @@ -75,7 +76,7 @@ namespace { bool Dirty; // Register needs spill. explicit LiveReg(unsigned v) - : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {} + : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); @@ -319,7 +320,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // now. 
LRIDbgValues.clear(); if (SpillKill) - LR.LastUse = 0; // Don't kill register again + LR.LastUse = nullptr; // Don't kill register again } killVirtReg(LRI); } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 6a623b8..aa7c178 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "InterferenceCache.h" @@ -37,7 +36,9 @@ #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -47,6 +48,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); STATISTIC(NumEvicted, "Number of interferences evicted"); @@ -71,6 +74,11 @@ static cl::opt LastChanceRecoloringMaxInterference( " interference at a time"), cl::init(8)); +static cl::opt +ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, + cl::desc("Exhaustive Search for registers bypassing the depth " + "and interference cutoffs of last chance recoloring")); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -147,6 +155,22 @@ class RAGreedy : public MachineFunctionPass, RS_Done }; + // Enum CutOffStage to keep a track whether the register allocation failed + // because of the cutoffs encountered in last chance recoloring. + // Note: This is used as bitmask. New value should be next power of 2. + enum CutOffStage { + // No cutoffs encountered + CO_None = 0, + + // lcr-max-depth cutoff encountered + CO_Depth = 1, + + // lcr-max-interf cutoff encountered + CO_Interf = 2 + }; + + uint8_t CutOffInfo; + #ifndef NDEBUG static const char *const StageName[]; #endif @@ -258,6 +282,9 @@ class RAGreedy : public MachineFunctionPass, /// NoCand which indicates the stack interval. SmallVector BundleCand; + /// Callee-save register cost, calculated once per machine function. + BlockFrequency CSRCost; + public: RAGreedy(); @@ -326,6 +353,7 @@ private: unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, unsigned PhysReg, unsigned &CostPerUseLimit, SmallVectorImpl &NewVRegs); + void initializeCSRCost(); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, @@ -447,7 +475,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { } void RAGreedy::releaseMemory() { - SpillerInstance.reset(0); + SpillerInstance.reset(nullptr); ExtraRegInfo.clear(); GlobalCand.clear(); } @@ -514,7 +542,7 @@ LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { if (CurQueue.empty()) - return 0; + return nullptr; LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); CurQueue.pop(); return LI; @@ -1910,8 +1938,9 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, // If there is LastChanceRecoloringMaxInterference or more interferences, // chances are one would not be recolorable. 
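Note: CutOffStage below is deliberately a bitmask (each enumerator a distinct power of two, as its comment says) so a single recoloring attempt can record the depth and interference cutoffs independently; selectOrSplit then decodes the combination into one of three diagnostics. The encoding in isolation:

#include <cstdint>
#include <cstdio>

enum CutOffStage : uint8_t {
  CO_None   = 0, // no cutoff encountered
  CO_Depth  = 1, // lcr-max-depth hit
  CO_Interf = 2, // lcr-max-interf hit
};

int main() {
  uint8_t CutOffInfo = CO_None;

  CutOffInfo |= CO_Interf; // recorded deep inside the search...
  CutOffInfo |= CO_Depth;  // ...possibly both, on different paths

  uint8_t Hit = CutOffInfo & (CO_Depth | CO_Interf);
  if (Hit == CO_Depth)
    std::printf("depth cutoff only\n");
  else if (Hit == CO_Interf)
    std::printf("interference cutoff only\n");
  else if (Hit == (CO_Depth | CO_Interf))
    std::printf("both cutoffs\n");
  return 0;
}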
if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= - LastChanceRecoloringMaxInterference) { + LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { DEBUG(dbgs() << "Early abort: too many interferences.\n"); + CutOffInfo |= CO_Interf; return false; } for (unsigned i = Q.interferingVRegs().size(); i; --i) { @@ -1982,8 +2011,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, // We may want to reconsider that if we end up with a too large search space // for target with hundreds of registers. // Indeed, in that case we may want to cut the search space earlier. - if (Depth >= LastChanceRecoloringMaxDepth) { + if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { DEBUG(dbgs() << "Abort because max depth has been reached.\n"); + CutOffInfo |= CO_Depth; return ~0u; } @@ -2108,8 +2138,26 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { + CutOffInfo = CO_None; + LLVMContext &Ctx = MF->getFunction()->getContext(); SmallVirtRegSet FixedRegisters; - return selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + if (Reg == ~0U && (CutOffInfo != CO_None)) { + uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); + if (CutOffEncountered == CO_Depth) + Ctx.emitError("register allocation failed: maximum depth for recoloring " + "reached. Use -fexhaustive-register-search to skip " + "cutoffs"); + else if (CutOffEncountered == CO_Interf) + Ctx.emitError("register allocation failed: maximum interference for " + "recoloring reached. Use -fexhaustive-register-search " + "to skip cutoffs"); + else if (CutOffEncountered == (CO_Depth | CO_Interf)) + Ctx.emitError("register allocation failed: maximum interference and " + "depth for recoloring reached. Use " + "-fexhaustive-register-search to skip cutoffs"); + } + return Reg; } /// Using a CSR for the first time has a cost because it causes push|pop @@ -2123,10 +2171,6 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, unsigned PhysReg, unsigned &CostPerUseLimit, SmallVectorImpl &NewVRegs) { - // We use the larger one out of the command-line option and the value report - // by TRI. - BlockFrequency CSRCost(std::max((unsigned)CSRFirstTimeCost, - TRI->getCSRFirstUseCost())); if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { // We choose spill over using the CSR for the first time if the spill cost // is lower than CSRCost. @@ -2144,9 +2188,9 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, // the cost of splitting is lower than CSRCost. SA->analyze(&VirtReg); unsigned NumCands = 0; - unsigned BestCand = - calculateRegionSplitCost(VirtReg, Order, CSRCost, NumCands, - true/*IgnoreCSR*/); + BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. + unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, + NumCands, true /*IgnoreCSR*/); if (BestCand == NoCand) // Use the CSR if we can't find a region split below CSRCost. return PhysReg; @@ -2158,6 +2202,31 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, return PhysReg; } +void RAGreedy::initializeCSRCost() { + // We use the larger one out of the command-line option and the value report + // by TRI. + CSRCost = BlockFrequency( + std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); + if (!CSRCost.getFrequency()) + return; + + // Raw cost is relative to Entry == 2^14; scale it appropriately. 
+ uint64_t ActualEntry = MBFI->getEntryFreq(); + if (!ActualEntry) { + CSRCost = 0; + return; + } + uint64_t FixedEntry = 1 << 14; + if (ActualEntry < FixedEntry) + CSRCost *= BranchProbability(ActualEntry, FixedEntry); + else if (ActualEntry <= UINT32_MAX) + // Invert the fraction and divide. + CSRCost /= BranchProbability(FixedEntry, ActualEntry); + else + // Can't use BranchProbability in general, since it takes 32-bit numbers. + CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); +} + unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs, SmallVirtRegSet &FixedRegisters, @@ -2175,8 +2244,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical // register. - if ((CSRFirstTimeCost || TRI->getCSRFirstUseCost()) && - CSRFirstUse && NewVRegs.empty()) { + if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) { unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, CostPerUseLimit, NewVRegs); if (CSRReg || !NewVRegs.empty()) @@ -2258,6 +2326,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis(); DebugVars = &getAnalysis(); + initializeCSRCost(); + calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); DEBUG(LIS->dump()); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 96dbd9a..b8d2325 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -29,8 +29,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" - #include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" #include "Spiller.h" @@ -48,6 +46,7 @@ #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -59,6 +58,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + static RegisterRegAlloc registerPBQPRepAlloc("pbqp", "PBQP register allocator", createDefaultPBQPRegisterAllocator); @@ -87,7 +88,7 @@ public: static char ID; /// Construct a PBQP register allocator. - RegAllocPBQP(std::unique_ptr &b, char *cPassID=0) + RegAllocPBQP(std::unique_ptr &b, char *cPassID=nullptr) : MachineFunctionPass(ID), builder(b.release()), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); @@ -215,7 +216,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, // Compute an initial allowed set for the current vreg. typedef std::vector VRAllowed; VRAllowed vrAllowed; - ArrayRef rawOrder = trc->getRawAllocationOrder(*mf); + ArrayRef rawOrder = trc->getRawAllocationOrder(*mf); for (unsigned i = 0; i != rawOrder.size(); ++i) { unsigned preg = rawOrder[i]; if (mri->isReserved(preg)) @@ -320,17 +321,9 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, // Scan the machine function and add a coalescing cost whenever CoalescerPair // gives the Ok. 
- for (MachineFunction::const_iterator mbbItr = mf->begin(), - mbbEnd = mf->end(); - mbbItr != mbbEnd; ++mbbItr) { - const MachineBasicBlock *mbb = &*mbbItr; - - for (MachineBasicBlock::const_iterator miItr = mbb->begin(), - miEnd = mbb->end(); - miItr != miEnd; ++miItr) { - const MachineInstr *mi = &*miItr; - - if (!cp.setRegisters(mi)) { + for (const auto &mbb : *mf) { + for (const auto &mi : mbb) { + if (!cp.setRegisters(&mi)) { continue; // Not coalescable. } @@ -345,7 +338,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, // value plucked randomly out of the air. PBQP::PBQPNum cBenefit = - copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, mi); + copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, &mi); if (cp.isPhys()) { if (!mf->getRegInfo().isAllocatable(dst)) { diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index aa84446..8b5445c 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -25,12 +24,14 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + static cl::opt StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"), cl::desc("Limit all regclasses to N registers")); -RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0) -{} +RegisterClassInfo::RegisterClassInfo() + : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {} void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { bool Update = false; @@ -151,7 +152,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { /// nonoverlapping reserved registers. However, computing the allocation order /// for all register classes would be too expensive. unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { - const TargetRegisterClass *RC = 0; + const TargetRegisterClass *RC = nullptr; unsigned NumRCUnits = 0; for (TargetRegisterInfo::regclass_iterator RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) { diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 682c26c..5aaeb87 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -42,6 +41,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(numJoins , "Number of interval joins performed"); STATISTIC(numCrossRCs , "Number of cross class joins performed"); STATISTIC(numCommutes , "Number of instruction commuting performed"); @@ -195,7 +196,7 @@ namespace { bool runOnMachineFunction(MachineFunction&) override; /// print - Implement the dump method. 
- void print(raw_ostream &O, const Module* = 0) const override; + void print(raw_ostream &O, const Module* = nullptr) const override; }; } /// end anonymous namespace @@ -240,9 +241,8 @@ static bool isSplitEdge(const MachineBasicBlock *MBB) { if (MBB->pred_size() != 1 || MBB->succ_size() != 1) return false; - for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ++MII) { - if (!MII->isCopyLike() && !MII->isUnconditionalBranch()) + for (const auto &MI : *MBB) { + if (!MI.isCopyLike() && !MI.isUnconditionalBranch()) return false; } return true; @@ -251,7 +251,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) { SrcReg = DstReg = 0; SrcIdx = DstIdx = 0; - NewRC = 0; + NewRC = nullptr; Flipped = CrossClass = false; unsigned Src, Dst, SrcSub, DstSub; @@ -397,7 +397,8 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { void RegisterCoalescer::eliminateDeadDefs() { SmallVector NewRegs; - LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); + LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, + nullptr, this).eliminateDeadDefs(DeadDefs); } // Callback from eliminateDeadDefs(). @@ -844,6 +845,27 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/)); + // Record small dead def live-ranges for all the subregisters + // of the destination register. + // Otherwise, variables that live through may miss some + // interferences, thus creating an invalid allocation. + // E.g., i386 code: + // vreg1 = somedef ; vreg1 GR8 + // vreg2 = remat ; vreg2 GR32 + // CL = COPY vreg2.sub_8bit + // = somedef vreg1 ; vreg1 GR8 + // => + // vreg1 = somedef ; vreg1 GR8 + // ECX = remat ; CL + // = somedef vreg1 ; vreg1 GR8 + // vreg1 will see the interferences with CL but not with CH since + // no live-ranges would have been created for ECX. + // Fix that! + SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); + for (MCRegUnitIterator Units(NewMI->getOperand(0).getReg(), TRI); + Units.isValid(); ++Units) + if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) + LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } if (NewMI->getOperand(0).getSubReg()) @@ -902,7 +924,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, // No intervals are live-in to CopyMI - it is undef. if (CP.isFlipped()) DstInt = SrcInt; - SrcInt = 0; + SrcInt = nullptr; VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); assert(DeadVNI && "No value defined in DstInt"); @@ -931,7 +953,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx) { bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); + LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); SmallPtrSet Visited; for (MachineRegisterInfo::reg_instr_iterator @@ -1355,7 +1377,7 @@ class JoinVals { bool PrunedComputed; Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), - RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false), + RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false), Pruned(false), PrunedComputed(false) {} bool isAnalyzed() const { return WriteLanes != 0; } @@ -1461,7 +1483,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { } // Get the instruction defining this value, compute the lanes written.
- const MachineInstr *DefMI = 0; + const MachineInstr *DefMI = nullptr; if (VNI->isPHIDef()) { // Conservatively assume that all lanes in a PHI are valid. V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); @@ -2085,14 +2107,14 @@ copyCoalesceWorkList(MutableArrayRef CurrList) { // Skip instruction pointers that have already been erased, for example by // dead code elimination. if (ErasedInstrs.erase(CurrList[i])) { - CurrList[i] = 0; + CurrList[i] = nullptr; continue; } bool Again = false; bool Success = joinCopy(CurrList[i], Again); Progress |= Success; if (Success || !Again) - CurrList[i] = 0; + CurrList[i] = nullptr; } return Progress; } @@ -2132,7 +2154,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { CurrList(WorkList.begin() + PrevSize, WorkList.end()); if (copyCoalesceWorkList(CurrList)) WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), - (MachineInstr*)0), WorkList.end()); + (MachineInstr*)nullptr), WorkList.end()); } void RegisterCoalescer::coalesceLocals() { diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 47c3df1..e57ceab 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -61,14 +61,14 @@ namespace llvm { public: CoalescerPair(const TargetRegisterInfo &tri) : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), - Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} /// Create a CoalescerPair representing a virtreg-to-physreg copy. /// No need to call setRegisters(). CoalescerPair(unsigned VirtReg, unsigned PhysReg, const TargetRegisterInfo &tri) : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), - Partial(false), CrossClass(false), Flipped(false), NewRC(0) {} + Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} /// setRegisters - set registers to match the copy instruction MI. Return /// false if MI is not a coalescable copy instruction. diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 97817da..b2909e0 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -154,8 +154,8 @@ const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { } void RegPressureTracker::reset() { - MBB = 0; - LIS = 0; + MBB = nullptr; + LIS = nullptr; CurrSetPressure.clear(); LiveThruPressure.clear(); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index bfd26dc..72b6285 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reg-scavenging" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -29,6 +28,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "reg-scavenging" + /// setUsed - Set the register and its sub-registers as being used. void RegScavenger::setUsed(unsigned Reg) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); @@ -47,7 +48,7 @@ void RegScavenger::initRegState() { for (SmallVectorImpl::iterator I = Scavenged.begin(), IE = Scavenged.end(); I != IE; ++I) { I->Reg = 0; - I->Restore = NULL; + I->Restore = nullptr; } // All registers started out unused. 
@@ -91,8 +92,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { // Create callee-saved registers bitvector. CalleeSavedRegs.resize(NumPhysRegs); - const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); - if (CSRegs != NULL) + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + if (CSRegs != nullptr) for (unsigned i = 0; CSRegs[i]; ++i) CalleeSavedRegs.set(CSRegs[i]); } @@ -162,7 +163,7 @@ void RegScavenger::unprocess() { } if (MBBI == MBB->begin()) { - MBBI = MachineBasicBlock::iterator(NULL); + MBBI = MachineBasicBlock::iterator(nullptr); Tracking = false; } else --MBBI; @@ -187,7 +188,7 @@ void RegScavenger::forward() { continue; I->Reg = 0; - I->Restore = NULL; + I->Restore = nullptr; } if (MI->isDebugValue()) @@ -223,7 +224,7 @@ void RegScavenger::forward() { break; } if (!SubUsed) { - MBB->getParent()->verify(NULL, "In Register Scavenger"); + MBB->getParent()->verify(nullptr, "In Register Scavenger"); llvm_unreachable("Using an undefined register!"); } (void)SubUsed; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index d08eb65..6a2a080 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -25,6 +24,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + #ifndef NDEBUG static cl::opt StressSchedOpt( "stress-sched", cl::Hidden, cl::init(false), @@ -55,7 +56,7 @@ void ScheduleDAG::clearDAG() { /// getInstrDesc helper to handle SDNodes. const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { - if (!Node || !Node->isMachineOpcode()) return NULL; + if (!Node || !Node->isMachineOpcode()) return nullptr; return &TII->get(Node->getMachineOpcode()); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index c8328ad..92a9a30 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "misched" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" @@ -41,18 +40,14 @@ using namespace llvm; +#define DEBUG_TYPE "misched" + static cl::opt EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Enable use of AA during MI GAD construction")); -// FIXME: Enable the use of TBAA. There are two known issues preventing this: -// 1. Stack coloring does not update TBAA when merging allocas -// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations. -// Because BasicAA does not handle inttoptr, we'll often miss basic type -// punning idioms that we need to catch so we don't miscompile real-world -// code. 
static cl::opt UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, - cl::init(false), cl::desc("Enable use of TBAA during MI GAD construction")); + cl::init(true), cl::desc("Enable use of TBAA during MI GAD construction")); ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, @@ -62,7 +57,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, LiveIntervals *lis) : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis), IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags), - CanHandleTerminators(false), FirstDbgValue(0) { + CanHandleTerminators(false), FirstDbgValue(nullptr) { assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals"); DbgValues.clear(); assert(!(IsPostRA && MRI.getNumVirtRegs()) && @@ -104,7 +99,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. static void getUnderlyingObjects(const Value *V, SmallVectorImpl &Objects) { - SmallPtrSet Visited; + SmallPtrSet Visited; SmallVector Working(1, V); do { V = Working.pop_back_val(); @@ -130,7 +125,8 @@ static void getUnderlyingObjects(const Value *V, } while (!Working.empty()); } -typedef SmallVector, 4> +typedef PointerUnion ValueType; +typedef SmallVector, 4> UnderlyingObjectsVector; /// getUnderlyingObjectsForInstr - If this machine instr has memory reference @@ -140,25 +136,27 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo *MFI, UnderlyingObjectsVector &Objects) { if (!MI->hasOneMemOperand() || - !(*MI->memoperands_begin())->getValue() || + (!(*MI->memoperands_begin())->getValue() && + !(*MI->memoperands_begin())->getPseudoValue()) || (*MI->memoperands_begin())->isVolatile()) return; - const Value *V = (*MI->memoperands_begin())->getValue(); - if (!V) - return; - - if (const PseudoSourceValue *PSV = dyn_cast(V)) { + if (const PseudoSourceValue *PSV = + (*MI->memoperands_begin())->getPseudoValue()) { // For now, ignore PseudoSourceValues which may alias LLVM IR values // because the code that uses this function has no way to cope with // such aliases. if (!PSV->isAliased(MFI)) { bool MayAlias = PSV->mayAlias(MFI); - Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); } return; } + const Value *V = (*MI->memoperands_begin())->getValue(); + if (!V) + return; + SmallVector Objs; getUnderlyingObjects(V, Objs); @@ -166,8 +164,6 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, I != IE; ++I) { V = *I; - assert(!isa(V) && "Underlying value is a stack slot!"); - if (!isIdentifiedObject(V)) { Objects.clear(); return; @@ -183,7 +179,7 @@ void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { void ScheduleDAGInstrs::finishBlock() { // Subclasses should no longer refer to the old block. - BB = 0; + BB = nullptr; } /// Initialize the DAG and common scheduler state for the current scheduling @@ -215,7 +211,7 @@ void ScheduleDAGInstrs::exitRegion() { /// are too high to be hidden by the branch or when the liveout registers /// used by instructions in the fallthrough block. void ScheduleDAGInstrs::addSchedBarrierDeps() { - MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0; + MachineInstr *ExitMI = RegionEnd != BB->end() ? 
&*RegionEnd : nullptr; ExitSU.setInstr(ExitMI); bool AllDepKnown = ExitMI && (ExitMI->isCall() || ExitMI->isBarrier()); @@ -272,7 +268,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { // Adjust the dependence latency using operand def/use information, // then allow the target to perform its own adjustments. int UseOp = I->OpIdx; - MachineInstr *RegUse = 0; + MachineInstr *RegUse = nullptr; SDep Dep; if (UseOp < 0) Dep = SDep(SU, SDep::Artificial); @@ -483,6 +479,15 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, if ((*MI->memoperands_begin())->isVolatile() || MI->hasUnmodeledSideEffects()) return true; + + if ((*MI->memoperands_begin())->getPseudoValue()) { + // Similarly to getUnderlyingObjectForInstr: + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + return true; + } + const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) return true; @@ -491,19 +496,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, getUnderlyingObjects(V, Objs); for (SmallVectorImpl::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { - V = *I; - - if (const PseudoSourceValue *PSV = dyn_cast(V)) { - // Similarly to getUnderlyingObjectForInstr: - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - if (PSV->isAliased(MFI)) - return true; - } - // Does this pointer refer to a distinct and identifiable object? - if (!isIdentifiedObject(V)) + if (!isIdentifiedObject(*I)) return true; } @@ -541,6 +535,9 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, MachineMemOperand *MMOa = *MIa->memoperands_begin(); MachineMemOperand *MMOb = *MIb->memoperands_begin(); + if (!MMOa->getValue() || !MMOb->getValue()) + return true; + // The following interface to AA is fashioned after DAGCombiner::isAlias // and operates with MachineMemOperand offset with some important // assumptions: @@ -566,9 +563,9 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, AliasAnalysis::AliasResult AAResult = AA->alias( AliasAnalysis::Location(MMOa->getValue(), Overlapa, - UseTBAA ? MMOa->getTBAAInfo() : 0), + UseTBAA ? MMOa->getTBAAInfo() : nullptr), AliasAnalysis::Location(MMOb->getValue(), Overlapb, - UseTBAA ? MMOb->getTBAAInfo() : 0)); + UseTBAA ? MMOb->getTBAAInfo() : nullptr)); return (AAResult != AliasAnalysis::NoAlias); } @@ -703,10 +700,14 @@ void ScheduleDAGInstrs::initSUnits() { // Assign the Latency field of SU using target-provided information. SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); - // If this SUnit uses an unbuffered resource, mark it as such. - // These resources are used for in-order execution pipelines within an - // out-of-order core and are identified by BufferSize=1. BufferSize=0 is - // used for dispatch/issue groups and is not considered here. + // If this SUnit uses a reserved or unbuffered resource, mark it as such. + // + // Reserved resources block an instruction from issuing and stall the + // entire pipeline. These are identified by BufferSize=0. + // + // Unbuffered resources prevent execution of subsequent instructions that + // require the same resources. This is used for in-order execution pipelines + // within an out-of-order core. These are identified by BufferSize=1. 
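+ // (Illustrative note, not part of the original patch: a long-latency, + // unpipelined divider would typically be declared with BufferSize=0, + // stalling the entire pipeline while it is busy, whereas an in-order + // issue pipe declared with BufferSize=1 only blocks later instructions + // that need that same resource.)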
if (SchedModel.hasInstrSchedModel()) { const MCSchedClassDesc *SC = getSchedClass(SU); for (TargetSchedModel::ProcResIter @@ -736,7 +737,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, const TargetSubtargetInfo &ST = TM.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); - AliasAnalysis *AAForDep = UseAA ? AA : 0; + AliasAnalysis *AAForDep = UseAA ? AA : nullptr; MISUnitMap.clear(); ScheduleDAG::clearDAG(); @@ -751,20 +752,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // to top. // Remember where a generic side-effecting instruction is as we proceed. - SUnit *BarrierChain = 0, *AliasChain = 0; + SUnit *BarrierChain = nullptr, *AliasChain = nullptr; // Memory references to specific known memory locations are tracked // so that they can be given more precise dependencies. We track // separately the known memory locations that may alias and those // that are known not to alias - MapVector > AliasMemDefs, NonAliasMemDefs; - MapVector > AliasMemUses, NonAliasMemUses; + MapVector > AliasMemDefs, NonAliasMemDefs; + MapVector > AliasMemUses, NonAliasMemUses; std::set RejectMemNodes; // Remove any stale debug info; sometimes BuildSchedGraph is called again // without emitting the info from the previous call. DbgValues.clear(); - FirstDbgValue = NULL; + FirstDbgValue = nullptr; assert(Defs.empty() && Uses.empty() && "Only BuildGraph should update Defs/Uses"); @@ -781,13 +782,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addSchedBarrierDeps(); // Walk the list of instructions, from bottom moving up. - MachineInstr *DbgMI = NULL; + MachineInstr *DbgMI = nullptr; for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { MachineInstr *MI = std::prev(MII); if (MI && DbgMI) { DbgValues.push_back(std::make_pair(DbgMI, MI)); - DbgMI = NULL; + DbgMI = nullptr; } if (MI->isDebugValue()) { @@ -798,8 +799,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(SU && "No SUnit mapped to this MI"); if (RPTracker) { - PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0; - RPTracker->recede(/*LiveUses=*/0, PDiff); + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr; + RPTracker->recede(/*LiveUses=*/nullptr, PDiff); assert(RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI"); } @@ -854,13 +855,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (isGlobalMemoryObject(AA, MI)) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias.
- for (MapVector >::iterator I = + for (MapVector >::iterator I = NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) { I->second[i]->addPred(SDep(SU, SDep::Barrier)); } } - for (MapVector >::iterator I = + for (MapVector >::iterator I = NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) { SDep Dep(SU, SDep::Barrier); @@ -894,12 +895,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); - for (MapVector >::iterator I = + for (MapVector >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes); } - for (MapVector >::iterator I = + for (MapVector >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, @@ -922,7 +923,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, bool MayAlias = false; for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { - const Value *V = K->getPointer(); + ValueType V = K->getPointer(); bool ThisMayAlias = K->getInt(); if (ThisMayAlias) MayAlias = true; @@ -930,9 +931,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // A store to a specific PseudoSourceValue. Add precise dependencies. // Record the def in MemDefs, first adding a dep if there is // an existing def. - MapVector >::iterator I = + MapVector >::iterator I = ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector >::iterator IE = + MapVector >::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) @@ -955,9 +956,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } } // Handle the uses in MemUses, if there are any. - MapVector >::iterator J = + MapVector >::iterator J = ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); - MapVector >::iterator JE = + MapVector >::iterator JE = ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) @@ -986,11 +987,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // we have lost all RejectMemNodes below barrier. if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); - - if (!ExitSU.isPred(SU)) - // Push store's up a bit to avoid them getting in between cmp - // and branches. - ExitSU.addPred(SDep(SU, SDep::Artificial)); } else if (MI->mayLoad()) { bool MayAlias = true; if (MI->isInvariantLoad(AA)) { @@ -1002,7 +998,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (Objs.empty()) { // A load with no underlying object. Depend on all // potentially aliasing stores. 
- for (MapVector >::iterator I = + for (MapVector >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) for (unsigned i = 0, e = I->second.size(); i != e; ++i) addChainDependency(AAForDep, MFI, SU, I->second[i], @@ -1016,16 +1012,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (UnderlyingObjectsVector::iterator J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { - const Value *V = J->getPointer(); + ValueType V = J->getPointer(); bool ThisMayAlias = J->getInt(); if (ThisMayAlias) MayAlias = true; // A load from a specific PseudoSourceValue. Add precise dependencies. - MapVector >::iterator I = + MapVector >::iterator I = ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - MapVector >::iterator IE = + MapVector >::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) for (unsigned i = 0, e = I->second.size(); i != e; ++i) @@ -1429,7 +1425,7 @@ public: const SDep *backtrack() { DFSStack.pop_back(); - return DFSStack.empty() ? 0 : std::prev(DFSStack.back().second); + return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second); } const SUnit *getCurr() const { return DFSStack.back().first; } diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index 2cd84d6..004c685 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/MC/MCInstrItineraries.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType + #ifndef NDEBUG const char *ScoreboardHazardRecognizer::DebugType = ""; #endif @@ -126,7 +127,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // free FU's in the scoreboard at the appropriate future cycles. const MCInstrDesc *MCID = DAG->getInstrDesc(SU); - if (MCID == NULL) { + if (!MCID) { // Don't check hazards for non-machineinstr Nodes. return NoHazard; } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cc0c5fa..2d2fd53 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dagcombine" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -40,6 +39,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "dagcombine" + STATISTIC(NodesCombined , "Number of dag nodes combined"); STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); @@ -56,14 +57,8 @@ namespace { CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Enable DAG combiner's use of IR alias analysis")); -// FIXME: Enable the use of TBAA. There are two known issues preventing this: -// 1. Stack coloring does not update TBAA when merging allocas -// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations. -// Because BasicAA does not handle inttoptr, we'll often miss basic type -// punning idioms that we need to catch so we don't miscompile real-world -// code. 
static cl::opt - UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(false), + UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), cl::desc("Enable DAG combiner's use of TBAA")); #ifndef NDEBUG @@ -120,9 +115,8 @@ namespace { /// now. /// void AddUsersToWorkList(SDNode *N) { - for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); - UI != UE; ++UI) - AddToWorkList(*UI); + for (SDNode *Node : N->uses()) + AddToWorkList(Node); } /// visit - call the node-specific routine that knows how to fold each @@ -173,6 +167,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); @@ -324,26 +319,7 @@ namespace { /// isAlias - Return true if there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, - const Value *SrcValue1, int SrcValueOffset1, - unsigned SrcValueAlign1, - const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, bool IsVolatile2, - const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2, - const MDNode *TBAAInfo2) const; - - /// isAlias - Return true if there is any possibility that the two addresses - /// overlap. - bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1); - - /// FindAliasInfo - Extracts the relevant alias information from the memory - /// node. Returns true if the operand was a load. - bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, bool &IsVolatile, - const Value *&SrcValue, int &SrcValueOffset, - unsigned &SrcValueAlignment, - const MDNode *&TBAAInfo) const; + bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, /// looking for a better chain (aliasing node.) @@ -660,7 +636,7 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { BuildVectorSDNode *BV = dyn_cast(N); if(BV && BV->isConstant()) return BV; - return NULL; + return nullptr; } // \brief Returns the SDNode if it is a constant splat BuildVector or constant @@ -669,8 +645,13 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) { if (ConstantSDNode *CN = dyn_cast(N)) return CN; - if (BuildVectorSDNode *BV = dyn_cast(N)) - return BV->getConstantSplatValue(); + if (BuildVectorSDNode *BV = dyn_cast(N)) { + ConstantSDNode *CN = BV->getConstantSplatValue(); + + // BuildVectors can truncate their operands. Ignore that case here. + if (CN && CN->getValueType(0) == N.getValueType().getScalarType()) + return CN; + } return nullptr; } @@ -781,10 +762,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // If the operands of this node are only used by the node, they will now // be dead. Make sure to visit them first to delete dead nodes early. - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) - if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) - AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); - + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) { + SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode(); + // For an operand generating multiple values, one of the values may + // become dead allowing further simplification (e.g. split index + // arithmetic from an indexed load). 
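+ // (Illustrative note, not part of the original patch: a pre- or + // post-indexed load is one such multi-value node; it produces the loaded + // value, the updated base pointer, and the chain, and the pointer result + // may have just become dead here.)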
+ if (Op->hasOneUse() || Op->getNumValues() > 1) + AddToWorkList(Op); + } DAG.DeleteNode(TLO.Old.getNode()); } } @@ -876,7 +861,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); - if (NewOp.getNode() == 0) + if (!NewOp.getNode()) return SDValue(); AddToWorkList(NewOp.getNode()); @@ -891,7 +876,7 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); - if (NewOp.getNode() == 0) + if (!NewOp.getNode()) return SDValue(); AddToWorkList(NewOp.getNode()); @@ -926,7 +911,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { bool Replace0 = false; SDValue N0 = Op.getOperand(0); SDValue NN0 = PromoteOperand(N0, PVT, Replace0); - if (NN0.getNode() == 0) + if (!NN0.getNode()) return SDValue(); bool Replace1 = false; @@ -936,7 +921,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { NN1 = NN0; else { NN1 = PromoteOperand(N1, PVT, Replace1); - if (NN1.getNode() == 0) + if (!NN1.getNode()) return SDValue(); } @@ -989,7 +974,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); else N0 = PromoteOperand(N0, PVT, Replace); - if (N0.getNode() == 0) + if (!N0.getNode()) return SDValue(); AddToWorkList(N0.getNode()); @@ -1134,7 +1119,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { SDValue RV = combine(N); - if (RV.getNode() == 0) + if (!RV.getNode()) continue; ++NodesCombined; @@ -1282,7 +1267,7 @@ SDValue DAGCombiner::combine(SDNode *N) { SDValue RV = visit(N); // If nothing happened, try a target-specific DAG combine. - if (RV.getNode() == 0) { + if (!RV.getNode()) { assert(N->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned NULL!"); @@ -1298,7 +1283,7 @@ SDValue DAGCombiner::combine(SDNode *N) { } // If nothing happened still, try promoting the operation. - if (RV.getNode() == 0) { + if (!RV.getNode()) { switch (N->getOpcode()) { default: break; case ISD::ADD: @@ -1328,8 +1313,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try commuting it to enable more // sdisel CSE. - if (RV.getNode() == 0 && - SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1338,7 +1322,7 @@ SDValue DAGCombiner::combine(SDNode *N) { if (isa(N0) || !isa(N1)) { SDValue Ops[] = { N1, N0 }; SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), - Ops, 2); + Ops); if (CSENode) return SDValue(CSENode, 0); } @@ -1428,8 +1412,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getEntryNode(); } else { // New and improved token factor. - Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), - MVT::Other, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); } // Don't add users to work list. 
@@ -1528,7 +1511,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { N0.getOperand(1)); // reassociate add SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); - if (RADD.getNode() != 0) + if (RADD.getNode()) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa(N0.getOperand(0)) && @@ -1581,10 +1564,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isInteger() && !VT.isVector()) { APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); + DAG.computeKnownBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); + DAG.computeKnownBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. @@ -1676,10 +1659,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; APInt RHSZero, RHSOne; - DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); + DAG.computeKnownBits(N0, LHSZero, LHSOne); if (LHSZero.getBoolValue()) { - DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); + DAG.computeKnownBits(N1, RHSZero, RHSOne); // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. @@ -1728,7 +1711,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N0C = dyn_cast(N0.getNode()); ConstantSDNode *N1C = dyn_cast(N1.getNode()); - ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 : + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : dyn_cast(N1.getOperand(1).getNode()); EVT VT = N0.getValueType(); @@ -1881,10 +1864,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); } else { - N0IsConst = dyn_cast(N0) != 0; + N0IsConst = dyn_cast(N0) != nullptr; ConstValue0 = N0IsConst ? (dyn_cast(N0))->getAPIntValue() : APInt(); - N1IsConst = dyn_cast(N1) != 0; + N1IsConst = dyn_cast(N1) != nullptr; ConstValue1 = N1IsConst ? (dyn_cast(N1))->getAPIntValue() : APInt(); } @@ -1942,7 +1925,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one // use. { - SDValue Sh(0,0), Y(0,0); + SDValue Sh(nullptr,0), Y(nullptr,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
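// (Illustrative note, not part of the original patch: e.g. (mul (shl X, 3), Y) is rewritten to (shl (mul X, Y), 3), so the multiply can be combined further before the cheap shift is reapplied.)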
if (N0.getOpcode() == ISD::SHL && (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || @@ -1975,7 +1958,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // reassociate mul SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); - if (RMUL.getNode() != 0) + if (RMUL.getNode()) return RMUL; return SDValue(); @@ -1984,8 +1967,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0.getNode()); - ConstantSDNode *N1C = dyn_cast(N1.getNode()); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -2011,10 +1994,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), N0, N1); } + // fold (sdiv X, pow2) -> simple ops after legalize - if (N1C && !N1C->isNullValue() && - (N1C->getAPIntValue().isPowerOf2() || - (-N1C->getAPIntValue()).isPowerOf2())) { + if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2DivCheap()) @@ -2023,15 +2006,17 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register - SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, - DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy(N0.getValueType()))); + SDValue SGN = + DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + getShiftAmountTy(N0.getValueType()))); AddToWorkList(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; - SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, - DAG.getConstant(VT.getSizeInBits() - lg2, - getShiftAmountTy(SGN.getValueType()))); + SDValue SRL = + DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, + getShiftAmountTy(SGN.getValueType()))); SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); AddToWorkList(SRL.getNode()); AddToWorkList(ADD.getNode()); // Divide by pow2 @@ -2044,13 +2029,12 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorkList(SRA.getNode()); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, - DAG.getConstant(0, VT), SRA); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. 
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); if (Op.getNode()) return Op; } @@ -2068,8 +2052,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0.getNode()); - ConstantSDNode *N1C = dyn_cast(N1.getNode()); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold vector ops @@ -2102,7 +2086,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } } // fold (udiv x, c) -> alternate - if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { + if (N1C && !TLI.isIntDivCheap()) { SDValue Op = BuildUDIV(N); if (Op.getNode()) return Op; } @@ -2120,8 +2104,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (srem c1, c2) -> c1%c2 @@ -2162,8 +2146,8 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { SDValue DAGCombiner::visitUREM(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantSDNode *N0C = dyn_cast(N0); - ConstantSDNode *N1C = dyn_cast(N1); + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); EVT VT = N->getValueType(0); // fold (urem c1, c2) -> c1%c2 @@ -2298,7 +2282,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, (!LegalOperations || TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - N->op_begin(), N->getNumOperands()); + ArrayRef(N->op_begin(), N->op_end())); return CombineTo(N, Res, Res); } @@ -2308,7 +2292,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - N->op_begin(), N->getNumOperands()); + ArrayRef(N->op_begin(), N->op_end())); return CombineTo(N, Res, Res); } @@ -2319,7 +2303,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. if (LoExists) { SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - N->op_begin(), N->getNumOperands()); + ArrayRef(N->op_begin(), N->op_end())); AddToWorkList(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && @@ -2330,7 +2314,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (HiExists) { SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - N->op_begin(), N->getNumOperands()); + ArrayRef(N->op_begin(), N->op_end())); AddToWorkList(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && @@ -2532,7 +2516,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && "Inputs to shuffles are not the same type"); - + // Check that both shuffles use the same mask. The masks are known to be of // the same length because the result vector type is the same. 
// Check also that shuffles have only one use to avoid introducing extra @@ -2632,7 +2616,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getConstant(0, VT); // reassociate and SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); - if (RAND.getNode() != 0) + if (RAND.getNode()) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D if (N1C && N0.getOpcode() == ISD::OR) @@ -3165,7 +3149,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); - SmallVector Parts(4, (SDNode*)0); + SmallVector Parts(4, (SDNode*)nullptr); // Look for either // (or (or (and), (and)), (or (and), (and))) // (or (or (or (and), (and)), (and)), (and)) @@ -3270,11 +3254,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // two ways to fold this node into a shuffle. SmallVector Mask1; SmallVector Mask2; - + for (unsigned i = 0; i != NumElts && CanFold; ++i) { int M0 = SV0->getMaskElt(i); int M1 = SV1->getMaskElt(i); - + // Both shuffle indexes are undef. Propagate Undef. if (M0 < 0 && M1 < 0) { Mask1.push_back(M0); @@ -3288,7 +3272,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { CanFold = false; break; } - + Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); } @@ -3329,15 +3313,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) SDValue BSwap = MatchBSwapHWord(N, N0, N1); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return BSwap; BSwap = MatchBSwapHWordLow(N, N0, N1); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return BSwap; // reassociate or SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); - if (ROR.getNode() != 0) + if (ROR.getNode()) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) // iff (c1 & c2) == 0. @@ -3582,28 +3566,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, HasPos ? Pos : Neg).getNode(); } - // fold (or (shl (*ext x), (*ext y)), - // (srl (*ext x), (*ext (sub 32, y)))) -> - // (*ext (rotl x, y)) or (*ext (rotr x, (sub 32, y))) - // - // fold (or (shl (*ext x), (*ext (sub 32, y))), - // (srl (*ext x), (*ext y))) -> - // (*ext (rotr x, y)) or (*ext (rotl x, (sub 32, y))) - if (Shifted.getOpcode() == ISD::ZERO_EXTEND || - Shifted.getOpcode() == ISD::ANY_EXTEND) { - SDValue InnerShifted = Shifted.getOperand(0); - EVT InnerVT = InnerShifted.getValueType(); - bool HasPosInner = TLI.isOperationLegalOrCustom(PosOpcode, InnerVT); - if (HasPosInner || TLI.isOperationLegalOrCustom(NegOpcode, InnerVT)) { - if (matchRotateSub(InnerPos, InnerNeg, InnerVT.getSizeInBits())) { - SDValue V = DAG.getNode(HasPosInner ? PosOpcode : NegOpcode, DL, - InnerVT, InnerShifted, HasPosInner ? Pos : Neg); - return DAG.getNode(Shifted.getOpcode(), DL, VT, V).getNode(); - } - } - } - - return 0; + return nullptr; } // MatchRotate - Handle an 'or' of two operands. If this is one of the many @@ -3612,29 +3575,29 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) return 0; + if (!TLI.isTypeLegal(VT)) return nullptr; // The target must have at least one rotate flavor. 
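// (Illustrative note, not part of the original patch: for an i32 value, (or (shl x, 8), (srl x, 24)) matches as (rotl x, 8), or equivalently (rotr x, 24), whichever flavor the target supports.)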
bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) return 0; + if (!HasROTL && !HasROTR) return nullptr; // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) - return 0; // Not part of a rotate. + return nullptr; // Not part of a rotate. SDValue RHSShift; // The shift. SDValue RHSMask; // AND value if any. if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) - return 0; // Not part of a rotate. + return nullptr; // Not part of a rotate. if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return 0; // Not shifting the same value. + return nullptr; // Not shifting the same value. if (LHSShift.getOpcode() == RHSShift.getOpcode()) - return 0; // Shifts must disagree. + return nullptr; // Shifts must disagree. // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { @@ -3656,7 +3619,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { uint64_t LShVal = cast(LHSShiftAmt)->getZExtValue(); uint64_t RShVal = cast(RHSShiftAmt)->getZExtValue(); if ((LShVal + RShVal) != OpSizeInBits) - return 0; + return nullptr; SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); @@ -3683,7 +3646,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) - return 0; + return nullptr; // If the shift amount is sign/zext/any-extended just peel it off. SDValue LExtOp0 = LHSShiftAmt; @@ -3710,7 +3673,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { if (TryR) return TryR; - return 0; + return nullptr; } SDValue DAGCombiner::visitXOR(SDNode *N) { @@ -3752,7 +3715,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return N0; // reassociate xor SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); - if (RXOR.getNode() != 0) + if (RXOR.getNode()) return RXOR; // fold !(x cc y) -> (x !cc y) @@ -3909,6 +3872,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { return SDValue(); } + if (!TLI.isDesirableToCommuteWithShift(LHS)) + return SDValue(); + // Fold the constants, shifting the binop RHS by the shift amount. SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), @@ -4382,7 +4348,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::CTLZ && N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne); + DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); // If any of the input bits are KnownOne, then the input couldn't be all // zeros, thus the result of the srl will always be zero. @@ -4745,7 +4711,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // tryToFoldExtendOfConstant - Try to fold a sext/zext/aext // dag node into a ConstantSDNode or a build_vector of constants. // This function is called by the DAGCombiner when visiting sext/zext/aext -// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). +// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). // Vector extends are not folded if operations are legal; this is to // avoid introducing illegal build_vector dag nodes. 
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, @@ -4771,8 +4737,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, if (!(VT.isVector() && (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) - return 0; - + return nullptr; + // We can fold this node into a build_vector. unsigned VTBits = SVT.getSizeInBits(); unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); @@ -4798,7 +4764,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SVT)); } - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode(); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: @@ -4882,8 +4848,7 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl &SetCCs, } Ops.push_back(SetCC->getOperand(2)); - CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), - &Ops[0], Ops.size())); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); } } @@ -4957,6 +4922,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -5009,7 +4975,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::ZEXTLOAD) { + if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) @@ -5108,13 +5074,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // isTruncateOf - If N is a truncate of some other value, return true, record // the value being truncated in Op and which of Op's bits are zero in KnownZero. // This function computes KnownZero to avoid a duplicated call to -// ComputeMaskedBits in the caller. +// computeKnownBits in the caller. static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, APInt &KnownZero) { APInt KnownOne; if (N->getOpcode() == ISD::TRUNCATE) { Op = N->getOperand(0); - DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, KnownZero, KnownOne); return true; } @@ -5135,7 +5101,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, else return false; - DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); + DAG.computeKnownBits(Op, KnownZero, KnownOne); if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) return false; @@ -5250,6 +5216,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. 
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -5282,7 +5249,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::SEXTLOAD) { + if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector SetCCs; if (!N0.hasOneUse()) @@ -5353,7 +5320,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getOperand(1), cast(N0.getOperand(2))->get()), DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, - &OneOps[0], OneOps.size())); + OneOps)); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then @@ -5370,8 +5337,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { cast(N0.getOperand(2))->get()); return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), - DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, - &OneOps[0], OneOps.size())); + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps)); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc @@ -5478,6 +5444,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // on vectors in one instruction. We only perform this transformation on // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -5507,20 +5474,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); + ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), - VT, LN0->getChain(), LN0->getBasePtr(), - MemVT, LN0->getMemOperand()); - CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), + VT, LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } } if (N0.getOpcode() == ISD::SETCC) { - // aext(setcc) -> sext_in_reg(vsetcc) for vectors. + // For vectors: + // aext(setcc) -> vsetcc + // aext(setcc) -> truncate(vsetcc) + // aext(setcc) -> aext(vsetcc) // Only do this before legalize for now. 
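The zext(setcc) fold noted above rests on a small identity: zero-extending a 1-bit compare yields the same value as selecting between 1 and 0. A standalone scalar model of that equivalence; the driver loop is hypothetical, not DAGCombiner code:

    #include <cassert>
    #include <cstdint>

    // Scalar model of the fold: zext(setcc x, y, cc) is equivalent to
    // select_cc x, y, 1, 0, cc.
    int main() {
      for (int32_t X : {-2, 0, 7})
        for (int32_t Y : {-2, 0, 7}) {
          uint32_t Zext = static_cast<uint32_t>(X < Y); // zext(setcc)
          uint32_t Sel  = (X < Y) ? 1u : 0u;            // select_cc ... 1, 0
          assert(Zext == Sel);
        }
      return 0;
    }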
if (VT.isVector() && !LegalOperations) { EVT N0VT = N0.getOperand(0).getValueType(); @@ -5535,19 +5508,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { cast(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend + // truncate/any extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); + return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); } } @@ -5571,7 +5539,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { default: break; case ISD::Constant: { const ConstantSDNode *CV = cast(V.getNode()); - assert(CV != 0 && "Const value should be ConstSDNode."); + assert(CV && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; if (NewVal != CVal) @@ -5872,7 +5840,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false); - if (BSwap.getNode() != 0) + if (BSwap.getNode()) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } @@ -5897,7 +5865,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { Op.getValueType())); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts); } return SDValue(); @@ -5998,8 +5966,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], - Opnds.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } } @@ -6074,8 +6041,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { AddToWorkList(NV.getNode()); Opnds.push_back(NV); } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, - &Opnds[0], Opnds.size()); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); } } @@ -6313,8 +6279,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } // Otherwise, we're growing or shrinking the elements. 
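The hunk above replaces a hand-rolled getIntegerVT/getVectorVT pair with EVT::changeVectorElementTypeToInteger(). A toy model of what that helper computes, with illustrative field names rather than LLVM's EVT internals:

    #include <cassert>

    // Toy model: keep the element count, swap the element type for an
    // integer of the same width.
    struct SimpleVT {
      unsigned EltBits;   // width of one element
      unsigned NumElts;   // element count
      bool IsFloat;
      SimpleVT changeElementTypeToInteger() const {
        return {EltBits, NumElts, /*IsFloat=*/false};
      }
    };

    int main() {
      SimpleVT V4F32{32, 4, true};
      SimpleVT V4I32 = V4F32.changeElementTypeToInteger();
      assert(V4I32.EltBits == 32 && V4I32.NumElts == 4 && !V4I32.IsFloat);
      return 0;
    }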
To avoid having to @@ -6370,8 +6335,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } // Finally, this must be the case where we are shrinking elements: each input @@ -6407,8 +6371,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); } SDValue DAGCombiner::visitFADD(SDNode *N) { @@ -7006,7 +6969,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -7019,7 +6982,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } } @@ -7063,7 +7026,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); } } @@ -7223,11 +7186,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast(N0.getOperand(1)); - if (CFP1) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), - DAG.getNode(ISD::FNEG, SDLoc(N), VT, - N0.getOperand(1))); + if (CFP1) { + APFloat CVal = CFP1->getValueAPF(); + CVal.changeSign(); + if (Level >= AfterLegalizeDAG && + (TLI.isFPImmLegal(CVal, N->getValueType(0)) || + TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0)))) + return DAG.getNode( + ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); + } } return SDValue(); @@ -7335,7 +7303,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && (N1.getOperand(0).hasOneUse() && N1.getOperand(0).getOpcode() == ISD::SRL))) { - SDNode *Trunc = 0; + SDNode *Trunc = nullptr; if (N1.getOpcode() == ISD::TRUNCATE) { // Look pass the truncate. Trunc = N1.getNode(); @@ -7616,9 +7584,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // a copy of the original base pointer. 
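The tightened fneg(fmul C, X) fold above relies on -(C * X) == (-C) * X, so the combine only has to flip the constant's sign with APFloat::changeSign; the new isFPImmLegal/isOperationLegal checks ensure the negated constant is still cheap to materialize after legalization. The identity itself, checked standalone:

    #include <cassert>

    // The algebra behind the guarded fold: negating a product is the same
    // as negating its constant factor.
    int main() {
      double C = 2.5, X = -3.0;
      assert(-(C * X) == (-C) * X);
      return 0;
    }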
SmallVector OtherUses; if (isa(Offset)) - for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(), - E = BasePtr.getNode()->use_end(); I != E; ++I) { - SDNode *Use = *I; + for (SDNode *Use : BasePtr.getNode()->uses()) { if (Use == Ptr.getNode()) continue; @@ -7660,9 +7626,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SmallPtrSet Visited; SmallVector Worklist; - for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), - E = Ptr.getNode()->use_end(); I != E; ++I) { - SDNode *Use = *I; + for (SDNode *Use : Ptr.getNode()->uses()) { if (Use == N) continue; if (N->hasPredecessorHelper(Use, Visited, Worklist)) @@ -7798,9 +7762,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Ptr.getNode()->hasOneUse()) return false; - for (SDNode::use_iterator I = Ptr.getNode()->use_begin(), - E = Ptr.getNode()->use_end(); I != E; ++I) { - SDNode *Op = *I; + for (SDNode *Op : Ptr.getNode()->uses()) { if (Op == N || (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) continue; @@ -7826,9 +7788,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Check for #1. bool TryNext = false; - for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(), - EE = BasePtr.getNode()->use_end(); II != EE; ++II) { - SDNode *Use = *II; + for (SDNode *Use : BasePtr.getNode()->uses()) { if (Use == Ptr.getNode()) continue; @@ -7836,9 +7796,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // transformation. if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ bool RealUse = false; - for (SDNode::use_iterator III = Use->use_begin(), - EEE = Use->use_end(); III != EEE; ++III) { - SDNode *UseUse = *III; + for (SDNode *UseUse : Use->uses()) { if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -7891,6 +7849,17 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } +/// \brief Return the base-pointer arithmetic from an indexed \p LD. +SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + assert(AM != ISD::UNINDEXED); + SDValue BP = LD->getOperand(1); + SDValue Inc = LD->getOperand(2); + unsigned Opc = + (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); + return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); +} + SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast(N); SDValue Chain = LD->getChain(); @@ -7927,8 +7896,16 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { + if (!N->hasAnyUseOfValue(0)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + SDValue Index; + if (N->hasAnyUseOfValue(1)) { + Index = SplitIndexingFromLoad(LD); + // Try to fold the base pointer arithmetic into subsequent loads and + // stores. 
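Several hunks above convert explicit SDNode::use_iterator loops to range-based for over uses(). A toy sketch of the pattern, exposing a begin/end pair as a range; Node and its members are illustrative:

    #include <cassert>
    #include <vector>

    // Toy version of SDNode::uses(): expose the iterator pair as a range so
    // call sites can use range-based for instead of use_iterator loops.
    struct Node {
      std::vector<Node *> Users;
      // Old style: use_begin()/use_end() iterator pair.
      std::vector<Node *>::iterator use_begin() { return Users.begin(); }
      std::vector<Node *>::iterator use_end() { return Users.end(); }
      // New style: a range, so `for (Node *U : N.uses())` works.
      std::vector<Node *> &uses() { return Users; }
    };

    int main() {
      Node A, B, C;
      A.Users = {&B, &C};
      unsigned Count = 0;
      for (Node *U : A.uses()) { (void)U; ++Count; }
      assert(Count == 2);
      return 0;
    }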
+ AddUsersToWorkList(N); + } else + Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; @@ -7936,8 +7913,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), - DAG.getUNDEF(N->getValueType(1))); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); removeFromWorkList(N); DAG.DeleteNode(N); @@ -8131,8 +8107,8 @@ struct LoadedSlice { // This is used to get some contextual information about legal types, etc. SelectionDAG *DAG; - LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, - unsigned Shift = 0, SelectionDAG *DAG = NULL) + LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, + unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} LoadedSlice(const LoadedSlice &LS) @@ -8228,7 +8204,7 @@ struct LoadedSlice { /// \brief Get the offset in bytes of this slice in the original chunk of /// bits. - /// \pre DAG != NULL. + /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { assert(DAG && "Missing context."); bool IsBigEndian = @@ -8384,8 +8360,8 @@ static void adjustCostForPairing(SmallVectorImpl &LoadedSlices, const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); // First (resp. Second) is the first (resp. Second) potentially candidate // to be placed in a paired load. - const LoadedSlice *First = NULL; - const LoadedSlice *Second = NULL; + const LoadedSlice *First = nullptr; + const LoadedSlice *Second = nullptr; for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, // Set the beginning of the pair. First = Second) { @@ -8407,7 +8383,7 @@ static void adjustCostForPairing(SmallVectorImpl &LoadedSlices, unsigned RequiredAlignment = 0; if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { // move to the next pair, this type is hopeless. - Second = NULL; + Second = nullptr; continue; } // Check if we meet the alignment requirement. @@ -8421,7 +8397,7 @@ static void adjustCostForPairing(SmallVectorImpl &LoadedSlices, assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); --GlobalLSCost.Loads; // Move to the next pair. - Second = NULL; + Second = nullptr; } } @@ -8565,7 +8541,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { } SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, - &ArgChains[0], ArgChains.size()); + ArgChains); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); return true; } @@ -8660,14 +8636,14 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type // legalization. MVT VT = MVT::getIntegerVT(NumBytes*8); if (!DC->isTypeLegal(VT)) - return 0; + return nullptr; // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. 
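SplitIndexingFromLoad, introduced above and used here to salvage the pointer result of an otherwise dead indexed load, recomputes that result as plain base-pointer arithmetic. A scalar model of the same computation; the enum mirrors ISD::MemIndexedMode but the function is illustrative:

    #include <cassert>
    #include <cstdint>

    // Whatever the addressing mode, the pointer result of an indexed load
    // is just base +/- increment, so it can be rebuilt with a plain ADD/SUB
    // even when the loaded value is dead.
    enum MemIndexedMode { PRE_INC, PRE_DEC, POST_INC, POST_DEC };

    uint64_t splitIndexing(MemIndexedMode AM, uint64_t Base, uint64_t Inc) {
      return (AM == PRE_INC || AM == POST_INC) ? Base + Inc : Base - Inc;
    }

    int main() {
      assert(splitIndexing(PRE_INC, 100, 8) == 108);
      assert(splitIndexing(POST_DEC, 100, 8) == 92);
      return 0;
    }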
@@ -9081,7 +9057,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) { if (Ldn->isVolatile()) { - Index = NULL; + Index = nullptr; break; } @@ -9090,7 +9066,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { NextInChain = Ldn->getChain().getNode(); continue; } else { - Index = NULL; + Index = nullptr; break; } } @@ -9719,8 +9695,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } // Return the new vector - return DAG.getNode(ISD::BUILD_VECTOR, dl, - VT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { @@ -9826,8 +9801,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { NewLoad = true; } - LoadSDNode *LN0 = NULL; - const ShuffleVectorSDNode *SVN = NULL; + LoadSDNode *LN0 = nullptr; + const ShuffleVectorSDNode *SVN = nullptr; if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast(InVec); } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && @@ -10052,7 +10027,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size()); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); // The new BUILD_VECTOR node has the potential to be further optimized. AddToWorkList(BV.getNode()); @@ -10120,8 +10095,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { else Opnds.push_back(In.getOperand(0)); } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, - &Opnds[0], Opnds.size()); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); AddToWorkList(BV.getNode()); return DAG.getNode(Opcode, dl, VT, BV); @@ -10162,7 +10136,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // constant index, bail out. if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa(N->getOperand(i).getOperand(1))) { - VecIn1 = VecIn2 = SDValue(0, 0); + VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } @@ -10171,18 +10145,18 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; - if (VecIn1.getNode() == 0) { + if (!VecIn1.getNode()) { VecIn1 = ExtractedFromVec; - } else if (VecIn2.getNode() == 0) { + } else if (!VecIn2.getNode()) { VecIn2 = ExtractedFromVec; } else { // Too many inputs. - VecIn1 = VecIn2 = SDValue(0, 0); + VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } } - // If everything is good, we can make a shuffle operation. + // If everything is good, we can make a shuffle operation. if (VecIn1.getNode()) { SmallVector Mask; for (unsigned i = 0; i != NumInScalars; ++i) { @@ -10212,7 +10186,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Attempt to transform a single input vector to the correct type. if ((VT != VecIn1.getValueType())) { // We don't support shuffeling between TWO values of different types. 
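The null-check cleanups above sit in visitBUILD_VECTOR's shuffle detection, which accepts a BUILD_VECTOR of extract_vector_elts only if at most two distinct source vectors appear. A standalone model of that VecIn1/VecIn2 bookkeeping, using -1 where the real code uses a null SDValue:

    #include <cassert>
    #include <vector>

    // A BUILD_VECTOR whose operands are all extract_vector_elt can become a
    // shuffle only if the elements come from at most two source vectors.
    bool classifySources(const std::vector<int> &SourceOfElt,
                         int &VecIn1, int &VecIn2) {
      VecIn1 = VecIn2 = -1;             // -1 plays the role of a null SDValue
      for (int Src : SourceOfElt) {
        if (Src == VecIn1 || Src == VecIn2)
          continue;
        if (VecIn1 == -1)
          VecIn1 = Src;
        else if (VecIn2 == -1)
          VecIn2 = Src;
        else
          return false;                 // too many inputs, give up
      }
      return true;
    }

    int main() {
      int A, B;
      assert(classifySources({0, 1, 0, 1}, A, B));   // two sources: OK
      assert(!classifySources({0, 1, 2, 1}, A, B));  // three sources: bail
      return 0;
    }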
- if (VecIn2.getNode() != 0) + if (VecIn2.getNode()) return SDValue(); // We only support widening of vectors which are half the size of the @@ -10311,8 +10285,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { for (unsigned i = 0; i != BuildVecNumElts; ++i) Opnds.push_back(N1.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], - Opnds.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR @@ -10469,8 +10442,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { } } - return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(), - Ops.size()); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { @@ -10685,8 +10657,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT EltVT = RVT.getVectorElementType(); SmallVector ZeroOps(RVT.getVectorNumElements(), DAG.getConstant(0, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - RVT, &ZeroOps[0], ZeroOps.size()); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps); LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); @@ -10755,8 +10726,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - LHS.getValueType(), &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); } return SDValue(); @@ -10791,8 +10761,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { if (Ops.size() != N0.getNumOperands()) return SDValue(); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - N0.getValueType(), &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops); } SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, @@ -10994,7 +10963,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, if (ConstantFPSDNode *FV = dyn_cast(N3)) { if (TLI.isTypeLegal(N2.getValueType()) && (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != - TargetLowering::Legal) && + TargetLowering::Legal && + !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && + !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && // If both constants have multiple uses, then we won't need to do an // extra load, they are likely around in registers for other users. (TV->hasOneUse() || FV->hasOneUse())) { @@ -11201,7 +11172,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // select_cc setlt X, 1, -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) if (N1C) { - ConstantSDNode *SubC = NULL; + ConstantSDNode *SubC = nullptr; if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || (N1C->isAllOnesValue() && CC == ISD::SETGT)) && N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) @@ -11242,26 +11213,42 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// multiplying by a magic number. See: /// SDValue DAGCombiner::BuildSDIV(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. 
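BuildSDIV and BuildUDIV now extract the divisor via isConstOrConstSplat and bail on zero before delegating to TLI. What the TLI helpers ultimately emit, per the comment above, is a multiply by a magic number; a standalone instance of that technique for unsigned division by 3, using the classic constant 0xAAAAAAAB == (2^33 + 1) / 3 (the constant and helper are illustrative, not what TLI computes verbatim):

    #include <cassert>
    #include <cstdint>

    // Division by a constant as multiply-high-and-shift. The guards added
    // above just ensure the divisor is a (splat) constant and nonzero
    // before this machinery runs.
    uint32_t udiv3(uint32_t X) {
      return static_cast<uint32_t>(
          (static_cast<uint64_t>(X) * 0xAAAAAAABull) >> 33);
    }

    int main() {
      for (uint32_t X : {0u, 1u, 2u, 3u, 100u, 0xFFFFFFFFu})
        assert(udiv3(X) == X / 3);
      return 0;
    }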
+ if (!C->getAPIntValue()) + return SDValue(); + std::vector Built; - SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); + SDValue S = + TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); - for (std::vector::iterator ii = Built.begin(), ee = Built.end(); - ii != ee; ++ii) - AddToWorkList(*ii); + for (SDNode *N : Built) + AddToWorkList(N); return S; } -/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, +/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// SDValue DAGCombiner::BuildUDIV(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (!C->getAPIntValue()) + return SDValue(); + std::vector Built; - SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); + SDValue S = + TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); - for (std::vector::iterator ii = Built.begin(), ee = Built.end(); - ii != ee; ++ii) - AddToWorkList(*ii); + for (SDNode *N : Built) + AddToWorkList(N); return S; } @@ -11271,7 +11258,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. - Base = Ptr; Offset = 0; GV = 0; CV = 0; + Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; // If it's an adding a simple constant then integrate the offset. if (Base.getOpcode() == ISD::ADD) { @@ -11305,31 +11292,27 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, /// isAlias - Return true if there is any possibility that the two addresses /// overlap. -bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, - const Value *SrcValue1, int SrcValueOffset1, - unsigned SrcValueAlign1, - const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, bool IsVolatile2, - const Value *SrcValue2, int SrcValueOffset2, - unsigned SrcValueAlign2, - const MDNode *TBAAInfo2) const { +bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are the same then they must be aliases. - if (Ptr1 == Ptr2) return true; + if (Op0->getBasePtr() == Op1->getBasePtr()) return true; // If they are both volatile then they cannot be reordered. - if (IsVolatile1 && IsVolatile2) return true; + if (Op0->isVolatile() && Op1->isVolatile()) return true; // Gather base node and offset information. SDValue Base1, Base2; int64_t Offset1, Offset2; const GlobalValue *GV1, *GV2; const void *CV1, *CV2; - bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); - bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); + bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), + Base1, Offset1, GV1, CV1); + bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), + Base2, Offset2, GV2, CV2); // If they have a same base address then check to see if they overlap. if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) - return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || + (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); // It is possible for different frame indices to alias each other, mostly // when tail call optimization reuses return address slots for arguments. 
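The rewritten isAlias derives each access size from its memory VT (size in bits shifted right by 3 to get bytes) and applies the usual interval-disjointness test. A standalone version of that test; the function name is illustrative:

    #include <cassert>
    #include <cstdint>

    // Two accesses conflict unless one interval ends before the other
    // begins. Sizes come in as bits, as from getMemoryVT().getSizeInBits().
    bool mayOverlap(int64_t Off1, uint64_t SizeInBits1,
                    int64_t Off2, uint64_t SizeInBits2) {
      int64_t Size1 = SizeInBits1 >> 3;   // bits -> bytes
      int64_t Size2 = SizeInBits2 >> 3;
      return !((Off1 + Size1) <= Off2 || (Off2 + Size2) <= Off1);
    }

    int main() {
      assert(!mayOverlap(0, 32, 4, 32));  // [0,4) vs [4,8): disjoint
      assert(mayOverlap(0, 64, 4, 32));   // [0,8) vs [4,8): overlap
      return 0;
    }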
@@ -11339,7 +11322,8 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); Offset1 += MFI->getObjectOffset(cast(Base1)->getIndex()); Offset2 += MFI->getObjectOffset(cast(Base2)->getIndex()); - return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); + return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || + (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); } // Otherwise, if we know what the bases are, and they aren't identical, then @@ -11351,15 +11335,18 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, // compared to the size and offset of the access, we may be able to prove they // do not alias. This check is conservative for now to catch cases created by // splitting vector types. - if ((SrcValueAlign1 == SrcValueAlign2) && - (SrcValueOffset1 != SrcValueOffset2) && - (Size1 == Size2) && (SrcValueAlign1 > Size1)) { - int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; - int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; + if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && + (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && + (Op0->getMemoryVT().getSizeInBits() >> 3 == + Op1->getMemoryVT().getSizeInBits() >> 3) && + (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { + int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); + int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); // There is no overlap between these relatively aligned accesses of similar // size, return no alias. - if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) + if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || + (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) return false; } @@ -11370,16 +11357,22 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif - if (UseAA && SrcValue1 && SrcValue2) { + if (UseAA && + Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. - int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); - int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; - int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; + int64_t MinOffset = std::min(Op0->getSrcValueOffset(), + Op1->getSrcValueOffset()); + int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + + Op0->getSrcValueOffset() - MinOffset; + int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + + Op1->getSrcValueOffset() - MinOffset; AliasAnalysis::AliasResult AAResult = - AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, - UseTBAA ? TBAAInfo1 : 0), - AliasAnalysis::Location(SrcValue2, Overlap2, - UseTBAA ? TBAAInfo2 : 0)); + AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), + Overlap1, + UseTBAA ? Op0->getTBAAInfo() : nullptr), + AliasAnalysis::Location(Op1->getMemOperand()->getValue(), + Overlap2, + UseTBAA ?
Op1->getTBAAInfo() : nullptr)); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -11388,44 +11381,6 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, return true; } -bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { - SDValue Ptr0, Ptr1; - int64_t Size0, Size1; - bool IsVolatile0, IsVolatile1; - const Value *SrcValue0, *SrcValue1; - int SrcValueOffset0, SrcValueOffset1; - unsigned SrcValueAlign0, SrcValueAlign1; - const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; - FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, - SrcValueAlign0, SrcTBAAInfo0); - FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, - SrcValueAlign1, SrcTBAAInfo1); - return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0, - SrcValueAlign0, SrcTBAAInfo0, - Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1, - SrcValueAlign1, SrcTBAAInfo1); -} - -/// FindAliasInfo - Extracts the relevant alias information from the memory -/// node. Returns true if the operand was a nonvolatile load. -bool DAGCombiner::FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, bool &IsVolatile, - const Value *&SrcValue, - int &SrcValueOffset, - unsigned &SrcValueAlign, - const MDNode *&TBAAInfo) const { - LSBaseSDNode *LS = cast(N); - - Ptr = LS->getBasePtr(); - Size = LS->getMemoryVT().getSizeInBits() >> 3; - IsVolatile = LS->isVolatile(); - SrcValue = LS->getSrcValue(); - SrcValueOffset = LS->getSrcValueOffset(); - SrcValueAlign = LS->getOriginalAlignment(); - TBAAInfo = LS->getTBAAInfo(); - return isa(LS) && !IsVolatile; -} - /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, @@ -11434,15 +11389,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet Visited; // Visited node set. // Get alias information for node. - SDValue Ptr; - int64_t Size; - bool IsVolatile; - const Value *SrcValue; - int SrcValueOffset; - unsigned SrcValueAlign; - const MDNode *SrcTBAAInfo; - bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue, - SrcValueOffset, SrcValueAlign, SrcTBAAInfo); + bool IsLoad = isa(N) && !cast(N)->isVolatile(); // Starting off. Chains.push_back(OriginalChain); @@ -11481,24 +11428,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LOAD: case ISD::STORE: { // Get alias information for Chain. - SDValue OpPtr; - int64_t OpSize; - bool OpIsVolatile; - const Value *OpSrcValue; - int OpSrcValueOffset; - unsigned OpSrcValueAlign; - const MDNode *OpSrcTBAAInfo; - bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize, - OpIsVolatile, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign, - OpSrcTBAAInfo); + bool IsOpLoad = isa(Chain.getNode()) && + !cast(Chain.getNode())->isVolatile(); // If chain is alias then stop here. if (!(IsLoad && IsOpLoad) && - isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset, - SrcValueAlign, SrcTBAAInfo, - OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset, - OpSrcValueAlign, OpSrcTBAAInfo)) { + isAlias(cast(N), cast(Chain.getNode()))) { Aliases.push_back(Chain); } else { // Look further up the chain. @@ -11604,8 +11539,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return Aliases[0]; // Construct a custom tailored token factor. 
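The alias query above sizes its two AliasAnalysis::Locations by rebasing both offsets to their common minimum, so each query size covers the access from that shared origin (Overlap = Size + SrcValueOffset - MinOffset). A worked standalone example of that arithmetic:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // How the AA query sizes its Locations from two accesses' offsets.
    int main() {
      int64_t Off1 = 8, Size1 = 4;   // 4-byte access at offset 8
      int64_t Off2 = 12, Size2 = 4;  // 4-byte access at offset 12
      int64_t MinOffset = std::min(Off1, Off2);
      int64_t Overlap1 = Size1 + Off1 - MinOffset;  // spans [8,12) from base 8
      int64_t Overlap2 = Size2 + Off2 - MinOffset;  // spans [8,16) from base 8
      assert(Overlap1 == 4 && Overlap2 == 8);
      return 0;
    }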
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, - &Aliases[0], Aliases.size()); + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } // SelectionDAG::Combine - This is the entry point for the file. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index baba51e..99931c1 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -39,7 +39,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" #include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" @@ -64,6 +63,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "isel" + STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " "target-independent selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " @@ -79,7 +80,7 @@ void FastISel::startNewBlock() { // Instructions are appended to FuncInfo.MBB. If the basic block already // contains labels or copies, use the last instruction as the last local // value. - EmitStartPt = 0; + EmitStartPt = nullptr; if (!FuncInfo.MBB->empty()) EmitStartPt = &FuncInfo.MBB->back(); LastLocalValue = EmitStartPt; @@ -826,15 +827,21 @@ FastISel::SelectInstruction(const Instruction *I) { MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; - // As a special case, don't handle calls to builtin library functions that - // may be translated directly to target instructions. if (const CallInst *Call = dyn_cast(I)) { const Function *F = Call->getCalledFunction(); LibFunc::Func Func; + + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. if (F && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) return false; + + // Don't handle Intrinsic::trap if a trap function is specified. + if (F && F->getIntrinsicID() == Intrinsic::trap && + !TM.Options.getTrapFunctionName().empty()) + return false; } // First, try doing target-independent selection. @@ -880,7 +887,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { // fall-through case, which needs no instructions. } else { // The unconditional branch case. - TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, + TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector(), DbgLoc); } FuncInfo.MBB->addSuccessor(MSucc); @@ -1035,8 +1042,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } case Instruction::Unreachable: - // Nothing to emit. - return true; + if (TM.Options.TrapUnreachable) + return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; + else + return true; case Instruction::Alloca: // FunctionLowering has the static-sized case covered. @@ -1204,6 +1213,23 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { return MRI.createVirtualRegister(RC); } +unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, + unsigned Op, unsigned OpNum) { + if (TargetRegisterInfo::isVirtualRegister(Op)) { + const TargetRegisterClass *RegClass = + TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + if (!MRI.constrainRegClass(Op, RegClass)) { + // If it's not legal to COPY between the register classes, something + // has gone very wrong before we got here.
+ unsigned NewOp = createResultReg(RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), NewOp).addReg(Op); + return NewOp; + } + } + return Op; +} + unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); @@ -1216,9 +1242,11 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill); @@ -1236,9 +1264,12 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -1258,9 +1289,13 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -1281,9 +1316,12 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF); + MRI.constrainRegClass(Op0, RC); + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -1302,9 +1340,11 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -1325,9 +1365,11 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + if (II.getNumDefs() >= 1) 
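constrainOperandRegClass, threaded through each FastEmitInst_* helper above, narrows a virtual register to the class the instruction demands and falls back to a COPY into a fresh register when the classes are incompatible. A toy model with bitmask register classes; everything here is illustrative, not the TargetRegisterInfo API:

    #include <cassert>

    unsigned NextReg = 100;
    struct VReg { unsigned Id; unsigned ClassMask; };

    // Narrow the operand's class to what the instruction requires; model a
    // COPY into a new register when there is no common subclass.
    VReg constrainOperand(VReg Op, unsigned RequiredMask) {
      unsigned Common = Op.ClassMask & RequiredMask;
      if (Common)                       // classes compatible: just narrow
        return {Op.Id, Common};
      return {NextReg++, RequiredMask}; // incompatible: COPY to new reg
    }

    int main() {
      VReg GPR{1, 0b0110};
      VReg Narrowed = constrainOperand(GPR, 0b0010);
      assert(Narrowed.Id == 1 && Narrowed.ClassMask == 0b0010);
      VReg Copied = constrainOperand(GPR, 0b1000);
      assert(Copied.Id == 100);
      return 0;
    }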
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -1347,9 +1389,12 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -1371,9 +1416,12 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm1, uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 5f0006e..ae124e8 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "function-lowering-info" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/Analysis.h" @@ -40,6 +39,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "function-lowering-info" + /// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by /// PHI nodes or outside of the basic block that defines it, or used by a /// switch or atomic instruction, which may expand to multiple basic blocks. @@ -283,11 +284,11 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { const FunctionLoweringInfo::LiveOutInfo * FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (!LiveOutRegInfo.inBounds(Reg)) - return NULL; + return nullptr; LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; if (!LOI->IsValid) - return NULL; + return nullptr; if (BitWidth > LOI->KnownZero.getBitWidth()) { LOI->NumSignBits = 1; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 1c596b8..7c124b8 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instr-emitter" #include "InstrEmitter.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/Statistic.h" @@ -31,6 +30,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "instr-emitter" + /// MinRCSize - Smallest register class we allow when constraining virtual /// registers. If satisfying all register class constraints would require /// using a smaller register class, emit a COPY to a new virtual register @@ -99,7 +100,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. 
bool MatchReg = true; - const TargetRegisterClass *UseRC = NULL; + const TargetRegisterClass *UseRC = nullptr; MVT VT = Node->getSimpleValueType(ResNo); // Stick to the preferred register classes for legal types. @@ -107,9 +108,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = TLI->getRegClassFor(VT); if (!IsClone && !IsCloned) - for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { bool Match = true; if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && @@ -131,7 +130,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, Match = false; if (User->isMachineOpcode()) { const MCInstrDesc &II = TII->get(User->getMachineOpcode()); - const TargetRegisterClass *RC = 0; + const TargetRegisterClass *RC = nullptr; if (i+II.getNumDefs() < II.getNumOperands()) { RC = TRI->getAllocatableClass( TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); @@ -154,7 +153,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, break; } - const TargetRegisterClass *SrcRC = 0, *DstRC = 0; + const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. @@ -242,9 +241,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, } if (!VRBase && !IsClone && !IsCloned) - for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { @@ -329,7 +326,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, // shrink VReg's register class within reason. For example, if VReg == GR32 // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. if (II) { - const TargetRegisterClass *DstRC = 0; + const TargetRegisterClass *DstRC = nullptr; if (IIOpNum < II->getNumOperands()) DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { @@ -470,9 +467,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // If the node is only used by a CopyToReg and the dest reg is a vreg, use // the CopyToReg'd destination register instead of creating a new vreg. - for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end(); - UI != E; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast(User->getOperand(1))->getReg(); @@ -561,10 +556,10 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const ConstantSDNode *SD = cast(N0); MIB.addImm(SD->getZExtValue()); } else - AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted - AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); MIB.addImm(SubIdx); MBB->insert(InsertPos, MIB); @@ -693,10 +688,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); } - if (Offset != 0) // Indirect addressing. + // Indirect addressing is indicated by an Imm as the second parameter. 
+ if (SD->isIndirect()) MIB.addImm(Offset); - else + else { + assert(Offset == 0 && "direct value cannot have an offset"); MIB.addReg(0U, RegState::Debug); + } MIB.addMetadata(MDPtr); @@ -738,7 +736,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); unsigned NumDefs = II.getNumDefs(); - const uint16_t *ScratchRegs = NULL; + const MCPhysReg *ScratchRegs = nullptr; // Handle STACKMAP and PATCHPOINT specially and then use the generic code. if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { @@ -756,7 +754,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned NumImpUses = 0; unsigned NodeOperands = countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); - bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -982,7 +980,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) - AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap, + AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Manually set isTied bits. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 20afb3d..a59e895 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -387,9 +387,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, MinAlign(ST->getAlignment(), Offset), ST->getTBAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); return; } @@ -506,8 +504,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, false, false, 0)); // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, @@ -705,7 +702,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } } } - return SDValue(0, 0); + return SDValue(nullptr, 0); } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { @@ -1268,6 +1265,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::READ_REGISTER: + case ISD::WRITE_REGISTER: + // Named register is legal in the DAG, but blocked by register name + // selection if not implemented by target (to choose the correct register). + // They'll be converted to Copy(To/From)Reg. + Action = TargetLowering::Legal; + break; case ISD::DEBUGTRAP: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); if (Action == TargetLowering::Expand) { @@ -1528,8 +1532,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDValue StoreChain; if (!Stores.empty()) // Not all undef elements?
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); else StoreChain = DAG.getEntryNode(); @@ -1649,8 +1652,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, /// If the SETCC has been legalized using the inverse condcode, then LHS and /// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert /// will be set to true. The caller must invert the result of the SETCC with -/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a -/// true/false result. +/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect +/// of a true/false result. /// /// \returns true if the SetCC has been legalized, false if it hasn't. bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, @@ -2055,13 +2058,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, if (isTailCall) InChain = TCChain; - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), isTailCall, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); - std::pair CallInfo = TLI.LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) // It's a tailcall, return the chain (which is the DAG root). @@ -2090,12 +2092,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, TLI.getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo.first; @@ -2124,11 +2126,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -2183,7 +2186,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, case MVT::i128: LC= isSigned ? 
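The CallLoweringInfo rewrite above replaces a long positional constructor with chained setters, so each call site states only the options it cares about. A toy builder in the same shape; CallInfo and its members are illustrative, not LLVM's declarations:

    #include <cassert>
    #include <string>
    #include <utility>

    // Each setter returns *this so call sites can chain exactly the options
    // they need, instead of filling a dozen positional arguments.
    struct CallInfo {
      std::string Callee;
      bool SExtResult = false, ZExtResult = false, TailCall = false;
      CallInfo &setCallee(std::string C) { Callee = std::move(C); return *this; }
      CallInfo &setTailCall(bool V = true) { TailCall = V; return *this; }
      CallInfo &setSExtResult(bool V = true) { SExtResult = V; return *this; }
      CallInfo &setZExtResult(bool V = true) { ZExtResult = V; return *this; }
    };

    int main() {
      CallInfo CLI;
      CLI.setCallee("__udivsi3").setTailCall(false).setSExtResult(true);
      assert(CLI.Callee == "__udivsi3" && CLI.SExtResult && !CLI.ZExtResult);
      return 0;
    }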
RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; } - return TLI.getLibcallName(LC) != 0; + return TLI.getLibcallName(LC) != nullptr; } /// useDivRem - Only issue divrem libcall if both quotient and remainder are @@ -2261,11 +2264,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, TLI.getPointerTy()); SDLoc dl(Node); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = TLI.LowerCallTo(CLI); // Remainder is loaded back from the stack frame. @@ -2286,7 +2289,7 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { case MVT::f128: LC = RTLIB::SINCOS_F128; break; case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; } - return TLI.getLibcallName(LC) != 0; + return TLI.getLibcallName(LC) != nullptr; } /// canCombineSinCosLibcall - Return true if sincos libcall is available and @@ -2375,12 +2378,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, TLI.getPointerTy()); SDLoc dl(Node); - TargetLowering:: - CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), - false, false, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), + Type::getVoidTy(*DAG.getContext()), Callee, &Args, 0); + std::pair CallInfo = TLI.LowerCallTo(CLI); Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, @@ -2990,15 +2992,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If the target didn't lower this, lower it to '__sync_synchronize()' call // FIXME: handle "fence singlethread" more efficiently. 
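The divrem machinery above exists because when both the quotient and the remainder of the same operands are live, one combined libcall can feed both users instead of two separate divisions. std::div is the C library's version of the same idea:

    #include <cassert>
    #include <cstdlib>

    // One combined call yields both results of the division.
    int main() {
      std::div_t QR = std::div(23, 5);
      assert(QR.quot == 4 && QR.rem == 3);
      return 0;
    }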
TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(Node->getOperand(0), - Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy()), - Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), + &Args, 0); + std::pair CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -3071,14 +3071,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::TRAP: { // If this operation is not supported, lower it to 'abort()' call TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(Node->getOperand(0), - Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("abort", TLI.getPointerTy()), - Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", TLI.getPointerTy()), &Args, 0); std::pair CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -3304,7 +3300,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getVectorIdxTy()))); } - Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); Results.push_back(Tmp1); @@ -3625,6 +3621,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(1))); break; } + + SDValue Lo, Hi; + EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext()); + if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::OR, VT) && + TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); + SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(), + TLI.getShiftAmountTy(HalfType)); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, @@ -3698,8 +3711,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, RHS); TopHalf = BottomHalf.getValue(1); - } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), - VT.getSizeInBits() * 2))) { + } else if (TLI.isTypeLegal(WideVT)) { LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3857,7 +3869,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If we expanded the SETCC by inverting the condition code, then wrap // the existing SETCC in a NOT to restore the intended condition. 
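The new ISD::MUL expansion above asks expandMUL for the product's two half-width parts and reassembles them as OR(SHL(anyext Hi, half-bits), zext Lo). A scalar model of that reassembly:

    #include <cassert>
    #include <cstdint>

    // Rebuild a full-width product from its low and high halves, mirroring
    // the zext/anyext + SHL + OR sequence in the hunk.
    int main() {
      uint32_t A = 123456789, B = 987654321;
      uint64_t Full = static_cast<uint64_t>(A) * B;
      uint32_t Lo = static_cast<uint32_t>(Full);        // low half
      uint32_t Hi = static_cast<uint32_t>(Full >> 32);  // high half
      uint64_t Rebuilt = (static_cast<uint64_t>(Hi) << 32) | Lo;
      assert(Rebuilt == Full);
      return 0;
    }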
if (NeedInvert) - Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0)); + Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); Results.push_back(Tmp1); break; @@ -3994,8 +4006,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { VT.getScalarType(), Ex, Sh)); } SDValue Result = - DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars); ReplaceNode(SDValue(Node, 0), Result); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index ecf4c5d..6b8fec6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + /// GetFPLibCall - Return the right libcall for the given floating point type. static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, @@ -674,7 +676,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -720,7 +722,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -742,7 +744,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, use it. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; @@ -1340,7 +1342,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1433,7 +1435,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -1450,7 +1452,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. 
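Aside (illustration, not part of the patch): the NeedInvert fix above is easy to misread. getNOT flips every bit, but a SETCC result is a boolean in whatever encoding the target chose, so the correct inversion is getLogicalNOT, which XORs with the target's "true" constant. A standalone illustration, assuming the common 0/1 boolean encoding:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t True = 1, Cond = 1;        // a "true" setcc result
      uint32_t BitwiseNot = ~Cond;        // 0xfffffffe: no longer a boolean
      uint32_t LogicalNot = Cond ^ True;  // 0: still a valid boolean
      printf("0x%08x vs %u\n", BitwiseNot, LogicalNot);
    }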
- if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 18b2376..2483184 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Integer Result Promotion //===----------------------------------------------------------------------===// @@ -266,9 +268,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { EVT NVT = Op.getValueType(); SDLoc dl(N); - unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getConstant(DiffBits, TLI.getPointerTy())); + DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -432,7 +434,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), - DAG.getVTList(ValueVTs, 2), Ops, 2); + DAG.getVTList(ValueVTs), Ops); // Modified the sum result - switch anything that used the old sum to use // the new one. @@ -931,7 +933,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) NewOps.push_back(GetPromotedInteger(N->getOperand(i))); - return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { @@ -1270,6 +1272,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { + assert(Amt && "Expected zero shifts to be already optimized away."); SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; @@ -1296,9 +1299,9 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, // Emit this X << 1 as X+X. SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); SDValue LoOps[2] = { InL, InL }; - Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps); SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; - Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); } else { Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); Hi = DAG.getNode(ISD::OR, DL, NVT, @@ -1372,7 +1375,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne); + DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne); // If we don't know anything about the high bits, exit. 
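Aside (illustration, not part of the patch): in the PromoteIntRes_BSWAP hunk above, the swap happens in the promoted type, which leaves the interesting bytes in the high part, so the result is shifted right by DiffBits; the fix computes DiffBits from scalar sizes (correct for vector elements too) and uses the proper shift-amount type instead of the pointer type. In miniature, for i16 promoted to i32, with an example value assumed:

    #include <cstdint>
    #include <cstdio>

    static uint32_t bswap32(uint32_t X) {
      return (X >> 24) | ((X >> 8) & 0x0000ff00u) |
             ((X << 8) & 0x00ff0000u) | (X << 24);
    }

    int main() {
      uint16_t In = 0x1234;
      unsigned DiffBits = 32 - 16;                         // NVT - OVT scalar bits
      uint32_t Swapped = bswap32(In);                      // 0x34120000
      printf("0x%04x\n", (uint16_t)(Swapped >> DiffBits)); // 0x3412
    }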
if (((KnownZero|KnownOne) & HighBitMask) == 0) @@ -1547,20 +1550,20 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (hasCarry) { SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); } return; } if (N->getOpcode() == ISD::ADD) { - Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, @@ -1572,8 +1575,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, DAG.getConstant(1, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { - Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); - Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); @@ -1596,13 +1599,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, SDValue HiOps[3] = { LHSH, RHSH }; if (N->getOpcode() == ISD::ADDC) { - Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); } else { - Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); } // Legalized the flag result - switch anything that used the old flag to @@ -1621,9 +1624,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; SDValue HiOps[3] = { LHSH, RHSH }; - Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps); // Legalized the flag result - switch anything that used the old flag to // use the new one. 
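Aside (illustration, not part of the patch): when ADDC/ADDE are unavailable, the ADDSUB expansion above recovers the carry arithmetically: an unsigned add wrapped exactly when the low-half sum compares SETULT against one of its operands. The add case, standalone, with example halves assumed:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A 128-bit add built from 64-bit halves.
      uint64_t LL = ~0ULL, LH = 1;  // low/high halves of the LHS
      uint64_t RL = 1,     RH = 2;  // low/high halves of the RHS
      uint64_t Lo = LL + RL;             // wraps to 0
      uint64_t Carry = Lo < LL ? 1 : 0;  // SETULT detects the wraparound
      uint64_t Hi = LH + RH + Carry;
      printf("Hi=%llu Lo=%llu\n", (unsigned long long)Hi,
             (unsigned long long)Lo);    // Hi=4 Lo=0
    }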
@@ -1712,9 +1715,13 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); - const APInt &Cst = cast(N)->getAPIntValue(); - Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT); - Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT); + auto Constant = cast(N); + const APInt &Cst = Constant->getAPIntValue(); + bool IsTarget = Constant->isTargetOpcode(); + bool IsOpaque = Constant->isOpaque(); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT, IsTarget, IsOpaque); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT, IsTarget, + IsOpaque); } void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, @@ -1923,73 +1930,12 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDLoc dl(N); - bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); - bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); - bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); - bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); - if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { - SDValue LL, LH, RL, RH; - GetExpandedInteger(N->getOperand(0), LL, LH); - GetExpandedInteger(N->getOperand(1), RL, RH); - unsigned OuterBitSize = VT.getSizeInBits(); - unsigned InnerBitSize = NVT.getSizeInBits(); - unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); - unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); - - APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); - if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && - DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { - // The inputs are both zero-extended. - if (HasUMUL_LOHI) { - // We can emit a umul_lohi. - Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); - Hi = SDValue(Lo.getNode(), 1); - return; - } - if (HasMULHU) { - // We can emit a mulhu+mul. - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); - return; - } - } - if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { - // The input values are both sign-extended. - if (HasSMUL_LOHI) { - // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); - Hi = SDValue(Lo.getNode(), 1); - return; - } - if (HasMULHS) { - // We can emit a mulhs+mul. - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); - return; - } - } - if (HasUMUL_LOHI) { - // Lo,Hi = umul LHS, RHS. - SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(NVT, NVT), LL, RL); - Lo = UMulLOHI; - Hi = UMulLOHI.getValue(1); - RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); - return; - } - if (HasMULHU) { - Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); - RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); - return; - } - } + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + + if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH)) + return; // If nothing else, we can make a libcall. 
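Aside (illustration, not part of the patch): the ExpandIntRes_Constant split above is plain arithmetic, trunc for the low half and lshr-then-trunc for the high half; the actual fix is that the node's target/opaque constant flags now survive into both halves. A standalone model with an example constant:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Cst = 0x1122334455667788ULL;        // the wide constant
      unsigned NBitWidth = 32;                     // width of each new half
      uint32_t Lo = (uint32_t)Cst;                 // Cst.trunc(NBitWidth)
      uint32_t Hi = (uint32_t)(Cst >> NBitWidth);  // Cst.lshr(N).trunc(N)
      printf("Hi=0x%08x Lo=0x%08x\n", Hi, Lo);     // 0x11223344 / 0x55667788
    }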
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -2120,7 +2066,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); SDValue Ops[] = { LHSL, LHSH, ShiftOp }; - Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops); Hi = Lo.getValue(1); return; } @@ -2352,12 +2298,12 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Args.push_back(Entry); SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); - TargetLowering:: - CallLoweringInfo CLI(Chain, RetTy, true, false, false, false, - 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Func, Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, &Args, 0) + .setSExtResult(); + std::pair CallInfo = TLI.LowerCallTo(CLI); SplitInteger(CallInfo.first, Lo, Hi); @@ -2576,7 +2522,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) - TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, + nullptr); SDValue Tmp1, Tmp2; if (TLI.isTypeLegal(LHSLo.getValueType()) && TLI.isTypeLegal(RHSLo.getValueType())) @@ -2629,7 +2576,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2647,7 +2594,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. - if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); CCCode = ISD::SETNE; } @@ -2664,7 +2611,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. 
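Aside (illustration, not part of the patch): the select-of-bools identity quoted in the IntegerExpandSetCCOperands hunk above, (B1 ? B2 : B3) == (B1 & B2) | (!B1 & B3), is easy to verify exhaustively:

    #include <cstdio>

    int main() {
      for (int B1 = 0; B1 <= 1; ++B1)
        for (int B2 = 0; B2 <= 1; ++B2)
          for (int B3 = 0; B3 <= 1; ++B3) {
            int Sel = B1 ? B2 : B3;
            int Exp = (B1 & B2) | (!B1 & B3);
            printf("%d%d%d -> %d %d %s\n", B1, B2, B3, Sel, Exp,
                   Sel == Exp ? "ok" : "MISMATCH");
          }
    }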
- if (NewRHS.getNode() == 0) { + if (!NewRHS.getNode()) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); return NewLHS; @@ -2912,7 +2859,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } @@ -2959,7 +2906,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { Ops.push_back(Op); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { @@ -3007,7 +2954,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); } SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -3063,6 +3010,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), - &NewOps[0], NewOps.size()); - } + return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index e141883..3971fc3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -22,6 +22,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + static cl::opt EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); @@ -159,7 +161,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (Mapped & 128) dbgs() << " WidenedVectors"; dbgs() << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } @@ -433,7 +435,7 @@ NodeDone: if (Failed) { I->dump(&DAG); dbgs() << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } #endif @@ -488,7 +490,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // Some operands changed - update the node. if (!NewOps.empty()) { - SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); + SDNode *M = DAG.UpdateNodeOperands(N, NewOps); if (M != N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can @@ -736,7 +738,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = PromotedIntegers[Op]; - assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + assert(!OpEntry.getNode() && "Node is already promoted!"); OpEntry = Result; } @@ -747,7 +749,7 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + assert(!OpEntry.getNode() && "Node is already converted to integer!"); OpEntry = Result; } @@ -761,7 +763,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = ScalarizedVectors[Op]; - assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + assert(!OpEntry.getNode() && "Node is already scalarized!"); OpEntry = Result; } @@ -787,7 +789,7 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, // Remember that this is the result of the node. 
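Aside (illustration, not part of the patch): a large share of this commit is the C++11 cleanup seen above, where pointer comparisons against 0/NULL become nullptr or a plain boolean test. The equivalent idioms, standalone:

    #include <cassert>

    int main() {
      int *OpEntry = nullptr;
      assert(OpEntry == nullptr);  // was: OpEntry == 0, or == NULL
      assert(!OpEntry);            // the boolean form the patch prefers
    }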
std::pair &Entry = ExpandedIntegers[Op]; - assert(Entry.first.getNode() == 0 && "Node already expanded"); + assert(!Entry.first.getNode() && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; } @@ -814,7 +816,7 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, // Remember that this is the result of the node. std::pair &Entry = ExpandedFloats[Op]; - assert(Entry.first.getNode() == 0 && "Node already expanded"); + assert(!Entry.first.getNode() && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; } @@ -843,7 +845,7 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, // Remember that this is the result of the node. std::pair &Entry = SplitVectors[Op]; - assert(Entry.first.getNode() == 0 && "Node already split"); + assert(!Entry.first.getNode() && "Node already split"); Entry.first = Lo; Entry.second = Hi; } @@ -855,7 +857,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { AnalyzeNewValue(Result); SDValue &OpEntry = WidenedVectors[Op]; - assert(OpEntry.getNode() == 0 && "Node already widened!"); + assert(!OpEntry.getNode() && "Node already widened!"); OpEntry = Result; } @@ -1007,7 +1009,7 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); @@ -1049,11 +1051,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TLI.getPointerTy()); Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, - 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, SDLoc(Node)); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 947ea10..e4bbc78 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -16,7 +16,6 @@ #ifndef SELECTIONDAG_LEGALIZETYPES_H #define SELECTIONDAG_LEGALIZETYPES_H -#define DEBUG_TYPE "legalize-types" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -540,6 +539,7 @@ private: SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_VSELECT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index e9424f2..f40ed76 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -23,6 +23,8 @@ #include "llvm/IR/DataLayout.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Generic Result Expansion. 
//===----------------------------------------------------------------------===// @@ -352,7 +354,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SmallVector Ops; IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts); + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, + makeArrayRef(Ops.data(), NumElts)); return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } @@ -388,7 +391,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, EVT::getVectorVT(*DAG.getContext(), NewVT, NewElts.size()), - &NewElts[0], NewElts.size()); + NewElts); // Convert the new vector to the old vector type. return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); @@ -447,7 +450,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); for (unsigned i = 1; i < NumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 551d054..898cd29 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -63,6 +63,8 @@ class VectorLegalizer { SDValue ExpandUINT_TO_FLOAT(SDValue Op); // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); + // Expand bswap of vectors into a shuffle if legal. + SDValue ExpandBSWAP(SDValue Op); // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. 
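Aside (illustration, not part of the patch): the other mechanical theme, visible in the ExpandOp_BITCAST hunk above, is the switch to ArrayRef-taking overloads in place of pointer-plus-length pairs. The three shapes that recur through the rest of the patch, using the LLVM API as these hunks do:

    // Whole container (was: &Ops[0], Ops.size()):
    DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
    // Only the first NumElts entries:
    DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
                makeArrayRef(Ops.data(), NumElts));
    // A slice starting mid-array:
    DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
                makeArrayRef(&ConcatOps[Idx], End - Idx));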
SDValue ExpandVSELECT(SDValue Op); @@ -152,8 +154,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) Ops.push_back(LegalizeOp(Node->getOperand(i))); - SDValue Result = - SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); + SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast(Op.getNode()); @@ -298,6 +299,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case TargetLowering::Expand: if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) Result = ExpandSEXTINREG(Op); + else if (Node->getOpcode() == ISD::BSWAP) + Result = ExpandBSWAP(Op); else if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); else if (Node->getOpcode() == ISD::SELECT) @@ -343,7 +346,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { Operands[j] = Op.getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); return DAG.getNode(ISD::BITCAST, dl, VT, Op); } @@ -377,8 +380,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { Operands[j] = Op.getOperand(j); } - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0], - Operands.size()); + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); } // For FP_TO_INT we promote the result type to a vector type with wider @@ -546,10 +548,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } } - SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getNode()->getValueType(0), &Vals[0], Vals.size()); + Op.getNode()->getValueType(0), Vals); AddLegalizedOperand(Op.getValue(0), Value); AddLegalizedOperand(Op.getValue(1), NewChain); @@ -603,8 +604,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { Stores.push_back(Store); } - SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); AddLegalizedOperand(Op, TF); return TF; } @@ -648,7 +648,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Broadcast the mask so that the entire vector is all-one or all zero. SmallVector Ops(NumElem, Mask); - Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -686,6 +686,29 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { + EVT VT = Op.getValueType(); + + // Generate a byte wise shuffle mask for the BSWAP. + SmallVector ShuffleMask; + int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; + for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) + for (int J = ScalarSizeInBytes - 1; J >= 0; --J) + ShuffleMask.push_back((I * ScalarSizeInBytes) + J); + + EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); + + // Only emit a shuffle if the mask is legal. 
+ if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) + return DAG.UnrollVectorOp(Op.getNode()); + + SDLoc DL(Op); + Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), + ShuffleMask.data()); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -803,7 +826,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { (EltVT.getSizeInBits()), EltVT), DAG.getConstant(0, EltVT)); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 940a9c9..368eba3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -26,6 +26,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "legalize-types" + //===----------------------------------------------------------------------===// // Result Vector Scalarization: <1 x ty> -> ty. //===----------------------------------------------------------------------===// @@ -331,12 +333,24 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operand types must be vectors"); - - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue RHS = GetScalarizedVector(N->getOperand(1)); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT OpVT = LHS.getValueType(); EVT NVT = N->getValueType(0).getVectorElementType(); SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + LHS = GetScalarizedVector(LHS); + RHS = GetScalarizedVector(RHS); + } else { + EVT VT = OpVT.getVectorElementType(); + LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getConstant(0, TLI.getVectorIdxTy())); + RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } + // Turn it into a scalar SETCC. SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); @@ -358,7 +372,7 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"); SDValue Res = SDValue(); - if (Res.getNode() == 0) { + if (!Res.getNode()) { switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -382,6 +396,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::VSELECT: + Res = ScalarizeVecOp_VSELECT(N); + break; case ISD::STORE: Res = ScalarizeVecOp_STORE(cast(N), OpNo); break; @@ -420,13 +437,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); - SmallVector Ops(1); - Ops[0] = DAG.getNode(N->getOpcode(), SDLoc(N), - N->getValueType(0).getScalarType(), Elt); + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), + N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. 
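Aside (illustration, not part of the patch): the mask ExpandBSWAP builds above simply lists each element's byte indices in reverse, e.g. 3,2,1,0,7,6,5,4 for v2i32, which the target then accepts or rejects through isShuffleMaskLegal. The mask loop, reproduced standalone for v2i32:

    #include <cstdio>
    #include <vector>

    int main() {
      int NumElts = 2, ScalarSizeInBytes = 4;  // v2i32
      std::vector<int> Mask;
      for (int I = 0; I != NumElts; ++I)
        for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
          Mask.push_back(I * ScalarSizeInBytes + J);
      for (int M : Mask)
        printf("%d ", M);                      // 3 2 1 0 7 6 5 4
      printf("\n");
    }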
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), - &Ops[0], 1); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); } /// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - @@ -435,8 +450,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector Ops(N->getNumOperands()); for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) Ops[i] = GetScalarizedVector(N->getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), - &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops); } /// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to @@ -450,6 +464,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { return Res; } + +/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be +/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT +/// (still with vector output type since that was acceptable if we got here). +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { + SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); + EVT VT = N->getValueType(0); + + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1), + N->getOperand(2)); +} + /// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be /// scalarized, it must be <1 x ty>. Just store the element. SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ @@ -696,10 +722,10 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector LoOps(N->op_begin(), N->op_begin()+LoNumElts); - Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps); SmallVector HiOps(N->op_begin()+LoNumElts, N->op_end()); - Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps); } void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, @@ -717,10 +743,10 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector LoOps(N->op_begin(), N->op_begin()+NumSubvectors); - Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); SmallVector HiOps(N->op_begin()+NumSubvectors, N->op_end()); - Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); } void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, @@ -1064,7 +1090,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } // Construct the Lo/Hi output using a BUILD_VECTOR. - Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size()); + Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps); } else if (InputUsed[0] == -1U) { // No input vectors were used! The result is undefined. 
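Aside (illustration, not part of the patch): ScalarizeVecOp_VSELECT above rests on a small observation: once the <1 x i1> condition is scalarized, the vselect is an ordinary scalar select whose operands just happen to be one-element vectors. In miniature:

    #include <cstdio>

    int main() {
      bool Cond = true;              // the scalarized <1 x i1> condition
      float A[1] = {1.0f}, B[1] = {2.0f}, R[1];
      R[0] = Cond ? A[0] : B[0];     // the resulting ISD::SELECT
      printf("%g\n", R[0]);          // 1
    }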
Output = DAG.getUNDEF(NewVT); @@ -1100,7 +1126,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) return false; - if (Res.getNode() == 0) { + if (!Res.getNode()) { switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -1342,8 +1368,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { } } - return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), - &Elts[0], Elts.size()); + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); } SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { @@ -1700,8 +1725,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { while (SubConcatEnd < OpsToConcat) SubConcatOps[SubConcatEnd++] = undefVec; ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NextVT, &SubConcatOps[0], - OpsToConcat); + NextVT, SubConcatOps); ConcatEnd = SubConcatIdx + 1; } } @@ -1720,7 +1744,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { for (unsigned j = ConcatEnd; j < NumOps; ++j) ConcatOps[j] = UndefVal; } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(ConcatOps.data(), NumOps)); } SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { @@ -1762,8 +1787,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(InVT); for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, - &Ops[0], NumConcat); + SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); @@ -1798,7 +1822,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { @@ -1922,11 +1946,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { SDValue NewVec; if (InVT.isVector()) - NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NewInVT, &Ops[0], NewNumElts); + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); else - NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, - NewInVT, &Ops[0], NewNumElts); + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); } } @@ -1951,7 +1973,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps); } SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { @@ -1974,7 +1996,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { Ops[i] = N->getOperand(i); for (unsigned i = NumOperands; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops); } } else { InputWidened = true; @@ -2020,7 +2042,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < 
WidenNumElts; ++Idx) Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { @@ -2065,7 +2087,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2098,7 +2120,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { @@ -2137,7 +2159,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { @@ -2165,8 +2187,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { if (LdChain.size() == 1) NewChain = LdChain[0]; else - NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, - &LdChain[0], LdChain.size()); + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); // Modified the chain - switch anything that used the old chain to use // the new one. @@ -2372,7 +2393,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, DAG.getConstant(i, TLI.getVectorIdxTy()))); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { @@ -2421,7 +2442,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(j, TLI.getVectorIdxTy())); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { @@ -2450,8 +2471,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { if (StChain.size() == 1) return StChain[0]; else - return DAG.getNode(ISD::TokenFactor, SDLoc(ST), - MVT::Other,&StChain[0],StChain.size()); + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -2626,8 +2646,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, ConcatOps[0] = LdOp; for (unsigned i = 1; i != NumConcat; ++i) ConcatOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], - NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); } // Load vector by using multiple loads from largest vector to scalar @@ -2661,8 +2680,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, Loads.push_back(DAG.getUNDEF(L->getValueType(0))); size += L->getValueSizeInBits(0); } - L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), - &Loads[0], Loads.size()); + L = 
DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); } } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, @@ -2706,7 +2724,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, if (NewLdTy != LdTy) { // Create a larger vector ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, - &ConcatOps[Idx], End - Idx); + makeArrayRef(&ConcatOps[Idx], End - Idx)); Idx = End - 1; LdTy = NewLdTy; } @@ -2715,7 +2733,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - &ConcatOps[Idx], End - Idx); + makeArrayRef(&ConcatOps[Idx], End - Idx)); // We need to fill the rest with undefs to build the vector unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); @@ -2728,7 +2746,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, for (; i != NumOps; ++i) WidenOps[i] = UndefVal; } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps); } SDValue @@ -2779,7 +2797,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, for (; i != WidenNumElts; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); } @@ -2925,7 +2943,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { for (unsigned i = 1; i != NumConcat; ++i) Ops[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) @@ -2944,5 +2962,5 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 3b3424d..f92230c 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -31,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "scheduler" + static cl::opt DisableDFASched("disable-dfa-sched", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable use of DFA during scheduling")); @@ -49,7 +50,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : TLI = IS->getTargetLowering(); const TargetMachine &tm = (*IS->MF).getTarget(); - ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); + ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr); // This hard requirement could be relaxed, but for now // do not let it procede. assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); @@ -214,7 +215,7 @@ bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. 
SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = 0; + SUnit *OnlyAvailablePred = nullptr; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { SUnit &Pred = *I->getSUnit(); @@ -222,7 +223,7 @@ SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return 0; + return nullptr; OnlyAvailablePred = &Pred; } } @@ -581,7 +582,7 @@ void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { if (SU->isAvailable) return; // All preds scheduled. SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); - if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) + if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return; // Okay, we found a single predecessor that is available, but not scheduled. @@ -598,7 +599,7 @@ void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { /// to be placed in scheduling sequence. SUnit *ResourcePriorityQueue::pop() { if (empty()) - return 0; + return nullptr; std::vector::iterator Best = Queue.begin(); if (!DisableDFASched) { diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index b62bd62..ee54292 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -45,14 +45,17 @@ private: unsigned FrameIx; // valid for stack objects } u; MDNode *mdPtr; + bool IsIndirect; uint64_t Offset; DebugLoc DL; unsigned Order; bool Invalid; public: // Constructor for non-constants. - SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl, - unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, + bool indir, uint64_t off, DebugLoc dl, + unsigned O) : mdPtr(mdP), IsIndirect(indir), + Offset(off), DL(dl), Order(O), Invalid(false) { kind = SDNODE; u.s.Node = N; @@ -62,14 +65,16 @@ public: // Constructor for constants. SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = CONST; u.Const = C; } // Constructor for frame indices. SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = FRAMEIX; u.FrameIx = FI; } @@ -92,6 +97,9 @@ public: // Returns the FrameIx for a stack object unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + // Returns whether this is an indirect value. + bool isIndirect() { return IsIndirect; } + // Returns the offset. 
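Aside (illustration, not part of the patch): getSingleUnscheduledPred above is the usual find-the-unique-candidate-or-bail scan, now signalling both failure modes with nullptr. The same control flow, standalone:

    #include <cstdio>
    #include <vector>

    struct Pred { bool Scheduled; };

    static Pred *singleUnscheduled(std::vector<Pred> &Preds) {
      Pred *Only = nullptr;
      for (Pred &P : Preds) {
        if (P.Scheduled)
          continue;
        if (Only && Only != &P)
          return nullptr;            // a second candidate: give up
        Only = &P;
      }
      return Only;                   // the unique candidate, or nullptr
    }

    int main() {
      std::vector<Pred> Preds = {{true}, {false}, {true}};
      printf("%s\n", singleUnscheduled(Preds) ? "unique" : "none-or-many");
    }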
uint64_t getOffset() { return Offset; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 0687392..4d8c2c7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/SchedulerRegistry.h" #include "InstrEmitter.h" #include "ScheduleDAGSDNodes.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); STATISTIC(NumPRCopies, "Number of physical copies"); @@ -54,7 +55,7 @@ namespace { } SUnit *pop() { - if (empty()) return NULL; + if (empty()) return nullptr; SUnit *V = Queue.back(); Queue.pop_back(); return V; @@ -117,11 +118,11 @@ void ScheduleDAGFast::Schedule() { DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegDefs.resize(TRI->getNumRegs(), nullptr); LiveRegCycles.resize(TRI->getNumRegs(), 0); // Build the scheduling graph. - BuildSchedGraph(NULL); + BuildSchedGraph(nullptr); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -144,7 +145,7 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --PredSU->NumSuccsLeft; @@ -198,7 +199,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { assert(LiveRegDefs[I->getReg()] == SU && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; LiveRegCycles[I->getReg()] = 0; } } @@ -211,18 +212,18 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { /// successors to the newly created node. 
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (SU->getNode()->getGluedNode()) - return NULL; + return nullptr; SDNode *N = SU->getNode(); if (!N) - return NULL; + return nullptr; SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue) - return NULL; + return nullptr; else if (VT == MVT::Other) TryUnfold = true; } @@ -230,13 +231,13 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Glue) - return NULL; + return nullptr; } if (TryUnfold) { SmallVector NewNodes; if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) - return NULL; + return nullptr; DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -388,11 +389,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVectorImpl &Copies) { - SUnit *CopyFromSU = newSUnit(static_cast(NULL)); + SUnit *CopyFromSU = newSUnit(static_cast(nullptr)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = newSUnit(static_cast(NULL)); + SUnit *CopyToSU = newSUnit(static_cast(nullptr)); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -583,7 +584,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // and it is expensive. // If cross copy register class is null, then it's not possible to copy // the value at all. - SUnit *NewDef = 0; + SUnit *NewDef = nullptr; if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); if (!DestRC && !NewDef) @@ -661,7 +662,7 @@ private: void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { if (N->getNodeId() != 0) - llvm_unreachable(0); + llvm_unreachable(nullptr); if (!N->isMachineOpcode() && (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) @@ -674,7 +675,7 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { unsigned NumOps = N->getNumOperands(); if (unsigned NumLeft = NumOps) { - SDNode *GluedOpN = 0; + SDNode *GluedOpN = nullptr; do { const SDValue &Op = N->getOperand(NumLeft-1); SDNode *OpN = Op.getNode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index c283664..78ec4df 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" #include "llvm/ADT/STLExtras.h" @@ -36,6 +35,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); @@ -163,7 +164,7 @@ public: CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), - Topo(SUnits, NULL) { + Topo(SUnits, nullptr) { const TargetMachine &tm = mf.getTarget(); if (DisableSchedCycles || !NeedLatency) @@ -327,13 +328,13 @@ void ScheduleDAGRRList::Schedule() { NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence. 
- LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); - LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr); + LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr); CallSeqEndForStart.clear(); assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); // Build the scheduling graph. - BuildSchedGraph(NULL); + BuildSchedGraph(nullptr); DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); @@ -369,7 +370,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { dbgs() << "*** Scheduling failed! ***\n"; PredSU->dump(this); dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } #endif --PredSU->NumSuccsLeft; @@ -461,7 +462,7 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, // to get to the CALLSEQ_BEGIN, but we need to find the path with the // most nesting in order to ensure that we find the corresponding match. if (N->getOpcode() == ISD::TokenFactor) { - SDNode *Best = 0; + SDNode *Best = nullptr; unsigned BestMaxNest = MaxNest; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { unsigned MyNestLevel = NestLevel; @@ -497,10 +498,10 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, N = N->getOperand(i).getNode(); goto found_chain_operand; } - return 0; + return nullptr; found_chain_operand:; if (N->getOpcode() == ISD::EntryToken) - return 0; + return nullptr; } } @@ -742,8 +743,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; - LiveRegGens[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; + LiveRegGens[I->getReg()] = nullptr; releaseInterferences(I->getReg()); } } @@ -757,8 +758,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[CallResource] = NULL; - LiveRegGens[CallResource] = NULL; + LiveRegDefs[CallResource] = nullptr; + LiveRegGens[CallResource] = nullptr; releaseInterferences(CallResource); } } @@ -813,8 +814,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); --NumLiveRegs; - LiveRegDefs[I->getReg()] = NULL; - LiveRegGens[I->getReg()] = NULL; + LiveRegDefs[I->getReg()] = nullptr; + LiveRegGens[I->getReg()] = nullptr; releaseInterferences(I->getReg()); } } @@ -841,8 +842,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); --NumLiveRegs; - LiveRegDefs[CallResource] = NULL; - LiveRegGens[CallResource] = NULL; + LiveRegDefs[CallResource] = nullptr; + LiveRegGens[CallResource] = nullptr; releaseInterferences(CallResource); } } @@ -855,7 +856,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { // This becomes the nearest def. Note that an earlier def may still be // pending if this is a two-address node. 
LiveRegDefs[I->getReg()] = SU; - if (LiveRegGens[I->getReg()] == NULL || + if (LiveRegGens[I->getReg()] == nullptr || I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight()) LiveRegGens[I->getReg()] = I->getSUnit(); } @@ -936,17 +937,17 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) { SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SDNode *N = SU->getNode(); if (!N) - return NULL; + return nullptr; if (SU->getNode()->getGluedNode()) - return NULL; + return nullptr; SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { EVT VT = N->getValueType(i); if (VT == MVT::Glue) - return NULL; + return nullptr; else if (VT == MVT::Other) TryUnfold = true; } @@ -954,18 +955,18 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { const SDValue &Op = N->getOperand(i); EVT VT = Op.getNode()->getValueType(Op.getResNo()); if (VT == MVT::Glue) - return NULL; + return nullptr; } if (TryUnfold) { SmallVector NewNodes; if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) - return NULL; + return nullptr; // unfolding an x86 DEC64m operation results in store, dec, load which // can't be handled here so quit if (NewNodes.size() == 3) - return NULL; + return nullptr; DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); @@ -1136,11 +1137,11 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVectorImpl &Copies) { - SUnit *CopyFromSU = CreateNewSUnit(NULL); + SUnit *CopyFromSU = CreateNewSUnit(nullptr); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; - SUnit *CopyToSU = CreateNewSUnit(NULL); + SUnit *CopyToSU = CreateNewSUnit(nullptr); CopyToSU->CopySrcRC = DestRC; CopyToSU->CopyDstRC = SrcRC; @@ -1244,7 +1245,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) { if (const RegisterMaskSDNode *Op = dyn_cast(N->getOperand(i).getNode())) return Op->getRegMask(); - return NULL; + return nullptr; } /// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay @@ -1355,7 +1356,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { /// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { - SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop(); + SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); while (CurSU) { SmallVector LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) @@ -1389,7 +1390,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. - SUnit *BtSU = NULL; + SUnit *BtSU = nullptr; unsigned LiveCycle = UINT_MAX; for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { unsigned Reg = LRegs[j]; @@ -1449,7 +1450,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // expensive. // If cross copy register class is null, then it's not possible to copy // the value at all. 
- SUnit *NewDef = 0; + SUnit *NewDef = nullptr; if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); if (!DestRC && !NewDef) @@ -1646,7 +1647,7 @@ public: const TargetLowering *tli) : SchedulingPriorityQueue(hasReadyFilter), CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); RegLimit.resize(NumRC); @@ -1674,7 +1675,7 @@ public: void updateNode(const SUnit *SU) override; void releaseState() override { - SUnits = 0; + SUnits = nullptr; SethiUllmanNumbers.clear(); std::fill(RegPressure.begin(), RegPressure.end(), 0); } @@ -1775,7 +1776,7 @@ public: } SUnit *pop() override { - if (Queue.empty()) return NULL; + if (Queue.empty()) return nullptr; SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); V->NodeQueueId = 0; @@ -1783,7 +1784,7 @@ public: } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - void dump(ScheduleDAG *DAG) const { + void dump(ScheduleDAG *DAG) const override { // Emulate pop() without clobbering NodeQueueIds. std::vector DumpQueue = Queue; SF DumpPicker = Picker; @@ -2824,7 +2825,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { continue; // Locate the single data predecessor. - SUnit *PredSU = 0; + SUnit *PredSU = nullptr; for (SUnit::const_pred_iterator II = SU->Preds.begin(), EE = SU->Preds.end(); II != EE; ++II) if (!II->isCtrl()) { @@ -2980,7 +2981,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = - new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; @@ -2994,7 +2995,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = - new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 5639894..de910b7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" #include "SDNodeDbgValue.h" @@ -35,6 +34,8 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + STATISTIC(LoadsClustered, "Number of loads clustered together"); // This allows latency based scheduler to notice high latency instructions @@ -46,7 +47,7 @@ static cl::opt HighLatencyCycles( "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), BB(0), DAG(0), + : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. 
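// [Editorial note -- not part of the original patch] The mechanical change in
// the hunks above and below replaces the integral null constants 0/NULL with
// C++11 nullptr. A minimal sketch of why this is safer (hypothetical
// overloads, not LLVM APIs):
void release(int Flags);
void release(SUnit *SU);
void demo() {
  release(0);        // silently binds to release(int) -- wrong for a pointer
  release(nullptr);  // unambiguous: std::nullptr_t converts only to pointers
}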
@@ -67,12 +68,12 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { /// SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { #ifndef NDEBUG - const SUnit *Addr = 0; + const SUnit *Addr = nullptr; if (!SUnits.empty()) Addr = &SUnits[0]; #endif SUnits.push_back(SUnit(N, (unsigned)SUnits.size())); - assert((Addr == 0 || Addr == &SUnits[0]) && + assert((Addr == nullptr || Addr == &SUnits[0]) && "SUnits std::vector reallocated on the fly!"); SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); @@ -142,8 +143,8 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, if (ExtraOper.getNode()) Ops.push_back(ExtraOper); - SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); - MachineSDNode::mmo_iterator Begin = 0, End = 0; + SDVTList VTList = DAG->getVTList(VTs); + MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr; MachineSDNode *MN = dyn_cast(N); // Store memory references. @@ -152,7 +153,7 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, End = MN->memoperands_end(); } - DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops); // Reset the memory references if (MN) @@ -205,7 +206,7 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { - SDNode *Chain = 0; + SDNode *Chain = nullptr; unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) Chain = Node->getOperand(NumOps-1).getNode(); @@ -219,8 +220,11 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { DenseMap O2SMap; // Map from offset to SDNode. bool Cluster = false; SDNode *Base = Node; + // This algorithm requires a reasonably low use count before finding a match + // to avoid uselessly blowing up compile time in large blocks. + unsigned UseCount = 0; for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); - I != E; ++I) { + I != E && UseCount < 100; ++I, ++UseCount) { SDNode *User = *I; if (User == Node || !Visited.insert(User)) continue; @@ -237,6 +241,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { if (Offset2 < Offset1) Base = User; Cluster = true; + // Reset UseCount to allow more matches. + UseCount = 0; } if (!Cluster) @@ -266,7 +272,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { // Cluster loads by adding MVT::Glue outputs and inputs. This also // ensure they are scheduled in order of increasing addresses. SDNode *Lead = Loads[0]; - SDValue InGlue = SDValue(0, 0); + SDValue InGlue = SDValue(nullptr, 0); if (AddGlue(Lead, InGlue, true, DAG)) InGlue = SDValue(Lead, Lead->getNumValues() - 1); for (unsigned I = 1, E = Loads.size(); I != E; ++I) { @@ -567,7 +573,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { return; // Found a normal regdef. } Node = Node->getGluedNode(); - if (Node == NULL) { + if (!Node) { return; // No values left to visit. } InitNodeNumDefs(); @@ -740,7 +746,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // BB->back().isPHI() test will not fire when we want it to. std::prev(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. 
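// [Editorial note -- not part of the original patch] The
// ClusterNeighboringLoads hunks above bound the walk over a chain node's use
// list: after 100 consecutive uses with no new cluster candidate the scan
// gives up, and the counter is reset whenever a match is found. An equivalent
// countdown sketch of the pattern (Uses and isCandidate are hypothetical):
unsigned Budget = 100;             // assumed cap, mirroring the patch
for (auto I = Uses.begin(), E = Uses.end(); I != E && Budget; ++I, --Budget)
  if (isCandidate(*I))
    Budget = 100;                  // progress found: refill the budget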
-    Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+    Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr));
     return;
   }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5e11dbb..39ebadf 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -139,7 +139,7 @@ namespace llvm {
     public:
       RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);

-      bool IsValid() const { return Node != NULL; }
+      bool IsValid() const { return Node != nullptr; }

       MVT GetValue() const {
         assert(IsValid() && "bad iterator");
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index fb86103..51c51d6 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -18,7 +18,6 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "pre-RA-sched"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "ScheduleDAGSDNodes.h"
 #include "llvm/ADT/Statistic.h"
@@ -35,6 +34,8 @@
 #include <climits>
 using namespace llvm;

+#define DEBUG_TYPE "pre-RA-sched"
+
 STATISTIC(NumNoops , "Number of noops inserted");
 STATISTIC(NumStalls, "Number of pipeline stalls");

@@ -120,7 +121,7 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
     dbgs() << "*** Scheduling failed! ***\n";
     SuccSU->dump(this);
     dbgs() << " has been released too many times!\n";
-    llvm_unreachable(0);
+    llvm_unreachable(nullptr);
   }
 #endif
   assert(!D.isWeak() && "unexpected artificial DAG edge");
@@ -204,12 +205,12 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
     // don't advance the hazard recognizer.
     if (AvailableQueue->empty()) {
       // Reset DFA state.
-      AvailableQueue->scheduledNode(0);
+      AvailableQueue->scheduledNode(nullptr);
       ++CurCycle;
       continue;
     }

-    SUnit *FoundSUnit = 0;
+    SUnit *FoundSUnit = nullptr;
     bool HasNoopHazards = false;

     while (!AvailableQueue->empty()) {
@@ -256,7 +257,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
       // processors without pipeline interlocks and other cases.
       DEBUG(dbgs() << "*** Emitting noop\n");
       HazardRec->EmitNoop();
-      Sequence.push_back(0);   // NULL here means noop
+      Sequence.push_back(nullptr);   // NULL here means noop
       ++NumNoops;
       ++CurCycle;
     }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d11ce80..b1b8035 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -364,29 +364,28 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
 /// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
 ///
 static void AddNodeIDOperands(FoldingSetNodeID &ID,
-                              const SDValue *Ops, unsigned NumOps) {
-  for (; NumOps; --NumOps, ++Ops) {
-    ID.AddPointer(Ops->getNode());
-    ID.AddInteger(Ops->getResNo());
+                              ArrayRef<SDValue> Ops) {
+  for (auto& Op : Ops) {
+    ID.AddPointer(Op.getNode());
+    ID.AddInteger(Op.getResNo());
   }
 }

 /// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
 ///
 static void AddNodeIDOperands(FoldingSetNodeID &ID,
-                              const SDUse *Ops, unsigned NumOps) {
-  for (; NumOps; --NumOps, ++Ops) {
-    ID.AddPointer(Ops->getNode());
-    ID.AddInteger(Ops->getResNo());
+                              ArrayRef<SDUse> Ops) {
+  for (auto& Op : Ops) {
+    ID.AddPointer(Op.getNode());
+    ID.AddInteger(Op.getResNo());
   }
 }

-static void AddNodeIDNode(FoldingSetNodeID &ID,
-                          unsigned short OpC, SDVTList VTList,
-                          const SDValue *OpList, unsigned N) {
+static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
+                          SDVTList VTList, ArrayRef<SDValue> OpList) {
   AddNodeIDOpcode(ID, OpC);
   AddNodeIDValueTypes(ID, VTList);
-  AddNodeIDOperands(ID, OpList, N);
+  AddNodeIDOperands(ID, OpList);
 }

 /// AddNodeIDCustom - If this is an SDNode with special info, add this info to
@@ -528,7 +527,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
   // Add the return value info.
   AddNodeIDValueTypes(ID, N->getVTList());
   // Add the operand info.
-  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+  AddNodeIDOperands(ID, makeArrayRef(N->op_begin(), N->op_end()));

   // Handle SDNode leafs with special info.
   AddNodeIDCustom(ID, N);
@@ -606,7 +605,7 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
     SDNode *N = DeadNodes.pop_back_val();

     for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
-      DUL->NodeDeleted(N, 0);
+      DUL->NodeDeleted(N, nullptr);

     // Take the node out of the appropriate CSE map.
     RemoveNodeFromCSEMaps(N);
@@ -684,8 +683,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
   case ISD::CONDCODE:
     assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
            "Cond code doesn't exist!");
-    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
-    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+    Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr;
+    CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr;
     break;
   case ISD::ExternalSymbol:
     Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
@@ -702,8 +701,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
     if (VT.isExtended()) {
       Erased = ExtendedValueTypeNodes.erase(VT);
     } else {
-      Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
-      ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+      Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr;
+      ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr;
     }
     break;
   }
@@ -765,11 +764,11 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
 SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
                                            void *&InsertPos) {
   if (doNotCSE(N))
-    return 0;
+    return nullptr;

   SDValue Ops[] = { Op };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
   AddNodeIDCustom(ID, N);
   SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
   return Node;
@@ -783,11 +782,11 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
                                            SDValue Op1, SDValue Op2,
                                            void *&InsertPos) {
   if (doNotCSE(N))
-    return 0;
+    return nullptr;

   SDValue Ops[] = { Op1, Op2 };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
   AddNodeIDCustom(ID, N);
   SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
   return Node;
@@ -798,14 +797,13 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
 /// were replaced with those specified.  If this node is never memoized,
 /// return null, otherwise return a pointer to the slot it would take.  If a
 /// node already exists with these operands, the slot will be non-null.
-SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
-                                           const SDValue *Ops,unsigned NumOps,
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
                                            void *&InsertPos) {
   if (doNotCSE(N))
-    return 0;
+    return nullptr;

   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+  AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
   AddNodeIDCustom(ID, N);
   SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
   return Node;
@@ -901,10 +899,10 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {

 // EntryNode could meaningfully have debug info if we can find it...
 SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
-  : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(0), OptLevel(OL),
+  : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL),
     EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
     Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
-    UpdateListeners(0) {
+    UpdateListeners(nullptr) {
   AllNodes.push_back(&EntryNode);
   DbgInfo = new SDDbgInfo();
 }
@@ -937,11 +935,11 @@ void SelectionDAG::clear() {
   ExternalSymbols.clear();
   TargetExternalSymbols.clear();
   std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
-            static_cast<CondCodeSDNode*>(0));
+            static_cast<CondCodeSDNode*>(nullptr));
   std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
-            static_cast<SDNode*>(0));
+            static_cast<SDNode*>(nullptr));

-  EntryNode.UseList = 0;
+  EntryNode.UseList = nullptr;
   AllNodes.push_back(&EntryNode);
   Root = getEntryNode();
   DbgInfo->clear();
@@ -965,6 +963,14 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) {
     getNode(ISD::TRUNCATE, DL, VT, Op);
 }

+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT) {
+  if (VT.bitsLE(Op.getValueType()))
+    return getNode(ISD::TRUNCATE, SL, VT, Op);
+
+  TargetLowering::BooleanContent BType = TLI->getBooleanContents(VT.isVector());
+  return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
+}
+
 SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
   assert(!VT.isVector() &&
          "getZeroExtendInReg should use the vector element type instead of "
@@ -986,6 +992,22 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
   return getNode(ISD::XOR, DL, VT, Val, NegOne);
 }

+SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {
+  EVT EltVT = VT.getScalarType();
+  SDValue TrueValue;
+  switch (TLI->getBooleanContents(VT.isVector())) {
+    case TargetLowering::ZeroOrOneBooleanContent:
+    case TargetLowering::UndefinedBooleanContent:
+      TrueValue = getConstant(1, VT);
+      break;
+    case TargetLowering::ZeroOrNegativeOneBooleanContent:
+      TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+                              VT);
+      break;
+  }
+  return getNode(ISD::XOR, DL, VT, Val, TrueValue);
+}
+
 SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) {
   EVT EltVT = VT.getScalarType();
   assert((EltVT.getSizeInBits() >= 64 ||
@@ -1063,7 +1085,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
     SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT,
                              getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT,
-                                     &Ops[0], Ops.size()));
+                                     Ops));
     return Result;
   }

@@ -1071,11 +1093,11 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
          "APInt size does not match type size!");
   unsigned Opc = isT ?
ISD::TargetConstant : ISD::Constant; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(EltVT), None); ID.AddPointer(Elt); ID.AddBoolean(isO); - void *IP = 0; - SDNode *N = NULL; + void *IP = nullptr; + SDNode *N = nullptr; if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) if (!VT.isVector()) return SDValue(N, 0); @@ -1090,7 +1112,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, if (VT.isVector()) { SmallVector Ops; Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); } return Result; } @@ -1114,10 +1136,10 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ // we don't have issues with SNANs. unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(EltVT), None); ID.AddPointer(&V); - void *IP = 0; - SDNode *N = NULL; + void *IP = nullptr; + SDNode *N = nullptr; if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) if (!VT.isVector()) return SDValue(N, 0); @@ -1133,7 +1155,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ SmallVector Ops; Ops.assign(VT.getVectorNumElements(), Result); // FIXME SDLoc info might be appropriate here - Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); } return Result; } @@ -1172,7 +1194,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, if (!GVar) { // If GV is an alias then use the aliasee for determining thread-localness. if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->getAliasedGlobal()); + GVar = dyn_cast_or_null(GA->getAliasee()); } unsigned Opc; @@ -1182,12 +1204,12 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); ID.AddInteger(GV->getType()->getAddressSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1202,9 +1224,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(FI); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1220,10 +1242,10 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(JTI); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1245,12 +1267,12 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(Alignment); ID.AddInteger(Offset); ID.AddPointer(C); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1273,12 +1295,12 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddInteger(Alignment); ID.AddInteger(Offset); C->addSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1292,11 +1314,11 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, unsigned char TargetFlags) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0); + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); ID.AddInteger(Index); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1309,9 +1331,9 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); ID.AddPointer(MBB); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1358,7 +1380,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { if ((unsigned)Cond >= CondCodeNodes.size()) CondCodeNodes.resize(Cond+1); - if (CondCodeNodes[Cond] == 0) { + if (!CondCodeNodes[Cond]) { CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); @@ -1441,13 +1463,18 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, if (Identity && NElts) return N1; + // Shuffling a constant splat doesn't change the result. 
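+  // (Editorial example, not part of the original patch: if N1 is
+  // BUILD_VECTOR <C, C, C, C> and N2 is undef, every mask element selects
+  // some lane of N1; since every lane holds the same constant C, the
+  // shuffle's result is the splat N1 itself, so returning N1 is safe.)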
+ if (N2Undef && N1.getOpcode() == ISD::BUILD_VECTOR) + if (cast(N1)->getConstantSplatValue()) + return N1; + FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; - AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops); for (unsigned i = 0; i != NElts; ++i) ID.AddInteger(MaskVec[i]); - void* IP = 0; + void* IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1478,14 +1505,14 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, FoldingSetNodeID ID; SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; - AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); - void* IP = 0; + AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); + void* IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), dl.getDebugLoc(), - Ops, 5, Code); + Ops, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1493,9 +1520,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); + AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); ID.AddInteger(RegNo); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1507,9 +1534,9 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0); + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); ID.AddPointer(RegMask); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1522,9 +1549,9 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; - AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1); + AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); ID.AddPointer(Label); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1543,11 +1570,11 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), None); ID.AddPointer(BA); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1563,10 +1590,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { "SrcValue is not a pointer?"); FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0); + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); ID.AddPointer(V); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1579,10 +1606,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { /// getMDNode - Return an MDNodeSDNode which holds an MDNode. 
SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); ID.AddPointer(MD); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1597,11 +1624,11 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS) { SDValue Ops[] = {Ptr}; FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1); + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops); ID.AddInteger(SrcAS); ID.AddInteger(DestAS); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -1780,17 +1807,14 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth) const { APInt KnownZero, KnownOne; - ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op, KnownZero, KnownOne, Depth); return (KnownZero & Mask) == Mask; } -/// ComputeMaskedBits - Determine which of the bits specified in Mask are -/// known to be either zero or one and return them in the KnownZero/KnownOne -/// bitsets. This code only analyzes bits in Mask, in order to short-circuit -/// processing. -void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, - APInt &KnownOne, unsigned Depth) const { +/// Determine which bits of Op are known to be either zero or one and return +/// them in the KnownZero/KnownOne bitsets. +void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); @@ -1805,48 +1829,40 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // We know all of the bits for a constant! KnownOne = cast(Op)->getAPIntValue(); KnownZero = ~KnownOne; - return; + break; case ISD::AND: // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; - return; + break; case ISD::OR: - ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; // Output known-1 are known to be set if set in either the LHS | RHS. 
     KnownOne |= KnownOne2;
-    return;
+    break;
   case ISD::XOR: {
-    ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);

     // Output known-0 bits are known if clear or set in both the LHS & RHS.
     APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
     // Output known-1 are known to be set if set in only one of the LHS, RHS.
     KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
     KnownZero = KnownZeroOut;
-    return;
+    break;
   }
   case ISD::MUL: {
-    ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);

     // If low bits are zero in either operand, output low known-0 bits.
     // Also compute a conservative estimate for high known-0 bits.
@@ -1863,46 +1879,42 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
     LeadZ = std::min(LeadZ, BitWidth);
     KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
                 APInt::getHighBitsSet(BitWidth, LeadZ);
-    return;
+    break;
   }
   case ISD::UDIV: {
     // For the purposes of computing leading zeros we can conservatively
     // treat a udiv as a logical right shift by the power of 2 known to
     // be less than the denominator.
-    ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+    computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
     unsigned LeadZ = KnownZero2.countLeadingOnes();

     KnownOne2.clearAllBits();
     KnownZero2.clearAllBits();
-    ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
     unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
     if (RHSUnknownLeadingOnes != BitWidth)
       LeadZ = std::min(BitWidth,
                        LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);

     KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
-    return;
+    break;
   }
   case ISD::SELECT:
-    ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);

     // Only known if known in both the LHS and RHS.
     KnownOne &= KnownOne2;
     KnownZero &= KnownZero2;
-    return;
+    break;
   case ISD::SELECT_CC:
-    ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
-    ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+    computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);

     // Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2; KnownZero &= KnownZero2; - return; + break; case ISD::SADDO: case ISD::UADDO: case ISD::SSUBO: @@ -1910,14 +1922,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::SMULO: case ISD::UMULO: if (Op.getResNo() != 1) - return; + break; // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); - return; + break; case ISD::SHL: // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantSDNode *SA = dyn_cast(Op.getOperand(1))) { @@ -1925,16 +1937,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) - return; + break; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero <<= ShAmt; KnownOne <<= ShAmt; // low bits known zero. KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); } - return; + break; case ISD::SRL: // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 if (ConstantSDNode *SA = dyn_cast(Op.getOperand(1))) { @@ -1942,31 +1953,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) - return; + break; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); KnownZero |= HighBits; // High bits known zero. } - return; + break; case ISD::SRA: if (ConstantSDNode *SA = dyn_cast(Op.getOperand(1))) { unsigned ShAmt = SA->getZExtValue(); // If the shift count is an invalid immediate, don't do anything. if (ShAmt >= BitWidth) - return; + break; // If any of the demanded bits are produced by the sign extension, we also // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.lshr(ShAmt); KnownOne = KnownOne.lshr(ShAmt); @@ -1980,7 +1989,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, KnownOne |= HighBits; // New bits are known one. } } - return; + break; case ISD::SIGN_EXTEND_INREG: { EVT EVT = cast(Op.getOperand(1))->getVT(); unsigned EBits = EVT.getScalarType().getSizeInBits(); @@ -1998,10 +2007,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, if (NewBits.getBoolValue()) InputDemandedBits |= InSignBit; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownOne &= InputDemandedBits; KnownZero &= InputDemandedBits; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. 
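// [Editorial worked example -- not part of the original patch] For
// (sign_extend_inreg x, i8) on an i32 value, the source sign bit is bit 7.
// If computeKnownBits proves bit 7 of x is zero, the replicated bits 8..31
// of the result become known zero; if bit 7 is known one, bits 8..31 become
// known one.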
@@ -2015,7 +2023,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, KnownZero &= ~NewBits; KnownOne &= ~NewBits; } - return; + break; } case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: @@ -2025,7 +2033,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, unsigned LowBits = Log2_32(BitWidth)+1; KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); KnownOne.clearAllBits(); - return; + break; } case ISD::LOAD: { LoadSDNode *LD = cast(Op); @@ -2035,9 +2043,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeMaskedBitsLoad(*Ranges, KnownZero); + computeKnownBitsLoad(*Ranges, KnownZero); } - return; + break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); @@ -2045,11 +2053,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); KnownZero |= NewBits; - return; + break; } case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); @@ -2058,13 +2066,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); // Note if the sign bit is known to be zero or one. 
bool SignBitKnownZero = KnownZero.isNegative(); bool SignBitKnownOne = KnownOne.isNegative(); - assert(!(SignBitKnownZero && SignBitKnownOne) && - "Sign bit can't be known to be both zero and one!"); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -2074,25 +2080,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, KnownZero |= NewBits; else if (SignBitKnownOne) KnownOne |= NewBits; - return; + break; } case ISD::ANY_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); - return; + break; } case ISD::TRUNCATE: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); KnownZero = KnownZero.zext(InBits); KnownOne = KnownOne.zext(InBits); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero = KnownZero.trunc(BitWidth); KnownOne = KnownOne.trunc(BitWidth); break; @@ -2100,15 +2105,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::AssertZext: { EVT VT = cast(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); KnownZero |= (~InMask); KnownOne &= (~KnownZero); - return; + break; } case ISD::FGETSIGN: // All bits are zero except the low bit. KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); - return; + break; case ISD::SUB: { if (ConstantSDNode *CLHS = dyn_cast(Op.getOperand(0))) { @@ -2119,7 +2124,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is @@ -2138,18 +2143,16 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. 
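// [Editorial note -- not part of the original patch] Throughout this function
// the rename is paired with a control-flow cleanup: each case now ends in
// `break` instead of `return`, so the per-case sanity asserts can be dropped
// in favor of a single postcondition after the switch (added in a hunk
// further down):
//
//   assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");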
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); - ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); KnownZeroOut = std::min(KnownZeroOut, KnownZero2.countTrailingOnes()); if (Op.getOpcode() == ISD::ADD) { KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); - return; + break; } // With ADDE, a carry bit may be added in, so we can only use this @@ -2158,14 +2161,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // are known zero. if (KnownZeroOut >= 2) // ADDE KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); - return; + break; } case ISD::SREM: if (ConstantSDNode *Rem = dyn_cast(Op.getOperand(1))) { const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. KnownZero = KnownZero2 & LowBits; @@ -2183,36 +2186,35 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } - return; + break; case ISD::UREM: { if (ConstantSDNode *Rem = dyn_cast(Op.getOperand(1))) { const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); KnownZero |= ~LowBits; - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); break; } } // Since the result is less than or equal to either operand, any leading // zero bits in either operand must also exist in the result. - ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); - return; + break; } case ISD::FrameIndex: case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { // The low bits are known zero if the pointer is aligned. KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); - return; + break; } break; @@ -2224,9 +2226,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); - return; + TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + break; } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } /// ComputeNumSignBits - Return the number of times the sign bit of the @@ -2300,7 +2304,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. 
Now proceed to the generic code that uses
-    // ComputeMaskedBits, and pick whichever answer is better.
+    // computeKnownBits, and pick whichever answer is better.
   }
   break;

@@ -2350,7 +2354,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
     if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
       if (CRHS->isAllOnesValue()) {
         APInt KnownZero, KnownOne;
-        ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+        computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);

         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
@@ -2375,7 +2379,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
     if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
       if (CLHS->isNullValue()) {
         APInt KnownZero, KnownOne;
-        ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+        computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
         // If the input is known to be 0 or 1, the output is 0/-1, which is all
         // sign bits set.
         if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
@@ -2422,14 +2426,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) {
-    unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth);
+    unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
     if (NumBits > 1)
       FirstAnswer = std::max(FirstAnswer, NumBits);
   }

   // Finally, if we can prove that the top bits of the result are 0's or 1's,
   // use this information.
   APInt KnownZero, KnownOne;
-  ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+  computeKnownBits(Op, KnownZero, KnownOne, Depth);

   APInt Mask;
   if (KnownZero.isNegative()) {        // sign bit is 0
@@ -2517,8 +2521,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
 ///
 SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
-  void *IP = 0;
+  AddNodeIDNode(ID, Opcode, getVTList(VT), None);
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);

@@ -2789,8 +2793,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
   if (VT != MVT::Glue) { // Don't CSE flag producing nodes
     FoldingSetNodeID ID;
     SDValue Ops[1] = { Operand };
-    AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
-    void *IP = 0;
+    AddNodeIDNode(ID, Opcode, VTs, Ops);
+    void *IP = nullptr;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
       return SDValue(E, 0);

@@ -2811,6 +2815,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,

 SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
                                              SDNode *Cst1, SDNode *Cst2) {
+  // If the opcode is a target-specific ISD node, there's nothing we can
+  // do here and the operand rules may not line up with the below, so
+  // bail early.
+  if (Opcode >= ISD::BUILTIN_OP_END)
+    return SDValue();
+
   SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
   SmallVector<SDValue, 4> Outputs;
   EVT SVT = VT.getScalarType();
@@ -2915,13 +2925,18 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
     }
   }

+  assert((Scalar1 && Scalar2) || (VT.getVectorNumElements() == Outputs.size() &&
+                                  "Expected a scalar or vector!"));
+
   // Handle the scalar case first.
-  if (Scalar1 && Scalar2)
+  if (!VT.isVector())
     return Outputs.back();

-  // Otherwise build a big vector out of the scalar elements we generated.
- return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs.data(), - Outputs.size()); + // We may have a vector type but a scalar result. Create a splat. + Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + + // Build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, @@ -2951,7 +2966,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); - return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } break; case ISD::AND: @@ -3370,8 +3385,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2 }; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTs, Ops, 2); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -3420,7 +3435,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, N1.getNode()->op_end()); Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); - return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); + return getNode(ISD::BUILD_VECTOR, DL, VT, Elts); } break; case ISD::SETCC: { @@ -3477,8 +3492,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (VT != MVT::Glue) { SDValue Ops[] = { N1, N2, N3 }; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTs, Ops, 3); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -3501,14 +3516,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; - return getNode(Opcode, DL, VT, Ops, 4); + return getNode(Opcode, DL, VT, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; - return getNode(Opcode, DL, VT, Ops, 5); + return getNode(Opcode, DL, VT, Ops); } /// getStackArgumentTokenFactor - Compute a TokenFactor to force all @@ -3530,8 +3545,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { ArgChains.push_back(SDValue(L, 1)); // Build a tokenfactor for all the chains. 
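// [Editorial note -- not part of the original patch] The recurring
// (&Vec[0], Vec.size()) -> Vec rewrites in this file lean on llvm::ArrayRef's
// implicit conversions. A minimal sketch of the idiom (count and demo are
// hypothetical):
unsigned count(ArrayRef<SDValue> Ops) { return Ops.size(); }
void demo(const SmallVectorImpl<SDValue> &V, SDValue (&A)[4]) {
  count(V);    // a SmallVector converts implicitly
  count(A);    // so does a fixed-size C array
  count(None); // and llvm::None yields an empty ArrayRef
}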
- return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, - &ArgChains[0], ArgChains.size()); + return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); } /// getMemsetValue - Vectorized representation of the memset value @@ -3600,7 +3614,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, Type *Ty = VT.getTypeForEVT(*DAG.getContext()); if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) return DAG.getConstant(Val, VT); - return SDValue(0, 0); + return SDValue(nullptr, 0); } /// getMemBasePlusOffset - Returns base and offset node for the @@ -3616,7 +3630,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, /// static bool isMemSrcFromString(SDValue Src, StringRef &Str) { unsigned SrcDelta = 0; - GlobalAddressSDNode *G = NULL; + GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast(Src); else if (Src.getOpcode() == ISD::ADD && @@ -3852,8 +3866,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, Size -= VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, @@ -3918,8 +3931,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &LoadChains[0], LoadChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); OutChains.clear(); for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; @@ -3933,8 +3945,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, DstOff += VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } /// \brief Lower the call to 'memset' intrinsic function into a series of store @@ -4035,8 +4046,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, Size -= VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size()); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, @@ -4095,15 +4105,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc - TargetLowering:: - CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), - false, false, false, false, 0, - TLI->getLibcallCallingConv(RTLIB::MEMCPY), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), - TLI->getPointerTy()), - Args, *this, dl); + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy()), &Args, 0) + .setDiscardResult(); std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4153,15 +4161,13 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); // FIXME: pass in SDLoc - TargetLowering:: - CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), - false, false, 
false, false, 0, - TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy()), - Args, *this, dl); + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy()), &Args, 0) + .setDiscardResult(); std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; @@ -4217,32 +4223,31 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, Entry.Ty = IntPtrTy; Entry.isSExt = false; Args.push_back(Entry); + // FIXME: pass in SDLoc - TargetLowering:: - CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), - false, false, false, false, 0, - TLI->getLibcallCallingConv(RTLIB::MEMSET), - /*isTailCall=*/false, - /*doesNotReturn*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy()), - Args, *this, dl); - std::pair CallResult = TLI->LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy()), &Args, 0) + .setDiscardResult(); + std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDVTList VTList, SDValue *Ops, unsigned NumOps, + SDVTList VTList, ArrayRef Ops, MachineMemOperand *MMO, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { FoldingSetNodeID ID; ID.AddInteger(MemVT.getRawBits()); - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; + void* IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); return SDValue(E, 0); @@ -4253,11 +4258,13 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, // the node is deallocated, but recovered when the allocator is released. // If the number of operands is less than 5 we use AtomicSDNode's internal // storage. - SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate(NumOps) : 0; + unsigned NumOps = Ops.size(); + SDUse *DynOps = NumOps > 4 ? 
OperandAllocator.Allocate(NumOps) + : nullptr; SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, MemVT, - Ops, DynOps, NumOps, MMO, + Ops.data(), DynOps, NumOps, MMO, SuccessOrdering, FailureOrdering, SynchScope); CSEMap.InsertNode(N, IP); @@ -4266,11 +4273,11 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDVTList VTList, SDValue *Ops, unsigned NumOps, + SDVTList VTList, ArrayRef Ops, MachineMemOperand *MMO, AtomicOrdering Ordering, SynchronizationScope SynchScope) { - return getAtomic(Opcode, dl, MemVT, VTList, Ops, NumOps, MMO, Ordering, + return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering, Ordering, SynchScope); } @@ -4317,7 +4324,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, SuccessOrdering, + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, SuccessOrdering, FailureOrdering, SynchScope); } @@ -4377,38 +4384,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Val}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope); -} - -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - EVT VT, SDValue Chain, - SDValue Ptr, - const Value* PtrVal, - unsigned Alignment, - AtomicOrdering Ordering, - SynchronizationScope SynchScope) { - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(MemVT); - - MachineFunction &MF = getMachineFunction(); - // An atomic store does not load. An atomic load does not store. - // (An atomicrmw obviously both loads and stores.) - // For now, atomics are considered to be volatile always, and they are - // chained as such. - // FIXME: Volatile isn't really correct; we should keep track of atomic - // orderings in the memoperand. - unsigned Flags = MachineMemOperand::MOVolatile; - if (Opcode != ISD::ATOMIC_STORE) - Flags |= MachineMemOperand::MOLoad; - if (Opcode != ISD::ATOMIC_LOAD) - Flags |= MachineMemOperand::MOStore; - - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, - MemVT.getStoreSize(), Alignment); - - return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO, - Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -4421,38 +4397,24 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. 
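The getMergeValues hunk just below applies the same ArrayRef<SDValue> conversion seen in the getAtomic overloads above: the separately passed element count disappears into the ArrayRef. A minimal call-site sketch under the new signature (Lo, Hi, and dl are hypothetical stand-ins, not taken from this patch):

    SmallVector<SDValue, 2> Parts;
    Parts.push_back(Lo);   // low half of a split result
    Parts.push_back(Hi);   // high half
    // SmallVector converts implicitly to ArrayRef<SDValue>:
    SDValue Merged = DAG.getMergeValues(Parts, dl);   // was (&Parts[0], 2, dl)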
-SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, - SDLoc dl) { - if (NumOps == 1) +SDValue SelectionDAG::getMergeValues(ArrayRef Ops, SDLoc dl) { + if (Ops.size() == 1) return Ops[0]; SmallVector VTs; - VTs.reserve(NumOps); - for (unsigned i = 0; i < NumOps; ++i) + VTs.reserve(Ops.size()); + for (unsigned i = 0; i < Ops.size(); ++i) VTs.push_back(Ops[i].getValueType()); - return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), - Ops, NumOps); -} - -SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, - const EVT *VTs, unsigned NumVTs, - const SDValue *Ops, unsigned NumOps, - EVT MemVT, MachinePointerInfo PtrInfo, - unsigned Align, bool Vol, - bool ReadMem, bool WriteMem) { - return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, - MemVT, PtrInfo, Align, Vol, - ReadMem, WriteMem); + return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); } SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - const SDValue *Ops, unsigned NumOps, + ArrayRef Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, bool ReadMem, bool WriteMem) { @@ -4470,13 +4432,13 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); - return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, - const SDValue *Ops, unsigned NumOps, - EVT MemVT, MachineMemOperand *MMO) { + ArrayRef Ops, EVT MemVT, + MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || @@ -4490,9 +4452,9 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, MemIntrinsicSDNode *N; if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + AddNodeIDNode(ID, Opcode, VTList, Ops); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void *IP = 0; + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); return SDValue(E, 0); @@ -4500,12 +4462,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, - NumOps, MemVT, MMO); + MemVT, MMO); CSEMap.InsertNode(N, IP); } else { N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), dl.getDebugLoc(), VTList, Ops, - NumOps, MemVT, MMO); + MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -4568,7 +4530,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
-  if (PtrInfo.V == 0)
+  if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(Ptr, Offset);
 
   MachineFunction &MF = getMachineFunction();
@@ -4608,13 +4570,13 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
     getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
   SDValue Ops[] = { Chain, Ptr, Offset };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
   ID.AddInteger(MemVT.getRawBits());
   ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
                                      MMO->isNonTemporal(),
                                      MMO->isInvariant()));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<LoadSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
@@ -4695,7 +4657,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
   if (isNonTemporal)
     Flags |= MachineMemOperand::MONonTemporal;
 
-  if (PtrInfo.V == 0)
+  if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(Ptr);
 
   MachineFunction &MF = getMachineFunction();
@@ -4716,12 +4678,12 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
   SDValue Undef = getUNDEF(Ptr.getValueType());
   SDValue Ops[] = { Chain, Val, Ptr, Undef };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
   ID.AddInteger(VT.getRawBits());
   ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
                                      MMO->isNonTemporal(), MMO->isInvariant()));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<StoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
@@ -4750,7 +4712,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
   if (isNonTemporal)
     Flags |= MachineMemOperand::MONonTemporal;
 
-  if (PtrInfo.V == 0)
+  if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(Ptr);
 
   MachineFunction &MF = getMachineFunction();
@@ -4785,12 +4747,12 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
   SDValue Undef = getUNDEF(Ptr.getValueType());
   SDValue Ops[] = { Chain, Val, Ptr, Undef };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
   ID.AddInteger(SVT.getRawBits());
   ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
                                      MMO->isNonTemporal(), MMO->isInvariant()));
   ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<StoreSDNode>(E)->refineAlignment(MMO);
     return SDValue(E, 0);
@@ -4812,11 +4774,11 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
   SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
   SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
   FoldingSetNodeID ID;
-  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
   ID.AddInteger(ST->getMemoryVT().getRawBits());
   ID.AddInteger(ST->getRawSubclassData());
   ID.AddInteger(ST->getPointerInfo().getAddrSpace());
-  void *IP = 0;
+  void *IP = nullptr;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
@@ -4835,14 +4797,14 @@ SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
                                SDValue SV,
                                unsigned Align) {
   SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
-  return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
+  return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
 }
 
 SDValue
SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - const SDUse *Ops, unsigned NumOps) { - switch (NumOps) { + ArrayRef Ops) { + switch (Ops.size()) { case 0: return getNode(Opcode, DL, VT); - case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 1: return getNode(Opcode, DL, VT, static_cast(Ops[0])); case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; @@ -4850,12 +4812,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, // Copy from an SDUse array into an SDValue array for use with // the regular getNode logic. - SmallVector NewOps(Ops, Ops + NumOps); - return getNode(Opcode, DL, VT, &NewOps[0], NumOps); + SmallVector NewOps(Ops.begin(), Ops.end()); + return getNode(Opcode, DL, VT, NewOps); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - const SDValue *Ops, unsigned NumOps) { + ArrayRef Ops) { + unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0]); @@ -4890,18 +4853,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, if (VT != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops, NumOps); + VTs, Ops); CSEMap.InsertNode(N, IP); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTs, Ops, NumOps); + VTs, Ops); } AllNodes.push_back(N); @@ -4912,24 +4875,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, - ArrayRef ResultTys, - const SDValue *Ops, unsigned NumOps) { - return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), - Ops, NumOps); -} - -SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, - const EVT *VTs, unsigned NumVTs, - const SDValue *Ops, unsigned NumOps) { - if (NumVTs == 1) - return getNode(Opcode, DL, VTs[0], Ops, NumOps); - return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); + ArrayRef ResultTys, ArrayRef Ops) { + return getNode(Opcode, DL, getVTList(ResultTys), Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, - const SDValue *Ops, unsigned NumOps) { + ArrayRef Ops) { if (VTList.NumVTs == 1) - return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); + return getNode(Opcode, DL, VTList.VTs[0], Ops); #if 0 switch (Opcode) { @@ -4956,10 +4909,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, // Memoize the node unless it returns a flag. 
SDNode *N; + unsigned NumOps = Ops.size(); if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTList, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); @@ -4976,7 +4930,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, Ops[1], Ops[2]); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops, NumOps); + VTList, Ops); } CSEMap.InsertNode(N, IP); } else { @@ -4993,7 +4947,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, Ops[1], Ops[2]); } else { N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), - VTList, Ops, NumOps); + VTList, Ops); } } AllNodes.push_back(N); @@ -5004,39 +4958,39 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, 0, 0); + return getNode(Opcode, DL, VTList, ArrayRef()); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1) { SDValue Ops[] = { N1 }; - return getNode(Opcode, DL, VTList, Ops, 1); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2) { SDValue Ops[] = { N1, N2 }; - return getNode(Opcode, DL, VTList, Ops, 2); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3) { SDValue Ops[] = { N1, N2, N3 }; - return getNode(Opcode, DL, VTList, Ops, 3); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; - return getNode(Opcode, DL, VTList, Ops, 4); + return getNode(Opcode, DL, VTList, Ops); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; - return getNode(Opcode, DL, VTList, Ops, 5); + return getNode(Opcode, DL, VTList, Ops); } SDVTList SelectionDAG::getVTList(EVT VT) { @@ -5049,9 +5003,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { ID.AddInteger(VT1.getRawBits()); ID.AddInteger(VT2.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate(2); Array[0] = VT1; Array[1] = VT2; @@ -5068,9 +5022,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { ID.AddInteger(VT2.getRawBits()); ID.AddInteger(VT3.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate(3); Array[0] = VT1; Array[1] = VT2; @@ -5089,9 +5043,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { ID.AddInteger(VT3.getRawBits()); ID.AddInteger(VT4.getRawBits()); - void *IP = 0; + void *IP = nullptr; SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); - if (Result == NULL) { + if (!Result) { EVT *Array = Allocator.Allocate(4); Array[0] = VT1; Array[1] = VT2; @@ -5103,18 +5057,19 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { return Result->getSDVTList(); } 
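The hunk that follows converts the variable-length getVTList to the pattern this patch applies throughout: every (const T *Ptr, unsigned Num) parameter pair collapses into a single ArrayRef<T>, which carries pointer and length together and converts implicitly from C arrays and SmallVectors. A before/after sketch of a typical caller, modeled on the visitTargetIntrinsic hunk later in this patch (the two assignments are alternatives, not sequential code):

    SmallVector<EVT, 4> ValueVTs;   // result types collected earlier
    // Before: pointer plus a separately maintained count.
    SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
    // After: one ArrayRef<EVT> argument; the count can no longer
    // disagree with the buffer it describes.
    SDVTList VTs = DAG.getVTList(ValueVTs);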
-SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
+SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
+  unsigned NumVTs = VTs.size();
   FoldingSetNodeID ID;
   ID.AddInteger(NumVTs);
   for (unsigned index = 0; index < NumVTs; index++) {
     ID.AddInteger(VTs[index].getRawBits());
   }
 
-  void *IP = 0;
+  void *IP = nullptr;
   SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
-  if (Result == NULL) {
+  if (!Result) {
     EVT *Array = Allocator.Allocate<EVT>(NumVTs);
-    std::copy(VTs, VTs + NumVTs, Array);
+    std::copy(VTs.begin(), VTs.end(), Array);
     Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
     VTListMap.InsertNode(Result, IP);
   }
@@ -5135,14 +5090,14 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
   if (Op == N->getOperand(0)) return N;
 
   // See if the modified node already exists.
-  void *InsertPos = 0;
+  void *InsertPos = nullptr;
   if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
     return Existing;
 
   // Nope it doesn't.  Remove the node from its current place in the maps.
   if (InsertPos)
     if (!RemoveNodeFromCSEMaps(N))
-      InsertPos = 0;
+      InsertPos = nullptr;
 
   // Now we update the operands.
   N->OperandList[0].set(Op);
@@ -5160,14 +5115,14 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
     return N;   // No operands changed, just return the input node.
 
   // See if the modified node already exists.
-  void *InsertPos = 0;
+  void *InsertPos = nullptr;
   if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
     return Existing;
 
   // Nope it doesn't.  Remove the node from its current place in the maps.
   if (InsertPos)
     if (!RemoveNodeFromCSEMaps(N))
-      InsertPos = 0;
+      InsertPos = nullptr;
 
   // Now we update the operands.
   if (N->OperandList[0] != Op1)
@@ -5183,25 +5138,26 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
 SDNode *SelectionDAG::
 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
   SDValue Ops[] = { Op1, Op2, Op3 };
-  return UpdateNodeOperands(N, Ops, 3);
+  return UpdateNodeOperands(N, Ops);
 }
 
 SDNode *SelectionDAG::
 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
                    SDValue Op3, SDValue Op4) {
   SDValue Ops[] = { Op1, Op2, Op3, Op4 };
-  return UpdateNodeOperands(N, Ops, 4);
+  return UpdateNodeOperands(N, Ops);
 }
 
 SDNode *SelectionDAG::
 UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
                    SDValue Op3, SDValue Op4, SDValue Op5) {
   SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
-  return UpdateNodeOperands(N, Ops, 5);
+  return UpdateNodeOperands(N, Ops);
 }
 
 SDNode *SelectionDAG::
-UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
+UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
+  unsigned NumOps = Ops.size();
   assert(N->getNumOperands() == NumOps &&
          "Update with wrong number of operands");
@@ -5218,14 +5174,14 @@ UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
   if (!AnyChange) return N;
 
   // See if the modified node already exists.
-  void *InsertPos = 0;
-  if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+  void *InsertPos = nullptr;
+  if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos))
     return Existing;
 
   // Nope it doesn't.  Remove the node from its current place in the maps.
   if (InsertPos)
     if (!RemoveNodeFromCSEMaps(N))
-      InsertPos = 0;
+      InsertPos = nullptr;
 
   // Now we update the operands.
for (unsigned i = 0; i != NumOps; ++i) @@ -5254,14 +5210,14 @@ void SDNode::DropOperands() { SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, 0, 0); + return SelectNodeTo(N, MachineOpc, VTs, None); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5269,7 +5225,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5277,41 +5233,39 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT, const SDValue *Ops, - unsigned NumOps) { + EVT VT, ArrayRef Ops) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - EVT VT1, EVT VT2, const SDValue *Ops, - unsigned NumOps) { + EVT VT1, EVT VT2, ArrayRef Ops) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); + return SelectNodeTo(N, MachineOpc, VTs, None); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, - const SDValue *Ops, unsigned NumOps) { + ArrayRef Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2, EVT VT3, EVT VT4, - const SDValue *Ops, unsigned NumOps) { + ArrayRef Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); - return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5319,7 +5273,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5327,7 +5281,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5336,7 +5290,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); 
SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -5345,13 +5299,12 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); SDValue Ops[] = { Op1, Op2, Op3 }; - return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, - SDVTList VTs, const SDValue *Ops, - unsigned NumOps) { - N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); + SDVTList VTs,ArrayRef Ops) { + N = MorphNodeTo(N, ~MachineOpc, VTs, Ops); // Reset the NodeID to -1. N->setNodeId(-1); return N; @@ -5388,19 +5341,19 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { /// the node's users. /// SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, - SDVTList VTs, const SDValue *Ops, - unsigned NumOps) { + SDVTList VTs, ArrayRef Ops) { + unsigned NumOps = Ops.size(); // If an identical node already exists, use it. - void *IP = 0; + void *IP = nullptr; if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); + AddNodeIDNode(ID, Opc, VTs, Ops); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) - IP = 0; + IP = nullptr; // Start the morphing. N->NodeType = Opc; @@ -5420,7 +5373,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, if (MachineSDNode *MN = dyn_cast(N)) { // Initialize the memory references information. - MN->setMemRefs(0, 0); + MN->setMemRefs(nullptr, nullptr); // If NumOps is larger than the # of operands we can have in a // MachineSDNode, reallocate the operand list. if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { @@ -5431,22 +5384,22 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // remainder of the current SelectionDAG iteration, so we can allocate // the operands directly out of a pool with no recycling metadata. MN->InitOperands(OperandAllocator.Allocate(NumOps), - Ops, NumOps); + Ops.data(), NumOps); else - MN->InitOperands(MN->LocalOperands, Ops, NumOps); + MN->InitOperands(MN->LocalOperands, Ops.data(), NumOps); MN->OperandsNeedDelete = false; } else - MN->InitOperands(MN->OperandList, Ops, NumOps); + MN->InitOperands(MN->OperandList, Ops.data(), NumOps); } else { // If NumOps is larger than the # of operands we currently have, reallocate // the operand list. 
if (NumOps > N->NumOperands) { if (N->OperandsNeedDelete) delete[] N->OperandList; - N->InitOperands(new SDUse[NumOps], Ops, NumOps); + N->InitOperands(new SDUse[NumOps], Ops.data(), NumOps); N->OperandsNeedDelete = true; } else - N->InitOperands(N->OperandList, Ops, NumOps); + N->InitOperands(N->OperandList, Ops.data(), NumOps); } // Delete any nodes that are still dead after adding the uses for the @@ -5585,7 +5538,7 @@ MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, ArrayRef ResultTys, ArrayRef Ops) { - SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + SDVTList VTs = getVTList(ResultTys); return getMachineNode(Opcode, dl, VTs, Ops); } @@ -5594,14 +5547,14 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, ArrayRef OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; - void *IP = 0; + void *IP = nullptr; const SDValue *Ops = OpsArray.data(); unsigned NumOps = OpsArray.size(); if (DoCSE) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); - IP = 0; + AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); + IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { return cast(UpdadeSDLocOnMergedSDNode(E, DL)); } @@ -5657,34 +5610,39 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - const SDValue *Ops, unsigned NumOps) { + ArrayRef Ops) { if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); - void *IP = 0; + AddNodeIDNode(ID, Opcode, VTList, Ops); + void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return E; } - return NULL; + return nullptr; } /// getDbgValue - Creates a SDDbgValue node. 
/// +/// SDNode SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off, +SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, + bool IsIndirect, uint64_t Off, DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O); + return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O); } +/// Constant SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off, - DebugLoc DL, unsigned O) { +SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C, + uint64_t Off, + DebugLoc DL, unsigned O) { return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); } +/// FrameIndex SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, - DebugLoc DL, unsigned O) { +SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); } @@ -6049,7 +6007,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { dbgs() << "Overran sorted position:\n"; S->dumprFull(); #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -6090,6 +6048,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), + Dbg->isIndirect(), Dbg->getOffset(), Dbg->getDebugLoc(), Dbg->getOrder()); ClonedDVs.push_back(Clone); @@ -6133,9 +6092,8 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, } MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, - const SDValue *Ops, unsigned NumOps, EVT memvt, - MachineMemOperand *mmo) - : SDNode(Opc, Order, dl, VTs, Ops, NumOps), + ArrayRef Ops, EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs, Ops), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); @@ -6354,12 +6312,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { switch (N->getOpcode()) { default: - Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, - &Operands[0], Operands.size())); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); break; case ISD::VSELECT: - Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, - &Operands[0], Operands.size())); + Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); break; case ISD::SHL: case ISD::SRA: @@ -6384,8 +6340,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { Scalars.push_back(getUNDEF(EltVT)); return getNode(ISD::BUILD_VECTOR, dl, - EVT::getVectorVT(*getContext(), EltVT, ResNE), - &Scalars[0], Scalars.size()); + EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars); } @@ -6419,8 +6374,8 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, cast(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) return true; - const GlobalValue *GV1 = NULL; - const GlobalValue *GV2 = NULL; + const GlobalValue *GV1 = nullptr; + const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; int64_t Offset2 = 0; const TargetLowering *TLI = TM.getTargetLowering(); @@ -6442,8 +6397,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); - llvm::ComputeMaskedBits(const_cast(GV), KnownZero, KnownOne, - 
TLI->getDataLayout()); + llvm::computeKnownBits(const_cast(GV), KnownZero, KnownOne, + TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6505,6 +6460,22 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, return std::make_pair(Lo, Hi); } +void SelectionDAG::ExtractVectorElements(SDValue Op, + SmallVectorImpl &Args, + unsigned Start, unsigned Count) { + EVT VT = Op.getValueType(); + if (Count == 0) + Count = VT.getVectorNumElements(); + + EVT EltVT = VT.getVectorElementType(); + EVT IdxTy = TLI->getVectorIdxTy(); + SDLoc SL(Op); + for (unsigned i = Start, e = Start + Count; i != e; ++i) { + Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + Op, getConstant(i, IdxTy))); + } +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4a6e5cf..070e929 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" @@ -62,6 +61,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "isel" + /// LimitFloatPrecision - Generate low-precision inline sequences for /// some float libcalls (6, 8 or 12 bits). static unsigned LimitFloatPrecision; @@ -276,9 +277,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. - Val = DAG.getNode(IntermediateVT.isVector() ? - ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, - ValueVT, &Ops[0], NumIntermediates); + Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS + : ISD::BUILD_VECTOR, + DL, ValueVT, Ops); } // There is now one part, held in Val. Correct it to match ValueVT. @@ -495,7 +496,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, e = PartVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ElementVT)); - Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); // FIXME: Use CONCAT for 2x -> 4x. @@ -638,7 +639,7 @@ namespace { SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, SDLoc dl, SDValue &Chain, SDValue *Flag, - const Value *V = 0) const; + const Value *V = nullptr) const; /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the /// specified value into the registers specified by this object. 
This uses @@ -684,7 +685,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue P; - if (Flag == 0) { + if (!Flag) { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); } else { P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); @@ -752,9 +753,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, Parts.clear(); } - return DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); + return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); } /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the @@ -785,7 +784,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SmallVector Chains(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { SDValue Part; - if (Flag == 0) { + if (!Flag) { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); } else { Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); @@ -808,7 +807,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, // = op c3, ..., f2 Chain = Chains[NumRegs-1]; else - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } /// AddInlineAsmOperands - Add this value to the specified inlineasm node @@ -877,7 +876,7 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - CurInst = NULL; + CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; } @@ -910,7 +909,7 @@ SDValue SelectionDAGBuilder::getRoot() { // Otherwise, we have to make a token factor node. SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &PendingLoads[0], PendingLoads.size()); + PendingLoads); PendingLoads.clear(); DAG.setRoot(Root); return Root; @@ -940,8 +939,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { } Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &PendingExports[0], - PendingExports.size()); + PendingExports); PendingExports.clear(); DAG.setRoot(Root); return Root; @@ -961,7 +959,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) { if (!isa(&I) && !HasTailCall) CopyToExportRegsIfNeeded(&I); - CurInst = NULL; + CurInst = nullptr; } void SelectionDAGBuilder::visitPHI(const PHINode &) { @@ -991,11 +989,14 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); MDNode *Variable = DI->getVariable(); uint64_t Offset = DI->getOffset(); + // A dbg.value for an alloca is always indirect. 
+ bool IsIndirect = isa(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) { SDV = DAG.getDbgValue(Variable, Val.getNode(), - Val.getResNo(), Offset, dl, DbgSDNodeOrder); + Val.getResNo(), IsIndirect, + Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1020,7 +1021,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1091,8 +1092,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Constants.push_back(SDValue(Val, i)); } - return DAG.getMergeValues(&Constants[0], Constants.size(), - getCurSDLoc()); + return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const ConstantDataSequential *CDS = @@ -1107,9 +1107,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } if (isa(CDS->getType())) - return DAG.getMergeValues(&Ops[0], Ops.size(), getCurSDLoc()); + return DAG.getMergeValues(Ops, getCurSDLoc()); return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size()); + VT, Ops); } if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { @@ -1132,8 +1132,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Constants[i] = DAG.getConstant(0, EltVT); } - return DAG.getMergeValues(&Constants[0], NumElts, - getCurSDLoc()); + return DAG.getMergeValues(Constants, getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast(C)) @@ -1161,8 +1160,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); } // If this is a static alloca, generate it as the frameindex instead of @@ -1179,7 +1177,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } llvm_unreachable("Can't get register for value!"); @@ -1223,7 +1221,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], NumValues); + MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector ValueVTs; ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); @@ -1406,8 +1404,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, llvm_unreachable("Unknown compare instruction"); } - CaseBlock CB(Condition, BOp->getOperand(0), - BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight); + CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, + TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); return; } @@ -1415,7 +1413,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. 
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - NULL, TBB, FBB, CurBB, TWeight, FWeight); + nullptr, TBB, FBB, CurBB, TWeight, FWeight); SwitchCases.push_back(CB); } @@ -1562,7 +1560,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = BrMBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1639,7 +1637,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - NULL, Succ0MBB, Succ1MBB, BrMBB); + nullptr, Succ0MBB, Succ1MBB, BrMBB); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1655,7 +1653,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, SDLoc dl = getCurSDLoc(); // Build the setcc now. - if (CB.CmpMHS == NULL) { + if (!CB.CmpMHS) { // Fold "(X == true)" to X and "(X == false)" to !X to // handle common cases produced by branch lowering. if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && @@ -1696,7 +1694,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1774,7 +1772,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) @@ -1857,8 +1855,8 @@ void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering *TLI = TM.getTargetLowering(); SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, - MVT::isVoid, 0, 0, false, getCurSDLoc(), - false, false).second; + MVT::isVoid, nullptr, 0, false, + getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1905,7 +1903,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -1979,7 +1977,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = SwitchBB; if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; @@ -2059,8 +2057,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // Merge into one. 
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Ops[0], 2); + DAG.getVTList(ValueVTs), Ops); setValue(&LP, Res); } @@ -2081,7 +2078,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineFunction::iterator BBI = CR.CaseBB; if (++BBI != FuncInfo.MF->end()) @@ -2192,7 +2189,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, if (I->High == I->Low) { // This is just small small case range :) containing exactly 1 case CC = ISD::SETEQ; - LHS = SV; RHS = I->High; MHS = NULL; + LHS = SV; RHS = I->High; MHS = nullptr; } else { CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; @@ -2427,7 +2424,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRange LHSR(CR.Range.first, Pivot); CaseRange RHSR(Pivot, CR.Range.second); const Constant *C = Pivot->Low; - MachineBasicBlock *FalseBB = 0, *TrueBB = 0; + MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; // We know that we branch to the LHS if the Value being switched on is // less than the Pivot value, C. We use this to optimize our binary @@ -2469,7 +2466,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2682,7 +2679,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Figure out which block is immediately after the current one. - MachineBasicBlock *NextBlock = 0; + MachineBasicBlock *NextBlock = nullptr; MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; // If there is only the default destination, branch to it if it is not the @@ -2716,7 +2713,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Push the initial CaseRec onto the worklist CaseRecVector WorkList; - WorkList.push_back(CaseRec(SwitchMBB,0,0, + WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, CaseRange(Cases.begin(),Cases.end()))); while (!WorkList.empty()) { @@ -2765,6 +2762,11 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { getValue(I.getAddress()))); } +void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { + if (DAG.getTarget().Options.TrapUnreachable) + DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +} + void SelectionDAGBuilder::visitFSub(const User &I) { // -0.0 - X --> fneg Type *Ty = I.getType(); @@ -2887,8 +2889,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { FalseVal.getResNo() + i)); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues)); + DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitTrunc(const User &I) { @@ -3097,11 +3098,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps2[0] = Src2; Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, - &MOps1[0], NumConcat); + getCurSDLoc(), VT, MOps1); Src2 = Src2U ? 
DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, - &MOps2[0], NumConcat); + getCurSDLoc(), VT, MOps2); // Readjust mask for new input vector length. SmallVector MappedOps; @@ -3219,8 +3218,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), - VT, &Ops[0], Ops.size())); + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); } void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { @@ -3262,8 +3260,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&AggValueVTs[0], NumAggValues), - &Values[0], NumAggValues)); + DAG.getVTList(AggValueVTs), Values)); } void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { @@ -3297,8 +3294,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { SDValue(Agg.getNode(), Agg.getResNo() + i); setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValValueVTs[0], NumValValues), - &Values[0], NumValValues)); + DAG.getVTList(ValValueVTs), Values)); } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { @@ -3420,8 +3416,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), - VTs, Ops, 3); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3438,8 +3433,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != 0; - bool isInvariant = I.getMetadata("invariant.load") != 0; + bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; + bool isInvariant = I.getMetadata("invariant.load") != nullptr; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); @@ -3484,8 +3479,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). 
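In the visitLoad and visitStore hunks below, only the first ChainI entries of Chains are live when the pending chains are flushed, so the old &Chains[0], ChainI pair becomes an explicit prefix view rather than a whole-vector ArrayRef; makeArrayRef keeps the partial-buffer intent visible at the call site:

    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                                makeArrayRef(Chains.data(), ChainI));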
if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -3502,8 +3497,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); else @@ -3511,8 +3506,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&ValueVTs[0], NumValues), - &Values[0], NumValues)); + DAG.getVTList(ValueVTs), Values)); } void SelectionDAGBuilder::visitStore(const StoreInst &I) { @@ -3540,7 +3534,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); @@ -3548,8 +3542,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -3562,8 +3556,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], ChainI); + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + makeArrayRef(Chains.data(), ChainI)); DAG.setRoot(StoreNode); } @@ -3588,7 +3582,7 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, Ops[0] = Chain; Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); - return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3); + return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { @@ -3680,7 +3674,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { Ops[0] = getRoot(); Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); - DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); + DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { @@ -3696,13 +3690,21 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + MachineMemOperand::MOVolatile | + MachineMemOperand::MOLoad, + VT.getStoreSize(), + I.getAlignment() ? 
I.getAlignment() : + DAG.getEVTAlignment(VT)); + InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, - getValue(I.getPointerOperand()), - I.getPointerOperand(), I.getAlignment(), - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, + getValue(I.getPointerOperand()), MMO, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, + Scope); SDValue OutChain = L.getValue(1); @@ -3788,27 +3790,23 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (HasChain) ValueVTs.push_back(MVT::Other); - SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + SDVTList VTs = DAG.getVTList(ValueVTs); // Create the node. SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), - VTs, &Ops[0], Ops.size(), - Info.memVT, + VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), - VTs, &Ops[0], Ops.size()); + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); } if (HasChain) { @@ -4530,7 +4528,7 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// At the end of instruction selection, they will be inserted to the entry BB. bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, + int64_t Offset, bool IsIndirect, const SDValue &N) { const Argument *Arg = dyn_cast(V); if (!Arg) @@ -4582,8 +4580,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (!Op) return false; - // FIXME: This does not handle register-indirect values at offset 0. - bool IsIndirect = Offset != 0; if (Op->isReg()) FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), @@ -4619,18 +4615,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { default: // By default, turn this into a target intrinsic node. 
     visitTargetIntrinsic(I, Intrinsic);
-    return 0;
-  case Intrinsic::vastart:  visitVAStart(I); return 0;
-  case Intrinsic::vaend:    visitVAEnd(I); return 0;
-  case Intrinsic::vacopy:   visitVACopy(I); return 0;
+    return nullptr;
+  case Intrinsic::vastart:  visitVAStart(I); return nullptr;
+  case Intrinsic::vaend:    visitVAEnd(I); return nullptr;
+  case Intrinsic::vacopy:   visitVACopy(I); return nullptr;
   case Intrinsic::returnaddress:
     setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(),
                              getValue(I.getArgOperand(0))));
-    return 0;
+    return nullptr;
   case Intrinsic::frameaddress:
     setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(),
                              getValue(I.getArgOperand(0))));
-    return 0;
+    return nullptr;
+  case Intrinsic::read_register: {
+    Value *Reg = I.getArgOperand(0);
+    SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
+    EVT VT = TM.getTargetLowering()->getValueType(I.getType());
+    setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName));
+    return nullptr;
+  }
+  case Intrinsic::write_register: {
+    Value *Reg = I.getArgOperand(0);
+    Value *RegValue = I.getArgOperand(1);
+    SDValue Chain = getValue(RegValue).getOperand(0);
+    SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
+    DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
+                            RegName, getValue(RegValue)));
+    return nullptr;
+  }
   case Intrinsic::setjmp:
     return &"_setjmp"[!TLI->usesUnderscoreSetJmp()];
   case Intrinsic::longjmp:
@@ -4653,7 +4665,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
                               false, MachinePointerInfo(I.getArgOperand(0)),
                               MachinePointerInfo(I.getArgOperand(1))));
-    return 0;
+    return nullptr;
   }
   case Intrinsic::memset: {
     // Assert for address < 256 since we support only user defined address
@@ -4670,7 +4682,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
     DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
                               MachinePointerInfo(I.getArgOperand(0))));
-    return 0;
+    return nullptr;
   }
   case Intrinsic::memmove: {
     // Assert for address < 256 since we support only user defined address
@@ -4690,7 +4702,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
                                MachinePointerInfo(I.getArgOperand(0)),
                                MachinePointerInfo(I.getArgOperand(1))));
-    return 0;
+    return nullptr;
   }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
@@ -4701,14 +4713,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       "Variable in DbgDeclareInst should be either null or a DIVariable.");
     if (!Address || !DIVar) {
       DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
-      return 0;
+      return nullptr;
     }

     // Check if address has undef value.
     if (isa<UndefValue>(Address) ||
         (Address->use_empty() && !isa<Argument>(Address))) {
       DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
-      return 0;
+      return nullptr;
     }

     SDValue &N = NodeMap[Address];
@@ -4730,29 +4742,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
         if (FINode)
           // Byval parameter.  We have a frame index at this point.
-          SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
-                                0, dl, SDNodeOrder);
+          SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(),
+                                          0, dl, SDNodeOrder);
         else {
           // Address is an argument, so try to emit its dbg value using
           // virtual register info from the FuncInfo.ValueMap.
-          EmitFuncArgumentDbgValue(Address, Variable, 0, N);
-          return 0;
+          EmitFuncArgumentDbgValue(Address, Variable, 0, false, N);
+          return nullptr;
         }
       } else if (AI)
         SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
-                              0, dl, SDNodeOrder);
+                              true, 0, dl, SDNodeOrder);
       else {
         // Can't do anything with other non-AI cases yet.
         DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
         DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
         DEBUG(Address->dump());
-        return 0;
+        return nullptr;
       }
       DAG.AddDbgValue(SDV, N.getNode(), isParameter);
     } else {
       // If Address is an argument then try to emit its dbg value using
       // virtual register info from the FuncInfo.ValueMap.
-      if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+      if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) {
        // If variable is pinned by a alloca in dominating bb then
        // use StaticAllocaMap.
        if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
@@ -4760,17 +4772,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
             DenseMap<const AllocaInst*, int>::iterator SI =
               FuncInfo.StaticAllocaMap.find(AI);
             if (SI != FuncInfo.StaticAllocaMap.end()) {
-              SDV = DAG.getDbgValue(Variable, SI->second,
-                                    0, dl, SDNodeOrder);
-              DAG.AddDbgValue(SDV, 0, false);
-              return 0;
+              SDV = DAG.getFrameIndexDbgValue(Variable, SI->second,
+                                              0, dl, SDNodeOrder);
+              DAG.AddDbgValue(SDV, nullptr, false);
+              return nullptr;
             }
           }
         }
         DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       }
     }
-    return 0;
+    return nullptr;
   }
   case Intrinsic::dbg_value: {
     const DbgValueInst &DI = cast<DbgValueInst>(I);
@@ -4778,18 +4790,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     assert((!DIVar || DIVar.isVariable()) &&
       "Variable in DbgValueInst should be either null or a DIVariable.");
     if (!DIVar)
-      return 0;
+      return nullptr;

     MDNode *Variable = DI.getVariable();
     uint64_t Offset = DI.getOffset();
     const Value *V = DI.getValue();
     if (!V)
-      return 0;
+      return nullptr;

     SDDbgValue *SDV;
     if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
-      SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
-      DAG.AddDbgValue(SDV, 0, false);
+      SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+      DAG.AddDbgValue(SDV, nullptr, false);
     } else {
       // Do not use getValue() in here; we don't want to generate code at
       // this point if it hasn't been done yet.
@@ -4798,9 +4810,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
         // Check unused arguments map.
         N = UnusedArgNodeMap[V];
       if (N.getNode()) {
-        if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+        // A dbg.value for an alloca is always indirect.
+        bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
+        if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) {
           SDV = DAG.getDbgValue(Variable, N.getNode(),
-                                N.getResNo(), Offset, dl, SDNodeOrder);
+                                N.getResNo(), IsIndirect,
+                                Offset, dl, SDNodeOrder);
           DAG.AddDbgValue(SDV, N.getNode(), false);
         }
       } else if (!V->use_empty() ) {
@@ -4823,18 +4838,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       if (!AI) {
         DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
         DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
-        return 0;
+        return nullptr;
       }
       DenseMap<const AllocaInst*, int>::iterator SI =
         FuncInfo.StaticAllocaMap.find(AI);
       if (SI == FuncInfo.StaticAllocaMap.end())
-        return 0; // VLAs.
-      int FI = SI->second;
-
-      MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
-      if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
-        MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
-      return 0;
+        return nullptr; // VLAs.
+      return nullptr;
   }

   case Intrinsic::eh_typeid_for: {
@@ -4843,7 +4853,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
     Res = DAG.getConstant(TypeID, MVT::i32);
     setValue(&I, Res);
-    return 0;
+    return nullptr;
   }

   case Intrinsic::eh_return_i32:
@@ -4854,10 +4864,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                             getControlRoot(),
                             getValue(I.getArgOperand(0)),
                             getValue(I.getArgOperand(1))));
-    return 0;
+    return nullptr;
   case Intrinsic::eh_unwind_init:
     DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
-    return 0;
+    return nullptr;
   case Intrinsic::eh_dwarf_cfa: {
     SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
                                         TLI->getPointerTy());
@@ -4871,7 +4881,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                             DAG.getConstant(0, TLI->getPointerTy()));
     setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
                              FA, Offset));
-    return 0;
+    return nullptr;
   }
   case Intrinsic::eh_sjlj_callsite: {
     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
@@ -4880,7 +4890,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");

     MMI.setCurrentCallSite(CI->getZExtValue());
-    return 0;
+    return nullptr;
   }
   case Intrinsic::eh_sjlj_functioncontext: {
     // Get and store the index of the function context.
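The dbg.declare/dbg.value hunks above split the old getDbgValue overloads into purpose-specific factories and thread the new IsIndirect flag through the SDNode form. The three shapes, sketched from the call sites (parameter names are inferred from those sites, not quoted from the header):

    // SDNode form: the variable lives in a DAG value; IsIndirect says the
    // node produces the variable's address rather than its value.
    SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
                          /*IsIndirect=*/true, Offset, dl, SDNodeOrder);
    // Frame-index form, used for byval parameters and static allocas.
    SDV = DAG.getFrameIndexDbgValue(Variable, FI, Offset, dl, SDNodeOrder);
    // Constant form, used for ConstantInt/ConstantFP/UndefValue values.
    SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder);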
@@ -4889,23 +4899,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { cast(I.getArgOperand(0)->stripPointerCasts()); int FI = FuncInfo.StaticAllocaMap[FnCtx]; MFI->setFunctionContextIndex(FI); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_setjmp: { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, - DAG.getVTList(MVT::i32, MVT::Other), - Ops, 2); + DAG.getVTList(MVT::i32, MVT::Other), Ops); setValue(&I, Op.getValue(0)); DAG.setRoot(Op.getValue(1)); - return 0; + return nullptr; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::x86_mmx_pslli_w: @@ -4919,7 +4928,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShAmt = getValue(I.getArgOperand(1)); if (isa(ShAmt)) { visitTargetIntrinsic(I, Intrinsic); - return 0; + return nullptr; } unsigned NewIntrinsic = 0; EVT ShAmtVT = MVT::v2i32; @@ -4958,14 +4967,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); EVT DestVT = TLI->getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::x86_avx_vinsertf128_pd_256: case Intrinsic::x86_avx_vinsertf128_ps_256: @@ -4980,7 +4989,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::x86_avx_vextractf128_pd_256: case Intrinsic::x86_avx_vextractf128_ps_256: @@ -4993,7 +5002,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)), DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::convertff: case Intrinsic::convertfsi: @@ -5026,31 +5035,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)), Code); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); - return 0; + return nullptr; case Intrinsic::log: setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::log2: setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::log10: setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::exp: setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::exp2: setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG, *TLI)); - return 0; + return nullptr; case Intrinsic::sqrt: case Intrinsic::fabs: case 
Intrinsic::sin: @@ -5079,21 +5088,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)))); - return 0; + return nullptr; case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); - return 0; + return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI->getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -5114,42 +5123,41 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2))); setValue(&I, Add); } - return 0; + return nullptr; } case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl, MVT::i16, getValue(I.getArgOperand(0)))); - return 0; + return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl, MVT::f32, getValue(I.getArgOperand(0)))); - return 0; + return nullptr; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); - return 0; + return nullptr; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, - DAG.getVTList(MVT::i64, MVT::Other), - &Op, 1); + DAG.getVTList(MVT::i64, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return 0; + return nullptr; } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; case Intrinsic::cttz: { SDValue Arg = getValue(I.getArgOperand(0)); ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::ctlz: { SDValue Arg = getValue(I.getArgOperand(0)); @@ -5157,26 +5165,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); - return 0; + return nullptr; } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); + DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); - return 0; + return nullptr; } case Intrinsic::stackrestore: { Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); - return 0; + return nullptr; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. 
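For cttz (and symmetrically ctlz) the builder reads the constant i1 is_zero_undef argument and picks the matching node flavor. The same code as the hunk above, as a sketch with the cast's template argument spelled out and the mapping noted in comments:

    SDValue Arg = getValue(I.getArgOperand(0));
    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
    // llvm.cttz(x, i1 false) -> ISD::CTTZ            (result defined at x==0)
    // llvm.cttz(x, i1 true)  -> ISD::CTTZ_ZERO_UNDEF (undef at x==0)
    setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
                             sdl, Arg.getValueType(), Arg));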
@@ -5198,7 +5206,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { true, false, 0); setValue(&I, Res); DAG.setRoot(Res); - return 0; + return nullptr; } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. @@ -5215,16 +5223,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getConstant(0, Ty); setValue(&I, Res); - return 0; + return nullptr; } case Intrinsic::annotation: case Intrinsic::ptr_annotation: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); - return 0; + return nullptr; case Intrinsic::var_annotation: // Discard annotate attributes - return 0; + return nullptr; case Intrinsic::init_trampoline: { const Function *F = cast(I.getArgOperand(1)->stripPointerCasts()); @@ -5237,16 +5245,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); DAG.setRoot(Res); - return 0; + return nullptr; } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); - return 0; + return nullptr; } case Intrinsic::gcroot: if (GFI) { @@ -5256,18 +5264,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FI = cast(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); } - return 0; + return nullptr; case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); - return 0; + return nullptr; case Intrinsic::expect: { // Just replace __builtin_expect(exp, c) with EXP. setValue(&I, getValue(I.getArgOperand(0))); - return 0; + return nullptr; } case Intrinsic::debugtrap: @@ -5277,20 +5285,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
ISD::TRAP : ISD::DEBUGTRAP; DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); - return 0; + return nullptr; } TargetLowering::ArgListTy Args; - TargetLowering:: - CallLoweringInfo CLI(getRoot(), I.getType(), - false, false, false, false, 0, CallingConv::C, - /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol(TrapFuncName.data(), - TLI->getPointerTy()), - Args, DAG, sdl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()) + .setCallee(CallingConv::C, I.getType(), + DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), + &Args, 0); + std::pair Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); - return 0; + return nullptr; } case Intrinsic::uadd_with_overflow: @@ -5314,7 +5321,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); - return 0; + return nullptr; } case Intrinsic::prefetch: { SDValue Ops[5]; @@ -5325,22 +5332,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), - &Ops[0], 5, + DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), 0, /* align */ false, /* volatile */ rw==0, /* read */ rw==1)); /* write */ - return 0; + return nullptr; } case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. if (TM.getOptLevel() == CodeGenOpt::None) - return 0; + return nullptr; SmallVector Allocas; GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL); @@ -5360,18 +5366,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); - Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); + Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); DAG.setRoot(Res); } - return 0; + return nullptr; } case Intrinsic::invariant_start: // Discard region information. setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); - return 0; + return nullptr; case Intrinsic::invariant_end: // Discard region information. - return 0; + return nullptr; case Intrinsic::stackprotectorcheck: { // Do not actually emit anything for this basic block. Instead we initialize // the stack protector descriptor and export the guard variable so we can @@ -5382,21 +5388,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Flush our exports since we are going to process a terminator. 
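This trap-function hunk is the first of several in the patch that convert callers to the new CallLoweringInfo builder style, where only the relevant properties are set explicitly and everything else keeps a default. The pattern, sketched from the converted sites:

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(sdl)
       .setChain(getRoot())
       .setCallee(CallingConv::C, RetTy, Callee, &Args, 0)
       .setTailCall(false);               // unset properties keep defaults
    std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);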
(void)getControlRoot(); - return 0; + return nullptr; } case Intrinsic::clear_cache: return TLI->getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore - return 0; + return nullptr; case Intrinsic::experimental_stackmap: { visitStackmap(I); - return 0; + return nullptr; } case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: { visitPatchpoint(I); - return 0; + return nullptr; } } } @@ -5408,7 +5414,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, FunctionType *FTy = cast(PT->getElementType()); Type *RetTy = FTy->getReturnType(); MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - MCSymbol *BeginLabel = 0; + MCSymbol *BeginLabel = nullptr; TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -5496,9 +5502,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (isTailCall && !isInTailCallPosition(CS, *TLI)) isTailCall = false; - TargetLowering:: - CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, - getCurSDLoc(), CS); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, &Args, CS).setTailCall(isTailCall); + std::pair Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); @@ -5537,13 +5544,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, } SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, &Chains[0], NumValues); + MVT::Other, Chains); PendingLoads.push_back(Chain); setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(&RetTys[0], RetTys.size()), - &Values[0], Values.size())); + DAG.getVTList(RetTys), Values)); } if (!Result.second.getNode()) { @@ -5683,7 +5689,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; - LoadTy = 0; + LoadTy = nullptr; ActuallyDoIt = false; break; case 2: @@ -5910,7 +5916,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); ComputeUsesVAFloatArgument(I, &MMI); - const char *RenameFn = 0; + const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { @@ -6085,7 +6091,7 @@ public: RegsForValue AssignedRegs; explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) - : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) { + : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { } /// getCallOperandValEVT - Return the EVT of the Value* that this operand @@ -6094,7 +6100,7 @@ public: EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, const DataLayout *DL) const { - if (CallOperandVal == 0) return MVT::Other; + if (!CallOperandVal) return MVT::Other; if (isa(CallOperandVal)) return TLI.getPointerTy(); @@ -6415,7 +6421,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // There is no longer a Value* corresponding to this operand. - OpInfo.CallOperandVal = 0; + OpInfo.CallOperandVal = nullptr; // It is now an indirect operand. 
OpInfo.isIndirect = true; @@ -6704,8 +6710,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), - DAG.getVTList(MVT::Other, MVT::Glue), - &AsmNodeOperands[0], AsmNodeOperands.size()); + DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); Flag = Chain.getValue(1); // If this asm returns a register value, copy the result from that register @@ -6768,8 +6773,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } if (!OutChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, - &OutChains[0], OutChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); DAG.setRoot(Chain); } @@ -6839,10 +6843,10 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, } Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); - TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, - /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, - CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, - /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(CI.getCallingConv(), retTy, Callee, &Args, NumArgs) + .setDiscardResult(!CI.use_empty()); const TargetLowering *TLI = TM.getTargetLowering(); return TLI->LowerCallTo(CLI); @@ -7056,7 +7060,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // There is always a chain and a glue type at the end ValueVTs.push_back(MVT::Other); ValueVTs.push_back(MVT::Glue); - NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + NodeTys = DAG.getVTList(ValueVTs); } else NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -7120,19 +7124,23 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle all of the outgoing arguments. CLI.Outs.clear(); CLI.OutVals.clear(); - ArgListTy &Args = CLI.Args; + ArgListTy &Args = CLI.getArgs(); for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); - for (unsigned Value = 0, NumValues = ValueVTs.size(); - Value != NumValues; ++Value) { + Type *FinalType = Args[i].Ty; + if (Args[i].isByVal) + FinalType = cast(Args[i].Ty)->getElementType(); + bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; + ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); SDValue Op = SDValue(Args[i].Node.getNode(), Args[i].Node.getResNo() + Value); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - getDataLayout()->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); if (Args[i].isZExt) Flags.setZExt(); @@ -7168,6 +7176,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7200,8 +7210,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setReturned(); } - getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, - PartVT, CLI.CS ? 
CLI.CS->getInstruction() : 0, ExtendKind); + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, + CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -7213,6 +7223,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (j != 0) MyFlags.Flags.setOrigAlign(1); + // Only mark the end at the last register of the last value. + if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7261,7 +7275,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, NULL, + NumRegs, RegisterVT, VT, nullptr, AssertOp)); CurReg += NumRegs; } @@ -7273,8 +7287,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { return std::make_pair(SDValue(), CLI.Chain); SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, - CLI.DAG.getVTList(&RetTys[0], RetTys.size()), - &ReturnValues[0], ReturnValues.size()); + CLI.DAG.getVTList(RetTys), ReturnValues); return std::make_pair(Res, CLI.Chain); } @@ -7301,7 +7314,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { const TargetLowering *TLI = TM.getTargetLowering(); RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V); PendingExports.push_back(Chain); } @@ -7354,13 +7367,17 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); unsigned PartBase = 0; + Type *FinalType = I->getType(); + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + FinalType = cast(FinalType)->getElementType(); + bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( + FinalType, F.getCallingConv(), F.isVarArg()); for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); ISD::ArgFlagsTy Flags; - unsigned OriginalAlignment = - DL->getABITypeAlignment(ArgTy); + unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); @@ -7396,6 +7413,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7408,6 +7427,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); + + // Only mark the end at the last register of the last value. 
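The NeedsRegBlock logic added here lets a target demand that all pieces of one argument land in adjacent registers. The hook name comes from this patch; the override below is only an illustrative assumption about how a backend might answer it:

    // Hypothetical backend override (the struct-type policy is invented
    // purely for illustration):
    bool MyTargetLowering::functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
      return Ty->isStructTy();
    }
    // When it returns true, the lowering above sets
    //   Flags.setInConsecutiveRegs();      on every part of the argument,
    //   Flags.setInConsecutiveRegsLast();  on the final part only,
    // so calling-convention code can see where the register block ends.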
+ if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1) + MyFlags.Flags.setInConsecutiveRegsLast(); + Ins.push_back(MyFlags); } PartBase += VT.getStoreSize(); @@ -7449,7 +7473,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, - RegVT, VT, NULL, AssertOp); + RegVT, VT, nullptr, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); @@ -7496,7 +7520,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts, PartVT, VT, - NULL, AssertOp)); + nullptr, AssertOp)); } i += NumParts; @@ -7511,7 +7535,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { dyn_cast(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); - SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, + SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); SDB->setValue(I, Res); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 66835bf..fb29691 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -96,7 +96,7 @@ class SelectionDAGBuilder { DebugLoc dl; unsigned SDNodeOrder; public: - DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { } + DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { } DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : DI(di), dl(DL), SDNodeOrder(SDNO) { } const DbgValueInst* getDI() { return DI; } @@ -135,7 +135,7 @@ private: MachineBasicBlock* BB; uint32_t ExtraWeight; - Case() : Low(0), High(0), BB(0), ExtraWeight(0) { } + Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { } Case(const Constant *low, const Constant *high, MachineBasicBlock *bb, uint32_t extraweight) : Low(low), High(high), BB(bb), ExtraWeight(extraweight) { } @@ -396,8 +396,8 @@ private: /// the same function, use the same failure basic block). class StackProtectorDescriptor { public: - StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0), - Guard(0) { } + StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), + FailureMBB(nullptr), Guard(nullptr) { } ~StackProtectorDescriptor() { } /// Returns true if all fields of the stack protector descriptor are @@ -432,8 +432,8 @@ private: /// parent mbb after we create the stack protector check (SuccessMBB). This /// BB is visited only on stack protector check success. void resetPerBBState() { - ParentMBB = 0; - SuccessMBB = 0; + ParentMBB = nullptr; + SuccessMBB = nullptr; } /// Reset state that only changes when we switch functions. @@ -446,8 +446,8 @@ private: /// 2.The guard variable since the guard variable we are checking against is /// always the same. void resetPerFunctionState() { - FailureMBB = 0; - Guard = 0; + FailureMBB = nullptr; + Guard = nullptr; } MachineBasicBlock *getParentMBB() { return ParentMBB; } @@ -482,7 +482,7 @@ private: /// block will be created. 
MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, MachineBasicBlock *ParentMBB, - MachineBasicBlock *SuccMBB = 0); + MachineBasicBlock *SuccMBB = nullptr); }; private: @@ -538,7 +538,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : CurInst(NULL), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), + : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } @@ -600,13 +600,13 @@ public: void setValue(const Value *V, SDValue NewN) { SDValue &N = NodeMap[V]; - assert(N.getNode() == 0 && "Already set a value for this node!"); + assert(!N.getNode() && "Already set a value for this node!"); N = NewN; } void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; - assert(N.getNode() == 0 && "Already set a value for this node!"); + assert(!N.getNode() && "Already set a value for this node!"); N = NewN; } @@ -624,7 +624,7 @@ public: void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, - MachineBasicBlock *LandingPad = NULL); + MachineBasicBlock *LandingPad = nullptr); std::pair LowerCallOperands(const CallInst &CI, unsigned ArgIdx, @@ -642,7 +642,7 @@ private: void visitBr(const BranchInst &I); void visitSwitch(const SwitchInst &I); void visitIndirectBr(const IndirectBrInst &I); - void visitUnreachable(const UnreachableInst &I) { /* noop */ } + void visitUnreachable(const UnreachableInst &I); // Helpers for visitSwitch bool handleSmallSwitchRange(CaseRec& CR, @@ -785,7 +785,8 @@ private: /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, const SDValue &N); + int64_t Offset, bool IsIndirect, + const SDValue &N); }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 535feba..d6b5255 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -93,6 +93,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::READ_REGISTER: return "READ_REGISTER"; + case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; @@ -330,7 +332,7 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } -void SDNode::dump() const { dump(0); } +void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); dbgs() << '\n'; @@ -427,7 +429,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); + OS << ' ' << PrintReg(R->getReg(), G ? 
G->getTarget().getRegisterInfo() :nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -595,7 +597,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, void SDNode::dumpr() const { VisitedSDNodeSet once; - DumpNodesr(dbgs(), this, 0, 0, once); + DumpNodesr(dbgs(), this, 0, nullptr, once); } void SDNode::dumpr(const SelectionDAG *G) const { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5d0e2b9..472fc9c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "isel" #include "llvm/CodeGen/SelectionDAGISel.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" @@ -58,6 +57,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "isel" + STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); @@ -299,7 +300,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, "'usesCustomInserter', it must implement " "TargetLowering::EmitInstrWithCustomInserter!"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, @@ -356,7 +357,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { // Loop for blocks with phi nodes. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { PHINode *PN = dyn_cast(BB->begin()); - if (PN == 0) continue; + if (!PN) continue; ReprocessBlock: // For each block with a PHI node, check to see if any of the input values @@ -366,7 +367,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) { for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantExpr *CE = dyn_cast(PN->getIncomingValue(i)); - if (CE == 0 || !CE->canTrap()) continue; + if (!CE || !CE->canTrap()) continue; // The only case we have to worry about is when the edge is critical. // Since this block has a PHI Node, we assume it has multiple input @@ -399,7 +400,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo = &MF->getRegInfo(); AA = &getAnalysis(); LibInfo = &getAnalysis(); - GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : 0; + GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; TargetSubtargetInfo &ST = const_cast(TM.getSubtarget()); @@ -422,7 +423,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (UseMBPI && OptLevel != CodeGenOpt::None) FuncInfo->BPI = &getAnalysis(); else - FuncInfo->BPI = 0; + FuncInfo->BPI = nullptr; SDB->init(GFI, *AA, LibInfo); @@ -482,7 +483,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only // user of LDI->second. 
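Several files in this patch, including SelectionDAGISel.cpp above, move their DEBUG_TYPE definition from before the includes to after them, so headers that define or use their own DEBUG_TYPE are not affected by the file's macro. The resulting layout, sketched from that hunk:

    #include "llvm/CodeGen/SelectionDAGISel.h"
    // ... remaining includes ...
    using namespace llvm;

    #define DEBUG_TYPE "isel"   // now defined after the last include

    STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");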
- MachineInstr *CopyUseMI = NULL; + MachineInstr *CopyUseMI = nullptr; for (MachineRegisterInfo::use_instr_iterator UI = RegInfo->use_instr_begin(LDI->second), E = RegInfo->use_instr_end(); UI != E; ) { @@ -492,7 +493,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { CopyUseMI = UseMI; continue; } // Otherwise this is another use or second copy use. - CopyUseMI = NULL; break; + CopyUseMI = nullptr; break; } if (CopyUseMI) { MachineInstr *NewMI = @@ -509,21 +510,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there are any calls in this machine function. MachineFrameInfo *MFI = MF->getFrameInfo(); - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; - ++I) { - + for (const auto &MBB : *MF) { if (MFI->hasCalls() && MF->hasInlineAsm()) break; - const MachineBasicBlock *MBB = I; - for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); - II != IE; ++II) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); + for (const auto &MI : MBB) { + const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode()); if ((MCID.isCall() && !MCID.isReturn()) || - II->isStackAligningInlineAsm()) { + MI.isStackAligningInlineAsm()) { MFI->setHasCalls(true); } - if (II->isInlineAsm()) { + if (MI.isInlineAsm()) { MF->setHasInlineAsm(true); } } @@ -624,7 +621,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); - CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne); + CurDAG->computeKnownBits(Src, KnownZero, KnownOne); FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -994,7 +991,7 @@ static void collectFailStats(const Instruction *I) { void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. - FastISel *FastIS = 0; + FastISel *FastIS = nullptr; if (TM.Options.EnableFastISel) FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); @@ -1069,7 +1066,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt)); else - FastIS->setLastLocalValue(0); + FastIS->setLastLocalValue(nullptr); } unsigned NumFastIselRemaining = std::distance(Begin, End); @@ -1607,7 +1604,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, APInt NeededMask = DesiredMask & ~ActualMask; APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne); + CurDAG->computeKnownBits(LHS, KnownZero, KnownOne); // If all the missing bits in the or are already known to be set, match! if ((NeededMask & KnownOne) == NeededMask) @@ -1676,7 +1673,7 @@ static SDNode *findGlueUse(SDNode *N) { if (Use.getResNo() == FlagResNo) return Use.getUser(); } - return NULL; + return nullptr; } /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". 
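ComputeMaskedBits becomes computeKnownBits here; the old name dated from a since-removed mask parameter, and only the name changes at call sites. Usage, for reference:

    APInt KnownZero, KnownOne;
    CurDAG->computeKnownBits(Src, KnownZero, KnownOne);
    // KnownZero marks bits proven zero in Src, KnownOne bits proven one;
    // the two sets are disjoint, everything else is unknown.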
@@ -1783,7 +1780,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, EVT VT = Root->getValueType(Root->getNumValues()-1); while (VT == MVT::Glue) { SDNode *GU = findGlueUse(Root); - if (GU == NULL) + if (!GU) break; Root = GU; VT = Root->getValueType(Root->getNumValues()-1); @@ -1805,12 +1802,39 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SelectInlineAsmMemoryOperands(Ops); EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), - VTs, &Ops[0], Ops.size()); + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops); + New->setNodeId(-1); + return New.getNode(); +} + +SDNode +*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op->getOperand(0)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getTargetLowering()->getRegisterByName( + RegStr->getString().data(), Op->getValueType(0)); + SDValue New = CurDAG->getCopyFromReg( + CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); return New.getNode(); } +SDNode +*SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast(Op->getOperand(1)); + const MDString *RegStr = dyn_cast(MD->getMD()->getOperand(0)); + unsigned Reg = getTargetLowering()->getRegisterByName( + RegStr->getString().data(), Op->getOperand(2).getValueType()); + SDValue New = CurDAG->getCopyToReg( + CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); + New->setNodeId(-1); + return New.getNode(); +} + + + SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); } @@ -1846,7 +1870,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, // Now that all the normal results are replaced, we replace the chain and // glue results if present. if (!ChainNodesMatched.empty()) { - assert(InputChain.getNode() != 0 && + assert(InputChain.getNode() && "Matched input chains but didn't produce a chain"); // Loop over all of the nodes we matched that produced a chain result. // Replace all the chain results with the final chain we ended up with. @@ -1877,7 +1901,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, // If the result produces glue, update any glue results in the matched // pattern with the glue result. - if (InputGlue.getNode() != 0) { + if (InputGlue.getNode()) { // Handle any interior nodes explicitly marked. for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { SDNode *FRN = GlueResultNodesMatched[i]; @@ -2080,13 +2104,13 @@ HandleMergeInputChains(SmallVectorImpl &ChainNodesMatched, if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), - MVT::Other, &InputChains[0], InputChains.size()); + MVT::Other, InputChains); } /// MorphNode - Handle morphing a node in place for the selector. SDNode *SelectionDAGISel:: MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, - const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) { + ArrayRef Ops, unsigned EmitNodeInfo) { // It is possible we're using MorphNodeTo to replace a node with no // normal results with one that has a normal result (or we could be // adding a chain) and the input could have glue and chains as well. @@ -2106,7 +2130,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Call the underlying SelectionDAG routine to do the transmogrification. 
Note // that this deletes operands of the old node that become dead. - SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps); + SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops); // MorphNodeTo can operate in two ways: if an existing node with the // specified operands exists, it can just return it. Otherwise, it @@ -2230,7 +2254,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, Val = GetVBR(Val, MatcherTable, MatcherIndex); ConstantSDNode *C = dyn_cast(N); - return C != 0 && C->getSExtValue() == Val; + return C && C->getSExtValue() == Val; } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2251,7 +2275,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, if (N->getOpcode() != ISD::AND) return false; ConstantSDNode *C = dyn_cast(N->getOperand(1)); - return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val); + return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2264,7 +2288,7 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, if (N->getOpcode() != ISD::OR) return false; ConstantSDNode *C = dyn_cast(N->getOperand(1)); - return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val); + return C && SDISel.CheckOrMask(N.getOperand(0), C, Val); } /// IsPredicateKnownToFail - If we know how and can do so without pushing a @@ -2396,13 +2420,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::LIFETIME_START: case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. - return 0; + return nullptr; case ISD::AssertSext: case ISD::AssertZext: CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); - return 0; + return nullptr; case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch); + case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch); case ISD::UNDEF: return Select_UNDEF(NodeToMatch); } @@ -2548,7 +2574,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_RecordNode: { // Remember this node, it may end up being an operand in the pattern. 
- SDNode *Parent = 0; + SDNode *Parent = nullptr; if (NodeStack.size() > 1) Parent = NodeStack[NodeStack.size()-2].getNode(); RecordedNodes.push_back(std::make_pair(N, Parent)); @@ -2755,7 +2781,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); RecordedNodes.push_back(std::pair( - CurDAG->getTargetConstant(Val, VT), (SDNode*)0)); + CurDAG->getTargetConstant(Val, VT), nullptr)); continue; } case OPC_EmitRegister: { @@ -2763,7 +2789,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; unsigned RegNo = MatcherTable[MatcherIndex++]; RecordedNodes.push_back(std::pair( - CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + CurDAG->getRegister(RegNo, VT), nullptr)); continue; } case OPC_EmitRegister2: { @@ -2775,7 +2801,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RegNo = MatcherTable[MatcherIndex++]; RegNo |= MatcherTable[MatcherIndex++] << 8; RecordedNodes.push_back(std::pair( - CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + CurDAG->getRegister(RegNo, VT), nullptr)); continue; } @@ -2800,7 +2826,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 // These are space-optimized forms of OPC_EmitMergeInputChains. - assert(InputChain.getNode() == 0 && + assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); assert(ChainNodesMatched.empty() && "Should only have one EmitMergeInputChains per match"); @@ -2821,13 +2847,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) break; // Failed to merge. continue; } case OPC_EmitMergeInputChains: { - assert(InputChain.getNode() == 0 && + assert(!InputChain.getNode() && "EmitMergeInputChains should be the first chain producing node"); // This node gets a list of nodes we matched in the input that have // chains. We want to token factor all of the input chains to these nodes @@ -2863,7 +2889,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Merge the input chains if they are not intra-pattern references. InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) break; // Failed to merge. 
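Alongside the 0/NULL to nullptr conversion, pointer validity tests are rewritten in boolean form, as in the OPC_EmitMergeInputChains hunks above. The resulting idiom, with the pair's template arguments (lost to formatting above) spelled out:

    if (!InputChain.getNode())     // was: InputChain.getNode() == 0
      break;                       // Failed to merge.
    RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
        CurDAG->getTargetConstant(Val, VT), nullptr));   // was: (SDNode*)0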
continue; @@ -2874,7 +2900,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; - if (InputChain.getNode() == 0) + if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), @@ -2890,7 +2916,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); - RecordedNodes.push_back(std::pair(Res, (SDNode*) 0)); + RecordedNodes.push_back(std::pair(Res, nullptr)); continue; } @@ -2922,7 +2948,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, else if (VTs.size() == 2) VTList = CurDAG->getVTList(VTs[0], VTs[1]); else - VTList = CurDAG->getVTList(VTs.data(), VTs.size()); + VTList = CurDAG->getVTList(VTs); // Get the operand list. unsigned NumOps = MatcherTable[MatcherIndex++]; @@ -2956,11 +2982,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // If this has chain/glue inputs, add them. if (EmitNodeInfo & OPFL_Chain) Ops.push_back(InputChain); - if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0) + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) Ops.push_back(InputGlue); // Create the node. - SDNode *Res = 0; + SDNode *Res = nullptr; if (Opcode != OPC_MorphNodeTo) { // If this is a normal EmitNode command, just create the new node and // add the results to the RecordedNodes list. @@ -2971,17 +2997,16 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0, e = VTs.size(); i != e; ++i) { if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; RecordedNodes.push_back(std::pair(SDValue(Res, i), - (SDNode*) 0)); + nullptr)); } } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) { - Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(), - EmitNodeInfo); + Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); } else { // NodeToMatch was eliminated by CSE when the target changed the DAG. // We will visit the equivalent node later. DEBUG(dbgs() << "Node was eliminated by CSE\n"); - return 0; + return nullptr; } // If the node had chain/glue results, update our notion of the current @@ -3111,7 +3136,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // FIXME: We just return here, which interacts correctly with SelectRoot // above. We should fix this to not return an SDNode* anymore. 
- return 0; + return nullptr; } } @@ -3123,7 +3148,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, while (1) { if (MatchScopes.empty()) { CannotYetSelect(NodeToMatch); - return 0; + return nullptr; } // Restore the interpreter state back to the point where the scope was diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 1483fdd..4df5ede 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -27,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "dag-printer" + namespace llvm { template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { @@ -124,9 +126,9 @@ namespace llvm { static void addCustomGraphFeatures(SelectionDAG *G, GraphWriter &GW) { - GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); if (G->getRoot().getNode()) - GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(), "color=blue,style=dashed"); } }; @@ -289,10 +291,10 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter &GW) const { if (DAG) { // Draw a special "GraphRoot" node to indicate the root of the graph. - GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot"); + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); const SDNode *N = DAG->getRoot().getNode(); if (N && N->getNodeId() != -1) - GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1, + GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1, "color=blue,style=dashed"); } } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5de0b03..b75d805 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -40,7 +40,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, : TargetLoweringBase(tm, tlof) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { - return NULL; + return nullptr; } /// Check whether a given call node is in tail position within its function. If @@ -103,12 +103,11 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, getLibcallCallingConv(LC), - /*isTailCall=*/false, - doesNotReturn, isReturnValueUsed, Callee, Args, - DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) + .setSExtResult(isSigned).setZExtResult(!isSigned); return LowerCallTo(CLI); } @@ -226,7 +225,7 @@ unsigned TargetLowering::getJumpTableEncoding() const { return MachineJumpTableInfo::EK_BlockAddress; // In PIC mode, if the target supports a GPRel32 directive, use it. - if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) + if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr) return MachineJumpTableInfo::EK_GPRel32BlockAddress; // Otherwise, use a label difference. 
@@ -386,7 +385,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (Depth != 0) { // If not at the root, Just compute the KnownZero/KnownOne bits to // simplify things downstream. - TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); return false; } // If this is the root being simplified, allow it to have multiple uses, @@ -416,7 +415,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (ConstantSDNode *RHSC = dyn_cast(Op.getOperand(1))) { APInt LHSZero, LHSOne; // Do not increment Depth here; that can cause an infinite loop. - TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth); + TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth); // If the LHS already has zeros where RHSC does, this and is dead. if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op.getOperand(0)); @@ -848,6 +847,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } break; } + case ISD::BUILD_PAIR: { + EVT HalfVT = Op.getOperand(0).getValueType(); + unsigned HalfBitWidth = HalfVT.getScalarSizeInBits(); + + APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); + APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); + + APInt KnownZeroLo, KnownOneLo; + APInt KnownZeroHi, KnownOneHi; + + if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo, + KnownOneLo, TLO, Depth + 1)) + return true; + + if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi, + KnownOneHi, TLO, Depth + 1)) + return true; + + KnownZero = KnownZeroLo.zext(BitWidth) | + KnownZeroHi.zext(BitWidth).shl(HalfBitWidth); + + KnownOne = KnownOneLo.zext(BitWidth) | + KnownOneHi.zext(BitWidth).shl(HalfBitWidth); + break; + } case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); @@ -1040,8 +1064,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // FALL THROUGH default: - // Just use ComputeMaskedBits to compute output bits. - TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth); + // Just use computeKnownBits to compute output bits. + TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); break; } @@ -1053,14 +1077,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } -/// computeMaskedBitsForTargetNode - Determine which of the bits specified +/// computeKnownBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. -void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || @@ -1074,6 +1098,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, /// targets that want to expose additional information about sign bits to the /// DAG Combiner. 
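The new ISD::BUILD_PAIR case in SimplifyDemandedBits splits the demanded mask between the two halves, simplifies each half independently, and recombines their known bits. A toy rendition with plain integers, assuming a 64-bit pair built from two 32-bit halves:

    uint64_t NewMask = 0x00000001FFFF0000ULL;     // bits demanded of the pair
    uint32_t MaskLo  = (uint32_t)NewMask;         // getLoBits(32): 0xFFFF0000
    uint32_t MaskHi  = (uint32_t)(NewMask >> 32); // getHiBits(32): 0x00000001
    // Each operand is simplified against its own mask; known bits merge as
    //   KnownZero = KnownZeroLo.zext(64) | KnownZeroHi.zext(64).shl(32);
    //   KnownOne  = KnownOneLo.zext(64)  | KnownOneHi.zext(64).shl(32);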
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + const SelectionDAG &, unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || @@ -1085,7 +1110,7 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, } /// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly -/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to +/// one bit set. This differs from computeKnownBits in that it doesn't need to /// determine which bit is set. /// static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { @@ -1108,11 +1133,11 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // More could be done here, though the above checks are enough // to handle some common cases. - // Fall back to ComputeMaskedBits to catch other known cases. + // Fall back to computeKnownBits to catch other known cases. EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Val, KnownZero, KnownOne); + DAG.computeKnownBits(Val, KnownZero, KnownOne); return (KnownZero.countPopulation() == BitWidth - 1) && (KnownOne.countPopulation() == 1); } @@ -1381,10 +1406,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = N0.getOperand(0).getValueType(); if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && - getCondCodeAction(Cond, newVT.getSimpleVT())==Legal)) - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(C1.trunc(InSize), newVT), - Cond); + getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { + EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT); + SDValue NewConst = DAG.getConstant(C1.trunc(InSize), newVT); + + SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), + NewConst, Cond); + return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT); + } break; } default: @@ -2052,7 +2081,7 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ return "r"; if (ConstraintVT.isFloatingPoint()) return "f"; // works for many targets - return 0; + return nullptr; } /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops @@ -2086,12 +2115,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (Op.getOpcode() == ISD::ADD) { C = dyn_cast(Op.getOperand(1)); GA = dyn_cast(Op.getOperand(0)); - if (C == 0 || GA == 0) { + if (!C || !GA) { C = dyn_cast(Op.getOperand(0)); GA = dyn_cast(Op.getOperand(1)); } - if (C == 0 || GA == 0) - C = 0, GA = 0; + if (!C || !GA) + C = nullptr, GA = nullptr; } // If we find a valid operand, map to the TargetXXX version so that the @@ -2126,14 +2155,14 @@ std::pair TargetLowering:: getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.empty() || Constraint[0] != '{') - return std::make_pair(0u, static_cast(0)); + return std::make_pair(0u, static_cast(nullptr)); assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); // Remove the braces from around the name. StringRef RegName(Constraint.data()+1, Constraint.size()-2); std::pair R = - std::make_pair(0u, static_cast(0)); + std::make_pair(0u, static_cast(nullptr)); // Figure out which register class contains this reg. 
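// ValueHasExactlyOneBitSet's fallback above proves "exactly one bit set"
// when all bits but one are known zero and the remaining bit is known one.
// The same test on plain 32-bit masks (__builtin_popcount is the GCC/Clang
// builtin; standalone sketch):
#include <cassert>
#include <cstdint>
static bool exactlyOneBitSet(uint32_t KnownZero, uint32_t KnownOne) {
  return __builtin_popcount(KnownZero) == 31 &&
         __builtin_popcount(KnownOne) == 1;
}
int main() {
  assert(exactlyOneBitSet(~0x10u, 0x10u));   // value proven to be 0x10
  assert(!exactlyOneBitSet(0xFFFFFF00u, 0)); // low byte unknown: no proof
  return 0;
}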
const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo(); @@ -2428,7 +2457,7 @@ TargetLowering::ConstraintWeight Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; // Look at the constraint type. switch (*constraint) { @@ -2601,9 +2630,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering:: -BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode*> *Created) const { +SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode*> *Created) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2612,8 +2641,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, if (!isTypeLegal(VT)) return SDValue(); - APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); - APInt::ms magics = d.magic(); + APInt::ms magics = Divisor.magic(); // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type @@ -2630,13 +2658,13 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, else return SDValue(); // No mulhs or equivalent // If d > 0 and m < 0, add the numerator - if (d.isStrictlyPositive() && magics.m.isNegative()) { + if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); if (Created) Created->push_back(Q.getNode()); } // If d < 0 and m > 0, subtract the numerator. - if (d.isNegative() && magics.m.isStrictlyPositive()) { + if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); if (Created) Created->push_back(Q.getNode()); @@ -2649,9 +2677,9 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient - SDValue T = - DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy(Q.getValueType()))); + SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); @@ -2661,9 +2689,9 @@ /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> -SDValue TargetLowering:: -BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector<SDNode*> *Created) const { +SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode*> *Created) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2674,22 +2702,21 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. - const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); - APInt::mu magics = N1C.magicu(); + APInt::mu magics = Divisor.magicu(); SDValue Q = N->getOperand(0); // If the divisor is even, we can avoid using the expensive fixup by shifting // the divided value upfront.
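// For readers unfamiliar with the magic-number trick BuildSDIV/BuildUDIV
// emit, a freestanding demonstration of the unsigned case. It uses the
// simple round-up magic M = floor(2^64/d) + 1, valid for every 32-bit
// numerator; APInt::magicu() instead derives minimal (magic, shift, add)
// triples, and the code below this note pre-shifts even divisors to dodge
// the costly add fixup, but the multiply-high identity is the same.
// Assumes a compiler providing unsigned __int128:
#include <cassert>
#include <cstdint>
static uint32_t udivByMagic(uint32_t X, uint64_t M) {
  // mulhu: the high 64 bits of the 64x64 product.
  return (uint32_t)(((unsigned __int128)X * M) >> 64);
}
int main() {
  const uint32_t D = 7;
  const uint64_t M = ~0ULL / D + 1; // floor(2^64/7) + 1
  for (uint32_t X = 0; X < 1000000; ++X)
    assert(udivByMagic(X, M) == X / D);
  return 0;
}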
- if (magics.a != 0 && !N1C[0]) { - unsigned Shift = N1C.countTrailingZeros(); + if (magics.a != 0 && !Divisor[0]) { + unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. - magics = N1C.lshr(Shift).magicu(Shift); + magics = Divisor.lshr(Shift).magicu(Shift); assert(magics.a == 0 && "Should use cheap fixup now"); } @@ -2708,7 +2735,7 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, Created->push_back(Q.getNode()); if (magics.a == 0) { - assert(magics.s < N1C.getBitWidth() && + assert(magics.s < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); @@ -2738,3 +2765,110 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } + +//===----------------------------------------------------------------------===// +// Legalization Utilities +//===----------------------------------------------------------------------===// + +bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, SDValue LL, SDValue LH, + SDValue RL, SDValue RH) const { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT); + bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT); + bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); + bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = HiLoVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + // LL, LH, RL, and RH must be either all NULL or all set to a value. + assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || + (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); + + if (!LL.getNode() && !RL.getNode() && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0)); + RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1)); + } + + if (!LL.getNode()) + return false; + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + return true; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if (HasMULHS) { + // We can emit a mulhs+mul. 
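// The expandMUL path here forms a double-width product from half-width
// pieces: Lo = LL*RL (low half) and Hi = mulhu(LL,RL) + LL*RH + LH*RL, all
// computed modulo the half width. A standalone check of that identity with
// 16-bit halves and a 32-bit product:
#include <cassert>
#include <cstdint>
static uint32_t mulViaHalves(uint32_t A, uint32_t B) {
  uint16_t AL = (uint16_t)A, AH = (uint16_t)(A >> 16);
  uint16_t BL = (uint16_t)B, BH = (uint16_t)(B >> 16);
  uint32_t P = (uint32_t)AL * BL;              // 16x16->32 low product
  uint16_t Lo = (uint16_t)P;
  uint16_t Hi = (uint16_t)((P >> 16)           // mulhu(LL, RL)
                           + (uint32_t)AL * BH // LL*RH (wraps mod 2^16)
                           + (uint32_t)AH * BL); // LH*RL (wraps mod 2^16)
  return ((uint32_t)Hi << 16) | Lo;
}
int main() {
  uint32_t Samples[] = {0, 1, 3, 0xFFFF, 0x10000, 0xDEADBEEF, 0xFFFFFFFF};
  for (uint32_t A : Samples)
    for (uint32_t B : Samples)
      assert(mulViaHalves(A, B) == A * B); // equal modulo 2^32
  return 0;
}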
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL); + return true; + } + } + + if (!LH.getNode() && !RH.getNode() && + isOperationLegalOrCustom(ISD::SRL, VT) && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); + SDValue Shift = DAG.getConstant(ShiftAmt, getShiftAmountTy(VT)); + LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); + LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); + RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); + RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); + } + + if (!LH.getNode()) + return false; + + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + return true; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + return true; + } + } + return false; +} diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index adb3ef9..f7c64da 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -25,7 +25,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "shadowstackgc" #include "llvm/CodeGen/GCs.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/GCStrategy.h" @@ -36,6 +35,8 @@ using namespace llvm; +#define DEBUG_TYPE "shadowstackgc" + namespace { class ShadowStackGC : public GCStrategy { @@ -101,7 +102,7 @@ namespace { IRBuilder<> *Next() { switch (State) { default: - return 0; + return nullptr; case 0: StateBB = F.begin(); @@ -137,7 +138,7 @@ namespace { Calls.push_back(CI); if (Calls.empty()) - return 0; + return nullptr; // Create a cleanup block. 
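// For context on what ShadowStackGC (this file) lowers to: each function
// with GC roots links a frame record onto a global chain on entry and
// unlinks it on return, so a collector can walk roots without
// target-specific stack maps. A plain C++ sketch of that runtime structure
// (names illustrative; the pass itself builds the equivalent IR):
struct FrameMap { int NumRoots; };      // per-function static metadata
struct StackEntry {
  StackEntry *Next;                     // caller's entry
  const FrameMap *Map;
  void *Roots[1];                       // conceptually variable-length
};
static StackEntry *llvm_gc_root_chain;  // plays the role of "Head" above
static void enterFrame(StackEntry *E) {
  E->Next = llvm_gc_root_chain;
  llvm_gc_root_chain = E;
}
static void leaveFrame() {
  llvm_gc_root_chain = llvm_gc_root_chain->Next;
}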
LLVMContext &C = F.getContext(); @@ -194,7 +195,7 @@ namespace { void llvm::linkShadowStackGC() { } -ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) { +ShadowStackGC::ShadowStackGC() : Head(nullptr), StackEntryTy(nullptr) { InitRoots = true; CustomRoots = true; } @@ -390,8 +391,8 @@ bool ShadowStackGC::performCustomLowering(Function &F) { BasicBlock::iterator IP = F.getEntryBlock().begin(); IRBuilder<> AtEntry(IP->getParent(), IP); - Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, - "gc_frame"); + Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, + "gc_frame"); while (isa(IP)) ++IP; AtEntry.SetInsertPoint(IP->getParent(), IP); diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index dc7ca2b..d2f3955 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sjljehprepare" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" @@ -38,6 +37,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "sjljehprepare" + STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); @@ -100,10 +101,10 @@ bool SjLjEHPrepare::doInitialization(Module &M) { NULL); RegisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), (Type *)0); + PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); UnregisterFn = M.getOrInsertFunction( "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), (Type *)0); + PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); @@ -111,7 +112,7 @@ bool SjLjEHPrepare::doInitialization(Module &M) { LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); - PersonalityFn = 0; + PersonalityFn = nullptr; return true; } @@ -192,7 +193,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F, const TargetLowering *TLI = TM->getTargetLowering(); unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); - FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", + FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", EntryBB->begin()); // Fill in the function context structure. 
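// The (Type *)nullptr sentinels above matter because getOrInsertFunction
// takes a C-style variadic parameter list terminated by a null Type*:
// through varargs a bare literal 0 is passed as an int, which need not
// match a pointer in size or representation, so the terminator keeps its
// pointer cast even after the nullptr migration. The same pattern with
// ordinary varargs (standalone sketch):
#include <cstdarg>
#include <cstdio>
static int countStrings(const char *First, ...) {
  int N = First != nullptr;
  va_list AP;
  va_start(AP, First);
  if (First)
    while (va_arg(AP, const char *) != nullptr)
      ++N;
  va_end(AP);
  return N;
}
int main() {
  // (const char *)nullptr plays the same role as (Type *)nullptr above.
  std::printf("%d\n", countStrings("a", "b", (const char *)nullptr)); // 2
  return 0;
}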
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index a6c6261..d46621d 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "slotindexes" - #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" @@ -18,6 +16,8 @@ using namespace llvm; +#define DEBUG_TYPE "slotindexes" + char SlotIndexes::ID = 0; INITIALIZE_PASS(SlotIndexes, "slotindexes", "Slot index numbering", false, false) @@ -66,7 +66,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MBBRanges.resize(mf->getNumBlockIDs()); idx2MBBMap.reserve(mf->size()); - indexList.push_back(createEntry(0, index)); + indexList.push_back(createEntry(nullptr, index)); // Iterate over the function. for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end(); @@ -91,7 +91,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { } // We insert one blank instruction between basic blocks. - indexList.push_back(createEntry(0, index += SlotIndex::InstrDist)); + indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(), @@ -182,7 +182,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, "Decremented past the beginning of region to repair."); MachineInstr *SlotMI = ListI->getInstr(); - MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0; + MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : nullptr; bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart); if (SlotMI == MI && !MBBIAtBegin) { @@ -219,7 +219,7 @@ void SlotIndexes::dump() const { itr != indexList.end(); ++itr) { dbgs() << itr->getIndex() << " "; - if (itr->getInstr() != 0) { + if (itr->getInstr()) { dbgs() << *itr->getInstr(); } else { dbgs() << "\n"; diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 5f73469..24e94d1 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -27,7 +27,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "spillplacement" #include "SpillPlacement.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" @@ -41,6 +40,8 @@ using namespace llvm; +#define DEBUG_TYPE "spillplacement" + char SpillPlacement::ID = 0; INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", "Spill Code Placement Analysis", true, true) @@ -59,9 +60,26 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +namespace { +static BlockFrequency Threshold; +} + /// Decision threshold. A node gets the output value 0 if the weighted sum of /// its inputs falls in the open interval (-Threshold;Threshold). -static const BlockFrequency Threshold = 2; +static BlockFrequency getThreshold() { return Threshold; } + +/// \brief Set the threshold for a given entry frequency. +/// +/// Set the threshold relative to \c Entry. Since the threshold is used as a +/// bound on the open interval (-Threshold;Threshold), 1 is the minimum +/// threshold. +static void setThreshold(const BlockFrequency &Entry) { + // Apparently 2 is a good threshold when Entry==2^14, but we need to scale + // it. Divide by 2^13, rounding as appropriate.
+ uint64_t Freq = Entry.getFrequency(); + uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12)); + Threshold = std::max(UINT64_C(1), Scaled); +} /// Node - Each edge bundle corresponds to a Hopfield node. /// @@ -110,7 +128,7 @@ struct SpillPlacement::Node { // the CFG. void clear() { BiasN = BiasP = Value = 0; - SumLinkWeights = Threshold; + SumLinkWeights = getThreshold(); Links.clear(); } @@ -168,9 +186,9 @@ struct SpillPlacement::Node { // 2. It helps tame rounding errors when the links nominally sum to 0. // bool Before = preferReg(); - if (SumN >= SumP + Threshold) + if (SumN >= SumP + getThreshold()) Value = -1; - else if (SumP >= SumN + Threshold) + else if (SumP >= SumN + getThreshold()) Value = 1; else Value = 0; @@ -189,6 +207,7 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { // Compute total ingoing and outgoing block frequencies for all bundles. BlockFrequencies.resize(mf.getNumBlockIDs()); MBFI = &getAnalysis(); + setThreshold(MBFI->getEntryFreq()); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { unsigned Num = I->getNumber(); BlockFrequencies[Num] = MBFI->getBlockFreq(I); @@ -200,7 +219,7 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { void SpillPlacement::releaseMemory() { delete[] nodes; - nodes = 0; + nodes = nullptr; } /// activate - mark node n as active if it wasn't already. @@ -375,6 +394,6 @@ SpillPlacement::finish() { ActiveNodes->reset(n); Perfect = false; } - ActiveNodes = 0; + ActiveNodes = nullptr; return Perfect; } diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index a88d7ac..43fc7f5 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -65,7 +65,7 @@ class SpillPlacement : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid. 
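// setThreshold's scaling above is a divide-by-2^13 rounded to nearest:
// shift right, then add back the highest dropped bit as a carry.
// Generalized and checked standalone (SpillPlacement additionally clamps
// the result to at least 1 via std::max):
#include <cassert>
#include <cstdint>
static uint64_t shiftRoundNearest(uint64_t X, unsigned K) { // K >= 1
  return (X >> K) + ((X >> (K - 1)) & 1);
}
int main() {
  assert(shiftRoundNearest(1ULL << 14, 13) == 2); // Entry == 2^14 -> 2
  assert(shiftRoundNearest(4096, 13) == 1);       // exactly half rounds up
  assert(shiftRoundNearest(4095, 13) == 0);       // below half rounds down
  return 0;
}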
- SpillPlacement() : MachineFunctionPass(ID), nodes(0) {} + SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {} ~SpillPlacement() { releaseMemory(); } /// BorderConstraint - A basic block has separate constraints for entry and diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 094641c..0649448 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "spiller" - #include "Spiller.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" @@ -28,6 +26,8 @@ using namespace llvm; +#define DEBUG_TYPE "spiller" + namespace { enum SpillerName { trivial, inline_ }; } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 16fe979..7d4f568 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "SplitKit.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -29,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumFinished, "Number of splits finished"); STATISTIC(NumSimple, "Number of splits that were simple"); STATISTIC(NumCopies, "Number of copies inserted for splitting"); @@ -47,14 +48,14 @@ SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, LIS(lis), Loops(mli), TII(*MF.getTarget().getInstrInfo()), - CurLI(0), + CurLI(nullptr), LastSplitPoint(MF.getNumBlockIDs()) {} void SplitAnalysis::clear() { UseSlots.clear(); UseBlocks.clear(); ThroughBlocks.clear(); - CurLI = 0; + CurLI = nullptr; DidRepairRange = false; } @@ -331,7 +332,7 @@ SplitEditor::SplitEditor(SplitAnalysis &sa, TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), MBFI(mbfi), - Edit(0), + Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition), RegAssign(Allocator) @@ -353,7 +354,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { // We don't need an AliasAnalysis since we will only be performing // cheap-as-a-copy remats anyway. - Edit->anyRematerializable(0); + Edit->anyRematerializable(nullptr); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -423,7 +424,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. - VFP = ValueForcePair(0, true); + VFP = ValueForcePair(nullptr, true); } VNInfo *SplitEditor::defFromParent(unsigned RegIdx, @@ -431,7 +432,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, SlotIndex UseIdx, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) { - MachineInstr *CopyMI = 0; + MachineInstr *CopyMI = nullptr; SlotIndex Def; LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); @@ -922,7 +923,7 @@ bool SplitEditor::transferValues() { else { // Live-through, and we don't know the value. LRC.addLiveInBlock(LR, MDT[MBB]); - LRC.setLiveOutValue(MBB, 0); + LRC.setLiveOutValue(MBB, nullptr); } } BlockStart = BlockEnd; diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index f029c73..7048ee3 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -377,7 +377,7 @@ public: SlotIndex enterIntvAfter(SlotIndex Idx); /// enterIntvAtEnd - Enter the open interval at the end of MBB. 
- /// Use the open interval from he inserted copy to the MBB end. + /// Use the open interval from the inserted copy to the MBB end. /// Return the beginning of the new live range. SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB); @@ -417,7 +417,7 @@ public: /// @param LRMap When not null, this vector will map each live range in Edit /// back to the indices returned by openIntv. /// There may be extra indices created by dead code elimination. - void finish(SmallVectorImpl<unsigned> *LRMap = 0); + void finish(SmallVectorImpl<unsigned> *LRMap = nullptr); /// dump - print the current interval mapping to dbgs(). void dump() const; diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 7b1de85..370430c 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackcoloring" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -58,6 +57,8 @@ using namespace llvm; +#define DEBUG_TYPE "stackcoloring" + static cl::opt<bool> DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, @@ -193,12 +194,11 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { } void StackColoring::dump() const { - for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); - FI != FE; ++FI) { - DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<< - " ["<<FI->getName()<<"]\n"); + for (MachineBasicBlock *MBB : depth_first(MF)) { + DEBUG(dbgs() << "Inspecting block #" << BasicBlocks.lookup(MBB) << " [" + << MBB->getName() << "]\n"); - LivenessMap::const_iterator BI = BlockLiveness.find(*FI); + LivenessMap::const_iterator BI = BlockLiveness.find(MBB); assert(BI != BlockLiveness.end() && "Block not found"); const BlockLifetimeInfo &BlockInfo = BI->second; @@ -231,20 +231,19 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { // NOTE: We use a reverse-post-order iteration to ensure that we obtain a // deterministic numbering, and because we'll need a post-order iteration // later for solving the liveness dataflow problem. - for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF); - FI != FE; ++FI) { + for (MachineBasicBlock *MBB : depth_first(MF)) { // Assign a serial number to this basic block. - BasicBlocks[*FI] = BasicBlockNumbering.size(); - BasicBlockNumbering.push_back(*FI); + BasicBlocks[MBB] = BasicBlockNumbering.size(); + BasicBlockNumbering.push_back(MBB); // Keep a reference to avoid repeated lookups. - BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI]; + BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB]; BlockInfo.Begin.resize(NumSlot); BlockInfo.End.resize(NumSlot); - for (MachineInstr &MI : **FI) { + for (MachineInstr &MI : *MBB) { if (MI.getOpcode() != TargetOpcode::LIFETIME_START && MI.getOpcode() != TargetOpcode::LIFETIME_END) continue; @@ -511,11 +510,6 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // Update the MachineMemOperand to use the new alloca. for (MachineMemOperand *MMO : I.memoperands()) { - const Value *V = MMO->getValue(); - - if (!V) - continue; - // FIXME: In order to enable the use of TBAA when using AA in CodeGen, // we'll also need to update the TBAA nodes in MMOs with values // derived from the merged allocas. When doing this, we'll need to use @@ -525,10 +519,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { // We've replaced IR-level uses of the remapped allocas, so we only // need to replace direct uses here.
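// The StackColoring hunk just below folds an explicit null check, an isa<>
// test, and a cast<> into a single dyn_cast_or_null<>, the idiomatic LLVM
// spelling when the inspected pointer may itself be null:
//
//   const Value *V = MMO->getValue();
//   if (!V) continue;                    // separate null check
//   if (!isa<AllocaInst>(V)) continue;   // separate type test
//   const AllocaInst *AI = cast<AllocaInst>(V);
//
// becomes
//
//   const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
//   if (!AI) continue;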
- if (!isa(V)) + const AllocaInst *AI = dyn_cast_or_null(MMO->getValue()); + if (!AI) continue; - const AllocaInst *AI= cast(V); if (!Allocas.count(AI)) continue; diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index a374417..4dd87dd 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackmaps" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "stackmaps" + namespace llvm { cl::opt EnableStackMapLiveness("enable-stackmap-liveness", cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass")); @@ -99,7 +100,7 @@ bool StackMapLiveness::calculateLiveness() { HasStackMap = true; ++NumStackMaps; } - DEBUG(dbgs() << " " << *I << " " << LiveRegs); + DEBUG(dbgs() << " " << LiveRegs << " " << *I); LiveRegs.stepBackward(*I); } ++NumBBsVisited; diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index a6522dc..1473fc1 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackmaps" - #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -20,6 +18,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -29,6 +28,13 @@ using namespace llvm; +#define DEBUG_TYPE "stackmaps" + +static cl::opt StackMapVersion("stackmap-version", cl::init(1), + cl::desc("Specify the stackmap encoding version (default = 1)")); + +const char *StackMaps::WSMP = "Stack Maps: "; + PatchPointOpers::PatchPointOpers(const MachineInstr *MI) : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && @@ -64,6 +70,11 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { return ScratchIdx; } +StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { + if (StackMapVersion != 1) + llvm_unreachable("Unsupported stackmap version!"); +} + MachineInstr::const_mop_iterator StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, MachineInstr::const_mop_iterator MOE, @@ -209,7 +220,8 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, if (I->LocType == Location::Constant && ((I->Offset + (int64_t(1)<<31)) >> 32) != 0) { I->LocType = Location::ConstantIndex; - I->Offset = ConstPool.getConstantIndex(I->Offset); + auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset)); + I->Offset = Result.first - ConstPool.begin(); } } @@ -259,7 +271,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { #endif } -/// serializeToStackMapSection conceptually populates the following fields: +/// Emit the stackmap header. /// /// Header { /// uint8 : Stack Map Version (currently 1) @@ -269,11 +281,54 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { /// uint32 : NumFunctions /// uint32 : NumConstants /// uint32 : NumRecords +void StackMaps::emitStackmapHeader(MCStreamer &OS) { + // Header. + OS.EmitIntValue(StackMapVersion, 1); // Version. + OS.EmitIntValue(0, 1); // Reserved. + OS.EmitIntValue(0, 2); // Reserved. 
+ + // Num functions. + DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); + OS.EmitIntValue(FnStackSize.size(), 4); + // Num constants. + DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); + OS.EmitIntValue(ConstPool.size(), 4); + // Num callsites. + DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); + OS.EmitIntValue(CSInfos.size(), 4); +} + +/// Emit the function frame record for each function. +/// /// StkSizeRecord[NumFunctions] { /// uint64 : Function Address /// uint64 : Stack Size /// } +void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { + // Function Frame records. + DEBUG(dbgs() << WSMP << "functions:\n"); + for (auto const &FR : FnStackSize) { + DEBUG(dbgs() << WSMP << "function addr: " << FR.first + << " frame size: " << FR.second); + OS.EmitSymbolValue(FR.first, 8); + OS.EmitIntValue(FR.second, 8); + } +} + +/// Emit the constant pool. +/// /// int64 : Constants[NumConstants] +void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { + // Constant pool entries. + DEBUG(dbgs() << WSMP << "constants:\n"); + for (auto ConstEntry : ConstPool) { + DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); + OS.EmitIntValue(ConstEntry.second, 8); + } +} + +/// Emit the callsite info for each callsite. +/// /// StkMapRecord[NumRecords] { /// uint64 : PatchPoint ID /// uint32 : Instruction Offset @@ -301,95 +356,43 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) { /// 0x3, Indirect, [Reg + Offset] (spilled value) /// 0x4, Constant, Offset (small constant) /// 0x5, ConstIndex, Constants[Offset] (large constant) -/// -void StackMaps::serializeToStackMapSection() { - // Bail out if there's no stack map data. - if (CSInfos.empty()) - return; - - MCContext &OutContext = AP.OutStreamer.getContext(); - const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); - - // Create the section. - const MCSection *StackMapSection = - OutContext.getObjectFileInfo()->getStackMapSection(); - AP.OutStreamer.SwitchSection(StackMapSection); - - // Emit a dummy symbol to force section inclusion. - AP.OutStreamer.EmitLabel( - OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps"))); - - // Serialize data. - const char *WSMP = "Stack Maps: "; - (void)WSMP; - - DEBUG(dbgs() << "********** Stack Map Output **********\n"); - - // Header. - AP.OutStreamer.EmitIntValue(1, 1); // Version. - AP.OutStreamer.EmitIntValue(0, 1); // Reserved. - AP.OutStreamer.EmitIntValue(0, 2); // Reserved. - - // Num functions. - DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); - AP.OutStreamer.EmitIntValue(FnStackSize.size(), 4); - // Num constants. - DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.getNumConstants() - << '\n'); - AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4); - // Num callsites. - DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); - AP.OutStreamer.EmitIntValue(CSInfos.size(), 4); - - // Function stack size entries. - for (FnStackSizeMap::iterator I = FnStackSize.begin(), E = FnStackSize.end(); - I != E; ++I) { - AP.OutStreamer.EmitSymbolValue(I->first, 8); - AP.OutStreamer.EmitIntValue(I->second, 8); - } - - // Constant pool entries. - for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i) - AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8); - +void StackMaps::emitCallsiteEntries(MCStreamer &OS, + const TargetRegisterInfo *TRI) { // Callsite entries. 
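// Read back, the fields emitted by emitStackmapHeader and
// emitFunctionFrameRecords above form this fixed layout (a reader-side
// mirror assuming a little-endian target; not an LLVM type):
#include <cstdint>
#pragma pack(push, 1)
struct StackMapHeader {
  uint8_t Version;       // currently 1 (see StackMapVersion above)
  uint8_t Reserved8;
  uint16_t Reserved16;
  uint32_t NumFunctions; // count of StkSizeRecord entries
  uint32_t NumConstants; // count of int64 constant-pool entries
  uint32_t NumRecords;   // count of callsite records
};
struct StkSizeRecord {
  uint64_t FunctionAddress;
  uint64_t StackSize;
};
#pragma pack(pop)
static_assert(sizeof(StackMapHeader) == 16, "16-byte prelude");
static_assert(sizeof(StkSizeRecord) == 16, "8-byte address + 8-byte size");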
- for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(), - CSIE = CSInfos.end(); CSII != CSIE; ++CSII) { - uint64_t CallsiteID = CSII->ID; - const LocationVec &CSLocs = CSII->Locations; - const LiveOutVec &LiveOuts = CSII->LiveOuts; + DEBUG(dbgs() << WSMP << "callsites:\n"); + for (const auto &CSI : CSInfos) { + const LocationVec &CSLocs = CSI.Locations; + const LiveOutVec &LiveOuts = CSI.LiveOuts; - DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n"); + DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n"); // Verify stack map entry. It's better to communicate a problem to the // runtime than crash in case of in-process compilation. Currently, we do // simple overflow checks, but we may eventually communicate other // compilation errors this way. if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) { - AP.OutStreamer.EmitIntValue(UINT64_MAX, 8); // Invalid ID. - AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); - AP.OutStreamer.EmitIntValue(0, 2); // Reserved. - AP.OutStreamer.EmitIntValue(0, 2); // 0 locations. - AP.OutStreamer.EmitIntValue(0, 2); // padding. - AP.OutStreamer.EmitIntValue(0, 2); // 0 live-out registers. - AP.OutStreamer.EmitIntValue(0, 4); // padding. + OS.EmitIntValue(UINT64_MAX, 8); // Invalid ID. + OS.EmitValue(CSI.CSOffsetExpr, 4); + OS.EmitIntValue(0, 2); // Reserved. + OS.EmitIntValue(0, 2); // 0 locations. + OS.EmitIntValue(0, 2); // padding. + OS.EmitIntValue(0, 2); // 0 live-out registers. + OS.EmitIntValue(0, 4); // padding. continue; } - AP.OutStreamer.EmitIntValue(CallsiteID, 8); - AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); + OS.EmitIntValue(CSI.ID, 8); + OS.EmitValue(CSI.CSOffsetExpr, 4); // Reserved for flags. - AP.OutStreamer.EmitIntValue(0, 2); + OS.EmitIntValue(0, 2); DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); - AP.OutStreamer.EmitIntValue(CSLocs.size(), 2); + OS.EmitIntValue(CSLocs.size(), 2); - unsigned operIdx = 0; - for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end(); - LocI != LocE; ++LocI, ++operIdx) { - const Location &Loc = *LocI; + unsigned OperIdx = 0; + for (const auto &Loc : CSLocs) { unsigned RegNo = 0; int Offset = Loc.Offset; if(Loc.Reg) { @@ -410,67 +413,97 @@ void StackMaps::serializeToStackMapSection() { "Missing location register"); } - DEBUG( - dbgs() << WSMP << " Loc " << operIdx << ": "; - switch (Loc.LocType) { - case Location::Unprocessed: - dbgs() << ""; - break; - case Location::Register: - dbgs() << "Register " << TRI->getName(Loc.Reg); - break; - case Location::Direct: - dbgs() << "Direct " << TRI->getName(Loc.Reg); - if (Loc.Offset) - dbgs() << " + " << Loc.Offset; - break; - case Location::Indirect: - dbgs() << "Indirect " << TRI->getName(Loc.Reg) - << " + " << Loc.Offset; - break; - case Location::Constant: - dbgs() << "Constant " << Loc.Offset; - break; - case Location::ConstantIndex: - dbgs() << "Constant Index " << Loc.Offset; - break; - } - dbgs() << " [encoding: .byte " << Loc.LocType - << ", .byte " << Loc.Size - << ", .short " << RegNo - << ", .int " << Offset << "]\n"; - ); - - AP.OutStreamer.EmitIntValue(Loc.LocType, 1); - AP.OutStreamer.EmitIntValue(Loc.Size, 1); - AP.OutStreamer.EmitIntValue(RegNo, 2); - AP.OutStreamer.EmitIntValue(Offset, 4); + DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + dbgs() << ""; + break; + case Location::Register: + dbgs() << "Register " << TRI->getName(Loc.Reg); + break; + case Location::Direct: + dbgs() << "Direct " << TRI->getName(Loc.Reg); + 
if (Loc.Offset) + dbgs() << " + " << Loc.Offset; + break; + case Location::Indirect: + dbgs() << "Indirect " << TRI->getName(Loc.Reg) + << " + " << Loc.Offset; + break; + case Location::Constant: + dbgs() << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + dbgs() << "Constant Index " << Loc.Offset; + break; + } + dbgs() << " [encoding: .byte " << Loc.LocType + << ", .byte " << Loc.Size + << ", .short " << RegNo + << ", .int " << Offset << "]\n"; + ); + + OS.EmitIntValue(Loc.LocType, 1); + OS.EmitIntValue(Loc.Size, 1); + OS.EmitIntValue(RegNo, 2); + OS.EmitIntValue(Offset, 4); + OperIdx++; } DEBUG(dbgs() << WSMP << " has " << LiveOuts.size() - << " live-out registers\n"); + << " live-out registers\n"); // Num live-out registers and padding to align to 4 byte. - AP.OutStreamer.EmitIntValue(0, 2); - AP.OutStreamer.EmitIntValue(LiveOuts.size(), 2); - - operIdx = 0; - for (LiveOutVec::const_iterator LI = LiveOuts.begin(), LE = LiveOuts.end(); - LI != LE; ++LI, ++operIdx) { - DEBUG(dbgs() << WSMP << " LO " << operIdx << ": " - << TRI->getName(LI->Reg) - << " [encoding: .short " << LI->RegNo - << ", .byte 0, .byte " << LI->Size << "]\n"); - - AP.OutStreamer.EmitIntValue(LI->RegNo, 2); - AP.OutStreamer.EmitIntValue(0, 1); - AP.OutStreamer.EmitIntValue(LI->Size, 1); + OS.EmitIntValue(0, 2); + OS.EmitIntValue(LiveOuts.size(), 2); + + OperIdx = 0; + for (const auto &LO : LiveOuts) { + DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": " + << TRI->getName(LO.Reg) + << " [encoding: .short " << LO.RegNo + << ", .byte 0, .byte " << LO.Size << "]\n"); + OS.EmitIntValue(LO.RegNo, 2); + OS.EmitIntValue(0, 1); + OS.EmitIntValue(LO.Size, 1); } // Emit alignment to 8 byte. - AP.OutStreamer.EmitValueToAlignment(8); + OS.EmitValueToAlignment(8); } +} + +/// Serialize the stackmap data. +void StackMaps::serializeToStackMapSection() { + (void) WSMP; + // Bail out if there's no stack map data. + assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) && + "Expected empty constant pool too!"); + assert((!CSInfos.empty() || (CSInfos.empty() && FnStackSize.empty())) && + "Expected empty function record too!"); + if (CSInfos.empty()) + return; - AP.OutStreamer.AddBlankLine(); + MCContext &OutContext = AP.OutStreamer.getContext(); + MCStreamer &OS = AP.OutStreamer; + const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + + // Create the section. + const MCSection *StackMapSection = + OutContext.getObjectFileInfo()->getStackMapSection(); + OS.SwitchSection(StackMapSection); + + // Emit a dummy symbol to force section inclusion. + OS.EmitLabel(OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps"))); + + // Serialize data. + DEBUG(dbgs() << "********** Stack Map Output **********\n"); + emitStackmapHeader(OS); + emitFunctionFrameRecords(OS); + emitConstantPoolEntries(OS); + emitCallsiteEntries(OS, TRI); + OS.AddBlankLine(); + // Clean up. 
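// Each location record printed and emitted above is a fixed 8-byte tuple,
// .byte type / .byte size / .short dwarf-reg / .int offset, with the type
// codes from the table in the serialization comment (0x1 Register through
// 0x5 ConstIndex). A reader-side mirror, again assuming little-endian:
#include <cstdint>
#pragma pack(push, 1)
struct StackMapLocation {
  uint8_t Type;          // 0x1 Register, 0x2 Direct, 0x3 Indirect,
                         // 0x4 Constant, 0x5 ConstIndex
  uint8_t Size;          // size of the value in bytes
  uint16_t DwarfRegNum;  // the ".short RegNo" in the DEBUG dump above
  int32_t OffsetOrConst; // offset, small constant, or constant-pool index
};
#pragma pack(pop)
static_assert(sizeof(StackMapLocation) == 8, "8 bytes per location");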
CSInfos.clear(); + ConstPool.clear(); } diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index f3749e5..accfe7b 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stack-protector" #include "llvm/CodeGen/StackProtector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -37,6 +36,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "stack-protector" + STATISTIC(NumFunProtected, "Number of functions protected"); STATISTIC(NumAddrTaken, "Number of local variables that have their address" " taken."); @@ -83,18 +84,18 @@ bool StackProtector::runOnFunction(Function &Fn) { M = F->getParent(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; TLI = TM->getTargetLowering(); - if (!RequiresStackProtector()) - return false; - Attribute Attr = Fn.getAttributes().getAttribute( AttributeSet::FunctionIndex, "stack-protector-buffer-size"); if (Attr.isStringAttribute() && Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) return false; // Invalid integer string + if (!RequiresStackProtector()) + return false; + ++NumFunProtected; return InsertStackProtectors(); } @@ -319,7 +320,7 @@ static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, SearchCounter++; } - return 0; + return nullptr; } /// Insert code into the entry block that stores the __stack_chk_guard @@ -354,7 +355,7 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, } IRBuilder<> B(&F->getEntryBlock().front()); - AI = B.CreateAlloca(PtrTy, 0, "StackGuardSlot"); + AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI, AI); @@ -372,8 +373,8 @@ bool StackProtector::InsertStackProtectors() { bool HasPrologue = false; bool SupportsSelectionDAGSP = EnableSelectionDAGSP && !TM->Options.EnableFastISel; - AllocaInst *AI = 0; // Place on stack that stores the stack guard. - Value *StackGuardVar = 0; // The stack guard variable. + AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. + Value *StackGuardVar = nullptr; // The stack guard variable. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { BasicBlock *BB = I++; @@ -390,14 +391,14 @@ bool StackProtector::InsertStackProtectors() { if (SupportsSelectionDAGSP) { // Since we have a potential tail call, insert the special stack check // intrinsic. - Instruction *InsertionPt = 0; + Instruction *InsertionPt = nullptr; if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) { InsertionPt = CI; } else { InsertionPt = RI; // At this point we know that BB has a return statement so it *DOES* // have a terminator. 
- assert(InsertionPt != 0 && "BB must have a terminator instruction at " + assert(InsertionPt != nullptr && "BB must have a terminator instruction at " "this point."); } diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 2717f4c..791168f 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "stackslotcoloring" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" @@ -33,6 +32,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "stackslotcoloring" + static cl::opt DisableSharing("no-stack-slot-sharing", cl::init(false), cl::Hidden, @@ -161,13 +162,12 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { MachineMemOperand *MMO = *MMOI; - if (const Value *V = MMO->getValue()) { - if (const FixedStackPseudoSourceValue *FSV = - dyn_cast(V)) { - int FI = FSV->getFrameIndex(); - if (FI >= 0) - SSRefs[FI].push_back(MMO); - } + if (const FixedStackPseudoSourceValue *FSV = + dyn_cast_or_null( + MMO->getPseudoValue())) { + int FI = FSV->getFrameIndex(); + if (FI >= 0) + SSRefs[FI].push_back(MMO); } } } @@ -310,7 +310,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (NewFI == -1 || (NewFI == (int)SS)) continue; - const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + const PseudoSourceValue *NewSV = PseudoSourceValue::getFixedStack(NewFI); SmallVectorImpl &RefMMOs = SSRefs[SS]; for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) RefMMOs[i]->setValue(NewSV); @@ -398,7 +398,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++NumDead; changed = true; - if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) { + if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) { ++NumDead; toErase.push_back(I); } diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 3b7a04c..723a629 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tailduplication" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" @@ -34,6 +33,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "tailduplication" + STATISTIC(NumTails , "Number of tails duplicated"); STATISTIC(NumTailDups , "Number of tail duplicated blocks"); STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); @@ -181,7 +182,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; dbgs() << " missing input from predecessor BB#" << PredBB->getNumber() << '\n'; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -192,12 +193,12 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { << ": " << *MI; dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() << '\n'; - llvm_unreachable(0); + llvm_unreachable(nullptr); } if (PHIBB->getNumber() < 0) { dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } ++MI; @@ -247,7 +248,7 @@ 
TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, // If the original definition is still around, add it as an available // value. MachineInstr *DefMI = MRI->getVRegDef(VReg); - MachineBasicBlock *DefBB = 0; + MachineBasicBlock *DefBB = nullptr; if (DefMI) { DefBB = DefMI->getParent(); SSAUpdate.AddAvailableValue(DefBB, VReg); @@ -363,9 +364,7 @@ static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { // block (which is why we need to copy the information). static void getRegsUsedByPHIs(const MachineBasicBlock &BB, DenseSet *UsedByPhi) { - for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end(); - I != E; ++I) { - const MachineInstr &MI = *I; + for (const auto &MI : BB) { if (!MI.isPHI()) break; for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { @@ -656,7 +655,7 @@ TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { if (PredBB->succ_size() > 1) return false; - MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) return false; @@ -687,7 +686,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, if (bothUsedInPHI(*PredBB, Succs)) continue; - MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) continue; @@ -718,14 +717,14 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, // Make the branch unconditional if possible if (PredTBB == PredFBB) { PredCond.clear(); - PredFBB = NULL; + PredFBB = nullptr; } // Avoid adding fall through branches. if (PredFBB == NextBB) - PredFBB = NULL; - if (PredTBB == NextBB && PredFBB == NULL) - PredTBB = NULL; + PredFBB = nullptr; + if (PredTBB == NextBB && PredFBB == nullptr) + PredTBB = nullptr; TII->RemoveBranch(*PredBB); @@ -858,7 +857,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // block, which falls through unconditionally, move the contents of this // block into the prior block. MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB)); - MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by // AnalyzeBranch. diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index cae3ccd..c3f84c6 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -43,7 +43,7 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, const TargetRegisterInfo *TRI, const MachineFunction &MF) const { if (OpNum >= MCID.getNumOperands()) - return 0; + return nullptr; short RegClass = MCID.OpInfo[OpNum].RegClass; if (MCID.OpInfo[OpNum].isLookupPtrRegClass()) @@ -51,7 +51,7 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, // Instructions like INSERT_SUBREG do not have fixed register classes. if (RegClass < 0) - return 0; + return nullptr; // Otherwise just look it up normally. return TRI->getRegClass(RegClass); @@ -111,7 +111,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, // If MBB isn't immediately before MBB, insert a branch to it. 
if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) - InsertBranch(*MBB, NewDest, 0, SmallVector(), + InsertBranch(*MBB, NewDest, nullptr, SmallVector(), Tail->getDebugLoc()); MBB->addSuccessor(NewDest); } @@ -124,13 +124,11 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, bool HasDef = MCID.getNumDefs(); if (HasDef && !MI->getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. - return 0; + return nullptr; unsigned Idx1, Idx2; if (!findCommutedOpIndices(MI, Idx1, Idx2)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Don't know how to commute: " << *MI; - report_fatal_error(Msg.str()); + assert(MI->isCommutable() && "Precondition violation: MI must be commutable."); + return nullptr; } assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && @@ -250,13 +248,15 @@ bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, oe = MI->memoperands_end(); o != oe; ++o) { - if ((*o)->isLoad() && (*o)->getValue()) + if ((*o)->isLoad()) { if (const FixedStackPseudoSourceValue *Value = - dyn_cast((*o)->getValue())) { + dyn_cast_or_null( + (*o)->getPseudoValue())) { FrameIndex = Value->getFrameIndex(); MMO = *o; return true; } + } } return false; } @@ -268,13 +268,15 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, oe = MI->memoperands_end(); o != oe; ++o) { - if ((*o)->isStore() && (*o)->getValue()) + if ((*o)->isStore()) { if (const FixedStackPseudoSourceValue *Value = - dyn_cast((*o)->getValue())) { + dyn_cast_or_null( + (*o)->getPseudoValue())) { FrameIndex = Value->getFrameIndex(); MMO = *o; return true; } + } } return false; } @@ -340,14 +342,14 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, unsigned FoldIdx) { assert(MI->isCopy() && "MI must be a COPY instruction"); if (MI->getNumOperands() != 2) - return 0; + return nullptr; assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); const MachineOperand &FoldOp = MI->getOperand(FoldIdx); const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx); if (FoldOp.getSubReg() || LiveOp.getSubReg()) - return 0; + return nullptr; unsigned FoldReg = FoldOp.getReg(); unsigned LiveReg = LiveOp.getReg(); @@ -359,13 +361,13 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) - return RC->contains(LiveOp.getReg()) ? RC : 0; + return RC->contains(LiveOp.getReg()) ? RC : nullptr; if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) return RC; // FIXME: Allow folding when register classes are memory compatible. - return 0; + return nullptr; } bool TargetInstrInfo:: @@ -399,7 +401,7 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF, for (SmallVectorImpl::const_iterator I = Ops.begin(), E = Ops.end(); I != E; ++I) { if (*I < StartIdx) - return 0; + return nullptr; } MachineInstr *NewMI = @@ -454,7 +456,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, assert(MBB && "foldMemoryOperand needs an inserted instruction"); MachineFunction &MF = *MBB->getParent(); - MachineInstr *NewMI = 0; + MachineInstr *NewMI = nullptr; if (MI->getOpcode() == TargetOpcode::STACKMAP || MI->getOpcode() == TargetOpcode::PATCHPOINT) { @@ -488,11 +490,11 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, // Straight COPY may fold as load/store. 
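// With the commuteInstruction change above, failure to find commutable
// operands now surfaces as a null return (plus an assert in +Asserts
// builds) rather than a fatal error, so callers are expected to test the
// result, along these lines (sketch):
//
//   if (MachineInstr *CommutedMI = TII->commuteInstruction(MI)) {
//     // ... use the commuted instruction ...
//   } else {
//     // operands could not be commuted; leave MI unchanged
//   }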
if (!MI->isCopy() || Ops.size() != 1) - return 0; + return nullptr; const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); if (!RC) - return 0; + return nullptr; const MachineOperand &MO = MI->getOperand(1-Ops[0]); MachineBasicBlock::iterator Pos = MI; @@ -521,7 +523,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, MachineFunction &MF = *MBB.getParent(); // Ask the target to do the actual folding. - MachineInstr *NewMI = 0; + MachineInstr *NewMI = nullptr; int FrameIndex = 0; if ((MI->getOpcode() == TargetOpcode::STACKMAP || @@ -534,7 +536,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); } - if (!NewMI) return 0; + if (!NewMI) return nullptr; NewMI = MBB.insert(MI, NewMI); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 870370b..2634d71 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -82,16 +82,16 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::UREM_I128] = "__umodti3"; // These are generally not available. - Names[RTLIB::SDIVREM_I8] = 0; - Names[RTLIB::SDIVREM_I16] = 0; - Names[RTLIB::SDIVREM_I32] = 0; - Names[RTLIB::SDIVREM_I64] = 0; - Names[RTLIB::SDIVREM_I128] = 0; - Names[RTLIB::UDIVREM_I8] = 0; - Names[RTLIB::UDIVREM_I16] = 0; - Names[RTLIB::UDIVREM_I32] = 0; - Names[RTLIB::UDIVREM_I64] = 0; - Names[RTLIB::UDIVREM_I128] = 0; + Names[RTLIB::SDIVREM_I8] = nullptr; + Names[RTLIB::SDIVREM_I16] = nullptr; + Names[RTLIB::SDIVREM_I32] = nullptr; + Names[RTLIB::SDIVREM_I64] = nullptr; + Names[RTLIB::SDIVREM_I128] = nullptr; + Names[RTLIB::UDIVREM_I8] = nullptr; + Names[RTLIB::UDIVREM_I16] = nullptr; + Names[RTLIB::UDIVREM_I32] = nullptr; + Names[RTLIB::UDIVREM_I64] = nullptr; + Names[RTLIB::UDIVREM_I128] = nullptr; Names[RTLIB::NEG_I32] = "__negsi2"; Names[RTLIB::NEG_I64] = "__negdi2"; @@ -392,18 +392,18 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SINCOS_PPCF128] = "sincosl"; } else { // These are generally not available. - Names[RTLIB::SINCOS_F32] = 0; - Names[RTLIB::SINCOS_F64] = 0; - Names[RTLIB::SINCOS_F80] = 0; - Names[RTLIB::SINCOS_F128] = 0; - Names[RTLIB::SINCOS_PPCF128] = 0; + Names[RTLIB::SINCOS_F32] = nullptr; + Names[RTLIB::SINCOS_F64] = nullptr; + Names[RTLIB::SINCOS_F80] = nullptr; + Names[RTLIB::SINCOS_F128] = nullptr; + Names[RTLIB::SINCOS_PPCF128] = nullptr; } if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) { Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; } else { // These are generally not available. - Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = 0; + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr; } } @@ -680,6 +680,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, UseUnderscoreLongJmp = false; SelectIsExpensive = false; HasMultipleConditionRegisters = false; + HasExtractBitsInsn = false; IntDivIsCheap = false; Pow2DivIsCheap = false; JumpIsExpensive = false; @@ -914,7 +915,6 @@ bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { MachineBasicBlock* TargetLoweringBase::emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetMachine &TM = getTargetMachine(); MachineFunction &MF = *MI->getParent()->getParent(); // MI changes inside this loop as we grow operands. @@ -1006,7 +1006,7 @@ void TargetLoweringBase::computeRegisterProperties() { // Find the largest integer register class. 
unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; - for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg) assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); // Every integer value type larger than this largest register takes twice as @@ -1326,7 +1326,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case Mul: return ISD::MUL; case FMul: return ISD::FMUL; case UDiv: return ISD::UDIV; - case SDiv: return ISD::UDIV; + case SDiv: return ISD::SDIV; case FDiv: return ISD::FDIV; case URem: return ISD::UREM; case SRem: return ISD::SREM; diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index e41fbfc..dda2259 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -100,7 +100,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( // Add information about the stub reference to ELFMMI so that the stub // gets emitted by the asmprinter. MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); - if (StubSym.getPointer() == 0) { + if (!StubSym.getPointer()) { MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -339,8 +339,8 @@ getSectionForConstant(SectionKind Kind) const { return DataRelROSection; } -const MCSection * -TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { +const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. if (Priority == 65535) @@ -359,8 +359,8 @@ TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { } } -const MCSection * -TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const { +const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. if (Priority == 65535) @@ -418,7 +418,7 @@ emitModuleFlags(MCStreamer &Streamer, Mangler &Mang, const TargetMachine &TM) const { unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; - MDNode *LinkerOptions = 0; + MDNode *LinkerOptions = nullptr; StringRef SectionVal; for (ArrayRef::iterator @@ -659,7 +659,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( MachineModuleInfoImpl::StubValueTy &StubSym = GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : MachOMMI.getGVStubEntry(SSym); - if (StubSym.getPointer() == 0) { + if (!StubSym.getPointer()) { MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -685,7 +685,7 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. 
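Buried in the InstructionOpcodeToISD hunk above is a genuine bug fix: the IR SDiv opcode previously mapped to ISD::UDIV. Signed and unsigned division disagree for any negative operand, as this self-contained example shows:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t A = -8, B = 2;

      // Signed division, what ISD::SDIV computes: -8 / 2 == -4.
      std::printf("sdiv: %d\n", A / B);

      // Unsigned division of the same bit patterns, what the old mapping to
      // ISD::UDIV would have produced: 4294967288 / 2 == 2147483644.
      std::printf("udiv: %u\n", uint32_t(A) / uint32_t(B));
    }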
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); - if (StubSym.getPointer() == 0) { + if (!StubSym.getPointer()) { MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } @@ -755,7 +755,7 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { if (Kind.isText()) return ".text"; - if (Kind.isBSS ()) + if (Kind.isBSS()) return ".bss"; if (Kind.isThreadLocal()) return ".tls$"; @@ -781,7 +781,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // Section names depend on the name of the symbol which is not feasible if the // symbol has private linkage. if ((GV->isWeakForLinker() || EmitUniquedSection) && - !GV->hasPrivateLinkage()) { + !GV->hasPrivateLinkage() && !Kind.isCommon()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind); @@ -802,7 +802,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, if (Kind.isReadOnly()) return ReadOnlySection; - if (Kind.isBSS()) + // Note: we claim that common symbols are put in BSSSection, but they are + // really emitted with the magic .comm directive, which creates a symbol table + // entry but not a section. + if (Kind.isBSS() || Kind.isCommon()) return BSSSection; return DataSection; @@ -820,7 +823,7 @@ void TargetLoweringObjectFileCOFF:: emitModuleFlags(MCStreamer &Streamer, ArrayRef ModuleFlags, Mangler &Mang, const TargetMachine &TM) const { - MDNode *LinkerOptions = 0; + MDNode *LinkerOptions = nullptr; // Look for the "Linker Options" flag, since it's the only one we support. for (ArrayRef::iterator @@ -862,3 +865,32 @@ emitModuleFlags(MCStreamer &Streamer, } } } + +static const MCSection *getAssociativeCOFFSection(MCContext &Ctx, + const MCSection *Sec, + const MCSymbol *KeySym, + const MCSection *KeySec) { + // Return the normal section if we don't have to be associative. + if (!KeySym) + return Sec; + + // Make an associative section with the same name and kind as the normal + // section. 
+ const MCSectionCOFF *SecCOFF = cast(Sec); + const MCSectionCOFF *KeySecCOFF = cast(KeySec); + unsigned Characteristics = + SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT; + return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics, + SecCOFF->getKind(), KeySym->getName(), + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, KeySecCOFF); +} + +const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { + return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym, KeySec); +} + +const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { + return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym, KeySec); +} diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 5a15243..a3a4fb3 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -101,7 +101,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { Idx += Offset + 1; } } - return NULL; + return nullptr; } /// getMinimalPhysRegClass - Returns the Register Class of a physical @@ -113,7 +113,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { // Pick the most sub register class of the right type that contains // this physreg. - const TargetRegisterClass* BestRC = 0; + const TargetRegisterClass* BestRC = nullptr; for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){ const TargetRegisterClass* RC = *I; if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && @@ -130,7 +130,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { static void getAllocatableSetForRC(const MachineFunction &MF, const TargetRegisterClass *RC, BitVector &R){ assert(RC->isAllocatable() && "invalid for nonallocatable sets"); - ArrayRef Order = RC->getRawAllocationOrder(MF); + ArrayRef Order = RC->getRawAllocationOrder(MF); for (unsigned i = 0; i != Order.size(); ++i) R.set(Order[i]); } @@ -164,7 +164,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A, for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) if (unsigned Common = *A++ & *B++) return TRI->getRegClass(I + countTrailingZeros(Common)); - return 0; + return nullptr; } const TargetRegisterClass * @@ -174,7 +174,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, if (A == B) return A; if (!A || !B) - return 0; + return nullptr; // Register classes are ordered topologically, so the largest common // sub-class it the common sub-class with the smallest ID. @@ -194,7 +194,7 @@ TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, // The bit mask contains all register classes that are projected into B // by Idx. Find a class that is also a sub-class of A. return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this); - return 0; + return nullptr; } const TargetRegisterClass *TargetRegisterInfo:: @@ -215,7 +215,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, // Arrange for RCA to be the larger register so the answer will be found in // the first iteration. This makes the search linear for the most common // case. 
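firstCommonClass, in the TargetRegisterInfo.cpp hunks above, finds the lowest-numbered register class present in both class bit masks by ANDing corresponding 32-bit words and taking count-trailing-zeros of the first nonzero result. The same scan in isolation; firstCommonBit and the mask layout are invented, and __builtin_ctz is the GCC/Clang builtin:

    #include <cstdint>
    #include <cstdio>

    // Returns the index of the first bit set in both masks, or -1 if none.
    // Mirrors the word-at-a-time scan in firstCommonClass.
    static int firstCommonBit(const uint32_t *A, const uint32_t *B,
                              unsigned NumBits) {
      for (unsigned I = 0; I < NumBits; I += 32)
        if (uint32_t Common = *A++ & *B++)
          return I + __builtin_ctz(Common);
      return -1;
    }

    int main() {
      uint32_t A[2] = {0x0, 0x90}; // bits 36 and 39 set
      uint32_t B[2] = {0x4, 0x80}; // bits 2 and 39 set
      std::printf("%d\n", firstCommonBit(A, B, 64)); // prints 39
    }

Because register classes are ordered topologically, the first common bit is also the largest common sub-class, which is why the scan can return as soon as any word overlaps.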
- const TargetRegisterClass *BestRC = 0; + const TargetRegisterClass *BestRC = nullptr; unsigned *BestPreA = &PreA; unsigned *BestPreB = &PreB; if (RCA->getSize() < RCB->getSize()) { diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index d9e5aae..f42d47b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -27,7 +27,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "twoaddrinstr" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" @@ -51,6 +50,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "twoaddrinstr" + STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); @@ -211,7 +212,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, } // Find the instruction that kills SavedReg. - MachineInstr *KillMI = NULL; + MachineInstr *KillMI = nullptr; if (LIS) { LiveInterval &LI = LIS->getInterval(SavedReg); assert(LI.end() != LI.begin() && @@ -250,7 +251,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, // FIXME: This can be sped up if there is an easy way to query whether an // instruction is before or after another instruction. Then we can use // MachineRegisterInfo def / use instead. - MachineOperand *KillMO = NULL; + MachineOperand *KillMO = nullptr; MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; @@ -454,10 +455,10 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, unsigned &DstReg, bool &IsDstPhys) { if (!MRI->hasOneNonDBGUse(Reg)) // None or more than one use. - return 0; + return nullptr; MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg); if (UseMI.getParent() != MBB) - return 0; + return nullptr; unsigned SrcReg; bool IsSrcPhys; if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) { @@ -469,7 +470,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); return &UseMI; } - return 0; + return nullptr; } /// getMappedReg - Return the physical register the specified virtual register @@ -576,7 +577,7 @@ commuteInstruction(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); MachineInstr *NewMI = TII->commuteInstruction(MI); - if (NewMI == 0) { + if (NewMI == nullptr) { DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); return false; } @@ -755,7 +756,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = 0; + MachineInstr *KillMI = nullptr; if (LIS) { LiveInterval &LI = LIS->getInterval(Reg); assert(LI.end() != LI.begin() && @@ -947,7 +948,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = 0; + MachineInstr *KillMI = nullptr; if (LIS) { LiveInterval &LI = LIS->getInterval(Reg); assert(LI.end() != LI.begin() && @@ -1394,7 +1395,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, SubRegB) && "tied subregister must be a truncation"); // The superreg class will not be used to constrain the subreg class. 
- RC = 0; + RC = nullptr; } else { assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB)) @@ -1631,7 +1632,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { TargetRegisterInfo::isPhysicalRegister(DstReg) || !(MI->getNumOperands() & 1)) { DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); - llvm_unreachable(0); + llvm_unreachable(nullptr); } SmallVector OrigRegs; diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index f892e94..704736f 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/VirtRegMap.h" #include "LiveDebugVariables.h" #include "llvm/ADT/STLExtras.h" @@ -40,6 +39,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "regalloc" + STATISTIC(NumSpillSlots, "Number of spill slots allocated"); STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting"); diff --git a/lib/CodeGen/module.modulemap b/lib/CodeGen/module.modulemap new file mode 100644 index 0000000..d4f68bc --- /dev/null +++ b/lib/CodeGen/module.modulemap @@ -0,0 +1 @@ +module CodeGen { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index d1853d8..2ed188e 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -16,10 +16,10 @@ namespace llvm { class DWARFCompileUnit : public DWARFUnit { public: - DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, - StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, + DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS, + StringRef SS, StringRef SOS, StringRef AOS, const RelocAddrMap *M, bool LE) - : DWARFUnit(DA, IS, AS, RS, SS, SOS, AOS, M, LE) {} + : DWARFUnit(DA, IS, RS, SS, SOS, AOS, M, LE) {} void dump(raw_ostream &OS); // VTable anchor. 
~DWARFCompileUnit() override; diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index 60c5f6a..e52e8af 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "DWARFContext.h" +#include "DWARFDebugArangeSet.h" + #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compression.h" @@ -20,7 +22,11 @@ using namespace llvm; using namespace dwarf; using namespace object; +#define DEBUG_TYPE "dwarf" + typedef DWARFDebugLine::LineTable DWARFLineTable; +typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; +typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; static void dumpPubSection(raw_ostream &OS, StringRef Name, StringRef Data, bool LittleEndian, bool GnuStyle) { @@ -126,8 +132,9 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { if (stmtOffset != -1U) { DataExtractor lineData(getLineSection().Data, isLittleEndian(), savedAddressByteSize); - DWARFDebugLine::DumpingState state(OS); - DWARFDebugLine::parseStatementTable(lineData, &getLineSection().Relocs, &stmtOffset, state); + DWARFDebugLine::LineTable LineTable; + LineTable.parse(lineData, &getLineSection().Relocs, &stmtOffset); + LineTable.dump(OS); } } } @@ -137,9 +144,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { unsigned stmtOffset = 0; DataExtractor lineData(getLineDWOSection().Data, isLittleEndian(), savedAddressByteSize); - DWARFDebugLine::DumpingState state(OS); - while (DWARFDebugLine::parsePrologue(lineData, &stmtOffset, &state.Prologue)) - state.finalize(); + DWARFDebugLine::LineTable LineTable; + while (LineTable.Prologue.parse(lineData, &stmtOffset)) { + LineTable.dump(OS); + LineTable.clear(); + } } if (DumpType == DIDT_All || DumpType == DIDT_Str) { @@ -216,7 +225,7 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { DataExtractor abbrData(getAbbrevSection(), isLittleEndian(), 0); Abbrev.reset(new DWARFDebugAbbrev()); - Abbrev->parse(abbrData); + Abbrev->extract(abbrData); return Abbrev.get(); } @@ -226,7 +235,7 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() { DataExtractor abbrData(getAbbrevDWOSection(), isLittleEndian(), 0); AbbrevDWO.reset(new DWARFDebugAbbrev()); - AbbrevDWO->parse(abbrData); + AbbrevDWO->extract(abbrData); return AbbrevDWO.get(); } @@ -290,7 +299,7 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { cu->getCompileUnitDIE()->getAttributeValueAsSectionOffset( cu, DW_AT_stmt_list, -1U); if (stmtOffset == -1U) - return 0; // No line table for this compile unit. + return nullptr; // No line table for this compile unit. // See if the line table is cached. 
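getLineTableForCompileUnit above keys cached line tables by the DW_AT_stmt_list section offset and parses only on a cache miss; the getOrParseLineTable change near the end of this section performs the matching insert-then-parse step. A generic sketch of the lookup-or-parse pattern, with LineTable and getOrParse as simplified stand-ins for the real types:

    #include <cstdint>
    #include <cstdio>
    #include <map>

    struct LineTable { unsigned NumRows = 0; };

    static std::map<uint32_t, LineTable> LineTableMap;

    // Return the cached table for this offset, parsing it on first request.
    static const LineTable &getOrParse(uint32_t Offset) {
      auto Ins = LineTableMap.insert({Offset, LineTable()});
      LineTable &LT = Ins.first->second;
      if (Ins.second) {
        // First time we see this offset: parse into the inserted slot.
        LT.NumRows = 42; // stand-in for real parsing
        std::printf("parsed table at offset 0x%x\n", Offset);
      }
      return LT;
    }

    int main() {
      getOrParse(0x10); // parses
      getOrParse(0x10); // cache hit, no second parse
    }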
if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset)) @@ -310,8 +319,8 @@ void DWARFContext::parseCompileUnits() { isLittleEndian(), 0); while (DIData.isValidOffset(offset)) { std::unique_ptr CU(new DWARFCompileUnit( - getDebugAbbrev(), getInfoSection().Data, getAbbrevSection(), - getRangeSection(), getStringSection(), StringRef(), getAddrSection(), + getDebugAbbrev(), getInfoSection().Data, getRangeSection(), + getStringSection(), StringRef(), getAddrSection(), &getInfoSection().Relocs, isLittleEndian())); if (!CU->extract(DIData, &offset)) { break; @@ -329,10 +338,10 @@ void DWARFContext::parseTypeUnits() { const DataExtractor &DIData = DataExtractor(I.second.Data, isLittleEndian(), 0); while (DIData.isValidOffset(offset)) { - std::unique_ptr TU(new DWARFTypeUnit( - getDebugAbbrev(), I.second.Data, getAbbrevSection(), - getRangeSection(), getStringSection(), StringRef(), getAddrSection(), - &I.second.Relocs, isLittleEndian())); + std::unique_ptr TU( + new DWARFTypeUnit(getDebugAbbrev(), I.second.Data, getRangeSection(), + getStringSection(), StringRef(), getAddrSection(), + &I.second.Relocs, isLittleEndian())); if (!TU->extract(DIData, &offset)) break; TUs.push_back(std::move(TU)); @@ -349,9 +358,8 @@ void DWARFContext::parseDWOCompileUnits() { DataExtractor(getInfoDWOSection().Data, isLittleEndian(), 0); while (DIData.isValidOffset(offset)) { std::unique_ptr DWOCU(new DWARFCompileUnit( - getDebugAbbrevDWO(), getInfoDWOSection().Data, getAbbrevDWOSection(), - getRangeDWOSection(), getStringDWOSection(), - getStringOffsetDWOSection(), getAddrSection(), + getDebugAbbrevDWO(), getInfoDWOSection().Data, getRangeDWOSection(), + getStringDWOSection(), getStringOffsetDWOSection(), getAddrSection(), &getInfoDWOSection().Relocs, isLittleEndian())); if (!DWOCU->extract(DIData, &offset)) { break; @@ -370,10 +378,9 @@ void DWARFContext::parseDWOTypeUnits() { DataExtractor(I.second.Data, isLittleEndian(), 0); while (DIData.isValidOffset(offset)) { std::unique_ptr TU(new DWARFTypeUnit( - getDebugAbbrevDWO(), I.second.Data, getAbbrevDWOSection(), - getRangeDWOSection(), getStringDWOSection(), - getStringOffsetDWOSection(), getAddrSection(), &I.second.Relocs, - isLittleEndian())); + getDebugAbbrevDWO(), I.second.Data, getRangeDWOSection(), + getStringDWOSection(), getStringOffsetDWOSection(), getAddrSection(), + &I.second.Relocs, isLittleEndian())); if (!TU->extract(DIData, &offset)) break; DWOTUs.push_back(std::move(TU)); @@ -408,7 +415,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { if (CU != CUs.end()) { return CU->get(); } - return 0; + return nullptr; } DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { @@ -420,15 +427,13 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) { static bool getFileNameForCompileUnit(DWARFCompileUnit *CU, const DWARFLineTable *LineTable, - uint64_t FileIndex, - bool NeedsAbsoluteFilePath, + uint64_t FileIndex, FileLineInfoKind Kind, std::string &FileName) { - if (CU == 0 || - LineTable == 0 || - !LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath, - FileName)) + if (!CU || !LineTable || Kind == FileLineInfoKind::None || + !LineTable->getFileNameByIndex(FileIndex, Kind, FileName)) return false; - if (NeedsAbsoluteFilePath && sys::path::is_relative(FileName)) { + if (Kind == FileLineInfoKind::AbsoluteFilePath && + sys::path::is_relative(FileName)) { // We may still need to append compilation directory of compile unit. 
SmallString<16> AbsolutePath; if (const char *CompilationDir = CU->getCompilationDir()) { @@ -443,10 +448,9 @@ static bool getFileNameForCompileUnit(DWARFCompileUnit *CU, static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU, const DWARFLineTable *LineTable, uint64_t Address, - bool NeedsAbsoluteFilePath, - std::string &FileName, - uint32_t &Line, uint32_t &Column) { - if (CU == 0 || LineTable == 0) + FileLineInfoKind Kind, + DILineInfo &Result) { + if (!CU || !LineTable) return false; // Get the index of row we're looking for in the line table. uint32_t RowIndex = LineTable->lookupAddress(Address); @@ -454,80 +458,71 @@ static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU, return false; // Take file number and line/column from the row. const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex]; - if (!getFileNameForCompileUnit(CU, LineTable, Row.File, - NeedsAbsoluteFilePath, FileName)) + if (!getFileNameForCompileUnit(CU, LineTable, Row.File, Kind, + Result.FileName)) return false; - Line = Row.Line; - Column = Row.Column; + Result.Line = Row.Line; + Result.Column = Row.Column; return true; } +static bool getFunctionNameForAddress(DWARFCompileUnit *CU, uint64_t Address, + FunctionNameKind Kind, + std::string &FunctionName) { + if (Kind == FunctionNameKind::None) + return false; + // The address may correspond to instruction in some inlined function, + // so we have to build the chain of inlined functions and take the + // name of the topmost function in it. + const DWARFDebugInfoEntryInlinedChain &InlinedChain = + CU->getInlinedChainForAddress(Address); + if (InlinedChain.DIEs.size() == 0) + return false; + const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0]; + if (const char *Name = + TopFunctionDIE.getSubroutineName(InlinedChain.U, Kind)) { + FunctionName = Name; + return true; + } + return false; +} + DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address, - DILineInfoSpecifier Specifier) { + DILineInfoSpecifier Spec) { + DILineInfo Result; + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); if (!CU) - return DILineInfo(); - std::string FileName = ""; - std::string FunctionName = ""; - uint32_t Line = 0; - uint32_t Column = 0; - if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { - // The address may correspond to instruction in some inlined function, - // so we have to build the chain of inlined functions and take the - // name of the topmost function in it. 
- const DWARFDebugInfoEntryInlinedChain &InlinedChain = - CU->getInlinedChainForAddress(Address); - if (InlinedChain.DIEs.size() > 0) { - const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0]; - if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.U)) - FunctionName = Name; - } - } - if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + return Result; + getFunctionNameForAddress(CU, Address, Spec.FNKind, Result.FunctionName); + if (Spec.FLIKind != FileLineInfoKind::None) { const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU); - const bool NeedsAbsoluteFilePath = - Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); - getFileLineInfoForCompileUnit(CU, LineTable, Address, - NeedsAbsoluteFilePath, - FileName, Line, Column); + getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind, Result); } - return DILineInfo(StringRef(FileName), StringRef(FunctionName), - Line, Column); + return Result; } -DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address, - uint64_t Size, - DILineInfoSpecifier Specifier) { +DILineInfoTable +DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size, + DILineInfoSpecifier Spec) { DILineInfoTable Lines; DWARFCompileUnit *CU = getCompileUnitForAddress(Address); if (!CU) return Lines; std::string FunctionName = ""; - if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { - // The address may correspond to instruction in some inlined function, - // so we have to build the chain of inlined functions and take the - // name of the topmost function in it. - const DWARFDebugInfoEntryInlinedChain &InlinedChain = - CU->getInlinedChainForAddress(Address); - if (InlinedChain.DIEs.size() > 0) { - const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0]; - if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.U)) - FunctionName = Name; - } - } + getFunctionNameForAddress(CU, Address, Spec.FNKind, FunctionName); // If the Specifier says we don't need FileLineInfo, just // return the top-most function at the starting address. - if (!Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { - Lines.push_back( - std::make_pair(Address, DILineInfo("", FunctionName, 0, 0))); + if (Spec.FLIKind == FileLineInfoKind::None) { + DILineInfo Result; + Result.FunctionName = FunctionName; + Lines.push_back(std::make_pair(Address, Result)); return Lines; } const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU); - const bool NeedsAbsoluteFilePath = - Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); // Get the index of row we're looking for in the line table. std::vector RowVector; @@ -537,59 +532,67 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address, for (uint32_t RowIndex : RowVector) { // Take file number and line/column from the row. 
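The rewrites above thread a single DILineInfo result object through the helpers instead of separate FileName/Line/Column out-parameters. A sketch of the shape of that refactor; the struct and the lookup helper below are simplified stand-ins, not the real LLVM API:

    #include <cstdint>
    #include <cstdio>
    #include <string>

    // One value type instead of three loose out-parameters, so call sites
    // cannot mix them up and defaults are established in a single place.
    struct LineInfo {
      std::string FileName = "<invalid>";
      std::string FunctionName = "<invalid>";
      uint32_t Line = 0;
      uint32_t Column = 0;
    };

    static bool lookupRow(uint64_t Address, LineInfo &Result) {
      if (Address != 0x400000) // stand-in for a line-table row lookup
        return false;
      Result.FileName = "demo.c";
      Result.Line = 12;
      Result.Column = 7;
      return true;
    }

    int main() {
      LineInfo Info;
      if (lookupRow(0x400000, Info))
        std::printf("%s:%u:%u\n", Info.FileName.c_str(), Info.Line, Info.Column);
    }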
const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex]; - std::string FileName = ""; - getFileNameForCompileUnit(CU, LineTable, Row.File, - NeedsAbsoluteFilePath, FileName); - Lines.push_back(std::make_pair( - Row.Address, DILineInfo(FileName, FunctionName, Row.Line, Row.Column))); + DILineInfo Result; + getFileNameForCompileUnit(CU, LineTable, Row.File, Spec.FLIKind, + Result.FileName); + Result.FunctionName = FunctionName; + Result.Line = Row.Line; + Result.Column = Row.Column; + Lines.push_back(std::make_pair(Row.Address, Result)); } return Lines; } -DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, - DILineInfoSpecifier Specifier) { +DIInliningInfo +DWARFContext::getInliningInfoForAddress(uint64_t Address, + DILineInfoSpecifier Spec) { + DIInliningInfo InliningInfo; + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); if (!CU) - return DIInliningInfo(); + return InliningInfo; + const DWARFLineTable *LineTable = nullptr; const DWARFDebugInfoEntryInlinedChain &InlinedChain = CU->getInlinedChainForAddress(Address); - if (InlinedChain.DIEs.size() == 0) - return DIInliningInfo(); + if (InlinedChain.DIEs.size() == 0) { + // If there is no DIE for address (e.g. it is in unavailable .dwo file), + // try to at least get file/line info from symbol table. + if (Spec.FLIKind != FileLineInfoKind::None) { + DILineInfo Frame; + LineTable = getLineTableForCompileUnit(CU); + if (getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind, + Frame)) { + InliningInfo.addFrame(Frame); + } + } + return InliningInfo; + } - DIInliningInfo InliningInfo; uint32_t CallFile = 0, CallLine = 0, CallColumn = 0; - const DWARFLineTable *LineTable = 0; for (uint32_t i = 0, n = InlinedChain.DIEs.size(); i != n; i++) { const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain.DIEs[i]; - std::string FileName = ""; - std::string FunctionName = ""; - uint32_t Line = 0; - uint32_t Column = 0; + DILineInfo Frame; // Get function name if necessary. - if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { - if (const char *Name = FunctionDIE.getSubroutineName(InlinedChain.U)) - FunctionName = Name; - } - if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { - const bool NeedsAbsoluteFilePath = - Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); + if (const char *Name = + FunctionDIE.getSubroutineName(InlinedChain.U, Spec.FNKind)) + Frame.FunctionName = Name; + if (Spec.FLIKind != FileLineInfoKind::None) { if (i == 0) { // For the topmost frame, initialize the line table of this // compile unit and fetch file/line info from it. LineTable = getLineTableForCompileUnit(CU); // For the topmost routine, get file/line info from line table. - getFileLineInfoForCompileUnit(CU, LineTable, Address, - NeedsAbsoluteFilePath, - FileName, Line, Column); + getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind, + Frame); } else { // Otherwise, use call file, call line and call column from // previous DIE in inlined chain. - getFileNameForCompileUnit(CU, LineTable, CallFile, - NeedsAbsoluteFilePath, FileName); - Line = CallLine; - Column = CallColumn; + getFileNameForCompileUnit(CU, LineTable, CallFile, Spec.FLIKind, + Frame.FileName); + Frame.Line = CallLine; + Frame.Column = CallColumn; } // Get call file/line/column of a current DIE. 
if (i + 1 < n) { @@ -597,8 +600,6 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, CallColumn); } } - DILineInfo Frame(StringRef(FileName), StringRef(FunctionName), - Line, Column); InliningInfo.addFrame(Frame); } return InliningInfo; @@ -637,14 +638,15 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) if (!zlib::isAvailable() || !consumeCompressedDebugSectionHeader(data, OriginalSize)) continue; - std::unique_ptr UncompressedSection; - if (zlib::uncompress(data, UncompressedSection, OriginalSize) != - zlib::StatusOK) + UncompressedSections.resize(UncompressedSections.size() + 1); + if (zlib::uncompress(data, UncompressedSections.back(), OriginalSize) != + zlib::StatusOK) { + UncompressedSections.pop_back(); continue; + } // Make data point to uncompressed section contents and save its contents. name = name.substr(1); - data = UncompressedSection->getBuffer(); - UncompressedSections.push_back(std::move(UncompressedSection)); + data = UncompressedSections.back(); } StringRef *SectionData = @@ -669,7 +671,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) .Case("debug_addr", &AddrSection) // Any more debug info sections go here. - .Default(0); + .Default(nullptr); if (SectionData) { *SectionData = data; if (name == "debug_ranges") { @@ -700,7 +702,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) .Case("debug_loc", &LocSection.Relocs) .Case("debug_info.dwo", &InfoDWOSection.Relocs) .Case("debug_line", &LineSection.Relocs) - .Default(0); + .Default(nullptr); if (!Map) { // Find debug_types relocs by section rather than name as there are // multiple, comdat grouped, debug_types sections. diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index ad6841a..6d1ae92 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -242,7 +242,7 @@ class DWARFContextInMemory : public DWARFContext { StringRef RangeDWOSection; StringRef AddrSection; - SmallVector, 4> UncompressedSections; + SmallVector, 4> UncompressedSections; public: DWARFContextInMemory(object::ObjectFile *); diff --git a/lib/DebugInfo/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARFDebugAbbrev.cpp index fd5f5e9..8426bf9 100644 --- a/lib/DebugInfo/DWARFDebugAbbrev.cpp +++ b/lib/DebugInfo/DWARFDebugAbbrev.cpp @@ -12,24 +12,36 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -bool DWARFAbbreviationDeclarationSet::extract(DataExtractor data, - uint32_t* offset_ptr) { - const uint32_t beginOffset = *offset_ptr; - Offset = beginOffset; +DWARFAbbreviationDeclarationSet::DWARFAbbreviationDeclarationSet() { clear(); - DWARFAbbreviationDeclaration abbrevDeclaration; - uint32_t prevAbbrAode = 0; - while (abbrevDeclaration.extract(data, offset_ptr)) { - Decls.push_back(abbrevDeclaration); - if (IdxOffset == 0) { - IdxOffset = abbrevDeclaration.getCode(); +} + +void DWARFAbbreviationDeclarationSet::clear() { + Offset = 0; + FirstAbbrCode = 0; + Decls.clear(); +} + +bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data, + uint32_t *OffsetPtr) { + clear(); + const uint32_t BeginOffset = *OffsetPtr; + Offset = BeginOffset; + DWARFAbbreviationDeclaration AbbrDecl; + uint32_t PrevAbbrCode = 0; + while (AbbrDecl.extract(Data, OffsetPtr)) { + Decls.push_back(AbbrDecl); + if (FirstAbbrCode == 0) { + FirstAbbrCode = AbbrDecl.getCode(); } else { - if (prevAbbrAode + 1 != abbrevDeclaration.getCode()) - IdxOffset = UINT32_MAX;// Out of order indexes, we 
can't do O(1) lookups + if (PrevAbbrCode + 1 != AbbrDecl.getCode()) { + // Codes are not consecutive, can't do O(1) lookups. + FirstAbbrCode = UINT32_MAX; + } } - prevAbbrAode = abbrevDeclaration.getCode(); + PrevAbbrCode = AbbrDecl.getCode(); } - return beginOffset != *offset_ptr; + return BeginOffset != *OffsetPtr; } void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const { @@ -37,67 +49,67 @@ void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const { Decl.dump(OS); } -const DWARFAbbreviationDeclaration* -DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration(uint32_t abbrCode) - const { - if (IdxOffset == UINT32_MAX) { +const DWARFAbbreviationDeclaration * +DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration( + uint32_t AbbrCode) const { + if (FirstAbbrCode == UINT32_MAX) { for (const auto &Decl : Decls) { - if (Decl.getCode() == abbrCode) + if (Decl.getCode() == AbbrCode) return &Decl; } - } else { - uint32_t idx = abbrCode - IdxOffset; - if (idx < Decls.size()) - return &Decls[idx]; + return nullptr; } - return NULL; + if (AbbrCode < FirstAbbrCode || AbbrCode >= FirstAbbrCode + Decls.size()) + return nullptr; + return &Decls[AbbrCode - FirstAbbrCode]; } -DWARFDebugAbbrev::DWARFDebugAbbrev() : - AbbrevCollMap(), - PrevAbbrOffsetPos(AbbrevCollMap.end()) {} - +DWARFDebugAbbrev::DWARFDebugAbbrev() { + clear(); +} -void DWARFDebugAbbrev::parse(DataExtractor data) { - uint32_t offset = 0; +void DWARFDebugAbbrev::clear() { + AbbrDeclSets.clear(); + PrevAbbrOffsetPos = AbbrDeclSets.end(); +} - while (data.isValidOffset(offset)) { - uint32_t initial_cu_offset = offset; - DWARFAbbreviationDeclarationSet abbrevDeclSet; +void DWARFDebugAbbrev::extract(DataExtractor Data) { + clear(); - if (abbrevDeclSet.extract(data, &offset)) - AbbrevCollMap[initial_cu_offset] = abbrevDeclSet; - else + uint32_t Offset = 0; + DWARFAbbreviationDeclarationSet AbbrDecls; + while (Data.isValidOffset(Offset)) { + uint32_t CUAbbrOffset = Offset; + if (!AbbrDecls.extract(Data, &Offset)) break; + AbbrDeclSets[CUAbbrOffset] = AbbrDecls; } - PrevAbbrOffsetPos = AbbrevCollMap.end(); } void DWARFDebugAbbrev::dump(raw_ostream &OS) const { - if (AbbrevCollMap.empty()) { + if (AbbrDeclSets.empty()) { OS << "< EMPTY >\n"; return; } - for (const auto &I : AbbrevCollMap) { + for (const auto &I : AbbrDeclSets) { OS << format("Abbrev table for offset: 0x%8.8" PRIx64 "\n", I.first); I.second.dump(OS); } } const DWARFAbbreviationDeclarationSet* -DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const { - DWARFAbbreviationDeclarationCollMapConstIter end = AbbrevCollMap.end(); - DWARFAbbreviationDeclarationCollMapConstIter pos; - if (PrevAbbrOffsetPos != end && - PrevAbbrOffsetPos->first == cu_abbr_offset) { +DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const { + const auto End = AbbrDeclSets.end(); + if (PrevAbbrOffsetPos != End && PrevAbbrOffsetPos->first == CUAbbrOffset) { return &(PrevAbbrOffsetPos->second); - } else { - pos = AbbrevCollMap.find(cu_abbr_offset); - PrevAbbrOffsetPos = pos; } - if (pos != AbbrevCollMap.end()) - return &(pos->second); - return NULL; + const auto Pos = AbbrDeclSets.find(CUAbbrOffset); + if (Pos != End) { + PrevAbbrOffsetPos = Pos; + return &(Pos->second); + } + + return nullptr; } diff --git a/lib/DebugInfo/DWARFDebugAbbrev.h b/lib/DebugInfo/DWARFDebugAbbrev.h index c7c0436..3a9adba 100644 --- a/lib/DebugInfo/DWARFDebugAbbrev.h +++ b/lib/DebugInfo/DWARFDebugAbbrev.h @@ -17,55 +17,45 @@ namespace llvm { -typedef std::vector 
- DWARFAbbreviationDeclarationColl; -typedef DWARFAbbreviationDeclarationColl::iterator - DWARFAbbreviationDeclarationCollIter; -typedef DWARFAbbreviationDeclarationColl::const_iterator - DWARFAbbreviationDeclarationCollConstIter; - class DWARFAbbreviationDeclarationSet { uint32_t Offset; - uint32_t IdxOffset; + /// Code of the first abbreviation, if all abbreviations in the set have + /// consecutive codes. UINT32_MAX otherwise. + uint32_t FirstAbbrCode; std::vector Decls; - public: - DWARFAbbreviationDeclarationSet() - : Offset(0), IdxOffset(0) {} - DWARFAbbreviationDeclarationSet(uint32_t offset, uint32_t idxOffset) - : Offset(offset), IdxOffset(idxOffset) {} +public: + DWARFAbbreviationDeclarationSet(); - void clear() { - IdxOffset = 0; - Decls.clear(); - } uint32_t getOffset() const { return Offset; } void dump(raw_ostream &OS) const; - bool extract(DataExtractor data, uint32_t* offset_ptr); + bool extract(DataExtractor Data, uint32_t *OffsetPtr); const DWARFAbbreviationDeclaration * - getAbbreviationDeclaration(uint32_t abbrCode) const; + getAbbreviationDeclaration(uint32_t AbbrCode) const; + +private: + void clear(); }; class DWARFDebugAbbrev { -public: typedef std::map - DWARFAbbreviationDeclarationCollMap; - typedef DWARFAbbreviationDeclarationCollMap::iterator - DWARFAbbreviationDeclarationCollMapIter; - typedef DWARFAbbreviationDeclarationCollMap::const_iterator - DWARFAbbreviationDeclarationCollMapConstIter; + DWARFAbbreviationDeclarationSetMap; -private: - DWARFAbbreviationDeclarationCollMap AbbrevCollMap; - mutable DWARFAbbreviationDeclarationCollMapConstIter PrevAbbrOffsetPos; + DWARFAbbreviationDeclarationSetMap AbbrDeclSets; + mutable DWARFAbbreviationDeclarationSetMap::const_iterator PrevAbbrOffsetPos; public: DWARFDebugAbbrev(); + const DWARFAbbreviationDeclarationSet * - getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const; + getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const; + void dump(raw_ostream &OS) const; - void parse(DataExtractor data); + void extract(DataExtractor Data); + +private: + void clear(); }; } diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h index c18b3c5..d6c2d8b 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.h +++ b/lib/DebugInfo/DWARFDebugArangeSet.h @@ -63,7 +63,6 @@ public: return desc_iterator_range(ArangeDescriptors.begin(), ArangeDescriptors.end()); } - uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); } }; } diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index dfab788..2524adc 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -10,6 +10,7 @@ #include "DWARFDebugAranges.h" #include "DWARFCompileUnit.h" #include "DWARFContext.h" +#include "DWARFDebugArangeSet.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include @@ -20,23 +21,11 @@ void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { if (!DebugArangesData.isValidOffset(0)) return; uint32_t Offset = 0; - typedef std::vector RangeSetColl; - RangeSetColl Sets; DWARFDebugArangeSet Set; - uint32_t TotalRanges = 0; while (Set.extract(DebugArangesData, &Offset)) { - Sets.push_back(Set); - TotalRanges += Set.getNumDescriptors(); - } - if (TotalRanges == 0) - return; - - Aranges.reserve(TotalRanges); - for (const auto &I : Sets) { - uint32_t CUOffset = I.getCompileUnitDIEOffset(); - - for (const auto &Desc : I.descriptors()) { + uint32_t CUOffset = Set.getCompileUnitDIEOffset(); + for (const auto &Desc : 
Set.descriptors()) { uint64_t LowPC = Desc.Address; uint64_t HighPC = Desc.getEndAddress(); appendRange(CUOffset, LowPC, HighPC); @@ -58,13 +47,23 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { // manually build aranges for the rest of them. for (const auto &CU : CTX->compile_units()) { uint32_t CUOffset = CU->getOffset(); - if (ParsedCUOffsets.insert(CUOffset).second) - CU->buildAddressRangeTable(this, true, CUOffset); + if (ParsedCUOffsets.insert(CUOffset).second) { + DWARFAddressRangesVector CURanges; + CU->collectAddressRanges(CURanges); + for (const auto &R : CURanges) { + appendRange(CUOffset, R.first, R.second); + } + } } sortAndMinimize(); } +void DWARFDebugAranges::clear() { + Aranges.clear(); + ParsedCUOffsets.clear(); +} + void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC) { if (!Aranges.empty()) { @@ -101,11 +100,6 @@ void DWARFDebugAranges::sortAndMinimize() { ++minimal_size; } - // If the sizes are the same, then no consecutive aranges can be - // combined, we are done. - if (minimal_size == orig_arange_size) - return; - // Else, make a new RangeColl that _only_ contains what we need. RangeColl minimal_aranges; minimal_aranges.resize(minimal_size); diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h index 35ad8e5..de96d7f 100644 --- a/lib/DebugInfo/DWARFDebugAranges.h +++ b/lib/DebugInfo/DWARFDebugAranges.h @@ -10,9 +10,9 @@ #ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGES_H #define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H -#include "DWARFDebugArangeSet.h" #include "llvm/ADT/DenseSet.h" -#include +#include "llvm/Support/DataExtractor.h" +#include namespace llvm { @@ -20,20 +20,15 @@ class DWARFContext; class DWARFDebugAranges { public: - void clear() { - Aranges.clear(); - ParsedCUOffsets.clear(); - } - void generate(DWARFContext *CTX); - - // Use appendRange multiple times and then call sortAndMinimize. - void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); - uint32_t findAddress(uint64_t Address) const; private: + void clear(); void extract(DataExtractor DebugArangesData); + + // Use appendRange multiple times and then call sortAndMinimize. + void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); void sortAndMinimize(); struct Range { diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp index 5bf7b07..a33548e 100644 --- a/lib/DebugInfo/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -26,8 +26,8 @@ using namespace dwarf; class llvm::FrameEntry { public: enum FrameKind {FK_CIE, FK_FDE}; - FrameEntry(FrameKind K, DataExtractor D, uint64_t Offset, uint64_t Length) - : Kind(K), Data(D), Offset(Offset), Length(Length) {} + FrameEntry(FrameKind K, uint64_t Offset, uint64_t Length) + : Kind(K), Offset(Offset), Length(Length) {} virtual ~FrameEntry() { } @@ -35,11 +35,12 @@ public: FrameKind getKind() const { return Kind; } virtual uint64_t getOffset() const { return Offset; } - /// \brief Parse and store a sequence of CFI instructions from our data - /// stream, starting at *Offset and ending at EndOffset. If everything + /// \brief Parse and store a sequence of CFI instructions from Data, + /// starting at *Offset and ending at EndOffset. If everything /// goes well, *Offset should be equal to EndOffset when this method /// returns. Otherwise, an error occurred. 
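DWARFDebugAranges above accumulates per-CU address ranges with appendRange and then canonicalizes them in sortAndMinimize. A generic sketch of that canonicalization, sorting by start address and coalescing ranges that touch or overlap; sortAndMerge and the pair-based Range are invented stand-ins:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <utility>
    #include <vector>

    using Range = std::pair<uint64_t, uint64_t>; // half-open [first, second)

    static void sortAndMerge(std::vector<Range> &Rs) {
      std::sort(Rs.begin(), Rs.end());
      std::vector<Range> Out;
      for (const Range &R : Rs) {
        // Extend the previous range when this one touches or overlaps it.
        if (!Out.empty() && R.first <= Out.back().second)
          Out.back().second = std::max(Out.back().second, R.second);
        else
          Out.push_back(R);
      }
      Rs = std::move(Out);
    }

    int main() {
      std::vector<Range> Rs = {{0x300, 0x400}, {0x100, 0x200}, {0x200, 0x280}};
      sortAndMerge(Rs);
      for (const Range &R : Rs)
        std::printf("[0x%llx, 0x%llx)\n", (unsigned long long)R.first,
                    (unsigned long long)R.second); // [0x100,0x280) [0x300,0x400)
    }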
- virtual void parseInstructions(uint32_t *Offset, uint32_t EndOffset); + virtual void parseInstructions(DataExtractor Data, uint32_t *Offset, + uint32_t EndOffset); /// \brief Dump the entry header to the given output stream. virtual void dumpHeader(raw_ostream &OS) const = 0; @@ -50,10 +51,6 @@ public: protected: const FrameKind Kind; - /// \brief The data stream holding the section from which the entry was - /// parsed. - DataExtractor Data; - /// \brief Offset of this entry in the section. uint64_t Offset; @@ -97,8 +94,8 @@ protected: const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; - -void FrameEntry::parseInstructions(uint32_t *Offset, uint32_t EndOffset) { +void FrameEntry::parseInstructions(DataExtractor Data, uint32_t *Offset, + uint32_t EndOffset) { while (*Offset < EndOffset) { uint8_t Opcode = Data.getU8(Offset); // Some instructions have a primary opcode encoded in the top bits. @@ -201,13 +198,13 @@ class CIE : public FrameEntry { public: // CIEs (and FDEs) are simply container classes, so the only sensible way to // create them is by providing the full parsed contents in the constructor. - CIE(DataExtractor D, uint64_t Offset, uint64_t Length, uint8_t Version, + CIE(uint64_t Offset, uint64_t Length, uint8_t Version, SmallString<8> Augmentation, uint64_t CodeAlignmentFactor, int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister) - : FrameEntry(FK_CIE, D, Offset, Length), Version(Version), - Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor), - DataAlignmentFactor(DataAlignmentFactor), - ReturnAddressRegister(ReturnAddressRegister) {} + : FrameEntry(FK_CIE, Offset, Length), Version(Version), + Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor), + DataAlignmentFactor(DataAlignmentFactor), + ReturnAddressRegister(ReturnAddressRegister) {} ~CIE() { } @@ -229,7 +226,7 @@ public: static bool classof(const FrameEntry *FE) { return FE->getKind() == FK_CIE; - } + } private: /// The following fields are defined in section 6.4.1 of the DWARF standard v3 @@ -247,11 +244,11 @@ public: // Each FDE has a CIE it's "linked to". Our FDE contains is constructed with // an offset to the CIE (provided by parsing the FDE header). The CIE itself // is obtained lazily once it's actually required. 
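The FrameEntry changes above drop the stored DataExtractor member; parseInstructions now receives the extractor per call, so an entry keeps only parsed state rather than pinning the section data. A rough sketch of that shape; Extractor and FrameEntryLike are invented stand-ins, not the real LLVM types:

    #include <cstdint>
    #include <cstdio>
    #include <string>

    struct Extractor { // stand-in for llvm::DataExtractor
      std::string Data;
      uint8_t getU8(uint32_t *Off) const { return uint8_t(Data[(*Off)++]); }
    };

    class FrameEntryLike {
      uint64_t Offset, Length;
      // No Extractor member: the entry no longer holds the section data.
    public:
      FrameEntryLike(uint64_t O, uint64_t L) : Offset(O), Length(L) {}
      void parse(const Extractor &Data, uint32_t *Off, uint32_t End) {
        while (*Off < End) {
          uint8_t Opcode = Data.getU8(Off);
          std::printf("opcode 0x%02x at entry +0x%llx\n", unsigned(Opcode),
                      (unsigned long long)Offset);
        }
      }
    };

    int main() {
      Extractor E{std::string("\x0c\x07", 2)};
      uint32_t Off = 0;
      FrameEntryLike FE(0, 2);
      FE.parse(E, &Off, 2); // extractor supplied per call, not stored
    }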
- FDE(DataExtractor D, uint64_t Offset, uint64_t Length, - int64_t LinkedCIEOffset, uint64_t InitialLocation, uint64_t AddressRange) - : FrameEntry(FK_FDE, D, Offset, Length), LinkedCIEOffset(LinkedCIEOffset), - InitialLocation(InitialLocation), AddressRange(AddressRange), - LinkedCIE(NULL) {} + FDE(uint64_t Offset, uint64_t Length, int64_t LinkedCIEOffset, + uint64_t InitialLocation, uint64_t AddressRange) + : FrameEntry(FK_FDE, Offset, Length), LinkedCIEOffset(LinkedCIEOffset), + InitialLocation(InitialLocation), AddressRange(AddressRange), + LinkedCIE(nullptr) {} ~FDE() { } @@ -270,9 +267,9 @@ public: static bool classof(const FrameEntry *FE) { return FE->getKind() == FK_FDE; - } -private: + } +private: /// The following fields are defined in section 6.4.1 of the DWARF standard v3 uint64_t LinkedCIEOffset; uint64_t InitialLocation; @@ -285,14 +282,9 @@ private: DWARFDebugFrame::DWARFDebugFrame() { } - DWARFDebugFrame::~DWARFDebugFrame() { - for (const auto &Entry : Entries) { - delete Entry; - } } - static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, uint32_t Offset, int Length) { errs() << "DUMP: "; @@ -334,7 +326,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) { Id = Data.getUnsigned(&Offset, IsDWARF64 ? 8 : 4); bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID); - FrameEntry *Entry = 0; if (IsCIE) { // Note: this is specifically DWARFv3 CIE header structure. It was // changed in DWARFv4. We currently don't support reading DWARFv4 @@ -346,30 +337,25 @@ void DWARFDebugFrame::parse(DataExtractor Data) { int64_t DataAlignmentFactor = Data.getSLEB128(&Offset); uint64_t ReturnAddressRegister = Data.getULEB128(&Offset); - Entry = new CIE(Data, StartOffset, Length, Version, - StringRef(Augmentation), CodeAlignmentFactor, - DataAlignmentFactor, ReturnAddressRegister); + Entries.emplace_back(new CIE(StartOffset, Length, Version, + StringRef(Augmentation), CodeAlignmentFactor, + DataAlignmentFactor, ReturnAddressRegister)); } else { // FDE uint64_t CIEPointer = Id; uint64_t InitialLocation = Data.getAddress(&Offset); uint64_t AddressRange = Data.getAddress(&Offset); - Entry = new FDE(Data, StartOffset, Length, CIEPointer, - InitialLocation, AddressRange); + Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer, + InitialLocation, AddressRange)); } - assert(Entry && "Expected Entry to be populated with CIE or FDE"); - Entry->parseInstructions(&Offset, EndStructureOffset); + Entries.back()->parseInstructions(Data, &Offset, EndStructureOffset); - if (Offset == EndStructureOffset) { - // Entry instrucitons parsed successfully. 
- Entries.push_back(Entry); - } else { + if (Offset != EndStructureOffset) { std::string Str; raw_string_ostream OS(Str); - OS << format("Parsing entry instructions at %lx failed", - Entry->getOffset()); + OS << format("Parsing entry instructions at %lx failed", StartOffset); report_fatal_error(Str); } } diff --git a/lib/DebugInfo/DWARFDebugFrame.h b/lib/DebugInfo/DWARFDebugFrame.h index 7683849..bd4ef45 100644 --- a/lib/DebugInfo/DWARFDebugFrame.h +++ b/lib/DebugInfo/DWARFDebugFrame.h @@ -12,14 +12,13 @@ #include "llvm/Support/DataExtractor.h" #include "llvm/Support/raw_ostream.h" +#include #include - namespace llvm { class FrameEntry; - /// \brief A parsed .debug_frame section /// class DWARFDebugFrame { @@ -35,8 +34,7 @@ public: void parse(DataExtractor Data); private: - typedef std::vector EntryVector; - EntryVector Entries; + std::vector> Entries; }; diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index bde25ec..b811ed7 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace dwarf; +typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, const DWARFUnit *u, unsigned recurseDepth, @@ -99,11 +100,11 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U, uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); if (0 == AbbrCode) { // NULL debug tag entry. - AbbrevDecl = NULL; + AbbrevDecl = nullptr; return true; } AbbrevDecl = U->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); - if (0 == AbbrevDecl) { + if (nullptr == AbbrevDecl) { // Restore the original offset. *OffsetPtr = Offset; return false; @@ -226,54 +227,66 @@ bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFUnit *U, return (HighPC != -1ULL); } -void DWARFDebugInfoEntryMinimal::buildAddressRangeTable( - const DWARFUnit *U, DWARFDebugAranges *DebugAranges, - uint32_t UOffsetInAranges) const { - if (AbbrevDecl) { - if (isSubprogramDIE()) { - uint64_t LowPC, HighPC; - if (getLowAndHighPC(U, LowPC, HighPC)) - DebugAranges->appendRange(UOffsetInAranges, LowPC, HighPC); - // FIXME: try to append ranges from .debug_ranges section. - } - - const DWARFDebugInfoEntryMinimal *Child = getFirstChild(); - while (Child) { - Child->buildAddressRangeTable(U, DebugAranges, UOffsetInAranges); - Child = Child->getSibling(); - } - } -} - -bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( - const DWARFUnit *U, const uint64_t Address) const { +DWARFAddressRangesVector +DWARFDebugInfoEntryMinimal::getAddressRanges(const DWARFUnit *U) const { if (isNULL()) - return false; + return DWARFAddressRangesVector(); + // Single range specified by low/high PC. uint64_t LowPC, HighPC; - if (getLowAndHighPC(U, LowPC, HighPC)) - return (LowPC <= Address && Address <= HighPC); - // Try to get address ranges from .debug_ranges section. + if (getLowAndHighPC(U, LowPC, HighPC)) { + return DWARFAddressRangesVector(1, std::make_pair(LowPC, HighPC)); + } + // Multiple ranges from .debug_ranges section. 
uint32_t RangesOffset = getAttributeValueAsSectionOffset(U, DW_AT_ranges, -1U); if (RangesOffset != -1U) { DWARFDebugRangeList RangeList; if (U->extractRangeList(RangesOffset, RangeList)) - return RangeList.containsAddress(U->getBaseAddress(), Address); + return RangeList.getAbsoluteRanges(U->getBaseAddress()); + } + return DWARFAddressRangesVector(); +} + +void DWARFDebugInfoEntryMinimal::collectChildrenAddressRanges( + const DWARFUnit *U, DWARFAddressRangesVector& Ranges) const { + if (isNULL()) + return; + if (isSubprogramDIE()) { + const auto &DIERanges = getAddressRanges(U); + Ranges.insert(Ranges.end(), DIERanges.begin(), DIERanges.end()); + } + + const DWARFDebugInfoEntryMinimal *Child = getFirstChild(); + while (Child) { + Child->collectChildrenAddressRanges(U, Ranges); + Child = Child->getSibling(); + } +} + +bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( + const DWARFUnit *U, const uint64_t Address) const { + for (const auto& R : getAddressRanges(U)) { + if (R.first <= Address && Address < R.second) + return true; } return false; } const char * -DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U) const { - if (!isSubroutineDIE()) - return 0; - // Try to get mangled name if possible. - if (const char *name = - getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, 0)) - return name; - if (const char *name = getAttributeValueAsString(U, DW_AT_linkage_name, 0)) - return name; - if (const char *name = getAttributeValueAsString(U, DW_AT_name, 0)) +DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U, + FunctionNameKind Kind) const { + if (!isSubroutineDIE() || Kind == FunctionNameKind::None) + return nullptr; + // Try to get mangled name only if it was asked for. + if (Kind == FunctionNameKind::LinkageName) { + if (const char *name = + getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, nullptr)) + return name; + if (const char *name = + getAttributeValueAsString(U, DW_AT_linkage_name, nullptr)) + return name; + } + if (const char *name = getAttributeValueAsString(U, DW_AT_name, nullptr)) return name; // Try to get name from specification DIE. uint32_t spec_ref = @@ -281,7 +294,7 @@ DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U) const { if (spec_ref != -1U) { DWARFDebugInfoEntryMinimal spec_die; if (spec_die.extractFast(U, &spec_ref)) { - if (const char *name = spec_die.getSubroutineName(U)) + if (const char *name = spec_die.getSubroutineName(U, Kind)) return name; } } @@ -291,11 +304,11 @@ DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U) const { if (abs_origin_ref != -1U) { DWARFDebugInfoEntryMinimal abs_origin_die; if (abs_origin_die.extractFast(U, &abs_origin_ref)) { - if (const char *name = abs_origin_die.getSubroutineName(U)) + if (const char *name = abs_origin_die.getSubroutineName(U, Kind)) return name; } } - return 0; + return nullptr; } void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFUnit *U, diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index f30e531..916e1ed 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -11,7 +11,9 @@ #define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H #include "DWARFAbbreviationDeclaration.h" +#include "DWARFDebugRangeList.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -28,17 +30,13 @@ class DWARFDebugInfoEntryMinimal { /// Offset within the .debug_info of the start of this entry. 
uint32_t Offset; - /// How many to subtract from "this" to get the parent. - /// If zero this die has no parent. - uint32_t ParentIdx; - /// How many to add to "this" to get the sibling. uint32_t SiblingIdx; const DWARFAbbreviationDeclaration *AbbrevDecl; public: DWARFDebugInfoEntryMinimal() - : Offset(0), ParentIdx(0), SiblingIdx(0), AbbrevDecl(0) {} + : Offset(0), SiblingIdx(0), AbbrevDecl(nullptr) {} void dump(raw_ostream &OS, const DWARFUnit *u, unsigned recurseDepth, unsigned indent = 0) const; @@ -51,7 +49,7 @@ public: bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr); uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; } - bool isNULL() const { return AbbrevDecl == 0; } + bool isNULL() const { return AbbrevDecl == nullptr; } /// Returns true if DIE represents a subprogram (not inlined). bool isSubprogramDIE() const; @@ -63,45 +61,23 @@ public: bool hasChildren() const { return !isNULL() && AbbrevDecl->hasChildren(); } // We know we are kept in a vector of contiguous entries, so we know - // our parent will be some index behind "this". - DWARFDebugInfoEntryMinimal *getParent() { - return ParentIdx > 0 ? this - ParentIdx : 0; - } - const DWARFDebugInfoEntryMinimal *getParent() const { - return ParentIdx > 0 ? this - ParentIdx : 0; - } - // We know we are kept in a vector of contiguous entries, so we know // our sibling will be some index after "this". - DWARFDebugInfoEntryMinimal *getSibling() { - return SiblingIdx > 0 ? this + SiblingIdx : 0; - } const DWARFDebugInfoEntryMinimal *getSibling() const { - return SiblingIdx > 0 ? this + SiblingIdx : 0; + return SiblingIdx > 0 ? this + SiblingIdx : nullptr; } + // We know we are kept in a vector of contiguous entries, so we know // we don't need to store our child pointer, if we have a child it will // be the next entry in the list... - DWARFDebugInfoEntryMinimal *getFirstChild() { - return hasChildren() ? this + 1 : 0; - } const DWARFDebugInfoEntryMinimal *getFirstChild() const { - return hasChildren() ? this + 1 : 0; + return hasChildren() ? this + 1 : nullptr; } - void setParent(DWARFDebugInfoEntryMinimal *parent) { - if (parent) { - // We know we are kept in a vector of contiguous entries, so we know - // our parent will be some index behind "this". - ParentIdx = this - parent; - } else - ParentIdx = 0; - } - void setSibling(DWARFDebugInfoEntryMinimal *sibling) { - if (sibling) { + void setSibling(const DWARFDebugInfoEntryMinimal *Sibling) { + if (Sibling) { // We know we are kept in a vector of contiguous entries, so we know // our sibling will be some index after "this". - SiblingIdx = sibling - this; - sibling->setParent(getParent()); + SiblingIdx = Sibling - this; } else SiblingIdx = 0; } @@ -135,9 +111,10 @@ public: bool getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC, uint64_t &HighPC) const; - void buildAddressRangeTable(const DWARFUnit *U, - DWARFDebugAranges *DebugAranges, - uint32_t CUOffsetInAranges) const; + DWARFAddressRangesVector getAddressRanges(const DWARFUnit *U) const; + + void collectChildrenAddressRanges(const DWARFUnit *U, + DWARFAddressRangesVector &Ranges) const; bool addressRangeContainsAddress(const DWARFUnit *U, const uint64_t Address) const; @@ -146,7 +123,9 @@ public: /// returns its mangled name (or short name, if mangled is missing). /// This name may be fetched from specification or abstract origin /// for this subprogram. Returns null if no name is found. 
- const char *getSubroutineName(const DWARFUnit *U) const; + const char * + getSubroutineName(const DWARFUnit *U, + DILineInfoSpecifier::FunctionNameKind Kind) const; /// Retrieves values of DW_AT_call_file, DW_AT_call_line and /// DW_AT_call_column from DIE (or zeroes if they are missing). @@ -166,7 +145,7 @@ public: /// (except the last DIE) in this chain is contained in address /// range for next DIE in the chain. struct DWARFDebugInfoEntryInlinedChain { - DWARFDebugInfoEntryInlinedChain() : U(0) {} + DWARFDebugInfoEntryInlinedChain() : U(nullptr) {} SmallVector DIEs; const DWARFUnit *U; }; diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 43d9764..ce87635 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -15,6 +15,20 @@ #include using namespace llvm; using namespace dwarf; +typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; + +DWARFDebugLine::Prologue::Prologue() { + clear(); +} + +void DWARFDebugLine::Prologue::clear() { + TotalLength = Version = PrologueLength = 0; + MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; + OpcodeBase = 0; + StandardOpcodeLengths.clear(); + IncludeDirectories.clear(); + FileNames.clear(); +} void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { OS << "Line table prologue:\n" @@ -51,6 +65,67 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { } } +bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data, + uint32_t *offset_ptr) { + const uint32_t prologue_offset = *offset_ptr; + + clear(); + TotalLength = debug_line_data.getU32(offset_ptr); + Version = debug_line_data.getU16(offset_ptr); + if (Version < 2) + return false; + + PrologueLength = debug_line_data.getU32(offset_ptr); + const uint32_t end_prologue_offset = PrologueLength + *offset_ptr; + MinInstLength = debug_line_data.getU8(offset_ptr); + if (Version >= 4) + MaxOpsPerInst = debug_line_data.getU8(offset_ptr); + DefaultIsStmt = debug_line_data.getU8(offset_ptr); + LineBase = debug_line_data.getU8(offset_ptr); + LineRange = debug_line_data.getU8(offset_ptr); + OpcodeBase = debug_line_data.getU8(offset_ptr); + + StandardOpcodeLengths.reserve(OpcodeBase - 1); + for (uint32_t i = 1; i < OpcodeBase; ++i) { + uint8_t op_len = debug_line_data.getU8(offset_ptr); + StandardOpcodeLengths.push_back(op_len); + } + + while (*offset_ptr < end_prologue_offset) { + const char *s = debug_line_data.getCStr(offset_ptr); + if (s && s[0]) + IncludeDirectories.push_back(s); + else + break; + } + + while (*offset_ptr < end_prologue_offset) { + const char *name = debug_line_data.getCStr(offset_ptr); + if (name && name[0]) { + FileNameEntry fileEntry; + fileEntry.Name = name; + fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr); + fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr); + fileEntry.Length = debug_line_data.getULEB128(offset_ptr); + FileNames.push_back(fileEntry); + } else { + break; + } + } + + if (*offset_ptr != end_prologue_offset) { + fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" + " have ended at 0x%8.8x but it ended at 0x%8.8x\n", + prologue_offset, end_prologue_offset, *offset_ptr); + return false; + } + return true; +} + +DWARFDebugLine::Row::Row(bool default_is_stmt) { + reset(default_is_stmt); +} + void DWARFDebugLine::Row::postAppend() { BasicBlock = false; PrologueEnd = false; @@ -82,6 +157,22 @@ void DWARFDebugLine::Row::dump(raw_ostream &OS) const { << '\n'; } +DWARFDebugLine::Sequence::Sequence() { + reset(); +} + 
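Prologue::parse above pulls DirIdx, ModTime and Length out with getULEB128. For reference, ULEB128 packs 7 value bits per byte with the high bit as a continuation flag; an illustrative decoder (a sketch of the encoding being read, not LLVM's DataExtractor implementation):

    #include <cstdint>

    uint64_t decodeULEB128(const uint8_t *Data, uint32_t *OffsetPtr) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      uint8_t Byte;
      do {
        Byte = Data[(*OffsetPtr)++];
        Value |= uint64_t(Byte & 0x7f) << Shift; // low 7 bits carry payload
        Shift += 7;
      } while (Byte & 0x80);                     // high bit set = more bytes
      return Value;
    }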
+void DWARFDebugLine::Sequence::reset() { + LowPC = 0; + HighPC = 0; + FirstRowIndex = 0; + LastRowIndex = 0; + Empty = true; +} + +DWARFDebugLine::LineTable::LineTable() { + clear(); +} + void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { Prologue.dump(OS); OS << '\n'; @@ -96,50 +187,40 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { } } -DWARFDebugLine::State::~State() {} - -void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) { - if (Sequence::Empty) { - // Record the beginning of instruction sequence. - Sequence::Empty = false; - Sequence::LowPC = Address; - Sequence::FirstRowIndex = row; - } - ++row; // Increase the row number. - LineTable::appendRow(*this); - if (EndSequence) { - // Record the end of instruction sequence. - Sequence::HighPC = Address; - Sequence::LastRowIndex = row; - if (Sequence::isValid()) - LineTable::appendSequence(*this); - Sequence::reset(); - } - Row::postAppend(); +void DWARFDebugLine::LineTable::clear() { + Prologue.clear(); + Rows.clear(); + Sequences.clear(); } -void DWARFDebugLine::State::finalize() { - row = DoneParsingLineTable; - if (!Sequence::Empty) { - fprintf(stderr, "warning: last sequence in debug line table is not" - "terminated!\n"); - } - // Sort all sequences so that address lookup will work faster. - if (!Sequences.empty()) { - std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC); - // Note: actually, instruction address ranges of sequences should not - // overlap (in shared objects and executables). If they do, the address - // lookup would still work, though, but result would be ambiguous. - // We don't report warning in this case. For example, - // sometimes .so compiled from multiple object files contains a few - // rudimentary sequences for address ranges [0x0, 0xsomething). - } +DWARFDebugLine::ParsingState::ParsingState(struct LineTable *LT) + : LineTable(LT), RowNumber(0) { + resetRowAndSequence(); } -DWARFDebugLine::DumpingState::~DumpingState() {} +void DWARFDebugLine::ParsingState::resetRowAndSequence() { + Row.reset(LineTable->Prologue.DefaultIsStmt); + Sequence.reset(); +} -void DWARFDebugLine::DumpingState::finalize() { - LineTable::dump(OS); +void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) { + if (Sequence.Empty) { + // Record the beginning of instruction sequence. + Sequence.Empty = false; + Sequence.LowPC = Row.Address; + Sequence.FirstRowIndex = RowNumber; + } + ++RowNumber; + LineTable->appendRow(Row); + if (Row.EndSequence) { + // Record the end of instruction sequence. + Sequence.HighPC = Row.Address; + Sequence.LastRowIndex = RowNumber; + if (Sequence.isValid()) + LineTable->appendSequence(Sequence); + Sequence.reset(); + } + Row.postAppend(); } const DWARFDebugLine::LineTable * @@ -147,7 +228,7 @@ DWARFDebugLine::getLineTable(uint32_t offset) const { LineTableConstIter pos = LineTableMap.find(offset); if (pos != LineTableMap.end()) return &pos->second; - return 0; + return nullptr; } const DWARFDebugLine::LineTable * @@ -155,91 +236,31 @@ DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data, uint32_t offset) { std::pair pos = LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable())); + LineTable *LT = &pos.first->second; if (pos.second) { - // Parse and cache the line table for at this offset. 
- State state; - if (!parseStatementTable(debug_line_data, RelocMap, &offset, state)) - return 0; - pos.first->second = state; + if (!LT->parse(debug_line_data, RelocMap, &offset)) + return nullptr; } - return &pos.first->second; + return LT; } -bool -DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, - uint32_t *offset_ptr, Prologue *prologue) { - const uint32_t prologue_offset = *offset_ptr; - - prologue->clear(); - prologue->TotalLength = debug_line_data.getU32(offset_ptr); - prologue->Version = debug_line_data.getU16(offset_ptr); - if (prologue->Version < 2) - return false; - - prologue->PrologueLength = debug_line_data.getU32(offset_ptr); - const uint32_t end_prologue_offset = prologue->PrologueLength + *offset_ptr; - prologue->MinInstLength = debug_line_data.getU8(offset_ptr); - if (prologue->Version >= 4) - prologue->MaxOpsPerInst = debug_line_data.getU8(offset_ptr); - prologue->DefaultIsStmt = debug_line_data.getU8(offset_ptr); - prologue->LineBase = debug_line_data.getU8(offset_ptr); - prologue->LineRange = debug_line_data.getU8(offset_ptr); - prologue->OpcodeBase = debug_line_data.getU8(offset_ptr); - - prologue->StandardOpcodeLengths.reserve(prologue->OpcodeBase-1); - for (uint32_t i = 1; i < prologue->OpcodeBase; ++i) { - uint8_t op_len = debug_line_data.getU8(offset_ptr); - prologue->StandardOpcodeLengths.push_back(op_len); - } - - while (*offset_ptr < end_prologue_offset) { - const char *s = debug_line_data.getCStr(offset_ptr); - if (s && s[0]) - prologue->IncludeDirectories.push_back(s); - else - break; - } - - while (*offset_ptr < end_prologue_offset) { - const char *name = debug_line_data.getCStr(offset_ptr); - if (name && name[0]) { - FileNameEntry fileEntry; - fileEntry.Name = name; - fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr); - fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr); - fileEntry.Length = debug_line_data.getULEB128(offset_ptr); - prologue->FileNames.push_back(fileEntry); - } else { - break; - } - } - - if (*offset_ptr != end_prologue_offset) { - fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" - " have ended at 0x%8.8x but it ended at 0x%8.8x\n", - prologue_offset, end_prologue_offset, *offset_ptr); - return false; - } - return true; -} - -bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, - const RelocAddrMap *RMap, - uint32_t *offset_ptr, State &state) { +bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data, + const RelocAddrMap *RMap, + uint32_t *offset_ptr) { const uint32_t debug_line_offset = *offset_ptr; - Prologue *prologue = &state.Prologue; + clear(); - if (!parsePrologue(debug_line_data, offset_ptr, prologue)) { + if (!Prologue.parse(debug_line_data, offset_ptr)) { // Restore our offset and return false to indicate failure! *offset_ptr = debug_line_offset; return false; } - const uint32_t end_offset = debug_line_offset + prologue->TotalLength + - sizeof(prologue->TotalLength); + const uint32_t end_offset = debug_line_offset + Prologue.TotalLength + + sizeof(Prologue.TotalLength); - state.reset(); + ParsingState State(this); while (*offset_ptr < end_offset) { uint8_t opcode = debug_line_data.getU8(offset_ptr); @@ -261,9 +282,9 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, // with a DW_LNE_end_sequence instruction which creates a row whose // address is that of the byte after the last target machine instruction // of the sequence. 
- state.EndSequence = true; - state.appendRowToMatrix(*offset_ptr); - state.reset(); + State.Row.EndSequence = true; + State.appendRowToMatrix(*offset_ptr); + State.resetRowAndSequence(); break; case DW_LNE_set_address: @@ -278,9 +299,10 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr); if (AI != RMap->end()) { const std::pair &R = AI->second; - state.Address = debug_line_data.getAddress(offset_ptr) + R.second; + State.Row.Address = + debug_line_data.getAddress(offset_ptr) + R.second; } else - state.Address = debug_line_data.getAddress(offset_ptr); + State.Row.Address = debug_line_data.getAddress(offset_ptr); } break; @@ -311,12 +333,12 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr); fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr); fileEntry.Length = debug_line_data.getULEB128(offset_ptr); - prologue->FileNames.push_back(fileEntry); + Prologue.FileNames.push_back(fileEntry); } break; case DW_LNE_set_discriminator: - state.Discriminator = debug_line_data.getULEB128(offset_ptr); + State.Row.Discriminator = debug_line_data.getULEB128(offset_ptr); break; default: @@ -325,52 +347,52 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, (*offset_ptr) += arg_size; break; } - } else if (opcode < prologue->OpcodeBase) { + } else if (opcode < Prologue.OpcodeBase) { switch (opcode) { // Standard Opcodes case DW_LNS_copy: // Takes no arguments. Append a row to the matrix using the // current values of the state-machine registers. Then set // the basic_block register to false. - state.appendRowToMatrix(*offset_ptr); + State.appendRowToMatrix(*offset_ptr); break; case DW_LNS_advance_pc: // Takes a single unsigned LEB128 operand, multiplies it by the // min_inst_length field of the prologue, and adds the // result to the address register of the state machine. - state.Address += debug_line_data.getULEB128(offset_ptr) * - prologue->MinInstLength; + State.Row.Address += + debug_line_data.getULEB128(offset_ptr) * Prologue.MinInstLength; break; case DW_LNS_advance_line: // Takes a single signed LEB128 operand and adds that value to // the line register of the state machine. - state.Line += debug_line_data.getSLEB128(offset_ptr); + State.Row.Line += debug_line_data.getSLEB128(offset_ptr); break; case DW_LNS_set_file: // Takes a single unsigned LEB128 operand and stores it in the file // register of the state machine. - state.File = debug_line_data.getULEB128(offset_ptr); + State.Row.File = debug_line_data.getULEB128(offset_ptr); break; case DW_LNS_set_column: // Takes a single unsigned LEB128 operand and stores it in the // column register of the state machine. - state.Column = debug_line_data.getULEB128(offset_ptr); + State.Row.Column = debug_line_data.getULEB128(offset_ptr); break; case DW_LNS_negate_stmt: // Takes no arguments. Set the is_stmt register of the state // machine to the logical negation of its current value. - state.IsStmt = !state.IsStmt; + State.Row.IsStmt = !State.Row.IsStmt; break; case DW_LNS_set_basic_block: // Takes no arguments. 
Set the basic_block register of the // state machine to true - state.BasicBlock = true; + State.Row.BasicBlock = true; break; case DW_LNS_const_add_pc: @@ -386,10 +408,10 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, // than twice that range will it need to use both DW_LNS_advance_pc // and a special opcode, requiring three or more bytes. { - uint8_t adjust_opcode = 255 - prologue->OpcodeBase; - uint64_t addr_offset = (adjust_opcode / prologue->LineRange) * - prologue->MinInstLength; - state.Address += addr_offset; + uint8_t adjust_opcode = 255 - Prologue.OpcodeBase; + uint64_t addr_offset = + (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength; + State.Row.Address += addr_offset; } break; @@ -403,25 +425,25 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, // judge when the computation of a special opcode overflows and // requires the use of DW_LNS_advance_pc. Such assemblers, however, // can use DW_LNS_fixed_advance_pc instead, sacrificing compression. - state.Address += debug_line_data.getU16(offset_ptr); + State.Row.Address += debug_line_data.getU16(offset_ptr); break; case DW_LNS_set_prologue_end: // Takes no arguments. Set the prologue_end register of the // state machine to true - state.PrologueEnd = true; + State.Row.PrologueEnd = true; break; case DW_LNS_set_epilogue_begin: // Takes no arguments. Set the basic_block register of the // state machine to true - state.EpilogueBegin = true; + State.Row.EpilogueBegin = true; break; case DW_LNS_set_isa: // Takes a single unsigned LEB128 operand and stores it in the // column register of the state machine. - state.Isa = debug_line_data.getULEB128(offset_ptr); + State.Row.Isa = debug_line_data.getULEB128(offset_ptr); break; default: @@ -429,9 +451,9 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, // of such opcodes because they are specified in the prologue // as a multiple of LEB128 operands for each opcode. { - assert(opcode - 1U < prologue->StandardOpcodeLengths.size()); - uint8_t opcode_length = prologue->StandardOpcodeLengths[opcode - 1]; - for (uint8_t i=0; i<opcode_length; ++i) + assert(opcode - 1U < Prologue.StandardOpcodeLengths.size()); + uint8_t opcode_length = Prologue.StandardOpcodeLengths[opcode - 1]; + for (uint8_t i = 0; i < opcode_length; ++i) debug_line_data.getULEB128(offset_ptr); } break; } } else { // Special Opcodes { - uint8_t adjust_opcode = opcode - prologue->OpcodeBase; - uint64_t addr_offset = (adjust_opcode / prologue->LineRange) * - prologue->MinInstLength; - int32_t line_offset = prologue->LineBase + - (adjust_opcode % prologue->LineRange); - state.Line += line_offset; - state.Address += addr_offset; - state.appendRowToMatrix(*offset_ptr); + uint8_t adjust_opcode = opcode - Prologue.OpcodeBase; + uint64_t addr_offset = + (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength; + int32_t line_offset = + Prologue.LineBase + (adjust_opcode % Prologue.LineRange); + State.Row.Line += line_offset; + State.Row.Address += addr_offset; + State.appendRowToMatrix(*offset_ptr); } } - state.finalize(); + if (!State.Sequence.Empty) { + fprintf(stderr, "warning: last sequence in debug line table is not " + "terminated!\n"); + } + + // Sort all sequences so that address lookup will work faster. + if (!Sequences.empty()) { + std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC); + // Note: actually, instruction address ranges of sequences should not + // overlap (in shared objects and executables). If they do, the address + // lookup would still work, though, but result would be ambiguous. + // We don't report warning in this case. For example, + // sometimes .so compiled from multiple object files contains a few + // rudimentary sequences for address ranges [0x0, 0xsomething).
+ } return end_offset; } -uint32_t -DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { +uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { uint32_t unknown_index = UINT32_MAX; if (Sequences.empty()) return unknown_index; @@ -532,10 +567,8 @@ DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { return index; } -bool -DWARFDebugLine::LineTable::lookupAddressRange(uint64_t address, - uint64_t size, - std::vector<uint32_t>& result) const { +bool DWARFDebugLine::LineTable::lookupAddressRange( + uint64_t address, uint64_t size, std::vector<uint32_t> &result) const { if (Sequences.empty()) return false; uint64_t end_addr = address + size; @@ -611,13 +644,14 @@ DWARFDebugLine::LineTable::lookupAddressRange, bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, - bool NeedsAbsoluteFilePath, + FileLineInfoKind Kind, std::string &Result) const { - if (FileIndex == 0 || FileIndex > Prologue.FileNames.size()) + if (FileIndex == 0 || FileIndex > Prologue.FileNames.size() || + Kind == FileLineInfoKind::None) return false; const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1]; const char *FileName = Entry.Name; - if (!NeedsAbsoluteFilePath || + if (Kind != FileLineInfoKind::AbsoluteFilePath || sys::path::is_absolute(FileName)) { Result = FileName; return true; diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index a336f49..c7b7ec2 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_DWARFDEBUGLINE_H #include "DWARFRelocMap.h" +#include "llvm/DebugInfo/DIContext.h" #include "llvm/Support/DataExtractor.h" #include #include @@ -24,7 +25,7 @@ class DWARFDebugLine { public: DWARFDebugLine(const RelocAddrMap* LineInfoRelocMap) : RelocMap(LineInfoRelocMap) {} struct FileNameEntry { - FileNameEntry() : Name(0), DirIdx(0), ModTime(0), Length(0) {} + FileNameEntry() : Name(nullptr), DirIdx(0), ModTime(0), Length(0) {} const char *Name; uint64_t DirIdx; @@ -33,10 +34,7 @@ public: }; struct Prologue { - Prologue() - : TotalLength(0), Version(0), PrologueLength(0), MinInstLength(0), - MaxOpsPerInst(0), DefaultIsStmt(0), LineBase(0), LineRange(0), - OpcodeBase(0) {} + Prologue(); // The size in bytes of the statement information for this compilation unit // (not including the total_length field itself). @@ -77,19 +75,16 @@ public: int32_t getMaxLineIncrementForSpecialOpcode() const { return LineBase + (int8_t)LineRange - 1; } + + void clear(); void dump(raw_ostream &OS) const; - void clear() { - TotalLength = Version = PrologueLength = 0; - MinInstLength = LineBase = LineRange = OpcodeBase = 0; - StandardOpcodeLengths.clear(); - IncludeDirectories.clear(); - FileNames.clear(); - } + bool parse(DataExtractor debug_line_data, uint32_t *offset_ptr); }; // Standard .debug_line state machine structure. struct Row { - Row(bool default_is_stmt = false) { reset(default_is_stmt); } + explicit Row(bool default_is_stmt = false); + /// Called after a row is appended to the matrix.
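The special-opcode branch above decodes one opcode into simultaneous address and line advances. A worked sketch with illustrative prologue constants (opcode_base=13, line_base=-5, line_range=14, min_inst_length=1 are common producer values, assumed here only for the example):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint8_t OpcodeBase = 13, LineRange = 14, MinInstLength = 1;
      const int8_t LineBase = -5;
      const uint8_t Opcode = 0xf2;                    // some special opcode (242)
      uint8_t AdjustOpcode = Opcode - OpcodeBase;     // 229
      uint64_t AddrAdvance =
          (AdjustOpcode / LineRange) * MinInstLength; // 229 / 14 = 16
      int32_t LineAdvance =
          LineBase + (AdjustOpcode % LineRange);      // -5 + 5 = 0
      std::printf("addr += %llu, line += %d\n",
                  (unsigned long long)AddrAdvance, LineAdvance);
      return 0;
    }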
void postAppend(); void reset(bool default_is_stmt); void dump(raw_ostream &OS) const; @@ -151,14 +146,9 @@ public: unsigned LastRowIndex; bool Empty; - Sequence() { reset(); } - void reset() { - LowPC = 0; - HighPC = 0; - FirstRowIndex = 0; - LastRowIndex = 0; - Empty = true; - } + Sequence(); + void reset(); + static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) { return LHS.LowPC < RHS.LowPC; } @@ -171,31 +161,34 @@ public: }; struct LineTable { - void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); } - void appendSequence(const DWARFDebugLine::Sequence &sequence) { - Sequences.push_back(sequence); + LineTable(); + + void appendRow(const DWARFDebugLine::Row &R) { + Rows.push_back(R); } - void clear() { - Prologue.clear(); - Rows.clear(); - Sequences.clear(); + void appendSequence(const DWARFDebugLine::Sequence &S) { + Sequences.push_back(S); } // Returns the index of the row with file/line info for a given address, // or -1 if there is no such row. uint32_t lookupAddress(uint64_t address) const; - bool lookupAddressRange(uint64_t address, - uint64_t size, - std::vector<uint32_t>& result) const; + bool lookupAddressRange(uint64_t address, uint64_t size, + std::vector<uint32_t> &result) const; // Extracts filename by its index in filename table in prologue. // Returns true on success. bool getFileNameByIndex(uint64_t FileIndex, - bool NeedsAbsoluteFilePath, + DILineInfoSpecifier::FileLineInfoKind Kind, std::string &Result) const; void dump(raw_ostream &OS) const; + void clear(); + + /// Parse prologue and all rows. + bool parse(DataExtractor debug_line_data, const RelocAddrMap *RMap, + uint32_t *offset_ptr); struct Prologue Prologue; typedef std::vector<Row> RowVector; @@ -206,48 +199,26 @@ public: SequenceVector Sequences; }; - struct State : public Row, public Sequence, public LineTable { - // Special row codes. - enum { - StartParsingLineTable = 0, - DoneParsingLineTable = -1 - }; - - State() : row(StartParsingLineTable) {} - virtual ~State(); - - virtual void appendRowToMatrix(uint32_t offset); - virtual void finalize(); - virtual void reset() { - Row::reset(Prologue.DefaultIsStmt); - Sequence::reset(); - } - - // The row number that starts at zero for the prologue, and increases for - // each row added to the matrix. - unsigned row; - }; - - struct DumpingState : public State { - DumpingState(raw_ostream &OS) : OS(OS) {} - virtual ~DumpingState(); - void finalize() override; - private: - raw_ostream &OS; - }; - - static bool parsePrologue(DataExtractor debug_line_data, uint32_t *offset_ptr, - Prologue *prologue); - /// Parse a single line table (prologue and all rows). - static bool parseStatementTable(DataExtractor debug_line_data, - const RelocAddrMap *RMap, - uint32_t *offset_ptr, State &state); - const LineTable *getLineTable(uint32_t offset) const; const LineTable *getOrParseLineTable(DataExtractor debug_line_data, uint32_t offset); private: + struct ParsingState { + ParsingState(struct LineTable *LT); + + void resetRowAndSequence(); + void appendRowToMatrix(uint32_t offset); + + // Line table we're currently parsing. + struct LineTable *LineTable; + // The row number that starts at zero for the prologue, and increases for + // each row added to the matrix.
+ unsigned RowNumber; + struct Row Row; + struct Sequence Sequence; + }; + typedef std::map<uint32_t, LineTable> LineTableMapTy; typedef LineTableMapTy::iterator LineTableIter; typedef LineTableMapTy::const_iterator LineTableConstIter; diff --git a/lib/DebugInfo/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARFDebugRangeList.cpp index aa2a2be..07b23b3 100644 --- a/lib/DebugInfo/DWARFDebugRangeList.cpp +++ b/lib/DebugInfo/DWARFDebugRangeList.cpp @@ -54,13 +54,16 @@ void DWARFDebugRangeList::dump(raw_ostream &OS) const { OS << format("%08x <End of list>\n", Offset); } -bool DWARFDebugRangeList::containsAddress(uint64_t BaseAddress, - uint64_t Address) const { +DWARFAddressRangesVector +DWARFDebugRangeList::getAbsoluteRanges(uint64_t BaseAddress) const { + DWARFAddressRangesVector Res; for (const RangeListEntry &RLE : Entries) { - if (RLE.isBaseAddressSelectionEntry(AddressSize)) + if (RLE.isBaseAddressSelectionEntry(AddressSize)) { BaseAddress = RLE.EndAddress; - else if (RLE.containsAddress(BaseAddress, Address)) - return true; + } else { + Res.push_back(std::make_pair(BaseAddress + RLE.StartAddress, + BaseAddress + RLE.EndAddress)); + } } - return false; + return Res; } diff --git a/lib/DebugInfo/DWARFDebugRangeList.h b/lib/DebugInfo/DWARFDebugRangeList.h index 4e34a91..587b550 100644 --- a/lib/DebugInfo/DWARFDebugRangeList.h +++ b/lib/DebugInfo/DWARFDebugRangeList.h @@ -17,6 +17,9 @@ namespace llvm { class raw_ostream; +/// DWARFAddressRangesVector - represents a set of absolute address ranges. +typedef std::vector<std::pair<uint64_t, uint64_t>> DWARFAddressRangesVector; + class DWARFDebugRangeList { public: struct RangeListEntry { @@ -50,10 +53,6 @@ public: else return StartAddress == -1ULL; } - bool containsAddress(uint64_t BaseAddress, uint64_t Address) const { - return (BaseAddress + StartAddress <= Address) && - (Address < BaseAddress + EndAddress); - } }; private: @@ -67,10 +66,10 @@ public: void clear(); void dump(raw_ostream &OS) const; bool extract(DataExtractor data, uint32_t *offset_ptr); - /// containsAddress - Returns true if range list contains the given - /// address. Has to be passed base address of the compile unit that - /// references this range list. - bool containsAddress(uint64_t BaseAddress, uint64_t Address) const; + /// getAbsoluteRanges - Returns absolute address ranges defined by this range + /// list. Has to be passed base address of the compile unit referencing this + /// range list.
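getAbsoluteRanges above folds base address selection entries into the output instead of testing one address at a time. A toy version of the same loop (8-byte addresses assumed, so the selection marker is UINT64_MAX; the real code checks -1U or -1ULL depending on AddressSize):

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct RangeEntry { uint64_t Start, End; };

    std::vector<std::pair<uint64_t, uint64_t>>
    absoluteRanges(const std::vector<RangeEntry> &Entries, uint64_t Base) {
      std::vector<std::pair<uint64_t, uint64_t>> Res;
      for (const RangeEntry &E : Entries) {
        if (E.Start == UINT64_MAX)
          Base = E.End;                       // base address selection entry
        else
          Res.emplace_back(Base + E.Start, Base + E.End);
      }
      return Res;
    }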
+ DWARFAddressRangesVector getAbsoluteRanges(uint64_t BaseAddress) const; }; } // namespace llvm diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index da71fb3..8d0f966 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -131,7 +131,7 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, const DWARFUnit *cu) { bool indirect = false; bool is_block = false; - Value.data = NULL; + Value.data = nullptr; // Read the value for the form into value and follow and DW_FORM_indirect // instances we run into do { @@ -241,7 +241,7 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, if (is_block) { StringRef str = data.getData().substr(*offset_ptr, Value.uval); - Value.data = NULL; + Value.data = nullptr; if (!str.empty()) { Value.data = reinterpret_cast<const uint8_t *>(str.data()); *offset_ptr += Value.uval; @@ -488,7 +488,7 @@ Optional<const char *> DWARFFormValue::getAsCString(const DWARFUnit *U) const { return None; if (Form == DW_FORM_string) return Value.cstr; - if (U == 0) + if (!U) return None; uint32_t Offset = Value.uval; if (Form == DW_FORM_GNU_str_index) { @@ -509,7 +509,7 @@ Optional<uint64_t> DWARFFormValue::getAsAddress(const DWARFUnit *U) const { if (Form == DW_FORM_GNU_addr_index) { uint32_t Index = Value.uval; uint64_t Result; - if (U == 0 || !U->getAddrOffsetSectionItem(Index, Result)) + if (!U || !U->getAddrOffsetSectionItem(Index, Result)) return None; return Result; } @@ -525,7 +525,7 @@ Optional<uint64_t> DWARFFormValue::getAsReference(const DWARFUnit *U) const { case DW_FORM_ref4: case DW_FORM_ref8: case DW_FORM_ref_udata: - if (U == 0) + if (!U) return None; return Value.uval + U->getOffset(); case DW_FORM_ref_addr: diff --git a/lib/DebugInfo/DWARFTypeUnit.h b/lib/DebugInfo/DWARFTypeUnit.h index 05e13ff..cf773b8 100644 --- a/lib/DebugInfo/DWARFTypeUnit.h +++ b/lib/DebugInfo/DWARFTypeUnit.h @@ -19,11 +19,13 @@ private: uint64_t TypeHash; uint32_t TypeOffset; public: - DWARFTypeUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, - StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, + DWARFTypeUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS, + StringRef SS, StringRef SOS, StringRef AOS, const RelocAddrMap *M, bool LE) - : DWARFUnit(DA, IS, AS, RS, SS, SOS, AOS, M, LE) {} - uint32_t getSize() const override { return DWARFUnit::getSize() + 12; } + : DWARFUnit(DA, IS, RS, SS, SOS, AOS, M, LE) {} + uint32_t getHeaderSize() const override { + return DWARFUnit::getHeaderSize() + 12; + } void dump(raw_ostream &OS); protected: bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) override; diff --git a/lib/DebugInfo/DWARFUnit.cpp b/lib/DebugInfo/DWARFUnit.cpp index 316c208..f5f5072 100644 --- a/lib/DebugInfo/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARFUnit.cpp @@ -17,12 +17,12 @@ using namespace llvm; using namespace dwarf; -DWARFUnit::DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, - StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, +DWARFUnit::DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS, + StringRef SS, StringRef SOS, StringRef AOS, const RelocAddrMap *M, bool LE) - : Abbrev(DA), InfoSection(IS), AbbrevSection(AS), RangeSection(RS), - StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS), - RelocMap(M), isLittleEndian(LE) { + : Abbrev(DA), InfoSection(IS), RangeSection(RS), StringSection(SS), + StringOffsetSection(SOS), AddrOffsetSection(AOS), RelocMap(M), + isLittleEndian(LE) { clear(); } @@ -54,18 +54,20 @@ bool
DWARFUnit::getStringOffsetSectionItem(uint32_t Index, bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { Length = debug_info.getU32(offset_ptr); Version = debug_info.getU16(offset_ptr); - uint64_t abbrOffset = debug_info.getU32(offset_ptr); + uint64_t AbbrOffset = debug_info.getU32(offset_ptr); AddrSize = debug_info.getU8(offset_ptr); - bool lengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1); - bool versionOK = DWARFContext::isSupportedVersion(Version); - bool abbrOffsetOK = AbbrevSection.size() > abbrOffset; - bool addrSizeOK = AddrSize == 4 || AddrSize == 8; + bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1); + bool VersionOK = DWARFContext::isSupportedVersion(Version); + bool AddrSizeOK = AddrSize == 4 || AddrSize == 8; - if (!lengthOK || !versionOK || !addrSizeOK || !abbrOffsetOK) + if (!LengthOK || !VersionOK || !AddrSizeOK) + return false; + + Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset); + if (Abbrevs == nullptr) return false; - Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset); return true; } @@ -98,7 +100,7 @@ void DWARFUnit::clear() { Offset = 0; Length = 0; Version = 0; - Abbrevs = 0; + Abbrevs = nullptr; AddrSize = 0; BaseAddr = 0; RangeSectionBase = 0; @@ -110,8 +112,8 @@ void DWARFUnit::clear() { const char *DWARFUnit::getCompilationDir() { extractDIEsIfNeeded(true); if (DieArray.empty()) - return 0; - return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0); + return nullptr; + return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, nullptr); } uint64_t DWARFUnit::getDWOId() { @@ -124,38 +126,32 @@ uint64_t DWARFUnit::getDWOId() { } void DWARFUnit::setDIERelations() { - if (DieArray.empty()) + if (DieArray.size() <= 1) return; - DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front(); - DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back(); - DWARFDebugInfoEntryMinimal *curr_die; - // We purposely are skipping the last element in the array in the loop below - // so that we can always have a valid next item - for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) { - // Since our loop doesn't include the last element, we can always - // safely access the next die in the array. - DWARFDebugInfoEntryMinimal *next_die = curr_die + 1; - - const DWARFAbbreviationDeclaration *curr_die_abbrev = - curr_die->getAbbreviationDeclarationPtr(); - - if (curr_die_abbrev) { - // Normal DIE - if (curr_die_abbrev->hasChildren()) - next_die->setParent(curr_die); - else - curr_die->setSibling(next_die); + + std::vector<DWARFDebugInfoEntryMinimal *> ParentChain; + DWARFDebugInfoEntryMinimal *SiblingChain = nullptr; + for (auto &DIE : DieArray) { + if (SiblingChain) { + SiblingChain->setSibling(&DIE); + } + if (const DWARFAbbreviationDeclaration *AbbrDecl = + DIE.getAbbreviationDeclarationPtr()) { + // Normal DIE. + if (AbbrDecl->hasChildren()) { + ParentChain.push_back(&DIE); + SiblingChain = nullptr; + } else { + SiblingChain = &DIE; + } } else { - // NULL DIE that terminates a sibling chain - DWARFDebugInfoEntryMinimal *parent = curr_die->getParent(); - if (parent) - parent->setSibling(next_die); + // NULL entry terminates the sibling chain. + SiblingChain = ParentChain.back(); + ParentChain.pop_back(); } } - - // Since we skipped the last element, we need to fix it up!
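The rewritten setDIERelations above replaces the stored ParentIdx with a stack of open parents while scanning the flat pre-order array; a NULL entry closes the innermost subtree, so the parent becomes the tail of the sibling chain again. A toy walk-through of that invariant (stand-in types, same control flow as the hunk above):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Node {
      bool IsNull = false;     // NULL terminator entry
      bool HasChildren = false;
      uint32_t SiblingIdx = 0; // relative index of the next sibling, 0 = none
    };

    void linkSiblings(std::vector<Node> &Nodes) {
      std::vector<Node *> ParentChain;
      Node *SiblingChain = nullptr;
      for (Node &N : Nodes) {
        if (SiblingChain)
          SiblingChain->SiblingIdx = uint32_t(&N - SiblingChain);
        if (!N.IsNull) {
          if (N.HasChildren) {
            ParentChain.push_back(&N); // children follow immediately
            SiblingChain = nullptr;    // chain resumes at the closing NULL
          } else {
            SiblingChain = &N;
          }
        } else {
          assert(!ParentChain.empty());
          SiblingChain = ParentChain.back(); // NULL closes this subtree
          ParentChain.pop_back();
        }
      }
    }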
- if (die_array_begin < die_array_end) - curr_die->setParent(die_array_begin); + assert(SiblingChain == nullptr || SiblingChain == &DieArray[0]); + assert(ParentChain.empty()); } void DWARFUnit::extractDIEsToVector( @@ -166,13 +162,13 @@ void DWARFUnit::extractDIEsToVector( // Set the offset to that of the first DIE and calculate the start of the // next compilation unit header. - uint32_t Offset = getFirstDIEOffset(); + uint32_t DIEOffset = Offset + getHeaderSize(); uint32_t NextCUOffset = getNextUnitOffset(); DWARFDebugInfoEntryMinimal DIE; uint32_t Depth = 0; bool IsCUDie = true; - while (Offset < NextCUOffset && DIE.extractFast(this, &Offset)) { + while (DIEOffset < NextCUOffset && DIE.extractFast(this, &DIEOffset)) { if (IsCUDie) { if (AppendCUDie) Dies.push_back(DIE); @@ -187,9 +183,8 @@ void DWARFUnit::extractDIEsToVector( Dies.push_back(DIE); } - const DWARFAbbreviationDeclaration *AbbrDecl = - DIE.getAbbreviationDeclarationPtr(); - if (AbbrDecl) { + if (const DWARFAbbreviationDeclaration *AbbrDecl = + DIE.getAbbreviationDeclarationPtr()) { // Normal DIE if (AbbrDecl->hasChildren()) ++Depth; @@ -205,9 +200,9 @@ void DWARFUnit::extractDIEsToVector( // Give a little bit of info if we encounter corrupt DWARF (our offset // should always terminate at or before the start of the next compilation // unit header). - if (Offset > NextCUOffset) + if (DIEOffset > NextCUOffset) fprintf(stderr, "warning: DWARF compile unit extends beyond its " - "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), Offset); + "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), DIEOffset); } size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { @@ -241,25 +236,25 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { DWARFUnit::DWOHolder::DWOHolder(object::ObjectFile *DWOFile) : DWOFile(DWOFile), DWOContext(cast(DIContext::getDWARFContext(DWOFile))), - DWOU(0) { + DWOU(nullptr) { if (DWOContext->getNumDWOCompileUnits() > 0) DWOU = DWOContext->getDWOCompileUnitAtIndex(0); } bool DWARFUnit::parseDWO() { - if (DWO.get() != 0) + if (DWO.get()) return false; extractDIEsIfNeeded(true); if (DieArray.empty()) return false; const char *DWOFileName = - DieArray[0].getAttributeValueAsString(this, DW_AT_GNU_dwo_name, 0); - if (DWOFileName == 0) + DieArray[0].getAttributeValueAsString(this, DW_AT_GNU_dwo_name, nullptr); + if (!DWOFileName) return false; const char *CompilationDir = - DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0); + DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, nullptr); SmallString<16> AbsolutePath; - if (sys::path::is_relative(DWOFileName) && CompilationDir != 0) { + if (sys::path::is_relative(DWOFileName) && CompilationDir != nullptr) { sys::path::append(AbsolutePath, CompilationDir); } sys::path::append(AbsolutePath, DWOFileName); @@ -271,7 +266,7 @@ bool DWARFUnit::parseDWO() { DWO.reset(new DWOHolder(DWOFile.get())); DWARFUnit *DWOCU = DWO->getUnit(); // Verify that compile unit in .dwo file is valid. - if (DWOCU == 0 || DWOCU->getDWOId() != getDWOId()) { + if (!DWOCU || DWOCU->getDWOId() != getDWOId()) { DWO.reset(); return false; } @@ -298,33 +293,33 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) { } } -void -DWARFUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, - bool clear_dies_if_already_not_parsed, - uint32_t CUOffsetInAranges) { +void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) { + // First, check if CU DIE describes address ranges for the unit. 
+ const auto &CUDIERanges = getCompileUnitDIE()->getAddressRanges(this); + if (!CUDIERanges.empty()) { + CURanges.insert(CURanges.end(), CUDIERanges.begin(), CUDIERanges.end()); + return; + } + // This function is usually called if there is no .debug_aranges section // in order to produce a compile unit level set of address ranges that // is accurate. If the DIEs weren't parsed, then we don't want all dies for // all compile units to stay loaded when they weren't needed. So we can end // up parsing the DWARF and then throwing them all away to keep memory usage // down. - const bool clear_dies = extractDIEsIfNeeded(false) > 1 && - clear_dies_if_already_not_parsed; - DieArray[0].buildAddressRangeTable(this, debug_aranges, CUOffsetInAranges); + const bool ClearDIEs = extractDIEsIfNeeded(false) > 1; + DieArray[0].collectChildrenAddressRanges(this, CURanges); + + // Collect address ranges from DIEs in .dwo if necessary. bool DWOCreated = parseDWO(); - if (DWO.get()) { - // If there is a .dwo file for this compile unit, then skeleton CU DIE - // doesn't have children, and we should instead build address range table - // from DIEs in the .debug_info.dwo section of .dwo file. - DWO->getUnit()->buildAddressRangeTable( - debug_aranges, clear_dies_if_already_not_parsed, CUOffsetInAranges); - } - if (DWOCreated && clear_dies_if_already_not_parsed) + if (DWO.get()) + DWO->getUnit()->collectAddressRanges(CURanges); + if (DWOCreated) DWO.reset(); // Keep memory down by clearing DIEs if this generate function // caused them to be parsed. - if (clear_dies) + if (ClearDIEs) clearDIEs(true); } @@ -337,14 +332,14 @@ DWARFUnit::getSubprogramForAddress(uint64_t Address) { return &DIE; } } - return 0; + return nullptr; } DWARFDebugInfoEntryInlinedChain DWARFUnit::getInlinedChainForAddress(uint64_t Address) { // First, find a subprogram that contains the given address (the root // of inlined chain). - const DWARFUnit *ChainCU = 0; + const DWARFUnit *ChainCU = nullptr; const DWARFDebugInfoEntryMinimal *SubprogramDIE = getSubprogramForAddress(Address); if (SubprogramDIE) { diff --git a/lib/DebugInfo/DWARFUnit.h b/lib/DebugInfo/DWARFUnit.h index 5b4cf09..471da36 100644 --- a/lib/DebugInfo/DWARFUnit.h +++ b/lib/DebugInfo/DWARFUnit.h @@ -29,7 +29,6 @@ class raw_ostream; class DWARFUnit { const DWARFDebugAbbrev *Abbrev; StringRef InfoSection; - StringRef AbbrevSection; StringRef RangeSection; uint32_t RangeSectionBase; StringRef StringSection; @@ -60,12 +59,13 @@ class DWARFUnit { protected: virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr); + /// Size in bytes of the unit header. + virtual uint32_t getHeaderSize() const { return 11; } public: - - DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, - StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, - const RelocAddrMap *M, bool LE); + DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS, + StringRef SS, StringRef SOS, StringRef AOS, const RelocAddrMap *M, + bool LE); virtual ~DWARFUnit(); @@ -102,12 +102,7 @@ public: DWARFDebugRangeList &RangeList) const; void clear(); uint32_t getOffset() const { return Offset; } - /// Size in bytes of the compile unit header. - virtual uint32_t getSize() const { return 11; } - uint32_t getFirstDIEOffset() const { return Offset + getSize(); } uint32_t getNextUnitOffset() const { return Offset + Length + 4; } - /// Size in bytes of the .debug_info data associated with this compile unit.
- size_t getDebugInfoSize() const { return Length + 4 - getSize(); } uint32_t getLength() const { return Length; } uint16_t getVersion() const { return Version; } const DWARFAbbreviationDeclarationSet *getAbbreviations() const { @@ -123,15 +118,13 @@ public: const DWARFDebugInfoEntryMinimal * getCompileUnitDIE(bool extract_cu_die_only = true) { extractDIEsIfNeeded(extract_cu_die_only); - return DieArray.empty() ? NULL : &DieArray[0]; + return DieArray.empty() ? nullptr : &DieArray[0]; } const char *getCompilationDir(); uint64_t getDWOId(); - void buildAddressRangeTable(DWARFDebugAranges *debug_aranges, - bool clear_dies_if_already_not_parsed, - uint32_t CUOffsetInAranges); + void collectAddressRanges(DWARFAddressRangesVector &CURanges); /// getInlinedChainForAddress - fetches inlined chain for a given address. /// Returns empty chain if there is no subprogram containing address. The @@ -139,6 +132,9 @@ public: DWARFDebugInfoEntryInlinedChain getInlinedChainForAddress(uint64_t Address); private: + /// Size in bytes of the .debug_info data associated with this compile unit. + size_t getDebugInfoSize() const { return Length + 4 - getHeaderSize(); } + /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it /// hasn't already been done. Returns the number of DIEs parsed at this call. size_t extractDIEsIfNeeded(bool CUDieOnly); diff --git a/lib/DebugInfo/module.modulemap b/lib/DebugInfo/module.modulemap new file mode 100644 index 0000000..1fe5ab1 --- /dev/null +++ b/lib/DebugInfo/module.modulemap @@ -0,0 +1 @@ +module DebugInfo { requires cplusplus umbrella "." module * { export * } } diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 4768e67..6766ef1 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" @@ -25,6 +24,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" @@ -37,6 +37,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "jit" + STATISTIC(NumInitBytes, "Number of bytes of global vars initialized"); STATISTIC(NumGlobals , "Number of global vars initialized"); @@ -50,22 +52,31 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)( std::string *ErrorStr, JITMemoryManager *JMM, bool GVsWithCode, - TargetMachine *TM) = 0; + TargetMachine *TM) = nullptr; ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, RTDyldMemoryManager *MCJMM, bool GVsWithCode, - TargetMachine *TM) = 0; + TargetMachine *TM) = nullptr; ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, - std::string *ErrorStr) = 0; + std::string *ErrorStr) =nullptr; ExecutionEngine::ExecutionEngine(Module *M) : EEState(*this), - LazyFunctionCreator(0) { + LazyFunctionCreator(nullptr) { CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; + + // IR module verification is enabled by default in debug builds, and disabled + // by default in release builds. 
+#ifndef NDEBUG + VerifyModules = true; +#else + VerifyModules = false; +#endif + Modules.push_back(M); assert(M && "Module is null?"); } @@ -111,6 +122,10 @@ char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) { return GVMemoryBlock::Create(GV, *getDataLayout()); } +void ExecutionEngine::addObjectFile(std::unique_ptr O) { + llvm_unreachable("ExecutionEngine subclass doesn't implement addObjectFile."); +} + bool ExecutionEngine::removeModule(Module *M) { for(SmallVectorImpl::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { @@ -129,7 +144,7 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { if (Function *F = Modules[i]->getFunction(FnName)) return F; } - return 0; + return nullptr; } @@ -141,7 +156,7 @@ void *ExecutionEngineState::RemoveMapping(const MutexGuard &, // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the // GlobalAddressMap. if (I == GlobalAddressMap.end()) - OldVal = 0; + OldVal = nullptr; else { OldVal = I->second; GlobalAddressMap.erase(I); @@ -157,14 +172,14 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { DEBUG(dbgs() << "JIT: Map \'" << GV->getName() << "\' to [" << Addr << "]\n";); void *&CurVal = EEState.getGlobalAddressMap(locked)[GV]; - assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!"); + assert((!CurVal || !Addr) && "GlobalMapping already established!"); CurVal = Addr; // If we are using the reverse mapping, add it too. if (!EEState.getGlobalAddressReverseMap(locked).empty()) { AssertingVH &V = EEState.getGlobalAddressReverseMap(locked)[Addr]; - assert((V == 0 || GV == 0) && "GlobalMapping already established!"); + assert((!V || !GV) && "GlobalMapping already established!"); V = GV; } } @@ -193,7 +208,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { EEState.getGlobalAddressMap(locked); // Deleting from the mapping? - if (Addr == 0) + if (!Addr) return EEState.RemoveMapping(locked, GV); void *&CurVal = Map[GV]; @@ -207,7 +222,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { if (!EEState.getGlobalAddressReverseMap(locked).empty()) { AssertingVH &V = EEState.getGlobalAddressReverseMap(locked)[Addr]; - assert((V == 0 || GV == 0) && "GlobalMapping already established!"); + assert((!V || !GV) && "GlobalMapping already established!"); V = GV; } return OldVal; @@ -218,7 +233,7 @@ void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { ExecutionEngineState::GlobalAddressMapTy::iterator I = EEState.getGlobalAddressMap(locked).find(GV); - return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0; + return I != EEState.getGlobalAddressMap(locked).end() ? I->second : nullptr; } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { @@ -235,7 +250,7 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { std::map >::iterator I = EEState.getGlobalAddressReverseMap(locked).find(Addr); - return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0; + return I != EEState.getGlobalAddressReverseMap(locked).end() ? 
I->second : nullptr; } namespace { @@ -243,11 +258,11 @@ class ArgvArray { char *Array; std::vector Values; public: - ArgvArray() : Array(NULL) {} + ArgvArray() : Array(nullptr) {} ~ArgvArray() { clear(); } void clear() { delete[] Array; - Array = NULL; + Array = nullptr; for (size_t I = 0, E = Values.size(); I != E; ++I) { delete[] Values[I]; } @@ -283,7 +298,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, } // Null terminate it - EE->StoreValueToMemory(PTOGV(0), + EE->StoreValueToMemory(PTOGV(nullptr), (GenericValue*)(Array+InputArgv.size()*PtrSize), SBytePtr); return Array; @@ -303,11 +318,11 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module, // Should be an array of '{ i32, void ()* }' structs. The first value is // the init priority, which we ignore. ConstantArray *InitList = dyn_cast(GV->getInitializer()); - if (InitList == 0) + if (!InitList) return; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { ConstantStruct *CS = dyn_cast(InitList->getOperand(i)); - if (CS == 0) continue; + if (!CS) continue; Constant *FP = CS->getOperand(1); if (FP->isNullValue()) @@ -418,10 +433,10 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M, bool GVsWithCode, Reloc::Model RM, CodeModel::Model CMM) { - if (ExecutionEngine::JITCtor == 0) { + if (!ExecutionEngine::JITCtor) { if (ErrorStr) *ErrorStr = "JIT has not been linked in."; - return 0; + return nullptr; } // Use the defaults for extra parameters. Users can use EngineBuilder to @@ -437,7 +452,7 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M, // TODO: permit custom TargetOptions here TargetMachine *TM = EB.selectTarget(); - if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0; + if (!TM || (ErrorStr && ErrorStr->length() > 0)) return nullptr; return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM); } @@ -447,8 +462,8 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { // Make sure we can resolve symbols in the program as well. The zero arg // to the function tells DynamicLibrary to load the program, not a library. - if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) - return 0; + if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, ErrorStr)) + return nullptr; assert(!(JMM && MCJMM)); @@ -461,7 +476,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { else { if (ErrorStr) *ErrorStr = "Cannot create an interpreter with a memory manager."; - return 0; + return nullptr; } } @@ -470,7 +485,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { *ErrorStr = "Cannot create a legacy JIT with a runtime dyld memory " "manager."; - return 0; + return nullptr; } // Unless the interpreter was explicitly selected or the JIT is not linked, @@ -483,16 +498,17 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { << " a different -march switch.\n"; } - if (UseMCJIT && ExecutionEngine::MCJITCtor) { - ExecutionEngine *EE = - ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? MCJMM : JMM, - AllocateGVsWithCode, TheTM.release()); - if (EE) return EE; - } else if (ExecutionEngine::JITCtor) { - ExecutionEngine *EE = - ExecutionEngine::JITCtor(M, ErrorStr, JMM, - AllocateGVsWithCode, TheTM.release()); - if (EE) return EE; + ExecutionEngine *EE = nullptr; + if (UseMCJIT && ExecutionEngine::MCJITCtor) + EE = ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? 
MCJMM : JMM, + AllocateGVsWithCode, TheTM.release()); + else if (ExecutionEngine::JITCtor) + EE = ExecutionEngine::JITCtor(M, ErrorStr, JMM, + AllocateGVsWithCode, TheTM.release()); + + if (EE) { + EE->setVerifyModules(VerifyModules); + return EE; } } @@ -503,16 +519,16 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return ExecutionEngine::InterpCtor(M, ErrorStr); if (ErrorStr) *ErrorStr = "Interpreter has not been linked in."; - return 0; + return nullptr; } - if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0 && - ExecutionEngine::MCJITCtor == 0) { + if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::JITCtor && + !ExecutionEngine::MCJITCtor) { if (ErrorStr) *ErrorStr = "JIT has not been linked in."; } - return 0; + return nullptr; } void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { @@ -848,7 +864,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { break; case Type::PointerTyID: if (isa(C)) - Result.PointerVal = 0; + Result.PointerVal = nullptr; else if (const Function *F = dyn_cast(C)) Result = PTOGV(getPointerToFunctionOrStub(const_cast(F))); else if (const GlobalVariable *GV = dyn_cast(C)) @@ -1193,20 +1209,18 @@ void ExecutionEngine::emitGlobals() { if (Modules.size() != 1) { for (unsigned m = 0, e = Modules.size(); m != e; ++m) { Module &M = *Modules[m]; - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - const GlobalValue *GV = I; - if (GV->hasLocalLinkage() || GV->isDeclaration() || - GV->hasAppendingLinkage() || !GV->hasName()) + for (const auto &GV : M.globals()) { + if (GV.hasLocalLinkage() || GV.isDeclaration() || + GV.hasAppendingLinkage() || !GV.hasName()) continue;// Ignore external globals and globals with internal linkage. const GlobalValue *&GVEntry = - LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())]; + LinkedGlobalsMap[std::make_pair(GV.getName(), GV.getType())]; // If this is the first time we've seen this global, it is the canonical // version. if (!GVEntry) { - GVEntry = GV; + GVEntry = &GV; continue; } @@ -1216,8 +1230,8 @@ void ExecutionEngine::emitGlobals() { // Otherwise, we know it's linkonce/weak, replace it if this is a strong // symbol. FIXME is this right for common? - if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage()) - GVEntry = GV; + if (GV.hasExternalLinkage() || GVEntry->hasExternalWeakLinkage()) + GVEntry = &GV; } } } @@ -1225,31 +1239,30 @@ void ExecutionEngine::emitGlobals() { std::vector NonCanonicalGlobals; for (unsigned m = 0, e = Modules.size(); m != e; ++m) { Module &M = *Modules[m]; - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { + for (const auto &GV : M.globals()) { // In the multi-module case, see what this global maps to. if (!LinkedGlobalsMap.empty()) { if (const GlobalValue *GVEntry = - LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) { + LinkedGlobalsMap[std::make_pair(GV.getName(), GV.getType())]) { // If something else is the canonical global, ignore this one. - if (GVEntry != &*I) { - NonCanonicalGlobals.push_back(I); + if (GVEntry != &GV) { + NonCanonicalGlobals.push_back(&GV); continue; } } } - if (!I->isDeclaration()) { - addGlobalMapping(I, getMemoryForGV(I)); + if (!GV.isDeclaration()) { + addGlobalMapping(&GV, getMemoryForGV(&GV)); } else { // External variable reference. Try to use the dynamic loader to // get a pointer to it. 
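The selection logic above prefers MCJIT or the legacy JIT and only reaches the interpreter as a fallback. A hypothetical caller-side sketch, assuming the 3.5-era EngineBuilder API (setErrorStr/setEngineKind/setUseMCJIT; treat the exact calls as an assumption rather than a prescribed usage):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/IR/Module.h"
    #include <string>

    // Returns nullptr on failure, with the reason reported through Err.
    llvm::ExecutionEngine *makeEngine(llvm::Module *M, std::string &Err) {
      return llvm::EngineBuilder(M)
          .setErrorStr(&Err)
          .setEngineKind(llvm::EngineKind::Either) // JIT if linked in, else interpreter
          .setUseMCJIT(true)
          .create();
    }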
if (void *SymAddr = - sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName())) - addGlobalMapping(I, SymAddr); + sys::DynamicLibrary::SearchForAddressOfSymbol(GV.getName())) + addGlobalMapping(&GV, SymAddr); else { report_fatal_error("Could not resolve external global address: " - +I->getName()); + +GV.getName()); } } } @@ -1269,16 +1282,15 @@ void ExecutionEngine::emitGlobals() { // Now that all of the globals are set up in memory, loop through them all // and initialize their contents. - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (!I->isDeclaration()) { + for (const auto &GV : M.globals()) { + if (!GV.isDeclaration()) { if (!LinkedGlobalsMap.empty()) { if (const GlobalValue *GVEntry = - LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) - if (GVEntry != &*I) // Not the canonical variable. + LinkedGlobalsMap[std::make_pair(GV.getName(), GV.getType())]) + if (GVEntry != &GV) // Not the canonical variable. continue; } - EmitGlobalVariable(I); + EmitGlobalVariable(&GV); } } } @@ -1290,12 +1302,12 @@ void ExecutionEngine::emitGlobals() { void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { void *GA = getPointerToGlobalIfAvailable(GV); - if (GA == 0) { + if (!GA) { // If it's not already specified, allocate memory for the global. GA = getMemoryForGV(GV); // If we failed to allocate memory for this global, return. - if (GA == 0) return; + if (!GA) return; addGlobalMapping(GV, GA); } diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index db3dead..6ff1e7a 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "llvm-c/ExecutionEngine.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -23,17 +22,11 @@ using namespace llvm; +#define DEBUG_TYPE "jit" + // Wrapping the C bindings types. 
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef) -inline DataLayout *unwrap(LLVMTargetDataRef P) { - return reinterpret_cast(P); -} - -inline LLVMTargetDataRef wrap(const DataLayout *P) { - return reinterpret_cast(const_cast(P)); -} - inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { return reinterpret_cast(P); } @@ -410,7 +403,7 @@ uint8_t *SimpleBindingMemoryManager::allocateDataSection( } bool SimpleBindingMemoryManager::finalizeMemory(std::string *ErrMsg) { - char *errMsgCString = 0; + char *errMsgCString = nullptr; bool result = Functions.FinalizeMemory(Opaque, &errMsgCString); assert((result || !errMsgCString) && "Did not expect an error message if FinalizeMemory succeeded"); @@ -433,7 +426,7 @@ LLVMMCJITMemoryManagerRef LLVMCreateSimpleMCJITMemoryManager( if (!AllocateCodeSection || !AllocateDataSection || !FinalizeMemory || !Destroy) - return NULL; + return nullptr; SimpleBindingMMFunctions functions; functions.AllocateCodeSection = AllocateCodeSection; diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 2ca4e3e..9a65fa0 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -15,7 +15,6 @@ #include "llvm/Config/config.h" #include "llvm/ExecutionEngine/JITEventListener.h" -#define DEBUG_TYPE "amplifier-jit-event-listener" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Metadata.h" @@ -34,6 +33,8 @@ using namespace llvm; using namespace llvm::jitprofiling; +#define DEBUG_TYPE "amplifier-jit-event-listener" + namespace { class IntelJITEventListener : public JITEventListener { @@ -193,11 +194,10 @@ void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { MethodAddressVector Functions; // Use symbol info to iterate functions in the object. 
- error_code ec; for (object::symbol_iterator I = Obj.begin_symbols(), E = Obj.end_symbols(); - I != E && !ec; - I.increment(ec)) { + I != E; + ++I) { std::vector LineInfo; std::string SourceFileName; @@ -234,7 +234,7 @@ void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { FunctionMessage.line_number_table = 0; } else { SourceFileName = Lines.front().second.getFileName(); - FunctionMessage.source_file_name = (char *)SourceFileName.c_str(); + FunctionMessage.source_file_name = const_cast(SourceFileName.c_str()); FunctionMessage.line_number_size = LineInfo.size(); FunctionMessage.line_number_table = &*LineInfo.begin(); } diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 8a80285..93bb2d1 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "interpreter" #include "Interpreter.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Statistic.h" @@ -28,6 +27,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "interpreter" + STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed"); static cl::opt PrintVolatile("interpreter-print-volatile", cl::Hidden, @@ -57,7 +58,7 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(+, Double); default: dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -68,7 +69,7 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(-, Double); default: dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -79,7 +80,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(*, Double); default: dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -90,7 +91,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1, IMPLEMENT_BINARY_OPERATOR(/, Double); default: dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -105,7 +106,7 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, break; default: dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -142,7 +143,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(==); default: dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -156,7 +157,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(!=); default: dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -170,7 +171,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -184,7 +185,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_SLT 
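The listener hunk above drops the error_code-threaded I.increment(ec) walk: the object-file symbol iterators are now ordinary forward iterators advanced with ++. A sketch of the new-style walk over a 3.5-era object::ObjectFile (dumpSymbolNames is an illustrative name):

    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void dumpSymbolNames(const object::ObjectFile &Obj) {
      for (object::symbol_iterator I = Obj.symbol_begin(), E = Obj.symbol_end();
           I != E; ++I) { // was: I != E && !ec; I.increment(ec)
        StringRef Name;
        if (I->getName(Name)) // accessors still report errors via error_code
          continue;
        outs() << Name << "\n";
      }
    }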
predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -198,7 +199,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -212,7 +213,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -226,7 +227,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -240,7 +241,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -254,7 +255,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -268,7 +269,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -293,7 +294,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break; default: dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I; - llvm_unreachable(0); + llvm_unreachable(nullptr); } SetValue(&I, R, SF); @@ -329,7 +330,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, IMPLEMENT_VECTOR_FCMP(==); default: dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -385,7 +386,7 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, IMPLEMENT_VECTOR_FCMP(!=); default: dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } // in vector case mask out NaN elements if (Ty->isVectorTy()) @@ -405,7 +406,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, IMPLEMENT_VECTOR_FCMP(<=); default: dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -419,7 +420,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, IMPLEMENT_VECTOR_FCMP(>=); default: dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -433,7 +434,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2, IMPLEMENT_VECTOR_FCMP(<); default: dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -447,7 +448,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, IMPLEMENT_VECTOR_FCMP(>); default: dbgs() << "Unhandled type for FCmp GT 
instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } return Dest; } @@ -615,7 +616,7 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { switch (I.getPredicate()) { default: dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; - llvm_unreachable(0); + llvm_unreachable(nullptr); break; case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); break; @@ -672,7 +673,7 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, case FCmpInst::FCMP_TRUE: return executeFCMP_BOOL(Src1, Src2, Ty, true); default: dbgs() << "Unhandled Cmp predicate\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } @@ -726,7 +727,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { switch(I.getOpcode()){ default: dbgs() << "Don't know how to handle this binary operator!\n-->" << I; - llvm_unreachable(0); + llvm_unreachable(nullptr); break; case Instruction::Add: INTEGER_VECTOR_OPERATION(+) break; case Instruction::Sub: INTEGER_VECTOR_OPERATION(-) break; @@ -754,7 +755,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal); else { dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } } break; @@ -763,7 +764,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { switch (I.getOpcode()) { default: dbgs() << "Don't know how to handle this binary operator!\n-->" << I; - llvm_unreachable(0); + llvm_unreachable(nullptr); break; case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; @@ -896,7 +897,7 @@ void Interpreter::visitSwitchInst(SwitchInst &I) { GenericValue CondVal = getOperandValue(Cond, SF); // Check to see if any of the cases match... - BasicBlock *Dest = 0; + BasicBlock *Dest = nullptr; for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) { GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF); if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) { @@ -979,7 +980,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) { << uintptr_t(Memory) << '\n'); GenericValue Result = PTOGV(Memory); - assert(Result.PointerVal != 0 && "Null pointer returned by malloc!"); + assert(Result.PointerVal && "Null pointer returned by malloc!"); SetValue(&I, Result, SF); if (I.getOpcode() == Instruction::Alloca) @@ -1732,7 +1733,7 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { IMPLEMENT_VAARG(Double); default: dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); } // Set the Value of this Instruction. @@ -1756,7 +1757,7 @@ void Interpreter::visitExtractElementInst(ExtractElementInst &I) { default: dbgs() << "Unhandled destination type for extractelement instruction: " << *Ty << "\n"; - llvm_unreachable(0); + llvm_unreachable(nullptr); break; case Type::IntegerTyID: Dest.IntVal = Src1.AggregateVal[indx].IntVal; @@ -2073,7 +2074,7 @@ GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) { // void Interpreter::callFunction(Function *F, const std::vector &ArgVals) { - assert((ECStack.empty() || ECStack.back().Caller.getInstruction() == 0 || + assert((ECStack.empty() || !ECStack.back().Caller.getInstruction() || ECStack.back().Caller.arg_size() == ArgVals.size()) && "Incorrect number of arguments passed into function call!"); // Make a new stack frame... and fill it in. 
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index a03c7f5..671bbee 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -98,13 +98,13 @@ static ExFunc lookupFunction(const Function *F) { sys::ScopedLock Writer(*FunctionsLock); ExFunc FnPtr = FuncNames[ExtName]; - if (FnPtr == 0) + if (!FnPtr) FnPtr = FuncNames["lle_X_" + F->getName().str()]; - if (FnPtr == 0) // Try calling a generic function... if it exists... + if (!FnPtr) // Try calling a generic function... if it exists... FnPtr = (ExFunc)(intptr_t) sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" + F->getName().str()); - if (FnPtr != 0) + if (FnPtr) ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later return FnPtr; } diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index 6d4f6f7..c589457 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -38,7 +38,7 @@ ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) { if (ErrStr) *ErrStr = EC.message(); // We got an error, just return 0 - return 0; + return nullptr; } return new Interpreter(M); diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 2e93cae..2145cde 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -108,7 +108,7 @@ public: /// create - Create an interpreter ExecutionEngine. This can never fail. /// - static ExecutionEngine *create(Module *M, std::string *ErrorStr = 0); + static ExecutionEngine *create(Module *M, std::string *ErrorStr = nullptr); /// run - Start execution with the specified function and arguments. /// @@ -118,7 +118,7 @@ public: void *getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure = true) override { // FIXME: not implemented. - return 0; + return nullptr; } /// recompileAndRelinkFunction - For the interpreter, functions are always diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index d3ad77b..f8b2827 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -79,7 +79,7 @@ ExecutionEngine *JIT::createJIT(Module *M, // Try to register the program as a source of symbols to resolve against. // // FIXME: Don't do this here. - sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); + sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr); // If the target supports JIT code generation, create the JIT. if (TargetJITInfo *TJ = TM->getJITInfo()) { @@ -87,7 +87,7 @@ ExecutionEngine *JIT::createJIT(Module *M, } else { if (ErrorStr) *ErrorStr = "target does not support JIT code generation"; - return 0; + return nullptr; } } @@ -157,7 +157,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Turn the machine code intermediate representation into bytes in memory that // may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { report_fatal_error("Target does not support machine code emission!"); } @@ -190,7 +190,7 @@ void JIT::addModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. 
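The ExternalFunctions and Interpreter hunks apply the same null-pointer cleanup to tests (!FnPtr instead of FnPtr == 0), returns, and default arguments. A hypothetical factory in that style (createEngine and its error message are illustrative, echoing the wording the JIT uses):

    #include <string>

    struct Engine {};

    static Engine *createEngine(bool Supported, std::string *ErrStr = nullptr) {
      if (!Supported) {
        if (ErrStr) // the out-parameter is optional, hence the nullptr default
          *ErrStr = "target does not support JIT code generation";
        return nullptr; // was: return 0;
      }
      return new Engine();
    }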
- if (TM.addPassesToEmitMachineCode(PM, *JCE)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { report_fatal_error("Target does not support machine code emission!"); } @@ -210,7 +210,7 @@ bool JIT::removeModule(Module *M) { if (jitstate && jitstate->getModule() == M) { delete jitstate; - jitstate = 0; + jitstate = nullptr; } if (!jitstate && !Modules.empty()) { @@ -222,7 +222,7 @@ bool JIT::removeModule(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM.addPassesToEmitMachineCode(PM, *JCE)) { + if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) { report_fatal_error("Target does not support machine code emission!"); } @@ -353,7 +353,7 @@ GenericValue JIT::runFunction(Function *F, // currently don't support varargs. SmallVector Args; for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) { - Constant *C = 0; + Constant *C = nullptr; Type *ArgTy = FTy->getParamType(i); const GenericValue &AV = ArgValues[i]; switch (ArgTy->getTypeID()) { @@ -406,13 +406,13 @@ GenericValue JIT::runFunction(Function *F, } void JIT::RegisterJITEventListener(JITEventListener *L) { - if (L == NULL) + if (!L) return; MutexGuard locked(lock); EventListeners.push_back(L); } void JIT::UnregisterJITEventListener(JITEventListener *L) { - if (L == NULL) + if (!L) return; MutexGuard locked(lock); std::vector::reverse_iterator I= @@ -584,7 +584,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, report_fatal_error("Program used external function '"+Name+ "' which could not be resolved!"); } - return 0; + return nullptr; } @@ -604,7 +604,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) { return (void*)&__dso_handle; #endif Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName()); - if (Ptr == 0) { + if (!Ptr) { report_fatal_error("Could not resolve external global address: " +GV->getName()); } @@ -629,10 +629,10 @@ void *JIT::recompileAndRelinkFunction(Function *F) { void *OldAddr = getPointerToGlobalIfAvailable(F); // If it's not already compiled there is no reason to patch it up. - if (OldAddr == 0) { return getPointerToFunction(F); } + if (!OldAddr) return getPointerToFunction(F); // Delete the old function mapping. 
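Besides the nullptr changes, the addPassesToEmitMachineCode calls above gain a third argument, !getVerifyModules(), so the IR verifier only runs during emission when module verification was requested. The listener hunks also shorten the registration guards; a toy registry in the same style (ListenerRegistry is a stand-in):

    #include <vector>

    struct Listener {};

    struct ListenerRegistry {
      std::vector<Listener *> Listeners;
      void add(Listener *L) {
        if (!L) // was: if (L == NULL)
          return;
        Listeners.push_back(L);
      }
    };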
- addGlobalMapping(F, 0); + addGlobalMapping(F, nullptr); // Recodegen the function runJITOnFunction(F); diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index b1b0768..d2bd508 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -189,7 +189,7 @@ public: TargetMachine *TM); // Run the JIT on F and return information about the generated code - void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0) override; + void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override; void RegisterJITEventListener(JITEventListener *L) override; void UnregisterJITEventListener(JITEventListener *L) override; diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 9d215ec..cd7a500 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "JIT.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -52,6 +51,8 @@ #endif using namespace llvm; +#define DEBUG_TYPE "jit" + STATISTIC(NumBytes, "Number of bytes of machine code compiled"); STATISTIC(NumRelos, "Number of relocations applied"); STATISTIC(NumRetries, "Number of retries with more memory"); @@ -343,7 +344,8 @@ namespace { void *FunctionBody; // Beginning of the function's allocation. void *Code; // The address the function's code actually starts at. void *ExceptionTable; - EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {} + EmittedCode() : FunctionBody(nullptr), Code(nullptr), + ExceptionTable(nullptr) {} }; struct EmittedFunctionConfig : public ValueMapConfig { typedef JITEmitter *ExtraData; @@ -360,7 +362,7 @@ namespace { public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) - : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), + : SizeEstimate(0), Resolver(jit, *this), MMI(nullptr), CurFn(nullptr), EmittedFunctions(this), TheJIT(&jit) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { @@ -516,7 +518,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we // must resolve the symbol now. void *Actual = TheJIT->isCompilingLazily() - ? (void *)(intptr_t)LazyResolverFn : (void *)0; + ? (void *)(intptr_t)LazyResolverFn : (void *)nullptr; // If this is an external declaration, attempt to resolve the address now // to place in the stub. @@ -525,7 +527,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { // If we resolved the symbol to a null address (eg. a weak external) // don't emit a stub. Return a null pointer to the application. 
- if (!Actual) return 0; + if (!Actual) return nullptr; } TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); @@ -592,8 +594,8 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) { if (Stub) return Stub; TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); - JE.startGVStub(0, SL.Size, SL.Alignment); - Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE); + JE.startGVStub(nullptr, SL.Size, SL.Alignment); + Stub = TheJIT->getJITInfo().emitFunctionStub(nullptr, FnAddr, JE); JE.finishGVStub(); DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub @@ -619,8 +621,8 @@ void *JITResolver::JITCompilerFn(void *Stub) { JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub); assert(JR && "Unable to find the corresponding JITResolver to the call site"); - Function* F = 0; - void* ActualPtr = 0; + Function* F = nullptr; + void* ActualPtr = nullptr; { // Only lock for getting the Function. The call getPointerToFunction made @@ -688,7 +690,7 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, return TheJIT->getOrEmitGlobalVariable(GV); if (GlobalAlias *GA = dyn_cast(V)) - return TheJIT->getPointerToGlobal(GA->getAliasedGlobal()); + return TheJIT->getPointerToGlobal(GA->getAliasee()); // If we have already compiled the function, return a pointer to its body. Function *F = cast(V); @@ -735,7 +737,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext(); - if (DL.getScope(Context) != 0 && PrevDL != DL) { + if (DL.getScope(Context) != nullptr && PrevDL != DL) { JITEvent_EmittedFunctionDetails::LineStart NextLine; NextLine.Address = getCurrentPCValue(); NextLine.Loc = DL; @@ -824,7 +826,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { // Resolve the relocations to concrete pointers. for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { MachineRelocation &MR = Relocations[i]; - void *ResultPtr = 0; + void *ResultPtr = nullptr; if (!MR.letTargetResolve()) { if (MR.isExternalSymbol()) { ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), @@ -870,7 +872,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } } - CurFn = 0; + CurFn = nullptr; TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0], Relocations.size(), MemMgr->getGOTBase()); } @@ -899,7 +901,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { SizeEstimate = 0; } - BufferBegin = CurBufferPtr = 0; + BufferBegin = CurBufferPtr = nullptr; NumBytes += FnEnd-FnStart; // Invalidate the icache if necessary. @@ -1017,7 +1019,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) { ConstantPoolBase = allocateSpace(Size, Align); ConstantPool = MCP; - if (ConstantPoolBase == 0) return; // Buffer overflow. + if (!ConstantPoolBase) return; // Buffer overflow. DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase << "] (size: " << Size << ", alignment: " << Align << ")\n"); @@ -1073,7 +1075,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { return; const std::vector &JT = MJTI->getJumpTables(); - if (JT.empty() || JumpTableBase == 0) return; + if (JT.empty() || !JumpTableBase) return; switch (MJTI->getEntryKind()) { @@ -1243,7 +1245,7 @@ void JIT::updateFunctionStub(Function *F) { void JIT::freeMachineCodeForFunction(Function *F) { // Delete translation for this from the ExecutionEngine, so it will get // retranslated next time it is used. 
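The getPointerToGlobal hunk tracks an IR API change: GlobalAlias::getAliasedGlobal() is gone, and the aliasee is reached via getAliasee(). A sketch assuming 3.5-era headers (aliaseeOf is an illustrative helper, not from the patch):

    #include "llvm/IR/GlobalAlias.h"
    using namespace llvm;

    static const Constant *aliaseeOf(const GlobalValue *V) {
      if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
        return GA->getAliasee(); // was: GA->getAliasedGlobal()
      return V;
    }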
- updateGlobalMapping(F, 0); + updateGlobalMapping(F, nullptr); // Free the actual memory for the function body and related stuff. static_cast(JCE)->deallocateMemForFunction(F); diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 0d1ea02..584b93f 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -40,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "jit" + STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT"); JITMemoryManager::~JITMemoryManager() {} @@ -80,7 +81,7 @@ namespace { /// getFreeBlockBefore - If the block before this one is free, return it, /// otherwise return null. FreeRangeHeader *getFreeBlockBefore() const { - if (PrevAllocated) return 0; + if (PrevAllocated) return nullptr; intptr_t PrevSize = reinterpret_cast( const_cast(this))[-1]; return reinterpret_cast( @@ -174,7 +175,7 @@ FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) { // coalesce with it, update our notion of what the free list is. if (&FollowingFreeBlock == FreeList) { FreeList = FollowingFreeBlock.Next; - FreeListToReturn = 0; + FreeListToReturn = nullptr; assert(&FollowingFreeBlock != FreeList && "No tombstone block?"); } FollowingFreeBlock.RemoveFromFreeList(); @@ -269,13 +270,12 @@ namespace { class DefaultJITMemoryManager; - class JITSlabAllocator : public SlabAllocator { + class JITAllocator { DefaultJITMemoryManager &JMM; public: - JITSlabAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { } - virtual ~JITSlabAllocator() { } - MemSlab *Allocate(size_t Size) override; - void Deallocate(MemSlab *Slab) override; + JITAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { } + void *Allocate(size_t Size, size_t /*Alignment*/); + void Deallocate(void *Slab, size_t Size); }; /// DefaultJITMemoryManager - Manage memory for the JIT code generation. @@ -313,9 +313,10 @@ namespace { // Memory slabs allocated by the JIT. We refer to them as slabs so we don't // confuse them with the blocks of memory described above. std::vector CodeSlabs; - JITSlabAllocator BumpSlabAllocator; - BumpPtrAllocatorImpl StubAllocator; - BumpPtrAllocatorImpl DataAllocator; + BumpPtrAllocatorImpl StubAllocator; + BumpPtrAllocatorImpl DataAllocator; // Circular list of free blocks. FreeRangeHeader *FreeMemoryList; @@ -568,30 +569,24 @@ namespace { }; } -MemSlab *JITSlabAllocator::Allocate(size_t Size) { +void *JITAllocator::Allocate(size_t Size, size_t /*Alignment*/) { sys::MemoryBlock B = JMM.allocateNewSlab(Size); - MemSlab *Slab = (MemSlab*)B.base(); - Slab->Size = B.size(); - Slab->NextPtr = 0; - return Slab; + return B.base(); } -void JITSlabAllocator::Deallocate(MemSlab *Slab) { - sys::MemoryBlock B(Slab, Slab->Size); +void JITAllocator::Deallocate(void *Slab, size_t Size) { + sys::MemoryBlock B(Slab, Size); sys::Memory::ReleaseRWX(B); } DefaultJITMemoryManager::DefaultJITMemoryManager() - : + : #ifdef NDEBUG - PoisonMemory(false), + PoisonMemory(false), #else - PoisonMemory(true), + PoisonMemory(true), #endif - LastSlab(0, 0), - BumpSlabAllocator(*this), - StubAllocator(BumpSlabAllocator), - DataAllocator(BumpSlabAllocator) { + LastSlab(nullptr, 0), StubAllocator(*this), DataAllocator(*this) { // Allocate space for code. 
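The JITMemoryManager hunk rewrites the slab allocator against BumpPtrAllocatorImpl's new allocator concept: Allocate(Size, Alignment) returns raw memory and Deallocate(Ptr, Size) releases it, with no MemSlab header bookkeeping. A malloc-backed stand-in with the same shape (illustrative only; the real JITAllocator above forwards to allocateNewSlab and sys::Memory::ReleaseRWX):

    #include <cstddef>
    #include <cstdlib>

    class MallocSlabAllocator {
    public:
      // Signatures required by BumpPtrAllocatorImpl's allocator parameter.
      void *Allocate(size_t Size, size_t /*Alignment*/) {
        return std::malloc(Size);
      }
      void Deallocate(void *Slab, size_t /*Size*/) { std::free(Slab); }
    };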
sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize); @@ -644,11 +639,11 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() // Start out with the freelist pointing to Mem0. FreeMemoryList = Mem0; - GOTBase = NULL; + GOTBase = nullptr; } void DefaultJITMemoryManager::AllocateGOT() { - assert(GOTBase == 0 && "Cannot allocate the got multiple times"); + assert(!GOTBase && "Cannot allocate the got multiple times"); GOTBase = new uint8_t[sizeof(void*) * 8192]; HasGOT = true; } @@ -663,9 +658,9 @@ DefaultJITMemoryManager::~DefaultJITMemoryManager() { sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) { // Allocate a new block close to the last one. std::string ErrMsg; - sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0; + sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : nullptr; sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg); - if (B.base() == 0) { + if (!B.base()) { report_fatal_error("Allocation failed when allocating new memory in the" " JIT\n" + Twine(ErrMsg)); } @@ -726,7 +721,7 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) { char *End = Start + I->size(); // Check each memory range. - for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = NULL; + for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = nullptr; Start <= (char*)Hdr && (char*)Hdr < End; Hdr = &Hdr->getBlockAfter()) { if (Hdr->ThisAllocated == 0) { @@ -895,7 +890,7 @@ void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name report_fatal_error("Program used external function '"+Name+ "' which could not be resolved!"); } - return 0; + return nullptr; } diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt index 90f4d2f..922cd0d 100644 --- a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = MCJIT parent = ExecutionEngine -required_libraries = Core ExecutionEngine RuntimeDyld Support Target +required_libraries = Core ExecutionEngine Object RuntimeDyld Support Target diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 49b6727..42cb4ea 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -50,7 +50,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // Try to register the program as a source of symbols to resolve against. // // FIXME: Don't do this here. - sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); + sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr); return new MCJIT(M, TM, MemMgr ? 
MemMgr : new SectionMemoryManager(), GVsWithCode); @@ -58,8 +58,8 @@ ExecutionEngine *MCJIT::createJIT(Module *M, MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, bool AllocateGVsWithCode) - : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(this, MM), Dyld(&MemMgr), - ObjCache(0) { + : ExecutionEngine(m), TM(tm), Ctx(nullptr), MemMgr(this, MM), Dyld(&MemMgr), + ObjCache(nullptr) { OwnedModules.addModule(m); setDataLayout(TM->getDataLayout()); @@ -113,8 +113,8 @@ bool MCJIT::removeModule(Module *M) { -void MCJIT::addObjectFile(object::ObjectFile *Obj) { - ObjectImage *LoadedObject = Dyld.loadObject(Obj); +void MCJIT::addObjectFile(std::unique_ptr Obj) { + ObjectImage *LoadedObject = Dyld.loadObject(std::move(Obj)); if (!LoadedObject || Dyld.hasError()) report_fatal_error(Dyld.getErrorString()); @@ -150,7 +150,8 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) { // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(), false)) { + if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(), + !getVerifyModules())) { report_fatal_error("Target does not support MC emission!"); } @@ -185,9 +186,9 @@ void MCJIT::generateCodeForModule(Module *M) { std::unique_ptr ObjectToLoad; // Try to load the pre-compiled object from cache if possible - if (0 != ObjCache) { + if (ObjCache) { std::unique_ptr PreCompiledObject(ObjCache->getObject(M)); - if (0 != PreCompiledObject.get()) + if (PreCompiledObject.get()) ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.release())); } @@ -285,7 +286,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name, } } // We didn't find the symbol in any of our modules. - return NULL; + return nullptr; } uint64_t MCJIT::getSymbolAddress(const std::string &Name, @@ -307,10 +308,10 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, std::unique_ptr ChildBin; // FIXME: Support nested archives? if (!ChildIt->getAsBinary(ChildBin) && ChildBin->isObject()) { - object::ObjectFile *OF = reinterpret_cast( - ChildBin.release()); + std::unique_ptr OF( + static_cast(ChildBin.release())); // This causes the object file to be loaded. - addObjectFile(OF); + addObjectFile(std::move(OF)); // The address should be here now. Addr = getExistingSymbolAddress(Name); if (Addr) @@ -365,7 +366,7 @@ void *MCJIT::getPointerToFunction(Function *F) { generateCodeForModule(M); else if (!OwnedModules.hasModuleBeenLoaded(M)) // If this function doesn't belong to one of our modules, we're done. - return NULL; + return nullptr; // FIXME: Should the Dyld be retaining module information? Probably not. 
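MCJIT::addObjectFile now takes std::unique_ptr<object::ObjectFile>, making the ownership transfer explicit in the signature rather than implied by documentation. The shape in miniature (Owner and ObjectFile here are stand-ins, not the LLVM types):

    #include <memory>
    #include <utility>

    struct ObjectFile {};

    struct Owner {
      std::unique_ptr<ObjectFile> Loaded;
      void addObjectFile(std::unique_ptr<ObjectFile> Obj) {
        Loaded = std::move(Obj); // ownership moves into the engine
      }
    };

Call sites must now write addObjectFile(std::move(OF)), which is exactly what the archive-loading hunk above does.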
// @@ -409,7 +410,7 @@ Function *MCJIT::FindFunctionNamedInModulePtrSet(const char *FnName, if (Function *F = (*I)->getFunction(FnName)) return F; } - return 0; + return nullptr; } Function *MCJIT::FindFunctionNamed(const char *FnName) { @@ -541,17 +542,17 @@ void *MCJIT::getPointerToNamedFunction(const std::string &Name, report_fatal_error("Program used external function '"+Name+ "' which could not be resolved!"); } - return 0; + return nullptr; } void MCJIT::RegisterJITEventListener(JITEventListener *L) { - if (L == NULL) + if (!L) return; MutexGuard locked(lock); EventListeners.push_back(L); } void MCJIT::UnregisterJITEventListener(JITEventListener *L) { - if (L == NULL) + if (!L) return; MutexGuard locked(lock); SmallVector::reverse_iterator I= diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 066eceb..100e9a2 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -71,7 +71,7 @@ public: ClientMM->deregisterEHFrames(Addr, LoadAddr, Size); } - bool finalizeMemory(std::string *ErrMsg = 0) override { + bool finalizeMemory(std::string *ErrMsg = nullptr) override { return ClientMM->finalizeMemory(ErrMsg); } @@ -239,7 +239,7 @@ public: /// @name ExecutionEngine interface implementation /// @{ void addModule(Module *M) override; - void addObjectFile(object::ObjectFile *O) override; + void addObjectFile(std::unique_ptr O) override; void addArchive(object::Archive *O) override; bool removeModule(Module *M) override; diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp index f1dd5a6..9ceaa90 100644 --- a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp +++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp @@ -79,7 +79,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, ec); if (ec) { // FIXME: Add error propagation to the interface. - return NULL; + return nullptr; } // Save this address as the basis for our next request diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 87cef2e..fd37a13 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -15,7 +15,6 @@ #include "llvm/Config/config.h" #include "llvm/ExecutionEngine/JITEventListener.h" -#define DEBUG_TYPE "oprofile-jit-event-listener" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/CodeGen/MachineFunction.h" @@ -33,6 +32,8 @@ using namespace llvm; using namespace llvm::jitprofiling; +#define DEBUG_TYPE "oprofile-jit-event-listener" + namespace { class OProfileJITEventListener : public JITEventListener { @@ -170,11 +171,8 @@ void OProfileJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { } // Use symbol info to iterate functions in the object. - error_code ec; - for (object::symbol_iterator I = Obj.begin_symbols(), - E = Obj.end_symbols(); - I != E && !ec; - I.increment(ec)) { + for (object::symbol_iterator I = Obj.begin_symbols(), E = Obj.end_symbols(); + I != E; ++I) { object::SymbolRef::Type SymType; if (I->getType(SymType)) continue; if (SymType == object::SymbolRef::ST_Function) { @@ -203,11 +201,8 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) { } // Use symbol info to iterate functions in the object. 
- error_code ec; - for (object::symbol_iterator I = Obj.begin_symbols(), - E = Obj.end_symbols(); - I != E && !ec; - I.increment(ec)) { + for (object::symbol_iterator I = Obj.begin_symbols(), E = Obj.end_symbols(); + I != E; ++I) { object::SymbolRef::Type SymType; if (I->getType(SymType)) continue; if (SymType == object::SymbolRef::ST_Function) { diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp index 6702e20..04edbd2 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "oprofile-wrapper" #include "llvm/ExecutionEngine/OProfileWrapper.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/Debug.h" @@ -29,6 +28,8 @@ #include #include +#define DEBUG_TYPE "oprofile-wrapper" + namespace { // Global mutex to ensure a single thread initializes oprofile agent. diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp index 1d0e9b3..8546571 100644 --- a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp @@ -45,7 +45,7 @@ extern "C" { // We put information about the JITed function in this global, which the // debugger reads. Make sure to specify the version statically, because the // debugger checks the version before we can set it during runtime. - struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; + struct jit_descriptor __jit_debug_descriptor = { 1, 0, nullptr, nullptr }; // Debuggers puts a breakpoint in this function. LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { @@ -108,10 +108,10 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) { __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; // Insert this entry at the head of the list. 
- JITCodeEntry->prev_entry = NULL; + JITCodeEntry->prev_entry = nullptr; jit_code_entry* NextEntry = __jit_debug_descriptor.first_entry; JITCodeEntry->next_entry = NextEntry; - if (NextEntry != NULL) { + if (NextEntry) { NextEntry->prev_entry = JITCodeEntry; } __jit_debug_descriptor.first_entry = JITCodeEntry; @@ -142,11 +142,10 @@ void GDBJITRegistrar::registerObject(const ObjectBuffer &Object) { "Second attempt to perform debug registration."); jit_code_entry* JITCodeEntry = new jit_code_entry(); - if (JITCodeEntry == 0) { + if (!JITCodeEntry) { llvm::report_fatal_error( "Allocation failed when registering a JIT entry!\n"); - } - else { + } else { JITCodeEntry->symfile_addr = Buffer; JITCodeEntry->symfile_size = Size; @@ -198,7 +197,7 @@ void GDBJITRegistrar::deregisterObjectInternal( } delete JITCodeEntry; - JITCodeEntry = NULL; + JITCodeEntry = nullptr; } llvm::ManagedStatic TheRegistrar; diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h index 3693c69..4917b93 100644 --- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h +++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h @@ -18,6 +18,8 @@ #include "llvm/ExecutionEngine/ObjectImage.h" #include "llvm/Object/ObjectFile.h" +#include + namespace llvm { namespace object { @@ -30,13 +32,13 @@ class ObjectImageCommon : public ObjectImage { void anchor() override; protected: - object::ObjectFile *ObjFile; + std::unique_ptr ObjFile; // This form of the constructor allows subclasses to use // format-specific subclasses of ObjectFile directly - ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj) + ObjectImageCommon(ObjectBuffer *Input, std::unique_ptr Obj) : ObjectImage(Input), // saves Input as Buffer and takes ownership - ObjFile(Obj) + ObjFile(std::move(Obj)) { } @@ -44,12 +46,13 @@ public: ObjectImageCommon(ObjectBuffer* Input) : ObjectImage(Input) // saves Input as Buffer and takes ownership { - ObjFile = - object::ObjectFile::createObjectFile(Buffer->getMemBuffer()).get(); + // FIXME: error checking? createObjectFile returns an ErrorOr + // and should probably be checked for failure. 
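NotifyDebugger splices the new jit_code_entry at the head of GDB's doubly linked descriptor list. The same insertion in isolation (Entry and pushFront are stand-ins for jit_code_entry and the registrar logic):

    struct Entry {
      Entry *prev_entry;
      Entry *next_entry;
    };

    static void pushFront(Entry *&Head, Entry *E) {
      E->prev_entry = nullptr; // the new head has no predecessor
      E->next_entry = Head;
      if (Head)
        Head->prev_entry = E;
      Head = E;
    }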
+ ObjFile.reset(object::ObjectFile::createObjectFile(Buffer->getMemBuffer()).get()); } - ObjectImageCommon(object::ObjectFile* Input) - : ObjectImage(NULL), ObjFile(Input) {} - virtual ~ObjectImageCommon() { delete ObjFile; } + ObjectImageCommon(std::unique_ptr Input) + : ObjectImage(nullptr), ObjFile(std::move(Input)) {} + virtual ~ObjectImageCommon() { } object::symbol_iterator begin_symbols() const override { return ObjFile->symbol_begin(); } @@ -66,7 +69,7 @@ public: StringRef getData() const override { return ObjFile->getData(); } - object::ObjectFile* getObjectFile() const override { return ObjFile; } + object::ObjectFile* getObjectFile() const override { return ObjFile.get(); } // Subclasses can override these methods to update the image with loaded // addresses for sections and common symbols diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 986d3a0..c1eb0fd 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dyld" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "JITRegistrar.h" #include "ObjectImageCommon.h" @@ -25,6 +24,8 @@ using namespace llvm; using namespace llvm::object; +#define DEBUG_TYPE "dyld" + // Empty out-of-line virtual destructor as the key function. RuntimeDyldImpl::~RuntimeDyldImpl() {} @@ -72,12 +73,40 @@ void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, llvm_unreachable("Attempting to remap address of unknown section!"); } +static error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { + uint64_t Address; + if (error_code EC = Sym.getAddress(Address)) + return EC; + + if (Address == UnknownAddressOrSize) { + Result = UnknownAddressOrSize; + return object_error::success; + } + + const ObjectFile *Obj = Sym.getObject(); + section_iterator SecI(Obj->section_begin()); + if (error_code EC = Sym.getSection(SecI)) + return EC; + + if (SecI == Obj->section_end()) { + Result = UnknownAddressOrSize; + return object_error::success; + } + + uint64_t SectionAddress; + if (error_code EC = SecI->getAddress(SectionAddress)) + return EC; + + Result = Address - SectionAddress; + return object_error::success; +} + ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { MutexGuard locked(lock); std::unique_ptr Obj(InputObject); if (!Obj) - return NULL; + return nullptr; // Save information about our target Arch = (Triple::ArchType)Obj->getArch(); @@ -115,36 +144,33 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { bool IsCommon = Flags & SymbolRef::SF_Common; if (IsCommon) { // Add the common symbols to a list. We'll allocate them all below. 
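The new getOffset() helper above derives a symbol's section-relative offset arithmetically, as symbol address minus section address, instead of poking at file offsets. The core computation in miniature (sectionOffset is illustrative):

    #include <cstdint>

    static uint64_t sectionOffset(uint64_t SymbolAddr, uint64_t SectionAddr) {
      // e.g. a symbol at VA 0x1040 in a section based at VA 0x1000 -> 0x40
      return SymbolAddr - SectionAddr;
    }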
- uint32_t Align; - Check(I->getAlignment(Align)); - uint64_t Size = 0; - Check(I->getSize(Size)); - CommonSize += Size + Align; - CommonSymbols[*I] = CommonSymbolInfo(Size, Align); + if (!GlobalSymbolTable.count(Name)) { + uint32_t Align; + Check(I->getAlignment(Align)); + uint64_t Size = 0; + Check(I->getSize(Size)); + CommonSize += Size + Align; + CommonSymbols[*I] = CommonSymbolInfo(Size, Align); + } } else { if (SymType == object::SymbolRef::ST_Function || SymType == object::SymbolRef::ST_Data || SymType == object::SymbolRef::ST_Unknown) { - uint64_t FileOffset; + uint64_t SectOffset; StringRef SectionData; bool IsCode; section_iterator SI = Obj->end_sections(); - Check(I->getFileOffset(FileOffset)); + Check(getOffset(*I, SectOffset)); Check(I->getSection(SI)); if (SI == Obj->end_sections()) continue; Check(SI->getContents(SectionData)); Check(SI->isText(IsCode)); - const uint8_t *SymPtr = - (const uint8_t *)Obj->getData().data() + (uintptr_t)FileOffset; - uintptr_t SectOffset = - (uintptr_t)(SymPtr - (const uint8_t *)SectionData.begin()); unsigned SectionID = findOrEmitSection(*Obj, *SI, IsCode, LocalSections); LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset); - DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset) - << " flags: " << Flags << " SID: " << SectionID - << " Offset: " << format("%p", SectOffset)); + DEBUG(dbgs() << "\tOffset: " << format("%p", (uintptr_t)SectOffset) + << " flags: " << Flags << " SID: " << SectionID); GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset); } } @@ -153,7 +179,7 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { // Allocate common symbols if (CommonSize != 0) - emitCommonSymbols(*Obj, CommonSymbols, CommonSize, LocalSymbols); + emitCommonSymbols(*Obj, CommonSymbols, CommonSize, GlobalSymbolTable); // Parse and process relocations DEBUG(dbgs() << "Parse relocations:\n"); @@ -163,7 +189,10 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { StubMap Stubs; section_iterator RelocatedSection = SI->getRelocatedSection(); - if (SI->relocation_empty() && !ProcessAllSections) + relocation_iterator I = SI->relocation_begin(); + relocation_iterator E = SI->relocation_end(); + + if (I == E && !ProcessAllSections) continue; bool IsCode = false; @@ -172,14 +201,13 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) { findOrEmitSection(*Obj, *RelocatedSection, IsCode, LocalSections); DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n"); - for (relocation_iterator I = SI->relocation_begin(), - E = SI->relocation_end(); I != E;) + for (; I != E;) I = processRelocationRef(SectionID, I, *Obj, LocalSections, LocalSymbols, Stubs); } // Give the subclasses a chance to tie-up any loose ends. - finalizeLoad(LocalSections); + finalizeLoad(*Obj, LocalSections); return Obj.release(); } @@ -400,7 +428,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, uintptr_t Allocate; unsigned SectionID = Sections.size(); uint8_t *Addr; - const char *pData = 0; + const char *pData = nullptr; // Some sections, such as debug info, don't need to be loaded for execution. // Leave those where they are. @@ -441,7 +469,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, // to handle later processing (and by 'handle' I mean don't do anything // with these sections). 
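The common-symbol hunk above now skips a common (tentative) definition whenever a strong definition of the same name is already present in GlobalSymbolTable, so strong symbols win. A toy version of the guard (all names are stand-ins):

    #include <cstdint>
    #include <map>
    #include <string>

    struct CommonInfo { uint64_t Size; uint32_t Align; };

    static void recordCommon(std::map<std::string, CommonInfo> &Commons,
                             const std::map<std::string, uint64_t> &Defined,
                             const std::string &Name, CommonInfo CI) {
      if (!Defined.count(Name)) // mirrors !GlobalSymbolTable.count(Name)
        Commons[Name] = CI;
    }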
Allocate = 0; - Addr = 0; + Addr = nullptr; DEBUG(dbgs() << "emitSection SectionID: " << SectionID << " Name: " << Name << " obj addr: " << format("%p", data.data()) << " new addr: 0" << " DataSize: " << DataSize << " StubBufSize: " << StubBufSize @@ -490,7 +518,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, } uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { - if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) { + if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be || + Arch == Triple::arm64 || Arch == Triple::arm64_be) { // This stub has to be able to access the full address space, // since symbol lookup won't necessarily find a handy, in-range, // PLT stub for functions which could be anywhere. @@ -560,6 +589,8 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { *Addr = 0xFF; // jmp *(Addr+1) = 0x25; // rip // 32-bit PC-relative address of the GOT entry will be stored at Addr+2 + } else if (Arch == Triple::x86) { + *Addr = 0xE9; // 32-bit pc-relative jump. } return Addr; } @@ -586,7 +617,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { const RelocationEntry &RE = Relocs[i]; // Ignore relocations for sections that were not loaded - if (Sections[RE.SectionID].Address == 0) + if (Sections[RE.SectionID].Address == nullptr) continue; resolveRelocation(RE, Value); } @@ -651,7 +682,7 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { // though the public class spawns a new 'impl' instance for each load, // they share a single memory manager. This can become a problem when page // permissions are applied. - Dyld = 0; + Dyld = nullptr; MM = mm; ProcessAllSections = false; } @@ -672,21 +703,23 @@ createRuntimeDyldMachO(RTDyldMemoryManager *MM, bool ProcessAllSections) { return Dyld; } -ObjectImage *RuntimeDyld::loadObject(ObjectFile *InputObject) { +ObjectImage *RuntimeDyld::loadObject(std::unique_ptr InputObject) { std::unique_ptr InputImage; + ObjectFile &Obj = *InputObject; + if (InputObject->isELF()) { - InputImage.reset(RuntimeDyldELF::createObjectImageFromFile(InputObject)); + InputImage.reset(RuntimeDyldELF::createObjectImageFromFile(std::move(InputObject))); if (!Dyld) Dyld = createRuntimeDyldELF(MM, ProcessAllSections).release(); } else if (InputObject->isMachO()) { - InputImage.reset(RuntimeDyldMachO::createObjectImageFromFile(InputObject)); + InputImage.reset(RuntimeDyldMachO::createObjectImageFromFile(std::move(InputObject))); if (!Dyld) Dyld = createRuntimeDyldMachO(MM, ProcessAllSections).release(); } else report_fatal_error("Incompatible object format!"); - if (!Dyld->isCompatibleFile(InputObject)) + if (!Dyld->isCompatibleFile(&Obj)) report_fatal_error("Incompatible object format!"); Dyld->loadObject(InputImage.get()); @@ -740,7 +773,7 @@ ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) { void *RuntimeDyld::getSymbolAddress(StringRef Name) { if (!Dyld) - return NULL; + return nullptr; return Dyld->getSymbolAddress(Name); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 3204b81..6ba24b9 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dyld" #include "RuntimeDyldELF.h" #include "JITRegistrar.h" #include "ObjectImageCommon.h" @@ -29,6 +28,8 @@ 
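The new x86 stub above is a single 0xE9 byte, i.e. a jmp rel32 whose 32-bit displacement is filled in later by relocation resolution. For context, assembling a complete jmp rel32 by hand looks like this (emitJmpRel32 is illustrative; RuntimeDyld itself writes only the opcode here):

    #include <cstdint>
    #include <cstring>

    static void emitJmpRel32(uint8_t *Stub, uint64_t Target) {
      Stub[0] = 0xE9; // jmp rel32 opcode
      // The displacement is measured from the end of the 5-byte instruction.
      int32_t Rel = (int32_t)(Target - ((uint64_t)(uintptr_t)Stub + 5));
      std::memcpy(Stub + 1, &Rel, sizeof(Rel));
    }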
using namespace llvm; using namespace llvm::object; +#define DEBUG_TYPE "dyld" + namespace { static inline error_code check(error_code Err) { @@ -50,7 +51,12 @@ template class DyldELFObject : public ELFObjectFile { typedef typename ELFDataTypeTypedefHelper::value_type addr_type; + std::unique_ptr UnderlyingFile; + public: + DyldELFObject(std::unique_ptr UnderlyingFile, + MemoryBuffer *Wrapper, error_code &ec); + DyldELFObject(MemoryBuffer *Wrapper, error_code &ec); void updateSectionAddress(const SectionRef &Sec, uint64_t Addr); @@ -67,13 +73,11 @@ public: }; template class ELFObjectImage : public ObjectImageCommon { -protected: - DyldELFObject *DyldObj; bool Registered; public: - ELFObjectImage(ObjectBuffer *Input, DyldELFObject *Obj) - : ObjectImageCommon(Input, Obj), DyldObj(Obj), Registered(false) {} + ELFObjectImage(ObjectBuffer *Input, std::unique_ptr> Obj) + : ObjectImageCommon(Input, std::move(Obj)), Registered(false) {} virtual ~ELFObjectImage() { if (Registered) @@ -83,11 +87,13 @@ public: // Subclasses can override these methods to update the image with loaded // addresses for sections and common symbols void updateSectionAddress(const SectionRef &Sec, uint64_t Addr) override { - DyldObj->updateSectionAddress(Sec, Addr); + static_cast*>(getObjectFile()) + ->updateSectionAddress(Sec, Addr); } void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr) override { - DyldObj->updateSymbolAddress(Sym, Addr); + static_cast*>(getObjectFile()) + ->updateSymbolAddress(Sym, Addr); } void registerWithDebugger() override { @@ -109,6 +115,14 @@ DyldELFObject::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec) } template +DyldELFObject::DyldELFObject(std::unique_ptr UnderlyingFile, + MemoryBuffer *Wrapper, error_code &ec) + : ELFObjectFile(Wrapper, ec), + UnderlyingFile(std::move(UnderlyingFile)) { + this->isDyldELFObject = true; +} + +template void DyldELFObject::updateSectionAddress(const SectionRef &Sec, uint64_t Addr) { DataRefImpl ShdrRef = Sec.getRawDataRefImpl(); @@ -164,30 +178,36 @@ void RuntimeDyldELF::deregisterEHFrames() { } ObjectImage * -RuntimeDyldELF::createObjectImageFromFile(object::ObjectFile *ObjFile) { +RuntimeDyldELF::createObjectImageFromFile(std::unique_ptr ObjFile) { if (!ObjFile) - return NULL; + return nullptr; error_code ec; MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false); if (ObjFile->getBytesInAddress() == 4 && ObjFile->isLittleEndian()) { - DyldELFObject> *Obj = - new DyldELFObject>(Buffer, ec); - return new ELFObjectImage>(NULL, Obj); + auto Obj = + llvm::make_unique>>( + std::move(ObjFile), Buffer, ec); + return new ELFObjectImage>( + nullptr, std::move(Obj)); } else if (ObjFile->getBytesInAddress() == 4 && !ObjFile->isLittleEndian()) { - DyldELFObject> *Obj = - new DyldELFObject>(Buffer, ec); - return new ELFObjectImage>(NULL, Obj); + auto Obj = + llvm::make_unique>>( + std::move(ObjFile), Buffer, ec); + return new ELFObjectImage>(nullptr, std::move(Obj)); } else if (ObjFile->getBytesInAddress() == 8 && !ObjFile->isLittleEndian()) { - DyldELFObject> *Obj = - new DyldELFObject>(Buffer, ec); - return new ELFObjectImage>(NULL, Obj); + auto Obj = llvm::make_unique>>( + std::move(ObjFile), Buffer, ec); + return new ELFObjectImage>(nullptr, + std::move(Obj)); } else if (ObjFile->getBytesInAddress() == 8 && ObjFile->isLittleEndian()) { - DyldELFObject> *Obj = - new DyldELFObject>(Buffer, ec); - return new ELFObjectImage>(NULL, Obj); + auto Obj = + llvm::make_unique>>( + std::move(ObjFile), Buffer, ec); + return new 
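DyldELFObject gains a unique_ptr member so the ELF wrapper keeps the underlying ObjectFile alive for its own lifetime instead of relying on the caller. The keep-alive pattern in miniature (File and Wrapper are stand-ins):

    #include <memory>
    #include <utility>

    struct File {};

    struct Wrapper {
      std::unique_ptr<File> Underlying; // outlives every use of the wrapper
      explicit Wrapper(std::unique_ptr<File> F) : Underlying(std::move(F)) {}
    };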
ELFObjectImage>( + nullptr, std::move(Obj)); } else llvm_unreachable("Unexpected ELF format"); } @@ -201,28 +221,29 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { error_code ec; if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) { - DyldELFObject> *Obj = - new DyldELFObject>( + auto Obj = + llvm::make_unique>>( Buffer->getMemBuffer(), ec); - return new ELFObjectImage>(Buffer, Obj); + return new ELFObjectImage>( + Buffer, std::move(Obj)); } else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) { - DyldELFObject> *Obj = - new DyldELFObject>( + auto Obj = + llvm::make_unique>>( Buffer->getMemBuffer(), ec); - return new ELFObjectImage>(Buffer, Obj); + return new ELFObjectImage>(Buffer, + std::move(Obj)); } else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) { - DyldELFObject> *Obj = - new DyldELFObject>( - Buffer->getMemBuffer(), ec); - return new ELFObjectImage>(Buffer, Obj); + auto Obj = llvm::make_unique>>( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage>(Buffer, std::move(Obj)); } else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { - DyldELFObject> *Obj = - new DyldELFObject>( + auto Obj = + llvm::make_unique>>( Buffer->getMemBuffer(), ec); - return new ELFObjectImage>(Buffer, Obj); + return new ELFObjectImage>(Buffer, std::move(Obj)); } else llvm_unreachable("Unexpected ELF format"); } @@ -845,6 +866,8 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, break; case Triple::aarch64: case Triple::aarch64_be: + case Triple::arm64: + case Triple::arm64_be: resolveAArch64Relocation(Section, Offset, Value, Type, Addend); break; case Triple::arm: // Fall through. @@ -950,7 +973,8 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( DEBUG(dbgs() << "\t\tSectionID: " << SectionID << " Offset: " << Offset << "\n"); - if (Arch == Triple::aarch64 && + if ((Arch == Triple::aarch64 || Arch == Triple::aarch64_be || + Arch == Triple::arm64 || Arch == Triple::arm64_be) && (RelType == ELF::R_AARCH64_CALL26 || RelType == ELF::R_AARCH64_JUMP26)) { // This is an AArch64 branch relocation, need to use a stub function. DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation."); @@ -1151,7 +1175,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Extra check to avoid relocation againt empty symbols (usually // the R_PPC64_TOC). if (SymType != SymbolRef::ST_Unknown && TargetName.empty()) - Value.SymbolName = NULL; + Value.SymbolName = nullptr; if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); @@ -1283,7 +1307,8 @@ void RuntimeDyldELF::updateGOTEntries(StringRef Name, uint64_t Addr) { for (it = GOTs.begin(); it != end; ++it) { GOTRelocations &GOTEntries = it->second; for (int i = 0, e = GOTEntries.size(); i != e; ++i) { - if (GOTEntries[i].SymbolName != 0 && GOTEntries[i].SymbolName == Name) { + if (GOTEntries[i].SymbolName != nullptr && + GOTEntries[i].SymbolName == Name) { GOTEntries[i].Offset = Addr; } } @@ -1297,6 +1322,9 @@ size_t RuntimeDyldELF::getGOTEntrySize() { switch (Arch) { case Triple::x86_64: case Triple::aarch64: + case Triple::aarch64_be: + case Triple::arm64: + case Triple::arm64_be: case Triple::ppc64: case Triple::ppc64le: case Triple::systemz: @@ -1331,7 +1359,7 @@ uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, uint64_t Offset) { // Find the matching entry in our vector. 
uint64_t SymbolOffset = 0; for (int i = 0, e = GOTEntries.size(); i != e; ++i) { - if (GOTEntries[i].SymbolName == 0) { + if (!GOTEntries[i].SymbolName) { if (getSectionLoadAddress(GOTEntries[i].SectionID) == LoadAddress && GOTEntries[i].Offset == Offset) { GOTIndex = i; @@ -1369,7 +1397,8 @@ uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, uint64_t Offset) { return 0; } -void RuntimeDyldELF::finalizeLoad(ObjSectionToIDMap &SectionMap) { +void RuntimeDyldELF::finalizeLoad(ObjectImage &ObjImg, + ObjSectionToIDMap &SectionMap) { // If necessary, allocate the global offset table if (MemMgr) { // Allocate the GOT if necessary diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 27db5cd..a526073 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -59,7 +59,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { uint64_t Value, uint32_t Type, int64_t Addend); unsigned getMaxStubSize() override { - if (Arch == Triple::aarch64) + if (Arch == Triple::aarch64 || Arch == Triple::arm64 || + Arch == Triple::aarch64_be || Arch == Triple::arm64_be) return 20; // movz; movk; movk; movk; br if (Arch == Triple::arm || Arch == Triple::thumb) return 8; // 32-bit instruction and 32-bit address @@ -115,11 +116,12 @@ public: bool isCompatibleFile(const object::ObjectFile *Buffer) const override; void registerEHFrames() override; void deregisterEHFrames() override; - void finalizeLoad(ObjSectionToIDMap &SectionMap) override; + void finalizeLoad(ObjectImage &ObjImg, + ObjSectionToIDMap &SectionMap) override; virtual ~RuntimeDyldELF(); static ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); - static ObjectImage *createObjectImageFromFile(object::ObjectFile *Obj); + static ObjectImage *createObjectImageFromFile(std::unique_ptr Obj); }; } // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index c153ee1..412cf20 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -90,9 +90,17 @@ public: /// used to make a relocation section relative instead of symbol relative. int64_t Addend; + struct SectionPair { + uint32_t SectionA; + uint32_t SectionB; + }; + /// SymOffset - Section offset of the relocation entry's symbol (used for GOT /// lookup). - uint64_t SymOffset; + union { + uint64_t SymOffset; + SectionPair Sections; + }; /// True if this is a PCRel relocation (MachO specific). 
bool IsPCRel; @@ -113,6 +121,16 @@ public: bool IsPCRel, unsigned Size) : SectionID(id), Offset(offset), RelType(type), Addend(addend), SymOffset(0), IsPCRel(IsPCRel), Size(Size) {} + + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend, + unsigned SectionA, uint64_t SectionAOffset, unsigned SectionB, + uint64_t SectionBOffset, bool IsPCRel, unsigned Size) + : SectionID(id), Offset(offset), RelType(type), + Addend(SectionAOffset - SectionBOffset + addend), IsPCRel(IsPCRel), + Size(Size) { + Sections.SectionA = SectionA; + Sections.SectionB = SectionB; + } }; class RelocationValueRef { @@ -121,7 +139,8 @@ public: uint64_t Offset; int64_t Addend; const char *SymbolName; - RelocationValueRef() : SectionID(0), Offset(0), Addend(0), SymbolName(0) {} + RelocationValueRef() : SectionID(0), Offset(0), Addend(0), + SymbolName(nullptr) {} inline bool operator==(const RelocationValueRef &Other) const { return SectionID == Other.SectionID && Offset == Other.Offset && @@ -335,7 +354,7 @@ public: // Work in progress. SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name); if (pos == GlobalSymbolTable.end()) - return 0; + return nullptr; SymbolLoc Loc = pos->second; return getSectionAddress(Loc.first) + Loc.second; } @@ -372,7 +391,7 @@ public: virtual void deregisterEHFrames(); - virtual void finalizeLoad(ObjSectionToIDMap &SectionMap) {} + virtual void finalizeLoad(ObjectImage &ObjImg, ObjSectionToIDMap &SectionMap) {} }; } // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 7eae9c2..2b425fb 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -11,17 +11,20 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dyld" #include "RuntimeDyldMachO.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" using namespace llvm; using namespace llvm::object; +#define DEBUG_TYPE "dyld" + namespace llvm { static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText, intptr_t DeltaForEH) { + DEBUG(dbgs() << "Processing FDE: Delta for text: " << DeltaForText + << ", Delta for EH: " << DeltaForEH << "\n"); uint32_t Length = *((uint32_t *)P); P += 4; unsigned char *Ret = P + Length; @@ -66,7 +69,7 @@ void RuntimeDyldMachO::registerEHFrames() { continue; SectionEntry *Text = &Sections[SectionInfo.TextSID]; SectionEntry *EHFrame = &Sections[SectionInfo.EHFrameSID]; - SectionEntry *ExceptTab = NULL; + SectionEntry *ExceptTab = nullptr; if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID) ExceptTab = &Sections[SectionInfo.ExceptTabSID]; @@ -87,7 +90,8 @@ void RuntimeDyldMachO::registerEHFrames() { UnregisteredEHFrameSections.clear(); } -void RuntimeDyldMachO::finalizeLoad(ObjSectionToIDMap &SectionMap) { +void RuntimeDyldMachO::finalizeLoad(ObjectImage &ObjImg, + ObjSectionToIDMap &SectionMap) { unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID; unsigned TextSID = RTDYLD_INVALID_SECTION_ID; unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID; @@ -102,6 +106,12 @@ void RuntimeDyldMachO::finalizeLoad(ObjSectionToIDMap &SectionMap) { TextSID = i->second; else if (Name == "__gcc_except_tab") ExceptTabSID = i->second; + else if (Name == "__jump_table") + populateJumpTable(cast(*ObjImg.getObjectFile()), + Section, i->second); + else if (Name == "__pointers") + populatePointersSection(cast(*ObjImg.getObjectFile()), + Section, i->second); } 
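// (Illustrative sketch, not from the patch: the new RelocationEntry
// constructor above records the two section IDs in a union that overlays the
// GOT-only SymOffset field, and pre-folds offsetA - offsetB + addend into
// Addend. A simplified standalone model of that layout:)
#include <cstdint>

struct MiniRelocationEntry {
  int64_t Addend;                        // pre-folded: offA - offB + addend
  struct SectionPair { uint32_t SectionA, SectionB; };
  union {
    uint64_t SymOffset;                  // used by GOT lookups
    SectionPair Sections;                // used by SECTDIFF-style entries
  };

  MiniRelocationEntry(int64_t AddendIn, uint32_t A, uint64_t AOff,
                      uint32_t B, uint64_t BOff)
      : Addend(int64_t(AOff) - int64_t(BOff) + AddendIn) {
    Sections.SectionA = A;
    Sections.SectionB = B;
  }
};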
UnregisteredEHFrameSections.push_back( EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID)); @@ -129,91 +139,87 @@ void RuntimeDyldMachO::finalizeLoad(ObjSectionToIDMap &SectionMap) { // symbol in the target address space. void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE, uint64_t Value) { - const SectionEntry &Section = Sections[RE.SectionID]; - return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, - RE.IsPCRel, RE.Size); -} - -void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section, - uint64_t Offset, uint64_t Value, - uint32_t Type, int64_t Addend, - bool isPCRel, unsigned LogSize) { - uint8_t *LocalAddress = Section.Address + Offset; - uint64_t FinalAddress = Section.LoadAddress + Offset; - unsigned MachoType = Type; - unsigned Size = 1 << LogSize; - - DEBUG(dbgs() << "resolveRelocation LocalAddress: " - << format("%p", LocalAddress) - << " FinalAddress: " << format("%p", FinalAddress) - << " Value: " << format("%p", Value) << " Addend: " << Addend - << " isPCRel: " << isPCRel << " MachoType: " << MachoType - << " Size: " << Size << "\n"); + DEBUG ( + const SectionEntry &Section = Sections[RE.SectionID]; + uint8_t* LocalAddress = Section.Address + RE.Offset; + uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + + dbgs() << "resolveRelocation Section: " << RE.SectionID + << " LocalAddress: " << format("%p", LocalAddress) + << " FinalAddress: " << format("%p", FinalAddress) + << " Value: " << format("%p", Value) + << " Addend: " << RE.Addend + << " isPCRel: " << RE.IsPCRel + << " MachoType: " << RE.RelType + << " Size: " << (1 << RE.Size) << "\n"; + ); // This just dispatches to the proper target specific routine. switch (Arch) { default: llvm_unreachable("Unsupported CPU type!"); case Triple::x86_64: - resolveX86_64Relocation(LocalAddress, FinalAddress, (uintptr_t)Value, - isPCRel, MachoType, Size, Addend); + resolveX86_64Relocation(RE, Value); break; case Triple::x86: - resolveI386Relocation(LocalAddress, FinalAddress, (uintptr_t)Value, isPCRel, - MachoType, Size, Addend); + resolveI386Relocation(RE, Value); break; case Triple::arm: // Fall through. case Triple::thumb: - resolveARMRelocation(LocalAddress, FinalAddress, (uintptr_t)Value, isPCRel, - MachoType, Size, Addend); + resolveARMRelocation(RE, Value); break; + case Triple::aarch64: case Triple::arm64: - resolveARM64Relocation(LocalAddress, FinalAddress, (uintptr_t)Value, - isPCRel, MachoType, Size, Addend); + resolveAArch64Relocation(RE, Value); break; } } -bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, - uint64_t Value, bool isPCRel, - unsigned Type, unsigned Size, - int64_t Addend) { - if (isPCRel) - Value -= FinalAddress + 4; // see resolveX86_64Relocation +bool RuntimeDyldMachO::resolveI386Relocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + uint8_t* LocalAddress = Section.Address + RE.Offset; - switch (Type) { - default: - llvm_unreachable("Invalid relocation type!"); - case MachO::GENERIC_RELOC_VANILLA: { - uint8_t *p = LocalAddress; - uint64_t ValueToWrite = Value + Addend; - for (unsigned i = 0; i < Size; ++i) { - *p++ = (uint8_t)(ValueToWrite & 0xff); - ValueToWrite >>= 8; - } - return false; + if (RE.IsPCRel) { + uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation. 
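// (Illustrative sketch, not from the patch: the x86 PC-relative cases above
// all encode the distance from the end of the 4-byte fixup field, i.e. the
// next PC, to the target, which is where the "+ 4" comes from. Hypothetical
// helper:)
#include <cstdint>

int64_t pcRelDisplacement(uint64_t TargetAddr, uint64_t FixupAddr) {
  return int64_t(TargetAddr) - int64_t(FixupAddr + 4);
}
// e.g. a fixup at 0x1000 aimed at 0x2000 stores 0x2000 - 0x1004 = 0xFFC.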
} - case MachO::GENERIC_RELOC_SECTDIFF: - case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: - case MachO::GENERIC_RELOC_PB_LA_PTR: - return Error("Relocation type not implemented yet!"); + + switch (RE.RelType) { + default: + llvm_unreachable("Invalid relocation type!"); + case MachO::GENERIC_RELOC_VANILLA: + return applyRelocationValue(LocalAddress, Value + RE.Addend, + 1 << RE.Size); + case MachO::GENERIC_RELOC_SECTDIFF: + case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: { + uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress; + uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress; + assert((Value == SectionABase || Value == SectionBBase) && + "Unexpected SECTDIFF relocation value."); + Value = SectionABase - SectionBBase + RE.Addend; + return applyRelocationValue(LocalAddress, Value, 1 << RE.Size); + } + case MachO::GENERIC_RELOC_PB_LA_PTR: + return Error("Relocation type not implemented yet!"); } } -bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, - uint64_t Value, bool isPCRel, - unsigned Type, unsigned Size, - int64_t Addend) { +bool RuntimeDyldMachO::resolveX86_64Relocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + uint8_t* LocalAddress = Section.Address + RE.Offset; + // If the relocation is PC-relative, the value to be encoded is the // pointer difference. - if (isPCRel) + if (RE.IsPCRel) { // FIXME: It seems this value needs to be adjusted by 4 for an effective PC // address. Is that expected? Only for branches, perhaps? - Value -= FinalAddress + 4; + uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation. + } - switch (Type) { + switch (RE.RelType) { default: llvm_unreachable("Invalid relocation type!"); case MachO::X86_64_RELOC_SIGNED_1: @@ -221,17 +227,8 @@ bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress, case MachO::X86_64_RELOC_SIGNED_4: case MachO::X86_64_RELOC_SIGNED: case MachO::X86_64_RELOC_UNSIGNED: - case MachO::X86_64_RELOC_BRANCH: { - Value += Addend; - // Mask in the target value a byte at a time (we don't have an alignment - // guarantee for the target address, so this is safest). - uint8_t *p = (uint8_t *)LocalAddress; - for (unsigned i = 0; i < Size; ++i) { - *p++ = (uint8_t)Value; - Value >>= 8; - } - return false; - } + case MachO::X86_64_RELOC_BRANCH: + return applyRelocationValue(LocalAddress, Value + RE.Addend, 1 << RE.Size); case MachO::X86_64_RELOC_GOT_LOAD: case MachO::X86_64_RELOC_GOT: case MachO::X86_64_RELOC_SUBTRACTOR: @@ -240,14 +237,15 @@ bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress, } } -bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, - uint64_t FinalAddress, - uint64_t Value, bool isPCRel, - unsigned Type, unsigned Size, - int64_t Addend) { +bool RuntimeDyldMachO::resolveARMRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + uint8_t* LocalAddress = Section.Address + RE.Offset; + // If the relocation is PC-relative, the value to be encoded is the // pointer difference. - if (isPCRel) { + if (RE.IsPCRel) { + uint64_t FinalAddress = Section.LoadAddress + RE.Offset; Value -= FinalAddress; // ARM PCRel relocations have an effective-PC offset of two instructions // (four bytes in Thumb mode, 8 bytes in ARM mode). 
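// (Illustrative sketch, not from the patch: the SECTDIFF math above in one
// place. Because Addend was pre-folded to offsetA - offsetB + addend when the
// relocation was processed, adding the difference of the two sections' load
// addresses yields (LoadA + offsetA) - (LoadB + offsetB) + addend:)
#include <cstdint>

uint64_t resolveSectDiff(uint64_t SectionALoad, uint64_t SectionBLoad,
                         int64_t FoldedAddend /* offA - offB + addend */) {
  return SectionALoad - SectionBLoad + FoldedAddend;
}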
@@ -255,19 +253,11 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, Value -= 8; } - switch (Type) { + switch (RE.RelType) { default: llvm_unreachable("Invalid relocation type!"); - case MachO::ARM_RELOC_VANILLA: { - // Mask in the target value a byte at a time (we don't have an alignment - // guarantee for the target address, so this is safest). - uint8_t *p = (uint8_t *)LocalAddress; - for (unsigned i = 0; i < Size; ++i) { - *p++ = (uint8_t)Value; - Value >>= 8; - } - break; - } + case MachO::ARM_RELOC_VANILLA: + return applyRelocationValue(LocalAddress, Value, 1 << RE.Size); case MachO::ARM_RELOC_BR24: { // Mask the value into the target address. We know instructions are // 32-bit aligned, so we can do it all at once. @@ -275,13 +265,16 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, // The low two bits of the value are not encoded. Value >>= 2; // Mask the value to 24 bits. - Value &= 0xffffff; + uint64_t FinalValue = Value & 0xffffff; + // Check for overflow. + if (Value != FinalValue) + return Error("ARM BR24 relocation out of range."); // FIXME: If the destination is a Thumb function (and the instruction // is a non-predicated BL instruction), we need to change it to a BLX // instruction instead. // Insert the value into the instruction. - *p = (*p & ~0xffffff) | Value; + *p = (*p & ~0xffffff) | FinalValue; break; } case MachO::ARM_THUMB_RELOC_BR22: @@ -297,29 +290,23 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, return false; } -bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress, - uint64_t FinalAddress, - uint64_t Value, bool isPCRel, - unsigned Type, unsigned Size, - int64_t Addend) { +bool RuntimeDyldMachO::resolveAArch64Relocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + uint8_t* LocalAddress = Section.Address + RE.Offset; + // If the relocation is PC-relative, the value to be encoded is the // pointer difference. - if (isPCRel) + if (RE.IsPCRel) { + uint64_t FinalAddress = Section.LoadAddress + RE.Offset; Value -= FinalAddress; + } - switch (Type) { + switch (RE.RelType) { default: llvm_unreachable("Invalid relocation type!"); - case MachO::ARM64_RELOC_UNSIGNED: { - // Mask in the target value a byte at a time (we don't have an alignment - // guarantee for the target address, so this is safest). - uint8_t *p = (uint8_t *)LocalAddress; - for (unsigned i = 0; i < Size; ++i) { - *p++ = (uint8_t)Value; - Value >>= 8; - } - break; - } + case MachO::ARM64_RELOC_UNSIGNED: + return applyRelocationValue(LocalAddress, Value, 1 << RE.Size); case MachO::ARM64_RELOC_BRANCH26: { // Mask the value into the target address. We know instructions are // 32-bit aligned, so we can do it all at once. @@ -327,9 +314,12 @@ bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress, // The low two bits of the value are not encoded. Value >>= 2; // Mask the value to 26 bits. - Value &= 0x3ffffff; + uint64_t FinalValue = Value & 0x3ffffff; + // Check for overflow. + if (FinalValue != Value) + return Error("ARM64 BRANCH26 relocation out of range."); // Insert the value into the instruction. 
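// (Illustrative sketch, not from the patch: the overflow checks added above
// compare the masked value against the unmasked one. The same idea written as
// an explicit signed range test for a generic branch field; 24 bits for ARM
// BR24, 26 bits for ARM64 BRANCH26:)
#include <cstdint>

bool encodeBranchField(uint32_t &Insn, int64_t Value, unsigned Bits) {
  Value >>= 2;                                  // low two bits not encoded
  int64_t Min = -(int64_t(1) << (Bits - 1));    // signed field range
  int64_t Max = (int64_t(1) << (Bits - 1)) - 1;
  if (Value < Min || Value > Max)
    return false;                               // out of range
  uint32_t Mask = (1u << Bits) - 1;
  Insn = (Insn & ~Mask) | (uint32_t(Value) & Mask);
  return true;
}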
- *p = (*p & ~0x3ffffff) | Value; + *p = (*p & ~0x3ffffff) | FinalValue; break; } case MachO::ARM64_RELOC_SUBTRACTOR: @@ -346,6 +336,198 @@ bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress, return false; } +void RuntimeDyldMachO::populateJumpTable(MachOObjectFile &Obj, + const SectionRef &JTSection, + unsigned JTSectionID) { + assert(!Obj.is64Bit() && + "__jump_table section not supported in 64-bit MachO."); + + MachO::dysymtab_command DySymTabCmd = Obj.getDysymtabLoadCommand(); + MachO::section Sec32 = Obj.getSection(JTSection.getRawDataRefImpl()); + uint32_t JTSectionSize = Sec32.size; + unsigned FirstIndirectSymbol = Sec32.reserved1; + unsigned JTEntrySize = Sec32.reserved2; + unsigned NumJTEntries = JTSectionSize / JTEntrySize; + uint8_t* JTSectionAddr = getSectionAddress(JTSectionID); + unsigned JTEntryOffset = 0; + + assert((JTSectionSize % JTEntrySize) == 0 && + "Jump-table section does not contain a whole number of stubs?"); + + for (unsigned i = 0; i < NumJTEntries; ++i) { + unsigned SymbolIndex = + Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i); + symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex); + StringRef IndirectSymbolName; + SI->getName(IndirectSymbolName); + uint8_t* JTEntryAddr = JTSectionAddr + JTEntryOffset; + createStubFunction(JTEntryAddr); + RelocationEntry RE(JTSectionID, JTEntryOffset + 1, + MachO::GENERIC_RELOC_VANILLA, 0, true, 2); + addRelocationForSymbol(RE, IndirectSymbolName); + JTEntryOffset += JTEntrySize; + } +} + +void RuntimeDyldMachO::populatePointersSection(MachOObjectFile &Obj, + const SectionRef &PTSection, + unsigned PTSectionID) { + assert(!Obj.is64Bit() && + "__pointers section not supported in 64-bit MachO."); + + MachO::dysymtab_command DySymTabCmd = Obj.getDysymtabLoadCommand(); + MachO::section Sec32 = Obj.getSection(PTSection.getRawDataRefImpl()); + uint32_t PTSectionSize = Sec32.size; + unsigned FirstIndirectSymbol = Sec32.reserved1; + const unsigned PTEntrySize = 4; + unsigned NumPTEntries = PTSectionSize / PTEntrySize; + unsigned PTEntryOffset = 0; + + assert((PTSectionSize % PTEntrySize) == 0 && + "Pointers section does not contain a whole number of stubs?"); + + DEBUG(dbgs() << "Populating __pointers, Section ID " << PTSectionID + << ", " << NumPTEntries << " entries, " + << PTEntrySize << " bytes each:\n"); + + for (unsigned i = 0; i < NumPTEntries; ++i) { + unsigned SymbolIndex = + Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i); + symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex); + StringRef IndirectSymbolName; + SI->getName(IndirectSymbolName); + DEBUG(dbgs() << " " << IndirectSymbolName << ": index " << SymbolIndex + << ", PT offset: " << PTEntryOffset << "\n"); + RelocationEntry RE(PTSectionID, PTEntryOffset, + MachO::GENERIC_RELOC_VANILLA, 0, false, 2); + addRelocationForSymbol(RE, IndirectSymbolName); + PTEntryOffset += PTEntrySize; + } +} + + +section_iterator getSectionByAddress(const MachOObjectFile &Obj, + uint64_t Addr) { + section_iterator SI = Obj.section_begin(); + section_iterator SE = Obj.section_end(); + + for (; SI != SE; ++SI) { + uint64_t SAddr, SSize; + SI->getAddress(SAddr); + SI->getSize(SSize); + if ((Addr >= SAddr) && (Addr < SAddr + SSize)) + return SI; + } + + return SE; +} + +relocation_iterator RuntimeDyldMachO::processSECTDIFFRelocation( + unsigned SectionID, + relocation_iterator RelI, + ObjectImage &Obj, + ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile *MachO = + static_cast(Obj.getObjectFile()); + 
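// (Illustrative sketch, not from the patch: both populate* functions above
// walk a Mach-O section whose header fields reserved1 and reserved2 give the
// first indirect-symbol-table index and the per-entry size, so entry i of the
// section belongs to indirect symbol reserved1 + i. Standalone model with
// hypothetical types:)
#include <cstdint>
#include <utility>
#include <vector>

struct MiniSection {
  uint32_t Size;       // section size in bytes
  uint32_t Reserved1;  // first index into the indirect symbol table
};

std::vector<std::pair<uint32_t, uint32_t>>  // (indirect symbol, entry offset)
mapEntriesToSymbols(const MiniSection &Sec, uint32_t EntrySize) {
  std::vector<std::pair<uint32_t, uint32_t>> Out;
  for (uint32_t i = 0, Off = 0; Off + EntrySize <= Sec.Size;
       ++i, Off += EntrySize)
    Out.push_back({Sec.Reserved1 + i, Off});
  return Out;
}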
MachO::any_relocation_info RE = + MachO->getRelocation(RelI->getRawDataRefImpl()); + + SectionEntry &Section = Sections[SectionID]; + uint32_t RelocType = MachO->getAnyRelocationType(RE); + bool IsPCRel = MachO->getAnyRelocationPCRel(RE); + unsigned Size = MachO->getAnyRelocationLength(RE); + uint64_t Offset; + RelI->getOffset(Offset); + uint8_t *LocalAddress = Section.Address + Offset; + unsigned NumBytes = 1 << Size; + int64_t Addend = 0; + memcpy(&Addend, LocalAddress, NumBytes); + + ++RelI; + MachO::any_relocation_info RE2 = + MachO->getRelocation(RelI->getRawDataRefImpl()); + + uint32_t AddrA = MachO->getScatteredRelocationValue(RE); + section_iterator SAI = getSectionByAddress(*MachO, AddrA); + assert(SAI != MachO->section_end() && "Can't find section for address A"); + uint64_t SectionABase; + SAI->getAddress(SectionABase); + uint64_t SectionAOffset = AddrA - SectionABase; + SectionRef SectionA = *SAI; + bool IsCode; + SectionA.isText(IsCode); + uint32_t SectionAID = findOrEmitSection(Obj, SectionA, IsCode, + ObjSectionToID); + + uint32_t AddrB = MachO->getScatteredRelocationValue(RE2); + section_iterator SBI = getSectionByAddress(*MachO, AddrB); + assert(SBI != MachO->section_end() && "Can't find section for address B"); + uint64_t SectionBBase; + SBI->getAddress(SectionBBase); + uint64_t SectionBOffset = AddrB - SectionBBase; + SectionRef SectionB = *SBI; + uint32_t SectionBID = findOrEmitSection(Obj, SectionB, IsCode, + ObjSectionToID); + + if (Addend != AddrA - AddrB) + Error("Unexpected SECTDIFF relocation addend."); + + DEBUG(dbgs() << "Found SECTDIFF: AddrA: " << AddrA << ", AddrB: " << AddrB + << ", Addend: " << Addend << ", SectionA ID: " + << SectionAID << ", SectionAOffset: " << SectionAOffset + << ", SectionB ID: " << SectionBID << ", SectionBOffset: " + << SectionBOffset << "\n"); + RelocationEntry R(SectionID, Offset, RelocType, 0, + SectionAID, SectionAOffset, SectionBID, SectionBOffset, + IsPCRel, Size); + + addRelocationForSection(R, SectionAID); + addRelocationForSection(R, SectionBID); + + return ++RelI; +} + +relocation_iterator RuntimeDyldMachO::processI386ScatteredVANILLA( + unsigned SectionID, + relocation_iterator RelI, + ObjectImage &Obj, + ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile *MachO = + static_cast(Obj.getObjectFile()); + MachO::any_relocation_info RE = + MachO->getRelocation(RelI->getRawDataRefImpl()); + + SectionEntry &Section = Sections[SectionID]; + uint32_t RelocType = MachO->getAnyRelocationType(RE); + bool IsPCRel = MachO->getAnyRelocationPCRel(RE); + unsigned Size = MachO->getAnyRelocationLength(RE); + uint64_t Offset; + RelI->getOffset(Offset); + uint8_t *LocalAddress = Section.Address + Offset; + unsigned NumBytes = 1 << Size; + int64_t Addend = 0; + memcpy(&Addend, LocalAddress, NumBytes); + + unsigned SymbolBaseAddr = MachO->getScatteredRelocationValue(RE); + section_iterator TargetSI = getSectionByAddress(*MachO, SymbolBaseAddr); + assert(TargetSI != MachO->section_end() && "Can't find section for symbol"); + uint64_t SectionBaseAddr; + TargetSI->getAddress(SectionBaseAddr); + SectionRef TargetSection = *TargetSI; + bool IsCode; + TargetSection.isText(IsCode); + uint32_t TargetSectionID = findOrEmitSection(Obj, TargetSection, IsCode, + ObjSectionToID); + + Addend -= SectionBaseAddr; + RelocationEntry R(SectionID, Offset, RelocType, Addend, + IsPCRel, Size); + + addRelocationForSection(R, TargetSectionID); + + return ++RelI; +} + relocation_iterator RuntimeDyldMachO::processRelocationRef( unsigned SectionID, 
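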
relocation_iterator RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, @@ -358,18 +540,28 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef( uint32_t RelType = MachO->getAnyRelocationType(RE); // FIXME: Properly handle scattered relocations. - // For now, optimistically skip these: they can often be ignored, as - // the static linker will already have applied the relocation, and it - // only needs to be reapplied if symbols move relative to one another. - // Note: This will fail horribly where the relocations *do* need to be - // applied, but that was already the case. - if (MachO->isRelocationScattered(RE)) - return ++RelI; + // Special case the couple of scattered relocations that we know how + // to handle: SECTDIFF relocations, and scattered VANILLA relocations + // on I386. + // For all other scattered relocations, just bail out and hope for the + // best, since the offsets computed by scattered relocations have often + // been optimistically filled in by the compiler. This will fail + // horribly where the relocations *do* need to be applied, but that was + // already the case. + if (MachO->isRelocationScattered(RE)) { + if (RelType == MachO::GENERIC_RELOC_SECTDIFF || + RelType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF) + return processSECTDIFFRelocation(SectionID, RelI, Obj, ObjSectionToID); + else if (Arch == Triple::x86 && RelType == MachO::GENERIC_RELOC_VANILLA) + return processI386ScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID); + else + return ++RelI; + } RelocationValueRef Value; SectionEntry &Section = Sections[SectionID]; - bool isExtern = MachO->getPlainRelocationExternal(RE); + bool IsExtern = MachO->getPlainRelocationExternal(RE); bool IsPCRel = MachO->getAnyRelocationPCRel(RE); unsigned Size = MachO->getAnyRelocationLength(RE); uint64_t Offset; @@ -379,7 +571,7 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef( uint64_t Addend = 0; memcpy(&Addend, LocalAddress, NumBytes); - if (isExtern) { + if (IsExtern) { // Obtain the symbol name which is referenced in the relocation symbol_iterator Symbol = RelI->getSymbol(); StringRef TargetName; @@ -401,6 +593,17 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef( Value.Addend = Addend; } } + + // Addends for external, PC-rel relocations on i386 point back to the zero + // offset. Calculate the final offset from the relocation target instead. + // This allows us to use the same logic for both external and internal + // relocations in resolveI386RelocationRef. + if (Arch == Triple::x86 && IsPCRel) { + uint64_t RelocAddr = 0; + RelI->getAddress(RelocAddr); + Value.Addend += RelocAddr + 4; + } + } else { SectionRef Sec = MachO->getRelocationSection(RE); bool IsCode = false; @@ -417,6 +620,10 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef( RelType == MachO::X86_64_RELOC_GOT_LOAD)) { assert(IsPCRel); assert(Size == 2); + + // FIXME: Teach the generic code above not to prematurely conflate + // relocation addends and symbol offsets.
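// (Illustrative sketch, not from the patch: the i386 adjustment above as a
// plain function. On-disk addends for external PC-rel i386 relocations are
// relative to address zero, so rebasing by RelocAddr + 4 turns them into an
// offset from the relocation target; for a typical call site the stored
// addend is -(RelocAddr + 4), so the normalized addend is 0:)
#include <cstdint>

int64_t normalizeI386PCRelAddend(int64_t StoredAddend, uint64_t RelocAddr) {
  return StoredAddend + int64_t(RelocAddr + 4);
}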
+ Value.Addend -= Addend; StubMap::const_iterator i = Stubs.find(Value); uint8_t *Addr; if (i != Stubs.end()) { @@ -424,41 +631,45 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef( } else { Stubs[Value] = Section.StubOffset; uint8_t *GOTEntry = Section.Address + Section.StubOffset; - RelocationEntry RE(SectionID, Section.StubOffset, - MachO::X86_64_RELOC_UNSIGNED, 0, false, 3); + RelocationEntry GOTRE(SectionID, Section.StubOffset, + MachO::X86_64_RELOC_UNSIGNED, Value.Addend, false, + 3); if (Value.SymbolName) - addRelocationForSymbol(RE, Value.SymbolName); + addRelocationForSymbol(GOTRE, Value.SymbolName); else - addRelocationForSection(RE, Value.SectionID); + addRelocationForSection(GOTRE, Value.SectionID); Section.StubOffset += 8; Addr = GOTEntry; } - resolveRelocation(Section, Offset, (uint64_t)Addr, - MachO::X86_64_RELOC_UNSIGNED, Value.Addend, true, 2); + RelocationEntry TargetRE(SectionID, Offset, + MachO::X86_64_RELOC_UNSIGNED, Addend, true, + 2); + resolveRelocation(TargetRE, (uint64_t)Addr); } else if (Arch == Triple::arm && (RelType & 0xf) == MachO::ARM_RELOC_BR24) { // This is an ARM branch relocation, need to use a stub function. // Look up for existing stub. StubMap::const_iterator i = Stubs.find(Value); - if (i != Stubs.end()) - resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, - RelType, 0, IsPCRel, Size); - else { + uint8_t *Addr; + if (i != Stubs.end()) { + Addr = Section.Address + i->second; + } else { // Create a new stub function. Stubs[Value] = Section.StubOffset; uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, - MachO::GENERIC_RELOC_VANILLA, Value.Addend); + RelocationEntry StubRE(SectionID, StubTargetAddr - Section.Address, + MachO::GENERIC_RELOC_VANILLA, Value.Addend); if (Value.SymbolName) - addRelocationForSymbol(RE, Value.SymbolName); + addRelocationForSymbol(StubRE, Value.SymbolName); else - addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, RelType, - 0, IsPCRel, Size); + addRelocationForSection(StubRE, Value.SectionID); + Addr = Section.Address + Section.StubOffset; Section.StubOffset += getMaxStubSize(); } + RelocationEntry TargetRE(Value.SectionID, Offset, RelType, 0, IsPCRel, + Size); + resolveRelocation(TargetRE, (uint64_t)Addr); } else { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, IsPCRel, Size); if (Value.SymbolName) diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 1006176..060eb8c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -25,22 +25,31 @@ using namespace llvm::object; namespace llvm { class RuntimeDyldMachO : public RuntimeDyldImpl { - bool resolveI386Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, bool isPCRel, unsigned Type, - unsigned Size, int64_t Addend); - bool resolveX86_64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, bool isPCRel, unsigned Type, - unsigned Size, int64_t Addend); - bool resolveARMRelocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, bool isPCRel, unsigned Type, - unsigned Size, int64_t Addend); - bool resolveARM64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress, - uint64_t Value, bool IsPCRel, unsigned Type, - unsigned Size, int64_t Addend); - - void 
resolveRelocation(const SectionEntry &Section, uint64_t Offset, - uint64_t Value, uint32_t Type, int64_t Addend, - bool isPCRel, unsigned Size); +private: + + /// Write the least significant 'Size' bytes in 'Value' out at the address + /// pointed to by Addr. + bool applyRelocationValue(uint8_t *Addr, uint64_t Value, unsigned Size) { + for (unsigned i = 0; i < Size; ++i) { + *Addr++ = (uint8_t)Value; + Value >>= 8; + } + + return false; + } + + bool resolveI386Relocation(const RelocationEntry &RE, uint64_t Value); + bool resolveX86_64Relocation(const RelocationEntry &RE, uint64_t Value); + bool resolveARMRelocation(const RelocationEntry &RE, uint64_t Value); + bool resolveAArch64Relocation(const RelocationEntry &RE, uint64_t Value); + + // Populate stubs in __jump_table section. + void populateJumpTable(MachOObjectFile &Obj, const SectionRef &JTSection, + unsigned JTSectionID); + + // Populate __pointers section. + void populatePointersSection(MachOObjectFile &Obj, const SectionRef &PTSection, + unsigned PTSectionID); unsigned getMaxStubSize() override { if (Arch == Triple::arm || Arch == Triple::thumb) @@ -53,6 +62,18 @@ class RuntimeDyldMachO : public RuntimeDyldImpl { unsigned getStubAlignment() override { return 1; } + relocation_iterator processSECTDIFFRelocation( + unsigned SectionID, + relocation_iterator RelI, + ObjectImage &ObjImg, + ObjSectionToIDMap &ObjSectionToID); + + relocation_iterator processI386ScatteredVANILLA( + unsigned SectionID, + relocation_iterator RelI, + ObjectImage &ObjImg, + ObjSectionToIDMap &ObjSectionToID); + struct EHFrameRelatedSections { EHFrameRelatedSections() : EHFrameSID(RTDYLD_INVALID_SECTION_ID), @@ -81,15 +102,16 @@ public: bool isCompatibleFormat(const ObjectBuffer *Buffer) const override; bool isCompatibleFile(const object::ObjectFile *Obj) const override; void registerEHFrames() override; - void finalizeLoad(ObjSectionToIDMap &SectionMap) override; + void finalizeLoad(ObjectImage &ObjImg, + ObjSectionToIDMap &SectionMap) override; static ObjectImage *createObjectImage(ObjectBuffer *InputBuffer) { return new ObjectImageCommon(InputBuffer); } static ObjectImage * - createObjectImageFromFile(object::ObjectFile *InputObject) { - return new ObjectImageCommon(InputObject); + createObjectImageFromFile(std::unique_ptr InputObject) { + return new ObjectImageCommon(std::move(InputObject)); } }; diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index 9b7d348..b10d51f 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -47,7 +47,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, TheTriple.setTriple(sys::getProcessTriple()); // Adjust the triple to match what the user requested. 
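// (Illustrative sketch, not from the patch: applyRelocationValue above writes
// a byte at a time because the fixup address carries no alignment guarantee.
// An equivalent standalone little-endian helper with a small self-check:)
#include <cassert>
#include <cstdint>

void writeBytesLE(uint8_t *Addr, uint64_t Value, unsigned Size) {
  for (unsigned i = 0; i < Size; ++i) {   // least-significant byte first
    *Addr++ = uint8_t(Value);
    Value >>= 8;
  }
}

int main() {
  uint8_t Buf[4] = {};
  writeBytesLE(Buf, 0x11223344, 4);
  assert(Buf[0] == 0x44 && Buf[3] == 0x11);
  return 0;
}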
- const Target *TheTarget = 0; + const Target *TheTarget = nullptr; if (!MArch.empty()) { for (TargetRegistry::iterator it = TargetRegistry::begin(), ie = TargetRegistry::end(); it != ie; ++it) { @@ -61,7 +61,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, if (ErrorStr) *ErrorStr = "No available targets are compatible with this -march, " "see -version for the available targets.\n"; - return 0; + return nullptr; } // Adjust the triple to match (if known), otherwise stick with the @@ -72,10 +72,10 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, } else { std::string Error; TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error); - if (TheTarget == 0) { + if (!TheTarget) { if (ErrorStr) *ErrorStr = Error; - return 0; + return nullptr; } } diff --git a/lib/IR/Android.mk b/lib/IR/Android.mk index 071bb04..dd95703 100644 --- a/lib/IR/Android.mk +++ b/lib/IR/Android.mk @@ -30,6 +30,7 @@ vmcore_SRC_FILES := \ LeakDetector.cpp \ LegacyPassManager.cpp \ Mangler.cpp \ + MDBuilder.cpp \ Metadata.cpp \ Module.cpp \ Pass.cpp \ diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index d4670e4..0fef0d0 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -51,19 +51,19 @@ AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {} static const Module *getModuleFromVal(const Value *V) { if (const Argument *MA = dyn_cast(V)) - return MA->getParent() ? MA->getParent()->getParent() : 0; + return MA->getParent() ? MA->getParent()->getParent() : nullptr; if (const BasicBlock *BB = dyn_cast(V)) - return BB->getParent() ? BB->getParent()->getParent() : 0; + return BB->getParent() ? BB->getParent()->getParent() : nullptr; if (const Instruction *I = dyn_cast(V)) { - const Function *M = I->getParent() ? I->getParent()->getParent() : 0; - return M ? M->getParent() : 0; + const Function *M = I->getParent() ? I->getParent()->getParent() : nullptr; + return M ? M->getParent() : nullptr; } if (const GlobalValue *GV = dyn_cast(V)) return GV->getParent(); - return 0; + return nullptr; } static void PrintCallingConv(unsigned cc, raw_ostream &Out) { @@ -78,7 +78,6 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; - case CallingConv::X86_CDeclMethod:Out << "x86_cdeclmethodcc"; break; case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; @@ -421,10 +420,10 @@ static SlotTracker *createSlotTracker(const Value *V) { if (!MD->isFunctionLocal()) return new SlotTracker(MD->getFunction()); - return new SlotTracker((Function *)0); + return new SlotTracker((Function *)nullptr); } - return 0; + return nullptr; } #if 0 @@ -436,21 +435,21 @@ static SlotTracker *createSlotTracker(const Value *V) { // Module level constructor. Causes the contents of the Module (sans functions) // to be added to the slot table. SlotTracker::SlotTracker(const Module *M) - : TheModule(M), TheFunction(0), FunctionProcessed(false), + : TheModule(M), TheFunction(nullptr), FunctionProcessed(false), mNext(0), fNext(0), mdnNext(0), asNext(0) { } // Function level constructor. Causes the contents of the Module and the one // function provided to be added to the slot table. SlotTracker::SlotTracker(const Function *F) - : TheModule(F ? 
F->getParent() : 0), TheFunction(F), FunctionProcessed(false), - mNext(0), fNext(0), mdnNext(0), asNext(0) { + : TheModule(F ? F->getParent() : nullptr), TheFunction(F), + FunctionProcessed(false), mNext(0), fNext(0), mdnNext(0), asNext(0) { } inline void SlotTracker::initialize() { if (TheModule) { processModule(); - TheModule = 0; ///< Prevent re-processing next time we're called. + TheModule = nullptr; ///< Prevent re-processing next time we're called. } if (TheFunction && !FunctionProcessed) @@ -560,7 +559,7 @@ void SlotTracker::processFunction() { void SlotTracker::purgeFunction() { ST_DEBUG("begin purgeFunction!\n"); fMap.clear(); // Simply discard the function level map - TheFunction = 0; + TheFunction = nullptr; FunctionProcessed = false; ST_DEBUG("end purgeFunction!\n"); } @@ -1048,7 +1047,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node, Out << "!{"; for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) { const Value *V = Node->getOperand(mi); - if (V == 0) + if (!V) Out << "null"; else { TypePrinter->print(V->getType(), Out); @@ -1126,12 +1125,6 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, return; } - if (V->getValueID() == Value::PseudoSourceValueVal || - V->getValueID() == Value::FixedStackPseudoSourceValueVal) { - V->print(Out); - return; - } - char Prefix = '%'; int Slot; // If we have a SlotTracker, use it. @@ -1160,7 +1153,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, Slot = Machine->getLocalSlot(V); } delete Machine; - Machine = 0; + Machine = nullptr; } else { Slot = -1; } @@ -1194,7 +1187,7 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M, AssemblyWriter::~AssemblyWriter() { } void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { - if (Operand == 0) { + if (!Operand) { Out << ""; return; } @@ -1259,7 +1252,7 @@ void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering, void AssemblyWriter::writeParamOperand(const Value *Operand, AttributeSet Attrs, unsigned Idx) { - if (Operand == 0) { + if (!Operand) { Out << ""; return; } @@ -1500,10 +1493,16 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { PrintLinkage(GA->getLinkage(), Out); + PointerType *Ty = GA->getType(); const Constant *Aliasee = GA->getAliasee(); + if (!Aliasee || Ty != Aliasee->getType()) { + if (unsigned AddressSpace = Ty->getAddressSpace()) + Out << "addrspace(" << AddressSpace << ") "; + TypePrinter.print(Ty->getElementType(), Out); + Out << ", "; + } - if (Aliasee == 0) { - TypePrinter.print(GA->getType(), Out); + if (!Aliasee) { Out << " <>"; } else { writeOperand(Aliasee, !isa(Aliasee)); @@ -1707,7 +1706,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { Out << ""; } - if (BB->getParent() == 0) { + if (!BB->getParent()) { Out.PadToColumn(50); Out << "; Error: Block without parent!"; } else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block? @@ -1774,8 +1773,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) { Out << '%' << SlotNum << " = "; } - if (isa(I) && cast(I).isTailCall()) - Out << "tail "; + if (const CallInst *CI = dyn_cast(&I)) { + if (CI->isMustTailCall()) + Out << "musttail "; + else if (CI->isTailCall()) + Out << "tail "; + } // Print out the opcode... Out << I.getOpcodeName(); @@ -1804,7 +1807,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeAtomicRMWOperation(Out, RMWI->getOperation()); // Print out the type of the operands... 
- const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0; + const Value *Operand = I.getNumOperands() ? I.getOperand(0) : nullptr; // Special case conditional branches to swizzle the condition out to the front if (isa(I) && cast(I).isConditional()) { @@ -2147,15 +2150,15 @@ void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { W.printModule(this); } -void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { +void NamedMDNode::print(raw_ostream &ROS) const { SlotTracker SlotTable(getParent()); formatted_raw_ostream OS(ROS); - AssemblyWriter W(OS, SlotTable, getParent(), AAW); + AssemblyWriter W(OS, SlotTable, getParent(), nullptr); W.printNamedMDNode(this); } void Type::print(raw_ostream &OS) const { - if (this == 0) { + if (!this) { OS << ""; return; } @@ -2170,24 +2173,24 @@ void Type::print(raw_ostream &OS) const { } } -void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { - if (this == 0) { +void Value::print(raw_ostream &ROS) const { + if (!this) { ROS << "printing a value\n"; return; } formatted_raw_ostream OS(ROS); if (const Instruction *I = dyn_cast(this)) { - const Function *F = I->getParent() ? I->getParent()->getParent() : 0; + const Function *F = I->getParent() ? I->getParent()->getParent() : nullptr; SlotTracker SlotTable(F); - AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), AAW); + AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr); W.printInstruction(*I); } else if (const BasicBlock *BB = dyn_cast(this)) { SlotTracker SlotTable(BB->getParent()); - AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), AAW); + AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr); W.printBasicBlock(BB); } else if (const GlobalValue *GV = dyn_cast(this)) { SlotTracker SlotTable(GV->getParent()); - AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW); + AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr); if (const GlobalVariable *V = dyn_cast(GV)) W.printGlobal(V); else if (const Function *F = dyn_cast(GV)) @@ -2197,20 +2200,18 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { } else if (const MDNode *N = dyn_cast(this)) { const Function *F = N->getFunction(); SlotTracker SlotTable(F); - AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW); + AssemblyWriter W(OS, SlotTable, F ? F->getParent() : nullptr, nullptr); W.printMDNodeBody(N); } else if (const Constant *C = dyn_cast(this)) { TypePrinting TypePrinter; TypePrinter.print(C->getType(), OS); OS << ' '; - WriteConstantInternal(OS, C, TypePrinter, 0, 0); + WriteConstantInternal(OS, C, TypePrinter, nullptr, nullptr); } else if (isa(this) || isa(this) || isa(this)) { this->printAsOperand(OS); } else { - // Otherwise we don't know what it is. Call the virtual function to - // allow a subclass to print itself. - printCustom(OS); + llvm_unreachable("Unknown value to print out!"); } } @@ -2220,7 +2221,7 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) cons if (!PrintType && ((!isa(this) && !isa(this)) || hasName() || isa(this))) { - WriteAsOperandInternal(O, this, 0, 0, M); + WriteAsOperandInternal(O, this, nullptr, nullptr, M); return; } @@ -2235,12 +2236,7 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) cons O << ' '; } - WriteAsOperandInternal(O, this, &TypePrinter, 0, M); -} - -// Value::printCustom - subclasses should override this to implement printing. 
-void Value::printCustom(raw_ostream &OS) const { - llvm_unreachable("Unknown value to print out!"); + WriteAsOperandInternal(O, this, &TypePrinter, nullptr, M); } // Value::dump - allow easy printing of Values from the debugger. @@ -2250,7 +2246,7 @@ void Value::dump() const { print(dbgs()); dbgs() << '\n'; } void Type::dump() const { print(dbgs()); } // Module::dump() - Allow printing of Modules from the debugger. -void Module::dump() const { print(dbgs(), 0); } +void Module::dump() const { print(dbgs(), nullptr); } // NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger. -void NamedMDNode::dump() const { print(dbgs(), 0); } +void NamedMDNode::dump() const { print(dbgs()); } diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index 9d9d948..a9074bb 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/Attributes.h" #include "AttributeImpl.h" #include "LLVMContextImpl.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Type.h" #include "llvm/Support/Atomic.h" @@ -192,6 +193,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "noinline"; if (hasAttribute(Attribute::NonLazyBind)) return "nonlazybind"; + if (hasAttribute(Attribute::NonNull)) + return "nonnull"; if (hasAttribute(Attribute::NoRedZone)) return "noredzone"; if (hasAttribute(Attribute::NoReturn)) @@ -391,6 +394,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::Builtin: return 1ULL << 41; case Attribute::OptimizeNone: return 1ULL << 42; case Attribute::InAlloca: return 1ULL << 43; + case Attribute::NonNull: return 1ULL << 44; } llvm_unreachable("Unsupported attribute type"); } @@ -402,7 +406,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { AttributeSetNode *AttributeSetNode::get(LLVMContext &C, ArrayRef Attrs) { if (Attrs.empty()) - return 0; + return nullptr; // Otherwise, build a key to look up the existing attributes. LLVMContextImpl *pImpl = C.pImpl; @@ -595,7 +599,8 @@ AttributeSet AttributeSet::get(LLVMContext &C, return getImpl(C, Attrs); } -AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) { +AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, + const AttrBuilder &B) { if (!B.hasAttributes()) return AttributeSet(); @@ -617,9 +622,9 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) { } // Add target-dependent (string) attributes. - for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end(); - I != E; ++I) - Attrs.push_back(std::make_pair(Index, Attribute::get(C, I->first,I->second))); + for (const AttrBuilder::td_type &TDA : B.td_attrs()) + Attrs.push_back( + std::make_pair(Index, Attribute::get(C, TDA.first, TDA.second))); return get(C, Attrs); } @@ -836,7 +841,7 @@ bool AttributeSet::hasAttributes(unsigned Index) const { /// \brief Return true if the specified attribute is set for at least one /// parameter or for the return value. bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { - if (pImpl == 0) return false; + if (!pImpl) return false; for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) for (AttributeSetImpl::iterator II = pImpl->begin(I), @@ -877,14 +882,14 @@ std::string AttributeSet::getAsString(unsigned Index, /// \brief The attributes for the specified index are returned. 
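// (Illustrative sketch, not from the patch: getAttrMask above assigns each
// enum attribute one bit of a 64-bit mask, and the NonNull attribute added by
// this change takes the next free bit, 44:)
#include <cstdint>

constexpr uint64_t attrBit(unsigned Index) { return 1ULL << Index; }
static_assert(attrBit(44) == 0x100000000000ULL, "NonNull occupies bit 44");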
AttributeSetNode *AttributeSet::getAttributes(unsigned Index) const { - if (!pImpl) return 0; + if (!pImpl) return nullptr; // Loop through to find the attribute node we want. for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) if (pImpl->getSlotIndex(I) == Index) return pImpl->getSlotNode(I); - return 0; + return nullptr; } AttributeSet::iterator AttributeSet::begin(unsigned Slot) const { @@ -1175,6 +1180,7 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { .addAttribute(Attribute::Nest) .addAttribute(Attribute::NoAlias) .addAttribute(Attribute::NoCapture) + .addAttribute(Attribute::NonNull) .addAttribute(Attribute::ReadNone) .addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::StructRet) diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index b7429b3..e255113 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -115,7 +115,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.avx.movnt.ps.256" || Name == "x86.sse42.crc32.64.8" || (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { - NewFn = 0; + NewFn = nullptr; return true; } // SSE4.1 ptest functions may have an old signature. @@ -158,7 +158,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { - NewFn = 0; + NewFn = nullptr; bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn); // Upgrade intrinsic attributes. This does not change the function. @@ -170,7 +170,62 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { return Upgraded; } +static bool UpgradeGlobalStructors(GlobalVariable *GV) { + ArrayType *ATy = dyn_cast(GV->getType()->getElementType()); + StructType *OldTy = + ATy ? dyn_cast(ATy->getElementType()) : nullptr; + + // Only upgrade an array of a two field struct with the appropriate field + // types. + if (!OldTy || OldTy->getNumElements() != 2) + return false; + + // Get the upgraded 3 element type. + PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo(); + Type *Tys[3] = { + OldTy->getElementType(0), + OldTy->getElementType(1), + VoidPtrTy + }; + StructType *NewTy = + StructType::get(GV->getContext(), Tys, /*isPacked=*/false); + + // Build new constants with a null third field filled in. + Constant *OldInitC = GV->getInitializer(); + ConstantArray *OldInit = dyn_cast(OldInitC); + if (!OldInit && !isa(OldInitC)) + return false; + std::vector Initializers; + if (OldInit) { + for (Use &U : OldInit->operands()) { + ConstantStruct *Init = cast(&U); + Constant *NewInit = + ConstantStruct::get(NewTy, Init->getOperand(0), Init->getOperand(1), + Constant::getNullValue(VoidPtrTy), nullptr); + Initializers.push_back(NewInit); + } + } + assert(Initializers.size() == ATy->getNumElements()); + + // Replace the old GV with a new one. 
+ ATy = ArrayType::get(NewTy, Initializers.size()); + Constant *NewInit = ConstantArray::get(ATy, Initializers); + GlobalVariable *NewGV = new GlobalVariable( + *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "", + GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(), + GV->isExternallyInitialized()); + NewGV->copyAttributesFrom(GV); + NewGV->takeName(GV); + assert(GV->use_empty() && "program cannot use initializer list"); + GV->eraseFromParent(); + return true; +} + bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { + if (GV->getName() == "llvm.global_ctors" || + GV->getName() == "llvm.global_dtors") + return UpgradeGlobalStructors(GV); + // Nothing to do yet. return false; } @@ -453,9 +508,9 @@ void llvm::UpgradeInstWithTBAATag(Instruction *I) { Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp) { if (Opc != Instruction::BitCast) - return 0; + return nullptr; - Temp = 0; + Temp = nullptr; Type *SrcTy = V->getType(); if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { @@ -469,12 +524,12 @@ Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); } - return 0; + return nullptr; } Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { if (Opc != Instruction::BitCast) - return 0; + return nullptr; Type *SrcTy = C->getType(); if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && @@ -489,7 +544,7 @@ Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { DestTy); } - return 0; + return nullptr; } /// Check the debug info version number, if it is out-dated, drop the debug diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp index 3079f0a..ba07433 100644 --- a/lib/IR/BasicBlock.cpp +++ b/lib/IR/BasicBlock.cpp @@ -27,7 +27,7 @@ using namespace llvm; ValueSymbolTable *BasicBlock::getValueSymbolTable() { if (Function *F = getParent()) return &F->getValueSymbolTable(); - return 0; + return nullptr; } const DataLayout *BasicBlock::getDataLayout() const { @@ -45,7 +45,7 @@ template class llvm::SymbolTableListTraits; BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent, BasicBlock *InsertBefore) - : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(0) { + : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(nullptr) { // Make sure that we get added to a function LeakDetector::addGarbageObject(this); @@ -81,7 +81,7 @@ BasicBlock::~BasicBlock() { } } - assert(getParent() == 0 && "BasicBlock still linked into the program!"); + assert(getParent() == nullptr && "BasicBlock still linked into the program!"); dropAllReferences(); InstList.clear(); } @@ -122,12 +122,12 @@ void BasicBlock::moveAfter(BasicBlock *MovePos) { TerminatorInst *BasicBlock::getTerminator() { - if (InstList.empty()) return 0; + if (InstList.empty()) return nullptr; return dyn_cast(&InstList.back()); } const TerminatorInst *BasicBlock::getTerminator() const { - if (InstList.empty()) return 0; + if (InstList.empty()) return nullptr; return dyn_cast(&InstList.back()); } @@ -186,10 +186,10 @@ void BasicBlock::dropAllReferences() { /// return the block, otherwise return a null pointer. BasicBlock *BasicBlock::getSinglePredecessor() { pred_iterator PI = pred_begin(this), E = pred_end(this); - if (PI == E) return 0; // No preds. + if (PI == E) return nullptr; // No preds. 
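// (Illustrative sketch, not from the patch: the effect of the
// UpgradeGlobalStructors logic above, modeled on plain structs. Each
// llvm.global_ctors / llvm.global_dtors entry grows from two fields to three,
// with the new third field filled with null:)
#include <cstdint>
#include <vector>

struct OldCtorEntry { int32_t Priority; void (*Func)(); };             // 2 fields
struct NewCtorEntry { int32_t Priority; void (*Func)(); void *Data; }; // 3 fields

std::vector<NewCtorEntry> upgradeCtorArray(const std::vector<OldCtorEntry> &Old) {
  std::vector<NewCtorEntry> New;
  for (const OldCtorEntry &E : Old)
    New.push_back({E.Priority, E.Func, nullptr});  // third field null-filled
  return New;
}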
BasicBlock *ThePred = *PI; ++PI; return (PI == E) ? ThePred : nullptr /*multiple preds*/; } /// getUniquePredecessor - If this basic block has a unique predecessor block, @@ -199,12 +199,12 @@ BasicBlock *BasicBlock::getSinglePredecessor() { /// a switch statement with multiple cases having the same destination). BasicBlock *BasicBlock::getUniquePredecessor() { pred_iterator PI = pred_begin(this), E = pred_end(this); - if (PI == E) return 0; // No preds. + if (PI == E) return nullptr; // No preds. BasicBlock *PredBB = *PI; ++PI; for (;PI != E; ++PI) { if (*PI != PredBB) - return 0; + return nullptr; // The same predecessor appears multiple times in the predecessor list. // This is OK. } @@ -277,7 +277,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, PN->removeIncomingValue(Pred, false); // If all incoming values to the Phi are the same, we can replace the Phi // with that value. - Value* PNV = 0; + Value* PNV = nullptr; if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue())) if (PNV != PN) { PN->replaceAllUsesWith(PNV); diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index 09117aa..b027ae5 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -7,12 +7,12 @@ add_llvm_library(LLVMCore ConstantRange.cpp Constants.cpp Core.cpp - DiagnosticInfo.cpp - DiagnosticPrinter.cpp DIBuilder.cpp DataLayout.cpp DebugInfo.cpp DebugLoc.cpp + DiagnosticInfo.cpp + DiagnosticPrinter.cpp Dominators.cpp Function.cpp GCOV.cpp @@ -28,6 +28,7 @@ add_llvm_library(LLVMCore LLVMContextImpl.cpp LeakDetector.cpp LegacyPassManager.cpp + MDBuilder.cpp Mangler.cpp Metadata.cpp Module.cpp diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 612aba0..706e66f 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -51,7 +51,7 @@ static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) { // Analysis/ConstantFolding.cpp unsigned NumElts = DstTy->getNumElements(); if (NumElts != CV->getType()->getVectorNumElements()) - return 0; + return nullptr; Type *DstEltTy = DstTy->getElementType(); @@ -94,7 +94,7 @@ foldConstantCastPair( // Let CastInst::isEliminableCastPair do the heavy lifting. return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, - 0, FakeIntPtrTy, 0); + nullptr, FakeIntPtrTy, nullptr); } static Constant *FoldBitCast(Constant *V, Type *DestTy) { @@ -139,7 +139,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { if (VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) { assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() && "Not cast between same sized vectors!"); - SrcTy = NULL; + SrcTy = nullptr; // First, check for null. Undef is already handled. if (isa<ConstantAggregateZero>(V)) return Constant::getNullValue(DestTy); @@ -173,7 +173,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { CI->getValue())); // Otherwise, can't fold this (vector?) - return 0; + return nullptr; } // Handle ConstantFP input: FP -> Integral. @@ -181,7 +181,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { return ConstantInt::get(FP->getContext(), FP->getValueAPF().bitcastToAPInt()); - return 0; + return nullptr; } @@ -216,14 +216,14 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, // If the input is a constant expr, we might be able to recursively simplify. // If not, we definitely can't do anything.
ConstantExpr *CE = dyn_cast(C); - if (CE == 0) return 0; - + if (!CE) return nullptr; + switch (CE->getOpcode()) { - default: return 0; + default: return nullptr; case Instruction::Or: { Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize); - if (RHS == 0) - return 0; + if (!RHS) + return nullptr; // X | -1 -> -1. if (ConstantInt *RHSC = dyn_cast(RHS)) @@ -231,32 +231,32 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, return RHSC; Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize); - if (LHS == 0) - return 0; + if (!LHS) + return nullptr; return ConstantExpr::getOr(LHS, RHS); } case Instruction::And: { Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize); - if (RHS == 0) - return 0; + if (!RHS) + return nullptr; // X & 0 -> 0. if (RHS->isNullValue()) return RHS; Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize); - if (LHS == 0) - return 0; + if (!LHS) + return nullptr; return ConstantExpr::getAnd(LHS, RHS); } case Instruction::LShr: { ConstantInt *Amt = dyn_cast(CE->getOperand(1)); - if (Amt == 0) - return 0; + if (!Amt) + return nullptr; unsigned ShAmt = Amt->getZExtValue(); // Cannot analyze non-byte shifts. if ((ShAmt & 7) != 0) - return 0; + return nullptr; ShAmt >>= 3; // If the extract is known to be all zeros, return zero. @@ -268,17 +268,17 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize); // TODO: Handle the 'partially zero' case. - return 0; + return nullptr; } case Instruction::Shl: { ConstantInt *Amt = dyn_cast(CE->getOperand(1)); - if (Amt == 0) - return 0; + if (!Amt) + return nullptr; unsigned ShAmt = Amt->getZExtValue(); // Cannot analyze non-byte shifts. if ((ShAmt & 7) != 0) - return 0; + return nullptr; ShAmt >>= 3; // If the extract is known to be all zeros, return zero. @@ -290,7 +290,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize); // TODO: Handle the 'partially zero' case. - return 0; + return nullptr; } case Instruction::ZExt: { @@ -324,7 +324,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart, } // TODO: Handle the 'partially zero' case. - return 0; + return nullptr; } } } @@ -376,7 +376,7 @@ static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, // If there's no interesting folding happening, bail so that we don't create // a constant that looks like it needs folding but really doesn't. if (!Folded) - return 0; + return nullptr; // Base case: Get a regular sizeof expression. Constant *C = ConstantExpr::getSizeOf(Ty); @@ -442,7 +442,7 @@ static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, // If there's no interesting folding happening, bail so that we don't create // a constant that looks like it needs folding but really doesn't. if (!Folded) - return 0; + return nullptr; // Base case: Get a regular alignof expression. Constant *C = ConstantExpr::getAlignOf(Ty); @@ -473,7 +473,7 @@ static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, unsigned NumElems = STy->getNumElements(); // An empty struct has no members. if (NumElems == 0) - return 0; + return nullptr; // Check for a struct with all members having the same size. 
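// (Illustrative sketch, not from the patch: the LShr/Shl cases above rely on
// whole-byte shifts simply moving the byte-extraction window, which is why
// non-byte shift amounts bail out. A standalone check of that arithmetic with
// a hypothetical helper bytesOf:)
#include <cassert>
#include <cstdint>

// Bytes [Start, Start+Size) of a value, as an integer.
uint64_t bytesOf(uint64_t V, unsigned Start, unsigned Size) {
  V >>= 8 * Start;
  return Size >= 8 ? V : V & ((uint64_t(1) << (8 * Size)) - 1);
}

int main() {
  uint64_t X = 0x8877665544332211ULL;
  // A whole-byte lshr moves the extraction window up:
  assert(bytesOf(X >> 16, 1, 2) == bytesOf(X, 3, 2));
  // A whole-byte shl moves it down:
  assert(bytesOf(X << 16, 3, 2) == bytesOf(X, 1, 2));
  return 0;
}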
Constant *MemberSize = getFoldedSizeOf(STy->getElementType(0), DestTy, true); @@ -497,7 +497,7 @@ static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, // If there's no interesting folding happening, bail so that we don't create // a constant that looks like it needs folding but really doesn't. if (!Folded) - return 0; + return nullptr; // Base case: Get a regular offsetof expression. Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo); @@ -582,7 +582,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, APFloat::rmNearestTiesToEven, &ignored); return ConstantFP::get(V->getContext(), Val); } - return 0; // Can't fold. + return nullptr; // Can't fold. case Instruction::FPToUI: case Instruction::FPToSI: if (ConstantFP *FPC = dyn_cast(V)) { @@ -595,11 +595,11 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, APInt Val(DestBitWidth, x); return ConstantInt::get(FPC->getContext(), Val); } - return 0; // Can't fold. + return nullptr; // Can't fold. case Instruction::IntToPtr: //always treated as unsigned if (V->isNullValue()) // Is it an integral null value? return ConstantPointerNull::get(cast(DestTy)); - return 0; // Other pointer types cannot be casted + return nullptr; // Other pointer types cannot be casted case Instruction::PtrToInt: // always treated as unsigned // Is it a null pointer value? if (V->isNullValue()) @@ -643,7 +643,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, } } // Other pointer types cannot be casted - return 0; + return nullptr; case Instruction::UIToFP: case Instruction::SIToFP: if (ConstantInt *CI = dyn_cast(V)) { @@ -655,21 +655,21 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, APFloat::rmNearestTiesToEven); return ConstantFP::get(V->getContext(), apf); } - return 0; + return nullptr; case Instruction::ZExt: if (ConstantInt *CI = dyn_cast(V)) { uint32_t BitWidth = cast(DestTy)->getBitWidth(); return ConstantInt::get(V->getContext(), CI->getValue().zext(BitWidth)); } - return 0; + return nullptr; case Instruction::SExt: if (ConstantInt *CI = dyn_cast(V)) { uint32_t BitWidth = cast(DestTy)->getBitWidth(); return ConstantInt::get(V->getContext(), CI->getValue().sext(BitWidth)); } - return 0; + return nullptr; case Instruction::Trunc: { uint32_t DestBitWidth = cast(DestTy)->getBitWidth(); if (ConstantInt *CI = dyn_cast(V)) { @@ -685,12 +685,12 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, if (Constant *Res = ExtractConstantBytes(V, 0, DestBitWidth / 8)) return Res; - return 0; + return nullptr; } case Instruction::BitCast: return FoldBitCast(V, DestTy); case Instruction::AddrSpaceCast: - return 0; + return nullptr; } } @@ -746,7 +746,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond, return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2)); } - return 0; + return nullptr; } Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val, @@ -766,14 +766,14 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val, return UndefValue::get(Val->getType()->getVectorElementType()); return Val->getAggregateElement(Index); } - return 0; + return nullptr; } Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, Constant *Elt, Constant *Idx) { ConstantInt *CIdx = dyn_cast(Idx); - if (!CIdx) return 0; + if (!CIdx) return nullptr; const APInt &IdxVal = CIdx->getValue(); SmallVector Result; @@ -803,7 +803,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, return 
UndefValue::get(VectorType::get(EltTy, MaskNumElts)); // Don't break the bitcode reader hack. - if (isa(Mask)) return 0; + if (isa(Mask)) return nullptr; unsigned SrcNumElts = V1->getType()->getVectorNumElements(); @@ -842,7 +842,7 @@ Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg, if (Constant *C = Agg->getAggregateElement(Idxs[0])) return ConstantFoldExtractValueInstruction(C, Idxs.slice(1)); - return 0; + return nullptr; } Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, @@ -863,8 +863,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg, SmallVector Result; for (unsigned i = 0; i != NumElts; ++i) { Constant *C = Agg->getAggregateElement(i); - if (C == 0) return 0; - + if (!C) return nullptr; + if (Idxs[0] == i) C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1)); @@ -1209,7 +1209,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, } // We don't know how to fold this. - return 0; + return nullptr; } /// isZeroSizedType - This type is zero sized if its an array or structure of @@ -1289,7 +1289,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) { if (!isa(V1)) { if (!isa(V2)) { // We distilled thisUse the standard constant folder for a few cases - ConstantInt *R = 0; + ConstantInt *R = nullptr; R = dyn_cast( ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2)); if (R && !R->isZero()) @@ -1355,7 +1355,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, !isa(V2)) { // We distilled this down to a simple case, use the standard constant // folder. - ConstantInt *R = 0; + ConstantInt *R = nullptr; ICmpInst::Predicate pred = ICmpInst::ICMP_EQ; R = dyn_cast(ConstantExpr::getICmp(pred, V1, V2)); if (R && !R->isZero()) @@ -1885,7 +1885,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, return ConstantExpr::getICmp(pred, C2, C1); } } - return 0; + return nullptr; } /// isInBoundsIndices - Test whether the given sequence of *normalized* indices @@ -1951,7 +1951,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (isa(C)) { PointerType *Ptr = cast(C->getType()); Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs); - assert(Ty != 0 && "Invalid indices for GEP!"); + assert(Ty && "Invalid indices for GEP!"); return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace())); } @@ -1965,7 +1965,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, if (isNull) { PointerType *Ptr = cast(C->getType()); Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs); - assert(Ty != 0 && "Invalid indices for GEP!"); + assert(Ty && "Invalid indices for GEP!"); return ConstantPointerNull::get(PointerType::get(Ty, Ptr->getAddressSpace())); } @@ -1977,7 +1977,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, // getelementptr instructions into a single instruction. 
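The comment above describes folding a GEP whose base is itself a GEP. As a sketch only, modeling a getelementptr over i8 as plain byte-offset addition (the real fold also has to reconcile index types and struct boundaries):

    #include <cassert>
    #include <cstdint>

    // Toy model: a GEP over i8 is just base plus a byte offset.
    uint64_t gep(uint64_t Base, uint64_t Offset) { return Base + Offset; }

    int main() {
      uint64_t P = 0x1000;
      // When the inner GEP feeds the outer GEP's base, the two constant
      // offsets combine into a single GEP.
      assert(gep(gep(P, 8), 4) == gep(P, 12));
      return 0;
    }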
// if (CE->getOpcode() == Instruction::GetElementPtr) { - Type *LastTy = 0; + Type *LastTy = nullptr; for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); I != E; ++I) LastTy = *I; @@ -2072,7 +2072,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, bool Unknown = false; SmallVector NewIdxs; Type *Ty = C->getType(); - Type *Prev = 0; + Type *Prev = nullptr; for (unsigned i = 0, e = Idxs.size(); i != e; Prev = Ty, Ty = cast(Ty)->getTypeAtIndex(Idxs[i]), ++i) { if (ConstantInt *CI = dyn_cast(Idxs[i])) { @@ -2130,7 +2130,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, isa(C) && isInBoundsIndices(Idxs)) return ConstantExpr::getInBoundsGetElementPtr(C, Idxs); - return 0; + return nullptr; } Constant *llvm::ConstantFoldGetElementPtr(Constant *C, diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 2a3a5fd..bb8d60b 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -182,13 +182,13 @@ Constant *Constant::getAllOnesValue(Type *Ty) { /// 'this' is a constant expr. Constant *Constant::getAggregateElement(unsigned Elt) const { if (const ConstantStruct *CS = dyn_cast(this)) - return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0; + return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : nullptr; if (const ConstantArray *CA = dyn_cast(this)) - return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0; + return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : nullptr; if (const ConstantVector *CV = dyn_cast(this)) - return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : 0; + return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : nullptr; if (const ConstantAggregateZero *CAZ =dyn_cast(this)) return CAZ->getElementValue(Elt); @@ -197,15 +197,16 @@ Constant *Constant::getAggregateElement(unsigned Elt) const { return UV->getElementValue(Elt); if (const ConstantDataSequential *CDS =dyn_cast(this)) - return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt) : 0; - return 0; + return Elt < CDS->getNumElements() ? 
CDS->getElementAsConstant(Elt) + : nullptr; + return nullptr; } Constant *Constant::getAggregateElement(Constant *Elt) const { assert(isa(Elt->getType()) && "Index must be an integer"); if (ConstantInt *CI = dyn_cast(Elt)) return getAggregateElement(CI->getZExtValue()); - return 0; + return nullptr; } @@ -309,7 +310,7 @@ bool Constant::isThreadDependent() const { bool Constant::isConstantUsed() const { for (const User *U : users()) { const Constant *UC = dyn_cast(U); - if (UC == 0 || isa(UC)) + if (!UC || isa(UC)) return true; if (UC->isConstantUsed()) @@ -397,7 +398,7 @@ void Constant::removeDeadConstantUsers() const { Value::const_user_iterator LastNonDeadUser = E; while (I != E) { const Constant *User = dyn_cast(*I); - if (User == 0) { + if (!User) { LastNonDeadUser = I; ++I; continue; @@ -431,7 +432,7 @@ void Constant::removeDeadConstantUsers() const { void ConstantInt::anchor() { } ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V) - : Constant(Ty, ConstantIntVal, 0, 0), Val(V) { + : Constant(Ty, ConstantIntVal, nullptr, 0), Val(V) { assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type"); } @@ -644,7 +645,7 @@ Constant *ConstantFP::getInfinity(Type *Ty, bool Negative) { } ConstantFP::ConstantFP(Type *Ty, const APFloat& V) - : Constant(Ty, ConstantFPVal, 0, 0), Val(V) { + : Constant(Ty, ConstantFPVal, nullptr, 0), Val(V) { assert(&V.getSemantics() == TypeToFloatSemantics(Ty) && "FP type Mismatch"); } @@ -1235,7 +1236,7 @@ ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) { "Cannot create an aggregate zero of non-aggregate type!"); ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty]; - if (Entry == 0) + if (!Entry) Entry = new ConstantAggregateZero(Ty); return Entry; @@ -1283,7 +1284,7 @@ Constant *Constant::getSplatValue() const { return CV->getSplatValue(); if (const ConstantVector *CV = dyn_cast(this)) return CV->getSplatValue(); - return 0; + return nullptr; } /// getSplatValue - If this is a splat constant, where all of the @@ -1294,7 +1295,7 @@ Constant *ConstantVector::getSplatValue() const { // Then make sure all remaining elements point to the same value. 
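The loop that follows is the entire splat test: every lane must equal lane 0. A standalone analogue, with std::vector standing in for the constant vector:

    #include <cassert>
    #include <vector>

    template <typename T>
    const T *getSplatValue(const std::vector<T> &Lanes) {
      if (Lanes.empty())
        return nullptr;
      for (const T &L : Lanes)
        if (!(L == Lanes[0]))
          return nullptr;        // Two distinct lanes: no splat.
      return &Lanes[0];          // Every lane matches the first.
    }

    int main() {
      std::vector<int> Splat = {7, 7, 7, 7};
      std::vector<int> Mixed = {7, 7, 8, 7};
      assert(getSplatValue(Splat) != nullptr);
      assert(getSplatValue(Mixed) == nullptr);
      return 0;
    }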
for (unsigned I = 1, E = getNumOperands(); I < E; ++I) if (getOperand(I) != Elt) - return 0; + return nullptr; return Elt; } @@ -1315,7 +1316,7 @@ const APInt &Constant::getUniqueInteger() const { ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) { ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty]; - if (Entry == 0) + if (!Entry) Entry = new ConstantPointerNull(Ty); return Entry; @@ -1335,7 +1336,7 @@ void ConstantPointerNull::destroyConstant() { UndefValue *UndefValue::get(Type *Ty) { UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty]; - if (Entry == 0) + if (!Entry) Entry = new UndefValue(Ty); return Entry; @@ -1353,14 +1354,14 @@ void UndefValue::destroyConstant() { // BlockAddress *BlockAddress::get(BasicBlock *BB) { - assert(BB->getParent() != 0 && "Block must have a parent"); + assert(BB->getParent() && "Block must have a parent"); return get(BB->getParent(), BB); } BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) { BlockAddress *&BA = F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)]; - if (BA == 0) + if (!BA) BA = new BlockAddress(F, BB); assert(BA->getFunction() == F && "Basic block moved between functions"); @@ -1377,10 +1378,10 @@ BlockAddress::BlockAddress(Function *F, BasicBlock *BB) BlockAddress *BlockAddress::lookup(const BasicBlock *BB) { if (!BB->hasAddressTaken()) - return 0; + return nullptr; const Function *F = BB->getParent(); - assert(F != 0 && "Block must have a parent"); + assert(F && "Block must have a parent"); BlockAddress *BA = F->getContext().pImpl->BlockAddresses.lookup(std::make_pair(F, BB)); assert(BA && "Refcount and block address map disagree!"); @@ -1411,7 +1412,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { // and return early. BlockAddress *&NewBA = getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)]; - if (NewBA == 0) { + if (!NewBA) { getBasicBlock()->AdjustBlockAddressRefCount(-1); // Remove the old entry, this can't cause the map to rehash (just a @@ -1792,7 +1793,7 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) { // Note that a non-inbounds gep is used, as null isn't within any object. Type *AligningTy = StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL); - Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo()); + Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo(0)); Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0); Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1); Constant *Indices[2] = { Zero, One }; @@ -1936,8 +1937,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) { Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) { assert(Val->getType()->isVectorTy() && "Tried to create extractelement operation on non-vector type!"); - assert(Idx->getType()->isIntegerTy(32) && - "Extractelement index must be i32 type!"); + assert(Idx->getType()->isIntegerTy() && + "Extractelement index must be an integer type!"); if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx)) return FC; // Fold a few common cases. 
@@ -1957,7 +1958,7 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt, "Tried to create insertelement operation on non-vector type!"); assert(Elt->getType() == Val->getType()->getVectorElementType() && "Insertelement types must match!"); - assert(Idx->getType()->isIntegerTy(32) && + assert(Idx->getType()->isIntegerTy() && "Insertelement index must be i32 type!"); if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx)) @@ -2145,7 +2146,7 @@ Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) { switch (Opcode) { default: // Doesn't have an identity. - return 0; + return nullptr; case Instruction::Add: case Instruction::Or: @@ -2168,7 +2169,7 @@ Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) { switch (Opcode) { default: // Doesn't have an absorber. - return 0; + return nullptr; case Instruction::Or: return Constant::getAllOnesValue(Ty); @@ -2285,7 +2286,7 @@ Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) { // of i8, or a 1-element array of i32. They'll both end up in the same /// StringMap bucket, linked up by their Next pointers. Walk the list. ConstantDataSequential **Entry = &Slot.getValue(); - for (ConstantDataSequential *Node = *Entry; Node != 0; + for (ConstantDataSequential *Node = *Entry; Node; Entry = &Node->Next, Node = *Entry) if (Node->getType() == Ty) return Node; @@ -2312,7 +2313,7 @@ void ConstantDataSequential::destroyConstant() { ConstantDataSequential **Entry = &Slot->getValue(); // Remove the entry from the hash table. - if ((*Entry)->Next == 0) { + if (!(*Entry)->Next) { // If there is only one value in the bucket (common case) it must be this // entry, and removing the entry should remove the bucket completely. assert((*Entry) == this && "Hash mismatch in ConstantDataSequential"); @@ -2333,7 +2334,7 @@ void ConstantDataSequential::destroyConstant() { // If we were part of a list, make sure that we don't delete the list that is // still owned by the uniquing map. - Next = 0; + Next = nullptr; // Finally, actually delete it. destroyConstantImpl(); @@ -2561,7 +2562,7 @@ Constant *ConstantDataVector::getSplatValue() const { unsigned EltSize = getElementByteSize(); for (unsigned i = 1, e = getNumElements(); i != e; ++i) if (memcmp(Base, Base+i*EltSize, EltSize)) - return 0; + return nullptr; // If they're all the same, return the 0th one as a representative. return getElementAsConstant(0); @@ -2609,7 +2610,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To, AllSame &= Val == ToC; } - Constant *Replacement = 0; + Constant *Replacement = nullptr; if (AllSame && ToC->isNullValue()) { Replacement = ConstantAggregateZero::get(getType()); } else if (AllSame && isa(ToC)) { @@ -2695,7 +2696,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To, LLVMContextImpl *pImpl = getContext().pImpl; - Constant *Replacement = 0; + Constant *Replacement = nullptr; if (isAllZeros) { Replacement = ConstantAggregateZero::get(getType()); } else if (isAllUndef) { diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h index 59b9d4d..f06509f 100644 --- a/lib/IR/ConstantsContext.h +++ b/lib/IR/ConstantsContext.h @@ -24,6 +24,9 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include +#include + +#define DEBUG_TYPE "ir" namespace llvm { template @@ -584,7 +587,7 @@ public: /// necessary. 
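getOrCreate below is the uniquing-map idiom this file is built around: look the key up, construct on a miss, hand every later caller the cached object. A minimal standalone model, with std::map and invented toy types in place of the real key machinery:

    #include <map>
    #include <utility>

    struct Ty {};                          // Toy TypeClass stand-in.
    struct Expr { const Ty *T; int V; };   // Toy uniqued object.

    std::map<std::pair<const Ty *, int>, Expr *> Uniques;

    Expr *getOrCreate(const Ty *T, int V) {
      Expr *&Slot = Uniques[{T, V}];   // Reference into the map slot.
      if (!Slot)                       // Miss: construct exactly once.
        Slot = new Expr{T, V};         // Lives as long as the table.
      return Slot;                     // Hit: all callers share one object.
    }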
ConstantClass *getOrCreate(TypeClass *Ty, ValRefType V) { MapKey Lookup(Ty, V); - ConstantClass* Result = 0; + ConstantClass* Result = nullptr; typename MapTy::iterator I = Map.find(Lookup); // Is it in the map? @@ -720,7 +723,7 @@ public: /// necessary. ConstantClass *getOrCreate(TypeClass *Ty, Operands V) { LookupKey Lookup(Ty, V); - ConstantClass* Result = 0; + ConstantClass* Result = nullptr; typename MapTy::iterator I = Map.find_as(Lookup); // Is it in the map? diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index f52f466..27ce503 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -17,6 +17,8 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" @@ -28,6 +30,7 @@ #include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Threading.h" @@ -39,6 +42,8 @@ using namespace llvm; +#define DEBUG_TYPE "ir" + void llvm::initializeCore(PassRegistry &Registry) { initializeDominatorTreeWrapperPassPass(Registry); initializePrintModulePassWrapperPass(Registry); @@ -76,6 +81,21 @@ LLVMContextRef LLVMGetGlobalContext() { return wrap(&getGlobalContext()); } +void LLVMContextSetDiagnosticHandler(LLVMContextRef C, + LLVMDiagnosticHandler Handler, + void *DiagnosticContext) { + unwrap(C)->setDiagnosticHandler( + LLVM_EXTENSION reinterpret_cast(Handler), + DiagnosticContext); +} + +void LLVMContextSetYieldCallback(LLVMContextRef C, LLVMYieldCallback Callback, + void *OpaqueHandle) { + auto YieldCallback = + LLVM_EXTENSION reinterpret_cast(Callback); + unwrap(C)->setYieldCallback(YieldCallback, OpaqueHandle); +} + void LLVMContextDispose(LLVMContextRef C) { delete unwrap(C); } @@ -89,6 +109,40 @@ unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) { return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen); } +char *LLVMGetDiagInfoDescription(LLVMDiagnosticInfoRef DI) { + std::string MsgStorage; + raw_string_ostream Stream(MsgStorage); + DiagnosticPrinterRawOStream DP(Stream); + + unwrap(DI)->print(DP); + Stream.flush(); + + return LLVMCreateMessage(MsgStorage.c_str()); +} + +LLVMDiagnosticSeverity LLVMGetDiagInfoSeverity(LLVMDiagnosticInfoRef DI){ + LLVMDiagnosticSeverity severity; + + switch(unwrap(DI)->getSeverity()) { + default: + severity = LLVMDSError; + break; + case DS_Warning: + severity = LLVMDSWarning; + break; + case DS_Remark: + severity = LLVMDSRemark; + break; + case DS_Note: + severity = LLVMDSNote; + break; + } + + return severity; +} + + + /*===-- Operations on modules ---------------------------------------------===*/ @@ -136,7 +190,7 @@ LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename, return true; } - unwrap(M)->print(dest, NULL); + unwrap(M)->print(dest, nullptr); if (!error.empty()) { *ErrorMessage = strdup(error.c_str()); @@ -150,7 +204,7 @@ char *LLVMPrintModuleToString(LLVMModuleRef M) { std::string buf; raw_string_ostream os(buf); - unwrap(M)->print(os, NULL); + unwrap(M)->print(os, nullptr); os.flush(); return strdup(buf.c_str()); @@ -374,7 +428,7 @@ const char *LLVMGetStructName(LLVMTypeRef Ty) { StructType *Type = unwrap(Ty); if (!Type->hasName()) - return 0; + return nullptr; return Type->getName().data(); } @@ -496,7 +550,8 @@ LLVMValueRef 
LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) { } void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) { - unwrap(Inst)->setMetadata(KindID, MD? unwrap(MD) : NULL); + unwrap(Inst)->setMetadata(KindID, + MD ? unwrap(MD) : nullptr); } /*--.. Conversion functions ................................................--*/ @@ -513,7 +568,7 @@ LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) { Value *V = unwrap(Val); Value::use_iterator I = V->use_begin(); if (I == V->use_end()) - return 0; + return nullptr; return wrap(&*I); } @@ -521,7 +576,7 @@ LLVMUseRef LLVMGetNextUse(LLVMUseRef U) { Use *Next = unwrap(U)->getNext(); if (Next) return wrap(Next); - return 0; + return nullptr; } LLVMValueRef LLVMGetUser(LLVMUseRef U) { @@ -611,7 +666,7 @@ const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len) { return S->getString().data(); } *Len = 0; - return 0; + return nullptr; } unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V) @@ -650,7 +705,7 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name, NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(name); if (!N) return; - MDNode *Op = Val ? unwrap(Val) : NULL; + MDNode *Op = Val ? unwrap(Val) : nullptr; if (Op) N->addOperand(Op); } @@ -1235,7 +1290,7 @@ const char *LLVMGetSection(LLVMValueRef Global) { } void LLVMSetSection(LLVMValueRef Global, const char *Section) { - unwrap(Global)->setSection(Section); + unwrap(Global)->setSection(Section); } LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) { @@ -1285,7 +1340,7 @@ unsigned LLVMGetAlignment(LLVMValueRef V) { void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) { Value *P = unwrap(V); - if (GlobalValue *GV = dyn_cast(P)) + if (GlobalObject *GV = dyn_cast(P)) GV->setAlignment(Bytes); else if (AllocaInst *AI = dyn_cast(P)) AI->setAlignment(Bytes); @@ -1302,15 +1357,16 @@ void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) { LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) { return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false, - GlobalValue::ExternalLinkage, 0, Name)); + GlobalValue::ExternalLinkage, nullptr, Name)); } LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name, unsigned AddressSpace) { return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false, - GlobalValue::ExternalLinkage, 0, Name, 0, - GlobalVariable::NotThreadLocal, AddressSpace)); + GlobalValue::ExternalLinkage, nullptr, Name, + nullptr, GlobalVariable::NotThreadLocal, + AddressSpace)); } LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) { @@ -1321,7 +1377,7 @@ LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::global_iterator I = Mod->global_begin(); if (I == Mod->global_end()) - return 0; + return nullptr; return wrap(I); } @@ -1329,7 +1385,7 @@ LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::global_iterator I = Mod->global_end(); if (I == Mod->global_begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1337,7 +1393,7 @@ LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) { GlobalVariable *GV = unwrap(GlobalVar); Module::global_iterator I = GV; if (++I == GV->getParent()->global_end()) - return 0; + return nullptr; return wrap(I); } @@ -1345,7 +1401,7 @@ LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) { GlobalVariable *GV = unwrap(GlobalVar); Module::global_iterator I = GV; if (I == GV->getParent()->global_begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1356,7 +1412,7 @@ void 
LLVMDeleteGlobal(LLVMValueRef GlobalVar) { LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) { GlobalVariable* GV = unwrap(GlobalVar); if ( !GV->hasInitializer() ) - return 0; + return nullptr; return wrap(GV->getInitializer()); } @@ -1432,8 +1488,10 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) { LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name) { - return wrap(new GlobalAlias(unwrap(Ty), GlobalValue::ExternalLinkage, Name, - unwrap(Aliasee), unwrap (M))); + auto *PTy = cast(unwrap(Ty)); + return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + GlobalValue::ExternalLinkage, Name, + unwrap(Aliasee), unwrap(M))); } /*--.. Operations on functions .............................................--*/ @@ -1452,7 +1510,7 @@ LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::iterator I = Mod->begin(); if (I == Mod->end()) - return 0; + return nullptr; return wrap(I); } @@ -1460,7 +1518,7 @@ LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) { Module *Mod = unwrap(M); Module::iterator I = Mod->end(); if (I == Mod->begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1468,7 +1526,7 @@ LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Module::iterator I = Func; if (++I == Func->getParent()->end()) - return 0; + return nullptr; return wrap(I); } @@ -1476,7 +1534,7 @@ LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Module::iterator I = Func; if (I == Func->getParent()->begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1501,7 +1559,7 @@ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) { const char *LLVMGetGC(LLVMValueRef Fn) { Function *F = unwrap(Fn); - return F->hasGC()? F->getGC() : 0; + return F->hasGC()? 
F->getGC() : nullptr; } void LLVMSetGC(LLVMValueRef Fn, const char *GC) { @@ -1582,7 +1640,7 @@ LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Function::arg_iterator I = Func->arg_begin(); if (I == Func->arg_end()) - return 0; + return nullptr; return wrap(I); } @@ -1590,7 +1648,7 @@ LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Function::arg_iterator I = Func->arg_end(); if (I == Func->arg_begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1598,7 +1656,7 @@ LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) { Argument *A = unwrap(Arg); Function::arg_iterator I = A; if (++I == A->getParent()->arg_end()) - return 0; + return nullptr; return wrap(I); } @@ -1606,7 +1664,7 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { Argument *A = unwrap(Arg); Function::arg_iterator I = A; if (I == A->getParent()->arg_begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1676,7 +1734,7 @@ LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Function::iterator I = Func->begin(); if (I == Func->end()) - return 0; + return nullptr; return wrap(I); } @@ -1684,7 +1742,7 @@ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) { Function *Func = unwrap(Fn); Function::iterator I = Func->end(); if (I == Func->begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1692,7 +1750,7 @@ LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); Function::iterator I = Block; if (++I == Block->getParent()->end()) - return 0; + return nullptr; return wrap(I); } @@ -1700,7 +1758,7 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); Function::iterator I = Block; if (I == Block->getParent()->begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1752,7 +1810,7 @@ LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); BasicBlock::iterator I = Block->begin(); if (I == Block->end()) - return 0; + return nullptr; return wrap(I); } @@ -1760,7 +1818,7 @@ LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); BasicBlock::iterator I = Block->end(); if (I == Block->begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1768,7 +1826,7 @@ LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) { Instruction *Instr = unwrap(Inst); BasicBlock::iterator I = Instr; if (++I == Instr->getParent()->end()) - return 0; + return nullptr; return wrap(I); } @@ -1776,7 +1834,7 @@ LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) { Instruction *Instr = unwrap(Inst); BasicBlock::iterator I = Instr; if (I == Instr->getParent()->begin()) - return 0; + return nullptr; return wrap(--I); } @@ -1939,7 +1997,7 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) { /*--.. Metadata builders ...................................................--*/ void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) { - MDNode *Loc = L ? unwrap(L) : NULL; + MDNode *Loc = L ? 
unwrap(L) : nullptr; unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc)); } @@ -2195,7 +2253,7 @@ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), ITy, unwrap(Ty), AllocSize, - 0, 0, ""); + nullptr, nullptr, ""); return wrap(unwrap(B)->Insert(Malloc, Twine(Name))); } @@ -2206,13 +2264,13 @@ LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), ITy, unwrap(Ty), AllocSize, - unwrap(Val), 0, ""); + unwrap(Val), nullptr, ""); return wrap(unwrap(B)->Insert(Malloc, Twine(Name))); } LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { - return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), 0, Name)); + return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), nullptr, Name)); } LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty, diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 1ea381a..92edacc 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -30,8 +30,9 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { } DIBuilder::DIBuilder(Module &m) - : M(m), VMContext(M.getContext()), TempEnumTypes(0), TempRetainTypes(0), - TempSubprograms(0), TempGVs(0), DeclareFn(0), ValueFn(0) {} + : M(m), VMContext(M.getContext()), TempEnumTypes(nullptr), + TempRetainTypes(nullptr), TempSubprograms(nullptr), TempGVs(nullptr), + DeclareFn(nullptr), ValueFn(nullptr) {} /// finalize - Construct any deferred debug info descriptors. void DIBuilder::finalize() { @@ -80,7 +81,7 @@ void DIBuilder::finalize() { /// N. 
static MDNode *getNonCompileUnitScope(MDNode *N) { if (DIDescriptor(N).isCompileUnit()) - return NULL; + return nullptr; return N; } @@ -103,7 +104,7 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, StringRef SplitName, DebugEmissionKind Kind) { - assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) || + assert(((Lang <= dwarf::DW_LANG_OCaml && Lang >= dwarf::DW_LANG_C89) || (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && "Invalid Language tag"); assert(!Filename.empty() && @@ -146,13 +147,13 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, } static DIImportedEntity -createImportedModule(LLVMContext &C, DIScope Context, DIDescriptor NS, - unsigned Line, StringRef Name, - SmallVectorImpl > &AllImportedModules) { +createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context, + Value *NS, unsigned Line, StringRef Name, + SmallVectorImpl> &AllImportedModules) { const MDNode *R; if (Name.empty()) { Value *Elts[] = { - GetTagConstant(C, dwarf::DW_TAG_imported_module), + GetTagConstant(C, Tag), Context, NS, ConstantInt::get(Type::getInt32Ty(C), Line), @@ -160,7 +161,7 @@ createImportedModule(LLVMContext &C, DIScope Context, DIDescriptor NS, R = MDNode::get(C, Elts); } else { Value *Elts[] = { - GetTagConstant(C, dwarf::DW_TAG_imported_module), + GetTagConstant(C, Tag), Context, NS, ConstantInt::get(Type::getInt32Ty(C), Line), @@ -175,33 +176,32 @@ createImportedModule(LLVMContext &C, DIScope Context, DIDescriptor NS, } DIImportedEntity DIBuilder::createImportedModule(DIScope Context, - DINameSpace NS, unsigned Line, - StringRef Name) { - return ::createImportedModule(VMContext, Context, NS, Line, Name, - AllImportedModules); + DINameSpace NS, + unsigned Line) { + return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, + Context, NS, Line, StringRef(), AllImportedModules); } DIImportedEntity DIBuilder::createImportedModule(DIScope Context, DIImportedEntity NS, - unsigned Line, - StringRef Name) { - return ::createImportedModule(VMContext, Context, NS, Line, Name, - AllImportedModules); + unsigned Line) { + return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module, + Context, NS, Line, StringRef(), AllImportedModules); } DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context, DIScope Decl, - unsigned Line) { - Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_imported_declaration), - Context, - Decl.getRef(), - ConstantInt::get(Type::getInt32Ty(VMContext), Line), - }; - DIImportedEntity M(MDNode::get(VMContext, Elts)); - assert(M.Verify() && "Imported module should be valid"); - AllImportedModules.push_back(TrackingVH(M)); - return M; + unsigned Line, StringRef Name) { + return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration, + Context, Decl.getRef(), Line, Name, + AllImportedModules); +} + +DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context, + DIImportedEntity Imp, + unsigned Line, StringRef Name) { + return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration, + Context, Imp, Line, Name, AllImportedModules); } /// createFile - Create a file descriptor to hold debugging information @@ -232,8 +232,8 @@ DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) { // size, alignment, offset and flags are always empty here. 
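The Elts arrays below are arrays of Value*, which is one reason this patch prefers nullptr to NULL/0 in them: nullptr is unambiguously a pointer constant. A two-overload illustration of the difference (the field functions are invented, unrelated to DIBuilder):

    #include <iostream>

    void field(int)          { std::cout << "int overload\n"; }
    void field(const void *) { std::cout << "pointer overload\n"; }

    int main() {
      field(0);        // 0 is an int first: picks the int overload.
      field(nullptr);  // std::nullptr_t converts only to pointers.
      return 0;
    }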
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type), - NULL, // Filename - NULL, // Unused + nullptr, // Filename + nullptr, // Unused MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size @@ -260,8 +260,8 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, // offset and flags are always empty here. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_base_type), - NULL, // File/directory name - NULL, // Unused + nullptr, // File/directory name + nullptr, // Unused MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), @@ -279,8 +279,8 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), - NULL, // Filename - NULL, // Unused + nullptr, // Filename + nullptr, // Unused MDString::get(VMContext, StringRef()), // Empty name. ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size @@ -299,8 +299,8 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), - NULL, // Filename - NULL, // Unused + nullptr, // Filename + nullptr, // Unused MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), @@ -317,9 +317,9 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type), - NULL, // Filename - NULL, // Unused - NULL, + nullptr, // Filename + nullptr, // Unused + nullptr, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -338,9 +338,9 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { // References are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), - NULL, // Filename - NULL, // TheCU, - NULL, // Name + nullptr, // Filename + nullptr, // TheCU, + nullptr, // Name ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -355,7 +355,6 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, unsigned LineNo, DIDescriptor Context) { // typedefs are encoded in DIDerivedType format. 
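These descriptor arrays are positional: an operand's meaning comes from its index, so an absent field still has to occupy its slot, as an explicit nullptr. A tiny standalone illustration (the field order is invented):

    #include <cstdio>

    int main() {
      // Index:  0 = name, 1 = file, 2 = scope (positions carry meaning).
      const char *Elts[] = {
          "size_t",  // name
          nullptr,   // file: absent, but later indices stay stable
          nullptr,   // scope: absent
      };
      for (const char *E : Elts)
        std::printf("%s\n", E ? E : "<null>");
      return 0;
    }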
- assert(Ty.isType() && "Invalid typedef type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_typedef), File.getFileNode(), @@ -378,9 +377,9 @@ DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { assert(FriendTy.isType() && "Invalid friend type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_friend), - NULL, + nullptr, Ty.getRef(), - NULL, // Name + nullptr, // Name ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -400,9 +399,9 @@ DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, // TAG_inheritance is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), - NULL, + nullptr, Ty.getRef(), - NULL, // Name + nullptr, // Name ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align @@ -631,7 +630,8 @@ DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), 0), VTableHolder.getRef(), TemplateParams, - UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) + UniqueIdentifier.empty() ? nullptr + : MDString::get(VMContext, UniqueIdentifier) }; DICompositeType R(MDNode::get(VMContext, Elts)); assert(R.isCompositeType() && @@ -667,8 +667,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), VTableHolder.getRef(), - NULL, - UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) + nullptr, + UniqueIdentifier.empty() ? nullptr + : MDString::get(VMContext, UniqueIdentifier) }; DICompositeType R(MDNode::get(VMContext, Elts)); assert(R.isCompositeType() && @@ -697,12 +698,13 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - NULL, + nullptr, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), - NULL, - NULL, - UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) + nullptr, + nullptr, + UniqueIdentifier.empty() ? 
nullptr + : MDString::get(VMContext, UniqueIdentifier) }; DICompositeType R(MDNode::get(VMContext, Elts)); if (!UniqueIdentifier.empty()) @@ -718,19 +720,19 @@ DICompositeType DIBuilder::createSubroutineType(DIFile File, Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), Constant::getNullValue(Type::getInt32Ty(VMContext)), - NULL, + nullptr, MDString::get(VMContext, ""), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), Flags), // Flags - NULL, + nullptr, ParameterTypes, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - NULL, - NULL, - NULL // Type Identifer + nullptr, + nullptr, + nullptr // Type Identifer }; return DICompositeType(MDNode::get(VMContext, Elts)); } @@ -755,9 +757,10 @@ DICompositeType DIBuilder::createEnumerationType( UnderlyingType.getRef(), Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - NULL, - NULL, - UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) + nullptr, + nullptr, + UniqueIdentifier.empty() ? nullptr + : MDString::get(VMContext, UniqueIdentifier) }; DICompositeType CTy(MDNode::get(VMContext, Elts)); AllEnumTypes.push_back(CTy); @@ -772,8 +775,8 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, // TAG_array_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), - NULL, // Filename/Directory, - NULL, // Unused + nullptr, // Filename/Directory, + nullptr, // Unused MDString::get(VMContext, ""), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), Size), @@ -783,9 +786,9 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, Ty.getRef(), Subscripts, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - NULL, - NULL, - NULL // Type Identifer + nullptr, + nullptr, + nullptr // Type Identifer }; return DICompositeType(MDNode::get(VMContext, Elts)); } @@ -796,8 +799,8 @@ DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, // A vector is an array type with the FlagVector flag applied. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), - NULL, // Filename/Directory, - NULL, // Unused + nullptr, // Filename/Directory, + nullptr, // Unused MDString::get(VMContext, ""), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), Size), @@ -807,9 +810,9 @@ DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, Ty.getRef(), Subscripts, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - NULL, - NULL, - NULL // Type Identifer + nullptr, + nullptr, + nullptr // Type Identifer }; return DICompositeType(MDNode::get(VMContext, Elts)); } @@ -890,12 +893,47 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope, ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), DIDescriptor::FlagFwdDecl), - NULL, + nullptr, + DIArray(), + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), + nullptr, + nullptr, //TemplateParams + UniqueIdentifier.empty() ? 
nullptr + : MDString::get(VMContext, UniqueIdentifier) + }; + MDNode *Node = MDNode::get(VMContext, Elts); + DICompositeType RetTy(Node); + assert(RetTy.isCompositeType() && + "createForwardDecl result should be a DIType"); + if (!UniqueIdentifier.empty()) + retainType(RetTy); + return RetTy; +} + +/// createForwardDecl - Create a temporary forward-declared type that +/// can be RAUW'd if the full type is seen. +DICompositeType DIBuilder::createReplaceableForwardDecl( + unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line, + unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits, + StringRef UniqueIdentifier) { + // Create a temporary MDNode. + Value *Elts[] = { + GetTagConstant(VMContext, Tag), + F.getFileNode(), + DIScope(getNonCompileUnitScope(Scope)).getRef(), + MDString::get(VMContext, Name), + ConstantInt::get(Type::getInt32Ty(VMContext), Line), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), DIDescriptor::FlagFwdDecl), + nullptr, DIArray(), ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), - NULL, - NULL, //TemplateParams - UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) + nullptr, + nullptr, //TemplateParams + UniqueIdentifier.empty() ? nullptr + : MDString::get(VMContext, UniqueIdentifier) }; MDNode *Node = MDNode::getTemporary(VMContext, Elts); DICompositeType RetTy(Node); @@ -932,7 +970,7 @@ DIGlobalVariable DIBuilder::createGlobalVariable(StringRef Name, Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), Constant::getNullValue(Type::getInt32Ty(VMContext)), - NULL, // TheCU, + nullptr, // TheCU, MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -1087,7 +1125,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), 0), - NULL, + nullptr, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, @@ -1121,7 +1159,6 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, assert(getNonCompileUnitScope(Context) && "Methods should have both a Context and a context that isn't " "the compile unit."); - Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), F.getFileNode(), @@ -1141,7 +1178,7 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, Fn, TParam, Constant::getNullValue(Type::getInt32Ty(VMContext)), - MDNode::getTemporary(VMContext, TElts), + nullptr, // FIXME: Do we want to use different scope/lines? ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; @@ -1189,6 +1226,13 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, unsigned Line, unsigned Col, unsigned Discriminator) { + // FIXME: This isn't thread safe nor the right way to defeat MDNode uniquing. + // I believe the right way is to have a self-referential element in the node. 
+ // Also: why do we bother with line/column - they're not used and the + // documentation (SourceLevelDebugging.rst) claims the line/col are necessary + // for uniquing, yet then we have this other solution (because line/col were + // inadequate) anyway. Remove all 3 and replace them with a self-reference. + // Defeat MDNode uniquing for lexical blocks by using unique id. static unsigned int unique_id = 0; Value *Elts[] = { diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index 6c18387..dea05fb 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -178,7 +178,7 @@ static const LayoutAlignElem DefaultAlignments[] = { void DataLayout::reset(StringRef Desc) { clear(); - LayoutMap = 0; + LayoutMap = nullptr; LittleEndian = false; StackNaturalAlign = 0; ManglingMode = MM_None; @@ -344,7 +344,7 @@ void DataLayout::parseSpecifier(StringRef Desc) { } } -DataLayout::DataLayout(const Module *M) : LayoutMap(0) { +DataLayout::DataLayout(const Module *M) : LayoutMap(nullptr) { const DataLayout *Other = M->getDataLayout(); if (Other) *this = *Other; @@ -357,7 +357,7 @@ bool DataLayout::operator==(const DataLayout &Other) const { StackNaturalAlign == Other.StackNaturalAlign && ManglingMode == Other.ManglingMode && LegalIntWidths == Other.LegalIntWidths && - Alignments == Other.Alignments && Pointers == Pointers; + Alignments == Other.Alignments && Pointers == Other.Pointers; assert(Ret == (getStringRepresentation() == Other.getStringRepresentation())); return Ret; } @@ -488,7 +488,7 @@ void DataLayout::clear() { Alignments.clear(); Pointers.clear(); delete static_cast(LayoutMap); - LayoutMap = 0; + LayoutMap = nullptr; } DataLayout::~DataLayout() { @@ -687,7 +687,7 @@ unsigned DataLayout::getABITypeAlignment(Type *Ty) const { /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for /// an integer type of the specified bitwidth. 
unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const { - return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0); + return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, nullptr); } unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const { @@ -708,7 +708,7 @@ IntegerType *DataLayout::getIntPtrType(LLVMContext &C, Type *DataLayout::getIntPtrType(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "Expected a pointer or pointer vector type."); - unsigned NumBits = getTypeSizeInBits(Ty->getScalarType()); + unsigned NumBits = getPointerTypeSizeInBits(Ty); IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits); if (VectorType *VecTy = dyn_cast(Ty)) return VectorType::get(IntTy, VecTy->getNumElements()); @@ -719,7 +719,7 @@ Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const for (unsigned LegalIntWidth : LegalIntWidths) if (Width <= LegalIntWidth) return Type::getIntNTy(C, LegalIntWidth); - return 0; + return nullptr; } unsigned DataLayout::getLargestLegalIntTypeSize() const { diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index c9d68af..db9e56d 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -53,8 +53,8 @@ bool DIDescriptor::Verify() const { } static Value *getField(const MDNode *DbgNode, unsigned Elt) { - if (DbgNode == 0 || Elt >= DbgNode->getNumOperands()) - return 0; + if (!DbgNode || Elt >= DbgNode->getNumOperands()) + return nullptr; return DbgNode->getOperand(Elt); } @@ -73,7 +73,7 @@ StringRef DIDescriptor::getStringField(unsigned Elt) const { } uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { - if (DbgNode == 0) + if (!DbgNode) return 0; if (Elt < DbgNode->getNumOperands()) @@ -85,7 +85,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { } int64_t DIDescriptor::getInt64Field(unsigned Elt) const { - if (DbgNode == 0) + if (!DbgNode) return 0; if (Elt < DbgNode->getNumOperands()) @@ -102,34 +102,34 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { } GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { - if (DbgNode == 0) - return 0; + if (!DbgNode) + return nullptr; if (Elt < DbgNode->getNumOperands()) return dyn_cast_or_null(DbgNode->getOperand(Elt)); - return 0; + return nullptr; } Constant *DIDescriptor::getConstantField(unsigned Elt) const { - if (DbgNode == 0) - return 0; + if (!DbgNode) + return nullptr; if (Elt < DbgNode->getNumOperands()) return dyn_cast_or_null(DbgNode->getOperand(Elt)); - return 0; + return nullptr; } Function *DIDescriptor::getFunctionField(unsigned Elt) const { - if (DbgNode == 0) - return 0; + if (!DbgNode) + return nullptr; if (Elt < DbgNode->getNumOperands()) return dyn_cast_or_null(DbgNode->getOperand(Elt)); - return 0; + return nullptr; } void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) { - if (DbgNode == 0) + if (!DbgNode) return; if (Elt < DbgNode->getNumOperands()) { @@ -335,7 +335,7 @@ unsigned DIArray::getNumElements() const { /// replaceAllUsesWith - Replace all uses of the MDNode used by this /// type with the one in the passed descriptor. -void DIType::replaceAllUsesWith(DIDescriptor &D) { +void DIType::replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D) { assert(DbgNode && "Trying to replace an unverified type!"); @@ -344,13 +344,19 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) { // which, due to uniquing, has merged with the source. We shield clients from // this detail by allowing a value to be replaced with replaceAllUsesWith() // itself. 
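The rewrite that follows handles exactly the situation the comment above describes: uniquing can make the requested replacement be the node itself, so the new code first copies the operands into a fresh node and then redirects and deletes unconditionally. A loose single-use model of that shape (plain structs, not MDNodes; the one tracked use is the Slot pointer):

    #include <cassert>
    #include <vector>

    struct Node { std::vector<int> Ops; };

    void replaceAllUsesWith(Node *&Slot, Node *Replacement) {
      if (Slot == Replacement)
        Replacement = new Node(*Slot);  // Self-replacement: clone first.
      Node *Old = Slot;
      Slot = Replacement;               // Redirect the use.
      delete Old;                       // The temporary is always freed.
    }

    int main() {
      Node *N = new Node{{1, 2}};
      replaceAllUsesWith(N, N);         // Safe: N ends up at a live clone.
      assert(N->Ops.size() == 2);
      delete N;
      return 0;
    }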
- if (DbgNode != D) { - MDNode *Node = const_cast(DbgNode); - const MDNode *DN = D; - const Value *V = cast_or_null(DN); - Node->replaceAllUsesWith(const_cast(V)); - MDNode::deleteTemporary(Node); + const MDNode *DN = D; + if (DbgNode == DN) { + SmallVector Ops(DbgNode->getNumOperands()); + for (size_t i = 0; i != Ops.size(); ++i) + Ops[i] = DbgNode->getOperand(i); + DN = MDNode::get(VMContext, Ops); } + + MDNode *Node = const_cast(DbgNode); + const Value *V = cast_or_null(DN); + Node->replaceAllUsesWith(const_cast(V)); + MDNode::deleteTemporary(Node); + DbgNode = D; } /// replaceAllUsesWith - Replace all uses of the MDNode used by this @@ -358,19 +364,12 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) { void DIType::replaceAllUsesWith(MDNode *D) { assert(DbgNode && "Trying to replace an unverified type!"); - - // Since we use a TrackingVH for the node, its easy for clients to manufacture - // legitimate situations where they want to replaceAllUsesWith() on something - // which, due to uniquing, has merged with the source. We shield clients from - // this detail by allowing a value to be replaced with replaceAllUsesWith() - // itself. - if (DbgNode != D) { - MDNode *Node = const_cast(DbgNode); - const MDNode *DN = D; - const Value *V = cast_or_null(DN); - Node->replaceAllUsesWith(const_cast(V)); - MDNode::deleteTemporary(Node); - } + assert(DbgNode != D && "This replacement should always happen"); + MDNode *Node = const_cast(DbgNode); + const MDNode *DN = D; + const Value *V = cast_or_null(DN); + Node->replaceAllUsesWith(const_cast(V)); + MDNode::deleteTemporary(Node); } /// Verify - Verify that a compile unit is well formed. @@ -759,7 +758,7 @@ DIScopeRef DIScope::getContext() const { return DIScopeRef(DINameSpace(DbgNode).getContext()); assert((isFile() || isCompileUnit()) && "Unhandled type of scope."); - return DIScopeRef(NULL); + return DIScopeRef(nullptr); } // If the scope node has a name, return that, else return an empty string. diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp index 1a2521e..43360d3 100644 --- a/lib/IR/DebugLoc.cpp +++ b/lib/IR/DebugLoc.cpp @@ -18,7 +18,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const { - if (ScopeIdx == 0) return 0; + if (ScopeIdx == 0) return nullptr; if (ScopeIdx > 0) { // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at @@ -37,7 +37,7 @@ MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const { MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const { // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at // position specified. Zero is invalid. - if (ScopeIdx >= 0) return 0; + if (ScopeIdx >= 0) return nullptr; // Otherwise, the index is in the ScopeInlinedAtRecords array. 
assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() && @@ -49,7 +49,7 @@ MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const { void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA, const LLVMContext &Ctx) const { if (ScopeIdx == 0) { - Scope = IA = 0; + Scope = IA = nullptr; return; } @@ -59,7 +59,7 @@ void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA, assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() && "Invalid ScopeIdx!"); Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get(); - IA = 0; + IA = nullptr; return; } @@ -96,8 +96,8 @@ DebugLoc DebugLoc::get(unsigned Line, unsigned Col, DebugLoc Result; // If no scope is available, this is an unknown location. - if (Scope == 0) return Result; - + if (!Scope) return Result; + // Saturate line and col to "unknown". if (Col > 255) Col = 0; if (Line >= (1 << 24)) Line = 0; @@ -106,7 +106,7 @@ DebugLoc DebugLoc::get(unsigned Line, unsigned Col, LLVMContext &Ctx = Scope->getContext(); // If there is no inlined-at location, use the ScopeRecords array. - if (InlinedAt == 0) + if (!InlinedAt) Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0); else Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope, @@ -118,7 +118,7 @@ DebugLoc DebugLoc::get(unsigned Line, unsigned Col, /// getAsMDNode - This method converts the compressed DebugLoc node into a /// DILocation-compatible MDNode. MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const { - if (isUnknown()) return 0; + if (isUnknown()) return nullptr; MDNode *Scope, *IA; getScopeAndInlinedAt(Scope, IA, Ctx); @@ -137,7 +137,7 @@ MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const { DebugLoc DebugLoc::getFromDILocation(MDNode *N) { DILocation Loc(N); MDNode *Scope = Loc.getScope(); - if (Scope == 0) return DebugLoc(); + if (!Scope) return DebugLoc(); return get(Loc.getLineNumber(), Loc.getColumnNumber(), Scope, Loc.getOrigLocation()); } @@ -146,8 +146,9 @@ DebugLoc DebugLoc::getFromDILocation(MDNode *N) { DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) { DILexicalBlock LexBlock(N); MDNode *Scope = LexBlock.getContext(); - if (Scope == 0) return DebugLoc(); - return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, NULL); + if (!Scope) return DebugLoc(); + return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, + nullptr); } void DebugLoc::dump(const LLVMContext &Ctx) const { @@ -166,6 +167,28 @@ void DebugLoc::dump(const LLVMContext &Ctx) const { #endif } +void DebugLoc::print(const LLVMContext &Ctx, raw_ostream &OS) const { + if (!isUnknown()) { + // Print source line info. + DIScope Scope(getScope(Ctx)); + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); + if (Scope) + OS << Scope.getFilename(); + else + OS << ""; + OS << ':' << getLine(); + if (getCol() != 0) + OS << ':' << getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + OS << " @[ "; + InlinedAtDL.print(Ctx, OS); + OS << " ]"; + } + } +} + //===----------------------------------------------------------------------===// // DenseMap specialization //===----------------------------------------------------------------------===// @@ -234,7 +257,7 @@ void DebugRecVH::deleted() { // If this is a non-canonical reference, just drop the value to null, we know // it doesn't have a map entry. 
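The DebugLoc::get hunk above saturates rather than truncates: a column that does not fit in 8 bits or a line that does not fit in 24 becomes 0, i.e. unknown, never a wrong position. A sketch of that rule (the bitfield layout here is illustrative):

    #include <cassert>

    struct PackedLoc { unsigned Line : 24; unsigned Col : 8; };

    PackedLoc pack(unsigned Line, unsigned Col) {
      if (Col > 255) Col = 0;            // Saturate to "unknown column".
      if (Line >= (1u << 24)) Line = 0;  // Saturate to "unknown line".
      PackedLoc L;
      L.Line = Line;
      L.Col = Col;
      return L;
    }

    int main() {
      assert(pack(7, 300).Col == 0);        // Too wide for 8 bits.
      assert(pack(1u << 24, 3).Line == 0);  // Too large for 24 bits.
      return 0;
    }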
if (Idx == 0) { - setValPtr(0); + setValPtr(nullptr); return; } @@ -245,7 +268,7 @@ void DebugRecVH::deleted() { assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!"); Ctx->ScopeRecordIdx.erase(Cur); // Reset this VH to null and we're done. - setValPtr(0); + setValPtr(nullptr); Idx = 0; return; } @@ -259,7 +282,7 @@ void DebugRecVH::deleted() { MDNode *OldScope = Entry.first.get(); MDNode *OldInlinedAt = Entry.second.get(); - assert(OldScope != 0 && OldInlinedAt != 0 && + assert(OldScope && OldInlinedAt && "Entry should be non-canonical if either val dropped to null"); // Otherwise, we do have an entry in it, nuke it and we're done. @@ -269,7 +292,7 @@ void DebugRecVH::deleted() { // Reset this VH to null. Drop both 'Idx' values to null to indicate that // we're in non-canonical form now. - setValPtr(0); + setValPtr(nullptr); Entry.first.Idx = Entry.second.Idx = 0; } @@ -277,8 +300,8 @@ void DebugRecVH::allUsesReplacedWith(Value *NewVa) { // If being replaced with a non-mdnode value (e.g. undef) handle this as if // the mdnode got deleted. MDNode *NewVal = dyn_cast(NewVa); - if (NewVal == 0) return deleted(); - + if (!NewVal) return deleted(); + // If this is a non-canonical reference, just change it, we know it already // doesn't have a map entry. if (Idx == 0) { @@ -313,7 +336,7 @@ void DebugRecVH::allUsesReplacedWith(Value *NewVa) { MDNode *OldScope = Entry.first.get(); MDNode *OldInlinedAt = Entry.second.get(); - assert(OldScope != 0 && OldInlinedAt != 0 && + assert(OldScope && OldInlinedAt && "Entry should be non-canonical if either val dropped to null"); // Otherwise, we do have an entry in it, nuke it and we're done. diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index d59d4cf..6eeb162 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -12,18 +12,80 @@ // Diagnostics reporting is still done as part of the LLVMContext. //===----------------------------------------------------------------------===// +#include "LLVMContextImpl.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Atomic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Regex.h" #include using namespace llvm; +namespace { + +/// \brief Regular expression corresponding to the value given in one of the +/// -pass-remarks* command line flags. Passes whose name matches this regexp +/// will emit a diagnostic when calling the associated diagnostic function +/// (emitOptimizationRemark, emitOptimizationRemarkMissed or +/// emitOptimizationRemarkAnalysis). +struct PassRemarksOpt { + std::shared_ptr Pattern; + + void operator=(const std::string &Val) { + // Create a regexp object to match pass names for emitOptimizationRemark. 
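The -pass-remarks* flags below store into the static PassRemarksOpt instances through cl::location, and the struct's operator= validates the pattern eagerly, so a malformed regex fails at flag-parse time rather than at first match. The declarations lost their angle brackets in this copy of the patch; the Pattern member is a std::shared_ptr<Regex>, and one of the flags reconstructed per the 3.5-era CommandLine API looks like:

    // -pass-remarks, with template arguments restored. The 'true' parameter
    // selects external storage, written through cl::location(...).
    static cl::opt<PassRemarksOpt, true, cl::parser<std::string>>
    PassRemarks("pass-remarks", cl::value_desc("pattern"),
                cl::desc("Enable optimization remarks from passes whose name "
                         "match the given regular expression"),
                cl::Hidden, cl::location(PassRemarksOptLoc), cl::ValueRequired,
                cl::ZeroOrMore);

On the producer side, a pass calls one of the emit* helpers added further down in this file unconditionally, and the remark is dropped unless its pass name matches the flag's regex. A hypothetical call site; the pass name "loop-unroll" and the message are illustrative only:

    // Printed only when the user passes e.g. -pass-remarks=loop-unroll
    // (or a regex matching that name) on the command line.
    void reportUnrolled(Function &F, const DebugLoc &DL, unsigned Count) {
      emitOptimizationRemark(F.getContext(), "loop-unroll", F, DL,
                             "completely unrolled loop with " + Twine(Count) +
                                 " iterations");
    }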
+ if (!Val.empty()) { + Pattern = std::make_shared(Val); + std::string RegexError; + if (!Pattern->isValid(RegexError)) + report_fatal_error("Invalid regular expression '" + Val + + "' in -pass-remarks: " + RegexError, + false); + } + }; +}; + +static PassRemarksOpt PassRemarksOptLoc; +static PassRemarksOpt PassRemarksMissedOptLoc; +static PassRemarksOpt PassRemarksAnalysisOptLoc; + +// -pass-remarks +// Command line flag to enable emitOptimizationRemark() +static cl::opt> +PassRemarks("pass-remarks", cl::value_desc("pattern"), + cl::desc("Enable optimization remarks from passes whose name match " + "the given regular expression"), + cl::Hidden, cl::location(PassRemarksOptLoc), cl::ValueRequired, + cl::ZeroOrMore); + +// -pass-remarks-missed +// Command line flag to enable emitOptimizationRemarkMissed() +static cl::opt> PassRemarksMissed( + "pass-remarks-missed", cl::value_desc("pattern"), + cl::desc("Enable missed optimization remarks from passes whose name match " + "the given regular expression"), + cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired, + cl::ZeroOrMore); + +// -pass-remarks-analysis +// Command line flag to enable emitOptimizationRemarkAnalysis() +static cl::opt> +PassRemarksAnalysis( + "pass-remarks-analysis", cl::value_desc("pattern"), + cl::desc( + "Enable optimization analysis remarks from passes whose name match " + "the given regular expression"), + cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired, + cl::ZeroOrMore); +} + int llvm::getNextAvailablePluginDiagnosticKind() { static sys::cas_flag PluginKindID = DK_FirstPluginKind; return (int)sys::AtomicIncrement(&PluginKindID); @@ -64,3 +126,66 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { DP << getFileName() << ": "; DP << getMsg(); } + +bool DiagnosticInfoOptimizationRemarkBase::isLocationAvailable() const { + return getFunction().getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr; +} + +void DiagnosticInfoOptimizationRemarkBase::getLocation(StringRef *Filename, + unsigned *Line, + unsigned *Column) const { + DILocation DIL(getDebugLoc().getAsMDNode(getFunction().getContext())); + *Filename = DIL.getFilename(); + *Line = DIL.getLineNumber(); + *Column = DIL.getColumnNumber(); +} + +const std::string DiagnosticInfoOptimizationRemarkBase::getLocationStr() const { + StringRef Filename(""); + unsigned Line = 0; + unsigned Column = 0; + if (isLocationAvailable()) + getLocation(&Filename, &Line, &Column); + return Twine(Filename + ":" + Twine(Line) + ":" + Twine(Column)).str(); +} + +void DiagnosticInfoOptimizationRemarkBase::print(DiagnosticPrinter &DP) const { + DP << getLocationStr() << ": " << getMsg(); +} + +bool DiagnosticInfoOptimizationRemark::isEnabled() const { + return PassRemarksOptLoc.Pattern && + PassRemarksOptLoc.Pattern->match(getPassName()); +} + +bool DiagnosticInfoOptimizationRemarkMissed::isEnabled() const { + return PassRemarksMissedOptLoc.Pattern && + PassRemarksMissedOptLoc.Pattern->match(getPassName()); +} + +bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled() const { + return PassRemarksAnalysisOptLoc.Pattern && + PassRemarksAnalysisOptLoc.Pattern->match(getPassName()); +} + +void llvm::emitOptimizationRemark(LLVMContext &Ctx, const char *PassName, + const Function &Fn, const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose(DiagnosticInfoOptimizationRemark(PassName, Fn, DLoc, Msg)); +} + +void llvm::emitOptimizationRemarkMissed(LLVMContext &Ctx, const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + 
const Twine &Msg) { + Ctx.diagnose(DiagnosticInfoOptimizationRemarkMissed(PassName, Fn, DLoc, Msg)); +} + +void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose( + DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg)); +} diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index c2ea0e1..fe32c46 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -44,7 +44,7 @@ void Argument::anchor() { } Argument::Argument(Type *Ty, const Twine &Name, Function *Par) : Value(Ty, Value::ArgumentVal) { - Parent = 0; + Parent = nullptr; // Make sure that we get added to a function LeakDetector::addGarbageObject(this); @@ -76,6 +76,14 @@ unsigned Argument::getArgNo() const { return ArgIdx; } +/// hasNonNullAttr - Return true if this argument has the nonnull attribute on +/// it in its containing function. +bool Argument::hasNonNullAttr() const { + if (!getType()->isPointerTy()) return false; + return getParent()->getAttributes(). + hasAttribute(getArgNo()+1, Attribute::NonNull); +} + /// hasByValAttr - Return true if this argument has the byval attribute on it /// in its containing function. bool Argument::hasByValAttr() const { @@ -209,8 +217,8 @@ void Function::eraseFromParent() { Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name, Module *ParentModule) - : GlobalValue(PointerType::getUnqual(Ty), - Value::FunctionVal, 0, 0, Linkage, name) { + : GlobalObject(PointerType::getUnqual(Ty), + Value::FunctionVal, nullptr, 0, Linkage, name) { assert(FunctionType::isValidReturnType(getReturnType()) && "invalid return type"); SymTab = new ValueSymbolTable(); @@ -293,7 +301,7 @@ void Function::dropAllReferences() { BasicBlocks.begin()->eraseFromParent(); // Prefix data is stored in a side table. - setPrefixData(0); + setPrefixData(nullptr); } void Function::addAttribute(unsigned i, Attribute::AttrKind attr) { @@ -348,10 +356,10 @@ void Function::clearGC() { GCNames->erase(this); if (GCNames->empty()) { delete GCNames; - GCNames = 0; + GCNames = nullptr; if (GCNamePool->empty()) { delete GCNamePool; - GCNamePool = 0; + GCNamePool = nullptr; } } } @@ -361,7 +369,7 @@ void Function::clearGC() { /// create a Function) from the Function Src to this one. void Function::copyAttributesFrom(const GlobalValue *Src) { assert(isa(Src) && "Expected a Function!"); - GlobalValue::copyAttributesFrom(Src); + GlobalObject::copyAttributesFrom(Src); const Function *SrcF = cast(Src); setCallingConv(SrcF->getCallingConv()); setAttributes(SrcF->getAttributes()); @@ -372,7 +380,7 @@ void Function::copyAttributesFrom(const GlobalValue *Src) { if (SrcF->hasPrefixData()) setPrefixData(SrcF->getPrefixData()); else - setPrefixData(0); + setPrefixData(nullptr); } /// getIntrinsicID - This method returns the ID number of the specified diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index f69bdc4..f2099d6 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -26,11 +26,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// // GCOVFile implementation. -/// ~GCOVFile - Delete GCOVFile and its content. -GCOVFile::~GCOVFile() { - DeleteContainerPointers(Functions); -} - /// readGCNO - Read GCNO buffer. 
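The new Argument::hasNonNullAttr queries getArgNo()+1 because attribute lists index the return value at slot 0 and the first parameter at slot 1 (the function itself uses ~0U). A small sketch built on the new accessor, using the 3.5-era iterator API:

    // Return true if any parameter of F carries the new nonnull attribute;
    // hasNonNullAttr is false for non-pointer arguments by construction.
    bool hasAnyNonNullArg(const Function &F) {
      for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
           I != E; ++I)
        if (I->hasNonNullAttr())
          return true;
      return false;
    }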
bool GCOVFile::readGCNO(GCOVBuffer &Buffer) { if (!Buffer.readGCNOFormat()) return false; @@ -39,10 +34,10 @@ bool GCOVFile::readGCNO(GCOVBuffer &Buffer) { if (!Buffer.readInt(Checksum)) return false; while (true) { if (!Buffer.readFunctionTag()) break; - GCOVFunction *GFun = new GCOVFunction(*this); + auto GFun = make_unique(*this); if (!GFun->readGCNO(Buffer, Version)) return false; - Functions.push_back(GFun); + Functions.push_back(std::move(GFun)); } GCNOInitialized = true; @@ -97,17 +92,15 @@ bool GCOVFile::readGCDA(GCOVBuffer &Buffer) { /// dump - Dump GCOVFile content to dbgs() for debugging purposes. void GCOVFile::dump() const { - for (SmallVectorImpl::const_iterator I = Functions.begin(), - E = Functions.end(); I != E; ++I) - (*I)->dump(); + for (const auto &FPtr : Functions) + FPtr->dump(); } /// collectLineCounts - Collect line counts. This must be used after /// reading .gcno and .gcda files. void GCOVFile::collectLineCounts(FileInfo &FI) { - for (SmallVectorImpl::iterator I = Functions.begin(), - E = Functions.end(); I != E; ++I) - (*I)->collectLineCounts(FI); + for (const auto &FPtr : Functions) + FPtr->collectLineCounts(FI); FI.setRunCount(RunCount); FI.setProgramCount(ProgramCount); } @@ -115,12 +108,6 @@ void GCOVFile::collectLineCounts(FileInfo &FI) { //===----------------------------------------------------------------------===// // GCOVFunction implementation. -/// ~GCOVFunction - Delete GCOVFunction and its content. -GCOVFunction::~GCOVFunction() { - DeleteContainerPointers(Blocks); - DeleteContainerPointers(Edges); -} - /// readGCNO - Read a function from the GCNO buffer. Return false if an error /// occurs. bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { @@ -150,7 +137,7 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { if (!Buff.readInt(BlockCount)) return false; for (uint32_t i = 0, e = BlockCount; i != e; ++i) { if (!Buff.readInt(Dummy)) return false; // Block flags; - Blocks.push_back(new GCOVBlock(*this, i)); + Blocks.push_back(make_unique(*this, i)); } // read edges. @@ -168,8 +155,8 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { for (uint32_t i = 0, e = EdgeCount; i != e; ++i) { uint32_t Dst; if (!Buff.readInt(Dst)) return false; - GCOVEdge *Edge = new GCOVEdge(Blocks[BlockNo], Blocks[Dst]); - Edges.push_back(Edge); + Edges.push_back(make_unique(*Blocks[BlockNo], *Blocks[Dst])); + GCOVEdge *Edge = Edges.back().get(); Blocks[BlockNo]->addDstEdge(Edge); Blocks[Dst]->addSrcEdge(Edge); if (!Buff.readInt(Dummy)) return false; // Edge flag @@ -179,34 +166,46 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) { // read line table. while (Buff.readLineTag()) { uint32_t LineTableLength; + // Read the length of this line table. if (!Buff.readInt(LineTableLength)) return false; uint32_t EndPos = Buff.getCursor() + LineTableLength*4; uint32_t BlockNo; + // Read the block number this table is associated with. if (!Buff.readInt(BlockNo)) return false; if (BlockNo >= BlockCount) { errs() << "Unexpected block number: " << BlockNo << " (in " << Name << ").\n"; return false; } - GCOVBlock *Block = Blocks[BlockNo]; - if (!Buff.readInt(Dummy)) return false; // flag - while (Buff.getCursor() != (EndPos - 4)) { + GCOVBlock &Block = *Blocks[BlockNo]; + // Read the word that pads the beginning of the line table. This may be a + // flag of some sort, but seems to always be zero. 
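The GCOV changes in this hunk swap raw owning pointers (plus DeleteContainerPointers in the destructors) for containers of std::unique_ptr, which is why the explicit ~GCOVFile and ~GCOVFunction disappear. The pattern in isolation, as plain C++; note that LLVM 3.5 builds as C++11 and therefore carries its own llvm::make_unique rather than the C++14 std::make_unique used here:

    #include <memory>
    #include <vector>

    struct Block {
      explicit Block(int N) : Number(N) {}
      int Number;
    };

    struct File {
      // Owning container: elements are destroyed with the File itself,
      // so no hand-written destructor is needed.
      std::vector<std::unique_ptr<Block>> Blocks;

      void addBlock(int N) { Blocks.push_back(std::make_unique<Block>(N)); }

      int sum() const {
        int S = 0;
        for (const auto &B : Blocks) // iterate by const ref to the owner
          S += B->Number;
        return S;
      }
    };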
+ if (!Buff.readInt(Dummy)) return false; + + // Line information starts here and continues up until the last word. + if (Buff.getCursor() != (EndPos - sizeof(uint32_t))) { StringRef F; + // Read the source file name. if (!Buff.readString(F)) return false; if (Filename != F) { errs() << "Multiple sources for a single basic block: " << Filename << " != " << F << " (in " << Name << ").\n"; return false; } - if (Buff.getCursor() == (EndPos - 4)) break; - while (true) { + // Read lines up to, but not including, the null terminator. + while (Buff.getCursor() < (EndPos - 2 * sizeof(uint32_t))) { uint32_t Line; if (!Buff.readInt(Line)) return false; - if (!Line) break; - Block->addLine(Line); + // Line 0 means this instruction was injected by the compiler. Skip it. + if (!Line) continue; + Block.addLine(Line); } + // Read the null terminator. + if (!Buff.readInt(Dummy)) return false; } - if (!Buff.readInt(Dummy)) return false; // flag + // The last word is either a flag or padding, it isn't clear which. Skip + // over it. + if (!Buff.readInt(Dummy)) return false; } return true; } @@ -300,9 +299,8 @@ uint64_t GCOVFunction::getExitCount() const { /// dump - Dump GCOVFunction content to dbgs() for debugging purposes. void GCOVFunction::dump() const { dbgs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n"; - for (SmallVectorImpl::const_iterator I = Blocks.begin(), - E = Blocks.end(); I != E; ++I) - (*I)->dump(); + for (const auto &Block : Blocks) + Block->dump(); } /// collectLineCounts - Collect line counts. This must be used after @@ -313,9 +311,8 @@ void GCOVFunction::collectLineCounts(FileInfo &FI) { if (LineNumber == 0) return; - for (SmallVectorImpl::iterator I = Blocks.begin(), - E = Blocks.end(); I != E; ++I) - (*I)->collectLineCounts(FI); + for (const auto &Block : Blocks) + Block->collectLineCounts(FI); FI.addFunctionLine(Filename, LineNumber, this); } @@ -335,8 +332,8 @@ void GCOVBlock::addCount(size_t DstEdgeNo, uint64_t N) { assert(DstEdgeNo < DstEdges.size()); // up to caller to ensure EdgeNo is valid DstEdges[DstEdgeNo]->Count = N; Counter += N; - if (!DstEdges[DstEdgeNo]->Dst->getNumDstEdges()) - DstEdges[DstEdgeNo]->Dst->Counter += N; + if (!DstEdges[DstEdgeNo]->Dst.getNumDstEdges()) + DstEdges[DstEdgeNo]->Dst.Counter += N; } /// sortDstEdges - Sort destination edges by block number, nop if already @@ -363,7 +360,7 @@ void GCOVBlock::dump() const { dbgs() << "\tSource Edges : "; for (EdgeIterator I = SrcEdges.begin(), E = SrcEdges.end(); I != E; ++I) { const GCOVEdge *Edge = *I; - dbgs() << Edge->Src->Number << " (" << Edge->Count << "), "; + dbgs() << Edge->Src.Number << " (" << Edge->Count << "), "; } dbgs() << "\n"; } @@ -371,7 +368,7 @@ void GCOVBlock::dump() const { dbgs() << "\tDestination Edges : "; for (EdgeIterator I = DstEdges.begin(), E = DstEdges.end(); I != E; ++I) { const GCOVEdge *Edge = *I; - dbgs() << Edge->Dst->Number << " (" << Edge->Count << "), "; + dbgs() << Edge->Dst.Number << " (" << Edge->Count << "), "; } dbgs() << "\n"; } @@ -435,11 +432,35 @@ static raw_ostream &operator<<(raw_ostream &OS, const formatBranchInfo &FBI) { return OS; } +namespace { +class LineConsumer { + std::unique_ptr Buffer; + StringRef Remaining; +public: + LineConsumer(StringRef Filename) { + if (error_code EC = MemoryBuffer::getFileOrSTDIN(Filename, Buffer)) { + errs() << Filename << ": " << EC.message() << "\n"; + Remaining = ""; + } else + Remaining = Buffer->getBuffer(); + } + bool empty() { return Remaining.empty(); } + void printNext(raw_ostream &OS, uint32_t 
LineNum) { + StringRef Line; + if (empty()) + Line = "/*EOF*/"; + else + std::tie(Line, Remaining) = Remaining.split("\n"); + OS << format("%5u:", LineNum) << Line << "\n"; + } +}; +} + /// Convert a path to a gcov filename. If PreservePaths is true, this /// translates "/" to "#", ".." to "^", and drops ".", to match gcov. static std::string mangleCoveragePath(StringRef Filename, bool PreservePaths) { if (!PreservePaths) - return (sys::path::filename(Filename) + ".gcov").str(); + return sys::path::filename(Filename).str(); // This behaviour is defined by gcov in terms of text replacements, so it's // not likely to do anything useful on filesystems with different textual @@ -467,28 +488,52 @@ static std::string mangleCoveragePath(StringRef Filename, bool PreservePaths) { if (S < I) Result.append(S, I); - Result.append(".gcov"); return Result.str(); } +std::string FileInfo::getCoveragePath(StringRef Filename, + StringRef MainFilename) { + if (Options.NoOutput) + // This is probably a bug in gcov, but when -n is specified, paths aren't + // mangled at all, and the -l and -p options are ignored. Here, we do the + // same. + return Filename; + + std::string CoveragePath; + if (Options.LongFileNames && !Filename.equals(MainFilename)) + CoveragePath = + mangleCoveragePath(MainFilename, Options.PreservePaths) + "##"; + CoveragePath += + mangleCoveragePath(Filename, Options.PreservePaths) + ".gcov"; + return CoveragePath; +} + +std::unique_ptr +FileInfo::openCoveragePath(StringRef CoveragePath) { + if (Options.NoOutput) + return llvm::make_unique(); + + std::string ErrorInfo; + auto OS = llvm::make_unique(CoveragePath.str().c_str(), + ErrorInfo, sys::fs::F_Text); + if (!ErrorInfo.empty()) { + errs() << ErrorInfo << "\n"; + return llvm::make_unique(); + } + return std::move(OS); +} + /// print - Print source files with collected line count information. -void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) { +void FileInfo::print(StringRef MainFilename, StringRef GCNOFile, + StringRef GCDAFile) { for (StringMap::const_iterator I = LineInfo.begin(), E = LineInfo.end(); I != E; ++I) { StringRef Filename = I->first(); - std::unique_ptr Buff; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { - errs() << Filename << ": " << ec.message() << "\n"; - return; - } - StringRef AllLines = Buff->getBuffer(); + auto AllLines = LineConsumer(Filename); - std::string CoveragePath = mangleCoveragePath(Filename, - Options.PreservePaths); - std::string ErrorInfo; - raw_fd_ostream OS(CoveragePath.c_str(), ErrorInfo, sys::fs::F_Text); - if (!ErrorInfo.empty()) - errs() << ErrorInfo << "\n"; + std::string CoveragePath = getCoveragePath(Filename, MainFilename); + std::unique_ptr S = openCoveragePath(CoveragePath); + raw_ostream &OS = *S; OS << " -: 0:Source:" << Filename << "\n"; OS << " -: 0:Graph:" << GCNOFile << "\n"; @@ -498,7 +543,8 @@ void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) { const LineData &Line = I->second; GCOVCoverage FileCoverage(Filename); - for (uint32_t LineIndex = 0; !AllLines.empty(); ++LineIndex) { + for (uint32_t LineIndex = 0; + LineIndex < Line.LastLine || !AllLines.empty(); ++LineIndex) { if (Options.BranchInfo) { FunctionLines::const_iterator FuncsIt = Line.Functions.find(LineIndex); if (FuncsIt != Line.Functions.end()) @@ -509,9 +555,7 @@ void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) { if (BlocksIt == Line.Blocks.end()) { // No basic blocks are on this line. Not an executable line of code. 
OS << " -:"; - std::pair P = AllLines.split('\n'); - OS << format("%5u:", LineIndex+1) << P.first << "\n"; - AllLines = P.second; + AllLines.printNext(OS, LineIndex + 1); } else { const BlockVector &Blocks = BlocksIt->second; @@ -573,9 +617,7 @@ void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) { } ++FileCoverage.LogicalLines; - std::pair P = AllLines.split('\n'); - OS << format("%5u:", LineIndex+1) << P.first << "\n"; - AllLines = P.second; + AllLines.printNext(OS, LineIndex + 1); uint32_t BlockNo = 0; uint32_t EdgeNo = 0; @@ -605,10 +647,11 @@ void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) { if (Options.FuncCoverage) printFuncCoverage(); printFileCoverage(); + return; } /// printFunctionSummary - Print function and block summary. -void FileInfo::printFunctionSummary(raw_fd_ostream &OS, +void FileInfo::printFunctionSummary(raw_ostream &OS, const FunctionVector &Funcs) const { for (FunctionVector::const_iterator I = Funcs.begin(), E = Funcs.end(); I != E; ++I) { @@ -617,8 +660,8 @@ void FileInfo::printFunctionSummary(raw_fd_ostream &OS, uint32_t BlocksExec = 0; for (GCOVFunction::BlockIterator I = Func->block_begin(), E = Func->block_end(); I != E; ++I) { - const GCOVBlock *Block = *I; - if (Block->getNumDstEdges() && Block->getCount()) + const GCOVBlock &Block = **I; + if (Block.getNumDstEdges() && Block.getCount()) ++BlocksExec; } @@ -630,7 +673,7 @@ void FileInfo::printFunctionSummary(raw_fd_ostream &OS, } /// printBlockInfo - Output counts for each block. -void FileInfo::printBlockInfo(raw_fd_ostream &OS, const GCOVBlock &Block, +void FileInfo::printBlockInfo(raw_ostream &OS, const GCOVBlock &Block, uint32_t LineIndex, uint32_t &BlockNo) const { if (Block.getCount() == 0) OS << " $$$$$:"; @@ -640,7 +683,7 @@ void FileInfo::printBlockInfo(raw_fd_ostream &OS, const GCOVBlock &Block, } /// printBranchInfo - Print conditional branch probabilities. -void FileInfo::printBranchInfo(raw_fd_ostream &OS, const GCOVBlock &Block, +void FileInfo::printBranchInfo(raw_ostream &OS, const GCOVBlock &Block, GCOVCoverage &Coverage, uint32_t &EdgeNo) { SmallVector BranchCounts; uint64_t TotalCounts = 0; @@ -670,7 +713,7 @@ void FileInfo::printBranchInfo(raw_fd_ostream &OS, const GCOVBlock &Block, } /// printUncondBranchInfo - Print unconditional branch probabilities. -void FileInfo::printUncondBranchInfo(raw_fd_ostream &OS, uint32_t &EdgeNo, +void FileInfo::printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo, uint64_t Count) const { OS << format("unconditional %2u ", EdgeNo++) << formatBranchInfo(Options, Count, Count) << "\n"; @@ -716,6 +759,8 @@ void FileInfo::printFileCoverage() const { const GCOVCoverage &Coverage = I->second; outs() << "File '" << Coverage.Name << "'\n"; printCoverage(Coverage); - outs() << Coverage.Name << ":creating '" << Filename << "'\n\n"; + if (!Options.NoOutput) + outs() << Coverage.Name << ":creating '" << Filename << "'\n"; + outs() << "\n"; } } diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index f338dd7..c905cfe 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -53,23 +53,41 @@ void GlobalValue::destroyConstant() { /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalValue) from the GlobalValue Src to this one. 
void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { - setAlignment(Src->getAlignment()); - setSection(Src->getSection()); setVisibility(Src->getVisibility()); setUnnamedAddr(Src->hasUnnamedAddr()); setDLLStorageClass(Src->getDLLStorageClass()); } -void GlobalValue::setAlignment(unsigned Align) { - assert((!isa(this) || !Align) && - "GlobalAlias should not have an alignment!"); +unsigned GlobalValue::getAlignment() const { + if (auto *GA = dyn_cast(this)) + return GA->getAliasee()->getAlignment(); + + return cast(this)->getAlignment(); +} + +void GlobalObject::setAlignment(unsigned Align) { assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!"); assert(Align <= MaximumAlignment && "Alignment is greater than MaximumAlignment!"); - Alignment = Log2_32(Align) + 1; + setGlobalValueSubClassData(Log2_32(Align) + 1); assert(getAlignment() == Align && "Alignment representation error!"); } +void GlobalObject::copyAttributesFrom(const GlobalValue *Src) { + const auto *GV = cast(Src); + GlobalValue::copyAttributesFrom(GV); + setAlignment(GV->getAlignment()); + setSection(GV->getSection()); +} + +const std::string &GlobalValue::getSection() const { + if (auto *GA = dyn_cast(this)) + return GA->getAliasee()->getSection(); + return cast(this)->getSection(); +} + +void GlobalObject::setSection(StringRef S) { Section = S; } + bool GlobalValue::isDeclaration() const { // Globals are definitions if they have an initializer. if (const GlobalVariable *GV = dyn_cast(this)) @@ -83,22 +101,20 @@ bool GlobalValue::isDeclaration() const { assert(isa(this)); return false; } - + //===----------------------------------------------------------------------===// // GlobalVariable Implementation //===----------------------------------------------------------------------===// GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, - Constant *InitVal, - const Twine &Name, ThreadLocalMode TLMode, - unsigned AddressSpace, + Constant *InitVal, const Twine &Name, + ThreadLocalMode TLMode, unsigned AddressSpace, bool isExternallyInitialized) - : GlobalValue(PointerType::get(Ty, AddressSpace), - Value::GlobalVariableVal, - OperandTraits::op_begin(this), - InitVal != 0, Link, Name), - isConstantGlobal(constant), threadLocalMode(TLMode), - isExternallyInitializedConstant(isExternallyInitialized) { + : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, + OperandTraits::op_begin(this), + InitVal != nullptr, Link, Name), + isConstantGlobal(constant), threadLocalMode(TLMode), + isExternallyInitializedConstant(isExternallyInitialized) { if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); @@ -110,24 +126,22 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, - const Twine &Name, - GlobalVariable *Before, ThreadLocalMode TLMode, - unsigned AddressSpace, + const Twine &Name, GlobalVariable *Before, + ThreadLocalMode TLMode, unsigned AddressSpace, bool isExternallyInitialized) - : GlobalValue(PointerType::get(Ty, AddressSpace), - Value::GlobalVariableVal, - OperandTraits::op_begin(this), - InitVal != 0, Link, Name), - isConstantGlobal(constant), threadLocalMode(TLMode), - isExternallyInitializedConstant(isExternallyInitialized) { + : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, + OperandTraits::op_begin(this), + InitVal != nullptr, Link, Name), + 
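GlobalObject::setAlignment now stashes the alignment in the GlobalValue subclass-data bits as Log2_32(Align)+1, so the field holds small codes instead of byte counts while 0 still means "no alignment". The encoding round-trip in plain C++:

    #include <cassert>

    // Encode a power-of-two alignment as a small code: 0 -> 0, A -> log2(A)+1.
    unsigned encodeAlign(unsigned Align) {
      assert((Align & (Align - 1)) == 0 && "alignment must be a power of two");
      unsigned Log2 = 0;
      while ((1u << Log2) < Align)
        ++Log2;                       // stand-in for llvm::Log2_32
      return Align ? Log2 + 1 : 0;
    }

    unsigned decodeAlign(unsigned Code) { return Code ? 1u << (Code - 1) : 0; }

    // encodeAlign(16) == 5, decodeAlign(5) == 16, decodeAlign(0) == 0.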
isConstantGlobal(constant), threadLocalMode(TLMode), + isExternallyInitializedConstant(isExternallyInitialized) { if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); Op<0>() = InitVal; } - + LeakDetector::addGarbageObject(this); - + if (Before) Before->getParent()->getGlobalList().insert(Before, this); else @@ -171,9 +185,9 @@ void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To, } void GlobalVariable::setInitializer(Constant *InitVal) { - if (InitVal == 0) { + if (!InitVal) { if (hasInitializer()) { - Op<0>().set(0); + Op<0>().set(nullptr); NumOperands = 0; } } else { @@ -189,7 +203,7 @@ void GlobalVariable::setInitializer(Constant *InitVal) { /// create a GlobalVariable) from the GlobalVariable Src to this one. void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { assert(isa(Src) && "Expected a GlobalVariable!"); - GlobalValue::copyAttributesFrom(Src); + GlobalObject::copyAttributesFrom(Src); const GlobalVariable *SrcVar = cast(Src); setThreadLocalMode(SrcVar->getThreadLocalMode()); } @@ -199,20 +213,47 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { // GlobalAlias Implementation //===----------------------------------------------------------------------===// -GlobalAlias::GlobalAlias(Type *Ty, LinkageTypes Link, - const Twine &Name, Constant* aliasee, +GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link, + const Twine &Name, GlobalObject *Aliasee, Module *ParentModule) - : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) { + : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalAliasVal, + &Op<0>(), 1, Link, Name) { LeakDetector::addGarbageObject(this); - - if (aliasee) - assert(aliasee->getType() == Ty && "Alias and aliasee types should match!"); - Op<0>() = aliasee; + Op<0>() = Aliasee; if (ParentModule) ParentModule->getAliasList().push_back(this); } +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Link, const Twine &Name, + GlobalObject *Aliasee, Module *ParentModule) { + return new GlobalAlias(Ty, AddressSpace, Link, Name, Aliasee, ParentModule); +} + +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + Module *Parent) { + return create(Ty, AddressSpace, Linkage, Name, nullptr, Parent); +} + +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + GlobalObject *Aliasee) { + return create(Ty, AddressSpace, Linkage, Name, Aliasee, Aliasee->getParent()); +} + +GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name, + GlobalObject *Aliasee) { + PointerType *PTy = Aliasee->getType(); + return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name, + Aliasee); +} + +GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalObject *Aliasee) { + return create(Aliasee->getLinkage(), Name, Aliasee); +} + void GlobalAlias::setParent(Module *parent) { if (getParent()) LeakDetector::addGarbageObject(this); @@ -229,42 +270,4 @@ void GlobalAlias::eraseFromParent() { getParent()->getAliasList().erase(this); } -void GlobalAlias::setAliasee(Constant *Aliasee) { - assert((!Aliasee || Aliasee->getType() == getType()) && - "Alias and aliasee types should match!"); - - setOperand(0, Aliasee); -} - -static GlobalValue *getAliaseeGV(GlobalAlias *GA) { - Constant *C = GA->getAliasee(); - assert(C && "Must alias something"); - - if (GlobalValue *GV = dyn_cast(C)) - return GV; - - 
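GlobalAlias construction moves behind the create() factories above, and the aliasee is now a GlobalObject (a function or global variable) rather than an arbitrary Constant. Hypothetical uses, assuming F is a Function* already inserted into a module:

    // Shortest form: alias type, address space, linkage and parent module
    // are all derived from the aliasee.
    GlobalAlias *A1 = GlobalAlias::create("f_alias", F);

    // Same, but with an explicit linkage for the alias itself.
    GlobalAlias *A2 =
        GlobalAlias::create(GlobalValue::InternalLinkage, "f_alias_priv", F);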
ConstantExpr *CE = cast(C); - assert((CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::AddrSpaceCast || - CE->getOpcode() == Instruction::GetElementPtr) && - "Unsupported aliasee"); - - return cast(CE->getOperand(0)); -} - -GlobalValue *GlobalAlias::getAliasedGlobal() { - SmallPtrSet Visited; - - GlobalAlias *GA = this; - - for (;;) { - GlobalValue *GV = getAliaseeGV(GA); - if (!Visited.insert(GV)) - return 0; - - // Iterate over aliasing chain. - GA = dyn_cast(GV); - if (!GA) - return GV; - } -} +void GlobalAlias::setAliasee(GlobalObject *Aliasee) { setOperand(0, Aliasee); } diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp index 099c27c..c8a1747 100644 --- a/lib/IR/IRPrintingPasses.cpp +++ b/lib/IR/IRPrintingPasses.cpp @@ -94,7 +94,7 @@ public: return false; } - void getAnalysisUsage(AnalysisUsage &AU) const override{ + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } }; diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index 62d191d..a3e1da3b1 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -274,7 +274,7 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { break; default: StructType *STy = dyn_cast(Ty->getReturnType()); - if (STy == 0 || STy->getNumElements() != NumOutputs) + if (!STy || STy->getNumElements() != NumOutputs) return false; break; } diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index d31a92e..28cc4cb 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -23,7 +23,7 @@ using namespace llvm; Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, Instruction *InsertBefore) - : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) { + : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) { // Make sure that we get added to a basicblock LeakDetector::addGarbageObject(this); @@ -41,7 +41,7 @@ const DataLayout *Instruction::getDataLayout() const { Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd) - : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) { + : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) { // Make sure that we get added to a basicblock LeakDetector::addGarbageObject(this); @@ -53,7 +53,7 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, // Out of line virtual method, so the vtable, etc has a home. Instruction::~Instruction() { - assert(Parent == 0 && "Instruction still linked in the program!"); + assert(!Parent && "Instruction still linked in the program!"); if (hasMetadataHashEntry()) clearMetadataHashEntries(); } @@ -262,6 +262,58 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { } } +/// Return true if both instructions have the same special state +/// This must be kept in sync with lib/Transforms/IPO/MergeFunctions.cpp. 
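The deleted getAliasedGlobal walked the alias chain with a visited set so that a cycle of aliases returned null instead of looping forever; with aliasees restricted to GlobalObject the walk is no longer needed here. The shape of that loop, as a standalone sketch:

    #include <set>

    struct Node { Node *Next; }; // toy stand-in for an alias chain

    // Follow Next links to the end of the chain; return nullptr on a cycle,
    // mirroring the SmallPtrSet-based loop that was removed above.
    Node *resolveChain(Node *N) {
      std::set<Node *> Visited;
      while (N) {
        if (!Visited.insert(N).second)
          return nullptr;            // seen before: cyclic chain
        if (!N->Next)
          return N;                  // reached the underlying definition
        N = N->Next;
      }
      return nullptr;
    }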
+static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, + bool IgnoreAlignment = false) { + assert(I1->getOpcode() == I2->getOpcode() && + "Can not compare special state of different instructions"); + + if (const LoadInst *LI = dyn_cast(I1)) + return LI->isVolatile() == cast(I2)->isVolatile() && + (LI->getAlignment() == cast(I2)->getAlignment() || + IgnoreAlignment) && + LI->getOrdering() == cast(I2)->getOrdering() && + LI->getSynchScope() == cast(I2)->getSynchScope(); + if (const StoreInst *SI = dyn_cast(I1)) + return SI->isVolatile() == cast(I2)->isVolatile() && + (SI->getAlignment() == cast(I2)->getAlignment() || + IgnoreAlignment) && + SI->getOrdering() == cast(I2)->getOrdering() && + SI->getSynchScope() == cast(I2)->getSynchScope(); + if (const CmpInst *CI = dyn_cast(I1)) + return CI->getPredicate() == cast(I2)->getPredicate(); + if (const CallInst *CI = dyn_cast(I1)) + return CI->isTailCall() == cast(I2)->isTailCall() && + CI->getCallingConv() == cast(I2)->getCallingConv() && + CI->getAttributes() == cast(I2)->getAttributes(); + if (const InvokeInst *CI = dyn_cast(I1)) + return CI->getCallingConv() == cast(I2)->getCallingConv() && + CI->getAttributes() == + cast(I2)->getAttributes(); + if (const InsertValueInst *IVI = dyn_cast(I1)) + return IVI->getIndices() == cast(I2)->getIndices(); + if (const ExtractValueInst *EVI = dyn_cast(I1)) + return EVI->getIndices() == cast(I2)->getIndices(); + if (const FenceInst *FI = dyn_cast(I1)) + return FI->getOrdering() == cast(I2)->getOrdering() && + FI->getSynchScope() == cast(I2)->getSynchScope(); + if (const AtomicCmpXchgInst *CXI = dyn_cast(I1)) + return CXI->isVolatile() == cast(I2)->isVolatile() && + CXI->getSuccessOrdering() == + cast(I2)->getSuccessOrdering() && + CXI->getFailureOrdering() == + cast(I2)->getFailureOrdering() && + CXI->getSynchScope() == cast(I2)->getSynchScope(); + if (const AtomicRMWInst *RMWI = dyn_cast(I1)) + return RMWI->getOperation() == cast(I2)->getOperation() && + RMWI->isVolatile() == cast(I2)->isVolatile() && + RMWI->getOrdering() == cast(I2)->getOrdering() && + RMWI->getSynchScope() == cast(I2)->getSynchScope(); + + return true; +} + /// isIdenticalTo - Return true if the specified instruction is exactly /// identical to the current one. This means that all operands match and any /// extra information (e.g. load is volatile) agree. @@ -284,51 +336,13 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { if (!std::equal(op_begin(), op_end(), I->op_begin())) return false; - // Check special state that is a part of some instructions. 
- if (const LoadInst *LI = dyn_cast(this)) - return LI->isVolatile() == cast(I)->isVolatile() && - LI->getAlignment() == cast(I)->getAlignment() && - LI->getOrdering() == cast(I)->getOrdering() && - LI->getSynchScope() == cast(I)->getSynchScope(); - if (const StoreInst *SI = dyn_cast(this)) - return SI->isVolatile() == cast(I)->isVolatile() && - SI->getAlignment() == cast(I)->getAlignment() && - SI->getOrdering() == cast(I)->getOrdering() && - SI->getSynchScope() == cast(I)->getSynchScope(); - if (const CmpInst *CI = dyn_cast(this)) - return CI->getPredicate() == cast(I)->getPredicate(); - if (const CallInst *CI = dyn_cast(this)) - return CI->isTailCall() == cast(I)->isTailCall() && - CI->getCallingConv() == cast(I)->getCallingConv() && - CI->getAttributes() == cast(I)->getAttributes(); - if (const InvokeInst *CI = dyn_cast(this)) - return CI->getCallingConv() == cast(I)->getCallingConv() && - CI->getAttributes() == cast(I)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast(this)) - return IVI->getIndices() == cast(I)->getIndices(); - if (const ExtractValueInst *EVI = dyn_cast(this)) - return EVI->getIndices() == cast(I)->getIndices(); - if (const FenceInst *FI = dyn_cast(this)) - return FI->getOrdering() == cast(FI)->getOrdering() && - FI->getSynchScope() == cast(FI)->getSynchScope(); - if (const AtomicCmpXchgInst *CXI = dyn_cast(this)) - return CXI->isVolatile() == cast(I)->isVolatile() && - CXI->getSuccessOrdering() == - cast(I)->getSuccessOrdering() && - CXI->getFailureOrdering() == - cast(I)->getFailureOrdering() && - CXI->getSynchScope() == cast(I)->getSynchScope(); - if (const AtomicRMWInst *RMWI = dyn_cast(this)) - return RMWI->getOperation() == cast(I)->getOperation() && - RMWI->isVolatile() == cast(I)->isVolatile() && - RMWI->getOrdering() == cast(I)->getOrdering() && - RMWI->getSynchScope() == cast(I)->getSynchScope(); if (const PHINode *thisPHI = dyn_cast(this)) { const PHINode *otherPHI = cast(I); return std::equal(thisPHI->block_begin(), thisPHI->block_end(), otherPHI->block_begin()); } - return true; + + return haveSameSpecialState(this, I); } // isSameOperationAs @@ -355,50 +369,7 @@ bool Instruction::isSameOperationAs(const Instruction *I, getOperand(i)->getType() != I->getOperand(i)->getType()) return false; - // Check special state that is a part of some instructions. 
- if (const LoadInst *LI = dyn_cast(this)) - return LI->isVolatile() == cast(I)->isVolatile() && - (LI->getAlignment() == cast(I)->getAlignment() || - IgnoreAlignment) && - LI->getOrdering() == cast(I)->getOrdering() && - LI->getSynchScope() == cast(I)->getSynchScope(); - if (const StoreInst *SI = dyn_cast(this)) - return SI->isVolatile() == cast(I)->isVolatile() && - (SI->getAlignment() == cast(I)->getAlignment() || - IgnoreAlignment) && - SI->getOrdering() == cast(I)->getOrdering() && - SI->getSynchScope() == cast(I)->getSynchScope(); - if (const CmpInst *CI = dyn_cast(this)) - return CI->getPredicate() == cast(I)->getPredicate(); - if (const CallInst *CI = dyn_cast(this)) - return CI->isTailCall() == cast(I)->isTailCall() && - CI->getCallingConv() == cast(I)->getCallingConv() && - CI->getAttributes() == cast(I)->getAttributes(); - if (const InvokeInst *CI = dyn_cast(this)) - return CI->getCallingConv() == cast(I)->getCallingConv() && - CI->getAttributes() == - cast(I)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast(this)) - return IVI->getIndices() == cast(I)->getIndices(); - if (const ExtractValueInst *EVI = dyn_cast(this)) - return EVI->getIndices() == cast(I)->getIndices(); - if (const FenceInst *FI = dyn_cast(this)) - return FI->getOrdering() == cast(I)->getOrdering() && - FI->getSynchScope() == cast(I)->getSynchScope(); - if (const AtomicCmpXchgInst *CXI = dyn_cast(this)) - return CXI->isVolatile() == cast(I)->isVolatile() && - CXI->getSuccessOrdering() == - cast(I)->getSuccessOrdering() && - CXI->getFailureOrdering() == - cast(I)->getFailureOrdering() && - CXI->getSynchScope() == cast(I)->getSynchScope(); - if (const AtomicRMWInst *RMWI = dyn_cast(this)) - return RMWI->getOperation() == cast(I)->getOperation() && - RMWI->isVolatile() == cast(I)->isVolatile() && - RMWI->getOrdering() == cast(I)->getOrdering() && - RMWI->getSynchScope() == cast(I)->getSynchScope(); - - return true; + return haveSameSpecialState(this, I, IgnoreAlignment); } /// isUsedOutsideOfBlock - Return true if there are any uses of I outside of the @@ -410,7 +381,7 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const { // instructions, just check to see whether the parent of the use matches up. const Instruction *I = cast(U.getUser()); const PHINode *PN = dyn_cast(I); - if (PN == 0) { + if (!PN) { if (I->getParent() != BB) return true; continue; diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 3aa8413..13c51b8 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -68,7 +68,7 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { if (VT->getElementType() != Type::getInt1Ty(Op0->getContext())) return "vector select condition element type must be i1"; VectorType *ET = dyn_cast(Op1->getType()); - if (ET == 0) + if (!ET) return "selected values for vector select must be vectors"; if (ET->getNumElements() != VT->getNumElements()) return "vector select requires selected vectors to have " @@ -76,7 +76,7 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { } else if (Op0->getType() != Type::getInt1Ty(Op0->getContext())) { return "select condition must be i1 or "; } - return 0; + return nullptr; } @@ -123,7 +123,7 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) { std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx); // Nuke the last value. 
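With the per-opcode comparisons factored into haveSameSpecialState, isIdenticalToWhenDefined and isSameOperationAs can no longer drift apart; the old copies already had, as the deleted FenceInst check above shows (it compared FI's ordering against FI itself rather than against I). A hypothetical caller of the alignment-insensitive variant, assuming the flag from the 3.5-era Instruction API:

    // True if A and B perform the same operation, treating loads and stores
    // that differ only in alignment as equal.
    bool sameModuloAlignment(const Instruction *A, const Instruction *B) {
      return A->isSameOperationAs(B, Instruction::CompareIgnoringAlignment);
    }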
- Op<-1>().set(0); + Op<-1>().set(nullptr); --NumOperands; // If the PHI node is dead, because it has zero entries, nuke it now. @@ -164,7 +164,7 @@ Value *PHINode::hasConstantValue() const { for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i) if (getIncomingValue(i) != ConstantValue && getIncomingValue(i) != this) { if (ConstantValue != this) - return 0; // Incoming values not all the same. + return nullptr; // Incoming values not all the same. // The case where the first value is this PHI. ConstantValue = getIncomingValue(i); } @@ -180,14 +180,14 @@ Value *PHINode::hasConstantValue() const { LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn, unsigned NumReservedValues, const Twine &NameStr, Instruction *InsertBefore) - : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertBefore) { + : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertBefore) { init(PersonalityFn, 1 + NumReservedValues, NameStr); } LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn, unsigned NumReservedValues, const Twine &NameStr, BasicBlock *InsertAtEnd) - : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertAtEnd) { + : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertAtEnd) { init(PersonalityFn, 1 + NumReservedValues, NameStr); } @@ -324,7 +324,7 @@ CallInst::CallInst(const CallInst &CI) OperandTraits::op_end(this) - CI.getNumOperands(), CI.getNumOperands()) { setAttributes(CI.getAttributes()); - setTailCall(CI.isTailCall()); + setTailCallKind(CI.getTailCallKind()); setCallingConv(CI.getCallingConv()); std::copy(CI.op_begin(), CI.op_end(), op_begin()); @@ -420,8 +420,8 @@ static Instruction *createMalloc(Instruction *InsertBefore, // prototype malloc as "void *malloc(size_t)" MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL); PointerType *AllocPtrType = PointerType::getUnqual(AllocTy); - CallInst *MCall = NULL; - Instruction *Result = NULL; + CallInst *MCall = nullptr; + Instruction *Result = nullptr; if (InsertBefore) { MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore); Result = MCall; @@ -458,7 +458,7 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore, Value *AllocSize, Value *ArraySize, Function * MallocF, const Twine &Name) { - return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize, + return createMalloc(InsertBefore, nullptr, IntPtrTy, AllocTy, AllocSize, ArraySize, MallocF, Name); } @@ -474,7 +474,7 @@ Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd, Type *IntPtrTy, Type *AllocTy, Value *AllocSize, Value *ArraySize, Function *MallocF, const Twine &Name) { - return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize, + return createMalloc(nullptr, InsertAtEnd, IntPtrTy, AllocTy, AllocSize, ArraySize, MallocF, Name); } @@ -492,7 +492,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore, Type *IntPtrTy = Type::getInt8PtrTy(M->getContext()); // prototype free as "void free(void*)" Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL); - CallInst* Result = NULL; + CallInst* Result = nullptr; Value *PtrCast = Source; if (InsertBefore) { if (Source->getType() != IntPtrTy) @@ -512,14 +512,14 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore, /// CreateFree - Generate the IR for a call to the builtin free function. 
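The CallInst copy constructor above now carries over the full tail-call kind (none, tail, and the new musttail) via setTailCallKind instead of collapsing it to setTailCall's boolean. A hypothetical round-trip check:

    // clone() returns an unlinked copy; the tail-call kind must survive it.
    void checkClonePreservesTCK(const CallInst *Orig) {
      CallInst *Copy = cast<CallInst>(Orig->clone());
      assert(Copy->getTailCallKind() == Orig->getTailCallKind());
      delete Copy; // never inserted into a block, so delete directly
    }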
Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) { - return createFree(Source, InsertBefore, NULL); + return createFree(Source, InsertBefore, nullptr); } /// CreateFree - Generate the IR for a call to the builtin free function. /// Note: This function does not add the call to the basic block, that is the /// responsibility of the caller. Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) { - Instruction* FreeCall = createFree(Source, NULL, InsertAtEnd); + Instruction* FreeCall = createFree(Source, nullptr, InsertAtEnd); assert(FreeCall && "CreateFree did not create a CallInst"); return FreeCall; } @@ -699,11 +699,11 @@ BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const { UnreachableInst::UnreachableInst(LLVMContext &Context, Instruction *InsertBefore) : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable, - 0, 0, InsertBefore) { + nullptr, 0, InsertBefore) { } UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd) : TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable, - 0, 0, InsertAtEnd) { + nullptr, 0, InsertAtEnd) { } unsigned UnreachableInst::getNumSuccessorsV() const { @@ -732,7 +732,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore) : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br, OperandTraits::op_end(this) - 1, 1, InsertBefore) { - assert(IfTrue != 0 && "Branch destination may not be null!"); + assert(IfTrue && "Branch destination may not be null!"); Op<-1>() = IfTrue; } BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, @@ -752,7 +752,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd) : TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br, OperandTraits::op_end(this) - 1, 1, InsertAtEnd) { - assert(IfTrue != 0 && "Branch destination may not be null!"); + assert(IfTrue && "Branch destination may not be null!"); Op<-1>() = IfTrue; } @@ -852,7 +852,7 @@ AllocaInst::AllocaInst(Type *Ty, Value *ArraySize, AllocaInst::AllocaInst(Type *Ty, const Twine &Name, Instruction *InsertBefore) : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, - getAISize(Ty->getContext(), 0), InsertBefore) { + getAISize(Ty->getContext(), nullptr), InsertBefore) { setAlignment(0); assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); @@ -861,7 +861,7 @@ AllocaInst::AllocaInst(Type *Ty, const Twine &Name, AllocaInst::AllocaInst(Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) : UnaryInstruction(PointerType::getUnqual(Ty), Alloca, - getAISize(Ty->getContext(), 0), InsertAtEnd) { + getAISize(Ty->getContext(), nullptr), InsertAtEnd) { setAlignment(0); assert(!Ty->isVoidTy() && "Cannot allocate void!"); setName(Name); @@ -1323,7 +1323,7 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val, FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, SynchronizationScope SynchScope, Instruction *InsertBefore) - : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertBefore) { + : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) { setOrdering(Ordering); setSynchScope(SynchScope); } @@ -1331,7 +1331,7 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, SynchronizationScope SynchScope, BasicBlock *InsertAtEnd) - : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertAtEnd) { + : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) { setOrdering(Ordering); 
setSynchScope(SynchScope); } @@ -1369,7 +1369,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI) template static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef IdxList) { PointerType *PTy = dyn_cast(Ptr->getScalarType()); - if (!PTy) return 0; // Type isn't a pointer type! + if (!PTy) return nullptr; // Type isn't a pointer type! Type *Agg = PTy->getElementType(); // Handle the special case of the empty set index set, which is always valid. @@ -1379,17 +1379,17 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef IdxList) { // If there is at least one index, the top level type must be sized, otherwise // it cannot be 'stepped over'. if (!Agg->isSized()) - return 0; + return nullptr; unsigned CurIdx = 1; for (; CurIdx != IdxList.size(); ++CurIdx) { CompositeType *CT = dyn_cast(Agg); - if (!CT || CT->isPointerTy()) return 0; + if (!CT || CT->isPointerTy()) return nullptr; IndexTy Index = IdxList[CurIdx]; - if (!CT->indexValid(Index)) return 0; + if (!CT->indexValid(Index)) return nullptr; Agg = CT->getTypeAtIndex(Index); } - return CurIdx == IdxList.size() ? Agg : 0; + return CurIdx == IdxList.size() ? Agg : nullptr; } Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef IdxList) { @@ -1479,7 +1479,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index, bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) { - if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32)) + if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy()) return false; return true; } @@ -1526,7 +1526,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt, if (Elt->getType() != cast(Vec->getType())->getElementType()) return false;// Second operand of insertelement must be vector element type. - if (!Index->getType()->isIntegerTy(32)) + if (!Index->getType()->isIntegerTy()) return false; // Third operand of insertelement must be i32. return true; } @@ -1579,7 +1579,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, // Mask must be vector of i32. VectorType *MaskTy = dyn_cast(Mask->getType()); - if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32)) + if (!MaskTy || !MaskTy->getElementType()->isIntegerTy(32)) return false; // Check to see if Mask is valid. @@ -1721,13 +1721,13 @@ Type *ExtractValueInst::getIndexedType(Type *Agg, // as easy to check those manually as well. if (ArrayType *AT = dyn_cast(Agg)) { if (Index >= AT->getNumElements()) - return 0; + return nullptr; } else if (StructType *ST = dyn_cast(Agg)) { if (Index >= ST->getNumElements()) - return 0; + return nullptr; } else { // Not a valid type to index into. 
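isValidOperands for extractelement and insertelement now accepts an index of any integer width, not just i32 (the "must be i32" comment left behind in the source is stale). A hypothetical IRBuilder call that becomes verifier-clean with this change:

    // Extract lane 0 using an i64 index; previously only i32 was accepted.
    Value *firstLane(IRBuilder<> &B, Value *Vec) {
      return B.CreateExtractElement(Vec, B.getInt64(0));
    }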
- return 0; + return nullptr; } Agg = cast(Agg)->getTypeAtIndex(Index); @@ -2130,7 +2130,7 @@ bool CastInst::isNoopCast(const DataLayout *DL) const { return isNoopCast(Type::getInt64Ty(getContext())); } - Type *PtrOpTy = 0; + Type *PtrOpTy = nullptr; if (getOpcode() == Instruction::PtrToInt) PtrOpTy = getOperand(0)->getType(); else if (getOpcode() == Instruction::IntToPtr) @@ -3361,7 +3361,7 @@ void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) { SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, Instruction *InsertBefore) : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch, - 0, 0, InsertBefore) { + nullptr, 0, InsertBefore) { init(Value, Default, 2+NumCases*2); } @@ -3372,12 +3372,12 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, BasicBlock *InsertAtEnd) : TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch, - 0, 0, InsertAtEnd) { + nullptr, 0, InsertAtEnd) { init(Value, Default, 2+NumCases*2); } SwitchInst::SwitchInst(const SwitchInst &SI) - : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) { + : TerminatorInst(SI.getType(), Instruction::Switch, nullptr, 0) { init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands()); NumOperands = SI.getNumOperands(); Use *OL = OperandList, *InOL = SI.OperandList; @@ -3425,8 +3425,8 @@ void SwitchInst::removeCase(CaseIt i) { } // Nuke the last value. - OL[NumOps-2].set(0); - OL[NumOps-2+1].set(0); + OL[NumOps-2].set(nullptr); + OL[NumOps-2+1].set(nullptr); NumOperands = NumOps-2; } @@ -3492,14 +3492,14 @@ void IndirectBrInst::growOperands() { IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases, Instruction *InsertBefore) : TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr, - 0, 0, InsertBefore) { + nullptr, 0, InsertBefore) { init(Address, NumCases); } IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases, BasicBlock *InsertAtEnd) : TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr, - 0, 0, InsertAtEnd) { + nullptr, 0, InsertAtEnd) { init(Address, NumCases); } @@ -3541,7 +3541,7 @@ void IndirectBrInst::removeDestination(unsigned idx) { OL[idx+1] = OL[NumOps-1]; // Nuke the last value. 
- OL[NumOps-1].set(0); + OL[NumOps-1].set(nullptr); NumOperands = NumOps-1; } @@ -3587,9 +3587,10 @@ InsertValueInst *InsertValueInst::clone_impl() const { } AllocaInst *AllocaInst::clone_impl() const { - return new AllocaInst(getAllocatedType(), - (Value*)getOperand(0), - getAlignment()); + AllocaInst *Result = new AllocaInst(getAllocatedType(), + (Value *)getOperand(0), getAlignment()); + Result->setUsedWithInAlloca(isUsedWithInAlloca()); + return Result; } LoadInst *LoadInst::clone_impl() const { diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp index 554f2be..5725284 100644 --- a/lib/IR/IntrinsicInst.cpp +++ b/lib/IR/IntrinsicInst.cpp @@ -35,7 +35,7 @@ static Value *CastOperand(Value *C) { if (ConstantExpr *CE = dyn_cast(C)) if (CE->isCast()) return CE->getOperand(0); - return NULL; + return nullptr; } Value *DbgInfoIntrinsic::StripCast(Value *C) { @@ -57,7 +57,7 @@ Value *DbgDeclareInst::getAddress() const { if (MDNode* MD = cast_or_null(getArgOperand(0))) return MD->getOperand(0); else - return NULL; + return nullptr; } //===----------------------------------------------------------------------===// diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 1bfc515..de825f0 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/LLVMContext.h" #include "LLVMContextImpl.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Instruction.h" @@ -114,6 +115,17 @@ void *LLVMContext::getDiagnosticContext() const { return pImpl->DiagnosticContext; } +void LLVMContext::setYieldCallback(YieldCallbackTy Callback, void *OpaqueHandle) +{ + pImpl->YieldCallback = Callback; + pImpl->YieldOpaqueHandle = OpaqueHandle; +} + +void LLVMContext::yield() { + if (pImpl->YieldCallback) + pImpl->YieldCallback(this, pImpl->YieldOpaqueHandle); +} + void LLVMContext::emitError(const Twine &ErrorStr) { diagnose(DiagnosticInfoInlineAsm(ErrorStr)); } @@ -125,10 +137,32 @@ void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) { void LLVMContext::diagnose(const DiagnosticInfo &DI) { // If there is a report handler, use it. - if (pImpl->DiagnosticHandler != 0) { + if (pImpl->DiagnosticHandler) { pImpl->DiagnosticHandler(DI, pImpl->DiagnosticContext); return; } + + // Optimization remarks are selective. They need to check whether the regexp + // pattern, passed via one of the -pass-remarks* flags, matches the name of + // the pass that is emitting the diagnostic. If there is no match, ignore the + // diagnostic and return. + switch (DI.getKind()) { + case llvm::DK_OptimizationRemark: + if (!cast(DI).isEnabled()) + return; + break; + case llvm::DK_OptimizationRemarkMissed: + if (!cast(DI).isEnabled()) + return; + break; + case llvm::DK_OptimizationRemarkAnalysis: + if (!cast(DI).isEnabled()) + return; + break; + default: + break; + } + // Otherwise, print the message with a prefix based on the severity. 
std::string MsgStorage; raw_string_ostream Stream(MsgStorage); diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index ebff9d3..4c2791f 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -14,12 +14,13 @@ #include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Module.h" #include using namespace llvm; LLVMContextImpl::LLVMContextImpl(LLVMContext &C) - : TheTrueVal(0), TheFalseVal(0), + : TheTrueVal(nullptr), TheFalseVal(nullptr), VoidTy(C, Type::VoidTyID), LabelTy(C, Type::LabelTyID), HalfTy(C, Type::HalfTyID), @@ -35,10 +36,12 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64) { - InlineAsmDiagHandler = 0; - InlineAsmDiagContext = 0; - DiagnosticHandler = 0; - DiagnosticContext = 0; + InlineAsmDiagHandler = nullptr; + InlineAsmDiagContext = nullptr; + DiagnosticHandler = nullptr; + DiagnosticContext = nullptr; + YieldCallback = nullptr; + YieldOpaqueHandle = nullptr; NamedStructTypesUniqueID = 0; } @@ -46,8 +49,7 @@ namespace { struct DropReferences { // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second' // is a Constant*. - template - void operator()(const PairT &P) { + template void operator()(const PairT &P) { P.second->dropAllReferences(); } }; @@ -64,12 +66,11 @@ struct DropFirst { } LLVMContextImpl::~LLVMContextImpl() { - // NOTE: We need to delete the contents of OwnedModules, but we have to - // duplicate it into a temporary vector, because the destructor of Module - // will try to remove itself from OwnedModules set. This would cause - // iterator invalidation if we iterated on the set directly. - std::vector Modules(OwnedModules.begin(), OwnedModules.end()); - DeleteContainerPointers(Modules); + // NOTE: We need to delete the contents of OwnedModules, but Module's dtor + // will call LLVMContextImpl::removeModule, thus invalidating iterators into + // the container. Avoid iterators during this operation: + while (!OwnedModules.empty()) + delete *OwnedModules.begin(); // Free the constants. This is important to do here to ensure that they are // freed before the LeakDetector is torn down. 
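The destructor rewrite above is the standard answer to containers whose elements unregister themselves when destroyed: deleting a Module calls back into removeModule and mutates OwnedModules, so no iterator may be held across the delete. The same shape in isolation, sketched with a hypothetical Registry/Node pair:

    #include <set>

    struct Registry;

    struct Node {
      Registry *Owner;
      explicit Node(Registry *O);
      ~Node();
    };

    struct Registry {
      std::set<Node *> Owned;
      ~Registry() {
        // Deleting a Node erases it from Owned, so never hold an iterator
        // across the delete; re-fetch begin() on every iteration instead.
        while (!Owned.empty())
          delete *Owned.begin();
      }
    };

    Node::Node(Registry *O) : Owner(O) { Owner->Owned.insert(this); }
    Node::~Node() { Owner->Owned.erase(this); }

    int main() {
      Registry R;
      new Node(&R); // owned and reclaimed by R's destructor
      new Node(&R);
      return 0;
    }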
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index dc77d29..808c239 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -37,6 +37,9 @@ namespace llvm { class ConstantInt; class ConstantFP; +class DiagnosticInfoOptimizationRemark; +class DiagnosticInfoOptimizationRemarkMissed; +class DiagnosticInfoOptimizationRemarkAnalysis; class LLVMContext; class Type; class Value; @@ -56,8 +59,8 @@ struct DenseMapAPIntKeyInfo { return hash_combine(Key.type, Key.val); } }; - static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); } - static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); } + static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), nullptr); } + static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), nullptr); } static unsigned getHashValue(const KeyTy &Key) { return static_cast(hash_value(Key)); } @@ -242,6 +245,9 @@ public: LLVMContext::DiagnosticHandlerTy DiagnosticHandler; void *DiagnosticContext; + LLVMContext::YieldCallbackTy YieldCallback; + void *YieldOpaqueHandle; + typedef DenseMap IntMapTy; IntMapTy IntConstants; diff --git a/lib/IR/LeaksContext.h b/lib/IR/LeaksContext.h index 5038dc9..52ac170 100644 --- a/lib/IR/LeaksContext.h +++ b/lib/IR/LeaksContext.h @@ -12,8 +12,12 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_IR_LEAKSCONTEXT_H +#define LLVM_IR_LEAKSCONTEXT_H + #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Value.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { @@ -30,10 +34,10 @@ struct PrinterTrait { template struct LeakDetectorImpl { explicit LeakDetectorImpl(const char* const name = "") : - Cache(0), Name(name) { } + Cache(nullptr), Name(name) { } void clear() { - Cache = 0; + Cache = nullptr; Ts.clear(); } @@ -57,15 +61,15 @@ struct LeakDetectorImpl { void removeGarbage(const T* o) { if (o == Cache) - Cache = 0; // Cache hit + Cache = nullptr; // Cache hit else Ts.erase(o); } bool hasGarbage(const std::string& Message) { - addGarbage(0); // Flush the Cache + addGarbage(nullptr); // Flush the Cache - assert(Cache == 0 && "No value should be cached anymore!"); + assert(!Cache && "No value should be cached anymore!"); if (!Ts.empty()) { errs() << "Leaked " << Name << " objects found: " << Message << ":\n"; @@ -90,3 +94,5 @@ private: }; } + +#endif // LLVM_IR_LEAKSCONTEXT_H diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 7c5cc68..d3f3482 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LegacyPassManagers.h" @@ -22,6 +23,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/TimeValue.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include @@ -118,7 +120,7 @@ bool PMDataManager::isPassDebuggingExecutionsOrMore() const { void PassManagerPrettyStackEntry::print(raw_ostream &OS) const { - if (V == 0 && M == 0) + if (!V && !M) OS << "Releasing pass '"; else OS << "Running pass '"; @@ -129,7 +131,7 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const { OS << " on module '" << M->getModuleIdentifier() << "'.\n"; return; } - if (V == 0) { + if (!V) { OS << '\n'; return; } @@ -484,11 +486,11 @@ public: /// getPassTimer - Return the 
timer for the specified pass if it exists. Timer *getPassTimer(Pass *P) { if (P->getAsPMDataManager()) - return 0; + return nullptr; sys::SmartScopedLock Lock(*TimingInfoMutex); Timer *&T = TimingData[P]; - if (T == 0) + if (!T) T = new Timer(P->getPassName(), TG); return T; } @@ -579,7 +581,7 @@ void PMTopLevelManager::collectLastUses(SmallVectorImpl &LastUses, } AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { - AnalysisUsage *AnUsage = NULL; + AnalysisUsage *AnUsage = nullptr; DenseMap::iterator DMI = AnUsageMap.find(P); if (DMI != AnUsageMap.end()) AnUsage = DMI->second; @@ -626,7 +628,7 @@ void PMTopLevelManager::schedulePass(Pass *P) { if (!AnalysisPass) { const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I); - if (PI == NULL) { + if (!PI) { // Pass P is not in the global PassRegistry dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n"; dbgs() << "Verify if there is a pass dependency cycle." << "\n"; @@ -733,7 +735,7 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { } } - return 0; + return nullptr; } // Print passes managed by this top level manager. @@ -830,7 +832,7 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) { // This pass is the current implementation of all of the interfaces it // implements as well. const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI); - if (PInf == 0) return; + if (!PInf) return; const std::vector &II = PInf->getInterfacesImplemented(); for (unsigned i = 0, e = II.size(); i != e; ++i) AvailableAnalysis[II[i]->getTypeInfo()] = P; @@ -847,7 +849,7 @@ bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) { for (SmallVectorImpl::iterator I = HigherLevelAnalysis.begin(), E = HigherLevelAnalysis.end(); I != E; ++I) { Pass *P1 = *I; - if (P1->getAsImmutablePass() == 0 && + if (P1->getAsImmutablePass() == nullptr && std::find(PreservedSet.begin(), PreservedSet.end(), P1->getPassID()) == PreservedSet.end()) @@ -887,7 +889,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { for (DenseMap::iterator I = AvailableAnalysis.begin(), E = AvailableAnalysis.end(); I != E; ) { DenseMap::iterator Info = I++; - if (Info->second->getAsImmutablePass() == 0 && + if (Info->second->getAsImmutablePass() == nullptr && std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == PreservedSet.end()) { // Remove this analysis @@ -911,7 +913,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) { I = InheritedAnalysis[Index]->begin(), E = InheritedAnalysis[Index]->end(); I != E; ) { DenseMap::iterator Info = I++; - if (Info->second->getAsImmutablePass() == 0 && + if (Info->second->getAsImmutablePass() == nullptr && std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == PreservedSet.end()) { // Remove this analysis @@ -1028,7 +1030,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { // Set P as P's last user until someone starts using P. // However, if P is a Pass Manager then it does not need // to record its last user. - if (P->getAsPMDataManager() == 0) + if (!P->getAsPMDataManager()) LastUses.push_back(P); TPM->setLastUser(LastUses, P); @@ -1095,7 +1097,7 @@ void PMDataManager::initializeAnalysisImpl(Pass *P) { I = AnUsage->getRequiredSet().begin(), E = AnUsage->getRequiredSet().end(); I != E; ++I) { Pass *Impl = findAnalysisPass(*I, true); - if (Impl == 0) + if (!Impl) // This may be analysis pass that is initialized on the fly. // If that is not the case then it will raise an assert when it is used. 
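getPassTimer above leans on the map's operator[] default-constructing the slot to null, so the null test doubles as a "not yet created" check, all under a scoped lock. A standard-library sketch of the same lazy-init-under-lock pattern; TimerRegistry and its string payload are stand-ins, not LLVM API:

    #include <map>
    #include <memory>
    #include <mutex>
    #include <string>

    // Stand-in registry: one lazily created object per key, as getPassTimer
    // keeps one Timer per pass.
    class TimerRegistry {
      std::mutex Lock;
      std::map<std::string, std::unique_ptr<std::string>> Timers;

    public:
      std::string *get(const std::string &Key) {
        std::lock_guard<std::mutex> Guard(Lock);
        std::unique_ptr<std::string> &Slot = Timers[Key]; // null on first use
        if (!Slot)
          Slot.reset(new std::string(Key)); // create exactly once
        return Slot.get();
      }
    };

    int main() {
      TimerRegistry R;
      return R.get("inline") == R.get("inline") ? 0 : 1; // same object twice
    }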
   continue;

@@ -1119,7 +1121,7 @@ Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
   if (SearchParent)
     return TPM->findAnalysisPass(AID);

-  return NULL;
+  return nullptr;
 }

 // Print list of passes that are last used by P.
@@ -1158,7 +1160,8 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
                                  StringRef Msg) {
   if (PassDebugging < Executions)
     return;
-  dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
+  dbgs() << "[" << sys::TimeValue::now().str() << "] " << (void *)this
+         << std::string(getDepth() * 2 + 1, ' ');
   switch (S1) {
   case EXECUTION_MSG:
     dbgs() << "Executing Pass '" << P->getPassName();
@@ -1487,8 +1490,10 @@ bool FunctionPassManagerImpl::run(Function &F) {
   TimingInfo::createTheTimeInfo();

   initializeAllAnalysisInfo();
-  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
     Changed |= getContainedManager(Index)->runOnFunction(F);
+    F.getContext().yield();
+  }

   for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
     getContainedManager(Index)->cleanup();
@@ -1657,6 +1662,8 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
   assert((P->getPotentialPassManagerType() <
           RequiredPass->getPotentialPassManagerType()) &&
          "Unable to handle Pass that requires lower level Analysis pass");
+  if (!RequiredPass)
+    return;

   FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
   if (!FPP) {
@@ -1666,14 +1673,24 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
     OnTheFlyManagers[P] = FPP;
   }

-  FPP->add(RequiredPass);
+  const PassInfo *RequiredPassPI =
+      PassRegistry::getPassRegistry()->getPassInfo(RequiredPass->getPassID());

-  // Register P as the last user of RequiredPass.
-  if (RequiredPass) {
-    SmallVector<Pass *, 1> LU;
-    LU.push_back(RequiredPass);
-    FPP->setLastUser(LU, P);
+  Pass *FoundPass = nullptr;
+  if (RequiredPassPI && RequiredPassPI->isAnalysis()) {
+    FoundPass =
+        ((PMTopLevelManager*)FPP)->findAnalysisPass(RequiredPass->getPassID());
   }
+  if (!FoundPass) {
+    FoundPass = RequiredPass;
+    // This should be guaranteed to add RequiredPass to the passmanager given
+    // that we checked for an available analysis above.
+    FPP->add(RequiredPass);
+  }
+  // Register P as the last user of FoundPass or RequiredPass.
+  SmallVector<Pass *, 1> LU;
+  LU.push_back(FoundPass);
+  FPP->setLastUser(LU, P);
 }

 /// Return function pass corresponding to PassInfo PI, that is
@@ -1709,8 +1726,10 @@ bool PassManagerImpl::run(Module &M) {
   }

   initializeAllAnalysisInfo();
-  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
     Changed |= getContainedManager(Index)->runOnModule(M);
+    M.getContext().yield();
+  }

   for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
        E = IPV.end(); I != E; ++I) {
@@ -1773,7 +1792,7 @@ void TimingInfo::createTheTimeInfo() {
 Timer *llvm::getPassTimer(Pass *P) {
   if (TheTimeInfo)
     return TheTimeInfo->getPassTimer(P);
-  return 0;
+  return nullptr;
 }

 //===----------------------------------------------------------------------===//
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
new file mode 100644
index 0000000..65cdf38
--- /dev/null
+++ b/lib/IR/MDBuilder.cpp
@@ -0,0 +1,139 @@
+//===---- llvm/MDBuilder.cpp - Builder for LLVM metadata ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the MDBuilder class, which is used as a convenient way to +// create LLVM metadata with a consistent and simplified interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +using namespace llvm; + +MDString *MDBuilder::createString(StringRef Str) { + return MDString::get(Context, Str); +} + +MDNode *MDBuilder::createFPMath(float Accuracy) { + if (Accuracy == 0.0) + return nullptr; + assert(Accuracy > 0.0 && "Invalid fpmath accuracy!"); + Value *Op = ConstantFP::get(Type::getFloatTy(Context), Accuracy); + return MDNode::get(Context, Op); +} + +MDNode *MDBuilder::createBranchWeights(uint32_t TrueWeight, + uint32_t FalseWeight) { + uint32_t Weights[] = {TrueWeight, FalseWeight}; + return createBranchWeights(Weights); +} + +MDNode *MDBuilder::createBranchWeights(ArrayRef Weights) { + assert(Weights.size() >= 2 && "Need at least two branch weights!"); + + SmallVector Vals(Weights.size() + 1); + Vals[0] = createString("branch_weights"); + + Type *Int32Ty = Type::getInt32Ty(Context); + for (unsigned i = 0, e = Weights.size(); i != e; ++i) + Vals[i + 1] = ConstantInt::get(Int32Ty, Weights[i]); + + return MDNode::get(Context, Vals); +} + +MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) { + assert(Lo.getBitWidth() == Hi.getBitWidth() && "Mismatched bitwidths!"); + // If the range is everything then it is useless. + if (Hi == Lo) + return nullptr; + + // Return the range [Lo, Hi). + Type *Ty = IntegerType::get(Context, Lo.getBitWidth()); + Value *Range[2] = {ConstantInt::get(Ty, Lo), ConstantInt::get(Ty, Hi)}; + return MDNode::get(Context, Range); +} + +MDNode *MDBuilder::createAnonymousTBAARoot() { + // To ensure uniqueness the root node is self-referential. + MDNode *Dummy = MDNode::getTemporary(Context, ArrayRef()); + MDNode *Root = MDNode::get(Context, Dummy); + // At this point we have + // !0 = metadata !{} <- dummy + // !1 = metadata !{metadata !0} <- root + // Replace the dummy operand with the root node itself and delete the dummy. + Root->replaceOperandWith(0, Root); + MDNode::deleteTemporary(Dummy); + // We now have + // !1 = metadata !{metadata !1} <- self-referential root + return Root; +} + +MDNode *MDBuilder::createTBAARoot(StringRef Name) { + return MDNode::get(Context, createString(Name)); +} + +/// \brief Return metadata for a non-root TBAA node with the given name, +/// parent in the TBAA tree, and value for 'pointsToConstantMemory'. +MDNode *MDBuilder::createTBAANode(StringRef Name, MDNode *Parent, + bool isConstant) { + if (isConstant) { + Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1); + Value *Ops[3] = {createString(Name), Parent, Flags}; + return MDNode::get(Context, Ops); + } else { + Value *Ops[2] = {createString(Name), Parent}; + return MDNode::get(Context, Ops); + } +} + +/// \brief Return metadata for a tbaa.struct node with the given +/// struct field descriptions. 
+MDNode *MDBuilder::createTBAAStructNode(ArrayRef Fields) { + SmallVector Vals(Fields.size() * 3); + Type *Int64 = Type::getInt64Ty(Context); + for (unsigned i = 0, e = Fields.size(); i != e; ++i) { + Vals[i * 3 + 0] = ConstantInt::get(Int64, Fields[i].Offset); + Vals[i * 3 + 1] = ConstantInt::get(Int64, Fields[i].Size); + Vals[i * 3 + 2] = Fields[i].TBAA; + } + return MDNode::get(Context, Vals); +} + +/// \brief Return metadata for a TBAA struct node in the type DAG +/// with the given name, a list of pairs (offset, field type in the type DAG). +MDNode *MDBuilder::createTBAAStructTypeNode( + StringRef Name, ArrayRef> Fields) { + SmallVector Ops(Fields.size() * 2 + 1); + Type *Int64 = Type::getInt64Ty(Context); + Ops[0] = createString(Name); + for (unsigned i = 0, e = Fields.size(); i != e; ++i) { + Ops[i * 2 + 1] = Fields[i].first; + Ops[i * 2 + 2] = ConstantInt::get(Int64, Fields[i].second); + } + return MDNode::get(Context, Ops); +} + +/// \brief Return metadata for a TBAA scalar type node with the +/// given name, an offset and a parent in the TBAA type DAG. +MDNode *MDBuilder::createTBAAScalarTypeNode(StringRef Name, MDNode *Parent, + uint64_t Offset) { + ConstantInt *Off = ConstantInt::get(Type::getInt64Ty(Context), Offset); + Value *Ops[3] = {createString(Name), Parent, Off}; + return MDNode::get(Context, Ops); +} + +/// \brief Return metadata for a TBAA tag node with the given +/// base type, access type and offset relative to the base type. +MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, + uint64_t Offset) { + Type *Int64 = Type::getInt64Ty(Context); + Value *Ops[3] = {BaseType, AccessType, ConstantInt::get(Int64, Offset)}; + return MDNode::get(Context, Ops); +} diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp index d82388f..27d973b 100644 --- a/lib/IR/Mangler.cpp +++ b/lib/IR/Mangler.cpp @@ -108,7 +108,7 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV, } bool UseAt = false; - const Function *MSFunc = NULL; + const Function *MSFunc = nullptr; CallingConv::ID CC; if (DL->hasMicrosoftFastStdCallMangling()) { if ((MSFunc = dyn_cast(GV))) { diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index ba39334..4d932d0 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -87,7 +87,7 @@ public: MDNodeOperand::~MDNodeOperand() {} void MDNodeOperand::deleted() { - getParent()->replaceOperand(this, 0); + getParent()->replaceOperand(this, nullptr); } void MDNodeOperand::allUsesReplacedWith(Value *NV) { @@ -148,10 +148,10 @@ MDNode::~MDNode() { } static const Function *getFunctionForValue(Value *V) { - if (!V) return NULL; + if (!V) return nullptr; if (Instruction *I = dyn_cast(V)) { BasicBlock *BB = I->getParent(); - return BB ? BB->getParent() : 0; + return BB ? BB->getParent() : nullptr; } if (Argument *A = dyn_cast(V)) return A->getParent(); @@ -159,15 +159,15 @@ static const Function *getFunctionForValue(Value *V) { return BB->getParent(); if (MDNode *MD = dyn_cast(V)) return MD->getFunction(); - return NULL; + return nullptr; } #ifndef NDEBUG static const Function *assertLocalFunction(const MDNode *N) { - if (!N->isFunctionLocal()) return 0; + if (!N->isFunctionLocal()) return nullptr; // FIXME: This does not handle cyclic function local metadata. 
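MDBuilder exists so clients stop hand-assembling these MDNode layouts. A usage sketch against the interface defined in this new file; the TBAA tree built here is illustrative, not a required shape:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      MDBuilder MDB(Ctx);

      // Profile hint: !{!"branch_weights", i32 90, i32 10}.
      MDNode *Weights = MDB.createBranchWeights(/*TrueWeight=*/90,
                                                /*FalseWeight=*/10);

      // Two-level TBAA tree: a root, a scalar "int" type node, and an access
      // tag for an int access at offset 0 within its own type.
      MDNode *Root = MDB.createTBAARoot("hypothetical TBAA root");
      MDNode *IntTy = MDB.createTBAAScalarTypeNode("int", Root, /*Offset=*/0);
      MDNode *Tag = MDB.createTBAAStructTagNode(IntTy, IntTy, /*Offset=*/0);

      return (Weights && Root && IntTy && Tag) ? 0 : 1;
    }

A typical consumer would then hang Weights off a conditional branch under LLVMContext::MD_prof and Tag off a load or store under LLVMContext::MD_tbaa.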
- const Function *F = 0, *NewF = 0; + const Function *F = nullptr, *NewF = nullptr; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (Value *V = N->getOperand(i)) { if (MDNode *MD = dyn_cast(V)) @@ -175,10 +175,11 @@ static const Function *assertLocalFunction(const MDNode *N) { else NewF = getFunctionForValue(V); } - if (F == 0) + if (!F) F = NewF; - else - assert((NewF == 0 || F == NewF) &&"inconsistent function-local metadata"); + else + assert((NewF == nullptr || F == NewF) && + "inconsistent function-local metadata"); } return F; } @@ -192,11 +193,11 @@ const Function *MDNode::getFunction() const { #ifndef NDEBUG return assertLocalFunction(this); #else - if (!isFunctionLocal()) return NULL; + if (!isFunctionLocal()) return nullptr; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) if (const Function *F = getFunctionForValue(getOperand(i))) return F; - return NULL; + return nullptr; #endif } @@ -335,14 +336,14 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { // Likewise if the MDNode is function-local but for a different function. if (To && isFunctionLocalValue(To)) { if (!isFunctionLocal()) - To = 0; + To = nullptr; else { const Function *F = getFunction(); const Function *FV = getFunctionForValue(To); // Metadata can be function-local without having an associated function. // So only consider functions to have changed if non-null. if (F && FV && F != FV) - To = 0; + To = nullptr; } } @@ -366,7 +367,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { // anymore. This commonly occurs during destruction, and uniquing these // brings little reuse. Also, this means we don't need to include // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes. - if (To == 0) { + if (!To) { setIsNotUniqued(); return; } @@ -407,7 +408,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) { MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) { if (!A || !B) - return NULL; + return nullptr; APFloat AVal = cast(A->getOperand(0))->getValueAPF(); APFloat BVal = cast(B->getOperand(0))->getValueAPF(); @@ -457,7 +458,7 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { // the ones that overlap. if (!A || !B) - return NULL; + return nullptr; if (A == B) return A; @@ -512,7 +513,7 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { ConstantRange Range(cast(EndPoints[0])->getValue(), cast(EndPoints[1])->getValue()); if (Range.isFullSet()) - return NULL; + return nullptr; } return MDNode::get(A->getContext(), EndPoints); @@ -527,7 +528,7 @@ static SmallVector, 4> &getNMDOps(void *Operands) { } NamedMDNode::NamedMDNode(const Twine &N) - : Name(N.str()), Parent(0), + : Name(N.str()), Parent(nullptr), Operands(new SmallVector, 4>()) { } @@ -575,7 +576,7 @@ StringRef NamedMDNode::getName() const { // void Instruction::setMetadata(StringRef Kind, MDNode *Node) { - if (Node == 0 && !hasMetadata()) return; + if (!Node && !hasMetadata()) return; setMetadata(getContext().getMDKindID(Kind), Node); } @@ -631,7 +632,7 @@ void Instruction::dropUnknownMetadata(ArrayRef KnownIDs) { /// node. This updates/replaces metadata if already present, or removes it if /// Node is null. void Instruction::setMetadata(unsigned KindID, MDNode *Node) { - if (Node == 0 && !hasMetadata()) return; + if (!Node && !hasMetadata()) return; // Handle 'dbg' as a special case since it is not stored in the hash table. 
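Per-instruction metadata, as manipulated by the setMetadata/getMetadataImpl code in these hunks, is keyed by interned kind IDs; string kinds go through getMDKindID, and 'dbg' bypasses the hash table entirely. A small end-to-end sketch; the my.note kind is made up for illustration:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("m", Ctx);

      // Build void f() { ret void } so there is an instruction to annotate.
      Function *F = Function::Create(
          FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false),
          Function::ExternalLinkage, "f", &M);
      IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
      Instruction *Ret = B.CreateRetVoid();

      // Attach metadata under a made-up string kind, then read it back; the
      // string is interned to a kind ID by getMDKindID under the hood.
      MDBuilder MDB(Ctx);
      Value *Str = MDB.createString("hello");
      Ret->setMetadata("my.note", MDNode::get(Ctx, Str));
      return Ret->getMetadata("my.note") ? 0 : 1;
    }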
if (KindID == LLVMContext::MD_dbg) { @@ -691,7 +692,7 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const { if (KindID == LLVMContext::MD_dbg) return DbgLoc.getAsMDNode(getContext()); - if (!hasMetadataHashEntry()) return 0; + if (!hasMetadataHashEntry()) return nullptr; LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this]; assert(!Info.empty() && "bit out of sync with hash table"); @@ -699,7 +700,7 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const { for (const auto &I : Info) if (I.first == KindID) return I.second; - return 0; + return nullptr; } void Instruction::getAllMetadataImpl(SmallVectorImpl #include #include @@ -95,7 +96,7 @@ Constant *Module::getOrInsertFunction(StringRef Name, AttributeSet AttributeList) { // See if we have a definition for the specified function already. GlobalValue *F = getNamedValue(Name); - if (F == 0) { + if (!F) { // Nope, add it Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name); if (!New->isIntrinsic()) // Intrinsics get attrs set on construction @@ -183,7 +184,7 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) { dyn_cast_or_null(getNamedValue(Name))) if (AllowLocal || !Result->hasLocalLinkage()) return Result; - return 0; + return nullptr; } /// getOrInsertGlobal - Look up the specified global in the module symbol table. @@ -195,11 +196,11 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) { Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) { // See if we have a definition for the specified global already. GlobalVariable *GV = dyn_cast_or_null(getNamedValue(Name)); - if (GV == 0) { + if (!GV) { // Nope, add it GlobalVariable *New = new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage, - 0, Name); + nullptr, Name); return New; // Return the new declaration. 
} @@ -284,7 +285,7 @@ Value *Module::getModuleFlag(StringRef Key) const { if (Key == MFE.Key->getString()) return MFE.Val; } - return 0; + return nullptr; } /// getModuleFlagsMetadata - Returns the NamedMDNode in the module that @@ -350,7 +351,7 @@ void Module::setDataLayout(const DataLayout *Other) { const DataLayout *Module::getDataLayout() const { if (DataLayoutStr.empty()) - return 0; + return nullptr; return &DL; } @@ -429,3 +430,10 @@ void Module::dropAllReferences() { for(Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I) I->dropAllReferences(); } + +unsigned Module::getDwarfVersion() const { + Value *Val = getModuleFlag("Dwarf Version"); + if (!Val) + return dwarf::DWARF_VERSION; + return cast(Val)->getZExtValue(); +} diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp index e16c5b7..bb55d2a 100644 --- a/lib/IR/Pass.cpp +++ b/lib/IR/Pass.cpp @@ -22,6 +22,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "ir" + //===----------------------------------------------------------------------===// // Pass Implementation // @@ -44,7 +46,7 @@ PassManagerType ModulePass::getPotentialPassManagerType() const { } bool Pass::mustPreserveAnalysisID(char &AID) const { - return Resolver->getAnalysisIfAvailable(&AID, true) != 0; + return Resolver->getAnalysisIfAvailable(&AID, true) != nullptr; } // dumpPassStructure - Implement the -debug-pass=Structure option @@ -90,11 +92,11 @@ void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) { } ImmutablePass *Pass::getAsImmutablePass() { - return 0; + return nullptr; } PMDataManager *Pass::getAsPMDataManager() { - return 0; + return nullptr; } void Pass::setResolver(AnalysisResolver *AR) { @@ -112,7 +114,7 @@ void Pass::print(raw_ostream &O,const Module*) const { // dump - call print(cerr); void Pass::dump() const { - print(dbgs(), 0); + print(dbgs(), nullptr); } //===----------------------------------------------------------------------===// @@ -193,7 +195,7 @@ const PassInfo *Pass::lookupPassInfo(StringRef Arg) { Pass *Pass::createPass(AnalysisID ID) { const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID); if (!PI) - return NULL; + return nullptr; return PI->createPass(); } diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp index ea15455..0defb6a 100644 --- a/lib/IR/PassManager.cpp +++ b/lib/IR/PassManager.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -32,6 +33,8 @@ PreservedAnalyses ModulePassManager::run(Module *M, ModuleAnalysisManager *AM) { if (AM) AM->invalidate(M, PassPA); PA.intersect(std::move(PassPA)); + + M->getContext().yield(); } if (DebugPM) @@ -59,7 +62,7 @@ ModuleAnalysisManager::ResultConceptT * ModuleAnalysisManager::getCachedResultImpl(void *PassID, Module *M) const { ModuleAnalysisResultMapT::const_iterator RI = ModuleAnalysisResults.find(PassID); - return RI == ModuleAnalysisResults.end() ? 0 : &*RI->second; + return RI == ModuleAnalysisResults.end() ? 
nullptr : &*RI->second; } void ModuleAnalysisManager::invalidateImpl(void *PassID, Module *M) { @@ -92,6 +95,8 @@ PreservedAnalyses FunctionPassManager::run(Function *F, if (AM) AM->invalidate(F, PassPA); PA.intersect(std::move(PassPA)); + + F->getContext().yield(); } if (DebugPM) @@ -135,7 +140,7 @@ FunctionAnalysisManager::ResultConceptT * FunctionAnalysisManager::getCachedResultImpl(void *PassID, Function *F) const { FunctionAnalysisResultMapT::const_iterator RI = FunctionAnalysisResults.find(std::make_pair(PassID, F)); - return RI == FunctionAnalysisResults.end() ? 0 : &*RI->second->second; + return RI == FunctionAnalysisResults.end() ? nullptr : &*RI->second->second; } void FunctionAnalysisManager::invalidateImpl(void *PassID, Function *F) { @@ -165,6 +170,8 @@ void FunctionAnalysisManager::invalidateImpl(Function *F, while (!InvalidatedPassIDs.empty()) FunctionAnalysisResults.erase( std::make_pair(InvalidatedPassIDs.pop_back_val(), F)); + if (ResultsList.empty()) + FunctionAnalysisResultLists.erase(F); } char FunctionAnalysisManagerModuleProxy::PassID; diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp index 74dc0f1..6a5bee2 100644 --- a/lib/IR/PassRegistry.cpp +++ b/lib/IR/PassRegistry.cpp @@ -57,7 +57,7 @@ struct PassRegistryImpl { }; DenseMap AnalysisGroupInfoMap; - std::vector ToFree; + std::vector> ToFree; std::vector Listeners; }; } // end anonymous namespace @@ -75,20 +75,15 @@ void *PassRegistry::getImpl() const { PassRegistry::~PassRegistry() { sys::SmartScopedWriter Guard(*Lock); PassRegistryImpl *Impl = static_cast(pImpl); - - for (std::vector::iterator I = Impl->ToFree.begin(), - E = Impl->ToFree.end(); I != E; ++I) - delete *I; - delete Impl; - pImpl = 0; + pImpl = nullptr; } const PassInfo *PassRegistry::getPassInfo(const void *TI) const { sys::SmartScopedReader Guard(*Lock); PassRegistryImpl *Impl = static_cast(getImpl()); PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI); - return I != Impl->PassInfoMap.end() ? I->second : 0; + return I != Impl->PassInfoMap.end() ? I->second : nullptr; } const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const { @@ -96,7 +91,7 @@ const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const { PassRegistryImpl *Impl = static_cast(getImpl()); PassRegistryImpl::StringMapType::const_iterator I = Impl->PassInfoStringMap.find(Arg); - return I != Impl->PassInfoStringMap.end() ? I->second : 0; + return I != Impl->PassInfoStringMap.end() ? I->second : nullptr; } //===----------------------------------------------------------------------===// @@ -117,7 +112,7 @@ void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) { I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I) (*I)->passRegistered(&PI); - if (ShouldFree) Impl->ToFree.push_back(&PI); + if (ShouldFree) Impl->ToFree.push_back(std::unique_ptr(&PI)); } void PassRegistry::unregisterPass(const PassInfo &PI) { @@ -148,7 +143,7 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID, bool isDefault, bool ShouldFree) { PassInfo *InterfaceInfo = const_cast(getPassInfo(InterfaceID)); - if (InterfaceInfo == 0) { + if (!InterfaceInfo) { // First reference to Interface, register it now. 
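Switching ToFree to unique_ptr elements, as this PassRegistry hunk does, is what lets the explicit delete loop disappear from the destructor. The ownership pattern in isolation, with stub types standing in for PassInfo and the registry:

    #include <memory>
    #include <vector>

    struct PassInfoStub {}; // stand-in for const PassInfo

    struct RegistryStub {
      // Owning container: entries registered with ShouldFree are destroyed
      // automatically with the registry, so its destructor needs no loop.
      std::vector<std::unique_ptr<const PassInfoStub>> ToFree;

      void registerStub(const PassInfoStub &PI, bool ShouldFree) {
        if (ShouldFree)
          ToFree.push_back(std::unique_ptr<const PassInfoStub>(&PI));
      }
    };

    int main() {
      RegistryStub R;
      R.registerStub(*new PassInfoStub, /*ShouldFree=*/true);
      return 0; // ~RegistryStub releases the stub through unique_ptr
    }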
registerPass(Registeree); InterfaceInfo = &Registeree; @@ -174,7 +169,7 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID, "Cannot add a pass to the same analysis group more than once!"); AGI.Implementations.insert(ImplementationInfo); if (isDefault) { - assert(InterfaceInfo->getNormalCtor() == 0 && + assert(InterfaceInfo->getNormalCtor() == nullptr && "Default implementation for analysis group already specified!"); assert(ImplementationInfo->getNormalCtor() && "Cannot specify pass as default if it does not have a default ctor"); @@ -185,7 +180,8 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID, } PassRegistryImpl *Impl = static_cast(getImpl()); - if (ShouldFree) Impl->ToFree.push_back(&Registeree); + if (ShouldFree) + Impl->ToFree.push_back(std::unique_ptr(&Registeree)); } void PassRegistry::addRegistrationListener(PassRegistrationListener *L) { diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h index 5a383ee..8302597 100644 --- a/lib/IR/SymbolTableListTraitsImpl.h +++ b/lib/IR/SymbolTableListTraitsImpl.h @@ -65,7 +65,7 @@ void SymbolTableListTraits template void SymbolTableListTraits ::addNodeToList(ValueSubClass *V) { - assert(V->getParent() == 0 && "Value already in a container!!"); + assert(!V->getParent() && "Value already in a container!!"); ItemParentClass *Owner = getListOwner(); V->setParent(Owner); if (V->hasName()) @@ -76,7 +76,7 @@ void SymbolTableListTraits template void SymbolTableListTraits ::removeNodeFromList(ValueSubClass *V) { - V->setParent(0); + V->setParent(nullptr); if (V->hasName()) if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner())) ST->removeValueName(V->getValueName()); diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index b02509f..1efde47 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -36,7 +36,7 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { case MetadataTyID : return getMetadataTy(C); case X86_MMXTyID : return getX86_MMXTy(C); default: - return 0; + return nullptr; } } @@ -312,8 +312,8 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) { } IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits]; - - if (Entry == 0) + + if (!Entry) Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits); return Entry; @@ -448,7 +448,7 @@ void StructType::setName(StringRef Name) { if (SymbolTableEntry) { // Delete the old string data. ((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator()); - SymbolTableEntry = 0; + SymbolTableEntry = nullptr; } return; } @@ -497,7 +497,7 @@ StructType *StructType::get(LLVMContext &Context, bool isPacked) { } StructType *StructType::get(Type *type, ...) { - assert(type != 0 && "Cannot create a struct type with no elements with this"); + assert(type && "Cannot create a struct type with no elements with this"); LLVMContext &Ctx = type->getContext(); va_list ap; SmallVector StructFields; @@ -538,7 +538,7 @@ StructType *StructType::create(ArrayRef Elements) { } StructType *StructType::create(StringRef Name, Type *type, ...) 
{ - assert(type != 0 && "Cannot create a struct type with no elements with this"); + assert(type && "Cannot create a struct type with no elements with this"); LLVMContext &Ctx = type->getContext(); va_list ap; SmallVector StructFields; @@ -576,13 +576,13 @@ bool StructType::isSized(SmallPtrSet *Visited) const { StringRef StructType::getName() const { assert(!isLiteral() && "Literal structs never have names"); - if (SymbolTableEntry == 0) return StringRef(); - + if (!SymbolTableEntry) return StringRef(); + return ((StringMapEntry *)SymbolTableEntry)->getKey(); } void StructType::setBody(Type *type, ...) { - assert(type != 0 && "Cannot create a struct type with no elements with this"); + assert(type && "Cannot create a struct type with no elements with this"); va_list ap; SmallVector StructFields; va_start(ap, type); @@ -680,8 +680,8 @@ ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) { LLVMContextImpl *pImpl = ElementType->getContext().pImpl; ArrayType *&Entry = pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)]; - - if (Entry == 0) + + if (!Entry) Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements); return Entry; } @@ -709,8 +709,8 @@ VectorType *VectorType::get(Type *elementType, unsigned NumElements) { LLVMContextImpl *pImpl = ElementType->getContext().pImpl; VectorType *&Entry = ElementType->getContext().pImpl ->VectorTypes[std::make_pair(ElementType, NumElements)]; - - if (Entry == 0) + + if (!Entry) Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements); return Entry; } @@ -734,7 +734,7 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) { PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy] : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)]; - if (Entry == 0) + if (!Entry) Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace); return Entry; } diff --git a/lib/IR/Use.cpp b/lib/IR/Use.cpp index 60a0c56..047861c 100644 --- a/lib/IR/Use.cpp +++ b/lib/IR/Use.cpp @@ -27,14 +27,14 @@ void Use::swap(Use &RHS) { Val = RHS.Val; Val->addUse(*this); } else { - Val = 0; + Val = nullptr; } if (OldVal) { RHS.Val = OldVal; RHS.Val->addUse(RHS); } else { - RHS.Val = 0; + RHS.Val = nullptr; } } @@ -49,7 +49,7 @@ unsigned Use::getOperandNo() const { return this - getUser()->op_begin(); } -// Sets up the waymarking algoritm's tags for a series of Uses. See the +// Sets up the waymarking algorithm's tags for a series of Uses. See the // algorithm details here: // // http://www.llvm.org/docs/ProgrammersManual.html#UserLayout diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index 97a562e..d734e4e 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -44,7 +44,7 @@ static inline Type *checkType(Type *Ty) { Value::Value(Type *ty, unsigned scid) : SubclassID(scid), HasValueHandle(0), SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)), - UseList(0), Name(0) { + UseList(nullptr), Name(nullptr) { // FIXME: Why isn't this in the subclass gunk?? // Note, we cannot call isa before the CallInst has been // constructed. @@ -141,7 +141,7 @@ unsigned Value::getNumUses() const { } static bool getSymTab(Value *V, ValueSymbolTable *&ST) { - ST = 0; + ST = nullptr; if (Instruction *I = dyn_cast(V)) { if (BasicBlock *P = I->getParent()) if (Function *PP = P->getParent()) @@ -203,7 +203,7 @@ void Value::setName(const Twine &NewName) { if (NameRef.empty()) { // Free the name for this value. 
Name->Destroy(); - Name = 0; + Name = nullptr; return; } @@ -225,7 +225,7 @@ void Value::setName(const Twine &NewName) { // Remove old name. ST->removeValueName(Name); Name->Destroy(); - Name = 0; + Name = nullptr; if (NameRef.empty()) return; @@ -241,7 +241,7 @@ void Value::setName(const Twine &NewName) { void Value::takeName(Value *V) { assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!"); - ValueSymbolTable *ST = 0; + ValueSymbolTable *ST = nullptr; // If this value has a name, drop it. if (hasName()) { // Get the symtab this is in. @@ -256,7 +256,7 @@ void Value::takeName(Value *V) { if (ST) ST->removeValueName(Name); Name->Destroy(); - Name = 0; + Name = nullptr; } // Now we know that this has no name. @@ -283,7 +283,7 @@ void Value::takeName(Value *V) { if (ST == VST) { // Take the name! Name = V->Name; - V->Name = 0; + V->Name = nullptr; Name->setValue(this); return; } @@ -294,17 +294,73 @@ void Value::takeName(Value *V) { if (VST) VST->removeValueName(V->Name); Name = V->Name; - V->Name = 0; + V->Name = nullptr; Name->setValue(this); if (ST) ST->reinsertValue(this); } +static GlobalObject &findReplacementForAliasUse(Value &C) { + if (auto *GO = dyn_cast(&C)) + return *GO; + if (auto *GA = dyn_cast(&C)) + return *GA->getAliasee(); + auto *CE = cast(&C); + assert(CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::GetElementPtr || + CE->getOpcode() == Instruction::AddrSpaceCast); + if (CE->getOpcode() == Instruction::GetElementPtr) + assert(cast(CE)->hasAllZeroIndices()); + return findReplacementForAliasUse(*CE->getOperand(0)); +} + +static void replaceAliasUseWith(Use &U, Value *New) { + GlobalObject &Replacement = findReplacementForAliasUse(*New); + assert(&cast(*U) != &Replacement && + "replaceAliasUseWith cannot form an alias cycle"); + U.set(&Replacement); +} + +#ifndef NDEBUG +static bool contains(SmallPtrSet &Cache, ConstantExpr *Expr, + Constant *C) { + if (!Cache.insert(Expr)) + return false; + + for (auto &O : Expr->operands()) { + if (O == C) + return true; + auto *CE = dyn_cast(O); + if (!CE) + continue; + if (contains(Cache, CE, C)) + return true; + } + return false; +} + +static bool contains(Value *Expr, Value *V) { + if (Expr == V) + return true; + + auto *C = dyn_cast(V); + if (!C) + return false; + + auto *CE = dyn_cast(Expr); + if (!CE) + return false; + + SmallPtrSet Cache; + return contains(Cache, CE, C); +} +#endif void Value::replaceAllUsesWith(Value *New) { assert(New && "Value::replaceAllUsesWith() is invalid!"); - assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!"); + assert(!contains(New, this) && + "this->replaceAllUsesWith(expr(this)) is NOT valid!"); assert(New->getType() == getType() && "replaceAllUses of value with new value of different type!"); @@ -316,7 +372,11 @@ void Value::replaceAllUsesWith(Value *New) { Use &U = *UseList; // Must handle Constants specially, we cannot call replaceUsesOfWith on a // constant because they are uniqued. - if (Constant *C = dyn_cast(U.getUser())) { + if (auto *C = dyn_cast(U.getUser())) { + if (isa(C)) { + replaceAliasUseWith(U, New); + continue; + } if (!isa(C)) { C->replaceUsesOfWithOnConstant(this, New, &U); continue; @@ -557,7 +617,7 @@ void ValueHandleBase::AddToUseList() { // If this value already has a ValueHandle, then it must be in the // ValueHandles map already. 
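The strengthened assertion in replaceAllUsesWith rejects not just New == this but any constant expression that transitively contains this, because such a replacement would wire a value into its own replacement. A toy restatement of the contains() walk from the hunk above; Expr is a stand-in for ConstantExpr:

    #include <set>
    #include <vector>

    // Toy stand-in for ConstantExpr: a node with operand edges.
    struct Expr {
      std::vector<const Expr *> Operands;
    };

    // Does E transitively contain Target? The cache breaks cycles and skips
    // shared subexpressions, like the contains() helpers above.
    static bool contains(std::set<const Expr *> &Cache, const Expr *E,
                         const Expr *Target) {
      if (!Cache.insert(E).second)
        return false; // already visited
      for (const Expr *Op : E->Operands)
        if (Op == Target || contains(Cache, Op, Target))
          return true;
      return false;
    }

    int main() {
      Expr A, B;
      B.Operands.push_back(&A); // B is an expression built from A
      std::set<const Expr *> Cache;
      // Replacing A's uses with B would route A into its own replacement,
      // exactly what the new assertion forbids.
      return contains(Cache, &B, &A) ? 0 : 1;
    }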
ValueHandleBase *&Entry = pImpl->ValueHandles[VP.getPointer()]; - assert(Entry != 0 && "Value doesn't have any handles?"); + assert(Entry && "Value doesn't have any handles?"); AddToExistingUseList(&Entry); return; } @@ -571,7 +631,7 @@ void ValueHandleBase::AddToUseList() { const void *OldBucketPtr = Handles.getPointerIntoBucketsArray(); ValueHandleBase *&Entry = Handles[VP.getPointer()]; - assert(Entry == 0 && "Value really did already have handles?"); + assert(!Entry && "Value really did already have handles?"); AddToExistingUseList(&Entry); VP.getPointer()->HasValueHandle = true; @@ -652,7 +712,7 @@ void ValueHandleBase::ValueIsDeleted(Value *V) { break; case Weak: // Weak just goes to null, which will unlink it from the list. - Entry->operator=(0); + Entry->operator=(nullptr); break; case Callback: // Forward to the subclass's implementation. diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp index fffacb3..e9e979a 100644 --- a/lib/IR/ValueSymbolTable.cpp +++ b/lib/IR/ValueSymbolTable.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "valuesymtab" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/GlobalValue.h" @@ -20,6 +19,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "valuesymtab" + // Class destructor ValueSymbolTable::~ValueSymbolTable() { #ifndef NDEBUG // Only do this in -g mode... @@ -56,7 +57,7 @@ void ValueSymbolTable::reinsertValue(Value* V) { // Try insert the vmap entry with this suffix. ValueName &NewName = vmap.GetOrCreateValue(UniqueName); - if (NewName.getValue() == 0) { + if (!NewName.getValue()) { // Newly inserted name. Success! NewName.setValue(V); V->Name = &NewName; @@ -78,7 +79,7 @@ void ValueSymbolTable::removeValueName(ValueName *V) { ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { // In the common case, the name is not already in the symbol table. ValueName &Entry = vmap.GetOrCreateValue(Name); - if (Entry.getValue() == 0) { + if (!Entry.getValue()) { Entry.setValue(V); //DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": " // << *V << "\n"); @@ -95,7 +96,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { // Try insert the vmap entry with this suffix. ValueName &NewName = vmap.GetOrCreateValue(UniqueName); - if (NewName.getValue() == 0) { + if (!NewName.getValue()) { // Newly inserted name. Success! NewName.setValue(V); //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 089ad1c..bcc38c1 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -61,6 +61,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" @@ -76,15 +77,71 @@ #include using namespace llvm; -static cl::opt DisableDebugInfoVerifier("disable-debug-info-verifier", - cl::init(true)); +static cl::opt VerifyDebugInfo("verify-debug-info", cl::init(false)); namespace { -class Verifier : public InstVisitor { - friend class InstVisitor; - +struct VerifierSupport { raw_ostream &OS; const Module *M; + + /// \brief Track the brokenness of the module while recursively visiting. 
+ bool Broken; + + explicit VerifierSupport(raw_ostream &OS) + : OS(OS), M(nullptr), Broken(false) {} + + void WriteValue(const Value *V) { + if (!V) + return; + if (isa(V)) { + OS << *V << '\n'; + } else { + V->printAsOperand(OS, true, M); + OS << '\n'; + } + } + + void WriteType(Type *T) { + if (!T) + return; + OS << ' ' << *T; + } + + // CheckFailed - A check failed, so print out the condition and the message + // that failed. This provides a nice place to put a breakpoint if you want + // to see why something is not correct. + void CheckFailed(const Twine &Message, const Value *V1 = nullptr, + const Value *V2 = nullptr, const Value *V3 = nullptr, + const Value *V4 = nullptr) { + OS << Message.str() << "\n"; + WriteValue(V1); + WriteValue(V2); + WriteValue(V3); + WriteValue(V4); + Broken = true; + } + + void CheckFailed(const Twine &Message, const Value *V1, Type *T2, + const Value *V3 = nullptr) { + OS << Message.str() << "\n"; + WriteValue(V1); + WriteType(T2); + WriteValue(V3); + Broken = true; + } + + void CheckFailed(const Twine &Message, Type *T1, Type *T2 = nullptr, + Type *T3 = nullptr) { + OS << Message.str() << "\n"; + WriteType(T1); + WriteType(T2); + WriteType(T3); + Broken = true; + } +}; +class Verifier : public InstVisitor, VerifierSupport { + friend class InstVisitor; + LLVMContext *Context; const DataLayout *DL; DominatorTree DT; @@ -104,15 +161,10 @@ class Verifier : public InstVisitor { /// personality function. const Value *PersonalityFn; - /// \brief Finder keeps track of all debug info MDNodes in a Module. - DebugInfoFinder Finder; - - /// \brief Track the brokenness of the module while recursively visiting. - bool Broken; - public: explicit Verifier(raw_ostream &OS = dbgs()) - : OS(OS), M(0), Context(0), DL(0), PersonalityFn(0), Broken(false) {} + : VerifierSupport(OS), Context(nullptr), DL(nullptr), + PersonalityFn(nullptr) {} bool verify(const Function &F) { M = F.getParent(); @@ -142,16 +194,11 @@ public: // FIXME: It's really gross that we have to cast away constness here. DT.recalculate(const_cast(F)); - Finder.reset(); Broken = false; // FIXME: We strip const here because the inst visitor strips const. visit(const_cast(F)); InstsInThisBlock.clear(); - PersonalityFn = 0; - - if (!DisableDebugInfoVerifier) - // Verify Debug Info. - verifyDebugInfo(); + PersonalityFn = nullptr; return !Broken; } @@ -159,7 +206,6 @@ public: bool verify(const Module &M) { this->M = &M; Context = &M.getContext(); - Finder.reset(); Broken = false; // Scan through, checking all of the external function's linkage now... @@ -187,13 +233,6 @@ public: visitModuleFlags(M); visitModuleIdents(M); - if (!DisableDebugInfoVerifier) { - Finder.reset(); - Finder.processModule(M); - // Verify Debug Info. 
- verifyDebugInfo(); - } - return !Broken; } @@ -262,6 +301,7 @@ private: void visitLandingPadInst(LandingPadInst &LPI); void VerifyCallSite(CallSite CS); + void verifyMustTailCall(CallInst &CI); bool PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT, unsigned ArgNo, std::string &Suffix); bool VerifyIntrinsicType(Type *Ty, ArrayRef &Infos, @@ -278,56 +318,21 @@ private: void VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy); void VerifyConstantExprBitcastType(const ConstantExpr *CE); +}; +class DebugInfoVerifier : public VerifierSupport { +public: + explicit DebugInfoVerifier(raw_ostream &OS = dbgs()) : VerifierSupport(OS) {} - void verifyDebugInfo(); - - void WriteValue(const Value *V) { - if (!V) - return; - if (isa(V)) { - OS << *V << '\n'; - } else { - V->printAsOperand(OS, true, M); - OS << '\n'; - } - } - - void WriteType(Type *T) { - if (!T) - return; - OS << ' ' << *T; - } - - // CheckFailed - A check failed, so print out the condition and the message - // that failed. This provides a nice place to put a breakpoint if you want - // to see why something is not correct. - void CheckFailed(const Twine &Message, const Value *V1 = 0, - const Value *V2 = 0, const Value *V3 = 0, - const Value *V4 = 0) { - OS << Message.str() << "\n"; - WriteValue(V1); - WriteValue(V2); - WriteValue(V3); - WriteValue(V4); - Broken = true; - } - - void CheckFailed(const Twine &Message, const Value *V1, Type *T2, - const Value *V3 = 0) { - OS << Message.str() << "\n"; - WriteValue(V1); - WriteType(T2); - WriteValue(V3); - Broken = true; + bool verify(const Module &M) { + this->M = &M; + verifyDebugInfo(); + return !Broken; } - void CheckFailed(const Twine &Message, Type *T1, Type *T2 = 0, Type *T3 = 0) { - OS << Message.str() << "\n"; - WriteType(T1); - WriteType(T2); - WriteType(T3); - Broken = true; - } +private: + void verifyDebugInfo(); + void processInstructions(DebugInfoFinder &Finder); + void processCallInst(DebugInfoFinder &Finder, const CallInst &CI); }; } // End anonymous namespace @@ -345,18 +350,14 @@ private: void Verifier::visit(Instruction &I) { for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) - Assert1(I.getOperand(i) != 0, "Operand is null", &I); + Assert1(I.getOperand(i) != nullptr, "Operand is null", &I); InstVisitor::visit(I); } void Verifier::visitGlobalValue(const GlobalValue &GV) { - Assert1(!GV.isDeclaration() || - GV.isMaterializable() || - GV.hasExternalLinkage() || - GV.hasExternalWeakLinkage() || - (isa(GV) && - (GV.hasLocalLinkage() || GV.hasWeakLinkage())), + Assert1(!GV.isDeclaration() || GV.isMaterializable() || + GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(), "Global is external, but doesn't have external or weak linkage!", &GV); @@ -395,14 +396,22 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { "invalid linkage for intrinsic global variable", &GV); // Don't worry about emitting an error for it not being an array, // visitGlobalValue will complain on appending non-array. - if (ArrayType *ATy = dyn_cast(GV.getType())) { + if (ArrayType *ATy = dyn_cast(GV.getType()->getElementType())) { StructType *STy = dyn_cast(ATy->getElementType()); PointerType *FuncPtrTy = FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo(); - Assert1(STy && STy->getNumElements() == 2 && + // FIXME: Reject the 2-field form in LLVM 4.0. 
+ Assert1(STy && (STy->getNumElements() == 2 || + STy->getNumElements() == 3) && STy->getTypeAtIndex(0u)->isIntegerTy(32) && STy->getTypeAtIndex(1) == FuncPtrTy, "wrong type for intrinsic global variable", &GV); + if (STy->getNumElements() == 3) { + Type *ETy = STy->getTypeAtIndex(2); + Assert1(ETy->isPointerTy() && + cast(ETy)->getElementType()->isIntegerTy(8), + "wrong type for intrinsic global variable", &GV); + } } } @@ -472,11 +481,7 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) { "Alias should have external or external weak linkage!", &GA); Assert1(GA.getAliasee(), "Aliasee cannot be NULL!", &GA); - Assert1(GA.getType() == GA.getAliasee()->getType(), - "Alias and aliasee types should match!", &GA); Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA); - Assert1(!GA.hasSection(), "Alias cannot have a section!", &GA); - Assert1(!GA.getAlignment(), "Alias connot have an alignment", &GA); const Constant *Aliasee = GA.getAliasee(); const GlobalValue *GV = dyn_cast(Aliasee); @@ -492,14 +497,7 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) { "addrspacecast of GlobalValue", &GA); - if (CE->getOpcode() == Instruction::BitCast) { - unsigned SrcAS = GV->getType()->getPointerAddressSpace(); - unsigned DstAS = CE->getType()->getPointerAddressSpace(); - - Assert1(SrcAS == DstAS, - "Alias bitcasts cannot be between different address spaces", - &GA); - } + VerifyConstantExprBitcastType(CE); } Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA); if (const GlobalAlias *GAAliasee = dyn_cast(GV)) { @@ -507,10 +505,6 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) { &GA); } - const GlobalValue *AG = GA.getAliasedGlobal(); - Assert1(AG, "Aliasing chain should end with function or global variable", - &GA); - visitGlobalValue(GA); } @@ -522,7 +516,7 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) { Assert1(!MD->isFunctionLocal(), "Named metadata operand cannot be function local!", MD); - visitMDNode(*MD, 0); + visitMDNode(*MD, nullptr); } } @@ -548,7 +542,7 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) { // If this was an instruction, bb, or argument, verify that it is in the // function that we expect. - Function *ActualF = 0; + Function *ActualF = nullptr; if (Instruction *I = dyn_cast(Op)) ActualF = I->getParent()->getParent(); else if (BasicBlock *BB = dyn_cast(Op)) @@ -821,6 +815,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, bool SawNest = false; bool SawReturned = false; + bool SawSRet = false; for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) { unsigned Idx = Attrs.getSlotIndex(i); @@ -851,8 +846,12 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, SawReturned = true; } - if (Attrs.hasAttribute(Idx, Attribute::StructRet)) - Assert1(Idx == 1, "Attribute sret is not on first parameter!", V); + if (Attrs.hasAttribute(Idx, Attribute::StructRet)) { + Assert1(!SawSRet, "Cannot have multiple 'sret' parameters!", V); + Assert1(Idx == 1 || Idx == 2, + "Attribute 'sret' is not on first or second parameter!", V); + SawSRet = true; + } if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) { Assert1(Idx == FT->getNumParams(), @@ -1489,6 +1488,16 @@ void Verifier::VerifyCallSite(CallSite CS) { // Verify call attributes. VerifyFunctionAttrs(FTy, Attrs, I); + // Conservatively check the inalloca argument. + // We have a bug if we can find that there is an underlying alloca without + // inalloca. 
+ if (CS.hasInAllocaArgument()) { + Value *InAllocaArg = CS.getArgument(FTy->getNumParams() - 1); + if (auto AI = dyn_cast(InAllocaArg->stripInBoundsOffsets())) + Assert2(AI->isUsedWithInAlloca(), + "inalloca argument for call has mismatched alloca", AI, I); + } + if (FTy->isVarArg()) { // FIXME? is 'nest' even legal here? bool SawNest = false; @@ -1530,7 +1539,7 @@ void Verifier::VerifyCallSite(CallSite CS) { } // Verify that there's no metadata unless it's a direct call to an intrinsic. - if (CS.getCalledFunction() == 0 || + if (CS.getCalledFunction() == nullptr || !CS.getCalledFunction()->getName().startswith("llvm.")) { for (FunctionType::param_iterator PI = FTy->param_begin(), PE = FTy->param_end(); PI != PE; ++PI) @@ -1541,9 +1550,102 @@ void Verifier::VerifyCallSite(CallSite CS) { visitInstruction(*I); } +/// Two types are "congruent" if they are identical, or if they are both pointer +/// types with different pointee types and the same address space. +static bool isTypeCongruent(Type *L, Type *R) { + if (L == R) + return true; + PointerType *PL = dyn_cast(L); + PointerType *PR = dyn_cast(R); + if (!PL || !PR) + return false; + return PL->getAddressSpace() == PR->getAddressSpace(); +} + +static AttrBuilder getParameterABIAttributes(int I, AttributeSet Attrs) { + static const Attribute::AttrKind ABIAttrs[] = { + Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca, + Attribute::InReg, Attribute::Returned}; + AttrBuilder Copy; + for (auto AK : ABIAttrs) { + if (Attrs.hasAttribute(I + 1, AK)) + Copy.addAttribute(AK); + } + if (Attrs.hasAttribute(I + 1, Attribute::Alignment)) + Copy.addAlignmentAttr(Attrs.getParamAlignment(I + 1)); + return Copy; +} + +void Verifier::verifyMustTailCall(CallInst &CI) { + Assert1(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI); + + // - The caller and callee prototypes must match. Pointer types of + // parameters or return types may differ in pointee type, but not + // address space. + Function *F = CI.getParent()->getParent(); + auto GetFnTy = [](Value *V) { + return cast( + cast(V->getType())->getElementType()); + }; + FunctionType *CallerTy = GetFnTy(F); + FunctionType *CalleeTy = GetFnTy(CI.getCalledValue()); + Assert1(CallerTy->getNumParams() == CalleeTy->getNumParams(), + "cannot guarantee tail call due to mismatched parameter counts", &CI); + Assert1(CallerTy->isVarArg() == CalleeTy->isVarArg(), + "cannot guarantee tail call due to mismatched varargs", &CI); + Assert1(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()), + "cannot guarantee tail call due to mismatched return types", &CI); + for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) { + Assert1( + isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)), + "cannot guarantee tail call due to mismatched parameter types", &CI); + } + + // - The calling conventions of the caller and callee must match. + Assert1(F->getCallingConv() == CI.getCallingConv(), + "cannot guarantee tail call due to mismatched calling conv", &CI); + + // - All ABI-impacting function attributes, such as sret, byval, inreg, + // returned, and inalloca, must match. 
+ AttributeSet CallerAttrs = F->getAttributes(); + AttributeSet CalleeAttrs = CI.getAttributes(); + for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) { + AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs); + AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs); + Assert2(CallerABIAttrs == CalleeABIAttrs, + "cannot guarantee tail call due to mismatched ABI impacting " + "function attributes", &CI, CI.getOperand(I)); + } + + // - The call must immediately precede a :ref:`ret ` instruction, + // or a pointer bitcast followed by a ret instruction. + // - The ret instruction must return the (possibly bitcasted) value + // produced by the call or void. + Value *RetVal = &CI; + Instruction *Next = CI.getNextNode(); + + // Handle the optional bitcast. + if (BitCastInst *BI = dyn_cast_or_null(Next)) { + Assert1(BI->getOperand(0) == RetVal, + "bitcast following musttail call must use the call", BI); + RetVal = BI; + Next = BI->getNextNode(); + } + + // Check the return. + ReturnInst *Ret = dyn_cast_or_null(Next); + Assert1(Ret, "musttail call must be precede a ret with an optional bitcast", + &CI); + Assert1(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal, + "musttail call result must be returned", Ret); +} + void Verifier::visitCallInst(CallInst &CI) { VerifyCallSite(&CI); + if (CI.isMustTailCall()) + verifyMustTailCall(CI); + if (Function *F = CI.getCalledFunction()) if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) visitIntrinsicFunctionCall(ID, CI); @@ -1731,11 +1833,11 @@ void Verifier::visitLoadInst(LoadInst &LI) { "Atomic load must specify explicit alignment", &LI); if (!ElTy->isPointerTy()) { Assert2(ElTy->isIntegerTy(), - "atomic store operand must have integer type!", + "atomic load operand must have integer type!", &LI, ElTy); unsigned Size = ElTy->getPrimitiveSizeInBits(); Assert2(Size >= 8 && !(Size & (Size - 1)), - "atomic store operand must be power-of-two byte-sized integer", + "atomic load operand must be power-of-two byte-sized integer", &LI, ElTy); } } else { @@ -2020,8 +2122,8 @@ void Verifier::visitInstruction(Instruction &I) { // instruction, it is an error! for (Use &U : I.uses()) { if (Instruction *Used = dyn_cast(U.getUser())) - Assert2(Used->getParent() != 0, "Instruction referencing instruction not" - " embedded in a basic block!", &I, Used); + Assert2(Used->getParent() != nullptr, "Instruction referencing" + " instruction not embedded in a basic block!", &I, Used); else { CheckFailed("Use of instruction is not an instruction!", U); return; @@ -2029,7 +2131,7 @@ void Verifier::visitInstruction(Instruction &I) { } for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - Assert1(I.getOperand(i) != 0, "Instruction has null operand!", &I); + Assert1(I.getOperand(i) != nullptr, "Instruction has null operand!", &I); // Check to make sure that only first-class-values are operands to // instructions. 
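[Editor's note] The musttail rules enforced above are easiest to see from the producer's side. Below is a minimal sketch, not part of the patch, of building a call that the new verifyMustTailCall() accepts: caller and callee share a prototype and calling convention, the call is marked musttail, and a ret of the call's result follows immediately. The helper name makeMustTailForwarder is illustrative, and a non-void callee is assumed.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Hypothetical helper: wrap Callee in a forwarding function whose body is a
// single musttail call. A void callee would end with CreateRetVoid() instead.
static Function *makeMustTailForwarder(Module &M, Function *Callee) {
  Function *F = Function::Create(Callee->getFunctionType(),
                                 GlobalValue::ExternalLinkage, "fwd", &M);
  F->setCallingConv(Callee->getCallingConv()); // conventions must match

  IRBuilder<> B(BasicBlock::Create(M.getContext(), "entry", F));
  SmallVector<Value *, 8> Args;
  for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
       AI != AE; ++AI)
    Args.push_back(&*AI); // forward every parameter unchanged

  CallInst *CI = B.CreateCall(Callee, Args);
  CI->setCallingConv(Callee->getCallingConv());
  CI->setTailCallKind(CallInst::TCK_MustTail); // the 'musttail' marker
  B.CreateRet(CI); // ret must immediately follow and return the call's value
  return F;
}

Anything that breaks one of the checked invariants, an extra instruction between the call and the ret, a mismatched ABI attribute, or differing varargs-ness, now makes the verifier reject the module instead of silently miscompiling the tail call.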
@@ -2103,11 +2205,6 @@ void Verifier::visitInstruction(Instruction &I) { MDNode *MD = I.getMetadata(LLVMContext::MD_range); Assert1(!MD || isa(I), "Ranges are only for loads!", &I); - if (!DisableDebugInfoVerifier) { - MD = I.getMetadata(LLVMContext::MD_dbg); - Finder.processLocation(*M, DILocation(MD)); - } - InstsInThisBlock.insert(&I); } @@ -2137,18 +2234,18 @@ bool Verifier::VerifyIntrinsicType(Type *Ty, case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width); case IITDescriptor::Vector: { VectorType *VT = dyn_cast(Ty); - return VT == 0 || VT->getNumElements() != D.Vector_Width || + return !VT || VT->getNumElements() != D.Vector_Width || VerifyIntrinsicType(VT->getElementType(), Infos, ArgTys); } case IITDescriptor::Pointer: { PointerType *PT = dyn_cast(Ty); - return PT == 0 || PT->getAddressSpace() != D.Pointer_AddressSpace || + return !PT || PT->getAddressSpace() != D.Pointer_AddressSpace || VerifyIntrinsicType(PT->getElementType(), Infos, ArgTys); } case IITDescriptor::Struct: { StructType *ST = dyn_cast(Ty); - if (ST == 0 || ST->getNumElements() != D.Struct_NumElements) + if (!ST || ST->getNumElements() != D.Struct_NumElements) return true; for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) @@ -2307,17 +2404,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { MDNode *MD = cast(CI.getArgOperand(0)); Assert1(MD->getNumOperands() == 1, "invalid llvm.dbg.declare intrinsic call 2", &CI); - if (!DisableDebugInfoVerifier) - Finder.processDeclare(*M, cast(&CI)); } break; - case Intrinsic::dbg_value: { //llvm.dbg.value - if (!DisableDebugInfoVerifier) { - Assert1(CI.getArgOperand(0) && isa(CI.getArgOperand(0)), - "invalid llvm.dbg.value intrinsic call 1", &CI); - Finder.processValue(*M, cast(&CI)); - } - break; - } case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: @@ -2379,25 +2466,58 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { } } -void Verifier::verifyDebugInfo() { +void DebugInfoVerifier::verifyDebugInfo() { + if (!VerifyDebugInfo) + return; + + DebugInfoFinder Finder; + Finder.processModule(*M); + processInstructions(Finder); + // Verify Debug Info. - if (!DisableDebugInfoVerifier) { - for (DICompileUnit CU : Finder.compile_units()) { - Assert1(CU.Verify(), "DICompileUnit does not Verify!", CU); - } - for (DISubprogram S : Finder.subprograms()) { - Assert1(S.Verify(), "DISubprogram does not Verify!", S); - } - for (DIGlobalVariable GV : Finder.global_variables()) { - Assert1(GV.Verify(), "DIGlobalVariable does not Verify!", GV); - } - for (DIType T : Finder.types()) { - Assert1(T.Verify(), "DIType does not Verify!", T); - } - for (DIScope S : Finder.scopes()) { - Assert1(S.Verify(), "DIScope does not Verify!", S); - } + // + // NOTE: The loud braces are necessary for MSVC compatibility. 
+ for (DICompileUnit CU : Finder.compile_units()) { + Assert1(CU.Verify(), "DICompileUnit does not Verify!", CU); } + for (DISubprogram S : Finder.subprograms()) { + Assert1(S.Verify(), "DISubprogram does not Verify!", S); + } + for (DIGlobalVariable GV : Finder.global_variables()) { + Assert1(GV.Verify(), "DIGlobalVariable does not Verify!", GV); + } + for (DIType T : Finder.types()) { + Assert1(T.Verify(), "DIType does not Verify!", T); + } + for (DIScope S : Finder.scopes()) { + Assert1(S.Verify(), "DIScope does not Verify!", S); + } +} + +void DebugInfoVerifier::processInstructions(DebugInfoFinder &Finder) { + for (const Function &F : *M) + for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + if (MDNode *MD = I->getMetadata(LLVMContext::MD_dbg)) + Finder.processLocation(*M, DILocation(MD)); + if (const CallInst *CI = dyn_cast(&*I)) + processCallInst(Finder, *CI); + } +} + +void DebugInfoVerifier::processCallInst(DebugInfoFinder &Finder, + const CallInst &CI) { + if (Function *F = CI.getCalledFunction()) + if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) + switch (ID) { + case Intrinsic::dbg_declare: + Finder.processDeclare(*M, cast(&CI)); + break; + case Intrinsic::dbg_value: + Finder.processValue(*M, cast(&CI)); + break; + default: + break; + } } //===----------------------------------------------------------------------===// @@ -2427,7 +2547,8 @@ bool llvm::verifyModule(const Module &M, raw_ostream *OS) { // Note that this function's return value is inverted from what you would // expect of a function called "verify". - return !V.verify(M) || Broken; + DebugInfoVerifier DIV(OS ? *OS : NullStr); + return !V.verify(M) || !DIV.verify(M) || Broken; } namespace { @@ -2463,15 +2584,48 @@ struct VerifierLegacyPass : public FunctionPass { AU.setPreservesAll(); } }; +struct DebugInfoVerifierLegacyPass : public ModulePass { + static char ID; + + DebugInfoVerifier V; + bool FatalErrors; + + DebugInfoVerifierLegacyPass() : ModulePass(ID), FatalErrors(true) { + initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry()); + } + explicit DebugInfoVerifierLegacyPass(bool FatalErrors) + : ModulePass(ID), V(dbgs()), FatalErrors(FatalErrors) { + initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + if (!V.verify(M) && FatalErrors) + report_fatal_error("Broken debug info found, compilation aborted!"); + + return false; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } +}; } char VerifierLegacyPass::ID = 0; INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false) +char DebugInfoVerifierLegacyPass::ID = 0; +INITIALIZE_PASS(DebugInfoVerifierLegacyPass, "verify-di", "Debug Info Verifier", + false, false) + FunctionPass *llvm::createVerifierPass(bool FatalErrors) { return new VerifierLegacyPass(FatalErrors); } +ModulePass *llvm::createDebugInfoVerifierPass(bool FatalErrors) { + return new DebugInfoVerifierLegacyPass(FatalErrors); +} + PreservedAnalyses VerifierPass::run(Module *M) { if (verifyModule(*M, &dbgs()) && FatalErrors) report_fatal_error("Broken module found, compilation aborted!"); diff --git a/lib/IR/module.modulemap b/lib/IR/module.modulemap new file mode 100644 index 0000000..9698e91 --- /dev/null +++ b/lib/IR/module.modulemap @@ -0,0 +1 @@ +module IR { requires cplusplus umbrella "." 
module * { export * } } diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp index 8be8ab8..f4ed437 100644 --- a/lib/IRReader/IRReader.cpp +++ b/lib/IRReader/IRReader.cpp @@ -42,12 +42,12 @@ Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, // ParseBitcodeFile does not take ownership of the Buffer in the // case of an error. delete Buffer; - return NULL; + return nullptr; } return ModuleOrErr.get(); } - return ParseAssembly(Buffer, 0, Err, Context); + return ParseAssembly(Buffer, nullptr, Err, Context); } Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err, @@ -56,7 +56,7 @@ Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); - return 0; + return nullptr; } return getLazyIRModule(File.release(), Err, Context); @@ -69,7 +69,7 @@ Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, if (isBitcode((const unsigned char *)Buffer->getBufferStart(), (const unsigned char *)Buffer->getBufferEnd())) { ErrorOr ModuleOrErr = parseBitcodeFile(Buffer, Context); - Module *M = 0; + Module *M = nullptr; if (error_code EC = ModuleOrErr.getError()) Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, EC.message()); @@ -80,7 +80,7 @@ Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, return M; } - return ParseAssembly(Buffer, 0, Err, Context); + return ParseAssembly(Buffer, nullptr, Err, Context); } Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, @@ -89,7 +89,7 @@ Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); - return 0; + return nullptr; } return ParseIR(File.release(), Err, Context); @@ -111,7 +111,7 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef, std::string buf; raw_string_ostream os(buf); - Diag.print(NULL, os, false); + Diag.print(nullptr, os, false); os.flush(); *OutMessage = strdup(buf.c_str()); diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 51d0899..99236bd 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -63,20 +63,20 @@ const char* LTOCodeGenerator::getVersionString() { } LTOCodeGenerator::LTOCodeGenerator() - : Context(getGlobalContext()), Linker(new Module("ld-temp.o", Context)), - TargetMach(NULL), EmitDwarfDebugInfo(false), ScopeRestrictionsDone(false), - CodeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC), NativeObjectFile(NULL), - DiagHandler(NULL), DiagContext(NULL) { + : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)), + TargetMach(nullptr), EmitDwarfDebugInfo(false), + ScopeRestrictionsDone(false), CodeModel(LTO_CODEGEN_PIC_MODEL_DEFAULT), + NativeObjectFile(nullptr), DiagHandler(nullptr), DiagContext(nullptr) { initializeLTOPasses(); } LTOCodeGenerator::~LTOCodeGenerator() { delete TargetMach; delete NativeObjectFile; - TargetMach = NULL; - NativeObjectFile = NULL; + TargetMach = nullptr; + NativeObjectFile = nullptr; - Linker.deleteModule(); + IRLinker.deleteModule(); for (std::vector::iterator I = CodegenOptions.begin(), E = CodegenOptions.end(); @@ -114,7 +114,7 @@ void LTOCodeGenerator::initializeLTOPasses() { } bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { - bool 
ret = Linker.linkInModule(mod->getLLVVMModule(), &errMsg); + bool ret = IRLinker.linkInModule(mod->getLLVVMModule(), &errMsg); const std::vector &undefs = mod->getAsmUndefinedRefs(); for (int i = 0, e = undefs.size(); i != e; ++i) @@ -140,7 +140,6 @@ void LTOCodeGenerator::setTargetOptions(TargetOptions options) { Options.StackAlignmentOverride = options.StackAlignmentOverride; Options.TrapFuncName = options.TrapFuncName; Options.PositionIndependentExecutable = options.PositionIndependentExecutable; - Options.EnableSegmentedStacks = options.EnableSegmentedStacks; Options.UseInitArray = options.UseInitArray; } @@ -162,6 +161,7 @@ void LTOCodeGenerator::setCodePICModel(lto_codegen_model model) { case LTO_CODEGEN_PIC_MODEL_STATIC: case LTO_CODEGEN_PIC_MODEL_DYNAMIC: case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: + case LTO_CODEGEN_PIC_MODEL_DEFAULT: CodeModel = model; return; } @@ -186,7 +186,7 @@ bool LTOCodeGenerator::writeMergedModules(const char *path, } // write bitcode to it - WriteBitcodeToFile(Linker.getModule(), Out.os()); + WriteBitcodeToFile(IRLinker.getModule(), Out.os()); Out.os().close(); if (Out.os().has_error()) { @@ -245,7 +245,7 @@ const void* LTOCodeGenerator::compile(size_t* length, const char *name; if (!compile_to_file(&name, disableOpt, disableInline, disableGVNLoadPRE, errMsg)) - return NULL; + return nullptr; // remove old buffer if compile() called twice delete NativeObjectFile; @@ -255,7 +255,7 @@ const void* LTOCodeGenerator::compile(size_t* length, if (error_code ec = MemoryBuffer::getFile(name, BuffPtr, -1, false)) { errMsg = ec.message(); sys::fs::remove(NativeObjectPath); - return NULL; + return nullptr; } NativeObjectFile = BuffPtr.release(); @@ -263,24 +263,24 @@ const void* LTOCodeGenerator::compile(size_t* length, sys::fs::remove(NativeObjectPath); // return buffer, unless error - if (NativeObjectFile == NULL) - return NULL; + if (!NativeObjectFile) + return nullptr; *length = NativeObjectFile->getBufferSize(); return NativeObjectFile->getBufferStart(); } bool LTOCodeGenerator::determineTarget(std::string &errMsg) { - if (TargetMach != NULL) + if (TargetMach) return true; - std::string TripleStr = Linker.getModule()->getTargetTriple(); + std::string TripleStr = IRLinker.getModule()->getTargetTriple(); if (TripleStr.empty()) TripleStr = sys::getDefaultTargetTriple(); llvm::Triple Triple(TripleStr); // create target machine from info for merged modules const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); - if (march == NULL) + if (!march) return false; // The relocation model is actually a static member of TargetMachine and @@ -296,10 +296,14 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) { case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: RelocModel = Reloc::DynamicNoPIC; break; + case LTO_CODEGEN_PIC_MODEL_DEFAULT: + // RelocModel is already the default, so leave it that way. + break; } - // construct LTOModule, hand over ownership of module and target - SubtargetFeatures Features; + // Construct LTOModule, hand over ownership of module and target. Use MAttr as + // the default set of features. + SubtargetFeatures Features(MAttr); Features.getDefaultSubtargetFeatures(Triple); std::string FeatureStr = Features.getString(); // Set a default CPU for Darwin triples. 
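[Editor's note] Seeding SubtargetFeatures with MAttr means the client's -mattr flags are folded in before the per-triple defaults. A minimal sketch of that composition, where buildFeatureString and MAttrStr are illustrative names rather than code from the patch:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/SubtargetFeature.h"
#include <string>

// Build the feature string the way determineTarget() now does: start from the
// user's -mattr list, then append the triple's default subtarget features.
static std::string buildFeatureString(const llvm::Triple &T,
                                      llvm::StringRef MAttrStr) {
  llvm::SubtargetFeatures Features(MAttrStr);
  Features.getDefaultSubtargetFeatures(T);
  return Features.getString();
}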
@@ -308,7 +312,8 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) { MCpu = "core2"; else if (Triple.getArch() == llvm::Triple::x86) MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::arm64) + else if (Triple.getArch() == llvm::Triple::arm64 || + Triple.getArch() == llvm::Triple::aarch64) MCpu = "cyclone"; } @@ -352,7 +357,7 @@ applyRestriction(GlobalValue &GV, static void findUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet &UsedValues) { - if (LLVMUsed == 0) return; + if (!LLVMUsed) return; ConstantArray *Inits = cast(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) @@ -391,11 +396,12 @@ static void accumulateAndSortLibcalls(std::vector &Libcalls, void LTOCodeGenerator::applyScopeRestrictions() { if (ScopeRestrictionsDone) return; - Module *mergedModule = Linker.getModule(); + Module *mergedModule = IRLinker.getModule(); // Start off with a verification pass. PassManager passes; passes.add(createVerifierPass()); + passes.add(createDebugInfoVerifierPass()); // mark which symbols can not be internalized Mangler Mangler(TargetMach->getDataLayout()); @@ -424,9 +430,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { if (!AsmUsed.empty()) { llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(Context); std::vector asmUsed2; - for (SmallPtrSet::const_iterator i = AsmUsed.begin(), - e = AsmUsed.end(); i !=e; ++i) { - GlobalValue *GV = *i; + for (auto *GV : AsmUsed) { Constant *c = ConstantExpr::getBitCast(GV, i8PTy); asmUsed2.push_back(c); } @@ -458,7 +462,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, if (!this->determineTarget(errMsg)) return false; - Module *mergedModule = Linker.getModule(); + Module *mergedModule = IRLinker.getModule(); // Mark which symbols can not be internalized this->applyScopeRestrictions(); @@ -468,6 +472,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, // Start off with a verification pass. passes.add(createVerifierPass()); + passes.add(createDebugInfoVerifierPass()); // Add an appropriate DataLayout instance for this module... mergedModule->setDataLayout(TargetMach->getDataLayout()); @@ -489,6 +494,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, // Make sure everything is still good. passes.add(createVerifierPass()); + passes.add(createDebugInfoVerifierPass()); PassManager codeGenPasses; @@ -576,7 +582,7 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler, this->DiagHandler = DiagHandler; this->DiagContext = Ctxt; if (!DiagHandler) - return Context.setDiagnosticHandler(NULL, NULL); + return Context.setDiagnosticHandler(nullptr, nullptr); // Register the LTOCodeGenerator stub in the LLVMContext to forward the // diagnostic to the external DiagHandler. 
Context.setDiagnosticHandler(LTOCodeGenerator::DiagnosticHandler, this); diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 567da04..d117514 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -100,7 +100,7 @@ LTOModule *LTOModule::makeLTOModule(const char *path, TargetOptions options, std::unique_ptr buffer; if (error_code ec = MemoryBuffer::getFile(path, buffer)) { errMsg = ec.message(); - return NULL; + return nullptr; } return makeLTOModule(buffer.release(), options, errMsg); } @@ -120,7 +120,7 @@ LTOModule *LTOModule::makeLTOModule(int fd, const char *path, if (error_code ec = MemoryBuffer::getOpenFileSlice(fd, path, buffer, map_size, offset)) { errMsg = ec.message(); - return NULL; + return nullptr; } return makeLTOModule(buffer.release(), options, errMsg); } @@ -130,7 +130,7 @@ LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length, std::string &errMsg, StringRef path) { std::unique_ptr buffer(makeBuffer(mem, length, path)); if (!buffer) - return NULL; + return nullptr; return makeLTOModule(buffer.release(), options, errMsg); } @@ -143,7 +143,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, if (error_code EC = ModuleOrErr.getError()) { errMsg = EC.message(); delete buffer; - return NULL; + return nullptr; } std::unique_ptr m(ModuleOrErr.get()); @@ -155,7 +155,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, // find machine architecture for this module const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); if (!march) - return NULL; + return nullptr; // construct LTOModule, hand over ownership of module and target SubtargetFeatures Features; @@ -168,7 +168,8 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; - else if (Triple.getArch() == llvm::Triple::arm64) + else if (Triple.getArch() == llvm::Triple::arm64 || + Triple.getArch() == llvm::Triple::aarch64) CPU = "cyclone"; } @@ -189,7 +190,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, if (Ret->parseSymbols(errMsg)) { delete Ret; - return NULL; + return nullptr; } Ret->parseMetadata(); @@ -396,7 +397,7 @@ void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) { // set alignment part log2() can have rounding errors uint32_t align = def->getAlignment(); - uint32_t attr = align ? countTrailingZeros(def->getAlignment()) : 0; + uint32_t attr = align ? countTrailingZeros(align) : 0; // set permissions part if (isFunction) { @@ -418,17 +419,17 @@ void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) { attr |= LTO_SYMBOL_DEFINITION_REGULAR; // set scope part - if (def->hasHiddenVisibility()) + if (def->hasLocalLinkage()) + // Ignore visibility if linkage is local. 
+ attr |= LTO_SYMBOL_SCOPE_INTERNAL; + else if (def->hasHiddenVisibility()) attr |= LTO_SYMBOL_SCOPE_HIDDEN; else if (def->hasProtectedVisibility()) attr |= LTO_SYMBOL_SCOPE_PROTECTED; else if (canBeHidden(def)) attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN; - else if (def->hasExternalLinkage() || def->hasWeakLinkage() || - def->hasLinkOnceLinkage() || def->hasCommonLinkage()) - attr |= LTO_SYMBOL_SCOPE_DEFAULT; else - attr |= LTO_SYMBOL_SCOPE_INTERNAL; + attr |= LTO_SYMBOL_SCOPE_DEFAULT; StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer); entry.setValue(1); @@ -460,7 +461,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name, NameAndAttributes &info = _undefines[entry.getKey().data()]; - if (info.symbol == 0) { + if (info.symbol == nullptr) { // FIXME: This is trying to take care of module ASM like this: // // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0" @@ -474,7 +475,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name, info.attributes = LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope; info.isFunction = false; - info.symbol = 0; + info.symbol = nullptr; // add to table of symbols _symbols.push_back(info); @@ -502,13 +503,13 @@ void LTOModule::addAsmGlobalSymbolUndef(const char *name) { if (entry.getValue().name) return; - uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;; + uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED; attr |= LTO_SYMBOL_SCOPE_DEFAULT; NameAndAttributes info; info.name = entry.getKey().data(); info.attributes = attr; info.isFunction = false; - info.symbol = 0; + info.symbol = nullptr; entry.setValue(info); } @@ -698,7 +699,8 @@ namespace { void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override {} void EmitBytes(StringRef Data) override {} - void EmitValueImpl(const MCExpr *Value, unsigned Size) override {} + void EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) override {} void EmitULEB128Value(const MCExpr *Value) override {} void EmitSLEB128Value(const MCExpr *Value) override {} void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, @@ -709,9 +711,6 @@ namespace { bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value) override { return false; } void EmitFileDirective(StringRef Filename) override {} - void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, - const MCSymbol *Label, - unsigned PointerSize) override {} void FinishImpl() override {} void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override { RecordProcEnd(Frame); @@ -738,7 +737,8 @@ bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) { _target->getTargetTriple(), _target->getTargetCPU(), _target->getTargetFeatureString())); std::unique_ptr TAP( - T.createMCAsmParser(*STI, *Parser.get(), *MCII)); + T.createMCAsmParser(*STI, *Parser.get(), *MCII, + _target->Options.MCOptions)); if (!TAP) { errMsg = "target " + std::string(T.getName()) + " does not define AsmParser."; @@ -801,14 +801,8 @@ bool LTOModule::parseSymbols(std::string &errMsg) { return true; // add aliases - for (Module::alias_iterator a = _module->alias_begin(), - e = _module->alias_end(); a != e; ++a) { - if (isDeclaration(*a->getAliasedGlobal())) - // Is an alias to a declaration. 
- addPotentialUndefinedSymbol(a, false); - else - addDefinedDataSymbol(a); - } + for (const auto &Alias : _module->aliases()) + addDefinedDataSymbol(&Alias); // make symbols for all undefines for (StringMap::iterator u =_undefines.begin(), diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index c6476ce..45f2d4e 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -43,12 +43,12 @@ class TypeMapTy : public ValueMapTypeRemapper { /// we speculatively add types to MappedTypes, but keep track of them here in /// case we need to roll back. SmallVector SpeculativeTypes; - + /// SrcDefinitionsToResolve - This is a list of non-opaque structs in the /// source module that are mapped to an opaque struct in the destination /// module. SmallVector SrcDefinitionsToResolve; - + /// DstResolvedOpaqueTypes - This is the set of opaque types in the /// destination modules who are getting a body from the source module. SmallPtrSet DstResolvedOpaqueTypes; @@ -65,7 +65,7 @@ public: /// linkDefinedTypeBodies - Produce a body for an opaque type in the dest /// module from a type definition in the source module. void linkDefinedTypeBodies(); - + /// get - Return the mapped type to use for the specified input type from the /// source module. Type *get(Type *SrcTy); @@ -90,7 +90,7 @@ private: Type *remapType(Type *SrcTy) override { return get(SrcTy); } - + bool areTypesIsomorphic(Type *DstTy, Type *SrcTy); }; } @@ -98,12 +98,12 @@ private: void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) { Type *&Entry = MappedTypes[SrcTy]; if (Entry) return; - + if (DstTy == SrcTy) { Entry = DstTy; return; } - + // Check to see if these types are recursively isomorphic and establish a // mapping between them if so. if (!areTypesIsomorphic(DstTy, SrcTy)) { @@ -132,7 +132,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { Entry = DstTy; return true; } - + // Okay, we have two types with identical kinds that we haven't seen before. // If this is an opaque struct type, special case it. @@ -158,18 +158,18 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { return true; } } - + // If the number of subtypes disagree between the two types, then we fail. if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes()) return false; - + // Fail if any of the extra properties (e.g. array size) of the type disagree. if (isa(DstTy)) return false; // bitwidth disagrees. if (PointerType *PT = dyn_cast(DstTy)) { if (PT->getAddressSpace() != cast(SrcTy)->getAddressSpace()) return false; - + } else if (FunctionType *FT = dyn_cast(DstTy)) { if (FT->isVarArg() != cast(SrcTy)->isVarArg()) return false; @@ -195,7 +195,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { if (!areTypesIsomorphic(DstTy->getContainedType(i), SrcTy->getContainedType(i))) return false; - + // If everything seems to have lined up, then everything is great. return true; } @@ -205,31 +205,31 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { void TypeMapTy::linkDefinedTypeBodies() { SmallVector Elements; SmallString<16> TmpName; - + // Note that processing entries in this loop (calling 'get') can add new // entries to the SrcDefinitionsToResolve vector. 
while (!SrcDefinitionsToResolve.empty()) { StructType *SrcSTy = SrcDefinitionsToResolve.pop_back_val(); StructType *DstSTy = cast(MappedTypes[SrcSTy]); - + // TypeMap is a many-to-one mapping, if there were multiple types that // provide a body for DstSTy then previous iterations of this loop may have // already handled it. Just ignore this case. if (!DstSTy->isOpaque()) continue; assert(!SrcSTy->isOpaque() && "Not resolving a definition?"); - + // Map the body of the source type over to a new body for the dest type. Elements.resize(SrcSTy->getNumElements()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) Elements[i] = getImpl(SrcSTy->getElementType(i)); - + DstSTy->setBody(Elements, SrcSTy->isPacked()); - + // If DstSTy has no name or has a longer name than STy, then viciously steal // STy's name. if (!SrcSTy->hasName()) continue; StringRef SrcName = SrcSTy->getName(); - + if (!DstSTy->hasName() || DstSTy->getName().size() > SrcName.size()) { TmpName.insert(TmpName.end(), SrcName.begin(), SrcName.end()); SrcSTy->setName(""); @@ -237,7 +237,7 @@ void TypeMapTy::linkDefinedTypeBodies() { TmpName.clear(); } } - + DstResolvedOpaqueTypes.clear(); } @@ -245,7 +245,7 @@ void TypeMapTy::linkDefinedTypeBodies() { /// source module. Type *TypeMapTy::get(Type *Ty) { Type *Result = getImpl(Ty); - + // If this caused a reference to any struct type, resolve it before returning. if (!SrcDefinitionsToResolve.empty()) linkDefinedTypeBodies(); @@ -257,7 +257,7 @@ Type *TypeMapTy::getImpl(Type *Ty) { // If we already have an entry for this type, return it. Type **Entry = &MappedTypes[Ty]; if (*Entry) return *Entry; - + // If this is not a named struct type, then just map all of the elements and // then rebuild the type from inside out. if (!isa(Ty) || cast(Ty)->isLiteral()) { @@ -265,7 +265,7 @@ Type *TypeMapTy::getImpl(Type *Ty) { // true for the anonymous {} struct, things like 'float', integers, etc. if (Ty->getNumContainedTypes() == 0) return *Entry = Ty; - + // Remap all of the elements, keeping track of whether any of them change. bool AnyChange = false; SmallVector ElementTypes; @@ -274,23 +274,23 @@ Type *TypeMapTy::getImpl(Type *Ty) { ElementTypes[i] = getImpl(Ty->getContainedType(i)); AnyChange |= ElementTypes[i] != Ty->getContainedType(i); } - + // If we found our type while recursively processing stuff, just use it. Entry = &MappedTypes[Ty]; if (*Entry) return *Entry; - + // If all of the element types mapped directly over, then the type is usable // as-is. if (!AnyChange) return *Entry = Ty; - + // Otherwise, rebuild a modified type. switch (Ty->getTypeID()) { default: llvm_unreachable("unknown derived type to remap"); case Type::ArrayTyID: return *Entry = ArrayType::get(ElementTypes[0], cast(Ty)->getNumElements()); - case Type::VectorTyID: + case Type::VectorTyID: return *Entry = VectorType::get(ElementTypes[0], cast(Ty)->getNumElements()); case Type::PointerTyID: @@ -331,7 +331,7 @@ Type *TypeMapTy::getImpl(Type *Ty) { // and is not required for the prettiness of the linked module, we just skip // it and always rebuild a type here. StructType *STy = cast(Ty); - + // If the type is opaque, we can just use it directly. if (STy->isOpaque()) { // A named structure type from src module is used. Add it to the Set of @@ -339,7 +339,7 @@ Type *TypeMapTy::getImpl(Type *Ty) { DstStructTypesSet.insert(STy); return *Entry = STy; } - + // Otherwise we create a new type and resolve its body later. This will be // resolved by the top level of get(). 
SrcDefinitionsToResolve.push_back(STy); @@ -379,8 +379,8 @@ namespace { /// function, which is the entrypoint for this file. class ModuleLinker { Module *DstM, *SrcM; - - TypeMapTy TypeMap; + + TypeMapTy TypeMap; ValueMaterializerTy ValMaterializer; /// ValueMap - Mapping of values from what they used to be in Src, to what @@ -388,25 +388,27 @@ namespace { /// some overhead due to the use of Value handles which the Linker doesn't /// actually need, but this allows us to reuse the ValueMapper code. ValueToValueMapTy ValueMap; - + + std::vector> ReplaceWithAlias; + struct AppendingVarInfo { GlobalVariable *NewGV; // New aggregate global in dest module. Constant *DstInit; // Old initializer from dest module. Constant *SrcInit; // Old initializer from src module. }; - + std::vector AppendingVars; - + unsigned Mode; // Mode to treat source module. - + // Set of items not to link in from source. SmallPtrSet DoNotLinkFromSource; - + // Vector of functions to lazily link in. std::vector LazilyLinkFunctions; bool SuppressWarnings; - + public: std::string ErrorMsg; @@ -417,7 +419,7 @@ namespace { SuppressWarnings(SuppressWarnings) {} bool run(); - + private: /// emitError - Helper method for setting a message and returning an error /// code. @@ -425,7 +427,7 @@ namespace { ErrorMsg = Message.str(); return true; } - + /// getLinkageResult - This analyzes the two global values and determines /// what the result will look like in the destination module. bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src, @@ -439,29 +441,29 @@ namespace { // If the source has no name it can't link. If it has local linkage, // there is no name match-up going on. if (!SrcGV->hasName() || SrcGV->hasLocalLinkage()) - return 0; - + return nullptr; + // Otherwise see if we have a match in the destination module's symtab. GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName()); - if (DGV == 0) return 0; - + if (!DGV) return nullptr; + // If we found a global with the same name in the dest module, but it has // internal linkage, we are really not doing any linkage here. if (DGV->hasLocalLinkage()) - return 0; + return nullptr; // Otherwise, we do in fact link to the destination global. return DGV; } - + void computeTypeMapping(); - + bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV); bool linkGlobalProto(GlobalVariable *SrcGV); bool linkFunctionProto(Function *SrcF); bool linkAliasProto(GlobalAlias *SrcA); bool linkModuleFlagsMetadata(); - + void linkAppendingVarInit(const AppendingVarInfo &AVI); void linkGlobalInits(); void linkFunctionBody(Function *Dst, Function *Src); @@ -495,10 +497,16 @@ static void forceRenaming(GlobalValue *GV, StringRef Name) { /// a GlobalValue) from the SrcGV to the DestGV. static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) { // Use the maximum alignment, rather than just copying the alignment of SrcGV. 
- unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment()); + auto *DestGO = dyn_cast(DestGV); + unsigned Alignment; + if (DestGO) + Alignment = std::max(DestGO->getAlignment(), SrcGV->getAlignment()); + DestGV->copyAttributesFrom(SrcGV); - DestGV->setAlignment(Alignment); - + + if (DestGO) + DestGO->setAlignment(Alignment); + forceRenaming(DestGV, SrcGV->getName()); } @@ -518,7 +526,7 @@ static bool isLessConstraining(GlobalValue::VisibilityTypes a, Value *ValueMaterializerTy::materializeValueFor(Value *V) { Function *SF = dyn_cast(V); if (!SF) - return NULL; + return nullptr; Function *DF = Function::Create(TypeMap.get(SF->getFunctionType()), SF->getLinkage(), SF->getName(), DstM); @@ -541,10 +549,10 @@ bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src, assert(Dest && "Must have two globals being queried"); assert(!Src->hasLocalLinkage() && "If Src has internal linkage, Dest shouldn't be set!"); - + bool SrcIsDeclaration = Src->isDeclaration() && !Src->isMaterializable(); bool DestIsDeclaration = Dest->isDeclaration(); - + if (SrcIsDeclaration) { // If Src is external or if both Src & Dest are external.. Just link the // external globals, we aren't adding anything. @@ -598,6 +606,8 @@ bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src, // Compute the visibility. We follow the rules in the System V Application // Binary Interface. + assert(!GlobalValue::isLocalLinkage(LT) && + "Symbols with local linkage should not be merged"); Vis = isLessConstraining(Src->getVisibility(), Dest->getVisibility()) ? Dest->getVisibility() : Src->getVisibility(); return false; @@ -612,19 +622,19 @@ void ModuleLinker::computeTypeMapping() { for (Module::global_iterator I = SrcM->global_begin(), E = SrcM->global_end(); I != E; ++I) { GlobalValue *DGV = getLinkedToGlobal(I); - if (DGV == 0) continue; - + if (!DGV) continue; + if (!DGV->hasAppendingLinkage() || !I->hasAppendingLinkage()) { TypeMap.addTypeMapping(DGV->getType(), I->getType()); - continue; + continue; } - + // Unify the element type of appending arrays. ArrayType *DAT = cast(DGV->getType()->getElementType()); ArrayType *SAT = cast(I->getType()->getElementType()); TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType()); } - + // Incorporate functions. for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) { if (GlobalValue *DGV = getLinkedToGlobal(I)) @@ -643,14 +653,14 @@ void ModuleLinker::computeTypeMapping() { for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) { StructType *ST = SrcStructTypes[i]; if (!ST->hasName()) continue; - + // Check to see if there is a dot in the name followed by a digit. size_t DotPos = ST->getName().rfind('.'); if (DotPos == 0 || DotPos == StringRef::npos || ST->getName().back() == '.' || !isdigit(static_cast(ST->getName()[DotPos+1]))) continue; - + // Check to see if the destination module has a struct with the prefix name. if (StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos))) // Don't use it if this actually came from the source module. They're in @@ -675,9 +685,9 @@ void ModuleLinker::computeTypeMapping() { } // Don't bother incorporating aliases, they aren't generally typed well. - + // Now that we have discovered all of the type equivalences, get a body for - // any 'opaque' types in the dest module that are now resolved. + // any 'opaque' types in the dest module that are now resolved. 
TypeMap.linkDefinedTypeBodies(); } @@ -685,26 +695,26 @@ void ModuleLinker::computeTypeMapping() { /// them together now. Return true on error. bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV) { - + if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) return emitError("Linking globals named '" + SrcGV->getName() + "': can only link appending global with another appending global!"); - + ArrayType *DstTy = cast(DstGV->getType()->getElementType()); ArrayType *SrcTy = cast(TypeMap.get(SrcGV->getType()->getElementType())); Type *EltTy = DstTy->getElementType(); - + // Check to see that they two arrays agree on type. if (EltTy != SrcTy->getElementType()) return emitError("Appending variables with different element types!"); if (DstGV->isConstant() != SrcGV->isConstant()) return emitError("Appending variables linked with different const'ness!"); - + if (DstGV->getAlignment() != SrcGV->getAlignment()) return emitError( "Appending variables with different alignment need to be linked!"); - + if (DstGV->getVisibility() != SrcGV->getVisibility()) return emitError( "Appending variables with different visibility need to be linked!"); @@ -716,20 +726,20 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, if (DstGV->getSection() != SrcGV->getSection()) return emitError( "Appending variables with different section name need to be linked!"); - + uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements(); ArrayType *NewType = ArrayType::get(EltTy, NewSize); - + // Create the new global variable. GlobalVariable *NG = new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(), - DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV, + DstGV->getLinkage(), /*init*/nullptr, /*name*/"", DstGV, DstGV->getThreadLocalMode(), DstGV->getType()->getAddressSpace()); - + // Propagate alignment, visibility and section info. copyGVAttributes(NG, DstGV); - + AppendingVarInfo AVI; AVI.NewGV = NG; AVI.DstInit = DstGV->getInitializer(); @@ -742,10 +752,10 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType())); DstGV->eraseFromParent(); - + // Track the source variable so we don't try to link it. DoNotLinkFromSource.insert(SrcGV); - + return false; } @@ -760,7 +770,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { // Concatenation of appending linkage variables is magic and handled later. if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage()) return linkAppendingVarProto(cast(DGV), SGV); - + // Determine whether linkage of these two globals follows the source // module's definition or the destination module's definition. GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; @@ -786,22 +796,22 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { // Make sure to remember this mapping. ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType())); - - // Track the source global so that we don't attempt to copy it over when + + // Track the source global so that we don't attempt to copy it over when // processing global initializers. DoNotLinkFromSource.insert(SGV); - + return false; } } - + // No linking to be performed or linking from the source: simply create an // identical version of the symbol over in the dest module... the // initializer will be filled in later by LinkGlobalInits. 
GlobalVariable *NewDGV = new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()), - SGV->isConstant(), SGV->getLinkage(), /*init*/0, - SGV->getName(), /*insertbefore*/0, + SGV->isConstant(), SGV->getLinkage(), /*init*/nullptr, + SGV->getName(), /*insertbefore*/nullptr, SGV->getThreadLocalMode(), SGV->getType()->getAddressSpace()); // Propagate alignment, visibility and section info. @@ -814,7 +824,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType())); DGV->eraseFromParent(); } - + // Make sure to remember this mapping. ValueMap[SGV] = NewDGV; return false; @@ -844,15 +854,15 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { // Make sure to remember this mapping. ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType())); - - // Track the function from the source module so we don't attempt to remap + + // Track the function from the source module so we don't attempt to remap // it. DoNotLinkFromSource.insert(SF); - + return false; } } - + // If the function is to be lazily linked, don't create it just yet. // The ValueMaterializerTy will deal with creating it if it's used. if (!DGV && (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() || @@ -875,7 +885,7 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType())); DGV->eraseFromParent(); } - + ValueMap[SF] = NewDF; return false; } @@ -901,29 +911,27 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) { // Make sure to remember this mapping. ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType())); - + // Track the alias from the source module so we don't attempt to remap it. DoNotLinkFromSource.insert(SGA); - + return false; } } - + // If there is no linkage to be performed or we're linking from the source, // bring over SGA. - GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()), - SGA->getLinkage(), SGA->getName(), - /*aliasee*/0, DstM); + auto *PTy = cast(TypeMap.get(SGA->getType())); + auto *NewDA = + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + SGA->getLinkage(), SGA->getName(), DstM); copyGVAttributes(NewDA, SGA); if (NewVisibility) NewDA->setVisibility(*NewVisibility); - if (DGV) { - // Any uses of DGV need to change to NewDA, with cast. - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType())); - DGV->eraseFromParent(); - } - + if (DGV) + ReplaceWithAlias.push_back(std::make_pair(DGV, NewDA)); + ValueMap[SGA] = NewDA; return false; } @@ -934,15 +942,15 @@ static void getArrayElements(Constant *C, SmallVectorImpl &Dest) { for (unsigned i = 0; i != NumElements; ++i) Dest.push_back(C->getAggregateElement(i)); } - + void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) { // Merge the initializer. SmallVector Elements; getArrayElements(AVI.DstInit, Elements); - + Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap, &ValMaterializer); getArrayElements(SrcInit, Elements); - + ArrayType *NewType = cast(AVI.NewGV->getType()->getElementType()); AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements)); } @@ -953,10 +961,10 @@ void ModuleLinker::linkGlobalInits() { // Loop over all of the globals in the src module, mapping them over as we go for (Module::const_global_iterator I = SrcM->global_begin(), E = SrcM->global_end(); I != E; ++I) { - + // Only process initialized GV's or ones not already in dest. 
- if (!I->hasInitializer() || DoNotLinkFromSource.count(I)) continue; - + if (!I->hasInitializer() || DoNotLinkFromSource.count(I)) continue; + // Grab destination global variable. GlobalVariable *DGV = cast(ValueMap[I]); // Figure out what the initializer looks like in the dest module. @@ -984,7 +992,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { if (Mode == Linker::DestroySource) { // Splice the body of the source function into the dest function. Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList()); - + // At this point, all of the instructions and values of the function are now // copied over. The only problem is that they are still referencing values in // the Source function as operands. Loop through all of the operands of the @@ -993,19 +1001,32 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap, &ValMaterializer); - + } else { // Clone the body of the function into the dest function. SmallVector Returns; // Ignore returns. - CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL, + CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", nullptr, &TypeMap, &ValMaterializer); } - + // There is no need to map the arguments anymore. for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); I != E; ++I) ValueMap.erase(I); - + +} + +static GlobalObject &getGlobalObjectInExpr(Constant &C) { + auto *GO = dyn_cast(&C); + if (GO) + return *GO; + auto *GA = dyn_cast(&C); + if (GA) + return *GA->getAliasee(); + auto &CE = cast(C); + assert(CE.getOpcode() == Instruction::BitCast || + CE.getOpcode() == Instruction::AddrSpaceCast); + return getGlobalObjectInExpr(*CE.getOperand(0)); } /// linkAliasBodies - Insert all of the aliases in Src into the Dest module. @@ -1016,10 +1037,27 @@ void ModuleLinker::linkAliasBodies() { continue; if (Constant *Aliasee = I->getAliasee()) { GlobalAlias *DA = cast(ValueMap[I]); - DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, - &TypeMap, &ValMaterializer)); + Constant *Val = + MapValue(Aliasee, ValueMap, RF_None, &TypeMap, &ValMaterializer); + DA->setAliasee(&getGlobalObjectInExpr(*Val)); } } + + // Any uses of DGV need to change to NewDA, with cast. + for (auto &Pair : ReplaceWithAlias) { + GlobalValue *DGV = Pair.first; + GlobalAlias *NewDA = Pair.second; + + for (auto *User : DGV->users()) { + if (auto *GA = dyn_cast(User)) { + if (GA == NewDA) + report_fatal_error("Linking these modules creates an alias cycle."); + } + } + + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType())); + DGV->eraseFromParent(); + } } /// linkNamedMDNodes - Insert all of the named MDNodes in Src into the Dest @@ -1193,7 +1231,7 @@ bool ModuleLinker::linkModuleFlagsMetadata() { return HasErr; } - + bool ModuleLinker::run() { assert(DstM && "Null destination module"); assert(SrcM && "Null source module"); @@ -1264,13 +1302,13 @@ bool ModuleLinker::run() { for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i) linkAppendingVarInit(AppendingVars[i]); - + // Link in the function bodies that are defined in the source module into // DstM. for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) { // Skip if not linking from source. if (DoNotLinkFromSource.count(SF)) continue; - + Function *DF = cast(ValueMap[SF]); if (SF->hasPrefixData()) { // Link in the prefix data. 
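[Editor's note] getGlobalObjectInExpr() above assumes an aliasee is always a global, an alias, or a pointer cast wrapped around one of those. A standalone sketch of the same walk, where resolveAliasee is an illustrative name and the assert encodes the same assumption the patch makes:

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/Instruction.h"
#include <cassert>

// Peel aliases and pointer-cast constant expressions until the underlying
// function or global variable (a GlobalObject) is reached.
static llvm::GlobalObject *resolveAliasee(llvm::Constant *C) {
  using namespace llvm;
  if (auto *GO = dyn_cast<GlobalObject>(C))
    return GO;
  if (auto *GA = dyn_cast<GlobalAlias>(C))
    return resolveAliasee(GA->getAliasee()); // follow the alias chain
  auto *CE = cast<ConstantExpr>(C);
  assert((CE->getOpcode() == Instruction::BitCast ||
          CE->getOpcode() == Instruction::AddrSpaceCast) &&
         "aliasee should be a global or a pointer cast of one");
  return resolveAliasee(CE->getOperand(0));
}

Deferring the replaceAllUsesWith via the ReplaceWithAlias list then lets linkAliasBodies() detect a self-referential result and report a fatal error instead of quietly creating an alias cycle.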
@@ -1285,7 +1323,7 @@ bool ModuleLinker::run() { if (SF->Materialize(&ErrorMsg)) return true; } - + linkFunctionBody(DF, SF); SF->Dematerialize(); } @@ -1310,9 +1348,9 @@ bool ModuleLinker::run() { bool LinkedInAnyFunctions; do { LinkedInAnyFunctions = false; - + for(std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(), - E = LazilyLinkFunctions.end(); I != E; ++I) { + E = LazilyLinkFunctions.end(); I != E; ++I) { Function *SF = *I; if (!SF) continue; @@ -1334,7 +1372,7 @@ bool ModuleLinker::run() { if (SF->Materialize(&ErrorMsg)) return true; } - + // Erase from vector *before* the function body is linked - linkFunctionBody could // invalidate I. LazilyLinkFunctions.erase(I); @@ -1349,11 +1387,11 @@ bool ModuleLinker::run() { break; } } while (LinkedInAnyFunctions); - + // Now that all of the types from the source are used, resolve any structs // copied over to the dest that didn't exist there. TypeMap.linkDefinedTypeBodies(); - + return false; } @@ -1369,7 +1407,7 @@ Linker::~Linker() { void Linker::deleteModule() { delete Composite; - Composite = NULL; + Composite = nullptr; } bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) { @@ -1392,7 +1430,7 @@ bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) { /// error occurs, true is returned and ErrorMsg (if not null) is set to indicate /// the problem. Upon failure, the Dest module could be in a modified state, /// and shouldn't be relied on to be consistent. -bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, +bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode, std::string *ErrorMsg) { Linker L(Dest); return L.linkInModule(Src, Mode, ErrorMsg); @@ -1406,7 +1444,7 @@ LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src, LLVMLinkerMode Mode, char **OutMessages) { std::string Messages; LLVMBool Result = Linker::LinkModules(unwrap(Dest), unwrap(Src), Mode, OutMessages?
&Messages : nullptr); if (OutMessages) *OutMessages = strdup(Messages.c_str()); return Result; diff --git a/lib/MC/Android.mk b/lib/MC/Android.mk index abf346b..975f4e3 100644 --- a/lib/MC/Android.mk +++ b/lib/MC/Android.mk @@ -20,7 +20,6 @@ mc_SRC_FILES := \ MCELFStreamer.cpp \ MCExpr.cpp \ MCExternalSymbolizer.cpp \ - MCFixup.cpp \ MCInst.cpp \ MCInstPrinter.cpp \ MCInstrAnalysis.cpp \ @@ -35,13 +34,14 @@ mc_SRC_FILES := \ MCRegisterInfo.cpp \ MCRelocationInfo.cpp \ MCSection.cpp \ - MCSectionCOFF.cpp \ + MCSectionCOFF.cpp \ MCSectionELF.cpp \ MCSectionMachO.cpp \ MCStreamer.cpp \ MCSubtargetInfo.cpp \ MCSymbol.cpp \ MCSymbolizer.cpp \ + MCTargetOptions.cpp \ MCValue.cpp \ MCWin64EH.cpp \ WinCOFFObjectWriter.cpp \ diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index ab7dabc..6a384c1 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -16,7 +16,6 @@ add_llvm_library(LLVMMC MCELF.cpp MCELFObjectTargetWriter.cpp MCELFStreamer.cpp - MCFixup.cpp MCFunction.cpp MCExpr.cpp MCExternalSymbolizer.cpp @@ -45,6 +44,7 @@ add_llvm_library(LLVMMC MCSubtargetInfo.cpp MCSymbol.cpp MCSymbolizer.cpp + MCTargetOptions.cpp MCValue.cpp MCWin64EH.cpp MachObjectWriter.cpp diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index e9b8fe2..0a54627 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -27,6 +28,8 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Object/StringTableBuilder.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ELF.h" @@ -105,10 +108,9 @@ class ELFObjectWriter : public MCObjectWriter { static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind); static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant); static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout); - static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, + static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbolData &Data, bool Used, bool Renamed); - static bool isLocal(const MCSymbolData &Data, bool isSignature, - bool isUsedInReloc); + static bool isLocal(const MCSymbolData &Data, bool isUsedInReloc); static bool IsELFMetaDataSection(const MCSectionData &SD); static uint64_t DataSectionSize(const MCSectionData &SD); static uint64_t GetSectionFileSize(const MCAsmLayout &Layout, @@ -131,11 +133,11 @@ class ELFObjectWriter : public MCObjectWriter { MCSymbolData *SymbolData; uint64_t StringIndex; uint32_t SectionIndex; + StringRef Name; // Support lexicographic sorting. 
bool operator<(const ELFSymbolData &RHS) const { - return SymbolData->getSymbol().getName() < - RHS.SymbolData->getSymbol().getName(); + return Name < RHS.Name; } }; @@ -148,13 +150,13 @@ class ELFObjectWriter : public MCObjectWriter { llvm::DenseMap> Relocations; - DenseMap SectionStringTableIndex; + StringTableBuilder ShStrTabBuilder; /// @} /// @name Symbol Table Data /// @{ - SmallString<256> StringTable; + StringTableBuilder StrTabBuilder; std::vector FileSymbolData; std::vector LocalSymbolData; std::vector ExternalSymbolData; @@ -214,7 +216,8 @@ class ELFObjectWriter : public MCObjectWriter { const MCAsmLayout &Layout, SectionIndexMapTy &SectionIndexMap); - bool shouldRelocateWithSymbol(const MCSymbolRefExpr *RefA, + bool shouldRelocateWithSymbol(const MCAssembler &Asm, + const MCSymbolRefExpr *RefA, const MCSymbolData *SD, uint64_t C, unsigned Type) const; @@ -253,6 +256,8 @@ class ELFObjectWriter : public MCObjectWriter { void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout, RelMapTy &RelMap); + void CompressDebugSections(MCAssembler &Asm, MCAsmLayout &Layout); + void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, const RelMapTy &RelMap); @@ -481,43 +486,18 @@ void ELFObjectWriter::WriteHeader(const MCAssembler &Asm, Write16(ShstrtabIndex); } -uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &OrigData, +uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout) { - MCSymbolData *Data = &OrigData; - if (Data->isCommon() && Data->isExternal()) - return Data->getCommonAlignment(); - - const MCSymbol *Symbol = &Data->getSymbol(); - bool IsThumbFunc = OrigData.getFlags() & ELF_Other_ThumbFunc; - - uint64_t Res = 0; - if (Symbol->isVariable()) { - const MCExpr *Expr = Symbol->getVariableValue(); - MCValue Value; - if (!Expr->EvaluateAsRelocatable(Value, &Layout)) - llvm_unreachable("Invalid expression"); + if (Data.isCommon() && Data.isExternal()) + return Data.getCommonAlignment(); - assert(!Value.getSymB()); - - Res = Value.getConstant(); - - if (const MCSymbolRefExpr *A = Value.getSymA()) { - Symbol = &A->getSymbol(); - Data = &Layout.getAssembler().getSymbolData(*Symbol); - } else { - Symbol = 0; - Data = 0; - } - } + uint64_t Res; + if (!Layout.getSymbolOffset(&Data, Res)) + return 0; - if (IsThumbFunc) + if (Layout.getAssembler().isThumbFunc(&Data.getSymbol())) Res |= 1; - if (!Symbol || !Symbol->isInSection()) - return Res; - - Res += Layout.getSymbolOffset(Data); - return Res; } @@ -526,15 +506,17 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, // The presence of symbol versions causes undefined symbols and // versions declared with @@@ to be renamed. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Alias = it->getSymbol(); - const MCSymbol &Symbol = Alias.AliasedSymbol(); - MCSymbolData &SD = Asm.getSymbolData(Symbol); + for (MCSymbolData &OriginalData : Asm.symbols()) { + const MCSymbol &Alias = OriginalData.getSymbol(); // Not an alias. - if (&Symbol == &Alias) + if (!Alias.isVariable()) + continue; + auto *Ref = dyn_cast(Alias.getVariableValue()); + if (!Ref) continue; + const MCSymbol &Symbol = Ref->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(Symbol); StringRef AliasName = Alias.getName(); size_t Pos = AliasName.find('@'); @@ -543,8 +525,8 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, // Aliases defined with .symvar copy the binding from the symbol they alias. 
// This is the first place we are able to copy this information. - it->setExternal(SD.isExternal()); - MCELF::SetBinding(*it, MCELF::GetBinding(SD)); + OriginalData.setExternal(SD.isExternal()); + MCELF::SetBinding(OriginalData, MCELF::GetBinding(SD)); StringRef Rest = AliasName.substr(Pos); if (!Symbol.isUndefined() && !Rest.startswith("@@@")) @@ -594,26 +576,14 @@ static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) { return Type; } -static const MCSymbol *getBaseSymbol(const MCAsmLayout &Layout, - const MCSymbol &Symbol) { - if (!Symbol.isVariable()) - return &Symbol; - - const MCExpr *Expr = Symbol.getVariableValue(); - MCValue Value; - if (!Expr->EvaluateAsRelocatable(Value, &Layout)) - llvm_unreachable("Invalid Expression"); - assert(!Value.getSymB()); - const MCSymbolRefExpr *A = Value.getSymA(); - if (!A) - return nullptr; - return getBaseSymbol(Layout, A->getSymbol()); -} - void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, const MCAsmLayout &Layout) { MCSymbolData &OrigData = *MSD.SymbolData; - const MCSymbol *Base = getBaseSymbol(Layout, OrigData.getSymbol()); + assert((!OrigData.getFragment() || + (&OrigData.getFragment()->getParent()->getSection() == + &OrigData.getSymbol().getSection())) && + "The symbol's section doesn't match the fragment's symbol"); + const MCSymbol *Base = Layout.getBaseSymbol(OrigData.getSymbol()); // This has to be in sync with when computeSymbolTable uses SHN_ABS or // SHN_COMMON. @@ -627,8 +597,6 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, BaseSD = &Layout.getAssembler().getSymbolData(*Base); Type = mergeTypeForSet(Type, MCELF::GetType(*BaseSD)); } - if (OrigData.getFlags() & ELF_Other_ThumbFunc) - Type = ELF::STT_FUNC; uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift); // Other and Visibility share the same byte with Visibility using the lower @@ -638,8 +606,6 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD, Other |= Visibility; uint64_t Value = SymbolValue(OrigData, Layout); - if (OrigData.getFlags() & ELF_Other_ThumbFunc) - Value |= 1; uint64_t Size = 0; const MCExpr *ESize = OrigData.getSize(); @@ -664,7 +630,6 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, SectionIndexMapTy &SectionIndexMap) { // The string table must be emitted first because we need the index // into the string table for all the symbol names. - assert(StringTable.size() && "Missing string table"); // FIXME: Make sure the start of the symbol table is aligned. @@ -725,7 +690,8 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, // It is always valid to create a relocation with a symbol. It is preferable // to use a relocation with a section if that is possible. Using the section // allows us to omit some local symbols from the symbol table. -bool ELFObjectWriter::shouldRelocateWithSymbol(const MCSymbolRefExpr *RefA, +bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, + const MCSymbolRefExpr *RefA, const MCSymbolData *SD, uint64_t C, unsigned Type) const { @@ -809,11 +775,37 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCSymbolRefExpr *RefA, if (Flags & ELF::SHF_TLS) return true; + // If the symbol is a thumb function the final relocation must set the lowest + // bit. With a symbol that is done by just having the symbol have that bit + // set, so we would lose the bit if we relocated with the section. + // FIXME: We could use the section but add the bit to the relocation value. 
+ if (Asm.isThumbFunc(&Sym)) + return true; + if (TargetObjectWriter->needsRelocateWithSymbol(Type)) return true; return false; } +static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) { + const MCSymbol &Sym = Ref.getSymbol(); + + if (Ref.getKind() == MCSymbolRefExpr::VK_WEAKREF) + return &Sym; + + if (!Sym.isVariable()) + return nullptr; + + const MCExpr *Expr = Sym.getVariableValue(); + const auto *Inner = dyn_cast(Expr); + if (!Inner) + return nullptr; + + if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF) + return &Inner->getSymbol(); + return nullptr; +} + void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, @@ -868,7 +860,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, const MCSymbolData *SymAD = SymA ? &Asm.getSymbolData(*SymA) : nullptr; unsigned Type = GetRelocType(Target, Fixup, IsPCRel); - bool RelocateWithSymbol = shouldRelocateWithSymbol(RefA, SymAD, C, Type); + bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymAD, C, Type); if (!RelocateWithSymbol && SymA && !SymA->isUndefined()) C += Layout.getSymbolOffset(SymAD); @@ -899,8 +891,8 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, if (const MCSymbol *R = Renames.lookup(SymA)) SymA = R; - if (RefA->getKind() == MCSymbolRefExpr::VK_WEAKREF) - WeakrefUsedInReloc.insert(SymA); + if (const MCSymbol *WeakRef = getWeakRef(*RefA)) + WeakrefUsedInReloc.insert(WeakRef); else UsedInReloc.insert(SymA); } @@ -913,13 +905,13 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, uint64_t ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm, const MCSymbol *S) { - MCSymbolData &SD = Asm.getSymbolData(*S); + const MCSymbolData &SD = Asm.getSymbolData(*S); return SD.getIndex(); } -bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm, - const MCSymbolData &Data, - bool Used, bool Renamed) { +bool ELFObjectWriter::isInSymtab(const MCAsmLayout &Layout, + const MCSymbolData &Data, bool Used, + bool Renamed) { const MCSymbol &Symbol = Data.getSymbol(); if (Symbol.isVariable()) { const MCExpr *Expr = Symbol.getVariableValue(); @@ -938,9 +930,11 @@ bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm, if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_") return true; - const MCSymbol &A = Symbol.AliasedSymbol(); - if (Symbol.isVariable() && !A.isVariable() && A.isUndefined()) - return false; + if (Symbol.isVariable()) { + const MCSymbol *Base = Layout.getBaseSymbol(Symbol); + if (Base && Base->isUndefined()) + return false; + } bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL; if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal) @@ -952,20 +946,16 @@ bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm, return true; } -bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature, - bool isUsedInReloc) { +bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isUsedInReloc) { if (Data.isExternal()) return false; const MCSymbol &Symbol = Data.getSymbol(); - const MCSymbol &RefSymbol = Symbol.AliasedSymbol(); - - if (RefSymbol.isUndefined() && !RefSymbol.isVariable()) { - if (isSignature && !isUsedInReloc) - return true; + if (Symbol.isDefined()) + return true; + if (isUsedInReloc) return false; - } return true; } @@ -1013,58 +1003,36 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, MCELF::SetBinding(Data, ELF::STB_GLOBAL); } - // Index 0 is always the empty string. 
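The new getWeakRef helper above accepts either a direct VK_WEAKREF reference or one hidden behind a single variable assignment. A simplified stand-alone model of that two-case logic (the types and fields here are illustrative stand-ins for MCSymbol/MCSymbolRefExpr, not the MC classes):

enum class RefKind { None, WeakRef };
struct Symbol;
struct Ref {
  RefKind Kind;
  const Symbol *Sym;
};
struct Symbol {
  const Ref *VariableValue = nullptr; // non-null when the symbol is a variable
};

// Mirrors getWeakRef: a direct weakref, or a weakref one aliasing level down;
// anything else is not a weakref.
const Symbol *getWeakRefTarget(const Ref &R) {
  if (R.Kind == RefKind::WeakRef)
    return R.Sym;
  if (const Ref *Inner = R.Sym->VariableValue)
    if (Inner->Kind == RefKind::WeakRef)
      return Inner->Sym;
  return nullptr;
}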
- StringMap StringIndexMap; - StringTable += '\x00'; - - // FIXME: We could optimize suffixes in strtab in the same way we - // optimize them in shstrtab. - - for (MCAssembler::const_file_name_iterator it = Asm.file_names_begin(), - ie = Asm.file_names_end(); - it != ie; - ++it) { - StringRef Name = *it; - uint64_t &Entry = StringIndexMap[Name]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Name; - StringTable += '\x00'; - } - FileSymbolData.push_back(Entry); - } - // Add the data for the symbols. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); + for (MCSymbolData &SD : Asm.symbols()) { + const MCSymbol &Symbol = SD.getSymbol(); bool Used = UsedInReloc.count(&Symbol); bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol); bool isSignature = RevGroupMap.count(&Symbol); - if (!isInSymtab(Asm, *it, + if (!isInSymtab(Layout, SD, Used || WeakrefUsed || isSignature, Renames.count(&Symbol))) continue; ELFSymbolData MSD; - MSD.SymbolData = it; - const MCSymbol *BaseSymbol = getBaseSymbol(Layout, Symbol); + MSD.SymbolData = &SD; + const MCSymbol *BaseSymbol = Layout.getBaseSymbol(Symbol); // Undefined symbols are global, but this is the first place we // are able to set it. - bool Local = isLocal(*it, isSignature, Used); - if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) { + bool Local = isLocal(SD, Used); + if (!Local && MCELF::GetBinding(SD) == ELF::STB_LOCAL) { assert(BaseSymbol); - MCSymbolData &SD = Asm.getSymbolData(*BaseSymbol); - MCELF::SetBinding(*it, ELF::STB_GLOBAL); + MCSymbolData &BaseData = Asm.getSymbolData(*BaseSymbol); MCELF::SetBinding(SD, ELF::STB_GLOBAL); + MCELF::SetBinding(BaseData, ELF::STB_GLOBAL); } if (!BaseSymbol) { MSD.SectionIndex = ELF::SHN_ABS; - } else if (it->isCommon()) { + } else if (SD.isCommon()) { assert(!Local); MSD.SectionIndex = ELF::SHN_COMMON; } else if (BaseSymbol->isUndefined()) { @@ -1073,7 +1041,7 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, else MSD.SectionIndex = ELF::SHN_UNDEF; if (!Used && WeakrefUsed) - MCELF::SetBinding(*it, ELF::STB_WEAK); + MCELF::SetBinding(SD, ELF::STB_WEAK); } else { const MCSectionELF &Section = static_cast(BaseSymbol->getSection()); @@ -1085,7 +1053,6 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, // @@ in defined ones. 
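The symbol-version handling that follows collapses the "@@@" marker to "@" in undefined symbols and "@@" in defined ones. A hedged sketch of that substring surgery (canonicalizeVersion and IsUndefined are illustrative names; the patch itself keys the skip width off the section index):

#include <string>

std::string canonicalizeVersion(const std::string &Name, bool IsUndefined) {
  std::string::size_type Pos = Name.find("@@@");
  if (Pos == std::string::npos)
    return Name;
  // Skip one '@' to keep "@@" (default version) on defined symbols,
  // two to keep a single '@' on undefined ones.
  std::string::size_type Skip = IsUndefined ? 2 : 1;
  return Name.substr(0, Pos) + Name.substr(Pos + Skip);
}
// canonicalizeVersion("foo@@@V1", /*IsUndefined=*/true)  == "foo@V1"
// canonicalizeVersion("foo@@@V1", /*IsUndefined=*/false) == "foo@@V1"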
StringRef Name = Symbol.getName(); SmallString<32> Buf; - size_t Pos = Name.find("@@@"); if (Pos != StringRef::npos) { Buf += Name.substr(0, Pos); @@ -1093,14 +1060,8 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, Buf += Name.substr(Pos + Skip); Name = Buf; } + MSD.Name = StrTabBuilder.add(Name); - uint64_t &Entry = StringIndexMap[Name]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Name; - StringTable += '\x00'; - } - MSD.StringIndex = Entry; if (MSD.SectionIndex == ELF::SHN_UNDEF) UndefinedSymbolData.push_back(MSD); else if (Local) @@ -1109,6 +1070,21 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout, ExternalSymbolData.push_back(MSD); } + for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i) + StrTabBuilder.add(*i); + + StrTabBuilder.finalize(); + + for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i) + FileSymbolData.push_back(StrTabBuilder.getOffset(*i)); + + for (ELFSymbolData& MSD : LocalSymbolData) + MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name); + for (ELFSymbolData& MSD : ExternalSymbolData) + MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name); + for (ELFSymbolData& MSD : UndefinedSymbolData) + MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name); + // Symbols are required to be in lexicographic order. array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end()); array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); @@ -1168,6 +1144,151 @@ void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm, } } +static SmallVector +getUncompressedData(MCAsmLayout &Layout, + MCSectionData::FragmentListType &Fragments) { + SmallVector UncompressedData; + for (const MCFragment &F : Fragments) { + const SmallVectorImpl *Contents; + switch (F.getKind()) { + case MCFragment::FT_Data: + Contents = &cast(F).getContents(); + break; + case MCFragment::FT_Dwarf: + Contents = &cast(F).getContents(); + break; + case MCFragment::FT_DwarfFrame: + Contents = &cast(F).getContents(); + break; + default: + llvm_unreachable( + "Not expecting any other fragment types in a debug_* section"); + } + UncompressedData.append(Contents->begin(), Contents->end()); + } + return UncompressedData; +} + +// Include the debug info compression header: +// "ZLIB" followed by 8 bytes representing the uncompressed size of the section, +// useful for consumers to preallocate a buffer to decompress into. +static bool +prependCompressionHeader(uint64_t Size, + SmallVectorImpl &CompressedContents) { + static const StringRef Magic = "ZLIB"; + if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size()) + return false; + if (sys::IsLittleEndianHost) + Size = sys::SwapByteOrder(Size); + CompressedContents.insert(CompressedContents.begin(), + Magic.size() + sizeof(Size), 0); + std::copy(Magic.begin(), Magic.end(), CompressedContents.begin()); + std::copy(reinterpret_cast(&Size), + reinterpret_cast(&Size + 1), + CompressedContents.begin() + Magic.size()); + return true; +} + +// Return a single fragment containing the compressed contents of the whole +// section. Null if the section was not compressed for any reason. +static std::unique_ptr +getCompressedFragment(MCAsmLayout &Layout, + MCSectionData::FragmentListType &Fragments) { + std::unique_ptr CompressedFragment(new MCDataFragment()); + + // Gather the uncompressed data from all the fragments, recording the + // alignment fragment, if seen, and any fixups. 
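prependCompressionHeader above stores the uncompressed size big-endian after the "ZLIB" magic (the SwapByteOrder only fires on little-endian hosts) and refuses the rewrite when header plus compressed bytes would not actually shrink the section. A self-contained sketch of the same 12-byte header layout (makeZDebugHeader is a hypothetical name):

#include <cstdint>
#include <cstring>
#include <vector>

// "ZLIB" magic plus the uncompressed size as a big-endian 64-bit integer,
// so consumers can size a decompression buffer up front.
std::vector<uint8_t> makeZDebugHeader(uint64_t UncompressedSize) {
  std::vector<uint8_t> Header(12);
  std::memcpy(Header.data(), "ZLIB", 4);
  for (int I = 0; I < 8; ++I) // big-endian regardless of host byte order
    Header[4 + I] = uint8_t(UncompressedSize >> (56 - 8 * I));
  return Header;
}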
+ SmallVector UncompressedData = + getUncompressedData(Layout, Fragments); + + SmallVectorImpl &CompressedContents = CompressedFragment->getContents(); + + zlib::Status Success = zlib::compress( + StringRef(UncompressedData.data(), UncompressedData.size()), + CompressedContents); + if (Success != zlib::StatusOK) + return nullptr; + + if (!prependCompressionHeader(UncompressedData.size(), CompressedContents)) + return nullptr; + + return CompressedFragment; +} + +typedef DenseMap> +DefiningSymbolMap; + +static void UpdateSymbols(const MCAsmLayout &Layout, + const std::vector &Symbols, + MCFragment &NewFragment) { + for (MCSymbolData *Sym : Symbols) { + Sym->setOffset(Sym->getOffset() + + Layout.getFragmentOffset(Sym->getFragment())); + Sym->setFragment(&NewFragment); + } +} + +static void CompressDebugSection(MCAssembler &Asm, MCAsmLayout &Layout, + const DefiningSymbolMap &DefiningSymbols, + const MCSectionELF &Section, + MCSectionData &SD) { + StringRef SectionName = Section.getSectionName(); + MCSectionData::FragmentListType &Fragments = SD.getFragmentList(); + + std::unique_ptr CompressedFragment = + getCompressedFragment(Layout, Fragments); + + // Leave the section as-is if the fragments could not be compressed. + if (!CompressedFragment) + return; + + // Update the fragment+offsets of any symbols referring to fragments in this + // section to refer to the new fragment. + auto I = DefiningSymbols.find(&SD); + if (I != DefiningSymbols.end()) + UpdateSymbols(Layout, I->second, *CompressedFragment); + + // Invalidate the layout for the whole section since it will have new and + // different fragments now. + Layout.invalidateFragmentsFrom(&Fragments.front()); + Fragments.clear(); + + // Complete the initialization of the new fragment + CompressedFragment->setParent(&SD); + CompressedFragment->setLayoutOrder(0); + Fragments.push_back(CompressedFragment.release()); + + // Rename from .debug_* to .zdebug_* + Asm.getContext().renameELFSection(&Section, + (".z" + SectionName.drop_front(1)).str()); +} + +void ELFObjectWriter::CompressDebugSections(MCAssembler &Asm, + MCAsmLayout &Layout) { + if (!Asm.getContext().getAsmInfo()->compressDebugSections()) + return; + + DefiningSymbolMap DefiningSymbols; + + for (MCSymbolData &SD : Asm.symbols()) + if (MCFragment *F = SD.getFragment()) + DefiningSymbols[F->getParent()].push_back(&SD); + + for (MCSectionData &SD : Asm) { + const MCSectionELF &Section = + static_cast(SD.getSection()); + StringRef SectionName = Section.getSectionName(); + + // Compressing debug_frame requires handling alignment fragments which is + // more work (possibly generalizing MCAssembler.cpp:writeFragment to allow + // for writing to arbitrary buffers) for little benefit. 
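For completeness, the consumer-side counterpart of that header is equally small; a sketch of how a reader would validate the magic and recover the size before decompressing (readZDebugHeader is a hypothetical name, not part of this patch):

#include <cstdint>
#include <cstring>

bool readZDebugHeader(const uint8_t *Data, size_t Len, uint64_t &Size) {
  if (Len < 12 || std::memcmp(Data, "ZLIB", 4) != 0)
    return false;
  Size = 0;
  for (int I = 0; I < 8; ++I)
    Size = (Size << 8) | Data[4 + I]; // size is stored big-endian
  return true;
}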
+ if (!SectionName.startswith(".debug_") || SectionName == ".debug_frame") + continue; + + CompressDebugSection(Asm, Layout, DefiningSymbols, Section, SD); + } +} + void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, const RelMapTy &RelMap) { for (MCAssembler::const_iterator it = Asm.begin(), @@ -1274,23 +1395,6 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, } } -static int compareBySuffix(const MCSectionELF *const *a, - const MCSectionELF *const *b) { - const StringRef &NameA = (*a)->getSectionName(); - const StringRef &NameB = (*b)->getSectionName(); - const unsigned sizeA = NameA.size(); - const unsigned sizeB = NameB.size(); - const unsigned len = std::min(sizeA, sizeB); - for (unsigned int i = 0; i < len; ++i) { - char ca = NameA[sizeA - i - 1]; - char cb = NameB[sizeB - i - 1]; - if (ca != cb) - return cb - ca; - } - - return sizeB - sizeA; -} - void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, SectionIndexMapTy &SectionIndexMap, @@ -1331,45 +1435,20 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, WriteSymbolTable(F, Asm, Layout, SectionIndexMap); F = new MCDataFragment(&StrtabSD); - F->getContents().append(StringTable.begin(), StringTable.end()); + F->getContents().append(StrTabBuilder.data().begin(), + StrTabBuilder.data().end()); F = new MCDataFragment(&ShstrtabSD); - std::vector Sections; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { + // Section header string table. + for (auto it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { const MCSectionELF &Section = static_cast(it->getSection()); - Sections.push_back(&Section); - } - array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix); - - // Section header string table. - // - // The first entry of a string table holds a null character so skip - // section 0. - uint64_t Index = 1; - F->getContents().push_back('\x00'); - - for (unsigned int I = 0, E = Sections.size(); I != E; ++I) { - const MCSectionELF &Section = *Sections[I]; - - StringRef Name = Section.getSectionName(); - if (I != 0) { - StringRef PreviousName = Sections[I - 1]->getSectionName(); - if (PreviousName.endswith(Name)) { - SectionStringTableIndex[&Section] = Index - Name.size() - 1; - continue; - } - } - // Remember the index into the string table so we can write it - // into the sh_name field of the section header table. 
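The compareBySuffix routine deleted below existed to enable the same space optimization StringTableBuilder now provides: a name that is a suffix of an already-emitted name (".bss" inside ".sbss") can point into the longer string's tail instead of being stored twice. A naive, quadratic illustration of the idea (for exposition only; this is not how StringTableBuilder is implemented):

#include <cstdint>
#include <map>
#include <string>

struct SuffixMergedTable {
  std::string Data = std::string(1, '\0'); // index 0 is the empty string
  std::map<std::string, uint64_t> Offsets;

  uint64_t add(const std::string &S) {
    auto It = Offsets.find(S);
    if (It != Offsets.end())
      return It->second;
    // S followed by NUL occurring in Data means S is the tail of some
    // already-stored, NUL-terminated string: reuse those bytes.
    size_t Pos = Data.find(S + '\0');
    if (Pos == std::string::npos) {
      Pos = Data.size();
      Data += S;
      Data += '\0';
    }
    return Offsets[S] = Pos;
  }
};

The sharing only triggers when longer names are emitted before their suffixes, which is exactly what the removed suffix sort arranged and what a finalize() pass can do internally.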
- SectionStringTableIndex[&Section] = Index; - - Index += Name.size() + 1; - F->getContents().append(Name.begin(), Name.end()); - F->getContents().push_back('\x00'); + ShStrTabBuilder.add(Section.getSectionName()); } + ShStrTabBuilder.finalize(); + F->getContents().append(ShStrTabBuilder.data().begin(), + ShStrTabBuilder.data().end()); } void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, @@ -1437,7 +1516,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, switch(Section.getType()) { case ELF::SHT_DYNAMIC: - sh_link = SectionStringTableIndex[&Section]; + sh_link = ShStrTabBuilder.getOffset(Section.getSectionName()); sh_info = 0; break; @@ -1518,7 +1597,8 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, } } - WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(), + WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()), + Section.getType(), Section.getFlags(), 0, Offset, Size, sh_link, sh_info, Alignment, Section.getEntrySize()); } @@ -1652,6 +1732,8 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, unsigned NumUserSections = Asm.size(); + CompressDebugSections(Asm, const_cast(Layout)); + DenseMap RelMap; CreateRelocationSections(Asm, const_cast(Layout), RelMap); diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 9667145..c0777a6 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -61,8 +61,8 @@ MCAsmInfo::MCAsmInfo() { UsesELFSectionDirectiveForBSS = false; AlignmentIsInBytes = true; TextAlignFillValue = 0; - GPRel64Directive = 0; - GPRel32Directive = 0; + GPRel64Directive = nullptr; + GPRel32Directive = nullptr; GlobalDirective = "\t.globl\t"; HasSetDirective = true; HasAggressiveSymbolFolding = true; @@ -72,7 +72,7 @@ MCAsmInfo::MCAsmInfo() { HasSingleParameterDotFile = true; HasIdentDirective = false; HasNoDeadStrip = false; - WeakRefDirective = 0; + WeakRefDirective = nullptr; HasWeakDefDirective = false; HasWeakDefCanBeHiddenDirective = false; HasLinkOnceDirective = false; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 884ccf9..7f8ae54 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmBackend.h" @@ -31,6 +32,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include +#include using namespace llvm; namespace { @@ -49,34 +51,24 @@ private: unsigned IsVerboseAsm : 1; unsigned ShowInst : 1; - unsigned UseCFI : 1; unsigned UseDwarfDirectory : 1; - enum EHSymbolFlags { EHGlobal = 1, - EHWeakDefinition = 1 << 1, - EHPrivateExtern = 1 << 2 }; - DenseMap FlagMap; - - DenseMap SymbolMap; - void EmitRegisterName(int64_t Register); void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override; void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override; public: MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, - bool isVerboseAsm, bool useCFI, bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *printer, MCCodeEmitter *emitter, MCAsmBackend *asmbackend, bool showInst) : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend), CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm), - ShowInst(showInst), UseCFI(useCFI), - UseDwarfDirectory(useDwarfDirectory) { + ShowInst(showInst), UseDwarfDirectory(useDwarfDirectory) { if (InstPrinter && IsVerboseAsm) 
InstPrinter->setCommentStream(CommentStream); } - ~MCAsmStreamer() {} inline void EmitEOL() { // If we don't have any comments, just emit a \n. @@ -130,7 +122,6 @@ public: void EmitLabel(MCSymbol *Symbol) override; void EmitDebugLabel(MCSymbol *Symbol) override; - void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override; void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitLinkerOptions(ArrayRef Options) override; void EmitDataRegion(MCDataRegionType Kind) override; @@ -140,12 +131,6 @@ public: void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override; - void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, - const MCSymbol *Label, - unsigned PointerSize) override; - void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, - const MCSymbol *Label) override; - bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override; @@ -167,7 +152,7 @@ public: void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; - void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr, uint64_t Size = 0, unsigned ByteAlignment = 0) override; void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, @@ -175,7 +160,8 @@ public: void EmitBytes(StringRef Data) override; - void EmitValueImpl(const MCExpr *Value, unsigned Size) override; + void EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc = SMLoc()) override; void EmitIntValue(uint64_t Value, unsigned Size) override; void EmitULEB128Value(const MCExpr *Value) override; @@ -254,8 +240,6 @@ public: void EmitRawTextImpl(StringRef String) override; void FinishImpl() override; - - virtual MCSymbolData &getOrCreateSymbolData(const MCSymbol *Symbol) override; }; } // end anonymous namespace. 
@@ -321,21 +305,6 @@ void MCAsmStreamer::ChangeSection(const MCSection *Section, Section->PrintSwitchToSection(*MAI, OS, Subsection); } -void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, - MCSymbol *EHSymbol) { - if (UseCFI) - return; - - unsigned Flags = FlagMap.lookup(Symbol); - - if (Flags & EHGlobal) - EmitSymbolAttribute(EHSymbol, MCSA_Global); - if (Flags & EHWeakDefinition) - EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition); - if (Flags & EHPrivateExtern) - EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern); -} - void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); MCStreamer::EmitLabel(Symbol); @@ -441,22 +410,6 @@ void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { EmitEOL(); } -void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, - const MCSymbol *LastLabel, - const MCSymbol *Label, - unsigned PointerSize) { - EmitDwarfSetLineAddr(LineDelta, Label, PointerSize); -} - -void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, - const MCSymbol *Label) { - EmitIntValue(dwarf::DW_CFA_advance_loc4, 1); - const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel); - AddrDelta = ForceExpAbs(AddrDelta); - EmitValue(AddrDelta, 4); -} - - bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { switch (Attribute) { @@ -486,7 +439,6 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, return true; case MCSA_Global: // .globl/.global OS << MAI->getGlobalDirective(); - FlagMap[Symbol] |= EHGlobal; break; case MCSA_Hidden: OS << "\t.hidden\t"; break; case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; @@ -497,14 +449,12 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break; case MCSA_PrivateExtern: OS << "\t.private_extern\t"; - FlagMap[Symbol] |= EHPrivateExtern; break; case MCSA_Protected: OS << "\t.protected\t"; break; case MCSA_Reference: OS << "\t.reference\t"; break; case MCSA_Weak: OS << "\t.weak\t"; break; case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; - FlagMap[Symbol] |= EHWeakDefinition; break; // .weak_reference case MCSA_WeakReference: OS << MAI->getWeakRefDirective(); break; @@ -560,7 +510,7 @@ void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { // Common symbols do not belong to any actual section. - AssignSection(Symbol, NULL); + AssignSection(Symbol, nullptr); OS << "\t.comm\t" << *Symbol << ',' << Size; if (ByteAlignment != 0) { @@ -579,7 +529,7 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlign) { // Common symbols do not belong to any actual section. 
- AssignSection(Symbol, NULL); + AssignSection(Symbol, nullptr); OS << "\t.lcomm\t" << *Symbol << ',' << Size; if (ByteAlign > 1) { @@ -610,7 +560,7 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section); OS << MOSection->getSegmentName() << "," << MOSection->getSectionName(); - if (Symbol != NULL) { + if (Symbol) { OS << ',' << *Symbol << ',' << Size; if (ByteAlignment != 0) OS << ',' << Log2_32(ByteAlignment); @@ -625,7 +575,7 @@ void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { AssignSection(Symbol, Section); - assert(Symbol != NULL && "Symbol shouldn't be NULL!"); + assert(Symbol && "Symbol shouldn't be NULL!"); // Instead of using the Section we'll just use the shortcut. // This is a mach-o specific directive and section. OS << ".tbss " << *Symbol << ", " << Size; @@ -702,11 +652,12 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) { EmitValue(MCConstantExpr::Create(Value, getContext()), Size); } -void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) { +void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) { assert(Size <= 8 && "Invalid size"); assert(getCurrentSection().first && "Cannot emit contents before setting section!"); - const char *Directive = 0; + const char *Directive = nullptr; switch (Size) { default: break; case 1: Directive = MAI->getData8bitsDirective(); break; @@ -775,13 +726,13 @@ void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { } void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) { - assert(MAI->getGPRel64Directive() != 0); + assert(MAI->getGPRel64Directive() != nullptr); OS << MAI->getGPRel64Directive() << *Value; EmitEOL(); } void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { - assert(MAI->getGPRel32Directive() != 0); + assert(MAI->getGPRel32Directive() != nullptr); OS << MAI->getGPRel32Directive() << *Value; EmitEOL(); } @@ -973,10 +924,6 @@ void MCAsmStreamer::EmitIdent(StringRef IdentString) { void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) { MCStreamer::EmitCFISections(EH, Debug); - - if (!UseCFI) - return; - OS << "\t.cfi_sections "; if (EH) { OS << ".eh_frame"; @@ -990,11 +937,6 @@ void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) { } void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { - if (!UseCFI) { - RecordProcStart(Frame); - return; - } - OS << "\t.cfi_startproc"; if (Frame.IsSimple) OS << " simple"; @@ -1002,11 +944,6 @@ void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { } void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { - if (!UseCFI) { - RecordProcEnd(Frame); - return; - } - // Put a dummy non-null value in Frame.End to mark that this frame has been // closed. 
Frame.End = (MCSymbol *) 1; @@ -1027,10 +964,6 @@ void MCAsmStreamer::EmitRegisterName(int64_t Register) { void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { MCStreamer::EmitCFIDefCfa(Register, Offset); - - if (!UseCFI) - return; - OS << "\t.cfi_def_cfa "; EmitRegisterName(Register); OS << ", " << Offset; @@ -1039,20 +972,12 @@ void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) { MCStreamer::EmitCFIDefCfaOffset(Offset); - - if (!UseCFI) - return; - OS << "\t.cfi_def_cfa_offset " << Offset; EmitEOL(); } void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { MCStreamer::EmitCFIDefCfaRegister(Register); - - if (!UseCFI) - return; - OS << "\t.cfi_def_cfa_register "; EmitRegisterName(Register); EmitEOL(); @@ -1060,10 +985,6 @@ void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { this->MCStreamer::EmitCFIOffset(Register, Offset); - - if (!UseCFI) - return; - OS << "\t.cfi_offset "; EmitRegisterName(Register); OS << ", " << Offset; @@ -1073,50 +994,30 @@ void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) { MCStreamer::EmitCFIPersonality(Sym, Encoding); - - if (!UseCFI) - return; - OS << "\t.cfi_personality " << Encoding << ", " << *Sym; EmitEOL(); } void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { MCStreamer::EmitCFILsda(Sym, Encoding); - - if (!UseCFI) - return; - OS << "\t.cfi_lsda " << Encoding << ", " << *Sym; EmitEOL(); } void MCAsmStreamer::EmitCFIRememberState() { MCStreamer::EmitCFIRememberState(); - - if (!UseCFI) - return; - OS << "\t.cfi_remember_state"; EmitEOL(); } void MCAsmStreamer::EmitCFIRestoreState() { MCStreamer::EmitCFIRestoreState(); - - if (!UseCFI) - return; - OS << "\t.cfi_restore_state"; EmitEOL(); } void MCAsmStreamer::EmitCFISameValue(int64_t Register) { MCStreamer::EmitCFISameValue(Register); - - if (!UseCFI) - return; - OS << "\t.cfi_same_value "; EmitRegisterName(Register); EmitEOL(); @@ -1124,10 +1025,6 @@ void MCAsmStreamer::EmitCFISameValue(int64_t Register) { void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) { MCStreamer::EmitCFIRelOffset(Register, Offset); - - if (!UseCFI) - return; - OS << "\t.cfi_rel_offset "; EmitRegisterName(Register); OS << ", " << Offset; @@ -1136,50 +1033,30 @@ void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) { void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) { MCStreamer::EmitCFIAdjustCfaOffset(Adjustment); - - if (!UseCFI) - return; - OS << "\t.cfi_adjust_cfa_offset " << Adjustment; EmitEOL(); } void MCAsmStreamer::EmitCFISignalFrame() { MCStreamer::EmitCFISignalFrame(); - - if (!UseCFI) - return; - OS << "\t.cfi_signal_frame"; EmitEOL(); } void MCAsmStreamer::EmitCFIUndefined(int64_t Register) { MCStreamer::EmitCFIUndefined(Register); - - if (!UseCFI) - return; - OS << "\t.cfi_undefined " << Register; EmitEOL(); } void MCAsmStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) { MCStreamer::EmitCFIRegister(Register1, Register2); - - if (!UseCFI) - return; - OS << "\t.cfi_register " << Register1 << ", " << Register2; EmitEOL(); } void MCAsmStreamer::EmitCFIWindowSave() { MCStreamer::EmitCFIWindowSave(); - - if (!UseCFI) - return; - OS << "\t.cfi_window_save"; EmitEOL(); } @@ -1257,14 +1134,17 @@ void MCAsmStreamer::EmitWin64EHHandlerData() { void 
MCAsmStreamer::EmitWin64EHPushReg(unsigned Register) {
   MCStreamer::EmitWin64EHPushReg(Register);
 
-  OS << "\t.seh_pushreg " << Register;
+  OS << "\t.seh_pushreg ";
+  EmitRegisterName(Register);
   EmitEOL();
 }
 
 void MCAsmStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
   MCStreamer::EmitWin64EHSetFrame(Register, Offset);
 
-  OS << "\t.seh_setframe " << Register << ", " << Offset;
+  OS << "\t.seh_setframe ";
+  EmitRegisterName(Register);
+  OS << ", " << Offset;
   EmitEOL();
 }
@@ -1278,14 +1158,18 @@ void MCAsmStreamer::EmitWin64EHAllocStack(unsigned Size) {
 
 void MCAsmStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
   MCStreamer::EmitWin64EHSaveReg(Register, Offset);
 
-  OS << "\t.seh_savereg " << Register << ", " << Offset;
+  OS << "\t.seh_savereg ";
+  EmitRegisterName(Register);
+  OS << ", " << Offset;
   EmitEOL();
 }
 
 void MCAsmStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
   MCStreamer::EmitWin64EHSaveXMM(Register, Offset);
 
-  OS << "\t.seh_savexmm " << Register << ", " << Offset;
+  OS << "\t.seh_savexmm ";
+  EmitRegisterName(Register);
+  OS << ", " << Offset;
   EmitEOL();
 }
@@ -1455,26 +1339,13 @@ void MCAsmStreamer::FinishImpl() {
       EmitLabel(Label);
     }
   }
-
-  if (!UseCFI)
-    EmitFrames(AsmBackend.get(), false);
-}
-
-MCSymbolData &MCAsmStreamer::getOrCreateSymbolData(const MCSymbol *Symbol) {
-  MCSymbolData *&Entry = SymbolMap[Symbol];
-
-  if (!Entry)
-    Entry = new MCSymbolData(*Symbol, 0, 0, 0);
-
-  return *Entry;
 }
 
 MCStreamer *llvm::createAsmStreamer(MCContext &Context,
                                     formatted_raw_ostream &OS,
-                                    bool isVerboseAsm, bool useCFI,
-                                    bool useDwarfDirectory, MCInstPrinter *IP,
-                                    MCCodeEmitter *CE, MCAsmBackend *MAB,
-                                    bool ShowInst) {
-  return new MCAsmStreamer(Context, OS, isVerboseAsm, useCFI, useDwarfDirectory,
-                           IP, CE, MAB, ShowInst);
+                                    bool isVerboseAsm, bool useDwarfDirectory,
+                                    MCInstPrinter *IP, MCCodeEmitter *CE,
+                                    MCAsmBackend *MAB, bool ShowInst) {
+  return new MCAsmStreamer(Context, OS, isVerboseAsm, useDwarfDirectory, IP, CE,
+                           MAB, ShowInst);
 }
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 724ca29..886a5f5 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -7,7 +7,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "assembler"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -28,12 +27,11 @@
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Compression.h"
-#include "llvm/Support/Host.h"
-
+#include
 using namespace llvm;
 
+#define DEBUG_TYPE "assembler"
+
 namespace {
 namespace stats {
 STATISTIC(EmittedFragments, "Number of emitted assembler fragments - total");
@@ -119,36 +117,89 @@ uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
   return F->Offset;
 }
 
-uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+// Simple getSymbolOffset helper for the non-variable case.
+static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbolData &SD, + bool ReportError, uint64_t &Val) { + if (!SD.getFragment()) { + if (ReportError) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + SD.getSymbol().getName() + "'"); + return false; + } + Val = Layout.getFragmentOffset(SD.getFragment()) + SD.getOffset(); + return true; +} + +static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, + const MCSymbolData *SD, bool ReportError, + uint64_t &Val) { const MCSymbol &S = SD->getSymbol(); - // If this is a variable, then recursively evaluate now. - if (S.isVariable()) { - MCValue Target; - if (!S.getVariableValue()->EvaluateAsRelocatable(Target, this)) - report_fatal_error("unable to evaluate offset for variable '" + - S.getName() + "'"); + if (!S.isVariable()) + return getLabelOffset(Layout, *SD, ReportError, Val); - // Verify that any used symbols are defined. - if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) - report_fatal_error("unable to evaluate offset to undefined symbol '" + - Target.getSymA()->getSymbol().getName() + "'"); - if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) - report_fatal_error("unable to evaluate offset to undefined symbol '" + - Target.getSymB()->getSymbol().getName() + "'"); - - uint64_t Offset = Target.getConstant(); - if (Target.getSymA()) - Offset += getSymbolOffset(&Assembler.getSymbolData( - Target.getSymA()->getSymbol())); - if (Target.getSymB()) - Offset -= getSymbolOffset(&Assembler.getSymbolData( - Target.getSymB()->getSymbol())); - return Offset; + // If SD is a variable, evaluate it. + MCValue Target; + if (!S.getVariableValue()->EvaluateAsValue(Target, &Layout)) + report_fatal_error("unable to evaluate offset for variable '" + + S.getName() + "'"); + + uint64_t Offset = Target.getConstant(); + + const MCAssembler &Asm = Layout.getAssembler(); + + const MCSymbolRefExpr *A = Target.getSymA(); + if (A) { + uint64_t ValA; + if (!getLabelOffset(Layout, Asm.getSymbolData(A->getSymbol()), ReportError, + ValA)) + return false; + Offset += ValA; } - assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!"); - return getFragmentOffset(SD->getFragment()) + SD->getOffset(); + const MCSymbolRefExpr *B = Target.getSymB(); + if (B) { + uint64_t ValB; + if (!getLabelOffset(Layout, Asm.getSymbolData(B->getSymbol()), ReportError, + ValB)) + return false; + Offset -= ValB; + } + + Val = Offset; + return true; +} + +bool MCAsmLayout::getSymbolOffset(const MCSymbolData *SD, uint64_t &Val) const { + return getSymbolOffsetImpl(*this, SD, false, Val); +} + +uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const { + uint64_t Val; + getSymbolOffsetImpl(*this, SD, true, Val); + return Val; +} + +const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const { + if (!Symbol.isVariable()) + return &Symbol; + + const MCExpr *Expr = Symbol.getVariableValue(); + MCValue Value; + if (!Expr->EvaluateAsValue(Value, this)) + llvm_unreachable("Invalid Expression"); + + const MCSymbolRefExpr *RefB = Value.getSymB(); + if (RefB) + Assembler.getContext().FatalError( + SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() + + "' could not be evaluated in a subtraction expression"); + + const MCSymbolRefExpr *A = Value.getSymA(); + if (!A) + return nullptr; + + return &A->getSymbol(); } uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { @@ -215,7 +266,7 @@ MCFragment::~MCFragment() { } MCFragment::MCFragment(FragmentType _Kind, MCSectionData 
*_Parent) - : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)) + : Kind(_Kind), Parent(_Parent), Atom(nullptr), Offset(~UINT64_C(0)) { if (Parent) Parent->getFragmentList().push_back(this); @@ -233,40 +284,7 @@ MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() { /* *** */ -const SmallVectorImpl &MCCompressedFragment::getCompressedContents() const { - assert(getParent()->size() == 1 && - "Only compress sections containing a single fragment"); - if (CompressedContents.empty()) { - std::unique_ptr CompressedSection; - zlib::Status Success = - zlib::compress(StringRef(getContents().data(), getContents().size()), - CompressedSection); - (void)Success; - assert(Success == zlib::StatusOK); - CompressedContents.push_back('Z'); - CompressedContents.push_back('L'); - CompressedContents.push_back('I'); - CompressedContents.push_back('B'); - uint64_t Size = getContents().size(); - if (sys::IsLittleEndianHost) - Size = sys::SwapByteOrder(Size); - CompressedContents.append(reinterpret_cast(&Size), - reinterpret_cast(&Size + 1)); - CompressedContents.append(CompressedSection->getBuffer().begin(), - CompressedSection->getBuffer().end()); - } - return CompressedContents; -} - -SmallVectorImpl &MCCompressedFragment::getContents() { - assert(CompressedContents.empty() && - "Fragment contents should not be altered after compression"); - return MCDataFragment::getContents(); -} - -/* *** */ - -MCSectionData::MCSectionData() : Section(0) {} +MCSectionData::MCSectionData() : Section(nullptr) {} MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) : Section(&_Section), @@ -286,7 +304,7 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) { SmallVectorImpl >::iterator MI = std::lower_bound(SubsectionFragmentMap.begin(), SubsectionFragmentMap.end(), - std::make_pair(Subsection, (MCFragment *)0)); + std::make_pair(Subsection, (MCFragment *)nullptr)); bool ExactMatch = false; if (MI != SubsectionFragmentMap.end()) { ExactMatch = MI->first == Subsection; @@ -311,13 +329,13 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) { /* *** */ -MCSymbolData::MCSymbolData() : Symbol(0) {} +MCSymbolData::MCSymbolData() : Symbol(nullptr) {} MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, uint64_t _Offset, MCAssembler *A) : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset), IsExternal(false), IsPrivateExtern(false), - CommonSize(0), SymbolSize(0), CommonAlign(0), + CommonSize(0), SymbolSize(nullptr), CommonAlign(0), Flags(0), Index(0) { if (A) @@ -358,6 +376,31 @@ void MCAssembler::reset() { getLOHContainer().reset(); } +bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const { + if (ThumbFuncs.count(Symbol)) + return true; + + if (!Symbol->isVariable()) + return false; + + // FIXME: It looks like gas supports some cases of the form "foo + 2". It + // is not clear if that is a bug or a feature. + const MCExpr *Expr = Symbol->getVariableValue(); + const MCSymbolRefExpr *Ref = dyn_cast(Expr); + if (!Ref) + return false; + + if (Ref->getKind() != MCSymbolRefExpr::VK_None) + return false; + + const MCSymbol &Sym = Ref->getSymbol(); + if (!isThumbFunc(&Sym)) + return false; + + ThumbFuncs.insert(Symbol); // Cache it. + return true; +} + bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const { // Non-temporary labels should always be visible to the linker. 
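The new MCAssembler::isThumbFunc above is a memoized walk through plain (VK_None) symbol aliases: a symbol is a Thumb function if it was marked directly or aliases one, and positive answers are cached. A simplified stand-alone model of the same shape (Sym, PlainAlias and ThumbFuncs are hypothetical stand-ins):

#include <set>

struct Sym {
  const Sym *PlainAlias = nullptr; // non-null: a VK_None alias of another symbol
};

struct ThumbFuncSet {
  std::set<const Sym *> Marked;

  bool isThumbFunc(const Sym *S) {
    if (Marked.count(S))
      return true;
    if (!S->PlainAlias || !isThumbFunc(S->PlainAlias))
      return false;
    Marked.insert(S); // cache the positive result, as the patch does
    return true;
  }
};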
if (!Symbol.isTemporary()) @@ -378,13 +421,13 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { // Absolute and undefined symbols have no defining atom. if (!SD->getFragment()) - return 0; + return nullptr; // Non-linker visible symbols in sections which can't be atomized have no // defining atom. if (!getBackend().isSectionAtomizable( SD->getFragment()->getParent()->getSection())) - return 0; + return nullptr; // Otherwise, return the atom for the containing fragment. return SD->getFragment()->getAtom(); @@ -467,8 +510,6 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_Relaxable: case MCFragment::FT_CompactEncodedInst: return cast(F).getContents().size(); - case MCFragment::FT_Compressed: - return cast(F).getCompressedContents().size(); case MCFragment::FT_Fill: return cast(F).getSize(); @@ -657,11 +698,6 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, break; } - case MCFragment::FT_Compressed: - ++stats::EmittedDataFragments; - OW->WriteBytes(cast(F).getCompressedContents()); - break; - case MCFragment::FT_Data: ++stats::EmittedDataFragments; writeFragmentContents(F, OW); @@ -738,7 +774,6 @@ void MCAssembler::writeSectionData(const MCSectionData *SD, ie = SD->end(); it != ie; ++it) { switch (it->getKind()) { default: llvm_unreachable("Invalid fragment in virtual section!"); - case MCFragment::FT_Compressed: case MCFragment::FT_Data: { // Check that we aren't trying to write a non-zero contents (or fixups) // into a virtual section. This is to support clients which use standard @@ -992,7 +1027,7 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD) { // remain NULL if none were relaxed. // When a fragment is relaxed, all the fragments following it should get // invalidated because their offset is going to change. - MCFragment *FirstRelaxedFragment = NULL; + MCFragment *FirstRelaxedFragment = nullptr; // Attempt to relax all the fragments in the section. 
for (MCSectionData::iterator I = SD.begin(), IE = SD.end(); I != IE; ++I) { @@ -1070,8 +1105,6 @@ void MCFragment::dump() { switch (getKind()) { case MCFragment::FT_Align: OS << "MCAlignFragment"; break; case MCFragment::FT_Data: OS << "MCDataFragment"; break; - case MCFragment::FT_Compressed: - OS << "MCCompressedFragment"; break; case MCFragment::FT_CompactEncodedInst: OS << "MCCompactEncodedInstFragment"; break; case MCFragment::FT_Fill: OS << "MCFillFragment"; break; @@ -1098,7 +1131,6 @@ void MCFragment::dump() { << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">"; break; } - case MCFragment::FT_Compressed: case MCFragment::FT_Data: { const MCDataFragment *DF = cast(this); OS << "\n "; diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 73ffdc0..c163268 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -29,19 +29,13 @@ using namespace llvm; -typedef std::pair SectionGroupPair; - -typedef StringMap MachOUniqueMapTy; -typedef std::map ELFUniqueMapTy; -typedef std::map COFFUniqueMapTy; - MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, const MCObjectFileInfo *mofi, const SourceMgr *mgr, bool DoAutoReset) : SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), Allocator(), Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false), - GenDwarfForAssembly(false), GenDwarfFileNumber(0), + GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4), AllowTemporaryLabels(true), DwarfCompileUnitID(0), AutoReset(DoAutoReset) { @@ -49,12 +43,8 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, if (EC) CompilationDir.clear(); - MachOUniquingMap = 0; - ELFUniquingMap = 0; - COFFUniquingMap = 0; - SecureLogFile = getenv("AS_SECURE_LOG_FILE"); - SecureLog = 0; + SecureLog = nullptr; SecureLogUsed = false; if (SrcMgr && SrcMgr->getNumBuffers() > 0) @@ -88,13 +78,9 @@ void MCContext::reset() { DwarfCompileUnitID = 0; CurrentDwarfLoc = MCDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0); - // If we have the MachO uniquing map, free it. - delete (MachOUniqueMapTy*)MachOUniquingMap; - delete (ELFUniqueMapTy*)ELFUniquingMap; - delete (COFFUniqueMapTy*)COFFUniquingMap; - MachOUniquingMap = 0; - ELFUniquingMap = 0; - COFFUniquingMap = 0; + MachOUniquingMap.clear(); + ELFUniquingMap.clear(); + COFFUniquingMap.clear(); NextUniqueID = 0; AllowTemporaryLabels = true; @@ -225,11 +211,6 @@ getMachOSection(StringRef Segment, StringRef Section, // may not have the same flags as the requested section, if so this should be // diagnosed by the client as an error. - // Create the map if it doesn't already exist. - if (MachOUniquingMap == 0) - MachOUniquingMap = new MachOUniqueMapTy(); - MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap; - // Form the name to look up. SmallString<64> Name; Name += Segment; @@ -237,7 +218,7 @@ getMachOSection(StringRef Segment, StringRef Section, Name += Section; // Do the lookup, if we have a hit, return it. - const MCSectionMachO *&Entry = Map[Name.str()]; + const MCSectionMachO *&Entry = MachOUniquingMap[Name.str()]; if (Entry) return Entry; // Otherwise, return a new section. 
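The uniquing maps in this file all follow the same lookup-or-create shape the MachO path above shows: take a reference into the map, and only construct the section when the slot is fresh. Reduced to standard containers (Section is a placeholder type; the real code uses typed member maps):

#include <map>
#include <string>

struct Section {};

Section *getOrCreateSection(std::map<std::string, Section *> &Map,
                            const std::string &Name) {
  auto IterBool = Map.insert({Name, nullptr});
  if (!IterBool.second)
    return IterBool.first->second; // hit: reuse the cached section
  Section *S = new Section();      // miss: construct exactly once
  IterBool.first->second = S;
  return S;
}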
@@ -251,42 +232,48 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags, return getELFSection(Section, Type, Flags, Kind, 0, ""); } +void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) { + StringRef GroupName; + if (const MCSymbol *Group = Section->getGroup()) + GroupName = Group->getName(); + + ELFUniquingMap.erase(SectionGroupPair(Section->getSectionName(), GroupName)); + auto I = + ELFUniquingMap.insert(std::make_pair(SectionGroupPair(Name, GroupName), + Section)).first; + StringRef CachedName = I->first.first; + const_cast(Section)->setSectionName(CachedName); +} + const MCSectionELF *MCContext:: getELFSection(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind, unsigned EntrySize, StringRef Group) { - if (ELFUniquingMap == 0) - ELFUniquingMap = new ELFUniqueMapTy(); - ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap; - - SmallString<32> ZDebugName; - if (MAI->compressDebugSections() && Section.startswith(".debug_") && - Section != ".debug_frame" && Section != ".debug_line") - Section = (".z" + Section.drop_front(1)).toStringRef(ZDebugName); - // Do the lookup, if we have a hit, return it. - std::pair Entry = Map.insert( - std::make_pair(SectionGroupPair(Section, Group), (MCSectionELF *)0)); - if (!Entry.second) return Entry.first->second; + auto IterBool = ELFUniquingMap.insert( + std::make_pair(SectionGroupPair(Section, Group), nullptr)); + auto &Entry = *IterBool.first; + if (!IterBool.second) return Entry.second; // Possibly refine the entry size first. if (!EntrySize) { EntrySize = MCSectionELF::DetermineEntrySize(Kind); } - MCSymbol *GroupSym = NULL; + MCSymbol *GroupSym = nullptr; if (!Group.empty()) GroupSym = GetOrCreateSymbol(Group); - MCSectionELF *Result = new (*this) MCSectionELF( - Entry.first->first.first, Type, Flags, Kind, EntrySize, GroupSym); - Entry.first->second = Result; + StringRef CachedName = Entry.first.first; + MCSectionELF *Result = new (*this) + MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym); + Entry.second = Result; return Result; } const MCSectionELF *MCContext::CreateELFGroupSection() { MCSectionELF *Result = new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0, - SectionKind::getReadOnly(), 4, NULL); + SectionKind::getReadOnly(), 4, nullptr); return Result; } @@ -294,26 +281,21 @@ const MCSectionCOFF * MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, SectionKind Kind, StringRef COMDATSymName, int Selection, const MCSectionCOFF *Assoc) { - if (COFFUniquingMap == 0) - COFFUniquingMap = new COFFUniqueMapTy(); - COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap; - // Do the lookup, if we have a hit, return it. 
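renameELFSection above also shows why the code threads CachedName back into the section: the StringRef must point at storage the uniquing map owns, so a rename moves the entry and then re-points the name at the new map node. A sketch of that move with std::map (types simplified; map node keys are stable across unrelated insertions and erasures, which is what makes this safe):

#include <map>
#include <string>

struct Section {
  const std::string *Name;
};

void renameSection(std::map<std::string, Section *> &Map,
                   const std::string &Old, const std::string &New) {
  Section *S = Map.at(Old);
  Map.erase(Old);
  auto I = Map.insert({New, S}).first;
  S->Name = &I->first; // point at the key the map now owns
}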
SectionGroupPair P(Section, COMDATSymName); - std::pair Entry = - Map.insert(std::make_pair(P, (MCSectionCOFF *)0)); - COFFUniqueMapTy::iterator Iter = Entry.first; - if (!Entry.second) + auto IterBool = COFFUniquingMap.insert(std::make_pair(P, nullptr)); + auto Iter = IterBool.first; + if (!IterBool.second) return Iter->second; - const MCSymbol *COMDATSymbol = NULL; + const MCSymbol *COMDATSymbol = nullptr; if (!COMDATSymName.empty()) COMDATSymbol = GetOrCreateSymbol(COMDATSymName); - MCSectionCOFF *Result = - new (*this) MCSectionCOFF(Iter->first.first, Characteristics, - COMDATSymbol, Selection, Assoc, Kind); + StringRef CachedName = Iter->first.first; + MCSectionCOFF *Result = new (*this) MCSectionCOFF( + CachedName, Characteristics, COMDATSymbol, Selection, Assoc, Kind); Iter->second = Result; return Result; @@ -326,14 +308,10 @@ MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, } const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) { - if (COFFUniquingMap == 0) - COFFUniquingMap = new COFFUniqueMapTy(); - COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap; - SectionGroupPair P(Section, ""); - COFFUniqueMapTy::iterator Iter = Map.find(P); - if (Iter == Map.end()) - return 0; + auto Iter = COFFUniquingMap.find(P); + if (Iter == COFFUniquingMap.end()) + return nullptr; return Iter->second; } @@ -361,7 +339,7 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { return !MCDwarfFiles[FileNumber].Name.empty(); } -void MCContext::FatalError(SMLoc Loc, const Twine &Msg) { +void MCContext::FatalError(SMLoc Loc, const Twine &Msg) const { // If we have a source manager and a location, use it. Otherwise just // use the generic report_fatal_error(). if (!SrcMgr || Loc == SMLoc()) diff --git a/lib/MC/MCDisassembler.cpp b/lib/MC/MCDisassembler.cpp index 7a2b1a1..77d9ce1 100644 --- a/lib/MC/MCDisassembler.cpp +++ b/lib/MC/MCDisassembler.cpp @@ -16,20 +16,6 @@ using namespace llvm; MCDisassembler::~MCDisassembler() { } -void MCDisassembler::setupForSymbolicDisassembly( - LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp, - void *DisInfo, MCContext *Ctx, std::unique_ptr &RelInfo) { - this->GetOpInfo = GetOpInfo; - this->SymbolLookUp = SymbolLookUp; - this->DisInfo = DisInfo; - this->Ctx = Ctx; - assert(Ctx != 0 && "No MCContext given for symbolic disassembly"); - if (!Symbolizer) - Symbolizer.reset(new MCExternalSymbolizer(*Ctx, std::move(RelInfo), - GetOpInfo, SymbolLookUp, - DisInfo)); -} - bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index b935b83..0530c26 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -41,20 +41,20 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); if (!TheTarget) - return 0; + return nullptr; const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); if (!MRI) - return 0; + return nullptr; // Get the assembler info needed to setup the MCContext. 
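Since this hunk rewrites the body of LLVMCreateDisasmCPU, a short sketch of how the llvm-c disassembler API is typically driven end to end may help; the triple and byte sequence are made-up inputs, error handling is reduced to a null check, and only public llvm-c entry points are used:

#include "llvm-c/Disassembler.h"
#include "llvm-c/Target.h"
#include <cstdint>
#include <cstdio>

int main() {
  LLVMInitializeAllTargetInfos();
  LLVMInitializeAllTargetMCs();
  LLVMInitializeAllDisassemblers();

  LLVMDisasmContextRef DC =
      LLVMCreateDisasmCPU("x86_64-unknown-linux-gnu", "", /*DisInfo=*/nullptr,
                          /*TagType=*/0, /*GetOpInfo=*/nullptr,
                          /*SymbolLookUp=*/nullptr);
  if (!DC)
    return 1; // any factory step in the function above may have returned null

  uint8_t Bytes[] = {0x55}; // pushq %rbp
  char Text[64];
  if (LLVMDisasmInstruction(DC, Bytes, sizeof(Bytes), /*PC=*/0, Text,
                            sizeof(Text)))
    std::printf("%s\n", Text);
  LLVMDisasmDispose(DC);
  return 0;
}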
const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple); if (!MAI) - return 0; + return nullptr; const MCInstrInfo *MII = TheTarget->createMCInstrInfo(); if (!MII) - return 0; + return nullptr; // Package up features to be passed to target/subtarget std::string FeaturesStr; @@ -62,41 +62,40 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU, FeaturesStr); if (!STI) - return 0; + return nullptr; // Set up the MCContext for creating symbols and MCExpr's. - MCContext *Ctx = new MCContext(MAI, MRI, 0); + MCContext *Ctx = new MCContext(MAI, MRI, nullptr); if (!Ctx) - return 0; + return nullptr; // Set up disassembler. - MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI); + MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI, *Ctx); if (!DisAsm) - return 0; + return nullptr; std::unique_ptr RelInfo( TheTarget->createMCRelocationInfo(Triple, *Ctx)); if (!RelInfo) - return 0; + return nullptr; std::unique_ptr Symbolizer(TheTarget->createMCSymbolizer( Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, RelInfo.release())); DisAsm->setSymbolizer(std::move(Symbolizer)); - DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, - Ctx, RelInfo); + // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, *MAI, *MII, *MRI, *STI); if (!IP) - return 0; + return nullptr; LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, MAI, MRI, STI, MII, Ctx, DisAsm, IP); if (!DC) - return 0; + return nullptr; DC->setCPU(CPU); return DC; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 72836ff..be6731a 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" @@ -16,8 +17,8 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -62,7 +63,7 @@ static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) { // and if there is information from the last .loc directive that has yet to have // a line entry made for it is made. // -void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) { +void MCLineEntry::Make(MCObjectStreamer *MCOS, const MCSection *Section) { if (!MCOS->getContext().getDwarfLocSeen()) return; @@ -113,7 +114,7 @@ static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS, // in the LineSection. // static inline void -EmitDwarfLineTable(MCStreamer *MCOS, const MCSection *Section, +EmitDwarfLineTable(MCObjectStreamer *MCOS, const MCSection *Section, const MCLineSection::MCLineEntryCollection &LineEntries) { unsigned FileNum = 1; unsigned LastLine = 1; @@ -121,7 +122,7 @@ EmitDwarfLineTable(MCStreamer *MCOS, const MCSection *Section, unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0; unsigned Isa = 0; unsigned Discriminator = 0; - MCSymbol *LastLabel = NULL; + MCSymbol *LastLabel = nullptr; // Loop through each MCLineEntry and encode the dwarf line number table. 
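The encoder this loop feeds packs a line delta and an address delta into one "special opcode" byte whenever both fit. A sketch of the standard DWARF computation, using the usual LLVM line-table parameters (line base -5, line range 14, opcode base 13) as assumptions rather than values quoted from this patch:

#include <cstdint>

const int LineBase = -5;
const int LineRange = 14;
const int OpcodeBase = 13;

// Returns the special opcode encoding both deltas in a single byte, or -1
// when the pair is out of range and standard opcodes must be emitted instead.
int specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
  if (LineDelta < LineBase || LineDelta >= LineBase + LineRange)
    return -1;
  int64_t Op =
      (LineDelta - LineBase) + int64_t(LineRange * AddrDelta) + OpcodeBase;
  return Op <= 255 ? int(Op) : -1;
}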
for (auto it = LineEntries.begin(), @@ -204,7 +205,7 @@ EmitDwarfLineTable(MCStreamer *MCOS, const MCSection *Section, // // This emits the Dwarf file and the line tables. // -void MCDwarfLineTable::Emit(MCStreamer *MCOS) { +void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS) { MCContext &context = MCOS->getContext(); auto &LineTables = context.getMCDwarfLineTables(); @@ -318,7 +319,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, return std::make_pair(LineStartSym, LineEndSym); } -void MCDwarfLineTable::EmitCU(MCStreamer *MCOS) const { +void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS) const { MCSymbol *LineEndSym = Header.Emit(MCOS).second; // Put out the line tables. @@ -644,8 +645,8 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4); MCOS->EmitAbsValue(Length, 4); - // The 2 byte DWARF version, which is 2. - MCOS->EmitIntValue(2, 2); + // The 2 byte DWARF version. + MCOS->EmitIntValue(context.getDwarfVersion(), 2); // The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev, // it is at the start of that section so this is zero. @@ -688,7 +689,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, const SmallVectorImpl &MCDwarfDirs = context.getMCDwarfDirs(); if (MCDwarfDirs.size() > 0) { MCOS->EmitBytes(MCDwarfDirs[0]); - MCOS->EmitBytes("/"); + MCOS->EmitBytes(sys::path::get_separator()); } const SmallVectorImpl &MCDwarfFiles = MCOS->getContext().getMCDwarfFiles(); @@ -727,28 +728,24 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, // Third part: the list of label DIEs. // Loop on saved info for dwarf labels and create the DIEs for them. - const std::vector &Entries = - MCOS->getContext().getMCGenDwarfLabelEntries(); - for (std::vector::const_iterator it = - Entries.begin(), ie = Entries.end(); it != ie; - ++it) { - const MCGenDwarfLabelEntry *Entry = *it; - + const std::vector &Entries = + MCOS->getContext().getMCGenDwarfLabelEntries(); + for (const auto &Entry : Entries) { // The DW_TAG_label DIE abbrev (2). MCOS->EmitULEB128IntValue(2); // AT_name, of the label without any leading underbar. - MCOS->EmitBytes(Entry->getName()); + MCOS->EmitBytes(Entry.getName()); MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. // AT_decl_file, index into the file table. - MCOS->EmitIntValue(Entry->getFileNumber(), 4); + MCOS->EmitIntValue(Entry.getFileNumber(), 4); // AT_decl_line, source line number. - MCOS->EmitIntValue(Entry->getLineNumber(), 4); + MCOS->EmitIntValue(Entry.getLineNumber(), 4); // AT_low_pc, start address of the label. - const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(), + const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry.getLabel(), MCSymbolRefExpr::VK_None, context); MCOS->EmitValue(AT_low_pc, AddrSize); @@ -761,14 +758,6 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, // Add the NULL DIE terminating the DW_TAG_unspecified_parameters DIE's. MCOS->EmitIntValue(0, 1); } - // Deallocate the MCGenDwarfLabelEntry classes that saved away the info - // for the dwarf labels. - for (std::vector::const_iterator it = - Entries.begin(), ie = Entries.end(); it != ie; - ++it) { - const MCGenDwarfLabelEntry *Entry = *it; - delete Entry; - } // Add the NULL DIE terminating the Compile Unit DIE's. 
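The deallocation loop removed above becomes unnecessary because the context now stores the entries by value rather than as new-ed pointers, so the vector's destructor reclaims everything. A hypothetical mirror of the ownership change (field names are illustrative):

#include <string>
#include <utility>
#include <vector>

struct LabelEntry {
  std::string Name;
  unsigned FileNumber, LineNumber;
};

struct Context {
  std::vector<LabelEntry> Entries; // owns its elements
  void addLabelEntry(LabelEntry E) { Entries.push_back(std::move(E)); }
}; // no manual delete loop when Context goes away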
MCOS->EmitIntValue(0, 1); @@ -790,8 +779,8 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { MCSymbol *LineSectionSymbol = nullptr; if (CreateDwarfSectionSymbols) LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0); - MCSymbol *AbbrevSectionSymbol = NULL; - MCSymbol *InfoSectionSymbol = NULL; + MCSymbol *AbbrevSectionSymbol = nullptr; + MCSymbol *InfoSectionSymbol = nullptr; MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); if (CreateDwarfSectionSymbols) { InfoSectionSymbol = context.CreateTempSymbol(); @@ -856,9 +845,8 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, MCOS->EmitLabel(Label); // Create and entry for the info and add it to the other entries. - MCGenDwarfLabelEntry *Entry = - new MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label); - MCOS->getContext().addMCGenDwarfLabelEntry(Entry); + MCOS->getContext().addMCGenDwarfLabelEntry( + MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label)); } static int getDataAlignmentFactor(MCStreamer &streamer) { @@ -894,7 +882,7 @@ static unsigned getSizeForEncoding(MCStreamer &streamer, static void EmitFDESymbol(MCStreamer &streamer, const MCSymbol &symbol, unsigned symbolEncoding, bool isEH, - const char *comment = 0) { + const char *comment = nullptr) { MCContext &context = streamer.getContext(); const MCAsmInfo *asmInfo = context.getAsmInfo(); const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol, @@ -923,13 +911,11 @@ namespace { class FrameEmitterImpl { int CFAOffset; int CIENum; - bool UsingCFI; bool IsEH; const MCSymbol *SectionStart; public: - FrameEmitterImpl(bool usingCFI, bool isEH) - : CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH), - SectionStart(0) {} + FrameEmitterImpl(bool isEH) + : CFAOffset(0), CIENum(0), IsEH(isEH), SectionStart(nullptr) {} void setSectionStart(const MCSymbol *Label) { SectionStart = Label; } @@ -937,20 +923,20 @@ namespace { void EmitCompactUnwind(MCStreamer &streamer, const MCDwarfFrameInfo &frame); - const MCSymbol &EmitCIE(MCStreamer &streamer, + const MCSymbol &EmitCIE(MCObjectStreamer &streamer, const MCSymbol *personality, unsigned personalityEncoding, const MCSymbol *lsda, bool IsSignalFrame, unsigned lsdaEncoding, bool IsSimple); - MCSymbol *EmitFDE(MCStreamer &streamer, + MCSymbol *EmitFDE(MCObjectStreamer &streamer, const MCSymbol &cieStart, const MCDwarfFrameInfo &frame); - void EmitCFIInstructions(MCStreamer &streamer, + void EmitCFIInstructions(MCObjectStreamer &streamer, ArrayRef Instrs, MCSymbol *BaseLabel); - void EmitCFIInstruction(MCStreamer &Streamer, + void EmitCFIInstruction(MCObjectStreamer &Streamer, const MCCFIInstruction &Instr); }; @@ -1001,7 +987,7 @@ static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding, Streamer.EmitIntValue(Encoding, 1); } -void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, +void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer, const MCCFIInstruction &Instr) { int dataAlignmentFactor = getDataAlignmentFactor(Streamer); bool VerboseAsm = Streamer.isVerboseAsm(); @@ -1153,7 +1139,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, /// EmitFrameMoves - Emit frame instructions to describe the layout of the /// frame. 
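A recurring change in the hunks above is narrowing parameter types from MCStreamer to MCObjectStreamer, so routines that only make sense during object emission can no longer be handed a plain streamer. A minimal sketch of why this helps, with stand-in class names:

#include <cstdio>

struct Streamer {                     // stand-in for MCStreamer
  virtual ~Streamer() {}
};

struct ObjectStreamer : Streamer {    // stand-in for MCObjectStreamer
  void emitBytes(const char *S) { std::puts(S); }
};

// Before: void emitCIE(Streamer &S) had to trust its caller.
// After: the type system documents and enforces the requirement.
void emitCIE(ObjectStreamer &S) { S.emitBytes("CIE"); }

int main() {
  ObjectStreamer OS;
  emitCIE(OS);                        // fine
  // Streamer Base; emitCIE(Base);    // would no longer compile
  return 0;
}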
-void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, +void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer, ArrayRef Instrs, MCSymbol *BaseLabel) { for (unsigned i = 0, N = Instrs.size(); i < N; ++i) { @@ -1214,7 +1200,7 @@ void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, Encoding |= 0x40000000; // Range Start - unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI); + unsigned FDEEncoding = MOFI->getFDEEncoding(); unsigned Size = getSizeForEncoding(Streamer, FDEEncoding); if (VerboseAsm) Streamer.AddComment("Range Start"); Streamer.EmitSymbolValue(Frame.Function, Size); @@ -1248,7 +1234,7 @@ void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, Streamer.EmitIntValue(0, Size); // No LSDA } -const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, +const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, const MCSymbol *personality, unsigned personalityEncoding, const MCSymbol *lsda, @@ -1346,8 +1332,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding"); // Encoding of the FDE pointers - EmitEncodingByte(streamer, MOFI->getFDEEncoding(UsingCFI), - "FDE Encoding"); + EmitEncodingByte(streamer, MOFI->getFDEEncoding(), "FDE Encoding"); } // Initial Instructions @@ -1356,7 +1341,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, if (!IsSimple) { const std::vector &Instructions = MAI->getInitialFrameState(); - EmitCFIInstructions(streamer, Instructions, NULL); + EmitCFIInstructions(streamer, Instructions, nullptr); } // Padding @@ -1366,7 +1351,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, return *sectionStart; } -MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, +MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer, const MCSymbol &cieStart, const MCDwarfFrameInfo &frame) { MCContext &context = streamer.getContext(); @@ -1405,8 +1390,8 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, } // PC Begin - unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI) - : (unsigned)dwarf::DW_EH_PE_absptr; + unsigned PCEncoding = + IsEH ? MOFI->getFDEEncoding() : (unsigned)dwarf::DW_EH_PE_absptr; unsigned PCSize = getSizeForEncoding(streamer, PCEncoding); EmitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH, "FDE initial location"); @@ -1443,8 +1428,12 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, namespace { struct CIEKey { - static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1, false, false); } - static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0, false, false); } + static const CIEKey getEmptyKey() { + return CIEKey(nullptr, 0, -1, false, false); + } + static const CIEKey getTombstoneKey() { + return CIEKey(nullptr, -1, 0, false, false); + } CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_, unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) : @@ -1487,13 +1476,13 @@ namespace llvm { }; } -void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB, - bool UsingCFI, bool IsEH) { +void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, + bool IsEH) { Streamer.generateCompactUnwindEncodings(MAB); MCContext &Context = Streamer.getContext(); const MCObjectFileInfo *MOFI = Context.getObjectFileInfo(); - FrameEmitterImpl Emitter(UsingCFI, IsEH); + FrameEmitterImpl Emitter(IsEH); ArrayRef FrameArray = Streamer.getFrameInfos(); // Emit the compact unwind info if available. 
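The CIEKey hunk above follows LLVM's DenseMap convention: a key type reserves two sentinel values, an "empty" key and a "tombstone" key, that can never collide with a real key, and the patch spells their pointer member as nullptr instead of 0. A standalone sketch of the idea (not the real DenseMapInfo machinery):

#include <cassert>
#include <climits>

struct Key {
  const void *Personality;
  unsigned PersonalityEncoding;
  unsigned LsdaEncoding;

  // Real keys never carry UINT_MAX encodings, so these two sentinels
  // (note nullptr rather than 0 for the pointer member) are unreachable.
  static Key getEmptyKey() { return {nullptr, 0, UINT_MAX}; }
  static Key getTombstoneKey() { return {nullptr, UINT_MAX, 0}; }
};

bool operator==(const Key &A, const Key &B) {
  return A.Personality == B.Personality &&
         A.PersonalityEncoding == B.PersonalityEncoding &&
         A.LsdaEncoding == B.LsdaEncoding;
}

int main() {
  assert(!(Key::getEmptyKey() == Key::getTombstoneKey()));
  return 0;
}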
@@ -1526,10 +1515,10 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB, Streamer.EmitLabel(SectionStart); Emitter.setSectionStart(SectionStart); - MCSymbol *FDEEnd = NULL; + MCSymbol *FDEEnd = nullptr; DenseMap CIEStarts; - const MCSymbol *DummyDebugKey = NULL; + const MCSymbol *DummyDebugKey = nullptr; NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame(); for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) { const MCDwarfFrameInfo &Frame = FrameArray[i]; @@ -1537,7 +1526,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB, // Emit the label from the previous iteration if (FDEEnd) { Streamer.EmitLabel(FDEEnd); - FDEEnd = NULL; + FDEEnd = nullptr; } if (!NeedsEHFrameSection && Frame.CompactUnwindEncoding != @@ -1564,7 +1553,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB, Streamer.EmitLabel(FDEEnd); } -void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer, +void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer, uint64_t AddrDelta) { MCContext &Context = Streamer.getContext(); SmallString<256> Tmp; diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index f710c3e..767348c 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -275,11 +275,12 @@ void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, EmitCommonSymbol(Symbol, Size, ByteAlignment); } -void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) { +void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) { if (getCurrentSectionData()->isBundleLocked()) report_fatal_error("Emitting values inside a locked bundle is forbidden"); fixSymbolsInTLSFixups(Value); - MCObjectStreamer::EmitValueImpl(Value, Size); + MCObjectStreamer::EmitValueImpl(Value, Size, Loc); } void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, @@ -537,7 +538,7 @@ void MCELFStreamer::Flush() { } void MCELFStreamer::FinishImpl() { - EmitFrames(NULL, true); + EmitFrames(nullptr); Flush(); @@ -559,10 +560,6 @@ void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) { llvm_unreachable("Generic ELF doesn't support this directive"); } -MCSymbolData &MCELFStreamer::getOrCreateSymbolData(const MCSymbol *Symbol) { - return getAssembler().getOrCreateSymbolData(*Symbol); -} - void MCELFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { llvm_unreachable("ELF doesn't support this directive"); } diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 7f2c478..f724716 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mcexpr" #include "llvm/MC/MCExpr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" @@ -23,6 +22,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "mcexpr" + namespace { namespace stats { STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); @@ -270,6 +271,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_Mips_GOT_LO16: return "GOT_LO16"; case VK_Mips_CALL_HI16: return "CALL_HI16"; case VK_Mips_CALL_LO16: return "CALL_LO16"; + case VK_Mips_PCREL_HI16: return "PCREL_HI16"; + case VK_Mips_PCREL_LO16: return "PCREL_LO16"; case VK_COFF_IMGREL32: return "IMGREL32"; } llvm_unreachable("Invalid variant kind"); @@ -284,6 +287,8 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("gotoff", VK_GOTOFF) .Case("GOTPCREL", 
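The MCELFStreamer hunk above threads an SMLoc through EmitValueImpl; a later hunk in this patch passes that location into MCFixup::Create, so the apparent intent is that errors discovered at layout time can still point at the offending source line. A simplified model of that plumbing, with stand-ins for SMLoc and MCFixup:

#include <cstdio>
#include <vector>

struct Loc { int Line = 0; };

struct Fixup {
  unsigned Offset;
  Loc Location;                 // carried along for later error reporting
};

struct DataFragment {
  std::vector<Fixup> Fixups;
  unsigned CurOffset = 0;

  void emitValue(unsigned Size, Loc L) {
    Fixups.push_back({CurOffset, L});
    CurOffset += Size;
  }
};

int main() {
  DataFragment DF;
  DF.emitValue(4, Loc{17});     // a .long parsed on source line 17
  // Much later, layout/relaxation can still say where the value came from:
  std::printf("fixup at offset %u from line %d\n",
              DF.Fixups[0].Offset, DF.Fixups[0].Location.Line);
  return 0;
}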
VK_GOTPCREL) .Case("gotpcrel", VK_GOTPCREL) + .Case("GOT_PREL", VK_GOTPCREL) + .Case("got_prel", VK_GOTPCREL) .Case("GOTTPOFF", VK_GOTTPOFF) .Case("gottpoff", VK_GOTTPOFF) .Case("INDNTPOFF", VK_INDNTPOFF) @@ -444,12 +449,12 @@ void MCTargetExpr::anchor() {} /* *** */ bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const { - return EvaluateAsAbsolute(Res, 0, 0, 0); + return EvaluateAsAbsolute(Res, nullptr, nullptr, nullptr); } bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const { - return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 0); + return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr); } bool MCExpr::EvaluateAsAbsolute(int64_t &Res, @@ -459,7 +464,7 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, } bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const { - return EvaluateAsAbsolute(Res, &Asm, 0, 0); + return EvaluateAsAbsolute(Res, &Asm, nullptr, nullptr); } bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, @@ -477,7 +482,8 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, // absolutize differences across sections and that is what the MachO writer // uses Addrs for. bool IsRelocatable = - EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs); + EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs, + /*ForceVarExpansion*/ false); // Record the current value. Res = Value.getConstant(); @@ -505,8 +511,8 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet)) return; - MCSymbolData &AD = Asm->getSymbolData(SA); - MCSymbolData &BD = Asm->getSymbolData(SB); + const MCSymbolData &AD = Asm->getSymbolData(SA); + const MCSymbolData &BD = Asm->getSymbolData(SB); if (AD.getFragment() == BD.getFragment()) { Addend += (AD.getOffset() - BD.getOffset()); @@ -518,7 +524,7 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, // Clear the symbol expr pointers to indicate we have folded these // operands. - A = B = 0; + A = B = nullptr; return; } @@ -544,7 +550,7 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, // Clear the symbol expr pointers to indicate we have folded these // operands. - A = B = 0; + A = B = nullptr; } /// \brief Evaluate the result of an add between (conceptually) two MCValues. @@ -627,15 +633,21 @@ static bool EvaluateSymbolicAdd(const MCAssembler *Asm, bool MCExpr::EvaluateAsRelocatable(MCValue &Res, const MCAsmLayout *Layout) const { - MCAssembler *Assembler = Layout ? &Layout->getAssembler() : 0; - return EvaluateAsRelocatableImpl(Res, Assembler, Layout, 0, false); + MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr; + return EvaluateAsRelocatableImpl(Res, Assembler, Layout, nullptr, false, + /*ForceVarExpansion*/ false); } -bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAssembler *Asm, +bool MCExpr::EvaluateAsValue(MCValue &Res, const MCAsmLayout *Layout) const { + MCAssembler *Assembler = Layout ? 
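AttemptToFoldSymbolOffsetDifference, touched above, folds A - B into a constant when both symbols land in the same fragment and signals the fold by clearing both operand pointers, now written as nullptr. A compact sketch of that logic with simplified stand-in types:

#include <cstdint>
#include <cstdio>

struct SymbolData {
  const void *Fragment;
  uint64_t Offset;
};

void foldDifference(const SymbolData *&A, const SymbolData *&B,
                    int64_t &Addend) {
  if (!A || !B || A->Fragment != B->Fragment)
    return;                       // not foldable at this level
  Addend += (int64_t)(A->Offset - B->Offset);
  A = B = nullptr;                // operands folded away, constant remains
}

int main() {
  int Frag;                       // one shared fragment
  SymbolData SA{&Frag, 24}, SB{&Frag, 8};
  const SymbolData *A = &SA, *B = &SB;
  int64_t Addend = 0;
  foldDifference(A, B, Addend);
  std::printf("addend=%lld folded=%d\n", (long long)Addend, A == nullptr);
  return 0;
}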
&Layout->getAssembler() : nullptr; + return EvaluateAsRelocatableImpl(Res, Assembler, Layout, nullptr, false, + /*ForceVarExpansion*/ true); +} + +bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm, const MCAsmLayout *Layout, - const SectionAddrMap *Addrs, - bool InSet) const { + const SectionAddrMap *Addrs, bool InSet, + bool ForceVarExpansion) const { ++stats::MCExprEvaluate; switch (getKind()) { @@ -652,9 +664,9 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmInfo &MCAsmInfo = SRE->getMCAsmInfo(); // Evaluate recursively if this is a variable. - if (Sym.isVariable()) { - if (Sym.getVariableValue()->EvaluateAsRelocatableImpl(Res, Asm, Layout, - Addrs, true)) { + if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) { + if (Sym.getVariableValue()->EvaluateAsRelocatableImpl( + Res, Asm, Layout, Addrs, true, ForceVarExpansion)) { const MCSymbolRefExpr *A = Res.getSymA(); const MCSymbolRefExpr *B = Res.getSymB(); @@ -668,15 +680,16 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, if (!A && !B) return true; } else { + if (ForceVarExpansion) + return true; bool IsSymbol = A && A->getSymbol().isDefined(); - bool IsWeakRef = SRE->getKind() == MCSymbolRefExpr::VK_WEAKREF; - if (!IsSymbol && !IsWeakRef) + if (!IsSymbol) return true; } } } - Res = MCValue::get(SRE, 0, 0); + Res = MCValue::get(SRE, nullptr, 0); return true; } @@ -684,8 +697,8 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCUnaryExpr *AUE = cast(this); MCValue Value; - if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, - Addrs, InSet)) + if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, + InSet, ForceVarExpansion)) return false; switch (AUE->getOpcode()) { @@ -718,10 +731,10 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCBinaryExpr *ABE = cast(this); MCValue LHSValue, RHSValue; - if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, - Addrs, InSet) || - !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, - Addrs, InSet)) + if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, Addrs, + InSet, ForceVarExpansion) || + !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, Addrs, + InSet, ForceVarExpansion)) return false; // We only support a few operations on non-constant expressions, handle @@ -795,7 +808,7 @@ const MCSection *MCExpr::FindAssociatedSection() const { if (Sym.isDefined()) return &Sym.getSection(); - return 0; + return nullptr; } case Unary: diff --git a/lib/MC/MCExternalSymbolizer.cpp b/lib/MC/MCExternalSymbolizer.cpp index 839516e..7c3073a 100644 --- a/lib/MC/MCExternalSymbolizer.cpp +++ b/lib/MC/MCExternalSymbolizer.cpp @@ -83,7 +83,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, return false; } - const MCExpr *Add = NULL; + const MCExpr *Add = nullptr; if (SymbolicOp.AddSymbol.Present) { if (SymbolicOp.AddSymbol.Name) { StringRef Name(SymbolicOp.AddSymbol.Name); @@ -94,7 +94,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, } } - const MCExpr *Sub = NULL; + const MCExpr *Sub = nullptr; if (SymbolicOp.SubtractSymbol.Present) { if (SymbolicOp.SubtractSymbol.Name) { StringRef Name(SymbolicOp.SubtractSymbol.Name); @@ -105,7 +105,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, } } - const MCExpr *Off = NULL; + const MCExpr *Off = nullptr; if (SymbolicOp.Value != 0) Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx); @@ -116,17 +116,17 @@ bool 
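The EvaluateAsRelocatableImpl hunks above thread one extra ForceVarExpansion flag through every recursive call so a single entry point (the new EvaluateAsValue) can force variable symbols to expand. A toy model of that recursion, assuming a much-simplified expression tree rather than the real MCExpr hierarchy:

#include <cstdio>

struct Expr {
  enum Kind { Constant, Variable, Add } K;
  int Value = 0;                          // for Constant
  const Expr *Def = nullptr;              // for Variable: its definition
  const Expr *L = nullptr, *R = nullptr;  // for Add

  bool evaluate(int &Res, bool ForceVarExpansion) const {
    switch (K) {
    case Constant: Res = Value; return true;
    case Variable:
      if (!ForceVarExpansion)
        return false;                     // keep the variable symbolic
      return Def && Def->evaluate(Res, ForceVarExpansion);
    case Add: {
      int A, B;                           // the flag rides every recursion
      if (!L->evaluate(A, ForceVarExpansion) ||
          !R->evaluate(B, ForceVarExpansion))
        return false;
      Res = A + B;
      return true;
    }
    }
    return false;
  }
};

int main() {
  Expr C{Expr::Constant, 40};
  Expr V{Expr::Variable}; V.Def = &C;
  Expr Two{Expr::Constant, 2};
  Expr Sum{Expr::Add}; Sum.L = &V; Sum.R = &Two;
  int Res;
  bool OK = Sum.evaluate(Res, /*ForceVarExpansion=*/true);
  std::printf("%d %d\n", OK, Res);        // 1 42
  return 0;
}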
MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx); else LHS = MCUnaryExpr::CreateMinus(Sub, Ctx); - if (Off != 0) + if (Off) Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx); else Expr = LHS; } else if (Add) { - if (Off != 0) + if (Off) Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx); else Expr = Add; } else { - if (Off != 0) + if (Off) Expr = Off; else Expr = MCConstantExpr::Create(0, Ctx); @@ -189,7 +189,7 @@ MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, void *DisInfo, MCContext *Ctx, MCRelocationInfo *RelInfo) { - assert(Ctx != 0 && "No MCContext given for symbolic disassembly"); + assert(Ctx && "No MCContext given for symbolic disassembly"); return new MCExternalSymbolizer(*Ctx, std::unique_ptr(RelInfo), diff --git a/lib/MC/MCFixup.cpp b/lib/MC/MCFixup.cpp deleted file mode 100644 index 8f15db5..0000000 --- a/lib/MC/MCFixup.cpp +++ /dev/null @@ -1,37 +0,0 @@ -//===- MCFixup.cpp - Assembly Fixup Implementation ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCFixup.h" -using namespace llvm; - -static MCSymbolRefExpr::VariantKind getAccessVariant(const MCExpr *Expr) { - switch (Expr->getKind()) { - case MCExpr::Unary: - case MCExpr::Target: - llvm_unreachable("unsupported"); - - case MCExpr::Constant: - return MCSymbolRefExpr::VK_None; - - case MCExpr::SymbolRef: { - const MCSymbolRefExpr *SRE = cast(Expr); - return SRE->getKind(); - } - case MCExpr::Binary: { - const MCBinaryExpr *ABE = cast(Expr); - assert(getAccessVariant(ABE->getRHS()) == MCSymbolRefExpr::VK_None); - return getAccessVariant(ABE->getLHS()); - } - } - llvm_unreachable("unknown MCExpr kind"); -} - -MCSymbolRefExpr::VariantKind MCFixup::getAccessVariant() const { - return ::getAccessVariant(getValue()); -} diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp index 767e1e0..1ddc250 100644 --- a/lib/MC/MCFunction.cpp +++ b/lib/MC/MCFunction.cpp @@ -20,22 +20,17 @@ MCFunction::MCFunction(StringRef Name, MCModule *Parent) : Name(Name), ParentModule(Parent) {} -MCFunction::~MCFunction() { - for (iterator I = begin(), E = end(); I != E; ++I) - delete *I; -} - MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) { - MCBasicBlock *MCBB = new MCBasicBlock(TA, this); - Blocks.push_back(MCBB); - return *MCBB; + std::unique_ptr MCBB(new MCBasicBlock(TA, this)); + Blocks.push_back(std::move(MCBB)); + return *Blocks.back(); } MCBasicBlock *MCFunction::find(uint64_t StartAddr) { for (const_iterator I = begin(), E = end(); I != E; ++I) if ((*I)->getInsts()->getBeginAddr() == StartAddr) - return *I; - return 0; + return I->get(); + return nullptr; } const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const { diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index 124cc14..d7b80f5 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -34,7 +34,7 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MCOperand::dump() const { - print(dbgs(), 0); + print(dbgs(), nullptr); dbgs() << "\n"; } #endif @@ -66,7 +66,7 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI, #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MCInst::dump() const { - print(dbgs(), 0); + print(dbgs(), nullptr); dbgs() << "\n"; } #endif 
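The MCFunction hunk above moves block ownership into the container: the destructor's delete loop disappears because the vector holds std::unique_ptr, and lookups return non-owning raw pointers via get(). A minimal sketch with illustrative stand-in types:

#include <cstdint>
#include <memory>
#include <vector>

struct BasicBlock {
  uint64_t StartAddr;
  explicit BasicBlock(uint64_t A) : StartAddr(A) {}
};

struct Function {
  std::vector<std::unique_ptr<BasicBlock>> Blocks;   // owns the blocks

  BasicBlock &createBlock(uint64_t Addr) {
    Blocks.push_back(std::unique_ptr<BasicBlock>(new BasicBlock(Addr)));
    return *Blocks.back();
  }

  BasicBlock *find(uint64_t Addr) {                  // non-owning result
    for (auto I = Blocks.begin(), E = Blocks.end(); I != E; ++I)
      if ((*I)->StartAddr == Addr)
        return I->get();
    return nullptr;
  }
  // No ~Function() delete loop needed any more.
};

int main() {
  Function F;
  F.createBlock(0x1000);
  return F.find(0x1000) ? 0 : 1;
}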
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 7e437f4..37d05e9 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -89,7 +89,7 @@ public: } void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override; - void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr, uint64_t Size = 0, unsigned ByteAlignment = 0) override; virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0) override; @@ -172,7 +172,7 @@ void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) { MCSymbol *Start = getContext().CreateTempSymbol(); EmitLabel(Start); // Record the region for the object writer to use. - DataRegionData Data = { Kind, Start, NULL }; + DataRegionData Data = { Kind, Start, nullptr }; std::vector &Regions = getAssembler().getDataRegions(); Regions.push_back(Data); } @@ -183,7 +183,7 @@ void MCMachOStreamer::EmitDataRegionEnd() { std::vector &Regions = getAssembler().getDataRegions(); assert(Regions.size() && "Mismatched .end_data_region!"); DataRegionData &Data = Regions.back(); - assert(Data.End == NULL && "Mismatched .end_data_region!"); + assert(!Data.End && "Mismatched .end_data_region!"); // Create a temporary label to mark the end of the data region. Data.End = getContext().CreateTempSymbol(); EmitLabel(Data.End); @@ -237,10 +237,6 @@ void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) { // Remember that the function is a thumb function. Fixup and relocation // values will need adjusted. getAssembler().setIsThumbFunc(Symbol); - - // Mark the thumb bit on the symbol. - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SD.setFlags(SD.getFlags() | SF_ThumbFunc); } bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, @@ -352,7 +348,7 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - AssignSection(Symbol, NULL); + AssignSection(Symbol, nullptr); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); SD.setExternal(true); @@ -422,7 +418,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst, } void MCMachOStreamer::FinishImpl() { - EmitFrames(&getAssembler().getBackend(), true); + EmitFrames(&getAssembler().getBackend()); // We have to set the fragment atom associations so we can relax properly for // Mach-O. @@ -430,13 +426,12 @@ void MCMachOStreamer::FinishImpl() { // First, scan the symbol table to build a lookup table from fragments to // defining symbols. DenseMap DefiningSymbolMap; - for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(), - ie = getAssembler().symbol_end(); it != ie; ++it) { - if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) && - it->getFragment()) { + for (MCSymbolData &SD : getAssembler().symbols()) { + if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()) && + SD.getFragment()) { // An atom defining symbol should never be internal to a fragment. - assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!"); - DefiningSymbolMap[it->getFragment()] = it; + assert(SD.getOffset() == 0 && "Invalid offset in atom defining symbol!"); + DefiningSymbolMap[SD.getFragment()] = &SD; } } @@ -444,7 +439,7 @@ void MCMachOStreamer::FinishImpl() { // symbol. 
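The FinishImpl hunk in MCMachOStreamer above rewrites an explicit symbol_begin()/symbol_end() iterator loop as a range-based for over getAssembler().symbols(), storing &SD in the map instead of an iterator. A stand-alone sketch of the same shape, not the real MC API:

#include <map>
#include <vector>

struct SymbolData {
  const void *Fragment = nullptr;
  unsigned Offset = 0;
  bool LinkerVisible = false;
};

int main() {
  std::vector<SymbolData> Symbols(3);
  int Frag;
  Symbols[1] = {&Frag, 0, true};

  std::map<const void *, SymbolData *> DefiningSymbolMap;
  for (SymbolData &SD : Symbols)              // was: explicit iterator loop
    if (SD.LinkerVisible && SD.Fragment)
      DefiningSymbolMap[SD.Fragment] = &SD;   // address of the element

  return DefiningSymbolMap.size() == 1 ? 0 : 1;
}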
for (MCAssembler::iterator it = getAssembler().begin(), ie = getAssembler().end(); it != ie; ++it) { - MCSymbolData *CurrentAtom = 0; + MCSymbolData *CurrentAtom = nullptr; for (MCSectionData::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2; ++it2) { if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2)) diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp index 7e9e18a..3ed7356 100644 --- a/lib/MC/MCModule.cpp +++ b/lib/MC/MCModule.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCModule.h" #include "llvm/MC/MCAtom.h" #include "llvm/MC/MCFunction.h" @@ -77,7 +78,7 @@ const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const { Addr, AtomComp); if (I != atom_end() && (*I)->getBeginAddr() <= Addr) return *I; - return 0; + return nullptr; } MCAtom *MCModule::findAtomContaining(uint64_t Addr) { @@ -90,7 +91,7 @@ const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const { Addr, AtomCompInv); if (I != atom_end()) return *I; - return 0; + return nullptr; } MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) { @@ -99,8 +100,9 @@ MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) { } MCFunction *MCModule::createFunction(StringRef Name) { - Functions.push_back(new MCFunction(Name, this)); - return Functions.back(); + std::unique_ptr MCF(new MCFunction(Name, this)); + Functions.push_back(std::move(MCF)); + return Functions.back().get(); } static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) { @@ -130,13 +132,11 @@ void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) { BBsByAtom.insert(I, BB); } +MCModule::MCModule() : Entrypoint(0) { } + MCModule::~MCModule() { for (AtomListTy::iterator AI = atom_begin(), AE = atom_end(); AI != AE; ++AI) delete *AI; - for (FunctionListTy::iterator FI = func_begin(), - FE = func_end(); - FI != FE; ++FI) - delete *FI; } diff --git a/lib/MC/MCModuleYAML.cpp b/lib/MC/MCModuleYAML.cpp index 102971b..f81cb14 100644 --- a/lib/MC/MCModuleYAML.cpp +++ b/lib/MC/MCModuleYAML.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Object/YAML.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/YAMLTraits.h" #include @@ -162,12 +163,14 @@ template <> struct ScalarTraits { static void output(const MCModuleYAML::Operand &, void *, llvm::raw_ostream &); static StringRef input(StringRef, void *, MCModuleYAML::Operand &); + static bool mustQuote(StringRef) { return false; } }; template <> struct ScalarTraits { static void output(const MCModuleYAML::OpcodeEnum &, void *, llvm::raw_ostream &); static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &); + static bool mustQuote(StringRef) { return false; } }; void ScalarEnumerationTraits::enumeration( @@ -276,7 +279,7 @@ class MCModule2YAML { const MCModule &MCM; MCModuleYAML::Module YAMLModule; void dumpAtom(const MCAtom *MCA); - void dumpFunction(const MCFunction *MCF); + void dumpFunction(const MCFunction &MCF); void dumpBasicBlock(const MCBasicBlock *MCBB); public: @@ -300,7 +303,7 @@ MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() { dumpAtom(*AI); for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end(); FI != FE; ++FI) - dumpFunction(*FI); + dumpFunction(**FI); } void MCModule2YAML::dumpAtom(const MCAtom *MCA) { @@ -328,22 +331,22 @@ void MCModule2YAML::dumpAtom(const MCAtom *MCA) { } } -void 
MCModule2YAML::dumpFunction(const MCFunction *MCF) { +void MCModule2YAML::dumpFunction(const MCFunction &MCF) { YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1); MCModuleYAML::Function &F = YAMLModule.Functions.back(); - F.Name = MCF->getName(); - for (MCFunction::const_iterator BBI = MCF->begin(), BBE = MCF->end(); + F.Name = MCF.getName(); + for (MCFunction::const_iterator BBI = MCF.begin(), BBE = MCF.end(); BBI != BBE; ++BBI) { - const MCBasicBlock *MCBB = *BBI; + const MCBasicBlock &MCBB = **BBI; F.BasicBlocks.resize(F.BasicBlocks.size() + 1); MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back(); - BB.Address = MCBB->getInsts()->getBeginAddr(); - for (MCBasicBlock::pred_const_iterator PI = MCBB->pred_begin(), - PE = MCBB->pred_end(); + BB.Address = MCBB.getInsts()->getBeginAddr(); + for (MCBasicBlock::pred_const_iterator PI = MCBB.pred_begin(), + PE = MCBB.pred_end(); PI != PE; ++PI) BB.Preds.push_back((*PI)->getInsts()->getBeginAddr()); - for (MCBasicBlock::succ_const_iterator SI = MCBB->succ_begin(), - SE = MCBB->succ_end(); + for (MCBasicBlock::succ_const_iterator SI = MCBB.succ_begin(), + SE = MCBB.succ_end(); SI != SE; ++SI) BB.Succs.push_back((*SI)->getInsts()->getBeginAddr()); } diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 894eada..4f2740e 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -41,11 +41,6 @@ namespace { void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override {} void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override {} - void EmitDwarfAdvanceLineAddr(int64_t LineDelta, - const MCSymbol *LastLabel, - const MCSymbol *Label, - unsigned PointerSize) override {} - bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override { return true; @@ -64,13 +59,14 @@ namespace { unsigned ByteAlignment) override {} void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override {} - void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr, uint64_t Size = 0, unsigned ByteAlignment = 0) override {} void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override {} void EmitBytes(StringRef Data) override {} - void EmitValueImpl(const MCExpr *Value, unsigned Size) override {} + void EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc = SMLoc()) override {} void EmitULEB128Value(const MCExpr *Value) override {} void EmitSLEB128Value(const MCExpr *Value) override {} void EmitGPRel32Value(const MCExpr *Value) override {} diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp index 146da6d..8a258cb 100644 --- a/lib/MC/MCObjectDisassembler.cpp +++ b/lib/MC/MCObjectDisassembler.cpp @@ -31,10 +31,12 @@ using namespace llvm; using namespace object; +#define DEBUG_TYPE "mc" + MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj, const MCDisassembler &Dis, const MCInstrAnalysis &MIA) - : Obj(Obj), Dis(Dis), MIA(MIA), MOS(0) {} + : Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {} uint64_t MCObjectDisassembler::getEntrypoint() { for (const SymbolRef &Symbol : Obj.symbols()) { @@ -115,8 +117,8 @@ void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { Section.getName(SecName); if (isText) { - MCTextAtom *Text = 0; - MCDataAtom *InvalidData = 0; + MCTextAtom *Text = nullptr; + MCDataAtom *InvalidData = nullptr; uint64_t InstSize; for (uint64_t Index = 0; Index < SecSize; Index 
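The MCNullStreamer hunk above trims a class that is a textbook null-object: every emission hook is overridden with an empty body so clients can drive the full parsing pipeline without producing output. A minimal sketch of the pattern:

#include <string>

struct Streamer {
  virtual ~Streamer() {}
  virtual void emitBytes(const std::string &Data) = 0;
  virtual void emitValue(long V, unsigned Size) = 0;
};

struct NullStreamer : Streamer {
  void emitBytes(const std::string &) override {}   // deliberately empty
  void emitValue(long, unsigned) override {}        // deliberately empty
};

void produce(Streamer &S) {   // unaware of which streamer it drives
  S.emitBytes("\x90");
  S.emitValue(42, 4);
}

int main() {
  NullStreamer NS;
  produce(NS);                // runs the whole pipeline, writes nothing
  return 0;
}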
+= InstSize) { @@ -129,11 +131,11 @@ void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { Text->setName(SecName); } Text->addInst(Inst, InstSize); - InvalidData = 0; + InvalidData = nullptr; } else { assert(InstSize && "getInstruction() consumed no bytes"); if (!InvalidData) { - Text = 0; + Text = nullptr; InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1); } for (uint64_t I = 0; I < InstSize; ++I) @@ -160,7 +162,7 @@ namespace { BBInfoSetTy Preds; MCObjectDisassembler::AddressSetTy SuccAddrs; - BBInfo() : Atom(0), BB(0) {} + BBInfo() : Atom(nullptr), BB(nullptr) {} void addSucc(BBInfo &Succ) { Succs.insert(&Succ); @@ -480,7 +482,7 @@ MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr, continue; // FIXME: MCModule should provide a findFunctionByAddr() if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr) - return *FI; + return FI->get(); } // Finally, just create a new one. diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 3b011c8..9d413af 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSectionCOFF.h" @@ -22,12 +23,13 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; - if (T.isOSDarwin() && T.getArch() == Triple::arm64) + if (T.isOSDarwin() && + (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64)) SupportsCompactUnwindWithoutEHFrame = true; PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - LSDAEncoding = FDEEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel; + LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; @@ -44,7 +46,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { SectionKind::getDataRel()); // BSSSection might not be expected initialized on msvc. 
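Several MCObjectFileInfo hunks above widen arch tests so both spellings of 64-bit ARM, Triple::arm64 and Triple::aarch64, are accepted during the period when both triples existed. The patch writes the disjunction inline each time; a hypothetical helper (not in the patch) shows the check in one place, with a stand-in enum:

enum class ArchType { x86, x86_64, arm64, aarch64, aarch64_be };

// Hypothetical convenience predicate; the patch repeats the || inline.
static bool isAArch64Family(ArchType A) {
  return A == ArchType::arm64 || A == ArchType::aarch64 ||
         A == ArchType::aarch64_be;
}

int main() {
  bool SupportsCompactUnwindWithoutEHFrame = isAArch64Family(ArchType::arm64);
  return SupportsCompactUnwindWithoutEHFrame ? 0 : 1;
}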
- BSSSection = 0; + BSSSection = nullptr; TLSDataSection // .tdata = Ctx->getMachOSection("__DATA", "__thread_data", @@ -147,10 +149,11 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0, SectionKind::getReadOnlyWithRel()); - COFFDebugSymbolsSection = 0; + COFFDebugSymbolsSection = nullptr; if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) || - (T.isOSDarwin() && T.getArch() == Triple::arm64)) { + (T.isOSDarwin() && + (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))) { CompactUnwindSection = Ctx->getMachOSection("__LD", "__compact_unwind", MachO::S_ATTR_DEBUG, @@ -158,7 +161,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; - else if (T.getArch() == Triple::arm64) + else if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64) CompactUnwindDwarfEHFrameOnly = 0x03000000; } @@ -245,29 +248,40 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { } void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { - if (T.getArch() == Triple::mips || - T.getArch() == Triple::mipsel) + switch (T.getArch()) { + case Triple::mips: + case Triple::mipsel: FDECFIEncoding = dwarf::DW_EH_PE_sdata4; - else if (T.getArch() == Triple::mips64 || - T.getArch() == Triple::mips64el) + break; + case Triple::mips64: + case Triple::mips64el: FDECFIEncoding = dwarf::DW_EH_PE_sdata8; - else + break; + default: FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; + break; + } - if (T.getArch() == Triple::x86) { + switch (T.getArch()) { + case Triple::arm: + case Triple::armeb: + case Triple::thumb: + case Triple::thumbeb: + if (Ctx->getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM) + break; + // Fallthrough if not using EHABI + case Triple::x86: PersonalityEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; LSDAEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; - FDEEncoding = (RelocM == Reloc::PIC_) - ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 - : dwarf::DW_EH_PE_absptr; TTypeEncoding = (RelocM == Reloc::PIC_) ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_absptr; - } else if (T.getArch() == Triple::x86_64) { + break; + case Triple::x86_64: if (RelocM == Reloc::PIC_) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium) @@ -275,7 +289,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { LSDAEncoding = dwarf::DW_EH_PE_pcrel | (CMModel == CodeModel::Small ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); - FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | ((CMModel == CodeModel::Small || CMModel == CodeModel::Medium) ? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8); @@ -285,12 +298,14 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; LSDAEncoding = (CMModel == CodeModel::Small) ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; - FDEEncoding = dwarf::DW_EH_PE_udata4; TTypeEncoding = (CMModel == CodeModel::Small) ? 
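InitELFMCObjectFileInfo, rewritten above, converts if/else-if chains over T.getArch() into a switch, including a deliberate fallthrough: ARM skips the DWARF encodings when EHABI handles exceptions and otherwise falls through to the x86 choices. A compact sketch of that control flow, using stand-in enums and the usual DWARF constant values (pcrel|sdata4 = 0x1b, sdata4 = 0x0b):

#include <cstdio>

enum class Arch { x86, arm, mips };
enum class EH { DwarfCFI, ARM };

unsigned pickEncoding(Arch A, EH Exceptions) {
  switch (A) {
  case Arch::arm:
    if (Exceptions == EH::ARM)
      return 0x00;       // EHABI: DWARF encodings not needed (absptr default)
    // deliberate fallthrough to the generic choice otherwise
  case Arch::x86:
    return 0x1b;         // DW_EH_PE_pcrel | DW_EH_PE_sdata4
  case Arch::mips:
    return 0x0b;         // DW_EH_PE_sdata4
  }
  return 0x00;           // DW_EH_PE_absptr
}

int main() {
  std::printf("%#x\n", pickEncoding(Arch::arm, EH::DwarfCFI)); // 0x1b
  return 0;
}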
dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr; } - } else if (T.getArch() == Triple::aarch64 || - T.getArch() == Triple::aarch64_be ) { + break; + case Triple::aarch64: + case Triple::aarch64_be: + case Triple::arm64: + case Triple::arm64_be: // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. @@ -298,65 +313,64 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8; LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8; - FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8; } else { PersonalityEncoding = dwarf::DW_EH_PE_absptr; LSDAEncoding = dwarf::DW_EH_PE_absptr; - FDEEncoding = dwarf::DW_EH_PE_udata4; TTypeEncoding = dwarf::DW_EH_PE_absptr; } - } else if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le) { + break; + case Triple::ppc64: + case Triple::ppc64le: PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; - FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; - } else if (T.getArch() == Triple::sparc) { + break; + case Triple::sparc: if (RelocM == Reloc::PIC_) { LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; } else { LSDAEncoding = dwarf::DW_EH_PE_absptr; PersonalityEncoding = dwarf::DW_EH_PE_absptr; - FDEEncoding = dwarf::DW_EH_PE_udata4; TTypeEncoding = dwarf::DW_EH_PE_absptr; } - } else if (T.getArch() == Triple::sparcv9) { + break; + case Triple::sparcv9: LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; if (RelocM == Reloc::PIC_) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; } else { PersonalityEncoding = dwarf::DW_EH_PE_absptr; - FDEEncoding = dwarf::DW_EH_PE_udata4; TTypeEncoding = dwarf::DW_EH_PE_absptr; } - } else if (T.getArch() == Triple::systemz) { + break; + case Triple::systemz: // All currently-defined code models guarantee that 4-byte PC-relative // values will be in range. 
if (RelocM == Reloc::PIC_) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; - FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; } else { PersonalityEncoding = dwarf::DW_EH_PE_absptr; LSDAEncoding = dwarf::DW_EH_PE_absptr; - FDEEncoding = dwarf::DW_EH_PE_absptr; TTypeEncoding = dwarf::DW_EH_PE_absptr; } + break; + default: + break; } // Solaris requires different flags for .eh_frame to seemingly every other @@ -461,7 +475,7 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { ELF::SHF_ALLOC, SectionKind::getReadOnly()); - COFFDebugSymbolsSection = 0; + COFFDebugSymbolsSection = nullptr; // Debug Info Sections. DwarfAbbrevSection = @@ -548,6 +562,10 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { + // The object file format cannot represent common symbols with explicit + // alignments. + CommDirectiveSupportsAlignment = false; + // COFF BSSSection = Ctx->getCOFFSection(".bss", @@ -716,7 +734,7 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { DrectveSection = Ctx->getCOFFSection(".drectve", - COFF::IMAGE_SCN_LNK_INFO, + COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE, SectionKind::getMetadata()); PDataSection = @@ -751,17 +769,17 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, IsFunctionEHFrameSymbolPrivate = true; SupportsCompactUnwindWithoutEHFrame = false; - PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding = - TTypeEncoding = dwarf::DW_EH_PE_absptr; + PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding = + dwarf::DW_EH_PE_absptr; CompactUnwindDwarfEHFrameOnly = 0; - EHFrameSection = 0; // Created on demand. - CompactUnwindSection = 0; // Used only by selected targets. - DwarfAccelNamesSection = 0; // Used only by selected targets. - DwarfAccelObjCSection = 0; // Used only by selected targets. - DwarfAccelNamespaceSection = 0; // Used only by selected targets. - DwarfAccelTypesSection = 0; // Used only by selected targets. + EHFrameSection = nullptr; // Created on demand. + CompactUnwindSection = nullptr; // Used only by selected targets. + DwarfAccelNamesSection = nullptr; // Used only by selected targets. + DwarfAccelObjCSection = nullptr; // Used only by selected targets. + DwarfAccelNamespaceSection = nullptr; // Used only by selected targets. + DwarfAccelTypesSection = nullptr; // Used only by selected targets. Triple T(TT); Triple::ArchType Arch = T.getArch(); @@ -769,14 +787,15 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, // cellspu-apple-darwin. Perhaps we should fix in Triple? 
if ((Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || Arch == Triple::thumb || - Arch == Triple::arm64 || + Arch == Triple::arm64 || Arch == Triple::aarch64 || Arch == Triple::ppc || Arch == Triple::ppc64 || Arch == Triple::UnknownArch) && (T.isOSDarwin() || T.isOSBinFormatMachO())) { Env = IsMachO; InitMachOMCObjectFileInfo(T); - } else if ((Arch == Triple::x86 || Arch == Triple::x86_64) && - T.getObjectFormat() != Triple::ELF && T.isOSWindows()) { + } else if ((Arch == Triple::x86 || Arch == Triple::x86_64 || + Arch == Triple::arm || Arch == Triple::thumb) && + (T.isOSWindows() && T.getObjectFormat() == Triple::COFF)) { Env = IsCOFF; InitCOFFMCObjectFileInfo(T); } else { diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 4451264..a1aa602 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -20,7 +20,6 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/MC/MCSectionELF.h" using namespace llvm; MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, @@ -28,12 +27,13 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB, *Emitter_, *TAB.createObjectWriter(OS), OS)), - CurSectionData(0) {} + CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {} MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter_, MCAssembler *_Assembler) - : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0) {} + : MCStreamer(Context), Assembler(_Assembler), CurSectionData(nullptr), + EmitEHFrame(true), EmitDebugFrame(false) {} MCObjectStreamer::~MCObjectStreamer() { delete &Assembler->getBackend(); @@ -45,18 +45,31 @@ MCObjectStreamer::~MCObjectStreamer() { void MCObjectStreamer::reset() { if (Assembler) Assembler->reset(); - CurSectionData = 0; + CurSectionData = nullptr; CurInsertionPoint = MCSectionData::iterator(); + EmitEHFrame = true; + EmitDebugFrame = false; MCStreamer::reset(); } +void MCObjectStreamer::EmitFrames(MCAsmBackend *MAB) { + if (!getNumFrameInfos()) + return; + + if (EmitEHFrame) + MCDwarfFrameEmitter::Emit(*this, MAB, true); + + if (EmitDebugFrame) + MCDwarfFrameEmitter::Emit(*this, MAB, false); +} + MCFragment *MCObjectStreamer::getCurrentFragment() const { assert(getCurrentSectionData() && "No current section!"); if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin()) return std::prev(CurInsertionPoint); - return 0; + return nullptr; } MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { @@ -64,11 +77,7 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::EmitInstToData for details) if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) { - const auto *Sec = dyn_cast(&getCurrentSectionData()->getSection()); - if (Sec && Sec->getSectionName().startswith(".zdebug_")) - F = new MCCompressedFragment(); - else - F = new MCDataFragment(); + F = new MCDataFragment(); insert(F); } return F; @@ -102,7 +111,14 @@ const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) { return Value; } -void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) { +void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) { + MCStreamer::EmitCFISections(EH, Debug); + EmitEHFrame = EH; + 
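The MCObjectStreamer hunk above introduces two booleans, EmitEHFrame and EmitDebugFrame, set by EmitCFISections and consulted by the new EmitFrames at finish time. A simplified model of that gating, not the real class:

#include <cstdio>

struct FrameStreamer {
  bool EmitEHFrame = true;        // defaults match the patch
  bool EmitDebugFrame = false;

  void emitCFISections(bool EH, bool Debug) {   // .cfi_sections directive
    EmitEHFrame = EH;
    EmitDebugFrame = Debug;
  }

  void emitFrames(unsigned NumFrameInfos) {
    if (!NumFrameInfos)
      return;                     // nothing recorded, nothing to emit
    if (EmitEHFrame)
      std::puts("emit .eh_frame");
    if (EmitDebugFrame)
      std::puts("emit .debug_frame");
  }
};

int main() {
  FrameStreamer S;
  S.emitCFISections(/*EH=*/false, /*Debug=*/true); // .cfi_sections .debug_frame
  S.emitFrames(1);                // prints only "emit .debug_frame"
  return 0;
}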
EmitDebugFrame = Debug; +} + +void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) { MCDataFragment *DF = getOrCreateDataFragment(); MCLineEntry::Make(this, getCurrentSection().first); @@ -115,7 +131,7 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) { } DF->getFixups().push_back( MCFixup::Create(DF->getContents().size(), Value, - MCFixup::getKindForSize(Size, false))); + MCFixup::getKindForSize(Size, false), Loc)); DF->getContents().resize(DF->getContents().size() + Size, 0); } diff --git a/lib/MC/MCObjectSymbolizer.cpp b/lib/MC/MCObjectSymbolizer.cpp index ba80d15..b149596 100644 --- a/lib/MC/MCObjectSymbolizer.cpp +++ b/lib/MC/MCObjectSymbolizer.cpp @@ -215,11 +215,11 @@ const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) { It = std::lower_bound(SortedSections.begin(), EndIt, Addr, SectionStartsBefore); if (It == EndIt) - return 0; + return nullptr; uint64_t SAddr; It->getAddress(SAddr); uint64_t SSize; It->getSize(SSize); if (Addr >= SAddr + SSize) - return 0; + return nullptr; return &*It; } @@ -229,7 +229,7 @@ const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) { AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr); if (RI == AddrToReloc.end()) - return 0; + return nullptr; return &RI->second; } @@ -257,40 +257,12 @@ void MCObjectSymbolizer::buildSectionList() { void MCObjectSymbolizer::buildRelocationByAddrMap() { for (const SectionRef &Section : Obj->sections()) { - section_iterator RelSecI = Section.getRelocatedSection(); - if (RelSecI == Obj->section_end()) - continue; - - uint64_t StartAddr; RelSecI->getAddress(StartAddr); - uint64_t Size; RelSecI->getSize(Size); - bool RequiredForExec; - RelSecI->isRequiredForExecution(RequiredForExec); - if (RequiredForExec == false || Size == 0) - continue; for (const RelocationRef &Reloc : Section.relocations()) { - // FIXME: libObject is inconsistent regarding error handling. The - // overwhelming majority of methods always return object_error::success, - // and assert for simple errors.. Here, ELFObjectFile::getRelocationOffset - // asserts when the file type isn't ET_REL. - // This workaround handles x86-64 elf, the only one that has a relocinfo. - uint64_t Offset; - if (Obj->isELF()) { - const ELF64LEObjectFile *ELFObj = dyn_cast(Obj); - if (ELFObj == 0) - break; - if (ELFObj->getELFFile()->getHeader()->e_type == ELF::ET_REL) { - Reloc.getOffset(Offset); - Offset += StartAddr; - } else { - Reloc.getAddress(Offset); - } - } else { - Reloc.getOffset(Offset); - Offset += StartAddr; - } + uint64_t Address; + Reloc.getAddress(Address); // At a specific address, only keep the first relocation. 
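MCObjectSymbolizer::findSectionContaining, touched above, binary-searches a sorted section list and returns nullptr when the address falls in a gap or past the last section. A self-contained sketch of that lookup using std::lower_bound with a stand-in section type (the real code goes through SectionRef accessors):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Section {
  uint64_t Addr, Size;
};

const Section *findSectionContaining(const std::vector<Section> &Sorted,
                                     uint64_t Addr) {
  // First section that does NOT end at or before Addr.
  auto It = std::lower_bound(
      Sorted.begin(), Sorted.end(), Addr,
      [](const Section &S, uint64_t A) { return S.Addr + S.Size <= A; });
  if (It == Sorted.end() || Addr < It->Addr)
    return nullptr;               // in a gap or past the last section
  return &*It;
}

int main() {
  std::vector<Section> Secs = {{0x1000, 0x100}, {0x2000, 0x80}};
  return findSectionContaining(Secs, 0x1040) &&
                 !findSectionContaining(Secs, 0x1f00)
             ? 0 : 1;
}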
- if (AddrToReloc.find(Offset) == AddrToReloc.end()) - AddrToReloc[Offset] = Reloc; + if (AddrToReloc.find(Address) == AddrToReloc.end()) + AddrToReloc[Address] = Reloc; } } } diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index a3b68d8..bca516e 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -22,8 +22,8 @@ using namespace llvm; AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { - CurBuf = NULL; - CurPtr = NULL; + CurBuf = nullptr; + CurPtr = nullptr; isAtStartOfLine = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -39,7 +39,7 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { else CurPtr = CurBuf->getBufferStart(); - TokStart = 0; + TokStart = nullptr; } /// ReturnError - Set the error to the specified string at the specified @@ -218,7 +218,7 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { // Look ahead to search for first non-hex digit, if it's [hH], then we treat the // integer as a hexadecimal, possibly with leading zeroes. static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) { - const char *FirstHex = 0; + const char *FirstHex = nullptr; const char *LookAhead = CurPtr; while (1) { if (isdigit(*LookAhead)) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 910a424..168597f 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -59,8 +60,9 @@ struct MCAsmMacroParameter { StringRef Name; MCAsmMacroArgument Value; bool Required; + bool Vararg; - MCAsmMacroParameter() : Required(false) { } + MCAsmMacroParameter() : Required(false), Vararg(false) {} }; typedef std::vector MCAsmMacroParameters; @@ -110,7 +112,7 @@ struct ParseStatementInfo { SmallVectorImpl *AsmRewrites; - ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(0) {} + ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(nullptr) {} ParseStatementInfo(SmallVectorImpl *rewrites) : Opcode(~0), ParseError(false), AsmRewrites(rewrites) {} @@ -292,7 +294,7 @@ private: void handleMacroExit(); /// \brief Extract AsmTokens for a macro argument. - bool parseMacroArgument(MCAsmMacroArgument &MA); + bool parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg); /// \brief Parse all macro arguments for a given macro. bool parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A); @@ -495,9 +497,9 @@ enum { DEFAULT_ADDRSPACE = 0 }; AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, const MCAsmInfo &_MAI) : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), - PlatformParser(0), CurBuffer(0), MacrosEnabledFlag(true), - CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false), - ParsingInlineAsm(false) { + PlatformParser(nullptr), CurBuffer(0), MacrosEnabledFlag(true), + HadError(false), CppHashLineNumber(0), AssemblerDialect(~0U), + IsDarwin(false), ParsingInlineAsm(false) { // Save the old handler. SavedDiagHandler = SrcMgr.getDiagHandler(); SavedDiagContext = SrcMgr.getDiagContext(); @@ -526,7 +528,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, } AsmParser::~AsmParser() { - assert(ActiveMacros.empty() && "Unexpected active macro instantiation!"); + assert((HadError || ActiveMacros.empty()) && + "Unexpected active macro instantiation!"); // Destroy any macros. 
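The AsmParser hunks above add a Vararg flag to macro parameters; when the argument being parsed belongs to a vararg parameter, the parser stops splitting on commas and takes everything up to the end of the statement as one argument. A loose string-based model of that behavior, not the real token-based parser:

#include <string>
#include <vector>

std::vector<std::string> parseArguments(const std::string &Line, bool Vararg) {
  std::vector<std::string> Args;
  size_t Pos = 0;
  while (Pos < Line.size()) {
    // Assume two declared parameters, the second marked :vararg.
    if (Vararg && Args.size() + 1 == 2) {
      Args.push_back(Line.substr(Pos));   // slurp to end of statement
      break;
    }
    size_t Comma = Line.find(',', Pos);
    Args.push_back(Line.substr(Pos, Comma - Pos));
    if (Comma == std::string::npos) break;
    Pos = Comma + 1;
  }
  return Args;
}

int main() {
  // .macro log fmt, args:vararg   invoked as   log "x=%d",1,2,3
  auto A = parseArguments("\"x=%d\",1,2,3", /*Vararg=*/true);
  return (A.size() == 2 && A[1] == "1,2,3") ? 0 : 1;
}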
for (StringMap::iterator it = MacroMap.begin(), @@ -959,7 +962,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E, switch (E->getKind()) { case MCExpr::Target: case MCExpr::Constant: - return 0; + return nullptr; case MCExpr::SymbolRef: { const MCSymbolRefExpr *SRE = cast(E); @@ -977,7 +980,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E, const MCUnaryExpr *UE = cast(E); const MCExpr *Sub = applyModifierToExpr(UE->getSubExpr(), Variant); if (!Sub) - return 0; + return nullptr; return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext()); } @@ -987,7 +990,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E, const MCExpr *RHS = applyModifierToExpr(BE->getRHS(), Variant); if (!LHS && !RHS) - return 0; + return nullptr; if (!LHS) LHS = BE->getLHS(); @@ -1013,7 +1016,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E, /// bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { // Parse the expression. - Res = 0; + Res = nullptr; if (parsePrimaryExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc)) return true; @@ -1050,7 +1053,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { } bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { - Res = 0; + Res = nullptr; return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc); } @@ -1701,7 +1704,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { if (Parser->SavedDiagHandler) Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext); else - Diag.print(0, OS); + Diag.print(nullptr, OS); return; } @@ -1723,7 +1726,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { if (Parser->SavedDiagHandler) Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext); else - NewDiag.print(0, OS); + NewDiag.print(nullptr, OS); } // FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The @@ -1739,6 +1742,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, ArrayRef Parameters, ArrayRef A, const SMLoc &L) { unsigned NParameters = Parameters.size(); + bool HasVararg = NParameters ? Parameters.back().Vararg : false; if ((!IsDarwin || NParameters != 0) && NParameters != A.size()) return Error(L, "Wrong number of arguments"); @@ -1820,13 +1824,16 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, Pos = I; } } else { + bool VarargParameter = HasVararg && Index == (NParameters - 1); for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), ie = A[Index].end(); it != ie; ++it) - if (it->getKind() == AsmToken::String) - OS << it->getStringContents(); - else + // We expect no quotes around the string's contents when + // parsing for varargs. + if (it->getKind() != AsmToken::String || VarargParameter) OS << it->getString(); + else + OS << it->getStringContents(); Pos += 1 + Argument.size(); } @@ -1890,7 +1897,16 @@ private: }; } -bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA) { +bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) { + + if (Vararg) { + if (Lexer.isNot(AsmToken::EndOfStatement)) { + StringRef Str = parseStringToEndOfStatement(); + MA.push_back(AsmToken(AsmToken::String, Str)); + } + return false; + } + unsigned ParenLevel = 0; unsigned AddTokens = 0; @@ -1961,6 +1977,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, // Parse two kinds of macro invocations: // - macros defined without any parameters accept an arbitrary number of them // - macros defined with parameters accept at most that many of them + bool HasVararg = NParameters ? 
M->Parameters.back().Vararg : false; for (unsigned Parameter = 0; !NParameters || Parameter < NParameters; ++Parameter) { SMLoc IDLoc = Lexer.getLoc(); @@ -1989,7 +2006,8 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, return true; } - if (parseMacroArgument(FA.Value)) + bool Vararg = HasVararg && Parameter == (NParameters - 1); + if (parseMacroArgument(FA.Value, Vararg)) return true; unsigned PI = Parameter; @@ -2050,7 +2068,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) { StringMap::iterator I = MacroMap.find(Name); - return (I == MacroMap.end()) ? NULL : I->getValue(); + return (I == MacroMap.end()) ? nullptr : I->getValue(); } void AsmParser::defineMacro(StringRef Name, const MCAsmMacro &Macro) { @@ -2364,7 +2382,7 @@ bool AsmParser::parseDirectiveValue(unsigned Size) { return Error(ExprLoc, "literal value out of range for directive"); getStreamer().EmitIntValue(IntValue, Size); } else - getStreamer().EmitValue(Value, Size); + getStreamer().EmitValue(Value, Size, ExprLoc); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -3240,6 +3258,12 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) { MCAsmMacroParameters Parameters; while (getLexer().isNot(AsmToken::EndOfStatement)) { + + if (Parameters.size() && Parameters.back().Vararg) + return Error(Lexer.getLoc(), + "Vararg parameter '" + Parameters.back().Name + + "' should be last one in the list of parameters."); + MCAsmMacroParameter Parameter; if (parseIdentifier(Parameter.Name)) return TokError("expected identifier in '.macro' directive"); @@ -3257,6 +3281,8 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) { if (Qualifier == "req") Parameter.Required = true; + else if (Qualifier == "vararg" && !IsDarwin) + Parameter.Vararg = true; else return Error(QualLoc, Qualifier + " is not a valid parameter qualifier " "for '" + Parameter.Name + "' in macro '" + Name + "'"); @@ -3268,7 +3294,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) { SMLoc ParamLoc; ParamLoc = Lexer.getLoc(); - if (parseMacroArgument(Parameter.Value)) + if (parseMacroArgument(Parameter.Value, /*Vararg=*/false )) return true; if (Parameter.Required) @@ -3906,9 +3932,9 @@ bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { MCSymbol *Sym = getContext().LookupSymbol(Name); if (expect_defined) - TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined()); + TheCondState.CondMet = (Sym && !Sym->isUndefined()); else - TheCondState.CondMet = (Sym == NULL || Sym->isUndefined()); + TheCondState.CondMet = (!Sym || Sym->isUndefined()); TheCondState.Ignore = !TheCondState.CondMet; } @@ -4151,7 +4177,7 @@ MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { // Check whether we have reached the end of the file. if (getLexer().is(AsmToken::Eof)) { Error(DirectiveLoc, "no matching '.endr' in definition"); - return 0; + return nullptr; } if (Lexer.is(AsmToken::Identifier) && @@ -4166,7 +4192,7 @@ MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { Lex(); if (Lexer.isNot(AsmToken::EndOfStatement)) { TokError("unexpected token in '.endr' directive"); - return 0; + return nullptr; } break; } @@ -4260,7 +4286,7 @@ bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) { Lex(); MCAsmMacroArguments A; - if (parseMacroArguments(0, A)) + if (parseMacroArguments(nullptr, A)) return true; // Eat the end of statement. 
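parseDirectiveMacro, extended above, rejects any parameter declared after a vararg one, which is what the "should be last one in the list of parameters" error enforces. A compact sketch of that validation with a simplified parameter record:

#include <string>
#include <vector>

struct Parameter {
  std::string Name;
  bool Required = false;
  bool Vararg = false;
};

bool addParameter(std::vector<Parameter> &Params, Parameter P,
                  std::string &Err) {
  if (!Params.empty() && Params.back().Vararg) {
    Err = "Vararg parameter '" + Params.back().Name +
          "' should be last one in the list of parameters.";
    return false;
  }
  Params.push_back(P);
  return true;
}

int main() {
  std::vector<Parameter> Params;
  std::string Err;
  addParameter(Params, {"fmt", false, false}, Err);
  addParameter(Params, {"args", false, true}, Err);              // args:vararg
  bool OK = addParameter(Params, {"extra", false, false}, Err);  // rejected
  return OK ? 1 : 0;
}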
@@ -4300,7 +4326,7 @@ bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) { Lex(); MCAsmMacroArguments A; - if (parseMacroArguments(0, A)) + if (parseMacroArguments(nullptr, A)) return true; if (A.size() != 1 || A.front().size() != 1) diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index 76d3f81..decf01c 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -293,7 +293,7 @@ bool COFFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Characteristics, SectionKind Kind) { return ParseSectionSwitch(Section, Characteristics, Kind, "", - COFF::IMAGE_COMDAT_SELECT_ANY, 0); + COFF::IMAGE_COMDAT_SELECT_ANY, nullptr); } bool COFFAsmParser::ParseSectionSwitch(StringRef Section, @@ -359,7 +359,7 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { } COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY; - const MCSectionCOFF *Assoc = 0; + const MCSectionCOFF *Assoc = nullptr; StringRef COMDATSymName; if (getLexer().is(AsmToken::Comma)) { Lex(); @@ -504,7 +504,7 @@ bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type, /// ::= .linkonce [ identifier [ identifier ] ] bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) { COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY; - const MCSectionCOFF *Assoc = 0; + const MCSectionCOFF *Assoc = nullptr; if (getLexer().is(AsmToken::Identifier)) if (parseCOMDATTypeAndAssoc(Type, Assoc)) return true; diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 0856b6e..f74b30a 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" using namespace llvm; @@ -612,8 +613,8 @@ bool DarwinAsmParser::parseDirectivePopSection(StringRef, SMLoc) { /// ::= .previous bool DarwinAsmParser::parseDirectivePrevious(StringRef DirName, SMLoc) { MCSectionSubPair PreviousSection = getStreamer().getPreviousSection(); - if (PreviousSection.first == NULL) - return TokError(".previous without corresponding .section"); + if (!PreviousSection.first) + return TokError(".previous without corresponding .section"); getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second); return false; } @@ -630,13 +631,13 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { // Get the secure log path. const char *SecureLogFile = getContext().getSecureLogFile(); - if (SecureLogFile == NULL) + if (!SecureLogFile) return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE " "environment variable unset."); // Open the secure log file if we haven't already. 
raw_ostream *OS = getContext().getSecureLog(); - if (OS == NULL) { + if (!OS) { std::string Err; OS = new raw_fd_ostream(SecureLogFile, Err, sys::fs::F_Append | sys::fs::F_Text); diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index d79dd67..95c4971 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -193,7 +193,7 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind) { - const MCExpr *Subsection = 0; + const MCExpr *Subsection = nullptr; if (getLexer().isNot(AsmToken::EndOfStatement)) { if (getParser().parseExpression(Subsection)) return true; @@ -411,7 +411,7 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) { int64_t Size = 0; StringRef GroupName; unsigned Flags = 0; - const MCExpr *Subsection = 0; + const MCExpr *Subsection = nullptr; bool UseLastGroup = false; // Set the defaults first. @@ -554,7 +554,7 @@ EndStmt: bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { MCSectionSubPair PreviousSection = getStreamer().getPreviousSection(); - if (PreviousSection.first == NULL) + if (PreviousSection.first == nullptr) return TokError(".previous without corresponding .section"); getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second); @@ -730,7 +730,7 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) { } bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) { - const MCExpr *Subsection = 0; + const MCExpr *Subsection = nullptr; if (getLexer().isNot(AsmToken::EndOfStatement)) { if (getParser().parseExpression(Subsection)) return true; diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp index 3867691..530814b 100644 --- a/lib/MC/MCParser/MCAsmLexer.cpp +++ b/lib/MC/MCParser/MCAsmLexer.cpp @@ -13,7 +13,7 @@ using namespace llvm; MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), - TokStart(0), SkipSpace(true) { + TokStart(nullptr), SkipSpace(true) { } MCAsmLexer::~MCAsmLexer() { diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index 6e1ebad..e417aa9 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -17,7 +17,7 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) { +MCAsmParser::MCAsmParser() : TargetParser(nullptr), ShowParsedOperands(0) { } MCAsmParser::~MCAsmParser() { diff --git a/lib/MC/MCRelocationInfo.cpp b/lib/MC/MCRelocationInfo.cpp index 7d2ec1f..a00c009 100644 --- a/lib/MC/MCRelocationInfo.cpp +++ b/lib/MC/MCRelocationInfo.cpp @@ -23,14 +23,14 @@ MCRelocationInfo::~MCRelocationInfo() { const MCExpr * MCRelocationInfo::createExprForRelocation(object::RelocationRef Rel) { - return 0; + return nullptr; } const MCExpr * MCRelocationInfo::createExprForCAPIVariantKind(const MCExpr *SubExpr, unsigned VariantKind) { if (VariantKind != LLVMDisassembler_VariantKind_None) - return 0; + return nullptr; return SubExpr; } diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index ad9ca88..335b8cd 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -34,7 +34,7 @@ void MCSectionCOFF::setSelection(int Selection, const MCSectionCOFF *Assoc) const { assert(Selection != 0 && "invalid COMDAT selection type"); assert((Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) == - (Assoc != 0) && + (Assoc != nullptr) && "associative COMDAT 
section must have an associated section"); this->Selection = Selection; this->Assoc = Assoc; @@ -62,7 +62,8 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << 'r'; if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) OS << 'n'; - + if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + OS << 'd'; OS << '"'; if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index 9cc534d..46beda4 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -20,7 +20,7 @@ static const struct { const char *AssemblerName, *EnumName; } SectionTypeDescriptors[MachO::LAST_KNOWN_SECTION_TYPE+1] = { { "regular", "S_REGULAR" }, // 0x00 - { 0, "S_ZEROFILL" }, // 0x01 + { nullptr, "S_ZEROFILL" }, // 0x01 { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02 { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03 { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04 @@ -31,11 +31,11 @@ static const struct { { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09 { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A { "coalesced", "S_COALESCED" }, // 0x0B - { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C + { nullptr, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C { "interposing", "S_INTERPOSING" }, // 0x0D { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E - { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F - { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10 + { nullptr, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F + { nullptr, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10 { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11 { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12 { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13 @@ -62,11 +62,11 @@ ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP) ENTRY("live_support", S_ATTR_LIVE_SUPPORT) ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE) ENTRY("debug", S_ATTR_DEBUG) -ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS) -ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC) -ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) +ENTRY(nullptr /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS) +ENTRY(nullptr /*FIXME*/, S_ATTR_EXT_RELOC) +ENTRY(nullptr /*FIXME*/, S_ATTR_LOC_RELOC) #undef ENTRY - { 0, "none", 0 }, // used if section has no attributes but has a stub size + { 0, "none", nullptr }, // used if section has no attributes but has a stub size }; MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 8fa55aa..7dccf0d 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -37,8 +37,7 @@ void MCTargetStreamer::finish() {} void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {} MCStreamer::MCStreamer(MCContext &Ctx) - : Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), - CurrentW64UnwindInfo(0), LastSymbol(0) { + : Context(Ctx), CurrentW64UnwindInfo(nullptr), LastSymbol(nullptr) { SectionStack.push_back(std::pair()); } @@ -51,10 +50,8 @@ void MCStreamer::reset() { for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i) delete W64UnwindInfos[i]; W64UnwindInfos.clear(); - EmitEHFrame = true; - EmitDebugFrame = false; - CurrentW64UnwindInfo = 0; - LastSymbol = 0; + CurrentW64UnwindInfo = nullptr; + LastSymbol = nullptr; SectionStack.clear(); SectionStack.push_back(std::pair()); } @@ -147,8 +144,9 @@ void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size) { } -void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size) { - EmitValueImpl(Value, Size); 
+void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) { + EmitValueImpl(Value, Size, Loc); } void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size) { @@ -203,7 +201,7 @@ MCSymbol *MCStreamer::getDwarfLineTableSymbol(unsigned CUID) { MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() { if (FrameInfos.empty()) - return 0; + return nullptr; return &FrameInfos.back(); } @@ -258,8 +256,6 @@ void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) { void MCStreamer::EmitCFISections(bool EH, bool Debug) { assert(EH || Debug); - EmitEHFrame = EH; - EmitDebugFrame = Debug; } void MCStreamer::EmitCFIStartProc(bool IsSimple) { @@ -278,6 +274,10 @@ void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { } void MCStreamer::RecordProcStart(MCDwarfFrameInfo &Frame) { + // Report an error if we haven't seen a symbol yet where we'd bind + // .cfi_startproc. + if (!LastSymbol) + report_fatal_error("No symbol to start a frame"); Frame.Function = LastSymbol; // We need to create a local symbol to avoid relocations. Frame.Begin = getContext().CreateTempSymbol(); @@ -610,17 +610,6 @@ void MCStreamer::EmitRawText(const Twine &T) { EmitRawTextImpl(T.toStringRef(Str)); } -void MCStreamer::EmitFrames(MCAsmBackend *MAB, bool usingCFI) { - if (!getNumFrameInfos()) - return; - - if (EmitEHFrame) - MCDwarfFrameEmitter::Emit(*this, MAB, usingCFI, true); - - if (EmitDebugFrame) - MCDwarfFrameEmitter::Emit(*this, MAB, usingCFI, false); -} - void MCStreamer::EmitW64Tables() { if (!getNumW64UnwindInfos()) return; @@ -639,11 +628,6 @@ void MCStreamer::Finish() { FinishImpl(); } -MCSymbolData &MCStreamer::getOrCreateSymbolData(const MCSymbol *Symbol) { - report_fatal_error("Not supported!"); - return *(static_cast(0)); -} - void MCStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { Symbol->setVariableValue(Value); diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 8d8e290..4424c91 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -24,9 +24,7 @@ MCSchedModel MCSchedModel::DefaultSchedModel; // For unknown processors. void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { SubtargetFeatures Features(FS); - FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs, - ProcFeatures, NumFeatures); - + FeatureBits = Features.getFeatureBits(CPU, ProcDesc, ProcFeatures); InitCPUSchedModel(CPU); } @@ -40,16 +38,15 @@ MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) { void MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS, - const SubtargetFeatureKV *PF, - const SubtargetFeatureKV *PD, + ArrayRef PF, + ArrayRef PD, const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR, const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA, const InstrStage *IS, const unsigned *OC, - const unsigned *FP, - unsigned NF, unsigned NP) { + const unsigned *FP) { TargetTriple = TT; ProcFeatures = PF; ProcDesc = PD; @@ -61,8 +58,6 @@ MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS, Stages = IS; OperandCycles = OC; ForwardingPaths = FP; - NumFeatures = NF; - NumProcs = NP; InitMCProcessorInfo(CPU, FS); } @@ -78,8 +73,7 @@ uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) { /// bits. This version will also change all implied bits. 
uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) { SubtargetFeatures Features; - FeatureBits = Features.ToggleFeature(FeatureBits, FS, - ProcFeatures, NumFeatures); + FeatureBits = Features.ToggleFeature(FeatureBits, FS, ProcFeatures); return FeatureBits; } @@ -88,6 +82,7 @@ const MCSchedModel * MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { assert(ProcSchedModels && "Processor machine model not available!"); + unsigned NumProcs = ProcDesc.size(); #ifndef NDEBUG for (size_t i = 1; i < NumProcs; i++) { assert(strcmp(ProcSchedModels[i - 1].Key, ProcSchedModels[i].Key) < 0 && diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp new file mode 100644 index 0000000..8e946d5 --- /dev/null +++ b/lib/MC/MCTargetOptions.cpp @@ -0,0 +1,19 @@ +//===- lib/MC/MCTargetOptions.cpp - MC Target Options --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCTargetOptions.h" + +namespace llvm { + +MCTargetOptions::MCTargetOptions() + : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false), + MCSaveTempLabels(false), MCUseDwarfDirectory(false), + ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false) {} + +} // end namespace llvm diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp index 68ecffb..9dfc56e 100644 --- a/lib/MC/MCValue.cpp +++ b/lib/MC/MCValue.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCValue.h" #include "llvm/MC/MCExpr.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -38,6 +39,23 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MCValue::dump() const { - print(dbgs(), 0); + print(dbgs(), nullptr); } #endif + +MCSymbolRefExpr::VariantKind MCValue::getAccessVariant() const { + const MCSymbolRefExpr *B = getSymB(); + if (B) { + if (B->getKind() != MCSymbolRefExpr::VK_None) + llvm_unreachable("unsupported"); + } + + const MCSymbolRefExpr *A = getSymA(); + if (!A) + return MCSymbolRefExpr::VK_None; + + MCSymbolRefExpr::VariantKind Kind = A->getKind(); + if (Kind == MCSymbolRefExpr::VK_WEAKREF) + return MCSymbolRefExpr::VK_None; + return Kind; +} diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 5fcea5f..cbaf0b8 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -26,6 +26,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "mc" + void MachObjectWriter::reset() { Relocations.clear(); IndirectSymBase.clear(); @@ -349,6 +351,9 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, } } + if (Layout.getAssembler().isThumbFunc(&Symbol)) + Flags |= SF_ThumbFunc; + // struct nlist (12 bytes) Write32(MSD.StringIndex); @@ -516,15 +521,14 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, // table, then sort the symbols is chosen to match 'as'. Even though it // doesn't matter for correctness, this is important for letting us diff .o // files. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); + for (MCSymbolData &SD : Asm.symbols()) { + const MCSymbol &Symbol = SD.getSymbol(); // Ignore non-linker visible symbols. 
- if (!Asm.isSymbolLinkerVisible(it->getSymbol())) + if (!Asm.isSymbolLinkerVisible(SD.getSymbol())) continue; - if (!it->isExternal() && !Symbol.isUndefined()) + if (!SD.isExternal() && !Symbol.isUndefined()) continue; uint64_t &Entry = StringIndexMap[Symbol.getName()]; @@ -535,7 +539,7 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, } MachSymbolData MSD; - MSD.SymbolData = it; + MSD.SymbolData = &SD; MSD.StringIndex = Entry; if (Symbol.isUndefined()) { @@ -552,15 +556,14 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, } // Now add the data for local symbols. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); + for (MCSymbolData &SD : Asm.symbols()) { + const MCSymbol &Symbol = SD.getSymbol(); // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it->getSymbol())) + if (!Asm.isSymbolLinkerVisible(SD.getSymbol())) continue; - if (it->isExternal() || Symbol.isUndefined()) + if (SD.isExternal() || Symbol.isUndefined()) continue; uint64_t &Entry = StringIndexMap[Symbol.getName()]; @@ -571,7 +574,7 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, } MachSymbolData MSD; - MSD.SymbolData = it; + MSD.SymbolData = &SD; MSD.StringIndex = Entry; if (Symbol.isAbsolute()) { @@ -621,10 +624,7 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm, void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm, const MCAsmLayout &Layout) { - for (MCAssembler::symbol_iterator i = Asm.symbol_begin(), - e = Asm.symbol_end(); - i != e; ++i) { - MCSymbolData &SD = *i; + for (MCSymbolData &SD : Asm.symbols()) { if (!SD.getSymbol().isVariable()) continue; @@ -669,7 +669,7 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, // - addr(atom(B)) - offset(B) // and the offsets are not relocatable, so the fixup is fully resolved when // addr(atom(A)) - addr(atom(B)) == 0. - const MCSymbolData *A_Base = 0, *B_Base = 0; + const MCSymbolData *A_Base = nullptr, *B_Base = nullptr; const MCSymbol &SA = DataA.getSymbol().AliasedSymbol(); const MCSection &SecA = SA.getSection(); diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 2fb91f2..27525c7 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -51,40 +51,12 @@ static inline bool isEnabled(const StringRef Feature) { return Ch == '+'; } -/// PrependFlag - Return a string with a prepended flag; '+' or '-'. -/// -static inline std::string PrependFlag(const StringRef Feature, - bool IsEnabled) { - assert(!Feature.empty() && "Empty string"); - if (hasFlag(Feature)) - return Feature; - std::string Prefix = IsEnabled ? "+" : "-"; - Prefix += Feature; - return Prefix; -} - /// Split - Splits a string of comma separated items in to a vector of strings. /// static void Split(std::vector &V, const StringRef S) { - if (S.empty()) - return; - - // Start at beginning of string. 
-  size_t Pos = 0;
-  while (true) {
-    // Find the next comma
-    size_t Comma = S.find(',', Pos);
-    // If no comma found then the rest of the string is used
-    if (Comma == std::string::npos) {
-      // Add string to vector
-      V.push_back(S.substr(Pos));
-      break;
-    }
-    // Otherwise add substring to vector
-    V.push_back(S.substr(Pos, Comma - Pos));
-    // Advance to next item
-    Pos = Comma + 1;
-  }
+  SmallVector<StringRef, 3> Tmp;
+  S.split(Tmp, ",", -1, false /* KeepEmpty */);
+  V.assign(Tmp.begin(), Tmp.end());
 }

 /// Join a vector of strings to a string with a comma separating each element.
@@ -109,63 +81,55 @@ static std::string Join(const std::vector<std::string> &V) {
 }

 /// Adding features.
-void SubtargetFeatures::AddFeature(const StringRef String,
-                                   bool IsEnabled) {
-  // Don't add empty features
-  if (!String.empty()) {
-    // Convert to lowercase, prepend flag and add to vector
-    Features.push_back(PrependFlag(String.lower(), IsEnabled));
-  }
+void SubtargetFeatures::AddFeature(const StringRef String) {
+  // Don't add empty features or features we already have.
+  if (!String.empty())
+    // Convert to lowercase, prepend flag if we don't already have a flag.
+    Features.push_back(hasFlag(String) ? String.str() : "+" + String.lower());
 }

 /// Find KV in array using binary search.
-static const SubtargetFeatureKV *Find(StringRef S, const SubtargetFeatureKV *A,
-                                      size_t L) {
-  // Determine the end of the array
-  const SubtargetFeatureKV *Hi = A + L;
+static const SubtargetFeatureKV *Find(StringRef S,
+                                      ArrayRef<SubtargetFeatureKV> A) {
   // Binary search the array
-  const SubtargetFeatureKV *F = std::lower_bound(A, Hi, S);
+  auto F = std::lower_bound(A.begin(), A.end(), S);
   // If not found then return NULL
-  if (F == Hi || StringRef(F->Key) != S) return NULL;
+  if (F == A.end() || StringRef(F->Key) != S) return nullptr;
   // Return the found array item
   return F;
 }

 /// getLongestEntryLength - Return the length of the longest entry in the table.
 ///
-static size_t getLongestEntryLength(const SubtargetFeatureKV *Table,
-                                    size_t Size) {
+static size_t getLongestEntryLength(ArrayRef<SubtargetFeatureKV> Table) {
   size_t MaxLen = 0;
-  for (size_t i = 0; i < Size; i++)
-    MaxLen = std::max(MaxLen, std::strlen(Table[i].Key));
+  for (auto &I : Table)
+    MaxLen = std::max(MaxLen, std::strlen(I.Key));
   return MaxLen;
 }

 /// Display help for feature choices.
 ///
-static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
-                 const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) {
+static void Help(ArrayRef<SubtargetFeatureKV> CPUTable,
+                 ArrayRef<SubtargetFeatureKV> FeatTable) {
   // Determine the length of the longest CPU and Feature entries.
-  unsigned MaxCPULen  = getLongestEntryLength(CPUTable, CPUTableSize);
-  unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
+  unsigned MaxCPULen  = getLongestEntryLength(CPUTable);
+  unsigned MaxFeatLen = getLongestEntryLength(FeatTable);

   // Print the CPU table.
   errs() << "Available CPUs for this target:\n\n";
-  for (size_t i = 0; i != CPUTableSize; i++)
-    errs() << format("  %-*s - %s.\n",
-                     MaxCPULen, CPUTable[i].Key, CPUTable[i].Desc);
+  for (auto &CPU : CPUTable)
+    errs() << format("  %-*s - %s.\n", MaxCPULen, CPU.Key, CPU.Desc);
   errs() << '\n';

   // Print the Feature table.
   errs() << "Available features for this target:\n\n";
-  for (size_t i = 0; i != FeatTableSize; i++)
-    errs() << format("  %-*s - %s.\n",
-                     MaxFeatLen, FeatTable[i].Key, FeatTable[i].Desc);
+  for (auto &Feature : FeatTable)
+    errs() << format("  %-*s - %s.\n", MaxFeatLen, Feature.Key, Feature.Desc);
   errs() << '\n';

   errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
             "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
-  std::exit(1);
 }

 //===----------------------------------------------------------------------===//
@@ -187,16 +151,13 @@ std::string SubtargetFeatures::getString() const {
 ///
 static
 void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
-                    const SubtargetFeatureKV *FeatureTable,
-                    size_t FeatureTableSize) {
-  for (size_t i = 0; i < FeatureTableSize; ++i) {
-    const SubtargetFeatureKV &FE = FeatureTable[i];
-
+                    ArrayRef<SubtargetFeatureKV> FeatureTable) {
+  for (auto &FE : FeatureTable) {
     if (FeatureEntry->Value == FE.Value) continue;

     if (FeatureEntry->Implies & FE.Value) {
       Bits |= FE.Value;
-      SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+      SetImpliedBits(Bits, &FE, FeatureTable);
     }
   }
 }
@@ -206,16 +167,13 @@ void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
 ///
 static
 void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
-                      const SubtargetFeatureKV *FeatureTable,
-                      size_t FeatureTableSize) {
-  for (size_t i = 0; i < FeatureTableSize; ++i) {
-    const SubtargetFeatureKV &FE = FeatureTable[i];
-
+                      ArrayRef<SubtargetFeatureKV> FeatureTable) {
+  for (auto &FE : FeatureTable) {
     if (FeatureEntry->Value == FE.Value) continue;

     if (FE.Implies & FeatureEntry->Value) {
       Bits &= ~FE.Value;
-      ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+      ClearImpliedBits(Bits, &FE, FeatureTable);
     }
   }
 }
@@ -224,23 +182,23 @@ void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
 /// bits.
 uint64_t
 SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
-                                 const SubtargetFeatureKV *FeatureTable,
-                                 size_t FeatureTableSize) {
+                                 ArrayRef<SubtargetFeatureKV> FeatureTable) {
+
   // Find feature in table.
   const SubtargetFeatureKV *FeatureEntry =
-      Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+      Find(StripFlag(Feature), FeatureTable);
   // If there is a match
   if (FeatureEntry) {
     if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
       Bits &= ~FeatureEntry->Value;

       // For each feature that implies this, clear it.
-      ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+      ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
     } else {
       Bits |= FeatureEntry->Value;

       // For each feature that this implies, set it.
-      SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+      SetImpliedBits(Bits, FeatureEntry, FeatureTable);
     }
   } else {
     errs() << "'" << Feature
@@ -254,20 +212,20 @@ SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,

 /// getFeatureBits - Get feature bits a CPU.
 ///
-uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
-                                           const SubtargetFeatureKV *CPUTable,
-                                           size_t CPUTableSize,
-                                           const SubtargetFeatureKV *FeatureTable,
-                                           size_t FeatureTableSize) {
-  if (!FeatureTableSize || !CPUTableSize)
+uint64_t
+SubtargetFeatures::getFeatureBits(const StringRef CPU,
+                                  ArrayRef<SubtargetFeatureKV> CPUTable,
+                                  ArrayRef<SubtargetFeatureKV> FeatureTable) {
+
+  if (CPUTable.empty() || FeatureTable.empty())
     return 0;

 #ifndef NDEBUG
-  for (size_t i = 1; i < CPUTableSize; i++) {
+  for (size_t i = 1, e = CPUTable.size(); i != e; ++i) {
     assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
            "CPU table is not sorted");
   }
-  for (size_t i = 1; i < FeatureTableSize; i++) {
+  for (size_t i = 1, e = FeatureTable.size(); i != e; ++i) {
     assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
           "CPU features table is not sorted");
   }
@@ -276,21 +234,21 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,

   // Check if help is needed
   if (CPU == "help")
-    Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+    Help(CPUTable, FeatureTable);

   // Find CPU entry if CPU name is specified.
-  if (!CPU.empty()) {
-    const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize);
+  else if (!CPU.empty()) {
+    const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable);
+
     // If there is a match
     if (CPUEntry) {
       // Set base feature bits
       Bits = CPUEntry->Value;

       // Set the feature implied by this CPU feature, if any.
-      for (size_t i = 0; i < FeatureTableSize; ++i) {
-        const SubtargetFeatureKV &FE = FeatureTable[i];
+      for (auto &FE : FeatureTable) {
         if (CPUEntry->Value & FE.Value)
-          SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+          SetImpliedBits(Bits, &FE, FeatureTable);
       }
     } else {
       errs() << "'" << CPU
@@ -300,16 +258,14 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
   }

   // Iterate through each feature
-  for (size_t i = 0, E = Features.size(); i < E; i++) {
-    const StringRef Feature = Features[i];
-
+  for (auto &Feature : Features) {
     // Check for help
     if (Feature == "+help")
-      Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+      Help(CPUTable, FeatureTable);

     // Find feature in table.
     const SubtargetFeatureKV *FeatureEntry =
-        Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+        Find(StripFlag(Feature), FeatureTable);
     // If there is a match
     if (FeatureEntry) {
       // Enable/disable feature in bits
@@ -317,12 +273,12 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
         Bits |= FeatureEntry->Value;

         // For each feature that this implies, set it.
-        SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+        SetImpliedBits(Bits, FeatureEntry, FeatureTable);
       } else {
         Bits &= ~FeatureEntry->Value;

         // For each feature that implies this, clear it.
-        ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+        ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
       }
     } else {
       errs() << "'" << Feature
@@ -337,8 +293,8 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,

 /// print - Print feature string.
 ///
 void SubtargetFeatures::print(raw_ostream &OS) const {
-  for (size_t i = 0, e = Features.size(); i != e; ++i)
-    OS << Features[i] << " ";
+  for (auto &F : Features)
+    OS << F << " ";
   OS << "\n";
 }

diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 500acd8..961cbc6 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -11,12 +11,11 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "WinCOFFObjectWriter"
-
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
@@ -35,6 +34,8 @@

 using namespace llvm;

+#define DEBUG_TYPE "WinCOFFObjectWriter"
+
 namespace {
 typedef SmallString<COFF::NameSize> name;
@@ -81,7 +82,7 @@ struct COFFRelocation {
   COFF::relocation Data;
   COFFSymbol *Symb;

-  COFFRelocation() : Symb(NULL) {}
+  COFFRelocation() : Symb(nullptr) {}
   static size_t size() { return COFF::RelocationSize; }
 };

@@ -118,8 +119,8 @@ public:

 class WinCOFFObjectWriter : public MCObjectWriter {
 public:
-  typedef std::vector<COFFSymbol *>  symbols;
-  typedef std::vector<COFFSection *> sections;
+  typedef std::vector<std::unique_ptr<COFFSymbol>>  symbols;
+  typedef std::vector<std::unique_ptr<COFFSection>> sections;

   typedef DenseMap<MCSymbol const *, COFFSymbol *>   symbol_map;
   typedef DenseMap<MCSection const *, COFFSection *> section_map;
@@ -137,7 +138,6 @@ public:
   symbol_map  SymbolMap;

   WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
-  virtual ~WinCOFFObjectWriter();

   COFFSymbol *createSymbol(StringRef Name);
   COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
@@ -160,7 +160,7 @@ public:
   // Entity writing methods.

   void WriteFileHeader(const COFF::header &Header);
-  void WriteSymbol(const COFFSymbol *S);
+  void WriteSymbol(const COFFSymbol &S);
   void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
   void WriteSectionHeader(const COFF::section &S);
   void WriteRelocation(const COFF::relocation &R);
@@ -192,10 +192,10 @@ static inline void write_uint32_le(void *Data, uint32_t const &Value) {

 COFFSymbol::COFFSymbol(StringRef name)
   : Name(name.begin(), name.end())
-  , Other(NULL)
-  , Section(NULL)
+  , Other(nullptr)
+  , Section(nullptr)
   , Relocations(0)
-  , MCData(NULL) {
+  , MCData(nullptr) {
   memset(&Data, 0, sizeof(Data));
 }

@@ -214,7 +214,7 @@ void COFFSymbol::set_name_offset(uint32_t Offset) {
 /// logic to decide if the symbol should be reported in the symbol table
 bool COFFSymbol::should_keep() const {
   // no section means its external, keep it
-  if (Section == NULL)
+  if (!Section)
     return true;

   // if it has relocations pointing at it, keep it
@@ -244,8 +244,8 @@ bool COFFSymbol::should_keep() const {

 COFFSection::COFFSection(StringRef name)
   : Name(name)
-  , MCData(NULL)
-  , Symbol(NULL) {
+  , MCData(nullptr)
+  , Symbol(nullptr) {
   memset(&Header, 0, sizeof(Header));
 }

@@ -308,13 +308,6 @@ WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
   Header.Machine = TargetObjectWriter->getMachine();
 }

-WinCOFFObjectWriter::~WinCOFFObjectWriter() {
-  for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I)
-    delete *I;
-  for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I)
-    delete *I;
-}
-
 COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
   return createCOFFEntity<COFFSymbol>(Name, Symbols);
 }
@@ -338,11 +331,9 @@ COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {

 template <typename object_t, typename list_t>
 object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name,
                                                 list_t &List) {
-  object_t *Object = new object_t(Name);
-
-  List.push_back(Object);
+  List.push_back(make_unique<object_t>(Name));

-  return Object;
+  return List.back().get();
 }

 /// This function takes a section data object from the assembler
@@ -394,7 +385,19 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
   SectionMap[&SectionData.getSection()] = coff_section;
 }

-/// This function takes a section data object from the assembler
+static uint64_t getSymbolValue(const MCSymbolData &Data,
+                               const MCAsmLayout &Layout) {
+  if (Data.isCommon() && Data.isExternal())
+    return Data.getCommonSize();
+
+  uint64_t Res;
+  if (!Layout.getSymbolOffset(&Data, Res))
+    return 0;
+
+  return Res;
+}
+
+/// This function takes a symbol data object from the assembler
 /// and creates the associated COFF symbol staging object.
 void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
                                        MCAssembler &Assembler,
@@ -436,31 +439,29 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,

     coff_symbol->MCData = &SymbolData;
   } else {
-    const MCSymbolData &ResSymData =
-      Assembler.getSymbolData(Symbol.AliasedSymbol());
-
-    if (Symbol.isVariable()) {
-      int64_t Addr;
-      if (Symbol.getVariableValue()->EvaluateAsAbsolute(Addr, Layout))
-        coff_symbol->Data.Value = Addr;
-    }
+    const MCSymbolData &ResSymData = Assembler.getSymbolData(Symbol);
+    const MCSymbol *Base = Layout.getBaseSymbol(Symbol);

+    coff_symbol->Data.Value = getSymbolValue(ResSymData, Layout);
     coff_symbol->Data.Type         = (ResSymData.getFlags() & 0x0000FFFF) >>  0;
     coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16;

     // If no storage class was specified in the streamer, define it here.
     if (coff_symbol->Data.StorageClass == 0) {
-      bool external = ResSymData.isExternal() || (ResSymData.Fragment == NULL);
+      bool external = ResSymData.isExternal() || !ResSymData.Fragment;

       coff_symbol->Data.StorageClass =
        external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
     }

-    if (Symbol.isAbsolute() || Symbol.AliasedSymbol().isVariable())
+    if (!Base) {
       coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
-    else if (ResSymData.Fragment != NULL)
-      coff_symbol->Section =
-        SectionMap[&ResSymData.Fragment->getParent()->getSection()];
+    } else {
+      const MCSymbolData &BaseData = Assembler.getSymbolData(*Base);
+      if (BaseData.Fragment)
+        coff_symbol->Section =
+            SectionMap[&BaseData.Fragment->getParent()->getSection()];
+    }

     coff_symbol->MCData = &ResSymData;
   }
@@ -561,14 +562,14 @@ void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
   WriteLE16(Header.Characteristics);
 }

-void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) {
-  WriteBytes(StringRef(S->Data.Name, COFF::NameSize));
-  WriteLE32(S->Data.Value);
-  WriteLE16(S->Data.SectionNumber);
-  WriteLE16(S->Data.Type);
-  Write8(S->Data.StorageClass);
-  Write8(S->Data.NumberOfAuxSymbols);
-  WriteAuxiliarySymbols(S->Aux);
+void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) {
+  WriteBytes(StringRef(S.Data.Name, COFF::NameSize));
+  WriteLE32(S.Data.Value);
+  WriteLE16(S.Data.SectionNumber);
+  WriteLE16(S.Data.Type);
+  Write8(S.Data.StorageClass);
+  Write8(S.Data.NumberOfAuxSymbols);
+  WriteAuxiliarySymbols(S.Aux);
 }

 void WinCOFFObjectWriter::WriteAuxiliarySymbols(
@@ -640,16 +641,42 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,

   // "Define" each section & symbol. This creates section & symbol
   // entries in the staging area.
- for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++) - DefineSection(*i); + static_assert(sizeof(((COFF::AuxiliaryFile *)nullptr)->FileName) == COFF::SymbolSize, + "size mismatch for COFF::AuxiliaryFile::FileName"); + for (auto FI = Asm.file_names_begin(), FE = Asm.file_names_end(); + FI != FE; ++FI) { + // round up to calculate the number of auxiliary symbols required + unsigned Count = (FI->size() + COFF::SymbolSize - 1) / COFF::SymbolSize; + + COFFSymbol *file = createSymbol(".file"); + file->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG; + file->Data.StorageClass = COFF::IMAGE_SYM_CLASS_FILE; + file->Aux.resize(Count); + + unsigned Offset = 0; + unsigned Length = FI->size(); + for (auto & Aux : file->Aux) { + Aux.AuxType = ATFile; + + if (Length > COFF::SymbolSize) { + memcpy(Aux.Aux.File.FileName, FI->c_str() + Offset, COFF::SymbolSize); + Length = Length - COFF::SymbolSize; + } else { + memcpy(Aux.Aux.File.FileName, FI->c_str() + Offset, Length); + memset(&Aux.Aux.File.FileName[Length], 0, COFF::SymbolSize - Length); + Length = 0; + } - for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(), - e = Asm.symbol_end(); - i != e; i++) { - if (ExportSymbol(*i, Asm)) { - DefineSymbol(*i, Asm, Layout); + Offset = Offset + COFF::SymbolSize; } } + + for (const auto & Section : Asm) + DefineSection(Section); + + for (MCSymbolData &SD : Asm.symbols()) + if (ExportSymbol(SD, Asm)) + DefineSymbol(SD, Asm, Layout); } void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, @@ -659,7 +686,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { - assert(Target.getSymA() != NULL && "Relocation must reference a symbol!"); + assert(Target.getSymA() && "Relocation must reference a symbol!"); const MCSymbol &Symbol = Target.getSymA()->getSymbol(); const MCSymbol &A = Symbol.AliasedSymbol(); @@ -668,7 +695,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, Fixup.getLoc(), Twine("symbol '") + A.getName() + "' can not be undefined"); - MCSymbolData &A_SD = Asm.getSymbolData(A); + const MCSymbolData &A_SD = Asm.getSymbolData(A); MCSectionData const *SectionData = Fragment->getParent(); @@ -685,7 +712,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, if (SymB) { const MCSymbol *B = &SymB->getSymbol(); - MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData &B_SD = Asm.getSymbolData(*B); if (!B_SD.getFragment()) Asm.getContext().FatalError( Fixup.getLoc(), @@ -737,11 +764,52 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, // FIXME: Can anyone explain what this does other than adjust for the size // of the offset? 
-  if (Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32 ||
-      Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32)
+  if ((Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 &&
+       Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32) ||
+      (Header.Machine == COFF::IMAGE_FILE_MACHINE_I386 &&
+       Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32))
     FixedValue += 4;

-  coff_section->Relocations.push_back(Reloc);
+  if (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT) {
+    switch (Reloc.Data.Type) {
+    case COFF::IMAGE_REL_ARM_ABSOLUTE:
+    case COFF::IMAGE_REL_ARM_ADDR32:
+    case COFF::IMAGE_REL_ARM_ADDR32NB:
+    case COFF::IMAGE_REL_ARM_TOKEN:
+    case COFF::IMAGE_REL_ARM_SECTION:
+    case COFF::IMAGE_REL_ARM_SECREL:
+      break;
+    case COFF::IMAGE_REL_ARM_BRANCH11:
+    case COFF::IMAGE_REL_ARM_BLX11:
+      // IMAGE_REL_ARM_BRANCH11 and IMAGE_REL_ARM_BLX11 are only used for
+      // pre-ARMv7, which implicitly rules them out of ARMNT (they would be
+      // valid for Windows CE).
+    case COFF::IMAGE_REL_ARM_BRANCH24:
+    case COFF::IMAGE_REL_ARM_BLX24:
+    case COFF::IMAGE_REL_ARM_MOV32A:
+      // IMAGE_REL_ARM_BRANCH24, IMAGE_REL_ARM_BLX24, and IMAGE_REL_ARM_MOV32A
+      // are only used for ARM-mode code, which is documented as being
+      // unsupported by Windows on ARM. Empirical proof indicates that masm is
+      // able to generate the relocations; however, the rest of the MSVC
+      // toolchain is unable to handle them.
+      llvm_unreachable("unsupported relocation");
+      break;
+    case COFF::IMAGE_REL_ARM_MOV32T:
+      break;
+    case COFF::IMAGE_REL_ARM_BRANCH20T:
+    case COFF::IMAGE_REL_ARM_BRANCH24T:
+    case COFF::IMAGE_REL_ARM_BLX23T:
+      // IMAGE_REL_ARM_BRANCH20T, IMAGE_REL_ARM_BRANCH24T, and
+      // IMAGE_REL_ARM_BLX23T all perform a 4-byte adjustment to the
+      // relocation. Relative branches are offset by 4 on ARM; because there
+      // are no RELA relocations, all branches are offset by 4.
+      FixedValue = FixedValue + 4;
+      break;
+    }
+  }
+
+  if (TargetObjectWriter->recordRelocation(Fixup))
+    coff_section->Relocations.push_back(Reloc);
 }

 void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
@@ -750,77 +818,64 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
   Header.NumberOfSections = 0;

   DenseMap<COFFSection *, size_t> SectionIndices;
-  for (sections::iterator i = Sections.begin(),
-                          e = Sections.end(); i != e; i++) {
-    if (Layout.getSectionAddressSize((*i)->MCData) > 0) {
+  for (auto & Section : Sections) {
+    if (Layout.getSectionAddressSize(Section->MCData) > 0) {
       size_t Number = ++Header.NumberOfSections;
-      SectionIndices[*i] = Number;
-      MakeSectionReal(**i, Number);
+      SectionIndices[Section.get()] = Number;
+      MakeSectionReal(*Section, Number);
     } else {
-      (*i)->Number = -1;
+      Section->Number = -1;
     }
   }

   Header.NumberOfSymbols = 0;

-  for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
-    COFFSymbol *coff_symbol = *i;
-    MCSymbolData const *SymbolData = coff_symbol->MCData;
-
+  for (auto & Symbol : Symbols) {
     // Update section number & offset for symbols that have them.
-    if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
-      assert(coff_symbol->Section != NULL);
+    if (Symbol->Section)
+      Symbol->Data.SectionNumber = Symbol->Section->Number;

-      coff_symbol->Data.SectionNumber = coff_symbol->Section->Number;
-      coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
-                              + SymbolData->Offset;
-    }
-
-    if (coff_symbol->should_keep()) {
-      MakeSymbolReal(*coff_symbol, Header.NumberOfSymbols++);
+    if (Symbol->should_keep()) {
+      MakeSymbolReal(*Symbol, Header.NumberOfSymbols++);

       // Update auxiliary symbol info.
- coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size(); - Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols; + Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size(); + Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols; } else - coff_symbol->Index = -1; + Symbol->Index = -1; } // Fixup weak external references. - for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) { - COFFSymbol *coff_symbol = *i; - if (coff_symbol->Other != NULL) { - assert(coff_symbol->Index != -1); - assert(coff_symbol->Aux.size() == 1 && - "Symbol must contain one aux symbol!"); - assert(coff_symbol->Aux[0].AuxType == ATWeakExternal && + for (auto & Symbol : Symbols) { + if (Symbol->Other) { + assert(Symbol->Index != -1); + assert(Symbol->Aux.size() == 1 && "Symbol must contain one aux symbol!"); + assert(Symbol->Aux[0].AuxType == ATWeakExternal && "Symbol's aux symbol must be a Weak External!"); - coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = coff_symbol->Other->Index; + Symbol->Aux[0].Aux.WeakExternal.TagIndex = Symbol->Other->Index; } } // Fixup associative COMDAT sections. - for (sections::iterator i = Sections.begin(), - e = Sections.end(); i != e; i++) { - if ((*i)->Symbol->Aux[0].Aux.SectionDefinition.Selection != + for (auto & Section : Sections) { + if (Section->Symbol->Aux[0].Aux.SectionDefinition.Selection != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) continue; - const MCSectionCOFF &MCSec = static_cast( - (*i)->MCData->getSection()); + const MCSectionCOFF &MCSec = + static_cast(Section->MCData->getSection()); COFFSection *Assoc = SectionMap.lookup(MCSec.getAssocSection()); - if (!Assoc) { + if (!Assoc) report_fatal_error(Twine("Missing associated COMDAT section ") + MCSec.getAssocSection()->getSectionName() + " for section " + MCSec.getSectionName()); - } // Skip this section if the associated section is unused. 
if (Assoc->Number == -1) continue; - (*i)->Symbol->Aux[0].Aux.SectionDefinition.Number = SectionIndices[Assoc]; + Section->Symbol->Aux[0].Aux.SectionDefinition.Number = SectionIndices[Assoc]; } @@ -831,15 +886,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, offset += COFF::HeaderSize; offset += COFF::SectionSize * Header.NumberOfSections; - for (MCAssembler::const_iterator i = Asm.begin(), - e = Asm.end(); - i != e; i++) { - COFFSection *Sec = SectionMap[&i->getSection()]; + for (const auto & Section : Asm) { + COFFSection *Sec = SectionMap[&Section.getSection()]; if (Sec->Number == -1) continue; - Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(i); + Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section); if (IsPhysicalSection(Sec)) { Sec->Header.PointerToRawData = offset; @@ -866,16 +919,14 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, offset += COFF::RelocationSize * Sec->Relocations.size(); - for (relocations::iterator cr = Sec->Relocations.begin(), - er = Sec->Relocations.end(); - cr != er; ++cr) { - assert((*cr).Symb->Index != -1); - (*cr).Data.SymbolTableIndex = (*cr).Symb->Index; + for (auto & Relocation : Sec->Relocations) { + assert(Relocation.Symb->Index != -1); + Relocation.Data.SymbolTableIndex = Relocation.Symb->Index; } } - assert(Sec->Symbol->Aux.size() == 1 - && "Section's symbol must have one aux!"); + assert(Sec->Symbol->Aux.size() == 1 && + "Section's symbol must have one aux!"); AuxSymbol &Aux = Sec->Symbol->Aux[0]; assert(Aux.AuxType == ATSectionDefinition && "Section's symbol's aux symbol must be a Section Definition!"); @@ -898,13 +949,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, sections::iterator i, ie; MCAssembler::const_iterator j, je; - for (i = Sections.begin(), ie = Sections.end(); i != ie; i++) - if ((*i)->Number != -1) { - if ((*i)->Relocations.size() >= 0xffff) { - (*i)->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL; - } - WriteSectionHeader((*i)->Header); + for (auto & Section : Sections) { + if (Section->Number != -1) { + if (Section->Relocations.size() >= 0xffff) + Section->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL; + WriteSectionHeader(Section->Header); } + } for (i = Sections.begin(), ie = Sections.end(), j = Asm.begin(), je = Asm.end(); @@ -934,11 +985,8 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, WriteRelocation(r); } - for (relocations::const_iterator k = (*i)->Relocations.begin(), - ke = (*i)->Relocations.end(); - k != ke; k++) { - WriteRelocation(k->Data); - } + for (const auto & Relocation : (*i)->Relocations) + WriteRelocation(Relocation.Data); } else assert((*i)->Header.PointerToRelocations == 0 && "Section::PointerToRelocations is insane!"); @@ -948,9 +996,9 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, assert(OS.tell() == Header.PointerToSymbolTable && "Header::PointerToSymbolTable is insane!"); - for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) - if ((*i)->Index != -1) - WriteSymbol(*i); + for (auto & Symbol : Symbols) + if (Symbol->Index != -1) + WriteSymbol(*Symbol); OS.write((char const *)&Strings.Data.front(), Strings.Data.size()); } diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 5bd7b8f..e6df465 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -7,12 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This file contains an implementation of a Win32 COFF object file streamer. 
+// This file contains an implementation of a Windows COFF object file streamer. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "WinCOFFStreamer" - +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmLayout.h" @@ -27,6 +26,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWin64EH.h" +#include "llvm/MC/MCWinCOFFStreamer.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -35,95 +35,33 @@ using namespace llvm; -namespace { -class WinCOFFStreamer : public MCObjectStreamer { -public: - MCSymbol const *CurSymbol; - - WinCOFFStreamer(MCContext &Context, - MCAsmBackend &MAB, - MCCodeEmitter &CE, - raw_ostream &OS); - - void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment, bool External); - - // MCStreamer interface - - void InitSections() override; - void EmitLabel(MCSymbol *Symbol) override; - void EmitDebugLabel(MCSymbol *Symbol) override; - void EmitAssemblerFlag(MCAssemblerFlag Flag) override; - void EmitThumbFunc(MCSymbol *Func) override; - bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; - void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override; - void BeginCOFFSymbolDef(MCSymbol const *Symbol) override; - void EmitCOFFSymbolStorageClass(int StorageClass) override; - void EmitCOFFSymbolType(int Type) override; - void EndCOFFSymbolDef() override; - void EmitCOFFSectionIndex(MCSymbol const *Symbol) override; - void EmitCOFFSecRel32(MCSymbol const *Symbol) override; - void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override; - void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) override; - void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) override; - void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size,unsigned ByteAlignment) override; - void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment) override; - void EmitFileDirective(StringRef Filename) override; - void EmitIdent(StringRef IdentString) override; - void EmitWin64EHHandlerData() override; - void FinishImpl() override; - -private: - void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &STI) override { - MCDataFragment *DF = getOrCreateDataFragment(); - - SmallVector Fixups; - SmallString<256> Code; - raw_svector_ostream VecOS(Code); - getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); - - // Add the fixups and data. - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); - DF->getFixups().push_back(Fixups[i]); - } - DF->getContents().append(Code.begin(), Code.end()); - } -}; -} // end anonymous namespace. 
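// Aside: beyond renaming the class to MCWinCOFFStreamer, this rewrite replaces
// assert()-based checks on conditions a user's .s file can trigger (nested
// .def blocks, out-of-range storage classes) with FatalError diagnostics that
// survive release builds. Below is a minimal, self-contained C++ sketch of
// that pattern; fatalError and SymbolDefState are illustrative stand-ins, not
// the LLVM API.
#include <cstdio>
#include <cstdlib>
#include <string>

[[noreturn]] static void fatalError(const std::string &Msg) {
  std::fprintf(stderr, "error: %s\n", Msg.c_str());
  std::exit(1);
}

struct SymbolDefState {
  const void *CurSymbol = nullptr;   // symbol currently being defined, if any

  void begin(const void *Sym) {      // analogue of BeginCOFFSymbolDef
    if (CurSymbol)
      fatalError("starting a new symbol definition without completing the "
                 "previous one");
    CurSymbol = Sym;
  }
  void setStorageClass(int SC) {     // analogue of EmitCOFFSymbolStorageClass
    if (!CurSymbol)
      fatalError("storage class specified outside of symbol definition");
    if (SC & ~0xff)                  // COFF storage class is a single byte
      fatalError("storage class value '" + std::to_string(SC) +
                 "' out of range");
  }
  void end() {                       // analogue of EndCOFFSymbolDef
    if (!CurSymbol)
      fatalError("ending symbol definition without starting one");
    CurSymbol = nullptr;
  }
};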
- -WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, - MCCodeEmitter &CE, raw_ostream &OS) - : MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(NULL) {} - -void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment, bool External) { - assert(!Symbol->isInSection() && "Symbol must not already have a section!"); +#define DEBUG_TYPE "WinCOFFStreamer" - const MCSection *Section = getContext().getObjectFileInfo()->getBSSSection(); - MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section); - if (SectionData.getAlignment() < ByteAlignment) - SectionData.setAlignment(ByteAlignment); +namespace llvm { +MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + MCCodeEmitter &CE, raw_ostream &OS) + : MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(nullptr) {} - MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol); - SymbolData.setExternal(External); +void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst, + const MCSubtargetInfo &STI) { + MCDataFragment *DF = getOrCreateDataFragment(); - AssignSection(Symbol, Section); + SmallVector Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups, STI); + VecOS.flush(); - if (ByteAlignment != 1) - new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData); + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->getFixups().push_back(Fixups[i]); + } - SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData)); + DF->getContents().append(Code.begin(), Code.end()); } -// MCStreamer interface - -void WinCOFFStreamer::InitSections() { +void MCWinCOFFStreamer::InitSections() { // FIXME: this is identical to the ELF one. // This emulates the same behavior of GNU as. This makes it easier // to compare the output as the major sections are in the same order. @@ -139,165 +77,182 @@ void WinCOFFStreamer::InitSections() { SwitchSection(getContext().getObjectFileInfo()->getTextSection()); } -void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { +void MCWinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); MCObjectStreamer::EmitLabel(Symbol); } -void WinCOFFStreamer::EmitDebugLabel(MCSymbol *Symbol) { +void MCWinCOFFStreamer::EmitDebugLabel(MCSymbol *Symbol) { EmitLabel(Symbol); } -void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + +void MCWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { llvm_unreachable("not implemented"); } -void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) { +void MCWinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) { llvm_unreachable("not implemented"); } -bool WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, - MCSymbolAttr Attribute) { +bool MCWinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { assert(Symbol && "Symbol must be non-null!"); - assert((Symbol->isInSection() - ? 
Symbol->getSection().getVariant() == MCSection::SV_COFF - : true) && "Got non-COFF section in the COFF backend!"); + assert((!Symbol->isInSection() || + Symbol->getSection().getVariant() == MCSection::SV_COFF) && + "Got non-COFF section in the COFF backend!"); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + switch (Attribute) { + default: return false; case MCSA_WeakReference: - case MCSA_Weak: { - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal); - SD.setExternal(true); - } + case MCSA_Weak: + SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal); + SD.setExternal(true); break; - case MCSA_Global: - getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true); + SD.setExternal(true); break; - - default: - return false; } return true; } -void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { +void MCWinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { llvm_unreachable("not implemented"); } -void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { - assert((Symbol->isInSection() - ? Symbol->getSection().getVariant() == MCSection::SV_COFF - : true) && "Got non-COFF section in the COFF backend!"); - assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls " - "to BeginCOFFSymbolDef!"); +void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { + assert((!Symbol->isInSection() || + Symbol->getSection().getVariant() == MCSection::SV_COFF) && + "Got non-COFF section in the COFF backend!"); + + if (CurSymbol) + FatalError("starting a new symbol definition without completing the " + "previous one"); CurSymbol = Symbol; } -void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { - assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); - assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in " - "the first byte!"); +void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { + if (!CurSymbol) + FatalError("storage class specified outside of symbol definition"); + + if (StorageClass & ~0xff) + FatalError(Twine("storage class value '") + itostr(StorageClass) + + "' out of range"); - getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags( - StorageClass << COFF::SF_ClassShift, - COFF::SF_ClassMask); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol); + SD.modifyFlags(StorageClass << COFF::SF_ClassShift, COFF::SF_ClassMask); } -void WinCOFFStreamer::EmitCOFFSymbolType(int Type) { - assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); - assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 " - "bytes"); +void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { + if (!CurSymbol) + FatalError("symbol type specified outside of a symbol definition"); + + if (Type & ~0xffff) + FatalError(Twine("type value '") + itostr(Type) + "' out of range"); - getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags( - Type << COFF::SF_TypeShift, - COFF::SF_TypeMask); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol); + SD.modifyFlags(Type << COFF::SF_TypeShift, COFF::SF_TypeMask); } -void WinCOFFStreamer::EndCOFFSymbolDef() { - assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!"); - CurSymbol = NULL; +void MCWinCOFFStreamer::EndCOFFSymbolDef() { + if (!CurSymbol) + FatalError("ending symbol definition without starting one"); + CurSymbol = nullptr; } -void 
-void WinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
+void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
   MCDataFragment *DF = getOrCreateDataFragment();
-  DF->getFixups().push_back(MCFixup::Create(
-      DF->getContents().size(), MCSymbolRefExpr::Create(Symbol, getContext()),
-      FK_SecRel_2));
+  const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
+  MCFixup Fixup = MCFixup::Create(DF->getContents().size(), SRE, FK_SecRel_2);
+  DF->getFixups().push_back(Fixup);
   DF->getContents().resize(DF->getContents().size() + 4, 0);
 }
 
-void WinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
   MCDataFragment *DF = getOrCreateDataFragment();
-  DF->getFixups().push_back(MCFixup::Create(
-      DF->getContents().size(), MCSymbolRefExpr::Create(Symbol, getContext()),
-      FK_SecRel_4));
+  const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
+  MCFixup Fixup = MCFixup::Create(DF->getContents().size(), SRE, FK_SecRel_4);
+  DF->getFixups().push_back(Fixup);
   DF->getContents().resize(DF->getContents().size() + 4, 0);
 }
 
-void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
-  llvm_unreachable("not implemented");
+void MCWinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+  llvm_unreachable("not supported");
 }
 
-void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                       unsigned ByteAlignment) {
-  assert((Symbol->isInSection()
-             ? Symbol->getSection().getVariant() == MCSection::SV_COFF
-             : true) && "Got non-COFF section in the COFF backend!");
-  AddCommonSymbol(Symbol, Size, ByteAlignment, true);
+void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                         unsigned ByteAlignment) {
+  assert((!Symbol->isInSection() ||
+          Symbol->getSection().getVariant() == MCSection::SV_COFF) &&
+         "Got non-COFF section in the COFF backend!");
+
+  if (ByteAlignment > 32)
+    report_fatal_error("alignment is limited to 32-bytes");
+
+  AssignSection(Symbol, nullptr);
+
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+  SD.setExternal(true);
+  SD.setCommon(Size, ByteAlignment);
 }
 
-void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                            unsigned ByteAlignment) {
-  assert((Symbol->isInSection()
-             ? Symbol->getSection().getVariant() == MCSection::SV_COFF
-             : true) && "Got non-COFF section in the COFF backend!");
-  AddCommonSymbol(Symbol, Size, ByteAlignment, false);
+void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                              unsigned ByteAlignment) {
+  assert(!Symbol->isInSection() && "Symbol must not already have a section!");
+
+  const MCSection *Section = getContext().getObjectFileInfo()->getBSSSection();
+  MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
+  if (SectionData.getAlignment() < ByteAlignment)
+    SectionData.setAlignment(ByteAlignment);
+
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+  SD.setExternal(false);
+
+  AssignSection(Symbol, Section);
+
+  if (ByteAlignment != 1)
+    new MCAlignFragment(ByteAlignment, /*_Value=*/0, /*_ValueSize=*/0,
+                        ByteAlignment, &SectionData);
+
+  MCFillFragment *Fragment =
+      new MCFillFragment(/*_Value=*/0, /*_ValueSize=*/0, Size, &SectionData);
+  SD.setFragment(Fragment);
 }
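[Reviewer note] EmitLocalCommonSymbol above now lays local commons out in .bss by hand: bump the section alignment, emit an align fragment, then a zero-fill fragment. The arithmetic those two fragments implement, as a standalone model (plain C++, not the LLVM API):

    #include <cstdint>

    // Model of the MCAlignFragment + MCFillFragment pair: round the running
    // .bss offset up to ByteAlignment, then reserve Size zeroed bytes.
    uint64_t allocateLocalCommon(uint64_t &BSSOffset, uint64_t Size,
                                 uint64_t ByteAlignment) {
      BSSOffset = (BSSOffset + ByteAlignment - 1) & ~(ByteAlignment - 1);
      uint64_t SymbolOffset = BSSOffset; // where the symbol's fragment begins
      BSSOffset += Size;                 // zero fill occupies no file contents
      return SymbolOffset;
    }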
-void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
-                                   uint64_t Size,unsigned ByteAlignment) {
+void MCWinCOFFStreamer::EmitZerofill(const MCSection *Section,
+                                     MCSymbol *Symbol, uint64_t Size,
+                                     unsigned ByteAlignment) {
   llvm_unreachable("not implemented");
 }
 
-void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
-                                     uint64_t Size, unsigned ByteAlignment) {
+void MCWinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section,
+                                       MCSymbol *Symbol, uint64_t Size,
+                                       unsigned ByteAlignment) {
   llvm_unreachable("not implemented");
 }
 
-void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
-  // Ignore for now, linkers don't care, and proper debug
-  // info will be a much large effort.
+void MCWinCOFFStreamer::EmitFileDirective(StringRef Filename) {
+  getAssembler().addFileName(Filename);
 }
 
 // TODO: Implement this if you want to emit .comment section in COFF obj files.
-void WinCOFFStreamer::EmitIdent(StringRef IdentString) {
-  llvm_unreachable("unsupported directive");
+void MCWinCOFFStreamer::EmitIdent(StringRef IdentString) {
+  llvm_unreachable("not implemented");
 }
 
-void WinCOFFStreamer::EmitWin64EHHandlerData() {
-  MCStreamer::EmitWin64EHHandlerData();
-
-  // We have to emit the unwind info now, because this directive
-  // actually switches to the .xdata section!
-  MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
+void MCWinCOFFStreamer::EmitWin64EHHandlerData() {
+  llvm_unreachable("not implemented");
 }
 
-void WinCOFFStreamer::FinishImpl() {
-  EmitFrames(NULL, true);
-  EmitW64Tables();
+void MCWinCOFFStreamer::FinishImpl() {
   MCObjectStreamer::FinishImpl();
 }
 
-namespace llvm
-{
-  MCStreamer *createWinCOFFStreamer(MCContext &Context,
-                                    MCAsmBackend &MAB,
-                                    MCCodeEmitter &CE,
-                                    raw_ostream &OS,
-                                    bool RelaxAll) {
-    WinCOFFStreamer *S = new WinCOFFStreamer(Context, MAB, CE, OS);
-    S->getAssembler().setRelaxAll(RelaxAll);
-    return S;
-  }
+LLVM_ATTRIBUTE_NORETURN
+void MCWinCOFFStreamer::FatalError(const Twine &Msg) const {
+  getContext().FatalError(SMLoc(), Msg);
+}
 }
+
diff --git a/lib/Object/Android.mk b/lib/Object/Android.mk
index 7dfa44f..bd9659c 100644
--- a/lib/Object/Android.mk
+++ b/lib/Object/Android.mk
@@ -12,6 +12,7 @@ object_SRC_FILES := \
   MachOUniversal.cpp \
   Object.cpp \
   ObjectFile.cpp \
+  StringTableBuilder.cpp \
   SymbolicFile.cpp
 
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 999bf28..304ca47 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -13,7 +13,6 @@
 
 #include "llvm/Object/Archive.h"
 #include "llvm/ADT/APInt.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/Endian.h"
@@ -111,7 +110,7 @@ Archive::Child Archive::Child::getNext() const {
 
   // Check to see if this is past the end of the archive.
   if (NextLoc >= Parent->Data->getBufferEnd())
-    return Child(Parent, NULL);
+    return Child(Parent, nullptr);
 
   return Child(Parent, NextLoc);
 }
@@ -183,14 +182,6 @@ error_code Archive::Child::getMemoryBuffer(std::unique_ptr<MemoryBuffer> &Result
   return error_code::success();
 }
 
-error_code Archive::Child::getMemoryBuffer(OwningPtr<MemoryBuffer> &Result,
-                                           bool FullPath) const {
-  std::unique_ptr<MemoryBuffer> MB;
-  error_code ec = getMemoryBuffer(MB, FullPath);
-  Result = std::move(MB);
-  return ec;
-}
-
 error_code Archive::Child::getAsBinary(std::unique_ptr<Binary> &Result,
                                        LLVMContext *Context) const {
   std::unique_ptr<Binary> ret;
@@ -204,14 +195,6 @@ error_code Archive::Child::getAsBinary(std::unique_ptr<Binary> &Result,
   return object_error::success;
 }
 
-error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result,
-                                       LLVMContext *Context) const {
-  std::unique_ptr<Binary> B;
-  error_code ec = getAsBinary(B, Context);
-  Result = std::move(B);
-  return ec;
-}
-
 ErrorOr<Archive *> Archive::create(MemoryBuffer *Source) {
   error_code EC;
   std::unique_ptr<Archive> Ret(new Archive(Source, EC));
@@ -349,7 +332,7 @@ Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
 }
 
 Archive::child_iterator Archive::child_end() const {
-  return Child(this, NULL);
+  return Child(this, nullptr);
 }
 
 error_code Archive::Symbol::getName(StringRef &Result) const {
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index dc18296..cd8c9ef 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_library(LLVMObject
   MachOUniversal.cpp
   Object.cpp
   ObjectFile.cpp
+  StringTableBuilder.cpp
   SymbolicFile.cpp
   YAML.cpp
   )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index a75ebbf..262c040 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -138,7 +138,7 @@ error_code COFFObjectFile::getSymbolName(DataRefImpl Ref,
 error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
                                             uint64_t &Result) const {
   const coff_symbol *Symb = toSymb(Ref);
-  const coff_section *Section = NULL;
+  const coff_section *Section = nullptr;
   if (error_code EC = getSection(Symb->SectionNumber, Section))
     return EC;
 
@@ -163,7 +163,7 @@ error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
   } else {
     uint32_t Characteristics = 0;
     if (!COFF::isReservedSectionNumber(Symb->SectionNumber)) {
-      const coff_section *Section = NULL;
+      const coff_section *Section = nullptr;
       if (error_code EC = getSection(Symb->SectionNumber, Section))
         return EC;
       Characteristics = Section->Characteristics;
@@ -208,7 +208,7 @@ error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
   // in the same section as this symbol, and looking for either the next
   // symbol, or the end of the section.
   const coff_symbol *Symb = toSymb(Ref);
-  const coff_section *Section = NULL;
+  const coff_section *Section = nullptr;
   if (error_code EC = getSection(Symb->SectionNumber, Section))
     return EC;
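[Reviewer note] The getSymbolSize hunk above relies on the trick spelled out in its comment: COFF stores no symbol size, so it is inferred by scanning for the next symbol in the same section, falling back to the section end. A standalone sketch of that scan (plain C++ with illustrative types, not the LLVM API):

    #include <cstdint>
    #include <vector>

    struct SymInfo {
      uint64_t Addr;
      unsigned SectionIdx;
    };

    // Size of Syms[I]: distance to the nearest later symbol in the same
    // section, or to SectionEnd when no such symbol exists.
    uint64_t inferSymbolSize(const std::vector<SymInfo> &Syms, size_t I,
                             uint64_t SectionEnd) {
      uint64_t Best = SectionEnd;
      for (const SymInfo &S : Syms)
        if (S.SectionIdx == Syms[I].SectionIdx && S.Addr > Syms[I].Addr &&
            S.Addr < Best)
          Best = S.Addr;
      return Best - Syms[I].Addr;
    }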
@@ -227,7 +227,7 @@ error_code COFFObjectFile::getSymbolSection(DataRefImpl Ref,
   if (COFF::isReservedSectionNumber(Symb->SectionNumber)) {
     Result = section_end();
   } else {
-    const coff_section *Sec = 0;
+    const coff_section *Sec = nullptr;
     if (error_code EC = getSection(Symb->SectionNumber, Sec))
       return EC;
     DataRefImpl Ref;
     Ref.p = reinterpret_cast<uintptr_t>(Sec);
@@ -334,7 +334,7 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
                                                  bool &Result) const {
   const coff_section *Sec = toSec(SecRef);
   const coff_symbol *Symb = toSymb(SymbRef);
-  const coff_section *SymbSec = 0;
+  const coff_section *SymbSec = nullptr;
   if (error_code EC = getSection(Symb->SectionNumber, SymbSec))
     return EC;
   if (SymbSec == Sec)
     Result = true;
@@ -389,11 +389,6 @@ relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Ref) const {
   return relocation_iterator(RelocationRef(Ret, this));
 }
 
-bool COFFObjectFile::section_rel_empty(DataRefImpl Ref) const {
-  const coff_section *Sec = toSec(Ref);
-  return Sec->NumberOfRelocations == 0;
-}
-
 // Initialize the pointer to the symbol table.
 error_code COFFObjectFile::initSymbolTablePtr() {
   if (error_code EC = getObject(
@@ -512,10 +507,11 @@ error_code COFFObjectFile::initExportTablePtr() {
 
 COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC,
                                bool BufferOwned)
-    : ObjectFile(Binary::ID_COFF, Object, BufferOwned), COFFHeader(0),
-      PE32Header(0), PE32PlusHeader(0), DataDirectory(0), SectionTable(0),
-      SymbolTable(0), StringTable(0), StringTableSize(0), ImportDirectory(0),
-      NumberOfImportDirectory(0), ExportDirectory(0) {
+    : ObjectFile(Binary::ID_COFF, Object, BufferOwned), COFFHeader(nullptr),
+      PE32Header(nullptr), PE32PlusHeader(nullptr), DataDirectory(nullptr),
+      SectionTable(nullptr), SymbolTable(nullptr), StringTable(nullptr),
+      StringTableSize(0), ImportDirectory(nullptr), NumberOfImportDirectory(0),
+      ExportDirectory(nullptr) {
   // Check that we at least have enough room for a header.
   if (!checkSize(Data, EC, sizeof(coff_file_header)))
     return;
@@ -637,8 +633,8 @@ export_directory_iterator COFFObjectFile::export_directory_begin() const {
 }
 
 export_directory_iterator COFFObjectFile::export_directory_end() const {
-  if (ExportDirectory == 0)
-    return export_directory_iterator(ExportDirectoryEntryRef(0, 0, this));
+  if (!ExportDirectory)
+    return export_directory_iterator(ExportDirectoryEntryRef(nullptr, 0, this));
   ExportDirectoryEntryRef Ref(ExportDirectory,
                               ExportDirectory->AddressTableEntries, this);
   return export_directory_iterator(Ref);
@@ -728,7 +724,7 @@ error_code COFFObjectFile::getSection(int32_t Index,
                                       const coff_section *&Result) const {
   // Check for special index values.
   if (COFF::isReservedSectionNumber(Index))
-    Result = NULL;
+    Result = nullptr;
   else if (Index > 0 && Index <= COFFHeader->NumberOfSections)
     // We already verified the section table data, so no need to check again.
     Result = SectionTable + (Index - 1);
@@ -778,7 +774,7 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *Symbol,
 
 ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData(
                                   const coff_symbol *Symbol) const {
-  const uint8_t *Aux = NULL;
+  const uint8_t *Aux = nullptr;
 
   if (Symbol->NumberOfAuxSymbols > 0) {
     // AUX data comes immediately after the symbol in COFF
@@ -923,6 +919,27 @@ error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
       Res = "Unknown";
     }
     break;
+  case COFF::IMAGE_FILE_MACHINE_ARMNT:
+    switch (Reloc->Type) {
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ABSOLUTE);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32NB);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH11);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_TOKEN);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX24);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX11);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECTION);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECREL);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32A);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32T);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH20T);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T);
+    LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T);
+    default:
+      Res = "Unknown";
+    }
+    break;
   case COFF::IMAGE_FILE_MACHINE_I386:
     switch (Reloc->Type) {
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE);
@@ -952,7 +969,7 @@ error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
 error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
                                           SmallVectorImpl<char> &Result) const {
   const coff_relocation *Reloc = toRel(Rel);
-  const coff_symbol *Symb = 0;
+  const coff_symbol *Symb = nullptr;
   if (error_code EC = getSymbol(Reloc->SymbolTableIndex, Symb))
     return EC;
   DataRefImpl Sym;
   Sym.p = reinterpret_cast<uintptr_t>(Symb);
diff --git a/lib/Object/COFFYAML.cpp b/lib/Object/COFFYAML.cpp
index 94b72ff..49c5dda 100644
--- a/lib/Object/COFFYAML.cpp
+++ b/lib/Object/COFFYAML.cpp
@@ -38,6 +38,7 @@ void ScalarEnumerationTraits<COFF::MachineTypes>::enumeration(
 void ScalarEnumerationTraits<COFFYAML::WeakExternalCharacteristics>::enumeration(
     IO &IO, COFFYAML::WeakExternalCharacteristics &Value) {
+  IO.enumCase(Value, "0", 0);
   ECase(IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY);
   ECase(IMAGE_WEAK_EXTERN_SEARCH_LIBRARY);
   ECase(IMAGE_WEAK_EXTERN_SEARCH_ALIAS);
@@ -132,8 +133,8 @@ void ScalarEnumerationTraits<COFF::SymbolComplexType>::enumeration(
   ECase(IMAGE_SYM_DTYPE_ARRAY);
 }
 
-void ScalarEnumerationTraits<COFF::RelocationTypeX86>::enumeration(
-    IO &IO, COFF::RelocationTypeX86 &Value) {
+void ScalarEnumerationTraits<COFF::RelocationTypeI386>::enumeration(
+    IO &IO, COFF::RelocationTypeI386 &Value) {
   ECase(IMAGE_REL_I386_ABSOLUTE);
   ECase(IMAGE_REL_I386_DIR16);
   ECase(IMAGE_REL_I386_REL16);
@@ -145,6 +146,10 @@ void ScalarEnumerationTraits<COFF::RelocationTypeX86>::enumeration(
   ECase(IMAGE_REL_I386_TOKEN);
   ECase(IMAGE_REL_I386_SECREL7);
   ECase(IMAGE_REL_I386_REL32);
+}
+
+void ScalarEnumerationTraits<COFF::RelocationTypeAMD64>::enumeration(
+    IO &IO, COFF::RelocationTypeAMD64 &Value) {
   ECase(IMAGE_REL_AMD64_ABSOLUTE);
   ECase(IMAGE_REL_AMD64_ADDR64);
   ECase(IMAGE_REL_AMD64_ADDR32);
@@ -272,22 +277,33 @@ struct NHeaderCharacteristics {
   COFF::Characteristics Characteristics;
 };
 
+template <typename RelocType>
 struct NType {
-  NType(IO &) : Type(COFF::RelocationTypeX86(0)) {}
-  NType(IO &, uint16_t T) : Type(COFF::RelocationTypeX86(T)) {}
+  NType(IO &) : Type(RelocType(0)) {}
+  NType(IO &, uint16_t T) : Type(RelocType(T)) {}
   uint16_t denormalize(IO &) { return Type; }
-  COFF::RelocationTypeX86 Type;
+  RelocType Type;
 };
 
 }
 
 void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
                                                   COFFYAML::Relocation &Rel) {
-  MappingNormalization<NType, uint16_t> NT(IO, Rel.Type);
-
   IO.mapRequired("VirtualAddress", Rel.VirtualAddress);
   IO.mapRequired("SymbolName", Rel.SymbolName);
-  IO.mapRequired("Type", NT->Type);
+
+  COFF::header &H = *static_cast<COFF::header *>(IO.getContext());
+  if (H.Machine == COFF::IMAGE_FILE_MACHINE_I386) {
+    MappingNormalization<NType<COFF::RelocationTypeI386>, uint16_t> NT(
+        IO, Rel.Type);
+    IO.mapRequired("Type", NT->Type);
+  } else if (H.Machine == COFF::IMAGE_FILE_MACHINE_AMD64) {
+    MappingNormalization<NType<COFF::RelocationTypeAMD64>, uint16_t> NT(
+        IO, Rel.Type);
+    IO.mapRequired("Type", NT->Type);
+  } else {
+    IO.mapRequired("Type", Rel.Type);
+  }
 }
 
 void MappingTraits<COFF::header>::mapping(IO &IO, COFF::header &H) {
@@ -297,6 +313,7 @@ void MappingTraits<COFF::header>::mapping(IO &IO, COFF::header &H) {
 
   IO.mapRequired("Machine", NM->Machine);
   IO.mapOptional("Characteristics", NC->Characteristics);
+  IO.setContext(static_cast<void *>(&H));
 }
 
 void MappingTraits<COFF::AuxiliaryFunctionDefinition>::mapping(
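[Reviewer note] The COFFYAML change above threads the object's machine type through the YAML IO context: the header mapping stashes a pointer with setContext, and the relocation mapping reads it back to pick the right enum set. A minimal standalone model of that pattern (types here are illustrative stand-ins, not the yaml::IO API):

    #include <cstdint>
    #include <string>

    struct IOCtx {
      void *Context = nullptr;
      void setContext(void *C) { Context = C; }
      void *getContext() const { return Context; }
    };

    struct Header { uint16_t Machine; };

    // Later mappings consult the stashed header to interpret a raw value.
    std::string relocKindFor(IOCtx &IO, uint16_t Type) {
      auto *H = static_cast<Header *>(IO.getContext());
      if (H->Machine == 0x14c)  // i386
        return "I386 reloc " + std::to_string(Type);
      if (H->Machine == 0x8664) // AMD64
        return "AMD64 reloc " + std::to_string(Type);
      return "raw " + std::to_string(Type);
    }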
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index e9a88bf..df4dd5e 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -159,6 +159,15 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_HI16);
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_LO16);
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC21_S2);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC26_S2);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC18_S3);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC19_S2);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PCHI16);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PCLO16);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS16_GOT16);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS16_HI16);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS16_LO16);
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY);
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT);
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_26_S1);
@@ -177,6 +186,7 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_HI16);
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_LO16);
     LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NUM);
+    LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC32);
   default:
     break;
   }
diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp
index d513670..7d50f23 100644
--- a/lib/Object/ELFYAML.cpp
+++ b/lib/Object/ELFYAML.cpp
@@ -12,8 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Object/ELFYAML.h"
+#include "llvm/Support/Casting.h"
 
 namespace llvm {
+
+ELFYAML::Section::~Section() {}
+
 namespace yaml {
 
 void
@@ -239,44 +243,57 @@ void ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(
 
 void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
                                                  ELFYAML::ELF_EF &Value) {
+  const auto *Object = static_cast<const ELFYAML::Object *>(IO.getContext());
+  assert(Object && "The IO context is not initialized");
 #define BCase(X) IO.bitSetCase(Value, #X, ELF::X);
-  BCase(EF_ARM_SOFT_FLOAT)
-  BCase(EF_ARM_VFP_FLOAT)
-  BCase(EF_ARM_EABI_UNKNOWN)
-  BCase(EF_ARM_EABI_VER1)
-  BCase(EF_ARM_EABI_VER2)
-  BCase(EF_ARM_EABI_VER3)
-  BCase(EF_ARM_EABI_VER4)
-  BCase(EF_ARM_EABI_VER5)
-  BCase(EF_ARM_EABIMASK)
-  BCase(EF_MIPS_NOREORDER)
-  BCase(EF_MIPS_PIC)
-  BCase(EF_MIPS_CPIC)
-  BCase(EF_MIPS_ABI2)
-  BCase(EF_MIPS_32BITMODE)
-  BCase(EF_MIPS_ABI_O32)
-  BCase(EF_MIPS_MICROMIPS)
-  BCase(EF_MIPS_ARCH_ASE_M16)
-  BCase(EF_MIPS_ARCH_1)
-  BCase(EF_MIPS_ARCH_2)
-  BCase(EF_MIPS_ARCH_3)
-  BCase(EF_MIPS_ARCH_4)
-  BCase(EF_MIPS_ARCH_5)
-  BCase(EF_MIPS_ARCH_32)
-  BCase(EF_MIPS_ARCH_64)
-  BCase(EF_MIPS_ARCH_32R2)
-  BCase(EF_MIPS_ARCH_64R2)
-  BCase(EF_MIPS_ARCH)
-  BCase(EF_HEXAGON_MACH_V2)
-  BCase(EF_HEXAGON_MACH_V3)
-  BCase(EF_HEXAGON_MACH_V4)
-  BCase(EF_HEXAGON_MACH_V5)
-  BCase(EF_HEXAGON_ISA_MACH)
-  BCase(EF_HEXAGON_ISA_V2)
-  BCase(EF_HEXAGON_ISA_V3)
-  BCase(EF_HEXAGON_ISA_V4)
-  BCase(EF_HEXAGON_ISA_V5)
+#define BCaseMask(X, M) IO.maskedBitSetCase(Value, #X, ELF::X, ELF::M);
+  switch (Object->Header.Machine) {
+  case ELF::EM_ARM:
+    BCase(EF_ARM_SOFT_FLOAT)
+    BCase(EF_ARM_VFP_FLOAT)
+    BCaseMask(EF_ARM_EABI_UNKNOWN, EF_ARM_EABIMASK)
+    BCaseMask(EF_ARM_EABI_VER1, EF_ARM_EABIMASK)
+    BCaseMask(EF_ARM_EABI_VER2, EF_ARM_EABIMASK)
+    BCaseMask(EF_ARM_EABI_VER3, EF_ARM_EABIMASK)
+    BCaseMask(EF_ARM_EABI_VER4, EF_ARM_EABIMASK)
+    BCaseMask(EF_ARM_EABI_VER5, EF_ARM_EABIMASK)
+    break;
+  case ELF::EM_MIPS:
+    BCase(EF_MIPS_NOREORDER)
+    BCase(EF_MIPS_PIC)
+    BCase(EF_MIPS_CPIC)
+    BCase(EF_MIPS_ABI2)
+    BCase(EF_MIPS_32BITMODE)
+    BCase(EF_MIPS_ABI_O32)
+    BCase(EF_MIPS_MICROMIPS)
+    BCase(EF_MIPS_ARCH_ASE_M16)
+    BCaseMask(EF_MIPS_ARCH_1, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_2, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_3, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_4, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_5, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_32, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_64, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_32R2, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_64R2, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_32R6, EF_MIPS_ARCH)
+    BCaseMask(EF_MIPS_ARCH_64R6, EF_MIPS_ARCH)
+    break;
+  case ELF::EM_HEXAGON:
+    BCase(EF_HEXAGON_MACH_V2)
+    BCase(EF_HEXAGON_MACH_V3)
+    BCase(EF_HEXAGON_MACH_V4)
+    BCase(EF_HEXAGON_MACH_V5)
+    BCase(EF_HEXAGON_ISA_V2)
+    BCase(EF_HEXAGON_ISA_V3)
+    BCase(EF_HEXAGON_ISA_V4)
+    BCase(EF_HEXAGON_ISA_V5)
+    break;
+  default:
+    llvm_unreachable("Unsupported architecture");
+  }
 #undef BCase
+#undef BCaseMask
 }
 
 void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
@@ -300,6 +317,23 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
   ECase(SHT_PREINIT_ARRAY)
   ECase(SHT_GROUP)
   ECase(SHT_SYMTAB_SHNDX)
+  ECase(SHT_LOOS)
+  ECase(SHT_GNU_ATTRIBUTES)
+  ECase(SHT_GNU_HASH)
+  ECase(SHT_GNU_verdef)
+  ECase(SHT_GNU_verneed)
+  ECase(SHT_GNU_versym)
+  ECase(SHT_HIOS)
+  ECase(SHT_LOPROC)
+  ECase(SHT_ARM_EXIDX)
+  ECase(SHT_ARM_PREEMPTMAP)
+  ECase(SHT_ARM_ATTRIBUTES)
+  ECase(SHT_ARM_DEBUGOVERLAY)
+  ECase(SHT_ARM_OVERLAYSECTION)
+  ECase(SHT_HEX_ORDERED)
+  ECase(SHT_X86_64_UNWIND)
+  ECase(SHT_MIPS_REGINFO)
+  ECase(SHT_MIPS_OPTIONS)
 #undef ECase
 }
 
@@ -334,6 +368,270 @@ void ScalarEnumerationTraits<ELFYAML::ELF_STV>::enumeration(
 #undef ECase
 }
 
+void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
+    IO &IO, ELFYAML::ELF_REL &Value) {
+  const auto *Object = static_cast<const ELFYAML::Object *>(IO.getContext());
+  assert(Object && "The IO context is not initialized");
+#define ECase(X) IO.enumCase(Value, #X, ELF::X);
+  switch (Object->Header.Machine) {
+  case ELF::EM_X86_64:
+    ECase(R_X86_64_NONE)
+    ECase(R_X86_64_64)
+    ECase(R_X86_64_PC32)
+    ECase(R_X86_64_GOT32)
+    ECase(R_X86_64_PLT32)
+    ECase(R_X86_64_COPY)
+    ECase(R_X86_64_GLOB_DAT)
+    ECase(R_X86_64_JUMP_SLOT)
+    ECase(R_X86_64_RELATIVE)
+    ECase(R_X86_64_GOTPCREL)
+    ECase(R_X86_64_32)
+    ECase(R_X86_64_32S)
+    ECase(R_X86_64_16)
+    ECase(R_X86_64_PC16)
+    ECase(R_X86_64_8)
+    ECase(R_X86_64_PC8)
+    ECase(R_X86_64_DTPMOD64)
+    ECase(R_X86_64_DTPOFF64)
+    ECase(R_X86_64_TPOFF64)
+    ECase(R_X86_64_TLSGD)
+    ECase(R_X86_64_TLSLD)
+    ECase(R_X86_64_DTPOFF32)
+    ECase(R_X86_64_GOTTPOFF)
+    ECase(R_X86_64_TPOFF32)
+    ECase(R_X86_64_PC64)
+    ECase(R_X86_64_GOTOFF64)
+    ECase(R_X86_64_GOTPC32)
+    ECase(R_X86_64_GOT64)
+    ECase(R_X86_64_GOTPCREL64)
+    ECase(R_X86_64_GOTPC64)
+    ECase(R_X86_64_GOTPLT64)
+    ECase(R_X86_64_PLTOFF64)
+    ECase(R_X86_64_SIZE32)
+    ECase(R_X86_64_SIZE64)
+    ECase(R_X86_64_GOTPC32_TLSDESC)
+    ECase(R_X86_64_TLSDESC_CALL)
+    ECase(R_X86_64_TLSDESC)
+    ECase(R_X86_64_IRELATIVE)
+    break;
+  case ELF::EM_MIPS:
+    ECase(R_MIPS_NONE)
+    ECase(R_MIPS_16)
+    ECase(R_MIPS_32)
+    ECase(R_MIPS_REL32)
+    ECase(R_MIPS_26)
+    ECase(R_MIPS_HI16)
+    ECase(R_MIPS_LO16)
+    ECase(R_MIPS_GPREL16)
+    ECase(R_MIPS_LITERAL)
+    ECase(R_MIPS_GOT16)
+    ECase(R_MIPS_PC16)
+    ECase(R_MIPS_CALL16)
+    ECase(R_MIPS_GPREL32)
+    ECase(R_MIPS_UNUSED1)
+    ECase(R_MIPS_UNUSED2)
+    ECase(R_MIPS_SHIFT5)
+    ECase(R_MIPS_SHIFT6)
+    ECase(R_MIPS_64)
+    ECase(R_MIPS_GOT_DISP)
+    ECase(R_MIPS_GOT_PAGE)
+    ECase(R_MIPS_GOT_OFST)
+    ECase(R_MIPS_GOT_HI16)
+    ECase(R_MIPS_GOT_LO16)
+    ECase(R_MIPS_SUB)
+    ECase(R_MIPS_INSERT_A)
+    ECase(R_MIPS_INSERT_B)
+    ECase(R_MIPS_DELETE)
+    ECase(R_MIPS_HIGHER)
+    ECase(R_MIPS_HIGHEST)
+    ECase(R_MIPS_CALL_HI16)
+    ECase(R_MIPS_CALL_LO16)
+    ECase(R_MIPS_SCN_DISP)
+    ECase(R_MIPS_REL16)
+    ECase(R_MIPS_ADD_IMMEDIATE)
+    ECase(R_MIPS_PJUMP)
+    ECase(R_MIPS_RELGOT)
+    ECase(R_MIPS_JALR)
+    ECase(R_MIPS_TLS_DTPMOD32)
+    ECase(R_MIPS_TLS_DTPREL32)
+    ECase(R_MIPS_TLS_DTPMOD64)
+    ECase(R_MIPS_TLS_DTPREL64)
+    ECase(R_MIPS_TLS_GD)
+    ECase(R_MIPS_TLS_LDM)
+    ECase(R_MIPS_TLS_DTPREL_HI16)
+    ECase(R_MIPS_TLS_DTPREL_LO16)
+    ECase(R_MIPS_TLS_GOTTPREL)
+    ECase(R_MIPS_TLS_TPREL32)
+    ECase(R_MIPS_TLS_TPREL64)
+    ECase(R_MIPS_TLS_TPREL_HI16)
+    ECase(R_MIPS_TLS_TPREL_LO16)
+    ECase(R_MIPS_GLOB_DAT)
+    ECase(R_MIPS_PC21_S2)
+    ECase(R_MIPS_PC26_S2)
+    ECase(R_MIPS_PC18_S3)
+    ECase(R_MIPS_PC19_S2)
+    ECase(R_MIPS_PCHI16)
+    ECase(R_MIPS_PCLO16)
+    ECase(R_MIPS16_GOT16)
+    ECase(R_MIPS16_HI16)
+    ECase(R_MIPS16_LO16)
+    ECase(R_MIPS_COPY)
+    ECase(R_MIPS_JUMP_SLOT)
+    ECase(R_MICROMIPS_26_S1)
+    ECase(R_MICROMIPS_HI16)
+    ECase(R_MICROMIPS_LO16)
+    ECase(R_MICROMIPS_GOT16)
+    ECase(R_MICROMIPS_PC16_S1)
+    ECase(R_MICROMIPS_CALL16)
+    ECase(R_MICROMIPS_GOT_DISP)
+    ECase(R_MICROMIPS_GOT_PAGE)
+    ECase(R_MICROMIPS_GOT_OFST)
+    ECase(R_MICROMIPS_TLS_GD)
+    ECase(R_MICROMIPS_TLS_LDM)
+    ECase(R_MICROMIPS_TLS_DTPREL_HI16)
+    ECase(R_MICROMIPS_TLS_DTPREL_LO16)
+    ECase(R_MICROMIPS_TLS_TPREL_HI16)
+    ECase(R_MICROMIPS_TLS_TPREL_LO16)
+    ECase(R_MIPS_NUM)
+    ECase(R_MIPS_PC32)
+    break;
+  case ELF::EM_HEXAGON:
+    ECase(R_HEX_NONE)
+    ECase(R_HEX_B22_PCREL)
+    ECase(R_HEX_B15_PCREL)
+    ECase(R_HEX_B7_PCREL)
+    ECase(R_HEX_LO16)
+    ECase(R_HEX_HI16)
+    ECase(R_HEX_32)
+    ECase(R_HEX_16)
+    ECase(R_HEX_8)
+    ECase(R_HEX_GPREL16_0)
+    ECase(R_HEX_GPREL16_1)
+    ECase(R_HEX_GPREL16_2)
+    ECase(R_HEX_GPREL16_3)
+    ECase(R_HEX_HL16)
+    ECase(R_HEX_B13_PCREL)
+    ECase(R_HEX_B9_PCREL)
+    ECase(R_HEX_B32_PCREL_X)
+    ECase(R_HEX_32_6_X)
+    ECase(R_HEX_B22_PCREL_X)
+    ECase(R_HEX_B15_PCREL_X)
+    ECase(R_HEX_B13_PCREL_X)
+    ECase(R_HEX_B9_PCREL_X)
+    ECase(R_HEX_B7_PCREL_X)
+    ECase(R_HEX_16_X)
+    ECase(R_HEX_12_X)
+    ECase(R_HEX_11_X)
+    ECase(R_HEX_10_X)
+    ECase(R_HEX_9_X)
+    ECase(R_HEX_8_X)
+    ECase(R_HEX_7_X)
+    ECase(R_HEX_6_X)
+    ECase(R_HEX_32_PCREL)
+    ECase(R_HEX_COPY)
+    ECase(R_HEX_GLOB_DAT)
+    ECase(R_HEX_JMP_SLOT)
+    ECase(R_HEX_RELATIVE)
+    ECase(R_HEX_PLT_B22_PCREL)
+    ECase(R_HEX_GOTREL_LO16)
+    ECase(R_HEX_GOTREL_HI16)
+    ECase(R_HEX_GOTREL_32)
+    ECase(R_HEX_GOT_LO16)
+    ECase(R_HEX_GOT_HI16)
+    ECase(R_HEX_GOT_32)
+    ECase(R_HEX_GOT_16)
+    ECase(R_HEX_DTPMOD_32)
+    ECase(R_HEX_DTPREL_LO16)
+    ECase(R_HEX_DTPREL_HI16)
+    ECase(R_HEX_DTPREL_32)
+    ECase(R_HEX_DTPREL_16)
+    ECase(R_HEX_GD_PLT_B22_PCREL)
+    ECase(R_HEX_GD_GOT_LO16)
+    ECase(R_HEX_GD_GOT_HI16)
+    ECase(R_HEX_GD_GOT_32)
+    ECase(R_HEX_GD_GOT_16)
+    ECase(R_HEX_IE_LO16)
+    ECase(R_HEX_IE_HI16)
+    ECase(R_HEX_IE_32)
+    ECase(R_HEX_IE_GOT_LO16)
+    ECase(R_HEX_IE_GOT_HI16)
+    ECase(R_HEX_IE_GOT_32)
+    ECase(R_HEX_IE_GOT_16)
+    ECase(R_HEX_TPREL_LO16)
+    ECase(R_HEX_TPREL_HI16)
+    ECase(R_HEX_TPREL_32)
+    ECase(R_HEX_TPREL_16)
+    ECase(R_HEX_6_PCREL_X)
+    ECase(R_HEX_GOTREL_32_6_X)
+    ECase(R_HEX_GOTREL_16_X)
+    ECase(R_HEX_GOTREL_11_X)
+    ECase(R_HEX_GOT_32_6_X)
+    ECase(R_HEX_GOT_16_X)
+    ECase(R_HEX_GOT_11_X)
+    ECase(R_HEX_DTPREL_32_6_X)
+    ECase(R_HEX_DTPREL_16_X)
+    ECase(R_HEX_DTPREL_11_X)
+    ECase(R_HEX_GD_GOT_32_6_X)
+    ECase(R_HEX_GD_GOT_16_X)
+    ECase(R_HEX_GD_GOT_11_X)
+    ECase(R_HEX_IE_32_6_X)
+    ECase(R_HEX_IE_16_X)
+    ECase(R_HEX_IE_GOT_32_6_X)
+    ECase(R_HEX_IE_GOT_16_X)
+    ECase(R_HEX_IE_GOT_11_X)
+    ECase(R_HEX_TPREL_32_6_X)
+    ECase(R_HEX_TPREL_16_X)
+    ECase(R_HEX_TPREL_11_X)
+    break;
+  case ELF::EM_386:
+    ECase(R_386_NONE)
+    ECase(R_386_32)
+    ECase(R_386_PC32)
+    ECase(R_386_GOT32)
+    ECase(R_386_PLT32)
+    ECase(R_386_COPY)
+    ECase(R_386_GLOB_DAT)
+    ECase(R_386_JUMP_SLOT)
+    ECase(R_386_RELATIVE)
+    ECase(R_386_GOTOFF)
+    ECase(R_386_GOTPC)
+    ECase(R_386_32PLT)
+    ECase(R_386_TLS_TPOFF)
+    ECase(R_386_TLS_IE)
+    ECase(R_386_TLS_GOTIE)
+    ECase(R_386_TLS_LE)
+    ECase(R_386_TLS_GD)
+    ECase(R_386_TLS_LDM)
+    ECase(R_386_16)
+    ECase(R_386_PC16)
+    ECase(R_386_8)
+    ECase(R_386_PC8)
+    ECase(R_386_TLS_GD_32)
+    ECase(R_386_TLS_GD_PUSH)
+    ECase(R_386_TLS_GD_CALL)
+    ECase(R_386_TLS_GD_POP)
+    ECase(R_386_TLS_LDM_32)
+    ECase(R_386_TLS_LDM_PUSH)
+    ECase(R_386_TLS_LDM_CALL)
+    ECase(R_386_TLS_LDM_POP)
+    ECase(R_386_TLS_LDO_32)
+    ECase(R_386_TLS_IE_32)
+    ECase(R_386_TLS_LE_32)
+    ECase(R_386_TLS_DTPMOD32)
+    ECase(R_386_TLS_DTPOFF32)
+    ECase(R_386_TLS_TPOFF32)
+    ECase(R_386_TLS_GOTDESC)
+    ECase(R_386_TLS_DESC_CALL)
+    ECase(R_386_TLS_DESC)
+    ECase(R_386_IRELATIVE)
+    ECase(R_386_NUM)
+    break;
+  default:
+    llvm_unreachable("Unsupported architecture");
+  }
+#undef ECase
+}
+
 void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
                                                  ELFYAML::FileHeader &FileHdr) {
   IO.mapRequired("Class", FileHdr.Class);
@@ -360,21 +658,72 @@ void MappingTraits<ELFYAML::LocalGlobalWeakSymbols>::mapping(
   IO.mapOptional("Weak", Symbols.Weak);
 }
 
-void MappingTraits<ELFYAML::Section>::mapping(IO &IO,
-                                              ELFYAML::Section &Section) {
+static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
   IO.mapOptional("Name", Section.Name, StringRef());
   IO.mapRequired("Type", Section.Type);
   IO.mapOptional("Flags", Section.Flags, ELFYAML::ELF_SHF(0));
   IO.mapOptional("Address", Section.Address, Hex64(0));
-  IO.mapOptional("Content", Section.Content);
-  IO.mapOptional("Link", Section.Link);
+  IO.mapOptional("Link", Section.Link, StringRef());
+  IO.mapOptional("Info", Section.Info, StringRef());
   IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
 }
 
+static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
+  commonSectionMapping(IO, Section);
+  IO.mapOptional("Content", Section.Content);
+  IO.mapOptional("Size", Section.Size, Hex64(Section.Content.binary_size()));
+}
+
+static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
+  commonSectionMapping(IO, Section);
+  IO.mapOptional("Relocations", Section.Relocations);
+}
+
+void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
+    IO &IO, std::unique_ptr<ELFYAML::Section> &Section) {
+  ELFYAML::ELF_SHT sectionType;
+  if (IO.outputting())
+    sectionType = Section->Type;
+  else
+    IO.mapRequired("Type", sectionType);
+
+  switch (sectionType) {
+  case ELF::SHT_REL:
+  case ELF::SHT_RELA:
+    if (!IO.outputting())
+      Section.reset(new ELFYAML::RelocationSection());
+    sectionMapping(IO, *cast<ELFYAML::RelocationSection>(Section.get()));
+    break;
+  default:
+    if (!IO.outputting())
+      Section.reset(new ELFYAML::RawContentSection());
+    sectionMapping(IO, *cast<ELFYAML::RawContentSection>(Section.get()));
+  }
+}
+
+StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
+    IO &io, std::unique_ptr<ELFYAML::Section> &Section) {
+  const auto *RawSection = dyn_cast<ELFYAML::RawContentSection>(Section.get());
+  if (!RawSection || RawSection->Size >= RawSection->Content.binary_size())
+    return StringRef();
+  return "Section size must be greater or equal to the content size";
+}
+
+void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
+                                                 ELFYAML::Relocation &Rel) {
+  IO.mapRequired("Offset", Rel.Offset);
+  IO.mapRequired("Symbol", Rel.Symbol);
+  IO.mapRequired("Type", Rel.Type);
+  IO.mapOptional("Addend", Rel.Addend);
+}
+
 void MappingTraits<ELFYAML::Object>::mapping(IO &IO, ELFYAML::Object &Object) {
+  assert(!IO.getContext() && "The IO context is initialized already");
+  IO.setContext(&Object);
   IO.mapRequired("FileHeader", Object.Header);
   IO.mapOptional("Sections", Object.Sections);
   IO.mapOptional("Symbols", Object.Symbols);
+  IO.setContext(nullptr);
 }
 
 } // end namespace yaml
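[Reviewer note] The unique_ptr<Section> mapping above is a two-phase read: peek at the "Type" discriminator, allocate the matching subclass, then run the shared field mapping on it. A minimal standalone model of that pattern (the types are illustrative stand-ins for the ELFYAML classes):

    #include <memory>
    #include <string>

    struct Section { virtual ~Section() {} };
    struct RawContentSection : Section {};
    struct RelocationSection : Section {};

    // Discriminator first, then construct; field mapping would follow.
    std::unique_ptr<Section> makeSection(const std::string &Type) {
      if (Type == "SHT_REL" || Type == "SHT_RELA")
        return std::unique_ptr<Section>(new RelocationSection());
      return std::unique_ptr<Section>(new RawContentSection());
    }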
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
index a87da6e..7813832 100644
--- a/lib/Object/LLVMBuild.txt
+++ b/lib/Object/LLVMBuild.txt
@@ -19,4 +19,4 @@ type = Library
 name = Object
 parent = Libraries
 
-required_libraries = Support BitReader
+required_libraries = BitReader Core Support
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 12132a4..c6bab03 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -420,7 +420,8 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian,
                                  bool Is64bits, error_code &EC,
                                  bool BufferOwned)
     : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object, BufferOwned),
-      SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
+      SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
+      DataInCodeLoadCmd(nullptr) {
   uint32_t LoadCommandCount = this->getHeader().ncmds;
   MachO::LoadCommandType SegmentLoadType = is64Bit() ?
     MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT;
@@ -471,10 +472,18 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
                                              uint64_t &Res) const {
   if (is64Bit()) {
     MachO::nlist_64 Entry = getSymbol64TableEntry(Symb);
-    Res = Entry.n_value;
+    if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF &&
+        Entry.n_value == 0)
+      Res = UnknownAddressOrSize;
+    else
+      Res = Entry.n_value;
   } else {
     MachO::nlist Entry = getSymbolTableEntry(Symb);
-    Res = Entry.n_value;
+    if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF &&
+        Entry.n_value == 0)
+      Res = UnknownAddressOrSize;
+    else
+      Res = Entry.n_value;
   }
   return object_error::success;
 }
@@ -500,6 +509,10 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
   nlist_base Entry = getSymbolTableEntryBase(this, DRI);
   uint64_t Value;
   getSymbolAddress(DRI, Value);
+  if (Value == UnknownAddressOrSize) {
+    Result = UnknownAddressOrSize;
+    return object_error::success;
+  }
 
   BeginOffset = Value;
 
@@ -518,6 +531,8 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
       DataRefImpl DRI = Symbol.getRawDataRefImpl();
       Entry = getSymbolTableEntryBase(this, DRI);
       getSymbolAddress(DRI, Value);
+      if (Value == UnknownAddressOrSize)
+        continue;
       if (Entry.n_sect == SectionIndex && Value > BeginOffset)
         if (!EndOffset || Value < EndOffset)
           EndOffset = Value;
@@ -577,7 +592,7 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
     if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) {
       uint64_t Value;
       getSymbolAddress(DRI, Value);
-      if (Value)
+      if (Value && Value != UnknownAddressOrSize)
         Result |= SymbolRef::SF_Common;
     }
   }
@@ -685,15 +700,21 @@ MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const {
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionData(DataRefImpl DRI, bool &Result) const {
-  // FIXME: Unimplemented.
-  Result = false;
+error_code MachOObjectFile::isSectionData(DataRefImpl Sec, bool &Result) const {
+  uint32_t Flags = getSectionFlags(this, Sec);
+  unsigned SectionType = Flags & MachO::SECTION_TYPE;
+  Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
+           !(SectionType == MachO::S_ZEROFILL ||
+             SectionType == MachO::S_GB_ZEROFILL);
   return object_error::success;
 }
 
-error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, bool &Result) const {
-  // FIXME: Unimplemented.
-  Result = false;
+error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec, bool &Result) const {
+  uint32_t Flags = getSectionFlags(this, Sec);
+  unsigned SectionType = Flags & MachO::SECTION_TYPE;
+  Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
+           (SectionType == MachO::S_ZEROFILL ||
+            SectionType == MachO::S_GB_ZEROFILL);
   return object_error::success;
 }
 
@@ -755,65 +776,50 @@ MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
 }
 
 relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const {
-  uint32_t Offset;
-  if (is64Bit()) {
-    MachO::section_64 Sect = getSection64(Sec);
-    Offset = Sect.reloff;
-  } else {
-    MachO::section Sect = getSection(Sec);
-    Offset = Sect.reloff;
-  }
-
   DataRefImpl Ret;
-  Ret.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+  Ret.d.a = Sec.d.a;
+  Ret.d.b = 0;
   return relocation_iterator(RelocationRef(Ret, this));
 }
 
 relocation_iterator MachOObjectFile::section_rel_end(DataRefImpl Sec) const {
-  uint32_t Offset;
   uint32_t Num;
   if (is64Bit()) {
     MachO::section_64 Sect = getSection64(Sec);
-    Offset = Sect.reloff;
     Num = Sect.nreloc;
   } else {
     MachO::section Sect = getSection(Sec);
-    Offset = Sect.reloff;
     Num = Sect.nreloc;
   }
 
-  const MachO::any_relocation_info *P =
-    reinterpret_cast<const MachO::any_relocation_info *>(getPtr(this, Offset));
-
   DataRefImpl Ret;
-  Ret.p = reinterpret_cast<uintptr_t>(P + Num);
+  Ret.d.a = Sec.d.a;
+  Ret.d.b = Num;
   return relocation_iterator(RelocationRef(Ret, this));
 }
 
-bool MachOObjectFile::section_rel_empty(DataRefImpl Sec) const {
-  if (is64Bit()) {
-    MachO::section_64 Sect = getSection64(Sec);
-    return Sect.nreloc == 0;
-  } else {
-    MachO::section Sect = getSection(Sec);
-    return Sect.nreloc == 0;
-  }
-}
-
 void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
-  const MachO::any_relocation_info *P =
-    reinterpret_cast<const MachO::any_relocation_info *>(Rel.p);
-  Rel.p = reinterpret_cast<uintptr_t>(P + 1);
+  ++Rel.d.b;
 }
 
 error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
                                                  uint64_t &Res) const {
-  report_fatal_error("getRelocationAddress not implemented in MachOObjectFile");
+  uint64_t Offset;
+  getRelocationOffset(Rel, Offset);
+
+  DataRefImpl Sec;
+  Sec.d.a = Rel.d.a;
+  uint64_t SecAddress;
+  getSectionAddress(Sec, SecAddress);
+  Res = SecAddress + Offset;
+  return object_error::success;
 }
 
 error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
                                                 uint64_t &Res) const {
+  assert(getHeader().filetype == MachO::MH_OBJECT &&
+         "Only implemented for MH_OBJECT");
   MachO::any_relocation_info RE = getRelocation(Rel);
   Res = getAnyRelocationAddress(RE);
   return object_error::success;
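[Reviewer note] The iterator hunks above stop storing raw relocation pointers in DataRefImpl and instead keep a (section index, relocation index) pair, resolving the pointer on demand; the value-string hunks that follow switch callers over to moveRelocationNext accordingly. A standalone model of the lazy resolution (offsets and entry sizes are illustrative, not the real Mach-O layout):

    #include <cstdint>

    struct RelHandle {
      uint32_t SectionIdx; // which section's relocation list
      uint32_t RelIdx;     // position within that list
    };

    struct SectionInfo { uint64_t RelocTableFileOffset; uint32_t NumRelocs; };

    // Equivalent of getRelocation(): base of the section's reloc table plus
    // the entry index scaled by the entry size.
    uint64_t relocFileOffset(const SectionInfo &Sec, const RelHandle &H,
                             uint64_t RelEntrySize = 8) {
      return Sec.RelocTableFileOffset + H.RelIdx * RelEntrySize;
    }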
@@ -986,7 +992,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
     }
     case MachO::X86_64_RELOC_SUBTRACTOR: {
       DataRefImpl RelNext = Rel;
-      RelNext.d.a++;
+      moveRelocationNext(RelNext);
       MachO::any_relocation_info RENext = getRelocation(RelNext);
 
       // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
@@ -1034,7 +1040,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
       return object_error::success;
     case MachO::GENERIC_RELOC_SECTDIFF: {
       DataRefImpl RelNext = Rel;
-      RelNext.d.a++;
+      moveRelocationNext(RelNext);
       MachO::any_relocation_info RENext = getRelocation(RelNext);
 
       // X86 sect diff's must be followed by a relocation of type
@@ -1056,7 +1062,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
       switch (Type) {
       case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
         DataRefImpl RelNext = Rel;
-        RelNext.d.a++;
+        moveRelocationNext(RelNext);
         MachO::any_relocation_info RENext = getRelocation(RelNext);
 
         // X86 sect diff's must be followed by a relocation of type
@@ -1095,7 +1101,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
       printRelocationTargetName(this, RE, fmt);
 
       DataRefImpl RelNext = Rel;
-      RelNext.d.a++;
+      moveRelocationNext(RelNext);
       MachO::any_relocation_info RENext = getRelocation(RelNext);
 
       // ARM half relocs must be followed by a relocation of type
@@ -1172,13 +1178,7 @@ error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
 }
 
 basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const {
-  DataRefImpl DRI;
-  if (!SymtabLoadCmd)
-    return basic_symbol_iterator(SymbolRef(DRI, this));
-
-  MachO::symtab_command Symtab = getSymtabLoadCommand();
-  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff));
-  return basic_symbol_iterator(SymbolRef(DRI, this));
+  return getSymbolByIndex(0);
 }
 
 basic_symbol_iterator MachOObjectFile::symbol_end_impl() const {
@@ -1196,6 +1196,20 @@ basic_symbol_iterator MachOObjectFile::symbol_end_impl() const {
   return basic_symbol_iterator(SymbolRef(DRI, this));
 }
 
+basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
+  DataRefImpl DRI;
+  if (!SymtabLoadCmd)
+    return basic_symbol_iterator(SymbolRef(DRI, this));
+
+  MachO::symtab_command Symtab = getSymtabLoadCommand();
+  assert(Index < Symtab.nsyms && "Requested symbol index is out of range.");
+  unsigned SymbolTableEntrySize =
+    is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff));
+  DRI.p += Index * SymbolTableEntrySize;
+  return basic_symbol_iterator(SymbolRef(DRI, this));
+}
+
 section_iterator MachOObjectFile::section_begin() const {
   DataRefImpl DRI;
   return section_iterator(SectionRef(DRI, this));
@@ -1486,8 +1500,21 @@ MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const {
 
 MachO::any_relocation_info
 MachOObjectFile::getRelocation(DataRefImpl Rel) const {
-  const char *P = reinterpret_cast<const char *>(Rel.p);
-  return getStruct<MachO::any_relocation_info>(this, P);
+  DataRefImpl Sec;
+  Sec.d.a = Rel.d.a;
+  uint32_t Offset;
+  if (is64Bit()) {
+    MachO::section_64 Sect = getSection64(Sec);
+    Offset = Sect.reloff;
+  } else {
+    MachO::section Sect = getSection(Sec);
+    Offset = Sect.reloff;
+  }
+
+  auto P = reinterpret_cast<const MachO::any_relocation_info *>(
+      getPtr(this, Offset)) + Rel.d.b;
+  return getStruct<MachO::any_relocation_info>(
+      this, reinterpret_cast<const char *>(P));
 }
 
 MachO::data_in_code_entry
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index 70baa9f..5085efd 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Object/MachOUniversal.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Archive.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -57,7 +58,7 @@ static T getUniversalBinaryStruct(const char *Ptr) {
 MachOUniversalBinary::ObjectForArch::ObjectForArch(
     const MachOUniversalBinary *Parent, uint32_t Index)
     : Parent(Parent), Index(Index) {
-  if (Parent == 0 || Index > Parent->getNumberOfObjects()) {
+  if (!Parent || Index > Parent->getNumberOfObjects()) {
     clear();
   } else {
     // Parse object header.
@@ -90,6 +91,25 @@ error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile(
   return object_error::parse_failed;
 }
 
+error_code MachOUniversalBinary::ObjectForArch::getAsArchive(
+    std::unique_ptr<Archive> &Result) const {
+  if (Parent) {
+    StringRef ParentData = Parent->getData();
+    StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
+    std::string ObjectName =
+        Parent->getFileName().str() + ":" +
+        Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype));
+    MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer(
+        ObjectData, ObjectName, false);
+    ErrorOr<Archive *> Obj = Archive::create(ObjBuffer);
+    if (error_code EC = Obj.getError())
+      return EC;
+    Result.reset(Obj.get());
+    return object_error::success;
+  }
+  return object_error::parse_failed;
+}
+
 void MachOUniversalBinary::anchor() { }
 
 ErrorOr<MachOUniversalBinary *>
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 243bd44..b0068a8 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -60,7 +60,7 @@ wrap(const relocation_iterator *SI) {
 // ObjectFile creation
 LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
   ErrorOr<ObjectFile *> ObjOrErr(ObjectFile::createObjectFile(unwrap(MemBuf)));
-  ObjectFile *Obj = ObjOrErr ? ObjOrErr.get() : 0;
+  ObjectFile *Obj = ObjOrErr ? ObjOrErr.get() : nullptr;
 
   return wrap(Obj);
 }
@@ -184,13 +184,6 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
   return ret;
 }
 
-uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI) {
-  uint64_t ret;
-  if (error_code ec = (*unwrap(SI))->getFileOffset(ret))
-    report_fatal_error(ec.message());
-  return ret;
-}
-
 uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
   uint64_t ret;
   if (error_code ec = (*unwrap(SI))->getSize(ret))
diff --git a/lib/Object/StringTableBuilder.cpp b/lib/Object/StringTableBuilder.cpp
new file mode 100644
index 0000000..9152834
--- /dev/null
+++ b/lib/Object/StringTableBuilder.cpp
@@ -0,0 +1,51 @@
+//===-- StringTableBuilder.cpp - String table building utility ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Object/StringTableBuilder.h"
+
+using namespace llvm;
+
+static bool compareBySuffix(StringRef a, StringRef b) {
+  size_t sizeA = a.size();
+  size_t sizeB = b.size();
+  size_t len = std::min(sizeA, sizeB);
+  for (size_t i = 0; i < len; ++i) {
+    char ca = a[sizeA - i - 1];
+    char cb = b[sizeB - i - 1];
+    if (ca != cb)
+      return ca > cb;
+  }
+  return sizeA > sizeB;
+}
+
+void StringTableBuilder::finalize() {
+  SmallVector<StringRef, 8> Strings;
+  for (auto i = StringIndexMap.begin(), e = StringIndexMap.end(); i != e; ++i)
+    Strings.push_back(i->getKey());
+
+  std::sort(Strings.begin(), Strings.end(), compareBySuffix);
+
+  // FIXME: Starting with a null byte is ELF specific. Generalize this so we
+  // can use the class with other object formats.
+  StringTable += '\x00';
+
+  StringRef Previous;
+  for (StringRef s : Strings) {
+    if (Previous.endswith(s)) {
+      StringIndexMap[s] = StringTable.size() - 1 - s.size();
+      continue;
+    }
+
+    StringIndexMap[s] = StringTable.size();
+    StringTable += s;
+    StringTable += '\x00';
+    Previous = s;
+  }
+}
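[Reviewer note] The new StringTableBuilder exploits tail sharing: sorting by reversed bytes places every string immediately after any superstring that ends with it, so its bytes can be reused instead of stored again. A self-contained rerun of the idea (plain C++, with the offsets worked out in the comments):

    #include <algorithm>
    #include <cstddef>
    #include <map>
    #include <string>
    #include <vector>

    // Same ordering rule as compareBySuffix: compare from the last byte
    // backwards; on a shared suffix, the longer string sorts first.
    static bool bySuffix(const std::string &a, const std::string &b) {
      size_t i = a.size(), j = b.size();
      while (i > 0 && j > 0) {
        char ca = a[--i], cb = b[--j];
        if (ca != cb)
          return ca > cb;
      }
      return a.size() > b.size();
    }

    int main() {
      std::vector<std::string> strings = {"foo", "barfoo", "bar"};
      std::sort(strings.begin(), strings.end(), bySuffix);
      // Sorted: "bar", "barfoo", "foo"; "foo" lands after its superstring.
      std::string table(1, '\0'); // ELF-style leading NUL
      std::map<std::string, size_t> offsets;
      std::string prev;
      for (const std::string &s : strings) {
        if (prev.size() >= s.size() &&
            prev.compare(prev.size() - s.size(), s.size(), s) == 0) {
          offsets[s] = table.size() - 1 - s.size(); // reuse prev's tail
          continue;
        }
        offsets[s] = table.size();
        table += s;
        table += '\0';
        prev = s;
      }
      // table is "\0bar\0barfoo\0" (12 bytes instead of 15);
      // offsets: bar = 1, barfoo = 5, foo = 8 (inside "barfoo\0").
      return 0;
    }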
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index fecd237..a5ab8d7 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/Option/ArgList.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Option/Arg.h"
 #include "llvm/Option/Option.h"
@@ -32,11 +33,6 @@ void arg_iterator::SkipToNextArg() {
   }
 }
 
-//
-
-ArgList::ArgList() {
-}
-
 ArgList::~ArgList() {
 }
 
@@ -45,14 +41,9 @@ void ArgList::append(Arg *A) {
 }
 
 void ArgList::eraseArg(OptSpecifier Id) {
-  for (iterator it = begin(), ie = end(); it != ie; ) {
-    if ((*it)->getOption().matches(Id)) {
-      it = Args.erase(it);
-      ie = end();
-    } else {
-      ++it;
-    }
-  }
+  Args.erase(std::remove_if(begin(), end(),
+                            [=](Arg *A) { return A->getOption().matches(Id); }),
+             end());
 }
 
 Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
@@ -60,11 +51,11 @@ Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
   for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
     if ((*it)->getOption().matches(Id))
      return *it;
-  return 0;
+  return nullptr;
 }
 
 Arg *ArgList::getLastArg(OptSpecifier Id) const {
-  Arg *Res = 0;
+  Arg *Res = nullptr;
  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
     if ((*it)->getOption().matches(Id)) {
       Res = *it;
@@ -76,7 +67,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id) const {
 }
 
 Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const {
-  Arg *Res = 0;
+  Arg *Res = nullptr;
   for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
     if ((*it)->getOption().matches(Id0) ||
         (*it)->getOption().matches(Id1)) {
@@ -91,7 +82,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const {
 
 Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
                          OptSpecifier Id2) const {
-  Arg *Res = 0;
+  Arg *Res = nullptr;
   for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
     if ((*it)->getOption().matches(Id0) ||
         (*it)->getOption().matches(Id1) ||
@@ -106,7 +97,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
 
 Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
                          OptSpecifier Id2, OptSpecifier Id3) const {
-  Arg *Res = 0;
+  Arg *Res = nullptr;
   for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
     if ((*it)->getOption().matches(Id0) ||
         (*it)->getOption().matches(Id1) ||
@@ -123,7 +114,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
 Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
                          OptSpecifier Id2, OptSpecifier Id3,
                          OptSpecifier Id4) const {
-  Arg *Res = 0;
+  Arg *Res = nullptr;
   for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
     if ((*it)->getOption().matches(Id0) ||
         (*it)->getOption().matches(Id1) ||
@@ -141,7 +132,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
 Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
                          OptSpecifier Id2, OptSpecifier Id3,
                          OptSpecifier Id4, OptSpecifier Id5) const {
-  Arg *Res = 0;
+  Arg *Res = nullptr;
   for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
     if ((*it)->getOption().matches(Id0) ||
         (*it)->getOption().matches(Id1) ||
@@ -161,7 +152,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
                          OptSpecifier Id2, OptSpecifier Id3,
                          OptSpecifier Id4, OptSpecifier Id5,
                          OptSpecifier Id6) const {
-  Arg *Res = 0;
+  Arg *Res = nullptr;
   for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
     if ((*it)->getOption().matches(Id0) ||
         (*it)->getOption().matches(Id1) ||
@@ -182,7 +173,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
                          OptSpecifier Id2, OptSpecifier Id3,
                          OptSpecifier Id4, OptSpecifier Id5,
                          OptSpecifier Id6, OptSpecifier Id7) const {
-  Arg *Res = 0;
+  Arg *Res = nullptr;
   for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
     if ((*it)->getOption().matches(Id0) ||
         (*it)->getOption().matches(Id1) ||
@@ -348,52 +339,50 @@ DerivedArgList::DerivedArgList(const InputArgList &_BaseArgs)
   : BaseArgs(_BaseArgs) {
 }
 
-DerivedArgList::~DerivedArgList() {
-  // We only own the arguments we explicitly synthesized.
-  for (iterator it = SynthesizedArgs.begin(), ie = SynthesizedArgs.end();
-       it != ie; ++it)
-    delete *it;
-}
+DerivedArgList::~DerivedArgList() {}
 
 const char *DerivedArgList::MakeArgString(StringRef Str) const {
   return BaseArgs.MakeArgString(Str);
 }
 
+void DerivedArgList::AddSynthesizedArg(Arg *A) {
+  SynthesizedArgs.push_back(std::unique_ptr<Arg>(A));
+}
+
 Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const {
-  Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
-                                               Twine(Opt.getName())),
-                   BaseArgs.MakeIndex(Opt.getName()), BaseArg);
-  SynthesizedArgs.push_back(A);
-  return A;
+  SynthesizedArgs.push_back(make_unique<Arg>(
+      Opt,
+      ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
+      BaseArgs.MakeIndex(Opt.getName()), BaseArg));
+  return SynthesizedArgs.back().get();
 }
 
 Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt,
                                        StringRef Value) const {
   unsigned Index = BaseArgs.MakeIndex(Value);
-  Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
-                                               Twine(Opt.getName())),
-                   Index, BaseArgs.getArgString(Index), BaseArg);
-  SynthesizedArgs.push_back(A);
-  return A;
+  SynthesizedArgs.push_back(make_unique<Arg>(
+      Opt,
+      ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
+      Index, BaseArgs.getArgString(Index), BaseArg));
+  return SynthesizedArgs.back().get();
 }
 
 Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
                                      StringRef Value) const {
   unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
-  Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
-                                               Twine(Opt.getName())),
-                   Index, BaseArgs.getArgString(Index + 1), BaseArg);
-  SynthesizedArgs.push_back(A);
-  return A;
+  SynthesizedArgs.push_back(make_unique<Arg>(
+      Opt,
+      ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
+      Index, BaseArgs.getArgString(Index + 1), BaseArg));
+  return SynthesizedArgs.back().get();
 }
 
 Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
                                    StringRef Value) const {
   unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str());
-  Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
-                                               Twine(Opt.getName())), Index,
-                   BaseArgs.getArgString(Index) + Opt.getName().size(),
-                   BaseArg);
-  SynthesizedArgs.push_back(A);
-  return A;
+  SynthesizedArgs.push_back(make_unique<Arg>(
+      Opt,
+      ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
+      Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg));
+  return SynthesizedArgs.back().get();
 }
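[Reviewer note] The eraseArg rewrite above is the standard erase-remove idiom: one pass compacts the survivors, one erase trims the tail, replacing the old quadratic re-scan loop. Toy equivalent for reference (illustrative element type):

    #include <algorithm>
    #include <vector>

    void eraseMatching(std::vector<int> &Args, int Id) {
      Args.erase(std::remove_if(Args.begin(), Args.end(),
                                [=](int A) { return A == Id; }),
                 Args.end());
    }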
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index 6fa459a..6842f4d 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -62,7 +62,7 @@ static
 inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
   for (const char * const *APre = A.Prefixes,
                   * const *BPre = B.Prefixes;
-       *APre != 0 && *BPre != 0; ++APre, ++BPre) {
+       *APre != nullptr && *BPre != nullptr; ++APre, ++BPre){
     if (int N = StrCmpOptionName(*APre, *BPre))
       return N < 0;
   }
@@ -136,7 +136,7 @@ OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos,
   for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions() + 1;
        i != e; ++i) {
     if (const char *const *P = getInfo(i).Prefixes) {
-      for (; *P != 0; ++P) {
+      for (; *P != nullptr; ++P) {
         PrefixesUnion.insert(*P);
       }
     }
@@ -160,7 +160,7 @@ OptTable::~OptTable() {
 const Option OptTable::getOption(OptSpecifier Opt) const {
   unsigned id = Opt.getID();
   if (id == 0)
-    return Option(0, 0);
+    return Option(nullptr, nullptr);
   assert((unsigned) (id - 1) < getNumOptions() && "Invalid ID.");
   return Option(&getInfo(id), this);
 }
@@ -178,7 +178,7 @@ static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) {
 /// \returns Matched size. 0 means no match.
 static unsigned matchOption(const OptTable::Info *I, StringRef Str,
                             bool IgnoreCase) {
-  for (const char * const *Pre = I->Prefixes; *Pre != 0; ++Pre) {
+  for (const char * const *Pre = I->Prefixes; *Pre != nullptr; ++Pre) {
     StringRef Prefix(*Pre);
     if (Str.startswith(Prefix)) {
       StringRef Rest = Str.substr(Prefix.size());
@@ -240,7 +240,7 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
 
     // Otherwise, see if this argument was missing values.
     if (Prev != Index)
-      return 0;
+      return nullptr;
   }
 
   // If we failed to find an option and this arg started with /, then it's
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index 7b5ff2b..10662a3 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -58,8 +58,8 @@ void Option::dump() const {
 
   if (Info->Prefixes) {
     llvm::errs() << " Prefixes:[";
-    for (const char * const *Pre = Info->Prefixes; *Pre != 0; ++Pre) {
-      llvm::errs() << '"' << *Pre << (*(Pre + 1) == 0 ? "\"" : "\", ");
+    for (const char * const *Pre = Info->Prefixes; *Pre != nullptr; ++Pre) {
+      llvm::errs() << '"' << *Pre << (*(Pre + 1) == nullptr ? "\"" : "\", ");
     }
     llvm::errs() << ']';
   }
@@ -116,7 +116,7 @@ Arg *Option::accept(const ArgList &Args,
   switch (getKind()) {
   case FlagClass: {
     if (ArgSize != strlen(Args.getArgString(Index)))
-      return 0;
+      return nullptr;
 
     Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
     if (getAliasArgs()) {
@@ -166,11 +166,11 @@ Arg *Option::accept(const ArgList &Args,
     // Matches iff this is an exact match.
     // FIXME: Avoid strlen.
     if (ArgSize != strlen(Args.getArgString(Index)))
-      return 0;
+      return nullptr;
 
     Index += 2;
     if (Index > Args.getNumInputArgStrings())
-      return 0;
+      return nullptr;
 
     return new Arg(UnaliasedOption, Spelling,
                    Index - 2, Args.getArgString(Index - 1));
@@ -178,11 +178,11 @@ Arg *Option::accept(const ArgList &Args,
     // Matches iff this is an exact match.
     // FIXME: Avoid strlen.
     if (ArgSize != strlen(Args.getArgString(Index)))
-      return 0;
+      return nullptr;
 
     Index += 1 + getNumArgs();
     if (Index > Args.getNumInputArgStrings())
-      return 0;
+      return nullptr;
 
     Arg *A = new Arg(UnaliasedOption, Spelling, Index - 1 - getNumArgs(),
                      Args.getArgString(Index - getNumArgs()));
@@ -201,7 +201,7 @@ Arg *Option::accept(const ArgList &Args,
     // Otherwise it must be separate.
     Index += 2;
     if (Index > Args.getNumInputArgStrings())
-      return 0;
+      return nullptr;
 
     return new Arg(UnaliasedOption, Spelling,
                    Index - 2, Args.getArgString(Index - 1));
@@ -210,7 +210,7 @@ Arg *Option::accept(const ArgList &Args,
     // Always matches.
     Index += 2;
     if (Index > Args.getNumInputArgStrings())
-      return 0;
+      return nullptr;
 
     return new Arg(UnaliasedOption, Spelling, Index - 2,
                    Args.getArgString(Index - 2) + ArgSize,
@@ -219,7 +219,7 @@ Arg *Option::accept(const ArgList &Args,
     // Matches iff this is an exact match.
     // FIXME: Avoid strlen.
     if (ArgSize != strlen(Args.getArgString(Index)))
-      return 0;
+      return nullptr;
 
     Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
     while (Index < Args.getNumInputArgStrings())
       A->getValues().push_back(Args.getArgString(Index++));
diff --git a/lib/ProfileData/Android.mk b/lib/ProfileData/Android.mk
new file mode 100644
index 0000000..5ae5ba8
--- /dev/null
+++ b/lib/ProfileData/Android.mk
@@ -0,0 +1,33 @@
+LOCAL_PATH:= $(call my-dir)
+
+profiledata_SRC_FILES := \
+  InstrProf.cpp \
+  InstrProfReader.cpp \
+  InstrProfWriter.cpp
+
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_MODULE:= libLLVMProfileData
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := $(profiledata_SRC_FILES)
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
+include $(CLEAR_VARS)
+
+LOCAL_MODULE:= libLLVMProfileData
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := $(profiledata_SRC_FILES)
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 850f613..de2b13d 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -33,6 +33,8 @@ class InstrProfErrorCategoryType : public error_category {
       return "Invalid header";
     case instrprof_error::unsupported_version:
       return "Unsupported format version";
+    case instrprof_error::unsupported_hash_type:
+      return "Unsupported hash function";
     case instrprof_error::too_large:
       return "Too much profile data";
     case instrprof_error::truncated:
@@ -50,7 +52,7 @@ class InstrProfErrorCategoryType : public error_category {
     }
     llvm_unreachable("A value of instrprof_error has no message.");
   }
-  error_condition default_error_condition(int EV) const {
+  error_condition default_error_condition(int EV) const override {
     if (EV == instrprof_error::success)
       return errc::success;
     return errc::invalid_argument;
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+namespace IndexedInstrProf {
+enum class HashT : uint32_t {
+  MD5,
+
+  Last = MD5
+};
+
+static inline uint64_t MD5Hash(StringRef Str) {
+  MD5 Hash;
+  Hash.update(Str);
+  llvm::MD5::MD5Result Result;
+  Hash.final(Result);
+  // Return the least significant 8 bytes. Our MD5 implementation returns the
+  // result in little endian, so we may need to swap bytes.
+  using namespace llvm::support;
+  return endian::read<uint64_t, little, unaligned>(Result);
+}
+
+static inline uint64_t ComputeHash(HashT Type, StringRef K) {
+  switch (Type) {
+  case HashT::MD5:
+    return IndexedInstrProf::MD5Hash(K);
+  }
+  llvm_unreachable("Unhandled hash type");
+}
+
+const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
+const uint64_t Version = 1;
+const HashT HashType = HashT::MD5;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index b07f402..7014f5e 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -15,30 +15,62 @@
 #include "llvm/ProfileData/InstrProfReader.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "InstrProfIndexed.h"
+
+#include <cassert>
 
 using namespace llvm;
 
-error_code InstrProfReader::create(std::string Path,
-                                   std::unique_ptr<InstrProfReader> &Result) {
-  std::unique_ptr<MemoryBuffer> Buffer;
+static error_code setupMemoryBuffer(std::string Path,
+                                    std::unique_ptr<MemoryBuffer> &Buffer) {
   if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
     return EC;
 
   // Sanity check the file.
   if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
     return instrprof_error::too_large;
+  return instrprof_error::success;
+}
+
+static error_code initializeReader(InstrProfReader &Reader) {
+  return Reader.readHeader();
+}
+
+error_code InstrProfReader::create(std::string Path,
+                                   std::unique_ptr<InstrProfReader> &Result) {
+  // Set up the buffer to read.
+  std::unique_ptr<MemoryBuffer> Buffer;
+  if (error_code EC = setupMemoryBuffer(Path, Buffer))
+    return EC;
 
   // Create the reader.
-  if (RawInstrProfReader64::hasFormat(*Buffer))
+  if (IndexedInstrProfReader::hasFormat(*Buffer))
+    Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+  else if (RawInstrProfReader64::hasFormat(*Buffer))
     Result.reset(new RawInstrProfReader64(std::move(Buffer)));
   else if (RawInstrProfReader32::hasFormat(*Buffer))
     Result.reset(new RawInstrProfReader32(std::move(Buffer)));
   else
     Result.reset(new TextInstrProfReader(std::move(Buffer)));
 
-  // Read the header and return the result.
-  return Result->readHeader();
+  // Initialize the reader and return the result.
+  return initializeReader(*Result);
+}
+
+error_code IndexedInstrProfReader::create(
+    std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
+  // Set up the buffer to read.
+  std::unique_ptr<MemoryBuffer> Buffer;
+  if (error_code EC = setupMemoryBuffer(Path, Buffer))
+    return EC;
+
+  // Create the reader.
+  if (!IndexedInstrProfReader::hasFormat(*Buffer))
+    return instrprof_error::bad_magic;
+  Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+
+  // Initialize the reader and return the result.
+ return initializeReader(*Result); } void InstrProfIterator::Increment() { @@ -69,6 +101,8 @@ error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { return error(instrprof_error::truncated); if ((Line++)->getAsInteger(10, NumCounters)) return error(instrprof_error::malformed); + if (NumCounters == 0) + return error(instrprof_error::malformed); // Read each counter and fill our internal storage with the values. Counts.clear(); @@ -138,6 +172,29 @@ error_code RawInstrProfReader::readHeader() { return readHeader(*Header); } +template +error_code RawInstrProfReader::readNextHeader(const char *CurrentPos) { + const char *End = DataBuffer->getBufferEnd(); + // Skip zero padding between profiles. + while (CurrentPos != End && *CurrentPos == 0) + ++CurrentPos; + // If there's nothing left, we're done. + if (CurrentPos == End) + return instrprof_error::eof; + // If there isn't enough space for another header, this is probably just + // garbage at the end of the file. + if (CurrentPos + sizeof(RawHeader) > End) + return instrprof_error::malformed; + // The magic should have the same byte order as in the previous header. + uint64_t Magic = *reinterpret_cast(CurrentPos); + if (Magic != swap(getRawMagic())) + return instrprof_error::bad_magic; + + // There's another profile to read, so we need to process the header. + auto *Header = reinterpret_cast(CurrentPos); + return readHeader(*Header); +} + static uint64_t getRawVersion() { return 1; } @@ -156,16 +213,17 @@ error_code RawInstrProfReader::readHeader(const RawHeader &Header) { ptrdiff_t DataOffset = sizeof(RawHeader); ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize; ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize; - size_t FileSize = NamesOffset + sizeof(char) * NamesSize; + size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize; - if (FileSize != DataBuffer->getBufferSize()) + auto *Start = reinterpret_cast(&Header); + if (Start + ProfileSize > DataBuffer->getBufferEnd()) return error(instrprof_error::bad_header); - const char *Start = DataBuffer->getBufferStart(); Data = reinterpret_cast(Start + DataOffset); DataEnd = Data + DataSize; CountersStart = reinterpret_cast(Start + CountersOffset); NamesStart = Start + NamesOffset; + ProfileEnd = Start + ProfileSize; return success(); } @@ -174,12 +232,15 @@ template error_code RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { if (Data == DataEnd) - return error(instrprof_error::eof); + if (error_code EC = readNextHeader(ProfileEnd)) + return EC; // Get the raw data. StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize)); - auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), - swap(Data->NumCounters)); + uint32_t NumCounters = swap(Data->NumCounters); + if (NumCounters == 0) + return error(instrprof_error::malformed); + auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters); // Check bounds. 
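Aside on readNextHeader() above: raw profiles can be concatenated into a single file with zero padding between them, so reaching DataEnd no longer means end-of-input. A standalone sketch of the scan it performs, with hypothetical names:

#include <cstddef>
#include <cstdint>
#include <cstring>

enum class ScanResult { Eof, Malformed, BadMagic, HasProfile };

static ScanResult scanNextProfile(const char *Cur, const char *End,
                                  uint64_t ExpectedMagic,
                                  std::size_t HeaderSize) {
  while (Cur != End && *Cur == 0) // Skip zero padding between profiles.
    ++Cur;
  if (Cur == End)                 // Nothing left: clean end-of-file.
    return ScanResult::Eof;
  if (Cur + HeaderSize > End)     // Trailing garbage smaller than a header.
    return ScanResult::Malformed;
  uint64_t Magic;
  std::memcpy(&Magic, Cur, sizeof(Magic)); // Avoid an unaligned dereference.
  return Magic == ExpectedMagic ? ScanResult::HasProfile : ScanResult::BadMagic;
}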
auto *NamesStartAsCounter = reinterpret_cast(NamesStart); @@ -210,3 +271,83 @@ namespace llvm { template class RawInstrProfReader; template class RawInstrProfReader; } + +InstrProfLookupTrait::hash_value_type +InstrProfLookupTrait::ComputeHash(StringRef K) { + return IndexedInstrProf::ComputeHash(HashType, K); +} + +bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { + if (DataBuffer.getBufferSize() < 8) + return false; + using namespace support; + uint64_t Magic = + endian::read(DataBuffer.getBufferStart()); + return Magic == IndexedInstrProf::Magic; +} + +error_code IndexedInstrProfReader::readHeader() { + const unsigned char *Start = + (const unsigned char *)DataBuffer->getBufferStart(); + const unsigned char *Cur = Start; + if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) + return error(instrprof_error::truncated); + + using namespace support; + + // Check the magic number. + uint64_t Magic = endian::readNext(Cur); + if (Magic != IndexedInstrProf::Magic) + return error(instrprof_error::bad_magic); + + // Read the version. + uint64_t Version = endian::readNext(Cur); + if (Version != IndexedInstrProf::Version) + return error(instrprof_error::unsupported_version); + + // Read the maximal function count. + MaxFunctionCount = endian::readNext(Cur); + + // Read the hash type and start offset. + IndexedInstrProf::HashT HashType = static_cast( + endian::readNext(Cur)); + if (HashType > IndexedInstrProf::HashT::Last) + return error(instrprof_error::unsupported_hash_type); + uint64_t HashOffset = endian::readNext(Cur); + + // The rest of the file is an on disk hash table. + Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start, + InstrProfLookupTrait(HashType))); + // Set up our iterator for readNextRecord. + RecordIterator = Index->data_begin(); + + return success(); +} + +error_code IndexedInstrProfReader::getFunctionCounts( + StringRef FuncName, uint64_t &FuncHash, std::vector &Counts) { + const auto &Iter = Index->find(FuncName); + if (Iter == Index->end()) + return error(instrprof_error::unknown_function); + + // Found it. Make sure it's valid before giving back a result. + const InstrProfRecord &Record = *Iter; + if (Record.Name.empty()) + return error(instrprof_error::malformed); + FuncHash = Record.Hash; + Counts = Record.Counts; + return success(); +} + +error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { + // Are we out of records? + if (RecordIterator == Index->data_end()) + return error(instrprof_error::eof); + + // Read the next one. 
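Aside on IndexedInstrProfReader::readHeader() above: the on-disk header is a run of little-endian uint64_t fields, read in the order magic, version, maximal function count, hash type, and hash-table offset. A minimal sketch using the same endian helpers as the patch:

#include "llvm/Support/Endian.h"
#include <cstdint>

struct IndexedHeaderSketch {
  uint64_t Magic, Version, MaxFunctionCount, HashType, HashOffset;
};

// Field order taken from readHeader(); validation is omitted here.
static IndexedHeaderSketch parseHeaderSketch(const unsigned char *Cur) {
  using namespace llvm::support;
  IndexedHeaderSketch H;
  H.Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
  H.Version = endian::readNext<uint64_t, little, unaligned>(Cur);
  H.MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
  H.HashType = endian::readNext<uint64_t, little, unaligned>(Cur);
  H.HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
  return H;
}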
+ Record = *RecordIterator; + ++RecordIterator; + if (Record.Name.empty()) + return error(instrprof_error::malformed); + return success(); +} diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 3024f96..83c41d9 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -13,10 +13,59 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfWriter.h" -#include "llvm/Support/Endian.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/OnDiskHashTable.h" + +#include "InstrProfIndexed.h" using namespace llvm; +namespace { +class InstrProfRecordTrait { +public: + typedef StringRef key_type; + typedef StringRef key_type_ref; + + typedef const InstrProfWriter::CounterData *const data_type; + typedef const InstrProfWriter::CounterData *const data_type_ref; + + typedef uint64_t hash_value_type; + typedef uint64_t offset_type; + + static hash_value_type ComputeHash(key_type_ref K) { + return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K); + } + + static std::pair + EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { + using namespace llvm::support; + endian::Writer LE(Out); + + offset_type N = K.size(); + LE.write(N); + + offset_type M = (1 + V->Counts.size()) * sizeof(uint64_t); + LE.write(M); + + return std::make_pair(N, M); + } + + static void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N){ + Out.write(K.data(), N); + } + + static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, + offset_type) { + using namespace llvm::support; + endian::Writer LE(Out); + LE.write(V->Hash); + for (uint64_t I : V->Counts) + LE.write(I); + } +}; +} + error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName, uint64_t FunctionHash, ArrayRef Counters) { @@ -26,7 +75,7 @@ error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName, auto &Data = FunctionData[FunctionName]; Data.Hash = FunctionHash; Data.Counts = Counters; - return instrprof_error::success;; + return instrprof_error::success; } auto &Data = Where->getValue(); @@ -45,16 +94,33 @@ error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName, return instrprof_error::success; } -void InstrProfWriter::write(raw_ostream &OS) { - // Write out the counts for each function. +void InstrProfWriter::write(raw_fd_ostream &OS) { + OnDiskChainedHashTableGenerator Generator; + uint64_t MaxFunctionCount = 0; + + // Populate the hash table generator. for (const auto &I : FunctionData) { - StringRef Name = I.getKey(); - uint64_t Hash = I.getValue().Hash; - const std::vector &Counts = I.getValue().Counts; - - OS << Name << "\n" << Hash << "\n" << Counts.size() << "\n"; - for (uint64_t Count : Counts) - OS << Count << "\n"; - OS << "\n"; + Generator.insert(I.getKey(), &I.getValue()); + if (I.getValue().Counts[0] > MaxFunctionCount) + MaxFunctionCount = I.getValue().Counts[0]; } + + using namespace llvm::support; + endian::Writer LE(OS); + + // Write the header. + LE.write(IndexedInstrProf::Magic); + LE.write(IndexedInstrProf::Version); + LE.write(MaxFunctionCount); + LE.write(static_cast(IndexedInstrProf::HashType)); + + // Save a space to write the hash table start location. + uint64_t HashTableStartLoc = OS.tell(); + LE.write(0); + // Write the hash table. + uint64_t HashTableStart = Generator.Emit(OS); + + // Go back and fill in the hash table start. 
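Aside on InstrProfRecordTrait above: for each function the generator first emits the key and data lengths, then the function name bytes as the key, then the hash followed by the counters as little-endian uint64_t values. A sketch of that record layout, assuming the same EndianStream helpers:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

static void emitRecordSketch(llvm::raw_ostream &Out, llvm::StringRef Name,
                             uint64_t Hash,
                             const std::vector<uint64_t> &Counts) {
  using namespace llvm::support;
  endian::Writer<little> LE(Out);
  LE.write<uint64_t>(Name.size());                            // Key length.
  LE.write<uint64_t>((1 + Counts.size()) * sizeof(uint64_t)); // Data length.
  Out.write(Name.data(), Name.size());                        // Key bytes.
  LE.write<uint64_t>(Hash);                                   // Data: hash,
  for (uint64_t C : Counts)                                   // then counters.
    LE.write<uint64_t>(C);
}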
+ OS.seek(HashTableStartLoc); + LE.write(HashTableStart); } diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 85ce31b..f9fe095 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1358,7 +1358,7 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) { switch (PackCategoriesIntoKey(category, rhs.category)) { default: - llvm_unreachable(0); + llvm_unreachable(nullptr); case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): @@ -1485,7 +1485,7 @@ APFloat::multiplySpecials(const APFloat &rhs) { switch (PackCategoriesIntoKey(category, rhs.category)) { default: - llvm_unreachable(0); + llvm_unreachable(nullptr); case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): @@ -1529,7 +1529,7 @@ APFloat::divideSpecials(const APFloat &rhs) { switch (PackCategoriesIntoKey(category, rhs.category)) { default: - llvm_unreachable(0); + llvm_unreachable(nullptr); case PackCategoriesIntoKey(fcZero, fcNaN): case PackCategoriesIntoKey(fcNormal, fcNaN): @@ -1570,7 +1570,7 @@ APFloat::modSpecials(const APFloat &rhs) { switch (PackCategoriesIntoKey(category, rhs.category)) { default: - llvm_unreachable(0); + llvm_unreachable(nullptr); case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): @@ -1679,7 +1679,7 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) fs = multiplySpecials(rhs); if (isFiniteNonZero()) { - lostFraction lost_fraction = multiplySignificand(rhs, 0); + lostFraction lost_fraction = multiplySignificand(rhs, nullptr); fs = normalize(rounding_mode, lost_fraction); if (lost_fraction != lfExactlyZero) fs = (opStatus) (fs | opInexact); @@ -1882,7 +1882,7 @@ APFloat::compare(const APFloat &rhs) const switch (PackCategoriesIntoKey(category, rhs.category)) { default: - llvm_unreachable(0); + llvm_unreachable(nullptr); case PackCategoriesIntoKey(fcNaN, fcZero): case PackCategoriesIntoKey(fcNaN, fcNormal): @@ -2439,7 +2439,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, if (exp >= 0) { /* multiplySignificand leaves the precision-th bit set to 1. */ - calcLostFraction = decSig.multiplySignificand(pow5, NULL); + calcLostFraction = decSig.multiplySignificand(pow5, nullptr); powHUerr = powStatus != opOK; } else { calcLostFraction = decSig.divideSignificand(pow5); @@ -3331,7 +3331,7 @@ APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api) if (Sem == &PPCDoubleDouble) return initFromPPCDoubleDoubleAPInt(api); - llvm_unreachable(0); + llvm_unreachable(nullptr); } APFloat @@ -3795,7 +3795,7 @@ APFloat::opStatus APFloat::next(bool nextDown) { if (isSignaling()) { result = opInvalidOp; // For consistency, propagate the sign of the sNaN to the qNaN. - makeNaN(false, isNegative(), 0); + makeNaN(false, isNegative(), nullptr); } break; case fcZero: diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 0c46725..fa929eb 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "apint" #include "llvm/ADT/APInt.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" @@ -28,6 +27,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "apint" + /// A utility function for allocating memory, checking for allocation failures, /// and ensuring the contents are zeroed. 
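Aside on the APFloat switches above: PackCategoriesIntoKey combines the two operand float categories into one integer so a single switch can dispatch on the (lhs, rhs) pair. The idiom, as an illustrative standalone sketch (names are hypothetical):

enum Category { fcZero, fcNormal, fcInfinity, fcNaN };

// Pack the pair into one value; 4 = number of categories.
constexpr int packCategories(Category LHS, Category RHS) {
  return LHS * 4 + RHS;
}

static const char *addKindSketch(Category L, Category R) {
  switch (packCategories(L, R)) {
  case packCategories(fcNaN, fcZero):
  case packCategories(fcNaN, fcNormal):
    return "nan propagates";
  case packCategories(fcZero, fcZero):
    return "exact zero";
  default:
    return "needs arithmetic";
  }
}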
inline static uint64_t* getClearedMemory(unsigned numWords) { @@ -1683,10 +1684,10 @@ void APInt::divide(const APInt LHS, unsigned lhsWords, // Allocate space for the temporary values we need either on the stack, if // it will fit, or on the heap if it won't. unsigned SPACE[128]; - unsigned *U = 0; - unsigned *V = 0; - unsigned *Q = 0; - unsigned *R = 0; + unsigned *U = nullptr; + unsigned *V = nullptr; + unsigned *Q = nullptr; + unsigned *R = nullptr; if ((Remainder?4:3)*n+2*m+1 <= 128) { U = &SPACE[0]; V = &SPACE[m+n+1]; @@ -1872,7 +1873,7 @@ APInt APInt::udiv(const APInt& RHS) const { // We have to compute it the hard way. Invoke the Knuth divide algorithm. APInt Quotient(1,0); // to hold result. - divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0); + divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr); return Quotient; } @@ -1920,7 +1921,7 @@ APInt APInt::urem(const APInt& RHS) const { // We have to compute it the hard way. Invoke the Knuth divide algorithm. APInt Remainder(1,0); - divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder); + divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder); return Remainder; } diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 7e17748..7c306b2 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -21,29 +21,10 @@ namespace llvm { -SlabAllocator::~SlabAllocator() { } - -MallocSlabAllocator::~MallocSlabAllocator() { } - -MemSlab *MallocSlabAllocator::Allocate(size_t Size) { - MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0); - Slab->Size = Size; - Slab->NextPtr = 0; - return Slab; -} - -void MallocSlabAllocator::Deallocate(MemSlab *Slab) { - Allocator.Deallocate(Slab); -} - -void BumpPtrAllocatorBase::PrintStats() const { - unsigned NumSlabs = 0; - size_t TotalMemory = 0; - for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { - TotalMemory += Slab->Size; - ++NumSlabs; - } +namespace detail { +void printBumpPtrAllocatorStats(unsigned NumSlabs, size_t BytesAllocated, + size_t TotalMemory) { errs() << "\nNumber of memory regions: " << NumSlabs << '\n' << "Bytes used: " << BytesAllocated << '\n' << "Bytes allocated: " << TotalMemory << '\n' @@ -51,13 +32,7 @@ void BumpPtrAllocatorBase::PrintStats() const { << " (includes alignment, etc)\n"; } -size_t BumpPtrAllocatorBase::getTotalMemory() const { - size_t TotalMemory = 0; - for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { - TotalMemory += Slab->Size; - } - return TotalMemory; -} +} // End namespace detail. void PrintRecyclerStats(size_t Size, size_t Align, diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp index 9559ad7..2ef32b0 100644 --- a/lib/Support/Atomic.cpp +++ b/lib/Support/Atomic.cpp @@ -17,6 +17,7 @@ using namespace llvm; #if defined(_MSC_VER) +#include #include #undef MemoryFence #endif diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp index 00cf75b..6f7e341 100644 --- a/lib/Support/BlockFrequency.cpp +++ b/lib/Support/BlockFrequency.cpp @@ -18,94 +18,8 @@ using namespace llvm; -/// Multiply FREQ by N and store result in W array. -static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) { - uint64_t u0 = freq & UINT32_MAX; - uint64_t u1 = freq >> 32; - - // Represent 96-bit value as W[2]:W[1]:W[0]; - uint64_t t = u0 * N; - uint64_t k = t >> 32; - W[0] = t; - t = u1 * N + k; - W[1] = t; - W[2] = t >> 32; -} - -/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a -/// 32 bit unsigned integer, we can use a short division algorithm. 
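Aside on APInt::divide() above: the Knuth routine carves its scratch arrays out of one fixed stack buffer when they fit, and only falls back to the heap for large operands. That idiom, reduced to a standalone sketch:

#include <cstdint>

static uint64_t sumDigits(const uint32_t *Digits, unsigned n) {
  uint64_t S = 0;
  for (unsigned i = 0; i != n; ++i)
    S += Digits[i];
  return S;
}

static uint64_t withScratch(unsigned n) {
  uint32_t Space[128];
  uint32_t *U = nullptr;
  bool OnHeap = false;
  if (2 * n <= 128) {
    U = &Space[0];           // Fits: use the stack buffer.
  } else {
    U = new uint32_t[2 * n]; // Too big: fall back to the heap.
    OnHeap = true;
  }
  for (unsigned i = 0; i != 2 * n; ++i)
    U[i] = i;
  uint64_t R = sumDigits(U, 2 * n);
  if (OnHeap)
    delete[] U;
  return R;
}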
-static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) { - // We assume that W[2] is non-zero since if W[2] is not then the user should - // just use hardware division. - assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is " - "zero, the caller should just use 64/32 hardware."); - uint32_t Q[3] = { 0, 0, 0 }; - - // The generalized short division algorithm sets i to m + n - 1, where n is - // the number of words in the divisior and m is the number of words by which - // the divident exceeds the divisor (i.e. m + n == the length of the dividend - // in words). Due to our assumption that W[2] is non-zero, we know that the - // dividend is of length 3 implying since n is 1 that m = 2. Thus we set i to - // m + n - 1 = 2 + 1 - 1 = 2. - uint32_t R = 0; - for (int i = 2; i >= 0; --i) { - uint64_t PartialD = uint64_t(R) << 32 | W[i]; - if (PartialD == 0) { - Q[i] = 0; - R = 0; - } else if (PartialD < D) { - Q[i] = 0; - R = uint32_t(PartialD); - } else if (PartialD == D) { - Q[i] = 1; - R = 0; - } else { - Q[i] = uint32_t(PartialD / D); - R = uint32_t(PartialD - (Q[i] * D)); - } - } - - // If Q[2] is non-zero, then we overflowed. - uint64_t Result; - if (Q[2]) { - Result = UINT64_MAX; - R = D; - } else { - // Form the final uint64_t result, avoiding endianness issues. - Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32); - } - - if (Rout) - *Rout = R; - - return Result; -} - -uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) { - assert(D != 0 && "Division by zero"); - - // Calculate Frequency * N. - uint64_t MulLo = (Frequency & UINT32_MAX) * N; - uint64_t MulHi = (Frequency >> 32) * N; - uint64_t MulRes = (MulHi << 32) + MulLo; - - // If the product fits in 64 bits, just use built-in division. - if (MulHi <= UINT32_MAX && MulRes >= MulLo) { - Frequency = MulRes / D; - return MulRes % D; - } - - // Product overflowed, use 96-bit operations. - // 96-bit value represented as W[2]:W[1]:W[0]. 
- uint32_t W[3]; - uint32_t R; - mult96bit(Frequency, N, W); - Frequency = divrem96bit(W, D, &R); - return R; -} - BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { - scale(Prob.getNumerator(), Prob.getDenominator()); + Frequency = Prob.scale(Frequency); return *this; } @@ -117,7 +31,7 @@ BlockFrequency::operator*(const BranchProbability &Prob) const { } BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) { - scale(Prob.getDenominator(), Prob.getNumerator()); + Frequency = Prob.scaleByInverse(Frequency); return *this; } @@ -156,8 +70,3 @@ BlockFrequency &BlockFrequency::operator>>=(const unsigned count) { Frequency |= Frequency == 0; return *this; } - -uint32_t BlockFrequency::scale(const BranchProbability &Prob) { - return scale(Prob.getNumerator(), Prob.getDenominator()); -} - diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp index e8b83e5..65878d6 100644 --- a/lib/Support/BranchProbability.cpp +++ b/lib/Support/BranchProbability.cpp @@ -18,19 +18,56 @@ using namespace llvm; -void BranchProbability::print(raw_ostream &OS) const { - OS << N << " / " << D << " = " << format("%g%%", ((double)N / D) * 100.0); +raw_ostream &BranchProbability::print(raw_ostream &OS) const { + return OS << N << " / " << D << " = " + << format("%g%%", ((double)N / D) * 100.0); } -void BranchProbability::dump() const { - dbgs() << *this << '\n'; -} +void BranchProbability::dump() const { print(dbgs()) << '\n'; } + +static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) { + assert(D && "divide by 0"); + + // Fast path for multiplying by 1.0. + if (!Num || D == N) + return Num; + + // Split Num into upper and lower parts to multiply, then recombine. + uint64_t ProductHigh = (Num >> 32) * N; + uint64_t ProductLow = (Num & UINT32_MAX) * N; + + // Split into 32-bit digits. + uint32_t Upper32 = ProductHigh >> 32; + uint32_t Lower32 = ProductLow & UINT32_MAX; + uint32_t Mid32Partial = ProductHigh & UINT32_MAX; + uint32_t Mid32 = Mid32Partial + (ProductLow >> 32); + + // Carry. + Upper32 += Mid32 < Mid32Partial; -namespace llvm { + // Check for overflow. + if (Upper32 >= D) + return UINT64_MAX; + + uint64_t Rem = (uint64_t(Upper32) << 32) | Mid32; + uint64_t UpperQ = Rem / D; + + // Check for overflow. + if (UpperQ > UINT32_MAX) + return UINT64_MAX; + + Rem = ((Rem % D) << 32) | Lower32; + uint64_t LowerQ = Rem / D; + uint64_t Q = (UpperQ << 32) + LowerQ; + + // Check for overflow. + return Q < LowerQ ? UINT64_MAX : Q; +} -raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) { - Prob.print(OS); - return OS; +uint64_t BranchProbability::scale(uint64_t Num) const { + return ::scale(Num, N, D); } +uint64_t BranchProbability::scaleByInverse(uint64_t Num) const { + return ::scale(Num, D, N); } diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index b3c2614..37bbf48 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -38,6 +38,8 @@ using namespace llvm; using namespace cl; +#define DEBUG_TYPE "commandline" + //===----------------------------------------------------------------------===// // Template instantiations and anchors. // @@ -81,7 +83,7 @@ void StringSaver::anchor() {} // Globals for name and overview of program. Program name is not a string to // avoid static ctor/dtor issues. static char ProgramName[80] = ""; -static const char *ProgramOverview = 0; +static const char *ProgramOverview = nullptr; // This collects additional help to be printed. 
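Aside on the new ::scale() above: it forms Num * N in 32-bit digits (a 64-bit by 32-bit product needs up to 96 bits), divides digit-wise by D, and saturates to UINT64_MAX on overflow. A quick cross-check against a 128-bit reference (uses the GCC/Clang __uint128_t extension; illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

static uint64_t scaleRef(uint64_t Num, uint32_t N, uint32_t D) {
  __uint128_t Q = (__uint128_t)Num * N / D;
  return Q > UINT64_MAX ? UINT64_MAX : (uint64_t)Q;
}

int main() {
  // Multiplying by 1.0 returns the input unchanged (the fast path).
  assert(scaleRef(42, 7, 7) == 42);
  // An overflowing intermediate product still yields the exact result.
  assert(scaleRef(UINT64_MAX, 3, 4) == 0xBFFFFFFFFFFFFFFFULL);
  // Results above 2^64 - 1 saturate to UINT64_MAX.
  assert(scaleRef(UINT64_MAX, 5, 4) == UINT64_MAX);
  return 0;
}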
static ManagedStatic > MoreHelp; @@ -100,10 +102,10 @@ void cl::MarkOptionsChanged() { /// RegisteredOptionList - This is the list of the command line options that /// have statically constructed themselves. -static Option *RegisteredOptionList = 0; +static Option *RegisteredOptionList = nullptr; void Option::addArgument() { - assert(NextRegistered == 0 && "argument multiply registered!"); + assert(!NextRegistered && "argument multiply registered!"); NextRegistered = RegisteredOptionList; RegisteredOptionList = this; @@ -111,7 +113,7 @@ void Option::addArgument() { } void Option::removeArgument() { - assert(NextRegistered != 0 && "argument never registered"); + assert(NextRegistered && "argument never registered"); assert(RegisteredOptionList == this && "argument is not the last registered"); RegisteredOptionList = NextRegistered; MarkOptionsChanged(); @@ -144,7 +146,7 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, SmallVectorImpl &SinkOpts, StringMap &OptionsMap) { SmallVector OptionNames; - Option *CAOpt = 0; // The ConsumeAfter option if it exists. + Option *CAOpt = nullptr; // The ConsumeAfter option if it exists. for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) { // If this option wants to handle multiple option names, get the full set. // This handles enum options like "-O1 -O2" etc. @@ -189,7 +191,7 @@ static void GetOptionInfo(SmallVectorImpl &PositionalOpts, static Option *LookupOption(StringRef &Arg, StringRef &Value, const StringMap &OptionsMap) { // Reject all dashes. - if (Arg.empty()) return 0; + if (Arg.empty()) return nullptr; size_t EqualPos = Arg.find('='); @@ -197,14 +199,14 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value, if (EqualPos == StringRef::npos) { // Look up the option. StringMap::const_iterator I = OptionsMap.find(Arg); - return I != OptionsMap.end() ? I->second : 0; + return I != OptionsMap.end() ? I->second : nullptr; } // If the argument before the = is a valid option name, we match. If not, // return Arg unmolested. StringMap::const_iterator I = OptionsMap.find(Arg.substr(0, EqualPos)); - if (I == OptionsMap.end()) return 0; + if (I == OptionsMap.end()) return nullptr; Value = Arg.substr(EqualPos+1); Arg = Arg.substr(0, EqualPos); @@ -219,7 +221,7 @@ static Option *LookupNearestOption(StringRef Arg, const StringMap &OptionsMap, std::string &NearestString) { // Reject all dashes. - if (Arg.empty()) return 0; + if (Arg.empty()) return nullptr; // Split on any equal sign. std::pair SplitArg = Arg.split('='); @@ -227,7 +229,7 @@ static Option *LookupNearestOption(StringRef Arg, StringRef &RHS = SplitArg.second; // Find the closest match. - Option *Best = 0; + Option *Best = nullptr; unsigned BestDistance = 0; for (StringMap::const_iterator it = OptionsMap.begin(), ie = OptionsMap.end(); it != ie; ++it) { @@ -300,7 +302,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName, // Enforce value requirements switch (Handler->getValueExpectedFlag()) { case ValueRequired: - if (Value.data() == 0) { // No value specified? + if (!Value.data()) { // No value specified? 
if (i+1 >= argc) return Handler->error("requires a value!"); // Steal the next argument, like for '-o filename' @@ -349,7 +351,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName, static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { int Dummy = i; - return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy); + return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy); } @@ -385,7 +387,7 @@ static Option *getOptionPred(StringRef Name, size_t &Length, Length = Name.size(); return OMI->second; // Found one! } - return 0; // No option found! + return nullptr; // No option found! } /// HandlePrefixedOrGroupedOption - The specified argument string (which started @@ -395,12 +397,12 @@ static Option *getOptionPred(StringRef Name, size_t &Length, static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, bool &ErrorParsing, const StringMap &OptionsMap) { - if (Arg.size() == 1) return 0; + if (Arg.size() == 1) return nullptr; // Do the lookup! size_t Length = 0; Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap); - if (PGOpt == 0) return 0; + if (!PGOpt) return nullptr; // If the option is a prefixed option, then the value is simply the // rest of the name... so fall through to later processing, by @@ -427,7 +429,7 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, "Option can not be cl::Grouping AND cl::ValueRequired!"); int Dummy = 0; ErrorParsing |= ProvideOption(PGOpt, OneArgName, - StringRef(), 0, 0, Dummy); + StringRef(), 0, nullptr, Dummy); // Get the next grouping option. PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap); @@ -746,7 +748,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, argc = static_cast(newArgv.size()); // Copy the program name into ProgName, making sure not to overflow it. - std::string ProgName = sys::path::filename(argv[0]); + StringRef ProgName = sys::path::filename(argv[0]); size_t Len = std::min(ProgName.size(), size_t(79)); memcpy(ProgramName, ProgName.data(), Len); ProgramName[Len] = '\0'; @@ -760,7 +762,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, // Determine whether or not there are an unlimited number of positionals bool HasUnlimitedPositionals = false; - Option *ConsumeAfterOpt = 0; + Option *ConsumeAfterOpt = nullptr; if (!PositionalOpts.empty()) { if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) { assert(PositionalOpts.size() > 1 && @@ -770,7 +772,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, // Calculate how many positional values are _required_. bool UnboundedFound = false; - for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size(); + for (size_t i = ConsumeAfterOpt ? 1 : 0, e = PositionalOpts.size(); i != e; ++i) { Option *Opt = PositionalOpts[i]; if (RequiresValue(Opt)) @@ -806,13 +808,13 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, // If the program has named positional arguments, and the name has been run // across, keep track of which positional argument was named. Otherwise put // the positional args into the PositionalVals list... - Option *ActivePositionalArg = 0; + Option *ActivePositionalArg = nullptr; // Loop over all of the arguments... processing them. bool DashDashFound = false; // Have we read '--'? 
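Aside on LookupOption() above: an argument containing '=' is treated as option=value only if the text before '=' names a known option; otherwise the argument is left unmolested. A standalone sketch with a plain std::map standing in for the option table:

#include <map>
#include <string>

// Returns the matched option name, or "" if unknown; Value receives the
// text after '=' on success.
static std::string lookupSketch(const std::map<std::string, int> &Options,
                                const std::string &Arg, std::string &Value) {
  std::size_t EqualPos = Arg.find('=');
  if (EqualPos == std::string::npos)
    return Options.count(Arg) ? Arg : std::string();
  std::string Name = Arg.substr(0, EqualPos);
  if (!Options.count(Name)) // Not an option: leave Arg as-is.
    return std::string();
  Value = Arg.substr(EqualPos + 1);
  return Name;
}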
for (int i = 1; i < argc; ++i) { - Option *Handler = 0; - Option *NearestHandler = 0; + Option *Handler = nullptr; + Option *NearestHandler = nullptr; std::string NearestHandlerString; StringRef Value; StringRef ArgName = ""; @@ -845,8 +847,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, // All of the positional arguments have been fulfulled, give the rest to // the consume after option... if it's specified... // - if (PositionalVals.size() >= NumPositionalRequired && - ConsumeAfterOpt != 0) { + if (PositionalVals.size() >= NumPositionalRequired && ConsumeAfterOpt) { for (++i; i < argc; ++i) PositionalVals.push_back(std::make_pair(argv[i],i)); break; // Handle outside of the argument processing loop... @@ -884,18 +885,18 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, Handler = LookupOption(ArgName, Value, Opts); // Check to see if this "option" is really a prefixed or grouped argument. - if (Handler == 0) + if (!Handler) Handler = HandlePrefixedOrGroupedOption(ArgName, Value, ErrorParsing, Opts); // Otherwise, look for the closest available option to report to the user // in the upcoming error. - if (Handler == 0 && SinkOpts.empty()) + if (!Handler && SinkOpts.empty()) NearestHandler = LookupNearestOption(ArgName, Opts, NearestHandlerString); } - if (Handler == 0) { + if (!Handler) { if (SinkOpts.empty()) { errs() << ProgramName << ": Unknown command line argument '" << argv[i] << "'. Try: '" << argv[0] << " -help'\n"; @@ -939,7 +940,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, << " positional arguments: See: " << argv[0] << " -help\n"; ErrorParsing = true; - } else if (ConsumeAfterOpt == 0) { + } else if (!ConsumeAfterOpt) { // Positional args have already been handled if ConsumeAfter is specified. unsigned ValNo = 0, NumVals = static_cast(PositionalVals.size()); for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) { @@ -1044,7 +1045,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, // bool Option::error(const Twine &Message, StringRef ArgName) { - if (ArgName.data() == 0) ArgName = ArgStr; + if (!ArgName.data()) ArgName = ArgStr; if (ArgName.empty()) errs() << HelpStr; // Be nice for positional arguments else @@ -1455,12 +1456,12 @@ public: outs() << "USAGE: " << ProgramName << " [options]"; // Print out the positional options. - Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists... + Option *CAOpt = nullptr; // The cl::ConsumeAfter option, if it exists... if (!PositionalOpts.empty() && PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter) CAOpt = PositionalOpts[0]; - for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) { + for (size_t i = CAOpt != nullptr, e = PositionalOpts.size(); i != e; ++i) { if (PositionalOpts[i]->ArgStr[0]) outs() << " --" << PositionalOpts[i]->ArgStr; outs() << " " << PositionalOpts[i]->HelpStr; @@ -1555,7 +1556,7 @@ protected: outs() << (*Category)->getName() << ":\n"; // Check if description is set. 
- if ((*Category)->getDescription() != 0) + if ((*Category)->getDescription() != nullptr) outs() << (*Category)->getDescription() << "\n\n"; else outs() << "\n"; @@ -1686,9 +1687,9 @@ void cl::PrintOptionValues() { Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions); } -static void (*OverrideVersionPrinter)() = 0; +static void (*OverrideVersionPrinter)() = nullptr; -static std::vector* ExtraVersionPrinters = 0; +static std::vector* ExtraVersionPrinters = nullptr; namespace { class VersionPrinter { @@ -1721,7 +1722,7 @@ public: void operator=(bool OptionWasSpecified) { if (!OptionWasSpecified) return; - if (OverrideVersionPrinter != 0) { + if (OverrideVersionPrinter != nullptr) { (*OverrideVersionPrinter)(); exit(0); } @@ -1729,7 +1730,7 @@ public: // Iterate over any registered extra printers and call them to add further // information. - if (ExtraVersionPrinters != 0) { + if (ExtraVersionPrinters != nullptr) { outs() << '\n'; for (std::vector::iterator I = ExtraVersionPrinters->begin(), E = ExtraVersionPrinters->end(); @@ -1779,7 +1780,7 @@ void cl::SetVersionPrinter(void (*func)()) { } void cl::AddExtraVersionPrinter(void (*func)()) { - if (ExtraVersionPrinters == 0) + if (!ExtraVersionPrinters) ExtraVersionPrinters = new std::vector; ExtraVersionPrinters->push_back(func); diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp index 5e53361..c32eb213 100644 --- a/lib/Support/Compression.cpp +++ b/lib/Support/Compression.cpp @@ -16,7 +16,6 @@ #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MemoryBuffer.h" #if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H #include #endif @@ -47,36 +46,26 @@ static zlib::Status encodeZlibReturnValue(int ReturnValue) { bool zlib::isAvailable() { return true; } zlib::Status zlib::compress(StringRef InputBuffer, - std::unique_ptr &CompressedBuffer, + SmallVectorImpl &CompressedBuffer, CompressionLevel Level) { unsigned long CompressedSize = ::compressBound(InputBuffer.size()); - std::unique_ptr TmpBuffer(new char[CompressedSize]); + CompressedBuffer.resize(CompressedSize); int CLevel = encodeZlibCompressionLevel(Level); Status Res = encodeZlibReturnValue(::compress2( - (Bytef *)TmpBuffer.get(), &CompressedSize, + (Bytef *)CompressedBuffer.data(), &CompressedSize, (const Bytef *)InputBuffer.data(), InputBuffer.size(), CLevel)); - if (Res == StatusOK) { - CompressedBuffer.reset(MemoryBuffer::getMemBufferCopy( - StringRef(TmpBuffer.get(), CompressedSize))); - // Tell MSan that memory initialized by zlib is valid. - __msan_unpoison(CompressedBuffer->getBufferStart(), CompressedSize); - } + CompressedBuffer.resize(CompressedSize); return Res; } zlib::Status zlib::uncompress(StringRef InputBuffer, - std::unique_ptr &UncompressedBuffer, + SmallVectorImpl &UncompressedBuffer, size_t UncompressedSize) { - std::unique_ptr TmpBuffer(new char[UncompressedSize]); - Status Res = encodeZlibReturnValue( - ::uncompress((Bytef *)TmpBuffer.get(), (uLongf *)&UncompressedSize, - (const Bytef *)InputBuffer.data(), InputBuffer.size())); - if (Res == StatusOK) { - UncompressedBuffer.reset(MemoryBuffer::getMemBufferCopy( - StringRef(TmpBuffer.get(), UncompressedSize))); - // Tell MSan that memory initialized by zlib is valid. 
- __msan_unpoison(UncompressedBuffer->getBufferStart(), UncompressedSize); - } + UncompressedBuffer.resize(UncompressedSize); + Status Res = encodeZlibReturnValue(::uncompress( + (Bytef *)UncompressedBuffer.data(), (uLongf *)&UncompressedSize, + (const Bytef *)InputBuffer.data(), InputBuffer.size())); + UncompressedBuffer.resize(UncompressedSize); return Res; } @@ -87,12 +76,12 @@ uint32_t zlib::crc32(StringRef Buffer) { #else bool zlib::isAvailable() { return false; } zlib::Status zlib::compress(StringRef InputBuffer, - std::unique_ptr &CompressedBuffer, + SmallVectorImpl &CompressedBuffer, CompressionLevel Level) { return zlib::StatusUnsupported; } zlib::Status zlib::uncompress(StringRef InputBuffer, - std::unique_ptr &UncompressedBuffer, + SmallVectorImpl &UncompressedBuffer, size_t UncompressedSize) { return zlib::StatusUnsupported; } diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index ccc0089..a426377 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -89,16 +89,16 @@ CrashRecoveryContext::~CrashRecoveryContext() { } bool CrashRecoveryContext::isRecoveringFromCrash() { - return tlIsRecoveringFromCrash->get() != 0; + return tlIsRecoveringFromCrash->get() != nullptr; } CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { if (!gCrashRecoveryEnabled) - return 0; + return nullptr; const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); if (!CRCI) - return 0; + return nullptr; return CRCI->CRC; } @@ -120,7 +120,7 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { if (cleanup == head) { head = cleanup->next; if (head) - head->prev = 0; + head->prev = nullptr; } else { cleanup->prev->next = cleanup->next; @@ -261,7 +261,7 @@ static void CrashRecoverySignalHandler(int Signal) { sigset_t SigMask; sigemptyset(&SigMask); sigaddset(&SigMask, Signal); - sigprocmask(SIG_UNBLOCK, &SigMask, 0); + sigprocmask(SIG_UNBLOCK, &SigMask, nullptr); if (CRCI) const_cast(CRCI)->HandleCrash(); @@ -296,12 +296,12 @@ void CrashRecoveryContext::Disable() { // Restore the previous signal handlers. for (unsigned i = 0; i != NumSignals; ++i) - sigaction(Signals[i], &PrevActions[i], 0); + sigaction(Signals[i], &PrevActions[i], nullptr); } #endif -bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) { +bool CrashRecoveryContext::RunSafely(function_ref Fn) { // If crash recovery is disabled, do nothing. 
if (gCrashRecoveryEnabled) { assert(!Impl && "Crash recovery context already initialized!"); @@ -313,7 +313,7 @@ bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) { } } - Fn(UserData); + Fn(); return true; } @@ -334,8 +334,7 @@ const std::string &CrashRecoveryContext::getBacktrace() const { namespace { struct RunSafelyOnThreadInfo { - void (*Fn)(void*); - void *Data; + function_ref Fn; CrashRecoveryContext *CRC; bool Result; }; @@ -344,11 +343,11 @@ struct RunSafelyOnThreadInfo { static void RunSafelyOnThread_Dispatch(void *UserData) { RunSafelyOnThreadInfo *Info = reinterpret_cast(UserData); - Info->Result = Info->CRC->RunSafely(Info->Fn, Info->Data); + Info->Result = Info->CRC->RunSafely(Info->Fn); } -bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData, +bool CrashRecoveryContext::RunSafelyOnThread(function_ref Fn, unsigned RequestedStackSize) { - RunSafelyOnThreadInfo Info = { Fn, UserData, this, false }; + RunSafelyOnThreadInfo Info = { Fn, this, false }; llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize); if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl) CRC->setSwitchedThread(); diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp index 29acb7d..0d504ee 100644 --- a/lib/Support/DAGDeltaAlgorithm.cpp +++ b/lib/Support/DAGDeltaAlgorithm.cpp @@ -42,6 +42,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "dag-delta" + namespace { class DAGDeltaAlgorithmImpl { diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index a564d21..7b82921 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -44,7 +44,7 @@ static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count, // success return dst; } - return NULL; + return nullptr; } uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const { @@ -125,7 +125,7 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const { *offset_ptr = pos + 1; return Data.data() + offset; } - return NULL; + return nullptr; } uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const { diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp index 1caeddf..eec8584 100644 --- a/lib/Support/DataStream.cpp +++ b/lib/Support/DataStream.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "Data-stream" #include "llvm/Support/DataStream.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/FileSystem.h" @@ -30,6 +29,8 @@ #endif using namespace llvm; +#define DEBUG_TYPE "Data-stream" + // Interface goals: // * StreamableMemoryObject doesn't care about complexities like using // threads/async callbacks to actually overlap download+compile @@ -83,7 +84,7 @@ DataStreamer *getDataFileStreamer(const std::string &Filename, if (error_code e = s->OpenFile(Filename)) { *StrError = std::string("Could not open ") + Filename + ": " + e.message() + "\n"; - return NULL; + return nullptr; } return s; } diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp index d9cb8a9..ad4d4ef 100644 --- a/lib/Support/Debug.cpp +++ b/lib/Support/Debug.cpp @@ -109,7 +109,7 @@ raw_ostream &llvm::dbgs() { if (EnableDebugBuffering && DebugFlag && DebugBufferSize != 0) // TODO: Add a handler for SIGUSER1-type signals so the user can // force a debug dump. 
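Aside on the RunSafely() signature change above: replacing the (function pointer, void*) pair with function_ref<void()> lets callers pass capturing lambdas directly instead of writing a trampoline. Illustrative usage, assuming the post-patch API:

#include "llvm/Support/CrashRecoveryContext.h"
#include <vector>

bool processSafely(std::vector<int> &Work) {
  llvm::CrashRecoveryContext CRC;
  // The lambda captures Work by reference; with the old
  // (void(*)(void*), void*) interface this needed a hand-written
  // dispatch function and a context struct.
  return CRC.RunSafely([&] {
    for (int &V : Work)
      V *= 2;
  });
}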
- sys::AddSignalHandler(&debug_user_sig_handler, 0); + sys::AddSignalHandler(&debug_user_sig_handler, nullptr); // Otherwise we've already set the debug stream buffer size to // zero, disabling buffering so it will output directly to errs(). } diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index 6604cc7..c9efa61 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -100,7 +100,7 @@ const char *llvm::dwarf::TagString(unsigned Tag) { return "DW_TAG_GNU_formal_parameter_pack"; case DW_TAG_APPLE_property: return "DW_TAG_APPLE_property"; } - return 0; + return nullptr; } /// ChildrenString - Return the string for the specified children flag. @@ -110,7 +110,7 @@ const char *llvm::dwarf::ChildrenString(unsigned Children) { case DW_CHILDREN_no: return "DW_CHILDREN_no"; case DW_CHILDREN_yes: return "DW_CHILDREN_yes"; } - return 0; + return nullptr; } /// AttributeString - Return the string for the specified attribute. @@ -271,7 +271,7 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_GNU_pubnames: return "DW_AT_GNU_pubnames"; case DW_AT_GNU_pubtypes: return "DW_AT_GNU_pubtypes"; } - return 0; + return nullptr; } /// FormEncodingString - Return the string for the specified form encoding. @@ -308,7 +308,7 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) { case DW_FORM_GNU_addr_index: return "DW_FORM_GNU_addr_index"; case DW_FORM_GNU_str_index: return "DW_FORM_GNU_str_index"; } - return 0; + return nullptr; } /// OperationEncodingString - Return the string for the specified operation @@ -477,7 +477,7 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { case DW_OP_GNU_addr_index: return "DW_OP_GNU_addr_index"; case DW_OP_GNU_const_index: return "DW_OP_GNU_const_index"; } - return 0; + return nullptr; } /// AttributeEncodingString - Return the string for the specified attribute @@ -503,7 +503,7 @@ const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) { case DW_ATE_lo_user: return "DW_ATE_lo_user"; case DW_ATE_hi_user: return "DW_ATE_hi_user"; } - return 0; + return nullptr; } /// DecimalSignString - Return the string for the specified decimal sign @@ -516,7 +516,7 @@ const char *llvm::dwarf::DecimalSignString(unsigned Sign) { case DW_DS_leading_separate: return "DW_DS_leading_separate"; case DW_DS_trailing_separate: return "DW_DS_trailing_separate"; } - return 0; + return nullptr; } /// EndianityString - Return the string for the specified endianity. @@ -529,7 +529,7 @@ const char *llvm::dwarf::EndianityString(unsigned Endian) { case DW_END_lo_user: return "DW_END_lo_user"; case DW_END_hi_user: return "DW_END_hi_user"; } - return 0; + return nullptr; } /// AccessibilityString - Return the string for the specified accessibility. @@ -541,7 +541,7 @@ const char *llvm::dwarf::AccessibilityString(unsigned Access) { case DW_ACCESS_protected: return "DW_ACCESS_protected"; case DW_ACCESS_private: return "DW_ACCESS_private"; } - return 0; + return nullptr; } /// VisibilityString - Return the string for the specified visibility. @@ -552,7 +552,7 @@ const char *llvm::dwarf::VisibilityString(unsigned Visibility) { case DW_VIS_exported: return "DW_VIS_exported"; case DW_VIS_qualified: return "DW_VIS_qualified"; } - return 0; + return nullptr; } /// VirtualityString - Return the string for the specified virtuality. 
@@ -563,7 +563,7 @@ const char *llvm::dwarf::VirtualityString(unsigned Virtuality) { case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual"; case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual"; } - return 0; + return nullptr; } /// LanguageString - Return the string for the specified language. @@ -600,7 +600,7 @@ const char *llvm::dwarf::LanguageString(unsigned Language) { case DW_LANG_lo_user: return "DW_LANG_lo_user"; case DW_LANG_hi_user: return "DW_LANG_hi_user"; } - return 0; + return nullptr; } /// CaseString - Return the string for the specified identifier case. @@ -612,7 +612,7 @@ const char *llvm::dwarf::CaseString(unsigned Case) { case DW_ID_down_case: return "DW_ID_down_case"; case DW_ID_case_insensitive: return "DW_ID_case_insensitive"; } - return 0; + return nullptr; } /// ConventionString - Return the string for the specified calling convention. @@ -625,7 +625,7 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) { case DW_CC_lo_user: return "DW_CC_lo_user"; case DW_CC_hi_user: return "DW_CC_hi_user"; } - return 0; + return nullptr; } /// InlineCodeString - Return the string for the specified inline code. @@ -637,7 +637,7 @@ const char *llvm::dwarf::InlineCodeString(unsigned Code) { case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined"; case DW_INL_declared_inlined: return "DW_INL_declared_inlined"; } - return 0; + return nullptr; } /// ArrayOrderString - Return the string for the specified array order. @@ -647,7 +647,7 @@ const char *llvm::dwarf::ArrayOrderString(unsigned Order) { case DW_ORD_row_major: return "DW_ORD_row_major"; case DW_ORD_col_major: return "DW_ORD_col_major"; } - return 0; + return nullptr; } /// DiscriminantString - Return the string for the specified discriminant @@ -657,7 +657,7 @@ const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) { case DW_DSC_label: return "DW_DSC_label"; case DW_DSC_range: return "DW_DSC_range"; } - return 0; + return nullptr; } /// LNStandardString - Return the string for the specified line number standard. @@ -677,7 +677,7 @@ const char *llvm::dwarf::LNStandardString(unsigned Standard) { case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin"; case DW_LNS_set_isa: return "DW_LNS_set_isa"; } - return 0; + return nullptr; } /// LNExtendedString - Return the string for the specified line number extended @@ -692,7 +692,7 @@ const char *llvm::dwarf::LNExtendedString(unsigned Encoding) { case DW_LNE_lo_user: return "DW_LNE_lo_user"; case DW_LNE_hi_user: return "DW_LNE_hi_user"; } - return 0; + return nullptr; } /// MacinfoString - Return the string for the specified macinfo type encodings. 
@@ -706,7 +706,7 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) { case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; } - return 0; + return nullptr; } /// CallFrameString - Return the string for the specified call frame instruction @@ -745,7 +745,7 @@ const char *llvm::dwarf::CallFrameString(unsigned Encoding) { case DW_CFA_lo_user: return "DW_CFA_lo_user"; case DW_CFA_hi_user: return "DW_CFA_hi_user"; } - return 0; + return nullptr; } const char *llvm::dwarf::AtomTypeString(unsigned AT) { @@ -761,7 +761,7 @@ const char *llvm::dwarf::AtomTypeString(unsigned AT) { case DW_ATOM_type_flags: return "DW_ATOM_type_flags"; } - return 0; + return nullptr; } const char *llvm::dwarf::GDBIndexEntryKindString(GDBIndexEntryKind Kind) { diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 5d77153..82d7c0c 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -51,14 +51,14 @@ using namespace llvm::sys; //=== independent code. //===----------------------------------------------------------------------===// -static DenseSet *OpenedHandles = 0; +static DenseSet *OpenedHandles = nullptr; DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, std::string *errMsg) { SmartScopedLock lock(*SymbolsMutex); void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL); - if (handle == 0) { + if (!handle) { if (errMsg) *errMsg = dlerror(); return DynamicLibrary(); } @@ -66,11 +66,11 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, #ifdef __CYGWIN__ // Cygwin searches symbols only in the main // with the handle of dlopen(NULL, RTLD_GLOBAL). - if (filename == NULL) + if (!filename) handle = RTLD_DEFAULT; #endif - if (OpenedHandles == 0) + if (!OpenedHandles) OpenedHandles = new DenseSet(); // If we've already loaded this library, dlclose() the handle in order to @@ -83,7 +83,7 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) { if (!isValid()) - return NULL; + return nullptr; return dlsym(Data, symbolName); } @@ -166,7 +166,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { #endif #undef EXPLICIT_SYMBOL - return 0; + return nullptr; } #endif // LLVM_ON_WIN32 diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 1aa8303..342c4f0 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -34,8 +34,8 @@ using namespace llvm; -static fatal_error_handler_t ErrorHandler = 0; -static void *ErrorHandlerUserData = 0; +static fatal_error_handler_t ErrorHandler = nullptr; +static void *ErrorHandlerUserData = nullptr; void llvm::install_fatal_error_handler(fatal_error_handler_t handler, void *user_data) { @@ -47,7 +47,7 @@ void llvm::install_fatal_error_handler(fatal_error_handler_t handler, } void llvm::remove_fatal_error_handler() { - ErrorHandler = 0; + ErrorHandler = nullptr; } void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) { diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index 8f2c9fc..49311c2 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/FileOutputBuffer.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" #include 
"llvm/Support/system_error.h" @@ -85,19 +84,9 @@ error_code FileOutputBuffer::create(StringRef FilePath, return error_code::success(); } -error_code FileOutputBuffer::create(StringRef FilePath, - size_t Size, - OwningPtr &Result, - unsigned Flags) { - std::unique_ptr FOB; - error_code ec = create(FilePath, Size, FOB, Flags); - Result = std::move(FOB); - return ec; -} - error_code FileOutputBuffer::commit(int64_t NewSmallerSize) { // Unmap buffer, letting OS flush dirty pages to file on disk. - Region.reset(0); + Region.reset(nullptr); // If requested, resize file as part of commit. if ( NewSmallerSize != -1 ) { diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 145f12d..4635114 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -190,7 +190,7 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { // The low bit is set if this is the pointer back to the bucket. if (reinterpret_cast(NextInBucketPtr) & 1) - return 0; + return nullptr; return static_cast(NextInBucketPtr); } @@ -262,7 +262,7 @@ void FoldingSetImpl::GrowHashTable() { while (Node *NodeInBucket = GetNextPtr(Probe)) { // Figure out the next link, remove NodeInBucket from the old link. Probe = NodeInBucket->getNextInBucket(); - NodeInBucket->SetNextInBucket(0); + NodeInBucket->SetNextInBucket(nullptr); // Insert the node into the new bucket, after recomputing the hash. InsertNode(NodeInBucket, @@ -285,7 +285,7 @@ FoldingSetImpl::Node void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets); void *Probe = *Bucket; - InsertPos = 0; + InsertPos = nullptr; FoldingSetNodeID TempID; while (Node *NodeInBucket = GetNextPtr(Probe)) { @@ -298,14 +298,14 @@ FoldingSetImpl::Node // Didn't find the node, return null with the bucket as the InsertPos. InsertPos = Bucket; - return 0; + return nullptr; } /// InsertNode - Insert the specified node into the folding set, knowing that it /// is not already in the map. InsertPos must be obtained from /// FindNodeOrInsertPos. void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { - assert(N->getNextInBucket() == 0); + assert(!N->getNextInBucket()); // Do we need to grow the hashtable? if (NumNodes+1 > NumBuckets*2) { GrowHashTable(); @@ -323,7 +323,7 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { // If this is the first insertion into this bucket, its next pointer will be // null. Pretend as if it pointed to itself, setting the low bit to indicate // that it is a pointer to the bucket. - if (Next == 0) + if (!Next) Next = reinterpret_cast(reinterpret_cast(Bucket)|1); // Set the node's next pointer, and make the bucket point to the node. @@ -337,10 +337,10 @@ bool FoldingSetImpl::RemoveNode(Node *N) { // Because each bucket is a circular list, we don't need to compute N's hash // to remove it. void *Ptr = N->getNextInBucket(); - if (Ptr == 0) return false; // Not in folding set. + if (!Ptr) return false; // Not in folding set. --NumNodes; - N->SetNextInBucket(0); + N->SetNextInBucket(nullptr); // Remember what N originally pointed to, either a bucket or another node. void *NodeNextPtr = Ptr; @@ -390,7 +390,7 @@ FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) { // Skip to the first non-null non-self-cycle bucket. 
while (*Bucket != reinterpret_cast(-1) && - (*Bucket == 0 || GetNextPtr(*Bucket) == 0)) + (!*Bucket || !GetNextPtr(*Bucket))) ++Bucket; NodePtr = static_cast(*Bucket); @@ -410,7 +410,7 @@ void FoldingSetIteratorImpl::advance() { do { ++Bucket; } while (*Bucket != reinterpret_cast(-1) && - (*Bucket == 0 || GetNextPtr(*Bucket) == 0)); + (!*Bucket || !GetNextPtr(*Bucket))); NodePtr = static_cast(*Bucket); } @@ -420,5 +420,5 @@ void FoldingSetIteratorImpl::advance() { // FoldingSetBucketIteratorImpl Implementation FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) { - Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket; + Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket; } diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp index 9febf66..618ec26 100644 --- a/lib/Support/FormattedStream.cpp +++ b/lib/Support/FormattedStream.cpp @@ -81,7 +81,7 @@ void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { TheStream->write(Ptr, Size); // Reset the scanning pointer. - Scanned = 0; + Scanned = nullptr; } /// fouts() - This returns a reference to a formatted_raw_ostream for diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index 83aa255..f5b2943 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -83,7 +83,7 @@ static bool LLVM_ATTRIBUTE_UNUSED ExecGraphViewer(StringRef ExecPath, std::vector &args, StringRef Filename, bool wait, std::string &ErrMsg) { if (wait) { - if (sys::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) { + if (sys::ExecuteAndWait(ExecPath, &args[0],nullptr,nullptr,0,0,&ErrMsg)) { errs() << "Error: " << ErrMsg << "\n"; return false; } @@ -91,7 +91,7 @@ ExecGraphViewer(StringRef ExecPath, std::vector &args, errs() << " done. \n"; } else { - sys::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg); + sys::ExecuteNoWait(ExecPath, &args[0],nullptr,nullptr,0,&ErrMsg); errs() << "Remember to erase graph file: " << Filename.str() << "\n"; } return true; @@ -108,7 +108,7 @@ void llvm::DisplayGraph(StringRef FilenameRef, bool wait, std::vector args; args.push_back(Graphviz.c_str()); args.push_back(Filename.c_str()); - args.push_back(0); + args.push_back(nullptr); errs() << "Running 'Graphviz' program... "; if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg)) diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index b6e2cb1..fd0472e 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -39,6 +39,8 @@ #include #endif +#define DEBUG_TYPE "host-detection" + //===----------------------------------------------------------------------===// // // Implementations of the CPU detection routines @@ -221,6 +223,7 @@ StringRef sys::getHostCPUName() { (EBX & 0x20); GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; + bool HasTBM = (ECX >> 21) & 0x1; if (memcmp(text.c, "GenuineIntel", 12) == 0) { switch (Family) { @@ -433,9 +436,11 @@ StringRef sys::getHostCPUName() { case 21: if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. 
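Aside on GetNextPtr() in the FoldingSet hunks above: each bucket is a circular list, and the link that points back at the bucket is distinguished from node pointers by a set low bit (the pointers involved are at least 2-byte aligned, so that bit is otherwise always zero). The tagging idiom as a standalone sketch:

#include <cassert>
#include <cstdint>

struct NodeSketch { void *NextInBucket; };

// Tag a bucket address by setting its low bit.
static void *tagBucket(void **Bucket) {
  return reinterpret_cast<void *>(reinterpret_cast<intptr_t>(Bucket) | 1);
}

// Returns the next node, or nullptr if the link leads back to the bucket.
static NodeSketch *getNextPtrSketch(void *NextInBucketPtr) {
  if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
    return nullptr;
  return static_cast<NodeSketch *>(NextInBucketPtr);
}

int main() {
  void *Bucket[1];
  assert(getNextPtrSketch(tagBucket(Bucket)) == nullptr); // Back-pointer.
  NodeSketch N = {nullptr};
  assert(getNextPtrSketch(&N) == &N);                     // Real node.
  return 0;
}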
return "btver1"; + if (Model >= 0x50) + return "bdver4"; // 50h-6Fh: Excavator if (Model >= 0x30) return "bdver3"; // 30h-3Fh: Steamroller - if (Model >= 0x10) + if (Model >= 0x10 || HasTBM) return "bdver2"; // 10h-1Fh: Piledriver return "bdver1"; // 00h-0Fh: Bulldozer case 22: @@ -681,7 +686,7 @@ StringRef sys::getHostCPUName() { } #endif -#if defined(__linux__) && defined(__arm__) +#if defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) bool sys::getHostCPUFeatures(StringMap &Features) { std::string Err; DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err); @@ -710,8 +715,24 @@ bool sys::getHostCPUFeatures(StringMap &Features) { break; } +#if defined(__aarch64__) + // Keep track of which crypto features we have seen + enum { + CAP_AES = 0x1, + CAP_PMULL = 0x2, + CAP_SHA1 = 0x4, + CAP_SHA2 = 0x8 + }; + uint32_t crypto = 0; +#endif + for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { StringRef LLVMFeatureStr = StringSwitch(CPUFeatures[I]) +#if defined(__aarch64__) + .Case("asimd", "neon") + .Case("fp", "fp-armv8") + .Case("crc32", "crc") +#else .Case("half", "fp16") .Case("neon", "neon") .Case("vfpv3", "vfp3") @@ -719,12 +740,32 @@ bool sys::getHostCPUFeatures(StringMap &Features) { .Case("vfpv4", "vfp4") .Case("idiva", "hwdiv-arm") .Case("idivt", "hwdiv") +#endif .Default(""); +#if defined(__aarch64__) + // We need to check crypto seperately since we need all of the crypto + // extensions to enable the subtarget feature + if (CPUFeatures[I] == "aes") + crypto |= CAP_AES; + else if (CPUFeatures[I] == "pmull") + crypto |= CAP_PMULL; + else if (CPUFeatures[I] == "sha1") + crypto |= CAP_SHA1; + else if (CPUFeatures[I] == "sha2") + crypto |= CAP_SHA2; +#endif + if (LLVMFeatureStr != "") Features.GetOrCreateValue(LLVMFeatureStr).setValue(true); } +#if defined(__aarch64__) + // If we have all crypto bits we can add the feature + if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2)) + Features.GetOrCreateValue("crypto").setValue(true); +#endif + return true; } #else diff --git a/lib/Support/IntervalMap.cpp b/lib/Support/IntervalMap.cpp index 4dfcc40..e11a7f2 100644 --- a/lib/Support/IntervalMap.cpp +++ b/lib/Support/IntervalMap.cpp @@ -58,7 +58,7 @@ void Path::moveLeft(unsigned Level) { } } else if (height() < Level) // end() may have created a height=0 path. - path.resize(Level + 1, Entry(0, 0, 0)); + path.resize(Level + 1, Entry(nullptr, 0, 0)); // NR is the subtree containing our left sibling. --path[l].offset; diff --git a/lib/Support/LineIterator.cpp b/lib/Support/LineIterator.cpp index 056d817..947a8fb 100644 --- a/lib/Support/LineIterator.cpp +++ b/lib/Support/LineIterator.cpp @@ -13,9 +13,10 @@ using namespace llvm; line_iterator::line_iterator(const MemoryBuffer &Buffer, char CommentMarker) - : Buffer(Buffer.getBufferSize() ? &Buffer : 0), + : Buffer(Buffer.getBufferSize() ? &Buffer : nullptr), CommentMarker(CommentMarker), LineNumber(1), - CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : 0, 0) { + CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : nullptr, + 0) { // Ensure that if we are constructed on a non-empty memory buffer that it is // a null terminated buffer. if (Buffer.getBufferSize()) { @@ -53,7 +54,7 @@ void line_iterator::advance() { if (*Pos == '\0') { // We've hit the end of the buffer, reset ourselves to the end state. 
- Buffer = 0; + Buffer = nullptr; CurrentLine = StringRef(); return; } diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index cd1cbcb..9b4bfbe 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -43,8 +43,11 @@ LockFileManager::readLockFile(StringRef LockFileName) { std::tie(Hostname, PIDStr) = getToken(MB->getBuffer(), " "); PIDStr = PIDStr.substr(PIDStr.find_first_not_of(" ")); int PID; - if (!PIDStr.getAsInteger(10, PID)) - return std::make_pair(std::string(Hostname), PID); + if (!PIDStr.getAsInteger(10, PID)) { + auto Owner = std::make_pair(std::string(Hostname), PID); + if (processStillExecuting(Owner.first, Owner.second)) + return Owner; + } // Delete the lock file. It's invalid anyway. sys::fs::remove(LockFileName); @@ -171,9 +174,9 @@ LockFileManager::~LockFileManager() { sys::fs::remove(UniqueLockFileName.str()); } -void LockFileManager::waitForUnlock() { +LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() { if (getState() != LFS_Shared) - return; + return Res_Success; #if LLVM_ON_WIN32 unsigned long Interval = 1; @@ -193,7 +196,7 @@ void LockFileManager::waitForUnlock() { #if LLVM_ON_WIN32 Sleep(Interval); #else - nanosleep(&Interval, NULL); + nanosleep(&Interval, nullptr); #endif bool LockFileJustDisappeared = false; @@ -211,7 +214,7 @@ void LockFileManager::waitForUnlock() { // available now. if (LockFileGone) { if (sys::fs::exists(FileName.str())) { - return; + return Res_Success; } // The lock file is gone, so now we're waiting for the original file to @@ -234,7 +237,7 @@ void LockFileManager::waitForUnlock() { // owning the lock died without cleaning up, just bail out. if (!LockFileGone && !processStillExecuting((*Owner).first, (*Owner).second)) { - return; + return Res_OwnerDied; } // Exponentially increase the time we wait for the lock to be removed. @@ -257,4 +260,5 @@ void LockFileManager::waitForUnlock() { ); // Give up. + return Res_Timeout; } diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 098cccb..6a1c2a5 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -17,15 +17,16 @@ #include using namespace llvm; -static const ManagedStaticBase *StaticList = 0; +static const ManagedStaticBase *StaticList = nullptr; void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { + assert(Creator); if (llvm_is_multithreaded()) { llvm_acquire_global_lock(); - if (Ptr == 0) { - void* tmp = Creator ? Creator() : 0; + if (!Ptr) { + void* tmp = Creator(); TsanHappensBefore(this); sys::MemoryFence(); @@ -45,9 +46,9 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), llvm_release_global_lock(); } else { - assert(Ptr == 0 && DeleterFn == 0 && Next == 0 && + assert(!Ptr && !DeleterFn && !Next && "Partially initialized ManagedStatic!?"); - Ptr = Creator ? Creator() : 0; + Ptr = Creator(); DeleterFn = Deleter; // Add to list of managed statics. @@ -62,14 +63,14 @@ void ManagedStaticBase::destroy() const { "Not destroyed in reverse order of construction?"); // Unlink from list. StaticList = Next; - Next = 0; + Next = nullptr; // Destroy memory. DeleterFn(Ptr); // Cleanup. - Ptr = 0; - DeleterFn = 0; + Ptr = nullptr; + DeleterFn = nullptr; } /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. 
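The bulk of the hunks above, and in the rest of this patch, mechanically replace 0 and NULL with nullptr now that the tree builds as C++11. As a minimal standalone sketch of why the substitution is more than cosmetic (illustrative only, not code from this patch; the dispatch() overloads are hypothetical): nullptr has its own type, std::nullptr_t, so it resolves as a pointer in overload resolution, while a literal 0 silently selects an integer overload.

#include <iostream>

// Hypothetical overload pair used only to illustrate the pitfall.
static void dispatch(int) { std::cout << "int overload\n"; }
static void dispatch(char *) { std::cout << "pointer overload\n"; }

int main() {
  dispatch(0);       // Binds to dispatch(int): 0 is an integer literal.
  dispatch(nullptr); // Binds to dispatch(char *): nullptr converts to any
                     // pointer type but never to int.
  // dispatch(NULL); // Ambiguous or the int overload, depending on whether
                     // the platform defines NULL as 0, 0L, or similar.
  return 0;
}

The same property is what lets checks like assert(!Ptr) above stay behavior-preserving: a nullptr-valued pointer converts to false in boolean contexts exactly as a 0-valued one did.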
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 2d593a8..629d885 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/MemoryBuffer.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" #include "llvm/Support/Errno.h" @@ -27,19 +26,11 @@ #include #include #include -#include #include #if !defined(_MSC_VER) && !defined(__MINGW32__) #include #else #include -// Simplistic definitinos of these macros for use in getOpenFile. -#ifndef S_ISREG -#define S_ISREG(x) (1) -#endif -#ifndef S_ISBLK -#define S_ISBLK(x) (0) -#endif #endif using namespace llvm; @@ -117,7 +108,7 @@ MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData, StringRef BufferName) { MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName); - if (!Buf) return 0; + if (!Buf) return nullptr; memcpy(const_cast(Buf->getBufferStart()), InputData.data(), InputData.size()); return Buf; @@ -137,7 +128,7 @@ MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size, RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1, 16); size_t RealLen = AlignedStringLen + Size + 1; char *Mem = static_cast(operator new(RealLen, std::nothrow)); - if (!Mem) return 0; + if (!Mem) return nullptr; // The name is stored after the class itself. CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName); @@ -155,7 +146,7 @@ MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size, /// the MemoryBuffer object. MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) { MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName); - if (!SB) return 0; + if (!SB) return nullptr; memset(const_cast(SB->getBufferStart()), 0, Size); return SB; } @@ -173,15 +164,6 @@ error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename, return getFile(Filename, Result, FileSize); } -error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename, - OwningPtr &Result, - int64_t FileSize) { - std::unique_ptr MB; - error_code ec = getFileOrSTDIN(Filename, MB, FileSize); - Result = std::move(MB); - return ec; -} - //===----------------------------------------------------------------------===// // MemoryBuffer::getFile implementation. @@ -252,44 +234,38 @@ static error_code getMemoryBufferForStream(int FD, static error_code getFileAux(const char *Filename, std::unique_ptr &Result, int64_t FileSize, - bool RequiresNullTerminator); + bool RequiresNullTerminator, + bool IsVolatileSize); error_code MemoryBuffer::getFile(Twine Filename, std::unique_ptr &Result, int64_t FileSize, - bool RequiresNullTerminator) { + bool RequiresNullTerminator, + bool IsVolatileSize) { // Ensure the path is null terminated. 
SmallString<256> PathBuf; StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf); return getFileAux(NullTerminatedName.data(), Result, FileSize, - RequiresNullTerminator); -} - -error_code MemoryBuffer::getFile(Twine Filename, - OwningPtr &Result, - int64_t FileSize, - bool RequiresNullTerminator) { - std::unique_ptr MB; - error_code ec = getFile(Filename, MB, FileSize, RequiresNullTerminator); - Result = std::move(MB); - return ec; + RequiresNullTerminator, IsVolatileSize); } static error_code getOpenFileImpl(int FD, const char *Filename, std::unique_ptr &Result, uint64_t FileSize, uint64_t MapSize, - int64_t Offset, bool RequiresNullTerminator); + int64_t Offset, bool RequiresNullTerminator, + bool IsVolatileSize); static error_code getFileAux(const char *Filename, std::unique_ptr &Result, int64_t FileSize, - bool RequiresNullTerminator) { + bool RequiresNullTerminator, + bool IsVolatileSize) { int FD; error_code EC = sys::fs::openFileForRead(Filename, FD); if (EC) return EC; error_code ret = getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0, - RequiresNullTerminator); + RequiresNullTerminator, IsVolatileSize); close(FD); return ret; } @@ -299,7 +275,14 @@ static bool shouldUseMmap(int FD, size_t MapSize, off_t Offset, bool RequiresNullTerminator, - int PageSize) { + int PageSize, + bool IsVolatileSize) { + // mmap may leave the buffer without null terminator if the file size changed + // by the time the last page is mapped in, so avoid it if the file size is + // likely to change. + if (IsVolatileSize) + return false; + // We don't use mmap for small files because this can severely fragment our // address space. if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize) @@ -315,9 +298,8 @@ static bool shouldUseMmap(int FD, // RequiresNullTerminator = false and MapSize != -1. if (FileSize == size_t(-1)) { sys::fs::file_status Status; - error_code EC = sys::fs::status(FD, Status); - if (EC) - return EC; + if (sys::fs::status(FD, Status)) + return false; FileSize = Status.getSize(); } @@ -328,15 +310,6 @@ static bool shouldUseMmap(int FD, if (End != FileSize) return false; -#if defined(_WIN32) || defined(__CYGWIN__) - // Don't peek the next page if file is multiple of *physical* pagesize(4k) - // but is not multiple of AllocationGranularity(64k), - // when a null terminator is required. - // FIXME: It's not good to hardcode 4096 here. dwPageSize shows 4096. - if ((FileSize & (4096 - 1)) == 0) - return false; -#endif - // Don't try to map files that are exactly a multiple of the system page size // if we need a null terminator. if ((FileSize & (PageSize -1)) == 0) @@ -348,7 +321,8 @@ static bool shouldUseMmap(int FD, static error_code getOpenFileImpl(int FD, const char *Filename, std::unique_ptr &Result, uint64_t FileSize, uint64_t MapSize, - int64_t Offset, bool RequiresNullTerminator) { + int64_t Offset, bool RequiresNullTerminator, + bool IsVolatileSize) { static int PageSize = sys::process::get_self()->page_size(); // Default is to map the full file. 
@@ -375,7 +349,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename, } if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, - PageSize)) { + PageSize, IsVolatileSize)) { error_code EC; Result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile( RequiresNullTerminator, FD, MapSize, Offset, EC)); @@ -412,9 +386,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename, return error_code(errno, posix_category()); } if (NumRead == 0) { - assert(0 && "We got inaccurate FileSize value or fstat reported an " - "invalid file size."); - *BufPtr = '\0'; // null-terminate at the actual size. + memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer. break; } BytesLeft -= NumRead; @@ -428,35 +400,18 @@ static error_code getOpenFileImpl(int FD, const char *Filename, error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, std::unique_ptr &Result, uint64_t FileSize, - bool RequiresNullTerminator) { + bool RequiresNullTerminator, + bool IsVolatileSize) { return getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0, - RequiresNullTerminator); -} - -error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, - OwningPtr &Result, - uint64_t FileSize, - bool RequiresNullTerminator) { - std::unique_ptr MB; - error_code ec = getOpenFileImpl(FD, Filename, MB, FileSize, FileSize, 0, - RequiresNullTerminator); - Result = std::move(MB); - return ec; + RequiresNullTerminator, IsVolatileSize); } error_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename, std::unique_ptr &Result, - uint64_t MapSize, int64_t Offset) { - return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false); -} - -error_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename, - OwningPtr &Result, - uint64_t MapSize, int64_t Offset) { - std::unique_ptr MB; - error_code ec = getOpenFileImpl(FD, Filename, MB, -1, MapSize, Offset, false); - Result = std::move(MB); - return ec; + uint64_t MapSize, int64_t Offset, + bool IsVolatileSize) { + return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false, + IsVolatileSize); } //===----------------------------------------------------------------------===// @@ -472,10 +427,3 @@ error_code MemoryBuffer::getSTDIN(std::unique_ptr &Result) { return getMemoryBufferForStream(0, "", Result); } - -error_code MemoryBuffer::getSTDIN(OwningPtr &Result) { - std::unique_ptr MB; - error_code ec = getSTDIN(MB); - Result = std::move(MB); - return ec; -} diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index 37c9d73..c8d3844 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -42,7 +42,7 @@ using namespace sys; // Construct a Mutex using pthread calls MutexImpl::MutexImpl( bool recursive) - : data_(0) + : data_(nullptr) { // Declare the pthread_mutex data structures pthread_mutex_t* mutex = @@ -75,7 +75,7 @@ MutexImpl::MutexImpl( bool recursive) MutexImpl::~MutexImpl() { pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != 0); + assert(mutex != nullptr); pthread_mutex_destroy(mutex); free(mutex); } @@ -84,7 +84,7 @@ bool MutexImpl::acquire() { pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != 0); + assert(mutex != nullptr); int errorcode = pthread_mutex_lock(mutex); return errorcode == 0; @@ -94,7 +94,7 @@ bool MutexImpl::release() { pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != 0); + assert(mutex != nullptr); int errorcode = pthread_mutex_unlock(mutex); return errorcode == 0; @@ -104,7 +104,7 @@ bool MutexImpl::tryacquire() { 
pthread_mutex_t* mutex = static_cast(data_); - assert(mutex != 0); + assert(mutex != nullptr); int errorcode = pthread_mutex_trylock(mutex); return errorcode == 0; diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index 5b73631..b8d676f 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -569,6 +569,12 @@ bool is_separator(char value) { } } +static const char preferred_separator_string[] = { preferred_separator, '\0' }; + +const StringRef get_separator() { + return preferred_separator_string; +} + void system_temp_directory(bool erasedOnReboot, SmallVectorImpl &result) { result.clear(); @@ -577,7 +583,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl &result) { // macros defined in on darwin >= 9 int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR : _CS_DARWIN_USER_CACHE_DIR; - size_t ConfLen = confstr(ConfName, 0, 0); + size_t ConfLen = confstr(ConfName, nullptr, 0); if (ConfLen > 0) { do { result.resize(ConfLen); diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index d4e205c..987778a 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -46,7 +46,7 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){ /// PrintCurStackTrace - Print the current stack trace to the specified stream. static void PrintCurStackTrace(raw_ostream &OS) { // Don't print an empty trace. - if (PrettyStackTraceHead->get() == 0) return; + if (!PrettyStackTraceHead->get()) return; // If there are pretty stack frames registered, walk and emit them. OS << "Stack dump:\n"; @@ -136,7 +136,7 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const { } static bool RegisterCrashPrinter() { - sys::AddSignalHandler(CrashHandler, 0); + sys::AddSignalHandler(CrashHandler, nullptr); return false; } diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp index 6a34f2d..3b6309c 100644 --- a/lib/Support/RWMutex.cpp +++ b/lib/Support/RWMutex.cpp @@ -44,7 +44,7 @@ using namespace sys; // Construct a RWMutex using pthread calls RWMutexImpl::RWMutexImpl() - : data_(0) + : data_(nullptr) { // Declare the pthread_rwlock data structures pthread_rwlock_t* rwlock = @@ -56,7 +56,7 @@ RWMutexImpl::RWMutexImpl() #endif // Initialize the rwlock - int errorcode = pthread_rwlock_init(rwlock, NULL); + int errorcode = pthread_rwlock_init(rwlock, nullptr); (void)errorcode; assert(errorcode == 0); @@ -68,7 +68,7 @@ RWMutexImpl::RWMutexImpl() RWMutexImpl::~RWMutexImpl() { pthread_rwlock_t* rwlock = static_cast(data_); - assert(rwlock != 0); + assert(rwlock != nullptr); pthread_rwlock_destroy(rwlock); free(rwlock); } @@ -77,7 +77,7 @@ bool RWMutexImpl::reader_acquire() { pthread_rwlock_t* rwlock = static_cast(data_); - assert(rwlock != 0); + assert(rwlock != nullptr); int errorcode = pthread_rwlock_rdlock(rwlock); return errorcode == 0; @@ -87,7 +87,7 @@ bool RWMutexImpl::reader_release() { pthread_rwlock_t* rwlock = static_cast(data_); - assert(rwlock != 0); + assert(rwlock != nullptr); int errorcode = pthread_rwlock_unlock(rwlock); return errorcode == 0; @@ -97,7 +97,7 @@ bool RWMutexImpl::writer_acquire() { pthread_rwlock_t* rwlock = static_cast(data_); - assert(rwlock != 0); + assert(rwlock != nullptr); int errorcode = pthread_rwlock_wrlock(rwlock); return errorcode == 0; @@ -107,7 +107,7 @@ bool RWMutexImpl::writer_release() { pthread_rwlock_t* rwlock = static_cast(data_); - assert(rwlock != 0); + assert(rwlock != nullptr); int errorcode = pthread_rwlock_unlock(rwlock); return errorcode == 0; diff 
--git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 1115534..f7fe1e4 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -43,7 +43,7 @@ bool Regex::isValid(std::string &Error) { if (!error) return true; - size_t len = llvm_regerror(error, preg, NULL, 0); + size_t len = llvm_regerror(error, preg, nullptr, 0); Error.resize(len - 1); llvm_regerror(error, preg, &Error[0], len); diff --git a/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/lib/Support/SearchForAddressOfSpecialSymbol.cpp index 2d23902..55f3320 100644 --- a/lib/Support/SearchForAddressOfSpecialSymbol.cpp +++ b/lib/Support/SearchForAddressOfSpecialSymbol.cpp @@ -48,7 +48,7 @@ static void *DoSearch(const char* symbolName) { #endif #undef EXPLICIT_SYMBOL - return 0; + return nullptr; } namespace llvm { diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 844e416..a80e095 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -103,7 +103,7 @@ const void * const *SmallPtrSetImplBase::FindBucketFor(const void *Ptr) const { unsigned ArraySize = CurArraySize; unsigned ProbeAmt = 1; const void *const *Array = CurArray; - const void *const *Tombstone = 0; + const void *const *Tombstone = nullptr; while (1) { // Found Ptr's bucket? if (Array[Bucket] == Ptr) diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 4bfd96a..acd75fb 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Locale.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" using namespace llvm; @@ -60,7 +61,7 @@ size_t SourceMgr::AddIncludeFile(const std::string &Filename, // If the file didn't exist directly, see if it's in an include path. for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) { - IncludedFile = IncludeDirectories[i] + "/" + Filename; + IncludedFile = IncludeDirectories[i] + sys::path::get_separator().data() + Filename; MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf); } @@ -114,7 +115,7 @@ SourceMgr::getLineAndColumn(SMLoc Loc, int BufferID) const { if (*Ptr == '\n') ++LineNo; // Allocate the line number cache if it doesn't exist. - if (LineNoCache == 0) + if (!LineNoCache) LineNoCache = new LineNoCacheTy(); // Update the line # cache. @@ -228,7 +229,7 @@ void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); } - Diagnostic.print(0, OS, ShowColors); + Diagnostic.print(nullptr, OS, ShowColors); } void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 9ac1f86..72a6d82 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -27,7 +27,7 @@ StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) { } // Otherwise, initialize it with zero buckets to avoid the allocation. - TheTable = 0; + TheTable = nullptr; NumBuckets = 0; NumItems = 0; NumTombstones = 0; @@ -70,7 +70,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) { while (1) { StringMapEntryBase *BucketItem = TheTable[BucketNo]; // If we found an empty bucket, this key isn't in the table yet, return it. - if (LLVM_LIKELY(BucketItem == 0)) { + if (LLVM_LIKELY(!BucketItem)) { // If we found a tombstone, we want to reuse the tombstone instead of an // empty bucket. This reduces probing. 
if (FirstTombstone != -1) { @@ -124,7 +124,7 @@ int StringMapImpl::FindKey(StringRef Key) const { while (1) { StringMapEntryBase *BucketItem = TheTable[BucketNo]; // If we found an empty bucket, this key isn't in the table yet, return. - if (LLVM_LIKELY(BucketItem == 0)) + if (LLVM_LIKELY(!BucketItem)) return -1; if (BucketItem == getTombstoneVal()) { @@ -166,7 +166,7 @@ void StringMapImpl::RemoveKey(StringMapEntryBase *V) { /// table, returning it. If the key is not in the table, this returns null. StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { int Bucket = FindKey(Key); - if (Bucket == -1) return 0; + if (Bucket == -1) return nullptr; StringMapEntryBase *Result = TheTable[Bucket]; TheTable[Bucket] = getTombstoneVal(); @@ -212,7 +212,7 @@ void StringMapImpl::RehashTable() { // Fast case, bucket available. unsigned FullHash = HashTable[I]; unsigned NewBucket = FullHash & (NewSize-1); - if (NewTableArray[NewBucket] == 0) { + if (!NewTableArray[NewBucket]) { NewTableArray[FullHash & (NewSize-1)] = Bucket; NewHashArray[FullHash & (NewSize-1)] = FullHash; continue; diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index bd2a37b..cde8258 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -281,7 +281,7 @@ void StringRef::split(SmallVectorImpl &A, // rest.data() is used to distinguish cases like "a," that splits into // "a" + "" and "a" that splits into "a" + 0. for (int splits = 0; - rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit); + rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit); ++splits) { std::pair p = rest.split(Separators); @@ -290,7 +290,7 @@ void StringRef::split(SmallVectorImpl &A, rest = p.second; } // If we have a tail left, add it. - if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty)) + if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty)) A.push_back(rest); } diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index 8d91a53..a008831 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -17,7 +17,7 @@ using namespace llvm; // Clients are responsible for avoid race conditions in registration. -static Target *FirstTarget = 0; +static Target *FirstTarget = nullptr; TargetRegistry::iterator TargetRegistry::begin() { return iterator(FirstTarget); @@ -29,7 +29,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName, // Allocate target machine. First, check whether the user has explicitly // specified an architecture to compile for. If so we have to look it up by // name, because it might be a backend that has no mapping to a target triple. - const Target *TheTarget = 0; + const Target *TheTarget = nullptr; if (!ArchName.empty()) { for (TargetRegistry::iterator it = TargetRegistry::begin(), ie = TargetRegistry::end(); it != ie; ++it) { @@ -41,7 +41,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName, if (!TheTarget) { Error = "error: invalid target '" + ArchName + "'.\n"; - return 0; + return nullptr; } // Adjust the triple to match (if known), otherwise stick with the @@ -53,11 +53,11 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName, // Get the target specific parser. 
std::string TempError; TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), TempError); - if (TheTarget == 0) { + if (!TheTarget) { Error = ": error: unable to get target for '" + TheTriple.getTriple() + "', see --version and --triple.\n"; - return 0; + return nullptr; } } @@ -69,16 +69,16 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT, // Provide special warning when no targets are initialized. if (begin() == end()) { Error = "Unable to find target for this triple (no targets are registered)"; - return 0; + return nullptr; } - const Target *Matching = 0; + const Target *Matching = nullptr; Triple::ArchType Arch = Triple(TT).getArch(); for (iterator it = begin(), ie = end(); it != ie; ++it) { if (it->ArchMatchFn(Arch)) { if (Matching) { Error = std::string("Cannot choose between targets \"") + Matching->Name + "\" and \"" + it->Name + "\""; - return 0; + return nullptr; } Matching = &*it; } @@ -87,7 +87,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT, if (!Matching) { Error = "No available targets are compatible with this triple, " "see -version for the available targets."; - return 0; + return nullptr; } return Matching; @@ -121,7 +121,7 @@ const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) { if (TheTarget && !TheTarget->hasJIT()) { Error = "No JIT compatible target available for this host"; - return 0; + return nullptr; } return TheTarget; diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp index aebbcad..2dec9eb 100644 --- a/lib/Support/ThreadLocal.cpp +++ b/lib/Support/ThreadLocal.cpp @@ -53,7 +53,7 @@ using namespace sys; ThreadLocalImpl::ThreadLocalImpl() : data() { static_assert(sizeof(pthread_key_t) <= sizeof(data), "size too big"); pthread_key_t* key = reinterpret_cast(&data); - int errorcode = pthread_key_create(key, NULL); + int errorcode = pthread_key_create(key, nullptr); assert(errorcode == 0); (void) errorcode; } @@ -78,7 +78,7 @@ const void* ThreadLocalImpl::getInstance() { } void ThreadLocalImpl::removeInstance() { - setInstance(0); + setInstance(nullptr); } } diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 9d7ac6c..1acfa79 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -21,7 +21,7 @@ using namespace llvm; static bool multithreaded_mode = false; -static sys::Mutex* global_lock = 0; +static sys::Mutex* global_lock = nullptr; bool llvm::llvm_start_multithreaded() { #if LLVM_ENABLE_THREADS != 0 @@ -73,7 +73,7 @@ struct ThreadInfo { static void *ExecuteOnThread_Dispatch(void *Arg) { ThreadInfo *TI = reinterpret_cast(Arg); TI->UserFn(TI->UserData); - return 0; + return nullptr; } void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData, @@ -97,7 +97,7 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData, goto error; // Wait for the thread and clean up. 
- ::pthread_join(Thread, 0); + ::pthread_join(Thread, nullptr); error: ::pthread_attr_destroy(&Attr); diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 7cf4d37..61465ae 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" @@ -77,7 +78,7 @@ raw_ostream *llvm::CreateInfoOutputFile() { } -static TimerGroup *DefaultTimerGroup = 0; +static TimerGroup *DefaultTimerGroup = nullptr; static TimerGroup *getDefaultTimerGroup() { TimerGroup *tmp = DefaultTimerGroup; sys::MemoryFence(); @@ -100,7 +101,7 @@ static TimerGroup *getDefaultTimerGroup() { //===----------------------------------------------------------------------===// void Timer::init(StringRef N) { - assert(TG == 0 && "Timer already initialized"); + assert(!TG && "Timer already initialized"); Name.assign(N.begin(), N.end()); Started = false; TG = getDefaultTimerGroup(); @@ -108,7 +109,7 @@ void Timer::init(StringRef N) { } void Timer::init(StringRef N, TimerGroup &tg) { - assert(TG == 0 && "Timer already initialized"); + assert(!TG && "Timer already initialized"); Name.assign(N.begin(), N.end()); Started = false; TG = &tg; @@ -235,11 +236,11 @@ static Timer &getNamedRegionTimer(StringRef Name) { NamedRegionTimer::NamedRegionTimer(StringRef Name, bool Enabled) - : TimeRegion(!Enabled ? 0 : &getNamedRegionTimer(Name)) {} + : TimeRegion(!Enabled ? nullptr : &getNamedRegionTimer(Name)) {} NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName, bool Enabled) - : TimeRegion(!Enabled ? 0 : &NamedGroupedTimers->get(Name, GroupName)) {} + : TimeRegion(!Enabled ? nullptr : &NamedGroupedTimers->get(Name, GroupName)){} //===----------------------------------------------------------------------===// // TimerGroup Implementation @@ -247,10 +248,10 @@ NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName, /// TimerGroupList - This is the global list of TimerGroups, maintained by the /// TimerGroup ctor/dtor and is protected by the TimerLock lock. -static TimerGroup *TimerGroupList = 0; +static TimerGroup *TimerGroupList = nullptr; TimerGroup::TimerGroup(StringRef name) - : Name(name.begin(), name.end()), FirstTimer(0) { + : Name(name.begin(), name.end()), FirstTimer(nullptr) { // Add the group to TimerGroupList. sys::SmartScopedLock L(*TimerLock); @@ -264,7 +265,7 @@ TimerGroup::TimerGroup(StringRef name) TimerGroup::~TimerGroup() { // If the timer group is destroyed before the timers it owns, accumulate and // print the timing data. - while (FirstTimer != 0) + while (FirstTimer) removeTimer(*FirstTimer); // Remove the group from the TimerGroupList. @@ -282,7 +283,7 @@ void TimerGroup::removeTimer(Timer &T) { if (T.Started) TimersToPrint.push_back(std::make_pair(T.Time, T.Name)); - T.TG = 0; + T.TG = nullptr; // Unlink the timer from our list. *T.Prev = T.Next; @@ -291,7 +292,7 @@ void TimerGroup::removeTimer(Timer &T) { // Print the report when all timers in this group are destroyed if some of // them were started. 
- if (FirstTimer != 0 || TimersToPrint.empty()) + if (FirstTimer || TimersToPrint.empty()) return; raw_ostream *OutStream = CreateInfoOutputFile(); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 71abb9d..b3d48fb 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -24,6 +24,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case arm: return "arm"; case armeb: return "armeb"; case arm64: return "arm64"; + case arm64_be: return "arm64_be"; case hexagon: return "hexagon"; case mips: return "mips"; case mipsel: return "mipsel"; @@ -57,7 +58,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { const char *Triple::getArchTypePrefix(ArchType Kind) { switch (Kind) { default: - return 0; + return nullptr; case aarch64: case aarch64_be: return "aarch64"; @@ -67,7 +68,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case thumb: case thumbeb: return "arm"; - case arm64: return "arm64"; + case arm64: + case arm64_be: return "arm64"; case ppc64: case ppc64le: @@ -178,6 +180,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("arm", arm) .Case("armeb", armeb) .Case("arm64", arm64) + .Case("arm64_be", arm64_be) .Case("mips", mips) .Case("mipsel", mipsel) .Case("mips64", mips64) @@ -210,7 +213,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { // Returns architecture name that is understood by the target assembler. const char *Triple::getArchNameForAssembler() { if (!isOSDarwin() && getVendor() != Triple::Apple) - return NULL; + return nullptr; return StringSwitch(getArchName()) .Case("i386", "i386") @@ -225,6 +228,7 @@ const char *Triple::getArchNameForAssembler() { .Cases("armv7", "thumbv7", "armv7") .Case("armeb", "armeb") .Case("arm64", "arm64") + .Case("arm64_be", "arm64") .Case("r600", "r600") .Case("nvptx", "nvptx") .Case("nvptx64", "nvptx64") @@ -232,7 +236,7 @@ const char *Triple::getArchNameForAssembler() { .Case("amdil", "amdil") .Case("spir", "spir") .Case("spir64", "spir64") - .Default(NULL); + .Default(nullptr); } static Triple::ArchType parseArch(StringRef ArchName) { @@ -257,6 +261,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("thumbeb", Triple::thumbeb) .StartsWith("thumbebv", Triple::thumbeb) .Case("arm64", Triple::arm64) + .Case("arm64_be", Triple::arm64_be) .Case("msp430", Triple::msp430) .Cases("mips", "mipseb", "mipsallegrex", Triple::mips) .Cases("mipsel", "mipsallegrexel", Triple::mipsel) @@ -797,6 +802,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { return 32; case llvm::Triple::arm64: + case llvm::Triple::arm64_be: case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::mips64: @@ -832,6 +838,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::aarch64: case Triple::aarch64_be: case Triple::arm64: + case Triple::arm64_be: case Triple::msp430: case Triple::systemz: case Triple::ppc64le: @@ -899,6 +906,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::systemz: case Triple::x86_64: case Triple::arm64: + case Triple::arm64_be: // Already 64-bit. 
break; diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 08cd34d..23b49b7 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -121,7 +121,7 @@ Memory::allocateMappedMemory(size_t NumBytes, Protect, MMFlags, fd, 0); if (Addr == MAP_FAILED) { if (NearBlock) //Try again without a near hint - return allocateMappedMemory(NumBytes, 0, PFlags, EC); + return allocateMappedMemory(NumBytes, nullptr, PFlags, EC); EC = error_code(errno, system_category()); return MemoryBlock(); @@ -139,13 +139,13 @@ Memory::allocateMappedMemory(size_t NumBytes, error_code Memory::releaseMappedMemory(MemoryBlock &M) { - if (M.Address == 0 || M.Size == 0) + if (M.Address == nullptr || M.Size == 0) return error_code::success(); if (0 != ::munmap(M.Address, M.Size)) return error_code(errno, system_category()); - M.Address = 0; + M.Address = nullptr; M.Size = 0; return error_code::success(); @@ -153,7 +153,7 @@ Memory::releaseMappedMemory(MemoryBlock &M) { error_code Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { - if (M.Address == 0 || M.Size == 0) + if (M.Address == nullptr || M.Size == 0) return error_code::success(); if (!Flags) @@ -203,7 +203,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock, ; void* start = NearBlock ? (unsigned char*)NearBlock->base() + - NearBlock->size() : 0; + NearBlock->size() : nullptr; #if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__)) void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC, @@ -214,7 +214,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock, #endif if (pa == MAP_FAILED) { if (NearBlock) //Try again without a near hint - return AllocateRWX(NumBytes, 0); + return AllocateRWX(NumBytes, nullptr); MakeErrMsg(ErrMsg, "Can't allocate RWX Memory"); return MemoryBlock(); @@ -246,7 +246,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock, } bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) { - if (M.Address == 0 || M.Size == 0) return false; + if (M.Address == nullptr || M.Size == 0) return false; if (0 != ::munmap(M.Address, M.Size)) return MakeErrMsg(ErrMsg, "Can't release RWX Memory"); return false; diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 1c91053..519a016 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -89,7 +89,7 @@ namespace { static error_code TempDir(SmallVectorImpl &result) { // FIXME: Don't use TMPDIR if program is SUID or SGID enabled. - const char *dir = 0; + const char *dir = nullptr; (dir = std::getenv("TMPDIR")) || (dir = std::getenv("TMP")) || (dir = std::getenv("TEMP")) || (dir = std::getenv("TEMPDIR")) || #ifdef P_tmpdir @@ -246,7 +246,7 @@ error_code current_path(SmallVectorImpl &result) { #endif while (true) { - if (::getcwd(result.data(), result.capacity()) == 0) { + if (::getcwd(result.data(), result.capacity()) == nullptr) { // See if there was a real error. 
if (errno != errc::not_enough_memory) return error_code(errno, system_category()); @@ -494,7 +494,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { #ifdef MAP_FILE flags |= MAP_FILE; #endif - Mapping = ::mmap(0, Size, prot, flags, FD, Offset); + Mapping = ::mmap(nullptr, Size, prot, flags, FD, Offset); if (Mapping == MAP_FAILED) return error_code(errno, system_category()); return error_code::success(); @@ -525,7 +525,7 @@ mapped_file_region::mapped_file_region(const Twine &path, ec = init(ofd, true, offset); if (ec) - Mapping = 0; + Mapping = nullptr; } mapped_file_region::mapped_file_region(int fd, @@ -545,7 +545,7 @@ mapped_file_region::mapped_file_region(int fd, ec = init(fd, closefd, offset); if (ec) - Mapping = 0; + Mapping = nullptr; } mapped_file_region::~mapped_file_region() { @@ -555,7 +555,7 @@ mapped_file_region::~mapped_file_region() { mapped_file_region::mapped_file_region(mapped_file_region &&other) : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) { - other.Mapping = 0; + other.Mapping = nullptr; } mapped_file_region::mapmode mapped_file_region::flags() const { @@ -587,7 +587,7 @@ error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path){ SmallString<128> path_null(path); DIR *directory = ::opendir(path_null.c_str()); - if (directory == 0) + if (!directory) return error_code(errno, system_category()); it.IterationHandle = reinterpret_cast(directory); @@ -608,9 +608,9 @@ error_code detail::directory_iterator_destruct(detail::DirIterState &it) { error_code detail::directory_iterator_increment(detail::DirIterState &it) { errno = 0; dirent *cur_dir = ::readdir(reinterpret_cast(it.IterationHandle)); - if (cur_dir == 0 && errno != 0) { + if (cur_dir == nullptr && errno != 0) { return error_code(errno, system_category()); - } else if (cur_dir != 0) { + } else if (cur_dir != nullptr) { StringRef name(cur_dir->d_name, NAMLEN(cur_dir)); if ((name.size() == 1 && name[0] == '.') || (name.size() == 2 && name[0] == '.' && name[1] == '.')) @@ -630,7 +630,7 @@ error_code get_magic(const Twine &path, uint32_t len, // Open path. std::FILE *file = std::fopen(Path.data(), "rb"); - if (file == 0) + if (!file) return error_code(errno, system_category()); // Reserve storage. @@ -667,7 +667,7 @@ error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, #ifdef MAP_FILE flags |= MAP_FILE; #endif - result = ::mmap(0, size, prot, flags, fd, file_offset); + result = ::mmap(nullptr, size, prot, flags, fd, file_offset); if (result == MAP_FAILED) { return error_code(errno, system_category()); } diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 9fb4356..8faa638 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -270,7 +270,7 @@ static bool terminalHasColors(int fd) { MutexGuard G(M); int errret = 0; - if (setupterm((char *)0, fd, &errret) != 0) + if (setupterm((char *)nullptr, fd, &errret) != 0) // Regardless of why, if we can't get terminfo, we shouldn't try to print // colors. return false; @@ -292,7 +292,7 @@ static bool terminalHasColors(int fd) { // Now extract the structure allocated by setupterm and free its memory // through a really silly dance. - struct term *termp = set_curterm((struct term *)0); + struct term *termp = set_curterm((struct term *)nullptr); (void)del_curterm(termp); // Drop any errors here. // Return true if we found a color capabilities for the current terminal. 
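The directory_iterator_increment() hunk in Path.inc above preserves a subtlety of the readdir(3) contract: a null return means end-of-directory only if errno was cleared to zero beforehand and is still zero afterwards. A self-contained sketch of the same protocol, assuming a POSIX system (the function name is illustrative, not an LLVM API):

#include <cerrno>
#include <cstdio>
#include <dirent.h>

// Lists a directory, distinguishing end-of-stream from a readdir() failure.
static bool listDirectory(const char *Path) {
  DIR *D = opendir(Path);
  if (!D)
    return false;             // opendir() failed; errno says why.
  while (true) {
    errno = 0;                // Must reset: readdir() reports errors via errno.
    dirent *E = readdir(D);
    if (!E) {
      bool Ok = (errno == 0); // Null entry with errno still 0 is a clean end.
      closedir(D);
      return Ok;
    }
    std::printf("%s\n", E->d_name);
  }
}

Without the errno reset, a stale errno value from an earlier call would make a normal end-of-directory look like an I/O error, which is exactly the case the code above guards against.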
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index b4df928..1225a9c 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -70,7 +70,7 @@ sys::FindProgramByName(const std::string& progName) { // Get the path. If its empty, we can't do anything to find it. const char *PathStr = getenv("PATH"); - if (PathStr == 0) + if (!PathStr) return ""; // Now we have a colon separated list of directories to search; try them. @@ -99,7 +99,7 @@ sys::FindProgramByName(const std::string& progName) { } static bool RedirectIO(const StringRef *Path, int FD, std::string* ErrMsg) { - if (Path == 0) // Noop + if (!Path) // Noop return false; std::string File; if (Path->empty()) @@ -129,7 +129,7 @@ static bool RedirectIO(const StringRef *Path, int FD, std::string* ErrMsg) { #ifdef HAVE_POSIX_SPAWN static bool RedirectIO_PS(const std::string *Path, int FD, std::string *ErrMsg, posix_spawn_file_actions_t *FileActions) { - if (Path == 0) // Noop + if (!Path) // Noop return false; const char *File; if (Path->empty()) @@ -195,7 +195,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, #ifdef HAVE_POSIX_SPAWN if (memoryLimit == 0) { posix_spawn_file_actions_t FileActionsStore; - posix_spawn_file_actions_t *FileActions = 0; + posix_spawn_file_actions_t *FileActions = nullptr; // If we call posix_spawn_file_actions_addopen we have to make sure the // c strings we pass to it stay alive until the call to posix_spawn, @@ -203,7 +203,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, std::string RedirectsStorage[3]; if (redirects) { - std::string *RedirectsStr[3] = {0, 0, 0}; + std::string *RedirectsStr[3] = {nullptr, nullptr, nullptr}; for (int I = 0; I < 3; ++I) { if (redirects[I]) { RedirectsStorage[I] = *redirects[I]; @@ -218,7 +218,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, if (RedirectIO_PS(RedirectsStr[0], 0, ErrMsg, FileActions) || RedirectIO_PS(RedirectsStr[1], 1, ErrMsg, FileActions)) return false; - if (redirects[1] == 0 || redirects[2] == 0 || + if (redirects[1] == nullptr || redirects[2] == nullptr || *redirects[1] != *redirects[2]) { // Just redirect stderr if (RedirectIO_PS(RedirectsStr[2], 2, ErrMsg, FileActions)) @@ -242,8 +242,9 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, // Explicitly initialized to prevent what appears to be a valgrind false // positive. pid_t PID = 0; - int Err = posix_spawn(&PID, Program.str().c_str(), FileActions, /*attrp*/0, - const_cast(args), const_cast(envp)); + int Err = posix_spawn(&PID, Program.str().c_str(), FileActions, + /*attrp*/nullptr, const_cast(args), + const_cast(envp)); if (FileActions) posix_spawn_file_actions_destroy(FileActions); @@ -294,7 +295,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, // Execute! std::string PathStr = Program; - if (envp != 0) + if (envp != nullptr) execve(PathStr.c_str(), const_cast(args), const_cast(envp)); @@ -360,7 +361,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, // Turn off the alarm and restore the signal handler alarm(0); - sigaction(SIGALRM, &Old, 0); + sigaction(SIGALRM, &Old, nullptr); // Wait for child to die if (wait(&status) != ChildPid) @@ -381,7 +382,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, // We exited normally without timeout, so turn off the timer. 
if (SecondsToWait && !WaitUntilTerminates) { alarm(0); - sigaction(SIGALRM, &Old, 0); + sigaction(SIGALRM, &Old, nullptr); } // Return the proper exit status. Detect error conditions diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index b4c78d6..1841fea 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -44,7 +44,7 @@ static RETSIGTYPE SignalHandler(int Sig); // defined below. static SmartMutex SignalsMutex; /// InterruptFunction - The function to call if ctrl-c is pressed. -static void (*InterruptFunction)() = 0; +static void (*InterruptFunction)() = nullptr; static std::vector FilesToRemove; static std::vector > CallBacksToRun; @@ -55,7 +55,7 @@ static std::vector > CallBacksToRun; static const int IntSigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2 }; -static const int *const IntSigsEnd = array_endof(IntSigs); +static const int *const IntSigsEnd = std::end(IntSigs); // KillSigs - Signals that represent that we have a bug, and our prompt // termination has been ordered. @@ -74,7 +74,7 @@ static const int KillSigs[] = { , SIGEMT #endif }; -static const int *const KillSigsEnd = array_endof(KillSigs); +static const int *const KillSigsEnd = std::end(KillSigs); static unsigned NumRegisteredSignals = 0; static struct { @@ -113,7 +113,7 @@ static void UnregisterHandlers() { // Restore all of the signal handlers to how they were before we showed up. for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i) sigaction(RegisteredSignalInfo[i].SigNo, - &RegisteredSignalInfo[i].SA, 0); + &RegisteredSignalInfo[i].SA, nullptr); NumRegisteredSignals = 0; } @@ -160,7 +160,7 @@ static RETSIGTYPE SignalHandler(int Sig) { // Unmask all potentially blocked kill signals. sigset_t SigMask; sigfillset(&SigMask); - sigprocmask(SIG_UNBLOCK, &SigMask, 0); + sigprocmask(SIG_UNBLOCK, &SigMask, nullptr); SignalsMutex.acquire(); RemoveFilesToRemove(); @@ -169,7 +169,7 @@ static RETSIGTYPE SignalHandler(int Sig) { if (InterruptFunction) { void (*IF)() = InterruptFunction; SignalsMutex.release(); - InterruptFunction = 0; + InterruptFunction = nullptr; IF(); // run the interrupt function. return; } @@ -212,7 +212,7 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) { bool llvm::sys::RemoveFileOnSignal(StringRef Filename, std::string* ErrMsg) { SignalsMutex.acquire(); - std::string *OldPtr = FilesToRemove.empty() ? 0 : &FilesToRemove[0]; + std::string *OldPtr = FilesToRemove.empty() ? 
nullptr : &FilesToRemove[0]; FilesToRemove.push_back(Filename); // We want to call 'c_str()' on every std::string in this vector so that if @@ -279,8 +279,8 @@ void llvm::sys::PrintStackTrace(FILE *FD) { const char* name = strrchr(dlinfo.dli_fname, '/'); int nwidth; - if (name == NULL) nwidth = strlen(dlinfo.dli_fname); - else nwidth = strlen(name) - 1; + if (!name) nwidth = strlen(dlinfo.dli_fname); + else nwidth = strlen(name) - 1; if (nwidth > width) width = nwidth; } @@ -292,22 +292,22 @@ void llvm::sys::PrintStackTrace(FILE *FD) { fprintf(FD, "%-2d", i); const char* name = strrchr(dlinfo.dli_fname, '/'); - if (name == NULL) fprintf(FD, " %-*s", width, dlinfo.dli_fname); - else fprintf(FD, " %-*s", width, name+1); + if (!name) fprintf(FD, " %-*s", width, dlinfo.dli_fname); + else fprintf(FD, " %-*s", width, name+1); fprintf(FD, " %#0*lx", (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]); - if (dlinfo.dli_sname != NULL) { + if (dlinfo.dli_sname != nullptr) { fputc(' ', FD); # if HAVE_CXXABI_H int res; - char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res); + char* d = abi::__cxa_demangle(dlinfo.dli_sname, nullptr, nullptr, &res); # else char* d = NULL; # endif - if (d == NULL) fputs(dlinfo.dli_sname, FD); - else fputs(d, FD); + if (!d) fputs(dlinfo.dli_sname, FD); + else fputs(d, FD); free(d); // FIXME: When we move to C++11, use %t length modifier. It's not in @@ -331,7 +331,7 @@ static void PrintStackTraceSignalHandler(void *) { /// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or /// SIGSEGV) is delivered to the process, print a stack trace and then exit. void llvm::sys::PrintStackTraceOnErrorSignal() { - AddSignalHandler(PrintStackTraceSignalHandler, 0); + AddSignalHandler(PrintStackTraceSignalHandler, nullptr); #if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES) // Environment variable to disable any kind of crash dialog. diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc index 80532b0..7d4acf7 100644 --- a/lib/Support/Unix/TimeValue.inc +++ b/lib/Support/Unix/TimeValue.inc @@ -26,15 +26,17 @@ std::string TimeValue::str() const { struct tm Storage; struct tm *LT = ::localtime_r(&OurTime, &Storage); assert(LT); - char Buffer[25]; - strftime(Buffer, 25, "%b %e %H:%M %Y", LT); - return std::string(Buffer); + char Buffer1[sizeof("YYYY-MM-DD HH:MM:SS")]; + strftime(Buffer1, sizeof(Buffer1), "%Y-%m-%d %H:%M:%S", LT); + char Buffer2[sizeof("YYYY-MM-DD HH:MM:SS.MMMUUUNNN")]; + snprintf(Buffer2, sizeof(Buffer2), "%s.%.9u", Buffer1, this->nanoseconds()); + return std::string(Buffer2); } TimeValue TimeValue::now() { struct timeval the_time; timerclear(&the_time); - if (0 != ::gettimeofday(&the_time,0)) { + if (0 != ::gettimeofday(&the_time,nullptr)) { // This is *really* unlikely to occur because the only gettimeofday // errors concern the timezone parameter which we're passing in as 0. // In the unlikely case it does happen, just return MinTime, no error diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 504471e..5d0278f 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -58,7 +58,7 @@ extern "C" { stricmp(ModuleName, "msvcr70") != 0 && #ifndef __MINGW32__ // Mingw32 uses msvcrt.dll by default. Don't ignore it. - // Otherwise, user should be aware, what he's doing :) + // Otherwise the user should be aware what they are doing. 
stricmp(ModuleName, "msvcrt") != 0 && #endif stricmp(ModuleName, "msvcrt20") != 0 && diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index a87c9e8..c3df801 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -82,16 +82,14 @@ TimeValue self_process::get_system_time() const { return getTimeValueFromFILETIME(KernelTime); } -// This function retrieves the page size using GetSystemInfo and is present -// solely so it can be called once to initialize the self_process member below. +// This function retrieves the page size using GetNativeSystemInfo() and is +// present solely so it can be called once to initialize the self_process member +// below. static unsigned getPageSize() { - // NOTE: A 32-bit application running under WOW64 is supposed to use - // GetNativeSystemInfo. However, this interface is not present prior - // to Windows XP so to use it requires dynamic linking. It is not clear - // how this affects the reported page size, if at all. One could argue - // that LLVM ought to run as 64-bits on a 64-bit system, anyway. + // GetNativeSystemInfo() provides the physical page size which may differ + // from GetSystemInfo() in 32-bit applications running under WOW64. SYSTEM_INFO info; - GetSystemInfo(&info); + GetNativeSystemInfo(&info); // FIXME: FileOffset in MapViewOfFile() should be aligned to not dwPageSize, // but dwAllocationGranularity. return static_cast(info.dwPageSize); diff --git a/lib/Support/Windows/TimeValue.inc b/lib/Support/Windows/TimeValue.inc index 6c59024..0223ab4 100644 --- a/lib/Support/Windows/TimeValue.inc +++ b/lib/Support/Windows/TimeValue.inc @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "WindowsSupport.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -32,6 +34,7 @@ TimeValue TimeValue::now() { } std::string TimeValue::str() const { + std::string S; struct tm *LT; #ifdef __MINGW32__ // Old versions of mingw don't have _localtime64_s. Remove this once we drop support @@ -47,13 +50,11 @@ std::string TimeValue::str() const { LT = &Storage; #endif - char Buffer[25]; - // FIXME: the windows version of strftime doesn't support %e - strftime(Buffer, 25, "%b %d %H:%M %Y", LT); - assert((Buffer[3] == ' ' && isdigit(Buffer[5]) && Buffer[6] == ' ') && - "Unexpected format in strftime()!"); - // Emulate %e on %d to mute '0'. - if (Buffer[4] == '0') - Buffer[4] = ' '; - return std::string(Buffer); + char Buffer[sizeof("YYYY-MM-DD HH:MM:SS")]; + strftime(Buffer, sizeof(Buffer), "%Y-%m-%d %H:%M:%S", LT); + raw_string_ostream OS(S); + OS << format("%s.%.9u", static_cast(Buffer), + this->nanoseconds()); + OS.flush(); + return S; } diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index 73ce5e0..3be02ee 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -1876,14 +1876,14 @@ Node *KeyValueNode::getValue() { void MappingNode::increment() { if (failed()) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } if (CurrentEntry) { CurrentEntry->skip(); if (Type == MT_Inline) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } } @@ -1896,13 +1896,13 @@ void MappingNode::increment() { case Token::TK_BlockEnd: getNext(); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError("Unexpected token. 
Expected Key or Block End", T); case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else { switch (T.Kind) { @@ -1915,14 +1915,14 @@ void MappingNode::increment() { case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError( "Unexpected token. Expected Key, Flow Entry, or Flow " "Mapping End." , T); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } } @@ -1930,7 +1930,7 @@ void MappingNode::increment() { void SequenceNode::increment() { if (failed()) { IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; return; } if (CurrentEntry) @@ -1941,37 +1941,37 @@ void SequenceNode::increment() { case Token::TK_BlockEntry: getNext(); CurrentEntry = parseBlockNode(); - if (CurrentEntry == 0) { // An error occurred. + if (!CurrentEntry) { // An error occurred. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } break; case Token::TK_BlockEnd: getNext(); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: setError( "Unexpected token. Expected Block Entry or Block End." , T); case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else if (SeqType == ST_Indentless) { switch (T.Kind) { case Token::TK_BlockEntry: getNext(); CurrentEntry = parseBlockNode(); - if (CurrentEntry == 0) { // An error occurred. + if (!CurrentEntry) { // An error occurred. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } break; default: case Token::TK_Error: IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; } } else if (SeqType == ST_Flow) { switch (T.Kind) { @@ -1985,7 +1985,7 @@ void SequenceNode::increment() { case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; case Token::TK_StreamEnd: case Token::TK_DocumentEnd: @@ -1993,13 +1993,13 @@ void SequenceNode::increment() { setError("Could not find closing ]!", T); // Set this to end iterator. IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; default: if (!WasPreviousTokenFlowEntry) { setError("Expected , between entries!", T); IsAtEnd = true; - CurrentEntry = 0; + CurrentEntry = nullptr; break; } // Otherwise it must be a flow entry. @@ -2013,7 +2013,7 @@ void SequenceNode::increment() { } } -Document::Document(Stream &S) : stream(S), Root(0) { +Document::Document(Stream &S) : stream(S), Root(nullptr) { // Tag maps starts with two default mappings. TagMap["!"] = "!"; TagMap["!!"] = "tag:yaml.org,2002:"; @@ -2070,7 +2070,7 @@ parse_property: case Token::TK_Anchor: if (AnchorInfo.Kind == Token::TK_Anchor) { setError("Already encountered an anchor for this node!", T); - return 0; + return nullptr; } AnchorInfo = getNext(); // Consume TK_Anchor. T = peekNext(); @@ -2078,7 +2078,7 @@ parse_property: case Token::TK_Tag: if (TagInfo.Kind == Token::TK_Tag) { setError("Already encountered a tag for this node!", T); - return 0; + return nullptr; } TagInfo = getNext(); // Consume TK_Tag. T = peekNext(); @@ -2146,10 +2146,10 @@ parse_property: // !!null null. 
return new (NodeAllocator) NullNode(stream.CurrentDoc); case Token::TK_Error: - return 0; + return nullptr; } llvm_unreachable("Control flow shouldn't reach here."); - return 0; + return nullptr; } bool Document::parseDirectives() { diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index 5472e0e..e5f9494 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -47,7 +47,7 @@ Input::Input(StringRef InputContent, void *DiagHandlerCtxt) : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr)), - CurrentNode(NULL) { + CurrentNode(nullptr) { if (DiagHandler) SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); DocIterator = Strm->begin(); @@ -158,10 +158,9 @@ void Input::endMapping() { MapHNode *MN = dyn_cast_or_null(CurrentNode); if (!MN) return; - for (MapHNode::NameToNode::iterator i = MN->Mapping.begin(), - End = MN->Mapping.end(); i != End; ++i) { - if (!MN->isValidKey(i->first())) { - setError(i->second, Twine("unknown key '") + i->first() + "'"); + for (const auto &NN : MN->Mapping) { + if (!MN->isValidKey(NN.first())) { + setError(NN.second, Twine("unknown key '") + NN.first() + "'"); break; } } @@ -255,9 +254,8 @@ bool Input::bitSetMatch(const char *Str, bool) { return false; if (SequenceHNode *SQ = dyn_cast(CurrentNode)) { unsigned Index = 0; - for (std::vector::iterator i = SQ->Entries.begin(), - End = SQ->Entries.end(); i != End; ++i) { - if (ScalarHNode *SN = dyn_cast(*i)) { + for (HNode *N : SQ->Entries) { + if (ScalarHNode *SN = dyn_cast(N)) { if (SN->value().equals(Str)) { BitValuesUsed[Index] = true; return true; @@ -287,7 +285,7 @@ void Input::endBitSetScalar() { } } -void Input::scalarString(StringRef &S) { +void Input::scalarString(StringRef &S, bool) { if (ScalarHNode *SN = dyn_cast(CurrentNode)) { S = SN->value(); } else { @@ -319,9 +317,8 @@ Input::HNode *Input::createHNodes(Node *N) { return new ScalarHNode(N, KeyStr); } else if (SequenceNode *SQ = dyn_cast(N)) { SequenceHNode *SQHNode = new SequenceHNode(N); - for (SequenceNode::iterator i = SQ->begin(), End = SQ->end(); i != End; - ++i) { - HNode *Entry = this->createHNodes(i); + for (Node &SN : *SQ) { + HNode *Entry = this->createHNodes(&SN); if (EC) break; SQHNode->Entries.push_back(Entry); @@ -329,9 +326,8 @@ Input::HNode *Input::createHNodes(Node *N) { return SQHNode; } else if (MappingNode *Map = dyn_cast(N)) { MapHNode *mapHNode = new MapHNode(N); - for (MappingNode::iterator i = Map->begin(), End = Map->end(); i != End; - ++i) { - ScalarNode *KeyScalar = dyn_cast(i->getKey()); + for (KeyValueNode &KVN : *Map) { + ScalarNode *KeyScalar = dyn_cast(KVN.getKey()); StringStorage.clear(); StringRef KeyStr = KeyScalar->getValue(StringStorage); if (!StringStorage.empty()) { @@ -341,7 +337,7 @@ Input::HNode *Input::createHNodes(Node *N) { memcpy(Buf, &StringStorage[0], Len); KeyStr = StringRef(Buf, Len); } - HNode *ValueHNode = this->createHNodes(i->getValue()); + HNode *ValueHNode = this->createHNodes(KVN.getValue()); if (EC) break; mapHNode->Mapping[KeyStr] = ValueHNode; @@ -351,14 +347,13 @@ Input::HNode *Input::createHNodes(Node *N) { return new EmptyHNode(N); } else { setError(N, "unknown node kind"); - return NULL; + return nullptr; } } bool Input::MapHNode::isValidKey(StringRef Key) { - for (SmallVectorImpl::iterator i = ValidKeys.begin(), - End = ValidKeys.end(); i != End; ++i) { - if (Key.equals(*i)) + for (const char *K : ValidKeys) { + if (Key.equals(K)) return true; } return false; @@ -373,17 +368,13 @@ bool Input::canElideEmptySequence() { } Input::MapHNode::~MapHNode() { - 
for (MapHNode::NameToNode::iterator i = Mapping.begin(), End = Mapping.end(); - i != End; ++i) { - delete i->second; - } + for (auto &N : Mapping) + delete N.second; } Input::SequenceHNode::~SequenceHNode() { - for (std::vector::iterator i = Entries.begin(), End = Entries.end(); - i != End; ++i) { - delete *i; - } + for (HNode *N : Entries) + delete N; } @@ -550,10 +541,7 @@ void Output::endBitSetScalar() { this->outputUpToEndOfLine(" ]"); } -void Output::scalarString(StringRef &S) { - const char ScalarSafeChars[] = "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/^., \t"; - +void Output::scalarString(StringRef &S, bool MustQuote) { this->newLineCheck(); if (S.empty()) { // Print '' for the empty string because leaving the field empty is not @@ -561,10 +549,8 @@ void Output::scalarString(StringRef &S) { this->outputUpToEndOfLine("''"); return; } - if (S.find_first_not_of(ScalarSafeChars) == StringRef::npos && - !isspace(S.front()) && !isspace(S.back())) { - // If the string consists only of safe characters, print it out without - // quotes. + if (!MustQuote) { + // Only quote if we must. this->outputUpToEndOfLine(S); return; } diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 3c45743..f55838e 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -87,8 +87,8 @@ void raw_ostream::SetBuffered() { void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size, BufferKind Mode) { - assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) || - (Mode != Unbuffered && BufferStart && Size)) && + assert(((Mode == Unbuffered && !BufferStart && Size == 0) || + (Mode != Unbuffered && BufferStart && Size != 0)) && "stream must be unbuffered or have at least one byte"); // Make sure the current buffer is free of content (we can't flush here; the // child buffer management logic will be in write_impl). @@ -433,7 +433,7 @@ void format_object_base::home() { raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, sys::fs::OpenFlags Flags) : Error(false), UseAtomicWrites(false), pos(0) { - assert(Filename != 0 && "Filename is null"); + assert(Filename && "Filename is null"); ErrorInfo.clear(); // Handle "-" as stdout. Note that when we do this, we consider ourself diff --git a/lib/Support/regengine.inc b/lib/Support/regengine.inc index 7e41f96..62d8c26 100644 --- a/lib/Support/regengine.inc +++ b/lib/Support/regengine.inc @@ -205,7 +205,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch, if (nmatch == 1 && !g->backrefs) break; /* no further info needed */ - /* oh my, he wants the subexpressions... */ + /* oh my, they want the subexpressions... */ if (m->pmatch == NULL) m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) * sizeof(llvm_regmatch_t)); diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index fd81ab4..476026d 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -17,6 +17,7 @@ #include "TGParser.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/system_error.h" diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index a43665b..c553a21 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -101,13 +101,13 @@ bool RecTy::baseClassOf(const RecTy *RHS) const{ } Init *BitRecTy::convertValue(BitsInit *BI) { - if (BI->getNumBits() != 1) return 0; // Only accept if just one bit! 
+ if (BI->getNumBits() != 1) return nullptr; // Only accept if just one bit! return BI->getBit(0); } Init *BitRecTy::convertValue(IntInit *II) { int64_t Val = II->getValue(); - if (Val != 0 && Val != 1) return 0; // Only accept 0 or 1 for a bit! + if (Val != 0 && Val != 1) return nullptr; // Only accept 0 or 1 for a bit! return BitInit::get(Val != 0); } @@ -116,7 +116,7 @@ Init *BitRecTy::convertValue(TypedInit *VI) { RecTy *Ty = VI->getType(); if (isa(Ty) || isa(Ty) || isa(Ty)) return VI; // Accept variable if it is already of bit type! - return 0; + return nullptr; } bool BitRecTy::baseClassOf(const RecTy *RHS) const{ @@ -151,7 +151,7 @@ Init *BitsRecTy::convertValue(UnsetInit *UI) { } Init *BitsRecTy::convertValue(BitInit *UI) { - if (Size != 1) return 0; // Can only convert single bit. + if (Size != 1) return nullptr; // Can only convert single bit. return BitsInit::get(UI); } @@ -170,7 +170,7 @@ Init *BitsRecTy::convertValue(IntInit *II) { int64_t Value = II->getValue(); // Make sure this bitfield is large enough to hold the integer value. if (!canFitInBitfield(Value, Size)) - return 0; + return nullptr; SmallVector NewBits(Size); @@ -184,7 +184,7 @@ Init *BitsRecTy::convertValue(BitsInit *BI) { // If the number of bits is right, return it. Otherwise we need to expand or // truncate. if (BI->getNumBits() == Size) return BI; - return 0; + return nullptr; } Init *BitsRecTy::convertValue(TypedInit *VI) { @@ -199,7 +199,7 @@ Init *BitsRecTy::convertValue(TypedInit *VI) { return BitsInit::get(NewBits); } - return 0; + return nullptr; } bool BitsRecTy::baseClassOf(const RecTy *RHS) const{ @@ -219,7 +219,7 @@ Init *IntRecTy::convertValue(BitsInit *BI) { if (BitInit *Bit = dyn_cast(BI->getBit(i))) { Result |= Bit->getValue() << i; } else { - return 0; + return nullptr; } return IntInit::get(Result); } @@ -227,7 +227,7 @@ Init *IntRecTy::convertValue(BitsInit *BI) { Init *IntRecTy::convertValue(TypedInit *TI) { if (TI->getType()->typeIsConvertibleTo(this)) return TI; // Accept variable if already of the right type! - return 0; + return nullptr; } bool IntRecTy::baseClassOf(const RecTy *RHS) const{ @@ -238,7 +238,7 @@ bool IntRecTy::baseClassOf(const RecTy *RHS) const{ Init *StringRecTy::convertValue(UnOpInit *BO) { if (BO->getOpcode() == UnOpInit::CAST) { Init *L = BO->getOperand()->convertInitializerTo(this); - if (L == 0) return 0; + if (!L) return nullptr; if (L != BO->getOperand()) return UnOpInit::get(UnOpInit::CAST, L, new StringRecTy); return BO; @@ -251,7 +251,7 @@ Init *StringRecTy::convertValue(BinOpInit *BO) { if (BO->getOpcode() == BinOpInit::STRCONCAT) { Init *L = BO->getLHS()->convertInitializerTo(this); Init *R = BO->getRHS()->convertInitializerTo(this); - if (L == 0 || R == 0) return 0; + if (!L || !R) return nullptr; if (L != BO->getLHS() || R != BO->getRHS()) return BinOpInit::get(BinOpInit::STRCONCAT, L, R, new StringRecTy); return BO; @@ -264,7 +264,7 @@ Init *StringRecTy::convertValue(BinOpInit *BO) { Init *StringRecTy::convertValue(TypedInit *TI) { if (isa(TI->getType())) return TI; // Accept variable if already of the right type! 
- return 0; + return nullptr; } std::string ListRecTy::getAsString() const { @@ -280,10 +280,10 @@ Init *ListRecTy::convertValue(ListInit *LI) { if (Init *CI = LI->getElement(i)->convertInitializerTo(Ty)) Elements.push_back(CI); else - return 0; + return nullptr; if (!isa(LI->getType())) - return 0; + return nullptr; return ListInit::get(Elements, this); } @@ -293,7 +293,7 @@ Init *ListRecTy::convertValue(TypedInit *TI) { if (ListRecTy *LRT = dyn_cast(TI->getType())) if (LRT->getElementType()->typeIsConvertibleTo(getElementType())) return TI; - return 0; + return nullptr; } bool ListRecTy::baseClassOf(const RecTy *RHS) const{ @@ -305,30 +305,30 @@ bool ListRecTy::baseClassOf(const RecTy *RHS) const{ Init *DagRecTy::convertValue(TypedInit *TI) { if (TI->getType()->typeIsConvertibleTo(this)) return TI; - return 0; + return nullptr; } Init *DagRecTy::convertValue(UnOpInit *BO) { if (BO->getOpcode() == UnOpInit::CAST) { Init *L = BO->getOperand()->convertInitializerTo(this); - if (L == 0) return 0; + if (!L) return nullptr; if (L != BO->getOperand()) return UnOpInit::get(UnOpInit::CAST, L, new DagRecTy); return BO; } - return 0; + return nullptr; } Init *DagRecTy::convertValue(BinOpInit *BO) { if (BO->getOpcode() == BinOpInit::CONCAT) { Init *L = BO->getLHS()->convertInitializerTo(this); Init *R = BO->getRHS()->convertInitializerTo(this); - if (L == 0 || R == 0) return 0; + if (!L || !R) return nullptr; if (L != BO->getLHS() || R != BO->getRHS()) return BinOpInit::get(BinOpInit::CONCAT, L, R, new DagRecTy); return BO; } - return 0; + return nullptr; } RecordRecTy *RecordRecTy::get(Record *R) { @@ -342,7 +342,7 @@ std::string RecordRecTy::getAsString() const { Init *RecordRecTy::convertValue(DefInit *DI) { // Ensure that DI is a subclass of Rec. if (!DI->getDef()->isSubClassOf(Rec)) - return 0; + return nullptr; return DI; } @@ -352,7 +352,7 @@ Init *RecordRecTy::convertValue(TypedInit *TI) { if (RRT->getRecord()->isSubClassOf(getRecord()) || RRT->getRecord() == getRecord()) return TI; - return 0; + return nullptr; } bool RecordRecTy::baseClassOf(const RecTy *RHS) const{ @@ -391,7 +391,7 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) { ++i) { RecordRecTy *SuperRecTy1 = RecordRecTy::get(*i); RecTy *NewType1 = resolveTypes(SuperRecTy1, T2); - if (NewType1 != 0) { + if (NewType1) { if (NewType1 != SuperRecTy1) { delete SuperRecTy1; } @@ -409,7 +409,7 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) { ++i) { RecordRecTy *SuperRecTy2 = RecordRecTy::get(*i); RecTy *NewType2 = resolveTypes(T1, SuperRecTy2); - if (NewType2 != 0) { + if (NewType2) { if (NewType2 != SuperRecTy2) { delete SuperRecTy2; } @@ -417,7 +417,7 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) { } } } - return 0; + return nullptr; } @@ -462,7 +462,7 @@ BitsInit *BitsInit::get(ArrayRef Range) { FoldingSetNodeID ID; ProfileBitsInit(ID, Range); - void *IP = 0; + void *IP = nullptr; if (BitsInit *I = ThePool.FindNodeOrInsertPos(ID, IP)) return I; @@ -482,7 +482,7 @@ BitsInit::convertInitializerBitRange(const std::vector &Bits) const { for (unsigned i = 0, e = Bits.size(); i != e; ++i) { if (Bits[i] >= getNumBits()) - return 0; + return nullptr; NewBits[i] = getBit(Bits[i]); } return BitsInit::get(NewBits); @@ -516,8 +516,8 @@ Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const { bool Changed = false; SmallVector NewBits(getNumBits()); - Init *CachedInit = 0; - Init *CachedBitVar = 0; + Init *CachedInit = nullptr; + Init *CachedBitVar = nullptr; bool CachedBitVarChanged = false; for (unsigned i = 0, e = 
getNumBits(); i != e; ++i) { @@ -590,7 +590,7 @@ IntInit::convertInitializerBitRange(const std::vector &Bits) const { for (unsigned i = 0, e = Bits.size(); i != e; ++i) { if (Bits[i] >= 64) - return 0; + return nullptr; NewBits[i] = BitInit::get(Value & (INT64_C(1) << Bits[i])); } @@ -623,18 +623,18 @@ static void ProfileListInit(FoldingSetNodeID &ID, ListInit *ListInit::get(ArrayRef Range, RecTy *EltTy) { typedef FoldingSet Pool; static Pool ThePool; + static std::vector> TheActualPool; - // Just use the FoldingSetNodeID to compute a hash. Use a DenseMap - // for actual storage. FoldingSetNodeID ID; ProfileListInit(ID, Range, EltTy); - void *IP = 0; + void *IP = nullptr; if (ListInit *I = ThePool.FindNodeOrInsertPos(ID, IP)) return I; ListInit *I = new ListInit(Range, EltTy); ThePool.InsertNode(I, IP); + TheActualPool.push_back(std::unique_ptr(I)); return I; } @@ -651,7 +651,7 @@ ListInit::convertInitListSlice(const std::vector &Elements) const { std::vector Vals; for (unsigned i = 0, e = Elements.size(); i != e; ++i) { if (Elements[i] >= getSize()) - return 0; + return nullptr; Vals.push_back(getElement(Elements[i])); } return ListInit::get(Vals, getType()); @@ -660,7 +660,7 @@ ListInit::convertInitListSlice(const std::vector &Elements) const { Record *ListInit::getElementAsRecord(unsigned i) const { assert(i < Values.size() && "List element index out of range!"); DefInit *DI = dyn_cast(Values[i]); - if (DI == 0) + if (!DI) PrintFatalError("Expected record in list!"); return DI->getDef(); } @@ -690,14 +690,14 @@ Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const { Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV, unsigned Elt) const { if (Elt >= getSize()) - return 0; // Out of range reference. + return nullptr; // Out of range reference. Init *E = getElement(Elt); // If the element is set to some value, or if we are resolving a reference // to a specific variable and that variable is explicitly unset, then // replace the VarListElementInit with it. if (IRV || !isa(E)) return E; - return 0; + return nullptr; } std::string ListInit::getAsString() const { @@ -714,7 +714,7 @@ Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV, Init *Resolved = resolveReferences(R, IRV); OpInit *OResolved = dyn_cast(Resolved); if (OResolved) { - Resolved = OResolved->Fold(&R, 0); + Resolved = OResolved->Fold(&R, nullptr); } if (Resolved != this) { @@ -728,7 +728,7 @@ Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV, } } - return 0; + return nullptr; } Init *OpInit::getBit(unsigned Bit) const { @@ -813,7 +813,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { if (ListInit *LHSl = dyn_cast(LHS)) { if (LHSl->getSize() == 0) { assert(0 && "Empty list in car"); - return 0; + return nullptr; } return LHSl->getElement(0); } @@ -823,7 +823,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { if (ListInit *LHSl = dyn_cast(LHS)) { if (LHSl->getSize() == 0) { assert(0 && "Empty list in cdr"); - return 0; + return nullptr; } // Note the +1. We can't just pass the result of getValues() // directly. 
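The ListInit::get() hunk above pairs the existing FoldingSet uniquing index with a new static vector of std::unique_ptr that owns every interned ListInit, so pooled objects are reclaimed instead of leaked. For illustration only, a minimal standalone sketch of that pattern under invented names (Node, getInterned); it is not LLVM code:

    #include <map>
    #include <memory>
    #include <string>
    #include <vector>

    // A value to intern; Key stands in for the FoldingSetNodeID profile.
    struct Node {
      std::string Key;
      explicit Node(std::string K) : Key(std::move(K)) {}
    };

    // Return the canonical Node for Key, creating it on first use.
    Node *getInterned(const std::string &Key) {
      static std::map<std::string, Node *> Index;     // uniquing lookup
      static std::vector<std::unique_ptr<Node>> Pool; // owns every node
      auto It = Index.find(Key);
      if (It != Index.end())
        return It->second;            // reuse the existing pooled object
      Pool.push_back(std::make_unique<Node>(Key));
      Index.emplace(Key, Pool.back().get());
      return Pool.back().get();
    }

    int main() {
      // Interning the same key twice yields the same pointer.
      return getInterned("list<int>") == getInterned("list<int>") ? 0 : 1;
    }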
@@ -862,8 +862,8 @@ Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const { Init *lhs = LHS->resolveReferences(R, RV); if (LHS != lhs) - return (UnOpInit::get(getOpcode(), lhs, getType()))->Fold(&R, 0); - return Fold(&R, 0); + return (UnOpInit::get(getOpcode(), lhs, getType()))->Fold(&R, nullptr); + return Fold(&R, nullptr); } std::string UnOpInit::getAsString() const { @@ -902,7 +902,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { if (LHSs && RHSs) { DefInit *LOp = dyn_cast(LHSs->getOperator()); DefInit *ROp = dyn_cast(RHSs->getOperator()); - if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef()) + if (!LOp || !ROp || LOp->getDef() != ROp->getDef()) PrintFatalError("Concated Dag operators do not match!"); std::vector Args; std::vector ArgNames; @@ -918,6 +918,18 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } break; } + case LISTCONCAT: { + ListInit *LHSs = dyn_cast(LHS); + ListInit *RHSs = dyn_cast(RHS); + if (LHSs && RHSs) { + std::vector Args; + Args.insert(Args.end(), LHSs->begin(), LHSs->end()); + Args.insert(Args.end(), RHSs->begin(), RHSs->end()); + return ListInit::get( + Args, static_cast(LHSs->getType())->getElementType()); + } + break; + } case STRCONCAT: { StringInit *LHSs = dyn_cast(LHS); StringInit *RHSs = dyn_cast(RHS); @@ -974,8 +986,8 @@ Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const { Init *rhs = RHS->resolveReferences(R, RV); if (LHS != lhs || RHS != rhs) - return (BinOpInit::get(getOpcode(), lhs, rhs, getType()))->Fold(&R, 0); - return Fold(&R, 0); + return (BinOpInit::get(getOpcode(), lhs, rhs, getType()))->Fold(&R,nullptr); + return Fold(&R, nullptr); } std::string BinOpInit::getAsString() const { @@ -987,6 +999,7 @@ std::string BinOpInit::getAsString() const { case SRA: Result = "!sra"; break; case SRL: Result = "!srl"; break; case EQ: Result = "!eq"; break; + case LISTCONCAT: Result = "!listconcat"; break; case STRCONCAT: Result = "!strconcat"; break; } return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")"; @@ -1031,11 +1044,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, if (TArg && TArg->getType()->getAsString() == "dag") { Init *Result = ForeachHelper(LHS, Arg, RHSo, Type, CurRec, CurMultiClass); - if (Result != 0) { - return Result; - } else { - return 0; - } + return Result; } for (int i = 0; i < RHSo->getNumOperands(); ++i) { @@ -1044,7 +1053,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, if (RHSoo) { Init *Result = EvaluateOperation(RHSoo, LHS, Arg, Type, CurRec, CurMultiClass); - if (Result != 0) { + if (Result) { NewOperands.push_back(Result); } else { NewOperands.push_back(Arg); @@ -1059,10 +1068,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg, // Now run the operator and use its result as the new leaf const OpInit *NewOp = RHSo->clone(NewOperands); Init *NewVal = NewOp->Fold(CurRec, CurMultiClass); - if (NewVal != NewOp) - return NewVal; - - return 0; + return (NewVal != NewOp) ? 
NewVal : nullptr; } static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, @@ -1086,7 +1092,7 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, Init *Val = MHSd->getOperator(); Init *Result = EvaluateOperation(RHSo, LHS, Val, Type, CurRec, CurMultiClass); - if (Result != 0) { + if (Result) { Val = Result; } @@ -1100,7 +1106,7 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, // Process args Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type, CurRec, CurMultiClass); - if (Result != 0) { + if (Result) { Arg = Result; } @@ -1138,7 +1144,7 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type, return ListInit::get(NewList, MHSl->getType()); } } - return 0; + return nullptr; } Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { @@ -1195,7 +1201,7 @@ Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { case FOREACH: { Init *Result = ForeachHelper(LHS, MHS, RHS, getType(), CurRec, CurMultiClass); - if (Result != 0) { + if (Result) { return Result; } break; @@ -1227,16 +1233,16 @@ Init *TernOpInit::resolveReferences(Record &R, IntInit *Value = dyn_cast(lhs); if (Init *I = lhs->convertInitializerTo(IntRecTy::get())) Value = dyn_cast(I); - if (Value != 0) { + if (Value) { // Short-circuit if (Value->getValue()) { Init *mhs = MHS->resolveReferences(R, RV); return (TernOpInit::get(getOpcode(), lhs, mhs, - RHS, getType()))->Fold(&R, 0); + RHS, getType()))->Fold(&R, nullptr); } else { Init *rhs = RHS->resolveReferences(R, RV); return (TernOpInit::get(getOpcode(), lhs, MHS, - rhs, getType()))->Fold(&R, 0); + rhs, getType()))->Fold(&R, nullptr); } } } @@ -1246,8 +1252,8 @@ Init *TernOpInit::resolveReferences(Record &R, if (LHS != lhs || MHS != mhs || RHS != rhs) return (TernOpInit::get(getOpcode(), lhs, mhs, rhs, - getType()))->Fold(&R, 0); - return Fold(&R, 0); + getType()))->Fold(&R, nullptr); + return Fold(&R, nullptr); } std::string TernOpInit::getAsString() const { @@ -1265,19 +1271,19 @@ RecTy *TypedInit::getFieldType(const std::string &FieldName) const { if (RecordRecTy *RecordType = dyn_cast(getType())) if (RecordVal *Field = RecordType->getRecord()->getValue(FieldName)) return Field->getType(); - return 0; + return nullptr; } Init * TypedInit::convertInitializerBitRange(const std::vector &Bits) const { BitsRecTy *T = dyn_cast(getType()); - if (T == 0) return 0; // Cannot subscript a non-bits variable. + if (!T) return nullptr; // Cannot subscript a non-bits variable. unsigned NumBits = T->getNumBits(); SmallVector NewBits(Bits.size()); for (unsigned i = 0, e = Bits.size(); i != e; ++i) { if (Bits[i] >= NumBits) - return 0; + return nullptr; NewBits[i] = VarBitInit::get(const_cast(this), Bits[i]); } @@ -1287,7 +1293,7 @@ TypedInit::convertInitializerBitRange(const std::vector &Bits) const { Init * TypedInit::convertInitListSlice(const std::vector &Elements) const { ListRecTy *T = dyn_cast(getType()); - if (T == 0) return 0; // Cannot subscript a non-list variable. + if (!T) return nullptr; // Cannot subscript a non-list variable. 
if (Elements.size() == 1) return VarListElementInit::get(const_cast(this), Elements[0]); @@ -1332,8 +1338,8 @@ Init *VarInit::getBit(unsigned Bit) const { Init *VarInit::resolveListElementReference(Record &R, const RecordVal *IRV, unsigned Elt) const { - if (R.isTemplateArg(getNameInit())) return 0; - if (IRV && IRV->getNameInit() != getNameInit()) return 0; + if (R.isTemplateArg(getNameInit())) return nullptr; + if (IRV && IRV->getNameInit() != getNameInit()) return nullptr; RecordVal *RV = R.getValue(getNameInit()); assert(RV && "Reference to a non-existent variable?"); @@ -1345,14 +1351,14 @@ Init *VarInit::resolveListElementReference(Record &R, } if (Elt >= LI->getSize()) - return 0; // Out of range reference. + return nullptr; // Out of range reference. Init *E = LI->getElement(Elt); // If the element is set to some value, or if we are resolving a reference // to a specific variable and that variable is explicitly unset, then // replace the VarListElementInit with it. if (IRV || !isa(E)) return E; - return 0; + return nullptr; } @@ -1360,7 +1366,7 @@ RecTy *VarInit::getFieldType(const std::string &FieldName) const { if (RecordRecTy *RTy = dyn_cast(getType())) if (const RecordVal *RV = RTy->getRecord()->getValue(FieldName)) return RV->getType(); - return 0; + return nullptr; } Init *VarInit::getFieldInit(Record &R, const RecordVal *RV, @@ -1368,15 +1374,15 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV, if (isa(getType())) if (const RecordVal *Val = R.getValue(VarName)) { if (RV != Val && (RV || isa(Val->getValue()))) - return 0; + return nullptr; Init *TheInit = Val->getValue(); assert(TheInit != this && "Infinite loop detected!"); if (Init *I = TheInit->getFieldInit(R, RV, FieldName)) return I; else - return 0; + return nullptr; } - return 0; + return nullptr; } /// resolveReferences - This method is used by classes that refer to other @@ -1386,7 +1392,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV, /// Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const { if (RecordVal *Val = R.getValue(VarName)) - if (RV == Val || (RV == 0 && !isa(Val->getValue()))) + if (RV == Val || (!RV && !isa(Val->getValue()))) return Val->getValue(); return const_cast(this); } @@ -1462,7 +1468,7 @@ Init *VarListElementInit:: resolveListElementReference(Record &R, return Result; } - return 0; + return nullptr; } DefInit *DefInit::get(Record *R) { @@ -1472,7 +1478,7 @@ DefInit *DefInit::get(Record *R) { RecTy *DefInit::getFieldType(const std::string &FieldName) const { if (const RecordVal *RV = Def->getValue(FieldName)) return RV->getType(); - return 0; + return nullptr; } Init *DefInit::getFieldInit(Record &R, const RecordVal *RV, @@ -1507,7 +1513,7 @@ Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV, unsigned Elt) const { if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName)) if (ListInit *LI = dyn_cast(ListVal)) { - if (Elt >= LI->getSize()) return 0; + if (Elt >= LI->getSize()) return nullptr; Init *E = LI->getElement(Elt); // If the element is set to some value, or if we are resolving a @@ -1516,7 +1522,7 @@ Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV, if (RV || !isa(E)) return E; } - return 0; + return nullptr; } Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const { @@ -1560,7 +1566,7 @@ DagInit::get(Init *V, const std::string &VN, FoldingSetNodeID ID; ProfileDagInit(ID, V, VN, ArgRange, NameRange); - void *IP = 0; + void *IP = nullptr; if (DagInit *I = 
ThePool.FindNodeOrInsertPos(ID, IP)) return I; @@ -1784,7 +1790,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { /// Init *Record::getValueInit(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); return R->getValue(); @@ -1797,7 +1803,7 @@ Init *Record::getValueInit(StringRef FieldName) const { /// std::string Record::getValueAsString(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); @@ -1813,7 +1819,7 @@ std::string Record::getValueAsString(StringRef FieldName) const { /// BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); @@ -1829,7 +1835,7 @@ BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const { /// ListInit *Record::getValueAsListInit(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); @@ -1864,7 +1870,7 @@ Record::getValueAsListOfDefs(StringRef FieldName) const { /// int64_t Record::getValueAsInt(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); @@ -1918,7 +1924,7 @@ Record::getValueAsListOfStrings(StringRef FieldName) const { /// Record *Record::getValueAsDef(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); @@ -1934,7 +1940,7 @@ Record *Record::getValueAsDef(StringRef FieldName) const { /// bool Record::getValueAsBit(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); @@ -1946,7 +1952,7 @@ bool Record::getValueAsBit(StringRef FieldName) const { bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName.str() + "'!\n"); @@ -1967,7 +1973,7 @@ bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const { /// DagInit *Record::getValueAsDag(StringRef FieldName) const { const RecordVal *R = getValue(FieldName); - if (R == 0 || R->getValue() == 0) + if (!R || !R->getValue()) PrintFatalError(getLoc(), "Record `" + getName() + "' does not have a field named `" + FieldName + "'!\n"); diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index c6be4f8..1ec2eea 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -30,7 +30,7 @@ 
 TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
   CurBuffer = 0;
   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
   CurPtr = CurBuf->getBufferStart();
-  TokStart = 0;
+  TokStart = nullptr;
 }
 
 SMLoc TGLexer::getLoc() const {
@@ -389,12 +389,12 @@ tgtok::TokKind TGLexer::LexNumber() {
       return ReturnError(TokStart, "Invalid hexadecimal number");
 
     errno = 0;
-    CurIntVal = strtoll(NumStart, 0, 16);
+    CurIntVal = strtoll(NumStart, nullptr, 16);
     if (errno == EINVAL)
       return ReturnError(TokStart, "Invalid hexadecimal number");
     if (errno == ERANGE) {
       errno = 0;
-      CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+      CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
       if (errno == EINVAL)
         return ReturnError(TokStart, "Invalid hexadecimal number");
       if (errno == ERANGE)
@@ -410,7 +410,7 @@ tgtok::TokKind TGLexer::LexNumber() {
     // Requires at least one binary digit.
     if (CurPtr == NumStart)
       return ReturnError(CurPtr-2, "Invalid binary number");
-    CurIntVal = strtoll(NumStart, 0, 2);
+    CurIntVal = strtoll(NumStart, nullptr, 2);
     return tgtok::IntVal;
   }
 }
 
@@ -425,7 +425,7 @@ tgtok::TokKind TGLexer::LexNumber() {
   while (isdigit(CurPtr[0]))
     ++CurPtr;
 
-  CurIntVal = strtoll(TokStart, 0, 10);
+  CurIntVal = strtoll(TokStart, nullptr, 10);
   return tgtok::IntVal;
 }
 
@@ -478,6 +478,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
     .Case("empty", tgtok::XEmpty)
     .Case("subst", tgtok::XSubst)
     .Case("foreach", tgtok::XForEach)
+    .Case("listconcat", tgtok::XListConcat)
     .Case("strconcat", tgtok::XStrConcat)
     .Default(tgtok::Error);
 
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index d1bd70d..1e599f8 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -47,7 +47,7 @@ namespace tgtok {
     MultiClass, String,
 
     // !keywords.
-    XConcat, XADD, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst,
+    XConcat, XADD, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast, XSubst,
     XForEach, XHead, XTail, XEmpty, XIf, XEq,
 
     // Integer value.
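The LexNumber() hunks above keep the existing two-step strtoll/strtoull scheme and only modernize the null end-pointer argument. A standalone sketch of that scheme, assuming the same errno conventions the lexer relies on (EINVAL for a malformed literal, ERANGE for overflow); lexHex is an invented name:

    #include <cerrno>
    #include <cstdint>
    #include <cstdlib>

    // Parse a hex literal: try signed strtoll first; on ERANGE retry with
    // strtoull so values that only fit in an unsigned 64-bit integer are
    // still accepted, stored bit-for-bit in an int64_t.
    bool lexHex(const char *NumStart, int64_t &Out) {
      errno = 0;
      Out = std::strtoll(NumStart, nullptr, 16);
      if (errno == EINVAL)
        return false;                 // malformed hexadecimal number
      if (errno == ERANGE) {
        errno = 0;
        Out = static_cast<int64_t>(std::strtoull(NumStart, nullptr, 16));
        if (errno == EINVAL || errno == ERANGE)
          return false;               // still malformed or out of range
      }
      return true;
    }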
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 4ba769c..038e018 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -29,18 +29,18 @@ struct SubClassReference { SMRange RefRange; Record *Rec; std::vector TemplateArgs; - SubClassReference() : Rec(0) {} + SubClassReference() : Rec(nullptr) {} - bool isInvalid() const { return Rec == 0; } + bool isInvalid() const { return Rec == nullptr; } }; struct SubMultiClassReference { SMRange RefRange; MultiClass *MC; std::vector TemplateArgs; - SubMultiClassReference() : MC(0) {} + SubMultiClassReference() : MC(nullptr) {} - bool isInvalid() const { return MC == 0; } + bool isInvalid() const { return MC == nullptr; } void dump() const; }; @@ -61,7 +61,7 @@ void SubMultiClassReference::dump() const { } // end namespace llvm bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) { - if (CurRec == 0) + if (!CurRec) CurRec = &CurMultiClass->Rec; if (RecordVal *ERV = CurRec->getValue(RV.getNameInit())) { @@ -83,10 +83,10 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, const std::vector &BitList, Init *V) { if (!V) return false; - if (CurRec == 0) CurRec = &CurMultiClass->Rec; + if (!CurRec) CurRec = &CurMultiClass->Rec; RecordVal *RV = CurRec->getValue(ValName); - if (RV == 0) + if (!RV) return Error(Loc, "Value '" + ValName->getAsUnquotedString() + "' unknown!"); @@ -103,19 +103,19 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, // if (!BitList.empty()) { BitsInit *CurVal = dyn_cast(RV->getValue()); - if (CurVal == 0) + if (!CurVal) return Error(Loc, "Value '" + ValName->getAsUnquotedString() + "' is not a bits type"); // Convert the incoming value to a bits type of the appropriate size... Init *BI = V->convertInitializerTo(BitsRecTy::get(BitList.size())); - if (BI == 0) { + if (!BI) { return Error(Loc, "Initializer is not compatible with bit range"); } // We should have a BitsInit type now. BitsInit *BInit = dyn_cast(BI); - assert(BInit != 0); + assert(BInit != nullptr); SmallVector NewBits(CurVal->getNumBits()); @@ -129,7 +129,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName, } for (unsigned i = 0, e = CurVal->getNumBits(); i != e; ++i) - if (NewBits[i] == 0) + if (!NewBits[i]) NewBits[i] = CurVal->getBit(i); V = BitsInit::get(NewBits); @@ -314,14 +314,14 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ assert(IterVals.size() < Loops.size()); ForeachLoop &CurLoop = Loops[IterVals.size()]; ListInit *List = dyn_cast(CurLoop.ListValue); - if (List == 0) { + if (!List) { Error(Loc, "Loop list is not a list"); return true; } // Process each value. for (int64_t i = 0; i < List->getSize(); ++i) { - Init *ItemVal = List->resolveListElementReference(*CurRec, 0, i); + Init *ItemVal = List->resolveListElementReference(*CurRec, nullptr, i); IterVals.push_back(IterRecord(CurLoop.IterVar, ItemVal)); if (ProcessForeachDefs(CurRec, Loc, IterVals)) return true; @@ -339,7 +339,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ for (unsigned i = 0, e = IterVals.size(); i != e; ++i) { VarInit *IterVar = IterVals[i].IterVar; TypedInit *IVal = dyn_cast(IterVals[i].IterValue); - if (IVal == 0) { + if (!IVal) { Error(Loc, "foreach iterator value is untyped"); return true; } @@ -400,21 +400,21 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) { // These are all of the tokens that can begin an object body. 
// Some of these can also begin values but we disallow those cases // because they are unlikely to be useful. - return 0; + return nullptr; default: break; } - Record *CurRec = 0; + Record *CurRec = nullptr; if (CurMultiClass) CurRec = &CurMultiClass->Rec; - RecTy *Type = 0; + RecTy *Type = nullptr; if (CurRec) { const TypedInit *CurRecName = dyn_cast(CurRec->getNameInit()); if (!CurRecName) { TokError("Record name is not typed!"); - return 0; + return nullptr; } Type = CurRecName->getType(); } @@ -430,11 +430,11 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) { Record *TGParser::ParseClassID() { if (Lex.getCode() != tgtok::Id) { TokError("expected name for ClassID"); - return 0; + return nullptr; } Record *Result = Records.getClass(Lex.getCurStrVal()); - if (Result == 0) + if (!Result) TokError("Couldn't find class '" + Lex.getCurStrVal() + "'"); Lex.Lex(); @@ -449,11 +449,11 @@ Record *TGParser::ParseClassID() { MultiClass *TGParser::ParseMultiClassID() { if (Lex.getCode() != tgtok::Id) { TokError("expected name for MultiClassID"); - return 0; + return nullptr; } MultiClass *Result = MultiClasses[Lex.getCurStrVal()]; - if (Result == 0) + if (!Result) TokError("Couldn't find multiclass '" + Lex.getCurStrVal() + "'"); Lex.Lex(); @@ -477,7 +477,7 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { } else { Result.Rec = ParseClassID(); } - if (Result.Rec == 0) return Result; + if (!Result.Rec) return Result; // If there is no template arg list, we're done. if (Lex.getCode() != tgtok::less) { @@ -488,19 +488,19 @@ ParseSubClassReference(Record *CurRec, bool isDefm) { if (Lex.getCode() == tgtok::greater) { TokError("subclass reference requires a non-empty list of template values"); - Result.Rec = 0; + Result.Rec = nullptr; return Result; } Result.TemplateArgs = ParseValueList(CurRec, Result.Rec); if (Result.TemplateArgs.empty()) { - Result.Rec = 0; // Error parsing value list. + Result.Rec = nullptr; // Error parsing value list. return Result; } if (Lex.getCode() != tgtok::greater) { TokError("expected '>' in template value list"); - Result.Rec = 0; + Result.Rec = nullptr; return Result; } Lex.Lex(); @@ -522,7 +522,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) { Result.RefRange.Start = Lex.getLoc(); Result.MC = ParseMultiClassID(); - if (Result.MC == 0) return Result; + if (!Result.MC) return Result; // If there is no template arg list, we're done. if (Lex.getCode() != tgtok::less) { @@ -533,19 +533,19 @@ ParseSubMultiClassReference(MultiClass *CurMC) { if (Lex.getCode() == tgtok::greater) { TokError("subclass reference requires a non-empty list of template values"); - Result.MC = 0; + Result.MC = nullptr; return Result; } Result.TemplateArgs = ParseValueList(&CurMC->Rec, &Result.MC->Rec); if (Result.TemplateArgs.empty()) { - Result.MC = 0; // Error parsing value list. + Result.MC = nullptr; // Error parsing value list. 
return Result; } if (Lex.getCode() != tgtok::greater) { TokError("expected '>' in template value list"); - Result.MC = 0; + Result.MC = nullptr; return Result; } Lex.Lex(); @@ -677,7 +677,7 @@ bool TGParser::ParseOptionalBitList(std::vector &Ranges) { /// RecTy *TGParser::ParseType() { switch (Lex.getCode()) { - default: TokError("Unknown token when expecting a type"); return 0; + default: TokError("Unknown token when expecting a type"); return nullptr; case tgtok::String: Lex.Lex(); return StringRecTy::get(); case tgtok::Code: Lex.Lex(); return StringRecTy::get(); case tgtok::Bit: Lex.Lex(); return BitRecTy::get(); @@ -685,20 +685,20 @@ RecTy *TGParser::ParseType() { case tgtok::Dag: Lex.Lex(); return DagRecTy::get(); case tgtok::Id: if (Record *R = ParseClassID()) return RecordRecTy::get(R); - return 0; + return nullptr; case tgtok::Bits: { if (Lex.Lex() != tgtok::less) { // Eat 'bits' TokError("expected '<' after bits type"); - return 0; + return nullptr; } if (Lex.Lex() != tgtok::IntVal) { // Eat '<' TokError("expected integer in bits type"); - return 0; + return nullptr; } uint64_t Val = Lex.getCurIntVal(); if (Lex.Lex() != tgtok::greater) { // Eat count. TokError("expected '>' at end of bits type"); - return 0; + return nullptr; } Lex.Lex(); // Eat '>' return BitsRecTy::get(Val); @@ -706,15 +706,15 @@ RecTy *TGParser::ParseType() { case tgtok::List: { if (Lex.Lex() != tgtok::less) { // Eat 'bits' TokError("expected '<' after list type"); - return 0; + return nullptr; } Lex.Lex(); // Eat '<' RecTy *SubType = ParseType(); - if (SubType == 0) return 0; + if (!SubType) return nullptr; if (Lex.getCode() != tgtok::greater) { TokError("expected '>' at end of list type"); - return 0; + return nullptr; } Lex.Lex(); // Eat '>' return ListRecTy::get(SubType); @@ -772,7 +772,7 @@ Init *TGParser::ParseIDValue(Record *CurRec, if (Mode == ParseValueMode) { Error(NameLoc, "Variable not defined: '" + Name + "'"); - return 0; + return nullptr; } return StringInit::get(Name); @@ -786,13 +786,13 @@ Init *TGParser::ParseOperation(Record *CurRec) { switch (Lex.getCode()) { default: TokError("unknown operation"); - return 0; + return nullptr; case tgtok::XHead: case tgtok::XTail: case tgtok::XEmpty: case tgtok::XCast: { // Value ::= !unop '(' Value ')' UnOpInit::UnaryOp Code; - RecTy *Type = 0; + RecTy *Type = nullptr; switch (Lex.getCode()) { default: llvm_unreachable("Unhandled code!"); @@ -802,9 +802,9 @@ Init *TGParser::ParseOperation(Record *CurRec) { Type = ParseOperatorType(); - if (Type == 0) { + if (!Type) { TokError("did not get type for unary operator"); - return 0; + return nullptr; } break; @@ -824,12 +824,12 @@ Init *TGParser::ParseOperation(Record *CurRec) { } if (Lex.getCode() != tgtok::l_paren) { TokError("expected '(' after unary operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the '(' Init *LHS = ParseValue(CurRec); - if (LHS == 0) return 0; + if (!LHS) return nullptr; if (Code == UnOpInit::HEAD || Code == UnOpInit::TAIL @@ -837,36 +837,36 @@ Init *TGParser::ParseOperation(Record *CurRec) { ListInit *LHSl = dyn_cast(LHS); StringInit *LHSs = dyn_cast(LHS); TypedInit *LHSt = dyn_cast(LHS); - if (LHSl == 0 && LHSs == 0 && LHSt == 0) { + if (!LHSl && !LHSs && !LHSt) { TokError("expected list or string type argument in unary operator"); - return 0; + return nullptr; } if (LHSt) { ListRecTy *LType = dyn_cast(LHSt->getType()); StringRecTy *SType = dyn_cast(LHSt->getType()); - if (LType == 0 && SType == 0) { + if (!LType && !SType) { TokError("expected list or string type argumnet 
in unary operator"); - return 0; + return nullptr; } } if (Code == UnOpInit::HEAD || Code == UnOpInit::TAIL) { - if (LHSl == 0 && LHSt == 0) { + if (!LHSl && !LHSt) { TokError("expected list type argumnet in unary operator"); - return 0; + return nullptr; } if (LHSl && LHSl->getSize() == 0) { TokError("empty list argument in unary operator"); - return 0; + return nullptr; } if (LHSl) { Init *Item = LHSl->getElement(0); TypedInit *Itemt = dyn_cast(Item); - if (Itemt == 0) { + if (!Itemt) { TokError("untyped list element in unary operator"); - return 0; + return nullptr; } if (Code == UnOpInit::HEAD) { Type = Itemt->getType(); @@ -876,9 +876,9 @@ Init *TGParser::ParseOperation(Record *CurRec) { } else { assert(LHSt && "expected list type argument in unary operator"); ListRecTy *LType = dyn_cast(LHSt->getType()); - if (LType == 0) { + if (!LType) { TokError("expected list type argumnet in unary operator"); - return 0; + return nullptr; } if (Code == UnOpInit::HEAD) { Type = LType->getElementType(); @@ -891,7 +891,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { if (Lex.getCode() != tgtok::r_paren) { TokError("expected ')' in unary operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the ')' return (UnOpInit::get(Code, LHS, Type))->Fold(CurRec, CurMultiClass); @@ -903,13 +903,14 @@ Init *TGParser::ParseOperation(Record *CurRec) { case tgtok::XSRL: case tgtok::XSHL: case tgtok::XEq: + case tgtok::XListConcat: case tgtok::XStrConcat: { // Value ::= !binop '(' Value ',' Value ')' tgtok::TokKind OpTok = Lex.getCode(); SMLoc OpLoc = Lex.getLoc(); Lex.Lex(); // eat the operation BinOpInit::BinaryOp Code; - RecTy *Type = 0; + RecTy *Type = nullptr; switch (OpTok) { default: llvm_unreachable("Unhandled code!"); @@ -919,6 +920,10 @@ Init *TGParser::ParseOperation(Record *CurRec) { case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break; case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break; case tgtok::XEq: Code = BinOpInit::EQ; Type = BitRecTy::get(); break; + case tgtok::XListConcat: + Code = BinOpInit::LISTCONCAT; + // We don't know the list type until we parse the first argument + break; case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; Type = StringRecTy::get(); @@ -927,31 +932,44 @@ Init *TGParser::ParseOperation(Record *CurRec) { if (Lex.getCode() != tgtok::l_paren) { TokError("expected '(' after binary operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the '(' SmallVector InitList; InitList.push_back(ParseValue(CurRec)); - if (InitList.back() == 0) return 0; + if (!InitList.back()) return nullptr; while (Lex.getCode() == tgtok::comma) { Lex.Lex(); // eat the ',' InitList.push_back(ParseValue(CurRec)); - if (InitList.back() == 0) return 0; + if (!InitList.back()) return nullptr; } if (Lex.getCode() != tgtok::r_paren) { TokError("expected ')' in operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the ')' + // If we are doing !listconcat, we should know the type by now + if (OpTok == tgtok::XListConcat) { + if (VarInit *Arg0 = dyn_cast(InitList[0])) + Type = Arg0->getType(); + else if (ListInit *Arg0 = dyn_cast(InitList[0])) + Type = Arg0->getType(); + else { + InitList[0]->dump(); + Error(OpLoc, "expected a list"); + return nullptr; + } + } + // We allow multiple operands to associative operators like !strconcat as // shorthand for nesting them. 
- if (Code == BinOpInit::STRCONCAT) { + if (Code == BinOpInit::STRCONCAT || Code == BinOpInit::LISTCONCAT) { while (InitList.size() > 2) { Init *RHS = InitList.pop_back_val(); RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type)) @@ -965,14 +983,14 @@ Init *TGParser::ParseOperation(Record *CurRec) { ->Fold(CurRec, CurMultiClass); Error(OpLoc, "expected two operands to operator"); - return 0; + return nullptr; } case tgtok::XIf: case tgtok::XForEach: case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' TernOpInit::TernaryOp Code; - RecTy *Type = 0; + RecTy *Type = nullptr; tgtok::TokKind LexCode = Lex.getCode(); Lex.Lex(); // eat the operation @@ -990,42 +1008,42 @@ Init *TGParser::ParseOperation(Record *CurRec) { } if (Lex.getCode() != tgtok::l_paren) { TokError("expected '(' after ternary operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the '(' Init *LHS = ParseValue(CurRec); - if (LHS == 0) return 0; + if (!LHS) return nullptr; if (Lex.getCode() != tgtok::comma) { TokError("expected ',' in ternary operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the ',' Init *MHS = ParseValue(CurRec); - if (MHS == 0) return 0; + if (!MHS) return nullptr; if (Lex.getCode() != tgtok::comma) { TokError("expected ',' in ternary operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the ',' Init *RHS = ParseValue(CurRec); - if (RHS == 0) return 0; + if (!RHS) return nullptr; if (Lex.getCode() != tgtok::r_paren) { TokError("expected ')' in binary operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the ')' switch (LexCode) { default: llvm_unreachable("Unhandled code!"); case tgtok::XIf: { - RecTy *MHSTy = 0; - RecTy *RHSTy = 0; + RecTy *MHSTy = nullptr; + RecTy *RHSTy = nullptr; if (TypedInit *MHSt = dyn_cast(MHS)) MHSTy = MHSt->getType(); @@ -1049,7 +1067,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { if (!MHSTy || !RHSTy) { TokError("could not get type for !if"); - return 0; + return nullptr; } if (MHSTy->typeIsConvertibleTo(RHSTy)) { @@ -1058,24 +1076,24 @@ Init *TGParser::ParseOperation(Record *CurRec) { Type = MHSTy; } else { TokError("inconsistent types for !if"); - return 0; + return nullptr; } break; } case tgtok::XForEach: { TypedInit *MHSt = dyn_cast(MHS); - if (MHSt == 0) { + if (!MHSt) { TokError("could not get type for !foreach"); - return 0; + return nullptr; } Type = MHSt->getType(); break; } case tgtok::XSubst: { TypedInit *RHSt = dyn_cast(RHS); - if (RHSt == 0) { + if (!RHSt) { TokError("could not get type for !subst"); - return 0; + return nullptr; } Type = RHSt->getType(); break; @@ -1093,24 +1111,24 @@ Init *TGParser::ParseOperation(Record *CurRec) { /// OperatorType ::= '<' Type '>' /// RecTy *TGParser::ParseOperatorType() { - RecTy *Type = 0; + RecTy *Type = nullptr; if (Lex.getCode() != tgtok::less) { TokError("expected type name for operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the < Type = ParseType(); - if (Type == 0) { + if (!Type) { TokError("expected type name for operator"); - return 0; + return nullptr; } if (Lex.getCode() != tgtok::greater) { TokError("expected type name for operator"); - return 0; + return nullptr; } Lex.Lex(); // eat the > @@ -1134,11 +1152,12 @@ RecTy *TGParser::ParseOperatorType() { /// SimpleValue ::= SHLTOK '(' Value ',' Value ')' /// SimpleValue ::= SRATOK '(' Value ',' Value ')' /// SimpleValue ::= SRLTOK '(' Value ',' Value ')' +/// SimpleValue ::= LISTCONCATTOK '(' Value ',' Value ')' /// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')' /// Init 
*TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { - Init *R = 0; + Init *R = nullptr; switch (Lex.getCode()) { default: TokError("Unknown token when parsing a value"); break; case tgtok::paste: @@ -1177,7 +1196,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, // Value ::= ID '<' ValueListNE '>' if (Lex.Lex() == tgtok::greater) { TokError("expected non-empty value list"); - return 0; + return nullptr; } // This is a CLASS expression. This is supposed to synthesize @@ -1186,15 +1205,15 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, Record *Class = Records.getClass(Name); if (!Class) { Error(NameLoc, "Expected a class name, got '" + Name + "'"); - return 0; + return nullptr; } std::vector ValueList = ParseValueList(CurRec, Class); - if (ValueList.empty()) return 0; + if (ValueList.empty()) return nullptr; if (Lex.getCode() != tgtok::greater) { TokError("expected '>' at end of value list"); - return 0; + return nullptr; } Lex.Lex(); // eat the '>' SMLoc EndLoc = Lex.getLoc(); @@ -1208,7 +1227,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, SCRef.TemplateArgs = ValueList; // Add info about the subclass to NewRec. if (AddSubClass(NewRec, SCRef)) - return 0; + return nullptr; if (!CurMultiClass) { NewRec->resolveReferences(); Records.addDef(NewRec); @@ -1250,11 +1269,11 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, if (Lex.getCode() != tgtok::r_brace) { Vals = ParseValueList(CurRec); - if (Vals.empty()) return 0; + if (Vals.empty()) return nullptr; } if (Lex.getCode() != tgtok::r_brace) { TokError("expected '}' at end of bit list value"); - return 0; + return nullptr; } Lex.Lex(); // eat the '}' @@ -1262,10 +1281,10 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, for (unsigned i = 0, e = Vals.size(); i != e; ++i) { Init *Bit = Vals[i]->convertInitializerTo(BitRecTy::get()); - if (Bit == 0) { + if (!Bit) { Error(BraceLoc, "Element #" + utostr(i) + " (" + Vals[i]->getAsString()+ ") is not convertable to a bit"); - return 0; + return nullptr; } NewBits[Vals.size()-i-1] = Bit; } @@ -1275,87 +1294,87 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, Lex.Lex(); // eat the '[' std::vector Vals; - RecTy *DeducedEltTy = 0; - ListRecTy *GivenListTy = 0; + RecTy *DeducedEltTy = nullptr; + ListRecTy *GivenListTy = nullptr; - if (ItemType != 0) { + if (ItemType) { ListRecTy *ListType = dyn_cast(ItemType); - if (ListType == 0) { + if (!ListType) { std::string s; raw_string_ostream ss(s); ss << "Type mismatch for list, expected list type, got " << ItemType->getAsString(); TokError(ss.str()); - return 0; + return nullptr; } GivenListTy = ListType; } if (Lex.getCode() != tgtok::r_square) { - Vals = ParseValueList(CurRec, 0, - GivenListTy ? GivenListTy->getElementType() : 0); - if (Vals.empty()) return 0; + Vals = ParseValueList(CurRec, nullptr, + GivenListTy ? 
GivenListTy->getElementType() : nullptr); + if (Vals.empty()) return nullptr; } if (Lex.getCode() != tgtok::r_square) { TokError("expected ']' at end of list value"); - return 0; + return nullptr; } Lex.Lex(); // eat the ']' - RecTy *GivenEltTy = 0; + RecTy *GivenEltTy = nullptr; if (Lex.getCode() == tgtok::less) { // Optional list element type Lex.Lex(); // eat the '<' GivenEltTy = ParseType(); - if (GivenEltTy == 0) { + if (!GivenEltTy) { // Couldn't parse element type - return 0; + return nullptr; } if (Lex.getCode() != tgtok::greater) { TokError("expected '>' at end of list element type"); - return 0; + return nullptr; } Lex.Lex(); // eat the '>' } // Check elements - RecTy *EltTy = 0; + RecTy *EltTy = nullptr; for (std::vector::iterator i = Vals.begin(), ie = Vals.end(); i != ie; ++i) { TypedInit *TArg = dyn_cast(*i); - if (TArg == 0) { + if (!TArg) { TokError("Untyped list element"); - return 0; + return nullptr; } - if (EltTy != 0) { + if (EltTy) { EltTy = resolveTypes(EltTy, TArg->getType()); - if (EltTy == 0) { + if (!EltTy) { TokError("Incompatible types in list elements"); - return 0; + return nullptr; } } else { EltTy = TArg->getType(); } } - if (GivenEltTy != 0) { - if (EltTy != 0) { + if (GivenEltTy) { + if (EltTy) { // Verify consistency if (!EltTy->typeIsConvertibleTo(GivenEltTy)) { TokError("Incompatible types in list elements"); - return 0; + return nullptr; } } EltTy = GivenEltTy; } - if (EltTy == 0) { - if (ItemType == 0) { + if (!EltTy) { + if (!ItemType) { TokError("No type for list"); - return 0; + return nullptr; } DeducedEltTy = GivenListTy->getElementType(); } else { @@ -1363,7 +1382,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, if (GivenListTy) { if (!EltTy->typeIsConvertibleTo(GivenListTy->getElementType())) { TokError("Element type mismatch for list"); - return 0; + return nullptr; } } DeducedEltTy = EltTy; @@ -1375,18 +1394,18 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, Lex.Lex(); // eat the '(' if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast) { TokError("expected identifier in dag init"); - return 0; + return nullptr; } Init *Operator = ParseValue(CurRec); - if (Operator == 0) return 0; + if (!Operator) return nullptr; // If the operator name is present, parse it. std::string OperatorName; if (Lex.getCode() == tgtok::colon) { if (Lex.Lex() != tgtok::VarName) { // eat the ':' TokError("expected variable name in dag operator"); - return 0; + return nullptr; } OperatorName = Lex.getCurStrVal(); Lex.Lex(); // eat the VarName. 
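In the list-literal code above, the parser deduces a single element type by folding resolveTypes() over each element's type and rejecting the list when any pair is incompatible. A simplified standalone sketch of that fold; unify() is an invented stand-in that only accepts exact matches, whereas the real resolveTypes() also walks record superclasses:

    #include <string>
    #include <vector>

    // Accumulate a common type across elements; the first element seeds it.
    static bool unify(std::string &Acc, const std::string &Ty) {
      if (Acc.empty()) { Acc = Ty; return true; }
      return Acc == Ty; // simplified: exact match only
    }

    // Returns the deduced element type, or "" on incompatible elements
    // (the parser reports "Incompatible types in list elements").
    std::string deduceElementType(const std::vector<std::string> &ElemTypes) {
      std::string EltTy;
      for (const std::string &Ty : ElemTypes)
        if (!unify(EltTy, Ty))
          return std::string();
      return EltTy;
    }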
@@ -1395,12 +1414,12 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, std::vector > DagArgs; if (Lex.getCode() != tgtok::r_paren) { DagArgs = ParseDagArgList(CurRec); - if (DagArgs.empty()) return 0; + if (DagArgs.empty()) return nullptr; } if (Lex.getCode() != tgtok::r_paren) { TokError("expected ')' in dag init"); - return 0; + return nullptr; } Lex.Lex(); // eat the ')' @@ -1417,6 +1436,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XSRL: case tgtok::XSHL: case tgtok::XEq: + case tgtok::XListConcat: case tgtok::XStrConcat: // Value ::= !binop '(' Value ',' Value ')' case tgtok::XIf: case tgtok::XForEach: @@ -1437,7 +1457,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, /// Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { Init *Result = ParseSimpleValue(CurRec, ItemType, Mode); - if (Result == 0) return 0; + if (!Result) return nullptr; // Parse the suffixes now if present. while (1) { @@ -1451,20 +1471,20 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { SMLoc CurlyLoc = Lex.getLoc(); Lex.Lex(); // eat the '{' std::vector Ranges = ParseRangeList(); - if (Ranges.empty()) return 0; + if (Ranges.empty()) return nullptr; // Reverse the bitlist. std::reverse(Ranges.begin(), Ranges.end()); Result = Result->convertInitializerBitRange(Ranges); - if (Result == 0) { + if (!Result) { Error(CurlyLoc, "Invalid bit range for value"); - return 0; + return nullptr; } // Eat the '}'. if (Lex.getCode() != tgtok::r_brace) { TokError("expected '}' at end of bit range list"); - return 0; + return nullptr; } Lex.Lex(); break; @@ -1473,18 +1493,18 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { SMLoc SquareLoc = Lex.getLoc(); Lex.Lex(); // eat the '[' std::vector Ranges = ParseRangeList(); - if (Ranges.empty()) return 0; + if (Ranges.empty()) return nullptr; Result = Result->convertInitListSlice(Ranges); - if (Result == 0) { + if (!Result) { Error(SquareLoc, "Invalid range for list slice"); - return 0; + return nullptr; } // Eat the ']'. if (Lex.getCode() != tgtok::r_square) { TokError("expected ']' at end of list slice"); - return 0; + return nullptr; } Lex.Lex(); break; @@ -1492,12 +1512,12 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { case tgtok::period: if (Lex.Lex() != tgtok::Id) { // eat the . TokError("expected field identifier after '.'"); - return 0; + return nullptr; } if (!Result->getFieldType(Lex.getCurStrVal())) { TokError("Cannot access field '" + Lex.getCurStrVal() + "' of value '" + Result->getAsString() + "'"); - return 0; + return nullptr; } Result = FieldInit::get(Result, Lex.getCurStrVal()); Lex.Lex(); // eat field name @@ -1512,14 +1532,14 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { TypedInit *LHS = dyn_cast(Result); if (!LHS) { Error(PasteLoc, "LHS of paste is not typed!"); - return 0; + return nullptr; } if (LHS->getType() != StringRecTy::get()) { LHS = UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get()); } - TypedInit *RHS = 0; + TypedInit *RHS = nullptr; Lex.Lex(); // Eat the '#'. 
switch (Lex.getCode()) { @@ -1539,7 +1559,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) { RHS = dyn_cast(RHSResult); if (!RHS) { Error(PasteLoc, "RHS of paste is not typed!"); - return 0; + return nullptr; } if (RHS->getType() != StringRecTy::get()) { @@ -1575,7 +1595,7 @@ TGParser::ParseDagArgList(Record *CurRec) { } else { // DagArg ::= Value (':' VARNAME)? Init *Val = ParseValue(CurRec); - if (Val == 0) + if (!Val) return std::vector >(); // If the variable name is present, add it. @@ -1610,7 +1630,7 @@ std::vector TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, std::vector Result; RecTy *ItemType = EltTy; unsigned int ArgN = 0; - if (ArgsRec != 0 && EltTy == 0) { + if (ArgsRec && !EltTy) { const std::vector &TArgs = ArgsRec->getTemplateArgs(); if (!TArgs.size()) { TokError("template argument provided to non-template class"); @@ -1626,12 +1646,12 @@ std::vector TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, ++ArgN; } Result.push_back(ParseValue(CurRec, ItemType)); - if (Result.back() == 0) return std::vector(); + if (!Result.back()) return std::vector(); while (Lex.getCode() == tgtok::comma) { Lex.Lex(); // Eat the comma - if (ArgsRec != 0 && EltTy == 0) { + if (ArgsRec && !EltTy) { const std::vector &TArgs = ArgsRec->getTemplateArgs(); if (ArgN >= TArgs.size()) { TokError("too many template arguments"); @@ -1643,7 +1663,7 @@ std::vector TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, ++ArgN; } Result.push_back(ParseValue(CurRec, ItemType)); - if (Result.back() == 0) return std::vector(); + if (!Result.back()) return std::vector(); } return Result; @@ -1667,11 +1687,11 @@ Init *TGParser::ParseDeclaration(Record *CurRec, if (HasField) Lex.Lex(); RecTy *Type = ParseType(); - if (Type == 0) return 0; + if (!Type) return nullptr; if (Lex.getCode() != tgtok::Id) { TokError("Expected identifier in declaration"); - return 0; + return nullptr; } SMLoc IdLoc = Lex.getLoc(); @@ -1691,16 +1711,16 @@ Init *TGParser::ParseDeclaration(Record *CurRec, // Add the value. if (AddValue(CurRec, IdLoc, RecordVal(DeclName, Type, HasField))) - return 0; + return nullptr; // If a value is present, parse it. if (Lex.getCode() == tgtok::equal) { Lex.Lex(); SMLoc ValLoc = Lex.getLoc(); Init *Val = ParseValue(CurRec, Type); - if (Val == 0 || + if (!Val || SetValue(CurRec, ValLoc, DeclName, std::vector(), Val)) - return 0; + return nullptr; } return DeclName; @@ -1717,7 +1737,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec, VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) { if (Lex.getCode() != tgtok::Id) { TokError("Expected identifier in foreach declaration"); - return 0; + return nullptr; } Init *DeclName = StringInit::get(Lex.getCurStrVal()); @@ -1726,27 +1746,27 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) { // If a value is present, parse it. 
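  // (Illustrative) The range forms accepted after the '=' mirror the switch
  // below:
  //   foreach i = [1, 2, 4] in ...   // '[' ValueList ']'
  //   foreach i = 0-3       in ...   // RangePiece
  //   foreach i = {0-3, 7}  in ...   // '{' RangeList '}'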
if (Lex.getCode() != tgtok::equal) {
     TokError("Expected '=' in foreach declaration");
-    return 0;
+    return nullptr;
   }
   Lex.Lex();  // Eat the '='

-  RecTy *IterType = 0;
+  RecTy *IterType = nullptr;
   std::vector<unsigned> Ranges;

   switch (Lex.getCode()) {
-  default: TokError("Unknown token when expecting a range list"); return 0;
+  default: TokError("Unknown token when expecting a range list"); return nullptr;
   case tgtok::l_square: { // '[' ValueList ']'
-    Init *List = ParseSimpleValue(0, 0, ParseForeachMode);
+    Init *List = ParseSimpleValue(nullptr, nullptr, ParseForeachMode);
     ForeachListValue = dyn_cast<ListInit>(List);
-    if (ForeachListValue == 0) {
+    if (!ForeachListValue) {
       TokError("Expected a Value list");
-      return 0;
+      return nullptr;
     }
     RecTy *ValueType = ForeachListValue->getType();
     ListRecTy *ListType = dyn_cast<ListRecTy>(ValueType);
-    if (ListType == 0) {
+    if (!ListType) {
       TokError("Value list is not of list type");
-      return 0;
+      return nullptr;
     }
     IterType = ListType->getElementType();
     break;
@@ -1754,7 +1774,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {

   case tgtok::IntVal: { // RangePiece.
     if (ParseRangePiece(Ranges))
-      return 0;
+      return nullptr;
     break;
   }

@@ -1763,7 +1783,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
     Ranges = ParseRangeList();
     if (Lex.getCode() != tgtok::r_brace) {
       TokError("expected '}' at end of bit range list");
-      return 0;
+      return nullptr;
     }
     Lex.Lex();
     break;
@@ -1780,7 +1800,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
   }

   if (!IterType)
-    return 0;
+    return nullptr;

   return VarInit::get(DeclName, IterType);
 }
@@ -1800,7 +1820,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {

   // Read the first declaration.
   Init *TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
-  if (TemplArg == 0)
+  if (!TemplArg)
     return true;

   TheRecToAddTo->addTemplateArg(TemplArg);
@@ -1810,7 +1830,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {

     // Read the following declarations.
     TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
-    if (TemplArg == 0)
+    if (!TemplArg)
       return true;
     TheRecToAddTo->addTemplateArg(TemplArg);
   }
@@ -1828,7 +1848,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
///   BodyItem ::= LET ID OptionalBitList '=' Value ';'
 bool TGParser::ParseBodyItem(Record *CurRec) {
   if (Lex.getCode() != tgtok::Let) {
-    if (ParseDeclaration(CurRec, false) == 0)
+    if (!ParseDeclaration(CurRec, false))
       return true;

     if (Lex.getCode() != tgtok::semi)
@@ -1855,13 +1875,13 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
   Lex.Lex();  // eat the '='.

   RecordVal *Field = CurRec->getValue(FieldName);
-  if (Field == 0)
+  if (!Field)
     return TokError("Value '" + FieldName + "' unknown!");

   RecTy *Type = Field->getType();

   Init *Val = ParseValue(CurRec, Type);
-  if (Val == 0) return true;
+  if (!Val) return true;

   if (Lex.getCode() != tgtok::semi)
     return TokError("expected ';' after let expression");
@@ -1927,7 +1947,7 @@ bool TGParser::ParseObjectBody(Record *CurRec) {
     SubClassReference SubClass = ParseSubClassReference(CurRec, false);
     while (1) {
       // Check for error.
-      if (SubClass.Rec == 0) return true;
+      if (!SubClass.Rec) return true;

       // Add it.
       if (AddSubClass(CurRec, SubClass))
@@ -1998,7 +2018,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
   } else if (ParseObjectBody(CurRec))
     return true;

-  if (CurMultiClass == 0)  // Def's in multiclasses aren't really defs.
+  if (!CurMultiClass)  // Def's in multiclasses aren't really defs.
     // See Record::setName().
This resolve step will see any new name // for the def that might have been created when resolving // inheritance, values and arguments above. @@ -2040,9 +2060,9 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) { // Make a temporary object to record items associated with the for // loop. - ListInit *ListValue = 0; + ListInit *ListValue = nullptr; VarInit *IterName = ParseForeachDeclaration(ListValue); - if (IterName == 0) + if (!IterName) return TokError("expected declaration in for"); if (Lex.getCode() != tgtok::In) @@ -2144,8 +2164,8 @@ std::vector TGParser::ParseLetList() { } Lex.Lex(); // eat the '='. - Init *Val = ParseValue(0); - if (Val == 0) return std::vector(); + Init *Val = ParseValue(nullptr); + if (!Val) return std::vector(); // Now that we have everything, add the record. Result.push_back(LetRecord(Name, Bits, Val, NameLoc)); @@ -2228,7 +2248,7 @@ bool TGParser::ParseMultiClass() { // If there are template args, parse them. if (Lex.getCode() == tgtok::less) - if (ParseTemplateArgList(0)) + if (ParseTemplateArgList(nullptr)) return true; bool inherits = false; @@ -2244,7 +2264,7 @@ bool TGParser::ParseMultiClass() { ParseSubMultiClassReference(CurMultiClass); while (1) { // Check for error. - if (SubMultiClass.MC == 0) return true; + if (!SubMultiClass.MC) return true; // Add it. if (AddSubMultiClass(CurMultiClass, SubMultiClass)) @@ -2283,7 +2303,7 @@ bool TGParser::ParseMultiClass() { Lex.Lex(); // eat the '}'. } - CurMultiClass = 0; + CurMultiClass = nullptr; return false; } @@ -2301,7 +2321,7 @@ InstantiateMulticlassDef(MultiClass &MC, // as a prefix. bool IsAnonymous = false; - if (DefmPrefix == 0) { + if (!DefmPrefix) { DefmPrefix = StringInit::get(GetNewAnonymousName()); IsAnonymous = true; } @@ -2310,7 +2330,7 @@ InstantiateMulticlassDef(MultiClass &MC, StringInit *DefNameString = dyn_cast(DefName); - if (DefNameString != 0) { + if (DefNameString) { // We have a fully expanded string so there are no operators to // resolve. We should concatenate the given prefix and name. DefName = @@ -2338,13 +2358,13 @@ InstantiateMulticlassDef(MultiClass &MC, Error(DefmPrefixRange.Start, "Could not resolve " + CurRec->getNameInitAsString() + ":NAME to '" + DefmPrefix->getAsUnquotedString() + "'"); - return 0; + return nullptr; } // If the DefNameString didn't resolve, we probably have a reference to // NAME and need to replace it. We need to do at least this much greedily, // otherwise nested multiclasses will end up with incorrect NAME expansions. - if (DefNameString == 0) { + if (!DefNameString) { RecordVal *DefNameRV = CurRec->getValue("NAME"); CurRec->resolveReferencesTo(DefNameRV); } @@ -2369,7 +2389,7 @@ InstantiateMulticlassDef(MultiClass &MC, Error(DefmPrefixRange.Start, "def '" + CurRec->getNameInitAsString() + "' already defined, instantiating defm with subdef '" + DefProto->getNameInitAsString() + "'"); - return 0; + return nullptr; } Records.addDef(CurRec); @@ -2453,7 +2473,7 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC, bool TGParser::ParseDefm(MultiClass *CurMultiClass) { assert(Lex.getCode() == tgtok::Defm && "Unexpected token!"); SMLoc DefmLoc = Lex.getLoc(); - Init *DefmPrefix = 0; + Init *DefmPrefix = nullptr; if (Lex.Lex() == tgtok::Id) { // eat the defm. 
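    // (Illustrative) For "defm Foo : SomeMultiClass<3>;" the prefix parsed
    // here is "Foo"; when it is omitted, InstantiateMulticlassDef below
    // substitutes a fresh anonymous name and marks the defs as anonymous.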
DefmPrefix = ParseObjectName(CurMultiClass); @@ -2473,10 +2493,10 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { Lex.Lex(); SMLoc SubClassLoc = Lex.getLoc(); - SubClassReference Ref = ParseSubClassReference(0, true); + SubClassReference Ref = ParseSubClassReference(nullptr, true); while (1) { - if (Ref.Rec == 0) return true; + if (!Ref.Rec) return true; // To instantiate a multiclass, we need to first get the multiclass, then // instantiate each def contained in the multiclass with the SubClassRef @@ -2522,21 +2542,21 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { // A defm can inherit from regular classes (non-multiclass) as // long as they come in the end of the inheritance list. - InheritFromClass = (Records.getClass(Lex.getCurStrVal()) != 0); + InheritFromClass = (Records.getClass(Lex.getCurStrVal()) != nullptr); if (InheritFromClass) break; - Ref = ParseSubClassReference(0, true); + Ref = ParseSubClassReference(nullptr, true); } if (InheritFromClass) { // Process all the classes to inherit as if they were part of a // regular 'def' and inherit all record values. - SubClassReference SubClass = ParseSubClassReference(0, false); + SubClassReference SubClass = ParseSubClassReference(nullptr, false); while (1) { // Check for error. - if (SubClass.Rec == 0) return true; + if (!SubClass.Rec) return true; // Get the expanded definition prototypes and teach them about // the record values the current class to inherit has @@ -2553,7 +2573,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { if (Lex.getCode() != tgtok::comma) break; Lex.Lex(); // eat ','. - SubClass = ParseSubClassReference(0, false); + SubClass = ParseSubClassReference(nullptr, false); } } diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index ce31f8e..6fd442a 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -85,7 +85,7 @@ class TGParser { public: TGParser(SourceMgr &SrcMgr, RecordKeeper &records) - : Lex(SrcMgr), CurMultiClass(0), Records(records), AnonCounter(0) {} + : Lex(SrcMgr), CurMultiClass(nullptr), Records(records), AnonCounter(0) {} /// ParseFile - Main entrypoint for parsing a tblgen file. These parser /// routines return true on error, or false on success. @@ -131,7 +131,7 @@ private: // Semantic analysis methods. bool ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals); private: // Parser methods. - bool ParseObjectList(MultiClass *MC = 0); + bool ParseObjectList(MultiClass *MC = nullptr); bool ParseObject(MultiClass *MC); bool ParseClass(); bool ParseMultiClass(); @@ -169,12 +169,12 @@ private: // Parser methods. 
Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc,
                      IDParseMode Mode = ParseValueMode);
-  Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0,
+  Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = nullptr,
                          IDParseMode Mode = ParseValueMode);
-  Init *ParseValue(Record *CurRec, RecTy *ItemType = 0,
+  Init *ParseValue(Record *CurRec, RecTy *ItemType = nullptr,
                    IDParseMode Mode = ParseValueMode);
-  std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0,
-                                    RecTy *EltTy = 0);
+  std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = nullptr,
+                                    RecTy *EltTy = nullptr);
   std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *);
   bool ParseOptionalRangeList(std::vector<unsigned> &Ranges);
   bool ParseOptionalBitList(std::vector<unsigned> &Ranges);
diff --git a/lib/TableGen/module.modulemap b/lib/TableGen/module.modulemap
new file mode 100644
index 0000000..8dac0a2
--- /dev/null
+++ b/lib/TableGen/module.modulemap
@@ -0,0 +1 @@
+module TableGen { requires cplusplus umbrella "." module * { export * } }
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index 0297de1..1c022aa 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -1,4 +1,4 @@
-//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
+//==-- AArch64.h - Top-level interface for AArch64 --------------*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,35 +12,38 @@
 //
 //===----------------------------------------------------------------------===//

-#ifndef LLVM_TARGET_AARCH64_H
-#define LLVM_TARGET_AARCH64_H
+#ifndef TARGET_AArch64_H
+#define TARGET_AArch64_H

+#include "Utils/AArch64BaseInfo.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/DataTypes.h"

 namespace llvm {

-class AArch64AsmPrinter;
-class FunctionPass;
 class AArch64TargetMachine;
-class MachineInstr;
-class MCInst;
-
-FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
-                                   CodeGenOpt::Level OptLevel);
+class FunctionPass;
+class MachineFunctionPass;
+
+FunctionPass *createAArch64DeadRegisterDefinitions();
+FunctionPass *createAArch64ConditionalCompares();
+FunctionPass *createAArch64AdvSIMDScalar();
+FunctionPass *createAArch64BranchRelaxation();
+FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
+                                   CodeGenOpt::Level OptLevel);
+FunctionPass *createAArch64StorePairSuppressPass();
+FunctionPass *createAArch64ExpandPseudoPass();
+FunctionPass *createAArch64LoadStoreOptimizationPass();
+ModulePass *createAArch64PromoteConstantPass();
+FunctionPass *createAArch64AddressTypePromotionPass();
+/// \brief Creates an AArch64-specific Target Transformation Info pass.
+ImmutablePass *
+createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);

 FunctionPass *createAArch64CleanupLocalDynamicTLSPass();

-FunctionPass *createAArch64BranchFixupPass();
-
-/// \brief Creates an AArch64-specific Target Transformation Info pass.
-ImmutablePass *createAArch64TargetTransformInfoPass( - const AArch64TargetMachine *TM); - -void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, - AArch64AsmPrinter &AP); - - -} +FunctionPass *createAArch64CollectLOHPass(); +} // end namespace llvm #endif diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index e49afd6..1ad5ac8 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -1,4 +1,4 @@ -//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==// +//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,12 +7,11 @@ // //===----------------------------------------------------------------------===// // -// This is the top level entry point for the AArch64 target. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Target-independent interfaces +// Target-independent interfaces which we are implementing //===----------------------------------------------------------------------===// include "llvm/Target/Target.td" @@ -22,7 +21,7 @@ include "llvm/Target/Target.td" // def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", - "Enable ARMv8 FP">; + "Enable ARMv8 FP">; def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; @@ -30,54 +29,106 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", "Enable cryptographic instructions">; +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable ARMv8 CRC-32 checksum instructions">; + +/// Cyclone has register move instructions which are "free". +def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", + "Has zero-cycle register moves">; + +/// Cyclone has instructions which zero registers for "free". +def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", + "Has zero-cycle zeroing instructions">; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "AArch64RegisterInfo.td" +include "AArch64CallingConvention.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions //===----------------------------------------------------------------------===// -// AArch64 Processors -// include "AArch64Schedule.td" +include "AArch64InstrInfo.td" -class ProcNoItin Features> - : Processor; +def AArch64InstrInfo : InstrInfo; -def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>; +//===----------------------------------------------------------------------===// +// AArch64 Processors supported. 
+// +include "AArch64SchedA53.td" +include "AArch64SchedCyclone.td" def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", "Cortex-A53 ARM processors", [FeatureFPARMv8, FeatureNEON, - FeatureCrypto]>; + FeatureCrypto, + FeatureCRC]>; def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", "Cortex-A57 ARM processors", [FeatureFPARMv8, FeatureNEON, - FeatureCrypto]>; + FeatureCrypto, + FeatureCRC]>; + +def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", + "Cyclone", + [FeatureFPARMv8, + FeatureNEON, + FeatureCrypto, + FeatureCRC, + FeatureZCRegMove, FeatureZCZeroing]>; + +def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, + FeatureNEON, + FeatureCRC]>; def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; -def : Processor<"cortex-a57", NoItineraries, [ProcA57]>; +def : ProcessorModel<"cortex-a57", NoSchedModel, [ProcA57]>; +def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; //===----------------------------------------------------------------------===// -// Register File Description +// Assembly parser //===----------------------------------------------------------------------===// -include "AArch64RegisterInfo.td" +def GenericAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "generic"; +} -include "AArch64CallingConv.td" +def AppleAsmParserVariant : AsmParserVariant { + int Variant = 1; + string Name = "apple-neon"; +} //===----------------------------------------------------------------------===// -// Instruction Descriptions +// Assembly printer //===----------------------------------------------------------------------===// +// AArch64 Uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. +def GenericAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + int Variant = 0; + bit isMCAsmWriter = 1; +} -include "AArch64InstrInfo.td" - -def AArch64InstrInfo : InstrInfo { - let noNamedPositionallyEncodedOperands = 1; +def AppleAsmWriter : AsmWriter { + let AsmWriterClassName = "AppleInstPrinter"; + int Variant = 1; + int isMCAsmWriter = 1; } //===----------------------------------------------------------------------===// -// Declare the target which we are implementing +// Target Declaration //===----------------------------------------------------------------------===// def AArch64 : Target { let InstructionSet = AArch64InstrInfo; + let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; + let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; } diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp new file mode 100644 index 0000000..04906f6 --- /dev/null +++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp @@ -0,0 +1,492 @@ +//===-- AArch64AddressTypePromotion.cpp --- Promote type for addr accesses -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass tries to promote the computations use to obtained a sign extended +// value used into memory accesses. +// E.g. 
+// a = add nsw i32 b, 3
+// d = sext i32 a to i64
+// e = getelementptr ..., i64 d
+//
+// =>
+// f = sext i32 b to i64
+// a = add nsw i64 f, 3
+// e = getelementptr ..., i64 a
+//
+// This is legal to do if the computations are marked with either the nsw or
+// the nuw marker.
+// Moreover, the current heuristic is simple: it does not create new sext
+// operations, i.e., it gives up when a sext would have forked (e.g., if
+// a = add i32 b, c, two sexts are required to promote the computation).
+//
+// FIXME: This pass may be useful for other targets too.
+// ===---------------------------------------------------------------------===//

+#include "AArch64.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-type-promotion"
+
+static cl::opt<bool>
+EnableAddressTypePromotion("aarch64-type-promotion", cl::Hidden,
+                           cl::desc("Enable the type promotion pass"),
+                           cl::init(true));
+static cl::opt<bool>
+EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
+            cl::desc("Enable merging of redundant sexts when one is dominating"
+                     " the other."),
+            cl::init(true));
+
+//===----------------------------------------------------------------------===//
+//                       AArch64AddressTypePromotion
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+void initializeAArch64AddressTypePromotionPass(PassRegistry &);
+}
+
+namespace {
+class AArch64AddressTypePromotion : public FunctionPass {
+
+public:
+  static char ID;
+  AArch64AddressTypePromotion()
+      : FunctionPass(ID), Func(nullptr), ConsideredSExtType(nullptr) {
+    initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "AArch64 Address Type Promotion";
+  }
+
+  /// Iterate over the functions and promote the computation of interesting
+  /// sext instructions.
+  bool runOnFunction(Function &F) override;
+
+private:
+  /// The current function.
+  Function *Func;
+  /// Filter out all sexts that do not have this type.
+  /// Currently initialized with Int64Ty.
+  Type *ConsideredSExtType;
+
+  // This transformation requires dominator info.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    FunctionPass::getAnalysisUsage(AU);
+  }
+
+  typedef SmallPtrSet<Instruction *, 32> SetOfInstructions;
+  typedef SmallVector<Instruction *, 16> Instructions;
+  typedef DenseMap<Value *, Instructions> ValueToInsts;
+
+  /// Check if it is profitable to move a sext through this instruction.
+  /// Currently, we consider it is profitable if:
+  /// - Inst is used only once (no need to insert a truncate).
+  /// - Inst has only one operand that will require a sext operation (we
+  ///   do not create new sext operations).
+  bool shouldGetThrough(const Instruction *Inst);
+
+  /// Check if it is possible and legal to move a sext through this
+  /// instruction.
+  /// Current heuristic considers that we can get through:
+  /// - Arithmetic operation marked with the nsw or nuw flag.
+  /// - Other sext operation.
+  /// - Truncate operation if it was just dropping sign extended bits.
+  bool canGetThrough(const Instruction *Inst);
+
+  /// Move sext operations through instructions that are safe to sext through.
+  bool propagateSignExtension(Instructions &SExtInsts);
+
+  /// Should this sext be considered for code motion?
+  /// We look for sexts with ConsideredSExtType and uses in at least one
+  /// GetElementPtrInst.
+  bool shouldConsiderSExt(const Instruction *SExt) const;
+
+  /// Collect all interesting sext operations, i.e., the ones with the right
+  /// type and used in memory accesses.
+  /// More precisely, a sext instruction is considered as interesting if it
+  /// is used in a "complex" getelementptr or if at least one other sext
+  /// instruction exists that sign extended the same initial value.
+  /// A getelementptr is considered as "complex" if it has more than 2
+  /// operands.
+  void analyzeSExtension(Instructions &SExtInsts);
+
+  /// Merge redundant sign extension operations in common dominator.
+  void mergeSExts(ValueToInsts &ValToSExtendedUses,
+                  SetOfInstructions &ToRemove);
+};
+} // end anonymous namespace.
+
+char AArch64AddressTypePromotion::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion",
+                      "AArch64 Type Promotion Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion",
+                    "AArch64 Type Promotion Pass", false, false)
+
+FunctionPass *llvm::createAArch64AddressTypePromotionPass() {
+  return new AArch64AddressTypePromotion();
+}
+
+bool AArch64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
+  if (isa<SExtInst>(Inst))
+    return true;
+
+  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
+  if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
+      (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap()))
+    return true;
+
+  // sext(trunc(sext)) --> sext
+  if (isa<TruncInst>(Inst) && isa<SExtInst>(Inst->getOperand(0))) {
+    const Instruction *Opnd = cast<Instruction>(Inst->getOperand(0));
+    // Check that the truncate just drops sign extended bits.
+    if (Inst->getType()->getIntegerBitWidth() >=
+            Opnd->getOperand(0)->getType()->getIntegerBitWidth() &&
+        Inst->getOperand(0)->getType()->getIntegerBitWidth() <=
+            ConsideredSExtType->getIntegerBitWidth())
+      return true;
+  }
+
+  return false;
+}
+
+bool AArch64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) {
+  // If the type of the sext is the same as the considered one, this sext
+  // will become useless.
+  // Otherwise, we will have to do something to preserve the original value,
+  // unless it is used once.
+  if (isa<SExtInst>(Inst) &&
+      (Inst->getType() == ConsideredSExtType || Inst->hasOneUse()))
+    return true;
+
+  // If the Inst is used more than once, we may need to insert truncate
+  // operations and we don't do that at the moment.
+  if (!Inst->hasOneUse())
+    return false;
+
+  // This truncate is used only once, thus if we can get through, it will
+  // become useless.
+  if (isa<TruncInst>(Inst))
+    return true;
+
+  // If both operands are not constant, a new sext will be created here.
+  // Current heuristic is: each step should be profitable.
+  // Therefore we don't allow increasing the number of sext operations even
+  // if it may be profitable later on.
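+  // (Illustrative) "add i32 %a, 5" qualifies: the constant 5 can be sign
+  // extended statically, so no extra sext is needed. "add i32 %a, %b" does
+  // not, since %b would require a second sext.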
+ if (isa(Inst) && isa(Inst->getOperand(1))) + return true; + + return false; +} + +static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { + if (isa(Inst) && OpIdx == 0) + return false; + return true; +} + +bool +AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { + if (SExt->getType() != ConsideredSExtType) + return false; + + for (const Use &U : SExt->uses()) { + if (isa(*U)) + return true; + } + + return false; +} + +// Input: +// - SExtInsts contains all the sext instructions that are use direclty in +// GetElementPtrInst, i.e., access to memory. +// Algorithm: +// - For each sext operation in SExtInsts: +// Let var be the operand of sext. +// while it is profitable (see shouldGetThrough), legal, and safe +// (see canGetThrough) to move sext through var's definition: +// * promote the type of var's definition. +// * fold var into sext uses. +// * move sext above var's definition. +// * update sext operand to use the operand of var that should be sign +// extended (by construction there is only one). +// +// E.g., +// a = ... i32 c, 3 +// b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a' +// ... +// = b +// => Yes, update the code +// b = sext i32 c to i64 +// a = ... i64 b, 3 +// ... +// = a +// Iterate on 'c'. +bool +AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { + DEBUG(dbgs() << "*** Propagate Sign Extension ***\n"); + + bool LocalChange = false; + SetOfInstructions ToRemove; + ValueToInsts ValToSExtendedUses; + while (!SExtInsts.empty()) { + // Get through simple chain. + Instruction *SExt = SExtInsts.pop_back_val(); + + DEBUG(dbgs() << "Consider:\n" << *SExt << '\n'); + + // If this SExt has already been merged continue. + if (SExt->use_empty() && ToRemove.count(SExt)) { + DEBUG(dbgs() << "No uses => marked as delete\n"); + continue; + } + + // Now try to get through the chain of definitions. + while (isa(SExt->getOperand(0))) { + Instruction *Inst = dyn_cast(SExt->getOperand(0)); + DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n'); + if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) { + // We cannot get through something that is not an Instruction + // or not safe to SExt. + DEBUG(dbgs() << "Cannot get through\n"); + break; + } + + LocalChange = true; + // If this is a sign extend, it becomes useless. + if (isa(Inst) || isa(Inst)) { + DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n"); + // We cannot use replaceAllUsesWith here because we may trigger some + // assertion on the type as all involved sext operation may have not + // been moved yet. + while (!Inst->use_empty()) { + Value::use_iterator UseIt = Inst->use_begin(); + Instruction *UseInst = dyn_cast(*UseIt); + assert(UseInst && "Use of sext is not an Instruction!"); + UseInst->setOperand(UseIt->getOperandNo(), SExt); + } + ToRemove.insert(Inst); + SExt->setOperand(0, Inst->getOperand(0)); + SExt->moveBefore(Inst); + continue; + } + + // Get through the Instruction: + // 1. Update its type. + // 2. Replace the uses of SExt by Inst. + // 3. Sign extend each operand that needs to be sign extended. + + // Step #1. + Inst->mutateType(SExt->getType()); + // Step #2. + SExt->replaceAllUsesWith(Inst); + // Step #3. 
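+      // (Illustrative) For "a = add nsw i32 b, 3" promoted to i64: step #1
+      // retypes 'a', step #2 redirects the users of the sext to 'a', and
+      // step #3 below sign extends the remaining i32 operand 'b', reusing
+      // this sext instruction for the first such operand.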
+ Instruction *SExtForOpnd = SExt; + + DEBUG(dbgs() << "Propagate SExt to operands\n"); + for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx; + ++OpIdx) { + DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n'); + if (Inst->getOperand(OpIdx)->getType() == SExt->getType() || + !shouldSExtOperand(Inst, OpIdx)) { + DEBUG(dbgs() << "No need to propagate\n"); + continue; + } + // Check if we can statically sign extend the operand. + Value *Opnd = Inst->getOperand(OpIdx); + if (const ConstantInt *Cst = dyn_cast(Opnd)) { + DEBUG(dbgs() << "Statically sign extend\n"); + Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(), + Cst->getSExtValue())); + continue; + } + // UndefValue are typed, so we have to statically sign extend them. + if (isa(Opnd)) { + DEBUG(dbgs() << "Statically sign extend\n"); + Inst->setOperand(OpIdx, UndefValue::get(SExt->getType())); + continue; + } + + // Otherwise we have to explicity sign extend it. + assert(SExtForOpnd && + "Only one operand should have been sign extended"); + + SExtForOpnd->setOperand(0, Opnd); + + DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n"); + // Move the sign extension before the insertion point. + SExtForOpnd->moveBefore(Inst); + Inst->setOperand(OpIdx, SExtForOpnd); + // If more sext are required, new instructions will have to be created. + SExtForOpnd = nullptr; + } + if (SExtForOpnd == SExt) { + DEBUG(dbgs() << "Sign extension is useless now\n"); + ToRemove.insert(SExt); + break; + } + } + + // If the use is already of the right type, connect its uses to its argument + // and delete it. + // This can happen for an Instruction which all uses are sign extended. + if (!ToRemove.count(SExt) && + SExt->getType() == SExt->getOperand(0)->getType()) { + DEBUG(dbgs() << "Sign extension is useless, attach its use to " + "its argument\n"); + SExt->replaceAllUsesWith(SExt->getOperand(0)); + ToRemove.insert(SExt); + } else + ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt); + } + + if (EnableMerge) + mergeSExts(ValToSExtendedUses, ToRemove); + + // Remove all instructions marked as ToRemove. + for (Instruction *I: ToRemove) + I->eraseFromParent(); + return LocalChange; +} + +void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, + SetOfInstructions &ToRemove) { + DominatorTree &DT = getAnalysis().getDomTree(); + + for (auto &Entry : ValToSExtendedUses) { + Instructions &Insts = Entry.second; + Instructions CurPts; + for (Instruction *Inst : Insts) { + if (ToRemove.count(Inst)) + continue; + bool inserted = false; + for (auto Pt : CurPts) { + if (DT.dominates(Inst, Pt)) { + DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n" + << *Inst << '\n'); + (Pt)->replaceAllUsesWith(Inst); + ToRemove.insert(Pt); + Pt = Inst; + inserted = true; + break; + } + if (!DT.dominates(Pt, Inst)) + // Give up if we need to merge in a common dominator as the + // expermients show it is not profitable. + continue; + + DEBUG(dbgs() << "Replace all uses of:\n" << *Inst << "\nwith:\n" + << *Pt << '\n'); + Inst->replaceAllUsesWith(Pt); + ToRemove.insert(Inst); + inserted = true; + break; + } + if (!inserted) + CurPts.push_back(Inst); + } + } +} + +void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { + DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n"); + + DenseMap SeenChains; + + for (auto &BB : *Func) { + for (auto &II : BB) { + Instruction *SExt = &II; + + // Collect all sext operation per type. 
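+      // (Illustrative) i.e., keep only "sext ... to i64" instructions that
+      // feed at least one getelementptr; see shouldConsiderSExt above.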
+ if (!isa(SExt) || !shouldConsiderSExt(SExt)) + continue; + + DEBUG(dbgs() << "Found:\n" << (*SExt) << '\n'); + + // Cases where we actually perform the optimization: + // 1. SExt is used in a getelementptr with more than 2 operand => + // likely we can merge some computation if they are done on 64 bits. + // 2. The beginning of the SExt chain is SExt several time. => + // code sharing is possible. + + bool insert = false; + // #1. + for (const Use &U : SExt->uses()) { + const Instruction *Inst = dyn_cast(U); + if (Inst && Inst->getNumOperands() > 2) { + DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst + << '\n'); + insert = true; + break; + } + } + + // #2. + // Check the head of the chain. + Instruction *Inst = SExt; + Value *Last; + do { + int OpdIdx = 0; + const BinaryOperator *BinOp = dyn_cast(Inst); + if (BinOp && isa(BinOp->getOperand(0))) + OpdIdx = 1; + Last = Inst->getOperand(OpdIdx); + Inst = dyn_cast(Last); + } while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst)); + + DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n'); + DenseMap::iterator AlreadySeen = + SeenChains.find(Last); + if (insert || AlreadySeen != SeenChains.end()) { + DEBUG(dbgs() << "Insert\n"); + SExtInsts.push_back(SExt); + if (AlreadySeen != SeenChains.end() && AlreadySeen->second != nullptr) { + DEBUG(dbgs() << "Insert chain member\n"); + SExtInsts.push_back(AlreadySeen->second); + SeenChains[Last] = nullptr; + } + } else { + DEBUG(dbgs() << "Record its chain membership\n"); + SeenChains[Last] = SExt; + } + } + } +} + +bool AArch64AddressTypePromotion::runOnFunction(Function &F) { + if (!EnableAddressTypePromotion || F.isDeclaration()) + return false; + Func = &F; + ConsideredSExtType = Type::getInt64Ty(Func->getContext()); + + DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n'); + + Instructions SExtInsts; + analyzeSExtension(SExtInsts); + return propagateSignExtension(SExtInsts); +} diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp new file mode 100644 index 0000000..734fb21 --- /dev/null +++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -0,0 +1,387 @@ +//===-- AArch64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// When profitable, replace GPR targeting i64 instructions with their +// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined +// as minimizing the number of cross-class register copies. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// TODO: Graph based predicate heuristics. +// Walking the instruction list linearly will get many, perhaps most, of +// the cases, but to do a truly thorough job of this, we need a more +// wholistic approach. +// +// This optimization is very similar in spirit to the register allocator's +// spill placement, only here we're determining where to place cross-class +// register copies rather than spills. As such, a similar approach is +// called for. +// +// We want to build up a set of graphs of all instructions which are candidates +// for transformation along with instructions which generate their inputs and +// consume their outputs. 
For each edge in the graph, we assign a weight
+// based on whether there is a copy required there (weight zero if not) and
+// the block frequency of the block containing the defining or using
+// instruction, whichever is less. Our optimization is then a graph problem
+// to minimize the total weight of all the graphs, then transform instructions
+// and add or remove copy instructions as called for to implement the
+// solution.
+//===----------------------------------------------------------------------===//

+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64RegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-simd-scalar"
+
+// Allow forcing all i64 operations with equivalent SIMD instructions to use
+// them. For stress-testing the transformation function.
+static cl::opt<bool>
+TransformAll("aarch64-simd-scalar-force-all",
+             cl::desc("Force use of AdvSIMD scalar instructions everywhere"),
+             cl::init(false), cl::Hidden);
+
+STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
+STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
+STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
+
+namespace {
+class AArch64AdvSIMDScalar : public MachineFunctionPass {
+  MachineRegisterInfo *MRI;
+  const AArch64InstrInfo *TII;
+
+private:
+  // isProfitableToTransform - Predicate function to determine whether an
+  // instruction should be transformed to its equivalent AdvSIMD scalar
+  // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
+  bool isProfitableToTransform(const MachineInstr *MI) const;
+
+  // transformInstruction - Perform the transformation of an instruction
+  // to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
+  // to be the correct register class, minimizing cross-class copies.
+  void transformInstruction(MachineInstr *MI);
+
+  // processMachineBasicBlock - Main optimization loop.
+  bool processMachineBasicBlock(MachineBasicBlock *MBB);
+
+public:
+  static char ID; // Pass identification, replacement for typeid.
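+  // createAArch64AdvSIMDScalar() at the bottom of this file is the factory
+  // AArch64TargetMachine uses to add this pass to the codegen pipeline.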
+ explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &F) override; + + const char *getPassName() const override { + return "AdvSIMD Scalar Operation Optimization"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +char AArch64AdvSIMDScalar::ID = 0; +} // end anonymous namespace + +static bool isGPR64(unsigned Reg, unsigned SubReg, + const MachineRegisterInfo *MRI) { + if (SubReg) + return false; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass); + return AArch64::GPR64RegClass.contains(Reg); +} + +static bool isFPR64(unsigned Reg, unsigned SubReg, + const MachineRegisterInfo *MRI) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) && + SubReg == 0) || + (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) && + SubReg == AArch64::dsub); + // Physical register references just check the register class directly. + return (AArch64::FPR64RegClass.contains(Reg) && SubReg == 0) || + (AArch64::FPR128RegClass.contains(Reg) && SubReg == AArch64::dsub); +} + +// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64 +// copy instruction. Return zero_reg if the instruction is not a copy. +static unsigned getSrcFromCopy(const MachineInstr *MI, + const MachineRegisterInfo *MRI, + unsigned &SubReg) { + SubReg = 0; + // The "FMOV Xd, Dn" instruction is the typical form. + if (MI->getOpcode() == AArch64::FMOVDXr || + MI->getOpcode() == AArch64::FMOVXDr) + return MI->getOperand(1).getReg(); + // A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see + // these at this stage, but it's easy to check for. + if (MI->getOpcode() == AArch64::UMOVvi64 && MI->getOperand(2).getImm() == 0) { + SubReg = AArch64::dsub; + return MI->getOperand(1).getReg(); + } + // Or just a plain COPY instruction. This can be directly to/from FPR64, + // or it can be a dsub subreg reference to an FPR128. + if (MI->getOpcode() == AArch64::COPY) { + if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), + MRI) && + isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI)) + return MI->getOperand(1).getReg(); + if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), + MRI) && + isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), + MRI)) { + SubReg = MI->getOperand(1).getSubReg(); + return MI->getOperand(1).getReg(); + } + } + + // Otherwise, this is some other kind of instruction. + return 0; +} + +// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent +// that we're considering transforming to, return that AdvSIMD opcode. For all +// others, return the original opcode. +static int getTransformOpcode(unsigned Opc) { + switch (Opc) { + default: + break; + // FIXME: Lots more possibilities. + case AArch64::ADDXrr: + return AArch64::ADDv1i64; + case AArch64::SUBXrr: + return AArch64::SUBv1i64; + } + // No AdvSIMD equivalent, so just return the original opcode. + return Opc; +} + +static bool isTransformable(const MachineInstr *MI) { + int Opc = MI->getOpcode(); + return Opc != getTransformOpcode(Opc); +} + +// isProfitableToTransform - Predicate function to determine whether an +// instruction should be transformed to its equivalent AdvSIMD scalar +// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. 
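+// (Illustrative accounting) NumNewCopies starts at 3 below, since each of the
+// two inputs and the result may need a GPR64<->FPR64 copy; every input that
+// already comes from such a copy cancels one, and every copy that would
+// become dead counts toward NumRemovableCopies. The transform fires only
+// when NumNewCopies <= NumRemovableCopies.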
+bool +AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { + // If this instruction isn't eligible to be transformed (no SIMD equivalent), + // early exit since that's the common case. + if (!isTransformable(MI)) + return false; + + // Count the number of copies we'll need to add and approximate the number + // of copies that a transform will enable us to remove. + unsigned NumNewCopies = 3; + unsigned NumRemovableCopies = 0; + + unsigned OrigSrc0 = MI->getOperand(1).getReg(); + unsigned OrigSrc1 = MI->getOperand(2).getReg(); + unsigned Src0 = 0, SubReg0; + unsigned Src1 = 0, SubReg1; + if (!MRI->def_empty(OrigSrc0)) { + MachineRegisterInfo::def_instr_iterator Def = + MRI->def_instr_begin(OrigSrc0); + assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); + Src0 = getSrcFromCopy(&*Def, MRI, SubReg0); + // If the source was from a copy, we don't need to insert a new copy. + if (Src0) + --NumNewCopies; + // If there are no other users of the original source, we can delete + // that instruction. + if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) + ++NumRemovableCopies; + } + if (!MRI->def_empty(OrigSrc1)) { + MachineRegisterInfo::def_instr_iterator Def = + MRI->def_instr_begin(OrigSrc1); + assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); + Src1 = getSrcFromCopy(&*Def, MRI, SubReg1); + if (Src1) + --NumNewCopies; + // If there are no other users of the original source, we can delete + // that instruction. + if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) + ++NumRemovableCopies; + } + + // If any of the uses of the original instructions is a cross class copy, + // that's a copy that will be removable if we transform. Likewise, if + // any of the uses is a transformable instruction, it's likely the tranforms + // will chain, enabling us to save a copy there, too. This is an aggressive + // heuristic that approximates the graph based cost analysis described above. + unsigned Dst = MI->getOperand(0).getReg(); + bool AllUsesAreCopies = true; + for (MachineRegisterInfo::use_instr_nodbg_iterator + Use = MRI->use_instr_nodbg_begin(Dst), + E = MRI->use_instr_nodbg_end(); + Use != E; ++Use) { + unsigned SubReg; + if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(&*Use)) + ++NumRemovableCopies; + // If the use is an INSERT_SUBREG, that's still something that can + // directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's + // preferable to have it use the FPR64 in most cases, as if the source + // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely. + // Ditto for a lane insert. + else if (Use->getOpcode() == AArch64::INSERT_SUBREG || + Use->getOpcode() == AArch64::INSvi64gpr) + ; + else + AllUsesAreCopies = false; + } + // If all of the uses of the original destination register are copies to + // FPR64, then we won't end up having a new copy back to GPR64 either. + if (AllUsesAreCopies) + --NumNewCopies; + + // If a transform will not increase the number of cross-class copies required, + // return true. + if (NumNewCopies <= NumRemovableCopies) + return true; + + // Finally, even if we otherwise wouldn't transform, check if we're forcing + // transformation of everything. 
+ return TransformAll; +} + +static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI, + unsigned Dst, unsigned Src, bool IsKill) { + MachineInstrBuilder MIB = + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY), + Dst) + .addReg(Src, getKillRegState(IsKill)); + DEBUG(dbgs() << " adding copy: " << *MIB); + ++NumCopiesInserted; + return MIB; +} + +// transformInstruction - Perform the transformation of an instruction +// to its equivalant AdvSIMD scalar instruction. Update inputs and outputs +// to be the correct register class, minimizing cross-class copies. +void AArch64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { + DEBUG(dbgs() << "Scalar transform: " << *MI); + + MachineBasicBlock *MBB = MI->getParent(); + int OldOpc = MI->getOpcode(); + int NewOpc = getTransformOpcode(OldOpc); + assert(OldOpc != NewOpc && "transform an instruction to itself?!"); + + // Check if we need a copy for the source registers. + unsigned OrigSrc0 = MI->getOperand(1).getReg(); + unsigned OrigSrc1 = MI->getOperand(2).getReg(); + unsigned Src0 = 0, SubReg0; + unsigned Src1 = 0, SubReg1; + if (!MRI->def_empty(OrigSrc0)) { + MachineRegisterInfo::def_instr_iterator Def = + MRI->def_instr_begin(OrigSrc0); + assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); + Src0 = getSrcFromCopy(&*Def, MRI, SubReg0); + // If there are no other users of the original source, we can delete + // that instruction. + if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) { + assert(Src0 && "Can't delete copy w/o a valid original source!"); + Def->eraseFromParent(); + ++NumCopiesDeleted; + } + } + if (!MRI->def_empty(OrigSrc1)) { + MachineRegisterInfo::def_instr_iterator Def = + MRI->def_instr_begin(OrigSrc1); + assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); + Src1 = getSrcFromCopy(&*Def, MRI, SubReg1); + // If there are no other users of the original source, we can delete + // that instruction. + if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) { + assert(Src1 && "Can't delete copy w/o a valid original source!"); + Def->eraseFromParent(); + ++NumCopiesDeleted; + } + } + // If we weren't able to reference the original source directly, create a + // copy. + if (!Src0) { + SubReg0 = 0; + Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); + insertCopy(TII, MI, Src0, OrigSrc0, true); + } + if (!Src1) { + SubReg1 = 0; + Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass); + insertCopy(TII, MI, Src1, OrigSrc1, true); + } + + // Create a vreg for the destination. + // FIXME: No need to do this if the ultimate user expects an FPR64. + // Check for that and avoid the copy if possible. + unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass); + + // For now, all of the new instructions have the same simple three-register + // form, so no need to special case based on what instruction we're + // building. + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(NewOpc), Dst) + .addReg(Src0, getKillRegState(true), SubReg0) + .addReg(Src1, getKillRegState(true), SubReg1); + + // Now copy the result back out to a GPR. + // FIXME: Try to avoid this if all uses could actually just use the FPR64 + // directly. + insertCopy(TII, MI, MI->getOperand(0).getReg(), Dst, true); + + // Erase the old instruction. + MI->eraseFromParent(); + + ++NumScalarInsnsUsed; +} + +// processMachineBasicBlock - Main optimzation loop. 
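+// Note that the iterator is advanced past MI before any transformation:
+// transformInstruction erases MI, and erasing the instruction a live
+// iterator points at would invalidate the walk.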
+bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { + MachineInstr *MI = I; + ++I; + if (isProfitableToTransform(MI)) { + transformInstruction(MI); + Changed = true; + } + } + return Changed; +} + +// runOnMachineFunction - Pass entry point from PassManager. +bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { + bool Changed = false; + DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n"); + + const TargetMachine &TM = mf.getTarget(); + MRI = &mf.getRegInfo(); + TII = static_cast(TM.getInstrInfo()); + + // Just check things on a one-block-at-a-time basis. + for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) + if (processMachineBasicBlock(I)) + Changed = true; + return Changed; +} + +// createAArch64AdvSIMDScalar - Factory function used by AArch64TargetMachine +// to add the pass to the PassManager. +FunctionPass *llvm::createAArch64AdvSIMDScalar() { + return new AArch64AdvSIMDScalar(); +} diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index f0b52d3..c3ee9bb 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1,4 +1,4 @@ -//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===// +//===-- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer --------------===// // // The LLVM Compiler Infrastructure // @@ -8,236 +8,337 @@ //===----------------------------------------------------------------------===// // // This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format AArch64 assembly language. +// of machine-dependent LLVM code to the AArch64 assembly language. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" -#include "AArch64AsmPrinter.h" +#include "AArch64.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64MCInstLower.h" +#include "AArch64RegisterInfo.h" +#include "AArch64Subtarget.h" #include "InstPrinter/AArch64InstPrinter.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/Mangler.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCLinkerOptimizationHint.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/TargetRegistry.h" - using namespace llvm; -/// Try to print a floating-point register as if it belonged to a specified -/// register-class. For example the inline asm operand modifier "b" requires its -/// argument to be printed as "bN". 
-static bool printModifiedFPRAsmOperand(const MachineOperand &MO, - const TargetRegisterInfo *TRI, - char RegType, raw_ostream &O) { - if (!MO.isReg()) - return true; - - for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { - if (AArch64::FPR8RegClass.contains(*AR)) { - O << RegType << TRI->getEncodingValue(MO.getReg()); - return false; +#define DEBUG_TYPE "asm-printer" + +namespace { + +class AArch64AsmPrinter : public AsmPrinter { + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when printing asm code for different targets. + const AArch64Subtarget *Subtarget; + + AArch64MCInstLower MCInstLowering; + StackMaps SM; + +public: + AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) + : AsmPrinter(TM, Streamer), + Subtarget(&TM.getSubtarget()), + MCInstLowering(OutContext, *Mang, *this), SM(*this), AArch64FI(nullptr), + LOHLabelCounter(0) {} + + const char *getPassName() const override { + return "AArch64 Assembly Printer"; + } + + /// \brief Wrapper for MCInstLowering.lowerOperand() for the + /// tblgen'erated pseudo lowering. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { + return MCInstLowering.lowerOperand(MO, MCOp); + } + + void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); + void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); + /// \brief tblgen'erated driver function for lowering simple MI->MC + /// pseudo instructions. + bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, + const MachineInstr *MI); + + void EmitInstruction(const MachineInstr *MI) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AsmPrinter::getAnalysisUsage(AU); + AU.setPreservesAll(); + } + + bool runOnMachineFunction(MachineFunction &F) override { + AArch64FI = F.getInfo(); + return AsmPrinter::runOnMachineFunction(F); + } + +private: + MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O); + bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); + bool printAsmRegInClass(const MachineOperand &MO, + const TargetRegisterClass *RC, bool isVector, + raw_ostream &O); + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) override; + + void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + void EmitFunctionBodyEnd() override; + + MCSymbol *GetCPISymbol(unsigned CPID) const override; + void EmitEndOfAsmFile(Module &M) override; + AArch64FunctionInfo *AArch64FI; + + /// \brief Emit the LOHs contained in AArch64FI. + void EmitLOHs(); + + typedef std::map MInstToMCSymbol; + MInstToMCSymbol LOHInstToLabel; + unsigned LOHLabelCounter; +}; + +} // end of anonymous namespace + +//===----------------------------------------------------------------------===// + +void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetMachO()) { + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. 
+    // generates code that does this, it is always safe to set.
+    OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+    SM.serializeToStackMapSection();
+  }
+
+  // Emit a .data.rel section containing any stubs that were created.
+  if (Subtarget->isTargetELF()) {
+    const TargetLoweringObjectFileELF &TLOFELF =
+        static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+    MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+    // Output stubs for external and common global variables.
+    MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+    if (!Stubs.empty()) {
+      OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+      const DataLayout *TD = TM.getDataLayout();
+
+      for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+        OutStreamer.EmitLabel(Stubs[i].first);
+        OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+                                    TD->getPointerSize(0));
+      }
+      Stubs.clear();
     }
   }
-  // The register doesn't correspond to anything floating-point like.
-  return true;
 }
 
-/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
-/// with the obvious type and an immediate 0 as either wzr or xzr.
-static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
-                                       const TargetRegisterInfo *TRI,
-                                       const TargetRegisterClass &RegClass,
-                                       raw_ostream &O) {
-  char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
+MachineLocation
+AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+  MachineLocation Location;
+  assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+  // Frame address. Currently handles register +- offset only.
+  if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+    Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+  else {
+    DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+  }
+  return Location;
+}
 
-  if (MO.isImm() && MO.getImm() == 0) {
-    O << Prefix << "zr";
-    return false;
-  } else if (MO.isReg()) {
-    if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
-      O << (Prefix == 'x' ? "sp" : "wsp");
-      return false;
-    }
+void AArch64AsmPrinter::EmitLOHs() {
+  SmallVector<MCSymbol *, 3> MCArgs;
 
-    for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
-      if (RegClass.contains(*AR)) {
-        O << AArch64InstPrinter::getRegisterName(*AR);
-        return false;
-      }
+  for (const auto &D : AArch64FI->getLOHContainer()) {
+    for (const MachineInstr *MI : D.getArgs()) {
+      MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI);
+      assert(LabelIt != LOHInstToLabel.end() &&
+             "Label hasn't been inserted for LOH related instruction");
+      MCArgs.push_back(LabelIt->second);
     }
+    OutStreamer.EmitLOHDirective(D.getKind(), MCArgs);
+    MCArgs.clear();
+  }
+}
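// Editorial note (not part of the patch): EmitLOHDirective() above is what
// ultimately prints MachO Linker Optimization Hints. For a hypothetical global
// _var, the emitted assembly looks roughly like this (illustrative sketch):
//
//   Lloh0:
//     adrp  x8, _var@PAGE
//   Lloh1:
//     add   x8, x8, _var@PAGEOFF
//     .loh  AdrpAdd Lloh0, Lloh1
//
// The .loh record tells the Darwin linker that, if _var lands close enough to
// the code, the adrp/add pair may be rewritten as a single adr.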
-  return true;
+void AArch64AsmPrinter::EmitFunctionBodyEnd() {
+  if (!AArch64FI->getLOHRelated().empty())
+    EmitLOHs();
 }
 
-bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
-                                             bool PrintImmediatePrefix,
-                                             StringRef Suffix, raw_ostream &O) {
-  StringRef Name;
-  StringRef Modifier;
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
+MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const {
+  // Darwin uses a linker-private symbol name for constant-pools (to
+  // avoid addends on the relocation?), ELF has no such concept and
+  // uses a normal private symbol.
+  if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
+    return OutContext.GetOrCreateSymbol(
+        Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
+        Twine(getFunctionNumber()) + "_" + Twine(CPID));
+
+  return OutContext.GetOrCreateSymbol(
+      Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
+      Twine(getFunctionNumber()) + "_" + Twine(CPID));
+}
+
+void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
+                                     raw_ostream &O) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
   switch (MO.getType()) {
   default:
-    return true;
-  case MachineOperand::MO_GlobalAddress:
-    Name = getSymbol(MO.getGlobal())->getName();
-
-    // Global variables may be accessed either via a GOT or in various fun and
-    // interesting TLS-model specific ways. Set the prefix modifier as
-    // appropriate here.
-    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
-      Reloc::Model RelocM = TM.getRelocationModel();
-      if (GV->isThreadLocal()) {
-        switch (TM.getTLSModel(GV)) {
-        case TLSModel::GeneralDynamic:
-          Modifier = "tlsdesc";
-          break;
-        case TLSModel::LocalDynamic:
-          Modifier = "dtprel";
-          break;
-        case TLSModel::InitialExec:
-          Modifier = "gottprel";
-          break;
-        case TLSModel::LocalExec:
-          Modifier = "tprel";
-          break;
-        }
-      } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
-        Modifier = "got";
-      }
-    }
+    assert(0 && "<unknown operand type>");
+  case MachineOperand::MO_Register: {
+    unsigned Reg = MO.getReg();
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+    assert(!MO.getSubReg() && "Subregs should be eliminated!");
+    O << AArch64InstPrinter::getRegisterName(Reg);
+    break;
+  }
+  case MachineOperand::MO_Immediate: {
+    int64_t Imm = MO.getImm();
+    O << '#' << Imm;
     break;
-  case MachineOperand::MO_BlockAddress:
-    Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+  }
+  }
+}
+
+bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
+                                          raw_ostream &O) {
+  unsigned Reg = MO.getReg();
+  switch (Mode) {
+  default:
+    return true; // Unknown mode.
+  case 'w':
+    Reg = getWRegFromXReg(Reg);
     break;
-  case MachineOperand::MO_ConstantPoolIndex:
-    Name = GetCPISymbol(MO.getIndex())->getName();
+  case 'x':
+    Reg = getXRegFromWReg(Reg);
     break;
   }
 
-  // Some instructions (notably ADRP) don't take the # prefix for
-  // immediates. Only print it if asked to.
-  if (PrintImmediatePrefix)
-    O << '#';
-
-  // Only need the joining "_" if both the prefix and the suffix are
-  // non-null. This little block simply takes care of the four possibly
-  // combinations involved there.
-  if (Modifier == "" && Suffix == "")
-    O << Name;
-  else if (Modifier == "" && Suffix != "")
-    O << ":" << Suffix << ':' << Name;
-  else if (Modifier != "" && Suffix == "")
-    O << ":" << Modifier << ':' << Name;
-  else
-    O << ":" << Modifier << '_' << Suffix << ':' << Name;
+  O << AArch64InstPrinter::getRegisterName(Reg);
+  return false;
+}
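// Editorial note (not part of the patch): GetCPISymbol above just glues a
// prefix, "CPI", the function number and the entry index together. A sketch of
// the resulting name scheme (the prefix values are illustrative assumptions):

#include <string>
std::string cpiSymbolName(bool LinkerPrivate, unsigned Fn, unsigned CPID) {
  const char *Prefix = LinkerPrivate ? "l" : ".L"; // MachO vs. typical ELF
  return std::string(Prefix) + "CPI" + std::to_string(Fn) + "_" +
         std::to_string(CPID); // e.g. "lCPI3_1" or ".LCPI3_1"
}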
+// Prints the register in MO using class RC using the offset in the
+// new register class. This should not be used for cross class
+// printing.
+bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
+                                           const TargetRegisterClass *RC,
+                                           bool isVector, raw_ostream &O) {
+  assert(MO.isReg() && "Should only get here with a register!");
+  const AArch64RegisterInfo *RI =
+      static_cast<const AArch64RegisterInfo *>(TM.getRegisterInfo());
+  unsigned Reg = MO.getReg();
+  unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
+  assert(RI->regsOverlap(RegToPrint, Reg));
+  O << AArch64InstPrinter::getRegisterName(
+           RegToPrint, isVector ? AArch64::vreg : AArch64::NoRegAltName);
   return false;
 }
 
 bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
                                         unsigned AsmVariant,
                                         const char *ExtraCode, raw_ostream &O) {
-  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+  const MachineOperand &MO = MI->getOperand(OpNum);
 
-  if (!ExtraCode)
-    ExtraCode = "";
+  // First try the generic code, which knows about modifiers like 'c' and 'n'.
+  if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
+    return false;
 
-  switch(ExtraCode[0]) {
-  default:
-    if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
+  // Does this asm operand have a single letter operand modifier?
+  if (ExtraCode && ExtraCode[0]) {
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
+
+    switch (ExtraCode[0]) {
+    default:
+      return true; // Unknown modifier.
+    case 'w': // Print W register
+    case 'x': // Print X register
+      if (MO.isReg())
+        return printAsmMRegister(MO, ExtraCode[0], O);
+      if (MO.isImm() && MO.getImm() == 0) {
+        unsigned Reg = ExtraCode[0] == 'w' ? AArch64::WZR : AArch64::XZR;
+        O << AArch64InstPrinter::getRegisterName(Reg);
         return false;
-    break;
-  case 'w':
-    // Output 32-bit general register operand, constant zero as wzr, or stack
-    // pointer as wsp. Ignored when used with other operand types.
-    if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
-                                    AArch64::GPR32RegClass, O))
-      return false;
-    break;
-  case 'x':
-    // Output 64-bit general register operand, constant zero as xzr, or stack
-    // pointer as sp. Ignored when used with other operand types.
-    if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
-                                    AArch64::GPR64RegClass, O))
-      return false;
-    break;
-  case 'H':
-    // Output higher numbered of a 64-bit general register pair
-  case 'Q':
-    // Output least significant register of a 64-bit general register pair
-  case 'R':
-    // Output most significant register of a 64-bit general register pair
-
-    // FIXME note: these three operand modifiers will require, to some extent,
-    // adding a paired GPR64 register class. Initial investigation suggests that
-    // assertions are hit unless it has a type and is made legal for that type
-    // in ISelLowering. After that step is made, the number of modifications
-    // needed explodes (operation legality, calling conventions, stores, reg
-    // copies ...).
-    llvm_unreachable("FIXME: Unimplemented register pairs");
-  case 'b':
-  case 'h':
-  case 's':
-  case 'd':
-  case 'q':
-    if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
-                                    ExtraCode[0], O))
-      return false;
-    break;
-  case 'A':
-    // Output symbolic address with appropriate relocation modifier (also
-    // suitable for ADRP).
-    if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O))
-      return false;
-    break;
-  case 'L':
-    // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
-    // modifier.
-    if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O))
+      }
+      printOperand(MI, OpNum, O);
       return false;
-    break;
-  case 'G':
-    // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
-    // modifier (currently only for TLS local exec).
-    if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O))
+    case 'b': // Print B register.
+    case 'h': // Print H register.
+    case 's': // Print S register.
+    case 'd': // Print D register.
+    case 'q': // Print Q register.
+      if (MO.isReg()) {
+        const TargetRegisterClass *RC;
+        switch (ExtraCode[0]) {
+        case 'b':
+          RC = &AArch64::FPR8RegClass;
+          break;
+        case 'h':
+          RC = &AArch64::FPR16RegClass;
+          break;
+        case 's':
+          RC = &AArch64::FPR32RegClass;
+          break;
+        case 'd':
+          RC = &AArch64::FPR64RegClass;
+          break;
+        case 'q':
+          RC = &AArch64::FPR128RegClass;
+          break;
+        default:
+          return true;
+        }
+        return printAsmRegInClass(MO, RC, false /* vector */, O);
+      }
+      printOperand(MI, OpNum, O);
       return false;
-    break;
-  case 'a':
-    return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+    }
   }
-
-  // There's actually no operand modifier, which leads to a slightly eclectic
-  // set of behaviour which we have to handle here.
-  const MachineOperand &MO = MI->getOperand(OpNum);
-  switch (MO.getType()) {
-  default:
-    llvm_unreachable("Unexpected operand for inline assembly");
-  case MachineOperand::MO_Register:
-    // GCC prints the unmodified operand of a 'w' constraint as the vector
-    // register. Technically, we could allocate the argument as a VPR128, but
-    // that leads to extremely dodgy copies being generated to get the data
-    // there.
-    if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
-      O << AArch64InstPrinter::getRegisterName(MO.getReg());
-    break;
-  case MachineOperand::MO_Immediate:
-    O << '#' << MO.getImm();
-    break;
-  case MachineOperand::MO_FPImmediate:
-    assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
-    O << "#0.0";
-    break;
-  case MachineOperand::MO_BlockAddress:
-  case MachineOperand::MO_ConstantPoolIndex:
-  case MachineOperand::MO_GlobalAddress:
-    return printSymbolicAddress(MO, false, "", O);
+  // According to ARM, we should emit x and v registers unless we have a
+  // modifier.
+  if (MO.isReg()) {
+    unsigned Reg = MO.getReg();
+
+    // If this is a w or x register, print an x register.
+    if (AArch64::GPR32allRegClass.contains(Reg) ||
+        AArch64::GPR64allRegClass.contains(Reg))
+      return printAsmMRegister(MO, 'x', O);
+
+    // If this is a b, h, s, d, or q register, print it as a v register.
+    return printAsmRegInClass(MO, &AArch64::FPR128RegClass, true /* vector */,
+                              O);
   }
+  printOperand(MI, OpNum, O);
   return false;
 }
 
@@ -246,15 +347,90 @@ bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
                                               unsigned AsmVariant, const char *ExtraCode,
                                               raw_ostream &O) {
-  // Currently both the memory constraints (m and Q) behave the same and amount
-  // to the address as a single register. In future, we may allow "m" to provide
-  // both a base and an offset.
+  if (ExtraCode && ExtraCode[0])
+    return true; // Unknown modifier.
+
   const MachineOperand &MO = MI->getOperand(OpNum);
-  assert(MO.isReg() && "unexpected inline assembly memory operand");
-  O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
+  assert(MO.isReg() && "unexpected inline asm memory operand");
+  O << "[" << AArch64InstPrinter::getRegisterName(MO.getReg()) << "]";
   return false;
 }
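// Editorial note (not part of the patch): PrintAsmOperand/PrintAsmMemoryOperand
// back GCC-style inline-asm operand modifiers. A user-level illustration of the
// 'w' modifier handled above (any AArch64 C compiler; names are illustrative):

static inline long sign_extend_word(long v) {
  long r;
  // %w1 prints the 32-bit (W) name of v's register; plain %0 prints the X name.
  asm("sxtw %0, %w1" : "=r"(r) : "r"(v)); // e.g. emits: sxtw x8, w9
  return r;
}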
+
+void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+                                               raw_ostream &OS) {
+  unsigned NOps = MI->getNumOperands();
+  assert(NOps == 4);
+  OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // cast away const; DIetc do not take const operands for some reason.
+  DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
+  OS << V.getName();
+  OS << " <- ";
+  // Frame address. Currently handles register +- offset only.
+  assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+  OS << '[';
+  printOperand(MI, 0, OS);
+  OS << '+';
+  printOperand(MI, 1, OS);
+  OS << ']';
+  OS << "+";
+  printOperand(MI, NOps - 2, OS);
+}
+
+void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+                                      const MachineInstr &MI) {
+  unsigned NumNOPBytes = MI.getOperand(1).getImm();
+
+  SM.recordStackMap(MI);
+  // Emit padding.
+  assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+  for (unsigned i = 0; i < NumNOPBytes; i += 4)
+    EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
+}
+
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>
+void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+                                        const MachineInstr &MI) {
+  SM.recordPatchPoint(MI);
+
+  PatchPointOpers Opers(&MI);
+
+  int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+  unsigned EncodedBytes = 0;
+  if (CallTarget) {
+    assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
+           "High 16 bits of call target should be zero.");
+    unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+    EncodedBytes = 16;
+    // Materialize the jump address:
+    EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZWi)
+                                    .addReg(ScratchReg)
+                                    .addImm((CallTarget >> 32) & 0xFFFF)
+                                    .addImm(32));
+    EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi)
+                                    .addReg(ScratchReg)
+                                    .addReg(ScratchReg)
+                                    .addImm((CallTarget >> 16) & 0xFFFF)
+                                    .addImm(16));
+    EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi)
+                                    .addReg(ScratchReg)
+                                    .addReg(ScratchReg)
+                                    .addImm(CallTarget & 0xFFFF)
+                                    .addImm(0));
+    EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::BLR).addReg(ScratchReg));
+  }
+  // Emit padding.
+  unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+  assert(NumBytes >= EncodedBytes &&
+         "Patchpoint can't request size less than the length of a call.");
+  assert((NumBytes - EncodedBytes) % 4 == 0 &&
+         "Invalid number of NOP bytes requested!");
+  for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
+    EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
+}
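// Editorial note (not part of the patch): LowerPATCHPOINT above materializes a
// 48-bit call target as movz/movk/movk, 16 bits at a time, then a blr: four
// 4-byte instructions, matching EncodedBytes = 16. The chunk arithmetic in
// isolation (a sketch, not the pass's code):

#include <cstdint>
struct MovChunks { uint16_t Hi, Mid, Lo; };
inline MovChunks splitCallTarget(uint64_t Target) {
  // Same shifts as above: bits [47:32], [31:16] and [15:0].
  return {static_cast<uint16_t>((Target >> 32) & 0xFFFF),
          static_cast<uint16_t>((Target >> 16) & 0xFFFF),
          static_cast<uint16_t>(Target & 0xFFFF)};
}
// e.g. Target 0x123456789ABC -> movz #0x1234, lsl #32; movk #0x5678, lsl #16;
// movk #0x9ABC.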
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
 #include "AArch64GenMCPseudoLowering.inc"
 
 void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -262,41 +438,87 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   if (emitPseudoExpansionLowering(OutStreamer, MI))
     return;
 
-  MCInst TmpInst;
-  LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
-  EmitToStreamer(OutStreamer, TmpInst);
-}
+  if (AArch64FI->getLOHRelated().count(MI)) {
+    // Generate a label for LOH related instruction
+    MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+    // Associate the instruction with the label
+    LOHInstToLabel[MI] = LOHLabel;
+    OutStreamer.EmitLabel(LOHLabel);
+  }
 
-void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
-  if (Subtarget->isTargetELF()) {
-    const TargetLoweringObjectFileELF &TLOFELF =
-      static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+  // Do any manual lowerings.
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case AArch64::DBG_VALUE: {
+    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+      SmallString<128> TmpStr;
+      raw_svector_ostream OS(TmpStr);
+      PrintDebugValueComment(MI, OS);
+      OutStreamer.EmitRawText(StringRef(OS.str()));
+    }
+    return;
+  }
 
-    MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+  // Tail calls use pseudo instructions so they have the proper code-gen
+  // attributes (isCall, isReturn, etc.). We lower them to the real
+  // instruction here.
+  case AArch64::TCRETURNri: {
+    MCInst TmpInst;
+    TmpInst.setOpcode(AArch64::BR);
+    TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+    EmitToStreamer(OutStreamer, TmpInst);
+    return;
+  }
+  case AArch64::TCRETURNdi: {
+    MCOperand Dest;
+    MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
+    MCInst TmpInst;
+    TmpInst.setOpcode(AArch64::B);
+    TmpInst.addOperand(Dest);
+    EmitToStreamer(OutStreamer, TmpInst);
+    return;
+  }
+  case AArch64::TLSDESC_BLR: {
+    MCOperand Callee, Sym;
+    MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
+    MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
 
-    // Output stubs for external and common global variables.
-    MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
-    if (!Stubs.empty()) {
-      OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
-      const DataLayout *TD = TM.getDataLayout();
+    // First emit a relocation-annotation. This expands to no code, but requests
+    // the following instruction gets an R_AARCH64_TLSDESC_CALL.
+    MCInst TLSDescCall;
+    TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
+    TLSDescCall.addOperand(Sym);
+    EmitToStreamer(OutStreamer, TLSDescCall);
 
-      for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
-        OutStreamer.EmitLabel(Stubs[i].first);
-        OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
-                                    TD->getPointerSize(0));
-      }
-      Stubs.clear();
-    }
+    // Other than that it's just a normal indirect call to the function loaded
+    // from the descriptor.
+    MCInst BLR;
+    BLR.setOpcode(AArch64::BLR);
+    BLR.addOperand(Callee);
+    EmitToStreamer(OutStreamer, BLR);
+
+    return;
+  }
+
+  case TargetOpcode::STACKMAP:
+    return LowerSTACKMAP(OutStreamer, SM, *MI);
+
+  case TargetOpcode::PATCHPOINT:
+    return LowerPATCHPOINT(OutStreamer, SM, *MI);
   }
-}
-bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
-  return AsmPrinter::runOnMachineFunction(MF);
+
+  // Finally, do the automated lowerings for everything else.
+  MCInst TmpInst;
+  MCInstLowering.Lower(MI, TmpInst);
+  EmitToStreamer(OutStreamer, TmpInst);
 }
 
 // Force static initialization.
 extern "C" void LLVMInitializeAArch64AsmPrinter() {
-  RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
-  RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
-}
+  RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
+  RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
+  RegisterAsmPrinter<AArch64AsmPrinter> Z(TheARM64leTarget);
+  RegisterAsmPrinter<AArch64AsmPrinter> W(TheARM64beTarget);
+}
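// Editorial note (not part of the patch): RegisterAsmPrinter is the standard
// TargetRegistry idiom; constructing it at static-initialization time records
// a factory for the target. A hypothetical out-of-tree target would register
// the same way (all names below are illustrative):
//
//   extern "C" void LLVMInitializeFooAsmPrinter() {
//     RegisterAsmPrinter<FooAsmPrinter> X(TheFooTarget);
//   }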
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h
deleted file mode 100644
index 824f003..0000000
--- a/lib/Target/AArch64/AArch64AsmPrinter.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the AArch64 assembly printer class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_AARCH64ASMPRINTER_H
-#define LLVM_AARCH64ASMPRINTER_H
-
-#include "AArch64.h"
-#include "AArch64TargetMachine.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class MCOperand;
-
-class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
-
-  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
-  /// make the right decision when printing asm code for different targets.
-  const AArch64Subtarget *Subtarget;
-
-  // emitPseudoExpansionLowering - tblgen'erated.
-  bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
-                                   const MachineInstr *MI);
-
-  public:
-  explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-    : AsmPrinter(TM, Streamer) {
-    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
-  }
-
-  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
-
-  MCOperand lowerSymbolOperand(const MachineOperand &MO,
-                               const MCSymbol *Sym) const;
-
-  void EmitInstruction(const MachineInstr *MI);
-  void EmitEndOfAsmFile(Module &M);
-
-  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
-                       unsigned AsmVariant, const char *ExtraCode,
-                       raw_ostream &O);
-  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
-                             unsigned AsmVariant, const char *ExtraCode,
-                             raw_ostream &O);
-
-  /// printSymbolicAddress - Given some kind of reasonably bare symbolic
-  /// reference, print out the appropriate asm string to represent it. If
-  /// appropriate, a relocation-specifier will be produced, composed of a
-  /// general class derived from the MO parameter and an instruction-specific
-  /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
-  /// given.
-  bool printSymbolicAddress(const MachineOperand &MO,
-                            bool PrintImmediatePrefix,
-                            StringRef Suffix, raw_ostream &O);
-
-  virtual const char *getPassName() const {
-    return "AArch64 Assembly Printer";
-  }
-
-  virtual bool runOnMachineFunction(MachineFunction &MF);
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
deleted file mode 100644
index c03cdde..0000000
--- a/lib/Target/AArch64/AArch64BranchFixupPass.cpp
+++ /dev/null
@@ -1,600 +0,0 @@
-//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that fixes AArch64 branches which have ended up out
-// of range for their immediate operands.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "aarch64-branch-fixup"
-#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumSplit, "Number of uncond branches inserted");
-STATISTIC(NumCBrFixed, "Number of cond branches fixed");
-
-/// Return the worst case padding that could result from unknown offset bits.
-/// This does not include alignment padding caused by known offset bits.
-///
-/// @param LogAlign log2(alignment)
-/// @param KnownBits Number of known low offset bits.
-static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
-  if (KnownBits < LogAlign)
-    return (1u << LogAlign) - (1u << KnownBits);
-  return 0;
-}
-
-namespace {
-  /// Due to limited PC-relative displacements, conditional branches to distant
-  /// blocks may need converting into an unconditional equivalent. For example:
-  ///     tbz w1, #0, far_away
-  /// becomes
-  ///     tbnz w1, #0, skip
-  ///     b far_away
-  ///   skip:
-  class AArch64BranchFixup : public MachineFunctionPass {
-    /// Information about the offset and size of a single basic block.
-    struct BasicBlockInfo {
-      /// Distance from the beginning of the function to the beginning of this
-      /// basic block.
-      ///
-      /// Offsets are computed assuming worst case padding before an aligned
-      /// block. This means that subtracting basic block offsets always gives a
-      /// conservative estimate of the real distance which may be smaller.
-      ///
-      /// Because worst case padding is used, the computed offset of an aligned
-      /// block may not actually be aligned.
-      unsigned Offset;
-
-      /// Size of the basic block in bytes. If the block contains inline
-      /// assembly, this is a worst case estimate.
-      ///
-      /// The size does not include any alignment padding whether from the
-      /// beginning of the block, or from an aligned jump table at the end.
-      unsigned Size;
-
-      /// The number of low bits in Offset that are known to be exact. The
-      /// remaining bits of Offset are an upper bound.
-      uint8_t KnownBits;
-
-      /// When non-zero, the block contains instructions (inline asm) of unknown
-      /// size. The real size may be smaller than Size bytes by a multiple of 1
-      /// << Unalign.
-      uint8_t Unalign;
-
-      BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
-
-      /// Compute the number of known offset bits internally to this block.
-      /// This number should be used to predict worst case padding when
-      /// splitting the block.
-      unsigned internalKnownBits() const {
-        unsigned Bits = Unalign ? Unalign : KnownBits;
-        // If the block size isn't a multiple of the known bits, assume the
-        // worst case padding.
-        if (Size & ((1u << Bits) - 1))
-          Bits = countTrailingZeros(Size);
-        return Bits;
-      }
-
-      /// Compute the offset immediately following this block. If LogAlign is
-      /// specified, return the offset the successor block will get if it has
-      /// this alignment.
-      unsigned postOffset(unsigned LogAlign = 0) const {
-        unsigned PO = Offset + Size;
-        if (!LogAlign)
-          return PO;
-        // Add alignment padding from the terminator.
-        return PO + UnknownPadding(LogAlign, internalKnownBits());
-      }
-
-      /// Compute the number of known low bits of postOffset. If this block
-      /// contains inline asm, the number of known bits drops to the
-      /// instruction alignment. An aligned terminator may increase the number
-      /// of know bits.
-      /// If LogAlign is given, also consider the alignment of the next block.
-      unsigned postKnownBits(unsigned LogAlign = 0) const {
-        return std::max(LogAlign, internalKnownBits());
-      }
-    };
-
-    std::vector<BasicBlockInfo> BBInfo;
-
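// Editorial worked example (not part of the patch): for UnknownPadding above,
// take LogAlign = 3 (an 8-byte aligned successor) and KnownBits = 1 (the
// offset is only known to be a multiple of 2). The worst case is
// (1 << 3) - (1 << 1) = 6: an offset of 2 may need 6 bytes of padding to reach
// the next 8-byte boundary, and no multiple of 2 can need more.
static_assert((1u << 3) - (1u << 1) == 6, "worst-case padding example");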
-    /// One per immediate branch, keeping the machine instruction pointer,
-    /// conditional or unconditional, the max displacement, and (if IsCond is
-    /// true) the corresponding inverted branch opcode.
-    struct ImmBranch {
-      MachineInstr *MI;
-      unsigned OffsetBits : 31;
-      bool IsCond : 1;
-      ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
-        : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
-    };
-
-    /// Keep track of all the immediate branch instructions.
-    ///
-    std::vector<ImmBranch> ImmBranches;
-
-    MachineFunction *MF;
-    const AArch64InstrInfo *TII;
-  public:
-    static char ID;
-    AArch64BranchFixup() : MachineFunctionPass(ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF);
-
-    virtual const char *getPassName() const {
-      return "AArch64 branch fixup pass";
-    }
-
-  private:
-    void initializeFunctionInfo();
-    MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
-    void adjustBBOffsetsAfter(MachineBasicBlock *BB);
-    bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
-                     unsigned OffsetBits);
-    bool fixupImmediateBr(ImmBranch &Br);
-    bool fixupConditionalBr(ImmBranch &Br);
-
-    void computeBlockSize(MachineBasicBlock *MBB);
-    unsigned getOffsetOf(MachineInstr *MI) const;
-    void dumpBBs();
-    void verify();
-  };
-  char AArch64BranchFixup::ID = 0;
-}
-
-/// check BBOffsets
-void AArch64BranchFixup::verify() {
-#ifndef NDEBUG
-  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
-       MBBI != E; ++MBBI) {
-    MachineBasicBlock *MBB = MBBI;
-    unsigned MBBId = MBB->getNumber();
-    assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
-  }
-#endif
-}
-
-/// print block size and offset information - debugging
-void AArch64BranchFixup::dumpBBs() {
-  DEBUG({
-    for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
-      const BasicBlockInfo &BBI = BBInfo[J];
-      dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
-             << " kb=" << unsigned(BBI.KnownBits)
-             << " ua=" << unsigned(BBI.Unalign)
-             << format(" size=%#x\n", BBInfo[J].Size);
-    }
-  });
-}
-
-/// Returns an instance of the branch fixup pass.
-FunctionPass *llvm::createAArch64BranchFixupPass() {
-  return new AArch64BranchFixup();
-}
-
-bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
-  MF = &mf;
-  DEBUG(dbgs() << "***** AArch64BranchFixup ******");
-  TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
-
-  // This pass invalidates liveness information when it splits basic blocks.
-  MF->getRegInfo().invalidateLiveness();
-
-  // Renumber all of the machine basic blocks in the function, guaranteeing that
-  // the numbers agree with the position of the block in the function.
-  MF->RenumberBlocks();
-
-  // Do the initial scan of the function, building up information about the
-  // sizes of each block and location of each immediate branch.
-  initializeFunctionInfo();
-
-  // Iteratively fix up branches until there is no change.
-  unsigned NoBRIters = 0;
-  bool MadeChange = false;
-  while (true) {
-    DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
-    bool BRChange = false;
-    for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
-      BRChange |= fixupImmediateBr(ImmBranches[i]);
-    if (BRChange && ++NoBRIters > 30)
-      report_fatal_error("Branch Fix Up pass failed to converge!");
-    DEBUG(dumpBBs());
-
-    if (!BRChange)
-      break;
-    MadeChange = true;
-  }
-
-  // After a while, this might be made debug-only, but it is not expensive.
-  verify();
-
-  DEBUG(dbgs() << '\n'; dumpBBs());
-
-  BBInfo.clear();
-  ImmBranches.clear();
-
-  return MadeChange;
-}
-
-/// Return true if the specified basic block can fallthrough into the block
-/// immediately after it.
-static bool BBHasFallthrough(MachineBasicBlock *MBB) {
-  // Get the next machine basic block in the function.
-  MachineFunction::iterator MBBI = MBB;
-  // Can't fall off end of function.
-  if (std::next(MBBI) == MBB->getParent()->end())
-    return false;
-
-  MachineBasicBlock *NextBB = std::next(MBBI);
-  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
-       E = MBB->succ_end(); I != E; ++I)
-    if (*I == NextBB)
-      return true;
-
-  return false;
-}
-
-/// Do the initial scan of the function, building up information about the sizes
-/// of each block, and each immediate branch.
-void AArch64BranchFixup::initializeFunctionInfo() {
-  BBInfo.clear();
-  BBInfo.resize(MF->getNumBlockIDs());
-
-  // First thing, compute the size of all basic blocks, and see if the function
-  // has any inline assembly in it. If so, we have to be conservative about
-  // alignment assumptions, as we don't know for sure the size of any
-  // instructions in the inline assembly.
-  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
-    computeBlockSize(I);
-
-  // The known bits of the entry block offset are determined by the function
-  // alignment.
-  BBInfo.front().KnownBits = MF->getAlignment();
-
-  // Compute block offsets and known bits.
-  adjustBBOffsetsAfter(MF->begin());
-
-  // Now go back through the instructions and build up our data structures.
-  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
-       MBBI != E; ++MBBI) {
-    MachineBasicBlock &MBB = *MBBI;
-
-    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-         I != E; ++I) {
-      if (I->isDebugValue())
-        continue;
-
-      int Opc = I->getOpcode();
-      if (I->isBranch()) {
-        bool IsCond = false;
-
-        // The offsets encoded in instructions here scale by the instruction
-        // size (4 bytes), effectively increasing their range by 2 bits.
-        unsigned Bits = 0;
-        switch (Opc) {
-        default:
-          continue; // Ignore other JT branches
-        case AArch64::TBZxii:
-        case AArch64::TBZwii:
-        case AArch64::TBNZxii:
-        case AArch64::TBNZwii:
-          IsCond = true;
-          Bits = 14 + 2;
-          break;
-        case AArch64::Bcc:
-        case AArch64::CBZx:
-        case AArch64::CBZw:
-        case AArch64::CBNZx:
-        case AArch64::CBNZw:
-          IsCond = true;
-          Bits = 19 + 2;
-          break;
-        case AArch64::Bimm:
-          Bits = 26 + 2;
-          break;
-        }
-
-        // Record this immediate branch.
-        ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
-      }
-    }
-  }
-}
-
-/// Compute the size and some alignment information for MBB. This function
-/// updates BBInfo directly.
-void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
-  BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
-  BBI.Size = 0;
-  BBI.Unalign = 0;
-
-  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
-       ++I) {
-    BBI.Size += TII->getInstSizeInBytes(*I);
-    // For inline asm, GetInstSizeInBytes returns a conservative estimate.
-    // The actual size may be smaller, but still a multiple of the instr size.
-    if (I->isInlineAsm())
-      BBI.Unalign = 2;
-  }
-}
-
-/// Return the current offset of the specified machine instruction from the
-/// start of the function. This offset changes as stuff is moved around inside
-/// the function.
-unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
-  MachineBasicBlock *MBB = MI->getParent();
-
-  // The offset is composed of two things: the sum of the sizes of all MBB's
-  // before this instruction's block, and the offset from the start of the block
-  // it is in.
-  unsigned Offset = BBInfo[MBB->getNumber()].Offset;
-
-  // Sum instructions before MI in MBB.
-  for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
-    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
-    Offset += TII->getInstSizeInBytes(*I);
-  }
-  return Offset;
-}
-
-/// Split the basic block containing MI into two blocks, which are joined by
-/// an unconditional branch. Update data structures and renumber blocks to
-/// account for this change and returns the newly created block.
-MachineBasicBlock *
-AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
-  MachineBasicBlock *OrigBB = MI->getParent();
-
-  // Create a new MBB for the code after the OrigBB.
-  MachineBasicBlock *NewBB =
-    MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
-  MachineFunction::iterator MBBI = OrigBB; ++MBBI;
-  MF->insert(MBBI, NewBB);
-
-  // Splice the instructions starting with MI over to NewBB.
-  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
-
-  // Add an unconditional branch from OrigBB to NewBB.
-  // Note the new unconditional branch is not being recorded.
-  // There doesn't seem to be meaningful DebugInfo available; this doesn't
-  // correspond to anything in the source.
-  BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
-  ++NumSplit;
-
-  // Update the CFG. All succs of OrigBB are now succs of NewBB.
-  NewBB->transferSuccessors(OrigBB);
-
-  // OrigBB branches to NewBB.
-  OrigBB->addSuccessor(NewBB);
-
-  // Update internal data structures to account for the newly inserted MBB.
-  MF->RenumberBlocks(NewBB);
-
-  // Insert an entry into BBInfo to align it properly with the (newly
-  // renumbered) block numbers.
-  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
-
-  // Figure out how large the OrigBB is. As the first half of the original
-  // block, it cannot contain a tablejump. The size includes
-  // the new jump we added. (It should be possible to do this without
-  // recounting everything, but it's very confusing, and this is rarely
-  // executed.)
-  computeBlockSize(OrigBB);
-
-  // Figure out how large the NewMBB is. As the second half of the original
-  // block, it may contain a tablejump.
-  computeBlockSize(NewBB);
-
-  // All BBOffsets following these blocks must be modified.
-  adjustBBOffsetsAfter(OrigBB);
-
-  return NewBB;
-}
-
-void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
-  unsigned BBNum = BB->getNumber();
-  for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
-    // Get the offset and known bits at the end of the layout predecessor.
-    // Include the alignment of the current block.
-    unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
-    unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
-    unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
-
-    // This is where block i begins. Stop if the offset is already correct,
-    // and we have updated 2 blocks. This is the maximum number of blocks
-    // changed before calling this function.
-    if (i > BBNum + 2 &&
-        BBInfo[i].Offset == Offset &&
-        BBInfo[i].KnownBits == KnownBits)
-      break;
-
-    BBInfo[i].Offset = Offset;
-    BBInfo[i].KnownBits = KnownBits;
-  }
-}
-
-/// Returns true if the distance between specific MI and specific BB can fit in
-/// MI's displacement field.
-bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
-                                     MachineBasicBlock *DestBB,
-                                     unsigned OffsetBits) {
-  int64_t BrOffset = getOffsetOf(MI);
-  int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
-
-  DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
-               << " from BB#" << MI->getParent()->getNumber()
-               << " bits available=" << OffsetBits
-               << " from " << getOffsetOf(MI) << " to " << DestOffset
-               << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
-
-  return isIntN(OffsetBits, DestOffset - BrOffset);
-}
-
-/// Fix up an immediate branch whose destination is too far away to fit in its
-/// displacement field.
-bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
-  MachineInstr *MI = Br.MI;
-  MachineBasicBlock *DestBB = 0;
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    if (MI->getOperand(i).isMBB()) {
-      DestBB = MI->getOperand(i).getMBB();
-      break;
-    }
-  }
-  assert(DestBB && "Branch with no destination BB?");
-
-  // Check to see if the DestBB is already in-range.
-  if (isBBInRange(MI, DestBB, Br.OffsetBits))
-    return false;
-
-  assert(Br.IsCond && "Only conditional branches should need fixup");
-  return fixupConditionalBr(Br);
-}
-
-/// Fix up a conditional branch whose destination is too far away to fit in its
-/// displacement field. It is converted to an inverse conditional branch + an
-/// unconditional branch to the destination.
-bool
-AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
-  MachineInstr *MI = Br.MI;
-  MachineBasicBlock *MBB = MI->getParent();
-  unsigned CondBrMBBOperand = 0;
-
-  // The general idea is to add an unconditional branch to the destination and
-  // invert the conditional branch to jump over it. Complications occur around
-  // fallthrough and unreachable ends to the block.
-  //   b.lt L1
-  // =>
-  //   b.ge L2
-  //   b   L1
-  // L2:
-
-  // First we invert the conditional branch, by creating a replacement if
-  // necessary. This if statement contains all the special handling of different
-  // branch types.
-  if (MI->getOpcode() == AArch64::Bcc) {
-    // The basic block is operand number 1 for Bcc
-    CondBrMBBOperand = 1;
-
-    A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
-    CC = A64InvertCondCode(CC);
-    MI->getOperand(0).setImm(CC);
-  } else {
-    MachineInstrBuilder InvertedMI;
-    int InvertedOpcode;
-    switch (MI->getOpcode()) {
-    default: llvm_unreachable("Unknown branch type");
-    case AArch64::TBZxii:  InvertedOpcode = AArch64::TBNZxii; break;
-    case AArch64::TBZwii:  InvertedOpcode = AArch64::TBNZwii; break;
-    case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii;  break;
-    case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii;  break;
-    case AArch64::CBZx:    InvertedOpcode = AArch64::CBNZx;   break;
-    case AArch64::CBZw:    InvertedOpcode = AArch64::CBNZw;   break;
-    case AArch64::CBNZx:   InvertedOpcode = AArch64::CBZx;    break;
-    case AArch64::CBNZw:   InvertedOpcode = AArch64::CBZw;    break;
-    }
-
-    InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
-    for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
-      InvertedMI.addOperand(MI->getOperand(i));
-      if (MI->getOperand(i).isMBB())
-        CondBrMBBOperand = i;
-    }
-
-    MI->eraseFromParent();
-    MI = Br.MI = InvertedMI;
-  }
-
-  // If the branch is at the end of its MBB and that has a fall-through block,
-  // direct the updated conditional branch to the fall-through
-  // block. Otherwise, split the MBB before the next instruction.
-  MachineInstr *BMI = &MBB->back();
-  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
-
-  ++NumCBrFixed;
-  if (BMI != MI) {
-    if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
-        BMI->getOpcode() == AArch64::Bimm) {
-      // Last MI in the BB is an unconditional branch. We can swap destinations:
-      // b.eq L1 (temporarily b.ne L1 after first change)
-      // b   L2
-      // =>
-      // b.ne L2
-      // b   L1
-      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
-      if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
-        DEBUG(dbgs() << "  Invert Bcc condition and swap its destination with "
-                     << *BMI);
-        MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
-        BMI->getOperand(0).setMBB(DestBB);
-        MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
-        return true;
-      }
-    }
-  }
-
-  if (NeedSplit) {
-    MachineBasicBlock::iterator MBBI = MI; ++MBBI;
-    splitBlockBeforeInstr(MBBI);
-    // No need for the branch to the next block. We're adding an unconditional
-    // branch to the destination.
-    int delta = TII->getInstSizeInBytes(MBB->back());
-    BBInfo[MBB->getNumber()].Size -= delta;
-    MBB->back().eraseFromParent();
-    // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
-  }
-
-  // After splitting and removing the unconditional branch from the original BB,
-  // the structure is now:
-  // oldbb:
-  //   [things]
-  //   b.invertedCC L1
-  // splitbb/fallthroughbb:
-  //   [old b L2/real continuation]
-  //
-  // We now have to change the conditional branch to point to splitbb and add an
-  // unconditional branch after it to L1, giving the final structure:
-  // oldbb:
-  //   [things]
-  //   b.invertedCC splitbb
-  //   b L1
-  // splitbb/fallthroughbb:
-  //   [old b L2/real continuation]
-  MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
-
-  DEBUG(dbgs() << "  Insert B to BB#"
-               << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
-               << " also invert condition and change dest. to BB#"
-               << NextBB->getNumber() << "\n");
-
-  // Insert a new unconditional branch and fixup the destination of the
-  // conditional one. Also update the ImmBranch as well as adding a new entry
-  // for the new branch.
-  BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
-    .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
-  MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
-
-  BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
-
-  // 26 bits written down in Bimm, specifying a multiple of 4.
-  unsigned OffsetBits = 26 + 2;
-  ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
-
-  adjustBBOffsetsAfter(MBB);
-  return true;
-}
diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
new file mode 100644
index 0000000..5209452
--- /dev/null
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -0,0 +1,510 @@
+//===-- AArch64BranchRelaxation.cpp - AArch64 branch relaxation -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-branch-relax"
+
+static cl::opt<bool>
+BranchRelaxation("aarch64-branch-relax", cl::Hidden, cl::init(true),
+                 cl::desc("Relax out of range conditional branches"));
+
+static cl::opt<int>
+TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
+                    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
+
+static cl::opt<int>
+CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
+                    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
+
+static cl::opt<int>
+BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
+                    cl::desc("Restrict range of Bcc instructions (DEBUG)"));
+
+STATISTIC(NumSplit, "Number of basic blocks split");
+STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
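// Editorial note (not part of the patch): the default widths above match the
// AArch64 branch encodings; immediates are scaled by 4, so an N-bit signed
// field reaches just under +/- 2^(N+1) bytes. A sketch of that arithmetic:

#include <cstdint>
constexpr int64_t maxForwardBranch(unsigned Bits) {
  return ((int64_t(1) << (Bits - 1)) - 1) * 4; // largest positive imm, in bytes
}
static_assert(maxForwardBranch(14) == 32764, "TB(N)Z reach, ~32 KiB");
static_assert(maxForwardBranch(19) == 1048572, "CB(N)Z / B.cond reach, ~1 MiB");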
+
+namespace {
+class AArch64BranchRelaxation : public MachineFunctionPass {
+  /// BasicBlockInfo - Information about the offset and size of a single
+  /// basic block.
+  struct BasicBlockInfo {
+    /// Offset - Distance from the beginning of the function to the beginning
+    /// of this basic block.
+    ///
+    /// The offset is always aligned as required by the basic block.
+    unsigned Offset;
+
+    /// Size - Size of the basic block in bytes. If the block contains
+    /// inline assembly, this is a worst case estimate.
+    ///
+    /// The size does not include any alignment padding whether from the
+    /// beginning of the block, or from an aligned jump table at the end.
+    unsigned Size;
+
+    BasicBlockInfo() : Offset(0), Size(0) {}
+
+    /// Compute the offset immediately following this block. If LogAlign is
+    /// specified, return the offset the successor block will get if it has
+    /// this alignment.
+    unsigned postOffset(unsigned LogAlign = 0) const {
+      unsigned PO = Offset + Size;
+      unsigned Align = 1 << LogAlign;
+      return (PO + Align - 1) / Align * Align;
+    }
+  };
+
+  SmallVector<BasicBlockInfo, 16> BlockInfo;
+
+  MachineFunction *MF;
+  const AArch64InstrInfo *TII;
+
+  bool relaxBranchInstructions();
+  void scanFunction();
+  MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+  void adjustBlockOffsets(MachineBasicBlock &MBB);
+  bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+  bool fixupConditionalBranch(MachineInstr *MI);
+  void computeBlockSize(const MachineBasicBlock &MBB);
+  unsigned getInstrOffset(MachineInstr *MI) const;
+  void dumpBBs();
+  void verify();
+
+public:
+  static char ID;
+  AArch64BranchRelaxation() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "AArch64 branch relaxation pass";
+  }
+};
+char AArch64BranchRelaxation::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void AArch64BranchRelaxation::verify() {
+#ifndef NDEBUG
+  unsigned PrevNum = MF->begin()->getNumber();
+  for (MachineBasicBlock &MBB : *MF) {
+    unsigned Align = MBB.getAlignment();
+    unsigned Num = MBB.getNumber();
+    assert(BlockInfo[Num].Offset % (1u << Align) == 0);
+    assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
+    PrevNum = Num;
+  }
+#endif
+}
+
+/// print block size and offset information - debugging
+void AArch64BranchRelaxation::dumpBBs() {
+  for (auto &MBB : *MF) {
+    const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
+    dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
+           << format("size=%#x\n", BBI.Size);
+  }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fallthrough
+/// into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+  // Get the next machine basic block in the function.
+  MachineFunction::iterator MBBI = MBB;
+  // Can't fall off end of function.
+  MachineBasicBlock *NextBB = std::next(MBBI);
+  if (NextBB == MBB->getParent()->end())
+    return false;
+
+  for (MachineBasicBlock *S : MBB->successors())
+    if (S == NextBB)
+      return true;
+
+  return false;
+}
+
+/// scanFunction - Do the initial scan of the function, building up
+/// information about each block.
+void AArch64BranchRelaxation::scanFunction() {
+  BlockInfo.clear();
+  BlockInfo.resize(MF->getNumBlockIDs());
+
+  // First thing, compute the size of all basic blocks, and see if the function
+  // has any inline assembly in it. If so, we have to be conservative about
+  // alignment assumptions, as we don't know for sure the size of any
+  // instructions in the inline assembly.
+  for (MachineBasicBlock &MBB : *MF)
+    computeBlockSize(MBB);
+
+  // Compute block offsets and known bits.
+  adjustBlockOffsets(*MF->begin());
+}
+
+/// computeBlockSize - Compute the size for MBB.
+/// This function updates BlockInfo directly.
+void AArch64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) {
+  unsigned Size = 0;
+  for (const MachineInstr &MI : MBB)
+    Size += TII->GetInstSizeInBytes(&MI);
+  BlockInfo[MBB.getNumber()].Size = Size;
+}
+
+/// getInstrOffset - Return the current offset of the specified machine
+/// instruction from the start of the function. This offset changes as stuff is
+/// moved around inside the function.
+unsigned AArch64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // The offset is composed of two things: the sum of the sizes of all MBB's
+  // before this instruction's block, and the offset from the start of the block
+  // it is in.
+  unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
+
+  // Sum instructions before MI in MBB.
+  for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
+    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+    Offset += TII->GetInstSizeInBytes(I);
+  }
+  return Offset;
+}
+
+void AArch64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
+  unsigned PrevNum = Start.getNumber();
+  for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) {
+    unsigned Num = MBB.getNumber();
+    if (!Num) // block zero is never changed from offset zero.
+      continue;
+    // Get the offset and known bits at the end of the layout predecessor.
+    // Include the alignment of the current block.
+    unsigned LogAlign = MBB.getAlignment();
+    BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
+    PrevNum = Num;
+  }
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+/// NOTE: Successor list of the original BB is out of date after this function,
+/// and must be updated by the caller! Other transforms follow using this
+/// utility function, so no point updating now rather than waiting.
+MachineBasicBlock *
+AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
+  MachineBasicBlock *OrigBB = MI->getParent();
+
+  // Create a new MBB for the code after the OrigBB.
+  MachineBasicBlock *NewBB =
+      MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+  MachineFunction::iterator MBBI = OrigBB;
+  ++MBBI;
+  MF->insert(MBBI, NewBB);
+
+  // Splice the instructions starting with MI over to NewBB.
+  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+  // Add an unconditional branch from OrigBB to NewBB.
+  // Note the new unconditional branch is not being recorded.
+  // There doesn't seem to be meaningful DebugInfo available; this doesn't
+  // correspond to anything in the source.
+  BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::B)).addMBB(NewBB);
+
+  // Insert an entry into BlockInfo to align it properly with the block numbers.
+  BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+  // Figure out how large the OrigBB is. As the first half of the original
+  // block, it cannot contain a tablejump. The size includes
+  // the new jump we added. (It should be possible to do this without
+  // recounting everything, but it's very confusing, and this is rarely
+  // executed.)
+  computeBlockSize(*OrigBB);
+
+  // Figure out how large the NewMBB is. As the second half of the original
+  // block, it may contain a tablejump.
+  computeBlockSize(*NewBB);
+
+  // All BBOffsets following these blocks must be modified.
+  adjustBlockOffsets(*OrigBB);
+
+  ++NumSplit;
+
+  return NewBB;
+}
+
+/// isBlockInRange - Returns true if the distance between specific MI and
+/// specific BB can fit in MI's displacement field.
+bool AArch64BranchRelaxation::isBlockInRange(MachineInstr *MI,
+                                             MachineBasicBlock *DestBB,
+                                             unsigned Bits) {
+  unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
+  unsigned BrOffset = getInstrOffset(MI);
+  unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset;
+
+  DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
+               << " from BB#" << MI->getParent()->getNumber()
+               << " max delta=" << MaxOffs << " from " << getInstrOffset(MI)
+               << " to " << DestOffset << " offset "
+               << int(DestOffset - BrOffset) << "\t" << *MI);
+
+  // Branch before the Dest.
+  if (BrOffset <= DestOffset)
+    return (DestOffset - BrOffset <= MaxOffs);
+  return (BrOffset - DestOffset <= MaxOffs);
+}
+
+static bool isConditionalBranch(unsigned Opc) {
+  switch (Opc) {
+  default:
+    return false;
+  case AArch64::TBZW:
+  case AArch64::TBNZW:
+  case AArch64::TBZX:
+  case AArch64::TBNZX:
+  case AArch64::CBZW:
+  case AArch64::CBNZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZX:
+  case AArch64::Bcc:
+    return true;
+  }
+}
+
+static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  default:
+    assert(0 && "unexpected opcode!");
+  case AArch64::TBZW:
+  case AArch64::TBNZW:
+  case AArch64::TBZX:
+  case AArch64::TBNZX:
+    return MI->getOperand(2).getMBB();
+  case AArch64::CBZW:
+  case AArch64::CBNZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZX:
+  case AArch64::Bcc:
+    return MI->getOperand(1).getMBB();
+  }
+}
+
+static unsigned getOppositeConditionOpcode(unsigned Opc) {
+  switch (Opc) {
+  default:
+    assert(0 && "unexpected opcode!");
+  case AArch64::TBNZW: return AArch64::TBZW;
+  case AArch64::TBNZX: return AArch64::TBZX;
+  case AArch64::TBZW:  return AArch64::TBNZW;
+  case AArch64::TBZX:  return AArch64::TBNZX;
+  case AArch64::CBNZW: return AArch64::CBZW;
+  case AArch64::CBNZX: return AArch64::CBZX;
+  case AArch64::CBZW:  return AArch64::CBNZW;
+  case AArch64::CBZX:  return AArch64::CBNZX;
+  case AArch64::Bcc:   return AArch64::Bcc; // Condition is an operand for Bcc.
+  }
+}
+
+static unsigned getBranchDisplacementBits(unsigned Opc) {
+  switch (Opc) {
+  default:
+    assert(0 && "unexpected opcode!");
+  case AArch64::TBNZW:
+  case AArch64::TBZW:
+  case AArch64::TBNZX:
+  case AArch64::TBZX:
+    return TBZDisplacementBits;
+  case AArch64::CBNZW:
+  case AArch64::CBZW:
+  case AArch64::CBNZX:
+  case AArch64::CBZX:
+    return CBZDisplacementBits;
+  case AArch64::Bcc:
+    return BCCDisplacementBits;
+  }
+}
+
+static inline void invertBccCondition(MachineInstr *MI) {
+  assert(MI->getOpcode() == AArch64::Bcc && "Unexpected opcode!");
+  AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(0).getImm();
+  CC = AArch64CC::getInvertedCondCode(CC);
+  MI->getOperand(0).setImm((int64_t)CC);
+}
+
+/// fixupConditionalBranch - Fix up a conditional branch whose destination is
+/// too far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
+  MachineBasicBlock *DestBB = getDestBlock(MI);
+
+  // Add an unconditional branch to the destination and invert the branch
+  // condition to jump over it:
+  // tbz L1
+  // =>
+  // tbnz L2
+  // b   L1
+  // L2:
+
+  // If the branch is at the end of its MBB and that has a fall-through block,
+  // direct the updated conditional branch to the fall-through block. Otherwise,
+  // split the MBB before the next instruction.
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineInstr *BMI = &MBB->back();
+  bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+  if (BMI != MI) {
+    if (std::next(MachineBasicBlock::iterator(MI)) ==
+            std::prev(MBB->getLastNonDebugInstr()) &&
+        BMI->getOpcode() == AArch64::B) {
+      // Last MI in the BB is an unconditional branch. Can we simply invert the
+      // condition and swap destinations:
+      // beq L1
+      // b   L2
+      // =>
+      // bne L2
+      // b   L1
+      MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+      if (isBlockInRange(MI, NewDest,
+                         getBranchDisplacementBits(MI->getOpcode()))) {
+        DEBUG(dbgs() << "  Invert condition and swap its destination with "
                     << *BMI);
+        BMI->getOperand(0).setMBB(DestBB);
+        unsigned OpNum = (MI->getOpcode() == AArch64::TBZW ||
+                          MI->getOpcode() == AArch64::TBNZW ||
+                          MI->getOpcode() == AArch64::TBZX ||
+                          MI->getOpcode() == AArch64::TBNZX)
+                             ? 2
+                             : 1;
+        MI->getOperand(OpNum).setMBB(NewDest);
+        MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode())));
+        if (MI->getOpcode() == AArch64::Bcc)
+          invertBccCondition(MI);
+        return true;
+      }
+    }
+  }
+
+  if (NeedSplit) {
+    // Analyze the branch so we know how to update the successor lists.
+    MachineBasicBlock *TBB, *FBB;
+    SmallVector<MachineOperand, 4> Cond;
+    TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, false);
+
+    MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
+    // No need for the branch to the next block. We're adding an unconditional
+    // branch to the destination.
+    int delta = TII->GetInstSizeInBytes(&MBB->back());
+    BlockInfo[MBB->getNumber()].Size -= delta;
+    MBB->back().eraseFromParent();
+    // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below
+
+    // Update the successor lists according to the transformation to follow.
+    // Do it here since if there's no split, no update is needed.
+    MBB->replaceSuccessor(FBB, NewBB);
+    NewBB->addSuccessor(FBB);
+  }
+  MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+
+  DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
+               << ", invert condition and change dest. to BB#"
+               << NextBB->getNumber() << "\n");
+
+  // Insert a new conditional branch and a new unconditional branch.
+  MachineInstrBuilder MIB = BuildMI(
+      MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode())))
+                                .addOperand(MI->getOperand(0));
+  if (MI->getOpcode() == AArch64::TBZW || MI->getOpcode() == AArch64::TBNZW ||
+      MI->getOpcode() == AArch64::TBZX || MI->getOpcode() == AArch64::TBNZX)
+    MIB.addOperand(MI->getOperand(1));
+  if (MI->getOpcode() == AArch64::Bcc)
+    invertBccCondition(MIB);
+  MIB.addMBB(NextBB);
+  BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+  BuildMI(MBB, DebugLoc(), TII->get(AArch64::B)).addMBB(DestBB);
+  BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
+
+  // Remove the old conditional branch. It may or may not still be in MBB.
+  BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
+  MI->eraseFromParent();
+
+  // Finally, keep the block offsets up to date.
+  adjustBlockOffsets(*MBB);
+  return true;
+}
+
+bool AArch64BranchRelaxation::relaxBranchInstructions() {
+  bool Changed = false;
+  // Relaxing branches involves creating new basic blocks, so re-eval
+  // end() for termination.
+  for (auto &MBB : *MF) {
+    MachineInstr *MI = MBB.getFirstTerminator();
+    if (isConditionalBranch(MI->getOpcode()) &&
+        !isBlockInRange(MI, getDestBlock(MI),
+                        getBranchDisplacementBits(MI->getOpcode()))) {
+      fixupConditionalBranch(MI);
+      ++NumRelaxed;
+      Changed = true;
+    }
+  }
+  return Changed;
+}
+
+bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+
+  // If the pass is disabled, just bail early.
+  if (!BranchRelaxation)
+    return false;
+
+  DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
+
+  TII = (const AArch64InstrInfo *)MF->getTarget().getInstrInfo();
+
+  // Renumber all of the machine basic blocks in the function, guaranteeing
+  // that the numbers agree with the position of the block in the function.
+  MF->RenumberBlocks();
+
+  // Do the initial scan of the function, building up information about the
+  // sizes of each block.
+  scanFunction();
+
+  DEBUG(dbgs() << "  Basic blocks before relaxation\n");
+  DEBUG(dumpBBs());
+
+  bool MadeChange = false;
+  while (relaxBranchInstructions())
+    MadeChange = true;
+
+  // After a while, this might be made debug-only, but it is not expensive.
+  verify();
+
+  DEBUG(dbgs() << "  Basic blocks after relaxation\n");
+  DEBUG(dbgs() << '\n'; dumpBBs());
+
+  BlockInfo.clear();
+
+  return MadeChange;
+}
+
+/// createAArch64BranchRelaxation - returns an instance of the branch
+/// relaxation pass.
+FunctionPass *llvm::createAArch64BranchRelaxation() {
+  return new AArch64BranchRelaxation();
+}
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
deleted file mode 100644
index 9fe6aae..0000000
--- a/lib/Target/AArch64/AArch64CallingConv.td
+++ /dev/null
@@ -1,197 +0,0 @@
-//==-- AArch64CallingConv.td - Calling Conventions for ARM ----*- tblgen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for AArch64 architecture.
-//===----------------------------------------------------------------------===//
-
-
-// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
-// higher level of abstraction than LLVM's target interface presents. In
-// particular, it refers (like other ABIs, in fact) directly to
-// structs. However, generic LLVM code takes the liberty of lowering structure
-// arguments to the component fields before we see them.
-//
-// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
-// implemented, so the goals of this calling convention are, in decreasing
-// priority order:
-// 1. Expose *some* way to express the concepts required to implement the
-//    generic PCS from a front-end.
-// 2. Provide a sane ABI for pure LLVM.
-// 3. Follow the generic PCS as closely as is naturally possible.
-//
-// The suggested front-end implementation of PCS features is:
-// * Integer, float and vector arguments of all sizes which end up in
-//   registers are passed and returned via the natural LLVM type.
-// * Structure arguments with size <= 16 bytes are passed and returned in
-//   registers as similar integer or composite types. For example:
-//   [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
-// * HFAs in registers follow rules similar to small structs: appropriate
-//   composite types.
-// * Structure arguments with size > 16 bytes are passed via a pointer, -// handled completely by the front-end. -// * Structure return values > 16 bytes via an sret pointer argument. -// * Other stack-based arguments (not large structs) are passed using byval -// pointers. Padding arguments are added beforehand to guarantee a large -// struct doesn't later use integer registers. -// -// N.b. this means that it is the front-end's responsibility (if it cares about -// PCS compliance) to check whether enough registers are available for an -// argument when deciding how to pass it. - -class CCIfAlign: - CCIf<"ArgFlags.getOrigAlign() == " # Align, A>; - -def CC_A64_APCS : CallingConv<[ - // SRet is an LLVM-specific concept, so it takes precedence over general ABI - // concerns. However, this rule will be used by C/C++ frontends to implement - // structure return. - CCIfSRet>, - - // Put ByVal arguments directly on the stack. Minimum size and alignment of a - // slot is 64-bit. - CCIfByVal>, - - // Canonicalise the various types that live in different floating-point - // registers. This makes sense because the PCS does not distinguish Short - // Vectors and Floating-point types. - CCIfType<[v1i16, v2i8], CCBitConvertToType>, - CCIfType<[v1i32, v4i8, v2i16], CCBitConvertToType>, - CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType>, - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCBitConvertToType>, - - // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision - // Floating-point or Short Vector Type and the NSRN is less than 8, then the - // argument is allocated to the least significant bits of register - // v[NSRN]. The NSRN is incremented by one. The argument has now been - // allocated." - CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, - CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, - CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, - CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated - // SIMD and Floating-point registers (NSRN - number of elements < 8), then the - // argument is allocated to SIMD and Floating-point registers (with one - // register per element of the HFA). The NSRN is incremented by the number of - // registers used. The argument has now been allocated." - // - // N.b. As above, this rule is the responsibility of the front-end. - - // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of - // the argument is rounded up to the nearest multiple of 8 bytes." - // - // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short - // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural - // Alignment of the Argument's type." - // - // It is expected that these will be satisfied by adding dummy arguments to - // the prototype. - - // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point - // type then the size of the argument is set to 8 bytes. The effect is as if - // the argument had been copied to the least significant bits of a 64-bit - // register and the remaining bits filled with unspecified values." 
-  CCIfType<[f16, f32], CCPromoteToType>,
-
-  // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
-  // precision Floating-point or Short Vector Type, then the argument is copied
-  // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
-  // argument. The argument has now been allocated."
-  CCIfType<[f64], CCAssignToStack<8, 8>>,
-  CCIfType<[f128], CCAssignToStack<16, 16>>,
-
-  // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
-  // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
-  // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
-  // one. The argument has now been allocated."
-
-  // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
-  // represented as two i64s, the first one being split. If we delayed this
-  // operation C.8 would never be reached.
-  CCIfType<[i64],
-           CCIfSplit>>,
-
-  // Note: the promotion also implements C.14.
-  CCIfType<[i8, i16, i32], CCPromoteToType>,
-
-  // And now the real implementation of C.7
-  CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
-
-  // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
-  // up to the next even number."
-
-  // "C.9: If the argument is an Integral Type, the size of the argument is
-  // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
-  // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
-  // memory representation of the argument. The NGRN is incremented by two. The
-  // argument has now been allocated."
-
-  // Subtlety here: what if alignment is 16 but it is not an integral type? All
-  // floating-point types have been allocated already, which leaves composite
-  // types: this is why a front-end may need to produce i128 for a struct <= 16
-  // bytes.
-
-  // PCS: "C.10: If the argument is a Composite Type and the size in double-words
-  // of the argument is not more than 8 minus NGRN, then the argument is copied
-  // into consecutive general-purpose registers, starting at x[NGRN]. The
-  // argument is passed as though it had been loaded into the registers from a
-  // double-word aligned address with an appropriate sequence of LDR
-  // instructions loading consecutive registers from memory (the contents of any
-  // unused parts of the registers are unspecified by this standard). The NGRN
-  // is incremented by the number of registers used. The argument has now been
-  // allocated."
-
-  // Another one that's the responsibility of the front-end (sigh).
-
-  // PCS: "C.11: The NGRN is set to 8."
-  CCCustom<"CC_AArch64NoMoreRegs">,
-
-  // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
-  // Alignment of the argument's type."
-
-  // PCS: "C.13: If the argument is a composite type then the argument is copied
-  // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
-  // argument. The argument has now been allocated."
-
-  // Note that the effect of this corresponds to a memcpy rather than register
-  // stores so that the struct ends up correctly addressable at the adjusted
-  // NSAA.
-
-  // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
-  // of the argument is set to 8 bytes. The effect is as if the argument was
-  // copied to the least significant bits of a 64-bit register and the remaining
-  // bits filled with unspecified values."
-
-  // Integer types were widened above.
-  // Floating-point and composite types have already been allocated completely.
-  // Nothing to do.
-
-  // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
-  // is incremented by the size of the argument. The argument has now been
-  // allocated."
-  CCIfType<[i64], CCIfSplit>>,
-  CCIfType<[i64], CCAssignToStack<8, 8>>
-
-]>;
-
-// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
-// of vector registers (8-15) are callee-saved. The order here is picked up
-// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
-// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
-// [sp-16], ...
-def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
-                                   (sequence "D%u", 15, 8))>;
-
-
-// TLS descriptor calls are extremely restricted in their changes, to allow
-// optimisations in the (hopefully) more common fast path where no real action
-// is needed. They actually have to preserve all registers, except for the
-// unavoidable X30 and the return register X0.
-def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
-                                   (sequence "Q%u", 31, 0))>;
diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
new file mode 100644
index 0000000..ded2e17
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -0,0 +1,240 @@
+//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for AArch64 architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfAlign - Match on the original alignment of the argument.
+class CCIfAlign<string Align, CCAction A> :
+  CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
+/// CCIfBigEndian - Match only if we're in big endian mode.
+class CCIfBigEndian<CCAction A> :
+  CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>;
+
+class CCIfUnallocated<string Reg, CCAction A> :
+  CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS64 Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_AArch64_AAPCS : CallingConv<[
+  CCIfType<[v2f32], CCBitConvertToType>,
+  CCIfType<[v2f64, v4f32], CCBitConvertToType>,
+
+  // Big endian vectors must be passed as if they were 1-element vectors so that
+  // their lanes are in a consistent order.
+  CCIfBigEndian>>,
+  CCIfBigEndian>>,
+
+  // An SRet is passed in X8, not X0 like a normal pointer parameter.
+  CCIfSRet>>,
+
+  // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+  // slot is 64-bit.
+  CCIfByVal>,
+
+  // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
+  // up to eight each of GPR and FPR.
+  CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType>>,
+  CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
+                                          [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+  // i128 is split to two i64s, so we can't fit half of it in register X7.
+  CCIfType<[i64], CCIfSplit>>,
+
+  // i128 is split to two i64s, and its stack alignment is 16 bytes.
+ CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIfType<[i1, i8, i16], CCAssignToStack<8, 8>>, + CCIfType<[i32, f32], CCAssignToStack<8, 8>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], + CCAssignToStack<8, 8>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], + CCAssignToStack<16, 16>> +]>; + +def RetCC_AArch64_AAPCS : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32], CCBitConvertToType>, + + // Big endian vectors must be passed as if they were 1-element vectors so that + // their lanes are in a consistent order. + CCIfBigEndian>>, + CCIfBigEndian>>, + + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> +]>; + + +// Darwin uses a calling convention which differs in only two ways +// from the standard one at this level: +// + i128s (i.e. split i64s) don't need even registers. +// + Stack slots are sized as needed rather than being at least 64-bit. +def CC_AArch64_DarwinPCS : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // An SRet is passed in X8, not X0 like a normal pointer parameter. + CCIfSRet>>, + + // Put ByVal arguments directly on the stack. Minimum size and alignment of a + // slot is 64-bit. + CCIfByVal>, + + // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, + // up to eight each of GPR and FPR. + CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType>>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + // i128 is split to two i64s, we can't fit half to register X7. + CCIfType<[i64], + CCIfSplit>>, + // i128 is split to two i64s, and its stack alignment is 16 bytes. 
+ CCIfType<[i64], CCIfSplit>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], + CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], + CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + + // If more than will fit in registers, pass them on the stack instead. + CCIfType<[i1, i8], CCAssignToStack<1, 1>>, + CCIfType<[i16], CCAssignToStack<2, 2>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> +]>; + +def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, + + // Handle all scalar types as either i64 or f64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + CCIfType<[f32], CCPromoteToType>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfType<[i64], CCIfSplit>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> +]>; + +// The WebKit_JS calling convention only passes the first argument (the callee) +// in register and the remaining arguments on stack. We allow 32bit stack slots, +// so that WebKit can write partial values in the stack and define the other +// 32bit quantity as undef. +def CC_AArch64_WebKit_JS : CallingConv<[ + // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). + CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType>>, + CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, + + // Pass the remaining arguments on the stack instead. + CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +def RetCC_AArch64_WebKit_JS : CallingConv<[ + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], + [X0, X1, X2, X3, X4, X5, X6, X7]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], + [W0, W1, W2, W3, W4, W5, W6, W7]>>, + CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, + CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], + [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> +]>; + +// FIXME: LR is only callee-saved in the sense that *we* preserve it and are +// presumably a callee to someone. External functions may not do so, but this +// is currently safe since BL has LR as an implicit-def and what happens after a +// tail call doesn't matter. +// +// It would be better to model its preservation semantics properly (create a +// vreg on entry, use it in RET & tail call generation; make that vreg def if we +// end up saving LR as part of a call frame). Watch this space... 
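+//
+// (Summary of the set below, for reference: LR, FP, x19-x28, and the low
+// 64 bits D8-D15 of v8-v15, matching the PCS note above.)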
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + +// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since +// 'this' and the pointer return value are both passed in X0 in these cases, +// this can be partially modelled by treating X0 as a callee-saved register; +// only the resulting RegMask is used; the SaveList is ignored +// +// (For generic ARM 64-bit ABI code, clang will not generate constructors or +// destructors with 'this' returns, so this RegMask will not be used in that +// case) +def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>; + +// The function used by Darwin to obtain the address of a thread-local variable +// guarantees more than a normal AAPCS function. x16 and x17 are used on the +// fast path for calculation, but other registers except X0 (argument/return) +// and LR (it is a call, after all) are preserved. +def CSR_AArch64_TLS_Darwin + : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), + FP, + (sequence "Q%u", 0, 31))>; + +// The ELF stub used for TLS-descriptor access saves every feasible +// register. Only X0 and LR are clobbered. +def CSR_AArch64_TLS_ELF + : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP, + (sequence "Q%u", 0, 31))>; + +def CSR_AArch64_AllRegs + : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP, + (sequence "X%u", 0, 28), FP, LR, SP, + (sequence "B%u", 0, 31), (sequence "H%u", 0, 31), + (sequence "S%u", 0, 31), (sequence "D%u", 0, 31), + (sequence "Q%u", 0, 31))>; + diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp new file mode 100644 index 0000000..4d23dc5 --- /dev/null +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -0,0 +1,147 @@ +//===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Local-dynamic access to thread-local variables proceeds in three stages. +// +// 1. The offset of this Module's thread-local area from TPIDR_EL0 is calculated +// in much the same way as a general-dynamic TLS-descriptor access against +// the special symbol _TLS_MODULE_BASE. +// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using +// instructions with "dtprel" modifiers. +// 3. These two are added, together with TPIDR_EL0, to obtain the variable's +// true address. +// +// This is only better than general-dynamic access to the variable if two or +// more of the first stage TLS-descriptor calculations can be combined. This +// pass looks through a function and performs such combinations. 
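+//
+// As an illustration (not taken from this patch), a function reading two
+// local-dynamic TLS variables x and y would otherwise emit two
+// TLSDESC_BLR-based computations of _TLS_MODULE_BASE_; after this pass the
+// base is computed once, kept in a virtual register, and the second access
+// only performs its "dtprel" offset arithmetic on that copy.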
+//
+//===----------------------------------------------------------------------===//
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;

+namespace {
+struct LDTLSCleanup : public MachineFunctionPass {
+  static char ID;
+  LDTLSCleanup() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+    if (AFI->getNumLocalDynamicTLSAccesses() < 2) {
+      // No point folding accesses if there aren't at least two.
+      return false;
+    }
+
+    MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+    return VisitNode(DT->getRootNode(), 0);
+  }
+
+  // Visit the dominator subtree rooted at Node in pre-order.
+  // If TLSBaseAddrReg is non-zero, then use that to replace any
+  // TLS_base_addr instructions. Otherwise, create the register
+  // when the first such instruction is seen, and then use it
+  // as we encounter more instructions.
+  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+    MachineBasicBlock *BB = Node->getBlock();
+    bool Changed = false;
+
+    // Traverse the current block.
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+         ++I) {
+      switch (I->getOpcode()) {
+      case AArch64::TLSDESC_BLR:
+        // Make sure it's a local dynamic access.
+        if (!I->getOperand(1).isSymbol() ||
+            strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
+          break;
+
+        if (TLSBaseAddrReg)
+          I = replaceTLSBaseAddrCall(I, TLSBaseAddrReg);
+        else
+          I = setRegister(I, &TLSBaseAddrReg);
+        Changed = true;
+        break;
+      default:
+        break;
+      }
+    }
+
+    // Visit the children of this block in the dominator tree.
+    for (MachineDomTreeNode *N : *Node) {
+      Changed |= VisitNode(N, TLSBaseAddrReg);
+    }
+
+    return Changed;
+  }
+
+  // Replace the TLS_base_addr instruction I with a copy from
+  // TLSBaseAddrReg, returning the new instruction.
+  MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
+                                       unsigned TLSBaseAddrReg) {
+    MachineFunction *MF = I->getParent()->getParent();
+    const AArch64TargetMachine *TM =
+        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+    const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+    // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
+    // code sequence assumes the address will be.
+    MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+                                 TII->get(TargetOpcode::COPY),
+                                 AArch64::X0).addReg(TLSBaseAddrReg);
+
+    // Erase the TLS_base_addr instruction.
+    I->eraseFromParent();
+
+    return Copy;
+  }
+
+  // Create a virtual register in *TLSBaseAddrReg, and populate it by
+  // inserting a copy instruction after I. Returns the new instruction.
+  MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
+    MachineFunction *MF = I->getParent()->getParent();
+    const AArch64TargetMachine *TM =
+        static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+    const AArch64InstrInfo *TII = TM->getInstrInfo();
+
+    // Create a virtual register for the TLS base address.
+    MachineRegisterInfo &RegInfo = MF->getRegInfo();
+    *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
+
+    // Insert a copy from X0 to TLSBaseAddrReg for later.
+    MachineInstr *Next = I->getNextNode();
+    MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
+                                 TII->get(TargetOpcode::COPY),
+                                 *TLSBaseAddrReg).addReg(AArch64::X0);
+
+    return Copy;
+  }
+
+  const char *getPassName() const override {
+    return "Local Dynamic TLS Access Clean-up";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+}
+
+char LDTLSCleanup::ID = 0;
+FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
+  return new LDTLSCleanup();
+}
diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
new file mode 100644
index 0000000..6b1f096
--- /dev/null
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -0,0 +1,1117 @@
+//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that collects Linker Optimization Hints (LOHs).
+// This pass should be run at the very end of the compilation flow, just before
+// assembly printer.
+// To be useful for the linker, the LOH must be printed into the assembly file.
+//
+// A LOH describes a sequence of instructions that may be optimized by the
+// linker.
+// This same sequence cannot be optimized by the compiler because some of
+// the information will only be known at link time.
+// For instance, consider the following sequence:
+//     L1: adrp xA, sym@PAGE
+//     L2: add xB, xA, sym@PAGEOFF
+//     L3: ldr xC, [xB, #imm]
+// This sequence can be turned into:
+// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
+//     L3: ldr xC, sym+#imm
+// It may also be turned into either of the following more efficient
+// code sequences:
+// - If sym@PAGEOFF + #imm fits the encoding space of L3.
+//     L1: adrp xA, sym@PAGE
+//     L3: ldr xC, [xB, sym@PAGEOFF + #imm]
+// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
+//     L1: adr xA, sym
+//     L3: ldr xC, [xB, #imm]
+//
+// To be valid a LOH must meet all the requirements needed by all the related
+// possible linker transformations.
+// For instance, using the running example, the constraints to emit
+// ".loh AdrpAddLdr" are:
+// - L1, L2, and L3 instructions are of the expected type, i.e.,
+//   respectively ADRP, ADD (immediate), and LD.
+// - The result of L1 is used only by L2.
+// - The register argument (xA) used in the ADD instruction is defined
+//   only by L1.
+// - The result of L2 is used only by L3.
+// - The base address (xB) in L3 is defined only by L2.
+// - The ADRP in L1 and the ADD in L2 must reference the same symbol using
+//   @PAGE/@PAGEOFF with no additional constants
+//
+// Currently supported LOHs are:
+// * So called non-ADRP-related:
+//   - .loh AdrpAddLdr L1, L2, L3:
+//     L1: adrp xA, sym@PAGE
+//     L2: add xB, xA, sym@PAGEOFF
+//     L3: ldr xC, [xB, #imm]
+//   - .loh AdrpLdrGotLdr L1, L2, L3:
+//     L1: adrp xA, sym@GOTPAGE
+//     L2: ldr xB, [xA, sym@GOTPAGEOFF]
+//     L3: ldr xC, [xB, #imm]
+//   - .loh AdrpLdr L1, L3:
+//     L1: adrp xA, sym@PAGE
+//     L3: ldr xC, [xA, sym@PAGEOFF]
+//   - .loh AdrpAddStr L1, L2, L3:
+//     L1: adrp xA, sym@PAGE
+//     L2: add xB, xA, sym@PAGEOFF
+//     L3: str xC, [xB, #imm]
+//   - .loh AdrpLdrGotStr L1, L2, L3:
+//     L1: adrp xA, sym@GOTPAGE
+//     L2: ldr xB, [xA, sym@GOTPAGEOFF]
+//     L3: str xC, [xB, #imm]
+//   - .loh AdrpAdd L1, L2:
+//     L1: adrp xA, sym@PAGE
+//     L2: add xB, xA, sym@PAGEOFF
+//   For all these LOHs, L1, L2, L3 form a simple chain:
+//   L1 result is used only by L2 and L2 result by L3.
+//   L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
+//   by L1.
+// All these LOHs aim at using more efficient load/store patterns by folding
+// some instructions used to compute the address directly into the load/store.
+//
+// * So called ADRP-related:
+//   - .loh AdrpAdrp L2, L1:
+//     L2: ADRP xA, sym1@PAGE
+//     L1: ADRP xA, sym2@PAGE
+//     L2 dominates L1 and xA is not redefined between L2 and L1
+//   This LOH aims at getting rid of redundant ADRP instructions.
+//
+// The overall design for emitting the LOHs is:
+// 1. AArch64CollectLOH (this pass) records the LOHs in the
+//    AArch64FunctionInfo.
+// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
+//     1. Associates a label with each of them.
+//     2. Emits them in an MCStreamer (EmitLOHDirective).
+//         - The MCMachOStreamer records them into the MCAssembler.
+//         - The MCAsmStreamer prints them.
+//         - Other MCStreamers ignore them.
+//     3. Closes the MCStreamer:
+//         - The MachObjectWriter gets them from the MCAssembler and writes
+//           them in the object file.
+//         - Other ObjectWriters ignore them.
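+//
+// As an illustration, the assembly printed for the AdrpAddLdr running example
+// would look like this (the Lloh label names are an assumption, following the
+// usual Mach-O convention, not something this pass emits itself):
+//   Lloh0: adrp x8, _sym@PAGE
+//   Lloh1: add  x8, x8, _sym@PAGEOFF
+//   Lloh2: ldr  x0, [x8]
+//          .loh AdrpAddLdr Lloh0, Lloh1, Lloh2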
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-collect-loh"
+
+static cl::opt<bool>
+PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
+                   cl::desc("Restrict analysis to registers involved"
+                            " in LOHs"),
+                   cl::init(true));
+
+static cl::opt<bool>
+BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
+                    cl::desc("Restrict analysis at basic block scope"),
+                    cl::init(true));
+
+STATISTIC(NumADRPSimpleCandidate,
+          "Number of simplifiable ADRP dominated by another");
+STATISTIC(NumADRPComplexCandidate2,
+          "Number of simplifiable ADRP reachable by 2 defs");
+STATISTIC(NumADRPComplexCandidate3,
+          "Number of simplifiable ADRP reachable by 3 defs");
+STATISTIC(NumADRPComplexCandidateOther,
+          "Number of simplifiable ADRP reachable by 4 or more defs");
+STATISTIC(NumADDToSTRWithImm,
+          "Number of simplifiable STR with imm reachable by ADD");
+STATISTIC(NumLDRToSTRWithImm,
+          "Number of simplifiable STR with imm reachable by LDR");
+STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
+STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
+STATISTIC(NumADDToLDRWithImm,
+          "Number of simplifiable LDR with imm reachable by ADD");
+STATISTIC(NumLDRToLDRWithImm,
+          "Number of simplifiable LDR with imm reachable by LDR");
+STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
+STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
+STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
+STATISTIC(NumCplxLvl1, "Number of complex cases of level 1");
+STATISTIC(NumTooCplxLvl1, "Number of too complex cases of level 1");
+STATISTIC(NumCplxLvl2, "Number of complex cases of level 2");
+STATISTIC(NumTooCplxLvl2, "Number of too complex cases of level 2");
+STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
+STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
+
+namespace llvm {
+void initializeAArch64CollectLOHPass(PassRegistry &);
+}
+
+namespace {
+struct AArch64CollectLOH : public MachineFunctionPass {
+  static char ID;
+  AArch64CollectLOH() : MachineFunctionPass(ID) {
+    initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "AArch64 Collect Linker Optimization Hint (LOH)";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineDominatorTree>();
+  }
+
+private:
+};
+
+/// A set of MachineInstrs.
+typedef SetVector<const MachineInstr *> SetOfMachineInstr;
+/// Map a basic block to a set of instructions per register.
+/// This is used to represent the exposed uses of a basic block
+/// per register.
+typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *>
+BlockToSetOfInstrsPerColor;
+/// Map a basic block to an instruction per register.
+/// This is used to represent the live-out definitions of a basic block
+/// per register.
+typedef MapVector<const MachineBasicBlock *, const MachineInstr **>
+BlockToInstrPerColor;
+/// Map an instruction to a set of instructions. Used to represent the
+/// mapping def to reachable uses or use to definitions.
+typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
+/// Map a basic block to a BitVector.
+/// This is used to record the kill registers per basic block.
+typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
+
+/// Map a register to a dense id.
+typedef DenseMap<unsigned, unsigned> MapRegToId;
+/// Map a dense id to a register. Used for debug purposes.
+typedef SmallVector<unsigned, 32> MapIdToReg;
+} // end anonymous namespace.
+
+char AArch64CollectLOH::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
+                      "AArch64 Collect Linker Optimization Hint (LOH)", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
+                    "AArch64 Collect Linker Optimization Hint (LOH)", false,
+                    false)
+
+/// Given a couple (MBB, reg) get the corresponding set of instructions from
+/// the given "sets".
+/// If this couple does not reference any set, an empty set is added to "sets"
+/// for this couple and returned.
+/// \param nbRegs is used internally to allocate some memory. It must be
+/// consistent with the way sets is used.
+static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
+                                 const MachineBasicBlock &MBB, unsigned reg,
+                                 unsigned nbRegs) {
+  SetOfMachineInstr *result;
+  BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
+  if (it != sets.end())
+    result = it->second;
+  else
+    result = sets[&MBB] = new SetOfMachineInstr[nbRegs];
+
+  return result[reg];
+}
+
+/// Given a couple (reg, MI) get the corresponding set of instructions from
+/// the given "sets".
+/// This is used to get the uses recorded in sets of a definition identified by
+/// MI and reg, i.e., MI defines reg.
+/// If the couple does not reference anything, an empty set is added to
+/// "sets[reg]".
+/// \pre set[reg] is valid.
+static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
+                                  const MachineInstr &MI) {
+  return sets[reg][&MI];
+}
+
+/// Same as getUses but does not modify the input map: sets.
+/// \return NULL if the couple (reg, MI) is not in sets.
+static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
+                                        const MachineInstr &MI) {
+  InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
+  if (Res != sets[reg].end())
+    return &(Res->second);
+  return nullptr;
+}
+
+/// Initialize the reaching definition algorithm:
+/// For each basic block BB in MF, record:
+/// - its kill set.
+/// - its reachable uses (uses that are exposed to BB's predecessors).
+/// - the definitions it generates.
+/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
+/// the list of uses of exposed definitions.
+/// \param ADRPMode specifies to only consider ADRP instructions for generated
+/// definitions. It also considers definitions of ADRP instructions as uses and
+/// ignores other uses. The ADRPMode is used to collect the information for
+/// LOHs that involve only ADRP operations.
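+///
+/// For example (illustrative), in a block containing
+///   I1: adrp x0, sym
+///   I2: ldr  x1, [x0, :lo12:sym]
+/// the non-ADRP mode records I1 as the generated definition of x0 and I2 as a
+/// use reached by I1; had x0 been live-in instead, I2 would have landed in the
+/// block's reachable-uses set.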
+static void initReachingDef(MachineFunction &MF, + InstrToInstrs *ColorOpToReachedUses, + BlockToInstrPerColor &Gen, BlockToRegSet &Kill, + BlockToSetOfInstrsPerColor &ReachableUses, + const MapRegToId &RegToId, + const MachineInstr *DummyOp, bool ADRPMode) { + const TargetMachine &TM = MF.getTarget(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + unsigned NbReg = RegToId.size(); + + for (MachineBasicBlock &MBB : MF) { + const MachineInstr **&BBGen = Gen[&MBB]; + BBGen = new const MachineInstr *[NbReg]; + memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg); + + BitVector &BBKillSet = Kill[&MBB]; + BBKillSet.resize(NbReg); + for (const MachineInstr &MI : MBB) { + bool IsADRP = MI.getOpcode() == AArch64::ADRP; + + // Process uses first. + if (IsADRP || !ADRPMode) + for (const MachineOperand &MO : MI.operands()) { + // Treat ADRP def as use, as the goal of the analysis is to find + // ADRP defs reached by other ADRP defs. + if (!MO.isReg() || (!ADRPMode && !MO.isUse()) || + (ADRPMode && (!IsADRP || !MO.isDef()))) + continue; + unsigned CurReg = MO.getReg(); + MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); + if (ItCurRegId == RegToId.end()) + continue; + CurReg = ItCurRegId->second; + + // if CurReg has not been defined, this use is reachable. + if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) + getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI); + // current basic block definition for this color, if any, is in Gen. + if (BBGen[CurReg]) + getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI); + } + + // Process clobbers. + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + // Clobbers kill the related colors. + const uint32_t *PreservedRegs = MO.getRegMask(); + + // Set generated regs. + for (const auto Entry : RegToId) { + unsigned Reg = Entry.second; + // Use the global register ID when querying APIs external to this + // pass. + if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { + // Do not register clobbered definition for no ADRP. + // This definition is not used anyway (otherwise register + // allocation is wrong). + BBGen[Reg] = ADRPMode ? &MI : nullptr; + BBKillSet.set(Reg); + } + } + } + + // Process register defs. + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned CurReg = MO.getReg(); + MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); + if (ItCurRegId == RegToId.end()) + continue; + + for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { + MapRegToId::const_iterator ItRegId = RegToId.find(*AI); + assert(ItRegId != RegToId.end() && + "Sub-register of an " + "involved register, not recorded as involved!"); + BBKillSet.set(ItRegId->second); + BBGen[ItRegId->second] = &MI; + } + BBGen[ItCurRegId->second] = &MI; + } + } + + // If we restrict our analysis to basic block scope, conservatively add a + // dummy + // use for each generated value. 
+  if (!ADRPMode && DummyOp && !MBB.succ_empty())
+    for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
+      if (BBGen[CurReg])
+        getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
+  }
+}
+
+/// Reaching def core algorithm:
+/// while an Out has changed
+///    for each bb
+///       for each color
+///           In[bb][color] = U Out[bb.predecessors][color]
+///           insert reachableUses[bb][color] in each in[bb][color]
+///                 op.reachedUses
+///
+///           Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
+static void reachingDefAlgorithm(MachineFunction &MF,
+                                 InstrToInstrs *ColorOpToReachedUses,
+                                 BlockToSetOfInstrsPerColor &In,
+                                 BlockToSetOfInstrsPerColor &Out,
+                                 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
+                                 BlockToSetOfInstrsPerColor &ReachableUses,
+                                 unsigned NbReg) {
+  bool HasChanged;
+  do {
+    HasChanged = false;
+    for (MachineBasicBlock &MBB : MF) {
+      unsigned CurReg;
+      for (CurReg = 0; CurReg < NbReg; ++CurReg) {
+        SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
+        SetOfMachineInstr &BBReachableUses =
+            getSet(ReachableUses, MBB, CurReg, NbReg);
+        SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
+        unsigned Size = BBOutSet.size();
+        // In[bb][color] = U Out[bb.predecessors][color]
+        for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
+          SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
+          BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
+        }
+        // insert reachableUses[bb][color] in each in[bb][color] op.reachedUses
+        for (const MachineInstr *MI : BBInSet) {
+          SetOfMachineInstr &OpReachedUses =
+              getUses(ColorOpToReachedUses, CurReg, *MI);
+          OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
+        }
+        // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
+        if (!Kill[&MBB].test(CurReg))
+          BBOutSet.insert(BBInSet.begin(), BBInSet.end());
+        if (Gen[&MBB][CurReg])
+          BBOutSet.insert(Gen[&MBB][CurReg]);
+        HasChanged |= BBOutSet.size() != Size;
+      }
+    }
+  } while (HasChanged);
+}
+
+/// Release all memory dynamically allocated during the reaching
+/// definition algorithm.
+static void finitReachingDef(BlockToSetOfInstrsPerColor &In,
+                             BlockToSetOfInstrsPerColor &Out,
+                             BlockToInstrPerColor &Gen,
+                             BlockToSetOfInstrsPerColor &ReachableUses) {
+  for (auto &IT : Out)
+    delete[] IT.second;
+  for (auto &IT : In)
+    delete[] IT.second;
+  for (auto &IT : ReachableUses)
+    delete[] IT.second;
+  for (auto &IT : Gen)
+    delete[] IT.second;
+}
+
+/// Reaching definition algorithm.
+/// \param MF function on which the algorithm will operate.
+/// \param[out] ColorOpToReachedUses will contain the result of the reaching
+/// def algorithm.
+/// \param ADRPMode specify whether the reaching def algorithm should be tuned
+/// for ADRP optimization. \see initReachingDef for more details.
+/// \param DummyOp if not NULL, the algorithm will work at
+/// basic block scope and will set for every exposed definition a use to
+/// @p DummyOp.
+/// \pre ColorOpToReachedUses is an array of at least number of registers of
+/// InstrToInstrs.
+static void reachingDef(MachineFunction &MF,
+                        InstrToInstrs *ColorOpToReachedUses,
+                        const MapRegToId &RegToId, bool ADRPMode = false,
+                        const MachineInstr *DummyOp = nullptr) {
+  // structures:
+  // For each basic block.
+  // Out: a set per color of definitions that reach the
+  //      out boundary of this block.
+  // In: Same as Out but for in boundary.
+  // Gen: generated color in this block (one operation per color).
+  // Kill: register set of killed color in this block.
+  // ReachableUses: a set per color of uses (operation) reachable
+  //                for "In" definitions.
+  BlockToSetOfInstrsPerColor Out, In, ReachableUses;
+  BlockToInstrPerColor Gen;
+  BlockToRegSet Kill;
+
+  // Initialize Gen, kill and reachableUses.
+  initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
+                  DummyOp, ADRPMode);
+
+  // Algo.
+  if (!DummyOp)
+    reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
+                         ReachableUses, RegToId.size());
+
+  // finit.
+  finitReachingDef(In, Out, Gen, ReachableUses);
+}
+
+#ifndef NDEBUG
+/// print the result of the reaching definition algorithm.
+static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
+                             unsigned NbReg, const TargetRegisterInfo *TRI,
+                             const MapIdToReg &IdToReg) {
+  unsigned CurReg;
+  for (CurReg = 0; CurReg < NbReg; ++CurReg) {
+    if (ColorOpToReachedUses[CurReg].empty())
+      continue;
+    DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
+
+    for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
+      DEBUG(dbgs() << "Def:\n");
+      DEBUG(DefsIt.first->print(dbgs()));
+      DEBUG(dbgs() << "Reachable uses:\n");
+      for (const MachineInstr *MI : DefsIt.second) {
+        DEBUG(MI->print(dbgs()));
+      }
+    }
+  }
+}
+#endif // NDEBUG
+
+/// Answer the following question: Can Def be one of the definitions
+/// involved in part of a LOH?
+static bool canDefBePartOfLOH(const MachineInstr *Def) {
+  unsigned Opc = Def->getOpcode();
+  // Accept ADRP, ADDLow and LOADGot.
+  switch (Opc) {
+  default:
+    return false;
+  case AArch64::ADRP:
+    return true;
+  case AArch64::ADDXri:
+    // Check immediate to see if the immediate is an address.
+    switch (Def->getOperand(2).getType()) {
+    default:
+      return false;
+    case MachineOperand::MO_GlobalAddress:
+    case MachineOperand::MO_JumpTableIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_BlockAddress:
+      return true;
+    }
+  case AArch64::LDRXui:
+    // Check immediate to see if the immediate is an address.
+    switch (Def->getOperand(2).getType()) {
+    default:
+      return false;
+    case MachineOperand::MO_GlobalAddress:
+      return true;
+    }
+  }
+  // Unreachable.
+  return false;
+}
+
+/// Check whether the given instruction can be the end of a LOH chain
+/// involving a store.
+static bool isCandidateStore(const MachineInstr *Instr) {
+  switch (Instr->getOpcode()) {
+  default:
+    return false;
+  case AArch64::STRBui:
+  case AArch64::STRHui:
+  case AArch64::STRWui:
+  case AArch64::STRXui:
+  case AArch64::STRSui:
+  case AArch64::STRDui:
+  case AArch64::STRQui:
+    // In case we have str xA, [xA, #imm], this is two different uses
+    // of xA and we cannot fold, otherwise the xA stored may be wrong,
+    // even if #imm == 0.
+    if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
+      return true;
+  }
+  return false;
+}
+
+/// Given the result of a reaching definition algorithm in
+/// ColorOpToReachedUses, build the Use to Defs information and filter out
+/// obvious non-LOH candidates.
+/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
+/// In non-ADRPMode, non-LOH candidates are "uses" with several definitions,
+/// i.e., no simple chain.
+/// \param ADRPMode -- \see initReachingDef.
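+///
+/// For instance (illustrative), a load whose base register is reached by two
+/// different definitions has no simple chain and is dropped from the
+/// candidates here, while one reached by a single ADDXri definition is kept.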
+static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
+                              const InstrToInstrs *ColorOpToReachedUses,
+                              const MapRegToId &RegToId,
+                              bool ADRPMode = false) {
+
+  SetOfMachineInstr NotCandidate;
+  unsigned NbReg = RegToId.size();
+  MapRegToId::const_iterator EndIt = RegToId.end();
+  for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
+    // If this color is never defined, continue.
+    if (ColorOpToReachedUses[CurReg].empty())
+      continue;
+
+    for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
+      for (const MachineInstr *MI : DefsIt.second) {
+        const MachineInstr *Def = DefsIt.first;
+        MapRegToId::const_iterator It;
+        // if all the reaching defs are not ADRP, this use will not be
+        // simplifiable.
+        if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
+            (!ADRPMode && !canDefBePartOfLOH(Def)) ||
+            (!ADRPMode && isCandidateStore(MI) &&
+             // stores are LOH candidates iff the end of the chain is used as
+             // base.
+             ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
+              It->second != CurReg))) {
+          NotCandidate.insert(MI);
+          continue;
+        }
+        // Do not consider self reaching as a simplifiable case for ADRP.
+        if (!ADRPMode || MI != DefsIt.first) {
+          UseToReachingDefs[MI].insert(DefsIt.first);
+          // If MI has several reaching definitions, it is not a
+          // candidate for simplification in non-ADRPMode.
+          if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
+            NotCandidate.insert(MI);
+        }
+      }
+    }
+  }
+  for (const MachineInstr *Elem : NotCandidate) {
+    DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
+    // It would have been better if we could just remove the entry
+    // from the map. Because of that, we have to filter the garbage
+    // (second.empty) in the subsequent analysis.
+    UseToReachingDefs[Elem].clear();
+  }
+}
+
+/// Based on the use to defs information (in ADRPMode), compute the
+/// opportunities of LOH ADRP-related.
+static void computeADRP(const InstrToInstrs &UseToDefs,
+                        AArch64FunctionInfo &AArch64FI,
+                        const MachineDominatorTree *MDT) {
+  DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
+  for (const auto &Entry : UseToDefs) {
+    unsigned Size = Entry.second.size();
+    if (Size == 0)
+      continue;
+    if (Size == 1) {
+      const MachineInstr *L2 = *Entry.second.begin();
+      const MachineInstr *L1 = Entry.first;
+      if (!MDT->dominates(L2, L1)) {
+        DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
+                     << '\n');
+        continue;
+      }
+      DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
+      SmallVector<const MachineInstr *, 2> Args;
+      Args.push_back(L2);
+      Args.push_back(L1);
+      AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
+      ++NumADRPSimpleCandidate;
+    }
+#ifdef DEBUG
+    else if (Size == 2)
+      ++NumADRPComplexCandidate2;
+    else if (Size == 3)
+      ++NumADRPComplexCandidate3;
+    else
+      ++NumADRPComplexCandidateOther;
+#endif
+    // if Size < 1, the use should have been removed from the candidates
+    assert(Size >= 1 && "No reaching defs for that use!");
+  }
+}
+
+/// Check whether the given instruction can be the end of a LOH chain
+/// involving a load.
+static bool isCandidateLoad(const MachineInstr *Instr) {
+  switch (Instr->getOpcode()) {
+  default:
+    return false;
+  case AArch64::LDRSBWui:
+  case AArch64::LDRSBXui:
+  case AArch64::LDRSHWui:
+  case AArch64::LDRSHXui:
+  case AArch64::LDRSWui:
+  case AArch64::LDRBui:
+  case AArch64::LDRHui:
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+    if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
+      return false;
+    return true;
+  }
+  // Unreachable.
+  return false;
+}
+
+/// Check whether the given instruction can load a literal.
+static bool supportLoadFromLiteral(const MachineInstr *Instr) {
+  switch (Instr->getOpcode()) {
+  default:
+    return false;
+  case AArch64::LDRSWui:
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+    return true;
+  }
+  // Unreachable.
+  return false;
+}
+
+/// Check whether the given instruction is a LOH candidate.
+/// \param UseToDefs is used to check that Instr is at the end of a LOH
+/// supported chain.
+/// \pre UseToDefs contains only one def per use, i.e., obvious non-candidates
+/// have already been filtered out.
+static bool isCandidate(const MachineInstr *Instr,
+                        const InstrToInstrs &UseToDefs,
+                        const MachineDominatorTree *MDT) {
+  if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
+    return false;
+
+  const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
+  if (Def->getOpcode() != AArch64::ADRP) {
+    // At this point, Def is ADDXri or LDRXui of the right type of
+    // symbol, because we filtered out the uses that were not defined
+    // by these kind of instructions (+ ADRP).
+
+    // Check if this forms a simple chain: each intermediate node must
+    // dominate the next one.
+    if (!MDT->dominates(Def, Instr))
+      return false;
+    // Move one node up in the simple chain.
+    if (UseToDefs.find(Def) ==
+            UseToDefs.end()
+        // The map may contain garbage we have to ignore.
+        ||
+        UseToDefs.find(Def)->second.empty())
+      return false;
+    Instr = Def;
+    Def = *UseToDefs.find(Def)->second.begin();
+  }
+  // Check if we reached the top of the simple chain:
+  // - top is ADRP.
+  // - check the simple chain property: each intermediate node must
+  //   dominate the next one.
+  if (Def->getOpcode() == AArch64::ADRP)
+    return MDT->dominates(Def, Instr);
+  return false;
+}
+
+static bool registerADRCandidate(const MachineInstr &Use,
+                                 const InstrToInstrs &UseToDefs,
+                                 const InstrToInstrs *DefsPerColorToUses,
+                                 AArch64FunctionInfo &AArch64FI,
+                                 SetOfMachineInstr *InvolvedInLOHs,
+                                 const MapRegToId &RegToId) {
+  // Look for opportunities to turn ADRP -> ADD or
+  // ADRP -> LDR GOTPAGEOFF into ADR.
+  // If the ADRP has more than one use, give up.
+  if (Use.getOpcode() != AArch64::ADDXri &&
+      (Use.getOpcode() != AArch64::LDRXui ||
+       !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
+    return false;
+  InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
+  // The map may contain garbage that we need to ignore.
+  if (It == UseToDefs.end() || It->second.empty())
+    return false;
+  const MachineInstr &Def = **It->second.begin();
+  if (Def.getOpcode() != AArch64::ADRP)
+    return false;
+  // Check the number of users of ADRP.
+  const SetOfMachineInstr *Users =
+      getUses(DefsPerColorToUses,
+              RegToId.find(Def.getOperand(0).getReg())->second, Def);
+  if (Users->size() > 1) {
+    ++NumADRComplexCandidate;
+    return false;
+  }
+  ++NumADRSimpleCandidate;
+  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
+         "ADRP already involved in LOH.");
+  assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
+         "ADD already involved in LOH.");
+  DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
+
+  SmallVector<const MachineInstr *, 2> Args;
+  Args.push_back(&Def);
+  Args.push_back(&Use);
+
+  AArch64FI.addLOHDirective(
+      Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
+      Args);
+  return true;
+}
+
+/// Based on the use to defs information (in non-ADRPMode), compute the
+/// opportunities of LOH non-ADRP-related.
+static void computeOthers(const InstrToInstrs &UseToDefs,
+                          const InstrToInstrs *DefsPerColorToUses,
+                          AArch64FunctionInfo &AArch64FI,
+                          const MapRegToId &RegToId,
+                          const MachineDominatorTree *MDT) {
+  SetOfMachineInstr *InvolvedInLOHs = nullptr;
+#ifdef DEBUG
+  SetOfMachineInstr InvolvedInLOHsStorage;
+  InvolvedInLOHs = &InvolvedInLOHsStorage;
+#endif // DEBUG
+  DEBUG(dbgs() << "*** Compute LOH for Others\n");
+  // ADRP -> ADD/LDR -> LDR/STR pattern.
+  // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
+
+  // FIXME: When the statistics are not important,
+  // this initial filtering loop can be merged into the next loop.
+  // Currently, we didn't do it to have the same code for both DEBUG and
+  // NDEBUG builds. Indeed, the iterator of the second loop would need
+  // to be changed.
+  SetOfMachineInstr PotentialCandidates;
+  SetOfMachineInstr PotentialADROpportunities;
+  for (auto &Use : UseToDefs) {
+    // If no definition is available, this is a non candidate.
+    if (Use.second.empty())
+      continue;
+    // Keep only instructions that are load or store and at the end of
+    // a ADRP -> ADD/LDR/Nothing chain.
+    // We already filtered out the no-chain cases.
+    if (!isCandidate(Use.first, UseToDefs, MDT)) {
+      PotentialADROpportunities.insert(Use.first);
+      continue;
+    }
+    PotentialCandidates.insert(Use.first);
+  }
+
+  // Make the following distinctions for statistics as the linker does
+  // know how to decode instructions:
+  // - ADD/LDR/Nothing make three different patterns.
+  // - LDR/STR make two different patterns.
+  // Hence, 6 - 1 base patterns.
+  // (because ADRP -> Nothing -> STR is not simplifiable)
+
+  // The linker is only able to have a simple semantic, i.e., if pattern A,
+  // do B.
+  // However, we want to see the opportunity we may miss if we were able to
+  // catch more complex cases.
+
+  // PotentialCandidates are the result of a chain ADRP -> ADD/LDR -> LDR/STR.
+  // A potential candidate becomes a candidate if its current immediate
+  // operand is zero and all nodes of the chain have respectively only one user
+#ifdef DEBUG
+  SetOfMachineInstr DefsOfPotentialCandidates;
+#endif
+  for (const MachineInstr *Candidate : PotentialCandidates) {
+    // Get the definition of the candidate i.e., ADD or LDR.
+    const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
+    // Record the elements of the chain.
+    const MachineInstr *L1 = Def;
+    const MachineInstr *L2 = nullptr;
+    unsigned ImmediateDefOpc = Def->getOpcode();
+    if (Def->getOpcode() != AArch64::ADRP) {
+      // Check the number of users of this node.
+      const SetOfMachineInstr *Users =
+          getUses(DefsPerColorToUses,
+                  RegToId.find(Def->getOperand(0).getReg())->second, *Def);
+      if (Users->size() > 1) {
+#ifdef DEBUG
+        // if all the uses of this def are in potential candidates, this is
+        // a complex candidate of level 2.
+        bool IsLevel2 = true;
+        for (const MachineInstr *MI : *Users) {
+          if (!PotentialCandidates.count(MI)) {
+            ++NumTooCplxLvl2;
+            IsLevel2 = false;
+            break;
+          }
+        }
+        if (IsLevel2)
+          ++NumCplxLvl2;
+#endif // DEBUG
+        PotentialADROpportunities.insert(Def);
+        continue;
+      }
+      L2 = Def;
+      Def = *UseToDefs.find(Def)->second.begin();
+      L1 = Def;
+    } // else the element in the middle of the chain is nothing, thus
+      // Def already contains the first element of the chain.
+
+    // Check the number of users of the first node in the chain, i.e., ADRP.
+    const SetOfMachineInstr *Users =
+        getUses(DefsPerColorToUses,
+                RegToId.find(Def->getOperand(0).getReg())->second, *Def);
+    if (Users->size() > 1) {
+#ifdef DEBUG
+      // If all the uses of this def are in the defs of the potential
+      // candidates, this is a complex candidate of level 1.
+      if (DefsOfPotentialCandidates.empty()) {
+        // Lazy initialization.
+        DefsOfPotentialCandidates = PotentialCandidates;
+        for (const MachineInstr *Candidate : PotentialCandidates) {
+          if (!UseToDefs.find(Candidate)->second.empty())
+            DefsOfPotentialCandidates.insert(
+                *UseToDefs.find(Candidate)->second.begin());
+        }
+      }
+      bool Found = false;
+      for (auto &Use : *Users) {
+        if (!DefsOfPotentialCandidates.count(Use)) {
+          ++NumTooCplxLvl1;
+          Found = true;
+          break;
+        }
+      }
+      if (!Found)
+        ++NumCplxLvl1;
+#endif // DEBUG
+      continue;
+    }
+
+    bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
+    // If the chain is three instructions long and the ldr is the second
+    // element, then this ldr must load from the GOT, otherwise this is not
+    // a correct chain.
+    if (L2 && !IsL2Add &&
+        L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT)
+      continue;
+    SmallVector<const MachineInstr *, 3> Args;
+    MCLOHType Kind;
+    if (isCandidateLoad(Candidate)) {
+      if (!L2) {
+        // At this point, the candidate LOH indicates that the ldr instruction
+        // may use a direct access to the symbol. There is no such encoding
+        // for byte and half-word loads.
+        if (!supportLoadFromLiteral(Candidate))
+          continue;
+
+        DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
+                     << '\n');
+        Kind = MCLOH_AdrpLdr;
+        Args.push_back(L1);
+        Args.push_back(Candidate);
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
+               "L1 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
+               "Candidate already involved in LOH.");
+        ++NumADRPToLDR;
+      } else {
+        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
+                     << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
+                     << '\n');
+
+        Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
+        Args.push_back(L1);
+        Args.push_back(L2);
+        Args.push_back(Candidate);
+
+        PotentialADROpportunities.remove(L2);
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
+               "L1 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
+               "L2 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
+               "Candidate already involved in LOH.");
+#ifdef DEBUG
+        // Get the immediate of the load.
+        if (Candidate->getOperand(2).getImm() == 0) {
+          if (ImmediateDefOpc == AArch64::ADDXri)
+            ++NumADDToLDR;
+          else
+            ++NumLDRToLDR;
+        } else if (ImmediateDefOpc == AArch64::ADDXri) {
+          ++NumADDToLDRWithImm;
+        } else {
+          ++NumLDRToLDRWithImm;
+        }
+#endif // DEBUG
+      }
+    } else {
+      if (ImmediateDefOpc == AArch64::ADRP)
+        continue;
+      else {
+        DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
+                     << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
+                     << '\n');
+
+        Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
+        Args.push_back(L1);
+        Args.push_back(L2);
+        Args.push_back(Candidate);
+
+        PotentialADROpportunities.remove(L2);
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
+               "L1 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
+               "L2 already involved in LOH.");
+        assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
+               "Candidate already involved in LOH.");
+#ifdef DEBUG
+        // Get the immediate of the store.
+        if (Candidate->getOperand(2).getImm() == 0) {
+          if (ImmediateDefOpc == AArch64::ADDXri)
+            ++NumADDToSTR;
+          else
+            ++NumLDRToSTR;
+        } else if (ImmediateDefOpc == AArch64::ADDXri) {
+          ++NumADDToSTRWithImm;
+        } else {
+          ++NumLDRToSTRWithImm;
+        }
+#endif // DEBUG
+      }
+    }
+    AArch64FI.addLOHDirective(Kind, Args);
+  }
+
+  // Now that we have grabbed all the big patterns, check ADR opportunities.
+  for (const MachineInstr *Candidate : PotentialADROpportunities)
+    registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
+                         InvolvedInLOHs, RegToId);
+}
+
+/// Look for every register defined by potential LOH candidates.
+/// Map each of these registers to a dense id in @p RegToId and vice-versa in
+/// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
+static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
+                               MapIdToReg &IdToReg,
+                               const TargetRegisterInfo *TRI) {
+  unsigned CurRegId = 0;
+  if (!PreCollectRegister) {
+    unsigned NbReg = TRI->getNumRegs();
+    for (; CurRegId < NbReg; ++CurRegId) {
+      RegToId[CurRegId] = CurRegId;
+      DEBUG(IdToReg.push_back(CurRegId));
+      DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
+    }
+    return;
+  }
+
+  DEBUG(dbgs() << "** Collect Involved Register\n");
+  for (const auto &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      if (!canDefBePartOfLOH(&MI))
+        continue;
+
+      // Process defs.
+      for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
+                                            IOEnd = MI.operands_end();
+           IO != IOEnd; ++IO) {
+        if (!IO->isReg() || !IO->isDef())
+          continue;
+        unsigned CurReg = IO->getReg();
+        for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
+          if (RegToId.find(*AI) == RegToId.end()) {
+            DEBUG(IdToReg.push_back(*AI);
+                  assert(IdToReg[CurRegId] == *AI &&
+                         "Reg index mismatches insertion index."));
+            RegToId[*AI] = CurRegId++;
+            DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
+          }
+      }
+    }
+  }
+}
+
+bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
+  const TargetMachine &TM = MF.getTarget();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
+
+  MapRegToId RegToId;
+  MapIdToReg IdToReg;
+  AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
+  assert(AArch64FI && "No MachineFunctionInfo for this function!");
+
+  DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
+
+  collectInvolvedReg(MF, RegToId, IdToReg, TRI);
+  if (RegToId.empty())
+    return false;
+
+  MachineInstr *DummyOp = nullptr;
+  if (BasicBlockScopeOnly) {
+    const AArch64InstrInfo *TII =
+        static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
+    // For local analysis, create a dummy operation to record uses that are not
+    // local.
+    DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
+  }
+
+  unsigned NbReg = RegToId.size();
+  bool Modified = false;
+
+  // Start with ADRP.
+  InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
+
+  // Compute the reaching def in ADRP mode, meaning ADRP definitions
+  // are first considered as uses.
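+  // Editor's note (illustrative, not from the original patch): in ADRP mode
+  // the analysis pairs up ADRPs of the same page so that, e.g., two
+  //   adrp x8, _g@PAGE
+  // instructions ("_g" is a hypothetical symbol) can be recorded as an
+  // AdrpAdrp LOH, letting the linker remove the redundant second one.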
+  reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
+  DEBUG(dbgs() << "ADRP reaching defs\n");
+  DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
+
+  // Translate the definition-to-uses map into a use-to-definitions map to
+  // ease statistic computation.
+  InstrToInstrs ADRPToReachingDefs;
+  reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
+
+  // Compute LOH for ADRP.
+  computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
+  delete[] ColorOpToReachedUses;
+
+  // Continue with the general ADRP -> ADD/LDR -> LDR/STR pattern.
+  ColorOpToReachedUses = new InstrToInstrs[NbReg];
+
+  // First perform a regular reaching def analysis.
+  reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
+  DEBUG(dbgs() << "All reaching defs\n");
+  DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
+
+  // Turn that into a use-to-defs map to ease statistic computation.
+  InstrToInstrs UsesToReachingDefs;
+  reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
+
+  // Compute the LOHs other than AdrpAdrp.
+  computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
+                MDT);
+  delete[] ColorOpToReachedUses;
+
+  if (BasicBlockScopeOnly)
+    MF.DeleteMachineInstr(DummyOp);
+
+  return Modified;
+}
+
+/// createAArch64CollectLOHPass - returns an instance of the AArch64 collect
+/// linker optimization hint (LOH) pass.
+FunctionPass *llvm::createAArch64CollectLOHPass() {
+  return new AArch64CollectLOH();
+}
diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
new file mode 100644
index 0000000..452cdec
--- /dev/null
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -0,0 +1,919 @@
+//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64ConditionalCompares pass which reduces
+// branching and code size by using the conditional compare instructions CCMP,
+// CCMN, and FCCMP.
+//
+// The CFG transformations for forming conditional compares are very similar to
+// if-conversion, and this pass should run immediately before the early
+// if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-ccmp"
+
+// Absolute maximum number of instructions allowed per speculated block.
+// This bypasses all other heuristics, so it should be set fairly high.
+static cl::opt<unsigned> BlockInstrLimit(
+    "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
+    cl::desc("Maximum number of instructions per speculated block."));
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
+                            cl::desc("Turn all knobs to 11"));
+
+STATISTIC(NumConsidered, "Number of ccmps considered");
+STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
+STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
+STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
+STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
+STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
+STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
+STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
+STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
+STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
+STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
+
+STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
+
+STATISTIC(NumConverted, "Number of ccmp instructions created");
+STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
+
+//===----------------------------------------------------------------------===//
+//                                 SSACCmpConv
+//===----------------------------------------------------------------------===//
+//
+// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
+// after determining if it is possible. The class contains no heuristics;
+// external code should be used to determine when ccmp-conversion is a good
+// idea.
+//
+// CCmp-formation works on a CFG representing chained conditions, typically
+// from C's short-circuit || and && operators:
+//
+//   From:         Head            To:         Head
+//                /    |                       CmpBB
+//               /     |                      /    |
+//              |  CmpBB                     /     |
+//              |  /   |                   Tail    |
+//              |  /   |                    |      |
+//              Tail   |                    |      |
+//               |     |                    |      |
+//              ...   ...                  ...    ...
+//
+// The Head block is terminated by a br.cond instruction, and the CmpBB block
+// contains compare + br.cond. Tail must be a successor of both.
+//
+// The cmp-conversion turns the compare instruction in CmpBB into a conditional
+// compare, and merges CmpBB into Head, speculatively executing its
+// instructions. The AArch64 conditional compare instructions have an immediate
+// operand that specifies the NZCV flag values when the condition is false and
+// the compare isn't executed. This makes it possible to chain compares with
+// different condition codes.
+//
+// Example:
+//
+//    if (a == 5 || b == 17)
+//      foo();
+//
+//   Head:
+//      cmp  w0, #5
+//      b.eq Tail
+//   CmpBB:
+//      cmp  w1, #17
+//      b.eq Tail
+//   ...
+//   Tail:
+//      bl _foo
+//
+//  Becomes:
+//
+//   Head:
+//      cmp  w0, #5
+//      ccmp w1, #17, 4, ne  ; 4 = nZcv
+//      b.eq Tail
+//   ...
+//   Tail:
+//      bl _foo
+//
+// The ccmp condition code is the one that would cause the Head terminator to
+// branch to CmpBB.
+//
+// FIXME: It should also be possible to speculate a block on the critical edge
+// between Head and Tail, just like if-converting a diamond.
+//
+// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
+
+namespace {
+class SSACCmpConv {
+  MachineFunction *MF;
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+
+public:
+  /// The first block containing a conditional branch, dominating everything
+  /// else.
+  MachineBasicBlock *Head;
+
+  /// The block containing cmp+br.cond with a successor shared with Head.
+  MachineBasicBlock *CmpBB;
+
+  /// The common successor for Head and CmpBB.
+  MachineBasicBlock *Tail;
+
+  /// The compare instruction in CmpBB that can be converted to a ccmp.
+  MachineInstr *CmpMI;
+
+private:
+  /// The branch condition in Head as determined by AnalyzeBranch.
+  SmallVector<MachineOperand, 4> HeadCond;
+
+  /// The condition code that makes Head branch to CmpBB.
+  AArch64CC::CondCode HeadCmpBBCC;
+
+  /// The branch condition in CmpBB.
+  SmallVector<MachineOperand, 4> CmpBBCond;
+
+  /// The condition code that makes CmpBB branch to Tail.
+  AArch64CC::CondCode CmpBBTailCC;
+
+  /// Check if the Tail PHIs are trivially convertible.
+  bool trivialTailPHIs();
+
+  /// Remove CmpBB from the Tail PHIs.
+  void updateTailPHIs();
+
+  /// Check if an operand defining DstReg is dead.
+  bool isDeadDef(unsigned DstReg);
+
+  /// Find the compare instruction in MBB that controls the conditional branch.
+  /// Return NULL if a convertible instruction can't be found.
+  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
+
+  /// Return true if all non-terminator instructions in MBB can be safely
+  /// speculated.
+  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
+
+public:
+  /// runOnMachineFunction - Initialize per-function data structures.
+  void runOnMachineFunction(MachineFunction &MF) {
+    this->MF = &MF;
+    TII = MF.getTarget().getInstrInfo();
+    TRI = MF.getTarget().getRegisterInfo();
+    MRI = &MF.getRegInfo();
+  }
+
+  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
+  /// internal state, and return true.
+  bool canConvert(MachineBasicBlock *MBB);
+
+  /// Cmp-convert the last block passed to canConvert(), assuming
+  /// it is possible. Add any erased blocks to RemovedBlocks.
+  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
+
+  /// Return the expected code size delta if the conversion into a
+  /// conditional compare is performed.
+  int expectedCodeSizeDelta() const;
+};
+} // end anonymous namespace
+
+// Check that all PHIs in Tail are selecting the same value from Head and
+// CmpBB. This means that no if-conversion is required when merging CmpBB into
+// Head.
+bool SSACCmpConv::trivialTailPHIs() {
+  for (auto &I : *Tail) {
+    if (!I.isPHI())
+      break;
+    unsigned HeadReg = 0, CmpBBReg = 0;
+    // PHI operands come in (VReg, MBB) pairs.
+    for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
+      MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
+      unsigned Reg = I.getOperand(oi).getReg();
+      if (MBB == Head) {
+        assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
+        HeadReg = Reg;
+      }
+      if (MBB == CmpBB) {
+        assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
+        CmpBBReg = Reg;
+      }
+    }
+    if (HeadReg != CmpBBReg)
+      return false;
+  }
+  return true;
+}
+
+// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
+// removing the CmpBB operands. The Head operands will be identical.
+void SSACCmpConv::updateTailPHIs() {
+  for (auto &I : *Tail) {
+    if (!I.isPHI())
+      break;
+    // I is a PHI. It can have multiple entries for CmpBB.
+    for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
+      // PHI operands are (Reg, MBB) at (oi-2, oi-1).
+      if (I.getOperand(oi - 1).getMBB() == CmpBB) {
+        I.RemoveOperand(oi - 1);
+        I.RemoveOperand(oi - 2);
+      }
+    }
+  }
+}
+
+// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
+// are still writing virtual registers without any uses.
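+// For instance (editor's note), the SUBS materialized for a plain 'cmp'
+// defines a vreg that nothing reads; isDeadDef() treats it as dead so the
+// instruction can still be recognized as a compare below.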
+bool SSACCmpConv::isDeadDef(unsigned DstReg) {
+  // Writes to the zero register are dead.
+  if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
+    return true;
+  if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+    return false;
+  // A virtual register def without any uses will be marked dead later, and
+  // eventually replaced by the zero register.
+  return MRI->use_nodbg_empty(DstReg);
+}
+
+// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
+// corresponding to TBB.
+// Return true if the condition was recognized.
+static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
+  // A normal br.cond simply has the condition code.
+  if (Cond[0].getImm() != -1) {
+    assert(Cond.size() == 1 && "Unknown Cond array format");
+    CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
+    return true;
+  }
+  // For cbz, cbnz, tbz, and tbnz instructions, the opcode comes next.
+  switch (Cond[1].getImm()) {
+  default:
+    // This includes tbz / tbnz branches which can't be converted to
+    // ccmp + br.cond.
+    return false;
+  case AArch64::CBZW:
+  case AArch64::CBZX:
+    assert(Cond.size() == 3 && "Unknown Cond array format");
+    CC = AArch64CC::EQ;
+    return true;
+  case AArch64::CBNZW:
+  case AArch64::CBNZX:
+    assert(Cond.size() == 3 && "Unknown Cond array format");
+    CC = AArch64CC::NE;
+    return true;
+  }
+}
+
+MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
+  MachineBasicBlock::iterator I = MBB->getFirstTerminator();
+  if (I == MBB->end())
+    return nullptr;
+  // The terminator must be controlled by the flags.
+  if (!I->readsRegister(AArch64::NZCV)) {
+    switch (I->getOpcode()) {
+    case AArch64::CBZW:
+    case AArch64::CBZX:
+    case AArch64::CBNZW:
+    case AArch64::CBNZX:
+      // These can be converted into a ccmp against #0.
+      return I;
+    }
+    ++NumCmpTermRejs;
+    DEBUG(dbgs() << "Flags not used by terminator: " << *I);
+    return nullptr;
+  }
+
+  // Now find the instruction controlling the terminator.
+  for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
+    --I;
+    assert(!I->isTerminator() && "Spurious terminator");
+    switch (I->getOpcode()) {
+    // cmp is an alias for subs with a dead destination register.
+    case AArch64::SUBSWri:
+    case AArch64::SUBSXri:
+    // cmn is an alias for adds with a dead destination register.
+    case AArch64::ADDSWri:
+    case AArch64::ADDSXri:
+      // Check that the immediate operand is within range, ccmp wants a uimm5.
+      // Rd = SUBSri Rn, imm, shift
+      if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
+        DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
+        ++NumImmRangeRejs;
+        return nullptr;
+      }
+    // Fall through.
+    case AArch64::SUBSWrr:
+    case AArch64::SUBSXrr:
+    case AArch64::ADDSWrr:
+    case AArch64::ADDSXrr:
+      if (isDeadDef(I->getOperand(0).getReg()))
+        return I;
+      DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
+      ++NumLiveDstRejs;
+      return nullptr;
+    case AArch64::FCMPSrr:
+    case AArch64::FCMPDrr:
+    case AArch64::FCMPESrr:
+    case AArch64::FCMPEDrr:
+      return I;
+    }
+
+    // Check for flag reads and clobbers.
+    MIOperands::PhysRegInfo PRI =
+        MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI);
+
+    if (PRI.Reads) {
+      // The ccmp doesn't produce exactly the same flags as the original
+      // compare, so reject the transform if there are uses of the flags
+      // besides the terminators.
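+      // (Editor's note) e.g. a CSEL sitting between the compare and the
+      // branch also reads NZCV; after conversion it could observe the
+      // immediate flags injected by the ccmp, so we must bail out.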
+      DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
+      ++NumMultNZCVUses;
+      return nullptr;
+    }
+
+    if (PRI.Clobbers) {
+      DEBUG(dbgs() << "Not convertible compare: " << *I);
+      ++NumUnknNZCVDefs;
+      return nullptr;
+    }
+  }
+  DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
+  return nullptr;
+}
+
+/// Determine if all the instructions in MBB can safely
+/// be speculated. The terminators are not considered.
+///
+/// Only CmpMI is allowed to clobber the flags.
+///
+bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
+                                     const MachineInstr *CmpMI) {
+  // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
+  // get right.
+  if (!MBB->livein_empty()) {
+    DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
+    return false;
+  }
+
+  unsigned InstrCount = 0;
+
+  // Check all instructions, except the terminators. It is assumed that
+  // terminators never have side effects or define any used register values.
+  for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
+    if (I.isDebugValue())
+      continue;
+
+    if (++InstrCount > BlockInstrLimit && !Stress) {
+      DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than "
+                   << BlockInstrLimit << " instructions.\n");
+      return false;
+    }
+
+    // There shouldn't normally be any phis in a single-predecessor block.
+    if (I.isPHI()) {
+      DEBUG(dbgs() << "Can't hoist: " << I);
+      return false;
+    }
+
+    // Don't speculate loads. Note that it may be possible and desirable to
+    // speculate GOT or constant pool loads that are guaranteed not to trap,
+    // but we don't support that for now.
+    if (I.mayLoad()) {
+      DEBUG(dbgs() << "Won't speculate load: " << I);
+      return false;
+    }
+
+    // We never speculate stores, so an AA pointer isn't necessary.
+    bool DontMoveAcrossStore = true;
+    if (!I.isSafeToMove(TII, nullptr, DontMoveAcrossStore)) {
+      DEBUG(dbgs() << "Can't speculate: " << I);
+      return false;
+    }
+
+    // Only CmpMI is allowed to clobber the flags.
+    if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
+      DEBUG(dbgs() << "Clobbers flags: " << I);
+      return false;
+    }
+  }
+  return true;
+}
+
+/// Analyze the sub-CFG rooted in MBB, and return true if it is a potential
+/// candidate for cmp-conversion. Fill out the internal state.
+///
+bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
+  Head = MBB;
+  Tail = CmpBB = nullptr;
+
+  if (Head->succ_size() != 2)
+    return false;
+  MachineBasicBlock *Succ0 = Head->succ_begin()[0];
+  MachineBasicBlock *Succ1 = Head->succ_begin()[1];
+
+  // CmpBB can only have a single predecessor. Tail is allowed many.
+  if (Succ0->pred_size() != 1)
+    std::swap(Succ0, Succ1);
+
+  // Succ0 is our candidate for CmpBB.
+  if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
+    return false;
+
+  CmpBB = Succ0;
+  Tail = Succ1;
+
+  if (!CmpBB->isSuccessor(Tail))
+    return false;
+
+  // The CFG topology checks out.
+  DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#"
+               << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n');
+  ++NumConsidered;
+
+  // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
+  //
+  // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
+  // defined before the CmpBB cmp clobbers the flags. Alternatively, it should
+  // always be safe to sink the ccmp down to immediately before the CmpBB
+  // terminators.
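+  // (Editor's illustration) A trivial Tail PHI selects the same vreg from
+  // both predecessors, e.g.
+  //   %x = PHI %a, <Head>, %a, <CmpBB>
+  // whereas '%x = PHI %a, <Head>, %b, <CmpBB>' would need a CSEL and is
+  // rejected just below.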
+  if (!trivialTailPHIs()) {
+    DEBUG(dbgs() << "Can't handle phis in Tail.\n");
+    ++NumPhiRejs;
+    return false;
+  }
+
+  if (!Tail->livein_empty()) {
+    DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
+    ++NumPhysRejs;
+    return false;
+  }
+
+  // CmpBB should never have PHIs since Head is its only predecessor.
+  // FIXME: Clean them up if it happens.
+  if (!CmpBB->empty() && CmpBB->front().isPHI()) {
+    DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
+    ++NumPhi2Rejs;
+    return false;
+  }
+
+  if (!CmpBB->livein_empty()) {
+    DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
+    ++NumPhysRejs;
+    return false;
+  }
+
+  // The branch we're looking to eliminate must be analyzable.
+  HeadCond.clear();
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) {
+    DEBUG(dbgs() << "Head branch not analyzable.\n");
+    ++NumHeadBranchRejs;
+    return false;
+  }
+
+  // This is weird, probably some sort of degenerate CFG, or an edge to a
+  // landing pad.
+  if (!TBB || HeadCond.empty()) {
+    DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n");
+    ++NumHeadBranchRejs;
+    return false;
+  }
+
+  if (!parseCond(HeadCond, HeadCmpBBCC)) {
+    DEBUG(dbgs() << "Unsupported branch type on Head\n");
+    ++NumHeadBranchRejs;
+    return false;
+  }
+
+  // Make sure the branch direction is right.
+  if (TBB != CmpBB) {
+    assert(TBB == Tail && "Unexpected TBB");
+    HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
+  }
+
+  CmpBBCond.clear();
+  TBB = FBB = nullptr;
+  if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
+    DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
+    ++NumCmpBranchRejs;
+    return false;
+  }
+
+  if (!TBB || CmpBBCond.empty()) {
+    DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n");
+    ++NumCmpBranchRejs;
+    return false;
+  }
+
+  if (!parseCond(CmpBBCond, CmpBBTailCC)) {
+    DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
+    ++NumCmpBranchRejs;
+    return false;
+  }
+
+  if (TBB != Tail)
+    CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
+
+  DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC)
+               << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC)
+               << '\n');
+
+  CmpMI = findConvertibleCompare(CmpBB);
+  if (!CmpMI)
+    return false;
+
+  if (!canSpeculateInstrs(CmpBB, CmpMI)) {
+    ++NumSpeculateRejs;
+    return false;
+  }
+  return true;
+}
+
+void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
+  DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#"
+               << Head->getNumber() << ":\n" << *CmpBB);
+
+  // All CmpBB instructions are moved into Head, and CmpBB is deleted.
+  // Update the CFG first.
+  updateTailPHIs();
+  Head->removeSuccessor(CmpBB);
+  CmpBB->removeSuccessor(Tail);
+  Head->transferSuccessorsAndUpdatePHIs(CmpBB);
+  DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
+  TII->RemoveBranch(*Head);
+
+  // If the Head terminator was one of the cbz / tbz branches with built-in
+  // compare, we need to insert an explicit compare instruction in its place.
+  if (HeadCond[0].getImm() == -1) {
+    ++NumCompBranches;
+    unsigned Opc = 0;
+    switch (HeadCond[1].getImm()) {
+    case AArch64::CBZW:
+    case AArch64::CBNZW:
+      Opc = AArch64::SUBSWri;
+      break;
+    case AArch64::CBZX:
+    case AArch64::CBNZX:
+      Opc = AArch64::SUBSXri;
+      break;
+    default:
+      llvm_unreachable("Cannot convert Head branch");
+    }
+    const MCInstrDesc &MCID = TII->get(Opc);
+    // Create a dummy virtual register for the SUBS def.
+    unsigned DestReg =
+        MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
+    // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
+    BuildMI(*Head, Head->end(), TermDL, MCID)
+        .addReg(DestReg, RegState::Define | RegState::Dead)
+        .addOperand(HeadCond[2])
+        .addImm(0)
+        .addImm(0);
+    // SUBS uses the GPR*sp register classes.
+    MRI->constrainRegClass(HeadCond[2].getReg(),
+                           TII->getRegClass(MCID, 1, TRI, *MF));
+  }
+
+  Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
+
+  // Now replace CmpMI with a ccmp instruction that also considers the incoming
+  // flags.
+  unsigned Opc = 0;
+  unsigned FirstOp = 1;   // First CmpMI operand to copy.
+  bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
+  switch (CmpMI->getOpcode()) {
+  default:
+    llvm_unreachable("Unknown compare opcode");
+  case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
+  case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
+  case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
+  case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
+  case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
+  case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
+  case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
+  case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
+  case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
+  case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
+  case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
+  case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
+  case AArch64::CBZW:
+  case AArch64::CBNZW:
+    Opc = AArch64::CCMPWi;
+    FirstOp = 0;
+    isZBranch = true;
+    break;
+  case AArch64::CBZX:
+  case AArch64::CBNZX:
+    Opc = AArch64::CCMPXi;
+    FirstOp = 0;
+    isZBranch = true;
+    break;
+  }
+
+  // The ccmp instruction should set the flags according to the comparison when
+  // Head would have branched to CmpBB.
+  // The NZCV immediate operand should provide flags for the case where Head
+  // would have branched to Tail. These flags should cause the new Head
+  // terminator to branch to Tail.
+  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
+  const MCInstrDesc &MCID = TII->get(Opc);
+  MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
+                         TII->getRegClass(MCID, 0, TRI, *MF));
+  if (CmpMI->getOperand(FirstOp + 1).isReg())
+    MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
+                           TII->getRegClass(MCID, 1, TRI, *MF));
+  MachineInstrBuilder MIB =
+      BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
+          .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn
+  if (isZBranch)
+    MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
+  else
+    MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
+  MIB.addImm(NZCV).addImm(HeadCmpBBCC);
+
+  // If CmpMI was a terminator, we need a new conditional branch to replace it.
+  // This now becomes a Head terminator.
+  if (isZBranch) {
+    bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
+                CmpMI->getOpcode() == AArch64::CBNZX;
+    BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
+        .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
+        .addOperand(CmpMI->getOperand(1)); // Branch target.
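+    // (Editor's illustration) e.g. a CmpBB terminator 'cbnz w1, Tail' has
+    // just been rewritten into 'ccmp w1, #0, #<nzcv>, <HeadCC>' followed by
+    // the 'b.ne Tail' built here.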
+  }
+  CmpMI->eraseFromParent();
+  Head->updateTerminator();
+
+  RemovedBlocks.push_back(CmpBB);
+  CmpBB->eraseFromParent();
+  DEBUG(dbgs() << "Result:\n" << *Head);
+  ++NumConverted;
+}
+
+int SSACCmpConv::expectedCodeSizeDelta() const {
+  int delta = 0;
+  // If the Head terminator was one of the cbz / tbz branches with built-in
+  // compare, we need to insert an explicit compare instruction in its place
+  // plus a branch instruction.
+  if (HeadCond[0].getImm() == -1) {
+    switch (HeadCond[1].getImm()) {
+    case AArch64::CBZW:
+    case AArch64::CBNZW:
+    case AArch64::CBZX:
+    case AArch64::CBNZX:
+      // Therefore delta += 1
+      delta = 1;
+      break;
+    default:
+      llvm_unreachable("Cannot convert Head branch");
+    }
+  }
+  // If the Cmp terminator was one of the cbz / tbz branches with
+  // built-in compare, it will be turned into a compare instruction
+  // in Head, so we do not save any instruction.
+  // Otherwise, we save the branch instruction.
+  switch (CmpMI->getOpcode()) {
+  default:
+    --delta;
+    break;
+  case AArch64::CBZW:
+  case AArch64::CBNZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZX:
+    break;
+  }
+  return delta;
+}
+
+//===----------------------------------------------------------------------===//
+//                       AArch64ConditionalCompares Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class AArch64ConditionalCompares : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const MCSchedModel *SchedModel;
+  // True if the function being processed has the MinSize (-Oz) attribute.
+  bool MinSize;
+  MachineRegisterInfo *MRI;
+  MachineDominatorTree *DomTree;
+  MachineLoopInfo *Loops;
+  MachineTraceMetrics *Traces;
+  MachineTraceMetrics::Ensemble *MinInstr;
+  SSACCmpConv CmpConv;
+
+public:
+  static char ID;
+  AArch64ConditionalCompares() : MachineFunctionPass(ID) {}
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  const char *getPassName() const override {
+    return "AArch64 Conditional Compares";
+  }
+
+private:
+  bool tryConvert(MachineBasicBlock *);
+  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
+  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
+  void invalidateTraces();
+  bool shouldConvert();
+};
+} // end anonymous namespace
+
+char AArch64ConditionalCompares::ID = 0;
+
+namespace llvm {
+void initializeAArch64ConditionalComparesPass(PassRegistry &);
+}
+
+INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
+                      "AArch64 CCMP Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
+INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
+                    "AArch64 CCMP Pass", false, false)
+
+FunctionPass *llvm::createAArch64ConditionalCompares() {
+  return new AArch64ConditionalCompares();
+}
+
+void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineBranchProbabilityInfo>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addRequired<MachineTraceMetrics>();
+  AU.addPreserved<MachineTraceMetrics>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Update the dominator tree after if-conversion erased some blocks.
+void AArch64ConditionalCompares::updateDomTree(
+    ArrayRef<MachineBasicBlock *> Removed) {
+  // convert() removes CmpBB which was previously dominated by Head.
+  // CmpBB children should be transferred to Head.
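+  // (Editor's note) e.g. a block whose immediate dominator was CmpBB is
+  // re-parented onto Head before CmpBB's node is erased.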
+  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
+  for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
+    MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+    assert(Node != HeadNode && "Cannot erase the head node");
+    assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
+    while (Node->getNumChildren())
+      DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
+    DomTree->eraseNode(Removed[i]);
+  }
+}
+
+/// Update LoopInfo after if-conversion.
+void
+AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
+  if (!Loops)
+    return;
+  for (unsigned i = 0, e = Removed.size(); i != e; ++i)
+    Loops->removeBlock(Removed[i]);
+}
+
+/// Invalidate MachineTraceMetrics before if-conversion.
+void AArch64ConditionalCompares::invalidateTraces() {
+  Traces->invalidate(CmpConv.Head);
+  Traces->invalidate(CmpConv.CmpBB);
+}
+
+/// Apply cost model and heuristics to the if-conversion in CmpConv.
+/// Return true if the conversion is a good idea.
+///
+bool AArch64ConditionalCompares::shouldConvert() {
+  // Stress testing mode disables all cost considerations.
+  if (Stress)
+    return true;
+  if (!MinInstr)
+    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+  // Head dominates CmpBB, so it is always included in its trace.
+  MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
+
+  // If code size is the main concern:
+  if (MinSize) {
+    int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
+    DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
+    // If we are minimizing the code size, do the conversion whatever
+    // the cost is.
+    if (CodeSizeDelta < 0)
+      return true;
+    if (CodeSizeDelta > 0) {
+      DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
+      return false;
+    }
+    // CodeSizeDelta == 0, continue with the regular heuristics.
+  }
+
+  // Heuristic: The compare conversion delays the execution of the branch
+  // instruction because we must wait for the inputs to the second compare as
+  // well. The branch has no dependent instructions, but delaying it increases
+  // the cost of a misprediction.
+  //
+  // Set a limit on the delay we will accept.
+  unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4;
+
+  // Instruction depths can be computed for all trace instructions above CmpBB.
+  unsigned HeadDepth =
+      Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth;
+  unsigned CmpBBDepth =
+      Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth;
+  DEBUG(dbgs() << "Head depth: " << HeadDepth
+               << "\nCmpBB depth: " << CmpBBDepth << '\n');
+  if (CmpBBDepth > HeadDepth + DelayLimit) {
+    DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
+                 << " cycles.\n");
+    return false;
+  }
+
+  // Check the resource depth at the bottom of CmpBB - these instructions will
+  // be speculated.
+  unsigned ResDepth = Trace.getResourceDepth(true);
+  DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
+
+  // Heuristic: The speculatively executed instructions must all be able to
+  // merge into the Head block. The Head critical path should dominate the
+  // resource cost of the speculated instructions.
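+  // (Editor's note) i.e. if issuing the speculated CmpBB instructions would
+  // take more cycles than Head's own critical path already covers, merging
+  // them would lengthen Head, so the conversion is rejected below.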
+  if (ResDepth > HeadDepth) {
+    DEBUG(dbgs() << "Too many instructions to speculate.\n");
+    return false;
+  }
+  return true;
+}
+
+bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
+  bool Changed = false;
+  while (CmpConv.canConvert(MBB) && shouldConvert()) {
+    invalidateTraces();
+    SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
+    CmpConv.convert(RemovedBlocks);
+    Changed = true;
+    updateDomTree(RemovedBlocks);
+    updateLoops(RemovedBlocks);
+  }
+  return Changed;
+}
+
+bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
+               << "********** Function: " << MF.getName() << '\n');
+  TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  SchedModel =
+      MF.getTarget().getSubtarget<TargetSubtargetInfo>().getSchedModel();
+  MRI = &MF.getRegInfo();
+  DomTree = &getAnalysis<MachineDominatorTree>();
+  Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+  Traces = &getAnalysis<MachineTraceMetrics>();
+  MinInstr = nullptr;
+  MinSize = MF.getFunction()->getAttributes().hasAttribute(
+      AttributeSet::FunctionIndex, Attribute::MinSize);
+
+  bool Changed = false;
+  CmpConv.runOnMachineFunction(MF);
+
+  // Visit blocks in dominator tree pre-order. The pre-order enables multiple
+  // cmp-conversions from the same head block.
+  // Note that updateDomTree() modifies the children of the DomTree node
+  // currently being visited. The df_iterator supports that; it doesn't look at
+  // child_begin() / child_end() until after a node has been visited.
+  for (auto *I : depth_first(DomTree))
+    if (tryConvert(I->getBlock()))
+      Changed = true;
+
+  return Changed;
+}
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
new file mode 100644
index 0000000..a2d853c
--- /dev/null
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -0,0 +1,134 @@
+//==-- AArch64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// When allowed by the instruction, replace a dead definition of a GPR with
+// the zero register. This makes the code a bit friendlier towards the
+// hardware's register renamer.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-dead-defs"
+
+STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
+
+namespace {
+class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
+private:
+  const TargetRegisterInfo *TRI;
+  bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI);
+  bool processMachineBasicBlock(MachineBasicBlock &MBB);
+  bool usesFrameIndex(const MachineInstr &MI);
+public:
+  static char ID; // Pass identification, replacement for typeid.
+  explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
+
+  virtual bool runOnMachineFunction(MachineFunction &F) override;
+
+  const char *getPassName() const override {
+    return "Dead register definitions";
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+char AArch64DeadRegisterDefinitions::ID = 0;
+} // end anonymous namespace
+
+bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
+    unsigned Reg, const MachineInstr &MI) {
+  for (const MachineOperand &MO : MI.implicit_operands())
+    if (MO.isReg() && MO.isDef())
+      if (TRI->regsOverlap(Reg, MO.getReg()))
+        return true;
+  return false;
+}
+
+bool AArch64DeadRegisterDefinitions::usesFrameIndex(const MachineInstr &MI) {
+  for (const MachineOperand &Op : MI.uses())
+    if (Op.isFI())
+      return true;
+  return false;
+}
+
+bool AArch64DeadRegisterDefinitions::processMachineBasicBlock(
+    MachineBasicBlock &MBB) {
+  bool Changed = false;
+  for (MachineInstr &MI : MBB) {
+    if (usesFrameIndex(MI)) {
+      // We need to skip this instruction because while it appears to have a
+      // dead def it uses a frame index which might expand into a
+      // multi-instruction sequence during PEI (prologue/epilogue insertion).
+      DEBUG(dbgs() << "    Ignoring, operand is frame index\n");
+      continue;
+    }
+    for (int i = 0, e = MI.getDesc().getNumDefs(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (MO.isReg() && MO.isDead() && MO.isDef()) {
+        assert(!MO.isImplicit() && "Unexpected implicit def!");
+        DEBUG(dbgs() << "  Dead def operand #" << i << " in:\n    ";
+              MI.print(dbgs()));
+        // Be careful not to change the register if it's a tied operand.
+        if (MI.isRegTiedToUseOperand(i)) {
+          DEBUG(dbgs() << "    Ignoring, def is tied operand.\n");
+          continue;
+        }
+        // Don't change the register if there's an implicit def of a subreg or
+        // superreg.
+        if (implicitlyDefinesOverlappingReg(MO.getReg(), MI)) {
+          DEBUG(dbgs() << "    Ignoring, implicitly defines overlap reg.\n");
+          continue;
+        }
+        // Make sure the instruction takes a register class that contains
+        // the zero register, and replace the register if so.
+        unsigned NewReg;
+        switch (MI.getDesc().OpInfo[i].RegClass) {
+        default:
+          DEBUG(dbgs() << "    Ignoring, register is not a GPR.\n");
+          continue;
+        case AArch64::GPR32RegClassID:
+          NewReg = AArch64::WZR;
+          break;
+        case AArch64::GPR64RegClassID:
+          NewReg = AArch64::XZR;
+          break;
+        }
+        DEBUG(dbgs() << "    Replacing with zero register. New:\n    ");
+        MO.setReg(NewReg);
+        DEBUG(MI.print(dbgs()));
+        ++NumDeadDefsReplaced;
+        // Record that the function was modified (was missing in the original).
+        Changed = true;
+      }
+    }
+  }
+  return Changed;
+}
+
+// Scan the function for instructions that have a dead definition of a
+// register. Replace that register with the zero register when possible.
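+// For instance (editor's sketch), 'add w8, w0, w1' whose w8 def is marked
+// dead becomes 'add wzr, w0, w1', so the write no longer occupies a rename
+// register.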
+bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getTarget().getRegisterInfo(); + bool Changed = false; + DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n"); + + for (auto &MBB : MF) + if (processMachineBasicBlock(MBB)) + Changed = true; + return Changed; +} + +FunctionPass *llvm::createAArch64DeadRegisterDefinitions() { + return new AArch64DeadRegisterDefinitions(); +} diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp new file mode 100644 index 0000000..a76fd76 --- /dev/null +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -0,0 +1,749 @@ +//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling and other late optimizations. This +// pass should be run after register allocation but before the post-regalloc +// scheduling pass. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "AArch64InstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +namespace { +class AArch64ExpandPseudo : public MachineFunctionPass { +public: + static char ID; + AArch64ExpandPseudo() : MachineFunctionPass(ID) {} + + const AArch64InstrInfo *TII; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + const char *getPassName() const override { + return "AArch64 pseudo instruction expansion pass"; + } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + unsigned BitSize); +}; +char AArch64ExpandPseudo::ID = 0; +} + +/// \brief Transfer implicit operands on the pseudo instruction to the +/// instructions created from the expansion. +static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, + MachineInstrBuilder &DefMI) { + const MCInstrDesc &Desc = OldMI.getDesc(); + for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; + ++i) { + const MachineOperand &MO = OldMI.getOperand(i); + assert(MO.isReg() && MO.getReg()); + if (MO.isUse()) + UseMI.addOperand(MO); + else + DefMI.addOperand(MO); + } +} + +/// \brief Helper function which extracts the specified 16-bit chunk from a +/// 64-bit value. +static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) { + assert(ChunkIdx < 4 && "Out of range chunk index specified!"); + + return (Imm >> (ChunkIdx * 16)) & 0xFFFF; +} + +/// \brief Helper function which replicates a 16-bit chunk within a 64-bit +/// value. Indices correspond to element numbers in a v4i16. +static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) { + assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!"); + const unsigned ShiftAmt = ToIdx * 16; + + // Replicate the source chunk to the destination position. + const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt; + // Clear the destination chunk. 
+  Imm &= ~(0xFFFFLL << ShiftAmt);
+  // Insert the replicated chunk.
+  return Imm | Chunk;
+}
+
+/// \brief Helper function which tries to materialize a 64-bit value with an
+/// ORR + MOVK instruction sequence.
+static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
+                       MachineBasicBlock &MBB,
+                       MachineBasicBlock::iterator &MBBI,
+                       const AArch64InstrInfo *TII, unsigned ChunkIdx) {
+  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
+  const unsigned ShiftAmt = ChunkIdx * 16;
+
+  uint64_t Encoding;
+  if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
+    // Create the ORR-immediate instruction.
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
+            .addOperand(MI.getOperand(0))
+            .addReg(AArch64::XZR)
+            .addImm(Encoding);
+
+    // Create the MOVK instruction.
+    const unsigned Imm16 = getChunk(UImm, ChunkIdx);
+    const unsigned DstReg = MI.getOperand(0).getReg();
+    const bool DstIsDead = MI.getOperand(0).isDead();
+    MachineInstrBuilder MIB1 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
+            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+            .addReg(DstReg)
+            .addImm(Imm16)
+            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
+
+    transferImpOps(MI, MIB, MIB1);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
+/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
+/// can be materialized with an ORR instruction.
+static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
+  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
+
+  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
+}
+
+/// \brief Check for identical 16-bit chunks within the constant and if so
+/// materialize them with a single ORR instruction. The remaining one or two
+/// 16-bit chunks will be materialized with MOVK instructions.
+///
+/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
+/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
+/// an ORR instruction.
+///
+static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
+                                 MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator &MBBI,
+                                 const AArch64InstrInfo *TII) {
+  typedef DenseMap<uint64_t, unsigned> CountMap;
+  CountMap Counts;
+
+  // Scan the constant and count how often every chunk occurs.
+  for (unsigned Idx = 0; Idx < 4; ++Idx)
+    ++Counts[getChunk(UImm, Idx)];
+
+  // Traverse the chunks to find one which occurs more than once.
+  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
+       Chunk != End; ++Chunk) {
+    const uint64_t ChunkVal = Chunk->first;
+    const unsigned Count = Chunk->second;
+
+    uint64_t Encoding = 0;
+
+    // We are looking for chunks which have two or three instances and can be
+    // materialized with an ORR instruction.
+    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
+      continue;
+
+    const bool CountThree = Count == 3;
+    // Create the ORR-immediate instruction.
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
+            .addOperand(MI.getOperand(0))
+            .addReg(AArch64::XZR)
+            .addImm(Encoding);
+
+    const unsigned DstReg = MI.getOperand(0).getReg();
+    const bool DstIsDead = MI.getOperand(0).isDead();
+
+    unsigned ShiftAmt = 0;
+    uint64_t Imm16 = 0;
+    // Find the first chunk not materialized with the ORR instruction.
+    for (; ShiftAmt < 64; ShiftAmt += 16) {
+      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
+
+      if (Imm16 != ChunkVal)
+        break;
+    }
+
+    // Create the first MOVK instruction.
+    MachineInstrBuilder MIB1 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
+            .addReg(DstReg,
+                    RegState::Define | getDeadRegState(DstIsDead && CountThree))
+            .addReg(DstReg)
+            .addImm(Imm16)
+            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
+
+    // In case we have three instances the whole constant is now materialized
+    // and we can exit.
+    if (CountThree) {
+      transferImpOps(MI, MIB, MIB1);
+      MI.eraseFromParent();
+      return true;
+    }
+
+    // Find the remaining chunk which needs to be materialized.
+    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
+      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
+
+      if (Imm16 != ChunkVal)
+        break;
+    }
+
+    // Create the second MOVK instruction.
+    MachineInstrBuilder MIB2 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
+            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+            .addReg(DstReg)
+            .addImm(Imm16)
+            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
+
+    transferImpOps(MI, MIB, MIB2);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
+/// \brief Check whether this chunk matches the pattern '1...0...'. This
+/// pattern starts a contiguous sequence of ones if we look at the bits from
+/// the LSB towards the MSB.
+static bool isStartChunk(uint64_t Chunk) {
+  if (Chunk == 0 || Chunk == UINT64_MAX)
+    return false;
+
+  return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64;
+}
+
+/// \brief Check whether this chunk matches the pattern '0...1...'. This
+/// pattern ends a contiguous sequence of ones if we look at the bits from
+/// the LSB towards the MSB.
+static bool isEndChunk(uint64_t Chunk) {
+  if (Chunk == 0 || Chunk == UINT64_MAX)
+    return false;
+
+  return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64;
+}
+
+/// \brief Clear or set all bits in the chunk at the given index.
+static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
+  const uint64_t Mask = 0xFFFF;
+
+  if (Clear)
+    // Clear chunk in the immediate.
+    Imm &= ~(Mask << (Idx * 16));
+  else
+    // Set all bits in the immediate for the particular chunk.
+    Imm |= Mask << (Idx * 16);
+
+  return Imm;
+}
+
+/// \brief Check whether the constant contains a sequence of contiguous ones,
+/// which might be interrupted by one or two chunks. If so, materialize the
+/// sequence of contiguous ones with an ORR instruction.
+/// Materialize the chunks which are either interrupting the sequence or
+/// outside of the sequence with a MOVK instruction.
+///
+/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
+/// which ends the sequence (0...1...). Then we are looking for constants which
+/// contain at least one S and E chunk.
+/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
+///
+/// We are also looking for constants like |S|A|B|E| where the contiguous
+/// sequence of ones wraps around the MSB into the LSB.
+///
+static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
+                              MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator &MBBI,
+                              const AArch64InstrInfo *TII) {
+  const int NotSet = -1;
+  const uint64_t Mask = 0xFFFF;
+
+  int StartIdx = NotSet;
+  int EndIdx = NotSet;
+  // Try to find the chunks which start/end a contiguous sequence of ones.
+  for (int Idx = 0; Idx < 4; ++Idx) {
+    int64_t Chunk = getChunk(UImm, Idx);
+    // Sign extend the 16-bit chunk to 64-bit.
+    Chunk = (Chunk << 48) >> 48;
+
+    if (isStartChunk(Chunk))
+      StartIdx = Idx;
+    else if (isEndChunk(Chunk))
+      EndIdx = Idx;
+  }
+
+  // Early exit in case we can't find a start/end chunk.
+  if (StartIdx == NotSet || EndIdx == NotSet)
+    return false;
+
+  // Outside of the contiguous sequence of ones everything needs to be zero.
+  uint64_t Outside = 0;
+  // Chunks between the start and end chunk need to have all their bits set.
+  uint64_t Inside = Mask;
+
+  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
+  // just swap indices and pretend we are materializing a contiguous sequence
+  // of zeros surrounded by a contiguous sequence of ones.
+  if (StartIdx > EndIdx) {
+    std::swap(StartIdx, EndIdx);
+    std::swap(Outside, Inside);
+  }
+
+  uint64_t OrrImm = UImm;
+  int FirstMovkIdx = NotSet;
+  int SecondMovkIdx = NotSet;
+
+  // Find out which chunks we need to patch up to obtain a contiguous sequence
+  // of ones.
+  for (int Idx = 0; Idx < 4; ++Idx) {
+    const uint64_t Chunk = getChunk(UImm, Idx);
+
+    // Check whether we are looking at a chunk which is not part of the
+    // contiguous sequence of ones.
+    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
+      OrrImm = updateImm(OrrImm, Idx, Outside == 0);
+
+      // Remember the index we need to patch.
+      if (FirstMovkIdx == NotSet)
+        FirstMovkIdx = Idx;
+      else
+        SecondMovkIdx = Idx;
+
+      // Check whether we are looking at a chunk which is part of the
+      // contiguous sequence of ones.
+    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
+      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
+
+      // Remember the index we need to patch.
+      if (FirstMovkIdx == NotSet)
+        FirstMovkIdx = Idx;
+      else
+        SecondMovkIdx = Idx;
+    }
+  }
+  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
+
+  // Create the ORR-immediate instruction.
+  uint64_t Encoding = 0;
+  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
+  MachineInstrBuilder MIB =
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
+          .addOperand(MI.getOperand(0))
+          .addReg(AArch64::XZR)
+          .addImm(Encoding);
+
+  const unsigned DstReg = MI.getOperand(0).getReg();
+  const bool DstIsDead = MI.getOperand(0).isDead();
+
+  const bool SingleMovk = SecondMovkIdx == NotSet;
+  // Create the first MOVK instruction.
+  MachineInstrBuilder MIB1 =
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
+          .addReg(DstReg,
+                  RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
+          .addReg(DstReg)
+          .addImm(getChunk(UImm, FirstMovkIdx))
+          .addImm(
+              AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));
+
+  // Early exit in case we only need to emit a single MOVK instruction.
+  if (SingleMovk) {
+    transferImpOps(MI, MIB, MIB1);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // Create the second MOVK instruction.
+  MachineInstrBuilder MIB2 =
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
+          .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+          .addReg(DstReg)
+          .addImm(getChunk(UImm, SecondMovkIdx))
+          .addImm(
+              AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));
+
+  transferImpOps(MI, MIB, MIB2);
+  MI.eraseFromParent();
+  return true;
+}
+
+/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
+/// real move-immediate instructions to synthesize the immediate.
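+/// For example (editor's sketch; x0 stands for whatever destination register
+/// the pseudo uses): MOVi64imm #0x0000123400005678 becomes
+///   movz x0, #0x1234, lsl #32
+///   movk x0, #0x5678
+/// and a constant with three identical chunks, e.g. 0xAAAA1234AAAAAAAA,
+/// becomes
+///   orr  x0, xzr, #0xAAAAAAAAAAAAAAAA
+///   movk x0, #0x1234, lsl #32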
+bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator MBBI,
+                                       unsigned BitSize) {
+  MachineInstr &MI = *MBBI;
+  uint64_t Imm = MI.getOperand(1).getImm();
+  const unsigned Mask = 0xFFFF;
+
+  // Try a MOVI instruction (aka ORR-immediate with the zero register).
+  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+  uint64_t Encoding;
+  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+    unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+            .addOperand(MI.getOperand(0))
+            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
+            .addImm(Encoding);
+    transferImpOps(MI, MIB, MIB);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // Scan the immediate and count the number of 16-bit chunks which are either
+  // all ones or all zeros.
+  unsigned OneChunks = 0;
+  unsigned ZeroChunks = 0;
+  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
+    const unsigned Chunk = (Imm >> Shift) & Mask;
+    if (Chunk == Mask)
+      OneChunks++;
+    else if (Chunk == 0)
+      ZeroChunks++;
+  }
+
+  // Since we can't materialize the constant with a single ORR instruction,
+  // let's see whether we can materialize 3/4 of the constant with an ORR
+  // instruction and use an additional MOVK instruction to materialize the
+  // remaining 1/4.
+  //
+  // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|.
+  //
+  // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR,
+  // we would create the following instruction sequence:
+  //
+  // ORR x0, xzr, |A|X|A|X|
+  // MOVK x0, |B|, LSL #16
+  //
+  // Only look at 64-bit constants which can't be materialized with a single
+  // instruction, i.e. which have fewer than three all-zero or all-one chunks.
+  //
+  // Ignore 32-bit constants here, they can always be materialized with a
+  // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized
+  // with a single ORR, the best sequence we can achieve is an ORR + MOVK pair.
+  // Thus we fall back to the default code below which in the best case creates
+  // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one).
+  //
+  if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) {
+    // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2
+    // identical?
+    if (getChunk(UImm, 0) == getChunk(UImm, 2)) {
+      // See if we can come up with a constant which can be materialized with
+      // ORR-immediate by replicating element 3 into element 1.
+      uint64_t OrrImm = replicateChunk(UImm, 3, 1);
+      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1))
+        return true;
+
+      // See if we can come up with a constant which can be materialized with
+      // ORR-immediate by replicating element 1 into element 3.
+      OrrImm = replicateChunk(UImm, 1, 3);
+      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3))
+        return true;
+
+      // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3
+      // identical?
+    } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) {
+      // See if we can come up with a constant which can be materialized with
+      // ORR-immediate by replicating element 2 into element 0.
+      uint64_t OrrImm = replicateChunk(UImm, 2, 0);
+      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0))
+        return true;
+
+      // See if we can come up with a constant which can be materialized with
+      // ORR-immediate by replicating element 0 into element 2.
+      OrrImm = replicateChunk(UImm, 0, 2);
+      if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2))
+        return true;
+    }
+  }
+
+  // Check for identical 16-bit chunks within the constant and, if so,
+  // materialize them with a single ORR instruction. The remaining one or two
+  // 16-bit chunks will be materialized with MOVK instructions.
+  if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII))
+    return true;
+
+  // Check whether the constant contains a sequence of contiguous ones, which
+  // might be interrupted by one or two chunks. If so, materialize the sequence
+  // of contiguous ones with an ORR instruction. Materialize the chunks which
+  // are either interrupting the sequence or outside of the sequence with a
+  // MOVK instruction.
+  if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII))
+    return true;
+
+  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
+  // more MOVK instructions to insert additional 16-bit portions into the
+  // lower bits.
+  bool isNeg = false;
+
+  // Use MOVN to materialize the high bits if we have more all-one chunks
+  // than all-zero chunks.
+  if (OneChunks > ZeroChunks) {
+    isNeg = true;
+    Imm = ~Imm;
+  }
+
+  unsigned FirstOpc;
+  if (BitSize == 32) {
+    Imm &= (1LL << 32) - 1;
+    FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
+  } else {
+    FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
+  }
+  unsigned Shift = 0;     // LSL amount for high bits with MOVZ/MOVN
+  unsigned LastShift = 0; // LSL amount for last MOVK
+  if (Imm != 0) {
+    unsigned LZ = countLeadingZeros(Imm);
+    unsigned TZ = countTrailingZeros(Imm);
+    Shift = ((63 - LZ) / 16) * 16;
+    LastShift = (TZ / 16) * 16;
+  }
+  unsigned Imm16 = (Imm >> Shift) & Mask;
+  unsigned DstReg = MI.getOperand(0).getReg();
+  bool DstIsDead = MI.getOperand(0).isDead();
+  MachineInstrBuilder MIB1 =
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc))
+          .addReg(DstReg, RegState::Define |
+                              getDeadRegState(DstIsDead && Shift == LastShift))
+          .addImm(Imm16)
+          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
+
+  // If a MOVN was used for the high bits of a negative value, flip the rest
+  // of the bits back for use with MOVK.
+  if (isNeg)
+    Imm = ~Imm;
+
+  if (Shift == LastShift) {
+    transferImpOps(MI, MIB1, MIB1);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  MachineInstrBuilder MIB2;
+  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
+  while (Shift != LastShift) {
+    Shift -= 16;
+    Imm16 = (Imm >> Shift) & Mask;
+    if (Imm16 == (isNeg ? Mask : 0))
+      continue; // This 16-bit portion is already set correctly.
+    MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
+               .addReg(DstReg,
+                       RegState::Define |
+                           getDeadRegState(DstIsDead && Shift == LastShift))
+               .addReg(DstReg)
+               .addImm(Imm16)
+               .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
+  }
+
+  transferImpOps(MI, MIB1, MIB2);
+  MI.eraseFromParent();
+  return true;
+}
+
+/// \brief If MBBI references a pseudo instruction that should be expanded here,
+/// do the expansion and return true. Otherwise return false.
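+/// E.g. register-register ALU pseudos such as ADDWrr are rewritten to their
+/// shifted-register forms (ADDWrs with an LSL #0 amount), and MOVi32imm /
+/// MOVi64imm are expanded via expandMOVImm above.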
+bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: + break; + + case AArch64::ADDWrr: + case AArch64::SUBWrr: + case AArch64::ADDXrr: + case AArch64::SUBXrr: + case AArch64::ADDSWrr: + case AArch64::SUBSWrr: + case AArch64::ADDSXrr: + case AArch64::SUBSXrr: + case AArch64::ANDWrr: + case AArch64::ANDXrr: + case AArch64::BICWrr: + case AArch64::BICXrr: + case AArch64::ANDSWrr: + case AArch64::ANDSXrr: + case AArch64::BICSWrr: + case AArch64::BICSXrr: + case AArch64::EONWrr: + case AArch64::EONXrr: + case AArch64::EORWrr: + case AArch64::EORXrr: + case AArch64::ORNWrr: + case AArch64::ORNXrr: + case AArch64::ORRWrr: + case AArch64::ORRXrr: { + unsigned Opcode; + switch (MI.getOpcode()) { + default: + return false; + case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break; + case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break; + case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break; + case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break; + case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break; + case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break; + case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break; + case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break; + case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break; + case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break; + case AArch64::BICWrr: Opcode = AArch64::BICWrs; break; + case AArch64::BICXrr: Opcode = AArch64::BICXrs; break; + case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break; + case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break; + case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break; + case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break; + case AArch64::EONWrr: Opcode = AArch64::EONWrs; break; + case AArch64::EONXrr: Opcode = AArch64::EONXrs; break; + case AArch64::EORWrr: Opcode = AArch64::EORWrs; break; + case AArch64::EORXrr: Opcode = AArch64::EORXrs; break; + case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break; + case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break; + case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break; + case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break; + } + MachineInstrBuilder MIB1 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode), + MI.getOperand(0).getReg()) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + transferImpOps(MI, MIB1, MIB1); + MI.eraseFromParent(); + return true; + } + + case AArch64::FCVTSHpseudo: { + MachineOperand Src = MI.getOperand(1); + Src.setImplicit(); + unsigned SrcH = + TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub); + auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr)) + .addOperand(MI.getOperand(0)) + .addReg(SrcH, RegState::Undef) + .addOperand(Src); + transferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } + case AArch64::LOADgot: { + // Expand into ADRP + LDR. 
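+    // For a global "foo" this yields (illustrative assembly):
+    //   adrp x0, :got:foo             // page address of foo's GOT slot
+    //   ldr  x0, [x0, :got_lo12:foo]  // load the pointer out of the slot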
+    unsigned DstReg = MI.getOperand(0).getReg();
+    const MachineOperand &MO1 = MI.getOperand(1);
+    unsigned Flags = MO1.getTargetFlags();
+    MachineInstrBuilder MIB1 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
+    MachineInstrBuilder MIB2 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
+            .addOperand(MI.getOperand(0))
+            .addReg(DstReg);
+
+    if (MO1.isGlobal()) {
+      MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
+      MIB2.addGlobalAddress(MO1.getGlobal(), 0,
+                            Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+    } else if (MO1.isSymbol()) {
+      MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
+      MIB2.addExternalSymbol(MO1.getSymbolName(),
+                             Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+    } else {
+      assert(MO1.isCPI() &&
+             "Only expect globals, externalsymbols, or constant pools");
+      MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
+                                Flags | AArch64II::MO_PAGE);
+      MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
+                                Flags | AArch64II::MO_PAGEOFF |
+                                    AArch64II::MO_NC);
+    }
+
+    transferImpOps(MI, MIB1, MIB2);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  case AArch64::MOVaddr:
+  case AArch64::MOVaddrJT:
+  case AArch64::MOVaddrCP:
+  case AArch64::MOVaddrBA:
+  case AArch64::MOVaddrTLS:
+  case AArch64::MOVaddrEXT: {
+    // Expand into ADRP + ADD.
+    unsigned DstReg = MI.getOperand(0).getReg();
+    MachineInstrBuilder MIB1 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
+            .addOperand(MI.getOperand(1));
+
+    MachineInstrBuilder MIB2 =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
+            .addOperand(MI.getOperand(0))
+            .addReg(DstReg)
+            .addOperand(MI.getOperand(2))
+            .addImm(0);
+
+    transferImpOps(MI, MIB1, MIB2);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  case AArch64::MOVi32imm:
+    return expandMOVImm(MBB, MBBI, 32);
+  case AArch64::MOVi64imm:
+    return expandMOVImm(MBB, MBBI, 64);
+  case AArch64::RET_ReallyLR:
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
+        .addReg(AArch64::LR);
+    MI.eraseFromParent();
+    return true;
+  }
+  return false;
+}
+
+/// \brief Iterate over the instructions in basic block MBB and expand any
+/// pseudo instructions. Return true if anything was modified.
+bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= expandMI(MBB, MBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+  TII = static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= expandMBB(MBB);
+  return Modified;
+}
+
+/// \brief Returns an instance of the pseudo instruction expansion pass.
+FunctionPass *llvm::createAArch64ExpandPseudoPass() {
+  return new AArch64ExpandPseudo();
+}
diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
new file mode 100644
index 0000000..c3b5369
--- /dev/null
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -0,0 +1,1981 @@
+//===-- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the AArch64-specific support for the FastISel class. Some +// of the target-specific code is generated by tablegen in the file +// AArch64GenFastISel.inc, which is #included here. +// +//===----------------------------------------------------------------------===// + +#include "AArch64.h" +#include "AArch64TargetMachine.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +namespace { + +class AArch64FastISel : public FastISel { + + class Address { + public: + typedef enum { + RegBase, + FrameIndexBase + } BaseKind; + + private: + BaseKind Kind; + union { + unsigned Reg; + int FI; + } Base; + int64_t Offset; + + public: + Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; } + void setKind(BaseKind K) { Kind = K; } + BaseKind getKind() const { return Kind; } + bool isRegBase() const { return Kind == RegBase; } + bool isFIBase() const { return Kind == FrameIndexBase; } + void setReg(unsigned Reg) { + assert(isRegBase() && "Invalid base register access!"); + Base.Reg = Reg; + } + unsigned getReg() const { + assert(isRegBase() && "Invalid base register access!"); + return Base.Reg; + } + void setFI(unsigned FI) { + assert(isFIBase() && "Invalid base frame index access!"); + Base.FI = FI; + } + unsigned getFI() const { + assert(isFIBase() && "Invalid base frame index access!"); + return Base.FI; + } + void setOffset(int64_t O) { Offset = O; } + int64_t getOffset() { return Offset; } + + bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); } + }; + + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when generating code for different targets. + const AArch64Subtarget *Subtarget; + LLVMContext *Context; + +private: + // Selection routines. + bool SelectLoad(const Instruction *I); + bool SelectStore(const Instruction *I); + bool SelectBranch(const Instruction *I); + bool SelectIndirectBr(const Instruction *I); + bool SelectCmp(const Instruction *I); + bool SelectSelect(const Instruction *I); + bool SelectFPExt(const Instruction *I); + bool SelectFPTrunc(const Instruction *I); + bool SelectFPToInt(const Instruction *I, bool Signed); + bool SelectIntToFP(const Instruction *I, bool Signed); + bool SelectRem(const Instruction *I, unsigned ISDOpcode); + bool SelectCall(const Instruction *I, const char *IntrMemName); + bool SelectIntrinsicCall(const IntrinsicInst &I); + bool SelectRet(const Instruction *I); + bool SelectTrunc(const Instruction *I); + bool SelectIntExt(const Instruction *I); + bool SelectMul(const Instruction *I); + + // Utility helper routines. 
+  bool isTypeLegal(Type *Ty, MVT &VT);
+  bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
+  bool ComputeAddress(const Value *Obj, Address &Addr);
+  bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
+                       bool UseUnscaled);
+  void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
+                            unsigned Flags, bool UseUnscaled);
+  bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
+  bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+                          unsigned Alignment);
+  // Emit functions.
+  bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
+  bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
+                bool UseUnscaled = false);
+  bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
+                 bool UseUnscaled = false);
+  unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
+  unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
+
+  unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
+  unsigned AArch64MaterializeGV(const GlobalValue *GV);
+
+  // Call handling routines.
+private:
+  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
+  bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
+                       SmallVectorImpl<unsigned> &ArgRegs,
+                       SmallVectorImpl<MVT> &ArgVTs,
+                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+                       SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
+                       unsigned &NumBytes);
+  bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+                  const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
+
+public:
+  // Backend specific FastISel code.
+  unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+  unsigned TargetMaterializeConstant(const Constant *C) override;
+
+  explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
+                           const TargetLibraryInfo *libInfo)
+      : FastISel(funcInfo, libInfo) {
+    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+    Context = &funcInfo.Fn->getContext();
+  }
+
+  bool TargetSelectInstruction(const Instruction *I) override;
+
+#include "AArch64GenFastISel.inc"
+};
+
+} // end anonymous namespace
+
+#include "AArch64GenCallingConv.inc"
+
+CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
+  if (CC == CallingConv::WebKit_JS)
+    return CC_AArch64_WebKit_JS;
+  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
+}
+
+unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
+         "Alloca should always return a pointer.");
+
+  // Don't handle dynamic allocas.
+  if (!FuncInfo.StaticAllocaMap.count(AI))
+    return 0;
+
+  DenseMap<const AllocaInst *, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+
+  if (SI != FuncInfo.StaticAllocaMap.end()) {
+    unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
+            ResultReg)
+        .addFrameIndex(SI->second)
+        .addImm(0)
+        .addImm(0);
+    return ResultReg;
+  }
+
+  return 0;
+}
+
+unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return 0;
+
+  const APFloat Val = CFP->getValueAPF();
+  bool is64bit = (VT == MVT::f64);
+
+  // This checks to see if we can use FMOV instructions to materialize
+  // a constant, otherwise we have to materialize via the constant pool.
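+  // FMOV (scalar, immediate) encodes only values of the form
+  // (-1)^s * (1 + m/16) * 2^r with m in [0, 15] and r in [-3, 4],
+  // e.g. 1.0, -0.5 or 31.0, but not 0.0 or 0.1.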
+  if (TLI.isFPImmLegal(Val, VT)) {
+    int Imm;
+    unsigned Opc;
+    if (is64bit) {
+      Imm = AArch64_AM::getFP64Imm(Val);
+      Opc = AArch64::FMOVDi;
+    } else {
+      Imm = AArch64_AM::getFP32Imm(Val);
+      Opc = AArch64::FMOVSi;
+    }
+    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+        .addImm(Imm);
+    return ResultReg;
+  }
+
+  // Materialize via constant pool. MachineConstantPool wants an explicit
+  // alignment.
+  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
+  if (Align == 0)
+    Align = DL.getTypeAllocSize(CFP->getType());
+
+  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+          ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
+
+  unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+      .addReg(ADRPReg)
+      .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+  return ResultReg;
+}
+
+unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
+  // We can't handle thread-local variables quickly yet. Unfortunately we have
+  // to peer through any aliases to find out if that rule applies.
+  const GlobalValue *TLSGV = GV;
+  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+    TLSGV = GA->getAliasee();
+
+  // MachO still uses GOT for large code-model accesses, but ELF requires
+  // movz/movk sequences, which FastISel doesn't handle yet.
+  if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
+    return 0;
+
+  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV))
+    if (GVar->isThreadLocal())
+      return 0;
+
+  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+  EVT DestEVT = TLI.getValueType(GV->getType(), true);
+  if (!DestEVT.isSimple())
+    return 0;
+
+  unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+  unsigned ResultReg;
+
+  if (OpFlags & AArch64II::MO_GOT) {
+    // ADRP + LDRX
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+            ADRPReg)
+        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+
+    ResultReg = createResultReg(&AArch64::GPR64RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
+            ResultReg)
+        .addReg(ADRPReg)
+        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+                                     AArch64II::MO_NC);
+  } else {
+    // ADRP + ADDX
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+            ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
+
+    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
+            ResultReg)
+        .addReg(ADRPReg)
+        .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+        .addImm(0);
+  }
+  return ResultReg;
+}
+
+unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
+  EVT CEVT = TLI.getValueType(C->getType(), true);
+
+  // Only handle simple types.
+  if (!CEVT.isSimple())
+    return 0;
+  MVT VT = CEVT.getSimpleVT();
+
+  // FIXME: Handle ConstantInt.
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+    return AArch64MaterializeFP(CFP, VT);
+  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    return AArch64MaterializeGV(GV);
+
+  return 0;
+}
+
+// Computes the address to get to an object.
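+// E.g. for "getelementptr i32* %p, i64 4" it fills in a RegBase address with
+// the register holding %p and Offset = 16, while a static alloca becomes a
+// FrameIndexBase address carrying the alloca's frame index.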
+bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
+  const User *U = nullptr;
+  unsigned Opcode = Instruction::UserOp1;
+  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+    // Don't walk into other basic blocks unless the object is an alloca from
+    // another block, otherwise it may not have a virtual register assigned.
+    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+      Opcode = I->getOpcode();
+      U = I;
+    }
+  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+    Opcode = C->getOpcode();
+    U = C;
+  }
+
+  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+    if (Ty->getAddressSpace() > 255)
+      // Fast instruction selection doesn't support the special
+      // address spaces.
+      return false;
+
+  switch (Opcode) {
+  default:
+    break;
+  case Instruction::BitCast: {
+    // Look through bitcasts.
+    return ComputeAddress(U->getOperand(0), Addr);
+  }
+  case Instruction::IntToPtr: {
+    // Look past no-op inttoptrs.
+    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+      return ComputeAddress(U->getOperand(0), Addr);
+    break;
+  }
+  case Instruction::PtrToInt: {
+    // Look past no-op ptrtoints.
+    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+      return ComputeAddress(U->getOperand(0), Addr);
+    break;
+  }
+  case Instruction::GetElementPtr: {
+    Address SavedAddr = Addr;
+    uint64_t TmpOffset = Addr.getOffset();
+
+    // Iterate through the GEP folding the constants into offsets where
+    // we can.
+    gep_type_iterator GTI = gep_type_begin(U);
+    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
+         ++i, ++GTI) {
+      const Value *Op = *i;
+      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+        const StructLayout *SL = DL.getStructLayout(STy);
+        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+        TmpOffset += SL->getElementOffset(Idx);
+      } else {
+        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+        for (;;) {
+          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+            // Constant-offset addressing.
+            TmpOffset += CI->getSExtValue() * S;
+            break;
+          }
+          if (canFoldAddIntoGEP(U, Op)) {
+            // A compatible add with a constant operand. Fold the constant.
+            ConstantInt *CI =
+                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+            TmpOffset += CI->getSExtValue() * S;
+            // Iterate on the other operand.
+            Op = cast<AddOperator>(Op)->getOperand(0);
+            continue;
+          }
+          // Unsupported
+          goto unsupported_gep;
+        }
+      }
+    }
+
+    // Try to grab the base operand now.
+    Addr.setOffset(TmpOffset);
+    if (ComputeAddress(U->getOperand(0), Addr))
+      return true;
+
+    // We failed, restore everything and try the other options.
+    Addr = SavedAddr;
+
+  unsupported_gep:
+    break;
+  }
+  case Instruction::Alloca: {
+    const AllocaInst *AI = cast<AllocaInst>(Obj);
+    DenseMap<const AllocaInst *, int>::iterator SI =
+        FuncInfo.StaticAllocaMap.find(AI);
+    if (SI != FuncInfo.StaticAllocaMap.end()) {
+      Addr.setKind(Address::FrameIndexBase);
+      Addr.setFI(SI->second);
+      return true;
+    }
+    break;
+  }
+  }
+
+  // Try to get this in a register if nothing else has worked.
+  if (!Addr.isValid())
+    Addr.setReg(getRegForValue(Obj));
+  return Addr.isValid();
+}
+
+bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
+  EVT evt = TLI.getValueType(Ty, true);
+
+  // Only handle simple types.
+  if (evt == MVT::Other || !evt.isSimple())
+    return false;
+  VT = evt.getSimpleVT();
+
+  // This is a legal type, but it's not something we handle in fast-isel.
+  if (VT == MVT::f128)
+    return false;
+
+  // Handle all other legal types, i.e. a register that will directly hold this
+  // value.
+  return TLI.isTypeLegal(VT);
+}
+
+bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
+  if (isTypeLegal(Ty, VT))
+    return true;
+
+  // If this is a type that can be sign- or zero-extended to a basic operation,
+  // go ahead and accept it now. For stores, this reflects truncation.
+  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
+    return true;
+
+  return false;
+}
+
+bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
+                                      int64_t ScaleFactor, bool UseUnscaled) {
+  bool needsLowering = false;
+  int64_t Offset = Addr.getOffset();
+  switch (VT.SimpleTy) {
+  default:
+    return false;
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::i64:
+  case MVT::f32:
+  case MVT::f64:
+    if (!UseUnscaled)
+      // Using scaled, 12-bit, unsigned immediate offsets.
+      needsLowering = ((Offset & 0xfff) != Offset);
+    else
+      // Using unscaled, 9-bit, signed immediate offsets.
+      needsLowering = (Offset > 256 || Offset < -256);
+    break;
+  }
+
+  // FIXME: If this is a stack pointer and the offset needs to be simplified
+  // then put the alloca address into a register, set the base type back to
+  // register and continue. This should almost never happen.
+  if (needsLowering && Addr.getKind() == Address::FrameIndexBase) {
+    return false;
+  }
+
+  // Since the offset is too large for the load/store instruction get the
+  // reg+offset into a register.
+  if (needsLowering) {
+    uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor;
+    unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false,
+                                      UnscaledOffset, MVT::i64);
+    if (ResultReg == 0)
+      return false;
+    Addr.setReg(ResultReg);
+    Addr.setOffset(0);
+  }
+  return true;
+}
+
+void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
+                                           const MachineInstrBuilder &MIB,
+                                           unsigned Flags, bool UseUnscaled) {
+  int64_t Offset = Addr.getOffset();
+  // Frame base works a bit differently. Handle it separately.
+  if (Addr.getKind() == Address::FrameIndexBase) {
+    int FI = Addr.getFI();
+    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
+    // and alignment should be based on the VT.
+    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(FI, Offset), Flags,
+        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+    // Now add the rest of the operands.
+    MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO);
+  } else {
+    // Now add the rest of the operands.
+    MIB.addReg(Addr.getReg());
+    MIB.addImm(Offset);
+  }
+}
+
+bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
+                               bool UseUnscaled) {
+  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
+  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
+  if (!UseUnscaled && Addr.getOffset() < 0)
+    UseUnscaled = true;
+
+  unsigned Opc;
+  const TargetRegisterClass *RC;
+  bool VTIsi1 = false;
+  int64_t ScaleFactor = 0;
+  switch (VT.SimpleTy) {
+  default:
+    return false;
+  case MVT::i1:
+    VTIsi1 = true;
+  // Intentional fall-through.
+  case MVT::i8:
+    Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
+    RC = &AArch64::GPR32RegClass;
+    ScaleFactor = 1;
+    break;
+  case MVT::i16:
+    Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
+    RC = &AArch64::GPR32RegClass;
+    ScaleFactor = 2;
+    break;
+  case MVT::i32:
+    Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
+    RC = &AArch64::GPR32RegClass;
+    ScaleFactor = 4;
+    break;
+  case MVT::i64:
+    Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
+    RC = &AArch64::GPR64RegClass;
+    ScaleFactor = 8;
+    break;
+  case MVT::f32:
+    Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
+    RC = TLI.getRegClassFor(VT);
+    ScaleFactor = 4;
+    break;
+  case MVT::f64:
+    Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
+    RC = TLI.getRegClassFor(VT);
+    ScaleFactor = 8;
+    break;
+  }
+  // Scale the offset.
+  if (!UseUnscaled) {
+    int64_t Offset = Addr.getOffset();
+    if (Offset & (ScaleFactor - 1))
+      // Retry using an unscaled, 9-bit, signed immediate offset.
+      return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true);
+
+    Addr.setOffset(Offset / ScaleFactor);
+  }
+
+  // Simplify this down to something we can handle.
+  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
+    return false;
+
+  // Create the base instruction, then add the operands.
+  ResultReg = createResultReg(RC);
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                                    TII.get(Opc), ResultReg);
+  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled);
+
+  // Loading an i1 requires special handling.
+  if (VTIsi1) {
+    MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
+    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
+            ANDReg)
+        .addReg(ResultReg)
+        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+    ResultReg = ANDReg;
+  }
+  return true;
+}
+
+bool AArch64FastISel::SelectLoad(const Instruction *I) {
+  MVT VT;
+  // Verify we have a legal type before going any further. Currently, we handle
+  // simple types that will directly fit in a register (i32/f32/i64/f64) or
+  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
+  if (!isLoadStoreTypeLegal(I->getType(), VT) || cast<LoadInst>(I)->isAtomic())
+    return false;
+
+  // See if we can handle this address.
+  Address Addr;
+  if (!ComputeAddress(I->getOperand(0), Addr))
+    return false;
+
+  unsigned ResultReg;
+  if (!EmitLoad(VT, ResultReg, Addr))
+    return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
+                                bool UseUnscaled) {
+  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
+  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
+  if (!UseUnscaled && Addr.getOffset() < 0)
+    UseUnscaled = true;
+
+  unsigned StrOpc;
+  bool VTIsi1 = false;
+  int64_t ScaleFactor = 0;
+  // Using scaled, 12-bit, unsigned immediate offsets.
+  switch (VT.SimpleTy) {
+  default:
+    return false;
+  case MVT::i1:
+    VTIsi1 = true;
+  // Intentional fall-through.
+  case MVT::i8:
+    StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
+    ScaleFactor = 1;
+    break;
+  case MVT::i16:
+    StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
+    ScaleFactor = 2;
+    break;
+  case MVT::i32:
+    StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
+    ScaleFactor = 4;
+    break;
+  case MVT::i64:
+    StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
+    ScaleFactor = 8;
+    break;
+  case MVT::f32:
+    StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
+    ScaleFactor = 4;
+    break;
+  case MVT::f64:
+    StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
+    ScaleFactor = 8;
+    break;
+  }
+  // Scale the offset.
+  if (!UseUnscaled) {
+    int64_t Offset = Addr.getOffset();
+    if (Offset & (ScaleFactor - 1))
+      // Retry using an unscaled, 9-bit, signed immediate offset.
+      return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true);
+
+    Addr.setOffset(Offset / ScaleFactor);
+  }
+
+  // Simplify this down to something we can handle.
+  if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled))
+    return false;
+
+  // Storing an i1 requires special handling.
+  if (VTIsi1) {
+    MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
+    unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
+            ANDReg)
+        .addReg(SrcReg)
+        .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+    SrcReg = ANDReg;
+  }
+  // Create the base instruction, then add the operands.
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                                    TII.get(StrOpc)).addReg(SrcReg);
+  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
+  return true;
+}
+
+bool AArch64FastISel::SelectStore(const Instruction *I) {
+  MVT VT;
+  Value *Op0 = I->getOperand(0);
+  // Verify we have a legal type before going any further. Currently, we handle
+  // simple types that will directly fit in a register (i32/f32/i64/f64) or
+  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
+  if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
+      cast<StoreInst>(I)->isAtomic())
+    return false;
+
+  // Get the value to be stored into a register.
+  unsigned SrcReg = getRegForValue(Op0);
+  if (SrcReg == 0)
+    return false;
+
+  // See if we can handle this address.
+  Address Addr;
+  if (!ComputeAddress(I->getOperand(1), Addr))
+    return false;
+
+  if (!EmitStore(VT, SrcReg, Addr))
+    return false;
+  return true;
+}
+
+static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
+  switch (Pred) {
+  case CmpInst::FCMP_ONE:
+  case CmpInst::FCMP_UEQ:
+  default:
+    // AL is our "false" for now. The other two need more compares.
+    return AArch64CC::AL;
+  case CmpInst::ICMP_EQ:
+  case CmpInst::FCMP_OEQ:
+    return AArch64CC::EQ;
+  case CmpInst::ICMP_SGT:
+  case CmpInst::FCMP_OGT:
+    return AArch64CC::GT;
+  case CmpInst::ICMP_SGE:
+  case CmpInst::FCMP_OGE:
+    return AArch64CC::GE;
+  case CmpInst::ICMP_UGT:
+  case CmpInst::FCMP_UGT:
+    return AArch64CC::HI;
+  case CmpInst::FCMP_OLT:
+    return AArch64CC::MI;
+  case CmpInst::ICMP_ULE:
+  case CmpInst::FCMP_OLE:
+    return AArch64CC::LS;
+  case CmpInst::FCMP_ORD:
+    return AArch64CC::VC;
+  case CmpInst::FCMP_UNO:
+    return AArch64CC::VS;
+  case CmpInst::FCMP_UGE:
+    return AArch64CC::PL;
+  case CmpInst::ICMP_SLT:
+  case CmpInst::FCMP_ULT:
+    return AArch64CC::LT;
+  case CmpInst::ICMP_SLE:
+  case CmpInst::FCMP_ULE:
+    return AArch64CC::LE;
+  case CmpInst::FCMP_UNE:
+  case CmpInst::ICMP_NE:
+    return AArch64CC::NE;
+  case CmpInst::ICMP_UGE:
+    return AArch64CC::HS;
+  case CmpInst::ICMP_ULT:
+    return AArch64CC::LO;
+  }
+}
+
+bool AArch64FastISel::SelectBranch(const Instruction *I) {
+  const BranchInst *BI = cast<BranchInst>(I);
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
+      // We may not handle every CC for now.
+      AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
+      if (CC == AArch64CC::AL)
+        return false;
+
+      // Emit the cmp.
+      if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+        return false;
+
+      // Emit the branch.
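+      // This lowers to "b.<cc> TBB"; the fall-through edge to FBB is emitted
+      // below by FastEmitBranch, so e.g. a one-use "icmp slt" feeding a br
+      // becomes a SUBS + B.LT pair.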
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+          .addImm(CC)
+          .addMBB(TBB);
+      FuncInfo.MBB->addSuccessor(TBB);
+
+      FastEmitBranch(FBB, DbgLoc);
+      return true;
+    }
+  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+    MVT SrcVT;
+    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+        (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
+      unsigned CondReg = getRegForValue(TI->getOperand(0));
+      if (CondReg == 0)
+        return false;
+
+      // Issue an extract_subreg to get the lower 32-bits.
+      if (SrcVT == MVT::i64)
+        CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
+                                             AArch64::sub_32);
+
+      MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
+      unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::ANDWri), ANDReg)
+          .addReg(CondReg)
+          .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(AArch64::SUBSWri))
+          .addReg(ANDReg)
+          .addReg(ANDReg)
+          .addImm(0)
+          .addImm(0);
+
+      unsigned CC = AArch64CC::NE;
+      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+        std::swap(TBB, FBB);
+        CC = AArch64CC::EQ;
+      }
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+          .addImm(CC)
+          .addMBB(TBB);
+      FuncInfo.MBB->addSuccessor(TBB);
+      FastEmitBranch(FBB, DbgLoc);
+      return true;
+    }
+  } else if (const ConstantInt *CI =
+                 dyn_cast<ConstantInt>(BI->getCondition())) {
+    uint64_t Imm = CI->getZExtValue();
+    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
+        .addMBB(Target);
+    FuncInfo.MBB->addSuccessor(Target);
+    return true;
+  }
+
+  unsigned CondReg = getRegForValue(BI->getCondition());
+  if (CondReg == 0)
+    return false;
+
+  // We've been divorced from our compare! Our block was split, and
+  // now our compare lives in a predecessor block. We mustn't
+  // re-compare here, as the children of the compare aren't guaranteed
+  // live across the block boundary (we *could* check for this).
+  // Regardless, the compare has been done in the predecessor block,
+  // and it left a value for us in a virtual register. Ergo, we test
+  // the one-bit value left in the virtual register.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
+          AArch64::WZR)
+      .addReg(CondReg)
+      .addImm(0)
+      .addImm(0);
+
+  unsigned CC = AArch64CC::NE;
+  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+    std::swap(TBB, FBB);
+    CC = AArch64CC::EQ;
+  }
+
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+      .addImm(CC)
+      .addMBB(TBB);
+  FuncInfo.MBB->addSuccessor(TBB);
+  FastEmitBranch(FBB, DbgLoc);
+  return true;
+}
+
+bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
+  const IndirectBrInst *BI = cast<IndirectBrInst>(I);
+  unsigned AddrReg = getRegForValue(BI->getOperand(0));
+  if (AddrReg == 0)
+    return false;
+
+  // Emit the indirect branch.
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
+      .addReg(AddrReg);
+
+  // Make sure the CFG is up-to-date.
+  for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i)
+    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]);
+
+  return true;
+}
+
+bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
+  Type *Ty = Src1Value->getType();
+  EVT SrcEVT = TLI.getValueType(Ty, true);
+  if (!SrcEVT.isSimple())
+    return false;
+  MVT SrcVT = SrcEVT.getSimpleVT();
+
+  // Check to see if the 2nd operand is a constant that we can encode directly
+  // in the compare.
+  uint64_t Imm;
+  bool UseImm = false;
+  bool isNegativeImm = false;
+  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
+    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
+        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
+      const APInt &CIVal = ConstInt->getValue();
+
+      Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue();
+      if (CIVal.isNegative()) {
+        isNegativeImm = true;
+        Imm = -Imm;
+      }
+      // FIXME: We can handle more immediates using shifts.
+      UseImm = ((Imm & 0xfff) == Imm);
+    }
+  } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
+    if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
+      if (ConstFP->isZero() && !ConstFP->isNegative())
+        UseImm = true;
+  }
+
+  unsigned ZReg;
+  unsigned CmpOpc;
+  bool isICmp = true;
+  bool needsExt = false;
+  switch (SrcVT.SimpleTy) {
+  default:
+    return false;
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
+    needsExt = true;
+  // Intentional fall-through.
+  case MVT::i32:
+    ZReg = AArch64::WZR;
+    if (UseImm)
+      CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
+    else
+      CmpOpc = AArch64::SUBSWrr;
+    break;
+  case MVT::i64:
+    ZReg = AArch64::XZR;
+    if (UseImm)
+      CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
+    else
+      CmpOpc = AArch64::SUBSXrr;
+    break;
+  case MVT::f32:
+    isICmp = false;
+    CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
+    break;
+  case MVT::f64:
+    isICmp = false;
+    CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
+    break;
+  }
+
+  unsigned SrcReg1 = getRegForValue(Src1Value);
+  if (SrcReg1 == 0)
+    return false;
+
+  unsigned SrcReg2;
+  if (!UseImm) {
+    SrcReg2 = getRegForValue(Src2Value);
+    if (SrcReg2 == 0)
+      return false;
+  }
+
+  // We have i1, i8, or i16, we need to either zero extend or sign extend.
+  if (needsExt) {
+    SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
+    if (SrcReg1 == 0)
+      return false;
+    if (!UseImm) {
+      SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
+      if (SrcReg2 == 0)
+        return false;
+    }
+  }
+
+  if (isICmp) {
+    if (UseImm)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
+          .addReg(ZReg)
+          .addReg(SrcReg1)
+          .addImm(Imm)
+          .addImm(0);
+    else
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
+          .addReg(ZReg)
+          .addReg(SrcReg1)
+          .addReg(SrcReg2);
+  } else {
+    if (UseImm)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
+          .addReg(SrcReg1);
+    else
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
+          .addReg(SrcReg1)
+          .addReg(SrcReg2);
+  }
+  return true;
+}
+
+bool AArch64FastISel::SelectCmp(const Instruction *I) {
+  const CmpInst *CI = cast<CmpInst>(I);
+
+  // We may not handle every CC for now.
+  AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
+  if (CC == AArch64CC::AL)
+    return false;
+
+  // Emit the cmp.
+  if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+    return false;
+
+  // Now set a register based on the comparison.
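+  // CSINC Wd, WZR, WZR, <inverted cc> yields WZR (0) when the inverted
+  // condition holds and WZR + 1 (1) otherwise, i.e. exactly 1 when the
+  // original condition is true.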
+  AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
+  unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
+          ResultReg)
+      .addReg(AArch64::WZR)
+      .addReg(AArch64::WZR)
+      .addImm(invertedCC);
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool AArch64FastISel::SelectSelect(const Instruction *I) {
+  const SelectInst *SI = cast<SelectInst>(I);
+
+  EVT DestEVT = TLI.getValueType(SI->getType(), true);
+  if (!DestEVT.isSimple())
+    return false;
+
+  MVT DestVT = DestEVT.getSimpleVT();
+  if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 &&
+      DestVT != MVT::f64)
+    return false;
+
+  unsigned CondReg = getRegForValue(SI->getCondition());
+  if (CondReg == 0)
+    return false;
+  unsigned TrueReg = getRegForValue(SI->getTrueValue());
+  if (TrueReg == 0)
+    return false;
+  unsigned FalseReg = getRegForValue(SI->getFalseValue());
+  if (FalseReg == 0)
+    return false;
+
+  MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
+  unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
+          ANDReg)
+      .addReg(CondReg)
+      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
+      .addReg(ANDReg)
+      .addReg(ANDReg)
+      .addImm(0)
+      .addImm(0);
+
+  unsigned SelectOpc;
+  switch (DestVT.SimpleTy) {
+  default:
+    return false;
+  case MVT::i32:
+    SelectOpc = AArch64::CSELWr;
+    break;
+  case MVT::i64:
+    SelectOpc = AArch64::CSELXr;
+    break;
+  case MVT::f32:
+    SelectOpc = AArch64::FCSELSrrr;
+    break;
+  case MVT::f64:
+    SelectOpc = AArch64::FCSELDrrr;
+    break;
+  }
+
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc),
+          ResultReg)
+      .addReg(TrueReg)
+      .addReg(FalseReg)
+      .addImm(AArch64CC::NE);
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool AArch64FastISel::SelectFPExt(const Instruction *I) {
+  Value *V = I->getOperand(0);
+  if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
+    return false;
+
+  unsigned Op = getRegForValue(V);
+  if (Op == 0)
+    return false;
+
+  unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
+          ResultReg).addReg(Op);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
+  Value *V = I->getOperand(0);
+  if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
+    return false;
+
+  unsigned Op = getRegForValue(V);
+  if (Op == 0)
+    return false;
+
+  unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
+          ResultReg).addReg(Op);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+// FPToUI and FPToSI
+bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
+  MVT DestVT;
+  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
+    return false;
+
+  unsigned SrcReg = getRegForValue(I->getOperand(0));
+  if (SrcReg == 0)
+    return false;
+
+  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
+  if (SrcVT == MVT::f128)
+    return false;
+
+  unsigned Opc;
+  if (SrcVT == MVT::f64) {
+    if (Signed)
+      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
+    else
+      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
+  } else {
+    if (Signed)
+      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
+    else
+      Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
+  }
+  unsigned ResultReg = createResultReg(
+      DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+      .addReg(SrcReg);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
+  MVT DestVT;
+  if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
+    return false;
+  assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
+         "Unexpected value type.");
+
+  unsigned SrcReg = getRegForValue(I->getOperand(0));
+  if (SrcReg == 0)
+    return false;
+
+  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
+
+  // Handle sign-extension.
+  if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
+    SrcReg =
+        EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
+    if (SrcReg == 0)
+      return false;
+  }
+
+  MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
+                                                  : &AArch64::GPR32RegClass);
+
+  unsigned Opc;
+  if (SrcVT == MVT::i64) {
+    if (Signed)
+      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
+    else
+      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
+  } else {
+    if (Signed)
+      Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
+    else
+      Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
+  }
+
+  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+      .addReg(SrcReg);
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool AArch64FastISel::ProcessCallArgs(
+    SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
+    SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+    SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
+    unsigned &NumBytes) {
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
+  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
+
+  // Get a count of how many bytes are to be pushed on the stack.
+  NumBytes = CCInfo.getNextStackOffset();
+
+  // Issue CALLSEQ_START
+  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
+      .addImm(NumBytes);
+
+  // Process the args.
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    unsigned Arg = ArgRegs[VA.getValNo()];
+    MVT ArgVT = ArgVTs[VA.getValNo()];
+
+    // Handle arg promotion: SExt, ZExt, AExt.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt: {
+      MVT DestVT = VA.getLocVT();
+      MVT SrcVT = ArgVT;
+      Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false);
+      if (Arg == 0)
+        return false;
+      ArgVT = DestVT;
+      break;
+    }
+    case CCValAssign::AExt:
+    // Intentional fall-through.
+    case CCValAssign::ZExt: {
+      MVT DestVT = VA.getLocVT();
+      MVT SrcVT = ArgVT;
+      Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true);
+      if (Arg == 0)
+        return false;
+      ArgVT = DestVT;
+      break;
+    }
+    default:
+      llvm_unreachable("Unknown arg promotion!");
+    }
+
+    // Now copy/store arg to correct locations.
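+    // Register arguments become plain COPYs into the ABI-assigned physical
+    // registers; stack arguments are stored relative to SP at the offset
+    // CCState assigned, with a big-endian adjustment for sub-8-byte values.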
+    if (VA.isRegLoc() && !VA.needsCustom()) {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
+      RegArgs.push_back(VA.getLocReg());
+    } else if (VA.needsCustom()) {
+      // FIXME: Handle custom args.
+      return false;
+    } else {
+      assert(VA.isMemLoc() && "Assuming store on stack.");
+
+      // Need to store on the stack.
+      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+
+      unsigned BEAlign = 0;
+      if (ArgSize < 8 && !Subtarget->isLittleEndian())
+        BEAlign = 8 - ArgSize;
+
+      Address Addr;
+      Addr.setKind(Address::RegBase);
+      Addr.setReg(AArch64::SP);
+      Addr.setOffset(VA.getLocMemOffset() + BEAlign);
+
+      if (!EmitStore(ArgVT, Arg, Addr))
+        return false;
+    }
+  }
+  return true;
+}
+
+bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+                                 const Instruction *I, CallingConv::ID CC,
+                                 unsigned &NumBytes) {
+  // Issue CALLSEQ_END
+  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
+      .addImm(NumBytes)
+      .addImm(0);
+
+  // Now the return value.
+  if (RetVT != MVT::isVoid) {
+    SmallVector<CCValAssign, 16> RVLocs;
+    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
+    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
+
+    // Only handle a single return value.
+    if (RVLocs.size() != 1)
+      return false;
+
+    // Copy all of the result registers out of their specified physreg.
+    MVT CopyVT = RVLocs[0].getValVT();
+    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+            TII.get(TargetOpcode::COPY),
+            ResultReg).addReg(RVLocs[0].getLocReg());
+    UsedRegs.push_back(RVLocs[0].getLocReg());
+
+    // Finally update the result.
+    UpdateValueMap(I, ResultReg);
+  }
+
+  return true;
+}
+
+bool AArch64FastISel::SelectCall(const Instruction *I,
+                                 const char *IntrMemName = nullptr) {
+  const CallInst *CI = cast<CallInst>(I);
+  const Value *Callee = CI->getCalledValue();
+
+  // Don't handle inline asm or intrinsics.
+  if (isa<InlineAsm>(Callee))
+    return false;
+
+  // Only handle global variable Callees.
+  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+  if (!GV)
+    return false;
+
+  // Check the calling convention.
+  ImmutableCallSite CS(CI);
+  CallingConv::ID CC = CS.getCallingConv();
+
+  // Let SDISel handle vararg functions.
+  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  if (FTy->isVarArg())
+    return false;
+
+  // Handle *simple* calls for now.
+  MVT RetVT;
+  Type *RetTy = I->getType();
+  if (RetTy->isVoidTy())
+    RetVT = MVT::isVoid;
+  else if (!isTypeLegal(RetTy, RetVT))
+    return false;
+
+  // Set up the argument vectors.
+  SmallVector<Value *, 8> Args;
+  SmallVector<unsigned, 8> ArgRegs;
+  SmallVector<MVT, 8> ArgVTs;
+  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+  Args.reserve(CS.arg_size());
+  ArgRegs.reserve(CS.arg_size());
+  ArgVTs.reserve(CS.arg_size());
+  ArgFlags.reserve(CS.arg_size());
+
+  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+       i != e; ++i) {
+    // If we're lowering a memory intrinsic instead of a regular call, skip the
+    // last two arguments, which shouldn't be passed to the underlying function.
+    if (IntrMemName && e - i <= 2)
+      break;
+
+    unsigned Arg = getRegForValue(*i);
+    if (Arg == 0)
+      return false;
+
+    ISD::ArgFlagsTy Flags;
+    unsigned AttrInd = i - CS.arg_begin() + 1;
+    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
+      Flags.setSExt();
+    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
+      Flags.setZExt();
+
+    // FIXME: Only handle *easy* calls for now.
+    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
+        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
+        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
+        CS.paramHasAttr(AttrInd, Attribute::ByVal))
+      return false;
+
+    MVT ArgVT;
+    Type *ArgTy = (*i)->getType();
+    if (!isTypeLegal(ArgTy, ArgVT) &&
+        !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
+      return false;
+
+    // We don't handle vector parameters yet.
+    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+      return false;
+
+    unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+    Flags.setOrigAlign(OriginalAlignment);
+
+    Args.push_back(*i);
+    ArgRegs.push_back(Arg);
+    ArgVTs.push_back(ArgVT);
+    ArgFlags.push_back(Flags);
+  }
+
+  // Handle the arguments now that we've gotten them.
+  SmallVector<unsigned, 4> RegArgs;
+  unsigned NumBytes;
+  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
+    return false;
+
+  // Issue the call.
+  MachineInstrBuilder MIB;
+  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
+  if (!IntrMemName)
+    MIB.addGlobalAddress(GV, 0, 0);
+  else
+    MIB.addExternalSymbol(IntrMemName, 0);
+
+  // Add implicit physical register uses to the call.
+  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
+    MIB.addReg(RegArgs[i], RegState::Implicit);
+
+  // Add a register mask with the call-preserved registers.
+  // Proper defs for return values will be added by setPhysRegsDeadExcept().
+  MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+
+  // Finish off the call including any return values.
+  SmallVector<unsigned, 4> UsedRegs;
+  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes))
+    return false;
+
+  // Set all unused physreg defs as dead.
+  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+  return true;
+}
+
+bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
+  if (Alignment)
+    return Len / Alignment <= 4;
+  else
+    return Len < 32;
+}
+
+bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
+                                         uint64_t Len, unsigned Alignment) {
+  // Make sure we don't bloat code by inlining very large memcpy's.
+  if (!IsMemCpySmall(Len, Alignment))
+    return false;
+
+  int64_t UnscaledOffset = 0;
+  Address OrigDest = Dest;
+  Address OrigSrc = Src;
+
+  while (Len) {
+    MVT VT;
+    if (!Alignment || Alignment >= 8) {
+      if (Len >= 8)
+        VT = MVT::i64;
+      else if (Len >= 4)
+        VT = MVT::i32;
+      else if (Len >= 2)
+        VT = MVT::i16;
+      else {
+        VT = MVT::i8;
+      }
+    } else {
+      // Bound based on alignment.
+      if (Len >= 4 && Alignment == 4)
+        VT = MVT::i32;
+      else if (Len >= 2 && Alignment == 2)
+        VT = MVT::i16;
+      else {
+        VT = MVT::i8;
+      }
+    }
+
+    bool RV;
+    unsigned ResultReg;
+    RV = EmitLoad(VT, ResultReg, Src);
+    assert(RV == true && "Should be able to handle this load.");
+    RV = EmitStore(VT, ResultReg, Dest);
+    assert(RV == true && "Should be able to handle this store.");
+    (void)RV;
+
+    int64_t Size = VT.getSizeInBits() / 8;
+    Len -= Size;
+    UnscaledOffset += Size;
+
+    // We need to recompute the unscaled offset for each iteration.
+    Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
+    Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
+  }
+
+  return true;
+}
+
+bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+  // FIXME: Handle more intrinsics.
+  switch (I.getIntrinsicID()) {
+  default:
+    return false;
+  case Intrinsic::memcpy:
+  case Intrinsic::memmove: {
+    const MemTransferInst &MTI = cast<MemTransferInst>(I);
+    // Don't handle volatile.
+ if (MTI.isVolatile()) + return false; + + // Disable inlining for memmove before calls to ComputeAddress. Otherwise, + // we would emit dead code because we don't currently handle memmoves. + bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); + if (isa(MTI.getLength()) && isMemCpy) { + // Small memcpy's are common enough that we want to do them without a call + // if possible. + uint64_t Len = cast(MTI.getLength())->getZExtValue(); + unsigned Alignment = MTI.getAlignment(); + if (IsMemCpySmall(Len, Alignment)) { + Address Dest, Src; + if (!ComputeAddress(MTI.getRawDest(), Dest) || + !ComputeAddress(MTI.getRawSource(), Src)) + return false; + if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment)) + return true; + } + } + + if (!MTI.getLength()->getType()->isIntegerTy(64)) + return false; + + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + + const char *IntrMemName = isa(I) ? "memcpy" : "memmove"; + return SelectCall(&I, IntrMemName); + } + case Intrinsic::memset: { + const MemSetInst &MSI = cast(I); + // Don't handle volatile. + if (MSI.isVolatile()) + return false; + + if (!MSI.getLength()->getType()->isIntegerTy(64)) + return false; + + if (MSI.getDestAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + + return SelectCall(&I, "memset"); + } + case Intrinsic::trap: { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) + .addImm(1); + return true; + } + } + return false; +} + +bool AArch64FastISel::SelectRet(const Instruction *I) { + const ReturnInst *Ret = cast(I); + const Function &F = *I->getParent()->getParent(); + + if (!FuncInfo.CanLowerReturn) + return false; + + if (F.isVarArg()) + return false; + + // Build a list of return value registers. + SmallVector RetRegs; + + if (Ret->getNumOperands() > 0) { + CallingConv::ID CC = F.getCallingConv(); + SmallVector Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ValLocs; + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, + I->getContext()); + CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; + CCInfo.AnalyzeReturn(Outs, RetCC); + + // Only handle a single return value for now. + if (ValLocs.size() != 1) + return false; + + CCValAssign &VA = ValLocs[0]; + const Value *RV = Ret->getOperand(0); + + // Don't bother handling odd stuff for now. + if (VA.getLocInfo() != CCValAssign::Full) + return false; + // Only handle register returns for now. + if (!VA.isRegLoc()) + return false; + unsigned Reg = getRegForValue(RV); + if (Reg == 0) + return false; + + unsigned SrcReg = Reg + VA.getValNo(); + unsigned DestReg = VA.getLocReg(); + // Avoid a cross-class copy. This is very unlikely. + if (!MRI.getRegClass(SrcReg)->contains(DestReg)) + return false; + + EVT RVEVT = TLI.getValueType(RV->getType()); + if (!RVEVT.isSimple()) + return false; + + // Vectors (of > 1 lane) in big endian need tricky handling. + if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1) + return false; + + MVT RVVT = RVEVT.getSimpleVT(); + if (RVVT == MVT::f128) + return false; + MVT DestVT = VA.getValVT(); + // Special handling for extended integers. 
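+    // For illustration: returning an i8 from a function whose return value
+    // carries the signext attribute lands here with RVVT == MVT::i8, and
+    // EmitIntExt below widens it, roughly  sbfm w8, w9, #0, #7  (an sxtb),
+    // before the copy into the return register. (Register names are
+    // illustrative only.)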
+ if (RVVT != DestVT) { + if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) + return false; + + if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) + return false; + + bool isZExt = Outs[0].Flags.isZExt(); + SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt); + if (SrcReg == 0) + return false; + } + + // Make the copy. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); + + // Add register to return instruction. + RetRegs.push_back(VA.getLocReg()); + } + + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::RET_ReallyLR)); + for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) + MIB.addReg(RetRegs[i], RegState::Implicit); + return true; +} + +bool AArch64FastISel::SelectTrunc(const Instruction *I) { + Type *DestTy = I->getType(); + Value *Op = I->getOperand(0); + Type *SrcTy = Op->getType(); + + EVT SrcEVT = TLI.getValueType(SrcTy, true); + EVT DestEVT = TLI.getValueType(DestTy, true); + if (!SrcEVT.isSimple()) + return false; + if (!DestEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DestVT = DestEVT.getSimpleVT(); + + if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && + SrcVT != MVT::i8) + return false; + if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && + DestVT != MVT::i1) + return false; + + unsigned SrcReg = getRegForValue(Op); + if (!SrcReg) + return false; + + // If we're truncating from i64 to a smaller non-legal type then generate an + // AND. Otherwise, we know the high bits are undefined and a truncate doesn't + // generate any code. + if (SrcVT == MVT::i64) { + uint64_t Mask = 0; + switch (DestVT.SimpleTy) { + default: + // Trunc i64 to i32 is handled by the target-independent fast-isel. + return false; + case MVT::i1: + Mask = 0x1; + break; + case MVT::i8: + Mask = 0xff; + break; + case MVT::i16: + Mask = 0xffff; + break; + } + // Issue an extract_subreg to get the lower 32-bits. + unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true, + AArch64::sub_32); + MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass); + // Create the AND instruction which performs the actual truncation. + unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), + ANDReg) + .addReg(Reg32) + .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32)); + SrcReg = ANDReg; + } + + UpdateValueMap(I, SrcReg); + return true; +} + +unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { + assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || + DestVT == MVT::i64) && + "Unexpected value type."); + // Handle i8 and i16 as i32. + if (DestVT == MVT::i8 || DestVT == MVT::i16) + DestVT = MVT::i32; + + if (isZExt) { + MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass); + unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri), + ResultReg) + .addReg(SrcReg) + .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); + + if (DestVT == MVT::i64) { + // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the + // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 
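+      // Roughly:  and w8, w9, #0x1  followed by a SUBREG_TO_REG retagging
+      // w8 as x8. SUBREG_TO_REG emits no machine instruction; it only
+      // records that the upper 32 bits are already zero. (w8/x8 are
+      // illustrative register choices.)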
+ unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::SUBREG_TO_REG), Reg64) + .addImm(0) + .addReg(ResultReg) + .addImm(AArch64::sub_32); + ResultReg = Reg64; + } + return ResultReg; + } else { + if (DestVT == MVT::i64) { + // FIXME: We're SExt i1 to i64. + return 0; + } + unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri), + ResultReg) + .addReg(SrcReg) + .addImm(0) + .addImm(0); + return ResultReg; + } +} + +unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, + bool isZExt) { + assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); + unsigned Opc; + unsigned Imm = 0; + + switch (SrcVT.SimpleTy) { + default: + return 0; + case MVT::i1: + return Emiti1Ext(SrcReg, DestVT, isZExt); + case MVT::i8: + if (DestVT == MVT::i64) + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; + else + Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri; + Imm = 7; + break; + case MVT::i16: + if (DestVT == MVT::i64) + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; + else + Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri; + Imm = 15; + break; + case MVT::i32: + assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); + Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri; + Imm = 31; + break; + } + + // Handle i8 and i16 as i32. + if (DestVT == MVT::i8 || DestVT == MVT::i16) + DestVT = MVT::i32; + else if (DestVT == MVT::i64) { + unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(AArch64::SUBREG_TO_REG), Src64) + .addImm(0) + .addReg(SrcReg) + .addImm(AArch64::sub_32); + SrcReg = Src64; + } + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(SrcReg) + .addImm(0) + .addImm(Imm); + + return ResultReg; +} + +bool AArch64FastISel::SelectIntExt(const Instruction *I) { + // On ARM, in general, integer casts don't involve legal types; this code + // handles promotable integers. The high bits for a type smaller than + // the register size are assumed to be undefined. + Type *DestTy = I->getType(); + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + + bool isZExt = isa(I); + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) + return false; + + EVT SrcEVT = TLI.getValueType(SrcTy, true); + EVT DestEVT = TLI.getValueType(DestTy, true); + if (!SrcEVT.isSimple()) + return false; + if (!DestEVT.isSimple()) + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DestVT = DestEVT.getSimpleVT(); + unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt); + if (ResultReg == 0) + return false; + UpdateValueMap(I, ResultReg); + return true; +} + +bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) { + EVT DestEVT = TLI.getValueType(I->getType(), true); + if (!DestEVT.isSimple()) + return false; + + MVT DestVT = DestEVT.getSimpleVT(); + if (DestVT != MVT::i64 && DestVT != MVT::i32) + return false; + + unsigned DivOpc; + bool is64bit = (DestVT == MVT::i64); + switch (ISDOpcode) { + default: + return false; + case ISD::SREM: + DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; + break; + case ISD::UREM: + DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; + break; + } + unsigned MSubOpc = is64bit ? 
AArch64::MSUBXrrr : AArch64::MSUBWrrr; + unsigned Src0Reg = getRegForValue(I->getOperand(0)); + if (!Src0Reg) + return false; + + unsigned Src1Reg = getRegForValue(I->getOperand(1)); + if (!Src1Reg) + return false; + + unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg) + .addReg(Src0Reg) + .addReg(Src1Reg); + // The remainder is computed as numerator - (quotient * denominator) using the + // MSUB instruction. + unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg) + .addReg(QuotReg) + .addReg(Src1Reg) + .addReg(Src0Reg); + UpdateValueMap(I, ResultReg); + return true; +} + +bool AArch64FastISel::SelectMul(const Instruction *I) { + EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true); + if (!SrcEVT.isSimple()) + return false; + MVT SrcVT = SrcEVT.getSimpleVT(); + + // Must be simple value type. Don't handle vectors. + if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && + SrcVT != MVT::i8) + return false; + + unsigned Opc; + unsigned ZReg; + switch (SrcVT.SimpleTy) { + default: + return false; + case MVT::i8: + case MVT::i16: + case MVT::i32: + ZReg = AArch64::WZR; + Opc = AArch64::MADDWrrr; + break; + case MVT::i64: + ZReg = AArch64::XZR; + Opc = AArch64::MADDXrrr; + break; + } + + unsigned Src0Reg = getRegForValue(I->getOperand(0)); + if (!Src0Reg) + return false; + + unsigned Src1Reg = getRegForValue(I->getOperand(1)); + if (!Src1Reg) + return false; + + // Create the base instruction, then add the operands. + unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(Src0Reg) + .addReg(Src1Reg) + .addReg(ZReg); + UpdateValueMap(I, ResultReg); + return true; +} + +bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) { + switch (I->getOpcode()) { + default: + break; + case Instruction::Load: + return SelectLoad(I); + case Instruction::Store: + return SelectStore(I); + case Instruction::Br: + return SelectBranch(I); + case Instruction::IndirectBr: + return SelectIndirectBr(I); + case Instruction::FCmp: + case Instruction::ICmp: + return SelectCmp(I); + case Instruction::Select: + return SelectSelect(I); + case Instruction::FPExt: + return SelectFPExt(I); + case Instruction::FPTrunc: + return SelectFPTrunc(I); + case Instruction::FPToSI: + return SelectFPToInt(I, /*Signed=*/true); + case Instruction::FPToUI: + return SelectFPToInt(I, /*Signed=*/false); + case Instruction::SIToFP: + return SelectIntToFP(I, /*Signed=*/true); + case Instruction::UIToFP: + return SelectIntToFP(I, /*Signed=*/false); + case Instruction::SRem: + return SelectRem(I, ISD::SREM); + case Instruction::URem: + return SelectRem(I, ISD::UREM); + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast(I)) + return SelectIntrinsicCall(*II); + return SelectCall(I); + case Instruction::Ret: + return SelectRet(I); + case Instruction::Trunc: + return SelectTrunc(I); + case Instruction::ZExt: + case Instruction::SExt: + return SelectIntExt(I); + case Instruction::Mul: + // FIXME: This really should be handled by the target-independent selector. + return SelectMul(I); + } + return false; + // Silence warnings. 
+ (void)&CC_AArch64_DarwinPCS_VarArg; +} + +namespace llvm { +llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { + return new AArch64FastISel(funcInfo, libInfo); +} +} diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index b29587a..deb306a 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1,4 +1,4 @@ -//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===// +//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====// // // The LLVM Compiler Infrastructure // @@ -11,227 +11,444 @@ // //===----------------------------------------------------------------------===// -#include "AArch64.h" #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" +#include "AArch64TargetMachine.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Function.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; -void AArch64FrameLowering::splitSPAdjustments(uint64_t Total, - uint64_t &Initial, - uint64_t &Residual) const { - // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP - // instructions have a 7-bit signed immediate scaled by 8, giving a reach of - // 0x1f8, but stack adjustment should always be a multiple of 16. - if (Total <= 0x1f0) { - Initial = Total; - Residual = 0; - } else { - Initial = 0x1f0; - Residual = Total - Initial; +#define DEBUG_TYPE "frame-info" + +static cl::opt EnableRedZone("aarch64-redzone", + cl::desc("enable use of redzone on AArch64"), + cl::init(false), cl::Hidden); + +STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); + +static unsigned estimateStackSize(MachineFunction &MF) { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + int Offset = 0; + for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -FFI->getObjectOffset(i); + if (FixedOff > Offset) + Offset = FixedOff; + } + for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + if (FFI->isDeadObjectIndex(i)) + continue; + Offset += FFI->getObjectSize(i); + unsigned Align = FFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset + Align - 1) / Align * Align; } + // This does not include the 16 bytes used for fp and lr. + return (unsigned)Offset; } -void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { - AArch64MachineFunctionInfo *FuncInfo = - MF.getInfo(); - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); +bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { + if (!EnableRedZone) + return false; + // Don't use the red zone if the function explicitly asks us not to. 
+  // This is typically used for kernel code.
+  if (MF.getFunction()->getAttributes().hasAttribute(
+          AttributeSet::FunctionIndex, Attribute::NoRedZone))
+    return false;
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  unsigned NumBytes = AFI->getLocalStackSize();
+
+  // Note: currently hasFP() is always true for hasCalls(), but that's an
+  // implementation detail of the current code, not a strict requirement,
+  // so stay safe here and check both.
+  if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
+    return false;
+  return true;
+}
+
+/// hasFP - Return true if the specified function should have a dedicated
+/// frame pointer register.
+bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+#ifndef NDEBUG
+  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+  assert(!RegInfo->needsStackRealignment(MF) &&
+         "No stack realignment on AArch64!");
+#endif
+
+  return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
+          MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
+bool
+AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+  return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void AArch64FrameLowering::eliminateCallFramePseudoInstr(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator I) const {
+  const AArch64InstrInfo *TII =
+      static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+  DebugLoc DL = I->getDebugLoc();
+  int Opc = I->getOpcode();
+  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
+  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
+
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  if (!TFI->hasReservedCallFrame(MF)) {
+    unsigned Align = getStackAlignment();
+
+    int64_t Amount = I->getOperand(0).getImm();
+    Amount = RoundUpToAlignment(Amount, Align);
+    if (!IsDestroy)
+      Amount = -Amount;
+
+    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but
+    // it doesn't have to pop anything), then the first operand will be zero
+    // too, so this adjustment is a no-op.
+    if (CalleePopAmount == 0) {
+      // FIXME: in-function stack adjustment for calls is limited to 24-bits
+      // because there's no guaranteed temporary register available.
+      //
+      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
+      // 1) For offset <= 12-bit, we use LSL #0.
+      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
+      //    LSL #0, and the other uses LSL #12.
+      //
+      // Most call frames will be allocated at the start of a function, so
+      // this is OK, but it is a limitation that needs dealing with.
+      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
+      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
+    }
+  } else if (CalleePopAmount != 0) {
+    // If the calling convention demands that the callee pops arguments from
+    // the stack, we want to add it back if we have a reserved call frame.
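+    // For illustration: a hypothetical CalleePopAmount of 0x12345 would be
+    // added back by emitFrameOffset as two instructions,
+    //   add sp, sp, #0x12, lsl #12
+    //   add sp, sp, #0x345
+    // using the two immediate shift positions mentioned above.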
+ assert(CalleePopAmount < 0xffffff && "call frame too large"); + emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount, + TII); + } + MBB.erase(I); +} + +void AArch64FrameLowering::emitCalleeSavedFrameMoves( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + unsigned FramePtr) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - bool NeedsFrameMoves = MMI.hasDebugInfo() - || MF.getFunction()->needsUnwindTableEntry(); - - uint64_t NumInitialBytes, NumResidualBytes; - - // Currently we expect the stack to be laid out by - // sub sp, sp, #initial - // stp x29, x30, [sp, #offset] - // ... - // str xxx, [sp, #offset] - // sub sp, sp, #rest (possibly via extra instructions). - if (MFI->getCalleeSavedInfo().size()) { - // If there are callee-saved registers, we want to store them efficiently as - // a block, and virtual base assignment happens too early to do it for us so - // we adjust the stack in two phases: first just for callee-saved fiddling, - // then to allocate the rest of the frame. - splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes); - } else { - // If there aren't any callee-saved registers, two-phase adjustment is - // inefficient. It's more efficient to adjust with NumInitialBytes too - // because when we're in a "callee pops argument space" situation, that pop - // must be tacked onto Initial for correctness. - NumInitialBytes = MFI->getStackSize(); - NumResidualBytes = 0; - } + const AArch64InstrInfo *TII = TM.getInstrInfo(); + DebugLoc DL = MBB.findDebugLoc(MBBI); + + // Add callee saved registers to move list. + const std::vector &CSI = MFI->getCalleeSavedInfo(); + if (CSI.empty()) + return; + + const DataLayout *TD = MF.getTarget().getDataLayout(); + bool HasFP = hasFP(MF); + + // Calculate amount of bytes used for return address storing. + int stackGrowth = -TD->getPointerSize(0); + + // Calculate offsets. + int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth; + unsigned TotalSkipped = 0; + for (const auto &Info : CSI) { + unsigned Reg = Info.getReg(); + int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) - + getOffsetOfLocalArea() + saveAreaOffset; + + // Don't output a new CFI directive if we're re-saving the frame pointer or + // link register. This happens when the PrologEpilogInserter has inserted an + // extra "STP" of the frame pointer and link register -- the "emitPrologue" + // method automatically generates the directives when frame pointers are + // used. If we generate CFI directives for the extra "STP"s, the linker will + // lose track of the correct values for the frame pointer and link register. + if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) { + TotalSkipped += stackGrowth; + continue; + } - // Tell everyone else how much adjustment we're expecting them to use. In - // particular if an adjustment is required for a tail call the epilogue could - // have a different view of things. - FuncInfo->setInitialStackAdjust(NumInitialBytes); - - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes, - MachineInstr::FrameSetup); - - if (NeedsFrameMoves && NumInitialBytes) { - // We emit this update even if the CFA is set from a frame pointer later so - // that the CFA is valid in the interim. 
- MachineLocation Dst(MachineLocation::VirtualFP); - unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, -NumInitialBytes)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( + nullptr, DwarfReg, Offset - TotalSkipped)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } +} - // Otherwise we need to set the frame pointer and/or add a second stack - // adjustment. - - bool FPNeedsSetting = hasFP(MF); - for (; MBBI != MBB.end(); ++MBBI) { - // Note that this search makes strong assumptions about the operation used - // to store the frame-pointer: it must be "STP x29, x30, ...". This could - // change in future, but until then there's no point in implementing - // untestable more generic cases. - if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR - && MBBI->getOperand(0).getReg() == AArch64::X29) { - int64_t X29FrameIdx = MBBI->getOperand(2).getIndex(); - FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx)); - - ++MBBI; - emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP, - AArch64::X29, - NumInitialBytes + MFI->getObjectOffset(X29FrameIdx), - MachineInstr::FrameSetup); +void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. + MachineBasicBlock::iterator MBBI = MBB.begin(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const Function *Fn = MF.getFunction(); + const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo(); + const AArch64InstrInfo *TII = TM.getInstrInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + AArch64FunctionInfo *AFI = MF.getInfo(); + bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); + bool HasFP = hasFP(MF); + DebugLoc DL = MBB.findDebugLoc(MBBI); - // The offset adjustment used when emitting debugging locations relative - // to whatever frame base is set. AArch64 uses the default frame base (FP - // or SP) and this adjusts the calculations to be correct. - MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx) - - MFI->getStackSize()); - - if (NeedsFrameMoves) { - unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true); - unsigned Offset = MFI->getObjectOffset(X29FrameIdx); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, Offset)); - BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } + int NumBytes = (int)MFI->getStackSize(); + if (!AFI->hasStackFrame()) { + assert(!HasFP && "unexpected function without stack frame but with FP"); + + // All of the stack allocation is for locals. + AFI->setLocalStackSize(NumBytes); - FPNeedsSetting = false; + // Label used to tie together the PROLOG_LABEL and the MachineMoves. + MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); + + // REDZONE: If the stack size is less than 128 bytes, we don't need + // to actually allocate. + if (NumBytes && !canUseRedZone(MF)) { + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, + MachineInstr::FrameSetup); + + // Encode the stack size of the leaf function. 
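+      // For illustration, a leaf function needing 32 bytes of locals gets
+      //   sub sp, sp, #32
+      // paired with a .cfi_def_cfa_offset 32 directive (assuming the red
+      // zone is not used).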
+      unsigned CFIIndex = MMI.addFrameInst(
+          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
+      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    } else if (NumBytes) {
+      ++NumRedZoneFunctions;
     }
 
-    if (!MBBI->getFlag(MachineInstr::FrameSetup))
-      break;
+    return;
   }
 
-  assert(!FPNeedsSetting && "Frame pointer couldn't be set");
+  // Only set up FP if we actually need to.
+  int FPOffset = 0;
+  if (HasFP) {
+    // First instruction must a) allocate the stack and b) have an immediate
+    // that is a multiple of -2.
+    assert((MBBI->getOpcode() == AArch64::STPXpre ||
+            MBBI->getOpcode() == AArch64::STPDpre) &&
+           MBBI->getOperand(3).getReg() == AArch64::SP &&
+           MBBI->getOperand(4).getImm() < 0 &&
+           (MBBI->getOperand(4).getImm() & 1) == 0);
+
+    // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
+    // required for the callee saved register area we get the frame pointer
+    // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
+    FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
+    assert(FPOffset >= 0 && "Bad Framepointer Offset");
+  }
 
-  emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
-               MachineInstr::FrameSetup);
+  // Move past the saves of the callee-saved registers.
+  while (MBBI->getOpcode() == AArch64::STPXi ||
+         MBBI->getOpcode() == AArch64::STPDi ||
+         MBBI->getOpcode() == AArch64::STPXpre ||
+         MBBI->getOpcode() == AArch64::STPDpre) {
+    ++MBBI;
+    NumBytes -= 16;
+  }
+  assert(NumBytes >= 0 && "Negative stack allocation size!?");
+  if (HasFP) {
+    // Issue    sub fp, sp, FPOffset    or
+    //          mov fp, sp              when FPOffset is zero.
+    // Note: All stores of callee-saved registers are marked as "FrameSetup".
+    // This code marks the instruction(s) that set the FP also.
+    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
+                    MachineInstr::FrameSetup);
+  }
 
-  // Now we emit the rest of the frame setup information, if necessary: we've
-  // already noted the FP and initial SP moves so we're left with the
-  // prologue's final SP update and callee-saved register locations.
-  if (!NeedsFrameMoves)
-    return;
+  // All of the remaining stack allocations are for locals.
+  AFI->setLocalStackSize(NumBytes);
 
-  // The rest of the stack adjustment
-  if (!hasFP(MF) && NumResidualBytes) {
-    MachineLocation Dst(MachineLocation::VirtualFP);
-    unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
-    unsigned Offset = NumResidualBytes + NumInitialBytes;
-    unsigned CFIIndex =
-        MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
-    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex);
+  // Allocate space for the rest of the frame.
+  if (NumBytes) {
+    // If we're a leaf function, try using the red zone.
+    if (!canUseRedZone(MF))
+      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
+                      MachineInstr::FrameSetup);
   }
 
-  // And any callee-saved registers (it's fine to leave them to the end here,
-  // because the old values are still valid at this point).
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  if (CSI.size()) {
-    for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
-         E = CSI.end(); I != E; ++I) {
-      unsigned Offset = MFI->getObjectOffset(I->getFrameIdx());
-      unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true);
+  // If we need a base pointer, set it up here. It's whatever the value of the
+  // stack pointer is at this point.
Any variable size objects will be allocated + // after this, so we can still use the base pointer to reference locals. + // + // FIXME: Clarify FrameSetup flags here. + // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is + // needed. + // + if (RegInfo->hasBasePointer(MF)) + TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false); + + if (needsFrameMoves) { + const DataLayout *TD = MF.getTarget().getDataLayout(); + const int StackGrowth = -TD->getPointerSize(0); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + + // An example of the prologue: + // + // .globl __foo + // .align 2 + // __foo: + // Ltmp0: + // .cfi_startproc + // .cfi_personality 155, ___gxx_personality_v0 + // Leh_func_begin: + // .cfi_lsda 16, Lexception33 + // + // stp xa,bx, [sp, -#offset]! + // ... + // stp x28, x27, [sp, #offset-32] + // stp fp, lr, [sp, #offset-16] + // add fp, sp, #offset - 16 + // sub sp, sp, #1360 + // + // The Stack: + // +-------------------------------------------+ + // 10000 | ........ | ........ | ........ | ........ | + // 10004 | ........ | ........ | ........ | ........ | + // +-------------------------------------------+ + // 10008 | ........ | ........ | ........ | ........ | + // 1000c | ........ | ........ | ........ | ........ | + // +===========================================+ + // 10010 | X28 Register | + // 10014 | X28 Register | + // +-------------------------------------------+ + // 10018 | X27 Register | + // 1001c | X27 Register | + // +===========================================+ + // 10020 | Frame Pointer | + // 10024 | Frame Pointer | + // +-------------------------------------------+ + // 10028 | Link Register | + // 1002c | Link Register | + // +===========================================+ + // 10030 | ........ | ........ | ........ | ........ | + // 10034 | ........ | ........ | ........ | ........ | + // +-------------------------------------------+ + // 10038 | ........ | ........ | ........ | ........ | + // 1003c | ........ | ........ | ........ | ........ | + // +-------------------------------------------+ + // + // [sp] = 10030 :: >>initial value<< + // sp = 10020 :: stp fp, lr, [sp, #-16]! + // fp = sp == 10020 :: mov fp, sp + // [sp] == 10020 :: stp x28, x27, [sp, #-16]! + // sp == 10010 :: >>final value<< + // + // The frame pointer (w29) points to address 10020. If we use an offset of + // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 + // for w27, and -32 for w28: + // + // Ltmp1: + // .cfi_def_cfa w29, 16 + // Ltmp2: + // .cfi_offset w30, -8 + // Ltmp3: + // .cfi_offset w29, -16 + // Ltmp4: + // .cfi_offset w27, -24 + // Ltmp5: + // .cfi_offset w28, -32 + + if (HasFP) { + // Define the current CFA rule to use the provided FP. 
+      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
       unsigned CFIIndex = MMI.addFrameInst(
-          MCCFIInstruction::createOffset(nullptr, Reg, Offset));
-      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
+      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+
+      // Record the location of the stored LR
+      unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
+      CFIIndex = MMI.addFrameInst(
+          MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
+      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+
+      // Record the location of the stored FP
+      CFIIndex = MMI.addFrameInst(
+          MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
+      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+          .addCFIIndex(CFIIndex);
+    } else {
+      // Encode the stack size of the leaf function.
+      unsigned CFIIndex = MMI.addFrameInst(
+          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
+      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
           .addCFIIndex(CFIIndex);
     }
+
+    // Now emit the moves for whatever callee saved regs we have.
+    emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
   }
 }
 
-void
-AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
-                                   MachineBasicBlock &MBB) const {
-  AArch64MachineFunctionInfo *FuncInfo =
-      MF.getInfo<AArch64MachineFunctionInfo>();
+static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
+  for (unsigned i = 0; CSRegs[i]; ++i)
+    if (Reg == CSRegs[i])
+      return true;
+  return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
+  unsigned RtIdx = 0;
+  if (MI->getOpcode() == AArch64::LDPXpost ||
+      MI->getOpcode() == AArch64::LDPDpost)
+    RtIdx = 1;
+
+  if (MI->getOpcode() == AArch64::LDPXpost ||
+      MI->getOpcode() == AArch64::LDPDpost ||
+      MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) {
+    if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
+        !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
+        MI->getOperand(RtIdx + 2).getReg() != AArch64::SP)
+      return false;
+    return true;
+  }
+  return false;
+}
+
+void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
+                                        MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const AArch64InstrInfo *TII =
+      static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+      MF.getTarget().getRegisterInfo());
   DebugLoc DL = MBBI->getDebugLoc();
-  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
   unsigned RetOpcode = MBBI->getOpcode();
+  int NumBytes = MFI->getStackSize();
+  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
 
+  // Initial and residual are named for consistency with the prologue. Note
+  // that in the epilogue, the residual adjustment is executed first.
- uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust(); - uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes; uint64_t ArgumentPopSize = 0; - if (RetOpcode == AArch64::TC_RETURNdi || - RetOpcode == AArch64::TC_RETURNxi) { - MachineOperand &JumpTarget = MBBI->getOperand(0); + if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) { MachineOperand &StackAdjust = MBBI->getOperand(1); - MachineInstrBuilder MIB; - if (RetOpcode == AArch64::TC_RETURNdi) { - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm)); - if (JumpTarget.isGlobal()) { - MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), - JumpTarget.getTargetFlags()); - } else { - assert(JumpTarget.isSymbol() && "unexpected tail call destination"); - MIB.addExternalSymbol(JumpTarget.getSymbolName(), - JumpTarget.getTargetFlags()); - } - } else { - assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg() - && "Unexpected tail call"); - - MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx)); - MIB.addReg(JumpTarget.getReg(), RegState::Kill); - } - - // Add the extra operands onto the new tail call instruction even though - // they're not used directly (so that liveness is tracked properly etc). - for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) - MIB->addOperand(MBBI->getOperand(i)); - - - // Delete the pseudo instruction TC_RETURN. - MachineInstr *NewMI = std::prev(MBBI); - MBB.erase(MBBI); - MBBI = NewMI; - // For a tail-call in a callee-pops-arguments environment, some or all of // the stack may actually be in use for the call's arguments, this is // calculated during LowerCall and consumed here... @@ -241,386 +458,434 @@ AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // conveniently stored in the MachineFunctionInfo by // LowerFormalArguments. This will, of course, be zero for the C calling // convention. - ArgumentPopSize = FuncInfo->getArgumentStackToRestore(); + ArgumentPopSize = AFI->getArgumentStackToRestore(); } - assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0 - && "refusing to adjust stack by misaligned amt"); - - // We may need to address callee-saved registers differently, so find out the - // bound on the frame indices. - const std::vector &CSI = MFI.getCalleeSavedInfo(); - int MinCSFI = 0; - int MaxCSFI = -1; - - if (CSI.size()) { - MinCSFI = CSI[0].getFrameIdx(); - MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + // The stack frame should be like below, + // + // ---------------------- --- + // | | | + // | BytesInStackArgArea| CalleeArgStackSize + // | (NumReusableBytes) | (of tail call) + // | | --- + // | | | + // ---------------------| --- | + // | | | | + // | CalleeSavedReg | | | + // | (NumRestores * 16) | | | + // | | | | + // ---------------------| | NumBytes + // | | StackSize (StackAdjustUp) + // | LocalStackSize | | | + // | (covering callee | | | + // | args) | | | + // | | | | + // ---------------------- --- --- + // + // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize + // = StackSize + ArgumentPopSize + // + // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps + // it as the 2nd argument of AArch64ISD::TC_RETURN. + NumBytes += ArgumentPopSize; + + unsigned NumRestores = 0; + // Move past the restores of the callee-saved registers. 
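+  // For illustration, scanning backwards from the return over a sequence like
+  //   ldp x20, x19, [sp, #16]
+  //   ldp x29, x30, [sp], #32
+  //   ret
+  // leaves NumRestores == 2 with LastPopI at the first ldp. (Register choice
+  // illustrative.)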
+ MachineBasicBlock::iterator LastPopI = MBBI; + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + if (LastPopI != MBB.begin()) { + do { + ++NumRestores; + --LastPopI; + } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs)); + if (!isCSRestore(LastPopI, CSRegs)) { + ++LastPopI; + --NumRestores; + } } - - // The "residual" stack update comes first from this direction and guarantees - // that SP is NumInitialBytes below its value on function entry, either by a - // direct update or restoring it from the frame pointer. - if (NumInitialBytes + ArgumentPopSize != 0) { - emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, - NumInitialBytes + ArgumentPopSize); - --MBBI; + NumBytes -= NumRestores * 16; + assert(NumBytes >= 0 && "Negative stack allocation size!?"); + + if (!hasFP(MF)) { + // If this was a redzone leaf function, we don't need to restore the + // stack pointer. + if (!canUseRedZone(MF)) + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, + TII); + return; } + // Restore the original stack pointer. + // FIXME: Rather than doing the math here, we should instead just use + // non-post-indexed loads for the restores if we aren't actually going to + // be able to save any instructions. + if (NumBytes || MFI->hasVarSizedObjects()) + emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, + -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); +} - // MBBI now points to the instruction just past the last callee-saved - // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp" - // otherwise). +/// getFrameIndexOffset - Returns the displacement from the frame register to +/// the stack frame of the specified index. +int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + unsigned FrameReg; + return getFrameIndexReference(MF, FI, FrameReg); +} - // Now we need to find out where to put the bulk of the stack adjustment - MachineBasicBlock::iterator FirstEpilogue = MBBI; - while (MBBI != MBB.begin()) { - --MBBI; +/// getFrameIndexReference - Provide a base+offset reference to an FI slot for +/// debug info. It's the same as what we use for resolving the code-gen +/// references for now. FIXME: This can go wrong when references are +/// SP-relative and simple call frames aren't used. +int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + return resolveFrameIndexReference(MF, FI, FrameReg); +} - unsigned FrameOp; - for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) { - if (MBBI->getOperand(FrameOp).isFI()) - break; +int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg, + bool PreferFP) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const AArch64RegisterInfo *RegInfo = static_cast( + MF.getTarget().getRegisterInfo()); + const AArch64FunctionInfo *AFI = MF.getInfo(); + int FPOffset = MFI->getObjectOffset(FI) + 16; + int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + bool isFixed = MFI->isFixedObjectIndex(FI); + + // Use frame pointer to reference fixed objects. Use it for locals if + // there are VLAs (and thus the SP isn't reliable as a base). + // Make sure useFPForScavengingIndex() does the right thing for the emergency + // spill slot. + bool UseFP = false; + if (AFI->hasStackFrame()) { + // Note: Keeping the following as multiple 'if' statements rather than + // merging to a single expression for readability. 
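+  // For illustration: an incoming stack argument at ObjectOffset +8 resolves
+  // below to FrameReg == FP with offset 8 + 16 == 24, the 16 accounting for
+  // the FP/LR pair saved between the frame pointer and the argument area.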
+ // + // Argument access should always use the FP. + if (isFixed) { + UseFP = hasFP(MF); + } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { + // Use SP or FP, whichever gives us the best chance of the offset + // being in range for direct access. If the FPOffset is positive, + // that'll always be best, as the SP will be even further away. + // If the FPOffset is negative, we have to keep in mind that the + // available offset range for negative offsets is smaller than for + // positive ones. If we have variable sized objects, we're stuck with + // using the FP regardless, though, as the SP offset is unknown + // and we don't have a base pointer available. If an offset is + // available via the FP and the SP, use whichever is closest. + if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 || + (FPOffset >= -256 && Offset > -FPOffset)) + UseFP = true; } - - // If this instruction doesn't have a frame index we've reached the end of - // the callee-save restoration. - if (FrameOp == MBBI->getNumOperands()) - break; - - // Likewise if it *is* a local reference, but not to a callee-saved object. - int FrameIdx = MBBI->getOperand(FrameOp).getIndex(); - if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI) - break; - - FirstEpilogue = MBBI; } - if (MF.getFrameInfo()->hasVarSizedObjects()) { - int64_t StaticFrameBase; - StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset()); - emitRegUpdate(MBB, FirstEpilogue, DL, TII, - AArch64::XSP, AArch64::X29, AArch64::NoRegister, - StaticFrameBase); - } else { - emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes); + if (UseFP) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; } -} -int64_t -AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF, - int FrameIndex, - unsigned &FrameReg, - int SPAdj, - bool IsCalleeSaveOp) const { - AArch64MachineFunctionInfo *FuncInfo = - MF.getInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex); - - assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0) - && "callee-saved register in unexpected place"); - - // If the frame for this function is particularly large, we adjust the stack - // in two phases which means the callee-save related operations see a - // different (intermediate) stack size. - int64_t FrameRegPos; - if (IsCalleeSaveOp) { - FrameReg = AArch64::XSP; - FrameRegPos = -static_cast(FuncInfo->getInitialStackAdjust()); - } else if (useFPForAddressing(MF)) { - // Have to use the frame pointer since we have no idea where SP is. - FrameReg = AArch64::X29; - FrameRegPos = FuncInfo->getFramePointerOffset(); - } else { - FrameReg = AArch64::XSP; - FrameRegPos = -static_cast(MFI->getStackSize()) + SPAdj; + // Use the base pointer if we have one. + if (RegInfo->hasBasePointer(MF)) + FrameReg = RegInfo->getBaseRegister(); + else { + FrameReg = AArch64::SP; + // If we're using the red zone for this function, the SP won't actually + // be adjusted, so the offsets will be negative. They're also all + // within range of the signed 9-bit immediate instructions. 
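+    // E.g. with a 32-byte red-zone frame, a local at ObjectOffset -8 starts
+    // as Offset == -8 + 32 == 24 and becomes -8 after the adjustment below,
+    // reachable with an unscaled ldur/stur at [sp, #-8].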
+ if (canUseRedZone(MF)) + Offset -= AFI->getLocalStackSize(); } - return TopOfFrameOffset - FrameRegPos; + return Offset; } -void -AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - const AArch64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const AArch64InstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - - if (hasFP(MF)) { - MF.getRegInfo().setPhysRegUsed(AArch64::X29); - MF.getRegInfo().setPhysRegUsed(AArch64::X30); - } +static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { + if (Reg != AArch64::LR) + return getKillRegState(true); - // If addressing of local variables is going to be more complicated than - // shoving a base register and an offset into the instruction then we may well - // need to scavenge registers. We should either specifically add an - // callee-save register for this purpose or allocate an extra spill slot. - bool BigStack = - MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF) - || MFI->hasVarSizedObjects() // Access will be from X29: messes things up - || (MFI->adjustsStack() && !hasReservedCallFrame(MF)); - - if (!BigStack) - return; - - // We certainly need some slack space for the scavenger, preferably an extra - // register. - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); - uint16_t ExtraReg = AArch64::NoRegister; - - for (unsigned i = 0; CSRegs[i]; ++i) { - if (AArch64::GPR64RegClass.contains(CSRegs[i]) && - !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) { - ExtraReg = CSRegs[i]; - break; - } - } - - if (ExtraReg != 0) { - MF.getRegInfo().setPhysRegUsed(ExtraReg); - } else { - assert(RS && "Expect register scavenger to be available"); - - // Create a stack slot for scavenging purposes. PrologEpilogInserter - // helpfully places it near either SP or FP for us to avoid - // infinitely-regression during scavenging. - const TargetRegisterClass *RC = &AArch64::GPR64RegClass; - RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } + // LR maybe referred to later by an @llvm.returnaddress intrinsic. + bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR); + bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken()); + return getKillRegState(LRKill); } -bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB, - unsigned Reg) const { - // If @llvm.returnaddress is called then it will refer to X30 by some means; - // the prologue store does not kill the register. - if (Reg == AArch64::X30) { - if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken() - && MBB.getParent()->getRegInfo().isLiveIn(Reg)) - return false; +bool AArch64FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + unsigned Count = CSI.size(); + DebugLoc DL; + assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); + + if (MI != MBB.end()) + DL = MI->getDebugLoc(); + + for (unsigned i = 0; i < Count; i += 2) { + unsigned idx = Count - i - 2; + unsigned Reg1 = CSI[idx].getReg(); + unsigned Reg2 = CSI[idx + 1].getReg(); + // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI + // list to come in sorted by frame index so that we can issue the store + // pair instructions directly. 
Assert if we see anything otherwise.
+    //
+    // The order of the registers in the list is controlled by
+    // getCalleeSavedRegs(), so they will always be in-order, as well.
+    assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
+           "Out of order callee saved regs!");
+    unsigned StrOpc;
+    assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+    assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
+    // Issue a sequence of non-sp-increment and pre-increment (pi) sp spills
+    // for cs regs. The first spill is a pre-increment that allocates the
+    // stack.
+    // For example:
+    //    stp x22, x21, [sp, #-48]!   // addImm(-6)
+    //    stp x20, x19, [sp, #16]     // addImm(+2)
+    //    stp fp, lr, [sp, #32]       // addImm(+4)
+    // Rationale: This sequence saves uop updates compared to a sequence of
+    // pre-increment spills like stp xi,xj,[sp,#-16]!
+    // Note: Similar rationale and sequence for restores in the epilogue.
+    if (AArch64::GPR64RegClass.contains(Reg1)) {
+      assert(AArch64::GPR64RegClass.contains(Reg2) &&
+             "Expected GPR64 callee-saved register pair!");
+      // For first spill use pre-increment store.
+      if (i == 0)
+        StrOpc = AArch64::STPXpre;
+      else
+        StrOpc = AArch64::STPXi;
+    } else if (AArch64::FPR64RegClass.contains(Reg1)) {
+      assert(AArch64::FPR64RegClass.contains(Reg2) &&
+             "Expected FPR64 callee-saved register pair!");
+      // For first spill use pre-increment store.
+      if (i == 0)
+        StrOpc = AArch64::STPDpre;
+      else
+        StrOpc = AArch64::STPDi;
+    } else
+      llvm_unreachable("Unexpected callee saved register!");
+    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
+                 << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
+                 << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
+    // Compute offset: i == 0 => offset = -Count (the pre-increment that
+    // allocates the stack); otherwise offset = i. Both are scaled by 8 in
+    // the instruction encoding.
+    const int Offset = (i == 0) ? -Count : i;
+    assert((Offset >= -64 && Offset <= 63) &&
+           "Offset out of bounds for STP immediate");
+    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
+    if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
+      MIB.addReg(AArch64::SP, RegState::Define);
+
+    MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
+        .addReg(Reg1, getPrologueDeath(MF, Reg1))
+        .addReg(AArch64::SP)
+        .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+  return true;
+}
+
+bool AArch64FrameLowering::restoreCalleeSavedRegisters(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    const std::vector<CalleeSavedInfo> &CSI,
+    const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  unsigned Count = CSI.size();
+  DebugLoc DL;
+  assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+
+  if (MI != MBB.end())
+    DL = MI->getDebugLoc();
+
+  for (unsigned i = 0; i < Count; i += 2) {
+    unsigned Reg1 = CSI[i].getReg();
+    unsigned Reg2 = CSI[i + 1].getReg();
+    // GPRs and FPRs are saved in pairs of 64-bit regs.
We expect the CSI + // list to come in sorted by frame index so that we can issue the store + // pair instructions directly. Assert if we see anything otherwise. + assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() && + "Out of order callee saved regs!"); + // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only + // the last load is sp-pi post-increment and de-allocates the stack: + // For example: + // ldp fp, lr, [sp, #32] // addImm(+4) + // ldp x20, x19, [sp, #16] // addImm(+2) + // ldp x22, x21, [sp], #48 // addImm(+6) + // Note: see comment in spillCalleeSavedRegisters() + unsigned LdrOpc; + + assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); + assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); + if (AArch64::GPR64RegClass.contains(Reg1)) { + assert(AArch64::GPR64RegClass.contains(Reg2) && + "Expected GPR64 callee-saved register pair!"); + if (i == Count - 2) + LdrOpc = AArch64::LDPXpost; + else + LdrOpc = AArch64::LDPXi; + } else if (AArch64::FPR64RegClass.contains(Reg1)) { + assert(AArch64::FPR64RegClass.contains(Reg2) && + "Expected FPR64 callee-saved register pair!"); + if (i == Count - 2) + LdrOpc = AArch64::LDPDpost; + else + LdrOpc = AArch64::LDPDi; + } else + llvm_unreachable("Unexpected callee saved register!"); + DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " + << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx() + << ", " << CSI[i + 1].getFrameIdx() << ")\n"); + + // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4; + // etc. + const int Offset = (i == Count - 2) ? Count : Count - i - 2; + assert((Offset >= -64 && Offset <= 63) && + "Offset out of bounds for LDP immediate"); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); + if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost) + MIB.addReg(AArch64::SP, RegState::Define); + + MIB.addReg(Reg2, getDefRegState(true)) + .addReg(Reg1, getDefRegState(true)) + .addReg(AArch64::SP) + .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] + // where the factor * 8 is implicit + } + return true; +} - // A certain amount of implicit contract is present here. The actual stack - // offsets haven't been allocated officially yet, so for strictly correct code - // we rely on the fact that the elements of CSI are allocated in order - // starting at SP, purely as dictated by size and alignment. In practice since - // this function handles the only accesses to those slots it's not quite so - // important. - // - // We have also ordered the Callee-saved register list in AArch64CallingConv - // so that the above scheme puts registers in order: in particular we want - // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2) - for (unsigned i = 0, e = CSI.size(); i < e; ++i) { - unsigned Reg = CSI[i].getReg(); - - // First we need to find out which register class the register belongs to so - // that we can use the correct load/store instrucitons. - unsigned ClassIdx; - for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) { - if (PossClasses[ClassIdx].RegClass->contains(Reg)) - break; - } - assert(ClassIdx != NumClasses - && "Asked to store register in unexpected class"); - const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass; - - // Now we need to decide whether it's possible to emit a paired instruction: - // for this we want the next register to be in the same class. 
-    MachineInstrBuilder NewMI;
-    bool Pair = false;
-    if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
-      Pair = true;
-      unsigned StLow = 0, StHigh = 0;
-      if (isPrologue) {
-        // Most of these registers will be live-in to the MBB and killed by our
-        // store, though there are exceptions (see determinePrologueDeath).
-        StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
-        StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
-      } else {
-        StLow = RegState::Define;
-        StHigh = RegState::Define;
-      }
+void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
+    MachineFunction &MF, RegScavenger *RS) const {
+  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+      MF.getTarget().getRegisterInfo());
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  SmallVector<unsigned, 4> UnspilledCSGPRs;
+  SmallVector<unsigned, 4> UnspilledCSFPRs;
 
-      NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
-                .addReg(CSI[i+1].getReg(), StLow)
-                .addReg(CSI[i].getReg(), StHigh);
+  // The frame record needs to be created by saving the appropriate registers.
+  if (hasFP(MF)) {
+    MRI->setPhysRegUsed(AArch64::FP);
+    MRI->setPhysRegUsed(AArch64::LR);
+  }
 
-      // If it's a paired op, we've consumed two registers
-      ++i;
-    } else {
-      unsigned State;
-      if (isPrologue) {
-        State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+  // Spill the BasePtr if it's used. Do this first thing so that the
+  // getCalleeSavedRegs() below will get the right answer.
+  if (RegInfo->hasBasePointer(MF))
+    MRI->setPhysRegUsed(RegInfo->getBaseRegister());
+
+  // If any callee-saved registers are used, the frame cannot be eliminated.
+  unsigned NumGPRSpilled = 0;
+  unsigned NumFPRSpilled = 0;
+  bool ExtraCSSpill = false;
+  bool CanEliminateFrame = true;
+  DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+
+  // Check pairs of consecutive callee-saved registers.
+  for (unsigned i = 0; CSRegs[i]; i += 2) {
+    assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
+
+    const unsigned OddReg = CSRegs[i];
+    const unsigned EvenReg = CSRegs[i + 1];
+    assert((AArch64::GPR64RegClass.contains(OddReg) &&
+            AArch64::GPR64RegClass.contains(EvenReg)) ^
+               (AArch64::FPR64RegClass.contains(OddReg) &&
+                AArch64::FPR64RegClass.contains(EvenReg)) &&
+           "Register class mismatch!");
+
+    const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
+    const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
+
+    // Early exit if none of the registers in the register pair is actually
+    // used.
+    if (!OddRegUsed && !EvenRegUsed) {
+      if (AArch64::GPR64RegClass.contains(OddReg)) {
+        UnspilledCSGPRs.push_back(OddReg);
+        UnspilledCSGPRs.push_back(EvenReg);
       } else {
-        State = RegState::Define;
+        UnspilledCSFPRs.push_back(OddReg);
+        UnspilledCSFPRs.push_back(EvenReg);
       }
+      continue;
+    }
 
-      NewMI = BuildMI(MBB, MBBI, DL,
-                      TII.get(PossClasses[ClassIdx].SingleOpcode))
-                .addReg(CSI[i].getReg(), State);
+    unsigned Reg = AArch64::NoRegister;
+    // If only one of the registers of the register pair is used, make sure to
+    // mark the other one as used as well.
+    if (OddRegUsed ^ EvenRegUsed) {
+      // Find out which register is the additional spill.
+      Reg = OddRegUsed ? EvenReg : OddReg;
+      MRI->setPhysRegUsed(Reg);
     }
 
-    // Note that the FrameIdx refers to the second register in a pair: it will
-    // be allocated the smaller numeric address and so is the one an LDP/STP
-    // address must use.
-    int FrameIdx = CSI[i].getFrameIdx();
-    MachineMemOperand::MemOperandFlags Flags;
-    Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
-    MachineMemOperand *MMO =
-      MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
-                              Flags,
-                              Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
-                              MFI.getObjectAlignment(FrameIdx));
-
-    NewMI.addFrameIndex(FrameIdx)
-      .addImm(0) // address-register offset
-      .addMemOperand(MMO);
-
-    if (isPrologue)
-      NewMI.setMIFlags(MachineInstr::FrameSetup);
-
-    // For aesthetic reasons, during an epilogue we want to emit complementary
-    // operations to the prologue, but in the opposite order. So we still
-    // iterate through the CalleeSavedInfo list in order, but we put the
-    // instructions successively earlier in the MBB.
-    if (!isPrologue)
-      --MBBI;
+    DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
+    DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
+
+    assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
+            (RegInfo->getEncodingValue(OddReg) + 1 ==
+             RegInfo->getEncodingValue(EvenReg))) &&
+           "Register pair of non-adjacent registers!");
+    if (AArch64::GPR64RegClass.contains(OddReg)) {
+      NumGPRSpilled += 2;
+      // If it's not a reserved register, we can use it in lieu of an
+      // emergency spill slot for the register scavenger.
+      // FIXME: It would be better to instead keep looking and choose another
+      // unspilled register that isn't reserved, if there is one.
+      if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
+        ExtraCSSpill = true;
+    } else
+      NumFPRSpilled += 2;
+
+    CanEliminateFrame = false;
   }
-}
-
-bool
-AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MBBI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                        const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
-
-  static const LoadStoreMethod PossibleClasses[] = {
-    {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
-    {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
-  };
-  const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
-
-  emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
-                  PossibleClasses, NumClasses);
-
-  return true;
-}
-
-bool
-AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MBBI,
-                                        const std::vector<CalleeSavedInfo> &CSI,
-                                        const TargetRegisterInfo *TRI) const {
-
-  if (CSI.empty())
-    return false;
-
-  static const LoadStoreMethod PossibleClasses[] = {
-    {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
-    {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
-  };
-  const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
-
-  emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
-                  PossibleClasses, NumClasses);
-
-  return true;
-}
-
-bool
-AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
-
-  // This is a decision of ABI compliance. The AArch64 PCS gives various options
-  // for conformance, and even at the most stringent level more or less permits
-  // elimination for leaf functions because there's no loss of functionality
-  // (for debugging etc.).
-  if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
-    return true;
-  // The following are hard-limits: incorrect code will be generated if we try
-  // to omit the frame.
-  return (RI->needsStackRealignment(MF) ||
-          MFI->hasVarSizedObjects() ||
-          MFI->isFrameAddressTaken());
-}
-
-bool
-AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
-  return MF.getFrameInfo()->hasVarSizedObjects();
-}
-
-bool
-AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  // Of the various reasons for having a frame pointer, it's actually only
-  // variable-sized objects that prevent reservation of a call frame.
-  return !(hasFP(MF) && MFI->hasVarSizedObjects());
-}
-
-void
-AArch64FrameLowering::eliminateCallFramePseudoInstr(
-                                MachineFunction &MF,
-                                MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI) const {
-  const AArch64InstrInfo &TII =
-    *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
-  DebugLoc dl = MI->getDebugLoc();
-  int Opcode = MI->getOpcode();
-  bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
-  uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
-
-  if (!hasReservedCallFrame(MF)) {
-    unsigned Align = getStackAlignment();
-
-    int64_t Amount = MI->getOperand(0).getImm();
-    Amount = RoundUpToAlignment(Amount, Align);
-    if (!IsDestroy) Amount = -Amount;
+  // FIXME: Set BigStack if any stack slot references may be out of range.
+  // For now, just conservatively guesstimate based on unscaled indexing
+  // range. We'll end up allocating an unnecessary spill slot a lot, but
+  // realistically that's not a big deal at this stage of the game.
+  // The CSR spill slots have not been allocated yet, so estimateStackSize
+  // won't include them.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
+  bool BigStack = (CFSize >= 256);
+  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
+    AFI->setHasStackFrame(true);
+
+  // Estimate if we might need to scavenge a register at some point in order
+  // to materialize a stack offset. If so, either spill one additional
+  // callee-saved register or reserve a special spill slot to facilitate
+  // register scavenging. If we already spilled an extra callee-saved register
+  // above to keep the number of spills even, we don't need to do anything else
+  // here.
+  if (BigStack && !ExtraCSSpill) {
+
+    // If we're adding a register to spill here, we have to add two of them
+    // to keep the number of regs to spill even.
+    assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
+    unsigned Count = 0;
+    while (!UnspilledCSGPRs.empty() && Count < 2) {
+      unsigned Reg = UnspilledCSGPRs.back();
+      UnspilledCSGPRs.pop_back();
+      DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
+                   << " to get a scratch register.\n");
+      MRI->setPhysRegUsed(Reg);
+      ExtraCSSpill = true;
+      ++Count;
+    }
 
-    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
-    // doesn't have to pop anything), then the first operand will be zero too so
-    // this adjustment is a no-op.
-    if (CalleePopAmount == 0) {
-      // FIXME: in-function stack adjustment for calls is limited to 12-bits
-      // because there's no guaranteed temporary register available. Mostly call
-      // frames will be allocated at the start of a function so this is OK, but
-      // it is a limitation that needs dealing with.
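[Editorial aside, not part of the patch: the 256-byte BigStack threshold introduced above comes from the signed 9-bit byte-offset range of the unscaled load/store forms. A minimal sketch of the same check, with illustrative names rather than LLVM API:]

    // Illustrative only: mirrors the conservative BigStack estimate above.
    #include <cstdio>

    // An unscaled LDUR/STUR immediate is a signed 9-bit byte offset: [-256, 255].
    static bool mayNeedScavenging(unsigned EstimatedStackSize,
                                  unsigned NumGPRSpilled,
                                  unsigned NumFPRSpilled) {
      // CSR spill slots are not yet allocated at this point, so add them in
      // by hand (8 bytes per spilled 64-bit register).
      unsigned CFSize = EstimatedStackSize + 8 * (NumGPRSpilled + NumFPRSpilled);
      return CFSize >= 256;
    }

    int main() {
      // 200 bytes of locals plus two spilled GPR pairs pushes past 256.
      std::printf("%d\n", mayNeedScavenging(200, 4, 0)); // prints 1
      std::printf("%d\n", mayNeedScavenging(100, 2, 0)); // prints 0
      return 0;
    }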
-      assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
-      emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
+    // If we didn't find an extra callee-saved register to spill, create
+    // an emergency spill slot.
+    if (!ExtraCSSpill) {
+      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
+      RS->addScavengingFrameIndex(FI);
+      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
+                   << " as the emergency spill slot.\n");
     }
-  } else if (CalleePopAmount != 0) {
-    // If the calling convention demands that the callee pops arguments from the
-    // stack, we want to add it back if we have a reserved call frame.
-    assert(CalleePopAmount < 0xfff && "call frame too large");
-    emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
   }
-
-  MBB.erase(MI);
 }
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 032dd90..0e00d16 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -1,4 +1,4 @@
-//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
+//==-- AArch64FrameLowering.h - TargetFrameLowering for AArch64 --*- C++ -*-==//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,100 +7,67 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This class implements the AArch64-specific parts of the TargetFrameLowering
-// class.
+//
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_AARCH64_FRAMEINFO_H
-#define LLVM_AARCH64_FRAMEINFO_H
+#ifndef AArch64_FRAMELOWERING_H
+#define AArch64_FRAMELOWERING_H
 
-#include "AArch64Subtarget.h"
 #include "llvm/Target/TargetFrameLowering.h"
 
 namespace llvm {
+class AArch64Subtarget;
+class AArch64TargetMachine;
 
 class AArch64FrameLowering : public TargetFrameLowering {
-private:
-  // In order to unify the spilling and restoring of callee-saved registers into
-  // emitFrameMemOps, we need to be able to specify which instructions to use
-  // for the relevant memory operations on each register class. An array of the
-  // following struct is populated and passed in to achieve this.
-  struct LoadStoreMethod {
-    const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
-
-    // The preferred instruction.
-    unsigned PairOpcode; // E.g. LSPair64_STR
-
-    // Sometimes only a single register can be handled at once.
-    unsigned SingleOpcode; // E.g. LS64_STR
-  };
-protected:
-  const AArch64Subtarget &STI;
+  const AArch64TargetMachine &TM;
 
 public:
-  explicit AArch64FrameLowering(const AArch64Subtarget &sti)
-    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
-      STI(sti) {
-  }
+  explicit AArch64FrameLowering(const AArch64TargetMachine &TM,
+                                const AArch64Subtarget &STI)
+      : TargetFrameLowering(StackGrowsDown, 16, 0, 16,
+                            false /*StackRealignable*/),
+        TM(TM) {}
 
-  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
-  /// the function.
-  virtual void emitPrologue(MachineFunction &MF) const;
-  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
-  /// Decides how much stack adjustment to perform in each phase of the prologue
-  /// and epilogue.
-  void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
-                          uint64_t &Residual) const;
-
-  int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
-                                     unsigned &FrameReg, int SPAdj,
-                                     bool IsCalleeSaveOp) const;
-
-  virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                                    RegScavenger *RS) const;
-
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                         const std::vector<CalleeSavedInfo> &CSI,
-                                         const TargetRegisterInfo *TRI) const;
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                           const std::vector<CalleeSavedInfo> &CSI,
-                                           const TargetRegisterInfo *TRI) const;
+  void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MBBI,
+                                 unsigned FramePtr) const;
 
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator MI) const;
-
-  /// If the register is X30 (i.e. LR) and the return address is used in the
-  /// function then the callee-save store doesn't actually kill the register,
-  /// otherwise it does.
-  bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
-
-  /// This function emits the loads or stores required during prologue and
-  /// epilogue as efficiently as possible.
-  ///
-  /// The operations involved in setting up and tearing down the frame are
-  /// similar enough to warrant a shared function, particularly as discrepancies
-  /// between the two would be disastrous.
-  void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB,
-                       MachineBasicBlock::iterator MI,
-                       const std::vector<CalleeSavedInfo> &CSI,
-                       const TargetRegisterInfo *TRI,
-                       const LoadStoreMethod PossibleClasses[],
-                       unsigned NumClasses) const;
-
-
-  virtual bool hasFP(const MachineFunction &MF) const;
-
-  virtual bool useFPForAddressing(const MachineFunction &MF) const;
-
-  /// On AA
-  virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const override;
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const override;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+  int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+  int getFrameIndexReference(const MachineFunction &MF, int FI,
+                             unsigned &FrameReg) const override;
+  int resolveFrameIndexReference(const MachineFunction &MF, int FI,
+                                 unsigned &FrameReg,
+                                 bool PreferFP = false) const;
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const override;
+
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const override;
+
+  /// \brief Can this function use the red zone for local allocations.
+  bool canUseRedZone(const MachineFunction &MF) const;
+
+  bool hasFP(const MachineFunction &MF) const override;
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS) const override;
 };
 
 } // End llvm namespace
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index dac4b32..7007ffc 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -11,118 +11,119 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "aarch64-isel"
-#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
-#include "Utils/AArch64BaseInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h" // To access function attributes.
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
+#define DEBUG_TYPE "aarch64-isel"
+
 //===--------------------------------------------------------------------===//
-/// AArch64 specific code to select AArch64 machine instructions for
-/// SelectionDAG operations.
+/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
+/// instructions for SelectionDAG operations.
 ///
 namespace {
 
 class AArch64DAGToDAGISel : public SelectionDAGISel {
   AArch64TargetMachine &TM;
 
-  /// Keep a pointer to the AArch64Subtarget around so that we can
+  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
   /// make the right decision when generating code for different targets.
   const AArch64Subtarget *Subtarget;
 
+  bool ForCodeSize;
+
 public:
   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                                CodeGenOpt::Level OptLevel)
-    : SelectionDAGISel(tm, OptLevel), TM(tm),
-      Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
-  }
+      : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
+        ForCodeSize(false) {}
 
-  virtual const char *getPassName() const {
+  const char *getPassName() const override {
     return "AArch64 Instruction Selection";
   }
 
-  // Include the pieces autogenerated from the target description.
-#include "AArch64GenDAGISel.inc"
-
-  template<unsigned MemSize>
-  bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
-    const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
-    if (!CN || CN->getZExtValue() % MemSize != 0
-        || CN->getZExtValue() / MemSize > 0xfff)
-      return false;
-
-    UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
-    return true;
-  }
-
-  template<unsigned RegWidth>
-  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
-    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
-  }
-
-  /// Used for pre-lowered address-reference nodes, so we already know
-  /// the fields match. This operand's job is simply to add an
-  /// appropriate shift operand to the MOVZ/MOVK instruction.
-  template<unsigned LogShift>
-  bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
-    Imm = N;
-    Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
-    return true;
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+    ForCodeSize =
+        FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+                             Attribute::OptimizeForSize) ||
+        FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+    Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+    return SelectionDAGISel::runOnMachineFunction(MF);
   }
 
-  bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
-
-  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
-                                unsigned RegWidth);
+  SDNode *Select(SDNode *Node) override;
 
+  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+  /// inline asm expressions.
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
-                                    std::vector<SDValue> &OutOps);
-
-  bool SelectLogicalImm(SDValue N, SDValue &Imm);
-
-  template<unsigned RegWidth>
-  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
-    return SelectTSTBOperand(N, FixedPos, RegWidth);
+                                    std::vector<SDValue> &OutOps) override;
+
+  SDNode *SelectMLAV64LaneV128(SDNode *N);
+  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
+  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
+  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
+  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
+  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
+    return SelectShiftedRegister(N, false, Reg, Shift);
+  }
+  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
+    return SelectShiftedRegister(N, true, Reg, Shift);
+  }
+  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed(N, 1, Base, OffImm);
+  }
+  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed(N, 2, Base, OffImm);
+  }
+  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed(N, 4, Base, OffImm);
+  }
+  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed(N, 8, Base, OffImm);
+  }
+  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeIndexed(N, 16, Base, OffImm);
+  }
+  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
+  }
+  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
+  }
+  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
+  }
+  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
+  }
+  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
+    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
   }
 
-  bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
-
-  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
-                       unsigned Op64);
-
-  /// Put the given constant into a pool and return a DAG which will give its
-  /// address.
-  SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);
-
-  SDNode *TrySelectToMoveImm(SDNode *N);
-  SDNode *LowerToFPLitPool(SDNode *Node);
-  SDNode *SelectToLitPool(SDNode *N);
-
-  SDNode* Select(SDNode*);
-private:
-  /// Get the opcode for table lookup instruction
-  unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
-
-  /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
-  /// IsExt is to indicate if the result will be extended with an argument.
-  SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
+  template <int Width>
+  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
+                         SDValue &SignExtend, SDValue &DoShift) {
+    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
+  }
 
-  /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
-  SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
-                    const uint16_t *Opcode);
+  template <int Width>
+  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
+                         SDValue &SignExtend, SDValue &DoShift) {
+    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
+  }
 
-  /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
-  SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
-                    const uint16_t *Opcodes);
 
   /// Form sequences of consecutive 64/128-bit registers for use in NEON
   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
@@ -136,315 +137,713 @@ private:
   SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
                       unsigned SubRegs[]);
 
-  /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
-  /// The opcode array specifies the instructions used for load.
-  SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
-                       const uint16_t *Opcodes);
+  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
+
+  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
+
+  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
+                     unsigned SubRegIdx);
+  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
+                         unsigned SubRegIdx);
+  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+
+  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
+  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
+  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+
+  SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
+  SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
+
+  SDNode *SelectBitfieldExtractOp(SDNode *N);
+  SDNode *SelectBitfieldInsertOp(SDNode *N);
+
+  SDNode *SelectLIBM(SDNode *N);
+
+// Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc" + +private: + bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, + SDValue &Shift); + bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, + SDValue &OffImm); + bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, + SDValue &OffImm); + bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base, + SDValue &Offset, SDValue &SignExtend, + SDValue &DoShift); + bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base, + SDValue &Offset, SDValue &SignExtend, + SDValue &DoShift); + bool isWorthFolding(SDValue V) const; + bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, + SDValue &Offset, SDValue &SignExtend); - /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4. - /// The opcode arrays specify the instructions used for load/store. - SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, - unsigned NumVecs, const uint16_t *Opcodes); + template + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) { + return SelectCVTFixedPosOperand(N, FixedPos, RegWidth); + } - SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, - SDValue Operand); + bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width); }; +} // end anonymous namespace + +/// isIntImmediate - This method tests to see if the node is a constant +/// operand. If so Imm will receive the 32-bit value. +static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { + if (const ConstantSDNode *C = dyn_cast(N)) { + Imm = C->getZExtValue(); + return true; + } + return false; } -bool -AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth) { - const ConstantFPSDNode *CN = dyn_cast(N); - if (!CN) return false; +// isIntImmediate - This method tests to see if a constant operand. +// If so Imm will receive the value. +static bool isIntImmediate(SDValue N, uint64_t &Imm) { + return isIntImmediate(N.getNode(), Imm); +} - // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits - // is between 1 and 32 for a destination w-register, or 1 and 64 for an - // x-register. - // - // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we - // want THIS_NODE to be 2^fbits. This is much easier to deal with using - // integers. - bool IsExact; +// isOpcWithIntImmediate - This method tests to see if the node is a specific +// opcode and that it has a immediate integer right operand. +// If so Imm will receive the 32 bit value. +static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, + uint64_t &Imm) { + return N->getOpcode() == Opc && + isIntImmediate(N->getOperand(1).getNode(), Imm); +} - // fbits is between 1 and 64 in the worst-case, which means the fmul - // could have 2^64 as an actual operand. Need 65 bits of precision. - APSInt IntVal(65, true); - CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); +bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( + const SDValue &Op, char ConstraintCode, std::vector &OutOps) { + assert(ConstraintCode == 'm' && "unexpected asm memory constraint"); + // Require the address to be in a register. That is safe for all AArch64 + // variants and it is hard to do anything much smarter without knowing + // how the operand is used. + OutOps.push_back(Op); + return false; +} - // N.b. isPowerOf2 also checks for > 0. 
-  if (!IsExact || !IntVal.isPowerOf2()) return false;
-  unsigned FBits = IntVal.logBase2();
+/// SelectArithImmed - Select an immediate value that can be represented as
+/// a 12-bit value shifted left by either 0 or 12. If so, return true with
+/// Val set to the 12-bit value and Shift set to the shifter operand.
+bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
+                                           SDValue &Shift) {
+  // This function is called from the addsub_shifted_imm ComplexPattern,
+  // which lists [imm] as the list of opcodes it's interested in; however,
+  // we still need to check whether the operand is actually an immediate
+  // here because the ComplexPattern opcode list is only used in
+  // root-level opcode matching.
+  if (!isa<ConstantSDNode>(N.getNode()))
+    return false;
 
-  // Checks above should have guaranteed that we haven't lost information in
-  // finding FBits, but it must still be in range.
-  if (FBits == 0 || FBits > RegWidth) return false;
+  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
+  unsigned ShiftAmt;
 
-  FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
+  if (Immed >> 12 == 0) {
+    ShiftAmt = 0;
+  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
+    ShiftAmt = 12;
+    Immed = Immed >> 12;
+  } else
+    return false;
+
+  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
+  Val = CurDAG->getTargetConstant(Immed, MVT::i32);
+  Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
   return true;
 }
 
-bool
-AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                                  char ConstraintCode,
-                                                  std::vector<SDValue> &OutOps) {
-  switch (ConstraintCode) {
-  default: llvm_unreachable("Unrecognised AArch64 memory constraint");
-  case 'm':
-    // FIXME: more freedom is actually permitted for 'm'. We can go
-    // hunting for a base and an offset if we want. Of course, since
-    // we don't really know how the operand is going to be used we're
-    // probably restricted to the load/store pair's simm7 as an offset
-    // range anyway.
-  case 'Q':
-    OutOps.push_back(Op);
+/// SelectNegArithImmed - As above, but negates the value before trying to
+/// select it.
+bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
+                                              SDValue &Shift) {
+  // This function is called from the addsub_shifted_imm ComplexPattern,
+  // which lists [imm] as the list of opcodes it's interested in; however,
+  // we still need to check whether the operand is actually an immediate
+  // here because the ComplexPattern opcode list is only used in
+  // root-level opcode matching.
+  if (!isa<ConstantSDNode>(N.getNode()))
+    return false;
+
+  // The immediate operand must be a 24-bit zero-extended immediate.
+  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
+
+  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
+  // have the opposite effect on the C flag, so this pattern mustn't match under
+  // those circumstances.
+  if (Immed == 0)
+    return false;
+
+  if (N.getValueType() == MVT::i32)
+    Immed = ~((uint32_t)Immed) + 1;
+  else
+    Immed = ~Immed + 1ULL;
+  if (Immed & 0xFFFFFFFFFF000000ULL)
+    return false;
+
+  Immed &= 0xFFFFFFULL;
+  return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
+}
+
+/// getShiftTypeForNode - Translate a shift node to the corresponding
+/// ShiftType value.
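[Editorial aside, not part of the patch: the encoding rule enforced by SelectArithImmed/SelectNegArithImmed above is compact enough to restate in isolation. The helper below is illustrative only, not LLVM API.]

    // Illustrative only; mirrors SelectArithImmed's encoding check: an
    // add/sub immediate is a 12-bit value, optionally shifted left by 12.
    #include <cstdint>
    #include <cstdio>

    static bool isArithImmed(uint64_t Immed, unsigned &ShiftAmt, uint64_t &Val) {
      if (Immed >> 12 == 0) {
        ShiftAmt = 0;                 // e.g. 0xabc    -> add x0, x1, #0xabc
      } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
        ShiftAmt = 12;                // e.g. 0xabc000 -> #0xabc, lsl #12
        Immed >>= 12;
      } else {
        return false;                 // e.g. 0xabc001 cannot be encoded
      }
      Val = Immed;
      return true;
    }

    int main() {
      unsigned Shift;
      uint64_t Val;
      const uint64_t Tests[] = { 0xabc, 0xabc000, 0xabc001 };
      for (uint64_t Imm : Tests)
        std::printf("%#llx -> %d\n", (unsigned long long)Imm,
                    isArithImmed(Imm, Shift, Val));
      return 0;
    }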
+static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
+  switch (N.getOpcode()) {
+  default:
+    return AArch64_AM::InvalidShiftExtend;
+  case ISD::SHL:
+    return AArch64_AM::LSL;
+  case ISD::SRL:
+    return AArch64_AM::LSR;
+  case ISD::SRA:
+    return AArch64_AM::ASR;
+  case ISD::ROTR:
+    return AArch64_AM::ROR;
+  }
+}
+
+/// \brief Determine whether it is worth folding V into an extended register.
+bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
+  // It hurts if the value is used at least twice, unless we are optimizing
+  // for code size.
+  if (ForCodeSize || V.hasOneUse())
+    return true;
   return false;
 }
 
-bool
-AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
-  ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
-  if (!Imm || !Imm->getValueAPF().isPosZero())
+/// SelectShiftedRegister - Select a "shifted register" operand. If the value
+/// is not shifted, set the Shift operand to default of "LSL 0". The logical
+/// instructions allow the shifted register to be rotated, but the arithmetic
+/// instructions do not. The AllowROR parameter specifies whether ROR is
+/// supported.
+bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
+                                                SDValue &Reg, SDValue &Shift) {
+  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
+  if (ShType == AArch64_AM::InvalidShiftExtend)
+    return false;
+  if (!AllowROR && ShType == AArch64_AM::ROR)
     return false;
 
-  // Doesn't actually carry any information, but keeps TableGen quiet.
-  Dummy = CurDAG->getTargetConstant(0, MVT::i32);
-  return true;
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    unsigned BitSize = N.getValueType().getSizeInBits();
+    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
+    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
+
+    Reg = N.getOperand(0);
+    Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
+    return isWorthFolding(N);
+  }
+
+  return false;
+}
+
+/// getExtendTypeForNode - Translate an extend node to the corresponding
+/// ExtendType value.
+static AArch64_AM::ShiftExtendType
+getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
+  if (N.getOpcode() == ISD::SIGN_EXTEND ||
+      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    EVT SrcVT;
+    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
+      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
+    else
+      SrcVT = N.getOperand(0).getValueType();
+
+    if (!IsLoadStore && SrcVT == MVT::i8)
+      return AArch64_AM::SXTB;
+    else if (!IsLoadStore && SrcVT == MVT::i16)
+      return AArch64_AM::SXTH;
+    else if (SrcVT == MVT::i32)
+      return AArch64_AM::SXTW;
+    assert(SrcVT != MVT::i64 && "extend from 64-bits?");
+
+    return AArch64_AM::InvalidShiftExtend;
+  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
+             N.getOpcode() == ISD::ANY_EXTEND) {
+    EVT SrcVT = N.getOperand(0).getValueType();
+    if (!IsLoadStore && SrcVT == MVT::i8)
+      return AArch64_AM::UXTB;
+    else if (!IsLoadStore && SrcVT == MVT::i16)
+      return AArch64_AM::UXTH;
+    else if (SrcVT == MVT::i32)
+      return AArch64_AM::UXTW;
+    assert(SrcVT != MVT::i64 && "extend from 64-bits?");
+
+    return AArch64_AM::InvalidShiftExtend;
+  } else if (N.getOpcode() == ISD::AND) {
+    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    if (!CSD)
+      return AArch64_AM::InvalidShiftExtend;
+    uint64_t AndMask = CSD->getZExtValue();
+
+    switch (AndMask) {
+    default:
+      return AArch64_AM::InvalidShiftExtend;
+    case 0xFF:
+      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
+    case 0xFFFF:
+      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
+    case 0xFFFFFFFF:
+      return AArch64_AM::UXTW;
+    }
+  }
+
+  return AArch64_AM::InvalidShiftExtend;
 }
 
-bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
-  uint32_t Bits;
-  uint32_t RegWidth = N.getValueType().getSizeInBits();
+// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
+static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
+  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
+      DL->getOpcode() != AArch64ISD::DUPLANE32)
+    return false;
 
-  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
-  if (!CN) return false;
+  SDValue SV = DL->getOperand(0);
+  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
+    return false;
 
-  if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
+  SDValue EV = SV.getOperand(1);
+  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
     return false;
 
-  Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
+  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
+  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
+  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
+  LaneOp = EV.getOperand(0);
   return true;
 }
 
-SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
-  SDNode *ResNode;
-  SDLoc dl(Node);
-  EVT DestType = Node->getValueType(0);
-  unsigned DestWidth = DestType.getSizeInBits();
-
-  unsigned MOVOpcode;
-  EVT MOVType;
-  int UImm16, Shift;
-  uint32_t LogicalBits;
-
-  uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
-  if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
-    MOVType = DestType;
-    MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
-  } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
-    MOVType = DestType;
-    MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
-  } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
-    // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
-    // use a 32-bit instruction: "movn w0, 0xedbc".
-    MOVType = MVT::i32;
-    MOVOpcode = AArch64::MOVNwii;
-  } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
-    MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
-    uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
-
-    return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
-                                  CurDAG->getRegister(ZR, DestType),
-                                  CurDAG->getTargetConstant(LogicalBits, MVT::i32));
-  } else {
-    // Can't handle it in one instruction. There's scope for permitting two (or
-    // more) instructions, but that'll need more thought.
-    return NULL;
+// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand
+// is a high lane extract.
+static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
+                             SDValue &LaneOp, int &LaneIdx) {
+
+  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
+    std::swap(Op0, Op1);
+    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
+      return false;
+  }
+  StdOp = Op1;
+  return true;
+}
+
+/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
+/// is a lane in the upper half of a 128-bit vector. Recognize and select this
+/// so that we don't emit unnecessary lane extracts.
+SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
+  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
+  int LaneIdx = -1; // Will hold the lane index.
+
+  if (Op1.getOpcode() != ISD::MUL ||
+      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
+                        LaneIdx)) {
+    std::swap(Op0, Op1);
+    if (Op1.getOpcode() != ISD::MUL ||
+        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
+                          LaneIdx))
+      return nullptr;
+  }
+
+  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+
+  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
+
+  unsigned MLAOpc = ~0U;
+
+  switch (N->getSimpleValueType(0).SimpleTy) {
+  default:
+    llvm_unreachable("Unrecognized MLA.");
+  case MVT::v4i16:
+    MLAOpc = AArch64::MLAv4i16_indexed;
+    break;
+  case MVT::v8i16:
+    MLAOpc = AArch64::MLAv8i16_indexed;
+    break;
+  case MVT::v2i32:
+    MLAOpc = AArch64::MLAv2i32_indexed;
+    break;
+  case MVT::v4i32:
+    MLAOpc = AArch64::MLAv4i32_indexed;
+    break;
   }
 
-  ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
-                                   CurDAG->getTargetConstant(UImm16, MVT::i32),
-                                   CurDAG->getTargetConstant(Shift, MVT::i32));
+  return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
+  SDValue SMULLOp0;
+  SDValue SMULLOp1;
+  int LaneIdx;
+
+  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
+                        LaneIdx))
+    return nullptr;
+
+  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+
+  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
+
+  unsigned SMULLOpc = ~0U;
+
+  if (IntNo == Intrinsic::aarch64_neon_smull) {
+    switch (N->getSimpleValueType(0).SimpleTy) {
+    default:
+      llvm_unreachable("Unrecognized SMULL.");
+    case MVT::v4i32:
+      SMULLOpc = AArch64::SMULLv4i16_indexed;
+      break;
+    case MVT::v2i64:
+      SMULLOpc = AArch64::SMULLv2i32_indexed;
+      break;
+    }
+  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
+    switch (N->getSimpleValueType(0).SimpleTy) {
+    default:
+      llvm_unreachable("Unrecognized SMULL.");
+    case MVT::v4i32:
+      SMULLOpc = AArch64::UMULLv4i16_indexed;
+      break;
+    case MVT::v2i64:
+      SMULLOpc = AArch64::UMULLv2i32_indexed;
+      break;
+    }
+  } else
+    llvm_unreachable("Unrecognized intrinsic.");
+
+  return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
+}
+
+/// Instructions that accept extend modifiers like UXTW expect the register
+/// being extended to be a GPR32, but the incoming DAG might be acting on a
+/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
+/// this is the case.
+static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
+  if (N.getValueType() == MVT::i32)
+    return N;
+
+  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+                                               SDLoc(N), MVT::i32, N, SubReg);
+  return SDValue(Node, 0);
+}
+
+
+/// SelectArithExtendedRegister - Select an "extended register" operand. This
+/// operand folds in an extend followed by an optional left shift.
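[Editorial aside, not part of the patch: before the implementation below, a compact sketch of what an "extended register" operand accepts. Names here are illustrative, not LLVM API; the extend is inferred from a sign/zero-extension or an AND mask (as in getExtendTypeForNode above), and any folded left shift must be at most 4.]

    // Illustrative only; mirrors the checks SelectArithExtendedRegister makes.
    #include <cstdint>

    enum class Extend { UXTB, UXTH, UXTW, Invalid };

    // AND masks that behave as zero-extends, per getExtendTypeForNode above.
    static Extend extendFromMask(uint64_t AndMask) {
      switch (AndMask) {
      case 0xFF:       return Extend::UXTB; // and x, 0xff       ~ uxtb
      case 0xFFFF:     return Extend::UXTH; // and x, 0xffff     ~ uxth
      case 0xFFFFFFFF: return Extend::UXTW; // and x, 0xffffffff ~ uxtw
      default:         return Extend::Invalid;
      }
    }

    // An arithmetic operand like "w2, uxtb #3" allows shifts of 0..4 only.
    static bool isValidArithExtend(uint64_t AndMask, unsigned ShiftVal) {
      return extendFromMask(AndMask) != Extend::Invalid && ShiftVal <= 4;
    }

    int main() {
      bool a = isValidArithExtend(0xFF, 3); // true:  add x0, x1, w2, uxtb #3
      bool b = isValidArithExtend(0xFF, 5); // false: shift amount too large
      bool c = isValidArithExtend(0xF0, 1); // false: 0xf0 is no extend mask
      return (a && !b && !c) ? 0 : 1;
    }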
+bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
+                                                      SDValue &Shift) {
+  unsigned ShiftVal = 0;
+  AArch64_AM::ShiftExtendType Ext;
+
+  if (N.getOpcode() == ISD::SHL) {
+    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+    if (!CSD)
+      return false;
+    ShiftVal = CSD->getZExtValue();
+    if (ShiftVal > 4)
+      return false;
+
+    Ext = getExtendTypeForNode(N.getOperand(0));
+    if (Ext == AArch64_AM::InvalidShiftExtend)
+      return false;
+
+    Reg = N.getOperand(0).getOperand(0);
+  } else {
+    Ext = getExtendTypeForNode(N);
+    if (Ext == AArch64_AM::InvalidShiftExtend)
+      return false;
-  if (MOVType != DestType) {
-    ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
-                                     MVT::i64, MVT::i32, MVT::Other,
-                                     CurDAG->getTargetConstant(0, MVT::i64),
-                                     SDValue(ResNode, 0),
-                                     CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+    Reg = N.getOperand(0);
   }
 
-  return ResNode;
+  // AArch64 mandates that the RHS of the operation must use the smallest
+  // register class that could contain the size being extended from. Thus,
+  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
+  // there might not be an actual 32-bit value in the program. We can
+  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
+  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
+  Reg = narrowIfNeeded(CurDAG, Reg);
+  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
+  return isWorthFolding(N);
 }
 
-SDValue
-AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
-                                                const Constant *CV) {
-  EVT PtrVT = getTargetLowering()->getPointerTy();
-
-  switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
-  case CodeModel::Small: {
-    unsigned Alignment =
-      getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
-    return CurDAG->getNode(
-        AArch64ISD::WrapperSmall, DL, PtrVT,
-        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
-        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
-        CurDAG->getConstant(Alignment, MVT::i32));
-  }
-  case CodeModel::Large: {
-    SDNode *LitAddr;
-    LitAddr = CurDAG->getMachineNode(
-        AArch64::MOVZxii, DL, PtrVT,
-        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
-        CurDAG->getTargetConstant(3, MVT::i32));
-    LitAddr = CurDAG->getMachineNode(
-        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
-        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
-        CurDAG->getTargetConstant(2, MVT::i32));
-    LitAddr = CurDAG->getMachineNode(
-        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
-        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
-        CurDAG->getTargetConstant(1, MVT::i32));
-    LitAddr = CurDAG->getMachineNode(
-        AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
-        CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
-        CurDAG->getTargetConstant(0, MVT::i32));
-    return SDValue(LitAddr, 0);
-  }
-  default:
-    llvm_unreachable("Only small and large code models supported now");
-  }
+/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
+/// immediate" address. The "Size" argument is the size in bytes of the memory
+/// reference, which determines the scale.
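[Editorial aside, not part of the patch: the legality rule the function below enforces for the scaled form reduces to a few lines. The helper is illustrative, not LLVM API: the byte offset must be non-negative, a multiple of the access size, and the scaled value must fit in 12 bits.]

    // Illustrative only; the scaled-offset test used by SelectAddrModeIndexed.
    #include <cstdint>

    static bool isScaledOffsetLegal(int64_t ByteOffset, unsigned Size) {
      // Size is the access size in bytes (1, 2, 4, 8 or 16); Scale = log2(Size).
      unsigned Scale = 0;
      while ((1u << Scale) < Size)
        ++Scale;
      return (ByteOffset & (Size - 1)) == 0 && ByteOffset >= 0 &&
             ByteOffset < (int64_t)(0x1000 << Scale);
    }

    int main() {
      bool a = isScaledOffsetLegal(32760, 8); // true:  ldr x0, [x1, #32760]
      bool b = isScaledOffsetLegal(4, 8);     // false: not 8-byte aligned
      bool c = isScaledOffsetLegal(-8, 8);    // false: negative needs LDUR
      return (a && !b && !c) ? 0 : 1;
    }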
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
+                                                SDValue &Base, SDValue &OffImm) {
+  const TargetLowering *TLI = getTargetLowering();
+  if (N.getOpcode() == ISD::FrameIndex) {
+    int FI = cast<FrameIndexSDNode>(N)->getIndex();
+    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+    OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+    return true;
+  }
+
+  if (N.getOpcode() == AArch64ISD::ADDlow) {
+    GlobalAddressSDNode *GAN =
+        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
+    Base = N.getOperand(0);
+    OffImm = N.getOperand(1);
+    if (!GAN)
+      return true;
+
+    const GlobalValue *GV = GAN->getGlobal();
+    unsigned Alignment = GV->getAlignment();
+    const DataLayout *DL = TLI->getDataLayout();
+    if (Alignment == 0 && !Subtarget->isTargetDarwin())
+      Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
+
+    if (Alignment >= Size)
+      return true;
+  }
+
+  if (CurDAG->isBaseWithConstantOffset(N)) {
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      int64_t RHSC = (int64_t)RHS->getZExtValue();
+      unsigned Scale = Log2_32(Size);
+      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
+        Base = N.getOperand(0);
+        if (Base.getOpcode() == ISD::FrameIndex) {
+          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+        }
+        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
+        return true;
+      }
+    }
+  }
+
+  // Before falling back to our general case, check if the unscaled
+  // instructions can handle this. If so, that's preferable.
+  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
+    return false;
+
+  // Base only. The address will be materialized into a register before
+  // the memory is accessed.
+  //    add x0, Xbase, #offset
+  //    ldr x0, [x0]
+  Base = N;
+  OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+  return true;
+}
 
-SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
-  SDLoc DL(Node);
-  uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
-  int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
-  EVT DestType = Node->getValueType(0);
-
-  // Since we may end up loading a 64-bit constant from a 32-bit entry the
-  // constant in the pool may have a different type to the eventual node.
-  ISD::LoadExtType Extension;
-  EVT MemType;
-
-  assert((DestType == MVT::i64 || DestType == MVT::i32)
-         && "Only expect integer constants at the moment");
-
-  if (DestType == MVT::i32) {
-    Extension = ISD::NON_EXTLOAD;
-    MemType = MVT::i32;
-  } else if (UnsignedVal <= UINT32_MAX) {
-    Extension = ISD::ZEXTLOAD;
-    MemType = MVT::i32;
-  } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
-    Extension = ISD::SEXTLOAD;
-    MemType = MVT::i32;
-  } else {
-    Extension = ISD::NON_EXTLOAD;
-    MemType = MVT::i64;
-  }
-
-  Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
-                                                  MemType.getSizeInBits()),
-                                  UnsignedVal);
-  SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
-  unsigned Alignment =
-    getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
-
-  return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
-                            PoolAddr,
-                            MachinePointerInfo::getConstantPool(), MemType,
-                            /* isVolatile = */ false,
-                            /* isNonTemporal = */ false,
-                            Alignment).getNode();
+/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
+/// immediate" address. This should only match when there is an offset that
+/// is not valid for a scaled immediate addressing mode. The "Size" argument
The "Size" argument +/// is the size in bytes of the memory reference, which is needed here to know +/// what is valid for a scaled immediate. +bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, + SDValue &Base, + SDValue &OffImm) { + if (!CurDAG->isBaseWithConstantOffset(N)) + return false; + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int64_t RHSC = RHS->getSExtValue(); + // If the offset is valid as a scaled immediate, don't match here. + if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && + RHSC < (0x1000 << Log2_32(Size))) + return false; + if (RHSC >= -256 && RHSC < 256) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + const TargetLowering *TLI = getTargetLowering(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); + return true; + } + } + return false; } -SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { - SDLoc DL(Node); - const ConstantFP *FV = cast(Node)->getConstantFPValue(); - EVT DestType = Node->getValueType(0); - - unsigned Alignment = - getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType()); - SDValue PoolAddr = getConstantPoolItemAddress(DL, FV); - - return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /* isVolatile = */ false, - /* isNonTemporal = */ false, - /* isInvariant = */ true, - Alignment).getNode(); +static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); + SDValue ImpDef = SDValue( + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), + 0); + MachineSDNode *Node = CurDAG->getMachineNode( + TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); + return SDValue(Node, 0); } -bool -AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos, - unsigned RegWidth) { - const ConstantSDNode *CN = dyn_cast(N); - if (!CN) return false; +/// \brief Check if the given SHL node (\p N), can be used to form an +/// extended register for an addressing mode. +bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, + bool WantExtend, SDValue &Offset, + SDValue &SignExtend) { + assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); + ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); + if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) + return false; + + if (WantExtend) { + AArch64_AM::ShiftExtendType Ext = + getExtendTypeForNode(N.getOperand(0), true); + if (Ext == AArch64_AM::InvalidShiftExtend) + return false; - uint64_t Val = CN->getZExtValue(); + Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); + SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32); + } else { + Offset = N.getOperand(0); + SignExtend = CurDAG->getTargetConstant(0, MVT::i32); + } - if (!isPowerOf2_64(Val)) return false; + unsigned LegalShiftVal = Log2_32(Size); + unsigned ShiftVal = CSD->getZExtValue(); - unsigned TestedBit = Log2_64(Val); - // Checks above should have guaranteed that we haven't lost information in - // finding TestedBit, but it must still be in range. 
-  if (TestedBit >= RegWidth) return false;
+  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
+    return false;
 
-  FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
-  return true;
+  if (isWorthFolding(N))
+    return true;
+
+  return false;
 }
 
-SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
-                                          unsigned Op16,unsigned Op32,
-                                          unsigned Op64) {
-  // Mostly direct translation to the given operations, except that we preserve
-  // the AtomicOrdering for use later on.
-  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
-  EVT VT = AN->getMemoryVT();
-
-  unsigned Op;
-  if (VT == MVT::i8)
-    Op = Op8;
-  else if (VT == MVT::i16)
-    Op = Op16;
-  else if (VT == MVT::i32)
-    Op = Op32;
-  else if (VT == MVT::i64)
-    Op = Op64;
-  else
-    llvm_unreachable("Unexpected atomic operation");
+bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
+                                            SDValue &Base, SDValue &Offset,
+                                            SDValue &SignExtend,
+                                            SDValue &DoShift) {
+  if (N.getOpcode() != ISD::ADD)
+    return false;
+  SDValue LHS = N.getOperand(0);
+  SDValue RHS = N.getOperand(1);
 
-  SmallVector Ops;
-  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
-    Ops.push_back(AN->getOperand(i));
+  // We don't want to match immediate adds here, because they are better lowered
+  // to the register-immediate addressing modes.
+  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
+    return false;
+
+  // Check if this particular node is reused in any non-memory related
+  // operation. If yes, do not try to fold this node into the address
+  // computation, since the computation will be kept.
+  const SDNode *Node = N.getNode();
+  for (SDNode *UI : Node->uses()) {
+    if (!isa<MemSDNode>(*UI))
+      return false;
+  }
+
+  // Remember if it is worth folding N when it produces extended register.
+  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
+
+  // Try to match a shifted extend on the RHS.
+  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
+      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
+    Base = LHS;
+    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+    return true;
+  }
+
+  // Try to match a shifted extend on the LHS.
+  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
+      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
+    Base = RHS;
+    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+    return true;
+  }
+
+  // There was no shift, whatever else we find.
+  DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+
+  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
+  // Try to match an unshifted extend on the LHS.
+  if (IsExtendedRegisterWorthFolding &&
+      (Ext = getExtendTypeForNode(LHS, true)) !=
+          AArch64_AM::InvalidShiftExtend) {
+    Base = RHS;
+    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
+    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+    if (isWorthFolding(LHS))
+      return true;
+  }
+
+  // Try to match an unshifted extend on the RHS.
+  if (IsExtendedRegisterWorthFolding &&
+      (Ext = getExtendTypeForNode(RHS, true)) !=
+          AArch64_AM::InvalidShiftExtend) {
+    Base = LHS;
+    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
+    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+    if (isWorthFolding(RHS))
+      return true;
+  }
+
+  return false;
+}
+
+bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
+                                            SDValue &Base, SDValue &Offset,
+                                            SDValue &SignExtend,
+                                            SDValue &DoShift) {
+  if (N.getOpcode() != ISD::ADD)
+    return false;
+  SDValue LHS = N.getOperand(0);
+  SDValue RHS = N.getOperand(1);
+
+  // We don't want to match immediate adds here, because they are better lowered
+  // to the register-immediate addressing modes.
+  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
+    return false;
+
+  // Check if this particular node is reused in any non-memory related
+  // operation. If yes, do not try to fold this node into the address
+  // computation, since the computation will be kept.
+  const SDNode *Node = N.getNode();
+  for (SDNode *UI : Node->uses()) {
+    if (!isa<MemSDNode>(*UI))
+      return false;
+  }
+
+  // Remember if it is worth folding N when it produces extended register.
+  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
+
+  // Try to match a shifted extend on the RHS.
+  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
+      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
+    Base = LHS;
+    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+    return true;
+  }
-  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
-  Ops.push_back(AN->getOperand(0)); // Chain moves to the end
 
+  // Try to match a shifted extend on the LHS.
+  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
+      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
+    Base = RHS;
+    DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+    return true;
+  }
-  return CurDAG->SelectNodeTo(Node, Op,
-                              AN->getValueType(0), MVT::Other,
-                              &Ops[0], Ops.size());
 
+  // Match any non-shifted, non-extend, non-immediate add expression.
+  Base = LHS;
+  Offset = RHS;
+  SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
+  DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+  // Reg1 + Reg2 is free: no check needed.
+ return true; } SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { AArch64::DPairRegClassID, - AArch64::DTripleRegClassID, - AArch64::DQuadRegClassID }; - static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1, - AArch64::dsub_2, AArch64::dsub_3 }; + static unsigned RegClassIDs[] = { + AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; + static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3 }; return createTuple(Regs, RegClassIDs, SubRegs); } SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { AArch64::QPairRegClassID, - AArch64::QTripleRegClassID, - AArch64::QQuadRegClassID }; - static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1, - AArch64::qsub_2, AArch64::qsub_3 }; + static unsigned RegClassIDs[] = { + AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; + static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; return createTuple(Regs, RegClassIDs, SubRegs); } @@ -478,1100 +877,2159 @@ SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, return SDValue(N, 0); } +SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, + unsigned Opc, bool isExt) { + SDLoc dl(N); + EVT VT = N->getValueType(0); -// Get the register stride update opcode of a VLD/VST instruction that -// is otherwise equivalent to the given fixed stride updating instruction. -static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { - switch (Opc) { - default: break; - case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register; - case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register; - case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register; - case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register; - case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register; - case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register; - case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register; - case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register; - - case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register; - case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register; - case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register; - case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register; - case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register; - case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register; - case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register; - - case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register; - case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register; - case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register; - case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register; - case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register; - case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register; - case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register; - - case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register; - case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register; - case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register; - case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register; - case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register; - case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register; - case AArch64::LD4WB_2D_fixed: return 
AArch64::LD4WB_2D_register; - - case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register; - case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register; - case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register; - case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register; - case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register; - case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register; - case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register; - case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register; - - case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register; - case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register; - case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register; - case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register; - case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register; - case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register; - case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register; - case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register; - - case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register; - case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register; - case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register; - case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register; - case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register; - case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register; - case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register; - case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register; - - case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register; - case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register; - case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register; - case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register; - case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register; - case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register; - case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register; - case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register; - - case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register; - case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register; - case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register; - case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register; - case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register; - case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register; - case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register; - - case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register; - case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register; - case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register; - case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register; - case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register; - case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register; - case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register; - - case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register; - case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register; - case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register; - case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register; - case AArch64::ST4WB_8H_fixed: return 
AArch64::ST4WB_8H_register; - case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register; - case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register; - - case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register; - case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register; - case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register; - case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register; - case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register; - case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register; - case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register; - case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register; - - case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register; - case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register; - case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register; - case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register; - case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register; - case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register; - case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register; - case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register; - - case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register; - case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register; - case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register; - case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register; - case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register; - case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register; - case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register; - case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register; - - // Post-index of duplicate loads - case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register; - case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register; - case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register; - case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register; - case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register; - case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register; - case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register; - case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register; - - case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register; - case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register; - case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register; - case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register; - case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register; - case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register; - case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register; - case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register; - - case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register; - case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register; - case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register; - case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register; - case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register; - case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register; - case AArch64::LD4R_WB_4S_fixed: return 
AArch64::LD4R_WB_4S_register; - case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register; - - // Post-index of lane loads - case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register; - case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register; - case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register; - case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register; - - case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register; - case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register; - case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register; - case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register; - - case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register; - case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register; - case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register; - case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register; - - // Post-index of lane stores - case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register; - case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register; - case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register; - case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register; - - case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register; - case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register; - case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register; - case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register; - - case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register; - case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register; - case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register; - case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register; - } - return Opc; // If not one we handle, return it unchanged. -} - -SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); + unsigned ExtOff = isExt; - EVT VT = N->getValueType(0); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; - default: llvm_unreachable("unhandled vector load type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; + // Form a REG_SEQUENCE to force register allocation. + unsigned Vec0Off = ExtOff + 1; + SmallVector Regs(N->op_begin() + Vec0Off, + N->op_begin() + Vec0Off + NumVecs); + SDValue RegSeq = createQTuple(Regs); - SmallVector Ops; - unsigned AddrOpIdx = isUpdating ? 
1 : 2; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address + SmallVector Ops; + if (isExt) + Ops.push_back(N->getOperand(1)); + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); + return CurDAG->getMachineNode(Opc, dl, VT, Ops); +} - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); +SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { + LoadSDNode *LD = cast(N); + if (LD->isUnindexed()) + return nullptr; + EVT VT = LD->getMemoryVT(); + EVT DstVT = N->getValueType(0); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; + + // We're not doing validity checking here. That was done when checking + // if we should mark the load as indexed or not. We're just selecting + // the right instruction. + unsigned Opcode = 0; + + ISD::LoadExtType ExtType = LD->getExtensionType(); + bool InsertTo64 = false; + if (VT == MVT::i64) + Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; + else if (VT == MVT::i32) { + if (ExtType == ISD::NON_EXTLOAD) + Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; + else if (ExtType == ISD::SEXTLOAD) + Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; + else { + Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; + InsertTo64 = true; + // The result of the load is only i32. It's the subreg_to_reg that makes + // it into an i64. + DstVT = MVT::i32; + } + } else if (VT == MVT::i16) { + if (ExtType == ISD::SEXTLOAD) { + if (DstVT == MVT::i64) + Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; + else + Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; + } else { + Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; + InsertTo64 = DstVT == MVT::i64; + // The result of the load is only i32. It's the subreg_to_reg that makes + // it into an i64. + DstVT = MVT::i32; + } + } else if (VT == MVT::i8) { + if (ExtType == ISD::SEXTLOAD) { + if (DstVT == MVT::i64) + Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; + else + Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; + } else { + Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; + InsertTo64 = DstVT == MVT::i64; + // The result of the load is only i32. It's the subreg_to_reg that makes + // it into an i64. + DstVT = MVT::i32; + } + } else if (VT == MVT::f32) { + Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; + } else if (VT == MVT::f64 || VT.is64BitVector()) { + Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; + } else if (VT.is128BitVector()) { + Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost; + } else + return nullptr; + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + ConstantSDNode *OffsetOp = cast(LD->getOffset()); + int OffsetVal = (int)OffsetOp->getZExtValue(); + SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); + SDValue Ops[] = { Base, Offset, Chain }; + SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT, + MVT::Other, Ops); + // Either way, we're replacing the node, so tell the caller that. 
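SelectIndexedLoad only chooses opcodes; legality of the indexed form was decided earlier. As a reminder of the semantics being selected, a small sketch of pre- versus post-indexed loads (the Indexed struct and function names are invented for illustration):

    #include <cstdint>
    #include <cstring>

    // Both forms yield the loaded value plus a written-back base register;
    // pre-index loads from base+offset, post-index loads from base and then
    // advances it.
    struct Indexed { uint32_t value; const unsigned char *newBase; };

    Indexed preIndexLoad32(const unsigned char *base, long offset) {
      uint32_t v;
      std::memcpy(&v, base + offset, sizeof v);   // ldr w0, [x1, #off]!
      return { v, base + offset };
    }

    Indexed postIndexLoad32(const unsigned char *base, long offset) {
      uint32_t v;
      std::memcpy(&v, base, sizeof v);            // ldr w0, [x1], #off
      return { v, base + offset };
    }

This is why the machine node built above carries three results: the written-back base (i64), the loaded value, and the chain.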
+ Done = true; + SDValue LoadedVal = SDValue(Res, 1); + if (InsertTo64) { + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); + LoadedVal = + SDValue(CurDAG->getMachineNode( + AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64, + CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg), + 0); } - Ops.push_back(N->getOperand(0)); // Push back the Chain + ReplaceUses(SDValue(N, 0), LoadedVal); + ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); + ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); - SmallVector ResTys; - // Push back the type of return super register - if (NumVecs == 1) - ResTys.push_back(VT); - else if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of the Chain + return nullptr; +} + +SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, + unsigned Opc, unsigned SubRegIdx) { SDLoc dl(N); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + EVT VT = N->getValueType(0); + SDValue Chain = N->getOperand(0); - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(N)->getMemOperand(); - cast(VLd)->setMemRefs(MemOp, MemOp + 1); + SmallVector Ops; + Ops.push_back(N->getOperand(2)); // Mem operand; + Ops.push_back(Chain); - if (NumVecs == 1) - return VLd; - - // If NumVecs > 1, the return result is a super register containing 2-4 - // consecutive vector registers. - SDValue SuperReg = SDValue(VLd, 0); - - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); - // Update users of the Chain - ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); - - return NULL; + std::vector ResTys; + ResTys.push_back(MVT::Untyped); + ResTys.push_back(MVT::Other); + + SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + SDValue SuperReg = SDValue(Ld, 0); + for (unsigned i = 0; i < NumVecs; ++i) + ReplaceUses(SDValue(N, i), + CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); + + ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); + return nullptr; } -SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); +SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, + unsigned Opc, unsigned SubRegIdx) { SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue Chain = N->getOperand(0); - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(N)->getMemOperand(); + SmallVector Ops; + Ops.push_back(N->getOperand(1)); // Mem operand + Ops.push_back(N->getOperand(2)); // Incremental + Ops.push_back(Chain); - unsigned AddrOpIdx = isUpdating ? 1 : 2; - unsigned Vec0Idx = 3; - EVT VT = N->getOperand(Vec0Idx).getValueType(); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 
3 : 7; break; - default: llvm_unreachable("unhandled vector store type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; + std::vector ResTys; + ResTys.push_back(MVT::i64); // Type of the write back register + ResTys.push_back(MVT::Untyped); + ResTys.push_back(MVT::Other); + + SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + + // Update uses of write back register + ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); + + // Update uses of vector list + SDValue SuperReg = SDValue(Ld, 1); + if (NumVecs == 1) + ReplaceUses(SDValue(N, 0), SuperReg); + else + for (unsigned i = 0; i < NumVecs; ++i) + ReplaceUses(SDValue(N, i), + CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); + + // Update the chain + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); + return nullptr; +} + +SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, + unsigned Opc) { + SDLoc dl(N); + EVT VT = N->getOperand(2)->getValueType(0); + + // Form a REG_SEQUENCE to force register allocation. + bool Is128Bit = VT.getSizeInBits() == 128; + SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); + SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); + + SmallVector Ops; + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(NumVecs + 2)); + Ops.push_back(N->getOperand(0)); + SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); + + return St; +} +SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, + unsigned Opc) { + SDLoc dl(N); + EVT VT = N->getOperand(2)->getValueType(0); SmallVector ResTys; - if (isUpdating) - ResTys.push_back(MVT::i64); + ResTys.push_back(MVT::i64); // Type of the write back register ResTys.push_back(MVT::Other); // Type for the Chain + // Form a REG_SEQUENCE to force register allocation. + bool Is128Bit = VT.getSizeInBits() == 128; + SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); + SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); + SmallVector Ops; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(NumVecs + 1)); // base register + Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental + Ops.push_back(N->getOperand(0)); // Chain + SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + + return St; +} + +/// WidenVector - Given a value in the V64 register class, produce the +/// equivalent value in the V128 register class. +class WidenVector { + SelectionDAG &DAG; - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); +public: + WidenVector(SelectionDAG &DAG) : DAG(DAG) {} + + SDValue operator()(SDValue V64Reg) { + EVT VT = V64Reg.getValueType(); + unsigned NarrowSize = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType().getSimpleVT(); + MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); + SDLoc DL(V64Reg); + + SDValue Undef = + SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); + return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); } +}; + +/// NarrowVector - Given a value in the V128 register class, produce the +/// equivalent value in the V64 register class. 
+static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { + EVT VT = V128Reg.getValueType(); + unsigned WideSize = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType().getSimpleVT(); + MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); + + return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, + V128Reg); +} + +SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + bool Narrow = VT.getSizeInBits() == 64; + + // Form a REG_SEQUENCE to force register allocation. + SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); + + if (Narrow) + std::transform(Regs.begin(), Regs.end(), Regs.begin(), + WidenVector(*CurDAG)); + + SDValue RegSeq = createQTuple(Regs); + + std::vector ResTys; + ResTys.push_back(MVT::Untyped); + ResTys.push_back(MVT::Other); - SmallVector Regs(N->op_begin() + Vec0Idx, - N->op_begin() + Vec0Idx + NumVecs); - SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs); - Ops.push_back(SrcReg); + unsigned LaneNo = + cast(N->getOperand(NumVecs + 2))->getZExtValue(); - // Push back the Chain + SmallVector Ops; + Ops.push_back(RegSeq); + Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); + Ops.push_back(N->getOperand(NumVecs + 3)); Ops.push_back(N->getOperand(0)); + SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + SDValue SuperReg = SDValue(Ld, 0); + + EVT WideVT = RegSeq.getOperand(1)->getValueType(0); + static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, + AArch64::qsub3 }; + for (unsigned i = 0; i < NumVecs; ++i) { + SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); + if (Narrow) + NV = NarrowVector(NV, *CurDAG); + ReplaceUses(SDValue(N, i), NV); + } - // Transfer memoperands. - SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - cast(VSt)->setMemRefs(MemOp, MemOp + 1); + ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); - return VSt; + return Ld; } -SDValue -AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, - SDValue Operand) { - SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, - VT, VTD, MVT::Other, - CurDAG->getTargetConstant(0, MVT::i64), - Operand, - CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32)); - return SDValue(Reg, 0); +SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + bool Narrow = VT.getSizeInBits() == 64; + + // Form a REG_SEQUENCE to force register allocation. 
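Only the 128-bit instruction variants exist for these structured lane operations, so 64-bit vector operands are widened on the way in (the WidenVector transform applied to Regs just below) and narrowed on the way out. A toy model of the dsub sub-register relation that WidenVector/NarrowVector rely on; V128, widen64 and narrow128 are illustrative names:

    #include <cstdint>

    // A V64 value lives in the low 64 bits of a V128 register; widening
    // leaves the high half undefined (IMPLICIT_DEF in the real code) and
    // narrowing simply drops it.
    struct V128 { uint64_t lo, hi; };

    V128 widen64(uint64_t v64) { return { v64, 0 /* really undef */ }; }
    uint64_t narrow128(V128 v)  { return v.lo; }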
+ SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); + + if (Narrow) + std::transform(Regs.begin(), Regs.end(), Regs.begin(), + WidenVector(*CurDAG)); + + SDValue RegSeq = createQTuple(Regs); + + std::vector ResTys; + ResTys.push_back(MVT::i64); // Type of the write back register + ResTys.push_back(MVT::Untyped); + ResTys.push_back(MVT::Other); + + unsigned LaneNo = + cast(N->getOperand(NumVecs + 1))->getZExtValue(); + + SmallVector Ops; + Ops.push_back(RegSeq); + Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number + Ops.push_back(N->getOperand(NumVecs + 2)); // Base register + Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental + Ops.push_back(N->getOperand(0)); + SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + + // Update uses of the write back register + ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); + + // Update uses of the vector list + SDValue SuperReg = SDValue(Ld, 1); + if (NumVecs == 1) { + ReplaceUses(SDValue(N, 0), + Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); + } else { + EVT WideVT = RegSeq.getOperand(1)->getValueType(0); + static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, + AArch64::qsub3 }; + for (unsigned i = 0; i < NumVecs; ++i) { + SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, + SuperReg); + if (Narrow) + NV = NarrowVector(NV, *CurDAG); + ReplaceUses(SDValue(N, i), NV); + } + } + + // Update the Chain + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); + + return Ld; } -SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, - unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range"); +SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); + EVT VT = N->getOperand(2)->getValueType(0); + bool Narrow = VT.getSizeInBits() == 64; + + // Form a REG_SEQUENCE to force register allocation. + SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); + + if (Narrow) + std::transform(Regs.begin(), Regs.end(), Regs.begin(), + WidenVector(*CurDAG)); + + SDValue RegSeq = createQTuple(Regs); + + unsigned LaneNo = + cast(N->getOperand(NumVecs + 2))->getZExtValue(); - EVT VT = N->getValueType(0); - unsigned OpcodeIndex; - bool is64BitVector = VT.is64BitVector(); - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; - case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; - case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; - case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; - default: llvm_unreachable("unhandled vector duplicate lane load type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SDValue SuperReg; SmallVector Ops; - Ops.push_back(N->getOperand(1)); // Push back the Memory Address - if (isUpdating) { - SDValue Inc = N->getOperand(2); - if (!isa(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - Ops.push_back(N->getOperand(0)); // Push back the Chain - - SmallVector ResTys; - // Push back the type of return super register - if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? 
NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of the Chain - SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + Ops.push_back(RegSeq); + Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); + Ops.push_back(N->getOperand(NumVecs + 3)); + Ops.push_back(N->getOperand(0)); + SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(N)->getMemOperand(); - cast(VLdDup)->setMemRefs(MemOp, MemOp + 1); - - SuperReg = SDValue(VLdDup, 0); - unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0; - // Update uses of each registers in super register - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); - // Update uses of the Chain - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); - return NULL; + cast(St)->setMemRefs(MemOp, MemOp + 1); + + return St; } -// We only have 128-bit vector type of load/store lane instructions. -// If it is 64-bit vector, we also select it to the 128-bit instructions. -// Just use SUBREG_TO_REG to adapt the input to 128-bit vector and -// EXTRACT_SUBREG to get the 64-bit vector from the 128-bit vector output. -SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, - bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); +SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, + unsigned Opc) { SDLoc dl(N); - unsigned AddrOpIdx = isUpdating ? 1 : 2; - unsigned Vec0Idx = 3; + EVT VT = N->getOperand(2)->getValueType(0); + bool Narrow = VT.getSizeInBits() == 64; - SDValue Chain = N->getOperand(0); - unsigned Lane = - cast(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); - EVT VT = N->getOperand(Vec0Idx).getValueType(); - bool is64BitVector = VT.is64BitVector(); - EVT VT64; // 64-bit Vector Type - - if (is64BitVector) { - VT64 = VT; - VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(), - VT.getVectorNumElements() * 2); - } - - unsigned OpcodeIndex; - switch (VT.getScalarType().getSizeInBits()) { - case 8: OpcodeIndex = 0; break; - case 16: OpcodeIndex = 1; break; - case 32: OpcodeIndex = 2; break; - case 64: OpcodeIndex = 3; break; - default: llvm_unreachable("unhandled vector lane load/store type"); - } - unsigned Opc = Opcodes[OpcodeIndex]; - - SmallVector ResTys; - if (IsLoad) { - // Push back the type of return super register - if (NumVecs == 3) - ResTys.push_back(MVT::Untyped); - else { - EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, - is64BitVector ? 
NumVecs : NumVecs * 2); - ResTys.push_back(ResTy); - } - } - if (isUpdating) - ResTys.push_back(MVT::i64); // Type of the updated register - ResTys.push_back(MVT::Other); // Type of Chain - SmallVector Ops; - Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address - if (isUpdating) { - SDValue Inc = N->getOperand(AddrOpIdx + 1); - if (!isa(Inc.getNode())) // Increment in Register - Opc = getVLDSTRegisterUpdateOpcode(Opc); - Ops.push_back(Inc); - } - - SmallVector Regs(N->op_begin() + Vec0Idx, - N->op_begin() + Vec0Idx + NumVecs); - if (is64BitVector) - for (unsigned i = 0; i < Regs.size(); i++) - Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]); - SDValue SuperReg = createQTuple(Regs); - - Ops.push_back(SuperReg); // Source Reg - SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32); - Ops.push_back(LaneValue); - Ops.push_back(Chain); // Push back the Chain - - SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + // Form a REG_SEQUENCE to force register allocation. + SmallVector Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); + + if (Narrow) + std::transform(Regs.begin(), Regs.end(), Regs.begin(), + WidenVector(*CurDAG)); + + SDValue RegSeq = createQTuple(Regs); + + SmallVector ResTys; + ResTys.push_back(MVT::i64); // Type of the write back register + ResTys.push_back(MVT::Other); + + unsigned LaneNo = + cast(N->getOperand(NumVecs + 1))->getZExtValue(); + + SmallVector Ops; + Ops.push_back(RegSeq); + Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); + Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register + Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental + Ops.push_back(N->getOperand(0)); + SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + + // Transfer memoperands. MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(N)->getMemOperand(); - cast(VLdLn)->setMemRefs(MemOp, MemOp + 1); - if (!IsLoad) - return VLdLn; - - // Extract the subregisters. - SuperReg = SDValue(VLdLn, 0); - unsigned Sub0 = AArch64::qsub_0; - // Update uses of each registers in super register - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg); - if (is64BitVector) { - SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0); - } - ReplaceUses(SDValue(N, Vec), SUB0); + cast(St)->setMemRefs(MemOp, MemOp + 1); + + return St; +} + +static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, + unsigned &Opc, SDValue &Opd0, + unsigned &LSB, unsigned &MSB, + unsigned NumberOfIgnoredLowBits, + bool BiggerPattern) { + assert(N->getOpcode() == ISD::AND && + "N must be a AND operation to call this function"); + + EVT VT = N->getValueType(0); + + // Here we can test the type of VT and return false when the type does not + // match, but since it is done prior to that call in the current context + // we turned that into an assert to avoid redundant code. + assert((VT == MVT::i32 || VT == MVT::i64) && + "Type checking must have been done before calling this function"); + + // FIXME: simplify-demanded-bits in DAGCombine will probably have + // changed the AND node to a 32-bit mask operation. We'll have to + // undo that as part of the transform here if we want to catch all + // the opportunities. + // Currently the NumberOfIgnoredLowBits argument helps to recover + // form these situations when matching bigger pattern (bitfield insert). 
+ + // For unsigned extracts, check for a shift right and mask + uint64_t And_imm = 0; + if (!isOpcWithIntImmediate(N, ISD::AND, And_imm)) + return false; + + const SDNode *Op0 = N->getOperand(0).getNode(); + + // Because of simplify-demanded-bits in DAGCombine, the mask may have been + // simplified. Try to undo that + And_imm |= (1 << NumberOfIgnoredLowBits) - 1; + + // The immediate is a mask of the low bits iff imm & (imm+1) == 0 + if (And_imm & (And_imm + 1)) + return false; + + bool ClampMSB = false; + uint64_t Srl_imm = 0; + // Handle the SRL + ANY_EXTEND case. + if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && + isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) { + // Extend the incoming operand of the SRL to 64-bit. + Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); + // Make sure to clamp the MSB so that we preserve the semantics of the + // original operations. + ClampMSB = true; + } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && + isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, + Srl_imm)) { + // If the shift result was truncated, we can still combine them. + Opd0 = Op0->getOperand(0).getOperand(0); + + // Use the type of SRL node. + VT = Opd0->getValueType(0); + } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) { + Opd0 = Op0->getOperand(0); + } else if (BiggerPattern) { + // Let's pretend a 0 shift right has been performed. + // The resulting code will be at least as good as the original one + // plus it may expose more opportunities for bitfield insert pattern. + // FIXME: Currently we limit this to the bigger pattern, because + // some optimizations expect AND and not UBFM + Opd0 = N->getOperand(0); + } else + return false; + + assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) && + "bad amount in shift node!"); + + LSB = Srl_imm; + MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm) + : CountTrailingOnes_64(And_imm)) - + 1; + if (ClampMSB) + // Since we're moving the extend before the right shift operation, we need + // to clamp the MSB to make sure we don't shift in undefined bits instead of + // the zeros which would get shifted in with the original right shift + // operation. + MSB = MSB > 31 ? 31 : MSB; + + Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; + return true; +} + +static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, + unsigned &LSB, unsigned &MSB) { + // We are looking for the following pattern which basically extracts a single + // bit from the source value and places it in the LSB of the destination + // value, all other bits of the destination value or set to zero: + // + // Value2 = AND Value, MaskImm + // SRL Value2, ShiftImm + // + // with MaskImm >> ShiftImm == 1. + // + // This gets selected into a single UBFM: + // + // UBFM Value, ShiftImm, ShiftImm + // + + if (N->getOpcode() != ISD::SRL) + return false; + + uint64_t And_mask = 0; + if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask)) + return false; + + Opd0 = N->getOperand(0).getOperand(0); + + uint64_t Srl_imm = 0; + if (!isIntImmediate(N->getOperand(1), Srl_imm)) + return false; + + // Check whether we really have a one bit extract here. 
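The single-bit case checked next is the degenerate extract with LSB == MSB: (Value & MaskImm) >> ShiftImm, where MaskImm >> ShiftImm == 1, just reads one bit. A one-line model (bitAt is an illustrative name):

    #include <cstdint>

    // (x & (1ULL << n)) >> n and bitAt(x, n) agree; both lower to
    // "ubfm Xd, Xn, #n, #n" (the UBFX Xd, Xn, #n, #1 alias).
    unsigned bitAt(uint64_t x, unsigned n) { return (x >> n) & 1; }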
+ if (And_mask >> Srl_imm == 0x1) { + if (N->getValueType(0) == MVT::i32) + Opc = AArch64::UBFMWri; + else + Opc = AArch64::UBFMXri; + + LSB = MSB = Srl_imm; + + return true; } - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); - if (isUpdating) - ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); - return NULL; + + return false; } -unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit, - unsigned NumOfVec) { - assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range"); +static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, + unsigned &LSB, unsigned &MSB, + bool BiggerPattern) { + assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && + "N must be a SHR/SRA operation to call this function"); + + EVT VT = N->getValueType(0); + + // Here we can test the type of VT and return false when the type does not + // match, but since it is done prior to that call in the current context + // we turned that into an assert to avoid redundant code. + assert((VT == MVT::i32 || VT == MVT::i64) && + "Type checking must have been done before calling this function"); + + // Check for AND + SRL doing a one bit extract. + if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB)) + return true; - unsigned Opc = 0; - switch (NumOfVec) { + // we're looking for a shift of a shift + uint64_t Shl_imm = 0; + uint64_t Trunc_bits = 0; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { + Opd0 = N->getOperand(0).getOperand(0); + } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && + N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { + // We are looking for a shift of truncate. Truncate from i64 to i32 could + // be considered as setting high 32 bits as zero. Our strategy here is to + // always generate 64bit UBFM. This consistency will help the CSE pass + // later find more redundancy. + Opd0 = N->getOperand(0).getOperand(0); + Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); + VT = Opd0->getValueType(0); + assert(VT == MVT::i64 && "the promoted type should be i64"); + } else if (BiggerPattern) { + // Let's pretend a 0 shift left has been performed. + // FIXME: Currently we limit this to the bigger pattern case, + // because some optimizations expect AND and not UBFM + Opd0 = N->getOperand(0); + } else + return false; + + assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!"); + uint64_t Srl_imm = 0; + if (!isIntImmediate(N->getOperand(1), Srl_imm)) + return false; + + assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() && + "bad amount in shift node!"); + // Note: The width operand is encoded as width-1. + unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1; + int sLSB = Srl_imm - Shl_imm; + if (sLSB < 0) + return false; + LSB = sLSB; + MSB = LSB + Width; + // SRA requires a signed extraction + if (VT == MVT::i32) + Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; + else + Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; + return true; +} + +static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, + SDValue &Opd0, unsigned &LSB, unsigned &MSB, + unsigned NumberOfIgnoredLowBits = 0, + bool BiggerPattern = false) { + if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) + return false; + + switch (N->getOpcode()) { default: + if (!N->isMachineOpcode()) + return false; break; - case 1: - if (IsExt) - Opc = Is64Bit ? 
AArch64::TBX1_8b : AArch64::TBX1_16b; + case ISD::AND: + return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB, + NumberOfIgnoredLowBits, BiggerPattern); + case ISD::SRL: + case ISD::SRA: + return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern); + } + + unsigned NOpc = N->getMachineOpcode(); + switch (NOpc) { + default: + return false; + case AArch64::SBFMWri: + case AArch64::UBFMWri: + case AArch64::SBFMXri: + case AArch64::UBFMXri: + Opc = NOpc; + Opd0 = N->getOperand(0); + LSB = cast(N->getOperand(1).getNode())->getZExtValue(); + MSB = cast(N->getOperand(2).getNode())->getZExtValue(); + return true; + } + // Unreachable + return false; +} + +SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) { + unsigned Opc, LSB, MSB; + SDValue Opd0; + if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB)) + return nullptr; + + EVT VT = N->getValueType(0); + + // If the bit extract operation is 64bit but the original type is 32bit, we + // need to add one EXTRACT_SUBREG. + if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { + SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64), + CurDAG->getTargetConstant(MSB, MVT::i64)}; + + SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64); + SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32); + MachineSDNode *Node = + CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, + SDValue(BFM, 0), SubReg); + return Node; + } + + SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT), + CurDAG->getTargetConstant(MSB, VT)}; + return CurDAG->SelectNodeTo(N, Opc, VT, Ops); +} + +/// Does DstMask form a complementary pair with the mask provided by +/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, +/// this asks whether DstMask zeroes precisely those bits that will be set by +/// the other half. +static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted, + unsigned NumberOfIgnoredHighBits, EVT VT) { + assert((VT == MVT::i32 || VT == MVT::i64) && + "i32 or i64 mask type expected!"); + unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; + + APInt SignificantDstMask = APInt(BitWidth, DstMask); + APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); + + return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && + (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); +} + +// Look for bits that will be useful for later uses. +// A bit is consider useless as soon as it is dropped and never used +// before it as been dropped. +// E.g., looking for useful bit of x +// 1. y = x & 0x7 +// 2. z = y >> 2 +// After #1, x useful bits are 0x7, then the useful bits of x, live through +// y. +// After #2, the useful bits of x are 0x4. +// However, if x is used on an unpredicatable instruction, then all its bits +// are useful. +// E.g. +// 1. y = x & 0x7 +// 2. z = y >> 2 +// 3. 
str x, [@x] +static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); + +static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, + unsigned Depth) { + uint64_t Imm = + cast(Op.getOperand(1).getNode())->getZExtValue(); + Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); + UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); + getUsefulBits(Op, UsefulBits, Depth + 1); +} + +static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, + uint64_t Imm, uint64_t MSB, + unsigned Depth) { + // inherit the bitwidth value + APInt OpUsefulBits(UsefulBits); + OpUsefulBits = 1; + + if (MSB >= Imm) { + OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); + --OpUsefulBits; + // The interesting part will be in the lower part of the result + getUsefulBits(Op, OpUsefulBits, Depth + 1); + // The interesting part was starting at Imm in the argument + OpUsefulBits = OpUsefulBits.shl(Imm); + } else { + OpUsefulBits = OpUsefulBits.shl(MSB + 1); + --OpUsefulBits; + // The interesting part will be shifted in the result + OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); + getUsefulBits(Op, OpUsefulBits, Depth + 1); + // The interesting part was at zero in the argument + OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); + } + + UsefulBits &= OpUsefulBits; +} + +static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, + unsigned Depth) { + uint64_t Imm = + cast(Op.getOperand(1).getNode())->getZExtValue(); + uint64_t MSB = + cast(Op.getOperand(2).getNode())->getZExtValue(); + + getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); +} + +static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, + unsigned Depth) { + uint64_t ShiftTypeAndValue = + cast(Op.getOperand(2).getNode())->getZExtValue(); + APInt Mask(UsefulBits); + Mask.clearAllBits(); + Mask.flipAllBits(); + + if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { + // Shift Left + uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); + Mask = Mask.shl(ShiftAmt); + getUsefulBits(Op, Mask, Depth + 1); + Mask = Mask.lshr(ShiftAmt); + } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { + // Shift Right + // We do not handle AArch64_AM::ASR, because the sign will change the + // number of useful bits + uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); + Mask = Mask.lshr(ShiftAmt); + getUsefulBits(Op, Mask, Depth + 1); + Mask = Mask.shl(ShiftAmt); + } else + return; + + UsefulBits &= Mask; +} + +static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, + unsigned Depth) { + uint64_t Imm = + cast(Op.getOperand(2).getNode())->getZExtValue(); + uint64_t MSB = + cast(Op.getOperand(3).getNode())->getZExtValue(); + + if (Op.getOperand(1) == Orig) + return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); + + APInt OpUsefulBits(UsefulBits); + OpUsefulBits = 1; + + if (MSB >= Imm) { + OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); + --OpUsefulBits; + UsefulBits &= ~OpUsefulBits; + getUsefulBits(Op, UsefulBits, Depth + 1); + } else { + OpUsefulBits = OpUsefulBits.shl(MSB + 1); + --OpUsefulBits; + UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); + getUsefulBits(Op, UsefulBits, Depth + 1); + } +} + +static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, + SDValue Orig, unsigned Depth) { + + // Users of this node should have already been instruction selected + // FIXME: Can we turn that into 
an assert? + if (!UserNode->isMachineOpcode()) + return; + + switch (UserNode->getMachineOpcode()) { + default: + return; + case AArch64::ANDSWri: + case AArch64::ANDSXri: + case AArch64::ANDWri: + case AArch64::ANDXri: + // We increment Depth only when we call the getUsefulBits + return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, + Depth); + case AArch64::UBFMWri: + case AArch64::UBFMXri: + return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); + + case AArch64::ORRWrs: + case AArch64::ORRXrs: + if (UserNode->getOperand(1) != Orig) + return; + return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, + Depth); + case AArch64::BFMWri: + case AArch64::BFMXri: + return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); + } +} + +static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { + if (Depth >= 6) + return; + // Initialize UsefulBits + if (!Depth) { + unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits(); + // At the beginning, assume every produced bits is useful + UsefulBits = APInt(Bitwidth, 0); + UsefulBits.flipAllBits(); + } + APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); + + for (SDNode *Node : Op.getNode()->uses()) { + // A use cannot produce useful bits + APInt UsefulBitsForUse = APInt(UsefulBits); + getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); + UsersUsefulBits |= UsefulBitsForUse; + } + // UsefulBits contains the produced bits that are meaningful for the + // current definition, thus a user cannot make a bit meaningful at + // this point + UsefulBits &= UsersUsefulBits; +} + +/// Create a machine node performing a notional SHL of Op by ShlAmount. If +/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is +/// 0, return Op unchanged. +static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { + if (ShlAmount == 0) + return Op; + + EVT VT = Op.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri; + + SDNode *ShiftNode; + if (ShlAmount > 0) { + // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt + ShiftNode = CurDAG->getMachineNode( + UBFMOpc, SDLoc(Op), VT, Op, + CurDAG->getTargetConstant(BitWidth - ShlAmount, VT), + CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT)); + } else { + // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 + assert(ShlAmount < 0 && "expected right shift"); + int ShrAmount = -ShlAmount; + ShiftNode = CurDAG->getMachineNode( + UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT), + CurDAG->getTargetConstant(BitWidth - 1, VT)); + } + + return SDValue(ShiftNode, 0); +} + +/// Does this tree qualify as an attempt to move a bitfield into position, +/// essentially "(and (shl VAL, N), Mask)". +static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, + SDValue &Src, int &ShiftAmount, + int &MaskWidth) { + EVT VT = Op.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + (void)BitWidth; + assert(BitWidth == 32 || BitWidth == 64); + + APInt KnownZero, KnownOne; + CurDAG->computeKnownBits(Op, KnownZero, KnownOne); + + // Non-zero in the sense that they're not provably zero, which is the key + // point if we want to use this value + uint64_t NonZeroBits = (~KnownZero).getZExtValue(); + + // Discard a constant AND mask if present. It's safe because the node will + // already have been factored into the computeKnownBits calculation above. 
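The positioning check that follows hinges on the provably-non-zero bits forming one contiguous run. A rough equivalent of the isShiftedMask_64 test, assuming a GCC/Clang-style trailing-zero-count builtin; isShiftedMaskSketch is an illustrative name:

    #include <cstdint>

    bool isShiftedMaskSketch(uint64_t v) {
      if (v == 0)
        return false;
      uint64_t low = v >> __builtin_ctzll(v); // strip the trailing zeros
      return (low & (low + 1)) == 0;          // rest must be 0...01...1
    }

For v = 0xFF0 the trailing-zero count (4) becomes the insertion LSB and the run length (8) the mask width, which feed the BFM immediates computed below.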
+ uint64_t AndImm; + if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { + assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0); + Op = Op.getOperand(0); + } + + uint64_t ShlImm; + if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) + return false; + Op = Op.getOperand(0); + + if (!isShiftedMask_64(NonZeroBits)) + return false; + + ShiftAmount = countTrailingZeros(NonZeroBits); + MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount); + + // BFI encompasses sufficiently many nodes that it's worth inserting an extra + // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL + // amount. + Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); + + return true; +} + +// Given a OR operation, check if we have the following pattern +// ubfm c, b, imm, imm2 (or something that does the same jobs, see +// isBitfieldExtractOp) +// d = e & mask2 ; where mask is a binary sequence of 1..10..0 and +// countTrailingZeros(mask2) == imm2 - imm + 1 +// f = d | c +// if yes, given reference arguments will be update so that one can replace +// the OR instruction with: +// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 +static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, + SDValue &Src, unsigned &ImmR, + unsigned &ImmS, SelectionDAG *CurDAG) { + assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); + + // Set Opc + EVT VT = N->getValueType(0); + if (VT == MVT::i32) + Opc = AArch64::BFMWri; + else if (VT == MVT::i64) + Opc = AArch64::BFMXri; + else + return false; + + // Because of simplify-demanded-bits in DAGCombine, involved masks may not + // have the expected shape. Try to undo that. + APInt UsefulBits; + getUsefulBits(SDValue(N, 0), UsefulBits); + + unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); + unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); + + // OR is commutative, check both possibilities (does llvm provide a + // way to do that directely, e.g., via code matcher?) + SDValue OrOpd1Val = N->getOperand(1); + SDNode *OrOpd0 = N->getOperand(0).getNode(); + SDNode *OrOpd1 = N->getOperand(1).getNode(); + for (int i = 0; i < 2; + ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { + unsigned BFXOpc; + int DstLSB, Width; + if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, + NumberOfIgnoredLowBits, true)) { + // Check that the returned opcode is compatible with the pattern, + // i.e., same type and zero extended (U and not S) + if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || + (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) + continue; + + // Compute the width of the bitfield insertion + DstLSB = 0; + Width = ImmS - ImmR + 1; + // FIXME: This constraint is to catch bitfield insertion we may + // want to widen the pattern if we want to grab general bitfied + // move case + if (Width <= 0) + continue; + + // If the mask on the insertee is correct, we have a BFXIL operation. We + // can share the ImmR and ImmS values from the already-computed UBFM. + } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src, + DstLSB, Width)) { + ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); + ImmS = Width - 1; + } else + continue; + + // Check the second part of the pattern + EVT VT = OrOpd1->getValueType(0); + assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); + + // Compute the Known Zero for the candidate of the first operand. 
+ // This allows to catch more general case than just looking for + // AND with imm. Indeed, simplify-demanded-bits may have removed + // the AND instruction because it proves it was useless. + APInt KnownZero, KnownOne; + CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne); + + // Check if there is enough room for the second operand to appear + // in the first one + APInt BitsToBeInserted = + APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width); + + if ((BitsToBeInserted & ~KnownZero) != 0) + continue; + + // Set the first operand + uint64_t Imm; + if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && + isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) + // In that case, we can eliminate the AND + Dst = OrOpd1->getOperand(0); else - Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b; + // Maybe the AND has been removed by simplify-demanded-bits + // or is useful because it discards more bits + Dst = OrOpd1Val; + + // both parts match + return true; + } + + return false; +} + +SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { + if (N->getOpcode() != ISD::OR) + return nullptr; + + unsigned Opc; + unsigned LSB, MSB; + SDValue Opd0, Opd1; + + if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) + return nullptr; + + EVT VT = N->getValueType(0); + SDValue Ops[] = { Opd0, + Opd1, + CurDAG->getTargetConstant(LSB, VT), + CurDAG->getTargetConstant(MSB, VT) }; + return CurDAG->SelectNodeTo(N, Opc, VT, Ops); +} + +SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { + EVT VT = N->getValueType(0); + unsigned Variant; + unsigned Opc; + unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; + + if (VT == MVT::f32) { + Variant = 0; + } else if (VT == MVT::f64) { + Variant = 1; + } else + return nullptr; // Unrecognized argument type. Fall back on default codegen. + + // Pick the FRINTX variant needed to set the flags. + unsigned FRINTXOpc = FRINTXOpcs[Variant]; + + switch (N->getOpcode()) { + default: + return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. + case ISD::FCEIL: { + unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; + Opc = FRINTPOpcs[Variant]; break; - case 2: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b; - else - Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b; + } + case ISD::FFLOOR: { + unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; + Opc = FRINTMOpcs[Variant]; break; - case 3: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b; - else - Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b; + } + case ISD::FTRUNC: { + unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; + Opc = FRINTZOpcs[Variant]; break; - case 4: - if (IsExt) - Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b; - else - Opc = Is64Bit ? 
+
+SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  unsigned Variant;
+  unsigned Opc;
+  unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
+
+  if (VT == MVT::f32) {
+    Variant = 0;
+  } else if (VT == MVT::f64) {
+    Variant = 1;
+  } else
+    return nullptr; // Unrecognized argument type. Fall back on default codegen.
+
+  // Pick the FRINTX variant needed to set the flags.
+  unsigned FRINTXOpc = FRINTXOpcs[Variant];
+
+  switch (N->getOpcode()) {
+  default:
+    return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
+  case ISD::FCEIL: {
+    unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
+    Opc = FRINTPOpcs[Variant];
     break;
-  case 2:
-    if (IsExt)
-      Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
-    else
-      Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
+  }
+  case ISD::FFLOOR: {
+    unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
+    Opc = FRINTMOpcs[Variant];
     break;
-  case 3:
-    if (IsExt)
-      Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
-    else
-      Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
+  }
+  case ISD::FTRUNC: {
+    unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
+    Opc = FRINTZOpcs[Variant];
     break;
-  case 4:
-    if (IsExt)
-      Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
-    else
-      Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
+  }
+  case ISD::FROUND: {
+    unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
+    Opc = FRINTAOpcs[Variant];
     break;
   }
+  }
-  return Opc;
+  SDLoc dl(N);
+  SDValue In = N->getOperand(0);
+  SmallVector<SDValue, 2> Ops;
+  Ops.push_back(In);
+
+  if (!TM.Options.UnsafeFPMath) {
+    SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
+    Ops.push_back(SDValue(FRINTX, 1));
+  }
+
+  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
 }

-SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
-                                        bool IsExt) {
-  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
-  SDLoc dl(N);
+bool
+AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+                                              unsigned RegWidth) {
+  APFloat FVal(0.0);
+  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+    FVal = CN->getValueAPF();
+  else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
+    // Some otherwise illegal constants are allowed in this case.
+    if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
+        !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
+      return false;
+
+    ConstantPoolSDNode *CN =
+        dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
+    FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
+  } else
+    return false;
+
+  // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
+  // is between 1 and 32 for a destination w-register, or 1 and 64 for an
+  // x-register.
+  //
+  // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
+  // want THIS_NODE to be 2^fbits. This is much easier to deal with using
+  // integers.
+  bool IsExact;
-  // Check the element of look up table is 64-bit or not
-  unsigned Vec0Idx = IsExt ? 2 : 1;
-  assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
-         "The element of lookup table for vtbl and vtbx must be 128-bit");
+  // fbits is between 1 and 64 in the worst-case, which means the fmul
+  // could have 2^64 as an actual operand. Need 65 bits of precision.
+  APSInt IntVal(65, true);
+  FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
-  // Check the return value type is 64-bit or not
-  EVT ResVT = N->getValueType(0);
-  bool is64BitRes = ResVT.is64BitVector();
+  // N.b. isPowerOf2 also checks for > 0.
+  if (!IsExact || !IntVal.isPowerOf2()) return false;
+  unsigned FBits = IntVal.logBase2();
-  // Create new SDValue for vector list
-  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
-                               N->op_begin() + Vec0Idx + NumVecs);
-  SDValue TblReg = createQTuple(Regs);
-  unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
+  // Checks above should have guaranteed that we haven't lost information in
+  // finding FBits, but it must still be in range.
+  if (FBits == 0 || FBits > RegWidth) return false;
-  SmallVector<SDValue, 4> Ops;
-  if (IsExt)
-    Ops.push_back(N->getOperand(1));
-  Ops.push_back(TblReg);
-  Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
-  return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
+  FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
+  return true;
 }

 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   // Dump information about the Node being selected
-  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
+  DEBUG(errs() << "Selecting: ");
+  DEBUG(Node->dump(CurDAG));
+  DEBUG(errs() << "\n");
+
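// A worked example for SelectCVTFixedPosOperand above, with assumed values
// (illustration only): given (fp_to_sint (fmul x, 64.0)) and RegWidth == 32,
// FVal = 64.0 converts exactly to IntVal = 64, which is a power of two, so
// FBits = log2(64) = 6 and the whole multiply-and-convert pair can select to
// "fcvtzs w0, s0, #6", computing convertToInt(x * 2^6) in one instruction.
// A multiplier such as 48.0 fails the IsExact/isPowerOf2 test and falls back
// to a separate fmul + fcvtzs sequence.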
+  // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
-    DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
     Node->setNodeId(-1);
-    return NULL;
+    return nullptr;
   }

-  switch (Node->getOpcode()) {
-  case ISD::ATOMIC_LOAD_ADD:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_ADD_I8,
-                        AArch64::ATOMIC_LOAD_ADD_I16,
-                        AArch64::ATOMIC_LOAD_ADD_I32,
-                        AArch64::ATOMIC_LOAD_ADD_I64);
-  case ISD::ATOMIC_LOAD_SUB:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_SUB_I8,
-                        AArch64::ATOMIC_LOAD_SUB_I16,
-                        AArch64::ATOMIC_LOAD_SUB_I32,
-                        AArch64::ATOMIC_LOAD_SUB_I64);
-  case ISD::ATOMIC_LOAD_AND:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_AND_I8,
-                        AArch64::ATOMIC_LOAD_AND_I16,
-                        AArch64::ATOMIC_LOAD_AND_I32,
-                        AArch64::ATOMIC_LOAD_AND_I64);
-  case ISD::ATOMIC_LOAD_OR:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_OR_I8,
-                        AArch64::ATOMIC_LOAD_OR_I16,
-                        AArch64::ATOMIC_LOAD_OR_I32,
-                        AArch64::ATOMIC_LOAD_OR_I64);
-  case ISD::ATOMIC_LOAD_XOR:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_XOR_I8,
-                        AArch64::ATOMIC_LOAD_XOR_I16,
-                        AArch64::ATOMIC_LOAD_XOR_I32,
-                        AArch64::ATOMIC_LOAD_XOR_I64);
-  case ISD::ATOMIC_LOAD_NAND:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_NAND_I8,
-                        AArch64::ATOMIC_LOAD_NAND_I16,
-                        AArch64::ATOMIC_LOAD_NAND_I32,
-                        AArch64::ATOMIC_LOAD_NAND_I64);
-  case ISD::ATOMIC_LOAD_MIN:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_MIN_I8,
-                        AArch64::ATOMIC_LOAD_MIN_I16,
-                        AArch64::ATOMIC_LOAD_MIN_I32,
-                        AArch64::ATOMIC_LOAD_MIN_I64);
-  case ISD::ATOMIC_LOAD_MAX:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_MAX_I8,
-                        AArch64::ATOMIC_LOAD_MAX_I16,
-                        AArch64::ATOMIC_LOAD_MAX_I32,
-                        AArch64::ATOMIC_LOAD_MAX_I64);
-  case ISD::ATOMIC_LOAD_UMIN:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_UMIN_I8,
-                        AArch64::ATOMIC_LOAD_UMIN_I16,
-                        AArch64::ATOMIC_LOAD_UMIN_I32,
-                        AArch64::ATOMIC_LOAD_UMIN_I64);
-  case ISD::ATOMIC_LOAD_UMAX:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_LOAD_UMAX_I8,
-                        AArch64::ATOMIC_LOAD_UMAX_I16,
-                        AArch64::ATOMIC_LOAD_UMAX_I32,
-                        AArch64::ATOMIC_LOAD_UMAX_I64);
-  case ISD::ATOMIC_SWAP:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_SWAP_I8,
-                        AArch64::ATOMIC_SWAP_I16,
-                        AArch64::ATOMIC_SWAP_I32,
-                        AArch64::ATOMIC_SWAP_I64);
-  case ISD::ATOMIC_CMP_SWAP:
-    return SelectAtomic(Node,
-                        AArch64::ATOMIC_CMP_SWAP_I8,
-                        AArch64::ATOMIC_CMP_SWAP_I16,
-                        AArch64::ATOMIC_CMP_SWAP_I32,
-                        AArch64::ATOMIC_CMP_SWAP_I64);
-  case ISD::FrameIndex: {
-    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
-    EVT PtrTy = getTargetLowering()->getPointerTy();
-    SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
-    return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
-                                TFI, CurDAG->getTargetConstant(0, PtrTy));
-  }
-  case ISD::Constant: {
-    SDNode *ResNode = 0;
-    if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
-      // XZR and WZR are probably even better than an actual move: most of the
-      // time they can be folded into another instruction with *no* cost.
-
-      EVT Ty = Node->getValueType(0);
-      assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
-      uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
-      ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
-                                       SDLoc(Node),
-                                       Register, Ty).getNode();
-    }
+  // A few custom selection cases.
+  SDNode *ResNode = nullptr;
+  EVT VT = Node->getValueType(0);

-    // Next best option is a move-immediate, see if we can do that.
-    if (!ResNode) {
-      ResNode = TrySelectToMoveImm(Node);
-    }
-
-    if (ResNode)
-      return ResNode;
+  switch (Node->getOpcode()) {
+  default:
+    break;

-    // If even that fails we fall back to a lit-pool entry at the moment. Future
-    // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
-    ResNode = SelectToLitPool(Node);
-    assert(ResNode && "We need *some* way to materialise a constant");
+  case ISD::ADD:
+    if (SDNode *I = SelectMLAV64LaneV128(Node))
+      return I;
+    break;

-    // We want to continue selection at this point since the litpool access
-    // we just generated used generic nodes for simplicity.
-    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
-    Node = ResNode;
+  case ISD::LOAD: {
+    // Try to select as an indexed load. Fall through to normal processing
+    // if we can't.
+    bool Done = false;
+    SDNode *I = SelectIndexedLoad(Node, Done);
+    if (Done)
+      return I;
     break;
   }

-  case ISD::ConstantFP: {
-    if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
-      // FMOV will take care of it from TableGen
-      break;
-    }
-    SDNode *ResNode = LowerToFPLitPool(Node);
-    ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+  case ISD::SRL:
+  case ISD::AND:
+  case ISD::SRA:
+    if (SDNode *I = SelectBitfieldExtractOp(Node))
+      return I;
+    break;

-    // We want to continue selection at this point since the litpool access
-    // we just generated used generic nodes for simplicity.
-    Node = ResNode;
+  case ISD::OR:
+    if (SDNode *I = SelectBitfieldInsertOp(Node))
+      return I;
     break;
+
+  case ISD::EXTRACT_VECTOR_ELT: {
+    // Extracting lane zero is a special case where we can just use a plain
+    // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
+    // the rest of the compiler, especially the register allocator and copy
+    // propagation, to reason about, so is preferred when it's possible to
+    // use it.
+    ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
+    // Bail and use the default Select() for non-zero lanes.
+    if (LaneNode->getZExtValue() != 0)
+      break;
+    // If the element type is not the same as the result type, likewise
+    // bail and use the default Select(), as there's more to do than just
+    // a cross-class COPY. This catches extracts of i8 and i16 elements
+    // since they will need an explicit zext.
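// Illustrative IR instance (assumed, for clarity): for
//   %r = extractelement <2 x double> %v, i32 0
// the element type matches the f64 result, so the code below emits an
// EXTRACT_SUBREG of dsub from the source Q register, which becomes a plain
// FMOV or is coalesced away entirely; lane 1, or an i8/i16 element, instead
// falls through to the default selection.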
+ if (VT != Node->getOperand(0).getValueType().getVectorElementType()) + break; + unsigned SubReg; + switch (Node->getOperand(0) + .getValueType() + .getVectorElementType() + .getSizeInBits()) { + default: + assert(0 && "Unexpected vector element type!"); + case 64: + SubReg = AArch64::dsub; + break; + case 32: + SubReg = AArch64::ssub; + break; + case 16: // FALLTHROUGH + case 8: + llvm_unreachable("unexpected zext-requiring extract element!"); + } + SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT, + Node->getOperand(0)); + DEBUG(dbgs() << "ISEL: Custom selection!\n=> "); + DEBUG(Extract->dumpr(CurDAG)); + DEBUG(dbgs() << "\n"); + return Extract.getNode(); } - case AArch64ISD::NEON_LD1_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed, - AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed, - AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed, - AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed - }; - return SelectVLD(Node, true, 1, Opcodes); - } - case AArch64ISD::NEON_LD2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed, - AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, - AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed, - AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed - }; - return SelectVLD(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed, - AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, - AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed, - AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed - }; - return SelectVLD(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed, - AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, - AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed, - AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed - }; - return SelectVLD(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD1x2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed, - AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, - AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed, - AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed - }; - return SelectVLD(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD1x3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed, - AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, - AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed, - AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed - }; - return SelectVLD(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD1x4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed, - AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, - AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed, - AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed - }; - return SelectVLD(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_ST1_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed, - AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed, - AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed, - AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed - }; - return SelectVST(Node, true, 1, Opcodes); - } - case AArch64ISD::NEON_ST2_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed, - 
AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, - AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed, - AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed - }; - return SelectVST(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_ST3_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed, - AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, - AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed, - AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed - }; - return SelectVST(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_ST4_UPD: { - static const uint16_t Opcodes[] = { - AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed, - AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, - AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed, - AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed - }; - return SelectVST(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD2DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S, - AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H, - AArch64::LD2R_4S, AArch64::LD2R_2D - }; - return SelectVLDDup(Node, false, 2, Opcodes); - } - case AArch64ISD::NEON_LD3DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S, - AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H, - AArch64::LD3R_4S, AArch64::LD3R_2D - }; - return SelectVLDDup(Node, false, 3, Opcodes); - } - case AArch64ISD::NEON_LD4DUP: { - static const uint16_t Opcodes[] = { - AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S, - AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H, - AArch64::LD4R_4S, AArch64::LD4R_2D - }; - return SelectVLDDup(Node, false, 4, Opcodes); - } - case AArch64ISD::NEON_LD2DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed, - AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed, - AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed, - AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed, - AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed, - AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed, - AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4DUP_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed, - AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed, - AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed, - AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed - }; - return SelectVLDDup(Node, true, 4, Opcodes); - } - case AArch64ISD::NEON_LD2LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed, - AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 2, Opcodes); - } - case AArch64ISD::NEON_LD3LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed, - AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 3, Opcodes); - } - case AArch64ISD::NEON_LD4LN_UPD: { - static const uint16_t Opcodes[] = { - AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed, - AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed - }; - return SelectVLDSTLane(Node, true, true, 4, Opcodes); - } - case 
AArch64ISD::NEON_ST2LN_UPD: {
-    static const uint16_t Opcodes[] = {
-      AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
-      AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
-    };
-    return SelectVLDSTLane(Node, false, true, 2, Opcodes);
-  }
-  case AArch64ISD::NEON_ST3LN_UPD: {
-    static const uint16_t Opcodes[] = {
-      AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
-      AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
-    };
-    return SelectVLDSTLane(Node, false, true, 3, Opcodes);
-  }
-  case AArch64ISD::NEON_ST4LN_UPD: {
-    static const uint16_t Opcodes[] = {
-      AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
-      AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
-    };
-    return SelectVLDSTLane(Node, false, true, 4, Opcodes);
-  }
-  case AArch64ISD::NEON_ST1x2_UPD: {
-    static const uint16_t Opcodes[] = {
-      AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed,
-      AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
-      AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
-      AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed
-    };
-    return SelectVST(Node, true, 2, Opcodes);
-  }
-  case AArch64ISD::NEON_ST1x3_UPD: {
-    static const uint16_t Opcodes[] = {
-      AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed,
-      AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
-      AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
-      AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed
-    };
-    return SelectVST(Node, true, 3, Opcodes);
-  }
-  case AArch64ISD::NEON_ST1x4_UPD: {
-    static const uint16_t Opcodes[] = {
-      AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed,
-      AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
-      AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
-      AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed
-    };
-    return SelectVST(Node, true, 4, Opcodes);
-  }
-  case ISD::INTRINSIC_WO_CHAIN: {
-    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
-    bool IsExt = false;
-    switch (IntNo) {
-    default:
-      break;
-    case Intrinsic::aarch64_neon_vtbx1:
-      IsExt = true;
-    case Intrinsic::aarch64_neon_vtbl1:
-      return SelectVTBL(Node, 1, IsExt);
-    case Intrinsic::aarch64_neon_vtbx2:
-      IsExt = true;
-    case Intrinsic::aarch64_neon_vtbl2:
-      return SelectVTBL(Node, 2, IsExt);
-    case Intrinsic::aarch64_neon_vtbx3:
-      IsExt = true;
-    case Intrinsic::aarch64_neon_vtbl3:
-      return SelectVTBL(Node, 3, IsExt);
-    case Intrinsic::aarch64_neon_vtbx4:
-      IsExt = true;
-    case Intrinsic::aarch64_neon_vtbl4:
-      return SelectVTBL(Node, 4, IsExt);
+  case ISD::Constant: {
+    // Materialize zero constants as copies from WZR/XZR. This allows
+    // the coalescer to propagate these into other instructions.
+    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
+    if (ConstNode->isNullValue()) {
+      if (VT == MVT::i32)
+        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
+                                      AArch64::WZR, MVT::i32).getNode();
+      else if (VT == MVT::i64)
+        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
+                                      AArch64::XZR, MVT::i64).getNode();
     }
     break;
   }
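// Example (illustrative, assumed code): selecting "i32 0" as
// CopyFromReg(WZR) rather than "mov w0, #0" lets a later user such as
// "str w0, [x1]" become "str wzr, [x1]" after coalescing, so the zero
// typically costs no instruction at all.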
-  case ISD::INTRINSIC_VOID:
+
+  case ISD::FrameIndex: {
+    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
+    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
+    const TargetLowering *TLI = getTargetLowering();
+    SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+                      CurDAG->getTargetConstant(Shifter, MVT::i32) };
+    return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
+  }
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     switch (IntNo) {
     default:
       break;
-    case Intrinsic::arm_neon_vld1: {
-      static const uint16_t Opcodes[] = {
-        AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
-        AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
-      };
-      return SelectVLD(Node, false, 1, Opcodes);
-    }
-    case Intrinsic::arm_neon_vld2: {
-      static const uint16_t Opcodes[] = {
-        AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
-        AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
-      };
-      return SelectVLD(Node, false, 2, Opcodes);
+    case Intrinsic::aarch64_ldaxp:
+    case Intrinsic::aarch64_ldxp: {
+      unsigned Op =
+          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
+      SDValue MemAddr = Node->getOperand(2);
+      SDLoc DL(Node);
+      SDValue Chain = Node->getOperand(0);
+
+      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
+                                          MVT::Other, MemAddr, Chain);
+
+      // Transfer memoperands.
+      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+      MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+      cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+      return Ld;
     }
-    case Intrinsic::arm_neon_vld3: {
-      static const uint16_t Opcodes[] = {
-        AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
-        AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
-      };
-      return SelectVLD(Node, false, 3, Opcodes);
+    case Intrinsic::aarch64_stlxp:
+    case Intrinsic::aarch64_stxp: {
+      unsigned Op =
+          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
+      SDLoc DL(Node);
+      SDValue Chain = Node->getOperand(0);
+      SDValue ValLo = Node->getOperand(2);
+      SDValue ValHi = Node->getOperand(3);
+      SDValue MemAddr = Node->getOperand(4);
+
+      // Place arguments in the right order.
+      SmallVector<SDValue, 4> Ops;
+      Ops.push_back(ValLo);
+      Ops.push_back(ValHi);
+      Ops.push_back(MemAddr);
+      Ops.push_back(Chain);
+
+      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
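// Usage sketch (illustrative, assumed source): these intrinsic pairs carry
// a 128-bit load-linked/store-conditional loop, e.g.
//   retry:
//     ldaxp x0, x1, [x2]       // aarch64_ldaxp  -> LDAXPX
//     ...modify x0/x1...
//     stlxp w3, x0, x1, [x2]   // aarch64_stlxp  -> STLXPX, w3 == 0 on success
//     cbnz  w3, retry
// The memoperand transfer just below keeps alias information attached to the
// resulting machine nodes.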
+      // Transfer memoperands.
+      MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+      MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+      cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+      return St;
     }
-    case Intrinsic::arm_neon_vld4: {
-      static const uint16_t Opcodes[] = {
-        AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
-        AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
-      };
-      return SelectVLD(Node, false, 4, Opcodes);
-    }
-    case Intrinsic::aarch64_neon_vld1x2: {
-      static const uint16_t Opcodes[] = {
-        AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
-        AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
-        AArch64::LD1x2_4S, AArch64::LD1x2_2D
-      };
-      return SelectVLD(Node, false, 2, Opcodes);
-    }
-    case Intrinsic::aarch64_neon_vld1x3: {
-      static const uint16_t Opcodes[] = {
-        AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
-        AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
-        AArch64::LD1x3_4S, AArch64::LD1x3_2D
-      };
-      return SelectVLD(Node, false, 3, Opcodes);
-    }
-    case Intrinsic::aarch64_neon_vld1x4: {
-      static const uint16_t Opcodes[] = {
-        AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
-        AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
-        AArch64::LD1x4_4S, AArch64::LD1x4_2D
-      };
-      return SelectVLD(Node, false, 4, Opcodes);
-    }
-    case Intrinsic::arm_neon_vst1: {
-      static const uint16_t Opcodes[] = {
-        AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
-        AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
-      };
-      return SelectVST(Node, false, 1, Opcodes);
-    }
-    case Intrinsic::arm_neon_vst2: {
-      static const uint16_t Opcodes[] = {
-        AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
-        AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
-      };
-      return SelectVST(Node, false, 2, Opcodes);
+    case Intrinsic::aarch64_neon_ld1x2:
+      if (VT == MVT::v8i8)
+        return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
+      else if (VT == MVT::v16i8)
+        return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
+      else if (VT == MVT::v4i16)
+        return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
+      else if (VT == MVT::v8i16)
+        return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
+      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+        return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
+      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+        return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
+      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+        return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
+      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+        return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
+      break;
+    case Intrinsic::aarch64_neon_ld1x3:
+      if (VT == MVT::v8i8)
+        return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
+      else if (VT == MVT::v16i8)
+        return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
+      else if (VT == MVT::v4i16)
+        return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
+      else if (VT == MVT::v8i16)
+        return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
+      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+        return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
+      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+        return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
+      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+        return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
+      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+        return
SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); + break; + case Intrinsic::aarch64_neon_ld1x4: + if (VT == MVT::v8i8) + return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); + break; + case Intrinsic::aarch64_neon_ld2: + if (VT == MVT::v8i8) + return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); + break; + case Intrinsic::aarch64_neon_ld3: + if (VT == MVT::v8i8) + return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); + break; + case Intrinsic::aarch64_neon_ld4: + if (VT == MVT::v8i8) + return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return 
SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); + break; + case Intrinsic::aarch64_neon_ld2r: + if (VT == MVT::v8i8) + return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); + break; + case Intrinsic::aarch64_neon_ld3r: + if (VT == MVT::v8i8) + return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); + break; + case Intrinsic::aarch64_neon_ld4r: + if (VT == MVT::v8i8) + return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); + break; + case Intrinsic::aarch64_neon_ld2lane: + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectLoadLane(Node, 2, AArch64::LD2i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectLoadLane(Node, 2, AArch64::LD2i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectLoadLane(Node, 2, AArch64::LD2i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectLoadLane(Node, 2, AArch64::LD2i64); + break; + case Intrinsic::aarch64_neon_ld3lane: + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectLoadLane(Node, 3, AArch64::LD3i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectLoadLane(Node, 3, AArch64::LD3i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectLoadLane(Node, 3, 
AArch64::LD3i32);
+      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+               VT == MVT::v1f64)
+        return SelectLoadLane(Node, 3, AArch64::LD3i64);
+      break;
+    case Intrinsic::aarch64_neon_ld4lane:
+      if (VT == MVT::v16i8 || VT == MVT::v8i8)
+        return SelectLoadLane(Node, 4, AArch64::LD4i8);
+      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+        return SelectLoadLane(Node, 4, AArch64::LD4i16);
+      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+               VT == MVT::v2f32)
+        return SelectLoadLane(Node, 4, AArch64::LD4i32);
+      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+               VT == MVT::v1f64)
+        return SelectLoadLane(Node, 4, AArch64::LD4i64);
+      break;
+    }
-    case Intrinsic::arm_neon_vst3: {
-      static const uint16_t Opcodes[] = {
-        AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
-        AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
-      };
-      return SelectVST(Node, false, 3, Opcodes);
+    }
     break;
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+    switch (IntNo) {
+    default:
+      break;
+    case Intrinsic::aarch64_neon_tbl2:
+      return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
+                                                  : AArch64::TBLv16i8Two,
+                         false);
+    case Intrinsic::aarch64_neon_tbl3:
+      return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
+                                                  : AArch64::TBLv16i8Three,
+                         false);
+    case Intrinsic::aarch64_neon_tbl4:
+      return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
+                                                  : AArch64::TBLv16i8Four,
+                         false);
+    case Intrinsic::aarch64_neon_tbx2:
+      return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
+                                                  : AArch64::TBXv16i8Two,
+                         true);
+    case Intrinsic::aarch64_neon_tbx3:
+      return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
+                                                  : AArch64::TBXv16i8Three,
+                         true);
+    case Intrinsic::aarch64_neon_tbx4:
+      return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
+                                                  : AArch64::TBXv16i8Four,
+                         true);
+    case Intrinsic::aarch64_neon_smull:
+    case Intrinsic::aarch64_neon_umull:
+      if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
+        return N;
+      break;
     }
-    case Intrinsic::arm_neon_vst4: {
-      static const uint16_t Opcodes[] = {
-        AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
-        AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
-      };
-      return SelectVST(Node, false, 4, Opcodes);
+    break;
+  }
+  case ISD::INTRINSIC_VOID: {
+    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+    if (Node->getNumOperands() >= 3)
+      VT = Node->getOperand(2)->getValueType(0);
+    switch (IntNo) {
+    default:
+      break;
+    case Intrinsic::aarch64_neon_st1x2: {
+      if (VT == MVT::v8i8)
+        return SelectStore(Node, 2, AArch64::ST1Twov8b);
+      else if (VT == MVT::v16i8)
+        return SelectStore(Node, 2, AArch64::ST1Twov16b);
+      else if (VT == MVT::v4i16)
+        return SelectStore(Node, 2, AArch64::ST1Twov4h);
+      else if (VT == MVT::v8i16)
+        return SelectStore(Node, 2, AArch64::ST1Twov8h);
+      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+        return SelectStore(Node, 2, AArch64::ST1Twov2s);
+      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+        return SelectStore(Node, 2, AArch64::ST1Twov4s);
+      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+        return SelectStore(Node, 2, AArch64::ST1Twov2d);
+      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+        return SelectStore(Node, 2, AArch64::ST1Twov1d);
+      break;
     }
-    case Intrinsic::aarch64_neon_vst1x2: {
-      static const uint16_t Opcodes[] = {
-        AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
-        AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
-        AArch64::ST1x2_4S, AArch64::ST1x2_2D
-      };
-      return SelectVST(Node, false, 2, Opcodes);
+    case Intrinsic::aarch64_neon_st1x3: {
+      if (VT == MVT::v8i8)
+        return SelectStore(Node, 3, AArch64::ST1Threev8b);
+      else if (VT == MVT::v16i8)
+        return SelectStore(Node, 3, AArch64::ST1Threev16b);
+      else if (VT == MVT::v4i16)
+        return SelectStore(Node, 3, AArch64::ST1Threev4h);
+      else if (VT == MVT::v8i16)
+        return SelectStore(Node, 3, AArch64::ST1Threev8h);
+      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+        return SelectStore(Node, 3, AArch64::ST1Threev2s);
+      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+        return SelectStore(Node, 3, AArch64::ST1Threev4s);
+      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+        return SelectStore(Node, 3, AArch64::ST1Threev2d);
+      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+        return SelectStore(Node, 3, AArch64::ST1Threev1d);
+      break;
     }
-    case Intrinsic::aarch64_neon_vst1x3: {
-      static const uint16_t Opcodes[] = {
-        AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
-        AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
-        AArch64::ST1x3_4S, AArch64::ST1x3_2D
-      };
-      return SelectVST(Node, false, 3, Opcodes);
+    case Intrinsic::aarch64_neon_st1x4: {
+      if (VT == MVT::v8i8)
+        return SelectStore(Node, 4, AArch64::ST1Fourv8b);
+      else if (VT == MVT::v16i8)
+        return SelectStore(Node, 4, AArch64::ST1Fourv16b);
+      else if (VT == MVT::v4i16)
+        return SelectStore(Node, 4, AArch64::ST1Fourv4h);
+      else if (VT == MVT::v8i16)
+        return SelectStore(Node, 4, AArch64::ST1Fourv8h);
+      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+        return SelectStore(Node, 4, AArch64::ST1Fourv2s);
+      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+        return SelectStore(Node, 4, AArch64::ST1Fourv4s);
+      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+        return SelectStore(Node, 4, AArch64::ST1Fourv2d);
+      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+        return
SelectStore(Node, 4, AArch64::ST1Fourv1d); + break; } - case Intrinsic::aarch64_neon_vst1x4: { - static const uint16_t Opcodes[] = { - AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S, - AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H, - AArch64::ST1x4_4S, AArch64::ST1x4_2D - }; - return SelectVST(Node, false, 4, Opcodes); + case Intrinsic::aarch64_neon_st2: { + if (VT == MVT::v8i8) + return SelectStore(Node, 2, AArch64::ST2Twov8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 2, AArch64::ST2Twov16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 2, AArch64::ST2Twov4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 2, AArch64::ST2Twov8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 2, AArch64::ST2Twov2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 2, AArch64::ST2Twov4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 2, AArch64::ST2Twov2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 2, AArch64::ST1Twov1d); + break; } - case Intrinsic::arm_neon_vld2lane: { - static const uint16_t Opcodes[] = { - AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D - }; - return SelectVLDSTLane(Node, true, false, 2, Opcodes); + case Intrinsic::aarch64_neon_st3: { + if (VT == MVT::v8i8) + return SelectStore(Node, 3, AArch64::ST3Threev8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 3, AArch64::ST3Threev16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 3, AArch64::ST3Threev4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 3, AArch64::ST3Threev8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 3, AArch64::ST3Threev2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 3, AArch64::ST3Threev4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 3, AArch64::ST3Threev2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 3, AArch64::ST1Threev1d); + break; } - case Intrinsic::arm_neon_vld3lane: { - static const uint16_t Opcodes[] = { - AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D - }; - return SelectVLDSTLane(Node, true, false, 3, Opcodes); + case Intrinsic::aarch64_neon_st4: { + if (VT == MVT::v8i8) + return SelectStore(Node, 4, AArch64::ST4Fourv8b); + else if (VT == MVT::v16i8) + return SelectStore(Node, 4, AArch64::ST4Fourv16b); + else if (VT == MVT::v4i16) + return SelectStore(Node, 4, AArch64::ST4Fourv4h); + else if (VT == MVT::v8i16) + return SelectStore(Node, 4, AArch64::ST4Fourv8h); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectStore(Node, 4, AArch64::ST4Fourv2s); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectStore(Node, 4, AArch64::ST4Fourv4s); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectStore(Node, 4, AArch64::ST4Fourv2d); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectStore(Node, 4, AArch64::ST1Fourv1d); + break; } - case Intrinsic::arm_neon_vld4lane: { - static const uint16_t Opcodes[] = { - AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D - }; - return SelectVLDSTLane(Node, true, false, 4, Opcodes); + case Intrinsic::aarch64_neon_st2lane: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectStoreLane(Node, 2, AArch64::ST2i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectStoreLane(Node, 2, AArch64::ST2i16); + else if (VT == 
MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectStoreLane(Node, 2, AArch64::ST2i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectStoreLane(Node, 2, AArch64::ST2i64); + break; } - case Intrinsic::arm_neon_vst2lane: { - static const uint16_t Opcodes[] = { - AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D - }; - return SelectVLDSTLane(Node, false, false, 2, Opcodes); + case Intrinsic::aarch64_neon_st3lane: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectStoreLane(Node, 3, AArch64::ST3i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectStoreLane(Node, 3, AArch64::ST3i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectStoreLane(Node, 3, AArch64::ST3i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectStoreLane(Node, 3, AArch64::ST3i64); + break; } - case Intrinsic::arm_neon_vst3lane: { - static const uint16_t Opcodes[] = { - AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D - }; - return SelectVLDSTLane(Node, false, false, 3, Opcodes); + case Intrinsic::aarch64_neon_st4lane: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectStoreLane(Node, 4, AArch64::ST4i8); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectStoreLane(Node, 4, AArch64::ST4i16); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectStoreLane(Node, 4, AArch64::ST4i32); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectStoreLane(Node, 4, AArch64::ST4i64); + break; } - case Intrinsic::arm_neon_vst4lane: { - static const uint16_t Opcodes[] = { - AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D - }; - return SelectVLDSTLane(Node, false, false, 4, Opcodes); } - } // End of switch IntNo + } + case AArch64ISD::LD2post: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD3post: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); + else if 
(VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD4post: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD1x2post: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD1x3post: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD1x4post: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 
4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD1DUPpost: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD2DUPpost: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD3DUPpost: { + if (VT == MVT::v8i8) + return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD4DUPpost: { + if (VT == MVT::v8i8) + return 
SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); + else if (VT == MVT::v16i8) + return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); + else if (VT == MVT::v4i16) + return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); + else if (VT == MVT::v8i16) + return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); + break; + } + case AArch64ISD::LD1LANEpost: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST); + break; + } + case AArch64ISD::LD2LANEpost: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); + break; + } + case AArch64ISD::LD3LANEpost: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); + break; + } + case AArch64ISD::LD4LANEpost: { + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); + break; + } + case AArch64ISD::ST2post: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v8i8) + return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); + else if (VT == MVT::v16i8) + return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); + else if (VT == MVT::v4i16) + return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); + else if (VT == MVT::v8i16) + return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); + else if (VT == 
MVT::v2i32 || VT == MVT::v2f32) + return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); + break; + } + case AArch64ISD::ST3post: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v8i8) + return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); + else if (VT == MVT::v16i8) + return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); + else if (VT == MVT::v4i16) + return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); + else if (VT == MVT::v8i16) + return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); + break; + } + case AArch64ISD::ST4post: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v8i8) + return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); + else if (VT == MVT::v16i8) + return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); + else if (VT == MVT::v4i16) + return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); + else if (VT == MVT::v8i16) + return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); + break; + } + case AArch64ISD::ST1x2post: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v8i8) + return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); + else if (VT == MVT::v16i8) + return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); + else if (VT == MVT::v4i16) + return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST); + else if (VT == MVT::v8i16) + return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); + break; + } + case AArch64ISD::ST1x3post: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v8i8) + return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); + else if (VT == MVT::v16i8) + return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); + else if (VT == MVT::v4i16) + return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); + else if (VT == MVT::v8i16) + return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); + else if (VT == MVT::v2i32 || VT == 
MVT::v2f32) + return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); + break; + } + case AArch64ISD::ST1x4post: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v8i8) + return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); + else if (VT == MVT::v16i8) + return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); + else if (VT == MVT::v4i16) + return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); + else if (VT == MVT::v8i16) + return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); + else if (VT == MVT::v2i32 || VT == MVT::v2f32) + return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); + else if (VT == MVT::v4i32 || VT == MVT::v4f32) + return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); + else if (VT == MVT::v1i64 || VT == MVT::v1f64) + return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); + else if (VT == MVT::v2i64 || VT == MVT::v2f64) + return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); + break; + } + case AArch64ISD::ST2LANEpost: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); + break; + } + case AArch64ISD::ST3LANEpost: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); + break; + } + case AArch64ISD::ST4LANEpost: { + VT = Node->getOperand(1).getValueType(); + if (VT == MVT::v16i8 || VT == MVT::v8i8) + return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); + else if (VT == MVT::v8i16 || VT == MVT::v4i16) + return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); + else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || + VT == MVT::v2f32) + return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); + else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || + VT == MVT::v1f64) + return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); break; - } // End of case ISD::INTRINSIC_VOID and :ISD::INTRINSIC_W_CHAIN - default: - break; // Let generic code handle it } - SDNode *ResNode = SelectCode(Node); + case ISD::FCEIL: + case ISD::FFLOOR: + case ISD::FTRUNC: + case ISD::FROUND: + if (SDNode *I = SelectLIBM(Node)) + return I; + break; + } - DEBUG(dbgs() << "=> "; - if (ResNode == NULL || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - dbgs() << "\n"); 
+ // Select the default instruction + ResNode = SelectCode(Node); + + DEBUG(errs() << "=> "); + if (ResNode == nullptr || ResNode == Node) + DEBUG(Node->dump(CurDAG)); + else + DEBUG(ResNode->dump(CurDAG)); + DEBUG(errs() << "\n"); return ResNode; } -/// This pass converts a legalized DAG into a AArch64-specific DAG, ready for -/// instruction scheduling. -FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM, +/// createAArch64ISelDag - This pass converts a legalized DAG into an +/// AArch64-specific DAG, ready for instruction scheduling. +FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOpt::Level OptLevel) { return new AArch64DAGToDAGISel(TM, OptLevel); } diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 388973a..80d6669 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===// +//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===// // // The LLVM Compiler Infrastructure // @@ -7,46 +7,87 @@ // //===----------------------------------------------------------------------===// // -// This file defines the interfaces that AArch64 uses to lower LLVM code into a -// selection DAG. +// This file implements the AArch64TargetLowering class. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "aarch64-isel" -#include "AArch64.h" #include "AArch64ISelLowering.h" +#include "AArch64PerfectShuffle.h" +#include "AArch64Subtarget.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/Analysis.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/Support/MathExtras.h" - +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; -static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { - assert (TM.getSubtarget<AArch64Subtarget>().isTargetELF() && - "unknown subtarget type"); - return new AArch64ElfTargetObjectFile(); -} +#define DEBUG_TYPE "aarch64-lower" -AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) { +STATISTIC(NumTailCalls, "Number of tail calls"); +STATISTIC(NumShiftInserts, "Number of vector shift inserts"); + +enum AlignMode { + StrictAlign, + NoStrictAlign +}; + +static cl::opt<AlignMode> +Align(cl::desc("Load/store alignment support"), + cl::Hidden, cl::init(NoStrictAlign), + cl::values( + clEnumValN(StrictAlign, "aarch64-strict-align", + "Disallow all unaligned memory accesses"), + clEnumValN(NoStrictAlign, "aarch64-no-strict-align", + "Allow unaligned memory accesses"), + clEnumValEnd)); + +// Placeholder until extr generation is tested fully.
+static cl::opt<bool> +EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden, + cl::desc("Allow AArch64 (or (shift)(shift))->extract"), + cl::init(true)); + +static cl::opt<bool> +EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, + cl::desc("Allow AArch64 SLI/SRI formation"), + cl::init(false)); + +//===----------------------------------------------------------------------===// +// AArch64 Lowering public interface. +//===----------------------------------------------------------------------===// +static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { + if (TM.getSubtarget<AArch64Subtarget>().isTargetDarwin()) + return new AArch64_MachoTargetObjectFile(); - const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + return new AArch64_ELFTargetObjectFile(); +} - // SIMD compares set the entire lane's bits to 1 +AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) + : TargetLowering(TM, createTLOF(TM)) { + Subtarget = &TM.getSubtarget<AArch64Subtarget>(); + + // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so + // we have to make something up. Arbitrarily, choose ZeroOrOne. + setBooleanContents(ZeroOrOneBooleanContent); + // When comparing vectors the result sets the different elements in the + // vector to all-one or all-zero. setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - // Scalar register <-> type mapping - addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); - addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); + // Set up the register classes. + addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass); + addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass); if (Subtarget->hasFPARMv8()) { addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); @@ -56,201 +97,86 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) } if (Subtarget->hasNEON()) { - // And the vectors - addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass); - addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass); - addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass); - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass); - addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass); - addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass); + addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass); + // Someone set us up the NEON. + addDRTypeForNEON(MVT::v2f32); + addDRTypeForNEON(MVT::v8i8); + addDRTypeForNEON(MVT::v4i16); + addDRTypeForNEON(MVT::v2i32); + addDRTypeForNEON(MVT::v1i64); + addDRTypeForNEON(MVT::v1f64); + + addQRTypeForNEON(MVT::v4f32); + addQRTypeForNEON(MVT::v2f64); + addQRTypeForNEON(MVT::v16i8); + addQRTypeForNEON(MVT::v8i16); + addQRTypeForNEON(MVT::v4i32); + addQRTypeForNEON(MVT::v2i64); } + // Compute derived properties from the register classes computeRegisterProperties(); - // We combine OR nodes for bitfield and NEON BSL operations.
- setTargetDAGCombine(ISD::OR); - - setTargetDAGCombine(ISD::AND); - setTargetDAGCombine(ISD::SRA); - setTargetDAGCombine(ISD::SRL); - setTargetDAGCombine(ISD::SHL); - - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - setTargetDAGCombine(ISD::INTRINSIC_VOID); - setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); - - // AArch64 does not have i1 loads, or much of anything for i1 really. - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - - setStackPointerRegisterToSaveRestore(AArch64::XSP); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - - // We'll lower globals to wrappers for selection. + // Provide all sorts of operation actions setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); - - // A64 instructions have the comparison predicate attached to the user of the - // result, but having a separate comparison is valuable for matching. + setOperationAction(ISD::SETCC, MVT::i32, Custom); + setOperationAction(ISD::SETCC, MVT::i64, Custom); + setOperationAction(ISD::SETCC, MVT::f32, Custom); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::i64, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); - setOperationAction(ISD::SELECT, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - - setOperationAction(ISD::BRCOND, MVT::Other, Custom); - - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i64, Custom); - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VACOPY, MVT::Other, Custom); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::VAARG, MVT::Other, Expand); - - setOperationAction(ISD::BlockAddress, MVT::i64, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - - setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTL, MVT::i64, Expand); - - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, 
Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - setOperationAction(ISD::CTPOP, MVT::i64, Expand); - - // Legal floating-point operations. - setOperationAction(ISD::FABS, MVT::f32, Legal); - setOperationAction(ISD::FABS, MVT::f64, Legal); - - setOperationAction(ISD::FCEIL, MVT::f32, Legal); - setOperationAction(ISD::FCEIL, MVT::f64, Legal); - - setOperationAction(ISD::FFLOOR, MVT::f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::f64, Legal); - - setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); - - setOperationAction(ISD::FNEG, MVT::f32, Legal); - setOperationAction(ISD::FNEG, MVT::f64, Legal); - - setOperationAction(ISD::FRINT, MVT::f32, Legal); - setOperationAction(ISD::FRINT, MVT::f64, Legal); - - setOperationAction(ISD::FSQRT, MVT::f32, Legal); - setOperationAction(ISD::FSQRT, MVT::f64, Legal); - - setOperationAction(ISD::FTRUNC, MVT::f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::f64, Legal); - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - setOperationAction(ISD::ConstantFP, MVT::f128, Legal); - - // Illegal floating-point operations. - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - - setOperationAction(ISD::FCOS, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - - setOperationAction(ISD::FEXP, MVT::f32, Expand); - setOperationAction(ISD::FEXP, MVT::f64, Expand); - - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - - setOperationAction(ISD::FLOG, MVT::f32, Expand); - setOperationAction(ISD::FLOG, MVT::f64, Expand); - - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - - setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - - setOperationAction(ISD::FPOWI, MVT::f32, Expand); - setOperationAction(ISD::FPOWI, MVT::f64, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); setOperationAction(ISD::FREM, MVT::f32, Expand); setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f80, Expand); - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + // Custom lowering hooks are needed for XOR + // to fold it into CSINC/CSINV. + setOperationAction(ISD::XOR, MVT::i32, Custom); + setOperationAction(ISD::XOR, MVT::i64, Custom); // Virtually no operation on f128 is legal, but LLVM can't expand them when // there's a valid register class, so we need custom operations in most cases. 
- setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FADD, MVT::f128, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FDIV, MVT::f128, Custom); - setOperationAction(ISD::FMA, MVT::f128, Expand); - setOperationAction(ISD::FMUL, MVT::f128, Custom); - setOperationAction(ISD::FNEG, MVT::f128, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand); - setOperationAction(ISD::FP_ROUND, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FSINCOS, MVT::f128, Expand); - setOperationAction(ISD::FSQRT, MVT::f128, Expand); - setOperationAction(ISD::FSUB, MVT::f128, Custom); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::SETCC, MVT::f128, Custom); - setOperationAction(ISD::BR_CC, MVT::f128, Custom); - setOperationAction(ISD::SELECT, MVT::f128, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); + setOperationAction(ISD::FABS, MVT::f128, Expand); + setOperationAction(ISD::FADD, MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); + setOperationAction(ISD::FCOS, MVT::f128, Expand); + setOperationAction(ISD::FDIV, MVT::f128, Custom); + setOperationAction(ISD::FMA, MVT::f128, Expand); + setOperationAction(ISD::FMUL, MVT::f128, Custom); + setOperationAction(ISD::FNEG, MVT::f128, Expand); + setOperationAction(ISD::FPOW, MVT::f128, Expand); + setOperationAction(ISD::FREM, MVT::f128, Expand); + setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FSIN, MVT::f128, Expand); + setOperationAction(ISD::FSINCOS, MVT::f128, Expand); + setOperationAction(ISD::FSQRT, MVT::f128, Expand); + setOperationAction(ISD::FSUB, MVT::f128, Custom); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + setOperationAction(ISD::SETCC, MVT::f128, Custom); + setOperationAction(ISD::BR_CC, MVT::f128, Custom); + setOperationAction(ISD::SELECT, MVT::f128, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); // Lowering for many of the conversions is actually specified by the non-f128 // type. The LowerXXX function will be trivial when f128 isn't involved. @@ -266,623 +192,583 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); - // i128 shift operation support - setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); - - // This prevents LLVM trying to compress double constants into a floating - // constant-pool entry and trying to load from there. It's of doubtful benefit - // for A64: we'd need LDR followed by FCVT, I believe. 
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + // Variable arguments. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Custom); + setOperationAction(ISD::VACOPY, MVT::Other, Custom); + setOperationAction(ISD::VAEND, MVT::Other, Expand); - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); - setTruncStoreAction(MVT::f128, MVT::f16, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setTruncStoreAction(MVT::f64, MVT::f16, Expand); - setTruncStoreAction(MVT::f32, MVT::f16, Expand); + // Variable-sized objects. + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); + // Exception handling. + // FIXME: These are guesses. Has this been defined yet? setExceptionPointerRegister(AArch64::X0); setExceptionSelectorRegister(AArch64::X1); - if (Subtarget->hasNEON()) { - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand); - - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); - - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); - - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); - setOperationAction(ISD::CONCAT_VECTORS, 
MVT::v4i32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal); - - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); - - setOperationAction(ISD::SETCC, MVT::v8i8, Custom); - setOperationAction(ISD::SETCC, MVT::v16i8, Custom); - setOperationAction(ISD::SETCC, MVT::v4i16, Custom); - setOperationAction(ISD::SETCC, MVT::v8i16, Custom); - setOperationAction(ISD::SETCC, MVT::v2i32, Custom); - setOperationAction(ISD::SETCC, MVT::v4i32, Custom); - setOperationAction(ISD::SETCC, MVT::v1i64, Custom); - setOperationAction(ISD::SETCC, MVT::v2i64, Custom); - setOperationAction(ISD::SETCC, MVT::v2f32, Custom); - setOperationAction(ISD::SETCC, MVT::v4f32, Custom); - setOperationAction(ISD::SETCC, MVT::v1f64, Custom); - setOperationAction(ISD::SETCC, MVT::v2f64, Custom); - - setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal); - setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); - - setOperationAction(ISD::FCEIL, MVT::v2f32, Legal); - setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); - setOperationAction(ISD::FCEIL, MVT::v1f64, Legal); - setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); - - setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal); - setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); - - setOperationAction(ISD::FRINT, MVT::v2f32, Legal); - setOperationAction(ISD::FRINT, MVT::v4f32, Legal); - setOperationAction(ISD::FRINT, MVT::v1f64, Legal); - setOperationAction(ISD::FRINT, MVT::v2f64, Legal); - - setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal); - setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); - - setOperationAction(ISD::FROUND, MVT::v2f32, Legal); - setOperationAction(ISD::FROUND, MVT::v4f32, Legal); - setOperationAction(ISD::FROUND, MVT::v1f64, Legal); - setOperationAction(ISD::FROUND, MVT::v2f64, Legal); - - setOperationAction(ISD::SINT_TO_FP, MVT::v1i8, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); + // Constant pool entries + setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i8, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); + // BlockAddress + setOperationAction(ISD::BlockAddress, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v1i8, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v1i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v1i32, Custom); - 
setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Custom); - - setOperationAction(ISD::FP_TO_UINT, MVT::v1i8, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Custom); - - // Neon does not support vector divide/remainder operations except - // floating-point divide. - setOperationAction(ISD::SDIV, MVT::v1i8, Expand); - setOperationAction(ISD::SDIV, MVT::v8i8, Expand); - setOperationAction(ISD::SDIV, MVT::v16i8, Expand); - setOperationAction(ISD::SDIV, MVT::v1i16, Expand); - setOperationAction(ISD::SDIV, MVT::v4i16, Expand); - setOperationAction(ISD::SDIV, MVT::v8i16, Expand); - setOperationAction(ISD::SDIV, MVT::v1i32, Expand); - setOperationAction(ISD::SDIV, MVT::v2i32, Expand); - setOperationAction(ISD::SDIV, MVT::v4i32, Expand); - setOperationAction(ISD::SDIV, MVT::v1i64, Expand); - setOperationAction(ISD::SDIV, MVT::v2i64, Expand); - - setOperationAction(ISD::UDIV, MVT::v1i8, Expand); - setOperationAction(ISD::UDIV, MVT::v8i8, Expand); - setOperationAction(ISD::UDIV, MVT::v16i8, Expand); - setOperationAction(ISD::UDIV, MVT::v1i16, Expand); - setOperationAction(ISD::UDIV, MVT::v4i16, Expand); - setOperationAction(ISD::UDIV, MVT::v8i16, Expand); - setOperationAction(ISD::UDIV, MVT::v1i32, Expand); - setOperationAction(ISD::UDIV, MVT::v2i32, Expand); - setOperationAction(ISD::UDIV, MVT::v4i32, Expand); - setOperationAction(ISD::UDIV, MVT::v1i64, Expand); - setOperationAction(ISD::UDIV, MVT::v2i64, Expand); - - setOperationAction(ISD::SREM, MVT::v1i8, Expand); - setOperationAction(ISD::SREM, MVT::v8i8, Expand); - setOperationAction(ISD::SREM, MVT::v16i8, Expand); - setOperationAction(ISD::SREM, MVT::v1i16, Expand); - setOperationAction(ISD::SREM, MVT::v4i16, Expand); - setOperationAction(ISD::SREM, MVT::v8i16, Expand); - setOperationAction(ISD::SREM, MVT::v1i32, Expand); - setOperationAction(ISD::SREM, MVT::v2i32, Expand); - setOperationAction(ISD::SREM, MVT::v4i32, Expand); - setOperationAction(ISD::SREM, MVT::v1i64, Expand); - setOperationAction(ISD::SREM, MVT::v2i64, Expand); - - setOperationAction(ISD::UREM, MVT::v1i8, Expand); - setOperationAction(ISD::UREM, MVT::v8i8, Expand); - setOperationAction(ISD::UREM, MVT::v16i8, Expand); - setOperationAction(ISD::UREM, MVT::v1i16, Expand); - setOperationAction(ISD::UREM, MVT::v4i16, Expand); - setOperationAction(ISD::UREM, MVT::v8i16, Expand); - setOperationAction(ISD::UREM, MVT::v1i32, Expand); - setOperationAction(ISD::UREM, MVT::v2i32, Expand); - setOperationAction(ISD::UREM, MVT::v4i32, Expand); - setOperationAction(ISD::UREM, MVT::v1i64, Expand); - setOperationAction(ISD::UREM, MVT::v2i64, Expand); - - setOperationAction(ISD::FREM, MVT::v2f32, Expand); - setOperationAction(ISD::FREM, MVT::v4f32, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v2f64, Expand); - - setOperationAction(ISD::SELECT, MVT::v8i8, Expand); - setOperationAction(ISD::SELECT, MVT::v16i8, Expand); - setOperationAction(ISD::SELECT, MVT::v4i16, Expand); - setOperationAction(ISD::SELECT, MVT::v8i16, Expand); - setOperationAction(ISD::SELECT, MVT::v2i32, Expand); - setOperationAction(ISD::SELECT, MVT::v4i32, Expand); - setOperationAction(ISD::SELECT, MVT::v1i64, 
Expand); - setOperationAction(ISD::SELECT, MVT::v2i64, Expand); - setOperationAction(ISD::SELECT, MVT::v2f32, Expand); - setOperationAction(ISD::SELECT, MVT::v4f32, Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT, MVT::v2f64, Expand); - - setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom); - - // Vector ExtLoad and TruncStore are expanded. - for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE; - I <= MVT::LAST_VECTOR_VALUETYPE; ++I) { - MVT VT = (MVT::SimpleValueType) I; - setLoadExtAction(ISD::SEXTLOAD, VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT, Expand); - setLoadExtAction(ISD::EXTLOAD, VT, Expand); - for (unsigned II = MVT::FIRST_VECTOR_VALUETYPE; - II <= MVT::LAST_VECTOR_VALUETYPE; ++II) { - MVT VT1 = (MVT::SimpleValueType) II; - // A TruncStore has two vector types of the same number of elements - // and different element sizes. - if (VT.getVectorNumElements() == VT1.getVectorNumElements() && - VT.getVectorElementType().getSizeInBits() - > VT1.getVectorElementType().getSizeInBits()) - setTruncStoreAction(VT, VT1, Expand); - } - } + // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences. + setOperationAction(ISD::ADDC, MVT::i32, Custom); + setOperationAction(ISD::ADDE, MVT::i32, Custom); + setOperationAction(ISD::SUBC, MVT::i32, Custom); + setOperationAction(ISD::SUBE, MVT::i32, Custom); + setOperationAction(ISD::ADDC, MVT::i64, Custom); + setOperationAction(ISD::ADDE, MVT::i64, Custom); + setOperationAction(ISD::SUBC, MVT::i64, Custom); + setOperationAction(ISD::SUBE, MVT::i64, Custom); + + // AArch64 lacks both left-rotate and popcount instructions. + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); - // There is no v1i64/v2i64 multiply, expand v1i64/v2i64 to GPR i64 multiply. - // FIXME: For a v2i64 multiply, we copy VPR to GPR and do 2 i64 multiplies, - // and then copy back to VPR. This solution may be optimized by Following 3 - // NEON instructions: - // pmull v2.1q, v0.1d, v1.1d - // pmull2 v3.1q, v0.2d, v1.2d - // ins v2.d[1], v3.d[0] - // As currently we can't verify the correctness of such assumption, we can - // do such optimization in the future. - setOperationAction(ISD::MUL, MVT::v1i64, Expand); - setOperationAction(ISD::MUL, MVT::v2i64, Expand); + // AArch64 doesn't have {U|S}MUL_LOHI. 
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::FCOS, MVT::v2f64, Expand); - setOperationAction(ISD::FCOS, MVT::v4f32, Expand); - setOperationAction(ISD::FCOS, MVT::v2f32, Expand); - setOperationAction(ISD::FSIN, MVT::v2f64, Expand); - setOperationAction(ISD::FSIN, MVT::v4f32, Expand); - setOperationAction(ISD::FSIN, MVT::v2f32, Expand); - setOperationAction(ISD::FPOW, MVT::v2f64, Expand); - setOperationAction(ISD::FPOW, MVT::v4f32, Expand); - setOperationAction(ISD::FPOW, MVT::v2f32, Expand); - } - setTargetDAGCombine(ISD::SETCC); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::VSELECT); -} + // Expand the undefined-at-zero variants of cttz/ctlz to their defined-at-zero + // counterparts, which AArch64 supports directly. + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); -EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - // It's reasonably important that this value matches the "natural" legal - // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself - // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64). - if (!VT.isVector()) return MVT::i32; - return VT.changeVectorElementTypeToInteger(); -} + setOperationAction(ISD::CTPOP, MVT::i32, Custom); + setOperationAction(ISD::CTPOP, MVT::i64, Custom); -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - unsigned &LdrOpc, - unsigned &StrOpc) { - static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, - AArch64::LDXR_word, AArch64::LDXR_dword}; - static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, - AArch64::LDAXR_word, AArch64::LDAXR_dword}; - static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, - AArch64::STXR_word, AArch64::STXR_dword}; - static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword, - AArch64::STLXR_word, AArch64::STLXR_dword}; - - const unsigned *LoadOps, *StoreOps; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; + // Custom lower Add/Sub/Mul with overflow.
+ setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction(ISD::SADDO, MVT::i64, Custom); + setOperationAction(ISD::UADDO, MVT::i32, Custom); + setOperationAction(ISD::UADDO, MVT::i64, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i64, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i64, Custom); + setOperationAction(ISD::SMULO, MVT::i32, Custom); + setOperationAction(ISD::SMULO, MVT::i64, Custom); + setOperationAction(ISD::UMULO, MVT::i32, Custom); + setOperationAction(ISD::UMULO, MVT::i64, Custom); - assert(isPowerOf2_32(Size) && Size <= 8 && - "unsupported size for atomic binary op!"); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + + // AArch64 has implementations of a lot of rounding-like FP operations. + static MVT RoundingTypes[] = { MVT::f32, MVT::f64}; + for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) { + MVT Ty = RoundingTypes[I]; + setOperationAction(ISD::FFLOOR, Ty, Legal); + setOperationAction(ISD::FNEARBYINT, Ty, Legal); + setOperationAction(ISD::FCEIL, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + setOperationAction(ISD::FTRUNC, Ty, Legal); + setOperationAction(ISD::FROUND, Ty, Legal); + } - LdrOpc = LoadOps[Log2_32(Size)]; - StrOpc = StoreOps[Log2_32(Size)]; -} + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); -// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really -// have value type mapped, and they are both being defined as MVT::untyped. -// Without knowing the MVT type, MachineLICM::getRegisterClassIDAndCost -// would fail to figure out the register pressure correctly. -std::pair<const TargetRegisterClass*, uint8_t> -AArch64TargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; - uint8_t Cost = 1; - switch (VT.SimpleTy) { - default: - return TargetLowering::findRepresentativeClass(VT); - case MVT::v4i64: - RRC = &AArch64::QPairRegClass; - Cost = 2; - break; - case MVT::v8i64: - RRC = &AArch64::QQuadRegClass; - Cost = 4; - break; + if (Subtarget->isTargetMachO()) { + // For iOS, we don't want the normal expansion of a libcall to + // sincos. We want to issue a libcall to __sincos_stret to avoid memory + // traffic. + setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); + } else { + setOperationAction(ISD::FSINCOS, MVT::f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::f32, Expand); } - return std::make_pair(RRC, Cost); -} -MachineBasicBlock * -AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, - unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + // AArch64 does not have floating-point extending loads, i1 sign-extending + // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); + setTruncStoreAction(MVT::f32, MVT::f16, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f128, MVT::f80, Expand); + setTruncStoreAction(MVT::f128, MVT::f64, Expand); + setTruncStoreAction(MVT::f128, MVT::f32, Expand); + setTruncStoreAction(MVT::f128, MVT::f16, Expand); + // Indexed loads and stores are supported. + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedLoadAction(im, MVT::i64, Legal); + setIndexedLoadAction(im, MVT::f64, Legal); + setIndexedLoadAction(im, MVT::f32, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i64, Legal); + setIndexedStoreAction(im, MVT::f64, Legal); + setIndexedStoreAction(im, MVT::f32, Legal); + } - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; + // Trap. + setOperationAction(ISD::TRAP, MVT::Other, Legal); - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - const TargetRegisterClass *TRC - = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; - unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // scratch, dest, incr - // stxr stxr_status, scratch, ptr - // cbnz stxr_status, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (BinOpcode) { - // All arithmetic operations we'll be creating are designed to take an extra - // shift or extend operand, which we can conveniently set to zero. - - // Operand order needs to go the other way for NAND. - if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl) - BuildMI(BB, dl, TII->get(BinOpcode), scratch) - .addReg(incr).addReg(dest).addImm(0); - else - BuildMI(BB, dl, TII->get(BinOpcode), scratch) - .addReg(dest).addReg(incr).addImm(0); - } + // We combine OR nodes for bitfield operations.
+ setTargetDAGCombine(ISD::OR); - // From the stxr, the register is GPR32; from the cmp it's GPR32wsp - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); + // Vector add and sub nodes may conceal a high-half opportunity. + // Also, try to fold ADD into CSINC/CSINV. + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loopMBB); + setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine(ISD::SINT_TO_FP); + setTargetDAGCombine(ISD::UINT_TO_FP); - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - // exitMBB: - // ... - BB = exitMBB; + setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::BITCAST); + setTargetDAGCombine(ISD::CONCAT_VECTORS); + setTargetDAGCombine(ISD::STORE); - MI->eraseFromParent(); // The instruction is gone now. + setTargetDAGCombine(ISD::MUL); - return BB; -} + setTargetDAGCombine(ISD::SELECT); + setTargetDAGCombine(ISD::VSELECT); -MachineBasicBlock * -AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, - unsigned CmpOp, - A64CC::CondCodes Cond) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + setTargetDAGCombine(ISD::INTRINSIC_VOID); + setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); + setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; + MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8; + MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; + MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4; - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); + setStackPointerRegisterToSaveRestore(AArch64::SP); - unsigned oldval = dest; - DebugLoc dl = MI->getDebugLoc(); + setSchedulingPreference(Sched::Hybrid); - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const TargetRegisterClass *TRC, *TRCsp; - if (Size == 8) { - TRC = &AArch64::GPR64RegClass; - TRCsp = &AArch64::GPR64xspRegClass; - } else { - TRC = &AArch64::GPR32RegClass; - TRCsp = &AArch64::GPR32wspRegClass; - } + // Enable TBZ/TBNZ + MaskAndBranchFoldingIsLegal = true; + + setMinFunctionAlignment(2); + + RequireStrictAlign = (Align == StrictAlign); + + setHasExtractBitsInsn(true); + + if (Subtarget->hasNEON()) { + // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to + // silliness like this: + setOperationAction(ISD::FABS, MVT::v1f64, Expand); + setOperationAction(ISD::FADD, MVT::v1f64, Expand); + setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); + setOperationAction(ISD::FCOS, MVT::v1f64, Expand); + setOperationAction(ISD::FDIV, MVT::v1f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); + setOperationAction(ISD::FMA, MVT::v1f64, Expand); + setOperationAction(ISD::FMUL, MVT::v1f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); + setOperationAction(ISD::FNEG, MVT::v1f64, Expand); + setOperationAction(ISD::FPOW, MVT::v1f64, Expand); +
setOperationAction(ISD::FREM, MVT::v1f64, Expand); + setOperationAction(ISD::FROUND, MVT::v1f64, Expand); + setOperationAction(ISD::FRINT, MVT::v1f64, Expand); + setOperationAction(ISD::FSIN, MVT::v1f64, Expand); + setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); + setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); + setOperationAction(ISD::FSUB, MVT::v1f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); + setOperationAction(ISD::SETCC, MVT::v1f64, Expand); + setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); + setOperationAction(ISD::SELECT, MVT::v1f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); + + setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); + setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); + + setOperationAction(ISD::MUL, MVT::v1i64, Expand); + + // AArch64 doesn't have direct vector ->f32 conversion instructions for + // elements smaller than i32, so promote the input to i32 first. + setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote); + // Similarly, there is no direct i32 -> f64 vector conversion instruction. + setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); + // AArch64 doesn't have MUL.2d: + setOperationAction(ISD::MUL, MVT::v2i64, Expand); + setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); + setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); + // Likewise, narrowing and extending vector loads/stores aren't handled + // directly. + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + + setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, + Expand); + + setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); + + for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + setTruncStoreAction((MVT::SimpleValueType)VT, + (MVT::SimpleValueType)InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + } - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); + // AArch64 has implementations of a lot of rounding-like FP operations.
+ static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 }; + for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) { + MVT Ty = RoundingVecTypes[I]; + setOperationAction(ISD::FFLOOR, Ty, Legal); + setOperationAction(ISD::FNEARBYINT, Ty, Legal); + setOperationAction(ISD::FCEIL, Ty, Legal); + setOperationAction(ISD::FRINT, Ty, Legal); + setOperationAction(ISD::FTRUNC, Ty, Legal); + setOperationAction(ISD::FROUND, Ty, Legal); + } + } +} - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); +void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { + if (VT == MVT::v2f32) { + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32); - unsigned scratch = MRI.createVirtualRegister(TRC); - MRI.constrainRegClass(scratch, TRCsp); + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32); + } else if (VT == MVT::v2f64 || VT == MVT::v4f32) { + setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64); - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); + setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); + AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i64); + } - // loopMBB: - // ldxr dest, ptr - // cmp incr, dest (, sign extend if necessary) - // csel scratch, dest, incr, cond - // stxr stxr_status, scratch, ptr - // cbnz stxr_status, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); + // Mark vector float intrinsics as expand. + if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { + setOperationAction(ISD::FSIN, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FLOG, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand); + } - // Build compare and cmov instructions. 
- MRI.constrainRegClass(incr, TRCsp); - BuildMI(BB, dl, TII->get(CmpOp)) - .addReg(incr).addReg(oldval).addImm(0); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::AND, VT.getSimpleVT(), Custom); + setOperationAction(ISD::OR, VT.getSimpleVT(), Custom); + setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); + + setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); + setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand); + setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); + + // CNT supports only B element sizes. + if (VT != MVT::v8i8 && VT != MVT::v16i8) + setOperationAction(ISD::CTPOP, VT.getSimpleVT(), Expand); + + setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); + setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); + setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); + + setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + + if (Subtarget->isLittleEndian()) { + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, VT.getSimpleVT(), Legal); + setIndexedStoreAction(im, VT.getSimpleVT(), Legal); + } + } +} - BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc), - scratch) - .addReg(oldval).addReg(incr).addImm(Cond); +void AArch64TargetLowering::addDRTypeForNEON(MVT VT) { + addRegisterClass(VT, &AArch64::FPR64RegClass); + addTypeForNEON(VT, MVT::v2i32); +} - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); +void AArch64TargetLowering::addQRTypeForNEON(MVT VT) { + addRegisterClass(VT, &AArch64::FPR128RegClass); + addTypeForNEON(VT, MVT::v4i32); +} - BuildMI(BB, dl, TII->get(strOpc), stxr_status) - .addReg(scratch).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loopMBB); +EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { + if (!VT.isVector()) + return MVT::i32; + return VT.changeVectorElementTypeToInteger(); +} - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); +/// computeKnownBitsForTargetNode - Determine which of the bits specified in +/// Mask are known to be either zero or one and return them in the +/// KnownZero/KnownOne bitsets. 
+void AArch64TargetLowering::computeKnownBitsForTargetNode( + const SDValue Op, APInt &KnownZero, APInt &KnownOne, + const SelectionDAG &DAG, unsigned Depth) const { + switch (Op.getOpcode()) { + default: + break; + case AArch64ISD::CSEL: { + APInt KnownZero2, KnownOne2; + DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1); + DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1); + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + break; + } + case ISD::INTRINSIC_W_CHAIN: { + ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); + Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); + switch (IntID) { + default: return; + case Intrinsic::aarch64_ldaxr: + case Intrinsic::aarch64_ldxr: { + unsigned BitWidth = KnownOne.getBitWidth(); + EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + return; + } + } + break; + } + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: { + unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + break; + case Intrinsic::aarch64_neon_umaxv: + case Intrinsic::aarch64_neon_uminv: { + // Figure out the datatype of the vector operand. The UMINV instruction + // will zero extend the result, so we can mark as known zero all the + // bits larger than the element datatype. 32-bit or larger doesn't need + // this as those are legal types and will be handled by isel directly. + MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); + unsigned BitWidth = KnownZero.getBitWidth(); + if (VT == MVT::v8i8 || VT == MVT::v16i8) { + assert(BitWidth >= 8 && "Unexpected width!"); + APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); + KnownZero |= Mask; + } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { + assert(BitWidth >= 16 && "Unexpected width!"); + APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); + KnownZero |= Mask; + } + break; + } break; + } + } + } +} - // exitMBB: - // ... - BB = exitMBB; +MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { + return MVT::i64; +} - MI->eraseFromParent(); // The instruction is gone now. +unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { + // FIXME: On AArch64, this depends on the type. + // Basically, the addressable offsets are 0 to 4095 * Ty.getSizeInBytes(), + // and the offset has to be a multiple of the related size in bytes. + return 4095; +} - return BB; +FastISel * +AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const { + return AArch64::createFastISel(funcInfo, libInfo); } -MachineBasicBlock * -AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const TargetRegisterClass *TRCsp; - TRCsp = Size == 8 ?
&AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass; - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldxr dest, [ptr] - // cmp dest, oldval - // b.ne exitMBB - BB = loop1MBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - - unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl; - MRI.constrainRegClass(dest, TRCsp); - BuildMI(BB, dl, TII->get(CmpOp)) - .addReg(dest).addReg(oldval).addImm(0); - BuildMI(BB, dl, TII->get(AArch64::Bcc)) - .addImm(A64CC::NE).addMBB(exitMBB); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex stxr_status, newval, [ptr] - // cbnz stxr_status, loop1MBB - BB = loop2MBB; - unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass); - MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass); - - BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr); - BuildMI(BB, dl, TII->get(AArch64::CBNZw)) - .addReg(stxr_status).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. 
- - return BB; +const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: + return nullptr; + case AArch64ISD::CALL: return "AArch64ISD::CALL"; + case AArch64ISD::ADRP: return "AArch64ISD::ADRP"; + case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow"; + case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot"; + case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG"; + case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND"; + case AArch64ISD::CSEL: return "AArch64ISD::CSEL"; + case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL"; + case AArch64ISD::CSINV: return "AArch64ISD::CSINV"; + case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG"; + case AArch64ISD::CSINC: return "AArch64ISD::CSINC"; + case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; + case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL"; + case AArch64ISD::ADC: return "AArch64ISD::ADC"; + case AArch64ISD::SBC: return "AArch64ISD::SBC"; + case AArch64ISD::ADDS: return "AArch64ISD::ADDS"; + case AArch64ISD::SUBS: return "AArch64ISD::SUBS"; + case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; + case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; + case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; + case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; + case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; + case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; + case AArch64ISD::DUP: return "AArch64ISD::DUP"; + case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; + case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; + case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32"; + case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64"; + case AArch64ISD::MOVI: return "AArch64ISD::MOVI"; + case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift"; + case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit"; + case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl"; + case AArch64ISD::FMOV: return "AArch64ISD::FMOV"; + case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift"; + case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl"; + case AArch64ISD::BICi: return "AArch64ISD::BICi"; + case AArch64ISD::ORRi: return "AArch64ISD::ORRi"; + case AArch64ISD::BSL: return "AArch64ISD::BSL"; + case AArch64ISD::NEG: return "AArch64ISD::NEG"; + case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; + case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1"; + case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2"; + case AArch64ISD::UZP1: return "AArch64ISD::UZP1"; + case AArch64ISD::UZP2: return "AArch64ISD::UZP2"; + case AArch64ISD::TRN1: return "AArch64ISD::TRN1"; + case AArch64ISD::TRN2: return "AArch64ISD::TRN2"; + case AArch64ISD::REV16: return "AArch64ISD::REV16"; + case AArch64ISD::REV32: return "AArch64ISD::REV32"; + case AArch64ISD::REV64: return "AArch64ISD::REV64"; + case AArch64ISD::EXT: return "AArch64ISD::EXT"; + case AArch64ISD::VSHL: return "AArch64ISD::VSHL"; + case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR"; + case AArch64ISD::VASHR: return "AArch64ISD::VASHR"; + case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ"; + case AArch64ISD::CMGE: return "AArch64ISD::CMGE"; + case AArch64ISD::CMGT: return "AArch64ISD::CMGT"; + case AArch64ISD::CMHI: return "AArch64ISD::CMHI"; + case AArch64ISD::CMHS: return "AArch64ISD::CMHS"; + case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ"; + case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE"; + case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT"; + case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz"; + case AArch64ISD::CMGEz: return 
"AArch64ISD::CMGEz"; + case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz"; + case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz"; + case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz"; + case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz"; + case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz"; + case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz"; + case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz"; + case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz"; + case AArch64ISD::NOT: return "AArch64ISD::NOT"; + case AArch64ISD::BIT: return "AArch64ISD::BIT"; + case AArch64ISD::CBZ: return "AArch64ISD::CBZ"; + case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ"; + case AArch64ISD::TBZ: return "AArch64ISD::TBZ"; + case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ"; + case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; + case AArch64ISD::SITOF: return "AArch64ISD::SITOF"; + case AArch64ISD::UITOF: return "AArch64ISD::UITOF"; + case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I"; + case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I"; + case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I"; + case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I"; + case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I"; + case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; + case AArch64ISD::LD2post: return "AArch64ISD::LD2post"; + case AArch64ISD::LD3post: return "AArch64ISD::LD3post"; + case AArch64ISD::LD4post: return "AArch64ISD::LD4post"; + case AArch64ISD::ST2post: return "AArch64ISD::ST2post"; + case AArch64ISD::ST3post: return "AArch64ISD::ST3post"; + case AArch64ISD::ST4post: return "AArch64ISD::ST4post"; + case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post"; + case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post"; + case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post"; + case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post"; + case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post"; + case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post"; + case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost"; + case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost"; + case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost"; + case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost"; + case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost"; + case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost"; + case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost"; + case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost"; + case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost"; + case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost"; + case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost"; + } } MachineBasicBlock * AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const { - // We materialise the F128CSEL pseudo-instruction using conditional branches - // and loads, giving an instruciton sequence like: - // str q0, [sp] - // b.ne IfTrue - // b Finish - // IfTrue: - // str q1, [sp] - // Finish: - // ldr q0, [sp] - // - // Using virtual registers would probably not be beneficial since COPY - // instructions are expensive for f128 (there's no actual instruction to - // implement them). - // - // An alternative would be to do an integer-CSEL on some address. E.g.: - // mov x0, sp - // add x1, sp, #16 - // str q0, [x0] - // str q1, [x1] - // csel x0, x0, x1, ne - // ldr q0, [x0] - // - // It's unclear which approach is actually optimal. 
+ // We materialise the F128CSEL pseudo-instruction as some control flow and a + // phi node: + + // OrigBB: + // [... previous instrs leading to comparison ...] + // b.ne TrueBB + // b EndBB + // TrueBB: + // ; Fallthrough + // EndBB: + // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB] + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); MachineFunction *MF = MBB->getParent(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); @@ -906,49 +792,24 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, MBB->end()); EndBB->transferSuccessorsAndUpdatePHIs(MBB); - // We need somewhere to store the f128 value needed. - int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16); - - // [... start of incoming MBB ...] - // str qIFFALSE, [sp] - // b.cc IfTrue - // b Done - BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR)) - .addReg(IfFalseReg) - .addFrameIndex(ScratchFI) - .addImm(0); - BuildMI(MBB, DL, TII->get(AArch64::Bcc)) - .addImm(CondCode) - .addMBB(TrueBB); - BuildMI(MBB, DL, TII->get(AArch64::Bimm)) - .addMBB(EndBB); + BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB); + BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB); MBB->addSuccessor(TrueBB); MBB->addSuccessor(EndBB); + // TrueBB falls through to the end. + TrueBB->addSuccessor(EndBB); + if (!NZCVKilled) { - // NZCV is live-through TrueBB. TrueBB->addLiveIn(AArch64::NZCV); EndBB->addLiveIn(AArch64::NZCV); } - // IfTrue: - // str qIFTRUE, [sp] - BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR)) - .addReg(IfTrueReg) - .addFrameIndex(ScratchFI) - .addImm(0); - - // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the - // blocks. - TrueBB->addSuccessor(EndBB); - - // Done: - // ldr qDEST, [sp] - // [... rest of incoming MBB ...] 
- MachineInstr *StartOfEnd = EndBB->begin(); - BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg) - .addFrameIndex(ScratchFI) - .addImm(0); + BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg) + .addReg(IfTrueReg) + .addMBB(TrueBB) + .addReg(IfFalseReg) + .addMBB(MBB); MI->eraseFromParent(); return EndBB; @@ -956,853 +817,1140 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, MachineBasicBlock * AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const { + MachineBasicBlock *BB) const { switch (MI->getOpcode()) { - default: llvm_unreachable("Unhandled instruction with custom inserter"); - case AArch64::F128CSEL: - return EmitF128CSEL(MI, MBB); - case AArch64::ATOMIC_LOAD_ADD_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl); - case AArch64::ATOMIC_LOAD_ADD_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl); - - case AArch64::ATOMIC_LOAD_SUB_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl); - case AArch64::ATOMIC_LOAD_SUB_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl); - - case AArch64::ATOMIC_LOAD_AND_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl); - case AArch64::ATOMIC_LOAD_AND_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl); - - case AArch64::ATOMIC_LOAD_OR_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl); - case AArch64::ATOMIC_LOAD_OR_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl); - - case AArch64::ATOMIC_LOAD_XOR_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl); - case AArch64::ATOMIC_LOAD_XOR_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl); - - case AArch64::ATOMIC_LOAD_NAND_I8: - return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I16: - return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I32: - return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl); - case AArch64::ATOMIC_LOAD_NAND_I64: - return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl); - - case AArch64::ATOMIC_LOAD_MIN_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT); - case AArch64::ATOMIC_LOAD_MIN_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT); - - case 
AArch64::ATOMIC_LOAD_MAX_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT); - case AArch64::ATOMIC_LOAD_MAX_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT); - - case AArch64::ATOMIC_LOAD_UMIN_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI); - case AArch64::ATOMIC_LOAD_UMIN_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI); - - case AArch64::ATOMIC_LOAD_UMAX_I8: - return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I16: - return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I32: - return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO); - case AArch64::ATOMIC_LOAD_UMAX_I64: - return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO); - - case AArch64::ATOMIC_SWAP_I8: - return emitAtomicBinary(MI, MBB, 1, 0); - case AArch64::ATOMIC_SWAP_I16: - return emitAtomicBinary(MI, MBB, 2, 0); - case AArch64::ATOMIC_SWAP_I32: - return emitAtomicBinary(MI, MBB, 4, 0); - case AArch64::ATOMIC_SWAP_I64: - return emitAtomicBinary(MI, MBB, 8, 0); - - case AArch64::ATOMIC_CMP_SWAP_I8: - return emitAtomicCmpSwap(MI, MBB, 1); - case AArch64::ATOMIC_CMP_SWAP_I16: - return emitAtomicCmpSwap(MI, MBB, 2); - case AArch64::ATOMIC_CMP_SWAP_I32: - return emitAtomicCmpSwap(MI, MBB, 4); - case AArch64::ATOMIC_CMP_SWAP_I64: - return emitAtomicCmpSwap(MI, MBB, 8); - } -} + default: +#ifndef NDEBUG + MI->dump(); +#endif + assert(0 && "Unexpected instruction for custom inserter!"); + break; + case AArch64::F128CSEL: + return EmitF128CSEL(MI, BB); -const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC"; - case AArch64ISD::Call: return "AArch64ISD::Call"; - case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV"; - case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad"; - case AArch64ISD::BFI: return "AArch64ISD::BFI"; - case AArch64ISD::EXTR: return "AArch64ISD::EXTR"; - case AArch64ISD::Ret: return "AArch64ISD::Ret"; - case AArch64ISD::SBFX: return "AArch64ISD::SBFX"; - case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC"; - case AArch64ISD::SETCC: return "AArch64ISD::SETCC"; - case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN"; - case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER"; - case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL"; - case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; - case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; - - case AArch64ISD::NEON_MOVIMM: - return "AArch64ISD::NEON_MOVIMM"; - case AArch64ISD::NEON_MVNIMM: - return "AArch64ISD::NEON_MVNIMM"; - case AArch64ISD::NEON_FMOVIMM: - return "AArch64ISD::NEON_FMOVIMM"; - case AArch64ISD::NEON_CMP: - return "AArch64ISD::NEON_CMP"; - case AArch64ISD::NEON_CMPZ: - return "AArch64ISD::NEON_CMPZ"; - case AArch64ISD::NEON_TST: - return "AArch64ISD::NEON_TST"; - case 
AArch64ISD::NEON_QSHLs: - return "AArch64ISD::NEON_QSHLs"; - case AArch64ISD::NEON_QSHLu: - return "AArch64ISD::NEON_QSHLu"; - case AArch64ISD::NEON_VDUP: - return "AArch64ISD::NEON_VDUP"; - case AArch64ISD::NEON_VDUPLANE: - return "AArch64ISD::NEON_VDUPLANE"; - case AArch64ISD::NEON_REV16: - return "AArch64ISD::NEON_REV16"; - case AArch64ISD::NEON_REV32: - return "AArch64ISD::NEON_REV32"; - case AArch64ISD::NEON_REV64: - return "AArch64ISD::NEON_REV64"; - case AArch64ISD::NEON_UZP1: - return "AArch64ISD::NEON_UZP1"; - case AArch64ISD::NEON_UZP2: - return "AArch64ISD::NEON_UZP2"; - case AArch64ISD::NEON_ZIP1: - return "AArch64ISD::NEON_ZIP1"; - case AArch64ISD::NEON_ZIP2: - return "AArch64ISD::NEON_ZIP2"; - case AArch64ISD::NEON_TRN1: - return "AArch64ISD::NEON_TRN1"; - case AArch64ISD::NEON_TRN2: - return "AArch64ISD::NEON_TRN2"; - case AArch64ISD::NEON_LD1_UPD: - return "AArch64ISD::NEON_LD1_UPD"; - case AArch64ISD::NEON_LD2_UPD: - return "AArch64ISD::NEON_LD2_UPD"; - case AArch64ISD::NEON_LD3_UPD: - return "AArch64ISD::NEON_LD3_UPD"; - case AArch64ISD::NEON_LD4_UPD: - return "AArch64ISD::NEON_LD4_UPD"; - case AArch64ISD::NEON_ST1_UPD: - return "AArch64ISD::NEON_ST1_UPD"; - case AArch64ISD::NEON_ST2_UPD: - return "AArch64ISD::NEON_ST2_UPD"; - case AArch64ISD::NEON_ST3_UPD: - return "AArch64ISD::NEON_ST3_UPD"; - case AArch64ISD::NEON_ST4_UPD: - return "AArch64ISD::NEON_ST4_UPD"; - case AArch64ISD::NEON_LD1x2_UPD: - return "AArch64ISD::NEON_LD1x2_UPD"; - case AArch64ISD::NEON_LD1x3_UPD: - return "AArch64ISD::NEON_LD1x3_UPD"; - case AArch64ISD::NEON_LD1x4_UPD: - return "AArch64ISD::NEON_LD1x4_UPD"; - case AArch64ISD::NEON_ST1x2_UPD: - return "AArch64ISD::NEON_ST1x2_UPD"; - case AArch64ISD::NEON_ST1x3_UPD: - return "AArch64ISD::NEON_ST1x3_UPD"; - case AArch64ISD::NEON_ST1x4_UPD: - return "AArch64ISD::NEON_ST1x4_UPD"; - case AArch64ISD::NEON_LD2DUP: - return "AArch64ISD::NEON_LD2DUP"; - case AArch64ISD::NEON_LD3DUP: - return "AArch64ISD::NEON_LD3DUP"; - case AArch64ISD::NEON_LD4DUP: - return "AArch64ISD::NEON_LD4DUP"; - case AArch64ISD::NEON_LD2DUP_UPD: - return "AArch64ISD::NEON_LD2DUP_UPD"; - case AArch64ISD::NEON_LD3DUP_UPD: - return "AArch64ISD::NEON_LD3DUP_UPD"; - case AArch64ISD::NEON_LD4DUP_UPD: - return "AArch64ISD::NEON_LD4DUP_UPD"; - case AArch64ISD::NEON_LD2LN_UPD: - return "AArch64ISD::NEON_LD2LN_UPD"; - case AArch64ISD::NEON_LD3LN_UPD: - return "AArch64ISD::NEON_LD3LN_UPD"; - case AArch64ISD::NEON_LD4LN_UPD: - return "AArch64ISD::NEON_LD4LN_UPD"; - case AArch64ISD::NEON_ST2LN_UPD: - return "AArch64ISD::NEON_ST2LN_UPD"; - case AArch64ISD::NEON_ST3LN_UPD: - return "AArch64ISD::NEON_ST3LN_UPD"; - case AArch64ISD::NEON_ST4LN_UPD: - return "AArch64ISD::NEON_ST4LN_UPD"; - case AArch64ISD::NEON_VEXTRACT: - return "AArch64ISD::NEON_VEXTRACT"; - default: - return NULL; + case TargetOpcode::STACKMAP: + case TargetOpcode::PATCHPOINT: + return emitPatchPoint(MI, BB); } + llvm_unreachable("Unexpected instruction for custom inserter!"); } -static const uint16_t AArch64FPRArgRegs[] = { - AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, - AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7 -}; -static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs); - -static const uint16_t AArch64ArgRegs[] = { - AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, - AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7 -}; -static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs); +//===----------------------------------------------------------------------===// +// AArch64 
Lowering private implementation. +//===----------------------------------------------------------------------===// -static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, CCState &State) { - // Mark all remaining general purpose registers as allocated. We don't - // backtrack: if (for example) an i128 gets put on the stack, no subsequent - // i64 will go in registers (C.11). - for (unsigned i = 0; i < NumArgRegs; ++i) - State.AllocateReg(AArch64ArgRegs[i]); +//===----------------------------------------------------------------------===// +// Lowering Code +//===----------------------------------------------------------------------===// - return false; +/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 +/// CC +static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) { + switch (CC) { + default: + llvm_unreachable("Unknown condition code!"); + case ISD::SETNE: + return AArch64CC::NE; + case ISD::SETEQ: + return AArch64CC::EQ; + case ISD::SETGT: + return AArch64CC::GT; + case ISD::SETGE: + return AArch64CC::GE; + case ISD::SETLT: + return AArch64CC::LT; + case ISD::SETLE: + return AArch64CC::LE; + case ISD::SETUGT: + return AArch64CC::HI; + case ISD::SETUGE: + return AArch64CC::HS; + case ISD::SETULT: + return AArch64CC::LO; + case ISD::SETULE: + return AArch64CC::LS; + } } -#include "AArch64GenCallingConv.inc" - -CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { - - switch(CC) { - default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - case CallingConv::C: - return CC_A64_APCS; +/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC. +static void changeFPCCToAArch64CC(ISD::CondCode CC, + AArch64CC::CondCode &CondCode, + AArch64CC::CondCode &CondCode2) { + CondCode2 = AArch64CC::AL; + switch (CC) { + default: + llvm_unreachable("Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: + CondCode = AArch64CC::EQ; + break; + case ISD::SETGT: + case ISD::SETOGT: + CondCode = AArch64CC::GT; + break; + case ISD::SETGE: + case ISD::SETOGE: + CondCode = AArch64CC::GE; + break; + case ISD::SETOLT: + CondCode = AArch64CC::MI; + break; + case ISD::SETOLE: + CondCode = AArch64CC::LS; + break; + case ISD::SETONE: + CondCode = AArch64CC::MI; + CondCode2 = AArch64CC::GT; + break; + case ISD::SETO: + CondCode = AArch64CC::VC; + break; + case ISD::SETUO: + CondCode = AArch64CC::VS; + break; + case ISD::SETUEQ: + CondCode = AArch64CC::EQ; + CondCode2 = AArch64CC::VS; + break; + case ISD::SETUGT: + CondCode = AArch64CC::HI; + break; + case ISD::SETUGE: + CondCode = AArch64CC::PL; + break; + case ISD::SETLT: + case ISD::SETULT: + CondCode = AArch64CC::LT; + break; + case ISD::SETLE: + case ISD::SETULE: + CondCode = AArch64CC::LE; + break; + case ISD::SETNE: + case ISD::SETUNE: + CondCode = AArch64CC::NE; + break; } } -void -AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, - SDLoc DL, SDValue &Chain) const { - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); - - SmallVector MemOps; +/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 +/// CC usable with the vector instructions. Fewer operations are available +/// without a real NZCV register, so we have to use less efficient combinations +/// to get the same effect. 
+static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, + AArch64CC::CondCode &CondCode, + AArch64CC::CondCode &CondCode2, + bool &Invert) { + Invert = false; + switch (CC) { + default: + // Mostly the scalar mappings work fine. + changeFPCCToAArch64CC(CC, CondCode, CondCode2); + break; + case ISD::SETUO: + Invert = true; // Fallthrough + case ISD::SETO: + CondCode = AArch64CC::MI; + CondCode2 = AArch64CC::GE; + break; + case ISD::SETUEQ: + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: + // All of the compare-mask comparisons are ordered, but we can switch + // between the two by a double inversion. E.g. ULE == !OGT. + Invert = true; + changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2); + break; + } +} - unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs, - NumArgRegs); - unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs, - NumFPRArgRegs); +static bool isLegalArithImmed(uint64_t C) { + // Matches AArch64DAGToDAGISel::SelectArithImmed(). + return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); +} - unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR); - int GPRIdx = 0; - if (GPRSaveSize != 0) { - GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); +static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDLoc dl, SelectionDAG &DAG) { + EVT VT = LHS.getValueType(); + + if (VT.isFloatingPoint()) + return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS); + + // The CMP instruction is just an alias for SUBS, and representing it as + // SUBS means that it's possible to get CSE with subtract operations. + // A later phase can perform the optimization of setting the destination + // register to WZR/XZR if it ends up being unused. + unsigned Opcode = AArch64ISD::SUBS; + + if (RHS.getOpcode() == ISD::SUB && isa<ConstantSDNode>(RHS.getOperand(0)) && + cast<ConstantSDNode>(RHS.getOperand(0))->getZExtValue() == 0 && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on + // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags + // can be set differently by this operation. It comes down to whether + // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are equal + // then everything is fine. If not then the optimization is wrong. Thus + // general comparisons are only valid if op2 != 0. + + // So, finally, the only LLVM-native comparisons that don't mention C and V + // are SETEQ and SETNE. They're the only ones we can safely use CMN for in + // the absence of information about op2. + Opcode = AArch64ISD::ADDS; + RHS = RHS.getOperand(1); + } else if (LHS.getOpcode() == ISD::AND && isa<ConstantSDNode>(RHS) && + cast<ConstantSDNode>(RHS)->getZExtValue() == 0 && + !isUnsignedIntSetCC(CC)) { + // Similarly, (CMP (and X, Y), 0) can be implemented with a TST + // (a.k.a. ANDS) except that the flags are only guaranteed to work for one + // of the signed comparisons.
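+ // For illustration only (editorial example, not emitted verbatim by this + // code): a signed test such as "(x & 0xff000000) < 0" can be lowered as + // tst w0, #0xff000000 ; ANDS wzr, w0, #0xff000000 + // b.lt taken + // because ANDS clears V, so LT (N != V) degenerates to N, the sign bit of + // the masked value. The unsigned flavours would need the C flag, hence the + // !isUnsignedIntSetCC(CC) guard above.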
+ Opcode = AArch64ISD::ANDS; + RHS = LHS.getOperand(1); + LHS = LHS.getOperand(0); + } - SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); + return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) + .getValue(1); +} - for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 8), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(8, getPointerTy())); +static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { + EVT VT = RHS.getValueType(); + uint64_t C = RHSC->getZExtValue(); + if (!isLegalArithImmed(C)) { + // Constant does not fit, try adjusting it by one? + switch (CC) { + default: + break; + case ISD::SETLT: + case ISD::SETGE: + if ((VT == MVT::i32 && C != 0x80000000 && + isLegalArithImmed((uint32_t)(C - 1))) || + (VT == MVT::i64 && C != 0x8000000000000000ULL && + isLegalArithImmed(C - 1ULL))) { + CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; + C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; + RHS = DAG.getConstant(C, VT); + } + break; + case ISD::SETULT: + case ISD::SETUGE: + if ((VT == MVT::i32 && C != 0 && + isLegalArithImmed((uint32_t)(C - 1))) || + (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) { + CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; + C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; + RHS = DAG.getConstant(C, VT); + } + break; + case ISD::SETLE: + case ISD::SETGT: + if ((VT == MVT::i32 && C != 0x7fffffff && + isLegalArithImmed((uint32_t)(C + 1))) || + (VT == MVT::i64 && C != 0x7fffffffffffffffULL && + isLegalArithImmed(C + 1ULL))) { + CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; + C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; + RHS = DAG.getConstant(C, VT); + } + break; + case ISD::SETULE: + case ISD::SETUGT: + if ((VT == MVT::i32 && C != 0xffffffff && + isLegalArithImmed((uint32_t)(C + 1))) || + (VT == MVT::i64 && C != 0xffffffffffffffffULL && + isLegalArithImmed(C + 1ULL))) { + CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; + C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; + RHS = DAG.getConstant(C, VT); + } + break; + } } } - if (getSubtarget()->hasFPARMv8()) { - unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); - int FPRIdx = 0; - // According to the AArch64 Procedure Call Standard, section B.1/B.3, we - // can omit a register save area if we know we'll never use registers of - // that class.
- if (FPRSaveSize != 0) { - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); + SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); + AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); + AArch64cc = DAG.getConstant(AArch64CC, MVT::i32); + return Cmp; +} - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], - &AArch64::FPR128RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); +static std::pair +getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { + assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) && + "Unsupported value type"); + SDValue Value, Overflow; + SDLoc DL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + unsigned Opc = 0; + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::SADDO: + Opc = AArch64ISD::ADDS; + CC = AArch64CC::VS; + break; + case ISD::UADDO: + Opc = AArch64ISD::ADDS; + CC = AArch64CC::HS; + break; + case ISD::SSUBO: + Opc = AArch64ISD::SUBS; + CC = AArch64CC::VS; + break; + case ISD::USUBO: + Opc = AArch64ISD::SUBS; + CC = AArch64CC::LO; + break; + // Multiply needs a little bit extra work. + case ISD::SMULO: + case ISD::UMULO: { + CC = AArch64CC::NE; + bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false; + if (Op.getValueType() == MVT::i32) { + unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + // For a 32 bit multiply with overflow check we want the instruction + // selector to generate a widening multiply (SMADDL/UMADDL). For that we + // need to generate the following pattern: + // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)) + LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS); + RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS); + SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); + SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul, + DAG.getConstant(0, MVT::i64)); + // On AArch64 the upper 32 bits are always zero extended for a 32 bit + // operation. We need to clear out the upper 32 bits, because we used a + // widening multiply that wrote all 64 bits. In the end this should be a + // noop. + Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add); + if (IsSigned) { + // The signed overflow check requires more than just a simple check for + // any bit set in the upper 32 bits of the result. These bits could be + // just the sign bits of a negative number. To perform the overflow + // check we have to arithmetic shift right the 32nd bit of the result by + // 31 bits. Then we compare the result to the upper 32 bits. + SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add, + DAG.getConstant(32, MVT::i64)); + UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits); + SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value, + DAG.getConstant(31, MVT::i64)); + // It is important that LowerBits is last, otherwise the arithmetic + // shift will not be folded into the compare (SUBS). 
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); + Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) + .getValue(1); + } else { + // The overflow check for unsigned multiply is easy. We only need to + // check if any of the upper 32 bits are set. This can be done with a + // CMP (shifted register). For that we need to generate the following + // pattern: + // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32) + SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, + DAG.getConstant(32, MVT::i64)); + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); + Overflow = + DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), + UpperBits).getValue(1); } + break; + } + assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type"); + // For the 64 bit multiply + Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS); + if (IsSigned) { + SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS); + SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value, + DAG.getConstant(63, MVT::i64)); + // It is important that LowerBits is last, otherwise the arithmetic + // shift will not be folded into the compare (SUBS). + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); + Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) + .getValue(1); + } else { + SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); + Overflow = + DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64), + UpperBits).getValue(1); } - FuncInfo->setVariadicFPRIdx(FPRIdx); - FuncInfo->setVariadicFPRSize(FPRSaveSize); + break; } + } // switch (...) - unsigned StackOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), 8); - int StackIdx = MFI->CreateFixedObject(8, StackOffset, true); + if (Opc) { + SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32); - FuncInfo->setVariadicStackIdx(StackIdx); - FuncInfo->setVariadicGPRIdx(GPRIdx); - FuncInfo->setVariadicGPRSize(GPRSaveSize); - - if (!MemOps.empty()) { - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], - MemOps.size()); + // Emit the AArch64 operation with overflow check. + Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); + Overflow = Value.getValue(1); } + return std::make_pair(Value, Overflow); } +SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const { + SmallVector Ops; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) + Ops.push_back(Op.getOperand(i)); -SDValue -AArch64TargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false, + SDLoc(Op)).first; +} - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); +static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { + SDValue Sel = Op.getOperand(0); + SDValue Other = Op.getOperand(1); - SmallVector ArgValues; + // If neither operand is a SELECT_CC, give up. 
+ if (Sel.getOpcode() != ISD::SELECT_CC) + std::swap(Sel, Other); + if (Sel.getOpcode() != ISD::SELECT_CC) + return Op; - SDValue ArgValue; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - ISD::ArgFlagsTy Flags = Ins[i].Flags; + // The folding we want to perform is: + // (xor x, (select_cc a, b, cc, 0, -1) ) + // --> + // (csel x, (xor x, -1), cc ...) + // + // The latter will get matched to a CSINV instruction. - if (Flags.isByVal()) { - // Byval is used for small structs and HFAs in the PCS, but the system - // should work in a non-compliant manner for larger structs. - EVT PtrTy = getPointerTy(); - int Size = Flags.getByValSize(); - unsigned NumRegs = (Size + 7) / 8; + ISD::CondCode CC = cast(Sel.getOperand(4))->get(); + SDValue LHS = Sel.getOperand(0); + SDValue RHS = Sel.getOperand(1); + SDValue TVal = Sel.getOperand(2); + SDValue FVal = Sel.getOperand(3); + SDLoc dl(Sel); - uint32_t BEAlign = 0; - if (Size < 8 && !getSubtarget()->isLittle()) - BEAlign = 8-Size; - unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs, - VA.getLocMemOffset() + BEAlign, - false); - SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); - InVals.push_back(FrameIdxN); + // FIXME: This could be generalized to non-integer comparisons. + if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64) + return Op; - continue; - } else if (VA.isRegLoc()) { - MVT RegVT = VA.getLocVT(); - const TargetRegisterClass *RC = getRegClassFor(RegVT); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + ConstantSDNode *CFVal = dyn_cast(FVal); + ConstantSDNode *CTVal = dyn_cast(TVal); - ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); - } else { // VA.isRegLoc() - assert(VA.isMemLoc()); + // The the values aren't constants, this isn't the pattern we're looking for. + if (!CFVal || !CTVal) + return Op; - int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, - VA.getLocMemOffset(), true); + // We can commute the SELECT_CC by inverting the condition. This + // might be needed to make this fit into a CSINV pattern. + if (CTVal->isAllOnesValue() && CFVal->isNullValue()) { + std::swap(TVal, FVal); + std::swap(CTVal, CFVal); + CC = ISD::getSetCCInverse(CC, true); + } - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + // If the constants line up, perform the transform! 
+ if (CTVal->isNullValue() && CFVal->isAllOnesValue()) { + SDValue CCVal; + SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); + FVal = Other; + TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other, + DAG.getConstant(-1ULL, Other.getValueType())); - } + return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal, + CCVal, Cmp); + } - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::BCvt: - ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue); - break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - case CCValAssign::FPExt: { - unsigned DestSize = VA.getValVT().getSizeInBits(); - unsigned DestSubReg; - - switch (DestSize) { - case 8: DestSubReg = AArch64::sub_8; break; - case 16: DestSubReg = AArch64::sub_16; break; - case 32: DestSubReg = AArch64::sub_32; break; - case 64: DestSubReg = AArch64::sub_64; break; - default: llvm_unreachable("Unexpected argument promotion"); - } + return Op; +} - ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, - VA.getValVT(), ArgValue, - DAG.getTargetConstant(DestSubReg, MVT::i32)), - 0); - break; - } - } +static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); - InVals.push_back(ArgValue); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + unsigned Opc; + bool ExtraOp = false; + switch (Op.getOpcode()) { + default: + assert(0 && "Invalid code"); + case ISD::ADDC: + Opc = AArch64ISD::ADDS; + break; + case ISD::SUBC: + Opc = AArch64ISD::SUBS; + break; + case ISD::ADDE: + Opc = AArch64ISD::ADCS; + ExtraOp = true; + break; + case ISD::SUBE: + Opc = AArch64ISD::SBCS; + ExtraOp = true; + break; } - if (isVarArg) - SaveVarArgRegisters(CCInfo, DAG, dl, Chain); + if (!ExtraOp) + return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); + return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), + Op.getOperand(2)); +} + +static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); - unsigned StackArgSize = CCInfo.getNextStackOffset(); - if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { - // This is a non-standard ABI so by fiat I say we're allowed to make full - // use of the stack area to be popped, which must be aligned to 16 bytes in - // any case: - StackArgSize = RoundUpToAlignment(StackArgSize, 16); + AArch64CC::CondCode CC; + // The actual operation that sets the overflow or carry flag. + SDValue Value, Overflow; + std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG); - // If we're expected to restore the stack (e.g. fastcc) then we'll be adding - // a multiple of 16. - FuncInfo->setArgumentStackToRestore(StackArgSize); + // We use 0 and 1 as false and true values. + SDValue TVal = DAG.getConstant(1, MVT::i32); + SDValue FVal = DAG.getConstant(0, MVT::i32); - // This realignment carries over to the available bytes below. Our own - // callers will guarantee the space is free by giving an aligned value to - // CALLSEQ_START. - } - // Even if we're not expected to free up the space, it's useful to know how - // much is there while considering tail calls (because we can reuse it). 
- FuncInfo->setBytesInStackArgArea(StackArgSize); + // We use an inverted condition, because the conditional select is inverted + // too. This will allow it to be selected to a single instruction: + // CSINC Wd, WZR, WZR, invert(cond). + SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32); + Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, + CCVal, Overflow); - return Chain; + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); } -SDValue -AArch64TargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const { - // CCValAssign - represent the assignment of the return value to a location. - SmallVector RVLocs; +// Prefetch operands are: +// 1: Address to prefetch +// 2: bool isWrite +// 3: int locality (0 = no locality ... 3 = extreme locality) +// 4: bool isDataCache +static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + unsigned IsWrite = cast(Op.getOperand(2))->getZExtValue(); + unsigned Locality = cast(Op.getOperand(3))->getZExtValue(); + // The data thing is not used. + // unsigned isData = cast(Op.getOperand(4))->getZExtValue(); + + bool IsStream = !Locality; + // When the locality number is set + if (Locality) { + // The front-end should have filtered out the out-of-range values + assert(Locality <= 3 && "Prefetch locality out-of-range"); + // The locality degree is the opposite of the cache speed. + // Put the number the other way around. + // The encoding starts at 0 for level 1 + Locality = 3 - Locality; + } - // CCState - Info about the registers and stack slots. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + // built the mask value encoding the expected behavior. + unsigned PrfOp = (IsWrite << 4) | // Load/Store bit + (Locality << 1) | // Cache level bits + (unsigned)IsStream; // Stream bit + return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0), + DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1)); +} - // Analyze outgoing return values. - CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv)); +SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); - SDValue Flag; - SmallVector RetOps(1, Chain); + RTLIB::Libcall LC; + LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); - for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { - // PCS: "If the type, T, of the result of a function is such that - // void func(T arg) would require that arg be passed as a value in a - // register (or set of registers) according to the rules in 5.4, then the - // result is returned in the same registers as would be used for such an - // argument. - // - // Otherwise, the caller shall reserve a block of memory of sufficient - // size and alignment to hold the result. The address of the memory block - // shall be passed as an additional argument to the function in x8." - // - // This is implemented in two places. The register-return values are dealt - // with here, more complex returns are passed as an sret parameter, which - // means we don't have to worry about it during actual return. 
- CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Only register-returns should be created by PCS"); + return LowerF128Call(Op, DAG, LC); +} +SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, + SelectionDAG &DAG) const { + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } - SDValue Arg = OutVals[i]; + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); - // There's no convenient note in the ABI about this as there is for normal - // arguments, but it says return values are passed in the same registers as - // an argument would be. I believe that includes the comments about - // unspecified higher bits, putting the burden of widening on the *caller* - // for return values. - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - // Floating-point values should only be extended when they're going into - // memory, which can't happen here so an integer extend is acceptable. - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); - break; - } + // FP_ROUND node has a second operand indicating whether it is known to be + // precise. That doesn't take part in the LibCall so we can't directly use + // LowerF128Call. + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); - Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); - } +static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. + // Any additional optimization in this function should be recorded + // in the cost tables. + EVT InVT = Op.getOperand(0).getValueType(); + EVT VT = Op.getValueType(); - RetOps[0] = Chain; // Update chain. + // FP_TO_XINT conversion from the same type are legal. + if (VT.getSizeInBits() == InVT.getSizeInBits()) + return Op; - // Add the flag if we have it. - if (Flag.getNode()) - RetOps.push_back(Flag); + if (InVT == MVT::v2f64 || InVT == MVT::v4f32) { + SDLoc dl(Op); + SDValue Cv = + DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), + Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); + } else if (InVT == MVT::v2f32) { + SDLoc dl(Op); + SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0)); + return DAG.getNode(Op.getOpcode(), dl, VT, Ext); + } - return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other, - &RetOps[0], RetOps.size()); + // Type changing conversions are illegal. + return SDValue(); } -unsigned AArch64TargetLowering::getByValTypeAlignment(Type *Ty) const { - // This is a new backend. For anything more precise than this a FE should - // set an explicit alignment. 
- return 4; -} +SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, + SelectionDAG &DAG) const { + if (Op.getOperand(0).getValueType().isVector()) + return LowerVectorFP_TO_INT(Op, DAG); -SDValue -AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { - SelectionDAG &DAG = CLI.DAG; - SDLoc &dl = CLI.DL; - SmallVectorImpl &Outs = CLI.Outs; - SmallVectorImpl &OutVals = CLI.OutVals; - SmallVectorImpl &Ins = CLI.Ins; - SDValue Chain = CLI.Chain; - SDValue Callee = CLI.Callee; - bool &IsTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; - bool IsVarArg = CLI.IsVarArg; + if (Op.getOperand(0).getValueType() != MVT::f128) { + // It's legal except when f128 is involved + return Op; + } - MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); - bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; - bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet(); - bool IsSibCall = false; + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::FP_TO_SINT) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - if (IsTailCall) { - IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, OutVals, Ins, DAG); + SmallVector Ops; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) + Ops.push_back(Op.getOperand(i)); - // A sibling call is one where we're under the usual C ABI and not planning - // to change that but can still do a tail call: - if (!TailCallOpt && IsTailCall) - IsSibCall = true; - } + return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false, + SDLoc(Op)).first; +} - SmallVector ArgLocs; - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); +static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp. + // Any additional optimization in this function should be recorded + // in the cost tables. + EVT VT = Op.getValueType(); + SDLoc dl(Op); + SDValue In = Op.getOperand(0); + EVT InVT = In.getValueType(); - // On AArch64 (and all other architectures I'm aware of) the most this has to - // do is adjust the stack pointer. - unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16); - if (IsSibCall) { - // Since we're not changing the ABI to make this a tail call, the memory - // operands are already available in the caller's incoming argument space. - NumBytes = 0; + // v2i32 to v2f32 is legal. + if (VT == MVT::v2f32 && InVT == MVT::v2i32) + return Op; + + // This function only handles v2f64 outputs. + if (VT == MVT::v2f64) { + // Extend the input argument to a v2i64 that we can feed into the + // floating point conversion. Zero or sign extend based on whether + // we're doing a signed or unsigned float conversion. + unsigned Opc = + Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; + assert(Op.getNumOperands() == 1 && "FP conversions take one argument"); + SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0)); + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted); } - // FPDiff is the byte offset of the call's argument area from the callee's. 
- // Stores to callee stack arguments will be placed in FixedStackSlots offset - // by this amount for a tail call. In a sibling call it must be 0 because the - // caller will deallocate the entire stack and the callee still expects its - // arguments to begin at SP+0. Completely unused for non-tail calls. - int FPDiff = 0; + // Scalarize v2i64 to v2f32 conversions. + std::vector BuildVectorOps; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In, + DAG.getConstant(i, MVT::i64)); + Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr); + BuildVectorOps.push_back(Sclr); + } - if (IsTailCall && !IsSibCall) { - unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps); +} - // FPDiff will be negative if this tail call requires more space than we - // would automatically have in our incoming argument space. Positive if we - // can actually shrink the stack. - FPDiff = NumReusableBytes - NumBytes; +SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + if (Op.getValueType().isVector()) + return LowerVectorINT_TO_FP(Op, DAG); - // The stack pointer must be 16-byte aligned at all times it's used for a - // memory operation, which in practice means at *all* times and in - // particular across call boundaries. Therefore our own arguments started at - // a 16-byte aligned SP and the delta applied for the tail call should - // satisfy the same constraint. - assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); + // i128 conversions are libcalls. + if (Op.getOperand(0).getValueType() == MVT::i128) + return SDValue(); + + // Other conversions are legal, unless it's to the completely software-based + // fp128. + if (Op.getValueType() != MVT::f128) + return Op; + + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::SINT_TO_FP) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + + return LowerF128Call(Op, DAG, LC); +} + +SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, + SelectionDAG &DAG) const { + // For iOS, we want to call an alternative entry point: __sincos_stret, + // which returns the values in two S / D registers. + SDLoc dl(Op); + SDValue Arg = Op.getOperand(0); + EVT ArgVT = Arg.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + ArgListTy Args; + ArgListEntry Entry; + + Entry.Node = Arg; + Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + const char *LibcallName = + (ArgVT == MVT::f64) ? 
"__sincos_stret" : "__sincosf_stret"; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::Fast, RetTy, Callee, &Args, 0); + + std::pair CallResult = LowerCallTo(CLI); + return CallResult.first; +} + +SDValue AArch64TargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: + llvm_unreachable("unimplemented operand"); + return SDValue(); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::SETCC: + return LowerSETCC(Op, DAG); + case ISD::BR_CC: + return LowerBR_CC(Op, DAG); + case ISD::SELECT: + return LowerSELECT(Op, DAG); + case ISD::SELECT_CC: + return LowerSELECT_CC(Op, DAG); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::VACOPY: + return LowerVACOPY(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUBC: + case ISD::SUBE: + return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + return LowerXALUO(Op, DAG); + case ISD::FADD: + return LowerF128Call(Op, DAG, RTLIB::ADD_F128); + case ISD::FSUB: + return LowerF128Call(Op, DAG, RTLIB::SUB_F128); + case ISD::FMUL: + return LowerF128Call(Op, DAG, RTLIB::MUL_F128); + case ISD::FDIV: + return LowerF128Call(Op, DAG, RTLIB::DIV_F128); + case ISD::FP_ROUND: + return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: + return LowerFP_EXTEND(Op, DAG); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG); + case ISD::RETURNADDR: + return LowerRETURNADDR(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::EXTRACT_SUBVECTOR: + return LowerEXTRACT_SUBVECTOR(Op, DAG); + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: + return LowerVectorSRA_SRL_SHL(Op, DAG); + case ISD::SHL_PARTS: + return LowerShiftLeftParts(Op, DAG); + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + return LowerShiftRightParts(Op, DAG); + case ISD::CTPOP: + return LowerCTPOP(Op, DAG); + case ISD::FCOPYSIGN: + return LowerFCOPYSIGN(Op, DAG); + case ISD::AND: + return LowerVectorAND(Op, DAG); + case ISD::OR: + return LowerVectorOR(Op, DAG); + case ISD::XOR: + return LowerXOR(Op, DAG); + case ISD::PREFETCH: + return LowerPREFETCH(Op, DAG); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + return LowerINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + return LowerFP_TO_INT(Op, DAG); + case ISD::FSINCOS: + return LowerFSINCOS(Op, DAG); } +} - if (!IsSibCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), - dl); +/// getFunctionAlignment - Return the Log2 alignment of this function. 
+unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const {
+  return 2;
+}
-  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
-                                        getPointerTy());
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
-  SmallVector<SDValue, 8> MemOpChains;
-  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+#include "AArch64GenCallingConv.inc"
+
+/// Selects the correct CCAssignFn for the given CallingConvention
+/// value.
+CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+                                                     bool IsVarArg) const {
+  switch (CC) {
+  default:
+    llvm_unreachable("Unsupported calling convention.");
+  case CallingConv::WebKit_JS:
+    return CC_AArch64_WebKit_JS;
+  case CallingConv::C:
+  case CallingConv::Fast:
+    if (!Subtarget->isTargetDarwin())
+      return CC_AArch64_AAPCS;
+    return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
+  }
+}
+SDValue AArch64TargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+
+  // At this point, Ins[].VT may already be promoted to i32. To correctly
+  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
+  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
+  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
+  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
+  // LocVT.
+  unsigned NumArgs = Ins.size();
+  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
+  unsigned CurArgIdx = 0;
+  for (unsigned i = 0; i != NumArgs; ++i) {
+    MVT ValVT = Ins[i].VT;
+    std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
+    CurArgIdx = Ins[i].OrigArgIndex;
+
+    // Get type of the original argument.
+    EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+    MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
+    // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+    if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+      ValVT = MVT::i8;
+    else if (ActualMVT == MVT::i16)
+      ValVT = MVT::i16;
+
+    CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+    bool Res =
+        AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
+    assert(!Res && "Call operand has unhandled type");
+    (void)Res;
+  }
+  assert(ArgLocs.size() == Ins.size());
+  SmallVector<SDValue, 16> ArgValues;
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    ISD::ArgFlagsTy Flags = Outs[i].Flags;
-    SDValue Arg = OutVals[i];
-
-    // Callee does the actual widening, so all extensions just use an implicit
-    // definition of the rest of the Loc. Aesthetically, this would be nicer as
-    // an ANY_EXTEND, but that isn't valid for floating-point types and this
-    // alternative works on integer types too.
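// [Editorial sketch -- not part of the patch] The ValVT adjustment in the
// LowerFormalArguments loop above is the whole trick that lets i8/i16 be
// passed in their natural width on the stack. In isolation (assuming only
// LLVM's MVT enum):
static MVT adjustValVTForSmallInt(MVT ActualMVT, MVT ValVT) {
  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
    return MVT::i8;  // i1 and i8 both occupy a one-byte slot
  if (ActualMVT == MVT::i16)
    return MVT::i16;
  return ValVT;      // otherwise keep the (possibly promoted) type
}
// The CC functions then see the original small type as LocVT, so an i8 is
// stored as one byte rather than as the promoted i32.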
- switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - case CCValAssign::ZExt: - case CCValAssign::AExt: - case CCValAssign::FPExt: { - unsigned SrcSize = VA.getValVT().getSizeInBits(); - unsigned SrcSubReg; - - switch (SrcSize) { - case 8: SrcSubReg = AArch64::sub_8; break; - case 16: SrcSubReg = AArch64::sub_16; break; - case 32: SrcSubReg = AArch64::sub_32; break; - case 64: SrcSubReg = AArch64::sub_64; break; - default: llvm_unreachable("Unexpected argument promotion"); - } - Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VA.getLocVT(), - DAG.getUNDEF(VA.getLocVT()), - Arg, - DAG.getTargetConstant(SrcSubReg, MVT::i32)), - 0); + if (Ins[i].Flags.isByVal()) { + // Byval is used for HFAs in the PCS, but the system should work in a + // non-compliant manner for larger structs. + EVT PtrTy = getPointerTy(); + int Size = Ins[i].Flags.getByValSize(); + unsigned NumRegs = (Size + 7) / 8; - break; - } - case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); - break; - } + // FIXME: This works on big-endian for composite byvals, which are the common + // case. It should also work for fundamental types too. + unsigned FrameIdx = + MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false); + SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy); + InVals.push_back(FrameIdxN); - if (VA.isRegLoc()) { - // A normal register (sub-) argument. For now we just note it down because - // we want to copy things into registers as late as possible to avoid - // register-pressure (and possibly worse). - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); continue; - } + } if (VA.isRegLoc()) { + // Arguments stored in registers. + EVT RegVT = VA.getLocVT(); + + SDValue ArgValue; + const TargetRegisterClass *RC; + + if (RegVT == MVT::i32) + RC = &AArch64::GPR32RegClass; + else if (RegVT == MVT::i64) + RC = &AArch64::GPR64RegClass; + else if (RegVT == MVT::f32) + RC = &AArch64::FPR32RegClass; + else if (RegVT == MVT::f64 || RegVT.is64BitVector()) + RC = &AArch64::FPR64RegClass; + else if (RegVT == MVT::f128 || RegVT.is128BitVector()) + RC = &AArch64::FPR128RegClass; + else + llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); - assert(VA.isMemLoc() && "unexpected argument location"); + // Transform the arguments in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT); - SDValue DstAddr; - MachinePointerInfo DstInfo; - if (IsTailCall) { - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() : - VA.getLocVT().getSizeInBits(); - OpSize = (OpSize + 7) / 8; - int32_t Offset = VA.getLocMemOffset() + FPDiff; - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); + // If this is an 8, 16 or 32-bit value, it is really passed promoted + // to 64 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue); + break; + case CCValAssign::AExt: + case CCValAssign::SExt: + case CCValAssign::ZExt: + // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt + // nodes after our lowering. 
+ assert(RegVT == Ins[i].VT && "incorrect register location selected"); + break; + } - DstAddr = DAG.getFrameIndex(FI, getPointerTy()); - DstInfo = MachinePointerInfo::getFixedStack(FI); + InVals.push_back(ArgValue); + + } else { // VA.isRegLoc() + assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); + unsigned ArgOffset = VA.getLocMemOffset(); + unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; - // Make sure any stack arguments overlapping with where we're storing are - // loaded before this eventual operation. Otherwise they'll be clobbered. - Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); - } else { - uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize()*8 : - VA.getLocVT().getSizeInBits(); - OpSize = (OpSize + 7) / 8; uint32_t BEAlign = 0; - if (OpSize < 8 && !getSubtarget()->isLittle()) - BEAlign = 8-OpSize; - SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + BEAlign); + if (ArgSize < 8 && !Subtarget->isLittleEndian()) + BEAlign = 8 - ArgSize; - DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset()); - } + int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true); - if (Flags.isByVal()) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64); - SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode, - Flags.getByValAlign(), - /*isVolatile = */ false, - /*alwaysInline = */ false, - DstInfo, MachinePointerInfo(0)); - MemOpChains.push_back(Cpy); - } else { - // Normal stack argument, put it where it's needed. - SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo, - false, false, 0); - MemOpChains.push_back(Store); - } - } + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + SDValue ArgValue; - // The loads and stores generated above shouldn't clash with each - // other. Combining them with this TokenFactor notes that fact for the rest of - // the backend. - if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + switch (VA.getLocInfo()) { + default: + break; + case CCValAssign::SExt: + ExtType = ISD::SEXTLOAD; + break; + case CCValAssign::ZExt: + ExtType = ISD::ZEXTLOAD; + break; + case CCValAssign::AExt: + ExtType = ISD::EXTLOAD; + break; + } - // Most of the rest of the instructions need to be glued together; we don't - // want assignments to actual registers used by a call to be rearranged by a - // well-meaning scheduler. - SDValue InFlag; + ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + VA.getLocVT(), + false, false, false, 0); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); + InVals.push_back(ArgValue); + } } - // The linker is responsible for inserting veneers when necessary to put a - // function call destination in range, so we don't need to bother with a - // wrapper here. 
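// [Editorial sketch -- not part of the patch] The BEAlign computation above
// exists because AAPCS64 stack slots are 8 bytes: on big-endian, a small
// argument sits at the most-significant end of its slot, so its address must
// be nudged past the unused bytes. Stand-alone:
static unsigned computeBEAlign(unsigned ArgSizeInBytes, bool IsLittleEndian) {
  if (!IsLittleEndian && ArgSizeInBytes < 8)
    return 8 - ArgSizeInBytes; // e.g. a 2-byte arg is found 6 bytes in
  return 0;                    // little-endian args start at the slot base
}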
- if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); - } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - const char *Sym = S->getSymbol(); - Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); - } + // varargs + if (isVarArg) { + if (!Subtarget->isTargetDarwin()) { + // The AAPCS variadic function ABI is identical to the non-variadic + // one. As a result there may be more arguments in registers and we should + // save them for future reference. + saveVarArgRegisters(CCInfo, DAG, DL, Chain); + } - // We don't usually want to end the call-sequence here because we would tidy - // the frame up *after* the call, however in the ABI-changing tail-call case - // we've carefully laid out the parameters so that when sp is reset they'll be - // in the correct location. - if (IsTailCall && !IsSibCall) { - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag, dl); - InFlag = Chain.getValue(1); + AArch64FunctionInfo *AFI = MF.getInfo(); + // This will point to the next argument passed via stack. + unsigned StackOffset = CCInfo.getNextStackOffset(); + // We currently pass all varargs at 8-byte alignment. + StackOffset = ((StackOffset + 7) & ~7); + AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true)); } - // We produce the following DAG scheme for the actual call instruction: - // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag? - // - // Most arguments aren't going to be used and just keep the values live as - // far as LLVM is concerned. It's expected to be selected as simply "bl - // callee" (for a direct, non-tail call). - std::vector Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + unsigned StackArgSize = CCInfo.getNextStackOffset(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) { + // This is a non-standard ABI so by fiat I say we're allowed to make full + // use of the stack area to be popped, which must be aligned to 16 bytes in + // any case: + StackArgSize = RoundUpToAlignment(StackArgSize, 16); - if (IsTailCall) { - // Each tail call may have to adjust the stack by a different amount, so - // this information must travel along with the operation for eventual - // consumption by emitEpilogue. - Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32)); + // If we're expected to restore the stack (e.g. fastcc) then we'll be adding + // a multiple of 16. + FuncInfo->setArgumentStackToRestore(StackArgSize); + + // This realignment carries over to the available bytes below. Our own + // callers will guarantee the space is free by giving an aligned value to + // CALLSEQ_START. } + // Even if we're not expected to free up the space, it's useful to know how + // much is there while considering tail calls (because we can reuse it). 
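// [Editorial sketch -- not part of the patch] Both roundings above are the
// usual power-of-two idiom; RoundUpToAlignment(StackArgSize, 16) behaves as:
static unsigned roundUpToAlignment16(unsigned N) {
  return (N + 15) & ~15u; // 0 -> 0, 1 -> 16, 16 -> 16, 17 -> 32
}
// The varargs StackOffset line is the same idiom with 8 in place of 16.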
+ FuncInfo->setBytesInStackArgArea(StackArgSize); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); + return Chain; +} +void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, + SelectionDAG &DAG, SDLoc DL, + SDValue &Chain) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); - // Add a register mask operand representing the call-preserved registers. This - // is used later in codegen to constrain register-allocation. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); + SmallVector MemOps; - // If we needed glue, put it in as the last argument. - if (InFlag.getNode()) - Ops.push_back(InFlag); + static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7 }; + static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs); + unsigned FirstVariadicGPR = + CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); + int GPRIdx = 0; + if (GPRSaveSize != 0) { + GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false); - if (IsTailCall) { - return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy()); + + for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); + SDValue Store = + DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 8), false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(8, getPointerTy())); + } } + FuncInfo->setVarArgsGPRIndex(GPRIdx); + FuncInfo->setVarArgsGPRSize(GPRSaveSize); - Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); + if (Subtarget->hasFPARMv8()) { + static const MCPhysReg FPRArgRegs[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, + AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; + static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs); + unsigned FirstVariadicFPR = + CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs); + + unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); + int FPRIdx = 0; + if (FPRSaveSize != 0) { + FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - // Now we can reclaim the stack, just as well do it before working out where - // our return value is. - if (!IsSibCall) { - uint64_t CalleePopBytes - = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? 
NumBytes : 0; + SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(CalleePopBytes, true), - InFlag, dl); - InFlag = Chain.getValue(1); + for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); + + SDValue Store = + DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 16), false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } + } + FuncInfo->setVarArgsFPRIndex(FPRIdx); + FuncInfo->setVarArgsFPRSize(FPRSaveSize); } - return LowerCallResult(Chain, InFlag, CallConv, - IsVarArg, Ins, dl, DAG, InVals); + if (!MemOps.empty()) { + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); + } } -SDValue -AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +SDValue AArch64TargetLowering::LowerCallResult( + SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, + SmallVectorImpl &InVals, bool isThisReturn, + SDValue ThisVal) const { + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; // Assign locations to each value returned by this call. SmallVector RVLocs; - CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), RVLocs, *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv)); + CCInfo.AnalyzeCallResult(Ins, RetCC); + // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign VA = RVLocs[i]; - // Return values that are too big to fit into registers should use an sret - // pointer, so this can be a lot simpler than the main argument code. - assert(VA.isRegLoc() && "Memory locations not expected for call return"); + // Pass 'this' value directly from the argument to return value, to avoid + // reg unit interference + if (i == 0 && isThisReturn) { + assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 && + "unexpected return calling convention register assignment"); + InVals.push_back(ThisVal); + continue; + } - SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), - InFlag); + SDValue Val = + DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); Chain = Val.getValue(1); InFlag = Val.getValue(2); switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::BCvt: - Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; - case CCValAssign::ZExt: - case CCValAssign::SExt: - case CCValAssign::AExt: - // Floating-point arguments only get extended/truncated if they're going - // in memory, so using the integer operation is acceptable here. 
- Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; } @@ -1812,17 +1960,12 @@ AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, return Chain; } -bool -AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool IsVarArg, - bool IsCalleeStructRet, - bool IsCallerStructRet, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SelectionDAG& DAG) const { - +bool AArch64TargetLowering::isEligibleForTailCallOptimization( + SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, + bool isCalleeStructRet, bool isCallerStructRet, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, SelectionDAG &DAG) const { // For CallingConv::C this function knows whether the ABI needs // changing. That's not true for other conventions so they will have to opt in // manually. @@ -1838,7 +1981,8 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // we want to reuse during a tail call. Working around this *is* possible (see // X86) but less efficient and uglier in LowerCall. for (Function::const_arg_iterator i = CallerF->arg_begin(), - e = CallerF->arg_end(); i != e; ++i) + e = CallerF->arg_end(); + i != e; ++i) if (i->hasByValAttr()) return false; @@ -1854,10 +1998,10 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // I want anyone implementing a new calling convention to think long and hard // about this assert. - assert((!IsVarArg || CalleeCC == CallingConv::C) - && "Unexpected variadic calling convention"); + assert((!isVarArg || CalleeCC == CallingConv::C) && + "Unexpected variadic calling convention"); - if (IsVarArg && !Outs.empty()) { + if (isVarArg && !Outs.empty()) { // At least two cases here: if caller is fastcc then we can't have any // memory arguments (we'd be expected to clean up the stack afterwards). If // caller is C then we could potentially use its argument area. @@ -1865,10 +2009,10 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // FIXME: for now we take the most conservative of these in both cases: // disallow all variadic memory operands. 
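// [Editorial sketch -- not part of the patch] The conservative "disallow all
// variadic memory operands" rule above reduces to a predicate over the
// locations the CC assignment produced (ArrayRef and CCValAssign as in LLVM):
static bool allOperandsInRegisters(ArrayRef<CCValAssign> ArgLocs) {
  for (const CCValAssign &VA : ArgLocs)
    if (!VA.isRegLoc())
      return false; // any stack-passed variadic operand blocks the tail call
  return true;
}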
SmallVector ArgLocs; - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true)); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) if (!ArgLocs[i].isRegLoc()) return false; @@ -1880,12 +2024,12 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, SmallVector RVLocs1; CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), getTargetMachine(), RVLocs1, *DAG.getContext()); - CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC)); + CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForCall(CalleeCC, isVarArg)); SmallVector RVLocs2; CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), getTargetMachine(), RVLocs2, *DAG.getContext()); - CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC)); + CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForCall(CallerCC, isVarArg)); if (RVLocs1.size() != RVLocs2.size()) return false; @@ -1909,28 +2053,18 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return true; SmallVector ArgLocs; - CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(), + CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); + CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); - const AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); + const AArch64FunctionInfo *FuncInfo = MF.getInfo(); // If the stack arguments for this call would fit into our own save area then // the call can be made tail. return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea(); } -bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, - bool TailCallOpt) const { - return CallCC == CallingConv::Fast && TailCallOpt; -} - -bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { - return CallCC == CallingConv::Fast; -} - SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo *MFI, @@ -1946,7 +2080,8 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, // Add a chain value for each stack argument corresponding for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(), - UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U) + UE = DAG.getEntryNode().getNode()->use_end(); + U != UE; ++U) if (LoadSDNode *L = dyn_cast(*U)) if (FrameIndexSDNode *FI = dyn_cast(L->getBasePtr())) if (FI->getIndex() < 0) { @@ -1959,625 +2094,609 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, ArgChains.push_back(SDValue(L, 1)); } - // Build a tokenfactor for all the chains. - return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, - &ArgChains[0], ArgChains.size()); + // Build a tokenfactor for all the chains. 
+ return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); } -static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) { - switch (CC) { - case ISD::SETEQ: return A64CC::EQ; - case ISD::SETGT: return A64CC::GT; - case ISD::SETGE: return A64CC::GE; - case ISD::SETLT: return A64CC::LT; - case ISD::SETLE: return A64CC::LE; - case ISD::SETNE: return A64CC::NE; - case ISD::SETUGT: return A64CC::HI; - case ISD::SETUGE: return A64CC::HS; - case ISD::SETULT: return A64CC::LO; - case ISD::SETULE: return A64CC::LS; - default: llvm_unreachable("Unexpected condition code"); - } +bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC, + bool TailCallOpt) const { + return CallCC == CallingConv::Fast && TailCallOpt; } -bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const { - // icmp is implemented using adds/subs immediate, which take an unsigned - // 12-bit immediate, optionally shifted left by 12 bits. +bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const { + return CallCC == CallingConv::Fast; +} - // Symmetric by using adds/subs - if (Val < 0) - Val = -Val; +/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain, +/// and add input and output parameter nodes. +SDValue +AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &DL = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + bool &IsTailCall = CLI.IsTailCall; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; - return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0; -} + MachineFunction &MF = DAG.getMachineFunction(); + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool IsThisReturn = false; -SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS, - ISD::CondCode CC, SDValue &A64cc, - SelectionDAG &DAG, SDLoc &dl) const { - if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { - int64_t C = 0; - EVT VT = RHSC->getValueType(0); - bool knownInvalid = false; - - // I'm not convinced the rest of LLVM handles these edge cases properly, but - // we can at least get it right. - if (isSignedIntSetCC(CC)) { - C = RHSC->getSExtValue(); - } else if (RHSC->getZExtValue() > INT64_MAX) { - // A 64-bit constant not representable by a signed 64-bit integer is far - // too big to fit into a SUBS immediate anyway. - knownInvalid = true; - } else { - C = RHSC->getZExtValue(); - } + AArch64FunctionInfo *FuncInfo = MF.getInfo(); + bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; + bool IsSibCall = false; - if (!knownInvalid && !isLegalICmpImmediate(C)) { - // Constant does not fit, try adjusting it by one? - switch (CC) { - default: break; - case ISD::SETLT: - case ISD::SETGE: - if (isLegalICmpImmediate(C-1)) { - CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; - RHS = DAG.getConstant(C-1, VT); - } - break; - case ISD::SETULT: - case ISD::SETUGE: - if (isLegalICmpImmediate(C-1)) { - CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; - RHS = DAG.getConstant(C-1, VT); - } - break; - case ISD::SETLE: - case ISD::SETGT: - if (isLegalICmpImmediate(C+1)) { - CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; - RHS = DAG.getConstant(C+1, VT); - } - break; - case ISD::SETULE: - case ISD::SETUGT: - if (isLegalICmpImmediate(C+1)) { - CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; - RHS = DAG.getConstant(C+1, VT); - } - break; - } - } + if (IsTailCall) { + // Check if it's really possible to do a tail call. + IsTailCall = isEligibleForTailCallOptimization( + Callee, CallConv, IsVarArg, IsStructRet, + MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); + if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); + + // A sibling call is one where we're under the usual C ABI and not planning + // to change that but can still do a tail call: + if (!TailCallOpt && IsTailCall) + IsSibCall = true; + + if (IsTailCall) + ++NumTailCalls; } - A64CC::CondCodes CondCode = IntCCToA64CC(CC); - A64cc = DAG.getConstant(CondCode, MVT::i32); - return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); -} + // Analyze operands of the call, assigning locations to each operand. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); -static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC, - A64CC::CondCodes &Alternative) { - A64CC::CondCodes CondCode = A64CC::Invalid; - Alternative = A64CC::Invalid; + if (IsVarArg) { + // Handle fixed and variable vector arguments differently. + // Variable vector arguments always go into memory. + unsigned NumArgs = Outs.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, + /*IsVarArg=*/ !Outs[i].IsFixed); + bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + assert(!Res && "Call operand has unhandled type"); + (void)Res; + } + } else { + // At this point, Outs[].VT may already be promoted to i32. To correctly + // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and + // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. + // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here + // we use a special version of AnalyzeCallOperands to pass in ValVT and + // LocVT. + unsigned NumArgs = Outs.size(); + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ValVT = Outs[i].VT; + // Get type of the original argument. + EVT ActualVT = getValueType(CLI.getArgs()[Outs[i].OrigArgIndex].Ty, + /*AllowUnknown*/ true); + MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. 
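// [Editorial sketch -- not part of the patch] The removed
// isLegalICmpImmediate above encodes the A64 rule that adds/subs immediates
// are 12 bits, optionally shifted left by 12:
static bool isLegalArithImmediate(uint64_t V) {
  return (V & ~0xfffULL) == 0 || (V & ~0xfff000ULL) == 0;
}
// e.g. 4095 and 4096 (0x1000, the shifted form) are legal, but 4097 is not,
// which is why the removed SETLT/SETGE-style rewrites try C-1 and C+1.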
+ if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) + ValVT = MVT::i8; + else if (ActualMVT == MVT::i16) + ValVT = MVT::i16; + + CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); + bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo); + assert(!Res && "Call operand has unhandled type"); + (void)Res; + } + } - switch (CC) { - default: llvm_unreachable("Unknown FP condition!"); - case ISD::SETEQ: - case ISD::SETOEQ: CondCode = A64CC::EQ; break; - case ISD::SETGT: - case ISD::SETOGT: CondCode = A64CC::GT; break; - case ISD::SETGE: - case ISD::SETOGE: CondCode = A64CC::GE; break; - case ISD::SETOLT: CondCode = A64CC::MI; break; - case ISD::SETOLE: CondCode = A64CC::LS; break; - case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break; - case ISD::SETO: CondCode = A64CC::VC; break; - case ISD::SETUO: CondCode = A64CC::VS; break; - case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break; - case ISD::SETUGT: CondCode = A64CC::HI; break; - case ISD::SETUGE: CondCode = A64CC::PL; break; - case ISD::SETLT: - case ISD::SETULT: CondCode = A64CC::LT; break; - case ISD::SETLE: - case ISD::SETULE: CondCode = A64CC::LE; break; - case ISD::SETNE: - case ISD::SETUNE: CondCode = A64CC::NE; break; + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + + if (IsSibCall) { + // Since we're not changing the ABI to make this a tail call, the memory + // operands are already available in the caller's incoming argument space. + NumBytes = 0; } - return CondCode; -} -SDValue -AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT PtrVT = getPointerTy(); - const BlockAddress *BA = cast(Op)->getBlockAddress(); + // FPDiff is the byte offset of the call's argument area from the callee's. + // Stores to callee stack arguments will be placed in FixedStackSlots offset + // by this amount for a tail call. In a sibling call it must be 0 because the + // caller will deallocate the entire stack and the callee still expects its + // arguments to begin at SP+0. Completely unused for non-tail calls. + int FPDiff = 0; - switch(getTargetMachine().getCodeModel()) { - case CodeModel::Small: - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetBlockAddress(BA, PtrVT, 0, - AArch64II::MO_LO12), - DAG.getConstant(/*Alignment=*/ 4, MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); + if (IsTailCall && !IsSibCall) { + unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea(); + + // Since callee will pop argument stack as a tail call, we must keep the + // popped size 16-byte aligned. + NumBytes = RoundUpToAlignment(NumBytes, 16); + + // FPDiff will be negative if this tail call requires more space than we + // would automatically have in our incoming argument space. Positive if we + // can actually shrink the stack. 
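// [Editorial sketch -- not part of the patch] The FPDiff computation used
// below, in isolation and assuming the names used in this function:
static int computeFPDiff(unsigned NumReusableBytes, unsigned NumBytes) {
  unsigned Rounded = (NumBytes + 15) & ~15u; // popped size stays 16-aligned
  return (int)NumReusableBytes - (int)Rounded;
}
// e.g. with 32 reusable bytes of incoming argument space and a tail call
// needing 48 bytes, FPDiff = 32 - 48 = -16: every fixed stack object for the
// callee's arguments is created 16 bytes below the caller's own.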
+ FPDiff = NumReusableBytes - NumBytes; + + // The stack pointer must be 16-byte aligned at all times it's used for a + // memory operation, which in practice means at *all* times and in + // particular across call boundaries. Therefore our own arguments started at + // a 16-byte aligned SP and the delta applied for the tail call should + // satisfy the same constraint. + assert(FPDiff % 16 == 0 && "unaligned stack on tail call"); } -} + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass + if (!IsSibCall) + Chain = + DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL); -// (BRCOND chain, val, dest) -SDValue -AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue Chain = Op.getOperand(0); - SDValue TheBit = Op.getOperand(1); - SDValue DestBB = Op.getOperand(2); + SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy()); - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means - // that as the consumer we are responsible for ignoring rubbish in higher - // bits. - TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, - DAG.getConstant(1, MVT::i32)); + SmallVector, 8> RegsToPass; + SmallVector MemOpChains; - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, - DAG.getConstant(0, TheBit.getValueType()), - DAG.getCondCode(ISD::SETNE)); + // Walk the register/memloc assignments, inserting copies/loads. + for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = OutVals[realArgIdx]; + ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain, - A64CMP, DAG.getConstant(A64CC::NE, MVT::i32), - DestBB); -} + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + if (Outs[realArgIdx].ArgVT == MVT::i1) { + // AAPCS requires i1 to be zero-extended to 8-bits by the caller. 
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg); + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg); + } + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::FPExt: + Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); + break; + } -// (BR_CC chain, condcode, lhs, rhs, dest) -SDValue -AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue Chain = Op.getOperand(0); - ISD::CondCode CC = cast(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue DestBB = Op.getOperand(4); + if (VA.isRegLoc()) { + if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) { + assert(VA.getLocVT() == MVT::i64 && + "unexpected calling convention register assignment"); + assert(!Ins.empty() && Ins[0].VT == MVT::i64 && + "unexpected use of 'returned'"); + IsThisReturn = true; + } + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + assert(VA.isMemLoc()); - if (LHS.getValueType() == MVT::f128) { - // f128 comparisons are lowered to runtime calls by a routine which sets - // LHS, RHS and CC appropriately for the rest of this function to continue. - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + SDValue DstAddr; + MachinePointerInfo DstInfo; - // If softenSetCCOperands returned a scalar, we need to compare the result - // against zero to select between true and false values. - if (RHS.getNode() == 0) { - RHS = DAG.getConstant(0, LHS.getValueType()); - CC = ISD::SETNE; + // FIXME: This works on big-endian for composite byvals, which are the + // common case. It should also work for fundamental types too. + uint32_t BEAlign = 0; + unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8 + : VA.getLocVT().getSizeInBits(); + OpSize = (OpSize + 7) / 8; + if (!Subtarget->isLittleEndian() && !Flags.isByVal()) { + if (OpSize < 8) + BEAlign = 8 - OpSize; + } + unsigned LocMemOffset = VA.getLocMemOffset(); + int32_t Offset = LocMemOffset + BEAlign; + SDValue PtrOff = DAG.getIntPtrConstant(Offset); + PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + + if (IsTailCall) { + Offset = Offset + FPDiff; + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); + + DstAddr = DAG.getFrameIndex(FI, getPointerTy()); + DstInfo = MachinePointerInfo::getFixedStack(FI); + + // Make sure any stack arguments overlapping with where we're storing + // are loaded before this eventual operation. Otherwise they'll be + // clobbered. + Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI); + } else { + SDValue PtrOff = DAG.getIntPtrConstant(Offset); + + DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff); + DstInfo = MachinePointerInfo::getStack(LocMemOffset); + } + + if (Outs[i].Flags.isByVal()) { + SDValue SizeNode = + DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64); + SDValue Cpy = DAG.getMemcpy( + Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), + /*isVolatile = */ false, + /*alwaysInline = */ false, DstInfo, MachinePointerInfo()); + + MemOpChains.push_back(Cpy); + } else { + // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already + // promoted to a legal register type i32, we should truncate Arg back to + // i1/i8/i16. 
+ if (Arg.getValueType().isSimple() && + Arg.getValueType().getSimpleVT() == MVT::i32 && + (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 || + VA.getLocVT() == MVT::i16)) + Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg); + + SDValue Store = + DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0); + MemOpChains.push_back(Store); + } } } - if (LHS.getValueType().isInteger()) { - SDValue A64cc; + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } - return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, - Chain, CmpOp, A64cc, DestBB); + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (getTargetMachine().getCodeModel() == CodeModel::Large && + Subtarget->isTargetMachO()) { + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + const GlobalValue *GV = G->getGlobal(); + bool InternalLinkage = GV->hasInternalLinkage(); + if (InternalLinkage) + Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); + else { + Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, + AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); + } + } else if (ExternalSymbolSDNode *S = + dyn_cast(Callee)) { + const char *Sym = S->getSymbol(); + Callee = + DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT); + Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee); + } + } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + const GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0); + } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + const char *Sym = S->getSymbol(); + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0); } - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, - Chain, SetCC, A64cc, DestBB); + // We don't usually want to end the call-sequence here because we would tidy + // the frame up *after* the call, however in the ABI-changing tail-call case + // we've carefully laid out the parameters so that when sp is reset they'll be + // in the correct location. 
+  if (IsTailCall && !IsSibCall) {
+    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               DAG.getIntPtrConstant(0, true), InFlag, DL);
+    InFlag = Chain.getValue(1);
+  }
-  if (Alternative != A64CC::Invalid) {
-    A64cc = DAG.getConstant(Alternative, MVT::i32);
-    A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
-                           A64BR_CC, SetCC, A64cc, DestBB);
+  std::vector<SDValue> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+  if (IsTailCall) {
+    // Each tail call may have to adjust the stack by a different amount, so
+    // this information must travel along with the operation for eventual
+    // consumption by emitEpilogue.
+    Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
   }
-  return A64BR_CC;
-}
+  // Add argument registers to the end of the list so that they are known live
+  // into the call.
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
-SDValue
-AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
-                                       RTLIB::Libcall Call) const {
-  ArgListTy Args;
-  ArgListEntry Entry;
-  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
-    EVT ArgVT = Op.getOperand(i).getValueType();
-    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
-    Entry.isSExt = false;
-    Entry.isZExt = false;
-    Args.push_back(Entry);
-  }
-  SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
+  // Add a register mask operand representing the call-preserved registers.
+  const uint32_t *Mask;
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const AArch64RegisterInfo *ARI =
+      static_cast<const AArch64RegisterInfo *>(TRI);
+  if (IsThisReturn) {
+    // For 'this' returns, use the X0-preserving mask if applicable
+    Mask = ARI->getThisReturnPreservedMask(CallConv);
+    if (!Mask) {
+      IsThisReturn = false;
+      Mask = ARI->getCallPreservedMask(CallConv);
+    }
+  } else
+    Mask = ARI->getCallPreservedMask(CallConv);
-  Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
-  // By default, the input chain to this libcall is the entry node of the
-  // function. If the libcall is going to be emitted as a tail call then
-  // isUsedByReturnOnly will change it to the right chain if the return
-  // node which is being folded has a non-entry input chain.
-  SDValue InChain = DAG.getEntryNode();
+  if (InFlag.getNode())
+    Ops.push_back(InFlag);
-  // isTailCall may be true since the callee does not reference caller stack
-  // frame. Check if it's in the right position.
-  SDValue TCChain = InChain;
-  bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
-  if (isTailCall)
-    InChain = TCChain;
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-  TargetLowering::
-  CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
-                       0, getLibcallCallingConv(Call), isTailCall,
-                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
-                       Callee, Args, DAG, SDLoc(Op));
-  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+  // If we're doing a tail call, use a TC_RETURN here rather than an
+  // actual call instruction.
+  if (IsTailCall)
+    return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
-  if (!CallInfo.second.getNode())
-    // It's a tailcall, return the chain (which is the DAG root).
-    return DAG.getRoot();
+  // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); + InFlag = Chain.getValue(1); - return CallInfo.first; -} + uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt) + ? RoundUpToAlignment(NumBytes, 16) + : 0; -SDValue -AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(CalleePopBytes, true), + InFlag, DL); + if (!Ins.empty()) + InFlag = Chain.getValue(1); - RTLIB::Libcall LC; - LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + // Handle result values, copying them out of physregs into vregs that we + // return. + return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG, + InVals, IsThisReturn, + IsThisReturn ? OutVals[0] : SDValue()); +} - SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, - /*isSigned*/ false, SDLoc(Op)).first; +bool AArch64TargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC); } SDValue -AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { - assert(Op.getValueType() == MVT::f128 && "Unexpected lowering"); +AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + SDLoc DL, SelectionDAG &DAG) const { + CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS + ? RetCC_AArch64_WebKit_JS + : RetCC_AArch64_AAPCS; + SmallVector RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeReturn(Outs, RetCC); - RTLIB::Libcall LC; - LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + // Copy the result values into the output registers. + SDValue Flag; + SmallVector RetOps(1, Chain); + for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); + ++i, ++realRVLocIdx) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDValue Arg = OutVals[realRVLocIdx]; - return LowerF128ToCall(Op, DAG, LC); -} + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: + if (Outs[i].ArgVT == MVT::i1) { + // AAPCS requires i1 to be zero-extended to i8 by the producer of the + // value. This is strictly redundant on Darwin (which uses "zeroext + // i1"), but will be optimised out before ISel. + Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg); + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + } + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + break; + } -static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG, - bool IsSigned) { - SDLoc dl(Op); - EVT VT = Op.getValueType(); - SDValue Vec = Op.getOperand(0); - EVT OpVT = Vec.getValueType(); - unsigned Opc = IsSigned ? 
ISD::FP_TO_SINT : ISD::FP_TO_UINT;
-
-  if (VT.getVectorNumElements() == 1) {
-    assert(OpVT == MVT::v1f64 && "Unexpected vector type!");
-    if (VT.getSizeInBits() == OpVT.getSizeInBits())
-      return Op;
-    return DAG.UnrollVectorOp(Op.getNode());
-  }
-
-  if (VT.getSizeInBits() > OpVT.getSizeInBits()) {
-    assert(Vec.getValueType() == MVT::v2f32 && VT == MVT::v2i64 &&
-           "Unexpected vector type!");
-    Vec = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Vec);
-    return DAG.getNode(Opc, dl, VT, Vec);
-  } else if (VT.getSizeInBits() < OpVT.getSizeInBits()) {
-    EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
-                                   OpVT.getVectorElementType().getSizeInBits());
-    CastVT =
-        EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
-    Vec = DAG.getNode(Opc, dl, CastVT, Vec);
-    return DAG.getNode(ISD::TRUNCATE, dl, VT, Vec);
-  }
-  return DAG.getNode(Opc, dl, VT, Vec);
-}
-
-static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
-  // We custom lower concat_vectors with 4, 8, or 16 operands that are all the
-  // same operand and of type v1* using the DUP instruction.
-  unsigned NumOps = Op->getNumOperands();
-  if (NumOps == 2) {
-    assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
-    return Op;
+    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
+    Flag = Chain.getValue(1);
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   }
-  if (NumOps != 4 && NumOps != 8 && NumOps != 16)
-    return SDValue();
+  RetOps[0] = Chain; // Update chain.
-  // Must be a single value for VDUP.
-  SDValue Op0 = Op.getOperand(0);
-  for (unsigned i = 1; i < NumOps; ++i) {
-    SDValue OpN = Op.getOperand(i);
-    if (Op0 != OpN)
-      return SDValue();
-  }
+  // Add the flag if we have it.
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
-  // Verify the value type.
-  EVT EltVT = Op0.getValueType();
-  switch (NumOps) {
-  default: llvm_unreachable("Unexpected number of operands");
-  case 4:
-    if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
-      return SDValue();
-    break;
-  case 8:
-    if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
-      return SDValue();
-    break;
-  case 16:
-    if (EltVT != MVT::v1i8)
-      return SDValue();
-    break;
-  }
+  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
+}
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy();
   SDLoc DL(Op);
-  EVT VT = Op.getValueType();
-  // VDUP produces better code for constants.
-  if (Op0->getOpcode() == ISD::BUILD_VECTOR)
-    return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
-  return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
-                     DAG.getConstant(0, MVT::i64));
+  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  unsigned char OpFlags =
+      Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+
+  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+         "unexpected offset in global node");
+
+  // This also catches the large code model case for Darwin.
+  if ((OpFlags & AArch64II::MO_GOT) != 0) {
+    SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
+    // FIXME: Once remat is capable of dealing with instructions with register
+    // operands, expand this into two nodes instead of using a wrapper node.
+ return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + } + + if (getTargetMachine().getCodeModel() == CodeModel::Large) { + const unsigned char MO_NC = AArch64II::MO_NC; + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + } else { + // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and + // the only correct model on Darwin. + SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + OpFlags | AArch64II::MO_PAGE); + unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; + SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); + + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + } } +/// \brief Convert a TLS address reference into the correct sequence of loads +/// and calls to compute the variable's address (for Darwin, currently) and +/// return an SDValue containing the final node. + +/// Darwin only has one TLS scheme which must be capable of dealing with the +/// fully general situation, in the worst case. This means: +/// + "extern __thread" declaration. +/// + Defined in a possibly unknown dynamic library. +/// +/// The general system is that each __thread variable has a [3 x i64] descriptor +/// which contains information used by the runtime to calculate the address. The +/// only part of this the compiler needs to know about is the first xword, which +/// contains a function pointer that must be called with the address of the +/// entire descriptor in "x0". +/// +/// Since this descriptor may be in a different unit, in general even the +/// descriptor must be accessed via an indirect load. The "ideal" code sequence +/// is: +/// adrp x0, _var@TLVPPAGE +/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor +/// ldr x1, [x0] ; x1 contains 1st entry of descriptor, +/// ; the function pointer +/// blr x1 ; Uses descriptor address in x0 +/// ; Address of _var is now in x0. +/// +/// If the address of _var's descriptor *is* known to the linker, then it can +/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for +/// a slight efficiency gain. 
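// [Editorial sketch -- not part of the patch] The [3 x i64] descriptor the
// comment above describes can be pictured as the following struct; only the
// first xword's meaning is fixed by the scheme, and the field names here are
// hypothetical:
struct TLVDescriptor {
  void *(*Thunk)(TLVDescriptor *); // called with the descriptor's own
                                   // address in x0; returns &variable
  unsigned long long Opaque[2];    // runtime-private remainder
};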
SDValue -AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - bool IsSigned) const { - if (Op.getValueType().isVector()) - return LowerVectorFP_TO_INT(Op, DAG, IsSigned); - if (Op.getOperand(0).getValueType() != MVT::f128) { - // It's legal except when f128 is involved - return Op; - } - - RTLIB::Libcall LC; - if (IsSigned) - LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - - return LowerF128ToCall(Op, DAG, LC); -} - -SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MFI->setReturnAddressIsTaken(true); +AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); - if (verifyReturnAddressArgumentIsConstant(Op, DAG)) - return SDValue(); - - EVT VT = Op.getValueType(); - SDLoc dl(Op); - unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - if (Depth) { - SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); - SDValue Offset = DAG.getConstant(8, MVT::i64); - return DAG.getLoad(VT, dl, DAG.getEntryNode(), - DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, false, 0); - } + SDLoc DL(Op); + MVT PtrVT = getPointerTy(); + const GlobalValue *GV = cast(Op)->getGlobal(); - // Return X30, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64)); - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64); -} + SDValue TLVPAddr = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); + SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr); + // The first entry in the descriptor is a function pointer that we must call + // to obtain the address of the variable. 
+ SDValue Chain = DAG.getEntryNode(); + SDValue FuncTLVGet = + DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(), + false, true, true, 8); + Chain = FuncTLVGet.getValue(1); -SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) - const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); - MFI->setFrameAddressIsTaken(true); - - EVT VT = Op.getValueType(); - SDLoc dl(Op); - unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = AArch64::X29; - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); - while (Depth--) - FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, - MachinePointerInfo(), - false, false, false, 0); - return FrameAddr; -} - -SDValue -AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, - SelectionDAG &DAG) const { - assert(getTargetMachine().getCodeModel() == CodeModel::Large); - assert(getTargetMachine().getRelocationModel() == Reloc::Static); - - EVT PtrVT = getPointerTy(); - SDLoc dl(Op); - const GlobalAddressSDNode *GN = cast(Op); - const GlobalValue *GV = GN->getGlobal(); - - SDValue GlobalAddr = DAG.getNode( - AArch64ISD::WrapperLarge, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC)); - - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalAddr; -} - -SDValue -AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op, - SelectionDAG &DAG) const { - assert(getTargetMachine().getCodeModel() == CodeModel::Small); - - EVT PtrVT = getPointerTy(); - SDLoc dl(Op); - const GlobalAddressSDNode *GN = cast(Op); - const GlobalValue *GV = GN->getGlobal(); - unsigned Alignment = GV->getAlignment(); - Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) { - // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate - // to zero when they remain undefined. In PIC mode the GOT can take care of - // this, but in absolute mode we use a constant pool load. - SDValue PoolAddr; - PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetConstantPool(GV, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetConstantPool(GV, PtrVT, 0, 0, - AArch64II::MO_LO12), - DAG.getConstant(8, MVT::i32)); - SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /*isVolatile=*/ false, - /*isNonTemporal=*/ true, - /*isInvariant=*/ true, 8); - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalAddr; - } - - if (Alignment == 0) { - const PointerType *GVPtrTy = cast(GV->getType()); - if (GVPtrTy->getElementType()->isSized()) { - Alignment - = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType()); - } else { - // Be conservative if we can't guess, not that it really matters: - // functions and labels aren't valid for loads, and the methods used to - // actually calculate an address work with any alignment. 
- Alignment = 1; - } - } - - unsigned char HiFixup, LoFixup; - bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM); - - if (UseGOT) { - HiFixup = AArch64II::MO_GOT; - LoFixup = AArch64II::MO_GOT_LO12; - Alignment = 8; - } else { - HiFixup = AArch64II::MO_NO_FLAG; - LoFixup = AArch64II::MO_LO12; - } - - // AArch64's small model demands the following sequence: - // ADRP x0, somewhere - // ADD x0, x0, #:lo12:somewhere ; (or LDR directly). - SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - HiFixup), - DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, - LoFixup), - DAG.getConstant(Alignment, MVT::i32)); - - if (UseGOT) { - GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(), - GlobalRef); - } - - if (GN->getOffset() != 0) - return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef, - DAG.getConstant(GN->getOffset(), PtrVT)); - - return GlobalRef; -} + MFI->setAdjustsStack(true); -SDValue -AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, - SelectionDAG &DAG) const { - // TableGen doesn't have easy access to the CodeModel or RelocationModel, so - // we make those distinctions here. - - switch (getTargetMachine().getCodeModel()) { - case CodeModel::Small: - return LowerGlobalAddressELFSmall(Op, DAG); - case CodeModel::Large: - return LowerGlobalAddressELFLarge(Op, DAG); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} - -SDValue -AArch64TargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT PtrVT = getPointerTy(); - ConstantPoolSDNode *CN = cast(Op); - const Constant *C = CN->getConstVal(); - - switch(getTargetMachine().getCodeModel()) { - case CodeModel::Small: - // The most efficient code is PC-relative anyway for the small memory model, - // so we don't need to worry about relocation model. - return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetConstantPool(C, PtrVT, 0, 0, - AArch64II::MO_NO_FLAG), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, - AArch64II::MO_LO12), - DAG.getConstant(CN->getAlignment(), MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G3), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), - DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); - } + // TLS calls preserve all registers except those that absolutely must be + // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be + // silly). + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const AArch64RegisterInfo *ARI = + static_cast(TRI); + const uint32_t *Mask = ARI->getTLSCallPreservedMask(); + + // Finally, we can make the call. This is just a degenerate version of a + // normal AArch64 call node: x0 takes the address of the descriptor, and + // returns the address of the variable in this thread. 
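+  // (Operand shape of the CALL node built below: chain, callee, the X0
+  // argument register, the preserved-register mask, then the glue from the
+  // CopyToReg; the same layout a normally lowered call would use.)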
+  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
+  Chain =
+      DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+                  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
+                  DAG.getRegisterMask(Mask), Chain.getValue(1));
+  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
 }
 
-SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
-                                                SDValue DescAddr,
-                                                SDLoc DL,
-                                                SelectionDAG &DAG) const {
+/// When accessing thread-local variables under either the general-dynamic or
+/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
+/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
+/// is a function pointer to carry out the resolution. This function takes the
+/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
+/// other registers (except LR, NZCV) are preserved.
+///
+/// Thus, the ideal call sequence on AArch64 is:
+///
+///   adrp x0, :tlsdesc:thread_var
+///   ldr x8, [x0, :tlsdesc_lo12:thread_var]
+///   add x0, x0, :tlsdesc_lo12:thread_var
+///   .tlsdesccall thread_var
+///   blr x8
+///   (TPIDR_EL0 offset now in x0).
+///
+/// The ".tlsdesccall" directive instructs the assembler to insert a particular
+/// relocation to help the linker relax this sequence if it turns out to be too
+/// conservative.
+///
+/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but
+/// this is harmless.
+SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
+                                                   SDValue DescAddr, SDLoc DL,
+                                                   SelectionDAG &DAG) const {
   EVT PtrVT = getPointerTy();
 
   // The function we need to call is simply the first entry in the GOT for this
   // descriptor, load it in preparation.
-  SDValue Func, Chain;
-  Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
-                     DescAddr);
+  SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
+
+  // TLS calls preserve all registers except those that absolutely must be
+  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not
+  // be silly).
+  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+  const AArch64RegisterInfo *ARI =
+      static_cast<const AArch64RegisterInfo *>(TRI);
+  const uint32_t *Mask = ARI->getTLSCallPreservedMask();
 
   // The function takes only one argument: the address of the descriptor itself
   // in X0.
-  SDValue Glue;
+  SDValue Glue, Chain;
   Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
   Glue = Chain.getValue(1);
 
-  // Finally, there's a special calling-convention which means that the lookup
-  // must preserve all registers (except X0, obviously).
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
-  const AArch64RegisterInfo *A64RI
-    = static_cast<const AArch64RegisterInfo *>(TRI);
-  const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
-
   // We're now ready to populate the argument list, as with a normal call:
-  std::vector<SDValue> Ops;
+  SmallVector<SDValue, 8> Ops;
   Ops.push_back(Chain);
   Ops.push_back(Func);
   Ops.push_back(SymAddr);
@@ -2586,22 +2705,18 @@ SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
   Ops.push_back(Glue);
 
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-  Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
-                      Ops.size());
+  Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
   Glue = Chain.getValue(1);
 
-  // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
-  // back to the generic handling code.
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); } SDValue -AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - assert(getSubtarget()->isTargetELF() && - "TLS not implemented for non-ELF targets"); - assert(getTargetMachine().getCodeModel() == CodeModel::Small - && "TLS only supported in small memory model"); +AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetELF() && "This function expects an ELF target"); + assert(getTargetMachine().getCodeModel() == CodeModel::Small && + "ELF TLS only supported in small memory model"); const GlobalAddressSDNode *GA = cast(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); @@ -2613,39 +2728,22 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); - if (Model == TLSModel::InitialExec) { - TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_GOTTPREL), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_GOTTPREL_LO12), - DAG.getConstant(8, MVT::i32)); - TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(), - TPOff); - } else if (Model == TLSModel::LocalExec) { - SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_TPREL_G1); - SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0, - AArch64II::MO_TPREL_G0_NC); - - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, - DAG.getTargetConstant(1, MVT::i32)), 0); - TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, - TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), 0); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_TLSDESC); - SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - AArch64II::MO_TLSDESC_LO12); - SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT, - HiDesc, LoDesc, - DAG.getConstant(8, MVT::i32)); - SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0); - - TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG); + if (Model == TLSModel::LocalExec) { + SDValue HiVar = DAG.getTargetGlobalAddress( + GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1); + SDValue LoVar = DAG.getTargetGlobalAddress( + GV, DL, PtrVT, 0, + AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC); + + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar, + DAG.getTargetConstant(16, MVT::i32)), + 0); + TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar, + DAG.getTargetConstant(0, MVT::i32)), + 0); + } else if (Model == TLSModel::InitialExec) { + TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); + TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff); } else if (Model == TLSModel::LocalDynamic) { // Local-dynamic accesses proceed in two phases. A general-dynamic TLS // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate @@ -2653,367 +2751,354 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, // calculation. // These accesses will need deduplicating if there's more than one. 
-    AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
-                                        .getInfo<AArch64MachineFunctionInfo>();
+    AArch64FunctionInfo *MFI =
+        DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
     MFI->incNumLocalDynamicTLSAccesses();
-
-    // Get the location of _TLS_MODULE_BASE_:
-    SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
-                                                 AArch64II::MO_TLSDESC);
-    SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
-                                                 AArch64II::MO_TLSDESC_LO12);
-    SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
-                                   HiDesc, LoDesc,
-                                   DAG.getConstant(8, MVT::i32));
-    SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
-
-    ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
-
-    // Get the variable's offset from _TLS_MODULE_BASE_
-    SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
-                                               AArch64II::MO_DTPREL_G1);
-    SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
-                                               AArch64II::MO_DTPREL_G0_NC);
-
-    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
-                                       DAG.getTargetConstant(0, MVT::i32)), 0);
-    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
-                                       TPOff, LoVar,
-                                       DAG.getTargetConstant(0, MVT::i32)), 0);
+    // Accesses used in this sequence go via the TLS descriptor which lives in
+    // the GOT. Prepare an address we can use to handle this.
+    SDValue HiDesc = DAG.getTargetExternalSymbol(
+        "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
+    SDValue LoDesc = DAG.getTargetExternalSymbol(
+        "_TLS_MODULE_BASE_", PtrVT,
+        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+    // First argument to the descriptor call is the address of the descriptor
+    // itself.
+    SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
+    DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
+
+    // The call needs a relocation too for linker relaxation. It doesn't make
+    // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
+    // the address.
+    SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+                                                  AArch64II::MO_TLS);
+
+    // Now we can calculate the offset from TPIDR_EL0 to this module's
+    // thread-local area.
+    TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+
+    // Now use :dtprel_whatever: operations to calculate this variable's offset
+    // in its thread-storage area.
+    SDValue HiVar = DAG.getTargetGlobalAddress(
+        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+    SDValue LoVar = DAG.getTargetGlobalAddress(
+        GV, DL, MVT::i64, 0,
+        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+
+    SDValue DTPOff =
+        SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
+                                   DAG.getTargetConstant(16, MVT::i32)),
+                0);
+    DTPOff =
+        SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
+                                   DAG.getTargetConstant(0, MVT::i32)),
+                0);
+
+    TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
+  } else if (Model == TLSModel::GeneralDynamic) {
+    // Accesses used in this sequence go via the TLS descriptor which lives in
+    // the GOT. Prepare an address we can use to handle this.
+    SDValue HiDesc = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
+    SDValue LoDesc = DAG.getTargetGlobalAddress(
+        GV, DL, PtrVT, 0,
+        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+    // First argument to the descriptor call is the address of the descriptor
+    // itself.
+ SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc); + DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); + + // The call needs a relocation too for linker relaxation. It doesn't make + // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of + // the address. + SDValue SymAddr = + DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS); + + // Finally we can make a call to calculate the offset from tpidr_el0. + TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); } else - llvm_unreachable("Unsupported TLS access model"); - + llvm_unreachable("Unsupported ELF TLS access model"); return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); } -static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG, - bool IsSigned) { +SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + if (Subtarget->isTargetDarwin()) + return LowerDarwinGlobalTLSAddress(Op, DAG); + else if (Subtarget->isTargetELF()) + return LowerELFGlobalTLSAddress(Op, DAG); + + llvm_unreachable("Unexpected platform trying to use TLS"); +} +SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast(Op.getOperand(1))->get(); + SDValue LHS = Op.getOperand(2); + SDValue RHS = Op.getOperand(3); + SDValue Dest = Op.getOperand(4); SDLoc dl(Op); - EVT VT = Op.getValueType(); - SDValue Vec = Op.getOperand(0); - unsigned Opc = IsSigned ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; - if (VT.getVectorNumElements() == 1) { - assert(VT == MVT::v1f64 && "Unexpected vector type!"); - if (VT.getSizeInBits() == Vec.getValueSizeInBits()) - return Op; - return DAG.UnrollVectorOp(Op.getNode()); - } + // Handle f128 first, since lowering it will result in comparing the return + // value of a libcall against zero, which is just what the rest of LowerBR_CC + // is expecting to deal with. + if (LHS.getValueType() == MVT::f128) { + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - if (VT.getSizeInBits() < Vec.getValueSizeInBits()) { - assert(Vec.getValueType() == MVT::v2i64 && VT == MVT::v2f32 && - "Unexpected vector type!"); - Vec = DAG.getNode(Opc, dl, MVT::v2f64, Vec); - return DAG.getNode(ISD::FP_ROUND, dl, VT, Vec, DAG.getIntPtrConstant(0)); - } else if (VT.getSizeInBits() > Vec.getValueSizeInBits()) { - unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; - EVT CastVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getVectorElementType().getSizeInBits()); - CastVT = - EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements()); - Vec = DAG.getNode(CastOpc, dl, CastVT, Vec); + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } } - return DAG.getNode(Opc, dl, VT, Vec); -} + // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch + // instruction. + unsigned Opc = LHS.getOpcode(); + if (LHS.getResNo() == 1 && isa(RHS) && + cast(RHS)->isOne() && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { + assert((CC == ISD::SETEQ || CC == ISD::SETNE) && + "Unexpected condition code."); + // Only lower legal XALUO ops. 
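+    // (If the overflow op's result type is illegal, it will be expanded by
+    // type legalization later, after which the overflow result no longer
+    // feeds the branch in this recognizable form, hence the bail-out below.)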
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) + return SDValue(); -SDValue -AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, - bool IsSigned) const { - if (Op.getValueType().isVector()) - return LowerVectorINT_TO_FP(Op, DAG, IsSigned); - if (Op.getValueType() != MVT::f128) { - // Legal for everything except f128. - return Op; - } + // The actual operation with overflow check. + AArch64CC::CondCode OFCC; + SDValue Value, Overflow; + std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG); - RTLIB::Libcall LC; - if (IsSigned) - LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - else - LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); + if (CC == ISD::SETNE) + OFCC = getInvertedCondCode(OFCC); + SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - return LowerF128ToCall(Op, DAG, LC); -} + return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, + CCVal, Overflow); + } + if (LHS.getValueType().isInteger()) { + assert((LHS.getValueType() == RHS.getValueType()) && + (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); + + // If the RHS of the comparison is zero, we can potentially fold this + // to a specialized branch. + const ConstantSDNode *RHSC = dyn_cast(RHS); + if (RHSC && RHSC->getZExtValue() == 0) { + if (CC == ISD::SETEQ) { + // See if we can use a TBZ to fold in an AND as well. + // TBZ has a smaller branch displacement than CBZ. If the offset is + // out of bounds, a late MI-layer pass rewrites branches. + // 403.gcc is an example that hits this case. + if (LHS.getOpcode() == ISD::AND && + isa(LHS.getOperand(1)) && + isPowerOf2_64(LHS.getConstantOperandVal(1))) { + SDValue Test = LHS.getOperand(0); + uint64_t Mask = LHS.getConstantOperandVal(1); + + // TBZ only operates on i64's, but the ext should be free. + if (Test.getValueType() == MVT::i32) + Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); + + return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test, + DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); + } -SDValue -AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - JumpTableSDNode *JT = cast(Op); - SDLoc dl(JT); - EVT PtrVT = getPointerTy(); + return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); + } else if (CC == ISD::SETNE) { + // See if we can use a TBZ to fold in an AND as well. + // TBZ has a smaller branch displacement than CBZ. If the offset is + // out of bounds, a late MI-layer pass rewrites branches. + // 403.gcc is an example that hits this case. + if (LHS.getOpcode() == ISD::AND && + isa(LHS.getOperand(1)) && + isPowerOf2_64(LHS.getConstantOperandVal(1))) { + SDValue Test = LHS.getOperand(0); + uint64_t Mask = LHS.getConstantOperandVal(1); + + // TBNZ only operates on i64's, but the ext should be free. + if (Test.getValueType() == MVT::i32) + Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); + + return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test, + DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); + } - // When compiling PIC, jump tables get put in the code section so a static - // relocation-style is acceptable for both cases. 
- switch (getTargetMachine().getCodeModel()) { - case CodeModel::Small: - return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_LO12), - DAG.getConstant(1, MVT::i32)); - case CodeModel::Large: - return DAG.getNode( - AArch64ISD::WrapperLarge, dl, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC)); - default: - llvm_unreachable("Only small and large code models supported now"); - } -} + return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); + } + } -// (SELECT testbit, iftrue, iffalse) -SDValue -AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); - SDValue TheBit = Op.getOperand(0); - SDValue IfTrue = Op.getOperand(1); - SDValue IfFalse = Op.getOperand(2); + SDValue CCVal; + SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, + Cmp); + } - // AArch64 BooleanContents is the default UndefinedBooleanContent, which means - // that as the consumer we are responsible for ignoring rubbish in higher - // bits. - TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit, - DAG.getConstant(1, MVT::i32)); - SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit, - DAG.getConstant(0, TheBit.getValueType()), - DAG.getCondCode(ISD::SETNE)); + assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); + + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally + // clean. Some of them require two branches to implement. + SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); + SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); + SDValue BR1 = + DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); + if (CC2 != AArch64CC::AL) { + SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, + Cmp); + } - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), - A64CMP, IfTrue, IfFalse, - DAG.getConstant(A64CC::NE, MVT::i32)); + return BR1; } -static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) { - SDLoc DL(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); +SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, + SelectionDAG &DAG) const { EVT VT = Op.getValueType(); - bool Invert = false; - SDValue Op0, Op1; - unsigned Opcode; + SDLoc DL(Op); - if (LHS.getValueType().isInteger()) { + SDValue In1 = Op.getOperand(0); + SDValue In2 = Op.getOperand(1); + EVT SrcVT = In2.getValueType(); + if (SrcVT != VT) { + if (SrcVT == MVT::f32 && VT == MVT::f64) + In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); + else if (SrcVT == MVT::f64 && VT == MVT::f32) + In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0)); + else + // FIXME: Src type is different, bail out for now. Can VT really be a + // vector type? + return SDValue(); + } - // Attempt to use Vector Integer Compare Mask Test instruction. - // TST = icmp ne (and (op0, op1), zero). 
- if (CC == ISD::SETNE) { - if (((LHS.getOpcode() == ISD::AND) && - ISD::isBuildVectorAllZeros(RHS.getNode())) || - ((RHS.getOpcode() == ISD::AND) && - ISD::isBuildVectorAllZeros(LHS.getNode()))) { - - SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS; - SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0)); - SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1)); - return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS); - } + EVT VecVT; + EVT EltVT; + SDValue EltMask, VecVal1, VecVal2; + if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) { + EltVT = MVT::i32; + VecVT = MVT::v4i32; + EltMask = DAG.getConstant(0x80000000ULL, EltVT); + + if (!VT.isVector()) { + VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, + DAG.getUNDEF(VecVT), In1); + VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, + DAG.getUNDEF(VecVT), In2); + } else { + VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); + VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2); } - - // Attempt to use Vector Integer Compare Mask against Zero instr (Signed). - // Note: Compare against Zero does not support unsigned predicates. - if ((ISD::isBuildVectorAllZeros(RHS.getNode()) || - ISD::isBuildVectorAllZeros(LHS.getNode())) && - !isUnsignedIntSetCC(CC)) { - - // If LHS is the zero value, swap operands and CondCode. - if (ISD::isBuildVectorAllZeros(LHS.getNode())) { - CC = getSetCCSwappedOperands(CC); - Op0 = RHS; - } else - Op0 = LHS; - - // Ensure valid CondCode for Compare Mask against Zero instruction: - // EQ, GE, GT, LE, LT. - if (ISD::SETNE == CC) { - Invert = true; - CC = ISD::SETEQ; - } - - // Using constant type to differentiate integer and FP compares with zero. - Op1 = DAG.getConstant(0, MVT::i32); - Opcode = AArch64ISD::NEON_CMPZ; - + } else if (VT == MVT::f64 || VT == MVT::v2f64) { + EltVT = MVT::i64; + VecVT = MVT::v2i64; + + // We want to materialize a mask with the the high bit set, but the AdvSIMD + // immediate moves cannot materialize that in a single instruction for + // 64-bit elements. Instead, materialize zero and then negate it. + EltMask = DAG.getConstant(0, EltVT); + + if (!VT.isVector()) { + VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, + DAG.getUNDEF(VecVT), In1); + VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT, + DAG.getUNDEF(VecVT), In2); } else { - // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned). - // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT. - bool Swap = false; - switch (CC) { - default: - llvm_unreachable("Illegal integer comparison."); - case ISD::SETEQ: - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - break; - case ISD::SETNE: - Invert = true; - CC = ISD::SETEQ; - break; - case ISD::SETULT: - case ISD::SETULE: - case ISD::SETLT: - case ISD::SETLE: - Swap = true; - CC = getSetCCSwappedOperands(CC); - } - - if (Swap) - std::swap(LHS, RHS); - - Opcode = AArch64ISD::NEON_CMP; - Op0 = LHS; - Op1 = RHS; + VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); + VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2); } + } else { + llvm_unreachable("Invalid type for copysign!"); + } - // Generate Compare Mask instr or Compare Mask against Zero instr. 
- SDValue NeonCmp = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); + std::vector BuildVectorOps; + for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) + BuildVectorOps.push_back(EltMask); - if (Invert) - NeonCmp = DAG.getNOT(DL, NeonCmp, VT); + SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps); - return NeonCmp; + // If we couldn't materialize the mask above, then the mask vector will be + // the zero vector, and we need to negate it here. + if (VT == MVT::f64 || VT == MVT::v2f64) { + BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec); + BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec); + BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec); } - // Now handle Floating Point cases. - // Attempt to use Vector Floating Point Compare Mask against Zero instruction. - if (ISD::isBuildVectorAllZeros(RHS.getNode()) || - ISD::isBuildVectorAllZeros(LHS.getNode())) { + SDValue Sel = + DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); - // If LHS is the zero value, swap operands and CondCode. - if (ISD::isBuildVectorAllZeros(LHS.getNode())) { - CC = getSetCCSwappedOperands(CC); - Op0 = RHS; - } else - Op0 = LHS; - - // Using constant type to differentiate integer and FP compares with zero. - Op1 = DAG.getConstantFP(0, MVT::f32); - Opcode = AArch64ISD::NEON_CMPZ; - } else { - // Attempt to use Vector Floating Point Compare Mask instruction. - Op0 = LHS; - Op1 = RHS; - Opcode = AArch64ISD::NEON_CMP; - } + if (VT == MVT::f32) + return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel); + else if (VT == MVT::f64) + return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel); + else + return DAG.getNode(ISD::BITCAST, DL, VT, Sel); +} - SDValue NeonCmpAlt; - // Some register compares have to be implemented with swapped CC and operands, - // e.g.: OLT implemented as OGT with swapped operands. - bool SwapIfRegArgs = false; +SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { + if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::NoImplicitFloat)) + return SDValue(); - // Ensure valid CondCode for FP Compare Mask against Zero instruction: - // EQ, GE, GT, LE, LT. - // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT. - switch (CC) { - default: - llvm_unreachable("Illegal FP comparison"); - case ISD::SETUNE: - case ISD::SETNE: - Invert = true; // Fallthrough - case ISD::SETOEQ: - case ISD::SETEQ: - CC = ISD::SETEQ; - break; - case ISD::SETOLT: - case ISD::SETLT: - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETOGT: - case ISD::SETGT: - CC = ISD::SETGT; - break; - case ISD::SETOLE: - case ISD::SETLE: - CC = ISD::SETLE; - SwapIfRegArgs = true; - break; - case ISD::SETOGE: - case ISD::SETGE: - CC = ISD::SETGE; - break; - case ISD::SETUGE: - Invert = true; - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETULE: - Invert = true; - CC = ISD::SETGT; - break; - case ISD::SETUGT: - Invert = true; - CC = ISD::SETLE; - SwapIfRegArgs = true; - break; - case ISD::SETULT: - Invert = true; - CC = ISD::SETGE; - break; - case ISD::SETUEQ: - Invert = true; // Fallthrough - case ISD::SETONE: - // Expand this to (OGT |OLT). - NeonCmpAlt = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT)); - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - case ISD::SETUO: - Invert = true; // Fallthrough - case ISD::SETO: - // Expand this to (OGE | OLT). 
- NeonCmpAlt = - DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE)); - CC = ISD::SETLT; - SwapIfRegArgs = true; - break; - } + // While there is no integer popcount instruction, it can + // be more efficiently lowered to the following sequence that uses + // AdvSIMD registers/instructions as long as the copies to/from + // the AdvSIMD registers are cheap. + // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd + // CNT V0.8B, V0.8B // 8xbyte pop-counts + // ADDV B0, V0.8B // sum 8xbyte pop-counts + // UMOV X0, V0.B[0] // copy byte result back to integer reg + SDValue Val = Op.getOperand(0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8); - if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) { - CC = getSetCCSwappedOperands(CC); - std::swap(Op0, Op1); + SDValue VecVal; + if (VT == MVT::i32) { + VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); + VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec, + VecVal); + } else { + VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); } - // Generate FP Compare Mask instr or FP Compare Mask against Zero instr - SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); + SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal); + SDValue UaddLV = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, + DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop); - if (NeonCmpAlt.getNode()) - NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt); + if (VT == MVT::i64) + UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); + return UaddLV; +} - if (Invert) - NeonCmp = DAG.getNOT(DL, NeonCmp, VT); +SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - return NeonCmp; -} + if (Op.getValueType().isVector()) + return LowerVSETCC(Op, DAG); -// (SETCC lhs, rhs, condcode) -SDValue -AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - SDLoc dl(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(2))->get(); - EVT VT = Op.getValueType(); + SDLoc dl(Op); - if (VT.isVector()) - return LowerVectorSETCC(Op, DAG); + // We chose ZeroOrOneBooleanContents, so use zero and one. + EVT VT = Op.getValueType(); + SDValue TVal = DAG.getConstant(1, VT); + SDValue FVal = DAG.getConstant(0, VT); + // Handle f128 first, since one possible outcome is a normal integer + // comparison which gets picked up by the next if statement. if (LHS.getValueType() == MVT::f128) { - // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS - // for the rest of the function (some i32 or i64 values). softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); // If softenSetCCOperands returned a scalar, use it. - if (RHS.getNode() == 0) { + if (!RHS.getNode()) { assert(LHS.getValueType() == Op.getValueType() && "Unexpected setcc expansion!"); return LHS; @@ -3021,205 +3106,403 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { } if (LHS.getValueType().isInteger()) { - SDValue A64cc; + SDValue CCVal; + SDValue Cmp = + getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); + + // Note that we inverted the condition above, so we reverse the order of + // the true and false operands here. This will allow the setcc to be + // matched to a single CSINC instruction. + return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); + } + + // Now we know we're dealing with FP values. 
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead + // and do the comparison. + SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, - CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT), - A64cc); + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); + if (CC2 == AArch64CC::AL) { + changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); + SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); + + // Note that we inverted the condition above, so we reverse the order of + // the true and false operands here. This will allow the setcc to be + // matched to a single CSINC instruction. + return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); + } else { + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't + // totally clean. Some of them require two CSELs to implement. As is in + // this case, we emit the first CSEL and then emit a second using the output + // of the first as the RHS. We're effectively OR'ing the two CC's together. + + // FIXME: It would be nice if we could match the two CSELs to two CSINCs. + SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); + SDValue CS1 = + DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); + + SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } +} - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, - CmpOp, DAG.getConstant(1, VT), - DAG.getConstant(0, VT), A64cc); +/// A SELECT_CC operation is really some kind of max or min if both values being +/// compared are, in some sense, equal to the results in either case. However, +/// it is permissible to compare f32 values and produce directly extended f64 +/// values. +/// +/// Extending the comparison operands would also be allowed, but is less likely +/// to happen in practice since their use is right here. Note that truncate +/// operations would *not* be semantically equivalent. 
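+///
+/// For example (a sketch, with a and b standing for arbitrary float values):
+///
+///   double r = (a > b) ? (double)a : (double)b;
+///
+/// compares at f32 but selects the f64 extensions, and is still a maximum, so
+/// each (compare operand, result operand) pair here is compatible.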
+static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { + if (Cmp == Result) + return true; - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - DAG.getConstant(1, VT), A64SELECT_CC, A64cc); + ConstantFPSDNode *CCmp = dyn_cast(Cmp); + ConstantFPSDNode *CResult = dyn_cast(Result); + if (CCmp && CResult && Cmp.getValueType() == MVT::f32 && + Result.getValueType() == MVT::f64) { + bool Lossy; + APFloat CmpVal = CCmp->getValueAPF(); + CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy); + return CResult->getValueAPF().bitwiseIsEqual(CmpVal); } - return A64SELECT_CC; + return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; } -static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) { - SDLoc dl(Op); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue IfTrue = Op.getOperand(2); - SDValue IfFalse = Op.getOperand(3); - EVT IfTrueVT = IfTrue.getValueType(); - EVT CondVT = IfTrueVT.changeVectorElementTypeToInteger(); - ISD::CondCode CC = cast(Op.getOperand(4))->get(); +SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, + SelectionDAG &DAG) const { + SDValue CC = Op->getOperand(0); + SDValue TVal = Op->getOperand(1); + SDValue FVal = Op->getOperand(2); + SDLoc DL(Op); - // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we will - // use NEON compare. - if ((LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64)) { - EVT EltVT = LHS.getValueType(); - unsigned EltNum = 128 / EltVT.getSizeInBits(); - EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum); - unsigned SubConstant = - (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 :AArch64::sub_64; - EVT CEltT = (LHS.getValueType() == MVT::f32) ? MVT::i32 : MVT::i64; - EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum); - - LHS - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - VT, DAG.getTargetConstant(0, MVT::i32), LHS, - DAG.getTargetConstant(SubConstant, MVT::i32)), 0); - RHS - = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl, - VT, DAG.getTargetConstant(0, MVT::i32), RHS, - DAG.getTargetConstant(SubConstant, MVT::i32)), 0); - - SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC); - SDValue ResCC = LowerVectorSETCC(VSetCC, DAG); - if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) { - EVT DUPVT = - EVT::getVectorVT(*DAG.getContext(), CEltT, - IfTrueVT.getSizeInBits() / CEltT.getSizeInBits()); - ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC, - DAG.getConstant(0, MVT::i64, false)); - - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC); - } else { - // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function - // can't handle them and will hit this assert. - assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() && - "Vector of IfTrue & IfFalse is too small."); - - unsigned ExEltNum = - EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits(); - EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum); - ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC, - DAG.getConstant(0, MVT::i64, false)); - ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC); - } - SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(), - ResCC, IfTrue, IfFalse); - return VSelect; - } - - // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are - // vectors. 
-  A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
-  CondCode = FPCCToA64CC(CC, Alternative);
-  SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
-  SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
-                              DAG.getCondCode(CC));
-  EVT SEVT = MVT::i32;
-  if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32)
-    SEVT = MVT::i64;
-  SDValue AllOne = DAG.getConstant(-1, SEVT);
-  SDValue AllZero = DAG.getConstant(0, SEVT);
-  SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC,
-                                     AllOne, AllZero, A64cc);
-
-  if (Alternative != A64CC::Invalid) {
-    A64cc = DAG.getConstant(Alternative, MVT::i32);
-    A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
-                               SetCC, AllOne, A64SELECT_CC, A64cc);
-  }
-  SDValue VDup;
-  if (IfTrue.getValueType().getVectorNumElements() == 1)
-    VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, CondVT, A64SELECT_CC);
-  else
-    VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, CondVT, A64SELECT_CC);
-  SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
-                                VDup, IfTrue, IfFalse);
-  return VSelect;
-}
+  unsigned Opc = CC.getOpcode();
+  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+  // instruction.
+  if (CC.getResNo() == 1 &&
+      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+    // Only lower legal XALUO ops.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
+      return SDValue();
 
-// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
-SDValue
-AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-  SDLoc dl(Op);
+    AArch64CC::CondCode OFCC;
+    SDValue Value, Overflow;
+    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
+    SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
+
+    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+                       CCVal, Overflow);
+  }
+
+  if (CC.getOpcode() == ISD::SETCC)
+    return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
+                           cast<CondCodeSDNode>(CC.getOperand(2))->get());
+  else
+    return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
+                           FVal, ISD::SETNE);
+}
+
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
-  SDValue IfTrue = Op.getOperand(2);
-  SDValue IfFalse = Op.getOperand(3);
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-
-  if (IfTrue.getValueType().isVector())
-    return LowerVectorSELECT_CC(Op, DAG);
+  SDValue TVal = Op.getOperand(2);
+  SDValue FVal = Op.getOperand(3);
+  SDLoc dl(Op);
 
+  // Handle f128 first, because it will result in a comparison of some RTLIB
+  // call result against zero.
   if (LHS.getValueType() == MVT::f128) {
-    // f128 comparisons are lowered to libcalls, but slot in nicely here
-    // afterwards.
     softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
 
     // If softenSetCCOperands returned a scalar, we need to compare the result
     // against zero to select between true and false values.
-    if (RHS.getNode() == 0) {
+    if (!RHS.getNode()) {
       RHS = DAG.getConstant(0, LHS.getValueType());
       CC = ISD::SETNE;
     }
   }
 
+  // Handle integers first.
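+  // (Roadmap for the constant cases below: prefer CSINV when one value is the
+  // bitwise NOT of the other, CSNEG when it is the negation, and CSINC when
+  // the two constants differ by exactly one.)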
   if (LHS.getValueType().isInteger()) {
-    SDValue A64cc;
+    assert((LHS.getValueType() == RHS.getValueType()) &&
+           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
+
+    unsigned Opcode = AArch64ISD::CSEL;
+
+    // If both the TVal and the FVal are constants, see if we can swap them in
+    // order to form a CSINV or CSINC out of them.
+    ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
+    ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
+
+    if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
+      std::swap(TVal, FVal);
+      std::swap(CTVal, CFVal);
+      CC = ISD::getSetCCInverse(CC, true);
+    } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
+      std::swap(TVal, FVal);
+      std::swap(CTVal, CFVal);
+      CC = ISD::getSetCCInverse(CC, true);
+    } else if (TVal.getOpcode() == ISD::XOR) {
+      // If TVal is a NOT we want to swap TVal and FVal so that we can match
+      // with a CSINV rather than a CSEL.
+      ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
+
+      if (CVal && CVal->isAllOnesValue()) {
+        std::swap(TVal, FVal);
+        std::swap(CTVal, CFVal);
+        CC = ISD::getSetCCInverse(CC, true);
+      }
+    } else if (TVal.getOpcode() == ISD::SUB) {
+      // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
+      // that we can match with a CSNEG rather than a CSEL.
+      ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
+
+      if (CVal && CVal->isNullValue()) {
+        std::swap(TVal, FVal);
+        std::swap(CTVal, CFVal);
+        CC = ISD::getSetCCInverse(CC, true);
+      }
+    } else if (CTVal && CFVal) {
+      const int64_t TrueVal = CTVal->getSExtValue();
+      const int64_t FalseVal = CFVal->getSExtValue();
+      bool Swap = false;
+
+      // If both TVal and FVal are constants, see if FVal is the
+      // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
+      // instead of a CSEL in that case.
+      if (TrueVal == ~FalseVal) {
+        Opcode = AArch64ISD::CSINV;
+      } else if (TrueVal == -FalseVal) {
+        Opcode = AArch64ISD::CSNEG;
+      } else if (TVal.getValueType() == MVT::i32) {
+        // If our operands are only 32-bit wide, make sure we use 32-bit
+        // arithmetic for the check whether we can use CSINC. This ensures that
+        // the addition in the check will wrap around properly in case there is
+        // an overflow (which would not be the case if we do the check with
+        // 64-bit arithmetic).
+        const uint32_t TrueVal32 = CTVal->getZExtValue();
+        const uint32_t FalseVal32 = CFVal->getZExtValue();
+
+        if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
+          Opcode = AArch64ISD::CSINC;
+
+          if (TrueVal32 > FalseVal32) {
+            Swap = true;
+          }
+        }
+        // 64-bit check whether we can use CSINC.
+      } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
+        Opcode = AArch64ISD::CSINC;
+
+        if (TrueVal > FalseVal) {
+          Swap = true;
+        }
+      }
+
+      // Swap TVal and FVal if necessary.
+      if (Swap) {
+        std::swap(TVal, FVal);
+        std::swap(CTVal, CFVal);
+        CC = ISD::getSetCCInverse(CC, true);
+      }
+
+      if (Opcode != AArch64ISD::CSEL) {
+        // Drop FVal since we can get its value by simply inverting/negating
+        // TVal.
+        FVal = TVal;
+      }
+    }
+
+    SDValue CCVal;
+    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
+
+    EVT VT = Op.getValueType();
+    return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
+  }
+
+  // Now we know we're dealing with FP values.
+  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+  assert(LHS.getValueType() == RHS.getValueType());
+  EVT VT = Op.getValueType();
+
+  // Try to match this select into a max/min operation, which have dedicated
+  // opcodes in the instruction set.
+ // FIXME: This is not correct in the presence of NaNs, so we only enable this + // in no-NaNs mode. + if (getTargetMachine().Options.NoNaNsFPMath) { + SDValue MinMaxLHS = TVal, MinMaxRHS = FVal; + if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) && + selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) { + CC = ISD::getSetCCSwappedOperands(CC); + std::swap(MinMaxLHS, MinMaxRHS); + } - // Integers are handled in a separate function because the combinations of - // immediates and tests can get hairy and we may want to fiddle things. - SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl); + if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) && + selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) { + switch (CC) { + default: + break; + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETOGT: + case ISD::SETOGE: + return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS); + break; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETOLT: + case ISD::SETOLE: + return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS); + break; + } + } + } - return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), CmpOp, - IfTrue, IfFalse, A64cc); + // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead + // and do the comparison. + SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG); + + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally + // clean. Some of them require two CSELs to implement. + AArch64CC::CondCode CC1, CC2; + changeFPCCToAArch64CC(CC, CC1, CC2); + SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); + SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); + + // If we need a second CSEL, emit it, using the output of the first as the + // RHS. We're effectively OR'ing the two CC's together. + if (CC2 != AArch64CC::AL) { + SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } - // Note that some LLVM floating-point CondCodes can't be lowered to a single - // conditional branch, hence FPCCToA64CC can set a second test, where either - // passing is sufficient. - A64CC::CondCodes CondCode, Alternative = A64CC::Invalid; - CondCode = FPCCToA64CC(CC, Alternative); - SDValue A64cc = DAG.getConstant(CondCode, MVT::i32); - SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS, - DAG.getCondCode(CC)); - SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, - Op.getValueType(), - SetCC, IfTrue, IfFalse, A64cc); + // Otherwise, return the output of the first CSEL. + return CS1; +} - if (Alternative != A64CC::Invalid) { - A64cc = DAG.getConstant(Alternative, MVT::i32); - A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), - SetCC, IfTrue, A64SELECT_CC, A64cc); +SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, + SelectionDAG &DAG) const { + // Jump table entries as PC relative offsets. No additional tweaking + // is necessary here. Just get the address of the jump table. 
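+  // (For the small code model this becomes the usual two instructions, shown
+  // here with an illustrative label name:
+  //   adrp x8, .LJTI0_0
+  //   add  x8, x8, :lo12:.LJTI0_0
+  // The large-model path below instead materializes the absolute address with
+  // a MOVZ/MOVK sequence.)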
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  EVT PtrVT = getPointerTy();
+  SDLoc DL(Op);
+
+  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+      !Subtarget->isTargetMachO()) {
+    const unsigned char MO_NC = AArch64II::MO_NC;
+    return DAG.getNode(
+        AArch64ISD::WrapperLarge, DL, PtrVT,
+        DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3),
+        DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC),
+        DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC),
+        DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+                               AArch64II::MO_G0 | MO_NC));
   }
 
-  return A64SELECT_CC;
+  SDValue Hi =
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE);
+  SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+                                      AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+  return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
 }
 
-SDValue
-AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
-  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
-  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  EVT PtrVT = getPointerTy();
+  SDLoc DL(Op);
+
+  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+    // Use the GOT for the large code model on iOS.
+    if (Subtarget->isTargetMachO()) {
+      SDValue GotAddr = DAG.getTargetConstantPool(
+          CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
+          AArch64II::MO_GOT);
+      return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
+    }
 
-  // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
-  // rather than just 8.
-  return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
-                       Op.getOperand(1), Op.getOperand(2),
-                       DAG.getConstant(32, MVT::i32), 8, false, false,
-                       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+    const unsigned char MO_NC = AArch64II::MO_NC;
+    return DAG.getNode(
+        AArch64ISD::WrapperLarge, DL, PtrVT,
+        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+                                  CP->getOffset(), AArch64II::MO_G3),
+        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+                                  CP->getOffset(), AArch64II::MO_G2 | MO_NC),
+        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+                                  CP->getOffset(), AArch64II::MO_G1 | MO_NC),
+        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+                                  CP->getOffset(), AArch64II::MO_G0 | MO_NC));
+  } else {
+    // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on
+    // ELF, the only valid one on Darwin.
+    SDValue Hi =
+        DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+                                  CP->getOffset(), AArch64II::MO_PAGE);
+    SDValue Lo = DAG.getTargetConstantPool(
+        CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
+        AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+    SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+    return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+  }
 }
 
+SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+  EVT PtrVT = getPointerTy();
+  SDLoc DL(Op);
+  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+      !Subtarget->isTargetMachO()) {
+    const unsigned char MO_NC = AArch64II::MO_NC;
+    return DAG.getNode(
+        AArch64ISD::WrapperLarge, DL, PtrVT,
+        DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3),
+        DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
+        DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
+        DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
+  } else {
+    SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE);
+    SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF |
+                                                             AArch64II::MO_NC);
+    SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+    return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+  }
+}
+
-SDValue
-AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  AArch64FunctionInfo *FuncInfo =
+      DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+
+  SDLoc DL(Op);
+  SDValue FR =
+      DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+                      MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
+                                                  SelectionDAG &DAG) const {
   // The layout of the va_list struct is specified in the AArch64 Procedure Call
   // Standard, section B.3.
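   //
   // As a sketch, with field names from that document (the offsets match the
   // stores emitted below):
   //
   //   typedef struct {
   //     void *__stack;  // offset 0
   //     void *__gr_top; // offset 8
   //     void *__vr_top; // offset 16
   //     int __gr_offs;  // offset 24
   //     int __vr_offs;  // offset 28
   //   } va_list;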
MachineFunction &MF = DAG.getMachineFunction(); - AArch64MachineFunctionInfo *FuncInfo - = MF.getInfo(); + AArch64FunctionInfo *FuncInfo = MF.getInfo(); SDLoc DL(Op); SDValue Chain = Op.getOperand(0); @@ -3228,1471 +3511,1911 @@ AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SmallVector MemOps; // void *__stack at offset 0 - SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(), - getPointerTy()); + SDValue Stack = + DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, - MachinePointerInfo(SV), false, false, 0)); + MachinePointerInfo(SV), false, false, 8)); // void *__gr_top at offset 8 - int GPRSize = FuncInfo->getVariadicGPRSize(); + int GPRSize = FuncInfo->getVarArgsGPRSize(); if (GPRSize > 0) { SDValue GRTop, GRTopAddr; GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, DAG.getConstant(8, getPointerTy())); - GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy()); + GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy()); GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, DAG.getConstant(GPRSize, getPointerTy())); MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, - MachinePointerInfo(SV, 8), - false, false, 0)); + MachinePointerInfo(SV, 8), false, false, 8)); } // void *__vr_top at offset 16 - int FPRSize = FuncInfo->getVariadicFPRSize(); + int FPRSize = FuncInfo->getVarArgsFPRSize(); if (FPRSize > 0) { SDValue VRTop, VRTopAddr; VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, DAG.getConstant(16, getPointerTy())); - VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy()); + VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy()); VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, DAG.getConstant(FPRSize, getPointerTy())); MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, - MachinePointerInfo(SV, 16), - false, false, 0)); + MachinePointerInfo(SV, 16), false, false, 8)); } // int __gr_offs at offset 24 SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, DAG.getConstant(24, getPointerTy())); MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), - GROffsAddr, MachinePointerInfo(SV, 24), - false, false, 0)); + GROffsAddr, MachinePointerInfo(SV, 24), false, + false, 4)); // int __vr_offs at offset 28 SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, DAG.getConstant(28, getPointerTy())); MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), - VROffsAddr, MachinePointerInfo(SV, 28), - false, false, 0)); + VROffsAddr, MachinePointerInfo(SV, 28), false, + false, 4)); - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], - MemOps.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } -SDValue -AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: llvm_unreachable("Don't know how to custom lower this!"); - case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128); - case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128); - case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128); - case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128); - case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true); - case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false); - case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true); - case ISD::UINT_TO_FP: return 
LowerINT_TO_FP(Op, DAG, false); - case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); - case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); - case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); - case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - - case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); - case ISD::SRL_PARTS: - case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); - - case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::BRCOND: return LowerBRCOND(Op, DAG); - case ISD::BR_CC: return LowerBR_CC(Op, DAG); - case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); - case ISD::VACOPY: return LowerVACOPY(Op, DAG); - case ISD::VASTART: return LowerVASTART(Op, DAG); - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG, getSubtarget()); - case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); - case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); - } - - return SDValue(); +SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG) + : LowerAAPCS_VASTART(Op, DAG); } -/// Check if the specified splat value corresponds to a valid vector constant -/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If -/// so, return the encoded 8-bit immediate and the OpCmode instruction fields -/// values. -static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, - unsigned SplatBitSize, SelectionDAG &DAG, - bool is128Bits, NeonModImmType type, EVT &VT, - unsigned &Imm, unsigned &OpCmode) { - switch (SplatBitSize) { - default: - llvm_unreachable("unexpected size for isNeonModifiedImm"); - case 8: { - if (type != Neon_Mov_Imm) - return false; - assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); - // Neon movi per byte: Op=0, Cmode=1110. - OpCmode = 0xe; - Imm = SplatBits; - VT = is128Bits ? MVT::v16i8 : MVT::v8i8; - break; - } - case 16: { - // Neon move inst per halfword - VT = is128Bits ? MVT::v8i16 : MVT::v4i16; - if ((SplatBits & ~0xff) == 0) { - // Value = 0x00nn is 0x00nn LSL 0 - // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000 - // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001 - // Op=x, Cmode=100y - Imm = SplatBits; - OpCmode = 0x8; - break; - } - if ((SplatBits & ~0xff00) == 0) { - // Value = 0xnn00 is 0x00nn LSL 8 - // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010 - // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011 - // Op=x, Cmode=101x - Imm = SplatBits >> 8; - OpCmode = 0xa; - break; - } - // can't handle any other - return false; - } - - case 32: { - // First the LSL variants (MSL is unusable by some interested instructions). +SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, + SelectionDAG &DAG) const { + // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single + // pointer. + unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32; + const Value *DestSV = cast(Op.getOperand(3))->getValue(); + const Value *SrcSV = cast(Op.getOperand(4))->getValue(); - // Neon move instr per word, shift zeros - VT = is128Bits ? 
MVT::v4i32 : MVT::v2i32; - if ((SplatBits & ~0xff) == 0) { - // Value = 0x000000nn is 0x000000nn LSL 0 - // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000 - // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001 - // Op=x, Cmode=000x - Imm = SplatBits; - OpCmode = 0; - break; - } - if ((SplatBits & ~0xff00) == 0) { - // Value = 0x0000nn00 is 0x000000nn LSL 8 - // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010 - // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011 - // Op=x, Cmode=001x - Imm = SplatBits >> 8; - OpCmode = 0x2; - break; - } - if ((SplatBits & ~0xff0000) == 0) { - // Value = 0x00nn0000 is 0x000000nn LSL 16 - // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100 - // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101 - // Op=x, Cmode=010x - Imm = SplatBits >> 16; - OpCmode = 0x4; - break; - } - if ((SplatBits & ~0xff000000) == 0) { - // Value = 0xnn000000 is 0x000000nn LSL 24 - // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110 - // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111 - // Op=x, Cmode=011x - Imm = SplatBits >> 24; - OpCmode = 0x6; - break; - } + return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), + Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), + 8, false, false, MachinePointerInfo(DestSV), + MachinePointerInfo(SrcSV)); +} - // Now the MSL immediates. +SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin() && + "automatic va_arg instruction only works on Darwin"); - // Neon move instr per word, shift ones - if ((SplatBits & ~0xffff) == 0 && - ((SplatBits | SplatUndef) & 0xff) == 0xff) { - // Value = 0x0000nnff is 0x000000nn MSL 8 - // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100 - // Op=x, Cmode=1100 - Imm = SplatBits >> 8; - OpCmode = 0xc; - break; - } - if ((SplatBits & ~0xffffff) == 0 && - ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { - // Value = 0x00nnffff is 0x000000nn MSL 16 - // movi: Op=1, Cmode= 1101; mvni: Op=1, Cmode= 1101 - // Op=x, Cmode=1101 - Imm = SplatBits >> 16; - OpCmode = 0xd; - break; - } - // can't handle any other - return false; + const Value *V = cast(Op.getOperand(2))->getValue(); + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue Chain = Op.getOperand(0); + SDValue Addr = Op.getOperand(1); + unsigned Align = Op.getConstantOperandVal(3); + + SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr, + MachinePointerInfo(V), false, false, false, 0); + Chain = VAList.getValue(1); + + if (Align > 8) { + assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); + VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(Align - 1, getPointerTy())); + VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList, + DAG.getConstant(-(int64_t)Align, getPointerTy())); } - case 64: { - if (type != Neon_Mov_Imm) - return false; - // Neon move instr bytemask, where each byte is either 0x00 or 0xff. - // movi Op=1, Cmode=1110. - OpCmode = 0x1e; - uint64_t BitMask = 0xff; - uint64_t Val = 0; - unsigned ImmMask = 1; - Imm = 0; - for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { - if (((SplatBits | SplatUndef) & BitMask) == BitMask) { - Val |= BitMask; - Imm |= ImmMask; - } else if ((SplatBits & BitMask) != 0) { - return false; - } - BitMask <<= 8; - ImmMask <<= 1; - } - SplatBits = Val; - VT = is128Bits ? 
MVT::v2i64 : MVT::v1i64; - break; + Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); + + // Scalar integer and FP values smaller than 64 bits are implicitly extended + // up to 64 bits. At the very least, we have to increase the striding of the + // vaargs list to match this, and for FP values we need to introduce + // FP_ROUND nodes as well. + if (VT.isInteger() && !VT.isVector()) + ArgSize = 8; + bool NeedFPTrunc = false; + if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { + ArgSize = 8; + NeedFPTrunc = true; } + + // Increment the pointer, VAList, to the next vaarg + SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, + DAG.getConstant(ArgSize, getPointerTy())); + // Store the incremented VAList to the legalized pointer + SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V), + false, false, 0); + + // Load the actual argument out of the pointer VAList + if (NeedFPTrunc) { + // Load the value as an f64. + SDValue WideFP = DAG.getLoad(MVT::f64, DL, APStore, VAList, + MachinePointerInfo(), false, false, false, 0); + // Round the value down to an f32. + SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0), + DAG.getIntPtrConstant(1)); + SDValue Ops[] = { NarrowFP, WideFP.getValue(1) }; + // Merge the rounded value with the chain output of the load. + return DAG.getMergeValues(Ops, DL); } - return true; + return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo(), false, + false, false, 0); } -static SDValue PerformANDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - // We're looking for an SRA/SHL pair which form an SBFX. - - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); - - if (!isa(N->getOperand(1))) - return SDValue(); - - uint64_t TruncMask = N->getConstantOperandVal(1); - if (!isMask_64(TruncMask)) - return SDValue(); - - uint64_t Width = CountPopulation_64(TruncMask); - SDValue Shift = N->getOperand(0); - - if (Shift.getOpcode() != ISD::SRL) - return SDValue(); +SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); - if (!isa(Shift->getOperand(1))) - return SDValue(); - uint64_t LSB = Shift->getConstantOperandVal(1); + EVT VT = Op.getValueType(); + SDLoc DL(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + SDValue FrameAddr = + DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), false, false, false, 0); + return FrameAddr; +} - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) - return SDValue(); +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. 
+unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch(RegName) + .Case("sp", AArch64::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} - return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0), - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(LSB + Width - 1, MVT::i64)); -} - -/// For a true bitfield insert, the bits getting into that contiguous mask -/// should come from the low part of an existing value: they must be formed from -/// a compatible SHL operation (unless they're already low). This function -/// checks that condition and returns the least-significant bit that's -/// intended. If the operation not a field preparation, -1 is returned. -static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT, - SDValue &MaskedVal, uint64_t Mask) { - if (!isShiftedMask_64(Mask)) - return -1; - - // Now we need to alter MaskedVal so that it is an appropriate input for a BFI - // instruction. BFI will do a left-shift by LSB before applying the mask we've - // spotted, so in general we should pre-emptively "undo" that by making sure - // the incoming bits have had a right-shift applied to them. - // - // This right shift, however, will combine with existing left/right shifts. In - // the simplest case of a completely straight bitfield operation, it will be - // expected to completely cancel out with an existing SHL. More complicated - // cases (e.g. bitfield to bitfield copy) may still need a real shift before - // the BFI. - - uint64_t LSB = countTrailingZeros(Mask); - int64_t ShiftRightRequired = LSB; - if (MaskedVal.getOpcode() == ISD::SHL && - isa(MaskedVal.getOperand(1))) { - ShiftRightRequired -= MaskedVal.getConstantOperandVal(1); - MaskedVal = MaskedVal.getOperand(0); - } else if (MaskedVal.getOpcode() == ISD::SRL && - isa(MaskedVal.getOperand(1))) { - ShiftRightRequired += MaskedVal.getConstantOperandVal(1); - MaskedVal = MaskedVal.getOperand(0); - } - - if (ShiftRightRequired > 0) - MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal, - DAG.getConstant(ShiftRightRequired, MVT::i64)); - else if (ShiftRightRequired < 0) { - // We could actually end up with a residual left shift, for example with - // "struc.bitfield = val << 1". - MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal, - DAG.getConstant(-ShiftRightRequired, MVT::i64)); - } - - return LSB; -} - -/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by -/// a mask and an extension. Returns true if a BFI was found and provides -/// information on its surroundings. -static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask, - bool &Extended) { - Extended = false; - if (N.getOpcode() == ISD::ZERO_EXTEND) { - Extended = true; - N = N.getOperand(0); - } - - if (N.getOpcode() == ISD::AND && isa(N.getOperand(1))) { - Mask = N->getConstantOperandVal(1); - N = N.getOperand(0); - } else { - // Mask is the whole width. 
- Mask = -1ULL >> (64 - N.getValueType().getSizeInBits()); - } +SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); - if (N.getOpcode() == AArch64ISD::BFI) { - BFI = N; - return true; + EVT VT = Op.getValueType(); + SDLoc DL(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(8, getPointerTy()); + return DAG.getLoad(VT, DL, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), + MachinePointerInfo(), false, false, false, 0); } - return false; + // Return LR, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass); + return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); } -/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which -/// is roughly equivalent to (and (BFI ...), mask). This form is used because it -/// can often be further combined with a larger mask. Ultimately, we want mask -/// to be 2^32-1 or 2^64-1 so the AND can be skipped. -static SDValue tryCombineToBFI(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); - - assert(N->getOpcode() == ISD::OR && "Unexpected root"); - - // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or - // abandon the effort. - SDValue LHS = N->getOperand(0); - if (LHS.getOpcode() != ISD::AND) - return SDValue(); +/// LowerShiftRightParts - Lower SRA_PARTS, which returns two +/// i64 values and take a 2 x i64 value to shift plus a shift amount. +SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue ARMcc; + unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; - uint64_t LHSMask; - if (isa(LHS.getOperand(1))) - LHSMask = LHS->getConstantOperandVal(1); - else - return SDValue(); + assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); - // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask - // is or abandon the effort. 
- SDValue RHS = N->getOperand(1); - if (RHS.getOpcode() != ISD::AND) - return SDValue(); + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, + DAG.getConstant(VTBits, MVT::i64), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, + DAG.getConstant(VTBits, MVT::i64)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - uint64_t RHSMask; - if (isa(RHS.getOperand(1))) - RHSMask = RHS->getConstantOperandVal(1); - else - return SDValue(); + SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), + ISD::SETGE, dl, DAG); + SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32); + + SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + SDValue Lo = + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + + // AArch64 shifts larger than the register width are wrapped rather than + // clamped, so we can't just emit "hi >> x". + SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue TrueValHi = Opc == ISD::SRA + ? DAG.getNode(Opc, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, MVT::i64)) + : DAG.getConstant(0, VT); + SDValue Hi = + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); - // Can't do anything if the masks are incompatible. - if (LHSMask & RHSMask) - return SDValue(); + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); +} - // Now we need one of the masks to be a contiguous field. Without loss of - // generality that should be the RHS one. - SDValue Bitfield = LHS.getOperand(0); - if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) { - // We know that LHS is a candidate new value, and RHS isn't already a better - // one. - std::swap(LHS, RHS); - std::swap(LHSMask, RHSMask); - } +/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two +/// i64 values and take a 2 x i64 value to shift plus a shift amount. +SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue ARMcc; - // We've done our best to put the right operands in the right places, all we - // can do now is check whether a BFI exists. 
- Bitfield = RHS.getOperand(0); - int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask); - if (LSB == -1) - return SDValue(); + assert(Op.getOpcode() == ISD::SHL_PARTS); + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, + DAG.getConstant(VTBits, MVT::i64), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, + DAG.getConstant(VTBits, MVT::i64)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - uint32_t Width = CountPopulation_64(RHSMask); - assert(Width && "Expected non-zero bitfield width"); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, - LHS.getOperand(0), Bitfield, - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(Width, MVT::i64)); + SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), + ISD::SETGE, dl, DAG); + SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32); + SDValue Hi = + DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); - // Mask is trivial - if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits()))) - return BFI; + // AArch64 shifts of larger than register sizes are wrapped rather than + // clamped, so we can't just emit "lo << a" if a is too big. + SDValue TrueValLo = DAG.getConstant(0, VT); + SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Lo = + DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); - return DAG.getNode(ISD::AND, DL, VT, BFI, - DAG.getConstant(LHSMask | RHSMask, VT)); + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); } -/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its -/// original input. This is surprisingly common because SROA splits things up -/// into i8 chunks, so the originally detected MaskedBFI may actually only act -/// on the low (say) byte of a word. This is then orred into the rest of the -/// word afterwards. -/// -/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)). -/// -/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the -/// MaskedBFI. We can also deal with a certain amount of extend/truncate being -/// involved. -static SDValue tryCombineToLargerBFI(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); +bool AArch64TargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + // The AArch64 target doesn't support folding offsets into global addresses. + return false; +} - // First job is to hunt for a MaskedBFI on either the left or right. Swap - // operands if it's actually on the right. - SDValue BFI; - SDValue PossExtraMask; - uint64_t ExistingMask = 0; - bool Extended = false; - if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended)) - PossExtraMask = N->getOperand(1); - else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended)) - PossExtraMask = N->getOperand(0); - else - return SDValue(); +bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. + // FIXME: We should be able to handle f128 as well with a clever lowering. 
+ if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) + return true; - // We can only combine a BFI with another compatible mask. - if (PossExtraMask.getOpcode() != ISD::AND || - !isa(PossExtraMask.getOperand(1))) - return SDValue(); + if (VT == MVT::f64) + return AArch64_AM::getFP64Imm(Imm) != -1; + else if (VT == MVT::f32) + return AArch64_AM::getFP32Imm(Imm) != -1; + return false; +} - uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1); +//===----------------------------------------------------------------------===// +// AArch64 Optimization Hooks +//===----------------------------------------------------------------------===// - // Masks must be compatible. - if (ExtraMask & ExistingMask) - return SDValue(); +//===----------------------------------------------------------------------===// +// AArch64 Inline Assembly Support +//===----------------------------------------------------------------------===// - SDValue OldBFIVal = BFI.getOperand(0); - SDValue NewBFIVal = BFI.getOperand(1); - if (Extended) { - // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be - // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments - // need to be made compatible. - assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32 - && "Invalid types for BFI"); - OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal); - NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal); +// Table of Constraints +// TODO: This is the current set of constraints supported by ARM for the +// compiler, not all of them may make sense, e.g. S may be difficult to support. +// +// r - A general register +// w - An FP/SIMD register of some size in the range v0-v31 +// x - An FP/SIMD register of some size in the range v0-v15 +// I - Constant that can be used with an ADD instruction +// J - Constant that can be used with a SUB instruction +// K - Constant that can be used with a 32-bit logical instruction +// L - Constant that can be used with a 64-bit logical instruction +// M - Constant that can be used as a 32-bit MOV immediate +// N - Constant that can be used as a 64-bit MOV immediate +// Q - A memory reference with base register and no offset +// S - A symbolic address +// Y - Floating point constant zero +// Z - Integer constant zero +// +// Note that general register operands will be output using their 64-bit x +// register name, whatever the size of the variable, unless the asm operand +// is prefixed by the %w modifier. Floating-point and SIMD register operands +// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or +// %q modifier. + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +AArch64TargetLowering::ConstraintType +AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: + break; + case 'z': + return C_Other; + case 'x': + case 'w': + return C_RegisterClass; + // An address with a single base register. Due to the way we + // currently handle addresses it is the same as 'r'. + case 'Q': + return C_Memory; + } } + return TargetLowering::getConstraintType(Constraint); +} - // We need the MaskedBFI to be combined with a mask of the *same* value. - if (PossExtraMask.getOperand(0) != OldBFIVal) - return SDValue(); - - BFI = DAG.getNode(AArch64ISD::BFI, DL, VT, - OldBFIVal, NewBFIVal, - BFI.getOperand(2), BFI.getOperand(3)); - - // If the masking is trivial, we don't need to create it. 
- if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits()))) - return BFI; - - return DAG.getNode(ISD::AND, DL, VT, BFI, - DAG.getConstant(ExtraMask | ExistingMask, VT)); +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +AArch64TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (!CallOperandVal) + return CW_Default; + Type *type = CallOperandVal->getType(); + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + case 'x': + case 'w': + if (type->isFloatingPointTy() || type->isVectorTy()) + weight = CW_Register; + break; + case 'z': + weight = CW_Constant; + break; + } + return weight; } -/// An EXTR instruction is made up of two shifts, ORed together. This helper -/// searches for and classifies those shifts. -static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, - bool &FromHi) { - if (N.getOpcode() == ISD::SHL) - FromHi = false; - else if (N.getOpcode() == ISD::SRL) - FromHi = true; - else - return false; +std::pair +AArch64TargetLowering::getRegForInlineAsmConstraint( + const std::string &Constraint, MVT VT) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 'r': + if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &AArch64::GPR64commonRegClass); + return std::make_pair(0U, &AArch64::GPR32commonRegClass); + case 'w': + if (VT == MVT::f32) + return std::make_pair(0U, &AArch64::FPR32RegClass); + if (VT.getSizeInBits() == 64) + return std::make_pair(0U, &AArch64::FPR64RegClass); + if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &AArch64::FPR128RegClass); + break; + // The instructions that this constraint is designed for can + // only take 128-bit registers so just use that regclass. + case 'x': + if (VT.getSizeInBits() == 128) + return std::make_pair(0U, &AArch64::FPR128_loRegClass); + break; + } + } + if (StringRef("{cc}").equals_lower(Constraint)) + return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); - if (!isa(N.getOperand(1))) - return false; + // Use the default implementation in TargetLowering to convert the register + // constraint into a member of a register class. + std::pair Res; + Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); + + // Not found as a standard register? + if (!Res.second) { + unsigned Size = Constraint.size(); + if ((Size == 4 || Size == 5) && Constraint[0] == '{' && + tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') { + const std::string Reg = + std::string(&Constraint[2], &Constraint[Size - 1]); + int RegNo = atoi(Reg.c_str()); + if (RegNo >= 0 && RegNo <= 31) { + // v0 - v31 are aliases of q0 - q31. + // By default we'll emit v0-v31 for this unless there's a modifier where + // we'll emit the correct register as well. 
+ Res.first = AArch64::FPR128RegClass.getRegister(RegNo); + Res.second = &AArch64::FPR128RegClass; + } + } + } - ShiftAmount = N->getConstantOperandVal(1); - Src = N->getOperand(0); - return true; + return Res; } -/// EXTR instruction extracts a contiguous chunk of bits from two existing -/// registers viewed as a high/low pair. This function looks for the pattern: -/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an -/// EXTR. Can't quite be done in TableGen because the two immediates aren't -/// independent. -static SDValue tryCombineToEXTR(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); +/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops +/// vector. If it is invalid, don't add anything to Ops. +void AArch64TargetLowering::LowerAsmOperandForConstraint( + SDValue Op, std::string &Constraint, std::vector &Ops, + SelectionDAG &DAG) const { + SDValue Result; - assert(N->getOpcode() == ISD::OR && "Unexpected root"); + // Currently only support length 1 constraints. + if (Constraint.length() != 1) + return; - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); + char ConstraintLetter = Constraint[0]; + switch (ConstraintLetter) { + default: + break; - SDValue LHS; - uint32_t ShiftLHS = 0; - bool LHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi)) - return SDValue(); + // This set of constraints deal with valid constants for various instructions. + // Validate and return a target constant for them if we can. + case 'z': { + // 'z' maps to xzr or wzr so it needs an input of 0. + ConstantSDNode *C = dyn_cast(Op); + if (!C || C->getZExtValue() != 0) + return; - SDValue RHS; - uint32_t ShiftRHS = 0; - bool RHSFromHi = 0; - if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi)) - return SDValue(); + if (Op.getValueType() == MVT::i64) + Result = DAG.getRegister(AArch64::XZR, MVT::i64); + else + Result = DAG.getRegister(AArch64::WZR, MVT::i32); + break; + } - // If they're both trying to come from the high part of the register, they're - // not really an EXTR. - if (LHSFromHi == RHSFromHi) - return SDValue(); + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + ConstantSDNode *C = dyn_cast(Op); + if (!C) + return; - if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) - return SDValue(); + // Grab the value and do some validation. + uint64_t CVal = C->getZExtValue(); + switch (ConstraintLetter) { + // The I constraint applies only to simple ADD or SUB immediate operands: + // i.e. 0 to 4095 with optional shift by 12 + // The J constraint applies only to ADD or SUB immediates that would be + // valid when negated, i.e. if [an add pattern] were to be output as a SUB + // instruction [or vice versa], in other words -1 to -4095 with optional + // left shift by 12. + case 'I': + if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal)) + break; + return; + case 'J': { + uint64_t NVal = -C->getSExtValue(); + if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) + break; + return; + } + // The K and L constraints apply *only* to logical immediates, including + // what used to be the MOVI alias for ORR (though the MOVI alias has now + // been removed and MOV should be used). 
So these constraints have to + // distinguish between bit patterns that are valid 32-bit or 64-bit + // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but + // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice + // versa. + case 'K': + if (AArch64_AM::isLogicalImmediate(CVal, 32)) + break; + return; + case 'L': + if (AArch64_AM::isLogicalImmediate(CVal, 64)) + break; + return; + // The M and N constraints are a superset of K and L respectively, for use + // with the MOV (immediate) alias. As well as the logical immediates they + // also match 32 or 64-bit immediates that can be loaded either using a + // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca + // (M) or 64-bit 0x1234000000000000 (N) etc. + // As a note some of this code is liberally stolen from the asm parser. + case 'M': { + if (!isUInt<32>(CVal)) + return; + if (AArch64_AM::isLogicalImmediate(CVal, 32)) + break; + if ((CVal & 0xFFFF) == CVal) + break; + if ((CVal & 0xFFFF0000ULL) == CVal) + break; + uint64_t NCVal = ~(uint32_t)CVal; + if ((NCVal & 0xFFFFULL) == NCVal) + break; + if ((NCVal & 0xFFFF0000ULL) == NCVal) + break; + return; + } + case 'N': { + if (AArch64_AM::isLogicalImmediate(CVal, 64)) + break; + if ((CVal & 0xFFFFULL) == CVal) + break; + if ((CVal & 0xFFFF0000ULL) == CVal) + break; + if ((CVal & 0xFFFF00000000ULL) == CVal) + break; + if ((CVal & 0xFFFF000000000000ULL) == CVal) + break; + uint64_t NCVal = ~CVal; + if ((NCVal & 0xFFFFULL) == NCVal) + break; + if ((NCVal & 0xFFFF0000ULL) == NCVal) + break; + if ((NCVal & 0xFFFF00000000ULL) == NCVal) + break; + if ((NCVal & 0xFFFF000000000000ULL) == NCVal) + break; + return; + } + default: + return; + } - if (LHSFromHi) { - std::swap(LHS, RHS); - std::swap(ShiftLHS, ShiftRHS); + // All assembler immediates are 64-bit integers. + Result = DAG.getTargetConstant(CVal, MVT::i64); + break; } - return DAG.getNode(AArch64ISD::EXTR, DL, VT, - LHS, RHS, - DAG.getConstant(ShiftRHS, MVT::i64)); -} - -/// Target-specific dag combine xforms for ISD::OR -static SDValue PerformORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *Subtarget) { + if (Result.getNode()) { + Ops.push_back(Result); + return; + } - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} - if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); +//===----------------------------------------------------------------------===// +// AArch64 Advanced SIMD Support +//===----------------------------------------------------------------------===// - // Attempt to recognise bitfield-insert operations. - SDValue Res = tryCombineToBFI(N, DCI, Subtarget); - if (Res.getNode()) - return Res; +/// WidenVector - Given a value in the V64 register class, produce the +/// equivalent value in the V128 register class. +static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) { + EVT VT = V64Reg.getValueType(); + unsigned NarrowSize = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType().getSimpleVT(); + MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); + SDLoc DL(V64Reg); + + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy), + V64Reg, DAG.getConstant(0, MVT::i32)); +} - // Attempt to combine an existing MaskedBFI operation into one with a larger - // mask. 
- Res = tryCombineToLargerBFI(N, DCI, Subtarget); - if (Res.getNode()) - return Res; +/// getExtFactor - Determine the adjustment factor for the position when +/// generating an "extract from vector registers" instruction. +static unsigned getExtFactor(SDValue &V) { + EVT EltType = V.getValueType().getVectorElementType(); + return EltType.getSizeInBits() / 8; +} - Res = tryCombineToEXTR(N, DCI); - if (Res.getNode()) - return Res; +/// NarrowVector - Given a value in the V128 register class, produce the +/// equivalent value in the V64 register class. +static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { + EVT VT = V128Reg.getValueType(); + unsigned WideSize = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType().getSimpleVT(); + MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); + SDLoc DL(V128Reg); - if (!Subtarget->hasNEON()) - return SDValue(); + return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg); +} - // Attempt to use vector immediate-form BSL - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. +// Gather data to see if the operation can be modelled as a +// shuffle in combination with VEXTs. +SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + EVT VT = Op.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND) - return SDValue(); + SmallVector SourceVecs; + SmallVector MinElts; + SmallVector MaxElts; - SDValue N1 = N->getOperand(1); - if (N1.getOpcode() != ISD::AND) - return SDValue(); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { + // A shuffle can only come from building a vector from various + // elements of other vectors. + return SDValue(); + } - if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); - APInt SplatBits0; - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && - !HasAnyUndefs) { - BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); - APInt SplatBits1; - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs && - SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && - SplatBits0 == ~SplatBits1) { - - return DAG.getNode(ISD::VSELECT, DL, VT, N0->getOperand(1), - N0->getOperand(0), N1->getOperand(0)); + // Record this extraction against the appropriate vector if possible... + SDValue SourceVec = V.getOperand(0); + unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); + bool FoundSource = false; + for (unsigned j = 0; j < SourceVecs.size(); ++j) { + if (SourceVecs[j] == SourceVec) { + if (MinElts[j] > EltNo) + MinElts[j] = EltNo; + if (MaxElts[j] < EltNo) + MaxElts[j] = EltNo; + FoundSource = true; + break; } } + + // Or record a new source if not... + if (!FoundSource) { + SourceVecs.push_back(SourceVec); + MinElts.push_back(EltNo); + MaxElts.push_back(EltNo); + } } - return SDValue(); -} + // Currently only do something sane when at most two source vectors + // involved. 
+ if (SourceVecs.size() > 2) + return SDValue(); -/// Target-specific dag combine xforms for ISD::SRA -static SDValue PerformSRACombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; + int VEXTOffsets[2] = { 0, 0 }; - SelectionDAG &DAG = DCI.DAG; - SDLoc DL(N); - EVT VT = N->getValueType(0); + // This loop extracts the usage patterns of the source vectors + // and prepares appropriate SDValues for a shuffle if possible. + for (unsigned i = 0; i < SourceVecs.size(); ++i) { + if (SourceVecs[i].getValueType() == VT) { + // No VEXT necessary + ShuffleSrcs[i] = SourceVecs[i]; + VEXTOffsets[i] = 0; + continue; + } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { + // We can pad out the smaller vector for free, so if it's part of a + // shuffle... + ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i], + DAG.getUNDEF(SourceVecs[i].getValueType())); + continue; + } - // We're looking for an SRA/SHL pair which form an SBFX. + // Don't attempt to extract subvectors from BUILD_VECTOR sources + // that expand or trunc the original value. + // TODO: We can try to bitcast and ANY_EXTEND the result but + // we need to consider the cost of vector ANY_EXTEND, and the + // legality of all the types. + if (SourceVecs[i].getValueType().getVectorElementType() != + VT.getVectorElementType()) + return SDValue(); - if (VT != MVT::i32 && VT != MVT::i64) - return SDValue(); + // Since only 64-bit and 128-bit vectors are legal on ARM and + // we've eliminated the other cases... + assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts && + "unexpected vector sizes in ReconstructShuffle"); - if (!isa(N->getOperand(1))) - return SDValue(); + if (MaxElts[i] - MinElts[i] >= NumElts) { + // Span too large for a VEXT to cope + return SDValue(); + } - uint64_t ExtraSignBits = N->getConstantOperandVal(1); - SDValue Shift = N->getOperand(0); + if (MinElts[i] >= NumElts) { + // The extraction can just take the second half + VEXTOffsets[i] = NumElts; + ShuffleSrcs[i] = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], + DAG.getIntPtrConstant(NumElts)); + } else if (MaxElts[i] < NumElts) { + // The extraction can just take the first half + VEXTOffsets[i] = 0; + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + SourceVecs[i], DAG.getIntPtrConstant(0)); + } else { + // An actual VEXT is needed + VEXTOffsets[i] = MinElts[i]; + SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + SourceVecs[i], DAG.getIntPtrConstant(0)); + SDValue VEXTSrc2 = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], + DAG.getIntPtrConstant(NumElts)); + unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); + ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, + DAG.getConstant(Imm, MVT::i32)); + } + } - if (Shift.getOpcode() != ISD::SHL) - return SDValue(); + SmallVector Mask; - if (!isa(Shift->getOperand(1))) - return SDValue(); + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Entry = Op.getOperand(i); + if (Entry.getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } - uint64_t BitsOnLeft = Shift->getConstantOperandVal(1); - uint64_t Width = VT.getSizeInBits() - ExtraSignBits; - uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft; + SDValue ExtractVec = Entry.getOperand(0); + int ExtractElt = + cast(Op.getOperand(i).getOperand(1))->getSExtValue(); + if (ExtractVec == SourceVecs[0]) { + Mask.push_back(ExtractElt - VEXTOffsets[0]); + } 
else { + Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); + } + } - if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits()) - return SDValue(); + // Final check before we try to produce nonsense... + if (isShuffleMaskLegal(Mask, VT)) + return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], + &Mask[0]); - return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0), - DAG.getConstant(LSB, MVT::i64), - DAG.getConstant(LSB + Width - 1, MVT::i64)); + return SDValue(); } -/// Check if this is a valid build_vector for the immediate operand of -/// a vector shift operation, where all the elements of the build_vector -/// must have the same constant integer value. -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { - // Ignore bit_converts. - while (Op.getOpcode() == ISD::BITCAST) - Op = Op.getOperand(0); - BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ElementBits) || - SplatBitSize > ElementBits) - return false; - Cnt = SplatBits.getSExtValue(); - return true; -} +// check if an EXT instruction can handle the shuffle mask when the +// vector sources of the shuffle are the same. +static bool isSingletonEXTMask(ArrayRef M, EVT VT, unsigned &Imm) { + unsigned NumElts = VT.getVectorNumElements(); -/// Check if this is a valid build_vector for the immediate operand of -/// a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits -static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) + // Assume that the first shuffle index is not UNDEF. Fail if it is. + if (M[0] < 0) return false; - return (Cnt >= 0 && Cnt < ElementBits); -} -/// Check if this is a valid build_vector for the immediate operand of a -/// vector shift right operation. The value must be in the range: -/// 1 <= Value <= ElementBits -static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - return (Cnt >= 1 && Cnt <= ElementBits); -} + Imm = M[0]; -static SDValue GenForSextInreg(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - EVT SrcVT, EVT DestVT, EVT SubRegVT, - const int *Mask, SDValue Src) { - SelectionDAG &DAG = DCI.DAG; - SDValue Bitcast - = DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src); - SDValue Sext - = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast); - SDValue ShuffleVec - = DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask); - SDValue ExtractSubreg - = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), - SubRegVT, ShuffleVec, - DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0); - return ExtractSubreg; -} - -/// Checks for vector shifts and lowers them. -static SDValue PerformShiftCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const AArch64Subtarget *ST) { - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); - if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64)) - return PerformSRACombine(N, DCI); + // If this is a VEXT shuffle, the immediate value is the index of the first + // element. 
The other shuffle indices must be the successive elements after + // the first one. + unsigned ExpectedElt = Imm; + for (unsigned i = 1; i < NumElts; ++i) { + // Increment the expected index. If it wraps around, just follow it + // back to index zero and keep going. + ++ExpectedElt; + if (ExpectedElt == NumElts) + ExpectedElt = 0; - // We're looking for an SRA/SHL pair to help generating instruction - // sshll v0.8h, v0.8b, #0 - // The instruction STXL is also the alias of this instruction. - // - // For example, for DAG like below, - // v2i32 = sra (v2i32 (shl v2i32, 16)), 16 - // we can transform it into - // v2i32 = EXTRACT_SUBREG - // (v4i32 (suffle_vector - // (v4i32 (sext (v4i16 (bitcast v2i32))), - // undef, (0, 2, u, u)), - // sub_64 - // - // With this transformation we expect to generate "SSHLL + UZIP1" - // Sometimes UZIP1 can be optimized away by combining with other context. - int64_t ShrCnt, ShlCnt; - if (N->getOpcode() == ISD::SRA - && (VT == MVT::v2i32 || VT == MVT::v4i16) - && isVShiftRImm(N->getOperand(1), VT, ShrCnt) - && N->getOperand(0).getOpcode() == ISD::SHL - && isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) { - SDValue Src = N->getOperand(0).getOperand(0); - if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) { - // sext_inreg(v2i32, v2i16) - // We essentially only care the Mask {0, 2, u, u} - int Mask[4] = {0, 2, 4, 6}; - return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32, - Mask, Src); - } - else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) { - // sext_inreg(v2i16, v2i8) - // We essentially only care the Mask {0, u, 4, u, u, u, u, u, u, u, u, u} - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14}; - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32, - Mask, Src); - } - else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) { - // sext_inreg(v4i16, v4i8) - // We essentially only care the Mask {0, 2, 4, 6, u, u, u, u, u, u, u, u} - int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14}; - return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16, - Mask, Src); - } + if (M[i] < 0) + continue; // ignore UNDEF indices + if (ExpectedElt != static_cast(M[i])) + return false; } - // Nothing to be done for scalar shifts. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (!VT.isVector() || !TLI.isTypeLegal(VT)) - return SDValue(); - - assert(ST->hasNEON() && "unexpected vector shift"); - int64_t Cnt; + return true; +} - switch (N->getOpcode()) { - default: - llvm_unreachable("unexpected shift opcode"); +// check if an EXT instruction can handle the shuffle mask when the +// vector sources of the shuffle are different. +static bool isEXTMask(ArrayRef M, EVT VT, bool &ReverseEXT, + unsigned &Imm) { + // Look for the first non-undef element. + const int *FirstRealElt = std::find_if(M.begin(), M.end(), + [](int Elt) {return Elt >= 0;}); - case ISD::SHL: - if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { - SDValue RHS = - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); - } - break; + // Benefit form APInt to handle overflow when calculating expected element. + unsigned NumElts = VT.getVectorNumElements(); + unsigned MaskBits = APInt(32, NumElts * 2).logBase2(); + APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1); + // The following shuffle indices must be the successive elements after the + // first real element. 
+ const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(), + [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;}); + if (FirstWrongElt != M.end()) + return false; - case ISD::SRA: - case ISD::SRL: - if (isVShiftRImm(N->getOperand(1), VT, Cnt)) { - SDValue RHS = - DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS); - } - break; - } + // The index of an EXT is the first element if it is not UNDEF. + // Watch out for the beginning UNDEFs. The EXT index should be the expected + // value of the first element. E.g. + // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>. + // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>. + // ExpectedElt is the last mask index plus 1. + Imm = ExpectedElt.getZExtValue(); + + // There are two difference cases requiring to reverse input vectors. + // For example, for vector <4 x i32> we have the following cases, + // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>) + // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>) + // For both cases, we finally use mask <5, 6, 7, 0>, which requires + // to reverse two input vectors. + if (Imm < NumElts) + ReverseEXT = true; + else + Imm -= NumElts; - return SDValue(); + return true; } -/// ARM-specific DAG combining for intrinsics. -static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { - unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); - - switch (IntNo) { - default: - // Don't do anything for most intrinsics. - break; - - case Intrinsic::arm_neon_vqshifts: - case Intrinsic::arm_neon_vqshiftu: - EVT VT = N->getOperand(1).getValueType(); - int64_t Cnt; - if (!isVShiftLImm(N->getOperand(2), VT, Cnt)) - break; - unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts) - ? AArch64ISD::NEON_QSHLs - : AArch64ISD::NEON_QSHLu; - return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), - N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); +/// isREVMask - Check if a vector shuffle corresponds to a REV +/// instruction with the specified blocksize. (The order of the elements +/// within each block of the vector is reversed.) +static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && + "Only possible block sizes for REV are: 16, 32, 64"); + + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + unsigned BlockElts = M[0] + 1; + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSz; + + if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) + return false; + + for (unsigned i = 0; i < NumElts; ++i) { + if (M[i] < 0) + continue; // ignore UNDEF indices + if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) + return false; } - return SDValue(); + return true; } -/// Target-specific DAG combine function for NEON load/store intrinsics -/// to merge base address updates. -static SDValue CombineBaseUpdate(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) - return SDValue(); +static bool isZIPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 
0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((M[i] >= 0 && (unsigned)M[i] != Idx) || + (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts)) + return false; + Idx += 1; + } - SelectionDAG &DAG = DCI.DAG; - bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || - N->getOpcode() == ISD::INTRINSIC_W_CHAIN); - unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); - SDValue Addr = N->getOperand(AddrOpIdx); + return true; +} - // Search for a use of the address operand that is an increment. - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - if (User->getOpcode() != ISD::ADD || - UI.getUse().getResNo() != Addr.getResNo()) - continue; +static bool isUZPMask(ArrayRef M, EVT VT, unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i != NumElts; ++i) { + if (M[i] < 0) + continue; // ignore UNDEF indices + if ((unsigned)M[i] != 2 * i + WhichResult) + return false; + } - // Check that the add is independent of the load/store. Otherwise, folding - // it would create a cycle. - if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) - continue; + return true; +} - // Find the new opcode for the updating load/store. - bool isLoad = true; - bool isLaneOp = false; - unsigned NewOpc = 0; - unsigned NumVecs = 0; - if (isIntrinsic) { - unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); - switch (IntNo) { - default: llvm_unreachable("unexpected intrinsic for Neon base update"); - case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD; - NumVecs = 1; break; - case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD; - NumVecs = 2; break; - case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD; - NumVecs = 3; break; - case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD; - NumVecs = 4; break; - case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD; - NumVecs = 1; isLoad = false; break; - case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD; - NumVecs = 2; isLoad = false; break; - case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD; - NumVecs = 3; isLoad = false; break; - case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD; - NumVecs = 4; isLoad = false; break; - case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD; - NumVecs = 2; break; - case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD; - NumVecs = 3; break; - case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD; - NumVecs = 4; break; - case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD; - NumVecs = 2; isLoad = false; break; - case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD; - NumVecs = 3; isLoad = false; break; - case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD; - NumVecs = 4; isLoad = false; break; - case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD; - NumVecs = 2; isLaneOp = true; break; - case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD; - NumVecs = 3; isLaneOp = true; break; - case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD; - NumVecs = 4; isLaneOp = true; break; - case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD; - NumVecs = 2; isLoad = false; isLaneOp = true; break; - case Intrinsic::arm_neon_vst3lane: NewOpc = 
AArch64ISD::NEON_ST3LN_UPD; - NumVecs = 3; isLoad = false; isLaneOp = true; break; - case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD; - NumVecs = 4; isLoad = false; isLaneOp = true; break; - } - } else { - isLaneOp = true; - switch (N->getOpcode()) { - default: llvm_unreachable("unexpected opcode for Neon base update"); - case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD; - NumVecs = 2; break; - case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD; - NumVecs = 3; break; - case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD; - NumVecs = 4; break; - } - } +static bool isTRNMask(ArrayRef M, EVT VT, unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || + (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult)) + return false; + } + return true; +} - // Find the size of memory referenced by the load/store. - EVT VecTy; - if (isLoad) - VecTy = N->getValueType(0); - else - VecTy = N->getOperand(AddrOpIdx + 1).getValueType(); - unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; - if (isLaneOp) - NumBytes /= VecTy.getVectorNumElements(); +/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. +static bool isZIP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned i = 0; i != NumElts; i += 2) { + if ((M[i] >= 0 && (unsigned)M[i] != Idx) || + (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx)) + return false; + Idx += 1; + } - // If the increment is a constant, it must match the memory ref size. - SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); - if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { - uint32_t IncVal = CInc->getZExtValue(); - if (IncVal != NumBytes) - continue; - Inc = DAG.getTargetConstant(IncVal, MVT::i32); - } + return true; +} - // Create the new updating load/store node. - EVT Tys[6]; - unsigned NumResultVecs = (isLoad ? NumVecs : 0); - unsigned n; - for (n = 0; n < NumResultVecs; ++n) - Tys[n] = VecTy; - Tys[n++] = MVT::i64; - Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs + 2); - SmallVector Ops; - Ops.push_back(N->getOperand(0)); // incoming chain - Ops.push_back(N->getOperand(AddrOpIdx)); - Ops.push_back(Inc); - for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { - Ops.push_back(N->getOperand(i)); +/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, +static bool isUZP_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult) { + unsigned Half = VT.getVectorNumElements() / 2; + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned j = 0; j != 2; ++j) { + unsigned Idx = WhichResult; + for (unsigned i = 0; i != Half; ++i) { + int MIdx = M[i + j * Half]; + if (MIdx >= 0 && (unsigned)MIdx != Idx) + return false; + Idx += 2; } - MemIntrinsicSDNode *MemInt = cast(N); - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops.data(), Ops.size(), - MemInt->getMemoryVT(), - MemInt->getMemOperand()); + } - // Update the uses. 
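The ZIP, UZP and TRN recognizers above all follow the same pattern: derive WhichResult from M[0], then check every defined index, with -1 (UNDEF) matching anything. As a review aid, here is a standalone sketch of the three index patterns over plain std::vector masks; the lower-case names are illustrative, not LLVM API.

#include <cstdio>
#include <vector>

// ZIP interleaves one half of the two inputs: <0, n, 1, n+1, ...>.
static bool isZipMask(const std::vector<int> &M, unsigned NumElts,
                      unsigned &WhichResult) {
  WhichResult = (M[0] == 0) ? 0 : 1;
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
      return false;
    Idx += 1;
  }
  return true;
}

// UZP keeps every other element: <0, 2, 4, 6> or <1, 3, 5, 7>.
static bool isUzpMask(const std::vector<int> &M, unsigned NumElts,
                      unsigned &WhichResult) {
  WhichResult = (M[0] == 0) ? 0 : 1;
  for (unsigned i = 0; i != NumElts; ++i)
    if (M[i] >= 0 && (unsigned)M[i] != 2 * i + WhichResult)
      return false;
  return true;
}

// TRN transposes lane pairs: <0, n, 2, n+2, ...> or <1, n+1, 3, n+3, ...>.
static bool isTrnMask(const std::vector<int> &M, unsigned NumElts,
                      unsigned &WhichResult) {
  WhichResult = (M[0] == 0) ? 0 : 1;
  for (unsigned i = 0; i < NumElts; i += 2)
    if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
        (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
      return false;
  return true;
}

int main() {
  unsigned Which;
  std::vector<int> Zip1 = {0, 4, 1, 5}; // zip1 of two <4 x i32>
  std::vector<int> Uzp2 = {1, 3, 5, 7}; // uzp2
  std::vector<int> Trn1 = {0, 4, 2, 6}; // trn1
  std::printf("zip=%d uzp=%d trn=%d\n", isZipMask(Zip1, 4, Which),
              isUzpMask(Uzp2, 4, Which), isTrnMask(Trn1, 4, Which));
  return 0;
}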
- std::vector NewResults; - for (unsigned i = 0; i < NumResultVecs; ++i) { - NewResults.push_back(SDValue(UpdN.getNode(), i)); - } - NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain - DCI.CombineTo(N, NewResults); - DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); + return true; +} - break; +/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of +/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". +/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. +static bool isTRN_v_undef_Mask(ArrayRef M, EVT VT, unsigned &WhichResult) { + unsigned NumElts = VT.getVectorNumElements(); + WhichResult = (M[0] == 0 ? 0 : 1); + for (unsigned i = 0; i < NumElts; i += 2) { + if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) || + (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult)) + return false; } - return SDValue(); + return true; } -/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1) -/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs. -/// If so, combine them to a vldN-dup operation and return true. -static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - SelectionDAG &DAG = DCI.DAG; - EVT VT = N->getValueType(0); +static bool isINSMask(ArrayRef M, int NumInputElements, + bool &DstIsLeft, int &Anomaly) { + if (M.size() != static_cast(NumInputElements)) + return false; - // Check if the VDUPLANE operand is a vldN-dup intrinsic. - SDNode *VLD = N->getOperand(0).getNode(); - if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) - return SDValue(); - unsigned NumVecs = 0; - unsigned NewOpc = 0; - unsigned IntNo = cast(VLD->getOperand(1))->getZExtValue(); - if (IntNo == Intrinsic::arm_neon_vld2lane) { - NumVecs = 2; - NewOpc = AArch64ISD::NEON_LD2DUP; - } else if (IntNo == Intrinsic::arm_neon_vld3lane) { - NumVecs = 3; - NewOpc = AArch64ISD::NEON_LD3DUP; - } else if (IntNo == Intrinsic::arm_neon_vld4lane) { - NumVecs = 4; - NewOpc = AArch64ISD::NEON_LD4DUP; - } else { - return SDValue(); - } + int NumLHSMatch = 0, NumRHSMatch = 0; + int LastLHSMismatch = -1, LastRHSMismatch = -1; - // First check that all the vldN-lane uses are VDUPLANEs and that the lane - // numbers match the load. - unsigned VLDLaneNo = - cast(VLD->getOperand(NumVecs + 3))->getZExtValue(); - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); - UI != UE; ++UI) { - // Ignore uses of the chain result. - if (UI.getUse().getResNo() == NumVecs) + for (int i = 0; i < NumInputElements; ++i) { + if (M[i] == -1) { + ++NumLHSMatch; + ++NumRHSMatch; continue; - SDNode *User = *UI; - if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE || - VLDLaneNo != cast(User->getOperand(1))->getZExtValue()) - return SDValue(); - } + } - // Create the vldN-dup node. - EVT Tys[5]; - unsigned n; - for (n = 0; n < NumVecs; ++n) - Tys[n] = VT; - Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumVecs + 1); - SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; - MemIntrinsicSDNode *VLDMemInt = cast(VLD); - SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2, - VLDMemInt->getMemoryVT(), - VLDMemInt->getMemOperand()); - - // Update the uses. - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); - UI != UE; ++UI) { - unsigned ResNo = UI.getUse().getResNo(); - // Ignore uses of the chain result. 
- if (ResNo == NumVecs) - continue; - SDNode *User = *UI; - DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); + if (M[i] == i) + ++NumLHSMatch; + else + LastLHSMismatch = i; + + if (M[i] == i + NumInputElements) + ++NumRHSMatch; + else + LastRHSMismatch = i; } - // Now the vldN-lane intrinsic is dead except for its chain result. - // Update uses of the chain. - std::vector VLDDupResults; - for (unsigned n = 0; n < NumVecs; ++n) - VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); - VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); - DCI.CombineTo(VLD, VLDDupResults); + if (NumLHSMatch == NumInputElements - 1) { + DstIsLeft = true; + Anomaly = LastLHSMismatch; + return true; + } else if (NumRHSMatch == NumInputElements - 1) { + DstIsLeft = false; + Anomaly = LastRHSMismatch; + return true; + } - return SDValue(N, 0); + return false; } -// v1i1 setcc -> -// v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt)) -// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as result. -// If it can legalize "v1i1 SETCC" correctly, no need to combine such SETCC. -static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) { - EVT ResVT = N->getValueType(0); - - if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 || - ResVT.getVectorElementType() != MVT::i1) - return SDValue(); - - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - EVT CmpVT = LHS.getValueType(); - LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), - CmpVT.getVectorElementType(), LHS, - DAG.getConstant(0, MVT::i64)); - RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), - CmpVT.getVectorElementType(), RHS, - DAG.getConstant(0, MVT::i64)); - SDValue SetCC = - DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS, - cast(N->getOperand(2))->get()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC); -} +static bool isConcatMask(ArrayRef Mask, EVT VT, bool SplitLHS) { + if (VT.getSizeInBits() != 128) + return false; -// vselect (v1i1 setcc) -> -// vselect (v1iXX setcc) (XX is the size of the compared operand type) -// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as -// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine -// such VSELECT. -static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0); - EVT CCVT = N0.getValueType(); + unsigned NumElts = VT.getVectorNumElements(); - if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 || - CCVT.getVectorElementType() != MVT::i1) - return SDValue(); + for (int I = 0, E = NumElts / 2; I != E; I++) { + if (Mask[I] != I) + return false; + } - EVT ResVT = N->getValueType(0); - EVT CmpVT = N0.getOperand(0).getValueType(); - // Only combine when the result type is of the same size as the compared - // operands. 
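isINSMask above accepts any shuffle that is the identity of one input except for a single lane, which then lowers to one INS (insert lane). A self-contained sketch of the same counting scheme, where undef lanes count as matching both sides:

#include <cstdio>
#include <vector>

// True if M is the identity of one input with exactly one lane replaced;
// reports which input is the destination and the offending lane.
static bool isInsMask(const std::vector<int> &M, int NumElts, bool &DstIsLeft,
                      int &Anomaly) {
  int LHSMatch = 0, RHSMatch = 0, LastLHSMiss = -1, LastRHSMiss = -1;
  for (int i = 0; i < NumElts; ++i) {
    if (M[i] == -1) { // undef matches both sides
      ++LHSMatch;
      ++RHSMatch;
      continue;
    }
    if (M[i] == i)
      ++LHSMatch;
    else
      LastLHSMiss = i;
    if (M[i] == i + NumElts)
      ++RHSMatch;
    else
      LastRHSMiss = i;
  }
  if (LHSMatch == NumElts - 1) {
    DstIsLeft = true;
    Anomaly = LastLHSMiss;
    return true;
  }
  if (RHSMatch == NumElts - 1) {
    DstIsLeft = false;
    Anomaly = LastRHSMiss;
    return true;
  }
  return false;
}

int main() {
  bool Left;
  int Lane;
  // <0, 1, 6, 3>: V1 identity except lane 2, which comes from V2 lane 2.
  std::vector<int> M = {0, 1, 6, 3};
  if (isInsMask(M, 4, Left, Lane))
    std::printf("INS into %s, lane %d\n", Left ? "V1" : "V2", Lane);
  return 0;
}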
- if (ResVT.getSizeInBits() != CmpVT.getSizeInBits()) - return SDValue(); + int Offset = NumElts / 2; + for (int I = NumElts / 2, E = NumElts; I != E; I++) { + if (Mask[I] != I + SplitLHS * Offset) + return false; + } - SDValue IfTrue = N->getOperand(1); - SDValue IfFalse = N->getOperand(2); - SDValue SetCC = - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(), - N0.getOperand(0), N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC, - IfTrue, IfFalse); + return true; } -// sign_extend (extract_vector_elt (v1i1 setcc)) -> -// extract_vector_elt (v1iXX setcc) -// (XX is the size of the compared operand type) -static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0); - SDValue Vec = N0.getOperand(0); +static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue V0 = Op.getOperand(0); + SDValue V1 = Op.getOperand(1); + ArrayRef Mask = cast(Op)->getMask(); - if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Vec.getOpcode() != ISD::SETCC) + if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() || + VT.getVectorElementType() != V1.getValueType().getVectorElementType()) return SDValue(); - EVT ResVT = N->getValueType(0); - EVT CmpVT = Vec.getOperand(0).getValueType(); - // Only optimize when the result type is of the same size as the element - // type of the compared operand. - if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits()) - return SDValue(); + bool SplitV0 = V0.getValueType().getSizeInBits() == 128; - SDValue Lane = N0.getOperand(1); - SDValue SetCC = - DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(), - Vec.getOperand(0), Vec.getOperand(1), - cast(Vec.getOperand(2))->get()); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT, - SetCC, Lane); -} + if (!isConcatMask(Mask, VT, SplitV0)) + return SDValue(); -SDValue -AArch64TargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - switch (N->getOpcode()) { - default: break; - case ISD::AND: return PerformANDCombine(N, DCI); - case ISD::OR: return PerformORCombine(N, DCI, getSubtarget()); - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - return PerformShiftCombine(N, DCI, getSubtarget()); - case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG); - case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG); - case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG); - case ISD::INTRINSIC_WO_CHAIN: - return PerformIntrinsicCombine(N, DCI.DAG); - case AArch64ISD::NEON_VDUPLANE: - return CombineVLDDUP(N, DCI); - case AArch64ISD::NEON_LD2DUP: - case AArch64ISD::NEON_LD3DUP: - case AArch64ISD::NEON_LD4DUP: - return CombineBaseUpdate(N, DCI); - case ISD::INTRINSIC_VOID: - case ISD::INTRINSIC_W_CHAIN: - switch (cast(N->getOperand(1))->getZExtValue()) { - case Intrinsic::arm_neon_vld1: - case Intrinsic::arm_neon_vld2: - case Intrinsic::arm_neon_vld3: - case Intrinsic::arm_neon_vld4: - case Intrinsic::arm_neon_vst1: - case Intrinsic::arm_neon_vst2: - case Intrinsic::arm_neon_vst3: - case Intrinsic::arm_neon_vst4: - case Intrinsic::arm_neon_vld2lane: - case Intrinsic::arm_neon_vld3lane: - case Intrinsic::arm_neon_vld4lane: - case Intrinsic::aarch64_neon_vld1x2: - case Intrinsic::aarch64_neon_vld1x3: - case Intrinsic::aarch64_neon_vld1x4: - case Intrinsic::aarch64_neon_vst1x2: - case Intrinsic::aarch64_neon_vst1x3: - case Intrinsic::aarch64_neon_vst1x4: - case 
Intrinsic::arm_neon_vst2lane: - case Intrinsic::arm_neon_vst3lane: - case Intrinsic::arm_neon_vst4lane: - return CombineBaseUpdate(N, DCI); - default: - break; - } + EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorNumElements() / 2); + if (SplitV0) { + V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, + DAG.getConstant(0, MVT::i64)); } - return SDValue(); + if (V1.getValueType().getSizeInBits() == 128) { + V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, + DAG.getConstant(0, MVT::i64)); + } + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); } -bool -AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { - VT = VT.getScalarType(); +/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit +/// the specified operations to build the shuffle. +static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, + SDValue RHS, SelectionDAG &DAG, + SDLoc dl) { + unsigned OpNum = (PFEntry >> 26) & 0x0F; + unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1); + unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1); + + enum { + OP_COPY = 0, // Copy, used for things like to say it is <0,1,2,3> + OP_VREV, + OP_VDUP0, + OP_VDUP1, + OP_VDUP2, + OP_VDUP3, + OP_VEXT1, + OP_VEXT2, + OP_VEXT3, + OP_VUZPL, // VUZP, left result + OP_VUZPR, // VUZP, right result + OP_VZIPL, // VZIP, left result + OP_VZIPR, // VZIP, right result + OP_VTRNL, // VTRN, left result + OP_VTRNR // VTRN, right result + }; + + if (OpNum == OP_COPY) { + if (LHSID == (1 * 9 + 2) * 9 + 3) + return LHS; + assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!"); + return RHS; + } - if (!VT.isSimple()) - return false; + SDValue OpLHS, OpRHS; + OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); + OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); + EVT VT = OpLHS.getValueType(); - switch (VT.getSimpleVT().SimpleTy) { - case MVT::f16: - case MVT::f32: - case MVT::f64: - return true; - case MVT::f128: - return false; + switch (OpNum) { default: - break; - } + llvm_unreachable("Unknown shuffle opcode!"); + case OP_VREV: + // VREV divides the vector in half and swaps within the half. 
+ if (VT.getVectorElementType() == MVT::i32 || + VT.getVectorElementType() == MVT::f32) + return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS); + // vrev <4 x i16> -> REV32 + if (VT.getVectorElementType() == MVT::i16) + return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS); + // vrev <4 x i8> -> REV16 + assert(VT.getVectorElementType() == MVT::i8); + return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS); + case OP_VDUP0: + case OP_VDUP1: + case OP_VDUP2: + case OP_VDUP3: { + EVT EltTy = VT.getVectorElementType(); + unsigned Opcode; + if (EltTy == MVT::i8) + Opcode = AArch64ISD::DUPLANE8; + else if (EltTy == MVT::i16) + Opcode = AArch64ISD::DUPLANE16; + else if (EltTy == MVT::i32 || EltTy == MVT::f32) + Opcode = AArch64ISD::DUPLANE32; + else if (EltTy == MVT::i64 || EltTy == MVT::f64) + Opcode = AArch64ISD::DUPLANE64; + else + llvm_unreachable("Invalid vector element type?"); - return false; + if (VT.getSizeInBits() == 64) + OpLHS = WidenVector(OpLHS, DAG); + SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64); + return DAG.getNode(Opcode, dl, VT, OpLHS, Lane); + } + case OP_VEXT1: + case OP_VEXT2: + case OP_VEXT3: { + unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); + return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS, + DAG.getConstant(Imm, MVT::i32)); + } + case OP_VUZPL: + return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); + case OP_VUZPR: + return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); + case OP_VZIPL: + return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); + case OP_VZIPR: + return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); + case OP_VTRNL: + return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); + case OP_VTRNR: + return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, + OpRHS); + } } -// Check whether a shuffle_vector could be presented as concat_vector. -bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG, - SDValue V0, SDValue V1, - const int *Mask, - SDValue &Res) const { + +static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, + SelectionDAG &DAG) { + // Check to see if we can use the TBL instruction. 
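For review, a standalone decoder for the perfect-shuffle entry layout that GeneratePerfectShuffle above assumes: cost in bits 31-30, opcode in bits 29-26, and two 13-bit operand ids, each packing four mask indices (0-7, with 8 for undef) as base-9 digits. The identity ids tested in OP_COPY fall out of the same encoding. The entry built below is synthetic, just for the round trip; real entries come from the generated PerfectShuffleTable.

#include <cstdio>

// Field layout assumed by GeneratePerfectShuffle:
// bits 31-30 cost, 29-26 opcode, 25-13 LHS id, 12-0 RHS id.
struct PFEntryFields {
  unsigned Cost, OpNum, LHSID, RHSID;
};

static PFEntryFields decodePFEntry(unsigned PFEntry) {
  return {PFEntry >> 30, (PFEntry >> 26) & 0x0F,
          (PFEntry >> 13) & ((1u << 13) - 1), PFEntry & ((1u << 13) - 1)};
}

// An id packs four mask indices (0-7, 8 = undef) as base-9 digits.
static void unpackID(unsigned ID, int Mask[4]) {
  for (int i = 3; i >= 0; --i) {
    Mask[i] = ID % 9;
    ID /= 9;
  }
}

int main() {
  unsigned LHSIdentity = (1 * 9 + 2) * 9 + 3;           // <0,1,2,3>
  unsigned RHSIdentity = ((4 * 9 + 5) * 9 + 6) * 9 + 7; // <4,5,6,7>
  unsigned Entry = (2u << 30) | (1u << 26) | (LHSIdentity << 13) | RHSIdentity;
  PFEntryFields F = decodePFEntry(Entry);
  int M[4];
  unpackID(F.LHSID, M);
  std::printf("cost=%u op=%u lhs=<%d,%d,%d,%d>", F.Cost, F.OpNum, M[0], M[1],
              M[2], M[3]);
  unpackID(F.RHSID, M);
  std::printf(" rhs=<%d,%d,%d,%d>\n", M[0], M[1], M[2], M[3]);
  return 0;
}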
+ SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); SDLoc DL(Op); - EVT VT = Op.getValueType(); - if (VT.getSizeInBits() != 128) - return false; - if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() || - VT.getVectorElementType() != V1.getValueType().getVectorElementType()) - return false; - unsigned NumElts = VT.getVectorNumElements(); - bool isContactVector = true; - bool splitV0 = false; - if (V0.getValueType().getSizeInBits() == 128) - splitV0 = true; + EVT EltVT = Op.getValueType().getVectorElementType(); + unsigned BytesPerElt = EltVT.getSizeInBits() / 8; - for (int I = 0, E = NumElts / 2; I != E; I++) { - if (Mask[I] != I) { - isContactVector = false; - break; + SmallVector TBLMask; + for (int Val : ShuffleMask) { + for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { + unsigned Offset = Byte + Val * BytesPerElt; + TBLMask.push_back(DAG.getConstant(Offset, MVT::i32)); } } - if (isContactVector) { - int offset = NumElts / 2; - for (int I = NumElts / 2, E = NumElts; I != E; I++) { - if (Mask[I] != I + splitV0 * offset) { - isContactVector = false; - break; - } - } + MVT IndexVT = MVT::v8i8; + unsigned IndexLen = 8; + if (Op.getValueType().getSizeInBits() == 128) { + IndexVT = MVT::v16i8; + IndexLen = 16; } - if (isContactVector) { - EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - NumElts / 2); - if (splitV0) { - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0, - DAG.getConstant(0, MVT::i64)); - } - if (V1.getValueType().getSizeInBits() == 128) { - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1, - DAG.getConstant(0, MVT::i64)); + SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1); + SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2); + + SDValue Shuffle; + if (V2.getNode()->getOpcode() == ISD::UNDEF) { + if (IndexLen == 8) + V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); + Shuffle = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, + DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst, + DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, + makeArrayRef(TBLMask.data(), IndexLen))); + } else { + if (IndexLen == 8) { + V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst); + Shuffle = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, + DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst, + DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, + makeArrayRef(TBLMask.data(), IndexLen))); + } else { + // FIXME: We cannot, for the moment, emit a TBL2 instruction because we + // cannot currently represent the register constraints on the input + // table registers. + // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst, + // DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, + // &TBLMask[0], IndexLen)); + Shuffle = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, + DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst, + DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, + makeArrayRef(TBLMask.data(), IndexLen))); } - Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1); - return true; } - return false; + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle); } -// Check whether a Build Vector could be presented as Shuffle Vector. -// This Shuffle Vector maybe not legalized, so the length of its operand and -// the length of result may not equal. 
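GenerateTBL's inner loop scales an element-level mask to the byte-level indices a TBL instruction consumes. A minimal sketch of just that expansion (an undef index would expand to out-of-range bytes, which TBL defines to yield zero):

#include <cstdio>
#include <vector>

// Expand an element-level shuffle mask into the byte-level index vector TBL
// consumes; e.g. a <4 x i16> mask scales every index by BytesPerElt == 2.
static std::vector<int> buildTblMask(const std::vector<int> &ShuffleMask,
                                     unsigned BytesPerElt) {
  std::vector<int> TBLMask;
  for (int Val : ShuffleMask)
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
      TBLMask.push_back(Byte + Val * BytesPerElt);
  return TBLMask;
}

int main() {
  // <2, 0, 3, 1> on v4i16 -> bytes <4,5, 0,1, 6,7, 2,3>.
  for (int B : buildTblMask({2, 0, 3, 1}, 2))
    std::printf("%d ", B);
  std::printf("\n");
  return 0;
}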
-bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, - SDValue &V0, SDValue &V1, - int *Mask) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned V0NumElts = 0; - - // Check if all elements are extracted from less than 3 vectors. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Elt = Op.getOperand(i); - if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Elt.getOperand(0).getValueType().getVectorElementType() != - VT.getVectorElementType()) - return false; - - if (V0.getNode() == 0) { - V0 = Elt.getOperand(0); - V0NumElts = V0.getValueType().getVectorNumElements(); - } - if (Elt.getOperand(0) == V0) { - Mask[i] = (cast(Elt->getOperand(1))->getZExtValue()); - continue; - } else if (V1.getNode() == 0) { - V1 = Elt.getOperand(0); - } - if (Elt.getOperand(0) == V1) { - unsigned Lane = cast(Elt->getOperand(1))->getZExtValue(); - Mask[i] = (Lane + V0NumElts); - continue; - } else { - return false; - } - } - return true; +static unsigned getDUPLANEOp(EVT EltType) { + if (EltType == MVT::i8) + return AArch64ISD::DUPLANE8; + if (EltType == MVT::i16) + return AArch64ISD::DUPLANE16; + if (EltType == MVT::i32 || EltType == MVT::f32) + return AArch64ISD::DUPLANE32; + if (EltType == MVT::i64 || EltType == MVT::f64) + return AArch64ISD::DUPLANE64; + + llvm_unreachable("Invalid vector element type?"); } -// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. -SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a quad-shift!"); +SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); - SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; - assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - SDValue Tmp3 = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + + // Convert shuffles that are directly supported on NEON to target-specific + // DAG nodes, instead of keeping them as shuffles and matching them again + // during code selection. This is more efficient and avoids the possibility + // of inconsistencies between legalization and selection. + ArrayRef ShuffleMask = SVN->getMask(); - SDValue A64cc; - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt, - DAG.getConstant(0, MVT::i64), - ISD::SETGE, A64cc, - DAG, dl); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], + V1.getValueType().getSimpleVT())) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. 
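The splat path below keys off ShuffleVectorSDNode::isSplatMask and getSplatIndex. As a rough standalone model (not the exact LLVM definition), a mask is a splat when every defined index names the same source lane:

#include <cstdio>
#include <vector>

// A mask is a splat when every defined index names the same source lane;
// -1 (undef) entries match anything. All-undef masks count as splats here.
static bool isSplatMaskSketch(const std::vector<int> &M, int &SplatLane) {
  SplatLane = -1;
  for (int Idx : M) {
    if (Idx < 0)
      continue;
    if (SplatLane == -1)
      SplatLane = Idx;
    else if (Idx != SplatLane)
      return false;
  }
  return true;
}

int main() {
  int Lane;
  std::vector<int> M = {-1, 2, 2, -1};
  if (isSplatMaskSketch(M, Lane))
    std::printf("splat of lane %d\n", Lane); // prints lane 2
  return 0;
}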
+    if (Lane == -1)
+      Lane = 0;
+
+    if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
+      return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
+                         V1.getOperand(0));
+    // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
+    // constant. If so, we can just reference the lane's definition directly.
+    if (V1.getOpcode() == ISD::BUILD_VECTOR &&
+        !isa<ConstantSDNode>(V1.getOperand(Lane)))
+      return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
+
+    // Otherwise, duplicate from the lane of the input vector.
+    unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
+
+    // SelectionDAGBuilder may have "helpfully" already extracted or
+    // concatenated to make a vector of the same size as this SHUFFLE. We can
+    // ignore the extract entirely, and canonicalise the concat using
+    // WidenVector.
+    if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+      Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
+      V1 = V1.getOperand(0);
+    } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
+      unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
+      Lane -= Idx * VT.getVectorNumElements() / 2;
+      V1 = WidenVector(V1.getOperand(Idx), DAG);
+    } else if (VT.getSizeInBits() == 64)
+      V1 = WidenVector(V1, DAG);
+
+    return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64));
+  }
-  SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
-                           DAG.getConstant(0, Tmp3.getValueType()), Tmp3,
-                           A64cc);
-  SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
-                           TrueVal, FalseVal, A64cc);
+  if (isREVMask(ShuffleMask, VT, 64))
+    return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
+  if (isREVMask(ShuffleMask, VT, 32))
+    return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
+  if (isREVMask(ShuffleMask, VT, 16))
+    return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
+
+  bool ReverseEXT = false;
+  unsigned Imm;
+  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
+    if (ReverseEXT)
+      std::swap(V1, V2);
+    Imm *= getExtFactor(V1);
+    return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
+                       DAG.getConstant(Imm, MVT::i32));
+  } else if (V2->getOpcode() == ISD::UNDEF &&
+             isSingletonEXTMask(ShuffleMask, VT, Imm)) {
+    Imm *= getExtFactor(V1);
+    return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
+                       DAG.getConstant(Imm, MVT::i32));
+  }
-  SDValue Ops[2] = { Lo, Hi };
-  return DAG.getMergeValues(Ops, 2, dl);
+  unsigned WhichResult;
+  if (isZIPMask(ShuffleMask, VT, WhichResult)) {
+    unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
+  }
+  if (isUZPMask(ShuffleMask, VT, WhichResult)) {
+    unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
+  }
+  if (isTRNMask(ShuffleMask, VT, WhichResult)) {
+    unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
+  }
+
+  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
+    unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
+  }
+  if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
+    unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
+  }
+  if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
+    unsigned Opc = (WhichResult == 0) ?
AArch64ISD::TRN1 : AArch64ISD::TRN2; + return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); + } + + SDValue Concat = tryFormConcatFromShuffle(Op, DAG); + if (Concat.getNode()) + return Concat; + + bool DstIsLeft; + int Anomaly; + int NumInputElements = V1.getValueType().getVectorNumElements(); + if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) { + SDValue DstVec = DstIsLeft ? V1 : V2; + SDValue DstLaneV = DAG.getConstant(Anomaly, MVT::i64); + + SDValue SrcVec = V1; + int SrcLane = ShuffleMask[Anomaly]; + if (SrcLane >= NumInputElements) { + SrcVec = V2; + SrcLane -= VT.getVectorNumElements(); + } + SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64); + + EVT ScalarVT = VT.getVectorElementType(); + if (ScalarVT.getSizeInBits() < 32) + ScalarVT = MVT::i32; + + return DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV), + DstLaneV); + } + + // If the shuffle is not directly supported and it has 4 elements, use + // the PerfectShuffle-generated table to synthesize it from other shuffles. + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts == 4) { + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + if (ShuffleMask[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = ShuffleMask[i]; + } + + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + + PFIndexes[2] * 9 + PFIndexes[3]; + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + if (Cost <= 4) + return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); + } + + return GenerateTBL(Op, ShuffleMask, DAG); } -/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two -/// i64 values and take a 2 x i64 value to shift plus a shift amount. 
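The 4-element fallback above maps the mask into the perfect-shuffle table by treating each index as a base-9 digit, with undef encoded as 8. A standalone sketch of that index computation:

#include <cstdio>

// Each of the four mask indices (0-7, or 8 for undef) becomes one base-9
// digit; the result indexes the 9^4 = 6561-entry perfect shuffle table.
static unsigned pfTableIndex(const int Mask[4]) {
  unsigned PFIndexes[4];
  for (unsigned i = 0; i != 4; ++i)
    PFIndexes[i] = Mask[i] < 0 ? 8 : (unsigned)Mask[i];
  return PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + PFIndexes[2] * 9 +
         PFIndexes[3];
}

int main() {
  int Mask[4] = {0, -1, 5, 2}; // undef encodes as digit 8
  std::printf("table index = %u of %u\n", pfTableIndex(Mask), 9u * 9 * 9 * 9);
  // In the real table the entry's top two bits carry the cost:
  //   unsigned Cost = PerfectShuffleTable[Idx] >> 30;
  return 0;
}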
-SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getNumOperands() == 3 && "Not a quad-shift!"); - EVT VT = Op.getValueType(); - unsigned VTBits = VT.getSizeInBits(); +static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, + APInt &UndefBits) { + EVT VT = BVN->getValueType(0); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + unsigned NumSplats = VT.getSizeInBits() / SplatBitSize; + + for (unsigned i = 0; i < NumSplats; ++i) { + CnstBits <<= SplatBitSize; + UndefBits <<= SplatBitSize; + CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits()); + UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits()); + } + + return true; + } + + return false; +} + +SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op, + SelectionDAG &DAG) const { + BuildVectorSDNode *BVN = + dyn_cast(Op.getOperand(1).getNode()); + SDValue LHS = Op.getOperand(0); SDLoc dl(Op); - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); + EVT VT = Op.getValueType(); - assert(Op.getOpcode() == ISD::SHL_PARTS); - SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(VTBits, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); - SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, - DAG.getConstant(VTBits, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue Tmp4 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + if (!BVN) + return Op; - SDValue A64cc; - SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt, - DAG.getConstant(0, MVT::i64), - ISD::SETGE, A64cc, - DAG, dl); + APInt CnstBits(VT.getSizeInBits(), 0); + APInt UndefBits(VT.getSizeInBits(), 0); + if (resolveBuildVector(BVN, CnstBits, UndefBits)) { + // We only have BIC vector immediate instruction, which is and-not. + CnstBits = ~CnstBits; + + // We make use of a little bit of goto ickiness in order to avoid having to + // duplicate the immediate matching logic for the undef toggled case. + bool SecondTry = false; + AttemptModImm: + + if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { + CnstBits = CnstBits.zextOrTrunc(64); + uint64_t CnstVal = CnstBits.getZExtValue(); + + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } - SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - DAG.getConstant(0, Tmp4.getValueType()), Tmp4, - A64cc); - SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp, - Tmp3, FalseVal, A64cc); + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(8, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(16, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(24, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; + SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(8, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + } + + if (SecondTry) + goto FailedModImm; + SecondTry = true; + CnstBits = ~UndefBits; + goto AttemptModImm; + } + +// We can always fall back to a non-immediate AND. +FailedModImm: + return Op; } -// If this is a case we can't handle, return null and let the default -// expansion code take care of it. -SDValue -AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const AArch64Subtarget *ST) const { +// Specialized code to quickly find if PotentialBVec is a BuildVector that +// consists of only the same constant int value, returned in reference arg +// ConstVal +static bool isAllConstantBuildVector(const SDValue &PotentialBVec, + uint64_t &ConstVal) { + BuildVectorSDNode *Bvec = dyn_cast(PotentialBVec); + if (!Bvec) + return false; + ConstantSDNode *FirstElt = dyn_cast(Bvec->getOperand(0)); + if (!FirstElt) + return false; + EVT VT = Bvec->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + for (unsigned i = 1; i < NumElts; ++i) + if (dyn_cast(Bvec->getOperand(i)) != FirstElt) + return false; + ConstVal = FirstElt->getZExtValue(); + return true; +} - BuildVectorSDNode *BVN = cast(Op.getNode()); - SDLoc DL(Op); +static unsigned getIntrinsicID(const SDNode *N) { + unsigned Opcode = N->getOpcode(); + switch (Opcode) { + default: + return Intrinsic::not_intrinsic; + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IID = cast(N->getOperand(0))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return IID; + return Intrinsic::not_intrinsic; + } + } +} + +// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)), +// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a +// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2. +// Also, logical shift right -> sri, with the same structure. 
+static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + + if (!VT.isVector()) + return SDValue(); + + SDLoc DL(N); + + // Is the first op an AND? + const SDValue And = N->getOperand(0); + if (And.getOpcode() != ISD::AND) + return SDValue(); + + // Is the second op an shl or lshr? + SDValue Shift = N->getOperand(1); + // This will have been turned into: AArch64ISD::VSHL vector, #shift + // or AArch64ISD::VLSHR vector, #shift + unsigned ShiftOpc = Shift.getOpcode(); + if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR)) + return SDValue(); + bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR; + + // Is the shift amount constant? + ConstantSDNode *C2node = dyn_cast(Shift.getOperand(1)); + if (!C2node) + return SDValue(); + + // Is the and mask vector all constant? + uint64_t C1; + if (!isAllConstantBuildVector(And.getOperand(1), C1)) + return SDValue(); + + // Is C1 == ~C2, taking into account how much one can shift elements of a + // particular size? + uint64_t C2 = C2node->getZExtValue(); + unsigned ElemSizeInBits = VT.getVectorElementType().getSizeInBits(); + if (C2 > ElemSizeInBits) + return SDValue(); + unsigned ElemMask = (1 << ElemSizeInBits) - 1; + if ((C1 & ElemMask) != (~C2 & ElemMask)) + return SDValue(); + + SDValue X = And.getOperand(0); + SDValue Y = Shift.getOperand(0); + + unsigned Intrin = + IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli; + SDValue ResultSLI = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1)); + + DEBUG(dbgs() << "aarch64-lower: transformed: \n"); + DEBUG(N->dump(&DAG)); + DEBUG(dbgs() << "into: \n"); + DEBUG(ResultSLI->dump(&DAG)); + + ++NumShiftInserts; + return ResultSLI; +} + +SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, + SelectionDAG &DAG) const { + // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) + if (EnableAArch64SlrGeneration) { + SDValue Res = tryLowerToSLI(Op.getNode(), DAG); + if (Res.getNode()) + return Res; + } + + BuildVectorSDNode *BVN = + dyn_cast(Op.getOperand(0).getNode()); + SDValue LHS = Op.getOperand(1); + SDLoc dl(Op); EVT VT = Op.getValueType(); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; + // OR commutes, so try swapping the operands. + if (!BVN) { + LHS = Op.getOperand(0); + BVN = dyn_cast(Op.getOperand(1).getNode()); + } + if (!BVN) + return Op; - unsigned UseNeonMov = VT.getSizeInBits() >= 64; - - // Note we favor lowering MOVI over MVNI. - // This has implications on the definition of patterns in TableGen to select - // BIC immediate instructions but not ORR immediate instructions. - // If this lowering order is changed, TableGen patterns for BIC immediate and - // ORR immediate instructions have to be updated. 
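tryLowerToSLI above folds (or (and X, C1), (shl/lshr Y, C2)) into a shift-and-insert. A scalar model of the SLI/SRI element semantics the transform relies on (byte elements assumed; illustrative only):

#include <cstdint>
#include <cstdio>

// Per-element model: SLI shifts the source left and inserts it, keeping the
// destination's low bits that the shift vacates; SRI mirrors on high bits.
static uint8_t sli8(uint8_t Dst, uint8_t Src, unsigned Shift) {
  uint8_t Keep = (uint8_t)((1u << Shift) - 1); // low Shift bits of Dst
  return (uint8_t)((Dst & Keep) | (uint8_t)(Src << Shift));
}

static uint8_t sri8(uint8_t Dst, uint8_t Src, unsigned Shift) {
  uint8_t Keep = (uint8_t)~(0xFFu >> Shift); // high Shift bits of Dst
  return (uint8_t)((Dst & Keep) | (uint8_t)(Src >> Shift));
}

int main() {
  // (X & 0x0F) | (Y << 4) on each byte is exactly SLI #4.
  std::printf("sli: %02x\n", sli8(0xAB, 0x05, 4)); // -> 0x5b
  std::printf("sri: %02x\n", sri8(0xAB, 0x50, 4)); // -> 0xa5
  return 0;
}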
- if (UseNeonMov && - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - if (SplatBitSize <= 64) { - // First attempt to use vector immediate-form MOVI - EVT NeonMovVT; - unsigned Imm = 0; - unsigned OpCmode = 0; - - if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), - SplatBitSize, DAG, VT.is128BitVector(), - Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) { - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); - - if (ImmVal.getNode() && OpCmodeVal.getNode()) { - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT, - ImmVal, OpCmodeVal); - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); - } + APInt CnstBits(VT.getSizeInBits(), 0); + APInt UndefBits(VT.getSizeInBits(), 0); + if (resolveBuildVector(BVN, CnstBits, UndefBits)) { + // We make use of a little bit of goto ickiness in order to avoid having to + // duplicate the immediate matching logic for the undef toggled case. + bool SecondTry = false; + AttemptModImm: + + if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { + CnstBits = CnstBits.zextOrTrunc(64); + uint64_t CnstVal = CnstBits.getZExtValue(); + + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - // Then attempt to use vector immediate-form MVNI - uint64_t NegatedImm = (~SplatBits).getZExtValue(); - if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, - DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT, - Imm, OpCmode)) { - SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); - SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); - if (ImmVal.getNode() && OpCmodeVal.getNode()) { - SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT, - ImmVal, OpCmodeVal); - return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); - } + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(8, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } - // Attempt to use vector immediate-form FMOV - if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) || - (VT == MVT::v2f64 && SplatBitSize == 64)) { - APFloat RealVal( - SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble, - SplatBits); - uint32_t ImmVal; - if (A64Imms::isFPImm(RealVal, ImmVal)) { - SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); - return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val); - } + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(16, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(24, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; + SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(8, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); } } + + if (SecondTry) + goto FailedModImm; + SecondTry = true; + CnstBits = UndefBits; + goto AttemptModImm; } +// We can always fall back to a non-immediate OR. +FailedModImm: + return Op; +} + +SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + BuildVectorSDNode *BVN = cast(Op.getNode()); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + + APInt CnstBits(VT.getSizeInBits(), 0); + APInt UndefBits(VT.getSizeInBits(), 0); + if (resolveBuildVector(BVN, CnstBits, UndefBits)) { + // We make use of a little bit of goto ickiness in order to avoid having to + // duplicate the immediate matching logic for the undef toggled case. + bool SecondTry = false; + AttemptModImm: + + if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { + CnstBits = CnstBits.zextOrTrunc(64); + uint64_t CnstVal = CnstBits.getZExtValue(); + + // Certain magic vector constants (used to express things like NOT + // and NEG) are passed through unmodified. This allows codegen patterns + // for these operations to match. Special-purpose patterns will lower + // these immediates to MOVIs if it proves necessary. + if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL)) + return Op; + + // The many faces of MOVI... + if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal); + if (VT.getSizeInBits() == 128) { + SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64, + DAG.getConstant(CnstVal, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + // Support the V64 version via subregister insertion. + SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64, + DAG.getConstant(CnstVal, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(8, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(16, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(24, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; + SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(8, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(264, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(272, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; + SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + // The few faces of FMOV... + if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32; + SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) && + VT.getSizeInBits() == 128) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal); + SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64, + DAG.getConstant(CnstVal, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + // The many faces of MVNI... 
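The MOVI cases above, and the MVNI cases that follow, both reduce to one question: is the replicated 64-bit pattern (or its complement, for MVNI) a single byte shifted to a position the encodings cover? A standalone approximation of the 32-bit shifted-byte test (types 1-4 above); the AArch64_AM predicates are the authoritative versions:

#include <cstdint>
#include <cstdio>

// Is each 32-bit half of a replicated 64-bit pattern a single byte shifted
// to a byte position (LSL 0/8/16/24)? Approximates the combined effect of
// isAdvSIMDModImmType1-4 for illustration.
static bool isByteShifted32(uint64_t Value, unsigned &Byte, unsigned &Shift) {
  uint32_t Lo = (uint32_t)Value, Hi = (uint32_t)(Value >> 32);
  if (Lo != Hi)
    return false; // not a 32-bit splat
  for (Shift = 0; Shift < 32; Shift += 8) {
    if ((Lo & ~(0xFFu << Shift)) == 0) {
      Byte = (Lo >> Shift) & 0xFF;
      return true;
    }
  }
  return false;
}

int main() {
  unsigned Byte, Shift;
  uint64_t V = 0xFFFF00FFFFFF00FFULL;
  if (isByteShifted32(V, Byte, Shift))
    std::printf("MOVI #%u, lsl #%u\n", Byte, Shift);
  else if (isByteShifted32(~V, Byte, Shift)) // complement -> MVNI
    std::printf("MVNI #%u, lsl #%u\n", Byte, Shift); // MVNI #255, lsl #8
  return 0;
}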
+ CnstVal = ~CnstVal; + if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(8, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(16, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(24, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(0, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; + SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(8, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(264, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + + if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) { + CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal); + MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; + SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy, + DAG.getConstant(CnstVal, MVT::i32), + DAG.getConstant(272, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Mov); + } + } + + if (SecondTry) + goto FailedModImm; + SecondTry = true; + CnstBits = UndefBits; + goto AttemptModImm; + } +FailedModImm: + + // Scan through the operands to find some interesting properties we can + // exploit: + // 1) If only one value is used, we can use a DUP, or + // 2) if only the low element is not undef, we can just insert that, or + // 3) if only one constant value is used (w/ some non-constant lanes), + // we can splat the constant value into the whole vector then fill + // in the non-constant lanes. 
+ // 4) FIXME: If different constant values are used, but we can intelligently + // select the values we'll be overwriting for the non-constant + // lanes such that we can directly materialize the vector + // some other way (MOVI, e.g.), we can be sneaky. unsigned NumElts = VT.getVectorNumElements(); bool isOnlyLowElement = true; bool usesOnlyOneValue = true; - bool hasDominantValue = false; + bool usesOnlyOneConstantValue = true; bool isConstant = true; - - // Map of the number of times a particular SDValue appears in the - // element list. - DenseMap ValueCounts; + unsigned NumConstantLanes = 0; SDValue Value; + SDValue ConstantValue; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.getOpcode() == ISD::UNDEF) @@ -4702,143 +5425,90 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (!isa(V) && !isa(V)) isConstant = false; - ValueCounts.insert(std::make_pair(V, 0)); - unsigned &Count = ValueCounts[V]; + if (isa(V) || isa(V)) { + ++NumConstantLanes; + if (!ConstantValue.getNode()) + ConstantValue = V; + else if (ConstantValue != V) + usesOnlyOneConstantValue = false; + } - // Is this value dominant? (takes up more than half of the lanes) - if (++Count > (NumElts / 2)) { - hasDominantValue = true; + if (!Value.getNode()) Value = V; - } + else if (V != Value) + usesOnlyOneValue = false; } - if (ValueCounts.size() != 1) - usesOnlyOneValue = false; - if (!Value.getNode() && ValueCounts.size() > 0) - Value = ValueCounts.begin()->first; - if (ValueCounts.size() == 0) + if (!Value.getNode()) return DAG.getUNDEF(VT); if (isOnlyLowElement) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - if (hasDominantValue && EltSize <= 64) { - // Use VDUP for non-constant splats. + // Use DUP for non-constant splats. For f32 constant splats, reduce to + // i32 and try again. + if (usesOnlyOneValue) { if (!isConstant) { - SDValue N; - - // If we are DUPing a value that comes directly from a vector, we could - // just use DUPLANE. We can only do this if the lane being extracted - // is at a constant index, as the DUP from lane instructions only have - // constant-index forms. - // - // If there is a TRUNCATE between EXTRACT_VECTOR_ELT and DUP, we can - // remove TRUNCATE for DUPLANE by apdating the source vector to - // appropriate vector type and lane index. - // - // FIXME: for now we have v1i8, v1i16, v1i32 legal vector types, if they - // are not legal any more, no need to check the type size in bits should - // be large than 64. - SDValue V = Value; - if (Value->getOpcode() == ISD::TRUNCATE) - V = Value->getOperand(0); - if (V->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa(V->getOperand(1)) && - V->getOperand(0).getValueType().getSizeInBits() >= 64) { - - // If the element size of source vector is larger than DUPLANE - // element size, we can do transformation by, - // 1) bitcasting source register to smaller element vector - // 2) mutiplying the lane index by SrcEltSize/ResEltSize - // For example, we can lower - // "v8i16 vdup_lane(v4i32, 1)" - // to be - // "v8i16 vdup_lane(v8i16 bitcast(v4i32), 2)". 
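The operand scan in the new LowerBUILD_VECTOR above computes a handful of properties that drive the strategy choice. A standalone model of the constant-lane bookkeeping (lane tags and values here are made up for illustration):

#include <cstdio>

// Each lane of the BUILD_VECTOR is undef, a constant, or a variable value;
// the counts below select between the strategies listed above.
enum Kind { Undef, Constant, Variable };
struct Lane {
  Kind K;
  int Val;
};

int main() {
  Lane Ops[4] = {{Constant, 1}, {Variable, 0}, {Constant, 1}, {Constant, 1}};
  int NumConstantLanes = 0, ConstantValue = 0;
  bool UsesOnlyOneConstantValue = true, HaveConst = false;
  for (const Lane &L : Ops) {
    if (L.K != Constant)
      continue;
    ++NumConstantLanes;
    if (!HaveConst) {
      ConstantValue = L.Val;
      HaveConst = true;
    } else if (L.Val != ConstantValue) {
      UsesOnlyOneConstantValue = false;
    }
  }
  if (NumConstantLanes > 0 && UsesOnlyOneConstantValue)
    std::printf("DUP #%d, then INSERT_VECTOR_ELT on the variable lanes\n",
                ConstantValue);
  return 0;
}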
- SDValue SrcVec = V->getOperand(0); - unsigned SrcEltSize = - SrcVec.getValueType().getVectorElementType().getSizeInBits(); - unsigned ResEltSize = VT.getVectorElementType().getSizeInBits(); - if (SrcEltSize > ResEltSize) { - assert((SrcEltSize % ResEltSize == 0) && "Invalid element size"); - SDValue BitCast; - unsigned SrcSize = SrcVec.getValueType().getSizeInBits(); - unsigned ResSize = VT.getSizeInBits(); - - if (SrcSize > ResSize) { - assert((SrcSize % ResSize == 0) && "Invalid vector size"); - EVT CastVT = - EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - SrcSize / ResEltSize); - BitCast = DAG.getNode(ISD::BITCAST, DL, CastVT, SrcVec); - } else { - assert((SrcSize == ResSize) && "Invalid vector size of source vec"); - BitCast = DAG.getNode(ISD::BITCAST, DL, VT, SrcVec); - } + if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Value.getValueType() != VT) + return DAG.getNode(AArch64ISD::DUP, dl, VT, Value); - unsigned LaneIdx = V->getConstantOperandVal(1); - SDValue Lane = - DAG.getConstant((SrcEltSize / ResEltSize) * LaneIdx, MVT::i64); - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, BitCast, Lane); - } else { - assert((SrcEltSize == ResEltSize) && - "Invalid element size of source vec"); - N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, V->getOperand(0), - V->getOperand(1)); - } - } else - N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); - - if (!usesOnlyOneValue) { - // The dominant value was splatted as 'N', but we now have to insert - // all differing elements. - for (unsigned I = 0; I < NumElts; ++I) { - if (Op.getOperand(I) == Value) - continue; - SmallVector Ops; - Ops.push_back(N); - Ops.push_back(Op.getOperand(I)); - Ops.push_back(DAG.getConstant(I, MVT::i64)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3); - } - } - return N; + // This is actually a DUPLANExx operation, which keeps everything vectory. + + // DUPLANE works on 128-bit vectors, widen it if necessary. + SDValue Lane = Value.getOperand(1); + Value = Value.getOperand(0); + if (Value.getValueType().getSizeInBits() == 64) + Value = WidenVector(Value, DAG); + + unsigned Opcode = getDUPLANEOp(VT.getVectorElementType()); + return DAG.getNode(Opcode, dl, VT, Value, Lane); + } + + if (VT.getVectorElementType().isFloatingPoint()) { + SmallVector Ops; + MVT NewType = + (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i))); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); + Val = LowerBUILD_VECTOR(Val, DAG); + if (Val.getNode()) + return DAG.getNode(ISD::BITCAST, dl, VT, Val); } - if (usesOnlyOneValue && isConstant) { - return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); + } + + // If there was only one constant value used and for more than one lane, + // start by splatting that value, then replace the non-constant lanes. This + // is better than the default, which will perform a separate initialization + // for each lane. + if (NumConstantLanes > 0 && usesOnlyOneConstantValue) { + SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue); + // Now insert the non-constant lanes. 
+ for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + SDValue LaneIdx = DAG.getConstant(i, MVT::i64); + if (!isa(V) && !isa(V)) { + // Note that type legalization likely mucked about with the VT of the + // source operand, so we may have to convert it here before inserting. + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx); + } } + return Val; } + // If all elements are constants and the case above didn't get hit, fall back // to the default expansion, which will generate a load from the constant // pool. if (isConstant) return SDValue(); - // Try to lower this in lowering ShuffleVector way. - SDValue V0, V1; - int Mask[16]; - if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) { - unsigned V0NumElts = V0.getValueType().getVectorNumElements(); - if (!V1.getNode() && V0NumElts == NumElts * 2) { - V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(NumElts, MVT::i64)); - V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, - DAG.getConstant(0, MVT::i64)); - V0NumElts = V0.getValueType().getVectorNumElements(); - } - - if (V1.getNode() && NumElts == V0NumElts && - V0NumElts == V1.getValueType().getVectorNumElements()) { - SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); - if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) - return Shuffle; - else - return LowerVECTOR_SHUFFLE(Shuffle, DAG); - } else { - SDValue Res; - if (isConcatVector(Op, DAG, V0, V1, Mask, Res)) - return Res; - } + // Empirical tests suggest this is rarely worth it for vectors of length <= 2. + if (NumElts >= 4) { + SDValue shuffle = ReconstructShuffle(Op, DAG); + if (shuffle != SDValue()) + return shuffle; } // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we @@ -4849,550 +5519,2419 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // on the stack followed by a load for everything else. if (!isConstant && !usesOnlyOneValue) { SDValue Vec = DAG.getUNDEF(VT); - for (unsigned i = 0 ; i < NumElts; ++i) { + SDValue Op0 = Op.getOperand(0); + unsigned ElemSize = VT.getVectorElementType().getSizeInBits(); + unsigned i = 0; + // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to + // a) Avoid a RMW dependency on the full vector register, and + // b) Allow the register coalescer to fold away the copy if the + // value is already in an S or D register. + if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) { + unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub; + MachineSDNode *N = + DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, + DAG.getTargetConstant(SubIdx, MVT::i32)); + Vec = SDValue(N, 0); + ++i; + } + for (; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.getOpcode() == ISD::UNDEF) continue; SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); } return Vec; } + + // Just use the default expansion. We failed to find a better alternative. return SDValue(); } -/// isREVMask - Check if a vector shuffle corresponds to a REV -/// instruction with the specified blocksize. (The order of the elements -/// within each block of the vector is reversed.) 
-static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { - assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && - "Only possible block sizes for REV are: 16, 32, 64"); +SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); - unsigned EltSz = VT.getVectorElementType().getSizeInBits(); - if (EltSz == 64) - return false; + // Check for non-constant lane. + if (!isa(Op.getOperand(2))) + return SDValue(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned BlockElts = M[0] + 1; - // If the first shuffle index is UNDEF, be optimistic. - if (M[0] < 0) - BlockElts = BlockSize / EltSz; + EVT VT = Op.getOperand(0).getValueType(); - if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) - return false; + // Insertion/extraction are legal for V128 types. + if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || + VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) + return Op; - for (unsigned i = 0; i < NumElts; ++i) { - if (M[i] < 0) - continue; // ignore UNDEF indices - if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) - return false; - } + if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && + VT != MVT::v1i64 && VT != MVT::v2f32) + return SDValue(); - return true; + // For V64 types, we perform insertion by expanding the value + // to a V128 type and perform the insertion on that. + SDLoc DL(Op); + SDValue WideVec = WidenVector(Op.getOperand(0), DAG); + EVT WideTy = WideVec.getValueType(); + + SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec, + Op.getOperand(1), Op.getOperand(2)); + // Re-narrow the resultant vector. + return NarrowVector(Node, DAG); +} + +SDValue +AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); + + // Check for non-constant lane. + if (!isa(Op.getOperand(1))) + return SDValue(); + + EVT VT = Op.getOperand(0).getValueType(); + + // Insertion/extraction are legal for V128 types. + if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || + VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) + return Op; + + if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && + VT != MVT::v1i64 && VT != MVT::v2f32) + return SDValue(); + + // For V64 types, we perform extraction by expanding the value + // to a V128 type and perform the extraction on that. + SDLoc DL(Op); + SDValue WideVec = WidenVector(Op.getOperand(0), DAG); + EVT WideTy = WideVec.getValueType(); + + EVT ExtrTy = WideTy.getVectorElementType(); + if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8) + ExtrTy = MVT::i32; + + // For extractions, we just return the result directly. + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec, + Op.getOperand(1)); +} + +SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getOperand(0).getValueType(); + SDLoc dl(Op); + // Just in case... 
+ if (!VT.isVector()) + return SDValue(); + + ConstantSDNode *Cst = dyn_cast(Op.getOperand(1)); + if (!Cst) + return SDValue(); + unsigned Val = Cst->getZExtValue(); + + unsigned Size = Op.getValueType().getSizeInBits(); + if (Val == 0) { + switch (Size) { + case 8: + return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(), + Op.getOperand(0)); + case 16: + return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(), + Op.getOperand(0)); + case 32: + return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(), + Op.getOperand(0)); + case 64: + return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(), + Op.getOperand(0)); + default: + llvm_unreachable("Unexpected vector type in extract_subvector!"); + } + } + // If this is extracting the upper 64-bits of a 128-bit vector, we match + // that directly. + if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64) + return Op; + + return SDValue(); +} + +bool AArch64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, + EVT VT) const { + if (VT.getVectorNumElements() == 4 && + (VT.is128BitVector() || VT.is64BitVector())) { + unsigned PFIndexes[4]; + for (unsigned i = 0; i != 4; ++i) { + if (M[i] < 0) + PFIndexes[i] = 8; + else + PFIndexes[i] = M[i]; + } + + // Compute the index in the perfect shuffle table. + unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + + PFIndexes[2] * 9 + PFIndexes[3]; + unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; + unsigned Cost = (PFEntry >> 30); + + if (Cost <= 4) + return true; + } + + bool DummyBool; + int DummyInt; + unsigned DummyUnsigned; + + return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) || + isREVMask(M, VT, 32) || isREVMask(M, VT, 16) || + isEXTMask(M, VT, DummyBool, DummyUnsigned) || + // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM. + isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) || + isZIPMask(M, VT, DummyUnsigned) || + isTRN_v_undef_Mask(M, VT, DummyUnsigned) || + isUZP_v_undef_Mask(M, VT, DummyUnsigned) || + isZIP_v_undef_Mask(M, VT, DummyUnsigned) || + isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) || + isConcatMask(M, VT, VT.getSizeInBits() == 128)); +} + +/// getVShiftImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift operation, where all the elements of the +/// build_vector must have the same constant integer value. +static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { + // Ignore bit_converts. + while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, + HasAnyUndefs, ElementBits) || + SplatBitSize > ElementBits) + return false; + Cnt = SplatBits.getSExtValue(); + return true; +} + +/// isVShiftLImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift left operation. That value must be in the range: +/// 0 <= Value < ElementBits for a left shift; or +/// 0 <= Value <= ElementBits for a long left shift. +static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (!getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 0 && (isLong ? 
Cnt - 1 : Cnt) < ElementBits); +} + +/// isVShiftRImm - Check if this is a valid build_vector for the immediate +/// operand of a vector shift right operation. For a shift opcode, the value +/// is positive, but for an intrinsic the value count must be negative. The +/// absolute value must be in the range: +/// 1 <= |Value| <= ElementBits for a right shift; or +/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. +static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, + int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (!getVShiftImm(Op, ElementBits, Cnt)) + return false; + if (isIntrinsic) + Cnt = -Cnt; + return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits)); +} + +SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, + SelectionDAG &DAG) const { + EVT VT = Op.getValueType(); + SDLoc DL(Op); + int64_t Cnt; + + if (!Op.getOperand(1).getValueType().isVector()) + return Op; + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + + switch (Op.getOpcode()) { + default: + llvm_unreachable("unexpected shift opcode"); + + case ISD::SHL: + if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) + return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), + DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32), + Op.getOperand(0), Op.getOperand(1)); + case ISD::SRA: + case ISD::SRL: + // Right shift immediate + if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) && + Cnt < EltSize) { + unsigned Opc = + (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; + return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0), + DAG.getConstant(Cnt, MVT::i32)); + } + + // Right shift register. Note, there is not a shift right register + // instruction, but the shift left register instruction takes a signed + // value, where negative numbers specify a right shift. + unsigned Opc = (Op.getOpcode() == ISD::SRA) ? 
Intrinsic::aarch64_neon_sshl
+                                                : Intrinsic::aarch64_neon_ushl;
+    // Negate the shift amount.
+    SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
+    SDValue NegShiftLeft =
+        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                    DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift);
+    return NegShiftLeft;
+  }
+
+  return SDValue();
+}
+
+static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
+                                    AArch64CC::CondCode CC, bool NoNans, EVT VT,
+                                    SDLoc dl, SelectionDAG &DAG) {
+  EVT SrcVT = LHS.getValueType();
+
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
+  APInt CnstBits(VT.getSizeInBits(), 0);
+  APInt UndefBits(VT.getSizeInBits(), 0);
+  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
+  bool IsZero = IsCnst && (CnstBits == 0);
+
+  if (SrcVT.getVectorElementType().isFloatingPoint()) {
+    switch (CC) {
+    default:
+      return SDValue();
+    case AArch64CC::NE: {
+      SDValue Fcmeq;
+      if (IsZero)
+        Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
+      else
+        Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+      return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
+    }
+    case AArch64CC::EQ:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
+      return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+    case AArch64CC::GE:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
+      return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
+    case AArch64CC::GT:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
+      return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
+    case AArch64CC::LS:
+      if (IsZero)
+        return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
+      return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
+    case AArch64CC::LT:
+      if (!NoNans)
+        return SDValue();
+      // If we ignore NaNs then we can use the MI implementation.
+      // Fallthrough.
+ case AArch64CC::MI: + if (IsZero) + return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS); + } + } + + switch (CC) { + default: + return SDValue(); + case AArch64CC::NE: { + SDValue Cmeq; + if (IsZero) + Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS); + else + Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS); + return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq); + } + case AArch64CC::EQ: + if (IsZero) + return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS); + case AArch64CC::GE: + if (IsZero) + return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS); + case AArch64CC::GT: + if (IsZero) + return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS); + case AArch64CC::LE: + if (IsZero) + return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS); + case AArch64CC::LS: + return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS); + case AArch64CC::LO: + return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS); + case AArch64CC::LT: + if (IsZero) + return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS); + return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS); + case AArch64CC::HI: + return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS); + case AArch64CC::HS: + return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS); + } +} + +SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op, + SelectionDAG &DAG) const { + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDLoc dl(Op); + + if (LHS.getValueType().getVectorElementType().isInteger()) { + assert(LHS.getValueType() == RHS.getValueType()); + AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC); + return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(), + dl, DAG); + } + + assert(LHS.getValueType().getVectorElementType() == MVT::f32 || + LHS.getValueType().getVectorElementType() == MVT::f64); + + // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally + // clean. Some of them require two branches to implement. + AArch64CC::CondCode CC1, CC2; + bool ShouldInvert; + changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert); + + bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath; + SDValue Cmp = + EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG); + if (!Cmp.getNode()) + return SDValue(); + + if (CC2 != AArch64CC::AL) { + SDValue Cmp2 = + EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG); + if (!Cmp2.getNode()) + return SDValue(); + + Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2); + } + + if (ShouldInvert) + return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType()); + + return Cmp; +} + +/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as +/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment +/// specified in the intrinsic calls. 
+bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const { + switch (Intrinsic) { + case Intrinsic::aarch64_neon_ld2: + case Intrinsic::aarch64_neon_ld3: + case Intrinsic::aarch64_neon_ld4: + case Intrinsic::aarch64_neon_ld1x2: + case Intrinsic::aarch64_neon_ld1x3: + case Intrinsic::aarch64_neon_ld1x4: + case Intrinsic::aarch64_neon_ld2lane: + case Intrinsic::aarch64_neon_ld3lane: + case Intrinsic::aarch64_neon_ld4lane: + case Intrinsic::aarch64_neon_ld2r: + case Intrinsic::aarch64_neon_ld3r: + case Intrinsic::aarch64_neon_ld4r: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + // Conservatively set memVT to the entire set of vectors loaded. + uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); + Info.offset = 0; + Info.align = 0; + Info.vol = false; // volatile loads with NEON intrinsics not supported + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::aarch64_neon_st2: + case Intrinsic::aarch64_neon_st3: + case Intrinsic::aarch64_neon_st4: + case Intrinsic::aarch64_neon_st1x2: + case Intrinsic::aarch64_neon_st1x3: + case Intrinsic::aarch64_neon_st1x4: + case Intrinsic::aarch64_neon_st2lane: + case Intrinsic::aarch64_neon_st3lane: + case Intrinsic::aarch64_neon_st4lane: { + Info.opc = ISD::INTRINSIC_VOID; + // Conservatively set memVT to the entire set of vectors stored. + unsigned NumElts = 0; + for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { + Type *ArgTy = I.getArgOperand(ArgI)->getType(); + if (!ArgTy->isVectorTy()) + break; + NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; + } + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); + Info.offset = 0; + Info.align = 0; + Info.vol = false; // volatile stores with NEON intrinsics not supported + Info.readMem = false; + Info.writeMem = true; + return true; + } + case Intrinsic::aarch64_ldaxr: + case Intrinsic::aarch64_ldxr: { + PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: { + PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = false; + Info.writeMem = true; + return true; + } + case Intrinsic::aarch64_ldaxp: + case Intrinsic::aarch64_ldxp: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::i128; + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = 16; + Info.vol = true; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::aarch64_stlxp: + case Intrinsic::aarch64_stxp: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::i128; + Info.ptrVal = I.getArgOperand(2); + Info.offset = 0; + Info.align = 16; + Info.vol = true; + Info.readMem = false; + Info.writeMem = true; + return true; + 
} + default: + break; + } + + return false; +} + +// Truncations from 64-bit GPR to 32-bit GPR is free. +bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); + unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); + if (NumBits1 <= NumBits2) + return false; + return true; +} +bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isInteger() || !VT2.isInteger()) + return false; + unsigned NumBits1 = VT1.getSizeInBits(); + unsigned NumBits2 = VT2.getSizeInBits(); + if (NumBits1 <= NumBits2) + return false; + return true; +} + +// All 32-bit GPR operations implicitly zero the high-half of the corresponding +// 64-bit GPR. +bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); + unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); + if (NumBits1 == 32 && NumBits2 == 64) + return true; + return false; +} +bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { + if (!VT1.isInteger() || !VT2.isInteger()) + return false; + unsigned NumBits1 = VT1.getSizeInBits(); + unsigned NumBits2 = VT2.getSizeInBits(); + if (NumBits1 == 32 && NumBits2 == 64) + return true; + return false; +} + +bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { + EVT VT1 = Val.getValueType(); + if (isZExtFree(VT1, VT2)) { + return true; + } + + if (Val.getOpcode() != ISD::LOAD) + return false; + + // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. + return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() && + VT2.isInteger() && VT1.getSizeInBits() <= 32); +} + +bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, + unsigned &RequiredAligment) const { + if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy()) + return false; + // Cyclone supports unaligned accesses. + RequiredAligment = 0; + unsigned NumBits = LoadedType->getPrimitiveSizeInBits(); + return NumBits == 32 || NumBits == 64; +} + +bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType, + unsigned &RequiredAligment) const { + if (!LoadedType.isSimple() || + (!LoadedType.isInteger() && !LoadedType.isFloatingPoint())) + return false; + // Cyclone supports unaligned accesses. + RequiredAligment = 0; + unsigned NumBits = LoadedType.getSizeInBits(); + return NumBits == 32 || NumBits == 64; +} + +static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, + unsigned AlignCheck) { + return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && + (DstAlign == 0 || DstAlign % AlignCheck == 0)); +} + +EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsMemset, + bool ZeroMemset, + bool MemcpyStrSrc, + MachineFunction &MF) const { + // Don't use AdvSIMD to implement 16-byte memset. It would have taken one + // instruction to materialize the v2i64 zero and one store (with restrictive + // addressing mode). Just do two i64 store of zero-registers. + bool Fast; + const Function *F = MF.getFunction(); + if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 && + !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoImplicitFloat) && + (memOpAlign(SrcAlign, DstAlign, 16) || + (allowsUnalignedMemoryAccesses(MVT::f128, 0, &Fast) && Fast))) + return MVT::f128; + + return Size >= 8 ? 
MVT::i64 : MVT::i32; +} + +// 12-bit optionally shifted immediates are legal for adds. +bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { + if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)) + return true; + return false; +} + +// Integer comparisons are implemented with ADDS/SUBS, so the range of valid +// immediates is the same as for an add or a sub. +bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const { + if (Immed < 0) + Immed *= -1; + return isLegalAddImmediate(Immed); +} + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const { + // AArch64 has five basic addressing modes: + // reg + // reg + 9-bit signed offset + // reg + SIZE_IN_BYTES * 12-bit unsigned offset + // reg1 + reg2 + // reg + SIZE_IN_BYTES * reg + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // No reg+reg+imm addressing. + if (AM.HasBaseReg && AM.BaseOffs && AM.Scale) + return false; + + // check reg + imm case: + // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12 + uint64_t NumBytes = 0; + if (Ty->isSized()) { + uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty); + NumBytes = NumBits / 8; + if (!isPowerOf2_64(NumBits)) + NumBytes = 0; + } + + if (!AM.Scale) { + int64_t Offset = AM.BaseOffs; + + // 9-bit signed offset + if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1) + return true; + + // 12-bit unsigned offset + unsigned shift = Log2_64(NumBytes); + if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 && + // Must be a multiple of NumBytes (NumBytes is a power of 2) + (Offset >> shift) << shift == Offset) + return true; + return false; + } + + // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2 + + if (!AM.Scale || AM.Scale == 1 || + (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes)) + return true; + return false; +} + +int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM, + Type *Ty) const { + // Scaling factors are not free at all. + // Operands | Rt Latency + // ------------------------------------------- + // Rt, [Xn, Xm] | 4 + // ------------------------------------------- + // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5 + // Rt, [Xn, Wm, #imm] | + if (isLegalAddressingMode(AM, Ty)) + // Scale represents reg2 * scale, thus account for 1 if + // it is not equal to 0 or 1. + return AM.Scale != 0 && AM.Scale != 1; + return -1; +} + +bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { + VT = VT.getScalarType(); + + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + case MVT::f64: + return true; + default: + break; + } + + return false; +} + +const MCPhysReg * +AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const { + // LR is a callee-save register, but we must treat it as clobbered by any call + // site. Hence we include LR in the scratch registers, which are in turn added + // as implicit-defs for stackmaps and patchpoints. + static const MCPhysReg ScratchRegs[] = { + AArch64::X16, AArch64::X17, AArch64::LR, 0 + }; + return ScratchRegs; +} + +bool +AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const { + EVT VT = N->getValueType(0); + // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine + // it with shift to let it be lowered to UBFX. 
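(Editorial aside, not from the patch: the guarded pattern can be made concrete with a small standalone model. The ubfx helper below is a hypothetical software rendering of the AArch64 instruction; the point is that (x >> C) & mask, with mask a contiguous low-bit mask, is a single unsigned bitfield extract, so commuting the AND with the shift would destroy a one-instruction idiom.)

    #include <cassert>
    #include <cstdint>

    // Software model of UBFX Xd, Xn, #Lsb, #Width (assumes 0 < Width <= 64).
    static uint64_t ubfx(uint64_t X, unsigned Lsb, unsigned Width) {
      uint64_t Mask = (Width == 64) ? ~0ULL : ((1ULL << Width) - 1);
      return (X >> Lsb) & Mask;
    }

    int main() {
      uint64_t X = 0xDEADBEEFCAFEF00DULL; // hypothetical input
      // ((x >> 8) & 0xffff) has the DAG shape (and (srl x, 8), 0xffff); it
      // maps to UBFX X, #8, #16, so the combine above must leave it alone.
      assert(((X >> 8) & 0xffff) == ubfx(X, 8, 16));
      return 0;
    }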
+  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
+      isa<ConstantSDNode>(N->getOperand(1))) {
+    uint64_t TruncMask = N->getConstantOperandVal(1);
+    if (isMask_64(TruncMask) &&
+        N->getOperand(0).getOpcode() == ISD::SRL &&
+        isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
+      return false;
+  }
+  return true;
+}
+
+bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+                                                              Type *Ty) const {
+  assert(Ty->isIntegerTy());
+
+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  if (BitSize == 0)
+    return false;
+
+  int64_t Val = Imm.getSExtValue();
+  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
+    return true;
+
+  if ((int64_t)Val < 0)
+    Val = ~Val;
+  if (BitSize == 32)
+    Val &= (1LL << 32) - 1;
+
+  unsigned LZ = countLeadingZeros((uint64_t)Val);
+  unsigned Shift = (63 - LZ) / 16;
+  // MOVZ is free so return true for one or fewer MOVK.
+  return (Shift < 3) ? true : false;
+}
+
+// Generate SUBS and CSEL for integer abs.
+static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDLoc DL(N);
+
+  // Check for the pattern XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
+  // and change it to SUB and CSEL.
+  if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
+      N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
+    if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
+      if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
+        SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+                                  N0.getOperand(0));
+        // Generate SUBS & CSEL.
+        SDValue Cmp =
+            DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
+                        N0.getOperand(0), DAG.getConstant(0, VT));
+        return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
+                           DAG.getConstant(AArch64CC::PL, MVT::i32),
+                           SDValue(Cmp.getNode(), 1));
+      }
+  return SDValue();
+}
+
+// performXorCombine - Attempts to handle integer ABS.
+static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const AArch64Subtarget *Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  return performIntegerAbsCombine(N, DAG);
+}
+
+static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const AArch64Subtarget *Subtarget) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  // Multiplication by a power of two plus/minus one can be done more
+  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
+  // future CPUs have a cheaper MADD instruction, this may need to be
+  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
+  // 64-bit is 5 cycles, so this is always a win.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+    APInt Value = C->getAPIntValue();
+    EVT VT = N->getValueType(0);
+    APInt VP1 = Value + 1;
+    if (VP1.isPowerOf2()) {
+      // Multiplying by one less than a power of two, replace with a shift
+      // and a subtract.
+      SDValue ShiftedVal =
+          DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+                      DAG.getConstant(VP1.logBase2(), MVT::i64));
+      return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+    }
+    APInt VM1 = Value - 1;
+    if (VM1.isPowerOf2()) {
+      // Multiplying by one more than a power of two, replace with a shift
+      // and an add.
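(Editorial aside, not part of the patch: a one-line sanity check of the arithmetic behind both branches, with hypothetical constants. x*7 folds to (x<<3)-x because 7+1 is a power of two, and x*9 folds to (x<<3)+x because 9-1 is.)

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint64_t X = 0; X < 1000; ++X) {
        assert(X * 7 == (X << 3) - X); // Value + 1 == 8: shift then subtract
        assert(X * 9 == (X << 3) + X); // Value - 1 == 8: shift then add
      }
      return 0;
    }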
+      SDValue ShiftedVal =
+          DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+                      DAG.getConstant(VM1.logBase2(), MVT::i64));
+      return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+    }
+  }
+  return SDValue();
+}
+
+static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return SDValue();
+  // Only optimize when the source and destination types have the same width.
+  if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
+    return SDValue();
+
+  // If the result of an integer load is only used by an integer-to-float
+  // conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
+  // This eliminates an integer-to-vector-move UOP and improves throughput.
+  SDValue N0 = N->getOperand(0);
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+      // Do not change the width of a volatile load.
+      !cast<LoadSDNode>(N0)->isVolatile()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+                               LN0->getPointerInfo(), LN0->isVolatile(),
+                               LN0->isNonTemporal(), LN0->isInvariant(),
+                               LN0->getAlignment());
+
+    // Make sure successors of the original load stay after it by updating them
+    // to use the new Chain.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
+
+    unsigned Opcode =
+        (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
+    return DAG.getNode(Opcode, SDLoc(N), VT, Load);
+  }
+
+  return SDValue();
+}
+
+/// An EXTR instruction is made up of two shifts, ORed together. This helper
+/// searches for and classifies those shifts.
+static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
+                         bool &FromHi) {
+  if (N.getOpcode() == ISD::SHL)
+    FromHi = false;
+  else if (N.getOpcode() == ISD::SRL)
+    FromHi = true;
+  else
+    return false;
+
+  if (!isa<ConstantSDNode>(N.getOperand(1)))
+    return false;
+
+  ShiftAmount = N->getConstantOperandVal(1);
+  Src = N->getOperand(0);
+  return true;
+}
+
+/// The EXTR instruction extracts a contiguous chunk of bits from two existing
+/// registers viewed as a high/low pair. This function looks for the pattern:
+/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
+/// EXTR. Can't quite be done in TableGen because the two immediates aren't
+/// independent.
+static SDValue tryCombineToEXTR(SDNode *N,
+                                TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  assert(N->getOpcode() == ISD::OR && "Unexpected root");
+
+  if (VT != MVT::i32 && VT != MVT::i64)
+    return SDValue();
+
+  SDValue LHS;
+  uint32_t ShiftLHS = 0;
+  bool LHSFromHi = false;
+  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
+    return SDValue();
+
+  SDValue RHS;
+  uint32_t ShiftRHS = 0;
+  bool RHSFromHi = false;
+  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
+    return SDValue();
+
+  // If they're both trying to come from the high part of the register, they're
+  // not really an EXTR.
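(Editorial aside, not from the patch: for readers unfamiliar with EXTR, the following standalone model, with hypothetical values and an extr helper of my own, shows the window the instruction extracts and why the two shift amounts must sum to the register width.)

    #include <cassert>
    #include <cstdint>

    // Software model of EXTR Xd, Xhi, Xlo, #Lsb: bits [Lsb+63:Lsb] of the
    // 128-bit concatenation Xhi:Xlo (assumes 0 < Lsb < 64).
    static uint64_t extr(uint64_t Hi, uint64_t Lo, unsigned Lsb) {
      return (Hi << (64 - Lsb)) | (Lo >> Lsb);
    }

    int main() {
      uint64_t A = 0x0123456789ABCDEFULL, B = 0xFEDCBA9876543210ULL;
      unsigned N = 16;
      // (or (shl A, #N), (srl B, #(64-N))) is the matched pattern; the shift
      // counts sum to 64, which is exactly the ShiftLHS + ShiftRHS check.
      assert(((A << N) | (B >> (64 - N))) == extr(A, B, 64 - N));
      return 0;
    }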
+ if (LHSFromHi == RHSFromHi) + return SDValue(); + + if (ShiftLHS + ShiftRHS != VT.getSizeInBits()) + return SDValue(); + + if (LHSFromHi) { + std::swap(LHS, RHS); + std::swap(ShiftLHS, ShiftRHS); + } + + return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS, + DAG.getConstant(ShiftRHS, MVT::i64)); +} + +static SDValue tryCombineToBSL(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + + if (!VT.isVector()) + return SDValue(); + + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::AND) + return SDValue(); + + SDValue N1 = N->getOperand(1); + if (N1.getOpcode() != ISD::AND) + return SDValue(); + + // We only have to look for constant vectors here since the general, variable + // case can be handled in TableGen. + unsigned Bits = VT.getVectorElementType().getSizeInBits(); + uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1); + for (int i = 1; i >= 0; --i) + for (int j = 1; j >= 0; --j) { + BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(i)); + BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(j)); + if (!BVN0 || !BVN1) + continue; + + bool FoundMatch = true; + for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) { + ConstantSDNode *CN0 = dyn_cast(BVN0->getOperand(k)); + ConstantSDNode *CN1 = dyn_cast(BVN1->getOperand(k)); + if (!CN0 || !CN1 || + CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) { + FoundMatch = false; + break; + } + } + + if (FoundMatch) + return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0), + N0->getOperand(1 - i), N1->getOperand(1 - j)); + } + + return SDValue(); +} + +static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) + if (!EnableAArch64ExtrGeneration) + return SDValue(); + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + SDValue Res = tryCombineToEXTR(N, DCI); + if (Res.getNode()) + return Res; + + Res = tryCombineToBSL(N, DCI); + if (Res.getNode()) + return Res; + + return SDValue(); +} + +static SDValue performBitcastCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + // Wait 'til after everything is legalized to try this. That way we have + // legal vector types and such. + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + // Remove extraneous bitcasts around an extract_subvector. + // For example, + // (v4i16 (bitconvert + // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1))))) + // becomes + // (extract_subvector ((v8i16 ...), (i64 4))) + + // Only interested in 64-bit vectors as the ultimate result. + EVT VT = N->getValueType(0); + if (!VT.isVector()) + return SDValue(); + if (VT.getSimpleVT().getSizeInBits() != 64) + return SDValue(); + // Is the operand an extract_subvector starting at the beginning or halfway + // point of the vector? A low half may also come through as an + // EXTRACT_SUBREG, so look for that, too. 
+ SDValue Op0 = N->getOperand(0); + if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR && + !(Op0->isMachineOpcode() && + Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG)) + return SDValue(); + uint64_t idx = cast(Op0->getOperand(1))->getZExtValue(); + if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) { + if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0) + return SDValue(); + } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) { + if (idx != AArch64::dsub) + return SDValue(); + // The dsub reference is equivalent to a lane zero subvector reference. + idx = 0; + } + // Look through the bitcast of the input to the extract. + if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST) + return SDValue(); + SDValue Source = Op0->getOperand(0)->getOperand(0); + // If the source type has twice the number of elements as our destination + // type, we know this is an extract of the high or low half of the vector. + EVT SVT = Source->getValueType(0); + if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2) + return SDValue(); + + DEBUG(dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n"); + + // Create the simplified form to just extract the low or high half of the + // vector directly rather than bothering with the bitcasts. + SDLoc dl(N); + unsigned NumElements = VT.getVectorNumElements(); + if (idx) { + SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx); + } else { + SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32); + return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT, + Source, SubReg), + 0); + } +} + +static SDValue performConcatVectorsCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + // Wait 'til after everything is legalized to try this. That way we have + // legal vector types and such. + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDLoc dl(N); + EVT VT = N->getValueType(0); + + // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector + // splat. The indexed instructions are going to be expecting a DUPLANE64, so + // canonicalise to that. + if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) { + assert(VT.getVectorElementType().getSizeInBits() == 64); + return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, + WidenVector(N->getOperand(0), DAG), + DAG.getConstant(0, MVT::i64)); + } + + // Canonicalise concat_vectors so that the right-hand vector has as few + // bit-casts as possible before its real operation. The primary matching + // destination for these operations will be the narrowing "2" instructions, + // which depend on the operation being performed on this right-hand vector. + // For example, + // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS)))) + // becomes + // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS)) + + SDValue Op1 = N->getOperand(1); + if (Op1->getOpcode() != ISD::BITCAST) + return SDValue(); + SDValue RHS = Op1->getOperand(0); + MVT RHSTy = RHS.getValueType().getSimpleVT(); + // If the RHS is not a vector, this is not the pattern we're looking for. 
+ if (!RHSTy.isVector()) + return SDValue(); + + DEBUG(dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n"); + + MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(), + RHSTy.getVectorNumElements() * 2); + return DAG.getNode( + ISD::BITCAST, dl, VT, + DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy, + DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS)); +} + +static SDValue tryCombineFixedPointConvert(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + // Wait 'til after everything is legalized to try this. That way we have + // legal vector types and such. + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + // Transform a scalar conversion of a value from a lane extract into a + // lane extract of a vector conversion. E.g., from foo1 to foo2: + // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); } + // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; } + // + // The second form interacts better with instruction selection and the + // register allocator to avoid cross-class register copies that aren't + // coalescable due to a lane reference. + + // Check the operand and see if it originates from a lane extract. + SDValue Op1 = N->getOperand(1); + if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + // Yep, no additional predication needed. Perform the transform. + SDValue IID = N->getOperand(0); + SDValue Shift = N->getOperand(2); + SDValue Vec = Op1.getOperand(0); + SDValue Lane = Op1.getOperand(1); + EVT ResTy = N->getValueType(0); + EVT VecResTy; + SDLoc DL(N); + + // The vector width should be 128 bits by the time we get here, even + // if it started as 64 bits (the extract_vector handling will have + // done so). + assert(Vec.getValueType().getSizeInBits() == 128 && + "unexpected vector size on extract_vector_elt!"); + if (Vec.getValueType() == MVT::v4i32) + VecResTy = MVT::v4f32; + else if (Vec.getValueType() == MVT::v2i64) + VecResTy = MVT::v2f64; + else + assert(0 && "unexpected vector type!"); + + SDValue Convert = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane); + } + return SDValue(); } -// isPermuteMask - Check whether the vector shuffle matches to UZP, ZIP and -// TRN instruction. -static unsigned isPermuteMask(ArrayRef M, EVT VT, bool isV2undef) { - unsigned NumElts = VT.getVectorNumElements(); - if (NumElts < 4) - return 0; +// AArch64 high-vector "long" operations are formed by performing the non-high +// version on an extract_subvector of each operand which gets the high half: +// +// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS)) +// +// However, there are cases which don't have an extract_high explicitly, but +// have another operation that can be made compatible with one for free. For +// example: +// +// (dupv64 scalar) --> (extract_high (dup128 scalar)) +// +// This routine does the actual conversion of such DUPs, once outer routines +// have determined that everything else is in order. +static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) { + // We can handle most types of duplicate, but the lane ones have an extra + // operand saying *which* lane, so we need to know. 
+  bool IsDUPLANE;
+  switch (N.getOpcode()) {
+  case AArch64ISD::DUP:
+    IsDUPLANE = false;
+    break;
+  case AArch64ISD::DUPLANE8:
+  case AArch64ISD::DUPLANE16:
+  case AArch64ISD::DUPLANE32:
+  case AArch64ISD::DUPLANE64:
+    IsDUPLANE = true;
+    break;
+  default:
+    return SDValue();
+  }
+
+  MVT NarrowTy = N.getSimpleValueType();
+  if (!NarrowTy.is64BitVector())
+    return SDValue();
+
+  MVT ElementTy = NarrowTy.getVectorElementType();
+  unsigned NumElems = NarrowTy.getVectorNumElements();
+  MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
+
+  SDValue NewDUP;
+  if (IsDUPLANE)
+    NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0),
+                         N.getOperand(1));
+  else
+    NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0));
+
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy,
+                     NewDUP, DAG.getConstant(NumElems, MVT::i64));
+}
+
+static bool isEssentiallyExtractSubvector(SDValue N) {
+  if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+    return true;
+
+  return N.getOpcode() == ISD::BITCAST &&
+         N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
+}
+
+/// \brief Helper structure to keep track of ISD::SET_CC operands.
+struct GenericSetCCInfo {
+  const SDValue *Opnd0;
+  const SDValue *Opnd1;
+  ISD::CondCode CC;
+};
+
+/// \brief Helper structure to keep track of a SET_CC lowered into AArch64 code.
+struct AArch64SetCCInfo {
+  const SDValue *Cmp;
+  AArch64CC::CondCode CC;
+};
+
+/// \brief Helper structure to keep track of SetCC information.
+union SetCCInfo {
+  GenericSetCCInfo Generic;
+  AArch64SetCCInfo AArch64;
+};
+
+/// \brief Helper structure to be able to read SetCC information. If the
+/// IsAArch64 field is set to true, Info is an AArch64SetCCInfo, otherwise it
+/// is a GenericSetCCInfo.
+struct SetCCInfoAndKind {
+  SetCCInfo Info;
+  bool IsAArch64;
+};
+
+/// \brief Check whether or not \p Op is a SET_CC operation, either a generic
+/// or an AArch64 lowered one.
+/// \p SetCCInfo is filled accordingly.
+/// \post SetCCInfo is meaningful only when this function returns true.
+/// \return True when Op is a kind of SET_CC operation.
+static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
+  // If this is a setcc, it is straightforward.
+  if (Op.getOpcode() == ISD::SETCC) {
+    SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
+    SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
+    SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+    SetCCInfo.IsAArch64 = false;
+    return true;
+  }
+  // Otherwise, check if this is a matching csel instruction.
+  // In other words:
+  //   - csel 1, 0, cc
+  //   - csel 0, 1, !cc
+  if (Op.getOpcode() != AArch64ISD::CSEL)
+    return false;
+  // Set the information about the operands.
+  // TODO: we want the operands of the Cmp not the csel
+  SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
+  SetCCInfo.IsAArch64 = true;
+  SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
+      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // Check that the operands match the constraints:
+  // (1) Both operands must be constants.
+  // (2) One must be 1 and the other must be 0.
+  ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+  ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+
+  // Check (1).
+  if (!TValue || !FValue)
+    return false;
+
+  // Check (2).
+  if (!TValue->isOne()) {
+    // Update the comparison when we are interested in !cc.
+    std::swap(TValue, FValue);
+    SetCCInfo.Info.AArch64.CC =
+        AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
+  }
+  return TValue->isOne() && FValue->isNullValue();
+}
+
+// Returns true if Op is setcc or zext of setcc.
+static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
+  if (isSetCC(Op, Info))
+    return true;
+  return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
+          isSetCC(Op->getOperand(0), Info));
+}
+
+// The folding we want to perform is:
+//   (add x, [zext] (setcc cc ...) )
+//   -->
+//   (csel x, (add x, 1), !cc ...)
+//
+// The latter will get matched to a CSINC instruction.
+static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
+  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
+  SDValue LHS = Op->getOperand(0);
+  SDValue RHS = Op->getOperand(1);
+  SetCCInfoAndKind InfoAndKind;
+
+  // If neither operand is a SET_CC, give up.
+  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
+    std::swap(LHS, RHS);
+    if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
+      return SDValue();
+  }
+
+  // FIXME: This could be generalized to work for FP comparisons.
+  EVT CmpVT = InfoAndKind.IsAArch64
+                  ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
+                  : InfoAndKind.Info.Generic.Opnd0->getValueType();
+  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
+    return SDValue();
+
+  SDValue CCVal;
+  SDValue Cmp;
+  SDLoc dl(Op);
+  if (InfoAndKind.IsAArch64) {
+    CCVal = DAG.getConstant(
+        AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32);
+    Cmp = *InfoAndKind.Info.AArch64.Cmp;
+  } else
+    Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
+                        *InfoAndKind.Info.Generic.Opnd1,
+                        ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
+                        CCVal, DAG, dl);
+
+  EVT VT = Op->getValueType(0);
+  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT));
+  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
+}
+
+// The basic add/sub long vector instructions have variants with "2" on the end
+// which act on the high-half of their inputs. They are normally matched by
+// patterns like:
+//
+//   (add (zeroext (extract_high LHS)),
+//        (zeroext (extract_high RHS)))
+//   -> uaddl2 vD, vN, vM
+//
+// However, if one of the extracts is something like a duplicate, this
+// instruction can still be used profitably. This function puts the DAG into a
+// more appropriate form for those patterns to trigger.
+static SDValue performAddSubLongCombine(SDNode *N,
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        SelectionDAG &DAG) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  MVT VT = N->getSimpleValueType(0);
+  if (!VT.is128BitVector()) {
+    if (N->getOpcode() == ISD::ADD)
+      return performSetccAddFolding(N, DAG);
+    return SDValue();
+  }
+
+  // Make sure both branches are extended in the same way.
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
+       LHS.getOpcode() != ISD::SIGN_EXTEND) ||
+      LHS.getOpcode() != RHS.getOpcode())
+    return SDValue();
+
+  unsigned ExtType = LHS.getOpcode();
+
+  // It's not worth doing if at least one of the inputs isn't already an
+  // extract, but we don't know which it'll be so we have to try both.
+ if (isEssentiallyExtractSubvector(LHS.getOperand(0))) { + RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG); + if (!RHS.getNode()) + return SDValue(); + + RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS); + } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) { + LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG); + if (!LHS.getNode()) + return SDValue(); + + LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS); + } + + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS); +} + +// Massage DAGs which we can use the high-half "long" operations on into +// something isel will recognize better. E.g. +// +// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) --> +// (aarch64_neon_umull (extract_high (v2i64 vec))) +// (extract_high (v2i64 (dup128 scalar))))) +// +static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + assert(LHS.getValueType().is64BitVector() && + RHS.getValueType().is64BitVector() && + "unexpected shape for long operation"); + + // Either node could be a DUP, but it's not worth doing both of them (you'd + // just as well use the non-high version) so look for a corresponding extract + // operation on the other "wing". + if (isEssentiallyExtractSubvector(LHS)) { + RHS = tryExtendDUPToExtractHigh(RHS, DAG); + if (!RHS.getNode()) + return SDValue(); + } else if (isEssentiallyExtractSubvector(RHS)) { + LHS = tryExtendDUPToExtractHigh(LHS, DAG); + if (!LHS.getNode()) + return SDValue(); + } + + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0), + N->getOperand(0), LHS, RHS); +} + +static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { + MVT ElemTy = N->getSimpleValueType(0).getScalarType(); + unsigned ElemBits = ElemTy.getSizeInBits(); + + int64_t ShiftAmount; + if (BuildVectorSDNode *BVN = dyn_cast(N->getOperand(2))) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, ElemBits) || + SplatBitSize != ElemBits) + return SDValue(); + + ShiftAmount = SplatValue.getSExtValue(); + } else if (ConstantSDNode *CVN = dyn_cast(N->getOperand(2))) { + ShiftAmount = CVN->getSExtValue(); + } else + return SDValue(); + + unsigned Opcode; + bool IsRightShift; + switch (IID) { + default: + llvm_unreachable("Unknown shift intrinsic"); + case Intrinsic::aarch64_neon_sqshl: + Opcode = AArch64ISD::SQSHL_I; + IsRightShift = false; + break; + case Intrinsic::aarch64_neon_uqshl: + Opcode = AArch64ISD::UQSHL_I; + IsRightShift = false; + break; + case Intrinsic::aarch64_neon_srshl: + Opcode = AArch64ISD::SRSHR_I; + IsRightShift = true; + break; + case Intrinsic::aarch64_neon_urshl: + Opcode = AArch64ISD::URSHR_I; + IsRightShift = true; + break; + case Intrinsic::aarch64_neon_sqshlu: + Opcode = AArch64ISD::SQSHLU_I; + IsRightShift = false; + break; + } + + if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), + DAG.getConstant(-ShiftAmount, MVT::i32)); + else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits) + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), + DAG.getConstant(ShiftAmount, MVT::i32)); + + return SDValue(); +} + +// The CRC32[BH] instructions ignore the high bits of their data operand. 
Since +// the intrinsics must be legal and take an i32, this means there's almost +// certainly going to be a zext in the DAG which we can eliminate. +static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) { + SDValue AndN = N->getOperand(2); + if (AndN.getOpcode() != ISD::AND) + return SDValue(); + + ConstantSDNode *CMask = dyn_cast(AndN.getOperand(1)); + if (!CMask || CMask->getZExtValue() != Mask) + return SDValue(); + + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32, + N->getOperand(0), N->getOperand(1), AndN.getOperand(0)); +} + +static SDValue performIntrinsicCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SelectionDAG &DAG = DCI.DAG; + unsigned IID = getIntrinsicID(N); + switch (IID) { + default: + break; + case Intrinsic::aarch64_neon_vcvtfxs2fp: + case Intrinsic::aarch64_neon_vcvtfxu2fp: + return tryCombineFixedPointConvert(N, DCI, DAG); + break; + case Intrinsic::aarch64_neon_fmax: + return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2)); + case Intrinsic::aarch64_neon_fmin: + return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2)); + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: + case Intrinsic::aarch64_neon_pmull: + case Intrinsic::aarch64_neon_sqdmull: + return tryCombineLongOpWithDup(IID, N, DCI, DAG); + case Intrinsic::aarch64_neon_sqshl: + case Intrinsic::aarch64_neon_uqshl: + case Intrinsic::aarch64_neon_sqshlu: + case Intrinsic::aarch64_neon_srshl: + case Intrinsic::aarch64_neon_urshl: + return tryCombineShiftImm(IID, N, DAG); + case Intrinsic::aarch64_crc32b: + case Intrinsic::aarch64_crc32cb: + return tryCombineCRC32(0xff, N, DAG); + case Intrinsic::aarch64_crc32h: + case Intrinsic::aarch64_crc32ch: + return tryCombineCRC32(0xffff, N, DAG); + } + return SDValue(); +} + +static SDValue performExtendCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { + // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then + // we can convert that DUP into another extract_high (of a bigger DUP), which + // helps the backend to decide that an sabdl2 would be useful, saving a real + // extract_high operation. + if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND && + N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) { + SDNode *ABDNode = N->getOperand(0).getNode(); + unsigned IID = getIntrinsicID(ABDNode); + if (IID == Intrinsic::aarch64_neon_sabd || + IID == Intrinsic::aarch64_neon_uabd) { + SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG); + if (!NewABD.getNode()) + return SDValue(); + + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), + NewABD); + } + } + + // This is effectively a custom type legalization for AArch64. + // + // Type legalization will split an extend of a small, legal, type to a larger + // illegal type by first splitting the destination type, often creating + // illegal source types, which then get legalized in isel-confusing ways, + // leading to really terrible codegen. E.g., + // %result = v8i32 sext v8i8 %value + // becomes + // %losrc = extract_subreg %value, ... + // %hisrc = extract_subreg %value, ... + // %lo = v4i32 sext v4i8 %losrc + // %hi = v4i32 sext v4i8 %hisrc + // Things go rapidly downhill from there. 
+static SDValue performIntrinsicCombine(SDNode *N,
+                                       TargetLowering::DAGCombinerInfo &DCI,
+                                       const AArch64Subtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned IID = getIntrinsicID(N);
+  switch (IID) {
+  default:
+    break;
+  case Intrinsic::aarch64_neon_vcvtfxs2fp:
+  case Intrinsic::aarch64_neon_vcvtfxu2fp:
+    return tryCombineFixedPointConvert(N, DCI, DAG);
+  case Intrinsic::aarch64_neon_fmax:
+    return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
+  case Intrinsic::aarch64_neon_fmin:
+    return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2));
+  case Intrinsic::aarch64_neon_smull:
+  case Intrinsic::aarch64_neon_umull:
+  case Intrinsic::aarch64_neon_pmull:
+  case Intrinsic::aarch64_neon_sqdmull:
+    return tryCombineLongOpWithDup(IID, N, DCI, DAG);
+  case Intrinsic::aarch64_neon_sqshl:
+  case Intrinsic::aarch64_neon_uqshl:
+  case Intrinsic::aarch64_neon_sqshlu:
+  case Intrinsic::aarch64_neon_srshl:
+  case Intrinsic::aarch64_neon_urshl:
+    return tryCombineShiftImm(IID, N, DAG);
+  case Intrinsic::aarch64_crc32b:
+  case Intrinsic::aarch64_crc32cb:
+    return tryCombineCRC32(0xff, N, DAG);
+  case Intrinsic::aarch64_crc32h:
+  case Intrinsic::aarch64_crc32ch:
+    return tryCombineCRC32(0xffff, N, DAG);
+  }
+  return SDValue();
+}
+
+static SDValue performExtendCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    SelectionDAG &DAG) {
+  // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
+  // we can convert that DUP into another extract_high (of a bigger DUP), which
+  // helps the backend to decide that an sabdl2 would be useful, saving a real
+  // extract_high operation.
+  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
+      N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+    SDNode *ABDNode = N->getOperand(0).getNode();
+    unsigned IID = getIntrinsicID(ABDNode);
+    if (IID == Intrinsic::aarch64_neon_sabd ||
+        IID == Intrinsic::aarch64_neon_uabd) {
+      SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
+      if (!NewABD.getNode())
+        return SDValue();
+
+      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
+                         NewABD);
+    }
+  }
+
+  // This is effectively a custom type legalization for AArch64.
+  //
+  // Type legalization will split an extend of a small, legal, type to a larger
+  // illegal type by first splitting the destination type, often creating
+  // illegal source types, which then get legalized in isel-confusing ways,
+  // leading to really terrible codegen. E.g.,
+  //   %result = v8i32 sext v8i8 %value
+  // becomes
+  //   %losrc = extract_subreg %value, ...
+  //   %hisrc = extract_subreg %value, ...
+  //   %lo = v4i32 sext v4i8 %losrc
+  //   %hi = v4i32 sext v4i8 %hisrc
+  // Things go rapidly downhill from there.
+  //
+  // For AArch64, the [sz]ext vector instructions can only go up one element
+  // size, so we can, e.g., extend from i8 to i16, but going from i8 to i32
+  // takes two instructions.
+  //
+  // This implies that the most efficient way to do the extend from v8i8
+  // to two v4i32 values is to first extend the v8i8 to v8i16, then allow
+  // the normal splitting to happen for the v8i16->v8i32.
+
+  // This is pre-legalization to catch some cases where the default
+  // type legalization will create ill-tempered code.
+  if (!DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  // We're only interested in cleaning things up for non-legal vector types
+  // here. If both the source and destination are legal, things will just
+  // work naturally without any fiddling.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT ResVT = N->getValueType(0);
+  if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
+    return SDValue();
+  // If the vector type isn't a simple VT, it's beyond the scope of what
+  // we're worried about here. Let legalization do its thing and hope for
+  // the best.
+  if (!ResVT.isSimple())
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+  MVT SrcVT = Src->getValueType(0).getSimpleVT();
+  // If the source VT is a 64-bit vector, we can play games and get the
+  // better results we want.
+  if (SrcVT.getSizeInBits() != 64)
+    return SDValue();
+
+  unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+  unsigned ElementCount = SrcVT.getVectorNumElements();
+  SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
+  SDLoc DL(N);
+  Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
+
+  // Now split the rest of the operation into two halves, each with a 64
+  // bit source.
+  EVT LoVT, HiVT;
+  SDValue Lo, Hi;
+  unsigned NumElements = ResVT.getVectorNumElements();
+  assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+  LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+                                 ResVT.getVectorElementType(), NumElements / 2);
+
+  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
+                               LoVT.getVectorNumElements());
+  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
+                   DAG.getIntPtrConstant(0));
+  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
+                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
+  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
+
+  // Now combine the parts back together so we still have a single result
+  // like the combiner expects.
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
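// A standalone sketch (not part of the patch): extending v8i8 -> v8i16 first
// and then splitting yields the same lanes as splitting first and extending
// each half, which is the equivalence the combine above relies on. A scalar
// model of the lane values:
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<int8_t, 8> Src = {1, -2, 3, -4, 5, -6, 7, -8};

  // Strategy used by the combine: one sext to 16 bits, then (conceptually)
  // split in half and sext each half to 32 bits -- each step widens exactly
  // one element size.
  std::array<int32_t, 8> Wide;
  for (int i = 0; i < 8; ++i)
    Wide[i] = (int32_t)(int16_t)Src[i];

  // Reference: direct i8 -> i32 extension of every lane gives the same bits.
  for (int i = 0; i < 8; ++i)
    assert(Wide[i] == (int32_t)Src[i]);
}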
+/// Replace a splat of a scalar to a vector store by scalar stores of the scalar
+/// value. The load store optimizer pass will merge them to store pair stores.
+/// This has better performance than a splat of the scalar followed by a split
+/// vector store. Even if the stores are not merged it is four stores vs a dup,
+/// followed by an ext.b and two stores.
+static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
+  SDValue StVal = St->getValue();
+  EVT VT = StVal.getValueType();
+
+  // Don't replace floating point stores, they possibly won't be transformed to
+  // stp because of the store pair suppress pass.
+  if (VT.isFloatingPoint())
+    return SDValue();
+
+  // Check for insert vector elements.
+  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
+    return SDValue();
+
+  // We can express a splat as store pair(s) for 2 or 4 elements.
+  unsigned NumVecElts = VT.getVectorNumElements();
+  if (NumVecElts != 4 && NumVecElts != 2)
+    return SDValue();
+  SDValue SplatVal = StVal.getOperand(1);
+  unsigned RemainInsertElts = NumVecElts - 1;
+
+  // Check that this is a splat.
+  while (--RemainInsertElts) {
+    SDValue NextInsertElt = StVal.getOperand(0);
+    if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+      return SDValue();
+    if (NextInsertElt.getOperand(1) != SplatVal)
+      return SDValue();
+    StVal = NextInsertElt;
+  }
+  unsigned OrigAlignment = St->getAlignment();
+  unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
+  unsigned Alignment = std::min(OrigAlignment, EltOffset);
+
+  // Create scalar stores. This is at least as good as the code sequence for a
+  // split unaligned store which is a dup.s, ext.b, and two stores.
+  // Most of the time the three stores should be replaced by store pair
+  // instructions (stp).
+  SDLoc DL(St);
+  SDValue BasePtr = St->getBasePtr();
+  SDValue NewST1 =
+      DAG.getStore(St->getChain(), DL, SplatVal, BasePtr, St->getPointerInfo(),
+                   St->isVolatile(), St->isNonTemporal(), St->getAlignment());
+
+  unsigned Offset = EltOffset;
+  while (--NumVecElts) {
+    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+                                    DAG.getConstant(Offset, MVT::i64));
+    NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
+                          St->getPointerInfo(), St->isVolatile(),
+                          St->isNonTemporal(), Alignment);
+    Offset += EltOffset;
+  }
+  return NewST1;
+}
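// A standalone sketch (not part of the patch): for a 4 x i32 splat, four
// scalar stores write exactly the bytes a vector splat store would, and
// adjacent pairs can later fuse into stp. A memcmp-based model:
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Splat = 0x01020304;

  // Vector-style store: materialise the splat, then store 16 bytes at once.
  uint32_t Vec[4] = {Splat, Splat, Splat, Splat};
  unsigned char A[16];
  std::memcpy(A, Vec, 16);

  // Scalar-style store: four 4-byte stores at offsets 0, 4, 8, 12
  // (EltOffset == 4 in the code above).
  unsigned char B[16];
  for (int i = 0; i < 4; ++i)
    std::memcpy(B + 4 * i, &Splat, 4);

  assert(std::memcmp(A, B, 16) == 0);  // identical memory image
}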
+static SDValue performSTORECombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   SelectionDAG &DAG,
+                                   const AArch64Subtarget *Subtarget) {
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  StoreSDNode *S = cast<StoreSDNode>(N);
+  if (S->isVolatile())
+    return SDValue();
+
+  // Cyclone has bad performance on unaligned 16B stores when crossing line and
+  // page boundaries. We want to split such stores.
+  if (!Subtarget->isCyclone())
+    return SDValue();
+
+  // Don't split at Oz.
+  MachineFunction &MF = DAG.getMachineFunction();
+  bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
+      AttributeSet::FunctionIndex, Attribute::MinSize);
+  if (IsMinSize)
+    return SDValue();
+
+  SDValue StVal = S->getValue();
+  EVT VT = StVal.getValueType();
+
+  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
+  // those up regresses performance on micro-benchmarks and olden/bh.
+  if (!VT.isVector() || VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
+    return SDValue();
+
+  // Split unaligned 16B stores. They are terrible for performance.
+  // Don't split stores with alignment of 1 or 2. Code that uses clang vector
+  // extensions can use this to mark that it does not want splitting to happen
+  // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
+  // eliminating alignment hazards is only 1 in 8 for alignment of 2.
+  if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
+      S->getAlignment() <= 2)
+    return SDValue();
+
+  // If we get a splat of a scalar convert this vector store to a store of
+  // scalars. They will be merged into store pairs thereby removing two
+  // instructions.
+  SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
+  if (ReplacedSplat != SDValue())
+    return ReplacedSplat;
+
+  SDLoc DL(S);
+  unsigned NumElts = VT.getVectorNumElements() / 2;
+  // Split VT into two.
+  EVT HalfVT =
+      EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
+  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
+                                   DAG.getIntPtrConstant(0));
+  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
+                                   DAG.getIntPtrConstant(NumElts));
+  SDValue BasePtr = S->getBasePtr();
+  SDValue NewST1 =
+      DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
+                   S->isVolatile(), S->isNonTemporal(), S->getAlignment());
+  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+                                  DAG.getConstant(8, MVT::i64));
+  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
+                      S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
+                      S->getAlignment());
+}
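// A standalone sketch (not part of the patch): the guard above only splits
// 128-bit stores whose alignment is 4 or 8 -- alignment 16 can never cross a
// boundary, and alignments 1 and 2 act as an explicit opt-out. As a predicate:
#include <cassert>

bool shouldSplitStore(unsigned SizeInBits, unsigned Align) {
  return SizeInBits == 128 && Align < 16 && Align > 2;
}

int main() {
  assert(shouldSplitStore(128, 4));    // unaligned 16B store: split it
  assert(shouldSplitStore(128, 8));
  assert(!shouldSplitStore(128, 16));  // naturally aligned: leave it alone
  assert(!shouldSplitStore(128, 1));   // underspecified alignment: opt-out
  assert(!shouldSplitStore(64, 4));    // only 16B stores are affected
}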
+/// Target-specific DAG combine function for post-increment LD1 (lane) and
+/// post-increment LD1R.
+static SDValue performPostLD1Combine(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     bool IsLaneOp) {
+  if (DCI.isBeforeLegalizeOps())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+
+  unsigned LoadIdx = IsLaneOp ? 1 : 0;
+  SDNode *LD = N->getOperand(LoadIdx).getNode();
+  // If it is not a LOAD, we cannot do such a combine.
+  if (LD->getOpcode() != ISD::LOAD)
+    return SDValue();
+
+  LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
+  EVT MemVT = LoadSDN->getMemoryVT();
+  // Check if memory operand is the same type as the vector element.
+  if (MemVT != VT.getVectorElementType())
+    return SDValue();
+
+  // Check if there are other uses. If so, do not combine as it will introduce
+  // an extra load.
+  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
+       ++UI) {
+    if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
+      continue;
+    if (*UI != N)
+      return SDValue();
+  }
+
+  SDValue Addr = LD->getOperand(1);
+  SDValue Vector = N->getOperand(0);
+  // Search for a use of the address operand that is an increment.
+  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
+       Addr.getNode()->use_end(); UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User->getOpcode() != ISD::ADD
+        || UI.getUse().getResNo() != Addr.getResNo())
+      continue;
+
+    // Check that the add is independent of the load. Otherwise, folding it
+    // would create a cycle.
+    if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User))
+      continue;
+    // Also check that add is not used in the vector operand. This would also
+    // create a cycle.
+    if (User->isPredecessorOf(Vector.getNode()))
+      continue;

-    bool ismatch = true;
+    // If the increment is a constant, it must match the memory ref size.
+    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+      uint32_t IncVal = CInc->getZExtValue();
+      unsigned NumBytes = VT.getScalarSizeInBits() / 8;
+      if (IncVal != NumBytes)
+        continue;
+      Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
+    }
+
+    SmallVector<SDValue, 8> Ops;
+    Ops.push_back(LD->getOperand(0));  // Chain
+    if (IsLaneOp) {
+      Ops.push_back(Vector);           // The vector to be inserted
+      Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector
+    }
+    Ops.push_back(Addr);
+    Ops.push_back(Inc);
+
+    EVT Tys[3] = { VT, MVT::i64, MVT::Other };
+    SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, 3));
+    unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
+    SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
+                                           MemVT,
+                                           LoadSDN->getMemOperand());
+
+    // Update the uses.
+    std::vector<SDValue> NewResults;
+    NewResults.push_back(SDValue(LD, 0));             // The result of load
+    NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
+    DCI.CombineTo(LD, NewResults);
+    DCI.CombineTo(N, SDValue(UpdN.getNode(), 0));     // Dup/Inserted Result
+    DCI.CombineTo(User, SDValue(UpdN.getNode(), 1));  // Write back register
+
+    break;
+  }
+  return SDValue();
+}
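// A standalone sketch (not part of the patch): folding the add into the load
// is only sound when the increment equals the element size in bytes
// (VT.getScalarSizeInBits() / 8 above); anything else must stay a separate
// add or use the register-offset form. A pointer-level model of the
// post-indexed access "ld1 {v0.s}[0], [x0], #4":
#include <cassert>
#include <cstdint>

const uint32_t *postIndexedLoad(const uint32_t *Base, uint32_t &Out) {
  Out = *Base;      // the load itself
  return Base + 1;  // write-back: base advances by sizeof(element) == 4 bytes
}

int main() {
  uint32_t Buf[2] = {10, 20}, V;
  const uint32_t *P = postIndexedLoad(Buf, V);
  assert(V == 10 && P == Buf + 1);  // same effect as load + separate add
}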
+/// Target-specific DAG combine function for NEON load/store intrinsics
+/// to merge base address updates.
+static SDValue performNEONPostLDSTCombine(SDNode *N,
+                                          TargetLowering::DAGCombinerInfo &DCI,
+                                          SelectionDAG &DAG) {
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  unsigned AddrOpIdx = N->getNumOperands() - 1;
+  SDValue Addr = N->getOperand(AddrOpIdx);
+
+  // Search for a use of the address operand that is an increment.
+  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+       UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User->getOpcode() != ISD::ADD ||
+        UI.getUse().getResNo() != Addr.getResNo())
+      continue;
+
+    // Check that the add is independent of the load/store. Otherwise, folding
+    // it would create a cycle.
+    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+      continue;
+
+    // Find the new opcode for the updating load/store.
+    bool IsStore = false;
+    bool IsLaneOp = false;
+    bool IsDupOp = false;
+    unsigned NewOpc = 0;
+    unsigned NumVecs = 0;
+    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    switch (IntNo) {
+    default: llvm_unreachable("unexpected intrinsic for Neon base update");
+    case Intrinsic::aarch64_neon_ld2:       NewOpc = AArch64ISD::LD2post;
+      NumVecs = 2; break;
+    case Intrinsic::aarch64_neon_ld3:       NewOpc = AArch64ISD::LD3post;
+      NumVecs = 3; break;
+    case Intrinsic::aarch64_neon_ld4:       NewOpc = AArch64ISD::LD4post;
+      NumVecs = 4; break;
+    case Intrinsic::aarch64_neon_st2:       NewOpc = AArch64ISD::ST2post;
+      NumVecs = 2; IsStore = true; break;
+    case Intrinsic::aarch64_neon_st3:       NewOpc = AArch64ISD::ST3post;
+      NumVecs = 3; IsStore = true; break;
+    case Intrinsic::aarch64_neon_st4:       NewOpc = AArch64ISD::ST4post;
+      NumVecs = 4; IsStore = true; break;
+    case Intrinsic::aarch64_neon_ld1x2:     NewOpc = AArch64ISD::LD1x2post;
+      NumVecs = 2; break;
+    case Intrinsic::aarch64_neon_ld1x3:     NewOpc = AArch64ISD::LD1x3post;
+      NumVecs = 3; break;
+    case Intrinsic::aarch64_neon_ld1x4:     NewOpc = AArch64ISD::LD1x4post;
+      NumVecs = 4; break;
+    case Intrinsic::aarch64_neon_st1x2:     NewOpc = AArch64ISD::ST1x2post;
+      NumVecs = 2; IsStore = true; break;
+    case Intrinsic::aarch64_neon_st1x3:     NewOpc = AArch64ISD::ST1x3post;
+      NumVecs = 3; IsStore = true; break;
+    case Intrinsic::aarch64_neon_st1x4:     NewOpc = AArch64ISD::ST1x4post;
+      NumVecs = 4; IsStore = true; break;
+    case Intrinsic::aarch64_neon_ld2r:      NewOpc = AArch64ISD::LD2DUPpost;
+      NumVecs = 2; IsDupOp = true; break;
+    case Intrinsic::aarch64_neon_ld3r:      NewOpc = AArch64ISD::LD3DUPpost;
+      NumVecs = 3; IsDupOp = true; break;
+    case Intrinsic::aarch64_neon_ld4r:      NewOpc = AArch64ISD::LD4DUPpost;
+      NumVecs = 4; IsDupOp = true; break;
+    case Intrinsic::aarch64_neon_ld2lane:   NewOpc = AArch64ISD::LD2LANEpost;
+      NumVecs = 2; IsLaneOp = true; break;
+    case Intrinsic::aarch64_neon_ld3lane:   NewOpc = AArch64ISD::LD3LANEpost;
+      NumVecs = 3; IsLaneOp = true; break;
+    case Intrinsic::aarch64_neon_ld4lane:   NewOpc = AArch64ISD::LD4LANEpost;
+      NumVecs = 4; IsLaneOp = true; break;
+    case Intrinsic::aarch64_neon_st2lane:   NewOpc = AArch64ISD::ST2LANEpost;
+      NumVecs = 2; IsStore = true; IsLaneOp = true; break;
+    case Intrinsic::aarch64_neon_st3lane:   NewOpc = AArch64ISD::ST3LANEpost;
+      NumVecs = 3; IsStore = true; IsLaneOp = true; break;
+    case Intrinsic::aarch64_neon_st4lane:   NewOpc = AArch64ISD::ST4LANEpost;
+      NumVecs = 4; IsStore = true; IsLaneOp = true; break;
+    }
+
+    EVT VecTy;
+    if (IsStore)
+      VecTy = N->getOperand(2).getValueType();
+    else
+      VecTy = N->getValueType(0);
+
+    // If the increment is a constant, it must match the memory ref size.
+    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+      uint32_t IncVal = CInc->getZExtValue();
+      unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+      if (IsLaneOp || IsDupOp)
+        NumBytes /= VecTy.getVectorNumElements();
+      if (IncVal != NumBytes)
+        continue;
+      Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
+    }
+    SmallVector<SDValue, 8> Ops;
+    Ops.push_back(N->getOperand(0)); // Incoming chain
+    // Load lane and store have vector list as input.
+    if (IsLaneOp || IsStore)
+      for (unsigned i = 2; i < AddrOpIdx; ++i)
+        Ops.push_back(N->getOperand(i));
+    Ops.push_back(Addr); // Base register
+    Ops.push_back(Inc);
+
+    // Return Types.
+    EVT Tys[6];
+    unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
+    unsigned n;
+    for (n = 0; n < NumResultVecs; ++n)
+      Tys[n] = VecTy;
+    Tys[n++] = MVT::i64; // Type of write back register
+    Tys[n] = MVT::Other; // Type of the chain
+    SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2));

-  // Check UZP1
-  for (unsigned i = 0; i < NumElts; ++i) {
-    unsigned answer = i * 2;
-    if (isV2undef && answer >= NumElts)
-      answer -= NumElts;
-    if (M[i] != -1 && (unsigned)M[i] != answer) {
-      ismatch = false;
-      break;
-    }
-  }
-  if (ismatch)
-    return AArch64ISD::NEON_UZP1;
+    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
+                                           MemInt->getMemoryVT(),
+                                           MemInt->getMemOperand());

-  // Check UZP2
-  ismatch = true;
-  for (unsigned i = 0; i < NumElts; ++i) {
-    unsigned answer = i * 2 + 1;
-    if (isV2undef && answer >= NumElts)
-      answer -= NumElts;
-    if (M[i] != -1 && (unsigned)M[i] != answer) {
-      ismatch = false;
-      break;
+    // Update the uses.
+    std::vector<SDValue> NewResults;
+    for (unsigned i = 0; i < NumResultVecs; ++i) {
+      NewResults.push_back(SDValue(UpdN.getNode(), i));
     }
-  }
-  if (ismatch)
-    return AArch64ISD::NEON_UZP2;
+    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
+    DCI.CombineTo(N, NewResults);
+    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));

-  // Check ZIP1
-  ismatch = true;
-  for (unsigned i = 0; i < NumElts; ++i) {
-    unsigned answer = i / 2 + NumElts * (i % 2);
-    if (isV2undef && answer >= NumElts)
-      answer -= NumElts;
-    if (M[i] != -1 && (unsigned)M[i] != answer) {
-      ismatch = false;
-      break;
-    }
+    break;
   }
-  if (ismatch)
-    return AArch64ISD::NEON_ZIP1;
+  return SDValue();
+}

-  // Check ZIP2
-  ismatch = true;
-  for (unsigned i = 0; i < NumElts; ++i) {
-    unsigned answer = (NumElts + i) / 2 + NumElts * (i % 2);
-    if (isV2undef && answer >= NumElts)
-      answer -= NumElts;
-    if (M[i] != -1 && (unsigned)M[i] != answer) {
-      ismatch = false;
-      break;
-    }
-  }
-  if (ismatch)
-    return AArch64ISD::NEON_ZIP2;
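// A standalone sketch (not part of the patch): the constant increment the
// write-back forms accept is exactly the number of bytes transferred. Full
// vector loads move NumVecs whole vectors; lane/dup forms move only one
// element per vector. A direct model of the NumBytes computation above:
#include <cassert>

unsigned expectedIncrement(unsigned NumVecs, unsigned VecBits,
                           unsigned NumElts, bool LaneOrDup) {
  unsigned NumBytes = NumVecs * VecBits / 8;
  if (LaneOrDup)
    NumBytes /= NumElts;  // one element per vector, not the whole vector
  return NumBytes;
}

int main() {
  assert(expectedIncrement(2, 128, 4, false) == 32); // ld2 {v0.4s,v1.4s}, #32
  assert(expectedIncrement(2, 128, 4, true) == 8);   // ld2 {v0.s,v1.s}[i], #8
}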
+// Optimize compare with zero and branch.
+static SDValue performBRCONDCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI,
+                                    SelectionDAG &DAG) {
+  SDValue Chain = N->getOperand(0);
+  SDValue Dest = N->getOperand(1);
+  SDValue CCVal = N->getOperand(2);
+  SDValue Cmp = N->getOperand(3);
+
+  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
+  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
+  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+    return SDValue();

-  // Check TRN1
-  ismatch = true;
-  for (unsigned i = 0; i < NumElts; ++i) {
-    unsigned answer = i + (NumElts - 1) * (i % 2);
-    if (isV2undef && answer >= NumElts)
-      answer -= NumElts;
-    if (M[i] != -1 && (unsigned)M[i] != answer) {
-      ismatch = false;
-      break;
-    }
-  }
-  if (ismatch)
-    return AArch64ISD::NEON_TRN1;
+  unsigned CmpOpc = Cmp.getOpcode();
+  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
+    return SDValue();

-  // Check TRN2
-  ismatch = true;
-  for (unsigned i = 0; i < NumElts; ++i) {
-    unsigned answer = 1 + i + (NumElts - 1) * (i % 2);
-    if (isV2undef && answer >= NumElts)
-      answer -= NumElts;
-    if (M[i] != -1 && (unsigned)M[i] != answer) {
-      ismatch = false;
-      break;
-    }
-  }
-  if (ismatch)
-    return AArch64ISD::NEON_TRN2;
+  // Only attempt folding if there is only one use of the flag and no use of
+  // the value.
+  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
+    return SDValue();

-  return 0;
-}
+  SDValue LHS = Cmp.getOperand(0);
+  SDValue RHS = Cmp.getOperand(1);

-SDValue
-AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
-                                           SelectionDAG &DAG) const {
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
-  SDLoc dl(Op);
-  EVT VT = Op.getValueType();
-  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+  assert(LHS.getValueType() == RHS.getValueType() &&
+         "Expected the value type to be the same for both operands!");
+  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
+    return SDValue();

-  // Convert shuffles that are directly supported on NEON to target-specific
-  // DAG nodes, instead of keeping them as shuffles and matching them again
-  // during code selection. This is more efficient and avoids the possibility
-  // of inconsistencies between legalization and selection.
-  ArrayRef<int> ShuffleMask = SVN->getMask();
+  if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
+    std::swap(LHS, RHS);

-  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
-  if (EltSize > 64)
+  if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
     return SDValue();

-  if (isREVMask(ShuffleMask, VT, 64))
-    return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
-  if (isREVMask(ShuffleMask, VT, 32))
-    return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
-  if (isREVMask(ShuffleMask, VT, 16))
-    return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
+  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
+      LHS.getOpcode() == ISD::SRL)
+    return SDValue();

-  unsigned ISDNo;
-  if (V2.getOpcode() == ISD::UNDEF)
-    ISDNo = isPermuteMask(ShuffleMask, VT, true);
+  // Fold the compare into the branch instruction.
+  SDValue BR;
+  if (CC == AArch64CC::EQ)
+    BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
   else
-    ISDNo = isPermuteMask(ShuffleMask, VT, false);
+    BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);

-  if (ISDNo) {
-    if (V2.getOpcode() == ISD::UNDEF)
-      return DAG.getNode(ISDNo, dl, VT, V1, V1);
-    else
-      return DAG.getNode(ISDNo, dl, VT, V1, V2);
-  }
+  // Do not add new nodes to DAG combiner worklist.
+  DCI.CombineTo(N, BR, false);

-  SDValue Res;
-  if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
-    return Res;
+  return SDValue();
+}
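// A standalone sketch (not part of the patch): "cmp x, #0; b.eq L" and
// "cbz x, L" branch under exactly the same condition, which is what makes
// the fold legal once the flags have no other users. As a branch-condition
// model:
#include <cassert>
#include <cstdint>

bool branchTakenCmpBeq(int64_t X) { return (X - 0) == 0; } // subs + b.eq
bool branchTakenCbz(int64_t X) { return X == 0; }          // cbz

int main() {
  for (int64_t X : {-1, 0, 1, 42})
    assert(branchTakenCmpBeq(X) == branchTakenCbz(X));
}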
-  // If the element of shuffle mask are all the same constant, we can
-  // transform it into either NEON_VDUP or NEON_VDUPLANE
+// vselect (v1i1 setcc) ->
+//     vselect (v1iXX setcc)  (XX is the size of the compared operand type)
+// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
+// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
+// such VSELECT.
+static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  EVT CCVT = N0.getValueType();

-  if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
-    int Lane = SVN->getSplatIndex();
-    // If this is undef splat, generate it via "just" vdup, if possible.
-    if (Lane == -1) Lane = 0;
+  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
+      CCVT.getVectorElementType() != MVT::i1)
+    return SDValue();

-    // Test if V1 is a SCALAR_TO_VECTOR.
-    if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
-      return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
-    }
-    // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
-    if (V1.getOpcode() == ISD::BUILD_VECTOR) {
-      bool IsScalarToVector = true;
-      for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
-        if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
-            i != (unsigned)Lane) {
-          IsScalarToVector = false;
-          break;
-        }
-      if (IsScalarToVector)
-        return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
-                           V1.getOperand(Lane));
-    }
+  EVT ResVT = N->getValueType(0);
+  EVT CmpVT = N0.getOperand(0).getValueType();
+  // Only combine when the result type is of the same size as the compared
+  // operands.
+  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
+    return SDValue();

-    // Test if V1 is a EXTRACT_SUBVECTOR.
-    if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
-      int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
-      return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
-                         DAG.getConstant(Lane + ExtLane, MVT::i64));
-    }
-    // Test if V1 is a CONCAT_VECTORS.
-    if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
-        V1.getOperand(1).getOpcode() == ISD::UNDEF) {
-      SDValue Op0 = V1.getOperand(0);
-      assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
-             "Invalid vector lane access");
-      return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
-                         DAG.getConstant(Lane, MVT::i64));
-    }
+  SDValue IfTrue = N->getOperand(1);
+  SDValue IfFalse = N->getOperand(2);
+  SDValue SetCC =
+      DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
+                   N0.getOperand(0), N0.getOperand(1),
+                   cast<CondCodeSDNode>(N0.getOperand(2))->get());
+  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
+                     IfTrue, IfFalse);
+}

-    return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
-                       DAG.getConstant(Lane, MVT::i64));
-  }

-  int Length = ShuffleMask.size();
-  int V1EltNum = V1.getValueType().getVectorNumElements();
+/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
+/// the compare-mask instructions rather than going via NZCV, even if LHS and
+/// RHS are really scalar. This replaces any scalar setcc in the above pattern
+/// with a vector one followed by a DUP shuffle on the result.
+static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  EVT ResVT = N->getValueType(0);

-  // If the number of v1 elements is the same as the number of shuffle mask
-  // element and the shuffle masks are sequential values, we can transform
-  // it into NEON_VEXTRACT.
-  if (V1EltNum == Length) {
-    // Check if the shuffle mask is sequential.
-    int SkipUndef = 0;
-    while (ShuffleMask[SkipUndef] == -1) {
-      SkipUndef++;
-    }
-    int CurMask = ShuffleMask[SkipUndef];
-    if (CurMask >= SkipUndef) {
-      bool IsSequential = true;
-      for (int I = SkipUndef; I < Length; ++I) {
-        if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) {
-          IsSequential = false;
-          break;
-        }
-        CurMask++;
-      }
-      if (IsSequential) {
-        assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
-        unsigned VecSize = EltSize * V1EltNum;
-        unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef);
-        if (VecSize == 64 || VecSize == 128)
-          return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
-                             DAG.getConstant(Index, MVT::i64));
-      }
-    }
-  }
+  if (!N->getOperand(1).getValueType().isVector())
+    return SDValue();

-  // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert
-  // by element from V2 to V1 .
-  // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a
-  // better choice to be inserted than V1 as less insert needed, so we count
-  // element to be inserted for both V1 and V2, and select less one as insert
-  // target.
-
-  // Collect elements need to be inserted and their index.
-  SmallVector<int, 8> NV1Elt;
-  SmallVector<int, 8> N1Index;
-  SmallVector<int, 8> NV2Elt;
-  SmallVector<int, 8> N2Index;
-  for (int I = 0; I != Length; ++I) {
-    if (ShuffleMask[I] != I) {
-      NV1Elt.push_back(ShuffleMask[I]);
-      N1Index.push_back(I);
-    }
-  }
-  for (int I = 0; I != Length; ++I) {
-    if (ShuffleMask[I] != (I + V1EltNum)) {
-      NV2Elt.push_back(ShuffleMask[I]);
-      N2Index.push_back(I);
-    }
-  }
+  if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+    return SDValue();

-  // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2
-  // will be inserted.
-  SDValue InsV = V1;
-  SmallVector<int, 8> InsMasks = NV1Elt;
-  SmallVector<int, 8> InsIndex = N1Index;
-  if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
-    if (NV1Elt.size() > NV2Elt.size()) {
-      InsV = V2;
-      InsMasks = NV2Elt;
-      InsIndex = N2Index;
-    }
-  } else {
-    InsV = DAG.getNode(ISD::UNDEF, dl, VT);
-  }
+  SDLoc DL(N0);

-  for (int I = 0, E = InsMasks.size(); I != E; ++I) {
-    SDValue ExtV = V1;
-    int Mask = InsMasks[I];
-    if (Mask >= V1EltNum) {
-      ExtV = V2;
-      Mask -= V1EltNum;
-    }
-    // Any value type smaller than i32 is illegal in AArch64, and this lower
-    // function is called after legalize pass, so we need to legalize
-    // the result here.
-    EVT EltVT;
-    if (VT.getVectorElementType().isFloatingPoint())
-      EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
-    else
-      EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
+  EVT SrcVT = N0.getOperand(0).getValueType();
+  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT,
+                           ResVT.getSizeInBits() / SrcVT.getSizeInBits());
+  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
+
+  // First perform a vector comparison, where lane 0 is the one we're
+  // interested in.
+  SDValue LHS =
+      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
+  SDValue RHS =
+      DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
+  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
+
+  // Now duplicate the comparison mask we want across all other lanes.
+  SmallVector<int, 64> DUPMask(CCVT.getVectorNumElements(), 0);
+  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask.data());
+  Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(),
+                     Mask);
+
+  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
+}

-    if (Mask >= 0) {
-      ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
-                         DAG.getConstant(Mask, MVT::i64));
-      InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
-                         DAG.getConstant(InsIndex[I], MVT::i64));
+SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
+                                                 DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  switch (N->getOpcode()) {
+  default:
+    break;
+  case ISD::ADD:
+  case ISD::SUB:
+    return performAddSubLongCombine(N, DCI, DAG);
+  case ISD::XOR:
+    return performXorCombine(N, DAG, DCI, Subtarget);
+  case ISD::MUL:
+    return performMulCombine(N, DAG, DCI, Subtarget);
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    return performIntToFpCombine(N, DAG);
+  case ISD::OR:
+    return performORCombine(N, DCI, Subtarget);
+  case ISD::INTRINSIC_WO_CHAIN:
+    return performIntrinsicCombine(N, DCI, Subtarget);
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+    return performExtendCombine(N, DCI, DAG);
+  case ISD::BITCAST:
+    return performBitcastCombine(N, DCI, DAG);
+  case ISD::CONCAT_VECTORS:
+    return performConcatVectorsCombine(N, DCI, DAG);
+  case ISD::SELECT:
+    return performSelectCombine(N, DAG);
+  case ISD::VSELECT:
+    return performVSelectCombine(N, DCI.DAG);
+  case ISD::STORE:
+    return performSTORECombine(N, DCI, DAG, Subtarget);
+  case AArch64ISD::BRCOND:
+    return performBRCONDCombine(N, DCI, DAG);
+  case AArch64ISD::DUP:
+    return performPostLD1Combine(N, DCI, false);
+  case ISD::INSERT_VECTOR_ELT:
+    return performPostLD1Combine(N, DCI, true);
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN:
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    case Intrinsic::aarch64_neon_ld2:
+    case Intrinsic::aarch64_neon_ld3:
+    case Intrinsic::aarch64_neon_ld4:
+    case Intrinsic::aarch64_neon_ld1x2:
+    case Intrinsic::aarch64_neon_ld1x3:
+    case Intrinsic::aarch64_neon_ld1x4:
+    case Intrinsic::aarch64_neon_ld2lane:
+    case Intrinsic::aarch64_neon_ld3lane:
+    case Intrinsic::aarch64_neon_ld4lane:
+    case Intrinsic::aarch64_neon_ld2r:
+    case Intrinsic::aarch64_neon_ld3r:
+    case Intrinsic::aarch64_neon_ld4r:
+    case Intrinsic::aarch64_neon_st2:
+    case Intrinsic::aarch64_neon_st3:
+    case Intrinsic::aarch64_neon_st4:
+    case Intrinsic::aarch64_neon_st1x2:
+    case Intrinsic::aarch64_neon_st1x3:
+    case Intrinsic::aarch64_neon_st1x4:
+    case Intrinsic::aarch64_neon_st2lane:
+    case Intrinsic::aarch64_neon_st3lane:
+    case Intrinsic::aarch64_neon_st4lane:
+      return performNEONPostLDSTCombine(N, DCI, DAG);
+    default:
+      break;
     }
   }
-  return InsV;
+  return SDValue();
 }
-AArch64TargetLowering::ConstraintType
-AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
-  if (Constraint.size() == 1) {
-    switch (Constraint[0]) {
-    default: break;
-    case 'w': // An FP/SIMD vector register
-      return C_RegisterClass;
-    case 'I': // Constant that can be used with an ADD instruction
-    case 'J': // Constant that can be used with a SUB instruction
-    case 'K': // Constant that can be used with a 32-bit logical instruction
-    case 'L': // Constant that can be used with a 64-bit logical instruction
-    case 'M': // Constant that can be used as a 32-bit MOV immediate
-    case 'N': // Constant that can be used as a 64-bit MOV immediate
-    case 'Y': // Floating point constant zero
-    case 'Z': // Integer constant zero
-      return C_Other;
-    case 'Q': // A memory reference with base register and no offset
-      return C_Memory;
-    case 'S': // A symbolic address
-      return C_Other;
-    }
+// Check if the return value is used as only a return value, as otherwise
+// we can't perform a tail-call. In particular, we need to check for
+// target ISD nodes that are returns and any other "odd" constructs
+// that the generic analysis code won't necessarily catch.
+bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
+                                               SDValue &Chain) const {
+  if (N->getNumValues() != 1)
+    return false;
+  if (!N->hasNUsesOfValue(1, 0))
+    return false;
+
+  SDValue TCChain = Chain;
+  SDNode *Copy = *N->use_begin();
+  if (Copy->getOpcode() == ISD::CopyToReg) {
+    // If the copy has a glue operand, we conservatively assume it isn't safe to
+    // perform a tail call.
+    if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
+        MVT::Glue)
+      return false;
+    TCChain = Copy->getOperand(0);
+  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
+    return false;
+
+  bool HasRet = false;
+  for (SDNode *Node : Copy->uses()) {
+    if (Node->getOpcode() != AArch64ISD::RET_FLAG)
+      return false;
+    HasRet = true;
   }

-  // FIXME: Ump, Utf, Usa, Ush
-  // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
-  //      whatever they may be
-  // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
-  // Usa: An absolute symbolic address
-  // Ush: The high part (bits 32:12) of a pc-relative symbolic address
-  assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
-         && Constraint != "Ush" && "Unimplemented constraints");
+  if (!HasRet)
+    return false;

-  return TargetLowering::getConstraintType(Constraint);
+  Chain = TCChain;
+  return true;
 }

-TargetLowering::ConstraintWeight
-AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
-                                                      const char *Constraint) const {
+// Return whether an instruction can potentially be optimized to a tail
+// call. This will cause the optimizers to attempt to move, or duplicate,
+// return instructions to help enable tail call optimizations for this
+// instruction.
+bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+  if (!CI->isTailCall())
+    return false;

-  llvm_unreachable("Constraint weight unimplemented");
+  return true;
 }

-void
-AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
-                                                    std::string &Constraint,
-                                                    std::vector<SDValue> &Ops,
-                                                    SelectionDAG &DAG) const {
-  SDValue Result(0, 0);
-
-  // Only length 1 constraints are C_Other.
-  if (Constraint.size() != 1) return;
+bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
+                                                   SDValue &Offset,
+                                                   ISD::MemIndexedMode &AM,
+                                                   bool &IsInc,
+                                                   SelectionDAG &DAG) const {
+  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
+    return false;

-  // Only C_Other constraints get lowered like this. That means constants for us
-  // so return early if there's no hope the constraint can be lowered.
+  Base = Op->getOperand(0);
+  // All of the indexed addressing mode instructions take a signed
+  // 9-bit immediate offset.
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+    int64_t RHSC = (int64_t)RHS->getZExtValue();
+    if (RHSC >= 256 || RHSC <= -256)
+      return false;
+    IsInc = (Op->getOpcode() == ISD::ADD);
+    Offset = Op->getOperand(1);
+    return true;
+  }
+  return false;
+}
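// A standalone sketch (not part of the patch): the pre/post-indexed forms
// encode their offset as a signed 9-bit immediate (-256..255); the range
// check above conservatively accepts -255..255. As a predicate:
#include <cassert>
#include <cstdint>

bool fitsIndexedOffset(int64_t C) { return C > -256 && C < 256; }

int main() {
  assert(fitsIndexedOffset(255) && fitsIndexedOffset(-255));
  assert(!fitsIndexedOffset(256) && !fitsIndexedOffset(-256));
}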
-  switch(Constraint[0]) {
-  default: break;
-  case 'I': case 'J': case 'K': case 'L':
-  case 'M': case 'N': case 'Z': {
-    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
-    if (!C)
-      return;
+bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+                                                      SDValue &Offset,
+                                                      ISD::MemIndexedMode &AM,
+                                                      SelectionDAG &DAG) const {
+  EVT VT;
+  SDValue Ptr;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    VT = LD->getMemoryVT();
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    VT = ST->getMemoryVT();
+    Ptr = ST->getBasePtr();
+  } else
+    return false;

-    uint64_t CVal = C->getZExtValue();
-    uint32_t Bits;
+  bool IsInc;
+  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
+    return false;
+  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
+  return true;
+}

-    switch (Constraint[0]) {
-    default:
-      // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
-      // is a peculiarly useless SUB constraint.
-      llvm_unreachable("Unimplemented C_Other constraint");
-    case 'I':
-      if (CVal <= 0xfff)
-        break;
-      return;
-    case 'K':
-      if (A64Imms::isLogicalImm(32, CVal, Bits))
-        break;
-      return;
-    case 'L':
-      if (A64Imms::isLogicalImm(64, CVal, Bits))
-        break;
-      return;
-    case 'Z':
-      if (CVal == 0)
-        break;
-      return;
-    }
+bool AArch64TargetLowering::getPostIndexedAddressParts(
+    SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
+    ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
+  EVT VT;
+  SDValue Ptr;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    VT = LD->getMemoryVT();
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    VT = ST->getMemoryVT();
+    Ptr = ST->getBasePtr();
+  } else
+    return false;

-    Result = DAG.getTargetConstant(CVal, Op.getValueType());
-    break;
-  }
-  case 'S': {
-    // An absolute symbolic address or label reference.
-    if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
-      Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
-                                          GA->getValueType(0));
-    } else if (const BlockAddressSDNode *BA
-                 = dyn_cast<BlockAddressSDNode>(Op)) {
-      Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
-                                         BA->getValueType(0));
-    } else if (const ExternalSymbolSDNode *ES
-                 = dyn_cast<ExternalSymbolSDNode>(Op)) {
-      Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
-                                           ES->getValueType(0));
-    } else
-      return;
-    break;
-  }
-  case 'Y':
-    if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-      if (CFP->isExactlyValue(0.0)) {
-        Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
-        break;
-      }
-    }
-    return;
-  }
+  bool IsInc;
+  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
+    return false;
+  // Post-indexing updates the base, so it's not a valid transform
+  // if that's not the same as the load's pointer.
+  if (Ptr != Base)
+    return false;
+  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
+  return true;
+}

-  if (Result.getNode()) {
-    Ops.push_back(Result);
+void AArch64TargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+  switch (N->getOpcode()) {
+  default:
+    llvm_unreachable("Don't know how to custom expand this");
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:
+    assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
+    // Let normal code take care of it by not adding anything to Results.
     return;
   }
+}

-  // It's an unknown constraint for us. Let generic code have a go.
-  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
-}

+bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
+  // Loads and stores less than 128-bits are already atomic; ones above that
+  // are doomed anyway, so defer to the default libcall and blame the OS when
+  // things go wrong:
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
+  else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+    return LI->getType()->getPrimitiveSizeInBits() == 128;
+
+  // For the real atomic operations, we have ldxr/stxr up to 128 bits.
+  return Inst->getType()->getPrimitiveSizeInBits() <= 128;
+}

-std::pair<unsigned, const TargetRegisterClass *>
-AArch64TargetLowering::getRegForInlineAsmConstraint(
-                                                  const std::string &Constraint,
-                                                  MVT VT) const {
-  if (Constraint.size() == 1) {
-    switch (Constraint[0]) {
-    case 'r':
-      if (VT.getSizeInBits() <= 32)
-        return std::make_pair(0U, &AArch64::GPR32RegClass);
-      else if (VT == MVT::i64)
-        return std::make_pair(0U, &AArch64::GPR64RegClass);
-      break;
-    case 'w':
-      if (VT == MVT::f16)
-        return std::make_pair(0U, &AArch64::FPR16RegClass);
-      else if (VT == MVT::f32)
-        return std::make_pair(0U, &AArch64::FPR32RegClass);
-      else if (VT.getSizeInBits() == 64)
-        return std::make_pair(0U, &AArch64::FPR64RegClass);
-      else if (VT.getSizeInBits() == 128)
-        return std::make_pair(0U, &AArch64::FPR128RegClass);
-      break;
-    }
+Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+                                             AtomicOrdering Ord) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+  bool IsAcquire =
+      Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
+  // intrinsic must return {i64, i64} and we have to recombine them into a
+  // single i128 here.
+  if (ValTy->getPrimitiveSizeInBits() == 128) {
+    Intrinsic::ID Int =
+        IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
+    Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
+
+    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+    Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
+
+    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+    Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+    Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+    return Builder.CreateOr(
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
   }

-  // Use the default implementation in TargetLowering to convert the register
-  // constraint into a member of a register class.
-  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+  Type *Tys[] = { Addr->getType() };
+  Intrinsic::ID Int =
+      IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
+  Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+  return Builder.CreateTruncOrBitCast(
+      Builder.CreateCall(Ldxr, Addr),
+      cast<PointerType>(Addr->getType())->getElementType());
 }
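// A standalone sketch (not part of the patch): ldxp yields two i64 halves
// that the code above reassembles as lo | (hi << 64). A host-side model
// using the non-standard but GCC/Clang-supported unsigned __int128:
#include <cassert>
#include <cstdint>

unsigned __int128 recombine(uint64_t Lo, uint64_t Hi) {
  // Matches the IR built above: zext both halves, shift Hi by 64, then or.
  return (unsigned __int128)Lo | ((unsigned __int128)Hi << 64);
}

int main() {
  unsigned __int128 V = recombine(0x1122334455667788ULL, 0x99aabbccddeeff00ULL);
  assert((uint64_t)V == 0x1122334455667788ULL);
  assert((uint64_t)(V >> 64) == 0x99aabbccddeeff00ULL);
}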
-/// Represent NEON load and store intrinsics as MemIntrinsicNodes.
-/// The associated MachineMemOperands record the alignment specified
-/// in the intrinsic calls.
-bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
-                                               const CallInst &I,
-                                               unsigned Intrinsic) const {
-  switch (Intrinsic) {
-  case Intrinsic::arm_neon_vld1:
-  case Intrinsic::arm_neon_vld2:
-  case Intrinsic::arm_neon_vld3:
-  case Intrinsic::arm_neon_vld4:
-  case Intrinsic::aarch64_neon_vld1x2:
-  case Intrinsic::aarch64_neon_vld1x3:
-  case Intrinsic::aarch64_neon_vld1x4:
-  case Intrinsic::arm_neon_vld2lane:
-  case Intrinsic::arm_neon_vld3lane:
-  case Intrinsic::arm_neon_vld4lane: {
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    // Conservatively set memVT to the entire set of vectors loaded.
-    uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
-    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
-    Info.ptrVal = I.getArgOperand(0);
-    Info.offset = 0;
-    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
-    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
-    Info.vol = false; // volatile loads with NEON intrinsics not supported
-    Info.readMem = true;
-    Info.writeMem = false;
-    return true;
-  }
-  case Intrinsic::arm_neon_vst1:
-  case Intrinsic::arm_neon_vst2:
-  case Intrinsic::arm_neon_vst3:
-  case Intrinsic::arm_neon_vst4:
-  case Intrinsic::aarch64_neon_vst1x2:
-  case Intrinsic::aarch64_neon_vst1x3:
-  case Intrinsic::aarch64_neon_vst1x4:
-  case Intrinsic::arm_neon_vst2lane:
-  case Intrinsic::arm_neon_vst3lane:
-  case Intrinsic::arm_neon_vst4lane: {
-    Info.opc = ISD::INTRINSIC_VOID;
-    // Conservatively set memVT to the entire set of vectors stored.
-    unsigned NumElts = 0;
-    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
-      Type *ArgTy = I.getArgOperand(ArgI)->getType();
-      if (!ArgTy->isVectorTy())
-        break;
-      NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
-    }
-    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
-    Info.ptrVal = I.getArgOperand(0);
-    Info.offset = 0;
-    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
-    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
-    Info.vol = false; // volatile stores with NEON intrinsics not supported
-    Info.readMem = false;
-    Info.writeMem = true;
-    return true;
-  }
-  default:
-    break;
+Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
+                                                   Value *Val, Value *Addr,
+                                                   AtomicOrdering Ord) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  bool IsRelease =
+      Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+  // Since the intrinsics must have legal type, the i128 intrinsics take two
+  // parameters: "i64, i64". We must marshal Val into the appropriate form
+  // before the call.
+  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
+    Intrinsic::ID Int =
+        IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
+    Function *Stxr = Intrinsic::getDeclaration(M, Int);
+    Type *Int64Ty = Type::getInt64Ty(M->getContext());
+
+    Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
+    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
+    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+    return Builder.CreateCall3(Stxr, Lo, Hi, Addr);
   }
-  return false;
+
+  Intrinsic::ID Int =
+      IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
+  Type *Tys[] = { Addr->getType() };
+  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
+
+  return Builder.CreateCall2(
+      Stxr, Builder.CreateZExtOrBitCast(
+                Val, Stxr->getFunctionType()->getParamType(0)),
+      Addr);
 }
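// A standalone sketch (not part of the patch): these two hooks are paired
// by atomic expansion into a retry loop -- store-conditional reports failure,
// so the loop repeats until the exclusive store succeeds. The stand-in
// functions below are hypothetical, non-atomic models of ldxr/stxr:
#include <cassert>
#include <cstdint>

static uint64_t loadLinked(uint64_t *Addr) { return *Addr; }  // models ldxr
static int storeConditional(uint64_t V, uint64_t *Addr) {     // models stxr
  *Addr = V;
  return 0;  // 0 = success; the real stxr may fail and force a retry
}

uint64_t atomicFetchAddModel(uint64_t *Addr, uint64_t Inc) {
  uint64_t Old;
  do {
    Old = loadLinked(Addr);                         // open exclusive monitor
  } while (storeConditional(Old + Inc, Addr) != 0); // retry until it sticks
  return Old;
}

int main() {
  uint64_t V = 5;
  assert(atomicFetchAddModel(&V, 3) == 5 && V == 8);
}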
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index e946b25..de16c4d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -12,364 +12,453 @@
 //
 //===----------------------------------------------------------------------===//

-#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
-#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+#ifndef LLVM_TARGET_AArch64_ISELLOWERING_H
+#define LLVM_TARGET_AArch64_ISELLOWERING_H

-#include "Utils/AArch64BaseInfo.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
 #include "llvm/Target/TargetLowering.h"

 namespace llvm {
+
 namespace AArch64ISD {
-  enum NodeType {
-    // Start the numbering from where ISD NodeType finishes.
-    FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
-    // This is a conditional branch which also notes the flag needed
-    // (eq/sgt/...). A64 puts this information on the branches rather than
-    // compares as LLVM does.
-    BR_CC,
-
-    // A node to be selected to an actual call operation: either BL or BLR in
-    // the absence of tail calls.
-    Call,
-
-    // Indicates a floating-point immediate which fits into the format required
-    // by the FMOV instructions. First (and only) operand is the 8-bit encoded
-    // value of that immediate.
-    FPMOV,
-
-    // Corresponds directly to an EXTR instruction. Operands are an LHS an RHS
-    // and an LSB.
-    EXTR,
-
-    // Wraps a load from the GOT, which should always be performed with a 64-bit
-    // load instruction. This prevents the DAG combiner folding a truncate to
-    // form a smaller memory access.
-    GOTLoad,
-
-    // Performs a bitfield insert. Arguments are: the value being inserted into;
-    // the value being inserted; least significant bit changed; width of the
-    // field.
-    BFI,
-
-    // Simply a convenient node inserted during ISelLowering to represent
-    // procedure return. Will almost certainly be selected to "RET".
-    Ret,
-
-    /// Extracts a field of contiguous bits from the source and sign extends
-    /// them into a single register. Arguments are: source; immr; imms. Note
-    /// these are pre-encoded since DAG matching can't cope with combining LSB
-    /// and Width into these values itself.
-    SBFX,
-
-    /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
-    /// main difference is that it only has the values and an A64 condition,
-    /// which will be produced by a setcc instruction.
-    SELECT_CC,
-
-    /// This serves most of the functions of the LLVM SETCC instruction, for two
-    /// purposes. First, it prevents optimisations from fiddling with the
-    /// compare after we've moved the CondCode information onto the SELECT_CC or
-    /// BR_CC instructions. Second, it gives a legal instruction for the actual
-    /// comparison.
-    ///
-    /// It keeps a record of the condition flags asked for because certain
-    /// instructions are only valid for a subset of condition codes.
-    SETCC,
-
-    // Designates a node which is a tail call: both a call and a return
-    // instruction as far as selection is concerned. It should be selected to an
-    // unconditional branch. Has the usual plethora of call operands, but: 1st
-    // is callee, 2nd is stack adjustment required immediately before branch.
-    TC_RETURN,
-
-    // Designates a call used to support the TLS descriptor ABI. The call itself
-    // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
-    // var") must be attached somehow during code generation. It takes two
-    // operands: the callee and the symbol to be relocated against.
-    TLSDESCCALL,
-
-    // Leaf node which will be lowered to an appropriate MRS to obtain the
-    // thread pointer: TPIDR_EL0.
-    THREAD_POINTER,
-
-    /// Extracts a field of contiguous bits from the source and zero extends
-    /// them into a single register. Arguments are: source; immr; imms. Note
-    /// these are pre-encoded since DAG matching can't cope with combining LSB
-    /// and Width into these values itself.
-    UBFX,
-
-    // Wraps an address which the ISelLowering phase has decided should be
-    // created using the large memory model style: i.e. a sequence of four
-    // movz/movk instructions.
-    WrapperLarge,
-
-    // Wraps an address which the ISelLowering phase has decided should be
-    // created using the small memory model style: i.e. adrp/add or
-    // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
-    // get selected.
-    WrapperSmall,
-
-    // Vector move immediate
-    NEON_MOVIMM,
-
-    // Vector Move Inverted Immediate
-    NEON_MVNIMM,
-
-    // Vector FP move immediate
-    NEON_FMOVIMM,
-
-    // Vector permute
-    NEON_UZP1,
-    NEON_UZP2,
-    NEON_ZIP1,
-    NEON_ZIP2,
-    NEON_TRN1,
-    NEON_TRN2,
-
-    // Vector Element reverse
-    NEON_REV64,
-    NEON_REV32,
-    NEON_REV16,
-
-    // Vector compare
-    NEON_CMP,
-
-    // Vector compare zero
-    NEON_CMPZ,
-
-    // Vector compare bitwise test
-    NEON_TST,
-
-    // Vector saturating shift
-    NEON_QSHLs,
-    NEON_QSHLu,
-
-    // Vector dup
-    NEON_VDUP,
-
-    // Vector dup by lane
-    NEON_VDUPLANE,
-
-    // Vector extract
-    NEON_VEXTRACT,
-
-    // NEON duplicate lane loads
-    NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
-    NEON_LD3DUP,
-    NEON_LD4DUP,
-
-    // NEON loads with post-increment base updates:
-    NEON_LD1_UPD,
-    NEON_LD2_UPD,
-    NEON_LD3_UPD,
-    NEON_LD4_UPD,
-    NEON_LD1x2_UPD,
-    NEON_LD1x3_UPD,
-    NEON_LD1x4_UPD,
-
-    // NEON stores with post-increment base updates:
-    NEON_ST1_UPD,
-    NEON_ST2_UPD,
-    NEON_ST3_UPD,
-    NEON_ST4_UPD,
-    NEON_ST1x2_UPD,
-    NEON_ST1x3_UPD,
-    NEON_ST1x4_UPD,
-
-    // NEON duplicate lane loads with post-increment base updates:
-    NEON_LD2DUP_UPD,
-    NEON_LD3DUP_UPD,
-    NEON_LD4DUP_UPD,
-
-    // NEON lane loads with post-increment base updates:
-    NEON_LD2LN_UPD,
-    NEON_LD3LN_UPD,
-    NEON_LD4LN_UPD,
-
-    // NEON lane store with post-increment base updates:
-    NEON_ST2LN_UPD,
-    NEON_ST3LN_UPD,
-    NEON_ST4LN_UPD
-  };
-}
+enum {
+  FIRST_NUMBER = ISD::BUILTIN_OP_END,
+  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
+  CALL,         // Function call.
+
+  // Almost the same as a normal call node, except that a TLSDesc relocation is
+  // needed so the linker can relax it correctly if possible.
+  TLSDESC_CALL,
+  ADRP,     // Page address of a TargetGlobalAddress operand.
+  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
+  LOADgot,  // Load from automatically generated descriptor (e.g. Global
+            // Offset Table, TLS record).
+  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
+  BRCOND,   // Conditional branch instruction; "b.cond".
+  CSEL,
+  FCSEL, // Conditional move instruction.
+  CSINV, // Conditional select invert.
+  CSNEG, // Conditional select negate.
+  CSINC, // Conditional select increment.
+
+  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
+  // ELF.
+  THREAD_POINTER,
+  ADC,
+  SBC, // adc, sbc instructions
+
+  // Arithmetic instructions which write flags.
+  ADDS,
+  SUBS,
+  ADCS,
+  SBCS,
+  ANDS,
+
+  // Floating point comparison
+  FCMP,
+
+  // Floating point max and min instructions.
+  FMAX,
+  FMIN,
+
+  // Scalar extract
+  EXTR,
+
+  // Scalar-to-vector duplication
+  DUP,
+  DUPLANE8,
+  DUPLANE16,
+  DUPLANE32,
+  DUPLANE64,
+
+  // Vector immediate moves
+  MOVI,
+  MOVIshift,
+  MOVIedit,
+  MOVImsl,
+  FMOV,
+  MVNIshift,
+  MVNImsl,
+
+  // Vector immediate ops
+  BICi,
+  ORRi,
+
+  // Vector bit select: similar to ISD::VSELECT but not all bits within an
+  // element must be identical.
+  BSL,
+
+  // Vector arithmetic negation
+  NEG,
+
+  // Vector shuffles
+  ZIP1,
+  ZIP2,
+  UZP1,
+  UZP2,
+  TRN1,
+  TRN2,
+  REV16,
+  REV32,
+  REV64,
+  EXT,
+
+  // Vector shift by scalar
+  VSHL,
+  VLSHR,
+  VASHR,
+
+  // Vector shift by scalar (again)
+  SQSHL_I,
+  UQSHL_I,
+  SQSHLU_I,
+  SRSHR_I,
+  URSHR_I,
+
+  // Vector comparisons
+  CMEQ,
+  CMGE,
+  CMGT,
+  CMHI,
+  CMHS,
+  FCMEQ,
+  FCMGE,
+  FCMGT,
+
+  // Vector zero comparisons
+  CMEQz,
+  CMGEz,
+  CMGTz,
+  CMLEz,
+  CMLTz,
+  FCMEQz,
+  FCMGEz,
+  FCMGTz,
+  FCMLEz,
+  FCMLTz,
+
+  // Vector bitwise negation
+  NOT,
+
+  // Vector bitwise selection
+  BIT,
+
+  // Compare-and-branch
+  CBZ,
+  CBNZ,
+  TBZ,
+  TBNZ,
+
+  // Tail calls
+  TC_RETURN,
+
+  // Custom prefetch handling
+  PREFETCH,
+
+  // {s|u}int to FP within a FP register.
+  SITOF,
+  UITOF,
+
+  // NEON Load/Store with post-increment base updates
+  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
+  LD3post,
+  LD4post,
+  ST2post,
+  ST3post,
+  ST4post,
+  LD1x2post,
+  LD1x3post,
+  LD1x4post,
+  ST1x2post,
+  ST1x3post,
+  ST1x4post,
+  LD1DUPpost,
+  LD2DUPpost,
+  LD3DUPpost,
+  LD4DUPpost,
+  LD1LANEpost,
+  LD2LANEpost,
+  LD3LANEpost,
+  LD4LANEpost,
+  ST2LANEpost,
+  ST3LANEpost,
+  ST4LANEpost
+};
+
+} // end namespace AArch64ISD
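// A standalone sketch (not part of the patch): the conditional-select node
// family above (CSEL/CSINC/CSINV/CSNEG) maps to single instructions whose
// semantics are easy to state directly. A scalar model:
#include <cassert>
#include <cstdint>

uint64_t csel(bool C, uint64_t A, uint64_t B) { return C ? A : B; }
uint64_t csinc(bool C, uint64_t A, uint64_t B) { return C ? A : B + 1; }
uint64_t csinv(bool C, uint64_t A, uint64_t B) { return C ? A : ~B; }
uint64_t csneg(bool C, uint64_t A, uint64_t B) { return C ? A : 0 - B; }

int main() {
  // cset, for example, is csinc with both sources XZR and the condition
  // inverted: it materialises 0 or 1 from the flags.
  assert(csinc(false, 0, 0) == 1 && csinc(true, 0, 0) == 0);
  assert(csel(true, 7, 9) == 7);
  assert(csinv(false, 7, 0) == ~0ULL);
  assert(csneg(false, 7, 5) == (uint64_t)-5);
}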
+ + // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on + // ELF. + THREAD_POINTER, + ADC, + SBC, // adc, sbc instructions + + // Arithmetic instructions which write flags. + ADDS, + SUBS, + ADCS, + SBCS, + ANDS, + + // Floating point comparison + FCMP, + + // Floating point max and min instructions. + FMAX, + FMIN, + + // Scalar extract + EXTR, + + // Scalar-to-vector duplication + DUP, + DUPLANE8, + DUPLANE16, + DUPLANE32, + DUPLANE64, + + // Vector immedate moves + MOVI, + MOVIshift, + MOVIedit, + MOVImsl, + FMOV, + MVNIshift, + MVNImsl, + + // Vector immediate ops + BICi, + ORRi, + + // Vector bit select: similar to ISD::VSELECT but not all bits within an + // element must be identical. + BSL, + + // Vector arithmetic negation + NEG, + + // Vector shuffles + ZIP1, + ZIP2, + UZP1, + UZP2, + TRN1, + TRN2, + REV16, + REV32, + REV64, + EXT, + + // Vector shift by scalar + VSHL, + VLSHR, + VASHR, + + // Vector shift by scalar (again) + SQSHL_I, + UQSHL_I, + SQSHLU_I, + SRSHR_I, + URSHR_I, + + // Vector comparisons + CMEQ, + CMGE, + CMGT, + CMHI, + CMHS, + FCMEQ, + FCMGE, + FCMGT, + + // Vector zero comparisons + CMEQz, + CMGEz, + CMGTz, + CMLEz, + CMLTz, + FCMEQz, + FCMGEz, + FCMGTz, + FCMLEz, + FCMLTz, + + // Vector bitwise negation + NOT, + + // Vector bitwise selection + BIT, + + // Compare-and-branch + CBZ, + CBNZ, + TBZ, + TBNZ, + + // Tail calls + TC_RETURN, + + // Custom prefetch handling + PREFETCH, + + // {s|u}int to FP within a FP register. + SITOF, + UITOF, + + // NEON Load/Store with post-increment base updates + LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE, + LD3post, + LD4post, + ST2post, + ST3post, + ST4post, + LD1x2post, + LD1x3post, + LD1x4post, + ST1x2post, + ST1x3post, + ST1x4post, + LD1DUPpost, + LD2DUPpost, + LD3DUPpost, + LD4DUPpost, + LD1LANEpost, + LD2LANEpost, + LD3LANEpost, + LD4LANEpost, + ST2LANEpost, + ST3LANEpost, + ST4LANEpost +}; + +} // end namespace AArch64ISD class AArch64Subtarget; class AArch64TargetMachine; class AArch64TargetLowering : public TargetLowering { + bool RequireStrictAlign; + public: explicit AArch64TargetLowering(AArch64TargetMachine &TM); - const char *getTargetNodeName(unsigned Opcode) const; + /// Selects the correct CCAssignFn for a the given CallingConvention + /// value. + CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const; + + /// computeKnownBitsForTargetNode - Determine which of the bits specified in + /// Mask are known to be either zero or one and return them in the + /// KnownZero/KnownOne bitsets. + void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, + APInt &KnownOne, const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + MVT getScalarShiftAmountTy(EVT LHSTy) const override; + + /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// unaligned memory accesses. of the specified type. + bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0, + bool *Fast = nullptr) const override { + if (RequireStrictAlign) + return false; + // FIXME: True for Cyclone, but not necessary others. + if (Fast) + *Fast = true; + return true; + } - CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const; + /// LowerOperation - Provide custom lowering hooks for some operations. 
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - SDValue LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + const char *getTargetNodeName(unsigned Opcode) const override; - SDValue LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - virtual unsigned getByValTypeAlignment(Type *Ty) const override; + /// getFunctionAlignment - Return the Log2 alignment of this function. + unsigned getFunctionAlignment(const Function *F) const; - SDValue LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const; + /// getMaximalGlobalOffset - Returns the maximal possible offset which can + /// be used for loads / stores from the global. + unsigned getMaximalGlobalOffset() const override; - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } - SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + /// createFastISel - This method returns a target specific FastISel object, + /// or null if the target does not support "fast" ISel. + FastISel *createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) const override; - bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1, - const int *Mask, SDValue &Res) const; + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0, - SDValue &V1, int *Mask) const; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, - const AArch64Subtarget *ST) const; + /// isShuffleMaskLegal - Return true if the given shuffle mask can be + /// codegen'd directly, or if it should be stack expanded. + bool isShuffleMaskLegal(const SmallVectorImpl &M, EVT VT) const override; - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + /// getSetCCResultType - Return the ISD::SETCC ValueType + EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL, - SDValue &Chain) const; + SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; - /// IsEligibleForTailCallOptimization - Check whether the call is eligible - /// for tail call optimization. Targets which want to do tail call - /// optimization should implement this function. - bool IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool IsVarArg, - bool IsCalleeStructRet, - bool IsCallerStructRet, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SelectionDAG& DAG) const; + MachineBasicBlock *EmitF128CSEL(MachineInstr *MI, + MachineBasicBlock *BB) const; - /// Finds the incoming stack arguments which overlap the given fixed stack - /// object and incorporates their load into the current chain. 
This prevents
-  /// an upcoming store from clobbering the stack argument before it's used.
-  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
-                              MachineFrameInfo *MFI, int ClobberedFI) const;
+  MachineBasicBlock *
+  EmitInstrWithCustomInserter(MachineInstr *MI,
+                              MachineBasicBlock *MBB) const override;
 
-  EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+                          unsigned Intrinsic) const override;
 
-  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
+  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+  bool isTruncateFree(EVT VT1, EVT VT2) const override;
 
-  bool IsTailCallConvention(CallingConv::ID CallCC) const;
+  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
+  bool isZExtFree(EVT VT1, EVT VT2) const override;
+  bool isZExtFree(SDValue Val, EVT VT2) const override;
 
-  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+  bool hasPairedLoad(Type *LoadedType,
+                     unsigned &RequiredAlignment) const override;
+  bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAlignment) const override;
 
-  bool isLegalICmpImmediate(int64_t Val) const;
-  SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                                SDValue &A64cc, SelectionDAG &DAG,
-                                SDLoc &dl) const;
+  bool isLegalAddImmediate(int64_t) const override;
+  bool isLegalICmpImmediate(int64_t) const override;
 
-  virtual MachineBasicBlock *
-  EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+                          MachineFunction &MF) const override;
 
-  MachineBasicBlock *
-  emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
-                   unsigned Size, unsigned Opcode) const;
+  /// isLegalAddressingMode - Return true if the addressing mode represented
+  /// by AM is legal for this target, for a load/store of the specified type.
+  bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
 
-  MachineBasicBlock *
-  emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
-                         unsigned Size, unsigned CmpOp,
-                         A64CC::CondCodes Cond) const;
-  MachineBasicBlock *
-  emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
-                    unsigned Size) const;
+  /// \brief Return the cost of the scaling factor used in the addressing
+  /// mode represented by AM for this target, for a load/store
+  /// of the specified type.
+  /// If the AM is supported, the return value must be >= 0.
+  /// If the AM is not supported, it returns a negative value.
+  int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
 
-  MachineBasicBlock *
-  EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
 
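A small illustration (mine, not from the patch) of the contraction controlled by the FMA hook documented just below; it assumes Clang with FP contraction enabled:

  // This becomes a single FMADD on AArch64 precisely when
  // isFMAFasterThanFMulAndFAdd(MVT::f64) returns true; otherwise the
  // fmuladd intrinsic is split back into FMUL + FADD.
  double muladd(double a, double b, double c) { return a * b + c; }

+  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+  /// expanded to FMAs when this method returns true, otherwise fmuladd is
+  /// expanded to fmul + fadd.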
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; - SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG, - RTLIB::Libcall Call) const; - SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; - SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + /// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask. + bool isDesirableToCommuteWithShift(const SDNode *N) const override; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + /// \brief Returns true if it is beneficial to convert a load of a constant + /// to just the constant itself. + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const override; - SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + bool shouldExpandAtomicInIR(Instruction *Inst) const override; - /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster - /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be - /// expanded to FMAs when this method returns true, otherwise fmuladd is - /// expanded to fmul + fadd. - virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; +private: + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when generating code for different targets. 
+ const AArch64Subtarget *Subtarget; - ConstraintType getConstraintType(const std::string &Constraint) const; + void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT); + void addDRTypeForNEON(MVT VT); + void addQRTypeForNEON(MVT VT); - ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info, - const char *Constraint) const; - void LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const; + SDValue + LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, SDLoc DL, + SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; - std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; + SDValue LowerCall(CallLoweringInfo & /*CLI*/, + SmallVectorImpl &InVals) const override; - virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - unsigned Intrinsic) const override; + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, SDLoc DL, + SelectionDAG &DAG, SmallVectorImpl &InVals, + bool isThisReturn, SDValue ThisVal) const; + + bool isEligibleForTailCallOptimization( + SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, + bool isCalleeStructRet, bool isCallerStructRet, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, SelectionDAG &DAG) const; -protected: - std::pair - findRepresentativeClass(MVT VT) const; + /// Finds the incoming stack arguments which overlap the given fixed stack + /// object and incorporates their load into the current chain. This prevents + /// an upcoming store from clobbering the stack argument before it's used. + SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, + MachineFrameInfo *MFI, int ClobberedFI) const; -private: - const InstrItineraryData *Itins; + bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; - const AArch64Subtarget *getSubtarget() const { - return &getTargetMachine().getSubtarget(); - } -}; -enum NeonModImmType { - Neon_Mov_Imm, - Neon_Mvn_Imm + bool IsTailCallConvention(CallingConv::ID CallCC) const; + + void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL, + SDValue &Chain) const; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, SDLoc DL, + SelectionDAG &DAG) const override; + + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, + SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue 
LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG, + RTLIB::Libcall Call) const; + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; + + ConstraintType + getConstraintType(const std::string &Constraint) const override; + unsigned getRegisterByName(const char* RegName, EVT VT) const override; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. 
+  ConstraintWeight
+  getSingleConstraintMatchWeight(AsmOperandInfo &info,
+                                 const char *constraint) const override;
+
+  std::pair<unsigned, const TargetRegisterClass *>
+  getRegForInlineAsmConstraint(const std::string &Constraint,
+                               MVT VT) const override;
+  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+                                    std::vector<SDValue> &Ops,
+                                    SelectionDAG &DAG) const override;
+
+  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
+  bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
+                              ISD::MemIndexedMode &AM, bool &IsInc,
+                              SelectionDAG &DAG) const;
+  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
+                                 ISD::MemIndexedMode &AM,
+                                 SelectionDAG &DAG) const override;
+  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
+                                  SDValue &Offset, ISD::MemIndexedMode &AM,
+                                  SelectionDAG &DAG) const override;
+
+  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                          SelectionDAG &DAG) const override;
 };
 
-extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement,
-                                bool &usesOnlyOneValue, bool &hasDominantValue,
-                                bool &isConstant, bool &isUNDEF);
-} // namespace llvm
+namespace AArch64 {
+FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+                         const TargetLibraryInfo *libInfo);
+} // end namespace AArch64
+
+} // end namespace llvm
 
-#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
+#endif // LLVM_TARGET_AArch64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
new file mode 100644
index 0000000..3b9e3c6
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -0,0 +1,364 @@
+//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Atomic operand code-gen constructs.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------
+// Atomic fences
+//===----------------------------------
+def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
+def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
+
+//===----------------------------------
+// Atomic loads
+//===----------------------------------
+
+// When they're actually atomic, only one addressing mode (GPR64sp) is
+// supported, but when they're relaxed and anything can be used, all the
+// standard modes would be valid and may give efficiency gains.
+
+// An atomic load operation that actually needs acquire semantics.
+class acquiring_load<PatFrag base>
+  : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  assert(Ordering != AcquireRelease && "unexpected load ordering");
+  return Ordering == Acquire || Ordering == SequentiallyConsistent;
+}]>;
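
As a reading aid (an illustrative sketch of mine, not part of the patch; assumes Clang/LLVM targeting AArch64), here is how C++11 loads land on these fragments:

  #include <atomic>
  // acquire/seq_cst loads match acquiring_load and select the LDAR* forms;
  // relaxed (monotonic) and unordered loads match relaxed_load and select
  // the ordinary LDR/LDUR addressing forms below.
  int load_acquire(std::atomic<int> &x) {
    return x.load(std::memory_order_acquire);   // -> LDAR
  }
  int load_relaxed(std::atomic<int> &x) {
    return x.load(std::memory_order_relaxed);   // -> LDR/LDUR
  }

+// An atomic load operation that does not need either acquire or release
+// semantics.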
+class relaxed_load<PatFrag base>
+  : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Monotonic || Ordering == Unordered;
+}]>;
+
+// 8-bit loads
+def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+                                                     ro_Wextend8:$offset)),
+          (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+                                                     ro_Xextend8:$offset)),
+          (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
+                                                    uimm12s1:$offset)),
+          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8>
+              (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bit loads
+def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend16:$extend)),
+          (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
+def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend16:$extend)),
+          (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
+def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
+                                                      uimm12s2:$offset)),
+          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(relaxed_load<atomic_load_16>
+              (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bit loads
+def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend32:$extend)),
+          (LDRWroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend32:$extend)),
+          (LDRWroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
+                                                      uimm12s4:$offset)),
+          (LDRWui GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_load<atomic_load_32>
+              (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
+          (LDURWi GPR64sp:$Rn, simm9:$offset)>;
+
+// 64-bit loads
+def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend64:$extend)),
+          (LDRXroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend64:$extend)),
+          (LDRXroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
+                                                      uimm12s8:$offset)),
+          (LDRXui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_64>
+              (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+          (LDURXi GPR64sp:$Rn, simm9:$offset)>;
+
+//===----------------------------------
+// Atomic stores
+//===----------------------------------
+
+// When they're actually atomic, only one addressing mode (GPR64sp) is
+// supported, but when they're relaxed and anything can be used, all the
+// standard modes would be valid and may give efficiency gains.
+
+// A store operation that actually needs release semantics.
+class releasing_store<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  assert(Ordering != AcquireRelease && "unexpected store ordering");
+  return Ordering == Release || Ordering == SequentiallyConsistent;
+}]>;
+
+// An atomic store operation that doesn't actually need to be atomic on AArch64.
+class relaxed_store<PatFrag base>
+  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Monotonic || Ordering == Unordered;
+}]>;
+
+// 8-bit stores
+def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
+          (STLRB GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_8>
+              (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
+              GPR32:$val),
+          (STRBBroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend)>;
+def : Pat<(relaxed_store<atomic_store_8>
+              (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
+              GPR32:$val),
+          (STRBBroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend)>;
+def : Pat<(relaxed_store<atomic_store_8>
+              (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), GPR32:$val),
+          (STRBBui GPR32:$val, GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(relaxed_store<atomic_store_8>
+              (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+          (STURBBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bit stores
+def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val),
+          (STLRH GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend16:$extend),
+                                          GPR32:$val),
+          (STRHHroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
+def : Pat<(relaxed_store<atomic_store_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend16:$extend),
+                                          GPR32:$val),
+          (STRHHroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
+def : Pat<(relaxed_store<atomic_store_16>
+              (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), GPR32:$val),
+          (STRHHui GPR32:$val, GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(relaxed_store<atomic_store_16>
+              (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+          (STURHHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bit stores
+def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val),
+          (STLRW GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend32:$extend),
+                                          GPR32:$val),
+          (STRWroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend32:$extend),
+                                          GPR32:$val),
+          (STRWroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32>
+              (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), GPR32:$val),
+          (STRWui GPR32:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_32>
+              (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+          (STURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 64-bit stores
+def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
+          (STLRX GPR64:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend64:$extend),
+                                          GPR64:$val),
+          (STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend64:$extend),
+                                          GPR64:$val),
+          (STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64>
+              (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), GPR64:$val),
+          (STRXui GPR64:$val, GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(relaxed_store<atomic_store_64>
+              (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
+          (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
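
The store-side counterpart of the earlier load sketch (again my illustration, not part of the patch; assumes Clang/LLVM targeting AArch64):

  #include <atomic>
  // release/seq_cst stores match releasing_store and select the STLR* forms;
  // relaxed stores match relaxed_store and select ordinary STR/STUR forms.
  void store_release(std::atomic<int> &x, int v) {
    x.store(v, std::memory_order_release);   // -> STLR
  }
  void store_relaxed(std::atomic<int> &x, int v) {
    x.store(v, std::memory_order_relaxed);   // -> STR/STUR
  }

+
+//===----------------------------------
+// Low-level exclusive operations
+//===----------------------------------
+
+// Load-exclusives.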
+
+def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def : Pat<(ldxr_1 GPR64sp:$addr),
+          (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_2 GPR64sp:$addr),
+          (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_4 GPR64sp:$addr),
+          (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_8 GPR64sp:$addr), (LDXRX GPR64sp:$addr)>;
+
+def : Pat<(and (ldxr_1 GPR64sp:$addr), 0xff),
+          (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldxr_2 GPR64sp:$addr), 0xffff),
+          (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff),
+          (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>;
+
+// Load-acquire-exclusives.
+
+def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def : Pat<(ldaxr_1 GPR64sp:$addr),
+          (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_2 GPR64sp:$addr),
+          (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_4 GPR64sp:$addr),
+          (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_8 GPR64sp:$addr), (LDAXRX GPR64sp:$addr)>;
+
+def : Pat<(and (ldaxr_1 GPR64sp:$addr), 0xff),
+          (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldaxr_2 GPR64sp:$addr), 0xffff),
+          (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff),
+          (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>;
+
+// Store-exclusives.
+
+def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
+                     (int_aarch64_stxr node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
+                     (int_aarch64_stxr node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
+                     (int_aarch64_stxr node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
+                     (int_aarch64_stxr node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+
+def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr),
+          (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_2 GPR64:$val, GPR64sp:$addr),
+          (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_4 GPR64:$val, GPR64sp:$addr),
+          (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_8 GPR64:$val, GPR64sp:$addr),
+          (STXRX GPR64:$val, GPR64sp:$addr)>;
+
+def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr),
+          (STXRB GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr),
+          (STXRH GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stxr_4 (zext GPR32:$val), GPR64sp:$addr),
+          (STXRW GPR32:$val, GPR64sp:$addr)>;
+
+def : Pat<(stxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr),
+          (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr),
+          (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
+          (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+
+// Store-release-exclusives.
+
+def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_aarch64_stlxr node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_aarch64_stlxr node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_aarch64_stlxr node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
+                      (int_aarch64_stlxr node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+
+def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
+          (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_2 GPR64:$val, GPR64sp:$addr),
+          (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_4 GPR64:$val, GPR64sp:$addr),
+          (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_8 GPR64:$val, GPR64sp:$addr),
+          (STLXRX GPR64:$val, GPR64sp:$addr)>;
+
+def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr),
+          (STLXRB GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr),
+          (STLXRH GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stlxr_4 (zext GPR32:$val), GPR64sp:$addr),
+          (STLXRW GPR32:$val, GPR64sp:$addr)>;
+
+def : Pat<(stlxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr),
+          (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr),
+          (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
+          (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
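
A sketch (mine, not from the patch) of the retry loop these exclusive intrinsics express; assumes an ARMv8 target without the LSE atomics extension:

  #include <atomic>
  // Pre-LSE, this compiles to the LDAXR/STLXR retry loop that the
  // ldaxr_*/stlxr_* fragments above select: load-acquire-exclusive the old
  // value, attempt a store-release-exclusive of the new one, and loop while
  // the store reports failure.
  long exchange(std::atomic<long> &x, long v) {
    return x.exchange(v, std::memory_order_acq_rel);
  }

+
+
+// And clear exclusive.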
+
+def : Pat<(int_aarch64_clrex), (CLREX 0xf)>;
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 4cc3813..d455d7e 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1,4 +1,4 @@
-//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,1482 +6,8569 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-// This file describes AArch64 instruction formats, down to the level of the
-// instruction's overall class.
-//===----------------------------------------------------------------------===//
-
 //===----------------------------------------------------------------------===//
-// A64 Instruction Format Definitions.
-//===----------------------------------------------------------------------===//
+// Describe the AArch64 instruction formats here.
+//
 
-// A64 is currently the only instruction set supported by the AArch64
-// architecture.
-class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
-              InstrItinClass itin>
-  : Instruction {
-  // All A64 instructions are 32-bit. This field will be filled in
-  // gradually going down the hierarchy.
-  field bits<32> Inst;
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<2> val> {
+  bits<2> Value = val;
+}
+
+def PseudoFrm : Format<0>;
+def NormalFrm : Format<1>; // Do we need any others?
 
+// AArch64 Instruction Format
+class AArch64Inst<Format f, string cstr> : Instruction {
+  field bits<32> Inst; // Instruction encoding.
+  // Mask of bits that cause an encoding to be UNPREDICTABLE.
+  // If a bit is set, then if the corresponding bit in the
+  // target encoding differs from its value in the "Inst" field,
+  // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
   field bits<32> Unpredictable = 0;
   // SoftFail is the generic name for this field, but we alias it so
   // as to make it more obvious what it means in ARM-land.
   field bits<32> SoftFail = Unpredictable;
-
-  // LLVM-level model of the AArch64/A64 distinction.
-  let Namespace = "AArch64";
-  let DecoderNamespace = "A64";
-  let Size = 4;
-
-  // Set the templated fields
-  let OutOperandList = outs;
-  let InOperandList = ins;
-  let AsmString = asmstr;
-  let Pattern = patterns;
-  let Itinerary = itin;
+  let Namespace = "AArch64";
+  Format F = f;
+  bits<2> Form = F.Value;
+  let Pattern = [];
+  let Constraints = cstr;
 }
 
-class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction {
-  let Namespace = "AArch64";
-
-  let OutOperandList = outs;
-  let InOperandList= ins;
-  let Pattern = patterns;
-  let isCodeGenOnly = 1;
-  let isPseudo = 1;
+// Pseudo instructions (don't have encoding information)
+class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
+    : AArch64Inst<PseudoFrm, cstr> {
+  dag OutOperandList = oops;
+  dag InOperandList  = iops;
+  let Pattern        = pattern;
+  let isCodeGenOnly  = 1;
 }
 
-// Represents a pseudo-instruction that represents a single A64 instruction for
-// whatever reason, the eventual result will be a 32-bit real instruction.
-class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
-  : PseudoInst<outs, ins, patterns> {
+// Real instructions (have encoding information)
+class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> {
+  let Pattern = pattern;
   let Size = 4;
 }
 
-// As above, this will be a single A64 instruction, but we can actually give the
-// expansion in TableGen.
-class A64PseudoExpand patterns, dag Result> - : A64PseudoInst, - PseudoInstExpansion; +// Normal instructions +class I pattern> + : EncodedI { + dag OutOperandList = oops; + dag InOperandList = iops; + let AsmString = !strconcat(asm, operands); +} +class TriOpFrag : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>; +class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; +class UnOpFrag : PatFrag<(ops node:$LHS), res>; -// First, some common cross-hierarchy register formats. +// Helper fragment for an extract of the high portion of a 128-bit vector. +def extract_high_v16i8 : + UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>; +def extract_high_v8i16 : + UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>; +def extract_high_v4i32 : + UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>; +def extract_high_v2i64 : + UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>; -class A64InstRd patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rd; +//===----------------------------------------------------------------------===// +// Asm Operand Classes. +// - let Inst{4-0} = Rd; +// Shifter operand for arithmetic shifted encodings. +def ShifterOperand : AsmOperandClass { + let Name = "Shifter"; } -class A64InstRt patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rt; - - let Inst{4-0} = Rt; +// Shifter operand for mov immediate encodings. +def MovImm32ShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "MovImm32Shifter"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "InvalidMovImm32Shift"; +} +def MovImm64ShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "MovImm64Shifter"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "InvalidMovImm64Shift"; } +// Shifter operand for arithmetic register shifted encodings. +class ArithmeticShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "ArithmeticShifter" # width; + let PredicateMethod = "isArithmeticShifter<" # width # ">"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "AddSubRegShift" # width; +} -class A64InstRdn patterns, InstrItinClass itin> - : A64InstRd { - // Inherit rdt - bits<5> Rn; +def ArithmeticShifterOperand32 : ArithmeticShifterOperand<32>; +def ArithmeticShifterOperand64 : ArithmeticShifterOperand<64>; - let Inst{9-5} = Rn; +// Shifter operand for logical register shifted encodings. +class LogicalShifterOperand : AsmOperandClass { + let SuperClasses = [ShifterOperand]; + let Name = "LogicalShifter" # width; + let PredicateMethod = "isLogicalShifter<" # width # ">"; + let RenderMethod = "addShifterOperands"; + let DiagnosticType = "AddSubRegShift" # width; } -class A64InstRtn patterns, InstrItinClass itin> - : A64InstRt { - // Inherit rdt - bits<5> Rn; +def LogicalShifterOperand32 : LogicalShifterOperand<32>; +def LogicalShifterOperand64 : LogicalShifterOperand<64>; - let Inst{9-5} = Rn; +// Shifter operand for logical vector 128/64-bit shifted encodings. 
+def LogicalVecShifterOperand : AsmOperandClass {
+  let SuperClasses = [ShifterOperand];
+  let Name = "LogicalVecShifter";
+  let RenderMethod = "addShifterOperands";
+}
+def LogicalVecHalfWordShifterOperand : AsmOperandClass {
+  let SuperClasses = [LogicalVecShifterOperand];
+  let Name = "LogicalVecHalfWordShifter";
+  let RenderMethod = "addShifterOperands";
+}
+
+// The "MSL" shifter on the vector MOVI instruction.
+def MoveVecShifterOperand : AsmOperandClass {
+  let SuperClasses = [ShifterOperand];
+  let Name = "MoveVecShifter";
+  let RenderMethod = "addShifterOperands";
+}
+
+// Extend operand for arithmetic encodings.
+def ExtendOperand : AsmOperandClass {
+  let Name = "Extend";
+  let DiagnosticType = "AddSubRegExtendLarge";
+}
+def ExtendOperand64 : AsmOperandClass {
+  let SuperClasses = [ExtendOperand];
+  let Name = "Extend64";
+  let DiagnosticType = "AddSubRegExtendSmall";
+}
+// 'extend' that's a lsl of a 64-bit register.
+def ExtendOperandLSL64 : AsmOperandClass {
+  let SuperClasses = [ExtendOperand];
+  let Name = "ExtendLSL64";
+  let RenderMethod = "addExtend64Operands";
+  let DiagnosticType = "AddSubRegExtendLarge";
+}
+
+// 8-bit floating-point immediate encodings.
+def FPImmOperand : AsmOperandClass {
+  let Name = "FPImm";
+  let ParserMethod = "tryParseFPImm";
+  let DiagnosticType = "InvalidFPImm";
+}
+
+def CondCode : AsmOperandClass {
+  let Name = "CondCode";
+  let DiagnosticType = "InvalidCondCode";
+}
+
+// A 32-bit register parsed as 64-bit
+def GPR32as64Operand : AsmOperandClass {
+  let Name = "GPR32as64";
+}
+def GPR32as64 : RegisterOperand<GPR64> {
+  let ParserMatchClass = GPR32as64Operand;
+}
+
+// 8-bit immediate for AdvSIMD where 64-bit values of the form:
+// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
+// are encoded as the eight bit value 'abcdefgh'.
+def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; }
 
-// Format for Add-subtract (extended register) instructions.
-class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
-                     dag outs, dag ins, string asmstr, list<dag> patterns,
-                     InstrItinClass itin>
-  : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
-  bits<3> Imm3;
-
-  let Inst{31} = sf;
-  let Inst{30} = op;
-  let Inst{29} = S;
-  let Inst{28-24} = 0b01011;
-  let Inst{23-22} = opt;
-  let Inst{21} = 0b1;
-  // Rm inherited in 20-16
-  let Inst{15-13} = option;
-  let Inst{12-10} = Imm3;
-  // Rn inherited in 9-5
-  // Rd inherited in 4-0
-}
-
-// Format for Add-subtract (immediate) instructions.
-class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
-                     dag outs, dag ins, string asmstr,
-                     list<dag> patterns, InstrItinClass itin>
-  : A64InstRdn<outs, ins, asmstr, patterns, itin> {
-  bits<12> Imm12;
-
-  let Inst{31} = sf;
-  let Inst{30} = op;
-  let Inst{29} = S;
-  let Inst{28-24} = 0b10001;
-  let Inst{23-22} = shift;
-  let Inst{21-10} = Imm12;
-}
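
To make the extend-operand classes above concrete, a hedged C-level sketch (mine, not from the patch) of what an extended-register arithmetic encoding computes:

  #include <cstdint>
  // "ADD Xd, Xn, Wm, SXTW #2" adds a sign-extended, left-shifted 32-bit
  // register to a 64-bit one; this is the kind of operand the
  // Extend/Extend64 classes describe for the assembler.
  uint64_t add_sxtw_2(uint64_t xn, int32_t wm) {
    return xn + ((uint64_t)(int64_t)wm << 2);
  }

-
-// Format for Add-subtract (shifted register) instructions.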
-class A64I_addsubshift shift, - dag outs, dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRdnm { - bits<6> Imm6; - - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-24} = 0b01011; - let Inst{23-22} = shift; - let Inst{21} = 0b0; - // Rm inherited in 20-16 - let Inst{15-10} = Imm6; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - -// Format for Add-subtract (with carry) instructions. -class A64I_addsubcarry opcode2, - dag outs, dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = S; - let Inst{28-21} = 0b11010000; - // Rm inherited in 20-16 - let Inst{15-10} = opcode2; - // Rn inherited in 9-5 - // Rd inherited in 4-0 -} - - -// Format for Bitfield instructions -class A64I_bitfield opc, bit n, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<6> ImmR; - bits<6> ImmS; +//===----------------------------------------------------------------------===// +// Operand Definitions. +// - let Inst{31} = sf; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{22} = n; - let Inst{21-16} = ImmR; - let Inst{15-10} = ImmS; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +// ADR[P] instruction labels. +def AdrpOperand : AsmOperandClass { + let Name = "AdrpLabel"; + let ParserMethod = "tryParseAdrpLabel"; + let DiagnosticType = "InvalidLabel"; +} +def adrplabel : Operand { + let EncoderMethod = "getAdrLabelOpValue"; + let PrintMethod = "printAdrpLabel"; + let ParserMatchClass = AdrpOperand; } -// Format for compare and branch (immediate) instructions. -class A64I_cmpbr patterns, InstrItinClass itin> - : A64InstRt { - bits<19> Label; - - let Inst{31} = sf; - let Inst{30-25} = 0b011010; - let Inst{24} = op; - let Inst{23-5} = Label; - // Inherit Rt in 4-0 +def AdrOperand : AsmOperandClass { + let Name = "AdrLabel"; + let ParserMethod = "tryParseAdrLabel"; + let DiagnosticType = "InvalidLabel"; +} +def adrlabel : Operand { + let EncoderMethod = "getAdrLabelOpValue"; + let ParserMatchClass = AdrOperand; } -// Format for conditional branch (immediate) instructions. -class A64I_condbr patterns, InstrItinClass itin> - : A64Inst { - bits<19> Label; - bits<4> Cond; +// simm9 predicate - True if the immediate is in the range [-256, 255]. +def SImm9Operand : AsmOperandClass { + let Name = "SImm9"; + let DiagnosticType = "InvalidMemoryIndexedSImm9"; +} +def simm9 : Operand, ImmLeaf= -256 && Imm < 256; }]> { + let ParserMatchClass = SImm9Operand; +} - let Inst{31-25} = 0b0101010; - let Inst{24} = o1; - let Inst{23-5} = Label; - let Inst{4} = o0; - let Inst{3-0} = Cond; +// simm7sN predicate - True if the immediate is a multiple of N in the range +// [-64 * N, 63 * N]. +class SImm7Scaled : AsmOperandClass { + let Name = "SImm7s" # Scale; + let DiagnosticType = "InvalidMemoryIndexed" # Scale # "SImm7"; } -// Format for conditional compare (immediate) instructions. 
-class A64I_condcmpimm patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rn; - bits<5> UImm5; - bits<4> NZCVImm; - bits<4> Cond; +def SImm7s4Operand : SImm7Scaled<4>; +def SImm7s8Operand : SImm7Scaled<8>; +def SImm7s16Operand : SImm7Scaled<16>; - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010010; - let Inst{20-16} = UImm5; - let Inst{15-12} = Cond; - let Inst{11} = 0b1; - let Inst{10} = o2; - let Inst{9-5} = Rn; - let Inst{4} = o3; - let Inst{3-0} = NZCVImm; +def simm7s4 : Operand { + let ParserMatchClass = SImm7s4Operand; + let PrintMethod = "printImmScale<4>"; } -// Format for conditional compare (register) instructions. -class A64I_condcmpreg patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rn; - bits<5> Rm; - bits<4> NZCVImm; - bits<4> Cond; +def simm7s8 : Operand { + let ParserMatchClass = SImm7s8Operand; + let PrintMethod = "printImmScale<8>"; +} +def simm7s16 : Operand { + let ParserMatchClass = SImm7s16Operand; + let PrintMethod = "printImmScale<16>"; +} - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010010; - let Inst{20-16} = Rm; - let Inst{15-12} = Cond; - let Inst{11} = 0b0; - let Inst{10} = o2; - let Inst{9-5} = Rn; - let Inst{4} = o3; - let Inst{3-0} = NZCVImm; +class AsmImmRange : AsmOperandClass { + let Name = "Imm" # Low # "_" # High; + let DiagnosticType = "InvalidImm" # Low # "_" # High; } -// Format for conditional select instructions. -class A64I_condsel op2, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<4> Cond; +def Imm1_8Operand : AsmImmRange<1, 8>; +def Imm1_16Operand : AsmImmRange<1, 16>; +def Imm1_32Operand : AsmImmRange<1, 32>; +def Imm1_64Operand : AsmImmRange<1, 64>; - let Inst{31} = sf; - let Inst{30} = op; - let Inst{29} = s; - let Inst{28-21} = 0b11010100; - // Inherit Rm in 20-16 - let Inst{15-12} = Cond; - let Inst{11-10} = op2; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +def MovZSymbolG3AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG3"; + let RenderMethod = "addImmOperands"; } -// Format for data processing (1 source) instructions -class A64I_dp_1src opcode2, bits<6> opcode, - string asmstr, dag outs, dag ins, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = sf; - let Inst{30} = 0b1; - let Inst{29} = S; - let Inst{28-21} = 0b11010110; - let Inst{20-16} = opcode2; - let Inst{15-10} = opcode; -} - -// Format for data processing (2 source) instructions -class A64I_dp_2src opcode, bit S, - string asmstr, dag outs, dag ins, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = S; - let Inst{28-21} = 0b11010110; - let Inst{15-10} = opcode; +def movz_symbol_g3 : Operand { + let ParserMatchClass = MovZSymbolG3AsmOperand; } -// Format for data-processing (3 source) instructions +def MovZSymbolG2AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG2"; + let RenderMethod = "addImmOperands"; +} -class A64I_dp3 opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = sf; - let Inst{30-29} = opcode{5-4}; - let Inst{28-24} = 0b11011; - let Inst{23-21} = opcode{3-1}; - // Inherits Rm in 20-16 - let Inst{15} = opcode{0}; - // {14-10} mostly Ra, but unspecified for SMULH/UMULH - // Inherits Rn in 9-5 - // Inherits Rd in 4-0 -} - -// Format for exception generation instructions -class A64I_exception opc, bits<3> op2, bits<2> ll, - dag outs, dag ins, string asmstr, - list 
patterns, InstrItinClass itin> - : A64Inst { - bits<16> UImm16; +def movz_symbol_g2 : Operand { + let ParserMatchClass = MovZSymbolG2AsmOperand; +} - let Inst{31-24} = 0b11010100; - let Inst{23-21} = opc; - let Inst{20-5} = UImm16; - let Inst{4-2} = op2; - let Inst{1-0} = ll; +def MovZSymbolG1AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG1"; + let RenderMethod = "addImmOperands"; } -// Format for extract (immediate) instructions -class A64I_extract op, bit n, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<6> LSB; +def movz_symbol_g1 : Operand { + let ParserMatchClass = MovZSymbolG1AsmOperand; +} - let Inst{31} = sf; - let Inst{30-29} = op{2-1}; - let Inst{28-23} = 0b100111; - let Inst{22} = n; - let Inst{21} = op{0}; - // Inherits Rm in bits 20-16 - let Inst{15-10} = LSB; - // Inherits Rn in 9-5 - // Inherits Rd in 4-0 +def MovZSymbolG0AsmOperand : AsmOperandClass { + let Name = "MovZSymbolG0"; + let RenderMethod = "addImmOperands"; } -let Predicates = [HasFPARMv8] in { +def movz_symbol_g0 : Operand { + let ParserMatchClass = MovZSymbolG0AsmOperand; +} -// Format for floating-point compare instructions. -class A64I_fpcmp type, bits<2> op, bits<5> opcode2, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64Inst { - bits<5> Rn; - bits<5> Rm; +def MovKSymbolG3AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG3"; + let RenderMethod = "addImmOperands"; +} - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-14} = op; - let Inst{13-10} = 0b1000; - let Inst{9-5} = Rn; - let Inst{4-0} = opcode2; +def movk_symbol_g3 : Operand { + let ParserMatchClass = MovKSymbolG3AsmOperand; } -// Format for floating-point conditional compare instructions. -class A64I_fpccmp type, bit op, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<5> Rn; - bits<5> Rm; - bits<4> NZCVImm; - bits<4> Cond; +def MovKSymbolG2AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG2"; + let RenderMethod = "addImmOperands"; +} - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-16} = Rm; - let Inst{15-12} = Cond; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4} = op; - let Inst{3-0} = NZCVImm; +def movk_symbol_g2 : Operand { + let ParserMatchClass = MovKSymbolG2AsmOperand; } -// Format for floating-point conditional select instructions. -class A64I_fpcondsel type, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<4> Cond; +def MovKSymbolG1AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG1"; + let RenderMethod = "addImmOperands"; +} - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = Cond; - let Inst{11-10} = 0b11; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +def movk_symbol_g1 : Operand { + let ParserMatchClass = MovKSymbolG1AsmOperand; } +def MovKSymbolG0AsmOperand : AsmOperandClass { + let Name = "MovKSymbolG0"; + let RenderMethod = "addImmOperands"; +} -// Format for floating-point data-processing (1 source) instructions. 
-class A64I_fpdp1 type, bits<6> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-15} = opcode; - let Inst{14-10} = 0b10000; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format for floating-point data-processing (2 sources) instructions. -class A64I_fpdp2 type, bits<4> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +def movk_symbol_g0 : Operand { + let ParserMatchClass = MovKSymbolG0AsmOperand; } -// Format for floating-point data-processing (3 sources) instructions. -class A64I_fpdp3 type, bit o1, bit o0, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - bits<5> Ra; +class fixedpoint_i32 + : Operand, + ComplexPattern", [fpimm, ld]> { + let EncoderMethod = "getFixedPointScaleOpValue"; + let DecoderMethod = "DecodeFixedPointScaleImm32"; + let ParserMatchClass = Imm1_32Operand; +} - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11111; - let Inst{23-22} = type; - let Inst{21} = o1; - // Inherit Rm in 20-16 - let Inst{15} = o0; - let Inst{14-10} = Ra; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +class fixedpoint_i64 + : Operand, + ComplexPattern", [fpimm, ld]> { + let EncoderMethod = "getFixedPointScaleOpValue"; + let DecoderMethod = "DecodeFixedPointScaleImm64"; + let ParserMatchClass = Imm1_64Operand; } -// Format for floating-point <-> fixed-point conversion instructions. 
-class A64I_fpfixed type, bits<2> mode, bits<3> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<6> Scale; +def fixedpoint_f32_i32 : fixedpoint_i32; +def fixedpoint_f64_i32 : fixedpoint_i32; - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b0; - let Inst{20-19} = mode; - let Inst{18-16} = opcode; - let Inst{15-10} = Scale; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +def fixedpoint_f32_i64 : fixedpoint_i64; +def fixedpoint_f64_i64 : fixedpoint_i64; + +def vecshiftR8 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); +}]> { + let EncoderMethod = "getVecShiftR8OpValue"; + let DecoderMethod = "DecodeVecShiftR8Imm"; + let ParserMatchClass = Imm1_8Operand; +} +def vecshiftR16 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); +}]> { + let EncoderMethod = "getVecShiftR16OpValue"; + let DecoderMethod = "DecodeVecShiftR16Imm"; + let ParserMatchClass = Imm1_16Operand; +} +def vecshiftR16Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); +}]> { + let EncoderMethod = "getVecShiftR16OpValue"; + let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; + let ParserMatchClass = Imm1_8Operand; +} +def vecshiftR32 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 33); +}]> { + let EncoderMethod = "getVecShiftR32OpValue"; + let DecoderMethod = "DecodeVecShiftR32Imm"; + let ParserMatchClass = Imm1_32Operand; +} +def vecshiftR32Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); +}]> { + let EncoderMethod = "getVecShiftR32OpValue"; + let DecoderMethod = "DecodeVecShiftR32ImmNarrow"; + let ParserMatchClass = Imm1_16Operand; +} +def vecshiftR64 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 65); +}]> { + let EncoderMethod = "getVecShiftR64OpValue"; + let DecoderMethod = "DecodeVecShiftR64Imm"; + let ParserMatchClass = Imm1_64Operand; +} +def vecshiftR64Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 33); +}]> { + let EncoderMethod = "getVecShiftR64OpValue"; + let DecoderMethod = "DecodeVecShiftR64ImmNarrow"; + let ParserMatchClass = Imm1_32Operand; } -// Format for floating-point <-> integer conversion instructions. -class A64I_fpint type, bits<2> rmode, bits<3> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = sf; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +def Imm0_7Operand : AsmImmRange<0, 7>; +def Imm0_15Operand : AsmImmRange<0, 15>; +def Imm0_31Operand : AsmImmRange<0, 31>; +def Imm0_63Operand : AsmImmRange<0, 63>; + +def vecshiftL8 : Operand, ImmLeaf { + let EncoderMethod = "getVecShiftL8OpValue"; + let DecoderMethod = "DecodeVecShiftL8Imm"; + let ParserMatchClass = Imm0_7Operand; +} +def vecshiftL16 : Operand, ImmLeaf { + let EncoderMethod = "getVecShiftL16OpValue"; + let DecoderMethod = "DecodeVecShiftL16Imm"; + let ParserMatchClass = Imm0_15Operand; +} +def vecshiftL32 : Operand, ImmLeaf { + let EncoderMethod = "getVecShiftL32OpValue"; + let DecoderMethod = "DecodeVecShiftL32Imm"; + let ParserMatchClass = Imm0_31Operand; +} +def vecshiftL64 : Operand, ImmLeaf { + let EncoderMethod = "getVecShiftL64OpValue"; + let DecoderMethod = "DecodeVecShiftL64Imm"; + let ParserMatchClass = Imm0_63Operand; } -// Format for floating-point immediate instructions. 
-class A64I_fpimm type, bits<5> imm5, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRd { - bits<8> Imm8; +// Crazy immediate formats used by 32-bit and 64-bit logical immediate +// instructions for splatting repeating bit patterns across the immediate. +def logical_imm32_XFORM : SDNodeXFormgetZExtValue(), 32); + return CurDAG->getTargetConstant(enc, MVT::i32); +}]>; +def logical_imm64_XFORM : SDNodeXFormgetZExtValue(), 64); + return CurDAG->getTargetConstant(enc, MVT::i32); +}]>; + +def LogicalImm32Operand : AsmOperandClass { + let Name = "LogicalImm32"; + let DiagnosticType = "LogicalSecondSource"; +} +def LogicalImm64Operand : AsmOperandClass { + let Name = "LogicalImm64"; + let DiagnosticType = "LogicalSecondSource"; +} +def logical_imm32 : Operand, PatLeaf<(imm), [{ + return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32); +}], logical_imm32_XFORM> { + let PrintMethod = "printLogicalImm32"; + let ParserMatchClass = LogicalImm32Operand; +} +def logical_imm64 : Operand, PatLeaf<(imm), [{ + return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 64); +}], logical_imm64_XFORM> { + let PrintMethod = "printLogicalImm64"; + let ParserMatchClass = LogicalImm64Operand; +} - let Inst{31} = m; - let Inst{30} = 0b0; - let Inst{29} = s; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0b1; - let Inst{20-13} = Imm8; - let Inst{12-10} = 0b100; - let Inst{9-5} = imm5; - // Inherit Rd in 4-0 +// imm0_65535 predicate - True if the immediate is in the range [0,65535]. +def Imm0_65535Operand : AsmImmRange<0, 65535>; +def imm0_65535 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_65535Operand; + let PrintMethod = "printHexImm"; } +// imm0_255 predicate - True if the immediate is in the range [0,255]. +def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; } +def imm0_255 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_255Operand; + let PrintMethod = "printHexImm"; } -// Format for load-register (literal) instructions. -class A64I_LDRlit opc, bit v, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRt { - bits<19> Imm19; +// imm0_127 predicate - True if the immediate is in the range [0,127] +def Imm0_127Operand : AsmImmRange<0, 127>; +def imm0_127 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_127Operand; + let PrintMethod = "printHexImm"; +} - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = v; - let Inst{25-24} = 0b00; - let Inst{23-5} = Imm19; - // Inherit Rt in 4-0 +// NOTE: These imm0_N operands have to be of type i64 because i64 is the size +// for all shift-amounts. + +// imm0_63 predicate - True if the immediate is in the range [0,63] +def imm0_63 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_63Operand; } -// Format for load-store exclusive instructions. 
-class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
-                     dag outs, dag ins, string asmstr,
-                     list<dag> patterns, InstrItinClass itin>
-  : A64InstRtn<outs, ins, asmstr, patterns, itin> {
-  let Inst{31-30} = size;
-  let Inst{29-24} = 0b001000;
-  let Inst{23} = o2;
-  let Inst{22} = L;
-  let Inst{21} = o1;
-  let Inst{15} = o0;
+// imm0_31 predicate - True if the immediate is in the range [0,31]
+def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 32;
+}]> {
+  let ParserMatchClass = Imm0_31Operand;
 }
 
-class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
-                       dag outs, dag ins, string asmstr,
-                       list<dag> patterns, InstrItinClass itin>:
-      A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
-   bits<5> Rt2;
-   let Inst{14-10} = Rt2;
+// imm0_15 predicate - True if the immediate is in the range [0,15]
+def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 16;
+}]> {
+  let ParserMatchClass = Imm0_15Operand;
 }
 
-class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
-                      dag outs, dag ins, string asmstr,
-                      list<dag> patterns, InstrItinClass itin>:
-      A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
-   bits<5> Rs;
-   let Inst{20-16} = Rs;
+// imm0_7 predicate - True if the immediate is in the range [0,7]
+def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 8;
+}]> {
+  let ParserMatchClass = Imm0_7Operand;
 }
 
-class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
-                        dag outs, dag ins, string asmstr,
-                        list<dag> patterns, InstrItinClass itin>:
-      A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
-   bits<5> Rt2;
-   let Inst{14-10} = Rt2;
+// An arithmetic shifter operand:
+//  {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
+//  {5-0} - imm6
+class arith_shift<ValueType Ty, int width> : Operand<Ty> {
+  let PrintMethod = "printShifter";
+  let ParserMatchClass = !cast<AsmOperandClass>(
+                         "ArithmeticShifterOperand" # width);
 }
 
-// Format for load-store register (immediate post-indexed) instructions
-class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
-                     dag outs, dag ins, string asmstr,
-                     list<dag> patterns, InstrItinClass itin>
-  : A64InstRtn<outs, ins, asmstr, patterns, itin> {
-  bits<9> SImm9;
+def arith_shift32 : arith_shift<i32, 32>;
+def arith_shift64 : arith_shift<i64, 64>;
 
-  let Inst{31-30} = size;
-  let Inst{29-27} = 0b111;
-  let Inst{26} = v;
-  let Inst{25-24} = 0b00;
-  let Inst{23-22} = opc;
-  let Inst{21} = 0b0;
-  let Inst{20-12} = SImm9;
-  let Inst{11-10} = 0b01;
-  // Inherit Rn in 9-5
-  // Inherit Rt in 4-0
+class arith_shifted_reg<ValueType Ty, RegisterClass regclass, int width>
+    : Operand<Ty>,
+      ComplexPattern<Ty, 2, "SelectArithShiftedRegister", []> {
+  let PrintMethod = "printShiftedRegister";
+  let MIOperandInfo = (ops regclass, !cast<Operand>("arith_shift" # width));
 }
 
-// Format for load-store register (immediate pre-indexed) instructions
-class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
-                    dag outs, dag ins, string asmstr,
-                    list<dag> patterns, InstrItinClass itin>
-  : A64InstRtn<outs, ins, asmstr, patterns, itin> {
-  bits<9> SImm9;
-
+def arith_shifted_reg32 : arith_shifted_reg<i32, GPR32, 32>;
+def arith_shifted_reg64 : arith_shifted_reg<i64, GPR64, 64>;
 
-  let Inst{31-30} = size;
-  let Inst{29-27} = 0b111;
-  let Inst{26} = v;
-  let Inst{25-24} = 0b00;
-  let Inst{23-22} = opc;
-  let Inst{21} = 0b0;
-  let Inst{20-12} = SImm9;
-  let Inst{11-10} = 0b11;
-  // Inherit Rn in 9-5
-  // Inherit Rt in 4-0
+// A logical shifter operand:
+//  {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror
+//  {5-0} - imm6
+class logical_shift<int width> : Operand<i32> {
+  let PrintMethod = "printShifter";
+  let ParserMatchClass = !cast<AsmOperandClass>(
+                         "LogicalShifterOperand" # width);
 }
 
-// Format for load-store register (unprivileged) instructions
-class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
-                    dag outs, dag ins, string asmstr,
-                    list<dag> patterns, InstrItinClass itin>
-  : A64InstRtn<outs, ins, asmstr, patterns, itin> {
-  bits<9> SImm9;
+def logical_shift32 : logical_shift<32>;
+def logical_shift64 : logical_shift<64>;
 
-
-  let Inst{31-30} = size;
-  let Inst{29-27} = 0b111;
-  let Inst{26} = v;
-  let Inst{25-24} = 0b00;
-  let Inst{23-22} = opc;
-  let Inst{21} = 0b0;
-  let Inst{20-12} = SImm9;
-  let Inst{11-10} = 0b10;
-  // Inherit Rn in 9-5
-  // Inherit Rt in 4-0
+class logical_shifted_reg<ValueType Ty, RegisterClass regclass, Operand shiftop>
+    : Operand<Ty>,
+      ComplexPattern<Ty, 2, "SelectLogicalShiftedRegister", []> {
+  let PrintMethod = "printShiftedRegister";
+  let MIOperandInfo = (ops regclass, shiftop);
 }
 
-// Format for load-store (unscaled immediate) instructions.
-class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
-                     dag outs, dag ins, string asmstr,
-                     list<dag> patterns, InstrItinClass itin>
-  : A64InstRtn<outs, ins, asmstr, patterns, itin> {
-  bits<9> SImm9;
+def logical_shifted_reg32 : logical_shifted_reg<i32, GPR32, logical_shift32>;
+def logical_shifted_reg64 : logical_shifted_reg<i64, GPR64, logical_shift64>;
 
-  let Inst{31-30} = size;
-  let Inst{29-27} = 0b111;
-  let Inst{26} = v;
-  let Inst{25-24} = 0b00;
-  let Inst{23-22} = opc;
-  let Inst{21} = 0b0;
-  let Inst{20-12} = SImm9;
-  let Inst{11-10} = 0b00;
-  // Inherit Rn in 9-5
-  // Inherit Rt in 4-0
+// A logical vector shifter operand:
+//  {7-6} - shift type: 00 = lsl
+//  {5-0} - imm6: #0, #8, #16, or #24
+def logical_vec_shift : Operand<i32> {
+  let PrintMethod = "printShifter";
+  let EncoderMethod = "getVecShifterOpValue";
+  let ParserMatchClass = LogicalVecShifterOperand;
 }
 
+// A logical vector half-word shifter operand:
+//  {7-6} - shift type: 00 = lsl
+//  {5-0} - imm6: #0 or #8
+def logical_vec_hw_shift : Operand<i32> {
+  let PrintMethod = "printShifter";
+  let EncoderMethod = "getVecShifterOpValue";
+  let ParserMatchClass = LogicalVecHalfWordShifterOperand;
+}
 
-// Format for load-store (unsigned immediate) instructions.
-class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
-                      dag outs, dag ins, string asmstr,
-                      list<dag> patterns, InstrItinClass itin>
-  : A64InstRtn<outs, ins, asmstr, patterns, itin> {
-  bits<12> UImm12;
+// A vector move shifter operand:
+//  {0} - imm1: #8 or #16
+def move_vec_shift : Operand<i32> {
+  let PrintMethod = "printShifter";
+  let EncoderMethod = "getMoveVecShifterOpValue";
+  let ParserMatchClass = MoveVecShifterOperand;
+}
 
-  let Inst{31-30} = size;
-  let Inst{29-27} = 0b111;
-  let Inst{26} = v;
-  let Inst{25-24} = 0b01;
-  let Inst{23-22} = opc;
-  let Inst{21-10} = UImm12;
+def AddSubImmOperand : AsmOperandClass {
+  let Name = "AddSubImm";
+  let ParserMethod = "tryParseAddSubImm";
+  let DiagnosticType = "AddSubSecondSource";
+}
+// An ADD/SUB immediate shifter operand:
+//  second operand:
+//  {7-6} - shift type: 00 = lsl
+//  {5-0} - imm6: #0 or #12
+class addsub_shifted_imm<ValueType Ty>
+    : Operand<Ty>, ComplexPattern<Ty, 2, "SelectArithImmed", [imm]> {
+  let PrintMethod = "printAddSubImm";
+  let EncoderMethod = "getAddSubImmOpValue";
+  let ParserMatchClass = AddSubImmOperand;
+  let MIOperandInfo = (ops i32imm, i32imm);
 }
 
-// Format for load-store register (register offset) instructions.
-class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
-                    dag outs, dag ins, string asmstr,
-                    list<dag> patterns, InstrItinClass itin>
-  : A64InstRtn<outs, ins, asmstr, patterns, itin> {
-  bits<5> Rm;
+def addsub_shifted_imm32 : addsub_shifted_imm<i32>;
+def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
 
-  // Complex operand selection needed for these instructions, so they
-  // need an "addr" field for encoding/decoding to be generated.
-  bits<3> Ext;
-  // OptionHi = Ext{2-1}
-  // S = Ext{0}
+class neg_addsub_shifted_imm<ValueType Ty>
+    : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
+  let PrintMethod = "printAddSubImm";
+  let EncoderMethod = "getAddSubImmOpValue";
+  let ParserMatchClass = AddSubImmOperand;
+  let MIOperandInfo = (ops i32imm, i32imm);
+}
 
-  let Inst{31-30} = size;
-  let Inst{29-27} = 0b111;
-  let Inst{26} = v;
-  let Inst{25-24} = 0b00;
-  let Inst{23-22} = opc;
-  let Inst{21} = 0b1;
-  let Inst{20-16} = Rm;
-  let Inst{15-14} = Ext{2-1};
-  let Inst{13} = optionlo;
-  let Inst{12} = Ext{0};
-  let Inst{11-10} = 0b10;
-  // Inherits Rn in 9-5
-  // Inherits Rt in 4-0
+def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm<i32>;
+def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm<i64>;
 
-  let AddedComplexity = 50;
+// An extend operand:
+//  {5-3} - extend type
+//  {2-0} - imm3
+def arith_extend : Operand<i32> {
+  let PrintMethod = "printArithExtend";
+  let ParserMatchClass = ExtendOperand;
+}
+def arith_extend64 : Operand<i32> {
+  let PrintMethod = "printArithExtend";
+  let ParserMatchClass = ExtendOperand64;
 }
 
-// Format for Load-store register pair (offset) instructions
-class A64I_LSPoffset<bits<2> opc, bit v, bit l,
-                     dag outs, dag ins, string asmstr,
-                     list<dag> patterns, InstrItinClass itin>
-  : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
-  bits<7> SImm7;
-
-  let Inst{31-30} = opc;
-  let Inst{29-27} = 0b101;
-  let Inst{26} = v;
-  let Inst{25-23} = 0b010;
-  let Inst{22} = l;
-  let Inst{21-15} = SImm7;
-  // Inherit Rt2 in 14-10
-  // Inherit Rn in 9-5
-  // Inherit Rt in 4-0
+// 'extend' that's a lsl of a 64-bit register.
+def arith_extendlsl64 : Operand<i32> {
+  let PrintMethod = "printArithExtend";
+  let ParserMatchClass = ExtendOperandLSL64;
 }
 
-// Format for Load-store register pair (post-indexed) instructions
-class A64I_LSPpostind<bits<2> opc, bit v, bit l,
-                      dag outs, dag ins, string asmstr,
-                      list<dag> patterns, InstrItinClass itin>
-  : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
-  bits<7> SImm7;
+class arith_extended_reg32<ValueType Ty> : Operand<Ty>,
+                    ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> {
+  let PrintMethod = "printExtendedRegister";
+  let MIOperandInfo = (ops GPR32, arith_extend);
+}
 
-  let Inst{31-30} = opc;
-  let Inst{29-27} = 0b101;
-  let Inst{26} = v;
-  let Inst{25-23} = 0b001;
-  let Inst{22} = l;
-  let Inst{21-15} = SImm7;
-  // Inherit Rt2 in 14-10
-  // Inherit Rn in 9-5
-  // Inherit Rt in 4-0
+class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
+                    ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> {
+  let PrintMethod = "printExtendedRegister";
+  let MIOperandInfo = (ops GPR32, arith_extend64);
 }
 
-// Format for Load-store register pair (pre-indexed) instructions
-class A64I_LSPpreind<bits<2> opc, bit v, bit l,
-                     dag outs, dag ins, string asmstr,
-                     list<dag> patterns, InstrItinClass itin>
-  : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
-  bits<7> SImm7;
+// Floating-point immediate.
+def fpimm32 : Operand<f32>,
+              PatLeaf<(f32 fpimm), [{
+      return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1;
+    }], SDNodeXForm<fpimm, [{
+      APFloat InVal = N->getValueAPF();
+      uint32_t enc = AArch64_AM::getFP32Imm(InVal);
+      return CurDAG->getTargetConstant(enc, MVT::i32);
+    }]>> {
+  let ParserMatchClass = FPImmOperand;
+  let PrintMethod = "printFPImmOperand";
+}
+def fpimm64 : Operand<f64>,
+              PatLeaf<(f64 fpimm), [{
+      return AArch64_AM::getFP64Imm(N->getValueAPF()) != -1;
+    }], SDNodeXForm<fpimm, [{
+      APFloat InVal = N->getValueAPF();
+      uint32_t enc = AArch64_AM::getFP64Imm(InVal);
+      return CurDAG->getTargetConstant(enc, MVT::i32);
+    }]>> {
+  let ParserMatchClass = FPImmOperand;
+  let PrintMethod = "printFPImmOperand";
+}
 
-  let Inst{31-30} = opc;
-  let Inst{29-27} = 0b101;
-  let Inst{26} = v;
-  let Inst{25-23} = 0b011;
-  let Inst{22} = l;
-  let Inst{21-15} = SImm7;
-  // Inherit Rt2 in 14-10
-  // Inherit Rn in 9-5
-  // Inherit Rt in 4-0
+def fpimm8 : Operand<i32> {
+  let ParserMatchClass = FPImmOperand;
+  let PrintMethod = "printFPImmOperand";
 }
 
-// Format for Load-store non-temporal register pair (offset) instructions
-class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
-                      dag outs, dag ins, string asmstr,
-                      list<dag> patterns, InstrItinClass itin>
-  : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
-  bits<7> SImm7;
+def fpimm0 : PatLeaf<(fpimm), [{
+  return N->isExactlyValue(+0.0);
+}]>;
 
-  let Inst{31-30} = opc;
-  let Inst{29-27} = 0b101;
-  let Inst{26} = v;
-  let Inst{25-23} = 0b000;
-  let Inst{22} = l;
-  let Inst{21-15} = SImm7;
-  // Inherit Rt2 in 14-10
-  // Inherit Rn in 9-5
-  // Inherit Rt in 4-0
-}
-
-// Format for Logical (immediate) instructions
-class A64I_logicalimm<bit sf, bits<2> opc,
-                      dag outs, dag ins, string asmstr,
-                      list<dag> patterns, InstrItinClass itin>
-  : A64InstRdn<outs, ins, asmstr, patterns, itin> {
-  bit N;
-  bits<6> ImmR;
-  bits<6> ImmS;
-
-  // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
-  // selection), so we'll combine them into a single field here.
-  bits<13> Imm;
-  // N = Imm{12};
-  // ImmR = Imm{11-6};
-  // ImmS = Imm{5-0};
+// Vector lane operands
+class AsmVectorIndex<string Suffix> : AsmOperandClass {
+  let Name = "VectorIndex" # Suffix;
+  let DiagnosticType = "InvalidIndex" # Suffix;
+}
+def VectorIndex1Operand : AsmVectorIndex<"1">;
+def VectorIndexBOperand : AsmVectorIndex<"B">;
+def VectorIndexHOperand : AsmVectorIndex<"H">;
+def VectorIndexSOperand : AsmVectorIndex<"S">;
+def VectorIndexDOperand : AsmVectorIndex<"D">;
+
+def VectorIndex1 : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) == 1;
+}]> {
+  let ParserMatchClass = VectorIndex1Operand;
+  let PrintMethod = "printVectorIndex";
+  let MIOperandInfo = (ops i64imm);
+}
+def VectorIndexB : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 16;
+}]> {
+  let ParserMatchClass = VectorIndexBOperand;
+  let PrintMethod = "printVectorIndex";
+  let MIOperandInfo = (ops i64imm);
+}
+def VectorIndexH : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 8;
+}]> {
+  let ParserMatchClass = VectorIndexHOperand;
+  let PrintMethod = "printVectorIndex";
+  let MIOperandInfo = (ops i64imm);
+}
+def VectorIndexS : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 4;
+}]> {
+  let ParserMatchClass = VectorIndexSOperand;
+  let PrintMethod = "printVectorIndex";
+  let MIOperandInfo = (ops i64imm);
+}
+def VectorIndexD : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 2;
+}]> {
+  let ParserMatchClass = VectorIndexDOperand;
+  let PrintMethod = "printVectorIndex";
+  let MIOperandInfo = (ops i64imm);
+}
 
-  let Inst{31} = sf;
-  let Inst{30-29} = opc;
-  let Inst{28-23} = 0b100100;
-  let Inst{22} = Imm{12};
-  let Inst{21-16} = Imm{11-6};
-  let Inst{15-10} = Imm{5-0};
-  // Rn inherited in 9-5
-  // Rd inherited in 4-0
+// 8-bit immediate for AdvSIMD where 64-bit values of the form:
+// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
+// are encoded as the eight bit value 'abcdefgh'.
+def simdimmtype10 : Operand<i32>,
+                    PatLeaf<(f64 fpimm), [{
+      return AArch64_AM::isAdvSIMDModImmType10(N->getValueAPF()
+                                               .bitcastToAPInt()
+                                               .getZExtValue());
+    }], SDNodeXForm<fpimm, [{
+      APFloat InVal = N->getValueAPF();
+      uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF()
+                                                           .bitcastToAPInt()
+                                                           .getZExtValue());
+      return CurDAG->getTargetConstant(enc, MVT::i32);
+    }]>> {
+  let ParserMatchClass = SIMDImmType10Operand;
+  let PrintMethod = "printSIMDType10Operand";
 }
 
-// Format for Logical (shifted register) instructions
-class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
-                        dag outs, dag ins, string asmstr,
-                        list<dag> patterns, InstrItinClass itin>
-  : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
-  bits<6> Imm6;
-  let Inst{31} = sf;
-  let Inst{30-29} = opc;
-  let Inst{28-24} = 0b01010;
-  let Inst{23-22} = shift;
-  let Inst{21} = N;
-  // Rm inherited
-  let Inst{15-10} = Imm6;
-  // Rn inherited
-  // Rd inherited
-}
-
-// Format for Move wide (immediate)
-class A64I_movw<bit sf, bits<2> opc,
-                dag outs, dag ins, string asmstr,
-                list<dag> patterns, InstrItinClass itin>
-  : A64InstRd<outs, ins, asmstr, patterns, itin> {
-  bits<16> UImm16;
-  bits<2> Shift; // Called "hw" officially
+//---
+// System management
+//---
 
-  let Inst{31} = sf;
-  let Inst{30-29} = opc;
-  let Inst{28-23} = 0b100101;
-  let Inst{22-21} = Shift;
-  let Inst{20-5} = UImm16;
-  // Inherits Rd in 4-0
+// Base encoding for system instruction operands.
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands>
+    : I<oops, iops, asm, operands, "", []> {
+  let Inst{31-22} = 0b1101010100;
+  let Inst{21}    = L;
 }
 
-// Format for PC-relative addressing instructions, ADR and ADRP.
-class A64I_PCADR<bit op,
-                 dag outs, dag ins, string asmstr,
-                 list<dag> patterns, InstrItinClass itin>
-  : A64InstRd<outs, ins, asmstr, patterns, itin> {
-  bits<21> Label;
-
-  let Inst{31} = op;
-  let Inst{30-29} = Label{1-0};
-  let Inst{28-24} = 0b10000;
-  let Inst{23-5} = Label{20-2};
+// System instructions which do not have an Rt register.
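To make the simdimmtype10 bit-splatting concrete, here is a minimal C++ sketch of the expansion and the eligibility test it implies. These helpers are illustrative only, not the upstream AArch64_AM::isAdvSIMDModImmType10/encodeAdvSIMDModImmType10 implementations:

    #include <cstdint>

    // Expand 'abcdefgh' so that bit i of the immediate fills byte i of the
    // 64-bit result, as described in the comment above.
    static uint64_t expandSIMDModImmType10(uint8_t imm8) {
      uint64_t out = 0;
      for (int i = 0; i < 8; ++i)
        if (imm8 & (1u << i))
          out |= 0xffULL << (8 * i);
      return out;
    }

    // A 64-bit value is eligible iff every byte is 0x00 or 0xff.
    static bool isSIMDModImmType10(uint64_t v) {
      for (int i = 0; i < 8; ++i) {
        uint8_t b = (v >> (8 * i)) & 0xff;
        if (b != 0x00 && b != 0xff)
          return false;
      }
      return true;
    }

For instance, imm8 = 0b01000001 expands to 0x00ff0000000000ff.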
+class SimpleSystemI<bit L, dag iops, string asm, string operands>
+  : BaseSystemI<L, (outs), iops, asm, operands> {
+  let Inst{4-0} = 0b11111;
 }
 
-// Format for system instructions
-class A64I_system<bit l,
-                  dag outs, dag ins, string asmstr,
-                  list<dag> patterns, InstrItinClass itin>
-  : A64Inst<outs, ins, asmstr, patterns, itin> {
-  bits<2> Op0;
-  bits<3> Op1;
-  bits<4> CRn;
-  bits<4> CRm;
-  bits<3> Op2;
+// System instructions which have an Rt register.
+class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
+  : BaseSystemI<L, oops, iops, asm, operands>,
+    Sched<[WriteSys]> {
   bits<5> Rt;
+  let Inst{4-0} = Rt;
+}
 
-  let Inst{31-22} = 0b1101010100;
-  let Inst{21} = l;
-  let Inst{20-19} = Op0;
-  let Inst{18-16} = Op1;
-  let Inst{15-12} = CRn;
+// Hint instructions that take both a CRm and a 3-bit immediate.
+class HintI<string mnemonic>
+  : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "">,
+    Sched<[WriteHint]> {
+  bits <7> imm;
+  let Inst{20-12} = 0b000110010;
+  let Inst{11-5} = imm;
+}
+
+// System instructions taking a single literal operand which encodes into
+// CRm. op2 differentiates the opcodes.
+def BarrierAsmOperand : AsmOperandClass {
+  let Name = "Barrier";
+  let ParserMethod = "tryParseBarrierOperand";
+}
+def barrier_op : Operand<i32> {
+  let PrintMethod = "printBarrierOption";
+  let ParserMatchClass = BarrierAsmOperand;
+}
+class CRmSystemI<Operand crmtype, bits<3> opc, string asm>
+  : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm">,
+    Sched<[WriteBarrier]> {
+  bits<4> CRm;
+  let Inst{20-12} = 0b000110011;
   let Inst{11-8} = CRm;
-  let Inst{7-5} = Op2;
-  let Inst{4-0} = Rt;
+  let Inst{7-5} = opc;
+}
 
-  // These instructions can do horrible things.
-  let hasSideEffects = 1;
+// MRS/MSR system instructions. These have different operand classes because
+// a different subset of registers can be accessed through each instruction.
+def MRSSystemRegisterOperand : AsmOperandClass {
+  let Name = "MRSSystemRegister";
+  let ParserMethod = "tryParseSysReg";
+  let DiagnosticType = "MRS";
+}
+// concatenation of 1, op0, op1, CRn, CRm, op2. 16-bit immediate.
+def mrs_sysreg_op : Operand<i32> {
+  let ParserMatchClass = MRSSystemRegisterOperand;
+  let DecoderMethod = "DecodeMRSSystemRegister";
+  let PrintMethod = "printMRSSystemRegister";
 }
 
-// Format for unconditional branch (immediate) instructions
-class A64I_Bimm<bit op,
-                dag outs, dag ins, string asmstr,
-                list<dag> patterns, InstrItinClass itin>
-  : A64Inst<outs, ins, asmstr, patterns, itin> {
-  // Doubly special in not even sharing register fields with other
-  // instructions, so we create our own Rn here.
-  bits<26> Label;
+def MSRSystemRegisterOperand : AsmOperandClass {
+  let Name = "MSRSystemRegister";
+  let ParserMethod = "tryParseSysReg";
+  let DiagnosticType = "MSR";
+}
+def msr_sysreg_op : Operand<i32> {
+  let ParserMatchClass = MSRSystemRegisterOperand;
+  let DecoderMethod = "DecodeMSRSystemRegister";
+  let PrintMethod = "printMSRSystemRegister";
+}
 
-  let Inst{31} = op;
-  let Inst{30-26} = 0b00101;
-  let Inst{25-0} = Label;
+class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
+                       "mrs", "\t$Rt, $systemreg"> {
+  bits<15> systemreg;
+  let Inst{20} = 1;
+  let Inst{19-5} = systemreg;
 }
 
-// Format for Test & branch (immediate) instructions
-class A64I_TBimm<bit op,
-                 dag outs, dag ins, string asmstr,
-                 list<dag> patterns, InstrItinClass itin>
-  : A64InstRt<outs, ins, asmstr, patterns, itin> {
-  // Doubly special in not even sharing register fields with other
-  // instructions, so we create our own Rn here.
-  bits<6> Imm;
-  bits<14> Label;
+// FIXME: Some of these def NZCV, others don't. Best way to model that?
+// Explicitly modeling each of the system registers as a register class
+// would do it, but feels like overkill at this point.
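Two packed immediates appear in the system-instruction formats here: the 7-bit HINT immediate (CRm:op2) and the 15-bit MRS/MSR system-register field. A minimal C++ sketch of both packings, assuming the architectural field layouts; the helper names are ours, not LLVM's:

    #include <cstdint>

    // HINT #imm7: CRm takes imm{6:3} (Inst{11-8}) and op2 takes imm{2:0}
    // (Inst{7-5}); NOP, YIELD, WFE, WFI, SEV, SEVL are HINT #0..#5.
    struct HintFields { uint32_t crm, op2; };
    static HintFields splitHintImm(uint32_t imm7) {
      return { (imm7 >> 3) & 0xf, imm7 & 0x7 };
    }

    // MRS/MSR: the 15-bit systemreg field is op0<0>:op1:CRn:CRm:op2; the
    // architecturally fixed leading 1 (op0<1>) lives in Inst{20}.
    static uint16_t packSysReg(uint32_t op0, uint32_t op1, uint32_t crn,
                               uint32_t crm, uint32_t op2) {
      return static_cast<uint16_t>(((op0 & 1) << 14) | ((op1 & 7) << 11) |
                                   ((crn & 0xf) << 7) | ((crm & 0xf) << 3) |
                                   (op2 & 7));
    }

As a check, NZCV (op0=3, op1=3, CRn=4, CRm=2, op2=0) packs to 0x5a10, which together with the fixed leading 1 gives the familiar 0xda10 encoding.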
+class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt), + "msr", "\t$systemreg, $Rt"> { + bits<15> systemreg; + let Inst{20} = 1; + let Inst{19-5} = systemreg; +} - let Inst{31} = Imm{5}; - let Inst{30-25} = 0b011011; - let Inst{24} = op; - let Inst{23-19} = Imm{4-0}; - let Inst{18-5} = Label; - // Inherit Rt in 4-0 +def SystemPStateFieldOperand : AsmOperandClass { + let Name = "SystemPStateField"; + let ParserMethod = "tryParseSysReg"; +} +def pstatefield_op : Operand { + let ParserMatchClass = SystemPStateFieldOperand; + let PrintMethod = "printSystemPStateField"; +} + +let Defs = [NZCV] in +class MSRpstateI + : SimpleSystemI<0, (ins pstatefield_op:$pstate_field, imm0_15:$imm), + "msr", "\t$pstate_field, $imm">, + Sched<[WriteSys]> { + bits<6> pstatefield; + bits<4> imm; + let Inst{20-19} = 0b00; + let Inst{18-16} = pstatefield{5-3}; + let Inst{15-12} = 0b0100; + let Inst{11-8} = imm; + let Inst{7-5} = pstatefield{2-0}; + + let DecoderMethod = "DecodeSystemPStateInstruction"; +} + +// SYS and SYSL generic system instructions. +def SysCRAsmOperand : AsmOperandClass { + let Name = "SysCR"; + let ParserMethod = "tryParseSysCROperand"; +} + +def sys_cr_op : Operand { + let PrintMethod = "printSysCROperand"; + let ParserMatchClass = SysCRAsmOperand; +} + +class SystemXtI + : RtSystemI { + bits<3> op1; + bits<4> Cn; + bits<4> Cm; + bits<3> op2; + let Inst{20-19} = 0b01; + let Inst{18-16} = op1; + let Inst{15-12} = Cn; + let Inst{11-8} = Cm; + let Inst{7-5} = op2; +} + +class SystemLXtI + : RtSystemI { + bits<3> op1; + bits<4> Cn; + bits<4> Cm; + bits<3> op2; + let Inst{20-19} = 0b01; + let Inst{18-16} = op1; + let Inst{15-12} = Cn; + let Inst{11-8} = Cm; + let Inst{7-5} = op2; } -// Format for Unconditional branch (register) instructions, including -// RET. Shares no fields with instructions further up the hierarchy -// so top-level. -class A64I_Breg opc, bits<5> op2, bits<6> op3, bits<5> op4, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64Inst { - // Doubly special in not even sharing register fields with other - // instructions, so we create our own Rn here. - bits<5> Rn; +// Branch (register) instructions: +// +// case opc of +// 0001 blr +// 0000 br +// 0101 dret +// 0100 eret +// 0010 ret +// otherwise UNDEFINED +class BaseBranchReg opc, dag oops, dag iops, string asm, + string operands, list pattern> + : I, Sched<[WriteBrReg]> { let Inst{31-25} = 0b1101011; let Inst{24-21} = opc; - let Inst{20-16} = op2; - let Inst{15-10} = op3; - let Inst{9-5} = Rn; - let Inst{4-0} = op4; + let Inst{20-16} = 0b11111; + let Inst{15-10} = 0b000000; + let Inst{4-0} = 0b00000; } +class BranchReg opc, string asm, list pattern> + : BaseBranchReg { + bits<5> Rn; + let Inst{9-5} = Rn; +} -//===----------------------------------------------------------------------===// -// -// Neon Instruction Format Definitions. -// +let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in +class SpecialReturn opc, string asm> + : BaseBranchReg { + let Inst{9-5} = 0b11111; +} -let Predicates = [HasNEON] in { +//--- +// Conditional branch instruction. +//--- -class NeonInstAlias - : InstAlias { +// Condition code. +// 4-bit immediate. 
Pretty-printed as +def ccode : Operand { + let PrintMethod = "printCondCode"; + let ParserMatchClass = CondCode; +} +def inv_ccode : Operand { + let PrintMethod = "printInverseCondCode"; + let ParserMatchClass = CondCode; } -// Format AdvSIMD bitwise extract -class NeonI_BitExtract op2, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b101110; - let Inst{23-22} = op2; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - // imm4 in 14-11 - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD perm -class NeonI_Perm size, bits<3> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b001110; - let Inst{23-22} = size; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +// Conditional branch target. 19-bit immediate. The low two bits of the target +// offset are implied zero and so are not part of the immediate. +def PCRelLabel19Operand : AsmOperandClass { + let Name = "PCRelLabel19"; + let DiagnosticType = "InvalidLabel"; +} +def am_brcond : Operand { + let EncoderMethod = "getCondBranchTargetOpValue"; + let DecoderMethod = "DecodePCRelLabel19"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = PCRelLabel19Operand; } -// Format AdvSIMD table lookup -class NeonI_TBL op2, bits<2> len, bit op, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-24} = 0b001110; - let Inst{23-22} = op2; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 3 vector registers with same vector type -class NeonI_3VSame size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; +class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target), + "b", ".$cond\t$target", "", + [(AArch64brcond bb:$target, imm:$cond, NZCV)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + let Uses = [NZCV]; + + bits<4> cond; + bits<19> target; + let Inst{31-24} = 0b01010100; + let Inst{23-5} = target; + let Inst{4} = 0; + let Inst{3-0} = cond; +} + +//--- +// Compare-and-branch instructions. +//--- +class BaseCmpBranch + : I<(outs), (ins regtype:$Rt, am_brcond:$target), + asm, "\t$Rt, $target", "", + [(node regtype:$Rt, bb:$target)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + + bits<5> Rt; + bits<19> target; + let Inst{30-25} = 0b011010; + let Inst{24} = op; + let Inst{23-5} = target; + let Inst{4-0} = Rt; +} + +multiclass CmpBranch { + def W : BaseCmpBranch { + let Inst{31} = 0; + } + def X : BaseCmpBranch { + let Inst{31} = 1; + } +} + +//--- +// Test-bit-and-branch instructions. +//--- +// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of +// the target offset are implied zero and so are not part of the immediate. 
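Since both branch-target operands drop the two implied-zero low bits, a short C++ sketch of the range checks involved may help: conditional and compare-and-branch targets (am_brcond above) carry a 19-bit word offset, and the test-and-branch target introduced next carries only 14 bits. These helpers are hypothetical illustrations, not the MC encoder methods named above:

    #include <cstdint>
    #include <optional>

    // imm19: conditional branches reach +/-1 MiB in 4-byte steps.
    static std::optional<uint32_t> encodeImm19(int64_t byteOff) {
      if (byteOff & 3) return std::nullopt;      // low two bits implied zero
      int64_t imm = byteOff >> 2;
      if (imm < -(1 << 18) || imm >= (1 << 18)) return std::nullopt;
      return static_cast<uint32_t>(imm) & 0x7ffff;
    }

    // imm14: TBZ/TBNZ reach only +/-32 KiB.
    static std::optional<uint32_t> encodeImm14(int64_t byteOff) {
      if (byteOff & 3) return std::nullopt;
      int64_t imm = byteOff >> 2;
      if (imm < -(1 << 13) || imm >= (1 << 13)) return std::nullopt;
      return static_cast<uint32_t>(imm) & 0x3fff;
    }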
+def BranchTarget14Operand : AsmOperandClass { + let Name = "BranchTarget14"; +} +def am_tbrcond : Operand { + let EncoderMethod = "getTestBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget14Operand; +} + +// AsmOperand classes to emit (or not) special diagnostics +def TBZImm0_31Operand : AsmOperandClass { + let Name = "TBZImm0_31"; + let PredicateMethod = "isImm0_31"; + let RenderMethod = "addImm0_31Operands"; +} +def TBZImm32_63Operand : AsmOperandClass { + let Name = "Imm32_63"; + let DiagnosticType = "InvalidImm0_63"; +} + +class tbz_imm0_31 : Operand, ImmLeaf { + let ParserMatchClass = matcher; +} + +def tbz_imm0_31_diag : tbz_imm0_31; +def tbz_imm0_31_nodiag : tbz_imm0_31; + +def tbz_imm32_63 : Operand, ImmLeaf 31) && (((uint32_t)Imm) < 64); +}]> { + let ParserMatchClass = TBZImm32_63Operand; +} + +class BaseTestBranch + : I<(outs), (ins regtype:$Rt, immtype:$bit_off, am_tbrcond:$target), + asm, "\t$Rt, $bit_off, $target", "", + [(node regtype:$Rt, immtype:$bit_off, bb:$target)]>, + Sched<[WriteBr]> { + let isBranch = 1; + let isTerminator = 1; + + bits<5> Rt; + bits<6> bit_off; + bits<14> target; + + let Inst{30-25} = 0b011011; + let Inst{24} = op; + let Inst{23-19} = bit_off{4-0}; + let Inst{18-5} = target; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeTestAndBranch"; +} + +multiclass TestBranch { + def W : BaseTestBranch { + let Inst{31} = 0; + } + + def X : BaseTestBranch { + let Inst{31} = 1; + } + + // Alias X-reg with 0-31 imm to W-Reg. + def : InstAlias(NAME#"W") GPR32as64:$Rd, + tbz_imm0_31_nodiag:$imm, am_tbrcond:$target), 0>; + def : Pat<(node GPR64:$Rn, tbz_imm0_31_diag:$imm, bb:$target), + (!cast(NAME#"W") (EXTRACT_SUBREG GPR64:$Rn, sub_32), + tbz_imm0_31_diag:$imm, bb:$target)>; +} + +//--- +// Unconditional branch (immediate) instructions. +//--- +def BranchTarget26Operand : AsmOperandClass { + let Name = "BranchTarget26"; + let DiagnosticType = "InvalidLabel"; +} +def am_b_target : Operand { + let EncoderMethod = "getBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget26Operand; +} +def am_bl_target : Operand { + let EncoderMethod = "getBranchTargetOpValue"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = BranchTarget26Operand; +} + +class BImm pattern> + : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> { + bits<26> addr; + let Inst{31} = op; + let Inst{30-26} = 0b00101; + let Inst{25-0} = addr; + + let DecoderMethod = "DecodeUnconditionalBranch"; +} + +class BranchImm pattern> + : BImm; +class CallImm pattern> + : BImm; + +//--- +// Basic one-operand data processing instructions. 
+//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseOneOperandData opc, RegisterClass regtype, string asm, + SDPatternOperator node> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", + [(set regtype:$Rd, (node regtype:$Rn))]>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + + let Inst{30-13} = 0b101101011000000000; + let Inst{12-10} = opc; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass OneOperandData opc, string asm, + SDPatternOperator node = null_frag> { + def Wr : BaseOneOperandData { + let Inst{31} = 0; + } + + def Xr : BaseOneOperandData { + let Inst{31} = 1; + } +} + +class OneWRegData opc, string asm, SDPatternOperator node> + : BaseOneOperandData { + let Inst{31} = 0; +} + +class OneXRegData opc, string asm, SDPatternOperator node> + : BaseOneOperandData { + let Inst{31} = 1; +} + +//--- +// Basic two-operand data processing instructions. +//--- +class BaseBaseAddSubCarry pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{30} = isSub; + let Inst{28-21} = 0b11010000; + let Inst{20-16} = Rm; + let Inst{15-10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseAddSubCarry + : BaseBaseAddSubCarry; + +class BaseAddSubCarrySetFlags + : BaseBaseAddSubCarry { + let Defs = [NZCV]; +} + +multiclass AddSubCarry { + def Wr : BaseAddSubCarry { + let Inst{31} = 0; + let Inst{29} = 0; + } + def Xr : BaseAddSubCarry { + let Inst{31} = 1; + let Inst{29} = 0; + } + + // Sets flags. + def SWr : BaseAddSubCarrySetFlags { + let Inst{31} = 0; + let Inst{29} = 1; + } + def SXr : BaseAddSubCarrySetFlags { + let Inst{31} = 1; + let Inst{29} = 1; + } +} + +class BaseTwoOperand opc, RegisterClass regtype, string asm, + SDPatternOperator OpNode> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{30-21} = 0b0011010110; + let Inst{20-16} = Rm; + let Inst{15-14} = 0b00; + let Inst{13-10} = opc; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseDiv + : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> { + let Inst{10} = isSigned; +} + +multiclass Div { + def Wr : BaseDiv, + Sched<[WriteID32, ReadID, ReadID]> { + let Inst{31} = 0; + } + def Xr : BaseDiv, + Sched<[WriteID64, ReadID, ReadID]> { + let Inst{31} = 1; + } +} + +class BaseShift shift_type, RegisterClass regtype, string asm, + SDPatternOperator OpNode = null_frag> + : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>, + Sched<[WriteIS, ReadI]> { + let Inst{11-10} = shift_type; +} + +multiclass Shift shift_type, string asm, SDNode OpNode> { + def Wr : BaseShift { + let Inst{31} = 0; + } + + def Xr : BaseShift { + let Inst{31} = 1; + } + + def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)), + (!cast(NAME # "Wr") GPR32:$Rn, + (EXTRACT_SUBREG i64:$Rm, sub_32))>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; + + def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))), + (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; +} + +class ShiftAlias + : InstAlias; + +class BaseMulAccum opc, RegisterClass multype, + RegisterClass addtype, string asm, + list 
pattern> + : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra), + asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<5> Ra; + let Inst{30-24} = 0b0011011; + let Inst{23-21} = opc; + let Inst{20-16} = Rm; + let Inst{15} = isSub; + let Inst{14-10} = Ra; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass MulAccum { + def Wrrr : BaseMulAccum, + Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> { + let Inst{31} = 0; + } + + def Xrrr : BaseMulAccum, + Sched<[WriteIM64, ReadIMA, ReadIM, ReadIM]> { + let Inst{31} = 1; + } +} + +class WideMulAccum opc, string asm, + SDNode AccNode, SDNode ExtNode> + : BaseMulAccum, + Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> { + let Inst{31} = 1; +} + +class MulHi opc, string asm, SDNode OpNode> + : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>, + Sched<[WriteIM64, ReadIM, ReadIM]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-24} = 0b10011011; + let Inst{23-21} = opc; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + // The Ra field of SMULH and UMULH is unused: it should be assembled as 31 + // (i.e. all bits 1) but is ignored by the processor. + let PostEncoderMethod = "fixMulHigh"; +} + +class MulAccumWAlias + : InstAlias; +class MulAccumXAlias + : InstAlias; +class WideMulAccumAlias + : InstAlias; + +class BaseCRC32 sz, bit C, RegisterClass StreamReg, + SDPatternOperator OpNode, string asm> + : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = sf; + let Inst{30-21} = 0b0011010110; + let Inst{20-16} = Rm; + let Inst{15-13} = 0b010; + let Inst{12} = C; + let Inst{11-10} = sz; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + let Predicates = [HasCRC]; +} + +//--- +// Address generation. +//--- + +class ADRI pattern> + : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "", + pattern>, + Sched<[WriteI]> { + bits<5> Xd; + bits<21> label; + let Inst{31} = page; + let Inst{30-29} = label{1-0}; + let Inst{28-24} = 0b10000; + let Inst{23-5} = label{20-2}; + let Inst{4-0} = Xd; + + let DecoderMethod = "DecodeAdrInstruction"; +} + +//--- +// Move immediate. 
+//--- + +def movimm32_imm : Operand { + let ParserMatchClass = Imm0_65535Operand; + let EncoderMethod = "getMoveWideImmOpValue"; + let PrintMethod = "printHexImm"; +} +def movimm32_shift : Operand { + let PrintMethod = "printShifter"; + let ParserMatchClass = MovImm32ShifterOperand; +} +def movimm64_shift : Operand { + let PrintMethod = "printShifter"; + let ParserMatchClass = MovImm64ShifterOperand; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseMoveImmediate opc, RegisterClass regtype, Operand shifter, + string asm> + : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift), + asm, "\t$Rd, $imm$shift", "", []>, + Sched<[WriteImm]> { + bits<5> Rd; + bits<16> imm; + bits<6> shift; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = shift{5-4}; + let Inst{20-5} = imm; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeMoveImmInstruction"; +} + +multiclass MoveImmediate opc, string asm> { + def Wi : BaseMoveImmediate { + let Inst{31} = 0; + } + + def Xi : BaseMoveImmediate { + let Inst{31} = 1; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseInsertImmediate opc, RegisterClass regtype, Operand shifter, + string asm> + : I<(outs regtype:$Rd), + (ins regtype:$src, movimm32_imm:$imm, shifter:$shift), + asm, "\t$Rd, $imm$shift", "$src = $Rd", []>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<16> imm; + bits<6> shift; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100101; + let Inst{22-21} = shift{5-4}; + let Inst{20-5} = imm; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeMoveImmInstruction"; +} + +multiclass InsertImmediate opc, string asm> { + def Wi : BaseInsertImmediate { + let Inst{31} = 0; + } + + def Xi : BaseInsertImmediate { + let Inst{31} = 1; + } +} + +//--- +// Add/Subtract +//--- + +class BaseAddSubImm + : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "", + [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<14> imm; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b10001; + let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12 + let Inst{21-10} = imm{11-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + let DecoderMethod = "DecodeBaseAddSubImm"; +} + +class BaseAddSubRegPseudo + : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteI, ReadI, ReadI]>; + +class BaseAddSubSReg + : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", + [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + // The operands are in order to match the 'addr' MI operands, so we + // don't need an encoder method and by-name matching. Just use the default + // in-order handling. Since we're using by-order, make sure the names + // do not match. 
+ bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<8> shift; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-22} = shift{7-6}; + let Inst{21} = 0; + let Inst{20-16} = src2; + let Inst{15-10} = shift{5-0}; + let Inst{9-5} = src1; + let Inst{4-0} = dst; + + let DecoderMethod = "DecodeThreeAddrSRegInstruction"; +} + +class BaseAddSubEReg + : I<(outs dstRegtype:$R1), + (ins src1Regtype:$R2, src2Regtype:$R3), + asm, "\t$R1, $R2, $R3", "", + [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>, + Sched<[WriteIEReg, ReadI, ReadIEReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> ext; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rm; + let Inst{15-13} = ext{5-3}; + let Inst{12-10} = ext{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeAddSubERegInstruction"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseAddSubEReg64 + : I<(outs dstRegtype:$Rd), + (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext), + asm, "\t$Rd, $Rn, $Rm$ext", "", []>, + Sched<[WriteIEReg, ReadI, ReadIEReg]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> ext; + let Inst{30} = isSub; + let Inst{29} = setFlags; + let Inst{28-24} = 0b01011; + let Inst{23-21} = 0b001; + let Inst{20-16} = Rm; + let Inst{15} = ext{5}; + let Inst{12-10} = ext{2-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeAddSubERegInstruction"; +} + +// Aliases for register+register add/subtract. +class AddSubRegAlias + : InstAlias; + +multiclass AddSub { + let hasSideEffects = 0 in { + // Add/Subtract immediate + def Wri : BaseAddSubImm { + let Inst{31} = 0; + } + def Xri : BaseAddSubImm { + let Inst{31} = 1; + } + + // Add/Subtract register - Only used for CodeGen + def Wrr : BaseAddSubRegPseudo; + def Xrr : BaseAddSubRegPseudo; + + // Add/Subtract shifted register + def Wrs : BaseAddSubSReg { + let Inst{31} = 0; + } + def Xrs : BaseAddSubSReg { + let Inst{31} = 1; + } + } + + // Add/Subtract extended register + let AddedComplexity = 1, hasSideEffects = 0 in { + def Wrx : BaseAddSubEReg, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + let Inst{31} = 1; + } + } + + def Xrx64 : BaseAddSubEReg64 { + // UXTX and SXTX only. + let Inst{14-13} = 0b11; + let Inst{31} = 1; + } + + // Register/register aliases with no shift when SP is not used. + def : AddSubRegAlias(NAME#"Wrs"), + GPR32, GPR32, GPR32, 0>; + def : AddSubRegAlias(NAME#"Xrs"), + GPR64, GPR64, GPR64, 0>; + + // Register/register aliases with no shift when either the destination or + // first source register is SP. 
+ def : AddSubRegAlias(NAME#"Wrx"), + GPR32sponly, GPR32sp, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias(NAME#"Wrx"), + GPR32sp, GPR32sponly, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias(NAME#"Xrx64"), + GPR64sponly, GPR64sp, GPR64, 24>; // UXTX #0 + def : AddSubRegAlias(NAME#"Xrx64"), + GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0 +} + +multiclass AddSubS { + let isCompare = 1, Defs = [NZCV] in { + // Add/Subtract immediate + def Wri : BaseAddSubImm { + let Inst{31} = 0; + } + def Xri : BaseAddSubImm { + let Inst{31} = 1; + } + + // Add/Subtract register + def Wrr : BaseAddSubRegPseudo; + def Xrr : BaseAddSubRegPseudo; + + // Add/Subtract shifted register + def Wrs : BaseAddSubSReg { + let Inst{31} = 0; + } + def Xrs : BaseAddSubSReg { + let Inst{31} = 1; + } + + // Add/Subtract extended register + let AddedComplexity = 1 in { + def Wrx : BaseAddSubEReg, mnemonic, OpNode> { + let Inst{31} = 0; + } + def Xrx : BaseAddSubEReg, mnemonic, OpNode> { + let Inst{31} = 1; + } + } + + def Xrx64 : BaseAddSubEReg64 { + // UXTX and SXTX only. + let Inst{14-13} = 0b11; + let Inst{31} = 1; + } + } // Defs = [NZCV] + + // Compare aliases + def : InstAlias(NAME#"Wri") + WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>; + def : InstAlias(NAME#"Xri") + XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>; + def : InstAlias(NAME#"Wrx") + WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; + def : InstAlias(NAME#"Xrx") + XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; + def : InstAlias(NAME#"Xrx64") + XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>; + def : InstAlias(NAME#"Wrs") + WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>; + def : InstAlias(NAME#"Xrs") + XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>; + + // Compare shorthands + def : InstAlias(NAME#"Wrs") + WZR, GPR32:$src1, GPR32:$src2, 0), 5>; + def : InstAlias(NAME#"Xrs") + XZR, GPR64:$src1, GPR64:$src2, 0), 5>; + + // Register/register aliases with no shift when SP is not used. + def : AddSubRegAlias(NAME#"Wrs"), + GPR32, GPR32, GPR32, 0>; + def : AddSubRegAlias(NAME#"Xrs"), + GPR64, GPR64, GPR64, 0>; + + // Register/register aliases with no shift when the first source register + // is SP. + def : AddSubRegAlias(NAME#"Wrx"), + GPR32, GPR32sponly, GPR32, 16>; // UXTW #0 + def : AddSubRegAlias(NAME#"Xrx64"), + GPR64, GPR64sponly, GPR64, 24>; // UXTX #0 +} + +//--- +// Extract +//--- +def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisPtrTy<3>]>; +def AArch64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; + +class BaseExtractImm patterns> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm), + asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>, + Sched<[WriteExtr, ReadExtrHi]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<6> imm; + + let Inst{30-23} = 0b00100111; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15-10} = imm; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass ExtractImm { + def Wrri : BaseExtractImm { + let Inst{31} = 0; + let Inst{22} = 0; + // imm<5> must be zero. 
+ let imm{5} = 0; + } + def Xrri : BaseExtractImm { + + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +//--- +// Bitfield +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseBitfieldImm opc, + RegisterClass regtype, Operand imm_type, string asm> + : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms), + asm, "\t$Rd, $Rn, $immr, $imms", "", []>, + Sched<[WriteIS, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<6> immr; + bits<6> imms; + + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{21-16} = immr; + let Inst{15-10} = imms; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass BitfieldImm opc, string asm> { + def Wri : BaseBitfieldImm { + let Inst{31} = 0; + let Inst{22} = 0; + // imms<5> and immr<5> must be zero, else ReservedValue(). + let Inst{21} = 0; + let Inst{15} = 0; + } + def Xri : BaseBitfieldImm { + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseBitfieldImmWith2RegArgs opc, + RegisterClass regtype, Operand imm_type, string asm> + : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr, + imm_type:$imms), + asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>, + Sched<[WriteIS, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<6> immr; + bits<6> imms; + + let Inst{30-29} = opc; + let Inst{28-23} = 0b100110; + let Inst{21-16} = immr; + let Inst{15-10} = imms; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass BitfieldImmWith2RegArgs opc, string asm> { + def Wri : BaseBitfieldImmWith2RegArgs { + let Inst{31} = 0; + let Inst{22} = 0; + // imms<5> and immr<5> must be zero, else ReservedValue(). + let Inst{21} = 0; + let Inst{15} = 0; + } + def Xri : BaseBitfieldImmWith2RegArgs { + let Inst{31} = 1; + let Inst{22} = 1; + } +} + +//--- +// Logical +//--- + +// Logical (immediate) +class BaseLogicalImm opc, RegisterClass dregtype, + RegisterClass sregtype, Operand imm_type, string asm, + list pattern> + : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm), + asm, "\t$Rd, $Rn, $imm", "", pattern>, + Sched<[WriteI, ReadI]> { + bits<5> Rd; + bits<5> Rn; + bits<13> imm; + let Inst{30-29} = opc; + let Inst{28-23} = 0b100100; + let Inst{22} = imm{12}; + let Inst{21-16} = imm{11-6}; + let Inst{15-10} = imm{5-0}; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeLogicalImmInstruction"; +} + +// Logical (shifted register) +class BaseLogicalSReg opc, bit N, RegisterClass regtype, + logical_shifted_reg shifted_regtype, string asm, + list pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteISReg, ReadI, ReadISReg]> { + // The operands are in order to match the 'addr' MI operands, so we + // don't need an encoder method and by-name matching. Just use the default + // in-order handling. Since we're using by-order, make sure the names + // do not match. + bits<5> dst; + bits<5> src1; + bits<5> src2; + bits<8> shift; + let Inst{30-29} = opc; + let Inst{28-24} = 0b01010; + let Inst{23-22} = shift{7-6}; + let Inst{21} = N; + let Inst{20-16} = src2; + let Inst{15-10} = shift{5-0}; + let Inst{9-5} = src1; + let Inst{4-0} = dst; + + let DecoderMethod = "DecodeThreeAddrSRegInstruction"; +} + +// Aliases for register+register logical instructions. 
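BaseLogicalImm above splits its 13-bit imm field as Inst{22} = imm{12} (the N bit), Inst{21-16} = imm{11-6} (immr) and Inst{15-10} = imm{5-0} (imms); the W-register variants force N = 0, matching the "64-bit version has an additional bit of immediate" comment. A one-function C++ sketch of that packing (illustrative helper, not LLVM's):

    #include <cstdint>

    // Pack N:immr:imms into the 13-bit logical-immediate field.
    static uint32_t packLogicalImmField(uint32_t n, uint32_t immr,
                                        uint32_t imms) {
      return ((n & 1) << 12) | ((immr & 0x3f) << 6) | (imms & 0x3f);
    }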
+class LogicalRegAlias + : InstAlias; + +let AddedComplexity = 6 in +multiclass LogicalImm opc, string mnemonic, SDNode OpNode> { + def Wri : BaseLogicalImm { + let Inst{31} = 0; + let Inst{22} = 0; // 64-bit version has an additional bit of immediate. + } + def Xri : BaseLogicalImm { + let Inst{31} = 1; + } +} + +multiclass LogicalImmS opc, string mnemonic, SDNode OpNode> { + let isCompare = 1, Defs = [NZCV] in { + def Wri : BaseLogicalImm { + let Inst{31} = 0; + let Inst{22} = 0; // 64-bit version has an additional bit of immediate. + } + def Xri : BaseLogicalImm { + let Inst{31} = 1; + } + } // end Defs = [NZCV] +} + +class BaseLogicalRegPseudo + : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteI, ReadI, ReadI]>; + +// Split from LogicalImm as not all instructions have both. +multiclass LogicalReg opc, bit N, string mnemonic, + SDPatternOperator OpNode> { + def Wrr : BaseLogicalRegPseudo; + def Xrr : BaseLogicalRegPseudo; + + def Wrs : BaseLogicalSReg { + let Inst{31} = 0; + } + def Xrs : BaseLogicalSReg { + let Inst{31} = 1; + } + + def : LogicalRegAlias(NAME#"Wrs"), GPR32>; + def : LogicalRegAlias(NAME#"Xrs"), GPR64>; +} + +// Split from LogicalReg to allow setting NZCV Defs +multiclass LogicalRegS opc, bit N, string mnemonic, + SDPatternOperator OpNode = null_frag> { + let Defs = [NZCV], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def Wrr : BaseLogicalRegPseudo; + def Xrr : BaseLogicalRegPseudo; + + def Wrs : BaseLogicalSReg { + let Inst{31} = 0; + } + def Xrs : BaseLogicalSReg { + let Inst{31} = 1; + } + } // Defs = [NZCV] + + def : LogicalRegAlias(NAME#"Wrs"), GPR32>; + def : LogicalRegAlias(NAME#"Xrs"), GPR64>; +} + +//--- +// Conditionally set flags +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseCondSetFlagsImm + : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), + asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, + Sched<[WriteI, ReadI]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + + bits<5> Rn; + bits<5> imm; + bits<4> nzcv; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b111010010; + let Inst{20-16} = imm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = nzcv; +} + +multiclass CondSetFlagsImm { + def Wi : BaseCondSetFlagsImm { + let Inst{31} = 0; + } + def Xi : BaseCondSetFlagsImm { + let Inst{31} = 1; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseCondSetFlagsReg + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), + asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + + bits<5> Rn; + bits<5> Rm; + bits<4> nzcv; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b111010010; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4} = 0b0; + let Inst{3-0} = nzcv; +} + +multiclass CondSetFlagsReg { + def Wr : BaseCondSetFlagsReg { + let Inst{31} = 0; + } + def Xr : BaseCondSetFlagsReg { + let Inst{31} = 1; + } +} + +//--- +// Conditional select +//--- + +class BaseCondSelect op2, RegisterClass regtype, string asm> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + + bits<5> Rd; + bits<5> Rn; + 
bits<5> Rm; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b011010100; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = op2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass CondSelect op2, string asm> { + def Wr : BaseCondSelect { + let Inst{31} = 0; + } + def Xr : BaseCondSelect { + let Inst{31} = 1; + } +} + +class BaseCondSelectOp op2, RegisterClass regtype, string asm, + PatFrag frag> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + (AArch64csel regtype:$Rn, (frag regtype:$Rm), + (i32 imm:$cond), NZCV))]>, + Sched<[WriteI, ReadI, ReadI]> { + let Uses = [NZCV]; + + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> cond; + + let Inst{30} = op; + let Inst{29-21} = 0b011010100; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = op2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +def inv_cond_XFORM : SDNodeXForm(N->getZExtValue()); + return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), MVT::i32); +}]>; + +multiclass CondSelectOp op2, string asm, PatFrag frag> { + def Wr : BaseCondSelectOp { + let Inst{31} = 0; + } + def Xr : BaseCondSelectOp { + let Inst{31} = 1; + } + + def : Pat<(AArch64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV), + (!cast(NAME # Wr) GPR32:$Rn, GPR32:$Rm, + (inv_cond_XFORM imm:$cond))>; + + def : Pat<(AArch64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV), + (!cast(NAME # Xr) GPR64:$Rn, GPR64:$Rm, + (inv_cond_XFORM imm:$cond))>; +} + +//--- +// Special Mask Value +//--- +def maski8_or_more : Operand, + ImmLeaf { +} +def maski16_or_more : Operand, + ImmLeaf { +} + + +//--- +// Load/store +//--- + +// (unsigned immediate) +// Indexed for 8-bit registers. offset is in range [0,4095]. 
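The CondSelectOp patterns above commute the operands by inverting the condition via inv_cond_XFORM / AArch64CC::getInvertedCondCode. The inversion itself is trivial because AArch64 condition codes pair up on the low bit; a minimal sketch of the idea (our helper, not the LLVM function):

    #include <cstdint>

    // EQ<->NE, HS<->LO, MI<->PL, VS<->VC, HI<->LS, GE<->LT, GT<->LE:
    // inverting a condition code flips its low bit. AL (0b1110) and
    // NV (0b1111) are not used as select conditions here.
    static uint32_t invertCondCode(uint32_t cc) {
      return cc ^ 1;
    }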
+def am_indexed8 : ComplexPattern; +def am_indexed16 : ComplexPattern; +def am_indexed32 : ComplexPattern; +def am_indexed64 : ComplexPattern; +def am_indexed128 : ComplexPattern; + +class UImm12OffsetOperand : AsmOperandClass { + let Name = "UImm12Offset" # Scale; + let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">"; + let PredicateMethod = "isUImm12Offset<" # Scale # ">"; + let DiagnosticType = "InvalidMemoryIndexed" # Scale; +} + +def UImm12OffsetScale1Operand : UImm12OffsetOperand<1>; +def UImm12OffsetScale2Operand : UImm12OffsetOperand<2>; +def UImm12OffsetScale4Operand : UImm12OffsetOperand<4>; +def UImm12OffsetScale8Operand : UImm12OffsetOperand<8>; +def UImm12OffsetScale16Operand : UImm12OffsetOperand<16>; + +class uimm12_scaled : Operand { + let ParserMatchClass + = !cast("UImm12OffsetScale" # Scale # "Operand"); + let EncoderMethod + = "getLdStUImm12OpValue"; + let PrintMethod = "printUImm12Offset<" # Scale # ">"; +} + +def uimm12s1 : uimm12_scaled<1>; +def uimm12s2 : uimm12_scaled<2>; +def uimm12s4 : uimm12_scaled<4>; +def uimm12s8 : uimm12_scaled<8>; +def uimm12s16 : uimm12_scaled<16>; + +class BaseLoadStoreUI sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, list pattern> + : I { + bits<5> Rt; + + bits<5> Rn; + bits<12> offset; + + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b01; + let Inst{23-22} = opc; + let Inst{21-10} = offset; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeUnsignedLdStInstruction"; +} + +multiclass LoadUI sz, bit V, bits<2> opc, RegisterClass regtype, + Operand indextype, string asm, list pattern> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def ui : BaseLoadStoreUI, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass StoreUI sz, bit V, bits<2> opc, RegisterClass regtype, + Operand indextype, string asm, list pattern> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def ui : BaseLoadStoreUI, + Sched<[WriteST]>; + + def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +def PrefetchOperand : AsmOperandClass { + let Name = "Prefetch"; + let ParserMethod = "tryParsePrefetch"; +} +def prfop : Operand { + let PrintMethod = "printPrefetchOp"; + let ParserMatchClass = PrefetchOperand; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class PrefetchUI sz, bit V, bits<2> opc, string asm, list pat> + : BaseLoadStoreUI, + Sched<[WriteLD]>; + +//--- +// Load literal +//--- + +// Load literal address: 19-bit immediate. The low two bits of the target +// offset are implied zero and so are not part of the immediate. 
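The uimm12s1..uimm12s16 operands above scale a 12-bit unsigned offset by the access size, so e.g. an 8-byte LDR reaches [0, 32760] in steps of 8. A hedged C++ sketch of the encoding rule (an illustration under those assumptions, not the getLdStUImm12OpValue encoder):

    #include <cstdint>
    #include <optional>

    static std::optional<uint32_t> encodeUImm12Offset(int64_t byteOff,
                                                      unsigned scale) {
      if (byteOff < 0 || byteOff % scale != 0)
        return std::nullopt;          // must be non-negative and aligned
      int64_t scaled = byteOff / scale;
      if (scaled > 4095)
        return std::nullopt;          // only 12 bits are available
      return static_cast<uint32_t>(scaled);
    }

For example, encodeUImm12Offset(32760, 8) yields 4095, the largest encodable word for that access size, while encodeUImm12Offset(4, 8) fails the alignment check and would fall back to the unscaled (LDUR-style) forms defined later.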
+def am_ldrlit : Operand { + let EncoderMethod = "getLoadLiteralOpValue"; + let DecoderMethod = "DecodePCRelLabel19"; + let PrintMethod = "printAlignedLabel"; + let ParserMatchClass = PCRelLabel19Operand; +} + +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class LoadLiteral opc, bit V, RegisterClass regtype, string asm> + : I<(outs regtype:$Rt), (ins am_ldrlit:$label), + asm, "\t$Rt, $label", "", []>, + Sched<[WriteLD]> { + bits<5> Rt; + bits<19> label; + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-5} = label; + let Inst{4-0} = Rt; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class PrefetchLiteral opc, bit V, string asm, list pat> + : I<(outs), (ins prfop:$Rt, am_ldrlit:$label), + asm, "\t$Rt, $label", "", pat>, + Sched<[WriteLD]> { + bits<5> Rt; + bits<19> label; + let Inst{31-30} = opc; + let Inst{29-27} = 0b011; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-5} = label; + let Inst{4-0} = Rt; +} + +//--- +// Load/store register offset +//--- + +def ro_Xindexed8 : ComplexPattern", []>; +def ro_Xindexed16 : ComplexPattern", []>; +def ro_Xindexed32 : ComplexPattern", []>; +def ro_Xindexed64 : ComplexPattern", []>; +def ro_Xindexed128 : ComplexPattern", []>; + +def ro_Windexed8 : ComplexPattern", []>; +def ro_Windexed16 : ComplexPattern", []>; +def ro_Windexed32 : ComplexPattern", []>; +def ro_Windexed64 : ComplexPattern", []>; +def ro_Windexed128 : ComplexPattern", []>; + +class MemExtendOperand : AsmOperandClass { + let Name = "Mem" # Reg # "Extend" # Width; + let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">"; + let RenderMethod = "addMemExtendOperands"; + let DiagnosticType = "InvalidMemory" # Reg # "Extend" # Width; +} + +def MemWExtend8Operand : MemExtendOperand<"W", 8> { + // The address "[x0, x1, lsl #0]" actually maps to the variant which performs + // the trivial shift. + let RenderMethod = "addMemExtend8Operands"; +} +def MemWExtend16Operand : MemExtendOperand<"W", 16>; +def MemWExtend32Operand : MemExtendOperand<"W", 32>; +def MemWExtend64Operand : MemExtendOperand<"W", 64>; +def MemWExtend128Operand : MemExtendOperand<"W", 128>; + +def MemXExtend8Operand : MemExtendOperand<"X", 8> { + // The address "[x0, x1, lsl #0]" actually maps to the variant which performs + // the trivial shift. + let RenderMethod = "addMemExtend8Operands"; +} +def MemXExtend16Operand : MemExtendOperand<"X", 16>; +def MemXExtend32Operand : MemExtendOperand<"X", 32>; +def MemXExtend64Operand : MemExtendOperand<"X", 64>; +def MemXExtend128Operand : MemExtendOperand<"X", 128>; + +class ro_extend + : Operand { + let ParserMatchClass = ParserClass; + let PrintMethod = "printMemExtend<'" # Reg # "', " # Width # ">"; + let DecoderMethod = "DecodeMemExtend"; + let EncoderMethod = "getMemExtendOpValue"; + let MIOperandInfo = (ops i32imm:$signed, i32imm:$doshift); +} + +def ro_Wextend8 : ro_extend; +def ro_Wextend16 : ro_extend; +def ro_Wextend32 : ro_extend; +def ro_Wextend64 : ro_extend; +def ro_Wextend128 : ro_extend; + +def ro_Xextend8 : ro_extend; +def ro_Xextend16 : ro_extend; +def ro_Xextend32 : ro_extend; +def ro_Xextend64 : ro_extend; +def ro_Xextend128 : ro_extend; + +class ROAddrMode { + // CodeGen-level pattern covering the entire addressing mode. + ComplexPattern Wpat = windex; + ComplexPattern Xpat = xindex; + + // Asm-level Operand covering the valid "uxtw #3" style syntax. 
+ Operand Wext = wextend; + Operand Xext = xextend; +} + +def ro8 : ROAddrMode; +def ro16 : ROAddrMode; +def ro32 : ROAddrMode; +def ro64 : ROAddrMode; +def ro128 : ROAddrMode; + +class LoadStore8RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +class ROInstAlias + : InstAlias; + +multiclass Load8RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore8RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore8RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store8RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore8RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore8RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +class LoadStore16RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load16RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore16RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore16RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store16RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore16RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore16RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +class LoadStore32RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load32RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10 in + def roW : LoadStore32RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore32RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store32RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10 in + def roW : LoadStore32RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10 in + def roX : LoadStore32RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +class LoadStore64RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load64RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roW : LoadStore64RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roX : LoadStore64RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store64RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roW : LoadStore64RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roX : LoadStore64RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +class LoadStore128RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, dag ins, dag outs, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? 
+ let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass Load128RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator loadop> { + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roW : LoadStore128RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in + def roX : LoadStore128RO, + Sched<[WriteLDIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +multiclass Store128RO sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, ValueType Ty, SDPatternOperator storeop> { + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roW : LoadStore128RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b0; + } + + let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in + def roX : LoadStore128RO, + Sched<[WriteSTIdx, ReadAdrBase]> { + let Inst{13} = 0b1; + } + + def : ROInstAlias(NAME # "roX")>; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class BasePrefetchRO sz, bit V, bits<2> opc, dag outs, dag ins, + string asm, list pat> + : I, + Sched<[WriteLD]> { + bits<5> Rt; + bits<5> Rn; + bits<5> Rm; + bits<2> extend; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15} = extend{1}; // sign extend Rm? + let Inst{14} = 1; + let Inst{12} = extend{0}; // do shift? + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; +} + +multiclass PrefetchRO sz, bit V, bits<2> opc, string asm> { + def roW : BasePrefetchRO { + let Inst{13} = 0b0; + } + + def roX : BasePrefetchRO { + let Inst{13} = 0b1; + } + + def : InstAlias<"prfm $Rt, [$Rn, $Rm]", + (!cast(NAME # "roX") prfop:$Rt, + GPR64sp:$Rn, GPR64:$Rm, 0, 0)>; +} + +//--- +// Load/store unscaled immediate +//--- + +def am_unscaled8 : ComplexPattern; +def am_unscaled16 : ComplexPattern; +def am_unscaled32 : ComplexPattern; +def am_unscaled64 : ComplexPattern; +def am_unscaled128 :ComplexPattern; + +class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, list pattern> + : I { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +multiclass LoadUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, list pattern> { + let AddedComplexity = 1 in // try this before LoadUI + def i : BaseLoadStoreUnscale, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass StoreUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, list pattern> { + let AddedComplexity = 1 in // try this before StoreUI + def i : BaseLoadStoreUnscale, + Sched<[WriteST]>; + + def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; +} + +multiclass PrefetchUnscaled sz, bit V, bits<2> opc, string asm, + list pat> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in + def i : BaseLoadStoreUnscale, + Sched<[WriteLD]>; + + def : InstAlias(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>; +} + +//--- +// Load/store unscaled immediate, unprivileged +//--- + +class BaseLoadStoreUnprivileged 
<bits<2> sz, bit V, bits<2> opc,
+                                dag oops, dag iops, string asm>
+    : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", []> {
+  bits<5> Rt;
+  bits<5> Rn;
+  bits<9> offset;
+  let Inst{31-30} = sz;
+  let Inst{29-27} = 0b111;
+  let Inst{26} = V;
+  let Inst{25-24} = 0b00;
+  let Inst{23-22} = opc;
+  let Inst{21} = 0;
+  let Inst{20-12} = offset;
+  let Inst{11-10} = 0b10;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rt;
+
+  let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+multiclass LoadUnprivileged<bits<2> sz, bit V, bits<2> opc,
+                            RegisterClass regtype, string asm> {
+  let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in
+  def i : BaseLoadStoreUnprivileged<sz, V, opc, (outs regtype:$Rt),
+                                    (ins GPR64sp:$Rn, simm9:$offset), asm>,
+          Sched<[WriteLD]>;
+
+  def : InstAlias<asm # "\t$Rt, [$Rn]",
+                  (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass StoreUnprivileged<bits<2> sz, bit V, bits<2> opc,
+                             RegisterClass regtype, string asm> {
+  let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
+  def i : BaseLoadStoreUnprivileged<sz, V, opc, (outs),
+                                    (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+                                    asm>,
+          Sched<[WriteST]>;
+
+  def : InstAlias<asm # "\t$Rt, [$Rn]",
+                  (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+//---
+// Load/store pre-indexed
+//---
+
+class BaseLoadStorePreIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+                          string asm, string cstr, list<dag> pat>
+    : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]!", cstr, pat> {
+  bits<5> Rt;
+  bits<5> Rn;
+  bits<9> offset;
+  let Inst{31-30} = sz;
+  let Inst{29-27} = 0b111;
+  let Inst{26} = V;
+  let Inst{25-24} = 0;
+  let Inst{23-22} = opc;
+  let Inst{21} = 0;
+  let Inst{20-12} = offset;
+  let Inst{11-10} = 0b11;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rt;
+
+  let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+                 string asm>
+    : BaseLoadStorePreIdx<sz, V, opc, (outs GPR64sp:$wback, regtype:$Rt),
+                          (ins GPR64sp:$Rn, simm9:$offset), asm,
+                          "$Rn = $wback", []>,
+      Sched<[WriteLD, WriteAdr]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+                  string asm, SDPatternOperator storeop, ValueType Ty>
+    : BaseLoadStorePreIdx<sz, V, opc, (outs GPR64sp:$wback),
+                          (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+                          asm, "$Rn = $wback",
+      [(set GPR64sp:$wback,
+            (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
+      Sched<[WriteAdr, WriteST]>;
+} // hasSideEffects = 0
+
+//---
+// Load/store post-indexed
+//---
+
+// (post-index) load/stores.
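The only encoding difference between the pre- and post-indexed forms is Inst{11-10} (0b11 versus 0b01); both carry a signed 9-bit byte offset in Inst{20-12} and write the updated address back to the base register. A minimal C++ sketch of that field split (illustrative only; the patch's actual decoder is DecodeSignedLdStInstruction in the AArch64 disassembler, and the example word assumes STRXpre's sz/V/opc of 0b11/0/0b00 from the layout above):

    #include <cstdint>
    #include <cstdio>

    struct LdStIdx {
      unsigned Rt, Rn;
      int32_t Offset;   // simm9, sign-extended
      bool PreIndexed;  // Inst{11-10}: 0b11 = pre, 0b01 = post
    };

    static LdStIdx decodeIndexed(uint32_t Inst) {
      LdStIdx D;
      D.Rt = Inst & 0x1f;                          // Inst{4-0}
      D.Rn = (Inst >> 5) & 0x1f;                   // Inst{9-5}
      uint32_t Imm9 = (Inst >> 12) & 0x1ff;        // Inst{20-12}
      D.Offset = (int32_t)(Imm9 << 23) >> 23;      // sign-extend 9 bits
      D.PreIndexed = ((Inst >> 10) & 0x3) == 0x3;
      return D;
    }

    int main() {
      // str x0, [sp, #-16]!  should encode as 0xf81f0fe0 under this layout.
      LdStIdx D = decodeIndexed(0xf81f0fe0);
      printf("Rt=%u Rn=%u offset=%d pre=%d\n", D.Rt, D.Rn, D.Offset,
             D.PreIndexed);
    }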
+class BaseLoadStorePostIdx sz, bit V, bits<2> opc, dag oops, dag iops, + string asm, string cstr, list pat> + : I { + bits<5> Rt; + bits<5> Rn; + bits<9> offset; + let Inst{31-30} = sz; + let Inst{29-27} = 0b111; + let Inst{26} = V; + let Inst{25-24} = 0b00; + let Inst{23-22} = opc; + let Inst{21} = 0b0; + let Inst{20-12} = offset; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodeSignedLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPostIdx sz, bit V, bits<2> opc, RegisterClass regtype, + string asm> + : BaseLoadStorePostIdx, + Sched<[WriteLD, WriteI]>; + +let mayStore = 1, mayLoad = 0 in +class StorePostIdx sz, bit V, bits<2> opc, RegisterClass regtype, + string asm, SDPatternOperator storeop, ValueType Ty> + : BaseLoadStorePostIdx, + Sched<[WriteAdr, WriteST, ReadAdrBase]>; +} // hasSideEffects = 0 + + +//--- +// Load/store pair +//--- + +// (indexed, offset) + +class BaseLoadStorePairOffset opc, bit V, bit L, dag oops, dag iops, + string asm> + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b010; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +multiclass LoadPairOffset opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStorePairOffset, + Sched<[WriteLD, WriteLDHi]>; + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + + +multiclass StorePairOffset opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in + def i : BaseLoadStorePairOffset, + Sched<[WriteSTP]>; + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +// (pre-indexed) +class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, + string asm> + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b011; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPairPreIdx opc, bit V, RegisterClass regtype, + Operand indextype, string asm> + : BaseLoadStorePairPreIdx, + Sched<[WriteLD, WriteLDHi, WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePairPreIdx opc, bit V, RegisterClass regtype, + Operand indextype, string asm> + : BaseLoadStorePairPreIdx, + Sched<[WriteAdr, WriteSTP]>; +} // hasSideEffects = 0 + +// (post-indexed) + +class BaseLoadStorePairPostIdx opc, bit V, bit L, dag oops, dag iops, + string asm> + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b001; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +let hasSideEffects = 0 in { +let mayStore = 0, mayLoad = 1 in +class LoadPairPostIdx opc, bit V, RegisterClass regtype, + Operand idxtype, string asm> + : BaseLoadStorePairPostIdx, + Sched<[WriteLD, WriteLDHi, 
WriteAdr]>; + +let mayStore = 1, mayLoad = 0 in +class StorePairPostIdx opc, bit V, RegisterClass regtype, + Operand idxtype, string asm> + : BaseLoadStorePairPostIdx, + Sched<[WriteAdr, WriteSTP]>; +} // hasSideEffects = 0 + +// (no-allocate) + +class BaseLoadStorePairNoAlloc opc, bit V, bit L, dag oops, dag iops, + string asm> + : I { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + bits<7> offset; + let Inst{31-30} = opc; + let Inst{29-27} = 0b101; + let Inst{26} = V; + let Inst{25-23} = 0b000; + let Inst{22} = L; + let Inst{21-15} = offset; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let DecoderMethod = "DecodePairLdStInstruction"; +} + +multiclass LoadPairNoAlloc opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in + def i : BaseLoadStorePairNoAlloc, + Sched<[WriteLD, WriteLDHi]>; + + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +multiclass StorePairNoAlloc opc, bit V, RegisterClass regtype, + Operand indextype, string asm> { + let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in + def i : BaseLoadStorePairNoAlloc, + Sched<[WriteSTP]>; + + def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, + GPR64sp:$Rn, 0)>; +} + +//--- +// Load/store exclusive +//--- + +// True exclusive operations write to and/or read from the system's exclusive +// monitors, which as far as a compiler is concerned can be modelled as a +// random shared memory address. Hence LoadExclusive mayStore. +// +// Since these instructions have the undefined register bits set to 1 in +// their canonical form, we need a post encoder method to set those bits +// to 1 when encoding these instructions. We do this using the +// fixLoadStoreExclusive function. This function has template parameters: +// +// fixLoadStoreExclusive +// +// hasRs indicates that the instruction uses the Rs field, so we won't set +// it to 1 (and the same for Rt2). We don't need template parameters for +// the other register fields since Rt and Rn are always used. +// +let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in +class BaseLoadStoreExclusive sz, bit o2, bit L, bit o1, bit o0, + dag oops, dag iops, string asm, string operands> + : I { + let Inst{31-30} = sz; + let Inst{29-24} = 0b001000; + let Inst{23} = o2; + let Inst{22} = L; + let Inst{21} = o1; + let Inst{15} = o0; + + let DecoderMethod = "DecodeExclusiveLdStInstruction"; +} + +// Neither Rs nor Rt2 operands. +class LoadStoreExclusiveSimple sz, bit o2, bit L, bit o1, bit o0, + dag oops, dag iops, string asm, string operands> + : BaseLoadStoreExclusive { + bits<5> Rt; + bits<5> Rn; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let PostEncoderMethod = "fixLoadStoreExclusive<0,0>"; +} + +// Simple load acquires don't set the exclusive monitor +let mayLoad = 1, mayStore = 0 in +class LoadAcquire sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple, + Sched<[WriteLD]>; + +class LoadExclusive sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple, + Sched<[WriteLD]>; + +class LoadExclusivePair sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive, + Sched<[WriteLD, WriteLDHi]> { + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let PostEncoderMethod = "fixLoadStoreExclusive<0,1>"; +} + +// Simple store release operations do not check the exclusive monitor. 
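The fixup described in the comment block above is plain bit manipulation: whichever of the Rs (Inst{20-16}) and Rt2 (Inst{14-10}) fields an exclusive instruction does not use must be forced to its canonical all-ones form after encoding. A hedged sketch of that logic (the real PostEncoderMethod lives in the AArch64 MC code emitter and also receives the MCInst and subtarget info):

    #include <cstdint>

    // Illustrative stand-in for fixLoadStoreExclusive<hasRs, hasRt2>:
    // set each unused register field to 0b11111.
    template <unsigned hasRs, unsigned hasRt2>
    constexpr uint32_t fixLoadStoreExclusive(uint32_t EncodedValue) {
      return EncodedValue | (hasRs ? 0 : 0x1fu << 16)    // Rs at Inst{20-16}
                          | (hasRt2 ? 0 : 0x1fu << 10);  // Rt2 at Inst{14-10}
    }

    static_assert(fixLoadStoreExclusive<0, 0>(0) == 0x001f7c00,
                  "both unused fields forced to ones");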
+let mayLoad = 0, mayStore = 1 in +class StoreRelease sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : LoadStoreExclusiveSimple, + Sched<[WriteST]>; + +let mayLoad = 1, mayStore = 1 in +class StoreExclusive sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive, + Sched<[WriteSTX]> { + bits<5> Ws; + bits<5> Rt; + bits<5> Rn; + let Inst{20-16} = Ws; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let Constraints = "@earlyclobber $Ws"; + let PostEncoderMethod = "fixLoadStoreExclusive<1,0>"; +} + +class StoreExclusivePair sz, bit o2, bit L, bit o1, bit o0, + RegisterClass regtype, string asm> + : BaseLoadStoreExclusive, + Sched<[WriteSTX]> { + bits<5> Ws; + bits<5> Rt; + bits<5> Rt2; + bits<5> Rn; + let Inst{20-16} = Ws; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + + let Constraints = "@earlyclobber $Ws"; +} + +//--- +// Exception generation +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in +class ExceptionGeneration op1, bits<2> ll, string asm> + : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, + Sched<[WriteSys]> { + bits<16> imm; + let Inst{31-24} = 0b11010100; + let Inst{23-21} = op1; + let Inst{20-5} = imm; + let Inst{4-2} = 0b000; + let Inst{1-0} = ll; +} + +let Predicates = [HasFPARMv8] in { + +//--- +// Floating point to integer conversion +//--- + +class BaseFPToIntegerUnscaled type, bits<2> rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + string asm, list pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn), + asm, "\t$Rd, $Rn", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-29} = 0b00; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + Operand immType, string asm, list pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), + asm, "\t$Rd, $Rn, $scale", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-29} = 0b00; + let Inst{28-24} = 0b11110; + let Inst{23-22} = type; + let Inst{21} = 0; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = scale; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, + SDPatternOperator OpN> { + // Unscaled single-precision to 32-bit + def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, + [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { + let Inst{31} = 0; // 32-bit GPR flag + } + + // Unscaled single-precision to 64-bit + def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm, + [(set GPR64:$Rd, (OpN FPR32:$Rn))]> { + let Inst{31} = 1; // 64-bit GPR flag + } + + // Unscaled double-precision to 32-bit + def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm, + [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> { + let Inst{31} = 0; // 32-bit GPR flag + } + + // Unscaled double-precision to 64-bit + def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm, + [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } +} + +multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, + SDPatternOperator OpN> { + // Scaled single-precision to 
32-bit + def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, + fixedpoint_f32_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR32:$Rn, + fixedpoint_f32_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + } + + // Scaled single-precision to 64-bit + def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64, + fixedpoint_f32_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR32:$Rn, + fixedpoint_f32_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } + + // Scaled double-precision to 32-bit + def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32, + fixedpoint_f64_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR64:$Rn, + fixedpoint_f64_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + } + + // Scaled double-precision to 64-bit + def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64, + fixedpoint_f64_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR64:$Rn, + fixedpoint_f64_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + } +} + +//--- +// Integer to floating point conversion +//--- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseIntegerToFP pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), + asm, "\t$Rd, $Rn, $scale", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-23} = 0b00111100; + let Inst{21-17} = 0b00001; + let Inst{16} = isUnsigned; + let Inst{15-10} = scale; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseIntegerToFPUnscaled + : I<(outs dstType:$Rd), (ins srcType:$Rn), + asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + bits<6> scale; + let Inst{30-23} = 0b00111100; + let Inst{21-17} = 0b10001; + let Inst{16} = isUnsigned; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass IntegerToFP { + // Unscaled + def UWSri: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{22} = 0; // 32-bit FPR flag + } + + def UWDri: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{22} = 1; // 64-bit FPR flag + } + + def UXSri: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{22} = 0; // 32-bit FPR flag + } + + def UXDri: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{22} = 1; // 64-bit FPR flag + } + + // Scaled + def SWSri: BaseIntegerToFP { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{22} = 0; // 32-bit FPR flag + let scale{5} = 1; + } + + def SWDri: BaseIntegerToFP { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{22} = 1; // 64-bit FPR flag + let scale{5} = 1; + } + + def SXSri: BaseIntegerToFP { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{22} = 0; // 32-bit FPR flag + } + + def SXDri: BaseIntegerToFP { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{22} = 1; // 64-bit FPR flag + } +} + +//--- +// Unscaled integer <-> floating point conversion (i.e. FMOV) +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversion rmode, bits<3> opcode, + RegisterClass srcType, RegisterClass dstType, + string asm> + : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", + // We use COPY_TO_REGCLASS for these bitconvert operations. + // copyPhysReg() expands the resultant COPY instructions after + // regalloc is done. This gives greater freedom for the allocator + // and related passes (coalescing, copy propagation, et. al.) to + // be more effective. 
+ [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-23} = 0b00111100; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversionToHigh rmode, bits<3> opcode, + RegisterClass srcType, RegisterOperand dstType, string asm, + string kind> + : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm, + "{\t$Rd"#kind#"$idx, $Rn|"#kind#"\t$Rd$idx, $Rn}", "", []>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-23} = 0b00111101; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeFMOVLaneInstruction"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseUnscaledConversionFromHigh rmode, bits<3> opcode, + RegisterOperand srcType, RegisterClass dstType, string asm, + string kind> + : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm, + "{\t$Rd, $Rn"#kind#"$idx|"#kind#"\t$Rd, $Rn$idx}", "", []>, + Sched<[WriteFCopy]> { + bits<5> Rd; + bits<5> Rn; + let Inst{30-23} = 0b00111101; + let Inst{21} = 1; + let Inst{20-19} = rmode; + let Inst{18-16} = opcode; + let Inst{15-10} = 0b000000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; + + let DecoderMethod = "DecodeFMOVLaneInstruction"; +} + + + +multiclass UnscaledConversion { + def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{22} = 0; // 32-bit FPR flag + } + + def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{22} = 1; // 64-bit FPR flag + } + + def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{22} = 0; // 32-bit FPR flag + } + + def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{22} = 1; // 64-bit FPR flag + } + + def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, + asm, ".d"> { + let Inst{31} = 1; + let Inst{22} = 0; + } + + def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64, + asm, ".d"> { + let Inst{31} = 1; + let Inst{22} = 0; + } +} + +//--- +// Floating point conversion +//--- + +class BaseFPConversion type, bits<2> opcode, RegisterClass dstType, + RegisterClass srcType, string asm, list pattern> + : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, + Sched<[WriteFCvt]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-24} = 0b00011110; + let Inst{23-22} = type; + let Inst{21-17} = 0b10001; + let Inst{16-15} = opcode; + let Inst{14-10} = 0b10000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass FPConversion { + // Double-precision to Half-precision + def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, + [(set FPR16:$Rd, (fround FPR64:$Rn))]>; + + // Double-precision to Single-precision + def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, + [(set FPR32:$Rd, (fround FPR64:$Rn))]>; + + // Half-precision to Double-precision + def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, + [(set FPR64:$Rd, (fextend FPR16:$Rn))]>; + + // Half-precision to Single-precision + def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, + [(set FPR32:$Rd, (fextend FPR16:$Rn))]>; + + // 
Single-precision to Double-precision + def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, + [(set FPR64:$Rd, (fextend FPR32:$Rn))]>; + + // Single-precision to Half-precision + def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, + [(set FPR16:$Rd, (fround FPR32:$Rn))]>; +} + +//--- +// Single operand floating point data processing +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSingleOperandFPData opcode, RegisterClass regtype, + ValueType vt, string asm, SDPatternOperator node> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", + [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-23} = 0b000111100; + let Inst{21-19} = 0b100; + let Inst{18-15} = opcode; + let Inst{14-10} = 0b10000; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SingleOperandFPData opcode, string asm, + SDPatternOperator node = null_frag> { + def Sr : BaseSingleOperandFPData { + let Inst{22} = 0; // 32-bit size flag + } + + def Dr : BaseSingleOperandFPData { + let Inst{22} = 1; // 64-bit size flag + } +} + +//--- +// Two operand floating point data processing +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseTwoOperandFPData opcode, RegisterClass regtype, + string asm, list pat> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), + asm, "\t$Rd, $Rn, $Rm", "", pat>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-23} = 0b000111100; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass TwoOperandFPData opcode, string asm, + SDPatternOperator node = null_frag> { + def Srr : BaseTwoOperandFPData { + let Inst{22} = 0; // 32-bit size flag + } + + def Drr : BaseTwoOperandFPData { + let Inst{22} = 1; // 64-bit size flag + } +} + +multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { + def Srr : BaseTwoOperandFPData { + let Inst{22} = 0; // 32-bit size flag + } + + def Drr : BaseTwoOperandFPData { + let Inst{22} = 1; // 64-bit size flag + } +} + + +//--- +// Three operand floating point data processing +//--- + +class BaseThreeOperandFPData pat> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), + asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>, + Sched<[WriteFMul]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<5> Ra; + let Inst{31-23} = 0b000111110; + let Inst{21} = isNegated; + let Inst{20-16} = Rm; + let Inst{15} = isSub; + let Inst{14-10} = Ra; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass ThreeOperandFPData { + def Srrr : BaseThreeOperandFPData { + let Inst{22} = 0; // 32-bit size flag + } + + def Drrr : BaseThreeOperandFPData { + let Inst{22} = 1; // 64-bit size flag + } +} + +//--- +// Floating point data comparisons +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseOneOperandFPComparison pat> + : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>, + Sched<[WriteFCmp]> { + bits<5> Rn; + let Inst{31-23} = 0b000111100; + let Inst{21} = 1; + + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = 0b1000; + + // Rm should be 0b00000 canonically, but we need to accept any value. 
+ let PostEncoderMethod = "fixOneOperandFPComparison"; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseTwoOperandFPComparison pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, + Sched<[WriteFCmp]> { + bits<5> Rm; + bits<5> Rn; + let Inst{31-23} = 0b000111100; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-10} = 0b001000; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = 0b0000; +} + +multiclass FPComparison { + let Defs = [NZCV] in { + def Srr : BaseTwoOperandFPComparison { + let Inst{22} = 0; + } + + def Sri : BaseOneOperandFPComparison { + let Inst{22} = 0; + } + + def Drr : BaseTwoOperandFPComparison { + let Inst{22} = 1; + } + + def Dri : BaseOneOperandFPComparison { + let Inst{22} = 1; + } + } // Defs = [NZCV] +} + +//--- +// Floating point conditional comparisons +//--- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseFPCondComparison + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), + asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, + Sched<[WriteFCmp]> { + bits<5> Rn; + bits<5> Rm; + bits<4> nzcv; + bits<4> cond; + + let Inst{31-23} = 0b000111100; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b01; + let Inst{9-5} = Rn; + let Inst{4} = signalAllNans; + let Inst{3-0} = nzcv; +} + +multiclass FPCondComparison { + let Defs = [NZCV], Uses = [NZCV] in { + def Srr : BaseFPCondComparison { + let Inst{22} = 0; + } + + def Drr : BaseFPCondComparison { + let Inst{22} = 1; + } + } // Defs = [NZCV], Uses = [NZCV] +} + +//--- +// Floating point conditional select +//--- + +class BaseFPCondSelect + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), + asm, "\t$Rd, $Rn, $Rm, $cond", "", + [(set regtype:$Rd, + (AArch64csel (vt regtype:$Rn), regtype:$Rm, + (i32 imm:$cond), NZCV))]>, + Sched<[WriteF]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> cond; + + let Inst{31-23} = 0b000111100; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = cond; + let Inst{11-10} = 0b11; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass FPCondSelect { + let Uses = [NZCV] in { + def Srrr : BaseFPCondSelect { + let Inst{22} = 0; + } + + def Drrr : BaseFPCondSelect { + let Inst{22} = 1; + } + } // Uses = [NZCV] +} + +//--- +// Floating move immediate +//--- + +class BaseFPMoveImmediate + : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "", + [(set regtype:$Rd, fpimmtype:$imm)]>, + Sched<[WriteFImm]> { + bits<5> Rd; + bits<8> imm; + let Inst{31-23} = 0b000111100; + let Inst{21} = 1; + let Inst{20-13} = imm; + let Inst{12-5} = 0b10000000; + let Inst{4-0} = Rd; +} + +multiclass FPMoveImmediate { + def Si : BaseFPMoveImmediate { + let Inst{22} = 0; + } + + def Di : BaseFPMoveImmediate { + let Inst{22} = 1; + } +} +} // end of 'let Predicates = [HasFPARMv8]' + +//---------------------------------------------------------------------------- +// AdvSIMD +//---------------------------------------------------------------------------- + +let Predicates = [HasNEON] in { + +//---------------------------------------------------------------------------- +// AdvSIMD three register vector instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDThreeSameVector size, bits<5> opcode, + RegisterOperand regtype, string asm, string kind, + list pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), 
asm,
+      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
+      "|" # kind # "\t$Rd, $Rn, $Rm}", "", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
+  let Inst{31} = 0;
+  let Inst{30} = Q;
+  let Inst{29} = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21} = 1;
+  let Inst{20-16} = Rm;
+  let Inst{15-11} = opcode;
+  let Inst{10} = 1;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+                        RegisterOperand regtype, string asm, string kind,
+                        list<dag> pattern>
+  : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
+      "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
+      "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  bits<5> Rm;
+  let Inst{31} = 0;
+  let Inst{30} = Q;
+  let Inst{29} = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21} = 1;
+  let Inst{20-16} = Rm;
+  let Inst{15-11} = opcode;
+  let Inst{10} = 1;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rd;
+}
+
+// All operand sizes distinguished in the encoding.
+multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
+                               SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+                                      asm, ".8b",
+    [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+                                      asm, ".16b",
+    [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
+  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+                                      asm, ".4h",
+    [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+                                      asm, ".8h",
+    [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+                                      asm, ".2s",
+    [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+                                      asm, ".4s",
+    [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+  def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128,
+                                      asm, ".2d",
+    [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
+}
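The fixed bits in BaseSIMDThreeSameVector pin down the whole instruction word once Q (64- vs. 128-bit vector), U, size, the opcode, and the three registers are known. A self-contained C++ sketch of that assembly, using names from the TableGen class above (illustrative, not LLVM's MCCodeEmitter):

    #include <cassert>
    #include <cstdint>

    uint32_t encodeThreeSame(unsigned Q, unsigned U, unsigned size,
                             unsigned opcode, unsigned Rd, unsigned Rn,
                             unsigned Rm) {
      uint32_t Inst = 0;
      Inst |= Q << 30;      // Inst{30}: 64-bit (0) vs 128-bit (1) vector
      Inst |= U << 29;      // Inst{29}: "unsigned" opcode-space bit
      Inst |= 0x0eu << 24;  // Inst{28-24} = 0b01110
      Inst |= size << 22;   // Inst{23-22}: element size
      Inst |= 1u << 21;     // Inst{21} = 1
      Inst |= Rm << 16;     // Inst{20-16}
      Inst |= opcode << 11; // Inst{15-11}
      Inst |= 1u << 10;     // Inst{10} = 1
      Inst |= Rn << 5;      // Inst{9-5}
      Inst |= Rd;           // Inst{4-0}
      return Inst;
    }

    int main() {
      // add v0.8b, v1.8b, v2.8b: Q=0, U=0, size=0b00, opcode=0b10000,
      // which should assemble to 0x0e228420 under this layout.
      assert(encodeThreeSame(0, 0, 0, 0x10, 0, 1, 2) == 0x0e228420);
    }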
+// As above, but D sized elements unsupported.
+multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
+                                  SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+                                      asm, ".8b",
+    [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>;
+  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+                                      asm, ".16b",
+    [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>;
+  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+                                      asm, ".4h",
+    [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>;
+  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+                                      asm, ".8h",
+    [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>;
+  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+                                      asm, ".2s",
+    [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>;
+  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+                                      asm, ".4s",
+    [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>;
+}
+
+multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
+                                      SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64,
+                                          asm, ".8b",
+      [(set (v8i8 V64:$dst),
+            (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+  def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128,
+                                          asm, ".16b",
+      [(set (v16i8 V128:$dst),
+            (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
+  def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64,
+                                          asm, ".4h",
+      [(set (v4i16 V64:$dst),
+            (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+  def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128,
+                                          asm, ".8h",
+      [(set (v8i16 V128:$dst),
+            (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+  def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64,
+                                          asm, ".2s",
+      [(set (v2i32 V64:$dst),
+            (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+  def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128,
+                                          asm, ".4s",
+      [(set (v4i32 V128:$dst),
+            (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+}
+
+// As above, but only B sized elements supported.
+multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm,
+                                SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+                                      asm, ".8b",
+    [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+  def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+                                      asm, ".16b",
+    [(set (v16i8 V128:$Rd),
+          (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
+}
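The "Tied" variants express accumulating operations: the TableGen constraint "$Rd = $dst" ties the destination to an input, so the pattern sees the old destination value as an operand. A C++ model of what such an instruction computes per lane (using mla as the example; which instructions actually instantiate these multiclasses is decided in AArch64InstrInfo.td):

    #include <array>
    #include <cassert>
    #include <cstdint>

    using V8i8 = std::array<uint8_t, 8>;

    // mla vd.8b, vn.8b, vm.8b : d[i] += n[i] * m[i]; d is read and written.
    V8i8 mla(V8i8 d, const V8i8 &n, const V8i8 &m) {
      for (int i = 0; i < 8; ++i)
        d[i] = uint8_t(d[i] + n[i] * m[i]);
      return d;
    }

    int main() {
      V8i8 d{1, 1, 1, 1, 1, 1, 1, 1};
      V8i8 n{2, 2, 2, 2, 2, 2, 2, 2};
      V8i8 m{3, 3, 3, 3, 3, 3, 3, 3};
      assert(mla(d, n, m)[0] == 7); // 1 + 2*3
    }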
+// As above, but only S and D sized floating point elements supported.
+multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc,
+                                 string asm, SDPatternOperator OpNode> {
+  def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+                                      asm, ".2s",
+    [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+  def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+                                      asm, ".4s",
+    [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+  def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+                                      asm, ".2d",
+    [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc,
+                                    string asm,
+                                    SDPatternOperator OpNode> {
+  def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+                                      asm, ".2s",
+    [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+  def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+                                      asm, ".4s",
+    [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+  def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+                                      asm, ".2d",
+    [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
+                                     string asm, SDPatternOperator OpNode> {
+  def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64,
+                                          asm, ".2s",
+      [(set (v2f32 V64:$dst),
+            (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+  def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128,
+                                          asm, ".4s",
+      [(set (v4f32 V128:$dst),
+            (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+  def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128,
+                                          asm, ".2d",
+      [(set (v2f64 V128:$dst),
+            (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+// As above, but D and B sized elements unsupported.
+multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
+                                 SDPatternOperator OpNode> {
+  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+                                      asm, ".4h",
+    [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+                                      asm, ".8h",
+    [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+                                      asm, ".2s",
+    [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+                                      asm, ".4s",
+    [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+}
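Note how the FPCmp patterns map float vectors to integer vectors of the same lane count: a vector FP compare (e.g. an fcmeq-style op) writes an all-ones or all-zeros mask per lane. A one-lane C++ model of that result convention:

    #include <cassert>
    #include <cstdint>

    // Per-lane model of a SIMD FP compare: equal lanes yield an all-ones
    // mask of the lane width, others all-zeros. Illustrative only.
    uint32_t fcmeqLane(float a, float b) {
      return a == b ? 0xffffffffu : 0u;
    }

    int main() {
      assert(fcmeqLane(1.0f, 1.0f) == 0xffffffffu);
      assert(fcmeqLane(1.0f, 2.0f) == 0u);
    }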
+// Logical three vector ops share opcode bits, and only use B sized elements.
+multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
+                                  SDPatternOperator OpNode = null_frag> {
+  def v8i8  : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64,
+                                      asm, ".8b",
+                 [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>;
+  def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128,
+                                      asm, ".16b",
+                 [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>;
+
+  def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
+            (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+  def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
+            (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+  def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)),
+            (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+
+  def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
+            (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+  def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
+            (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+  def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
+            (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+}
+
+multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
+                                      string asm, SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64,
+                                          asm, ".8b",
+             [(set (v8i8 V64:$dst),
+                   (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+  def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128,
+                                          asm, ".16b",
+             [(set (v16i8 V128:$dst),
+                   (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
+                           (v16i8 V128:$Rm)))]>;
+
+  def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS),
+                           (v4i16 V64:$RHS))),
+            (!cast<Instruction>(NAME#"v8i8")
+              V64:$LHS, V64:$MHS, V64:$RHS)>;
+  def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS),
+                           (v2i32 V64:$RHS))),
+            (!cast<Instruction>(NAME#"v8i8")
+              V64:$LHS, V64:$MHS, V64:$RHS)>;
+  def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS),
+                           (v1i64 V64:$RHS))),
+            (!cast<Instruction>(NAME#"v8i8")
+              V64:$LHS, V64:$MHS, V64:$RHS)>;
+
+  def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS),
+                           (v8i16 V128:$RHS))),
+            (!cast<Instruction>(NAME#"v16i8")
+              V128:$LHS, V128:$MHS, V128:$RHS)>;
+  def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS),
+                           (v4i32 V128:$RHS))),
+            (!cast<Instruction>(NAME#"v16i8")
+              V128:$LHS, V128:$MHS, V128:$RHS)>;
+  def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS),
+                           (v2i64 V128:$RHS))),
+            (!cast<Instruction>(NAME#"v16i8")
+              V128:$LHS, V128:$MHS, V128:$RHS)>;
+}
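The extra Pat records above exist because a bitwise operation is identical no matter how the register's bits are sliced into lanes, so only the .8b/.16b encodings are defined and every other vector type is retyped onto them. The same invariance in C++:

    #include <cassert>
    #include <cstdint>

    // A 64-bit AND gives the same bits whether the value is viewed as
    // 8x8-bit, 4x16-bit, 2x32-bit, or 1x64-bit lanes.
    int main() {
      uint64_t a = 0x00ff00ff00ff00ffull;
      uint64_t b = 0x0f0f0f0f0f0f0f0full;
      assert((a & b) == 0x000f000f000f000full);
    }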
+//----------------------------------------------------------------------------
+// AdvSIMD two register vector instructions.
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+                        RegisterOperand regtype, string asm, string dstkind,
+                        string srckind, list<dag> pattern>
+  : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
+      "{\t$Rd" # dstkind # ", $Rn" # srckind #
+      "|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  let Inst{31} = 0;
+  let Inst{30} = Q;
+  let Inst{29} = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21-17} = 0b10000;
+  let Inst{16-12} = opcode;
+  let Inst{11-10} = 0b10;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+                        RegisterOperand regtype, string asm, string dstkind,
+                        string srckind, list<dag> pattern>
+  : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
+      "{\t$Rd" # dstkind # ", $Rn" # srckind #
+      "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  let Inst{31} = 0;
+  let Inst{30} = Q;
+  let Inst{29} = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21-17} = 0b10000;
+  let Inst{16-12} = opcode;
+  let Inst{11-10} = 0b10;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rd;
+}
+
+// Supports B, H, and S element sizes.
+multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm,
+                            SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+                                    asm, ".8b", ".8b",
+                 [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+                                    asm, ".16b", ".16b",
+                 [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+                                    asm, ".4h", ".4h",
+                 [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+                                    asm, ".8h", ".8h",
+                 [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+  def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+                                    asm, ".2s", ".2s",
+                 [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+  def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+                                    asm, ".4s", ".4s",
+                 [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
+                            RegisterOperand regtype, string asm,
+                            string dstkind, string srckind, string amount>
+  : I<(outs V128:$Rd), (ins regtype:$Rn), asm,
+      "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
+      "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  let Inst{31} = 0;
+  let Inst{30} = Q;
+  let Inst{29-24} = 0b101110;
+  let Inst{23-22} = size;
+  let Inst{21-10} = 0b100001001110;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rd;
+}
+
+multiclass SIMDVectorLShiftLongBySizeBHS {
+  let neverHasSideEffects = 1 in {
+  def v8i8  : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64,
+                                             "shll", ".8h", ".8b", "8">;
+  def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128,
+                                             "shll2", ".8h", ".16b", "8">;
+  def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64,
+                                             "shll", ".4s", ".4h", "16">;
+  def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128,
+                                             "shll2", ".4s", ".8h", "16">;
+  def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64,
+                                             "shll", ".2d", ".2s", "32">;
+  def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 0b10, V128,
+                                             "shll2", ".2d", ".4s", "32">;
+  }
+}
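The hard-coded "8"/"16"/"32" amounts in SIMDVectorLShiftLongBySizeBHS reflect shll's semantics: each source element widens to twice its size and is shifted left by its own original width. A one-lane C++ model of the .8b-to-.8h form:

    #include <cassert>
    #include <cstdint>

    // shll widens an 8-bit lane to 16 bits and shifts left by 8
    // (by 16 for .4h->.4s, by 32 for .2s->.2d). Illustrative only.
    uint16_t shllLane8(uint8_t x) { return uint16_t(x) << 8; }

    int main() { assert(shllLane8(0x12) == 0x1200); }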
+// Supports all element sizes.
+multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm,
+                             SDPatternOperator OpNode> {
+  def v8i8_v4i16  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+                                          asm, ".4h", ".8b",
+      [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+  def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+                                          asm, ".8h", ".16b",
+      [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+  def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+                                          asm, ".2s", ".4h",
+      [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+  def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+                                          asm, ".4s", ".8h",
+      [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+  def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+                                          asm, ".1d", ".2s",
+      [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+  def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+                                          asm, ".2d", ".4s",
+      [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
+                                 SDPatternOperator OpNode> {
+  def v8i8_v4i16  : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+                                              asm, ".4h", ".8b",
+      [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),
+                                      (v8i8 V64:$Rn)))]>;
+  def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+                                              asm, ".8h", ".16b",
+      [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd),
+                                       (v16i8 V128:$Rn)))]>;
+  def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+                                              asm, ".2s", ".4h",
+      [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd),
+                                      (v4i16 V64:$Rn)))]>;
+  def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+                                              asm, ".4s", ".8h",
+      [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
+                                       (v8i16 V128:$Rn)))]>;
+  def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+                                              asm, ".1d", ".2s",
+      [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd),
+                                      (v2i32 V64:$Rn)))]>;
+  def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+                                              asm, ".2d", ".4s",
+      [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd),
+                                       (v4i32 V128:$Rn)))]>;
+}
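These "long two-vector" shapes halve the lane count while doubling the lane width (.8b in, .4h out, and so on), which is the pairwise-widening pattern of ops like uaddlp. A C++ model of one such op (the binding of concrete instructions to this multiclass happens in AArch64InstrInfo.td):

    #include <array>
    #include <cassert>
    #include <cstdint>

    // uaddlp vd.4h, vn.8b : d[i] = n[2i] + n[2i+1], widened to 16 bits.
    std::array<uint16_t, 4> uaddlp(const std::array<uint8_t, 8> &n) {
      std::array<uint16_t, 4> d{};
      for (int i = 0; i < 4; ++i)
        d[i] = uint16_t(n[2 * i]) + n[2 * i + 1];
      return d;
    }

    int main() {
      std::array<uint8_t, 8> n{1, 2, 3, 4, 5, 6, 7, 8};
      assert(uaddlp(n)[0] == 3 && uaddlp(n)[3] == 15);
    }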
+// Supports all element sizes, except 1xD.
+multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm,
+                                 SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+                                        asm, ".8b", ".8b",
+    [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>;
+  def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+                                        asm, ".16b", ".16b",
+    [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
+  def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+                                        asm, ".4h", ".4h",
+    [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>;
+  def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+                                        asm, ".8h", ".8h",
+    [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>;
+  def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+                                        asm, ".2s", ".2s",
+    [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>;
+  def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+                                        asm, ".4s", ".4s",
+    [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
+  def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128,
+                                        asm, ".2d", ".2d",
+    [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>;
+}
+
+multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
+                             SDPatternOperator OpNode = null_frag> {
+  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+                                    asm, ".8b", ".8b",
+    [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+                                    asm, ".16b", ".16b",
+    [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+                                    asm, ".4h", ".4h",
+    [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+                                    asm, ".8h", ".8h",
+    [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+  def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+                                    asm, ".2s", ".2s",
+    [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+  def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+                                    asm, ".4s", ".4s",
+    [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+  def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128,
+                                    asm, ".2d", ".2d",
+    [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
+}
+
+// Supports only B element sizes.
+multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
+                          SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDTwoSameVector<0, U, size, opc, V64,
+                                    asm, ".8b", ".8b",
+    [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+  def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128,
+                                    asm, ".16b", ".16b",
+    [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+}
+
+// Supports only B and H element sizes.
+multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
+                           SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+                                    asm, ".8b", ".8b",
+    [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>;
+  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+                                    asm, ".16b", ".16b",
+    [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>;
+  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+                                    asm, ".4h", ".4h",
+    [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>;
+  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+                                    asm, ".8h", ".8h",
+    [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
+}
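In the FP variants that follow, the TableGen brace syntax {S,0}/{S,1} concatenates bits into the two-bit size field: size{1} carries the extra opcode bit S and size{0} selects single versus double precision. The packing in C++ (names here are illustrative):

    #include <cassert>

    // Pack the {S, isDouble} pair the way the TableGen {S,0}/{S,1}
    // operands populate Inst{23-22}.
    constexpr unsigned packFPSize(unsigned S, unsigned isDouble) {
      return (S << 1) | isDouble;
    }

    int main() {
      assert(packFPSize(0, 0) == 0 && packFPSize(0, 1) == 1);
      assert(packFPSize(1, 0) == 2 && packFPSize(1, 1) == 3);
    }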
+// Supports only S and D element sizes, uses high bit of the size field
+// as an extra opcode bit.
+multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
+                           SDPatternOperator OpNode> {
+  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+                                    asm, ".2s", ".2s",
+    [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+                                    asm, ".4s", ".4s",
+    [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+                                    asm, ".2d", ".2d",
+    [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+// Supports only S element size.
+multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
+                          SDPatternOperator OpNode> {
+  def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+                                    asm, ".2s", ".2s",
+    [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+  def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+                                    asm, ".4s", ".4s",
+    [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
+                                SDPatternOperator OpNode> {
+  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+                                    asm, ".2s", ".2s",
+    [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+                                    asm, ".4s", ".4s",
+    [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+                                    asm, ".2d", ".2d",
+    [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
+                                SDPatternOperator OpNode> {
+  def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+                                    asm, ".2s", ".2s",
+    [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+  def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+                                    asm, ".4s", ".4s",
+    [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+  def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+                                    asm, ".2d", ".2d",
+    [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
+}
+
+
+class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+                             RegisterOperand inreg, RegisterOperand outreg,
+                             string asm, string outkind, string inkind,
+                             list<dag> pattern>
+  : I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
+      "{\t$Rd" # outkind # ", $Rn" # inkind #
+      "|" # outkind # "\t$Rd, $Rn}", "", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  let Inst{31} = 0;
+  let Inst{30} = Q;
+  let Inst{29} = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21-17} = 0b10000;
+  let Inst{16-12} = opcode;
+  let Inst{11-10} = 0b10;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rd;
+}
+
+class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+                                 RegisterOperand inreg, RegisterOperand outreg,
+                                 string asm, string outkind, string inkind,
+                                 list<dag> pattern>
+  : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
+      "{\t$Rd" # outkind # ", $Rn" # inkind #
+      "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  let Inst{31} = 0;
+  let Inst{30} = Q;
+  let Inst{29} = U;
+  let Inst{28-24} = 0b01110;
+  let Inst{23-22} = size;
+  let Inst{21-17} = 0b10000;
+  let Inst{16-12} = opcode;
+  let Inst{11-10} = 0b10;
+  let Inst{9-5} = Rn;
+  let Inst{4-0} = Rd;
+}
+
+multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
+                              SDPatternOperator OpNode> {
+  def v8i8  : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64,
+                                     asm, ".8b", ".8h",
+    [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+  def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128,
+                                         asm#"2", ".16b", ".8h", []>;
+  def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64,
+                                     asm, ".4h", ".4s",
+    [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+  def v8i16 :
BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128, + asm#"2", ".8h", ".4s", []>; + def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64, + asm, ".2s", ".2d", + [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>; + def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; + + def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))), + (!cast(NAME # "v16i8") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))), + (!cast(NAME # "v8i16") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))), + (!cast(NAME # "v4i32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +} + +class BaseSIMDCmpTwoVector size, bits<5> opcode, + RegisterOperand regtype, + string asm, string kind, string zero, + ValueType dty, ValueType sty, SDNode OpNode> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # + "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", + [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// Comparisons support all element sizes, except 1xD. +multiclass SIMDCmpTwoVector opc, string asm, + SDNode OpNode> { + def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64, + asm, ".8b", "0", + v8i8, v8i8, OpNode>; + def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128, + asm, ".16b", "0", + v16i8, v16i8, OpNode>; + def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64, + asm, ".4h", "0", + v4i16, v4i16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128, + asm, ".8h", "0", + v8i16, v8i16, OpNode>; + def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64, + asm, ".2s", "0", + v2i32, v2i32, OpNode>; + def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128, + asm, ".4s", "0", + v4i32, v4i32, OpNode>; + def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128, + asm, ".2d", "0", + v2i64, v2i64, OpNode>; +} + +// FP Comparisons support only S and D element sizes. 
+multiclass SIMDFPCmpTwoVector opc, + string asm, SDNode OpNode> { + + def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64, + asm, ".2s", "0.0", + v2i32, v2f32, OpNode>; + def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128, + asm, ".4s", "0.0", + v4i32, v4f32, OpNode>; + def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128, + asm, ".2d", "0.0", + v2i64, v2f64, OpNode>; + + def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; + def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; + def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; + def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDFPCvtTwoVector size, bits<5> opcode, + RegisterOperand outtype, RegisterOperand intype, + string asm, string VdTy, string VnTy, + list pattern> + : I<(outs outtype:$Rd), (ins intype:$Rn), asm, + !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class BaseSIMDFPCvtTwoVectorTied size, bits<5> opcode, + RegisterOperand outtype, RegisterOperand intype, + string asm, string VdTy, string VnTy, + list pattern> + : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm, + !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDFPWidenTwoVector opc, string asm> { + def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64, + asm, ".4s", ".4h", []>; + def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128, + asm#"2", ".4s", ".8h", []>; + def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64, + asm, ".2d", ".2s", []>; + def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128, + asm#"2", ".2d", ".4s", []>; +} + +multiclass SIMDFPNarrowTwoVector opc, string asm> { + def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128, + asm, ".4h", ".4s", []>; + def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128, + asm#"2", ".8h", ".4s", []>; + def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, + asm, ".2s", ".2d", []>; + def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; +} + +multiclass SIMDFPInexactCvtTwoVector opc, string asm, + Intrinsic OpNode> { + def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, + asm, ".2s", ".2d", + [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>; + def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, + asm#"2", ".4s", ".2d", []>; + + def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))), + (!cast(NAME # "v4f32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD three register different-size vector instructions. 
+//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDDifferentThreeVector size, bits<4> opcode, + RegisterOperand outtype, RegisterOperand intype1, + RegisterOperand intype2, string asm, + string outkind, string inkind1, string inkind2, + list pattern> + : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # + "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29} = U; let Inst{28-24} = 0b01110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDDifferentThreeVectorTied size, bits<4> opcode, + RegisterOperand outtype, RegisterOperand intype1, + RegisterOperand intype2, string asm, + string outkind, string inkind1, string inkind2, + list pattern> + : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm, + "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # + "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-12} = opcode; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +// FIXME: TableGen doesn't know how to deal with expanded types that also +// change the element count (in this case, placing the results in +// the high elements of the result register rather than the low +// elements). Until that's fixed, we can't code-gen those. +multiclass SIMDNarrowThreeVectorBHS opc, string asm, + Intrinsic IntOp> { + def v8i16_v8i8 : BaseSIMDDifferentThreeVector; + def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied; + def v4i32_v4i16 : BaseSIMDDifferentThreeVector; + def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v2i64_v2i32 : BaseSIMDDifferentThreeVector; + def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied; + + + // Patterns for the '2' variants involve INSERT_SUBREG, which you can't put in + // a version attached to an instruction. 
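+  // What the patterns below do instead: match a concat_vectors of the
+  // untouched low half ($Rd) with the narrowed result, widen $Rd to 128 bits
+  // by INSERT_SUBREG into an IMPLICIT_DEF, and feed that to the tied "2"
+  // instruction, which writes only the high half.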
+ def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), + (v8i16 V128:$Rm))), + (!cast(NAME # "v8i16_v16i8") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), + (v4i32 V128:$Rm))), + (!cast(NAME # "v4i32_v8i16") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), + (v2i64 V128:$Rm))), + (!cast(NAME # "v2i64_v4i32") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, V128:$Rm)>; +} + +multiclass SIMDDifferentThreeVectorBD opc, string asm, + Intrinsic IntOp> { + def v8i8 : BaseSIMDDifferentThreeVector; + def v16i8 : BaseSIMDDifferentThreeVector; + let Predicates = [HasCrypto] in { + def v1i64 : BaseSIMDDifferentThreeVector; + def v2i64 : BaseSIMDDifferentThreeVector; + } + + def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)), + (v8i8 (extract_high_v16i8 V128:$Rm)))), + (!cast(NAME#"v16i8") V128:$Rn, V128:$Rm)>; +} + +multiclass SIMDLongThreeVectorHS opc, string asm, + SDPatternOperator OpNode> { + def v4i16_v4i32 : BaseSIMDDifferentThreeVector; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector; +} + +multiclass SIMDLongThreeVectorBHSabdl opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector; +} + +multiclass SIMDLongThreeVectorTiedBHSabal opc, + string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; +} + +multiclass SIMDLongThreeVectorBHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector; + def v8i16_v4i32 : BaseSIMDDifferentThreeVector; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector; +} + +multiclass SIMDLongThreeVectorTiedBHS opc, + string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied; + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; +} + +multiclass SIMDLongThreeVectorSQDMLXTiedHS opc, string asm, + SDPatternOperator Accum> { + def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; + def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; + def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; +} + +multiclass SIMDWideThreeVectorBHS opc, string asm, + SDPatternOperator OpNode> { + def v8i8_v8i16 : BaseSIMDDifferentThreeVector; + def v16i8_v8i16 : BaseSIMDDifferentThreeVector; + def v4i16_v4i32 : BaseSIMDDifferentThreeVector; + def v8i16_v4i32 : 
BaseSIMDDifferentThreeVector; + def v2i32_v2i64 : BaseSIMDDifferentThreeVector; + def v4i32_v2i64 : BaseSIMDDifferentThreeVector; +} + +//---------------------------------------------------------------------------- +// AdvSIMD bitwise extract from vector +//---------------------------------------------------------------------------- + +class BaseSIMDBitwiseExtract + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" # + "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "", + [(set (vty regtype:$Rd), + (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + bits<4> imm; + let Inst{31} = 0; + let Inst{30} = size; + let Inst{29-21} = 0b101110000; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-11} = imm; + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +multiclass SIMDBitwiseExtract { + def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b"> { + let imm{3} = 0; + } + def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">; +} + +//---------------------------------------------------------------------------- +// AdvSIMD zip vector +//---------------------------------------------------------------------------- + +class BaseSIMDZipVector size, bits<3> opc, RegisterOperand regtype, + string asm, string kind, SDNode OpNode, ValueType valty> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, + "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # + "|" # kind # "\t$Rd, $Rn, $Rm}", "", + [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31} = 0; + let Inst{30} = size{0}; + let Inst{29-24} = 0b001110; + let Inst{23-22} = size{2-1}; + let Inst{21} = 0; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDZipVectoropc, string asm, + SDNode OpNode> { + def v8i8 : BaseSIMDZipVector<0b000, opc, V64, + asm, ".8b", OpNode, v8i8>; + def v16i8 : BaseSIMDZipVector<0b001, opc, V128, + asm, ".16b", OpNode, v16i8>; + def v4i16 : BaseSIMDZipVector<0b010, opc, V64, + asm, ".4h", OpNode, v4i16>; + def v8i16 : BaseSIMDZipVector<0b011, opc, V128, + asm, ".8h", OpNode, v8i16>; + def v2i32 : BaseSIMDZipVector<0b100, opc, V64, + asm, ".2s", OpNode, v2i32>; + def v4i32 : BaseSIMDZipVector<0b101, opc, V128, + asm, ".4s", OpNode, v4i32>; + def v2i64 : BaseSIMDZipVector<0b111, opc, V128, + asm, ".2d", OpNode, v2i64>; + + def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)), + (!cast(NAME#"v2i32") V64:$Rn, V64:$Rm)>; + def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)), + (!cast(NAME#"v4i32") V128:$Rn, V128:$Rm)>; + def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)), + (!cast(NAME#"v2i64") V128:$Rn, V128:$Rm)>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD three register scalar instructions +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDThreeScalar size, bits<5> opcode, + RegisterClass regtype, string asm, + list pattern> + : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, + "\t$Rd, $Rn, $Rm", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit 
Rm in 20-16 + let Inst{21} = 1; + let Inst{20-16} = Rm; let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 3 vector registers with different vector type -class NeonI_3VDiff size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDThreeScalarD opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDThreeScalar; +} + +multiclass SIMDThreeScalarBHSD opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDThreeScalar; + def v1i32 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; + def v1i8 : BaseSIMDThreeScalar; + + def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (!cast(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; + def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))), + (!cast(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>; +} + +multiclass SIMDThreeScalarHS opc, string asm, + SDPatternOperator OpNode> { + def v1i32 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; +} + +multiclass SIMDThreeScalarSD opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def #NAME#64 : BaseSIMDThreeScalar; + def #NAME#32 : BaseSIMDThreeScalar; + } + + def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; +} + +multiclass SIMDThreeScalarFPCmp opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def #NAME#64 : BaseSIMDThreeScalar; + def #NAME#32 : BaseSIMDThreeScalar; + } + + def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; +} + +class BaseSIMDThreeScalarMixed size, bits<5> opcode, + dag oops, dag iops, string asm, string cstr, list pat> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11} = 0b0; - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD two registers and an element -class NeonI_2VElem size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01111; + let Inst{21} = 1; + let Inst{20-16} = Rm; + let Inst{15-11} = opcode; + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDThreeScalarMixedHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def i16 : BaseSIMDThreeScalarMixed; + def i32 : BaseSIMDThreeScalarMixed; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDThreeScalarMixedTiedHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def i16 : BaseSIMDThreeScalarMixed; + def i32 : BaseSIMDThreeScalarMixed; +} + +//---------------------------------------------------------------------------- +// AdvSIMD two register scalar instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoScalar size, bits<5> opcode, + 
RegisterClass regtype, RegisterClass regtype2, + string asm, list pat> + : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, + "\t$Rd, $Rn", "", pat>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; let Inst{23-22} = size; - // l in Inst{21} - // m in Inst{20} - // Inherit Rm in 19-16 - let Inst{15-12} = opcode; - // h in Inst{11} - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD 1 vector register with modified immediate -class NeonI_1VModImm patterns, InstrItinClass itin> - : A64InstRd { - bits<8> Imm; - bits<4> cmode; - let Inst{31} = 0b0; - let Inst{30} = q; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDTwoScalarTied size, bits<5> opcode, + RegisterClass regtype, RegisterClass regtype2, + string asm, list pat> + : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm, + "\t$Rd, $Rn", "$Rd = $dst", pat>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDCmpTwoScalar size, bits<5> opcode, + RegisterClass regtype, string asm, string zero> + : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, + "\t$Rd, $Rn, #" # zero, "", []>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SIMDInexactCvtTwoScalar opcode, string asm> + : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", + [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-17} = 0b011111100110000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDCmpTwoScalarD opc, string asm, + SDPatternOperator OpNode> { + def v1i64rz : BaseSIMDCmpTwoScalar; + + def : Pat<(v1i64 (OpNode FPR64:$Rn)), + (!cast(NAME # v1i64rz) FPR64:$Rn)>; +} + +multiclass SIMDCmpTwoScalarSD opc, string asm, + SDPatternOperator OpNode> { + def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i32rz : BaseSIMDCmpTwoScalar; + + def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; + def : InstAlias(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; + + def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), + (!cast(NAME # v1i64rz) FPR64:$Rn)>; +} + +multiclass SIMDTwoScalarD opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i64 : BaseSIMDTwoScalar; + + def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), + (!cast(NAME # "v1i64") FPR64:$Rn)>; +} + +multiclass SIMDTwoScalarSD opc, string asm> { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; +} + +multiclass SIMDTwoScalarCVTSD opc, string asm, + SDPatternOperator OpNode> { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; +} + +multiclass SIMDTwoScalarBHSD opc, string asm, + SDPatternOperator OpNode = null_frag> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + def v1i16 : 
BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), + (!cast(NAME # v1i64) FPR64:$Rn)>; +} + +multiclass SIMDTwoScalarBHSDTied opc, string asm, + Intrinsic OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v1i64 : BaseSIMDTwoScalarTied; + def v1i32 : BaseSIMDTwoScalarTied; + def v1i16 : BaseSIMDTwoScalarTied; + def v1i8 : BaseSIMDTwoScalarTied; + } + + def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))), + (!cast(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>; +} + + + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDTwoScalarMixedBHS opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v1i32 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; +} + +//---------------------------------------------------------------------------- +// AdvSIMD scalar pairwise instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDPairwiseScalar size, bits<5> opcode, + RegisterOperand regtype, RegisterOperand vectype, + string asm, string kind> + : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, + "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDPairwiseScalarD opc, string asm> { + def v2i64p : BaseSIMDPairwiseScalar; +} + +multiclass SIMDPairwiseScalarSD opc, string asm> { + def v2i32p : BaseSIMDPairwiseScalar; + def v2i64p : BaseSIMDPairwiseScalar; +} + +//---------------------------------------------------------------------------- +// AdvSIMD across lanes instructions +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDAcrossLanes size, bits<5> opcode, + RegisterClass regtype, RegisterOperand vectype, + string asm, string kind, list pattern> + : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, + "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDAcrossLanesBHS opcode, + string asm> { + def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64, + asm, ".8b", []>; + def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128, + asm, ".16b", []>; + def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64, + asm, ".4h", []>; + def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128, + asm, ".8h", []>; + def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128, + asm, ".4s", []>; +} + +multiclass SIMDAcrossLanesHSD opcode, string asm> { + def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64, + asm, ".8b", []>; + def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128, + asm, ".16b", []>; + def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64, + asm, ".4h", []>; + def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128, + asm, ".8h", []>; + def v4i32v : BaseSIMDAcrossLanes<1, U, 
0b10, opcode, FPR64, V128, + asm, ".4s", []>; +} + +multiclass SIMDAcrossLanesS opcode, bit sz1, string asm, + Intrinsic intOp> { + def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, + asm, ".4s", + [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD INS/DUP instructions +//---------------------------------------------------------------------------- + +// FIXME: There has got to be a better way to factor these. ugh. + +class BaseSIMDInsDup pattern> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; let Inst{29} = op; + let Inst{28-21} = 0b01110000; + let Inst{15} = 0; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +class SIMDDupFromMain imm5, string size, ValueType vectype, + RegisterOperand vecreg, RegisterClass regtype> + : BaseSIMDInsDup { + let Inst{20-16} = imm5; + let Inst{14-11} = 0b0001; +} + +class SIMDDupFromElement + : BaseSIMDInsDup { + let Inst{14-11} = 0b0000; +} + +class SIMDDup64FromElement + : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128, + VectorIndexD, i64, AArch64duplane64> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; +} + +class SIMDDup32FromElement + : SIMDDupFromElement { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; +} + +class SIMDDup16FromElement + : SIMDDupFromElement { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; +} + +class SIMDDup8FromElement + : SIMDDupFromElement { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; +} + +class BaseSIMDMov imm4, RegisterClass regtype, + Operand idxtype, string asm, list pattern> + : BaseSIMDInsDup { + let Inst{14-11} = imm4; +} + +class SIMDSMov + : BaseSIMDMov; +class SIMDUMov + : BaseSIMDMov; + +class SIMDMovAlias + : InstAlias; + +multiclass SMov { + def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } +} + +multiclass UMov { + def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + } + def : SIMDMovAlias<"mov", ".s", + !cast(NAME#"vi32"), + GPR32, VectorIndexS>; + def : SIMDMovAlias<"mov", ".d", + !cast(NAME#"vi64"), + GPR64, VectorIndexD>; +} + +class SIMDInsFromMain + : BaseSIMDInsDup<1, 0, (outs V128:$dst), + (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins", + "{\t$Rd" # size # "$idx, $Rn" # + "|" # size # "\t$Rd$idx, $Rn}", + "$Rd = $dst", + [(set V128:$dst, + (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> { + let Inst{14-11} = 0b0011; +} 
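+
+// Note on the imm5 encodings used by the DUP/SMOV/UMOV/INS classes above and
+// below: the lowest set bit of Inst{20-16} selects the element size
+// (xxxx1 = B, xxx10 = H, xx100 = S, x1000 = D) and the bits above it hold the
+// lane index, which is why each instantiation splices its idx field into a
+// different slice of Inst{20-16}.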
+ +class SIMDInsFromElement + : BaseSIMDInsDup<1, 1, (outs V128:$dst), + (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins", + "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" # + "|" # size # "\t$Rd$idx, $Rn$idx2}", + "$Rd = $dst", + [(set V128:$dst, + (vector_insert + (vectype V128:$Rd), + (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)), + idxtype:$idx))]>; + +class SIMDInsMainMovAlias + : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # + "|" # size #"\t$dst$idx, $src}", + (inst V128:$dst, idxtype:$idx, regtype:$src)>; +class SIMDInsElementMovAlias + : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # + # "|" # size #" $dst$idx, $src$idx2}", + (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; + + +multiclass SIMDIns { + def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + } + + def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> { + bits<4> idx; + bits<4> idx2; + let Inst{20-17} = idx; + let Inst{16} = 1; + let Inst{14-11} = idx2; + } + def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> { + bits<3> idx; + bits<3> idx2; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + let Inst{14-12} = idx2; + let Inst{11} = 0; + } + def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { + bits<2> idx; + bits<2> idx2; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + let Inst{14-13} = idx2; + let Inst{12-11} = 0; + } + def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { + bits<1> idx; + bits<1> idx2; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + let Inst{14} = idx2; + let Inst{13-11} = 0; + } + + // For all forms of the INS instruction, the "mov" mnemonic is the + // preferred alias. Why they didn't just call the instruction "mov" in + // the first place is a very good question indeed... 
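+  // (Each alias below maps the "mov" spelling onto the corresponding INS
+  // record, so e.g. "mov v0.s[1], w1" and "ins v0.s[1], w1" assemble to the
+  // same encoding.)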
+ def : SIMDInsMainMovAlias<".b", !cast(NAME#"vi8gpr"), + GPR32, VectorIndexB>; + def : SIMDInsMainMovAlias<".h", !cast(NAME#"vi16gpr"), + GPR32, VectorIndexH>; + def : SIMDInsMainMovAlias<".s", !cast(NAME#"vi32gpr"), + GPR32, VectorIndexS>; + def : SIMDInsMainMovAlias<".d", !cast(NAME#"vi64gpr"), + GPR64, VectorIndexD>; + + def : SIMDInsElementMovAlias<".b", !cast(NAME#"vi8lane"), + VectorIndexB>; + def : SIMDInsElementMovAlias<".h", !cast(NAME#"vi16lane"), + VectorIndexH>; + def : SIMDInsElementMovAlias<".s", !cast(NAME#"vi32lane"), + VectorIndexS>; + def : SIMDInsElementMovAlias<".d", !cast(NAME#"vi64lane"), + VectorIndexD>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD TBL/TBX +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDTableLookup len, bit op, RegisterOperand vectype, + RegisterOperand listtype, string asm, string kind> + : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm, + "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>, + Sched<[WriteV]> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-21} = 0b001110000; + let Inst{20-16} = Vm; + let Inst{15} = 0; + let Inst{14-13} = len; + let Inst{12} = op; + let Inst{11-10} = 0b00; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDTableLookupTied len, bit op, RegisterOperand vectype, + RegisterOperand listtype, string asm, string kind> + : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm, + "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>, + Sched<[WriteV]> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29-21} = 0b001110000; + let Inst{20-16} = Vm; + let Inst{15} = 0; + let Inst{14-13} = len; + let Inst{12} = op; + let Inst{11-10} = 0b00; + let Inst{9-5} = Vn; + let Inst{4-0} = Vd; +} + +class SIMDTableLookupAlias + : InstAlias; + +multiclass SIMDTableLookup { + def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b, + asm, ".8b">; + def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b, + asm, ".8b">; + def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b, + asm, ".8b">; + def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b, + asm, ".8b">; + def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b, + asm, ".16b">; + def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b, + asm, ".16b">; + def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b, + asm, ".16b">; + def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b, + asm, ".16b">; + + def : SIMDTableLookupAlias(NAME#"v8i8One"), + V64, VecListOne128>; + def : SIMDTableLookupAlias(NAME#"v8i8Two"), + V64, VecListTwo128>; + def : SIMDTableLookupAlias(NAME#"v8i8Three"), + V64, VecListThree128>; + def : SIMDTableLookupAlias(NAME#"v8i8Four"), + V64, VecListFour128>; + def : SIMDTableLookupAlias(NAME#"v16i8One"), + V128, VecListOne128>; + def : SIMDTableLookupAlias(NAME#"v16i8Two"), + V128, VecListTwo128>; + def : SIMDTableLookupAlias(NAME#"v16i8Three"), + V128, VecListThree128>; + def : SIMDTableLookupAlias(NAME#"v16i8Four"), + V128, VecListFour128>; +} + +multiclass SIMDTableLookupTied { + def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b, + asm, ".8b">; + def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, 
VecListTwo16b, + asm, ".8b">; + def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b, + asm, ".8b">; + def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b, + asm, ".8b">; + def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b, + asm, ".16b">; + def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b, + asm, ".16b">; + def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b, + asm, ".16b">; + def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b, + asm, ".16b">; + + def : SIMDTableLookupAlias(NAME#"v8i8One"), + V64, VecListOne128>; + def : SIMDTableLookupAlias(NAME#"v8i8Two"), + V64, VecListTwo128>; + def : SIMDTableLookupAlias(NAME#"v8i8Three"), + V64, VecListThree128>; + def : SIMDTableLookupAlias(NAME#"v8i8Four"), + V64, VecListFour128>; + def : SIMDTableLookupAlias(NAME#"v16i8One"), + V128, VecListOne128>; + def : SIMDTableLookupAlias(NAME#"v16i8Two"), + V128, VecListTwo128>; + def : SIMDTableLookupAlias(NAME#"v16i8Three"), + V128, VecListThree128>; + def : SIMDTableLookupAlias(NAME#"v16i8Four"), + V128, VecListFour128>; +} + + +//---------------------------------------------------------------------------- +// AdvSIMD scalar CPY +//---------------------------------------------------------------------------- +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDScalarCPY + : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov", + "{\t$dst, $src" # kind # "$idx" # + "|\t$dst, $src$idx}", "", []>, + Sched<[WriteV]> { + bits<5> dst; + bits<5> src; + let Inst{31-21} = 0b01011110000; + let Inst{15-10} = 0b000001; + let Inst{9-5} = src; + let Inst{4-0} = dst; +} + +class SIMDScalarCPYAlias + : InstAlias; + + +multiclass SIMDScalarCPY { + def i8 : BaseSIMDScalarCPY { + bits<4> idx; + let Inst{20-17} = idx; + let Inst{16} = 1; + } + def i16 : BaseSIMDScalarCPY { + bits<3> idx; + let Inst{20-18} = idx; + let Inst{17-16} = 0b10; + } + def i32 : BaseSIMDScalarCPY { + bits<2> idx; + let Inst{20-19} = idx; + let Inst{18-16} = 0b100; + } + def i64 : BaseSIMDScalarCPY { + bits<1> idx; + let Inst{20} = idx; + let Inst{19-16} = 0b1000; + } + + def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src), + VectorIndexD:$idx)))), + (!cast(NAME # i64) V128:$src, VectorIndexD:$idx)>; + + // 'DUP' mnemonic aliases. 
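+  // (BaseSIMDScalarCPY spells these instructions "mov", so the aliases below
+  // simply accept the architectural "dup" scalar form, e.g. "dup b0, v1.b[2]",
+  // for the same encodings.)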
+ def : SIMDScalarCPYAlias<"dup", ".b", + !cast(NAME#"i8"), + FPR8, V128, VectorIndexB>; + def : SIMDScalarCPYAlias<"dup", ".h", + !cast(NAME#"i16"), + FPR16, V128, VectorIndexH>; + def : SIMDScalarCPYAlias<"dup", ".s", + !cast(NAME#"i32"), + FPR32, V128, VectorIndexS>; + def : SIMDScalarCPYAlias<"dup", ".d", + !cast(NAME#"i64"), + FPR64, V128, VectorIndexD>; +} + +//---------------------------------------------------------------------------- +// AdvSIMD modified immediate instructions +//---------------------------------------------------------------------------- + +class BaseSIMDModifiedImm pattern> + : I, + Sched<[WriteV]> { + bits<5> Rd; + bits<8> imm8; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = op; let Inst{28-19} = 0b0111100000; + let Inst{18-16} = imm8{7-5}; + let Inst{11-10} = 0b01; + let Inst{9-5} = imm8{4-0}; + let Inst{4-0} = Rd; +} + +class BaseSIMDModifiedImmVector pattern> + : BaseSIMDModifiedImm { + let DecoderMethod = "DecodeModImmInstruction"; +} + +class BaseSIMDModifiedImmVectorTied pattern> + : BaseSIMDModifiedImm { + let DecoderMethod = "DecodeModImmTiedInstruction"; +} + +class BaseSIMDModifiedImmVectorShift b15_b12, + RegisterOperand vectype, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVector { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14-13} = shift; + let Inst{12} = b15_b12{0}; +} + +class BaseSIMDModifiedImmVectorShiftTied b15_b12, + RegisterOperand vectype, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVectorTied { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14-13} = shift; + let Inst{12} = b15_b12{0}; +} + + +class BaseSIMDModifiedImmVectorShiftHalf b15_b12, + RegisterOperand vectype, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVector { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14} = 0; + let Inst{13} = shift{0}; + let Inst{12} = b15_b12{0}; +} + +class BaseSIMDModifiedImmVectorShiftHalfTied b15_b12, + RegisterOperand vectype, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVectorTied { + bits<2> shift; + let Inst{15} = b15_b12{1}; + let Inst{14} = 0; + let Inst{13} = shift{0}; + let Inst{12} = b15_b12{0}; +} + +multiclass SIMDModifiedImmVectorShift hw_cmode, bits<2> w_cmode, + string asm> { + def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64, + asm, ".4h", []>; + def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128, + asm, ".8h", []>; + + def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64, + asm, ".2s", []>; + def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128, + asm, ".4s", []>; +} + +multiclass SIMDModifiedImmVectorShiftTied hw_cmode, + bits<2> w_cmode, string asm, + SDNode OpNode> { + def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64, + asm, ".4h", + [(set (v4i16 V64:$dst), (OpNode V64:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128, + asm, ".8h", + [(set (v8i16 V128:$dst), (OpNode V128:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + + def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64, + asm, ".2s", + [(set (v2i32 V64:$dst), (OpNode V64:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; + def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128, + asm, ".4s", + [(set (v4i32 V128:$dst), (OpNode V128:$Rd, + imm0_255:$imm8, + (i32 imm:$shift)))]>; +} + +class SIMDModifiedImmMoveMSL cmode, + RegisterOperand vectype, string asm, + string kind, list pattern> 
+ : BaseSIMDModifiedImmVector { + bits<1> shift; + let Inst{15-13} = cmode{3-1}; + let Inst{12} = shift; +} + +class SIMDModifiedImmVectorNoShift cmode, + RegisterOperand vectype, + Operand imm_type, string asm, + string kind, list pattern> + : BaseSIMDModifiedImmVector { let Inst{15-12} = cmode; - let Inst{11} = 0b0; // o2 - let Inst{10} = 1; - // Inherit Rd in 4-0 - let Inst{18-16} = Imm{7-5}; // imm a:b:c - let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h } -// Format AdvSIMD 3 scalar registers with same type +class SIMDModifiedImmScalarNoShift cmode, string asm, + list pattern> + : BaseSIMDModifiedImm { + let Inst{15-12} = cmode; + let DecoderMethod = "DecodeModImmInstruction"; +} -class NeonI_Scalar3Same size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11110; +//---------------------------------------------------------------------------- +// AdvSIMD indexed element +//---------------------------------------------------------------------------- + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDIndexed size, bits<4> opc, + RegisterOperand dst_reg, RegisterOperand lhs_reg, + RegisterOperand rhs_reg, Operand vec_idx, string asm, + string apple_kind, string dst_kind, string lhs_kind, + string rhs_kind, list pattern> + : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), + asm, + "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # + "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28} = Scalar; + let Inst{27-24} = 0b1111; let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 + // Bit 21 must be set by the derived class. + let Inst{20-16} = Rm; + let Inst{15-12} = opc; + // Bit 11 must be set by the derived class. + let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; } +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDIndexedTied size, bits<4> opc, + RegisterOperand dst_reg, RegisterOperand lhs_reg, + RegisterOperand rhs_reg, Operand vec_idx, string asm, + string apple_kind, string dst_kind, string lhs_kind, + string rhs_kind, list pattern> + : I<(outs dst_reg:$dst), + (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm, + "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # + "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; -// Format AdvSIMD 2 vector registers miscellaneous -class NeonI_2VMisc size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28} = Scalar; + let Inst{27-24} = 0b1111; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; + // Bit 21 must be set by the derived class. + let Inst{20-16} = Rm; + let Inst{15-12} = opc; + // Bit 11 must be set by the derived class. 
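+  // (For example, the v4i16_indexed defs later in this file set
+  // Inst{11} = idx{2} and Inst{21-20} = idx{1-0}, splitting the H-lane index
+  // across the two holes deliberately left open here.)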
+ let Inst{10} = 0; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +multiclass SIMDFPIndexedSD opc, string asm, + SDPatternOperator OpNode> { + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2f32 V64:$Rd), + (OpNode (v2f32 V64:$Rn), + (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4f32 V128:$Rd), + (OpNode (v4f32 V128:$Rn), + (v4f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc, + V128, V128, + V128, VectorIndexD, + asm, ".2d", ".2d", ".2d", ".d", + [(set (v2f64 V128:$Rd), + (OpNode (v2f64 V128:$Rn), + (v2f64 (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (f32 FPR32Op:$Rd), + (OpNode (f32 FPR32Op:$Rn), + (f32 (vector_extract (v4f32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc, + FPR64Op, FPR64Op, V128, VectorIndexD, + asm, ".d", "", "", ".d", + [(set (f64 FPR64Op:$Rd), + (OpNode (f64 FPR64Op:$Rn), + (f64 (vector_extract (v2f64 V128:$Rm), + VectorIndexD:$idx))))]> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } +} + +multiclass SIMDFPIndexedSDTiedPatterns { + // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64duplane32 (v4f32 V128:$Rm), + VectorIndexS:$idx))), + (!cast(INST # v2i32_indexed) + V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64dup (f32 FPR32Op:$Rm)))), + (!cast(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + + // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64duplane32 (v4f32 V128:$Rm), + VectorIndexS:$idx))), + (!cast(INST # "v4i32_indexed") + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64dup (f32 FPR32Op:$Rm)))), + (!cast(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. 
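+  // (As in the .2s/.4s cases above, the DUP-scalar form is handled by moving
+  // the scalar into lane 0 with SUBREG_TO_REG and then indexing lane 0.)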
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64duplane64 (v2f64 V128:$Rm), + VectorIndexD:$idx))), + (!cast(INST # "v2i64_indexed") + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64dup (f64 FPR64Op:$Rm)))), + (!cast(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; + + // 2 variants for 32-bit scalar version: extract from .2s or from .4s + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))), + (!cast(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, + V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))), + (!cast(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; + + // 1 variant for 64-bit scalar version: extract from .1d or from .2d + def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), + (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))), + (!cast(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn, + V128:$Rm, VectorIndexD:$idx)>; +} + +multiclass SIMDFPIndexedSDTied opc, string asm> { + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc, + V128, V128, + V128, VectorIndexD, + asm, ".2d", ".2d", ".2d", ".d", []> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } + + + def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc, + FPR64Op, FPR64Op, V128, VectorIndexD, + asm, ".d", "", "", ".d", []> { + bits<1> idx; + let Inst{11} = idx{0}; + let Inst{21} = 0; + } +} + +multiclass SIMDIndexedHS opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$Rd), + (OpNode (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$Rd), + 
(OpNode (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR32Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i32 FPR32Op:$Rd), + (OpNode FPR32Op:$Rn, + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedHS opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$Rd), + (OpNode (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$Rd), + (OpNode (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedHSTied opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V64, V64, + V128, VectorIndexS, + asm, ".2s", ".2s", ".2s", ".s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm, ".4s", ".4s", ".4s", ".s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (v4i32 (AArch64duplane32 
(v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDIndexedLongSD opc, string asm, + SDPatternOperator OpNode> { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, + FPR32Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, + FPR64Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", []> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, + SDPatternOperator Accum> { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqdmull + (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an + // intermediate EXTRACT_SUBREG would be untyped. 
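+  // The workaround below instead performs the accumulation with the vector
+  // v4i16_indexed instruction on a SUBREG_TO_REG-widened $Rd, then extracts
+  // ssub from the result to recover the scalar.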
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), + (i32 (vector_extract (v4i32 + (int_aarch64_neon_sqdmull (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx)))), + (i64 0))))), + (EXTRACT_SUBREG + (!cast(NAME # v4i16_indexed) + (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn, + V128_lo:$Rm, VectorIndexH:$idx), + ssub)>; + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$dst), + (Accum (v4i32 V128:$Rd), + (v4i32 (int_aarch64_neon_sqdmull + (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 + (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$dst), + (Accum (v2i64 V128:$Rd), + (v2i64 (int_aarch64_neon_sqdmull + (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$dst), + (Accum (v2i64 V128:$Rd), + (v2i64 (int_aarch64_neon_sqdmull + (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 + (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, + FPR32Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + + def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, + FPR64Op, FPR32Op, V128, VectorIndexS, + asm, ".s", "", "", ".s", + [(set (i64 FPR64Op:$dst), + (Accum (i64 FPR64Op:$Rd), + (i64 (int_aarch64_neon_sqdmulls_scalar + (i32 FPR32Op:$Rn), + (i32 (vector_extract (v4i32 V128:$Rm), + VectorIndexS:$idx))))))]> { + + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } +} + +multiclass SIMDVectorIndexedLongSD opc, string asm, + SDPatternOperator OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", 
".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + } +} + +multiclass SIMDVectorIndexedLongSDTied opc, string asm, + SDPatternOperator OpNode> { + let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, + V128, V64, + V128_lo, VectorIndexH, + asm, ".4s", ".4s", ".4h", ".h", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), + (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, + V128, V128, + V128_lo, VectorIndexH, + asm#"2", ".4s", ".4s", ".8h", ".h", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), + (extract_high_v8i16 V128:$Rn), + (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, + V128, V64, + V128, VectorIndexS, + asm, ".2d", ".2d", ".2s", ".s", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), + (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + + def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, + V128, V128, + V128, VectorIndexS, + asm#"2", ".2d", ".2d", ".4s", ".s", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), + (extract_high_v4i32 V128:$Rn), + (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm), + VectorIndexS:$idx))))]> { + bits<2> idx; + let Inst{11} = idx{1}; + let Inst{21} = idx{0}; + } + } +} + +//---------------------------------------------------------------------------- +// AdvSIMD scalar shift by immediate +//---------------------------------------------------------------------------- + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDScalarShift opc, bits<7> fixed_imm, + RegisterClass regtype1, RegisterClass regtype2, + Operand immtype, string asm, list pattern> + : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<7> imm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-23} = 0b111110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDScalarShiftTied opc, bits<7> fixed_imm, + RegisterClass regtype1, RegisterClass regtype2, + Operand immtype, string asm, list pattern> + : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm), + asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + bits<7> imm; + let Inst{31-30} = 0b01; + let Inst{29} = U; + let Inst{28-23} = 0b111110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} + + +multiclass SIMDScalarRShiftSD opc, string asm> { + def s : BaseSIMDScalarShift { + let Inst{20-16} = imm{4-0}; + } + + def d : BaseSIMDScalarShift { + let Inst{21-16} = imm{5-0}; + } +} + +multiclass SIMDScalarRShiftD opc, string asm, + SDPatternOperator OpNode> { + 
+  def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+                              FPR64, FPR64, vecshiftR64, asm,
+    [(set (i64 FPR64:$Rd),
+        (OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> {
+    let Inst{21-16} = imm{5-0};
+  }
+
+  def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))),
+            (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>;
+}
+
+multiclass SIMDScalarRShiftDTied<bit U, bits<5> opc, string asm,
+                                 SDPatternOperator OpNode = null_frag> {
+  def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?},
+                              FPR64, FPR64, vecshiftR64, asm,
+    [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn),
+                                    (i32 vecshiftR64:$imm)))]> {
+    let Inst{21-16} = imm{5-0};
+  }
+
+  def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+                           (i32 vecshiftR64:$imm))),
+            (!cast<Instruction>(NAME # "d") FPR64:$Rd, FPR64:$Rn,
+                                            vecshiftR64:$imm)>;
+}
+
+multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm,
+                             SDPatternOperator OpNode> {
+  def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+                              FPR64, FPR64, vecshiftL64, asm,
+    [(set (v1i64 FPR64:$Rd),
+        (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
+    let Inst{21-16} = imm{5-0};
+  }
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm> {
+  def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?},
+                              FPR64, FPR64, vecshiftL64, asm, []> {
+    let Inst{21-16} = imm{5-0};
+  }
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
+                               SDPatternOperator OpNode = null_frag> {
+  def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+                              FPR8, FPR8, vecshiftR8, asm, []> {
+    let Inst{18-16} = imm{2-0};
+  }
+
+  def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+                              FPR16, FPR16, vecshiftR16, asm, []> {
+    let Inst{19-16} = imm{3-0};
+  }
+
+  def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+                              FPR32, FPR32, vecshiftR32, asm,
+    [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn), (i32 vecshiftR32:$imm)))]> {
+    let Inst{20-16} = imm{4-0};
+  }
+}
+
+multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm,
+                                SDPatternOperator OpNode> {
+  def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+                              FPR8, FPR8, vecshiftL8, asm, []> {
+    let Inst{18-16} = imm{2-0};
+  }
+
+  def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+                              FPR16, FPR16, vecshiftL16, asm, []> {
+    let Inst{19-16} = imm{3-0};
+  }
+
+  def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+                              FPR32, FPR32, vecshiftL32, asm,
+    [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn), (i32 vecshiftL32:$imm)))]> {
+    let Inst{20-16} = imm{4-0};
+  }
+
+  def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+                              FPR64, FPR64, vecshiftL64, asm,
+    [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
+    let Inst{21-16} = imm{5-0};
+  }
+
+  def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))),
+            (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>;
+}
-  // Inherit Rn in 9-5
-  // Inherit Rd in 4-0
+multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> {
+  def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+                              FPR8, FPR8, vecshiftR8, asm, []> {
+    let Inst{18-16} = imm{2-0};
+  }
+
+  def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+                              FPR16, FPR16, vecshiftR16, asm, []> {
+    let Inst{19-16} = imm{3-0};
+  }
+
+  def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+                              FPR32, FPR32, vecshiftR32, asm, []> {
+    let Inst{20-16} = imm{4-0};
+  }
+
+  def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+                              FPR64, FPR64, vecshiftR64, asm, []> {
+    let Inst{21-16} = imm{5-0};
+  }
 }
-// Format AdvSIMD 2 vector 1 immediate shift
-class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
-                       dag outs, dag ins, string asmstr,
-                       list<dag> patterns, InstrItinClass itin>
-  : A64InstRdn<outs, ins, asmstr, patterns, itin> {
-  bits<7> Imm;
-  let Inst{31} = 0b0;
-  let Inst{30} = q;
-  let Inst{29} = u;
+//----------------------------------------------------------------------------
+// AdvSIMD vector shift by immediate
+//----------------------------------------------------------------------------
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
+                          RegisterOperand dst_reg, RegisterOperand src_reg,
+                          Operand immtype,
+                          string asm, string dst_kind, string src_kind,
+                          list<dag> pattern>
+  : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
+      asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
+           "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
+    Sched<[WriteV]> {
+  bits<5> Rd;
+  bits<5> Rn;
+  let Inst{31} = 0;
+  let Inst{30} = Q;
+  let Inst{29} = U;
   let Inst{28-23} = 0b011110;
-  let Inst{22-16} = Imm;
-  let Inst{15-11} = opcode;
-  let Inst{10} = 0b1;
-
-  // Inherit Rn in 9-5
-  // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD duplicate and insert
-class NeonI_copy<bit q, bit op, bits<4> imm4,
-                 dag outs, dag ins, string asmstr,
-                 list<dag> patterns, InstrItinClass itin>
-  : A64InstRdn<outs, ins, asmstr, patterns, itin> {
-  bits<5> Imm5;
-  let Inst{31} = 0b0;
-  let Inst{30} = q;
-  let Inst{29} = op;
-  let Inst{28-21} = 0b01110000;
-  let Inst{20-16} = Imm5;
-  let Inst{15} = 0b0;
-  let Inst{14-11} = imm4;
-  let Inst{10} = 0b1;
-
-  // Inherit 
Rn in 9-5 - // Inherit Rd in 4-0 -} -// Format AdvSIMD insert from element to vector -class NeonI_insert patterns, InstrItinClass itin> - : A64InstRdn { - bits<5> Imm5; - bits<4> Imm4; - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{20-16} = Imm5; - let Inst{15} = 0b0; - let Inst{14-11} = Imm4; - let Inst{10} = 0b1; - - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar pairwise -class NeonI_ScalarPair size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; +} - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in +class BaseSIMDVectorShiftTied opc, bits<7> fixed_imm, + RegisterOperand vectype1, RegisterOperand vectype2, + Operand immtype, + string asm, string dst_kind, string src_kind, + list pattern> + : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm), + asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # + "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>, + Sched<[WriteV]> { + bits<5> Rd; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; + let Inst{29} = U; + let Inst{28-23} = 0b011110; + let Inst{22-16} = fixed_imm; + let Inst{15-11} = opc; + let Inst{10} = 1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; } -// Format AdvSIMD 2 vector across lanes -class NeonI_2VAcross size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29} = u; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; +multiclass SIMDVectorRShiftSD opc, string asm, + Intrinsic OpNode> { + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +multiclass SIMDVectorRShiftSDToFP opc, string asm, + Intrinsic OpNode> { + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2f64 
V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
+    bits<6> imm;
+    let Inst{21-16} = imm;
+  }
 }
-// Format AdvSIMD scalar two registers miscellaneous
-class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode, dag outs, dag ins,
-                            string asmstr, list<dag> patterns, InstrItinClass itin>
-  : A64InstRdn<outs, ins, asmstr, patterns, itin> {
-  let Inst{31} = 0b0;
-  let Inst{30} = 0b1;
-  let Inst{29} = u;
-  let Inst{28-24} = 0b11110;
-  let Inst{23-22} = size;
-  let Inst{21-17} = 0b10000;
-  let Inst{16-12} = opcode;
-  let Inst{11-10} = 0b10;
-  // Inherit Rn in 9-5
-  // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD vector load/store multiple N-element structure
-class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size,
-                     dag outs, dag ins, string asmstr,
-                     list<dag> patterns, InstrItinClass itin>
-  : A64InstRtn<outs, ins, asmstr, patterns, itin>
-{
-  let Inst{31} = 0b0;
-  let Inst{30} = q;
+multiclass SIMDVectorRShiftNarrowBHS<bit U, bits<5> opc, string asm,
+                                     SDPatternOperator OpNode> {
+  def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+                                       V64, V128, vecshiftR16Narrow,
+                                       asm, ".8b", ".8h",
+      [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))]> {
+    bits<3> imm;
+    let Inst{18-16} = imm;
+  }
+
+  def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
+                                            V128, V128, vecshiftR16Narrow,
+                                            asm#"2", ".16b", ".8h", []> {
+    bits<3> imm;
+    let Inst{18-16} = imm;
+    let hasSideEffects = 0;
+  }
+
+  def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+                                        V64, V128, vecshiftR32Narrow,
+                                        asm, ".4h", ".4s",
+      [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> {
+    bits<4> imm;
+    let Inst{19-16} = imm;
+  }
+
+  def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
+                                            V128, V128, vecshiftR32Narrow,
+                                            asm#"2", ".8h", ".4s", []> {
+    bits<4> imm;
+    let Inst{19-16} = imm;
+    let hasSideEffects = 0;
+  }
+
+  def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+                                        V64, V128, vecshiftR64Narrow,
+                                        asm, ".2s", ".2d",
+      [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> {
+    bits<5> imm;
+    let Inst{20-16} = imm;
+  }
+
+  def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
+                                            V128, V128, vecshiftR64Narrow,
+                                            asm#"2", ".4s", ".2d", []> {
+    bits<5> imm;
+    let Inst{20-16} = imm;
+    let hasSideEffects = 0;
+  }
+
+  // TableGen doesn't like patterns w/ INSERT_SUBREG on the instructions
+  // themselves, so put them here instead.
+
+  // Patterns involving what's effectively an insert high and a normal
+  // intrinsic, represented by CONCAT_VECTORS.
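+  // For example, "shrn2 v0.16b, v1.8h, #3" writes the narrowed result to
+  // the high half of v0 and leaves the low half alone, so the IR-level
+  // form is a concat_vectors of the original low half with the narrowed
+  // value; the patterns below fold that onto the tied "2" variants.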
+ def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn), + vecshiftR16Narrow:$imm)), + (!cast(NAME # "v16i8_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR16Narrow:$imm)>; + def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), + vecshiftR32Narrow:$imm)), + (!cast(NAME # "v8i16_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR32Narrow:$imm)>; + def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), + vecshiftR64Narrow:$imm)), + (!cast(NAME # "v4i32_shift") + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), + V128:$Rn, vecshiftR64Narrow:$imm)>; +} + +multiclass SIMDVectorLShiftBHSD opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftL8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftL16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftL32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftL64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), + (i32 vecshiftL64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorRShiftBHSD opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftR8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftR8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + 
V64, V64, vecshiftR32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), + (i32 vecshiftR64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDVectorRShiftBHSDTied opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftR8, asm, ".8b", ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftR8, asm, ".16b", ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), + (i32 vecshiftR8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, asm, ".4h", ".4h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, asm, ".8h", ".8h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (i32 vecshiftR16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftR32, asm, ".2s", ".2s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftR32, asm, ".4s", ".4s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (i32 vecshiftR32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftR64, + asm, ".2d", ".2d", [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), + (i32 vecshiftR64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorLShiftBHSDTied opc, string asm, + SDPatternOperator OpNode = null_frag> { + def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, + V64, V64, vecshiftL8, + asm, ".8b", ".8b", + [(set (v8i8 V64:$dst), + (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm, ".16b", ".16b", + [(set (v16i8 V128:$dst), + (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), + (i32 vecshiftL8:$imm)))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftL16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$dst), + (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = 
imm; + } + + def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$dst), + (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), + (i32 vecshiftL16:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, + V64, V64, vecshiftL32, + asm, ".2s", ".2s", + [(set (v2i32 V64:$dst), + (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm, ".4s", ".4s", + [(set (v4i32 V128:$dst), + (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), + (i32 vecshiftL32:$imm)))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, + V128, V128, vecshiftL64, + asm, ".2d", ".2d", + [(set (v2i64 V128:$dst), + (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), + (i32 vecshiftL64:$imm)))]> { + bits<6> imm; + let Inst{21-16} = imm; + } +} + +multiclass SIMDVectorLShiftLongBHSD opc, string asm, + SDPatternOperator OpNode> { + def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, + V128, V64, vecshiftL8, asm, ".8h", ".8b", + [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, + V128, V128, vecshiftL8, + asm#"2", ".8h", ".16b", + [(set (v8i16 V128:$Rd), + (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> { + bits<3> imm; + let Inst{18-16} = imm; + } + + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V128, V64, vecshiftL16, asm, ".4s", ".4h", + [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftL16, + asm#"2", ".4s", ".8h", + [(set (v4i32 V128:$Rd), + (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> { + + bits<4> imm; + let Inst{19-16} = imm; + } + + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, + V128, V64, vecshiftL32, asm, ".2d", ".2s", + [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> { + bits<5> imm; + let Inst{20-16} = imm; + } + + def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, + V128, V128, vecshiftL32, + asm#"2", ".2d", ".4s", + [(set (v2i64 V128:$Rd), + (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> { + bits<5> imm; + let Inst{20-16} = imm; + } +} + + +//--- +// Vector load/store +//--- +// SIMD ldX/stX no-index memory references don't allow the optional +// ", #0" constant and handle post-indexing explicitly, so we use +// a more specialized parse method for them. Otherwise, it's the same as +// the general GPR64sp handling. 
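+// For example, "ld1 { v0.8b }, [x0]" is accepted but "ld1 { v0.8b }, [x0, #0]"
+// is not, and post-indexing is written explicitly as "[x0], #8" (an immediate
+// matching the transfer size) or "[x0], x2" (a register offset).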
+ +class BaseSIMDLdSt opcode, bits<2> size, + string asm, dag oops, dag iops, list pattern> + : I { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{30} = Q; let Inst{29-23} = 0b0011000; - let Inst{22} = l; + let Inst{22} = L; let Inst{21-16} = 0b000000; let Inst{15-12} = opcode; let Inst{11-10} = size; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD vector load/store multiple N-element structure (post-index) -class NeonI_LdStMult_Post opcode, bits<2> size, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtnm -{ - let Inst{31} = 0b0; - let Inst{30} = q; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +class BaseSIMDLdStPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : I { + bits<5> Vt; + bits<5> Rn; + bits<5> Xm; + let Inst{31} = 0; + let Inst{30} = Q; let Inst{29-23} = 0b0011001; - let Inst{22} = l; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 + let Inst{22} = L; + let Inst{21} = 0; + let Inst{20-16} = Xm; let Inst{15-12} = opcode; let Inst{11-10} = size; - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD vector load Single N-element structure to all lanes -class NeonI_LdOne_Dup opcode, bits<2> size, dag outs, - dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRtn -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011010; - let Inst{22} = 0b1; - let Inst{21} = r; - let Inst{20-16} = 0b00000; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +// The immediate form of AdvSIMD post-indexed addressing is encoded with +// register post-index addressing from the zero register. +multiclass SIMDLdStAliases { + // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16" + // "ld1\t$Vt, [$Rn], #16" + // may get mapped to + // (LD1Twov8b_POST VecListTwo8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(NAME # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # layout):$Vt, + XZR), 1>; + + // E.g. "ld1.8b { v0, v1 }, [x1], #16" + // "ld1.8b\t$Vt, [$Rn], #16" + // may get mapped to + // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(NAME # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # Size):$Vt, + XZR), 0>; + + // E.g. "ld1.8b { v0, v1 }, [x1]" + // "ld1\t$Vt, [$Rn]" + // may get mapped to + // (LD1Twov8b VecListTwo64:$Vt, GPR64sp:$Rn) + def : InstAlias(NAME # Count # "v" # layout) + !cast("VecList" # Count # Size):$Vt, + GPR64sp:$Rn), 0>; + + // E.g. 
"ld1.8b { v0, v1 }, [x1], x2" + // "ld1\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, GPR64pi8:$Xm) + def : InstAlias(NAME # Count # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # Size):$Vt, + !cast("GPR64pi" # Offset):$Xm), 0>; +} + +multiclass BaseSIMDLdN opcode> { + let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { + def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm, + (outs !cast(veclist # "16b"):$Vt), + (ins GPR64sp:$Rn), []>; + def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm, + (outs !cast(veclist # "8h"):$Vt), + (ins GPR64sp:$Rn), []>; + def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm, + (outs !cast(veclist # "4s"):$Vt), + (ins GPR64sp:$Rn), []>; + def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm, + (outs !cast(veclist # "2d"):$Vt), + (ins GPR64sp:$Rn), []>; + def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm, + (outs !cast(veclist # "8b"):$Vt), + (ins GPR64sp:$Rn), []>; + def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm, + (outs !cast(veclist # "4h"):$Vt), + (ins GPR64sp:$Rn), []>; + def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm, + (outs !cast(veclist # "2s"):$Vt), + (ins GPR64sp:$Rn), []>; + + + def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm, + (outs GPR64sp:$wback, + !cast(veclist # "16b"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm, + (outs GPR64sp:$wback, + !cast(veclist # "8h"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm, + (outs GPR64sp:$wback, + !cast(veclist # "4s"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm, + (outs GPR64sp:$wback, + !cast(veclist # "2d"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm, + (outs GPR64sp:$wback, + !cast(veclist # "8b"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm, + (outs GPR64sp:$wback, + !cast(veclist # "4h"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm, + (outs GPR64sp:$wback, + !cast(veclist # "2s"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; +} + +// Only ld1/st1 has a v1d version. 
+multiclass BaseSIMDStN opcode> { + let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in { + def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs), + (ins !cast(veclist # "16b"):$Vt, + GPR64sp:$Rn), []>; + def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs), + (ins !cast(veclist # "8h"):$Vt, + GPR64sp:$Rn), []>; + def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs), + (ins !cast(veclist # "4s"):$Vt, + GPR64sp:$Rn), []>; + def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs), + (ins !cast(veclist # "2d"):$Vt, + GPR64sp:$Rn), []>; + def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs), + (ins !cast(veclist # "8b"):$Vt, + GPR64sp:$Rn), []>; + def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs), + (ins !cast(veclist # "4h"):$Vt, + GPR64sp:$Rn), []>; + def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs), + (ins !cast(veclist # "2s"):$Vt, + GPR64sp:$Rn), []>; + + def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "16b"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "8h"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "4s"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "2d"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset128):$Xm)>; + def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "8b"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "4h"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "2s"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; + defm : SIMDLdStAliases; +} + +multiclass BaseSIMDLd1 opcode> + : BaseSIMDLdN { + + // LD1 instructions have extra "1d" variants. + let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { + def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm, + (outs !cast(veclist # "1d"):$Vt), + (ins GPR64sp:$Rn), []>; + + def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm, + (outs GPR64sp:$wback, + !cast(veclist # "1d"):$Vt), + (ins GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases; +} + +multiclass BaseSIMDSt1 opcode> + : BaseSIMDStN { + + // ST1 instructions have extra "1d" variants. 
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { + def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs), + (ins !cast(veclist # "1d"):$Vt, + GPR64sp:$Rn), []>; + + def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, + (outs GPR64sp:$wback), + (ins !cast(veclist # "1d"):$Vt, + GPR64sp:$Rn, + !cast("GPR64pi" # Offset64):$Xm)>; + } + + defm : SIMDLdStAliases; +} + +multiclass SIMDLd1Multiple { + defm One : BaseSIMDLd1<"One", asm, "VecListOne", 16, 8, 0b0111>; + defm Two : BaseSIMDLd1<"Two", asm, "VecListTwo", 32, 16, 0b1010>; + defm Three : BaseSIMDLd1<"Three", asm, "VecListThree", 48, 24, 0b0110>; + defm Four : BaseSIMDLd1<"Four", asm, "VecListFour", 64, 32, 0b0010>; +} + +multiclass SIMDSt1Multiple { + defm One : BaseSIMDSt1<"One", asm, "VecListOne", 16, 8, 0b0111>; + defm Two : BaseSIMDSt1<"Two", asm, "VecListTwo", 32, 16, 0b1010>; + defm Three : BaseSIMDSt1<"Three", asm, "VecListThree", 48, 24, 0b0110>; + defm Four : BaseSIMDSt1<"Four", asm, "VecListFour", 64, 32, 0b0010>; +} + +multiclass SIMDLd2Multiple { + defm Two : BaseSIMDLdN<"Two", asm, "VecListTwo", 32, 16, 0b1000>; +} + +multiclass SIMDSt2Multiple { + defm Two : BaseSIMDStN<"Two", asm, "VecListTwo", 32, 16, 0b1000>; +} + +multiclass SIMDLd3Multiple { + defm Three : BaseSIMDLdN<"Three", asm, "VecListThree", 48, 24, 0b0100>; +} + +multiclass SIMDSt3Multiple { + defm Three : BaseSIMDStN<"Three", asm, "VecListThree", 48, 24, 0b0100>; +} + +multiclass SIMDLd4Multiple { + defm Four : BaseSIMDLdN<"Four", asm, "VecListFour", 64, 32, 0b0000>; +} + +multiclass SIMDSt4Multiple { + defm Four : BaseSIMDStN<"Four", asm, "VecListFour", 64, 32, 0b0000>; +} + +//--- +// AdvSIMD Load/store single-element +//--- + +class BaseSIMDLdStSingle opcode, + string asm, string operands, string cst, + dag oops, dag iops, list pattern> + : I { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{29-24} = 0b001101; + let Inst{22} = L; + let Inst{21} = R; + let Inst{15-13} = opcode; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + +class BaseSIMDLdStSingleTied opcode, + string asm, string operands, string cst, + dag oops, dag iops, list pattern> + : I { + bits<5> Vt; + bits<5> Rn; + let Inst{31} = 0; + let Inst{29-24} = 0b001101; + let Inst{22} = L; + let Inst{21} = R; let Inst{15-13} = opcode; - let Inst{12} = 0b0; + let Inst{9-5} = Rn; + let Inst{4-0} = Vt; +} + + +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDLdR opcode, bit S, bits<2> size, string asm, + Operand listtype> + : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn]", "", + (outs listtype:$Vt), (ins GPR64sp:$Rn), + []> { + let Inst{30} = Q; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = S; let Inst{11-10} = size; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +class BaseSIMDLdRPost opcode, bit S, bits<2> size, + string asm, Operand listtype, Operand GPR64pi> + : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn], $Xm", + "$Rn = $wback", + (outs GPR64sp:$wback, listtype:$Vt), + (ins GPR64sp:$Rn, GPR64pi:$Xm), []> { + bits<5> Xm; + let Inst{30} = Q; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = S; + let Inst{11-10} = size; +} + +multiclass SIMDLdrAliases { + // E.g. "ld1r { v0.8b }, [x1], #1" + // "ld1r.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(NAME # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # layout):$Vt, + XZR), 1>; + + // E.g. 
"ld1r.8b { v0 }, [x1], #1" + // "ld1r.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(NAME # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # Size):$Vt, + XZR), 0>; + + // E.g. "ld1r.8b { v0 }, [x1]" + // "ld1r.8b\t$Vt, [$Rn]" + // may get mapped to + // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn) + def : InstAlias(NAME # "v" # layout) + !cast("VecList" # Count # Size):$Vt, + GPR64sp:$Rn), 0>; + + // E.g. "ld1r.8b { v0 }, [x1], x2" + // "ld1r.8b\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm) + def : InstAlias(NAME # "v" # layout # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # Size):$Vt, + !cast("GPR64pi" # Offset):$Xm), 0>; +} + +multiclass SIMDLdR opcode, bit S, string asm, string Count, + int Offset1, int Offset2, int Offset4, int Offset8> { + def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm, + !cast("VecList" # Count # "8b")>; + def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm, + !cast("VecList" # Count #"16b")>; + def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm, + !cast("VecList" # Count #"4h")>; + def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm, + !cast("VecList" # Count #"8h")>; + def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm, + !cast("VecList" # Count #"2s")>; + def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm, + !cast("VecList" # Count #"4s")>; + def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm, + !cast("VecList" # Count #"1d")>; + def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm, + !cast("VecList" # Count #"2d")>; + + def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm, + !cast("VecList" # Count # "8b"), + !cast("GPR64pi" # Offset1)>; + def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm, + !cast("VecList" # Count # "16b"), + !cast("GPR64pi" # Offset1)>; + def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm, + !cast("VecList" # Count # "4h"), + !cast("GPR64pi" # Offset2)>; + def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm, + !cast("VecList" # Count # "8h"), + !cast("GPR64pi" # Offset2)>; + def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm, + !cast("VecList" # Count # "2s"), + !cast("GPR64pi" # Offset4)>; + def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm, + !cast("VecList" # Count # "4s"), + !cast("GPR64pi" # Offset4)>; + def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm, + !cast("VecList" # Count # "1d"), + !cast("GPR64pi" # Offset8)>; + def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm, + !cast("VecList" # Count # "2d"), + !cast("GPR64pi" # Offset8)>; + + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; + defm : SIMDLdrAliases; +} - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD vector load/store Single N-element structure to/from one lane -class NeonI_LdStOne_Lane op2_1, bit op0, dag outs, - dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtn -{ - bits<4> lane; - let Inst{31} = 0b0; - let Inst{29-23} = 0b0011010; - let Inst{22} = l; - let Inst{21} = r; +class SIMDLdStSingleB opcode, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingle { + // idx encoded in Q:S:size fields. 
+ bits<4> idx; + let Inst{30} = idx{3}; + let Inst{23} = 0; let Inst{20-16} = 0b00000; - let Inst{15-14} = op2_1; - let Inst{13} = op0; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD post-index vector load Single N-element structure to all lanes -class NeonI_LdOne_Dup_Post opcode, bits<2> size, dag outs, - dag ins, string asmstr, list patterns, - InstrItinClass itin> - : A64InstRtnm -{ - let Inst{31} = 0b0; - let Inst{30} = q; - let Inst{29-23} = 0b0011011; - let Inst{22} = 0b1; - let Inst{21} = r; - // Inherit Rm in 20-16 - let Inst{15-13} = opcode; - let Inst{12} = 0b0; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBTied opcode, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S:size fields. + bits<4> idx; + let Inst{30} = idx{3}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBPost opcode, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingle { + // idx encoded in Q:S:size fields. + bits<4> idx; + bits<5> Xm; + let Inst{30} = idx{3}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} +class SIMDLdStSingleBTiedPost opcode, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S:size fields. + bits<4> idx; + bits<5> Xm; + let Inst{30} = idx{3}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{2}; + let Inst{11-10} = idx{1-0}; +} + +class SIMDLdStSingleH opcode, bit size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingle { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + let Inst{30} = idx{2}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleHTied opcode, bit size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + let Inst{30} = idx{2}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} + +class SIMDLdStSingleHPost opcode, bit size, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingle { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + bits<5> Xm; + let Inst{30} = idx{2}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleHTiedPost opcode, bit size, string asm, + dag oops, dag iops> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S:size<1> fields. + bits<3> idx; + bits<5> Xm; + let Inst{30} = idx{2}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{1}; + let Inst{11} = idx{0}; + let Inst{10} = size; +} +class SIMDLdStSingleS opcode, bits<2> size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingle { + // idx encoded in Q:S fields. + bits<2> idx; + let Inst{30} = idx{1}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSTied opcode, bits<2> size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S fields. 
+ bits<2> idx; + let Inst{30} = idx{1}; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingle { + // idx encoded in Q:S fields. + bits<2> idx; + bits<5> Xm; + let Inst{30} = idx{1}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleSTiedPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingleTied { + // idx encoded in Q:S fields. + bits<2> idx; + bits<5> Xm; + let Inst{30} = idx{1}; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = idx{0}; + let Inst{11-10} = size; +} +class SIMDLdStSingleD opcode, bits<2> size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingle { + // idx encoded in Q field. + bits<1> idx; + let Inst{30} = idx; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = 0; let Inst{11-10} = size; +} +class SIMDLdStSingleDTied opcode, bits<2> size, string asm, + dag oops, dag iops, list pattern> + : BaseSIMDLdStSingleTied { + // idx encoded in Q field. + bits<1> idx; + let Inst{30} = idx; + let Inst{23} = 0; + let Inst{20-16} = 0b00000; + let Inst{12} = 0; + let Inst{11-10} = size; +} +class SIMDLdStSingleDPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingle { + // idx encoded in Q field. + bits<1> idx; + bits<5> Xm; + let Inst{30} = idx; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = 0; + let Inst{11-10} = size; +} +class SIMDLdStSingleDTiedPost opcode, bits<2> size, + string asm, dag oops, dag iops> + : BaseSIMDLdStSingleTied { + // idx encoded in Q field. + bits<1> idx; + bits<5> Xm; + let Inst{30} = idx; + let Inst{23} = 1; + let Inst{20-16} = Xm; + let Inst{12} = 0; + let Inst{11-10} = size; +} - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD post-index vector load/store Single N-element structure -// to/from one lane -class NeonI_LdStOne_Lane_Post op2_1, bit op0, dag outs, - dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRtnm -{ - bits<4> lane; - let Inst{31} = 0b0; - let Inst{29-23} = 0b0011011; - let Inst{22} = l; - let Inst{21} = r; - // Inherit Rm in 20-16 - let Inst{15-14} = op2_1; - let Inst{13} = op0; - - // Inherit Rn in 9-5 - // Inherit Rt in 4-0 -} - -// Format AdvSIMD 3 scalar registers with different type - -class NeonI_Scalar3Diff size, bits<4> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31-30} = 0b01; - let Inst{29} = u; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 0b1; - // Inherit Rm in 20-16 - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleBTied opcode, string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i8 : SIMDLdStSingleBTied<1, R, opcode, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn), []>; + + def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleHTied opcode, bit size, string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm, + (outs 
listtype:$dst), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn), []>; + + def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleSTied opcode, bits<2> size,string asm, + RegisterOperand listtype, + RegisterOperand GPR64pi> { + def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn), []>; + + def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in +multiclass SIMDLdSingleDTied opcode, bits<2> size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, + (outs listtype:$dst), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn), []>; + + def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm, + (outs GPR64sp:$wback, listtype:$dst), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleB opcode, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i8 : SIMDLdStSingleB<0, R, opcode, asm, + (outs), (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn), []>; + + def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexB:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleH opcode, bit size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn), []>; + + def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexH:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleS opcode, bits<2> size,string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn), []>; + + def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexS:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; +} +let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in +multiclass SIMDStSingleD opcode, bits<2> size, string asm, + RegisterOperand listtype, RegisterOperand GPR64pi> { + def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, + (outs), (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn), []>; + + def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, + (outs GPR64sp:$wback), + (ins listtype:$Vt, VectorIndexD:$idx, + GPR64sp:$Rn, GPR64pi:$Xm)>; } -// Format AdvSIMD scalar shift by immediate +multiclass SIMDLdStSingleAliases { + // E.g. "ld1 { v0.8b }[0], [x1], #1" + // "ld1\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # layout):$Vt, + idxtype:$idx, XZR), 1>; + + // E.g. 
"ld1.8b { v0 }[0], [x1], #1" + // "ld1.8b\t$Vt, [$Rn], #1" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR) + def : InstAlias(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # "128"):$Vt, + idxtype:$idx, XZR), 0>; + + // E.g. "ld1.8b { v0 }[0], [x1]" + // "ld1.8b\t$Vt, [$Rn]" + // may get mapped to + // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn) + def : InstAlias(NAME # Type) + !cast("VecList" # Count # "128"):$Vt, + idxtype:$idx, GPR64sp:$Rn), 0>; + + // E.g. "ld1.8b { v0 }[0], [x1], x2" + // "ld1.8b\t$Vt, [$Rn], $Xm" + // may get mapped to + // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm) + def : InstAlias(NAME # Type # "_POST") + GPR64sp:$Rn, + !cast("VecList" # Count # "128"):$Vt, + idxtype:$idx, + !cast("GPR64pi" # Offset):$Xm), 0>; +} -class NeonI_ScalarShiftImm opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - bits<4> Imm4; - bits<3> Imm3; - let Inst{31-30} = 0b01; - let Inst{29} = u; - let Inst{28-23} = 0b111110; - let Inst{22-19} = Imm4; - let Inst{18-16} = Imm3; - let Inst{15-11} = opcode; - let Inst{10} = 0b1; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +multiclass SIMDLdSt1SingleAliases { + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; } -// Format AdvSIMD crypto AES -class NeonI_Crypto_AES size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31-24} = 0b01001110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10100; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 +multiclass SIMDLdSt2SingleAliases { + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; } -// Format AdvSIMD crypto SHA -class NeonI_Crypto_SHA size, bits<5> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdn { - let Inst{31-24} = 0b01011110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10100; - let Inst{16-12} = opcode; +multiclass SIMDLdSt3SingleAliases { + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; +} + +multiclass SIMDLdSt4SingleAliases { + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; + defm : SIMDLdStSingleAliases; +} +} // end of 'let Predicates = [HasNEON]' + +//---------------------------------------------------------------------------- +// Crypto extensions +//---------------------------------------------------------------------------- + +let Predicates = [HasCrypto] in { +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class AESBase opc, string asm, dag outs, dag ins, string cstr, + list pat> + : I, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + let Inst{31-16} = 0b0100111000101000; + let Inst{15-12} = opc; let Inst{11-10} = 0b10; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; } -// Format AdvSIMD crypto 3V SHA -class NeonI_Crypto_3VSHA size, bits<3> opcode, - dag outs, dag ins, string asmstr, - list patterns, InstrItinClass itin> - : A64InstRdnm { - let Inst{31-24} = 0b01011110; - let Inst{23-22} = size; - let Inst{21} = 0b0; - // Inherit Rm in 20-16 - let Inst{15} = 0b0; - let Inst{14-12} = opcode; +class AESInst opc, string asm, Intrinsic OpNode> + : AESBase; + +class AESTiedInst opc, string asm, 
Intrinsic OpNode> + : AESBase; + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class SHA3OpTiedInst opc, string asm, string dst_lhs_kind, + dag oops, dag iops, list pat> + : I, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + bits<5> Rm; + let Inst{31-21} = 0b01011110000; + let Inst{20-16} = Rm; + let Inst{15} = 0; + let Inst{14-12} = opc; let Inst{11-10} = 0b00; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} - -// Format AdvSIMD scalar x indexed element -class NeonI_ScalarXIndexedElem opcode, dag outs, dag ins, - string asmstr, list patterns, - InstrItinClass itin> - : A64InstRdnm -{ - let Inst{31} = 0b0; - let Inst{30} = 0b1; - let Inst{29} = u; - let Inst{28-24} = 0b11111; - let Inst{23} = szhi; - let Inst{22} = szlo; - // l in Inst{21} - // m in Instr{20} - // Inherit Rm in 19-16 - let Inst{15-12} = opcode; - // h in Inst{11} - let Inst{10} = 0b0; - // Inherit Rn in 9-5 - // Inherit Rd in 4-0 -} -// Format AdvSIMD scalar copy - insert from element to scalar -class NeonI_ScalarCopy patterns, InstrItinClass itin> - : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> { - let Inst{28} = 0b1; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; } + +class SHATiedInstQSV opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst; + +class SHATiedInstVVV opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst; + +class SHATiedInstQQV opc, string asm, Intrinsic OpNode> + : SHA3OpTiedInst; + +let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in +class SHA2OpInst opc, string asm, string kind, + string cstr, dag oops, dag iops, + list pat> + : I, + Sched<[WriteV]>{ + bits<5> Rd; + bits<5> Rn; + let Inst{31-16} = 0b0101111000101000; + let Inst{15-12} = opc; + let Inst{11-10} = 0b10; + let Inst{9-5} = Rn; + let Inst{4-0} = Rd; } +class SHATiedInstVV opc, string asm, Intrinsic OpNode> + : SHA2OpInst; + +class SHAInstSS opc, string asm, Intrinsic OpNode> + : SHA2OpInst; +} // end of 'let Predicates = [HasCrypto]' + +// Allow the size specifier tokens to be upper case, not just lower. 
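+// This lets e.g. "add v0.4S, v1.4S, v2.4S" assemble as "add v0.4s, v1.4s, v2.4s".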
+def : TokenAlias<".8B", ".8b">; +def : TokenAlias<".4H", ".4h">; +def : TokenAlias<".2S", ".2s">; +def : TokenAlias<".1D", ".1d">; +def : TokenAlias<".16B", ".16b">; +def : TokenAlias<".8H", ".8h">; +def : TokenAlias<".4S", ".4s">; +def : TokenAlias<".2D", ".2d">; +def : TokenAlias<".1Q", ".1q">; +def : TokenAlias<".B", ".b">; +def : TokenAlias<".H", ".h">; +def : TokenAlias<".S", ".s">; +def : TokenAlias<".D", ".d">; +def : TokenAlias<".Q", ".q">; diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index afb2034..ff115c0 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -11,257 +11,83 @@ // //===----------------------------------------------------------------------===// -#include "AArch64.h" #include "AArch64InstrInfo.h" -#include "AArch64MachineFunctionInfo.h" -#include "AArch64TargetMachine.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineDominators.h" +#include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCInst.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" -#include + +using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AArch64GenInstrInfo.inc" -using namespace llvm; - AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) - : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), - Subtarget(STI) {} + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), + RI(this, &STI), Subtarget(STI) {} -void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - unsigned Opc = 0; - unsigned ZeroReg = 0; - if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) { - // E.g. ADD xDst, xsp, #0 (, lsl #0) - BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg) - .addReg(SrcReg) - .addImm(0); - return; - } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) { - // E.g. ADD wDST, wsp, #0 (, lsl #0) - BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg) - .addReg(SrcReg) - .addImm(0); - return; - } else if (DestReg == AArch64::NZCV) { - assert(AArch64::GPR64RegClass.contains(SrcReg)); - // E.g. MSR NZCV, xDST - BuildMI(MBB, I, DL, get(AArch64::MSRix)) - .addImm(A64SysReg::NZCV) - .addReg(SrcReg); - } else if (SrcReg == AArch64::NZCV) { - assert(AArch64::GPR64RegClass.contains(DestReg)); - // E.g. 
MRS xDST, NZCV - BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) - .addImm(A64SysReg::NZCV); - } else if (AArch64::GPR64RegClass.contains(DestReg)) { - if(AArch64::GPR64RegClass.contains(SrcReg)){ - Opc = AArch64::ORRxxx_lsl; - ZeroReg = AArch64::XZR; - } else{ - assert(AArch64::FPR64RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::GPR32RegClass.contains(DestReg)) { - if(AArch64::GPR32RegClass.contains(SrcReg)){ - Opc = AArch64::ORRwww_lsl; - ZeroReg = AArch64::WZR; - } else{ - assert(AArch64::FPR32RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR32RegClass.contains(DestReg)) { - if(AArch64::FPR32RegClass.contains(SrcReg)){ - BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) - .addReg(SrcReg); - return; - } - else { - assert(AArch64::GPR32RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR64RegClass.contains(DestReg)) { - if(AArch64::FPR64RegClass.contains(SrcReg)){ - BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) - .addReg(SrcReg); - return; - } - else { - assert(AArch64::GPR64RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg) - .addReg(SrcReg); - return; - } - } else if (AArch64::FPR128RegClass.contains(DestReg)) { - assert(AArch64::FPR128RegClass.contains(SrcReg)); +/// GetInstSize - Return the number of bytes of code the specified +/// instruction may be. This returns the maximum number of bytes. +unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { + const MCInstrDesc &Desc = MI->getDesc(); - // If NEON is enable, we use ORR to implement this copy. - // If NEON isn't available, emit STR and LDR to handle this. - if(getSubTarget().hasNEON()) { - BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg) - .addReg(SrcReg) - .addReg(SrcReg); - return; - } else { - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) - .addReg(SrcReg) - .addReg(AArch64::XSP) - .addImm(0x1ff & -16); - - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) - .addReg(AArch64::XSP, RegState::Define) - .addReg(AArch64::XSP) - .addImm(16); - return; - } - } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) { - // The copy of two FPR8 registers is implemented by the copy of two FPR32 - const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8, - &AArch64::FPR32RegClass); - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8, - &AArch64::FPR32RegClass); - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst) - .addReg(Src); - return; - } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) { - // The copy of two FPR16 registers is implemented by the copy of two FPR32 - const TargetRegisterInfo *TRI = &getRegisterInfo(); - unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16, - &AArch64::FPR32RegClass); - unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16, - &AArch64::FPR32RegClass); - BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst) - .addReg(Src); - return; - } else { - CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg); - return; + switch (Desc.getOpcode()) { + default: + // Anything not explicitly designated otherwise is a nomal 4-byte insn. 
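+    // AArch64 is a fixed-width ISA; only the meta-instructions handled
+    // below occupy no space in the emitted code.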
+ return 4; + case TargetOpcode::DBG_VALUE: + case TargetOpcode::EH_LABEL: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::KILL: + return 0; } - // E.g. ORR xDst, xzr, xSrc, lsl #0 - BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(ZeroReg) - .addReg(SrcReg) - .addImm(0); -} - -void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, - unsigned SrcReg) const { - unsigned SubRegs; - bool IsQRegs; - if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) { - SubRegs = 2; - IsQRegs = false; - } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) { - SubRegs = 3; - IsQRegs = false; - } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) { - SubRegs = 4; - IsQRegs = false; - } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) { - SubRegs = 2; - IsQRegs = true; - } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) { - SubRegs = 3; - IsQRegs = true; - } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) { - SubRegs = 4; - IsQRegs = true; - } else - llvm_unreachable("Unknown register class"); - - unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0; - int Spacing = 1; - const TargetRegisterInfo *TRI = &getRegisterInfo(); - // Copy register tuples backward when the first Dest reg overlaps - // with SrcReg. - if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { - BeginIdx = BeginIdx + (SubRegs - 1); - Spacing = -1; - } - - unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B; - for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); - assert(Dst && Src && "Bad sub-register"); - BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src) - .addReg(Src); - } - return; -} - -/// Does the Opcode represent a conditional branch that we can remove and re-add -/// at the end of a basic block? -static bool isCondBranch(unsigned Opc) { - return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx || - Opc == AArch64::CBNZw || Opc == AArch64::CBNZx || - Opc == AArch64::TBZwii || Opc == AArch64::TBZxii || - Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii; -} - -/// Takes apart a given conditional branch MachineInstr (see isCondBranch), -/// setting TBB to the destination basic block and populating the Cond vector -/// with data necessary to recreate the conditional branch at a later -/// date. First element will be the opcode, and subsequent ones define the -/// conditions being branched on in an instruction-specific manner. -static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB, - SmallVectorImpl &Cond) { - switch(I->getOpcode()) { - case AArch64::Bcc: - case AArch64::CBZw: - case AArch64::CBZx: - case AArch64::CBNZw: - case AArch64::CBNZx: - // These instructions just have one predicate operand in position 0 (either - // a condition code or a register being compared). - Cond.push_back(MachineOperand::CreateImm(I->getOpcode())); - Cond.push_back(I->getOperand(0)); - TBB = I->getOperand(1).getMBB(); - return; - case AArch64::TBZwii: - case AArch64::TBZxii: - case AArch64::TBNZwii: - case AArch64::TBNZxii: - // These have two predicate operands: a register and a bit position. 
-    Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
-    Cond.push_back(I->getOperand(0));
-    Cond.push_back(I->getOperand(1));
-    TBB = I->getOperand(2).getMBB();
-    return;
+  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
+}
+
+static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
+                            SmallVectorImpl<MachineOperand> &Cond) {
+  // Block ends with fall-through condbranch.
+  switch (LastInst->getOpcode()) {
   default:
-    llvm_unreachable("Unknown conditional branch to classify");
+    llvm_unreachable("Unknown branch instruction?");
+  case AArch64::Bcc:
+    Target = LastInst->getOperand(1).getMBB();
+    Cond.push_back(LastInst->getOperand(0));
+    break;
+  case AArch64::CBZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZW:
+  case AArch64::CBNZX:
+    Target = LastInst->getOperand(1).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(-1));
+    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+    Cond.push_back(LastInst->getOperand(0));
+    break;
+  case AArch64::TBZW:
+  case AArch64::TBZX:
+  case AArch64::TBNZW:
+  case AArch64::TBNZX:
+    Target = LastInst->getOperand(2).getMBB();
+    Cond.push_back(MachineOperand::CreateImm(-1));
+    Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+    Cond.push_back(LastInst->getOperand(0));
+    Cond.push_back(LastInst->getOperand(1));
   }
 }
-
-bool
-AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
-                                MachineBasicBlock *&FBB,
-                                SmallVectorImpl<MachineOperand> &Cond,
-                                bool AllowModify) const {
+// Branch analysis.
+bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                     MachineBasicBlock *&TBB,
+                                     MachineBasicBlock *&FBB,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     bool AllowModify) const {
   // If the block has no terminators, it just falls into the block after it.
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin())
@@ -281,15 +107,16 @@ AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
   // If there is only one terminator instruction, process it.
   unsigned LastOpc = LastInst->getOpcode();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
-    if (LastOpc == AArch64::Bimm) {
+    if (isUncondBranchOpcode(LastOpc)) {
       TBB = LastInst->getOperand(0).getMBB();
       return false;
     }
-    if (isCondBranch(LastOpc)) {
-      classifyCondBranch(LastInst, TBB, Cond);
+    if (isCondBranchOpcode(LastOpc)) {
+      // Block ends with fall-through condbranch.
+      parseCondBranch(LastInst, TBB, Cond);
       return false;
     }
-    return true;  // Can't handle indirect branch.
+    return true; // Can't handle indirect branch.
   }
 
   // Get the instruction before it if it is a terminator.
@@ -298,8 +125,8 @@ AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
 
   // If AllowModify is true and the block ends with two or more unconditional
   // branches, delete all but the first unconditional branch.
-  if (AllowModify && LastOpc == AArch64::Bimm) {
-    while (SecondLastOpc == AArch64::Bimm) {
+  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+    while (isUncondBranchOpcode(SecondLastOpc)) {
       LastInst->eraseFromParent();
       LastInst = SecondLastInst;
       LastOpc = LastInst->getOpcode();
@@ -319,23 +146,15 @@ AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     return true;
 
   // If the block ends with a B and a Bcc, handle it.
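// Editor's sketch, not part of the original patch: parseCondBranch() above
// packs the branch into Cond in one of two layouts, using a leading -1
// immediate as the sentinel for folded compare-and-branch forms. With
// illustrative registers and labels:
//   b.eq L          -> Cond = { imm:AArch64CC::EQ }
//   cbnz w3, L      -> Cond = { imm:-1, imm:AArch64::CBNZW, reg:w3 }
//   tbz  x5, #7, L  -> Cond = { imm:-1, imm:AArch64::TBZX, reg:x5, imm:7 }
// so a consumer can detect the folded form with a test along the lines of:
//   bool Folded = !Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == -1;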
-  if (LastOpc == AArch64::Bimm) {
-    if (SecondLastOpc == AArch64::Bcc) {
-      TBB = SecondLastInst->getOperand(1).getMBB();
-      Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
-      Cond.push_back(SecondLastInst->getOperand(0));
-      FBB = LastInst->getOperand(0).getMBB();
-      return false;
-    } else if (isCondBranch(SecondLastOpc)) {
-      classifyCondBranch(SecondLastInst, TBB, Cond);
-      FBB = LastInst->getOperand(0).getMBB();
-      return false;
-    }
+  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+    parseCondBranch(SecondLastInst, TBB, Cond);
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
   }
 
   // If the block ends with two unconditional branches, handle it. The second
   // one is not executed, so remove it.
-  if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
     TBB = SecondLastInst->getOperand(0).getMBB();
     I = LastInst;
     if (AllowModify)
@@ -343,84 +162,72 @@ AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     return false;
   }
 
+  // ...likewise if it ends with an indirect branch followed by an unconditional
+  // branch.
+  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+    I = LastInst;
+    if (AllowModify)
+      I->eraseFromParent();
+    return true;
+  }
+
   // Otherwise, can't handle this.
   return true;
 }
 
 bool AArch64InstrInfo::ReverseBranchCondition(
-    SmallVectorImpl<MachineOperand> &Cond) const {
-  switch (Cond[0].getImm()) {
-  case AArch64::Bcc: {
-    A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
-    CC = A64InvertCondCode(CC);
-    Cond[1].setImm(CC);
-    return false;
-  }
-  case AArch64::CBZw:
-    Cond[0].setImm(AArch64::CBNZw);
-    return false;
-  case AArch64::CBZx:
-    Cond[0].setImm(AArch64::CBNZx);
-    return false;
-  case AArch64::CBNZw:
-    Cond[0].setImm(AArch64::CBZw);
-    return false;
-  case AArch64::CBNZx:
-    Cond[0].setImm(AArch64::CBZx);
-    return false;
-  case AArch64::TBZwii:
-    Cond[0].setImm(AArch64::TBNZwii);
-    return false;
-  case AArch64::TBZxii:
-    Cond[0].setImm(AArch64::TBNZxii);
-    return false;
-  case AArch64::TBNZwii:
-    Cond[0].setImm(AArch64::TBZwii);
-    return false;
-  case AArch64::TBNZxii:
-    Cond[0].setImm(AArch64::TBZxii);
-    return false;
-  default:
-    llvm_unreachable("Unknown branch type");
-  }
-}
-
-
-unsigned
-AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                               MachineBasicBlock *FBB,
-                               const SmallVectorImpl<MachineOperand> &Cond,
-                               DebugLoc DL) const {
-  if (FBB == 0 && Cond.empty()) {
-    BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
-    return 1;
-  } else if (FBB == 0) {
-    MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
-    for (int i = 1, e = Cond.size(); i != e; ++i)
-      MIB.addOperand(Cond[i]);
-    MIB.addMBB(TBB);
-    return 1;
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  if (Cond[0].getImm() != -1) {
+    // Regular Bcc
+    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
+    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
+  } else {
+    // Folded compare-and-branch
+    switch (Cond[1].getImm()) {
+    default:
+      llvm_unreachable("Unknown conditional branch!");
+    case AArch64::CBZW:
+      Cond[1].setImm(AArch64::CBNZW);
+      break;
+    case AArch64::CBNZW:
+      Cond[1].setImm(AArch64::CBZW);
+      break;
+    case AArch64::CBZX:
+      Cond[1].setImm(AArch64::CBNZX);
+      break;
+    case AArch64::CBNZX:
+      Cond[1].setImm(AArch64::CBZX);
+      break;
+    case AArch64::TBZW:
+      Cond[1].setImm(AArch64::TBNZW);
+      break;
+    case AArch64::TBNZW:
+      Cond[1].setImm(AArch64::TBZW);
+      break;
+    case AArch64::TBZX:
+      Cond[1].setImm(AArch64::TBNZX);
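// Editor's sketch, not part of the original patch: the swaps in this switch
// form an involution (CBZ <-> CBNZ, TBZ <-> TBNZ), while the Bcc path above
// relies on AArch64 encoding each condition code next to its inverse, so
// getInvertedCondCode() reduces to a low-bit flip:
//   EQ 0b0000 <-> NE 0b0001, GE 0b1010 <-> LT 0b1011, GT 0b1100 <-> LE 0b1101
//   i.e. inverted(CC) == (CC ^ 0x1)
// which is why the Bcc case needs no opcode table at all.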
+      break;
+    case AArch64::TBNZX:
+      Cond[1].setImm(AArch64::TBZX);
+      break;
+    }
   }
-  MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
-  for (int i = 1, e = Cond.size(); i != e; ++i)
-    MIB.addOperand(Cond[i]);
-  MIB.addMBB(TBB);
-
-  BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
-  return 2;
+  return false;
 }
 
 unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
+  if (I == MBB.begin())
+    return 0;
   --I;
   while (I->isDebugValue()) {
     if (I == MBB.begin())
       return 0;
     --I;
   }
-  if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
+  if (!isUncondBranchOpcode(I->getOpcode()) &&
+      !isCondBranchOpcode(I->getOpcode()))
    return 0;
 
   // Remove the branch.
@@ -428,9 +235,10 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
 
   I = MBB.end();
 
-  if (I == MBB.begin()) return 1;
+  if (I == MBB.begin())
+    return 1;
   --I;
-  if (!isCondBranch(I->getOpcode()))
+  if (!isCondBranchOpcode(I->getOpcode()))
     return 1;
 
   // Remove the branch.
@@ -438,542 +246,1838 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   return 2;
 }
 
-bool
-AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
-  MachineInstr &MI = *MBBI;
-  MachineBasicBlock &MBB = *MI.getParent();
+void AArch64InstrInfo::instantiateCondBranch(
+    MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
+    const SmallVectorImpl<MachineOperand> &Cond) const {
+  if (Cond[0].getImm() != -1) {
+    // Regular Bcc
+    BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
+  } else {
+    // Folded compare-and-branch
+    const MachineInstrBuilder MIB =
+        BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg());
+    if (Cond.size() > 3)
+      MIB.addImm(Cond[3].getImm());
+    MIB.addMBB(TBB);
+  }
+}
 
-  unsigned Opcode = MI.getOpcode();
-  switch (Opcode) {
-  case AArch64::TLSDESC_BLRx: {
-    MachineInstr *NewMI =
-      BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
-        .addOperand(MI.getOperand(1));
-    MI.setDesc(get(AArch64::BLRx));
-
-    llvm::finalizeBundle(MBB, NewMI, *++MBBI);
-    return true;
-  }
+unsigned AArch64InstrInfo::InsertBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+  // Shouldn't be a fall through.
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  if (!FBB) {
+    if (Cond.empty()) // Unconditional branch?
+      BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
+    else
+      instantiateCondBranch(MBB, DL, TBB, Cond);
+    return 1;
+  }
+
+  // Two-way conditional branch.
+  instantiateCondBranch(MBB, DL, TBB, Cond);
+  BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
+  return 2;
+}
+
+// Find the original register that VReg is copied from.
+static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
+  while (TargetRegisterInfo::isVirtualRegister(VReg)) {
+    const MachineInstr *DefMI = MRI.getVRegDef(VReg);
+    if (!DefMI->isFullCopy())
+      return VReg;
+    VReg = DefMI->getOperand(1).getReg();
+  }
+  return VReg;
+}
+
+// Determine if VReg is defined by an instruction that can be folded into a
+// csel instruction. If so, return the folded opcode, and the replacement
+// register.
+static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
+                                unsigned *NewVReg = nullptr) {
+  VReg = removeCopies(MRI, VReg);
+  if (!TargetRegisterInfo::isVirtualRegister(VReg))
+    return 0;
+
+  bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
+  const MachineInstr *DefMI = MRI.getVRegDef(VReg);
+  unsigned Opc = 0;
+  unsigned SrcOpNum = 0;
+  switch (DefMI->getOpcode()) {
+  case AArch64::ADDSXri:
+  case AArch64::ADDSWri:
+    // if NZCV is used, do not fold.
+    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
+      return 0;
+    // fall-through to ADDXri and ADDWri.
+  case AArch64::ADDXri:
+  case AArch64::ADDWri:
+    // add x, 1 -> csinc.
+    if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
+        DefMI->getOperand(3).getImm() != 0)
+      return 0;
+    SrcOpNum = 1;
+    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+    break;
+
+  case AArch64::ORNXrr:
+  case AArch64::ORNWrr: {
+    // not x -> csinv, represented as orn dst, xzr, src.
+    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
+    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
+      return 0;
+    SrcOpNum = 2;
+    Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
+    break;
+  }
+
+  case AArch64::SUBSXrr:
+  case AArch64::SUBSWrr:
+    // if NZCV is used, do not fold.
+    if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
+      return 0;
+    // fall-through to SUBXrr and SUBWrr.
+  case AArch64::SUBXrr:
+  case AArch64::SUBWrr: {
+    // neg x -> csneg, represented as sub dst, xzr, src.
+    unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
+    if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
+      return 0;
+    SrcOpNum = 2;
+    Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
+    break;
+  }
   default:
+    return 0;
+  }
+  assert(Opc && SrcOpNum && "Missing parameters");
+
+  if (NewVReg)
+    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+  return Opc;
+}
+
+bool AArch64InstrInfo::canInsertSelect(
+    const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
+    unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
+    int &FalseCycles) const {
+  // Check register classes.
+  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  const TargetRegisterClass *RC =
+      RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+  if (!RC)
     return false;
+
+  // Expanding cbz/tbz requires an extra cycle of latency on the condition.
+  unsigned ExtraCondLat = Cond.size() != 1;
+
+  // GPRs are handled by csel.
+  // FIXME: Fold in x+1, -x, and ~x when applicable.
+  if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
+      AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
+    // Single-cycle csel, csinc, csinv, and csneg.
+    CondCycles = 1 + ExtraCondLat;
+    TrueCycles = FalseCycles = 1;
+    if (canFoldIntoCSel(MRI, TrueReg))
+      TrueCycles = 0;
+    else if (canFoldIntoCSel(MRI, FalseReg))
+      FalseCycles = 0;
+    return true;
   }
 
+  // Scalar floating point is handled by fcsel.
+  // FIXME: Form fabs, fmin, and fmax when applicable.
+  if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
+      AArch64::FPR32RegClass.hasSubClassEq(RC)) {
+    CondCycles = 5 + ExtraCondLat;
+    TrueCycles = FalseCycles = 2;
+    return true;
+  }
+
+  // Can't do vectors.
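// Editor's sketch, not part of the original patch: the zero-cost folds
// counted above are sound because the conditional-select family already
// applies a unary op to its false arm. For a condition c:
//   csel  d, a, b, c   =>  d = c ? a : b
//   csinc d, a, b, c   =>  d = c ? a : b + 1   (absorbs "x + 1")
//   csinv d, a, b, c   =>  d = c ? a : ~b      (absorbs "orn d, zr, x")
//   csneg d, a, b, c   =>  d = c ? a : -b      (absorbs "sub d, zr, x")
// which is exactly the producer set canFoldIntoCSel() recognizes.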
   return false;
 }
 
-void
-AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
-                                      MachineBasicBlock::iterator MBBI,
-                                      unsigned SrcReg, bool isKill,
-                                      int FrameIdx,
-                                      const TargetRegisterClass *RC,
-                                      const TargetRegisterInfo *TRI) const {
-  DebugLoc DL = MBB.findDebugLoc(MBBI);
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
-  unsigned Align = MFI.getObjectAlignment(FrameIdx);
-
-  MachineMemOperand *MMO
-    = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
-                              MachineMemOperand::MOStore,
-                              MFI.getObjectSize(FrameIdx),
-                              Align);
-
-  unsigned StoreOp = 0;
-  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
-    switch(RC->getSize()) {
-    case 4: StoreOp = AArch64::LS32_STR; break;
-    case 8: StoreOp = AArch64::LS64_STR; break;
+void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I, DebugLoc DL,
+                                    unsigned DstReg,
+                                    const SmallVectorImpl<MachineOperand> &Cond,
+                                    unsigned TrueReg, unsigned FalseReg) const {
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+  // Parse the condition code, see parseCondBranch() above.
+  AArch64CC::CondCode CC;
+  switch (Cond.size()) {
+  default:
+    llvm_unreachable("Unknown condition opcode in Cond");
+  case 1: // b.cc
+    CC = AArch64CC::CondCode(Cond[0].getImm());
+    break;
+  case 3: { // cbz/cbnz
+    // We must insert a compare against 0.
+    bool Is64Bit;
+    switch (Cond[1].getImm()) {
    default:
-      llvm_unreachable("Unknown size for regclass");
-    }
-  } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
-    StoreOp = AArch64::LSFP8_STR;
-  } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
-    StoreOp = AArch64::LSFP16_STR;
-  } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
-             RC->hasType(MVT::f128)) {
-    switch (RC->getSize()) {
-    case 4: StoreOp = AArch64::LSFP32_STR; break;
-    case 8: StoreOp = AArch64::LSFP64_STR; break;
-    case 16: StoreOp = AArch64::LSFP128_STR; break;
+      llvm_unreachable("Unknown branch opcode in Cond");
+    case AArch64::CBZW:
+      Is64Bit = 0;
+      CC = AArch64CC::EQ;
+      break;
+    case AArch64::CBZX:
+      Is64Bit = 1;
+      CC = AArch64CC::EQ;
+      break;
+    case AArch64::CBNZW:
+      Is64Bit = 0;
+      CC = AArch64CC::NE;
+      break;
+    case AArch64::CBNZX:
+      Is64Bit = 1;
+      CC = AArch64CC::NE;
+      break;
+    }
+    unsigned SrcReg = Cond[2].getReg();
+    if (Is64Bit) {
+      // cmp reg, #0 is actually subs xzr, reg, #0.
+      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
+      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
+          .addReg(SrcReg)
+          .addImm(0)
+          .addImm(0);
+    } else {
+      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
+      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
+          .addReg(SrcReg)
+          .addImm(0)
+          .addImm(0);
+    }
+    break;
+  }
+  case 4: { // tbz/tbnz
+    // We must insert a tst instruction.
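// Editor's sketch, not part of the original patch, with illustrative
// registers: for the cbz/cbnz case just above, the flags the folded branch
// never materialized are rebuilt so a conditional select can consume them:
//   cbz w0, ...   becomes   subs wzr, w0, #0   ; i.e. cmp w0, #0
//                           csel wD, wT, wF, eq
// The tbz/tbnz case handled here does the same with an ands against the
// one-bit logical immediate (1 << bit), selecting on eq/ne.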
+    switch (Cond[1].getImm()) {
    default:
-      llvm_unreachable("Unknown size for regclass");
-    }
-  } else { // For a super register class has more than one sub registers
-    if (AArch64::DPairRegClass.hasSubClassEq(RC))
-      StoreOp = AArch64::ST1x2_8B;
-    else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
-      StoreOp = AArch64::ST1x3_8B;
-    else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
-      StoreOp = AArch64::ST1x4_8B;
-    else if (AArch64::QPairRegClass.hasSubClassEq(RC))
-      StoreOp = AArch64::ST1x2_16B;
-    else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
-      StoreOp = AArch64::ST1x3_16B;
-    else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
-      StoreOp = AArch64::ST1x4_16B;
+      llvm_unreachable("Unknown branch opcode in Cond");
+    case AArch64::TBZW:
+    case AArch64::TBZX:
+      CC = AArch64CC::EQ;
+      break;
+    case AArch64::TBNZW:
+    case AArch64::TBNZX:
+      CC = AArch64CC::NE;
+      break;
+    }
+    // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
+    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
+      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
+          .addReg(Cond[2].getReg())
+          .addImm(AArch64_AM::encodeLogicalImmediate(
+              1ull << Cond[3].getImm(), 32));
+    else
+      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
+          .addReg(Cond[2].getReg())
+          .addImm(AArch64_AM::encodeLogicalImmediate(
+              1ull << Cond[3].getImm(), 64));
+    break;
+  }
+  }
-  if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
-    switch(RC->getSize()) {
-    case 4: LoadOp = AArch64::LS32_LDR; break;
-    case 8: LoadOp = AArch64::LS64_LDR; break;
-    default:
-      llvm_unreachable("Unknown size for regclass");
-    }
-  } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
-    LoadOp = AArch64::LSFP8_LDR;
-  } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
-    LoadOp = AArch64::LSFP16_LDR;
-  } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
-             RC->hasType(MVT::f128)) {
-    switch (RC->getSize()) {
-    case 4: LoadOp = AArch64::LSFP32_LDR; break;
-    case 8: LoadOp = AArch64::LSFP64_LDR; break;
-    case 16: LoadOp = AArch64::LSFP128_LDR; break;
-    default:
-      llvm_unreachable("Unknown size for regclass");
-    }
-  } else { // For a super register class has more than one sub registers
-    if (AArch64::DPairRegClass.hasSubClassEq(RC))
-      LoadOp = AArch64::LD1x2_8B;
-    else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
-      LoadOp = AArch64::LD1x3_8B;
-    else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
-      LoadOp = AArch64::LD1x4_8B;
-    else if (AArch64::QPairRegClass.hasSubClassEq(RC))
-      LoadOp = AArch64::LD1x2_16B;
-    else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
-      LoadOp = AArch64::LD1x3_16B;
-    else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
-      LoadOp = AArch64::LD1x4_16B;
-    else
-      llvm_unreachable("Unknown reg class");
+bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                             unsigned &SrcReg, unsigned &DstReg,
+                                             unsigned &SubIdx) const {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case AArch64::SBFMXri: // aka sxtw
+  case AArch64::UBFMXri: // aka uxtw
+    // Check for the 32 -> 64 bit extension case, these instructions can do
+    // much more.
+    if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
+      return false;
+    // This is a signed or unsigned 32 -> 64 bit extension.
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    SubIdx = AArch64::sub_32;
+    return true;
+  }
+}
 
-  MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
-  // Vector load has different operands from other load instructions.
-  NewMI.addFrameIndex(FrameIdx)
-       .addMemOperand(MMO);
-  return;
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
+/// Return true if the comparison instruction can be analyzed.
+bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + unsigned &SrcReg2, int &CmpMask, + int &CmpValue) const { + switch (MI->getOpcode()) { + default: + break; + case AArch64::SUBSWrr: + case AArch64::SUBSWrs: + case AArch64::SUBSWrx: + case AArch64::SUBSXrr: + case AArch64::SUBSXrs: + case AArch64::SUBSXrx: + case AArch64::ADDSWrr: + case AArch64::ADDSWrs: + case AArch64::ADDSWrx: + case AArch64::ADDSXrr: + case AArch64::ADDSXrs: + case AArch64::ADDSXrx: + // Replace SUBSWrr with SUBWrr if NZCV is not used. + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = MI->getOperand(2).getReg(); + CmpMask = ~0; + CmpValue = 0; + return true; + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = MI->getOperand(2).getImm(); + return true; + case AArch64::ANDSWri: + case AArch64::ANDSXri: + // ANDS does not use the same encoding scheme as the others xxxS + // instructions. + SrcReg = MI->getOperand(1).getReg(); + SrcReg2 = 0; + CmpMask = ~0; + CmpValue = AArch64_AM::decodeLogicalImmediate( + MI->getOperand(2).getImm(), + MI->getOpcode() == AArch64::ANDSWri ? 32 : 64); + return true; + } + + return false; +} + +static bool UpdateOperandRegClass(MachineInstr *Instr) { + MachineBasicBlock *MBB = Instr->getParent(); + assert(MBB && "Can't get MachineBasicBlock here"); + MachineFunction *MF = MBB->getParent(); + assert(MF && "Can't get MachineFunction here"); + const TargetMachine *TM = &MF->getTarget(); + const TargetInstrInfo *TII = TM->getInstrInfo(); + const TargetRegisterInfo *TRI = TM->getRegisterInfo(); + MachineRegisterInfo *MRI = &MF->getRegInfo(); + + for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx; + ++OpIdx) { + MachineOperand &MO = Instr->getOperand(OpIdx); + const TargetRegisterClass *OpRegCstraints = + Instr->getRegClassConstraint(OpIdx, TII, TRI); + + // If there's no constraint, there's nothing to do. + if (!OpRegCstraints) + continue; + // If the operand is a frame index, there's nothing to do here. + // A frame index operand will resolve correctly during PEI. + if (MO.isFI()) + continue; + + assert(MO.isReg() && + "Operand has register constraints without being a register!"); + + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!OpRegCstraints->contains(Reg)) + return false; + } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && + !MRI->constrainRegClass(Reg, OpRegCstraints)) + return false; } - MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg); - NewMI.addFrameIndex(FrameIdx) - .addImm(0) - .addMemOperand(MMO); + return true; } -unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const { - unsigned Limit = (1 << 16) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!I->getOperand(i).isFI()) continue; +/// optimizeCompareInstr - Convert the instruction supplying the argument to the +/// comparison into one that sets the zero bit in the flags register. +bool AArch64InstrInfo::optimizeCompareInstr( + MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, + int CmpValue, const MachineRegisterInfo *MRI) const { + + // Replace SUBSWrr with SUBWrr if NZCV is not used. 
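// Editor's sketch, not part of the original patch: the first rewrite below
// fires when the flags defined by the compare itself are provably dead,
// e.g. (illustrative registers)
//   subs w8, w0, w1    ; NZCV dead
// is demoted to
//   sub  w8, w0, w1
// via the opcode table that follows, and the dead NZCV operand is removed.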
+ int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true); + if (Cmp_NZCV != -1) { + unsigned NewOpc; + switch (CmpInstr->getOpcode()) { + default: + return false; + case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break; + case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break; + case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break; + case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break; + case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break; + case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break; + case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break; + case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break; + case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break; + case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break; + case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break; + case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break; + case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break; + case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break; + case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break; + case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break; + } + + const MCInstrDesc &MCID = get(NewOpc); + CmpInstr->setDesc(MCID); + CmpInstr->RemoveOperand(Cmp_NZCV); + bool succeeded = UpdateOperandRegClass(CmpInstr); + (void)succeeded; + assert(succeeded && "Some operands reg class are incompatible!"); + return true; + } + + // Continue only if we have a "ri" where immediate is zero. + if (CmpValue != 0 || SrcReg2 != 0) + return false; + + // CmpInstr is a Compare instruction if destination register is not used. + if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) + return false; + + // Get the unique definition of SrcReg. + MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); + if (!MI) + return false; + + // We iterate backward, starting from the instruction before CmpInstr and + // stop when reaching the definition of the source register or done with the + // basic block, to check whether NZCV is used or modified in between. + MachineBasicBlock::iterator I = CmpInstr, E = MI, + B = CmpInstr->getParent()->begin(); + + // Early exit if CmpInstr is at the beginning of the BB. + if (I == B) + return false; + + // Check whether the definition of SrcReg is in the same basic block as + // Compare. If not, we can't optimize away the Compare. + if (MI->getParent() != CmpInstr->getParent()) + return false; + + // Check that NZCV isn't set between the comparison instruction and the one we + // want to change. + const TargetRegisterInfo *TRI = &getRegisterInfo(); + for (--I; I != E; --I) { + const MachineInstr &Instr = *I; - // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff - // is the largest offset guaranteed to fit in the immediate offset. - if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) { - Limit = std::min(Limit, 0xfffu); - break; - } + if (Instr.modifiesRegister(AArch64::NZCV, TRI) || + Instr.readsRegister(AArch64::NZCV, TRI)) + // This instruction modifies or uses NZCV after the one we want to + // change. We can't do this transformation. + return false; + if (I == B) + // The 'and' is below the comparison instruction. 
+ return false; + } + + unsigned NewOpc = MI->getOpcode(); + switch (MI->getOpcode()) { + default: + return false; + case AArch64::ADDSWrr: + case AArch64::ADDSWri: + case AArch64::ADDSXrr: + case AArch64::ADDSXri: + case AArch64::SUBSWrr: + case AArch64::SUBSWri: + case AArch64::SUBSXrr: + case AArch64::SUBSXri: + break; + case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break; + case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break; + case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break; + case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break; + case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break; + case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break; + case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break; + case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break; + case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break; + case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break; + case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break; + case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break; + case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break; + case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break; + } - int AccessScale, MinOffset, MaxOffset; - getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset); - Limit = std::min(Limit, static_cast(MaxOffset)); + // Scan forward for the use of NZCV. + // When checking against MI: if it's a conditional code requires + // checking of V bit, then this is not safe to do. + // It is safe to remove CmpInstr if NZCV is redefined or killed. + // If we are done with the basic block, we need to check whether NZCV is + // live-out. + bool IsSafe = false; + for (MachineBasicBlock::iterator I = CmpInstr, + E = CmpInstr->getParent()->end(); + !IsSafe && ++I != E;) { + const MachineInstr &Instr = *I; + for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO; + ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) { + IsSafe = true; + break; + } + if (!MO.isReg() || MO.getReg() != AArch64::NZCV) + continue; + if (MO.isDef()) { + IsSafe = true; + break; + } - break; // At most one FI per instruction + // Decode the condition code. + unsigned Opc = Instr.getOpcode(); + AArch64CC::CondCode CC; + switch (Opc) { + default: + return false; + case AArch64::Bcc: + CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm(); + break; + case AArch64::CSINVWr: + case AArch64::CSINVXr: + case AArch64::CSINCWr: + case AArch64::CSINCXr: + case AArch64::CSELWr: + case AArch64::CSELXr: + case AArch64::CSNEGWr: + case AArch64::CSNEGXr: + case AArch64::FCSELSrrr: + case AArch64::FCSELDrrr: + CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm(); + break; + } + + // It is not safe to remove Compare instruction if Overflow(V) is used. + switch (CC) { + default: + // NZCV can be used multiple times, we should continue. + break; + case AArch64CC::VS: + case AArch64CC::VC: + case AArch64CC::GE: + case AArch64CC::LT: + case AArch64CC::GT: + case AArch64CC::LE: + return false; } } } - return Limit; + // If NZCV is not killed nor re-defined, we should check whether it is + // live-out. If it is live-out, do not optimize. + if (!IsSafe) { + MachineBasicBlock *ParentBlock = CmpInstr->getParent(); + for (auto *MBB : ParentBlock->successors()) + if (MBB->isLiveIn(AArch64::NZCV)) + return false; + } + + // Update the instruction to set NZCV. 
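// Editor's sketch, not part of the original patch: the net effect of the
// second rewrite, with illustrative registers, is
//   sub w8, w0, w1              subs w8, w0, w1
//   cmp w8, #0          =>      b.eq LBB0_2
//   b.eq LBB0_2
// i.e. the compare is deleted and the producer is promoted to set NZCV
// itself, which the setDesc()/eraseFromParent() below performs. The
// condition-code screen above rejects any reader of the overflow bit
// (vs, vc, ge, lt, gt, le), for which the two flag values could differ.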
+  MI->setDesc(get(NewOpc));
+  CmpInstr->eraseFromParent();
+  bool succeeded = UpdateOperandRegClass(MI);
+  (void)succeeded;
+  assert(succeeded && "Some operands reg class are incompatible!");
+  MI->addRegisterDefined(AArch64::NZCV, TRI);
+  return true;
 }
 
-void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
-                                             int &AccessScale, int &MinOffset,
-                                             int &MaxOffset) const {
-  switch (MI.getOpcode()) {
+
+/// Return true if this instruction has a non-zero immediate
+bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
   default:
-    llvm_unreachable("Unknown load/store kind");
-  case TargetOpcode::DBG_VALUE:
-    AccessScale = 1;
-    MinOffset = INT_MIN;
-    MaxOffset = INT_MAX;
-    return;
-  case AArch64::LS8_LDR: case AArch64::LS8_STR:
-  case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
-  case AArch64::LDRSBw:
-  case AArch64::LDRSBx:
-    AccessScale = 1;
-    MinOffset = 0;
-    MaxOffset = 0xfff;
-    return;
-  case AArch64::LS16_LDR: case AArch64::LS16_STR:
-  case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
-  case AArch64::LDRSHw:
-  case AArch64::LDRSHx:
-    AccessScale = 2;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
-  case AArch64::LS32_LDR: case AArch64::LS32_STR:
-  case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
-  case AArch64::LDRSWx:
-  case AArch64::LDPSWx:
-    AccessScale = 4;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
-  case AArch64::LS64_LDR: case AArch64::LS64_STR:
-  case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
-  case AArch64::PRFM:
-    AccessScale = 8;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
-  case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
-    AccessScale = 16;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
-  case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
-  case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
-    AccessScale = 4;
-    MinOffset = -0x40 * AccessScale;
-    MaxOffset = 0x3f * AccessScale;
-    return;
-  case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
-  case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
-    AccessScale = 8;
-    MinOffset = -0x40 * AccessScale;
-    MaxOffset = 0x3f * AccessScale;
-    return;
-  case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
-    AccessScale = 16;
-    MinOffset = -0x40 * AccessScale;
-    MaxOffset = 0x3f * AccessScale;
-    return;
-  case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
-    AccessScale = 16;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
-  case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
-    AccessScale = 24;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
-  case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
-  case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
-    AccessScale = 32;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
-  case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
-    AccessScale = 48;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
-  case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
-    AccessScale = 64;
-    MinOffset = 0;
-    MaxOffset = 0xfff * AccessScale;
-    return;
+    break;
+  case AArch64::ADDSWrs:
+  case AArch64::ADDSXrs:
+  case AArch64::ADDWrs:
+  case AArch64::ADDXrs:
+  case AArch64::ANDSWrs:
+  case AArch64::ANDSXrs:
+  case AArch64::ANDWrs:
+  case AArch64::ANDXrs:
+  case AArch64::BICSWrs:
+  case AArch64::BICSXrs:
+  case AArch64::BICWrs:
+  case AArch64::BICXrs:
+  case AArch64::CRC32Brr:
+  case AArch64::CRC32CBrr:
+  case AArch64::CRC32CHrr:
+  case AArch64::CRC32CWrr:
+  case AArch64::CRC32CXrr:
+  case AArch64::CRC32Hrr:
+  case AArch64::CRC32Wrr:
+  case AArch64::CRC32Xrr:
+  case AArch64::EONWrs:
+  case AArch64::EONXrs:
+  case AArch64::EORWrs:
+  case AArch64::EORXrs:
+  case AArch64::ORNWrs:
+  case AArch64::ORNXrs:
+  case AArch64::ORRWrs:
+  case AArch64::ORRXrs:
+  case AArch64::SUBSWrs:
+  case AArch64::SUBSXrs:
+  case AArch64::SUBWrs:
+  case AArch64::SUBXrs:
+    if (MI->getOperand(3).isImm()) {
+      unsigned val = MI->getOperand(3).getImm();
+      return (val != 0);
+    }
+    break;
   }
   return false;
 }
 
-unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
-  const MCInstrDesc &MCID = MI.getDesc();
-  const MachineBasicBlock &MBB = *MI.getParent();
-  const MachineFunction &MF = *MBB.getParent();
-  const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
+/// Return true if this instruction has a non-zero immediate
+bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case AArch64::ADDSWrx:
+  case AArch64::ADDSXrx:
+  case AArch64::ADDSXrx64:
+  case AArch64::ADDWrx:
+  case AArch64::ADDXrx:
+  case AArch64::ADDXrx64:
+  case AArch64::SUBSWrx:
+  case AArch64::SUBSXrx:
+  case AArch64::SUBSXrx64:
+  case AArch64::SUBWrx:
+  case AArch64::SUBXrx:
+  case AArch64::SUBXrx64:
+    if (MI->getOperand(3).isImm()) {
+      unsigned val = MI->getOperand(3).getImm();
+      return (val != 0);
+    }
+    break;
+  }
 
-  if (MCID.getSize())
-    return MCID.getSize();
+  return false;
+}
 
-  if (MI.getOpcode() == AArch64::INLINEASM)
-    return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
+// Return true if this instruction simply sets its single destination register
+// to zero. This is equivalent to a register rename of the zero-register.
+bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case AArch64::MOVZWi:
+  case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
+    if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
+      assert(MI->getDesc().getNumOperands() == 3 &&
+             MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
+      return true;
+    }
+    break;
+  case AArch64::ANDWri: // and Rd, Rzr, #imm
+    return MI->getOperand(1).getReg() == AArch64::WZR;
+  case AArch64::ANDXri:
+    return MI->getOperand(1).getReg() == AArch64::XZR;
+  case TargetOpcode::COPY:
+    return MI->getOperand(1).getReg() == AArch64::WZR;
+  }
+  return false;
+}
 
-  switch (MI.getOpcode()) {
-  case TargetOpcode::BUNDLE:
-    return getInstBundleLength(MI);
-  case TargetOpcode::IMPLICIT_DEF:
-  case TargetOpcode::KILL:
-  case TargetOpcode::CFI_INSTRUCTION:
-  case TargetOpcode::EH_LABEL:
-  case TargetOpcode::GC_LABEL:
-  case TargetOpcode::DBG_VALUE:
-  case AArch64::TLSDESCCALL:
-    return 0;
+// Return true if this instruction simply renames a general register without
+// modifying bits.
+bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
   default:
-    llvm_unreachable("Unknown instruction class");
+    break;
+  case TargetOpcode::COPY: {
+    // GPR32 copies will be lowered to ORRXrs
+    unsigned DstReg = MI->getOperand(0).getReg();
+    return (AArch64::GPR32RegClass.contains(DstReg) ||
+            AArch64::GPR64RegClass.contains(DstReg));
   }
+  case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
+    if (MI->getOperand(1).getReg() == AArch64::XZR) {
+      assert(MI->getDesc().getNumOperands() == 4 &&
+             MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
+      return true;
+    }
+    break;
+  case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
+    if (MI->getOperand(2).getImm() == 0) {
+      assert(MI->getDesc().getNumOperands() == 4 &&
+             MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
+      return true;
+    }
+    break;
+  }
+  return false;
 }
 
-unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
-  unsigned Size = 0;
-  MachineBasicBlock::const_instr_iterator I = MI;
-  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
-  while (++I != E && I->isInsideBundle()) {
-    assert(!I->isBundle() && "No nested bundle!");
-    Size += getInstSizeInBytes(*I);
+// Return true if this instruction simply renames a general register without
+// modifying bits.
+bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case TargetOpcode::COPY: {
+    // FPR64 copies will be lowered to ORR.16b
+    unsigned DstReg = MI->getOperand(0).getReg();
+    return (AArch64::FPR64RegClass.contains(DstReg) ||
+            AArch64::FPR128RegClass.contains(DstReg));
   }
-  return Size;
+  case AArch64::ORRv16i8:
+    if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+      assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
+             "invalid ORRv16i8 operands");
+      return true;
+    }
+    break;
+  }
+  return false;
 }
 
-bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
-                                unsigned FrameReg, int &Offset,
-                                const AArch64InstrInfo &TII) {
-  MachineBasicBlock &MBB = *MI.getParent();
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
+unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case AArch64::LDRWui:
+  case AArch64::LDRXui:
+  case AArch64::LDRBui:
+  case AArch64::LDRHui:
+  case AArch64::LDRSui:
+  case AArch64::LDRDui:
+  case AArch64::LDRQui:
+    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
 
-  MFI.getObjectOffset(FrameRegIdx);
-  llvm_unreachable("Unimplemented rewriteFrameIndex");
+  return 0;
+}
 
-void llvm::emitRegUpdate(MachineBasicBlock &MBB,
-                         MachineBasicBlock::iterator MBBI,
-                         DebugLoc dl, const TargetInstrInfo &TII,
-                         unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
-                         int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
-  if (NumBytes == 0 && DstReg == SrcReg)
-    return;
-  else if (abs64(NumBytes) & ~0xffffff) {
-    // Generically, we have to materialize the offset into a temporary register
-    // and subtract it. There are a couple of ways this could be done, for now
-    // we'll use a movz/movk or movn/movk sequence.
-    uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
-    BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
-      .addImm(0xffff & Bits).addImm(0)
-      .setMIFlags(MIFlags);
-
-    Bits >>= 16;
-    if (Bits & 0xffff) {
-      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
-        .addReg(ScratchReg)
-        .addImm(0xffff & Bits).addImm(1)
-        .setMIFlags(MIFlags);
-    }
-
-    Bits >>= 16;
-    if (Bits & 0xffff) {
-      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
-        .addReg(ScratchReg)
-        .addImm(0xffff & Bits).addImm(2)
-        .setMIFlags(MIFlags);
-    }
-
-    Bits >>= 16;
-    if (Bits & 0xffff) {
-      BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
-        .addReg(ScratchReg)
-        .addImm(0xffff & Bits).addImm(3)
-        .setMIFlags(MIFlags);
-    }
-
-    // ADD DST, SRC, xTMP (, lsl #0)
-    unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
-    BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
-      .addReg(SrcReg, RegState::Kill)
-      .addReg(ScratchReg, RegState::Kill)
-      .addImm(0)
-      .setMIFlag(MIFlags);
-    return;
+unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default:
+    break;
+  case AArch64::STRWui:
+  case AArch64::STRXui:
+  case AArch64::STRBui:
+  case AArch64::STRHui:
+  case AArch64::STRSui:
+  case AArch64::STRDui:
+  case AArch64::STRQui:
+    if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
   }
+  return 0;
+}
 
-  // Now we know that the adjustment can be done in at most two add/sub
-  // (immediate) instructions, which is always more efficient than a
-  // literal-pool load, or even a hypothetical movz/movk/add sequence
 
+/// Return true if this load/store scales or extends its register offset.
+/// This refers to scaling a dynamic index as opposed to scaled immediates.
+/// MI should be a memory op that allows scaled addressing.
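// Editor's note, not part of the original patch, with illustrative operands:
// for the register-offset forms matched below the predicate is true for
// anything other than a plain, unshifted X-register offset, e.g.
//   ldr x0, [x1, x2]           ; uxtx, no shift -> false
//   ldr x0, [x1, x2, lsl #3]   ; scaled         -> true
//   ldr x0, [x1, w2, sxtw]     ; extended       -> true
// matching the final "ExtType != UXTX || getMemDoShift(Val)" test.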
+bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: + break; + case AArch64::LDRBBroW: + case AArch64::LDRBroW: + case AArch64::LDRDroW: + case AArch64::LDRHHroW: + case AArch64::LDRHroW: + case AArch64::LDRQroW: + case AArch64::LDRSBWroW: + case AArch64::LDRSBXroW: + case AArch64::LDRSHWroW: + case AArch64::LDRSHXroW: + case AArch64::LDRSWroW: + case AArch64::LDRSroW: + case AArch64::LDRWroW: + case AArch64::LDRXroW: + case AArch64::STRBBroW: + case AArch64::STRBroW: + case AArch64::STRDroW: + case AArch64::STRHHroW: + case AArch64::STRHroW: + case AArch64::STRQroW: + case AArch64::STRSroW: + case AArch64::STRWroW: + case AArch64::STRXroW: + case AArch64::LDRBBroX: + case AArch64::LDRBroX: + case AArch64::LDRDroX: + case AArch64::LDRHHroX: + case AArch64::LDRHroX: + case AArch64::LDRQroX: + case AArch64::LDRSBWroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSHWroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSWroX: + case AArch64::LDRSroX: + case AArch64::LDRWroX: + case AArch64::LDRXroX: + case AArch64::STRBBroX: + case AArch64::STRBroX: + case AArch64::STRDroX: + case AArch64::STRHHroX: + case AArch64::STRHroX: + case AArch64::STRQroX: + case AArch64::STRSroX: + case AArch64::STRWroX: + case AArch64::STRXroX: + + unsigned Val = MI->getOperand(3).getImm(); + AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val); + return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val); + } + return false; +} - // Decide whether we're doing addition or subtraction - unsigned LowOp, HighOp; - if (NumBytes >= 0) { - LowOp = AArch64::ADDxxi_lsl0_s; - HighOp = AArch64::ADDxxi_lsl12_s; - } else { - LowOp = AArch64::SUBxxi_lsl0_s; - HighOp = AArch64::SUBxxi_lsl12_s; - NumBytes = abs64(NumBytes); +/// Check all MachineMemOperands for a hint to suppress pairing. +bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { + assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && + "Too many target MO flags"); + for (auto *MM : MI->memoperands()) { + if (MM->getFlags() & + (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) { + return true; + } } + return false; +} - // If we're here, at the very least a move needs to be produced, which just - // happens to be materializable by an ADD. - if ((NumBytes & 0xfff) || NumBytes == 0) { - BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(NumBytes & 0xfff) - .setMIFlag(MIFlags); +/// Set a flag on the first MachineMemOperand to suppress pairing. +void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const { + if (MI->memoperands_empty()) + return; - // Next update should use the register we've just defined. 
- SrcReg = DstReg; - } + assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && + "Too many target MO flags"); + (*MI->memoperands_begin()) + ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit); +} + +bool +AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, + unsigned &Offset, + const TargetRegisterInfo *TRI) const { + switch (LdSt->getOpcode()) { + default: + return false; + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: + if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) + return false; + BaseReg = LdSt->getOperand(1).getReg(); + MachineFunction &MF = *LdSt->getParent()->getParent(); + unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize(); + Offset = LdSt->getOperand(2).getImm() * Width; + return true; + }; +} - if (NumBytes & 0xfff000) { - BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg) - .addReg(SrcReg, RegState::Kill) - .addImm(NumBytes >> 12) - .setMIFlag(MIFlags); +/// Detect opportunities for ldp/stp formation. +/// +/// Only called for LdSt for which getLdStBaseRegImmOfs returns true. +bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, + MachineInstr *SecondLdSt, + unsigned NumLoads) const { + // Only cluster up to a single pair. + if (NumLoads > 1) + return false; + if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode()) + return false; + // getLdStBaseRegImmOfs guarantees that oper 2 isImm. + unsigned Ofs1 = FirstLdSt->getOperand(2).getImm(); + // Allow 6 bits of positive range. + if (Ofs1 > 64) + return false; + // The caller should already have ordered First/SecondLdSt by offset. + unsigned Ofs2 = SecondLdSt->getOperand(2).getImm(); + return Ofs1 + 1 == Ofs2; +} + +bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, + MachineInstr *Second) const { + // Cyclone can fuse CMN, CMP followed by Bcc. + + // FIXME: B0 can also fuse: + // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. 
+ if (Second->getOpcode() != AArch64::Bcc) + return false; + switch (First->getOpcode()) { + default: + return false; + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::ANDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: + case AArch64::ANDSXri: + return true; } } -void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc dl, const TargetInstrInfo &TII, - unsigned ScratchReg, int64_t NumBytes, - MachineInstr::MIFlag MIFlags) { - emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16, - NumBytes, MIFlags); +MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const { + MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) + .addFrameIndex(FrameIx) + .addImm(0) + .addImm(Offset) + .addMetadata(MDPtr); + return &*MIB; } +static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, + unsigned Reg, unsigned SubIdx, + unsigned State, + const TargetRegisterInfo *TRI) { + if (!SubIdx) + return MIB.addReg(Reg, State); -namespace { - struct LDTLSCleanup : public MachineFunctionPass { - static char ID; - LDTLSCleanup() : MachineFunctionPass(ID) {} + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); + return MIB.addReg(Reg, State, SubIdx); +} - virtual bool runOnMachineFunction(MachineFunction &MF) { - AArch64MachineFunctionInfo* MFI - = MF.getInfo(); - if (MFI->getNumLocalDynamicTLSAccesses() < 2) { - // No point folding accesses if there isn't at least two. - return false; - } +static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, + unsigned NumRegs) { + // We really want the positive remainder mod 32 here, that happens to be + // easily obtainable with a mask. + return ((DestReg - SrcReg) & 0x1f) < NumRegs; +} - MachineDominatorTree *DT = &getAnalysis(); - return VisitNode(DT->getRootNode(), 0); - } - - // Visit the dominator subtree rooted at Node in pre-order. - // If TLSBaseAddrReg is non-null, then use that to replace any - // TLS_base_addr instructions. Otherwise, create the register - // when the first such instruction is seen, and then use it - // as we encounter more instructions. - bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) { - MachineBasicBlock *BB = Node->getBlock(); - bool Changed = false; - - // Traverse the current block. - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; - ++I) { - switch (I->getOpcode()) { - case AArch64::TLSDESC_BLRx: - // Make sure it's a local dynamic access. 
-          if (!I->getOperand(1).isSymbol() ||
-              strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
-            break;
-
-          if (TLSBaseAddrReg)
-            I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
-          else
-            I = SetRegister(I, &TLSBaseAddrReg);
-          Changed = true;
-          break;
-        default:
-          break;
-        }
-      }
+void AArch64InstrInfo::copyPhysRegTuple(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+    unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
+    llvm::ArrayRef<unsigned> Indices) const {
+  assert(getSubTarget().hasNEON() &&
+         "Unexpected register copy without NEON");
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+  unsigned NumRegs = Indices.size();
+
+  int SubReg = 0, End = NumRegs, Incr = 1;
+  if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
+    SubReg = NumRegs - 1;
+    End = -1;
+    Incr = -1;
+  }
 
-      // Visit the children of this block in the dominator tree.
-      for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
-           I != E; ++I) {
-        Changed |= VisitNode(*I, TLSBaseAddrReg);
+  for (; SubReg != End; SubReg += Incr) {
+    const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
+    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+    AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
+    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+  }
+}
+
+void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator I, DebugLoc DL,
+                                   unsigned DestReg, unsigned SrcReg,
+                                   bool KillSrc) const {
+  if (AArch64::GPR32spRegClass.contains(DestReg) &&
+      (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
+    const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+    if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
+      // If either operand is WSP, expand to ADD #0.
+      if (Subtarget.hasZeroCycleRegMove()) {
+        // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
+        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
+                                                     &AArch64::GPR64spRegClass);
+        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
+                                                    &AArch64::GPR64spRegClass);
+        // This instruction is reading and writing X registers. This may upset
+        // the register scavenger and machine verifier, so we need to indicate
+        // that we are reading an undefined value from SrcRegX, but a proper
+        // value from SrcReg.
+        BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
+            .addReg(SrcRegX, RegState::Undef)
+            .addImm(0)
+            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+            .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
+      } else {
+        BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
+            .addReg(SrcReg, getKillRegState(KillSrc))
+            .addImm(0)
+            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
       }
+    } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
+      BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
+          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+    } else {
+      if (Subtarget.hasZeroCycleRegMove()) {
+        // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
+        unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
+                                                     &AArch64::GPR64spRegClass);
+        unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
+                                                    &AArch64::GPR64spRegClass);
+        // This instruction is reading and writing X registers.
This may upset + // the register scavenger and machine verifier, so we need to indicate + // that we are reading an undefined value from SrcRegX, but a proper + // value from SrcReg. + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX) + .addReg(AArch64::XZR) + .addReg(SrcRegX, RegState::Undef) + .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); + } else { + // Otherwise, expand to ORR WZR. + BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg) + .addReg(AArch64::WZR) + .addReg(SrcReg, getKillRegState(KillSrc)); + } + } + return; + } - return Changed; + if (AArch64::GPR64spRegClass.contains(DestReg) && + (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) { + if (DestReg == AArch64::SP || SrcReg == AArch64::SP) { + // If either operand is SP, expand to ADD #0. + BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) { + BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm( + AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else { + // Otherwise, expand to ORR XZR. + BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg) + .addReg(AArch64::XZR) + .addReg(SrcReg, getKillRegState(KillSrc)); } + return; + } - // Replace the TLS_base_addr instruction I with a copy from - // TLSBaseAddrReg, returning the new instruction. - MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I, - unsigned TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const AArch64TargetMachine *TM = - static_cast(&MF->getTarget()); - const AArch64InstrInfo *TII = TM->getInstrInfo(); + // Copy a DDDD register quad by copying the individual sub-registers. + if (AArch64::DDDDRegClass.contains(DestReg) && + AArch64::DDDDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2, AArch64::dsub3 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, + Indices); + return; + } - // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the - // code sequence assumes the address will be. - MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - AArch64::X0) - .addReg(TLSBaseAddrReg); + // Copy a DDD register triple by copying the individual sub-registers. + if (AArch64::DDDRegClass.contains(DestReg) && + AArch64::DDDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1, + AArch64::dsub2 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, + Indices); + return; + } - // Erase the TLS_base_addr instruction. - I->eraseFromParent(); + // Copy a DD register pair by copying the individual sub-registers. + if (AArch64::DDRegClass.contains(DestReg) && + AArch64::DDRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8, + Indices); + return; + } + + // Copy a QQQQ register quad by copying the individual sub-registers. 
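// Editor's sketch, not part of the original patch:
// forwardCopyWillClobberTuple() above is a positive-remainder test; register
// encodings live in 0..31, so ((DestEnc - SrcEnc) & 0x1f) < NumRegs asks
// whether the destination tuple starts inside the source tuple. For example,
// copying D1_D2_D3 into D2_D3_D4 gives (2 - 1) & 0x1f == 1 < 3, so
// copyPhysRegTuple() emits the per-subregister ORRs in reverse to avoid
// reading an already-overwritten D2 or D3. The D- and Q-tuple cases around
// this point all rely on that check.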
+ if (AArch64::QQQQRegClass.contains(DestReg) && + AArch64::QQQQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, + Indices); + return; + } + + // Copy a QQQ register triple by copying the individual sub-registers. + if (AArch64::QQQRegClass.contains(DestReg) && + AArch64::QQQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, + Indices); + return; + } + + // Copy a QQ register pair by copying the individual sub-registers. + if (AArch64::QQRegClass.contains(DestReg) && + AArch64::QQRegClass.contains(SrcReg)) { + static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 }; + copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8, + Indices); + return; + } - return Copy; + if (AArch64::FPR128RegClass.contains(DestReg) && + AArch64::FPR128RegClass.contains(SrcReg)) { + if(getSubTarget().hasNEON()) { + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + BuildMI(MBB, I, DL, get(AArch64::STRQpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(AArch64::SP) + .addImm(-16); + BuildMI(MBB, I, DL, get(AArch64::LDRQpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(DestReg, RegState::Define) + .addReg(AArch64::SP) + .addImm(16); } + return; + } - // Create a virtal register in *TLSBaseAddrReg, and populate it by - // inserting a copy instruction after I. Returns the new instruction. - MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) { - MachineFunction *MF = I->getParent()->getParent(); - const AArch64TargetMachine *TM = - static_cast(&MF->getTarget()); - const AArch64InstrInfo *TII = TM->getInstrInfo(); + if (AArch64::FPR64RegClass.contains(DestReg) && + AArch64::FPR64RegClass.contains(SrcReg)) { + if(getSubTarget().hasNEON()) { + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } + return; + } - // Create a virtual register for the TLS base address. - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); + if (AArch64::FPR32RegClass.contains(DestReg) && + AArch64::FPR32RegClass.contains(SrcReg)) { + if(getSubTarget().hasNEON()) { + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } + return; + } - // Insert a copy from X0 to TLSBaseAddrReg for later. 
- MachineInstr *Next = I->getNextNode(); - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg) - .addReg(AArch64::X0); + if (AArch64::FPR16RegClass.contains(DestReg) && + AArch64::FPR16RegClass.contains(SrcReg)) { + if(getSubTarget().hasNEON()) { + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, + &AArch64::FPR32RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, + &AArch64::FPR32RegClass); + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } + return; + } - return Copy; + if (AArch64::FPR8RegClass.contains(DestReg) && + AArch64::FPR8RegClass.contains(SrcReg)) { + if(getSubTarget().hasNEON()) { + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR128RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR128RegClass); + BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) + .addReg(SrcReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + } else { + DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, + &AArch64::FPR32RegClass); + SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, + &AArch64::FPR32RegClass); + BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } + return; + } + + // Copies between GPR64 and FPR64. + if (AArch64::FPR64RegClass.contains(DestReg) && + AArch64::GPR64RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (AArch64::GPR64RegClass.contains(DestReg) && + AArch64::FPR64RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + // Copies between GPR32 and FPR32. 
+ if (AArch64::FPR32RegClass.contains(DestReg) && + AArch64::GPR32RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + if (AArch64::GPR32RegClass.contains(DestReg) && + AArch64::FPR32RegClass.contains(SrcReg)) { + BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - virtual const char *getPassName() const { - return "Local Dynamic TLS Access Clean-up"; + if (DestReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy"); + BuildMI(MBB, I, DL, get(AArch64::MSR)) + .addImm(AArch64SysReg::NZCV) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define); + return; + } + + if (SrcReg == AArch64::NZCV) { + assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy"); + BuildMI(MBB, I, DL, get(AArch64::MRS)) + .addReg(DestReg) + .addImm(AArch64SysReg::NZCV) + .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc)); + return; + } + + llvm_unreachable("unimplemented reg-to-reg copy"); +} + +void AArch64InstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool isKill, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + + MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); + unsigned Opc = 0; + bool Offset = true; + switch (RC->getSize()) { + case 1: + if (AArch64::FPR8RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRBui; + break; + case 2: + if (AArch64::FPR16RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRHui; + break; + case 4: + if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::STRWui; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass); + else + assert(SrcReg != AArch64::WSP); + } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRSui; + break; + case 8: + if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::STRXui; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); + else + assert(SrcReg != AArch64::SP); + } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRDui; + break; + case 16: + if (AArch64::FPR128RegClass.hasSubClassEq(RC)) + Opc = AArch64::STRQui; + else if (AArch64::DDRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register store without NEON"); + Opc = AArch64::ST1Twov1d, Offset = false; + } + break; + case 24: + if (AArch64::DDDRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register store without NEON"); + Opc = AArch64::ST1Threev1d, Offset = false; + } + break; + case 32: + if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register store without NEON"); + Opc = AArch64::ST1Fourv1d, Offset = false; + } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register store without NEON"); + Opc = AArch64::ST1Twov2d, Offset = false; } + 
break; + case 48: + if (AArch64::QQQRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register store without NEON"); + Opc = AArch64::ST1Threev2d, Offset = false; + } + break; + case 64: + if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register store without NEON"); + Opc = AArch64::ST1Fourv2d, Offset = false; + } + break; + } + assert(Opc && "Unknown register class"); + + const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI); + + if (Offset) + MI.addImm(0); + MI.addMemOperand(MMO); +} + +void AArch64InstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, + int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - MachineFunctionPass::getAnalysisUsage(AU); + unsigned Opc = 0; + bool Offset = true; + switch (RC->getSize()) { + case 1: + if (AArch64::FPR8RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRBui; + break; + case 2: + if (AArch64::FPR16RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRHui; + break; + case 4: + if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::LDRWui; + if (TargetRegisterInfo::isVirtualRegister(DestReg)) + MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass); + else + assert(DestReg != AArch64::WSP); + } else if (AArch64::FPR32RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRSui; + break; + case 8: + if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) { + Opc = AArch64::LDRXui; + if (TargetRegisterInfo::isVirtualRegister(DestReg)) + MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass); + else + assert(DestReg != AArch64::SP); + } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRDui; + break; + case 16: + if (AArch64::FPR128RegClass.hasSubClassEq(RC)) + Opc = AArch64::LDRQui; + else if (AArch64::DDRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register load without NEON"); + Opc = AArch64::LD1Twov1d, Offset = false; } - }; + break; + case 24: + if (AArch64::DDDRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register load without NEON"); + Opc = AArch64::LD1Threev1d, Offset = false; + } + break; + case 32: + if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register load without NEON"); + Opc = AArch64::LD1Fourv1d, Offset = false; + } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register load without NEON"); + Opc = AArch64::LD1Twov2d, Offset = false; + } + break; + case 48: + if (AArch64::QQQRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register load without NEON"); + Opc = AArch64::LD1Threev2d, Offset = false; + } + break; + case 64: + if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { + assert(getSubTarget().hasNEON() && + "Unexpected register load without NEON"); + Opc = 
AArch64::LD1Fourv2d, Offset = false; + } + break; + } + assert(Opc && "Unknown register class"); + + const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) + .addReg(DestReg, getDefRegState(true)) + .addFrameIndex(FI); + if (Offset) + MI.addImm(0); + MI.addMemOperand(MMO); +} + +void llvm::emitFrameOffset(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, int Offset, + const AArch64InstrInfo *TII, + MachineInstr::MIFlag Flag, bool SetNZCV) { + if (DestReg == SrcReg && Offset == 0) + return; + + bool isSub = Offset < 0; + if (isSub) + Offset = -Offset; + + // FIXME: If the offset won't fit in 24-bits, compute the offset into a + // scratch register. If DestReg is a virtual register, use it as the + // scratch register; otherwise, create a new virtual register (to be + // replaced by the scavenger at the end of PEI). That case can be optimized + // slightly if DestReg is SP which is always 16-byte aligned, so the scratch + // register can be loaded with offset%8 and the add/sub can use an extending + // instruction with LSL#3. + // Currently the function handles any offsets but generates a poor sequence + // of code. + // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); + + unsigned Opc; + if (SetNZCV) + Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri; + else + Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri; + const unsigned MaxEncoding = 0xfff; + const unsigned ShiftSize = 12; + const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; + while (((unsigned)Offset) >= (1 << ShiftSize)) { + unsigned ThisVal; + if (((unsigned)Offset) > MaxEncodableValue) { + ThisVal = MaxEncodableValue; + } else { + ThisVal = Offset & MaxEncodableValue; + } + assert((ThisVal >> ShiftSize) <= MaxEncoding && + "Encoding cannot handle value that big"); + BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addImm(ThisVal >> ShiftSize) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize)) + .setMIFlag(Flag); + + SrcReg = DestReg; + Offset -= ThisVal; + if (Offset == 0) + return; + } + BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) + .addReg(SrcReg) + .addImm(Offset) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) + .setMIFlag(Flag); } -char LDTLSCleanup::ID = 0; -FunctionPass* -llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } +MachineInstr * +AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, + const SmallVectorImpl &Ops, + int FrameIndex) const { + // This is a bit of a hack. Consider this instruction: + // + // %vreg0 = COPY %SP; GPR64all:%vreg0 + // + // We explicitly chose GPR64all for the virtual register so such a copy might + // be eliminated by RegisterCoalescer. However, that may not be possible, and + // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all + // register class, TargetInstrInfo::foldMemoryOperand() is going to try. + // + // To prevent that, we are going to constrain the %vreg0 register class here. 
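(The foldMemoryOperandImpl body this comment opens continues right after this sketch.) The emitFrameOffset loop above peels an oversized frame offset into ADD/SUB instructions whose immediates are at most 12 bits, optionally shifted left by 12. A standalone C++ sketch of just that arithmetic, not LLVM's API: it prints assembly text instead of building MachineInstrs, and the register names are illustrative.

  // Standalone model, not LLVM API: split a frame offset into 12-bit
  // ADD/SUB immediates, mirroring the chunking loop in emitFrameOffset.
  #include <cstdio>

  static void emitOffset(const char *Dst, const char *Src, long long Offset) {
    const char *Op = Offset < 0 ? "sub" : "add";
    unsigned long long Remaining = Offset < 0 ? -Offset : Offset;
    const unsigned long long MaxEncodableValue = 0xfffULL << 12;
    while (Remaining >= (1ULL << 12)) {
      unsigned long long ThisVal = Remaining > MaxEncodableValue
                                       ? MaxEncodableValue
                                       : (Remaining & MaxEncodableValue);
      std::printf("%s %s, %s, #%llu, lsl #12\n", Op, Dst, Src, ThisVal >> 12);
      Src = Dst; // later chunks accumulate onto the partial sum
      Remaining -= ThisVal;
      if (Remaining == 0)
        return;
    }
    std::printf("%s %s, %s, #%llu\n", Op, Dst, Src, Remaining);
  }

  int main() {
    emitOffset("x9", "sp", 0x1234567); // a 25-bit offset takes three adds
    return 0;
  }

For 0x1234567 this prints an add of #4095 lsl #12, an add of #565 lsl #12, and a final add of #1383, which sum back to the original offset.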
+ // + // + // + if (MI->isCopy()) { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (SrcReg == AArch64::SP && + TargetRegisterInfo::isVirtualRegister(DstReg)) { + MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass); + return nullptr; + } + if (DstReg == AArch64::SP && + TargetRegisterInfo::isVirtualRegister(SrcReg)) { + MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass); + return nullptr; + } + } + + // Cannot fold. + return nullptr; +} + +int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, + bool *OutUseUnscaledOp, + unsigned *OutUnscaledOp, + int *EmittableOffset) { + int Scale = 1; + bool IsSigned = false; + // The ImmIdx should be changed case by case if it is not 2. + unsigned ImmIdx = 2; + unsigned UnscaledOp = 0; + // Set output values in case of early exit. + if (EmittableOffset) + *EmittableOffset = 0; + if (OutUseUnscaledOp) + *OutUseUnscaledOp = false; + if (OutUnscaledOp) + *OutUnscaledOp = 0; + switch (MI.getOpcode()) { + default: + assert(0 && "unhandled opcode in rewriteAArch64FrameIndex"); + // Vector spills/fills can't take an immediate offset. + case AArch64::LD1Twov2d: + case AArch64::LD1Threev2d: + case AArch64::LD1Fourv2d: + case AArch64::LD1Twov1d: + case AArch64::LD1Threev1d: + case AArch64::LD1Fourv1d: + case AArch64::ST1Twov2d: + case AArch64::ST1Threev2d: + case AArch64::ST1Fourv2d: + case AArch64::ST1Twov1d: + case AArch64::ST1Threev1d: + case AArch64::ST1Fourv1d: + return AArch64FrameOffsetCannotUpdate; + case AArch64::PRFMui: + Scale = 8; + UnscaledOp = AArch64::PRFUMi; + break; + case AArch64::LDRXui: + Scale = 8; + UnscaledOp = AArch64::LDURXi; + break; + case AArch64::LDRWui: + Scale = 4; + UnscaledOp = AArch64::LDURWi; + break; + case AArch64::LDRBui: + Scale = 1; + UnscaledOp = AArch64::LDURBi; + break; + case AArch64::LDRHui: + Scale = 2; + UnscaledOp = AArch64::LDURHi; + break; + case AArch64::LDRSui: + Scale = 4; + UnscaledOp = AArch64::LDURSi; + break; + case AArch64::LDRDui: + Scale = 8; + UnscaledOp = AArch64::LDURDi; + break; + case AArch64::LDRQui: + Scale = 16; + UnscaledOp = AArch64::LDURQi; + break; + case AArch64::LDRBBui: + Scale = 1; + UnscaledOp = AArch64::LDURBBi; + break; + case AArch64::LDRHHui: + Scale = 2; + UnscaledOp = AArch64::LDURHHi; + break; + case AArch64::LDRSBXui: + Scale = 1; + UnscaledOp = AArch64::LDURSBXi; + break; + case AArch64::LDRSBWui: + Scale = 1; + UnscaledOp = AArch64::LDURSBWi; + break; + case AArch64::LDRSHXui: + Scale = 2; + UnscaledOp = AArch64::LDURSHXi; + break; + case AArch64::LDRSHWui: + Scale = 2; + UnscaledOp = AArch64::LDURSHWi; + break; + case AArch64::LDRSWui: + Scale = 4; + UnscaledOp = AArch64::LDURSWi; + break; + + case AArch64::STRXui: + Scale = 8; + UnscaledOp = AArch64::STURXi; + break; + case AArch64::STRWui: + Scale = 4; + UnscaledOp = AArch64::STURWi; + break; + case AArch64::STRBui: + Scale = 1; + UnscaledOp = AArch64::STURBi; + break; + case AArch64::STRHui: + Scale = 2; + UnscaledOp = AArch64::STURHi; + break; + case AArch64::STRSui: + Scale = 4; + UnscaledOp = AArch64::STURSi; + break; + case AArch64::STRDui: + Scale = 8; + UnscaledOp = AArch64::STURDi; + break; + case AArch64::STRQui: + Scale = 16; + UnscaledOp = AArch64::STURQi; + break; + case AArch64::STRBBui: + Scale = 1; + UnscaledOp = AArch64::STURBBi; + break; + case AArch64::STRHHui: + Scale = 2; + UnscaledOp = AArch64::STURHHi; + break; + + case AArch64::LDPXi: + case AArch64::LDPDi: + case AArch64::STPXi: + case 
AArch64::STPDi: + IsSigned = true; + Scale = 8; + break; + case AArch64::LDPQi: + case AArch64::STPQi: + IsSigned = true; + Scale = 16; + break; + case AArch64::LDPWi: + case AArch64::LDPSi: + case AArch64::STPWi: + case AArch64::STPSi: + IsSigned = true; + Scale = 4; + break; + + case AArch64::LDURXi: + case AArch64::LDURWi: + case AArch64::LDURBi: + case AArch64::LDURHi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURHHi: + case AArch64::LDURBBi: + case AArch64::LDURSBXi: + case AArch64::LDURSBWi: + case AArch64::LDURSHXi: + case AArch64::LDURSHWi: + case AArch64::LDURSWi: + case AArch64::STURXi: + case AArch64::STURWi: + case AArch64::STURBi: + case AArch64::STURHi: + case AArch64::STURSi: + case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURBBi: + case AArch64::STURHHi: + Scale = 1; + break; + } + + Offset += MI.getOperand(ImmIdx).getImm() * Scale; + + bool useUnscaledOp = false; + // If the offset doesn't match the scale, we rewrite the instruction to + // use the unscaled instruction instead. Likewise, if we have a negative + // offset (and have an unscaled op to use). + if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) + useUnscaledOp = true; + + // Use an unscaled addressing mode if the instruction has a negative offset + // (or if the instruction is already using an unscaled addressing mode). + unsigned MaskBits; + if (IsSigned) { + // ldp/stp instructions. + MaskBits = 7; + Offset /= Scale; + } else if (UnscaledOp == 0 || useUnscaledOp) { + MaskBits = 9; + IsSigned = true; + Scale = 1; + } else { + MaskBits = 12; + IsSigned = false; + Offset /= Scale; + } + + // Attempt to fold address computation. + int MaxOff = (1 << (MaskBits - IsSigned)) - 1; + int MinOff = (IsSigned ? (-MaxOff - 1) : 0); + if (Offset >= MinOff && Offset <= MaxOff) { + if (EmittableOffset) + *EmittableOffset = Offset; + Offset = 0; + } else { + int NewOff = Offset < 0 ? MinOff : MaxOff; + if (EmittableOffset) + *EmittableOffset = NewOff; + Offset = (Offset - NewOff) * Scale; + } + if (OutUseUnscaledOp) + *OutUseUnscaledOp = useUnscaledOp; + if (OutUnscaledOp) + *OutUnscaledOp = UnscaledOp; + return AArch64FrameOffsetCanUpdate | + (Offset == 0 ? AArch64FrameOffsetIsLegal : 0); +} + +bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const AArch64InstrInfo *TII) { + unsigned Opcode = MI.getOpcode(); + unsigned ImmIdx = FrameRegIdx + 1; + + if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { + Offset += MI.getOperand(ImmIdx).getImm(); + emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), + MI.getOperand(0).getReg(), FrameReg, Offset, TII, + MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); + MI.eraseFromParent(); + Offset = 0; + return true; + } + + int NewOffset; + unsigned UnscaledOp; + bool UseUnscaledOp; + int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, + &UnscaledOp, &NewOffset); + if (Status & AArch64FrameOffsetCanUpdate) { + if (Status & AArch64FrameOffsetIsLegal) + // Replace the FrameIndex with FrameReg. 
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + if (UseUnscaledOp) + MI.setDesc(TII->get(UnscaledOp)); + + MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); + return Offset == 0; + } + + return false; +} + +void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + NopInst.setOpcode(AArch64::HINT); + NopInst.addOperand(MCOperand::CreateImm(0)); +} diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index ad20f9c..90ce75f 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_AARCH64INSTRINFO_H -#define LLVM_TARGET_AARCH64INSTRINFO_H +#ifndef LLVM_TARGET_AArch64INSTRINFO_H +#define LLVM_TARGET_AArch64INSTRINFO_H +#include "AArch64.h" #include "AArch64RegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" @@ -23,89 +24,208 @@ namespace llvm { class AArch64Subtarget; +class AArch64TargetMachine; class AArch64InstrInfo : public AArch64GenInstrInfo { + // Reserve bits in the MachineMemOperand target hint flags, starting at 1. + // They will be shifted into MOTargetHintStart when accessed. + enum TargetMemOperandFlags { + MOSuppressPair = 1 + }; + const AArch64RegisterInfo RI; const AArch64Subtarget &Subtarget; + public: - explicit AArch64InstrInfo(const AArch64Subtarget &TM); + explicit AArch64InstrInfo(const AArch64Subtarget &STI); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). - /// - const TargetRegisterInfo &getRegisterInfo() const { return RI; } + const AArch64RegisterInfo &getRegisterInfo() const { return RI; } const AArch64Subtarget &getSubTarget() const { return Subtarget; } - void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - void CopyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg) const; + unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const override; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + /// Returns true if there is a shiftable register and that the shift value + /// is non-zero. + bool hasShiftedReg(const MachineInstr *MI) const; + + /// Returns true if there is an extendable register and that the extending + /// value is non-zero. + bool hasExtendedReg(const MachineInstr *MI) const; + + /// \brief Does this instruction set its full destination register to zero? + bool isGPRZero(const MachineInstr *MI) const; + + /// \brief Does this instruction rename a GPR without modifying bits? + bool isGPRCopy(const MachineInstr *MI) const; + + /// \brief Does this instruction rename an FPR without modifying bits? + bool isFPRCopy(const MachineInstr *MI) const; + + /// Return true if this is load/store scales or extends its register offset. + /// This refers to scaling a dynamic index as opposed to scaled immediates. + /// MI should be a memory op that allows scaled addressing. 
+  bool isScaledAddr(const MachineInstr *MI) const;
+
+  /// Return true if pairing the given load or store is hinted to be
+  /// unprofitable.
+  bool isLdStPairSuppressed(const MachineInstr *MI) const;
+
+  /// Hint that pairing the given load or store is unprofitable.
+  void suppressLdStPair(MachineInstr *MI) const;
+
+  bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+                            unsigned &Offset,
+                            const TargetRegisterInfo *TRI) const override;
+
+  bool enableClusterLoads() const override { return true; }
+
+  bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
+                          unsigned NumLoads) const override;
+
+  bool shouldScheduleAdjacent(MachineInstr *First,
+                              MachineInstr *Second) const override;
+
+  MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+                                         uint64_t Offset, const MDNode *MDPtr,
+                                         DebugLoc DL) const;
+  void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                        DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+                        bool KillSrc, unsigned Opcode,
+                        llvm::ArrayRef<unsigned> Indices) const;
+  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                   DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const override;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MI,
-                           unsigned SrcReg, bool isKill, int FrameIndex,
+                           MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+                           bool isKill, int FrameIndex,
                            const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI) const;
+                           const TargetRegisterInfo *TRI) const override;
+
   void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI,
-                            unsigned DestReg, int FrameIdx,
-                            const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI) const;
+                            MachineBasicBlock::iterator MBBI, unsigned DestReg,
+                            int FrameIndex, const TargetRegisterClass *RC,
+                            const TargetRegisterInfo *TRI) const override;
+
+  MachineInstr *
+  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+                        const SmallVectorImpl<unsigned> &Ops,
+                        int FrameIndex) const override;

   bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                      MachineBasicBlock *&FBB,
                      SmallVectorImpl<MachineOperand> &Cond,
-                     bool AllowModify = false) const;
+                     bool AllowModify = false) const override;
+  unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
   unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB,
                         const SmallVectorImpl<MachineOperand> &Cond,
-                        DebugLoc DL) const;
-  unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
-  bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
-
-  /// Look through the instructions in this function and work out the largest
-  /// the stack frame can be while maintaining the ability to address local
-  /// slots with no complexities.
-  unsigned estimateRSStackLimit(MachineFunction &MF) const;
-
-  /// getAddressConstraints - For loads and stores (and PRFMs) taking an
-  /// immediate offset, this function determines the constraints required for
-  /// the immediate. It must satisfy:
-  ///    + MinOffset <= imm <= MaxOffset
-  ///    + imm % OffsetScale == 0
-  void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
-                             int &MinOffset, int &MaxOffset) const;
-
-
-  unsigned getInstSizeInBytes(const MachineInstr &MI) const;
-
-  unsigned getInstBundleLength(const MachineInstr &MI) const;
-
+                        DebugLoc DL) const override;
+  bool
+  ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+  bool canInsertSelect(const MachineBasicBlock &,
+                       const SmallVectorImpl<MachineOperand> &Cond, unsigned,
+                       unsigned, int &, int &, int &) const override;
+  void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                    DebugLoc DL, unsigned DstReg,
+                    const SmallVectorImpl<MachineOperand> &Cond,
+                    unsigned TrueReg, unsigned FalseReg) const override;
+  void getNoopForMachoTarget(MCInst &NopInst) const override;
+
+  /// analyzeCompare - For a comparison instruction, return the source registers
+  /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
+  /// Return true if the comparison instruction can be analyzed.
+  bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+                      unsigned &SrcReg2, int &CmpMask,
+                      int &CmpValue) const override;
+  /// optimizeCompareInstr - Convert the instruction supplying the argument to
+  /// the comparison into one that sets the zero bit in the flags register.
+  bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+                            unsigned SrcReg2, int CmpMask, int CmpValue,
+                            const MachineRegisterInfo *MRI) const override;
+
+private:
+  void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
+                             MachineBasicBlock *TBB,
+                             const SmallVectorImpl<MachineOperand> &Cond) const;
 };

-bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
-                          unsigned FrameReg, int &Offset,
-                          const AArch64InstrInfo &TII);
-
+/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
+/// plus Offset. This is intended to be used from within the prolog/epilog
+/// insertion (PEI) pass, where a virtual scratch register may be allocated
+/// if necessary, to be replaced by the scavenger at the end of PEI.
+void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                     DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
+                     const AArch64InstrInfo *TII,
+                     MachineInstr::MIFlag = MachineInstr::NoFlags,
+                     bool SetNZCV = false);
+
+/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
+/// FP. Return false if the offset could not be handled directly in MI, and
+/// return the left-over portion by reference.
+bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+                              unsigned FrameReg, int &Offset,
+                              const AArch64InstrInfo *TII);
+
+/// \brief Used to report the frame offset status in isAArch64FrameOffsetLegal.
+enum AArch64FrameOffsetStatus {
+  AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
+  AArch64FrameOffsetIsLegal = 0x1,      ///< Offset is legal.
+  AArch64FrameOffsetCanUpdate = 0x2     ///< Offset can apply, at least partly.
 };

-void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                   DebugLoc dl, const TargetInstrInfo &TII,
-                   unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
-                   int64_t NumBytes,
-                   MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+/// \brief Check if the @p Offset is a valid frame offset for @p MI.
+/// The returned value reports the validity of the frame offset for @p MI.
+/// It uses the values defined by AArch64FrameOffsetStatus for that.
+/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
+/// use an offset.
+/// If result & AArch64FrameOffsetIsLegal, @p Offset can completely be
+/// rewritten in @p MI.
+/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
+/// amount that is off the limit of the legal offset.
+/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
+/// turned into an unscaled operation, whose opcode is in @p OutUnscaledOp.
+/// If set, @p EmittableOffset contains the amount that can be set in @p MI
+/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
+/// is a legal offset.
+int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
+                              bool *OutUseUnscaledOp = nullptr,
+                              unsigned *OutUnscaledOp = nullptr,
+                              int *EmittableOffset = nullptr);
+
+static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
+
+static inline bool isCondBranchOpcode(int Opc) {
+  switch (Opc) {
+  case AArch64::Bcc:
+  case AArch64::CBZW:
+  case AArch64::CBZX:
+  case AArch64::CBNZW:
+  case AArch64::CBNZX:
+  case AArch64::TBZW:
+  case AArch64::TBZX:
+  case AArch64::TBNZW:
+  case AArch64::TBNZX:
+    return true;
+  default:
+    return false;
+  }
+}

-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                  DebugLoc dl, const TargetInstrInfo &TII,
-                  unsigned ScratchReg, int64_t NumBytes,
-                  MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+static inline bool isIndirectBranchOpcode(int Opc) { return Opc == AArch64::BR; }

-}
+} // end namespace llvm

 #endif
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 7d7a641..9ad36e8 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1,4 +1,4 @@
-//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
+//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file describes the AArch64 scalar instructions in TableGen format.
+// AArch64 Instruction definitions.
 //
 //===----------------------------------------------------------------------===//

@@ -19,5368 +19,5266 @@ def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
 def HasNEON : Predicate<"Subtarget->hasNEON()">,
               AssemblerPredicate<"FeatureNEON", "neon">;
 def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
-                AssemblerPredicate<"FeatureCrypto","crypto">;
-
-// Use fused MAC if more precision in FP computation is allowed.
-def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
-                            " FPOpFusion::Fast)">;
-include "AArch64InstrFormats.td"
+                AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasCRC : Predicate<"Subtarget->hasCRC()">,
+             AssemblerPredicate<"FeatureCRC", "crc">;
+def IsLE : Predicate<"Subtarget->isLittleEndian()">;
+def IsBE : Predicate<"!Subtarget->isLittleEndian()">;

 //===----------------------------------------------------------------------===//
-// AArch64 specific pattern fragments.
+// AArch64-specific DAG Nodes.
 //
-// An 'fmul' node with a single use.
-def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{ - return N->hasOneUse(); -}]>; - - -//===----------------------------------------------------------------------===// -// Target-specific ISD nodes and profiles -//===----------------------------------------------------------------------===// - -def SDT_A64ret : SDTypeProfile<0, 0, []>; -def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain, - SDNPOptInGlue, - SDNPVariadic]>; - -// (ins NZCV, Condition, Dest) -def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>; -def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>; - -// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition) -def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<2, 3>]>; -def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>; - -// (outs NZCV), (ins LHS, RHS, Condition) -def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, - SDTCisSameAs<1, 2>]>; -def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>; - - -// (outs GPR64), (ins) -def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; - -// A64 compares don't care about the cond really (they set all flags) so a -// simple binary operator is useful. -def A64cmp : PatFrag<(ops node:$lhs, node:$rhs), - (A64setcc node:$lhs, node:$rhs, cond)>; - - -// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN -// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C -// and V flags can be set differently by this operation. It comes down to -// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are -// then everything is fine. If not then the optimization is wrong. Thus general -// comparisons are only valid if op2 != 0. - -// So, finally, the only LLVM-native comparisons that don't mention C and V are -// SETEQ and SETNE. They're the only ones we can safely use CMN for in the -// absence of information about op2. -def equality_cond : PatLeaf<(cond), [{ - return N->get() == ISD::SETEQ || N->get() == ISD::SETNE; -}]>; - -def A64cmn : PatFrag<(ops node:$lhs, node:$rhs), - (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>; - -// There are two layers of indirection here, driven by the following -// considerations. -// + TableGen does not know CodeModel or Reloc so that decision should be -// made for a variable/address at ISelLowering. 
-// + The output of ISelLowering should be selectable (hence the Wrapper, -// rather than a bare target opcode) -def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisSameAs<0, 4>, - SDTCisPtrTy<0>]>; - -def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>; - -def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i32>, - SDTCisPtrTy<0>]>; - -def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>; - - -def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; -def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad, - [SDNPHasChain]>; +// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS +def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, SDTCisVT<1, i32>]>; + +// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS +def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<0>, + SDTCisVT<3, i32>]>; + +// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS +def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, + [SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>, + SDTCisInt<0>, + SDTCisVT<1, i32>, + SDTCisVT<4, i32>]>; + +def SDT_AArch64Brcond : SDTypeProfile<0, 3, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; +def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; +def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, OtherVT>]>; + + +def SDT_AArch64CSel : SDTypeProfile<1, 4, + [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisInt<3>, + SDTCisVT<4, i32>]>; +def SDT_AArch64FCmp : SDTypeProfile<0, 2, + [SDTCisFP<0>, + SDTCisSameAs<0, 1>]>; +def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; +def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; +def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; +def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; +def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisInt<2>, SDTCisInt<3>]>; +def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisInt<3>]>; +def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; + +def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; +def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; +def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; +def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; +def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisSameAs<0,3>]>; +def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; +def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; + +def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; + +def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, + SDTCisPtrTy<1>]>; +def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, + SDTCisSameAs<1, 4>]>; + + +// Node 
definitions. +def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>; +def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>; +def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>; +def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", + SDCallSeqStart<[ SDTCisVT<0, i32> ]>, + [SDNPHasChain, SDNPOutGlue]>; +def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", + SDCallSeqEnd<[ SDTCisVT<0, i32>, + SDTCisVT<1, i32> ]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def AArch64call : SDNode<"AArch64ISD::CALL", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, + [SDNPHasChain]>; +def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, + [SDNPHasChain]>; +def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz, + [SDNPHasChain]>; +def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz, + [SDNPHasChain]>; +def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz, + [SDNPHasChain]>; + + +def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>; +def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>; +def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>; +def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>; +def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >; +def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>; +def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut, + [SDNPCommutative]>; +def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>; +def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, + [SDNPCommutative]>; +def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; +def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; + +def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; + +def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; + +def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>; +def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>; + +def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; +def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; +def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; +def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>; +def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>; + +def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>; +def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>; +def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>; +def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>; +def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>; +def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>; + +def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>; +def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>; +def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>; +def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>; +def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>; +def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>; +def AArch64fmov : SDNode<"AArch64ISD::FMOV", 
SDT_AArch64MOVIedit>; + +def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>; +def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>; +def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>; +def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>; + +def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>; +def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>; +def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>; +def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>; +def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; +def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; +def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; +def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; + +def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; +def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; +def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>; + +def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>; +def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>; +def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>; +def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>; +def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>; + +def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>; +def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>; +def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>; + +def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>; +def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>; +def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>; +def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>; +def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>; +def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS), + (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>; + +def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>; +def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>; +def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>; +def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>; +def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>; + +def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>; +def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>; + +def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>; + +def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH, + [SDNPHasChain, SDNPSideEffect]>; + +def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>; +def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; + +def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL", + SDT_AArch64TLSDescCall, + [SDNPInGlue, SDNPOutGlue, SDNPHasChain, + SDNPVariadic]>; + +def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", + SDT_AArch64WrapperLarge>; -// (A64BFI LHS, RHS, LSB, Width) -def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, - SDTCisVT<3, i64>, - SDTCisVT<4, i64>]>; -def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>; +//===----------------------------------------------------------------------===// -// (A64EXTR HiReg, LoReg, LSB) -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, 
SDTCisSameAs<1, 2>, - SDTCisVT<3, i64>]>; -def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>; +//===----------------------------------------------------------------------===// -// (A64[SU]BFX Field, ImmR, ImmS). +// AArch64 Instruction Predicate Definitions. // -// Note that ImmR and ImmS are already encoded for the actual instructions. The -// more natural LSB and Width mix together to form ImmR and ImmS, something -// which TableGen can't handle. -def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>; -def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; +def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; +def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">; +def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; +def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; +def ForCodeSize : Predicate<"ForCodeSize">; +def NotForCodeSize : Predicate<"!ForCodeSize">; -def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; +include "AArch64InstrFormats.td" -class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; +//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Call sequence pseudo-instructions +// Miscellaneous instructions. //===----------------------------------------------------------------------===// +let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { +def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), + [(AArch64callseq_start timm:$amt)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(AArch64callseq_end timm:$amt1, timm:$amt2)]>; +} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 + +let isReMaterializable = 1, isCodeGenOnly = 1 in { +// FIXME: The following pseudo instructions are only needed because remat +// cannot handle multiple instructions. When that changes, they can be +// removed, along with the AArch64Wrapper node. + +let AddedComplexity = 10 in +def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), + [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, + Sched<[WriteLDAdr]>; + +// The MOVaddr instruction should match only when the add is not folded +// into a load or store address. 
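(The MOVaddr pseudos that this comment introduces follow right after this sketch.) MOVaddr stands for an adrp/add pair: adrp materializes the 4 KiB page containing the symbol and the add fills in the low 12 bits. A standalone C++ sketch of that split, not LLVM's API: the sample address is hypothetical, and the real adrp encodes the page as a PC-relative delta, which this model glosses over.

  // Standalone model, not LLVM API: the page / low-12-bit split behind an
  // adrp+add pair.
  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t Sym = 0x7fb7e01234ULL;         // hypothetical symbol address
    uint64_t Page = Sym & ~uint64_t(0xfff); // adrp materializes this part
    uint64_t Lo12 = Sym & uint64_t(0xfff);  // the :lo12: add fills this in
    std::printf("adrp x0, 0x%llx\n", (unsigned long long)Page);
    std::printf("add  x0, x0, #0x%llx\n", (unsigned long long)Lo12);
    return 0;
  }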
+def MOVaddr + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), + tglobaladdr:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrJT + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), + tjumptable:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrCP + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), + tconstpool:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrBA + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), + tblockaddress:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrTLS + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), + tglobaltlsaddr:$low))]>, + Sched<[WriteAdrAdr]>; +def MOVaddrEXT + : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), + [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), + texternalsym:$low))]>, + Sched<[WriteAdrAdr]>; + +} // isReMaterializable, isCodeGenOnly + +def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr), + (LOADgot tglobaltlsaddr:$addr)>; + +def : Pat<(AArch64LOADgot texternalsym:$addr), + (LOADgot texternalsym:$addr)>; + +def : Pat<(AArch64LOADgot tconstpool:$addr), + (LOADgot tconstpool:$addr)>; -def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; - -def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -// The TLSDESCCALL node is a variant call which goes to an indirectly calculated -// destination but needs a relocation against a fixed symbol. As such it has two -// certain operands: the callee and the relocated variable. -// -// The TLS ABI only allows it to be selected to a BLR instructin (with -// appropriate relocation). -def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; - -def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; - - -def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>; -def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; - -def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>; -def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - - +//===----------------------------------------------------------------------===// +// System instructions. +//===----------------------------------------------------------------------===// -// These pseudo-instructions have special semantics by virtue of being passed to -// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by -// LowerCall to (in our case) tell the back-end about stack adjustments for -// arguments passed on the stack. Here we select those markers to -// pseudo-instructions which explicitly set the stack, and finally in the -// RegisterInfo we convert them to a true stack adjustment. 
-let Defs = [XSP], Uses = [XSP] in {
-  def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
-                                    [(AArch64callseq_start timm:$amt)]>;
+def HINT : HintI<"hint">;
+def : InstAlias<"nop", (HINT 0b000)>;
+def : InstAlias<"yield",(HINT 0b001)>;
+def : InstAlias<"wfe", (HINT 0b010)>;
+def : InstAlias<"wfi", (HINT 0b011)>;
+def : InstAlias<"sev", (HINT 0b100)>;
+def : InstAlias<"sevl", (HINT 0b101)>;

-  def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
-                                  [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
-}
+ // As far as LLVM is concerned this writes to the system's exclusive monitors.
+let mayLoad = 1, mayStore = 1 in
+def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;

-//===----------------------------------------------------------------------===//
-// Atomic operation pseudo-instructions
-//===----------------------------------------------------------------------===//
+def DMB : CRmSystemI<barrier_op, 0b101, "dmb">;
+def DSB : CRmSystemI<barrier_op, 0b100, "dsb">;
+def ISB : CRmSystemI<barrier_op, 0b110, "isb">;
+def : InstAlias<"clrex", (CLREX 0xf)>;
+def : InstAlias<"isb", (ISB 0xf)>;

-// These get selected from C++ code as a pretty much direct translation from the
-// generic DAG nodes. The one exception is the AtomicOrdering is added as an
-// operand so that the eventual lowering can make use of it and choose
-// acquire/release operations when required.
-
-let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
-multiclass AtomicSizes {
-  def _I8 : PseudoInst<(outs GPR32:$dst),
-                       (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I16 : PseudoInst<(outs GPR32:$dst),
-                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I32 : PseudoInst<(outs GPR32:$dst),
-                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I64 : PseudoInst<(outs GPR64:$dst),
-                        (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
-}
-}
+def MRS : MRSI;
+def MSR : MSRI;
+def MSRpstate: MSRpstateI;

-defm ATOMIC_LOAD_ADD : AtomicSizes;
-defm ATOMIC_LOAD_SUB : AtomicSizes;
-defm ATOMIC_LOAD_AND : AtomicSizes;
-defm ATOMIC_LOAD_OR : AtomicSizes;
-defm ATOMIC_LOAD_XOR : AtomicSizes;
-defm ATOMIC_LOAD_NAND : AtomicSizes;
-defm ATOMIC_SWAP : AtomicSizes;
-let Defs = [NZCV] in {
-  // These operations need a CMP to calculate the correct value
-  defm ATOMIC_LOAD_MIN : AtomicSizes;
-  defm ATOMIC_LOAD_MAX : AtomicSizes;
-  defm ATOMIC_LOAD_UMIN : AtomicSizes;
-  defm ATOMIC_LOAD_UMAX : AtomicSizes;
-}
+// The thread pointer (on Linux, at least, where this has been implemented) is
+// TPIDR_EL0.
+def : Pat<(AArch64threadpointer), (MRS 0xde82)>;

-class AtomicCmpSwap<RegisterClass GPRData>
-  : PseudoInst<(outs GPRData:$dst),
-               (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
-                    i32imm:$ordering), []> {
-  let usesCustomInserter = 1;
-  let hasCtrlDep = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
-  let Defs = [NZCV];
-}
+// Generic system instructions
+def SYSxt : SystemXtI<0, "sys">;
+def SYSLxt : SystemLXtI<1, "sysl">;

-def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
+def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
+                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
+                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;

 //===----------------------------------------------------------------------===//
-// Add-subtract (extended register) instructions
+// Move immediate instructions.
 //===----------------------------------------------------------------------===//
-// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
-
-// The RHS of these operations is conceptually a sign/zero-extended
-// register, optionally shifted left by 1-4. The extension can be a
-// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
-// must be specified with one exception:
-
-// If one of the registers is sp/wsp then LSL is an alias for UXTW in
-// 32-bit instructions and UXTX in 64-bit versions, the shift amount
-// is not optional in that case (but can explicitly be 0), and the
-// entire suffix can be skipped (e.g. "add sp, x3, x2").
-
-multiclass extend_operands<string PREFIX, string Diag> {
-  def _asmoperand : AsmOperandClass {
-    let Name = PREFIX;
-    let RenderMethod = "addRegExtendOperands";
-    let PredicateMethod = "isRegExtend";
-    let DiagnosticType = "AddSubRegExtend" # Diag;
-  }
-
-  def _operand : Operand<i64>,
-                 ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
-    let PrintMethod = "printRegExtendOperand";
-    let DecoderMethod = "DecodeRegExtendOperand";
-    let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
-  }
-}
-defm UXTB : extend_operands<"UXTB", "Small">;
-defm UXTH : extend_operands<"UXTH", "Small">;
-defm UXTW : extend_operands<"UXTW", "Small">;
-defm UXTX : extend_operands<"UXTX", "Large">;
-defm SXTB : extend_operands<"SXTB", "Small">;
-defm SXTH : extend_operands<"SXTH", "Small">;
-defm SXTW : extend_operands<"SXTW", "Small">;
-defm SXTX : extend_operands<"SXTX", "Large">;
-
-def LSL_extasmoperand : AsmOperandClass {
-  let Name = "RegExtendLSL";
-  let RenderMethod = "addRegExtendOperands";
-  let DiagnosticType = "AddSubRegExtendLarge";
-}
+defm MOVK : InsertImmediate<0b11, "movk">;
+defm MOVN : MoveImmediate<0b00, "movn">;

-def LSL_extoperand : Operand<i64> {
-  let ParserMatchClass = LSL_extasmoperand;
-}
+let PostEncoderMethod = "fixMOVZ" in
+defm MOVZ : MoveImmediate<0b10, "movz">;

+// First group of aliases covers an implicit "lsl #0".
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;

-// The patterns for various sign-extensions are a little ugly and
-// non-uniform because everything has already been promoted to the
-// legal i64 and i32 types. We'll wrap the various variants up in a
-// class for use later.
-class extend_types {
-  dag uxtb; dag uxth; dag uxtw; dag uxtx;
-  dag sxtb; dag sxth; dag sxtw; dag sxtx;
-  ValueType ty;
-  RegisterClass GPR;
-}
+// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; -def extends_to_i64 : extend_types { - let uxtb = (and (anyext i32:$Rm), 255); - let uxth = (and (anyext i32:$Rm), 65535); - let uxtw = (zext i32:$Rm); - let uxtx = (i64 $Rm); +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>; - let sxtb = (sext_inreg (anyext i32:$Rm), i8); - let sxth = (sext_inreg (anyext i32:$Rm), i16); - let sxtw = (sext i32:$Rm); - let sxtx = (i64 $Rm); +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>; +def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>; - let ty = i64; - let GPR = GPR64xsp; -} +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>; +def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>; -def extends_to_i32 : extend_types { - let uxtb = (and i32:$Rm, 255); - let uxth = (and i32:$Rm, 65535); - let uxtw = (i32 i32:$Rm); - let uxtx = (i32 i32:$Rm); +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>; +def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>; - let sxtb = (sext_inreg i32:$Rm, i8); - let sxth = (sext_inreg i32:$Rm, i16); - let sxtw = (i32 i32:$Rm); - let sxtx = (i32 i32:$Rm); +// Final group of aliases covers true "mov $Rd, $imm" cases. +multiclass movw_mov_alias { + def _asmoperand : AsmOperandClass { + let Name = basename # width # "_lsl" # shift # "MovAlias"; + let PredicateMethod = "is" # basename # "MovAlias<" # width # ", " + # shift # ">"; + let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">"; + } - let ty = i32; - let GPR = GPR32wsp; -} + def _movimm : Operand { + let ParserMatchClass = !cast(NAME # "_asmoperand"); + } -// Now, six of the extensions supported are easy and uniform: if the source size -// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate -// those instructions in one block. - -// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me: -// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would -// be impossible. -// + Patterns are very different as well. -// + Passing different registers would be ugly (more fields in extend_types -// would probably be the best option). 
-multiclass addsub_exts { - def w_uxtb : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_uxth : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_uxtw : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def w_sxtb : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_sxth : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; - def w_sxtw : A64I_addsubext, - Sched<[WriteALU, ReadALU, ReadALU]>; -} + def : InstAlias<"mov $Rd, $imm", + (INST GPR:$Rd, !cast(NAME # "_movimm"):$imm, shift)>; +} + +defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>; +defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>; + +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>; +defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>; + +defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>; +defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>; + +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>; +defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>; + +let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1, + isAsCheapAsAMove = 1 in { +// FIXME: The following pseudo instructions are only needed because remat +// cannot handle multiple instructions. When that changes, we can select +// directly to the real instructions and get rid of these pseudos. + +def MOVi32imm + : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), + [(set GPR32:$dst, imm:$src)]>, + Sched<[WriteImm]>; +def MOVi64imm + : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), + [(set GPR64:$dst, imm:$src)]>, + Sched<[WriteImm]>; +} // isReMaterializable, isCodeGenOnly + +// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the +// eventual expansion code fewer bits to worry about getting right. Marshalling +// the types is a little tricky though: +def i64imm_32bit : ImmLeaf(Imm); +}]>; -// These two could be merge in with the above, but their patterns aren't really -// necessary and the naming-scheme would necessarily break: -multiclass addsub_xxtx { - def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011, - outs, - (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111, - outs, - (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No Pattern: same as uxtx */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} +def trunc_imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), MVT::i32); +}]>; -multiclass addsub_wxtx { - def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011, - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No pattern: probably same as uxtw */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111, - outs, (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3), - !strconcat(asmop, "$Rn, $Rm, $Imm3"), - [/* No Pattern: probably same as uxtw */], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; -} +def : Pat<(i64 i64imm_32bit:$src), + (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>; + +// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK +// sequences. 
+def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, + tglobaladdr:$g1, tglobaladdr:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48), + tglobaladdr:$g2, 32), + tglobaladdr:$g1, 16), + tglobaladdr:$g0, 0)>; + +def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, + tblockaddress:$g1, tblockaddress:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48), + tblockaddress:$g2, 32), + tblockaddress:$g1, 16), + tblockaddress:$g0, 0)>; + +def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2, + tconstpool:$g1, tconstpool:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48), + tconstpool:$g2, 32), + tconstpool:$g1, 16), + tconstpool:$g0, 0)>; + +def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2, + tjumptable:$g1, tjumptable:$g0), + (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48), + tjumptable:$g2, 32), + tjumptable:$g1, 16), + tjumptable:$g0, 0)>; -class SetRD - : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>; -class SetNZCV - : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>; - -defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD, - (outs GPR64xsp:$Rd), extends_to_i64>, - addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD, - (outs GPR64xsp:$Rd)>; -defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD, - (outs GPR32wsp:$Rd), extends_to_i32>, - addsub_wxtx< 0b0, 0b0, "add\t$Rd, ", - (outs GPR32wsp:$Rd)>; -defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD, - (outs GPR64xsp:$Rd), extends_to_i64>, - addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD, - (outs GPR64xsp:$Rd)>; -defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD, - (outs GPR32wsp:$Rd), extends_to_i32>, - addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ", - (outs GPR32wsp:$Rd)>; - -let Defs = [NZCV] in { -defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD, - (outs GPR64:$Rd), extends_to_i64>, - addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD, - (outs GPR64:$Rd)>; -defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD, - (outs GPR32:$Rd), extends_to_i32>, - addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ", - (outs GPR32:$Rd)>; -defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD, - (outs GPR64:$Rd), extends_to_i64>, - addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD, - (outs GPR64:$Rd)>; -defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD, - (outs GPR32:$Rd), extends_to_i32>, - addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ", - (outs GPR32:$Rd)>; - - -let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in { -defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV, - (outs), extends_to_i64>, - addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV, (outs)>; -defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV, - (outs), extends_to_i32>, - addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>; -defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV, - (outs), extends_to_i64>, - addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV, (outs)>; -defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV, - (outs), extends_to_i32>, - addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>; -} -} -// Now patterns for the operation without a shift being needed. No patterns are -// created for uxtx/sxtx since they're non-uniform and it's expected that -// add/sub (shifted register) will handle those cases anyway. 
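Both the i64imm_32bit leaf and the SUBREG_TO_REG pattern above lean on one
architectural fact: writing a W register clears bits 63:32 of the corresponding
X register. A tiny C++ model of the predicate and the trunc_imm transform
(function names are hypothetical):

  #include <cstdint>

  // i64imm_32bit: the immediate survives truncation to an unsigned 32-bit value.
  bool isI64Imm32Bit(uint64_t imm) { return (imm >> 32) == 0; }

  // trunc_imm: MOVi32imm materializes this 32-bit value, and the implicit
  // zero-extension of the W-register write supplies SUBREG_TO_REG's zero bits.
  uint32_t truncImm(uint64_t imm) { return static_cast<uint32_t>(imm); }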
-multiclass addsubext_noshift_patterns { - def : Pat<(nodeop exts.ty:$Rn, exts.uxtb), - (!cast(prefix # "w_uxtb") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.uxth), - (!cast(prefix # "w_uxth") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.uxtw), - (!cast(prefix # "w_uxtw") $Rn, $Rm, 0)>; - - def : Pat<(nodeop exts.ty:$Rn, exts.sxtb), - (!cast(prefix # "w_sxtb") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.sxth), - (!cast(prefix # "w_sxth") $Rn, $Rm, 0)>; - def : Pat<(nodeop exts.ty:$Rn, exts.sxtw), - (!cast(prefix # "w_sxtw") $Rn, $Rm, 0)>; -} +//===----------------------------------------------------------------------===// +// Arithmetic instructions. +//===----------------------------------------------------------------------===// -defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>; -defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>; -defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>; -defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>; +// Add/subtract with carry. +defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>; +defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>; + +def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; +def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; +def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; +def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; + +// Add/subtract +defm ADD : AddSub<0, "add", add>; +defm SUB : AddSub<1, "sub">; + +def : InstAlias<"mov $dst, $src", + (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>; +def : InstAlias<"mov $dst, $src", + (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>; + +defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">; +defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">; + +// Use SUBS instead of SUB to enable CSE between SUBS and SUB. +def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), + (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; +def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), + (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; +def : Pat<(sub GPR32:$Rn, GPR32:$Rm), + (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; +def : Pat<(sub GPR64:$Rn, GPR64:$Rm), + (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; +def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), + (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; +def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), + (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; +def : Pat<(sub GPR32sp:$R2, arith_extended_reg32:$R3), + (SUBSWrx GPR32sp:$R2, arith_extended_reg32:$R3)>; +def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3), + (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>; + +// Because of the immediate format for add/sub-imm instructions, the +// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). +// These patterns capture that transformation. 
+let AddedComplexity = 1 in { +def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +} + +// Because of the immediate format for add/sub-imm instructions, the +// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). +// These patterns capture that transformation. +let AddedComplexity = 1 in { +def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), + (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; +def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), + (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; +} + +def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; +def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; +def : InstAlias<"neg $dst, $src$shift", + (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; +def : InstAlias<"neg $dst, $src$shift", + (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; + +def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>; +def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>; +def : InstAlias<"negs $dst, $src$shift", + (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>; +def : InstAlias<"negs $dst, $src$shift", + (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>; + + +// Unsigned/Signed divide +defm UDIV : Div<0, "udiv", udiv>; +defm SDIV : Div<1, "sdiv", sdiv>; +let isCodeGenOnly = 1 in { +defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>; +defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>; +} + +// Variable shift +defm ASRV : Shift<0b10, "asr", sra>; +defm LSLV : Shift<0b00, "lsl", shl>; +defm LSRV : Shift<0b01, "lsr", srl>; +defm RORV : Shift<0b11, "ror", rotr>; + +def : ShiftAlias<"asrv", ASRVWr, GPR32>; +def : ShiftAlias<"asrv", ASRVXr, GPR64>; +def : ShiftAlias<"lslv", LSLVWr, GPR32>; +def : ShiftAlias<"lslv", LSLVXr, GPR64>; +def : ShiftAlias<"lsrv", LSRVWr, GPR32>; +def : ShiftAlias<"lsrv", LSRVXr, GPR64>; +def : ShiftAlias<"rorv", RORVWr, GPR32>; +def : ShiftAlias<"rorv", RORVXr, GPR64>; + +// Multiply-add +let AddedComplexity = 7 in { +defm MADD : MulAccum<0, "madd", add>; +defm MSUB : MulAccum<1, "msub", sub>; + +def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), + (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; +def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), + (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; + +def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), + (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; +def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), + (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; +} // AddedComplexity = 7 + +let AddedComplexity = 5 in { +def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; +def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; +def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; +def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; + +def : Pat<(i64 (mul (sext GPR32:$Rn), 
(sext GPR32:$Rm))), + (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; +def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), + (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; + +def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), + (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; +def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), + (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; +} // AddedComplexity = 5 + +def : MulAccumWAlias<"mul", MADDWrrr>; +def : MulAccumXAlias<"mul", MADDXrrr>; +def : MulAccumWAlias<"mneg", MSUBWrrr>; +def : MulAccumXAlias<"mneg", MSUBXrrr>; +def : WideMulAccumAlias<"smull", SMADDLrrr>; +def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; +def : WideMulAccumAlias<"umull", UMADDLrrr>; +def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; + +// Multiply-high +def SMULHrr : MulHi<0b010, "smulh", mulhs>; +def UMULHrr : MulHi<0b110, "umulh", mulhu>; + +// CRC32 +def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">; +def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">; +def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">; +def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">; + +def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">; +def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">; +def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; +def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; -defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>; -defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>; -defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>; -defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>; -// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is -// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the -// operation. Also permitted in this case is complete omission of the argument, -// which implies "lsl #0". -multiclass lsl_aliases { - def : InstAlias; +//===----------------------------------------------------------------------===// +// Logical instructions. +//===----------------------------------------------------------------------===// - def : InstAlias; +// (immediate) +defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>; +defm AND : LogicalImm<0b00, "and", and>; +defm EOR : LogicalImm<0b10, "eor", xor>; +defm ORR : LogicalImm<0b01, "orr", or>; + +// FIXME: these aliases *are* canonical sometimes (when movz can't be +// used). Actually, it seems to be working right now, but putting logical_immXX +// here is a bit dodgy on the AsmParser side too. 
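As context for the FIXME above: the ORR-based mov aliases defined just below
win whenever the constant is a repeating bitmask. A value such as
0x00ff00ff00ff00ff is a single ORR with a logical immediate, while a plain
16-bit-chunk expansion needs four moves. A rough count of the chunk route
(sketch only; the real logical-immediate encoder lives elsewhere in the
backend):

  #include <cstdint>

  // Number of movz/movk instructions a 16-bit-chunk expansion would need.
  unsigned movChunkCount(uint64_t imm) {
    unsigned n = 0;
    for (unsigned g = 0; g < 4; ++g)
      if (((imm >> (16 * g)) & 0xffff) != 0)
        ++n;
    return n ? n : 1;                  // zero still takes one movz
  }
  // movChunkCount(0x00ff00ff00ff00ffULL) == 4; the logical-immediate form
  // (a replicated 16-bit element with eight set bits) does it in one ORR.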
+def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, + logical_imm32:$imm), 0>; +def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, + logical_imm64:$imm), 0>; + + +// (register) +defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>; +defm BICS : LogicalRegS<0b11, 1, "bics", + BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>; +defm AND : LogicalReg<0b00, 0, "and", and>; +defm BIC : LogicalReg<0b00, 1, "bic", + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; +defm EON : LogicalReg<0b10, 1, "eon", + BinOpFrag<(xor node:$LHS, (not node:$RHS))>>; +defm EOR : LogicalReg<0b10, 0, "eor", xor>; +defm ORN : LogicalReg<0b01, 1, "orn", + BinOpFrag<(or node:$LHS, (not node:$RHS))>>; +defm ORR : LogicalReg<0b01, 0, "orr", or>; + +def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>; +def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>; + +def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>; +def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>; + +def : InstAlias<"mvn $Wd, $Wm$sh", + (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>; +def : InstAlias<"mvn $Xd, $Xm$sh", + (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>; + +def : InstAlias<"tst $src1, $src2", + (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>; +def : InstAlias<"tst $src1, $src2", + (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>; + +def : InstAlias<"tst $src1, $src2", + (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>; +def : InstAlias<"tst $src1, $src2", + (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>; + +def : InstAlias<"tst $src1, $src2$sh", + (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>; +def : InstAlias<"tst $src1, $src2$sh", + (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>; + + +def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>; +def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; -} -defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>; -defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>; -defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>; -defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>; -defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>; -defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>; -defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>; -defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>; - -// Rd cannot be sp for flag-setting variants so only half of the aliases are -// needed. -defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>; -defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>; -defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>; -defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>; - -// CMP unfortunately has to be different because the instruction doesn't have a -// dest register. -multiclass cmp_lsl_aliases { - def : InstAlias; - - def : InstAlias; -} +//===----------------------------------------------------------------------===// +// One operand data processing instructions. 
+//===----------------------------------------------------------------------===//
-defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
-defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
-defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
-defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
+defm CLS : OneOperandData<0b101, "cls">;
+defm CLZ : OneOperandData<0b100, "clz", ctlz>;
+defm RBIT : OneOperandData<0b000, "rbit">;
+def REV16Wr : OneWRegData<0b001, "rev16",
+                          UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
+def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
+
+def : Pat<(cttz GPR32:$Rn),
+          (CLZWr (RBITWr GPR32:$Rn))>;
+def : Pat<(cttz GPR64:$Rn),
+          (CLZXr (RBITXr GPR64:$Rn))>;
+def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
+                (i32 1))),
+          (CLSWr GPR32:$Rn)>;
+def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
+                (i64 1))),
+          (CLSXr GPR64:$Rn)>;
+
+// Unlike the other one operand instructions, the instructions with the "rev"
+// mnemonic do *not* just differ in the size bit, but actually use different
+// opcode bits for the different sizes.
+def REVWr : OneWRegData<0b010, "rev", bswap>;
+def REVXr : OneXRegData<0b011, "rev", bswap>;
+def REV32Xr : OneXRegData<0b010, "rev32",
+                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
+
+// The bswap commutes with the rotr so we want a pattern for both possible
+// orders.
+def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
+def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
 //===----------------------------------------------------------------------===//
-// Add-subtract (immediate) instructions
+// Bitfield immediate extraction instruction.
 //===----------------------------------------------------------------------===//
-// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
-
-// These instructions accept a 12-bit unsigned immediate, optionally shifted
-// left by 12 bits. Official assembly format specifies a 12 bit immediate with
-// one of "", "LSL #0", "LSL #12" supplementary operands.
-
-// There are surprisingly few ways to make this work with TableGen, so this
-// implementation has separate instructions for the "LSL #0" and "LSL #12"
-// variants.
-
-// If the MCInst retained a single combined immediate (which could be 0x123000,
-// for example) then both components (imm & shift) would have to be delegated to
-// a single assembly operand. This would entail a separate operand parser
-// (because the LSL would have to live in the same AArch64Operand as the
-// immediate to be accessible); assembly parsing is rather complex and
-// error-prone C++ code.
-//
-// By splitting the immediate, we can delegate handling this optional operand to
-// an InstAlias. Supporting functions to generate the correct MCInst are still
-// required, but these are essentially trivial and parsing can remain generic.
-//
-// Rejected plans with rationale:
-// ------------------------------
-//
-// In an ideal world you'de have two first class immediate operands (in
-// InOperandList, specifying imm12 and shift). Unfortunately this is not
-// selectable by any means I could discover.
-//
-// An Instruction with two MCOperands hidden behind a single entry in
-// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
-// but required more C++ code to handle encoding/decoding. Parsing (the intended
-// main beneficiary) ended up equally complex because of the optional nature of
-// "LSL #0".
-//
-// Attempting to circumvent the need for a custom OperandParser above by giving
-// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
-// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
-// should be parsed: there was no way to accommodate an "lsl #12".
-
-let ParserMethod = "ParseImmWithLSLOperand",
-    RenderMethod = "addImmWithLSLOperands" in {
-  // Derived PredicateMethod fields are different for each
-  def addsubimm_lsl0_asmoperand : AsmOperandClass {
-    let Name = "AddSubImmLSL0";
-    // If an error is reported against this operand, instruction could also be a
-    // register variant.
-    let DiagnosticType = "AddSubSecondSource";
-  }
+let neverHasSideEffects = 1 in
+defm EXTR : ExtractImm<"extr">;
+def : InstAlias<"ror $dst, $src, $shift",
+                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
+def : InstAlias<"ror $dst, $src, $shift",
+                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
+
+def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
+          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
+def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
+          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
-  def addsubimm_lsl12_asmoperand : AsmOperandClass {
-    let Name = "AddSubImmLSL12";
-    let DiagnosticType = "AddSubSecondSource";
-  }
-}
+//===----------------------------------------------------------------------===//
+// Other bitfield immediate instructions.
+//===----------------------------------------------------------------------===//
+let neverHasSideEffects = 1 in {
+defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
+defm SBFM : BitfieldImm<0b00, "sbfm">;
+defm UBFM : BitfieldImm<0b10, "ubfm">;
+}
-def shr_12_XFORM : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
-}]>;
+def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-def shr_12_neg_XFORM : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
-}]>;
+def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = 31 - N->getZExtValue();
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-def neg_XFORM : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
-}]>;
+// min(7, 31 - shift_amt)
+def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = 31 - N->getZExtValue();
+  enc = enc > 7 ? 7 : enc;
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
+// min(15, 31 - shift_amt)
+def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = 31 - N->getZExtValue();
+  enc = enc > 15 ? 15 : enc;
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-multiclass addsub_imm_operands<ValueType ty> {
-  let PrintMethod = "printAddSubImmLSL0Operand",
-      EncoderMethod = "getAddSubImmOpValue",
-      ParserMatchClass = addsubimm_lsl0_asmoperand in {
-    def _posimm_lsl0 : Operand<ty>,
-        ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
-    def _negimm_lsl0 : Operand<ty>,
-        ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }], neg_XFORM>;
-  }
-
-  let PrintMethod = "printAddSubImmLSL12Operand",
-      EncoderMethod = "getAddSubImmOpValue",
-      ParserMatchClass = addsubimm_lsl12_asmoperand in {
-    def _posimm_lsl12 : Operand<ty>,
-        ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
-                shr_12_XFORM>;
-
-    def _negimm_lsl12 : Operand<ty>,
-        ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
-                shr_12_neg_XFORM>;
-  }
-}
+def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-// The add operands don't need any transformation
-defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
-defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
-
-multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
-                               string asmop, string cmpasmop,
-                               Operand imm_operand, Operand cmp_imm_operand,
-                               RegisterClass GPR, RegisterClass GPRsp,
-                               AArch64Reg ZR, ValueType Ty> {
-  // All registers for non-S variants allow SP
-  def _s : A64I_addsubimm,
-           Sched<[WriteALU, ReadALU]>;
-
-
-  // S variants can read SP but would write to ZR
-  def _S : A64I_addsubimm,
-           Sched<[WriteALU, ReadALU]> {
-    let Defs = [NZCV];
-  }
+def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = 63 - N->getZExtValue();
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-  // Note that the pattern here for ADDS is subtle. Canonically CMP
-  // a, b becomes SUBS a, b. If b < 0 then this is equivalent to
-  // ADDS a, (-b). This is not true in general.
-  def _cmp : A64I_addsubimm,
-             Sched<[WriteCMP, ReadCMP]> {
-    let Rd = 0b11111;
-    let Defs = [NZCV];
-    let isCompare = 1;
-  }
-}
+// min(7, 63 - shift_amt)
+def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = 63 - N->getZExtValue();
+  enc = enc > 7 ? 7 : enc;
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
+// min(15, 63 - shift_amt)
+def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = 63 - N->getZExtValue();
+  enc = enc > 15 ? 15 : enc;
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-multiclass addsubimm_shifts<string prefix, bit sf, bit op,
-                            string asmop, string cmpasmop,
-                            string operand, string cmpoperand,
-                            RegisterClass GPR, RegisterClass GPRsp,
-                            AArch64Reg ZR, ValueType Ty> {
-  defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
-                                   asmop, cmpasmop,
-                                   !cast<Operand>(operand # "_lsl0"),
-                                   !cast<Operand>(cmpoperand # "_lsl0"),
-                                   GPR, GPRsp, ZR, Ty>;
-
-  defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
-                                    asmop, cmpasmop,
-                                    !cast<Operand>(operand # "_lsl12"),
-                                    !cast<Operand>(cmpoperand # "_lsl12"),
-                                    GPR, GPRsp, ZR, Ty>;
-}
+// min(31, 63 - shift_amt)
+def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
+  uint64_t enc = 63 - N->getZExtValue();
+  enc = enc > 31 ? 31 : enc;
+  return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
-                               "addsubimm_operand_i32_posimm",
-                               "addsubimm_operand_i32_negimm",
-                               GPR32, GPR32wsp, WZR, i32>;
-defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
-                               "addsubimm_operand_i64_posimm",
-                               "addsubimm_operand_i64_negimm",
-                               GPR64, GPR64xsp, XZR, i64>;
-defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
-                               "addsubimm_operand_i32_negimm",
-                               "addsubimm_operand_i32_posimm",
-                               GPR32, GPR32wsp, WZR, i32>;
-defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
-                               "addsubimm_operand_i64_negimm",
-                               "addsubimm_operand_i64_posimm",
-                               GPR64, GPR64xsp, XZR, i64>;
-
-multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
-  def _fromsp : InstAlias<"mov $Rd, $Rn",
-                          (addop GPRsp:$Rd, SP:$Rn, 0),
-                          0b1>;
-
-  def _tosp : InstAlias<"mov $Rd, $Rn",
-                        (addop SP:$Rd, GPRsp:$Rn, 0),
-                        0b1>;
-}
+def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
+          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+                              (i64 (i32shift_b imm0_31:$imm)))>;
+def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
+          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+                              (i64 (i64shift_b imm0_63:$imm)))>;
-// Recall Rxsp is a RegisterClass containing *just* xsp.
-defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
-defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
+let AddedComplexity = 10 in {
+def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
+          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
+def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
+          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
+}
+
+def : InstAlias<"asr $dst, $src, $shift",
+                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
+def : InstAlias<"asr $dst, $src, $shift",
+                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
+def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
+def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
+def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
+def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
+def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
+
+def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
+          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
+def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
+          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
+
+def : InstAlias<"lsr $dst, $src, $shift",
+                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
+def : InstAlias<"lsr $dst, $src, $shift",
+                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
+def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
+def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
+def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
+def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
+def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
 //===----------------------------------------------------------------------===//
-// Add-subtract (shifted register) instructions
+// Conditionally set flags instructions.
 //===----------------------------------------------------------------------===//
-// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
-
-//===-------------------------------
-// 1. The "shifted register" operands. Shared with logical insts.
-//===------------------------------- - -multiclass shift_operands { - def _asmoperand_i32 : AsmOperandClass { - let Name = "Shift" # form # "i32"; - let RenderMethod = "addShiftOperands"; - let PredicateMethod = "isShift"; - let DiagnosticType = "AddSubRegShift32"; - } - - // Note that the operand type is intentionally i64 because the DAGCombiner - // puts these into a canonical form. - def _i32 : Operand, ImmLeaf= 0 && Imm <= 31; }]> { - let ParserMatchClass - = !cast(prefix # "_asmoperand_i32"); - let PrintMethod = "printShiftOperand"; - let DecoderMethod = "Decode32BitShiftOperand"; - } +defm CCMN : CondSetFlagsImm<0, "ccmn">; +defm CCMP : CondSetFlagsImm<1, "ccmp">; - def _asmoperand_i64 : AsmOperandClass { - let Name = "Shift" # form # "i64"; - let RenderMethod = "addShiftOperands"; - let PredicateMethod = "isShift"; - let DiagnosticType = "AddSubRegShift64"; - } - - def _i64 : Operand, ImmLeaf= 0 && Imm <= 63; }]> { - let ParserMatchClass - = !cast(prefix # "_asmoperand_i64"); - let PrintMethod = "printShiftOperand"; - } -} +defm CCMN : CondSetFlagsReg<0, "ccmn">; +defm CCMP : CondSetFlagsReg<1, "ccmp">; -defm lsl_operand : shift_operands<"lsl_operand", "LSL">; -defm lsr_operand : shift_operands<"lsr_operand", "LSR">; -defm asr_operand : shift_operands<"asr_operand", "ASR">; - -// Not used for add/sub, but defined here for completeness. The "logical -// (shifted register)" instructions *do* have an ROR variant. -defm ror_operand : shift_operands<"ror_operand", "ROR">; - -//===------------------------------- -// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions. -//===------------------------------- - -// N.b. the commutable parameter is just !N. It will be first against the wall -// when the revolution comes. -multiclass addsub_shifts defs> { - let isCommutable = commutable, Defs = defs in { - def _lsl : A64I_addsubshift("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def _lsr : A64I_addsubshift("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def _asr : A64I_addsubshift("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - } +//===----------------------------------------------------------------------===// +// Conditional select instructions. 
+//===----------------------------------------------------------------------===//
+defm CSEL : CondSelect<0, 0b00, "csel">;
+
+def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
+defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
+defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
+defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
+
+def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+
+def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
+          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
+          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
+          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
+          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
+
+// The inverse of the condition code from the alias instruction is what is used
+// in the aliased instruction. The parser already inverts the condition code
+// for these aliases.
+def : InstAlias<"cset $dst, $cc",
+                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
+def : InstAlias<"cset $dst, $cc",
+                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
+
+def : InstAlias<"csetm $dst, $cc",
+                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
+def : InstAlias<"csetm $dst, $cc",
+                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
+
+def : InstAlias<"cinc $dst, $src, $cc",
+                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cinc $dst, $src, $cc",
+                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
+
+def : InstAlias<"cinv $dst, $src, $cc",
+                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cinv $dst, $src, $cc",
+                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
+
+def : InstAlias<"cneg $dst, $src, $cc",
+                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cneg $dst, $src, $cc",
+                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
-
+//===----------------------------------------------------------------------===//
+// PC-relative instructions.
+//===----------------------------------------------------------------------===// +let isReMaterializable = 1 in { +let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in { +def ADR : ADRI<0, "adr", adrlabel, []>; +} // neverHasSideEffects = 1 - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast(prefix # "_lsl") $Rn, $Rm, 0)>; -} +def ADRP : ADRI<1, "adrp", adrplabel, + [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>; +} // isReMaterializable = 1 -multiclass addsub_sizes defs> { - defm xxx : addsub_shifts; - defm www : addsub_shifts; -} +// page address of a constant pool entry, block address +def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>; +def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>; +//===----------------------------------------------------------------------===// +// Unconditional branch (register) instructions. +//===----------------------------------------------------------------------===// -defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>; -defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>; +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { +def RET : BranchReg<0b0010, "ret", []>; +def DRPS : SpecialReturn<0b0101, "drps">; +def ERET : SpecialReturn<0b0100, "eret">; +} // isReturn = 1, isTerminator = 1, isBarrier = 1 -defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>; -defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>; +// Default to the LR register. +def : InstAlias<"ret", (RET LR)>; -//===------------------------------- -// 1. The NEG/NEGS aliases -//===------------------------------- +let isCall = 1, Defs = [LR], Uses = [SP] in { +def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>; +} // isCall -multiclass neg_alias { - def : InstAlias<"neg $Rd, $Rm, $Imm6", - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { +def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; +} // isBranch, isTerminator, isBarrier, isIndirectBranch - def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)), - (INST ZR, $Rm, shift_operand:$Imm6)>; +// Create a separate pseudo-instruction for codegen to use so that we don't +// flag lr as used in every function. It'll be restored before the RET by the +// epilogue if it's legitimately used. +def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> { + let isTerminator = 1; + let isBarrier = 1; + let isReturn = 1; } -defm : neg_alias; -defm : neg_alias; -defm : neg_alias; -def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; -def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>; - -defm : neg_alias; -defm : neg_alias; -defm : neg_alias; -def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; -def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>; - -// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to -// be involved. -class negs_alias - : InstAlias<"negs $Rd, $Rm, $Imm6", - (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>; - -def : negs_alias; -def : negs_alias; -def : negs_alias; -def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; - -def : negs_alias; -def : negs_alias; -def : negs_alias; -def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; - -//===------------------------------- -// 1. 
The CMP/CMN aliases -//===------------------------------- - -multiclass cmp_shifts { - let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in { - def _lsl : A64I_addsubshift("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - - def _lsr : A64I_addsubshift("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - - def _asr : A64I_addsubshift("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rn, $Rm, $Imm6"), - [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteCMP, ReadCMP, ReadCMP]>; - } - - def _noshift - : InstAlias(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast(prefix # "_lsl") $Rn, $Rm, 0)>; +// This is a directive-like pseudo-instruction. The purpose is to insert an +// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction +// (which in the usual case is a BLR). +let hasSideEffects = 1 in +def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { + let AsmString = ".tlsdesccall $sym"; } -defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>; -defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>; - -defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>; -defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>; +// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It +// gets expanded to two MCInsts during lowering. +let isCall = 1, Defs = [LR] in +def TLSDESC_BLR + : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), + [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; +def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym), + (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; //===----------------------------------------------------------------------===// -// Add-subtract (with carry) instructions +// Conditional branch (immediate) instruction. 
//===----------------------------------------------------------------------===// -// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS - -multiclass A64I_addsubcarrySizes { - let Uses = [NZCV] in { - def www : A64I_addsubcarry<0b0, op, s, 0b000000, - (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def xxx : A64I_addsubcarry<0b1, op, s, 0b000000, - (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), - !strconcat(asmop, "\t$Rd, $Rn, $Rm"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } -} - -let isCommutable = 1 in { - defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">; -} - -defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">; - -let Defs = [NZCV] in { - let isCommutable = 1 in { - defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">; - } - - defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">; -} +def Bcc : BranchCond; -def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>; -def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>; -def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>; -def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>; +//===----------------------------------------------------------------------===// +// Compare-and-branch instructions. +//===----------------------------------------------------------------------===// +defm CBZ : CmpBranch<0, "cbz", AArch64cbz>; +defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>; -// Note that adde and sube can form a chain longer than two (e.g. for 256-bit -// addition). So the flag-setting instructions are appropriate. -def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>; -def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>; -def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>; -def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>; +//===----------------------------------------------------------------------===// +// Test-bit-and-branch instructions. +//===----------------------------------------------------------------------===// +defm TBZ : TestBranch<0, "tbz", AArch64tbz>; +defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>; //===----------------------------------------------------------------------===// -// Bitfield +// Unconditional branch (immediate) instructions. //===----------------------------------------------------------------------===// -// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL, -// UBFIZ, UBFX +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { +def B : BranchImm<0, "b", [(br bb:$addr)]>; +} // isBranch, isTerminator, isBarrier -// Because of the rather complicated nearly-overlapping aliases, the decoding of -// this range of instructions is handled manually. The architectural -// instructions are BFM, SBFM and UBFM but a disassembler should never produce -// these. -// -// In the end, the best option was to use BFM instructions for decoding under -// almost all circumstances, but to create aliasing *Instructions* for each of -// the canonical forms and specify a completely custom decoder which would -// substitute the correct MCInst as needed. -// -// This also simplifies instruction selection, parsing etc because the MCInsts -// have a shape that's closer to their use in code. +let isCall = 1, Defs = [LR], Uses = [SP] in { +def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>; +} // isCall +def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>; -//===------------------------------- -// 1. 
The architectural BFM instructions -//===------------------------------- +//===----------------------------------------------------------------------===// +// Exception generation instructions. +//===----------------------------------------------------------------------===// +def BRK : ExceptionGeneration<0b001, 0b00, "brk">; +def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; +def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; +def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">; +def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; +def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; +def SMC : ExceptionGeneration<0b000, 0b11, "smc">; +def SVC : ExceptionGeneration<0b000, 0b01, "svc">; + +// DCPSn defaults to an immediate operand of zero if unspecified. +def : InstAlias<"dcps1", (DCPS1 0)>; +def : InstAlias<"dcps2", (DCPS2 0)>; +def : InstAlias<"dcps3", (DCPS3 0)>; -def uimm5_asmoperand : AsmOperandClass { - let Name = "UImm5"; - let PredicateMethod = "isUImm<5>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm5"; -} +//===----------------------------------------------------------------------===// +// Load instructions. +//===----------------------------------------------------------------------===// -def uimm6_asmoperand : AsmOperandClass { - let Name = "UImm6"; - let PredicateMethod = "isUImm<6>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm6"; +// Pair (indexed, offset) +defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">; +defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">; +defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">; +defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">; +defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">; + +defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">; + +// Pair (pre-indexed) +def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">; +def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">; +def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">; +def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">; +def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">; + +def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; + +// Pair (post-indexed) +def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">; +def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">; +def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">; +def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">; +def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">; + +def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; + + +// Pair (no allocate) +defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">; +defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">; +defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">; +defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">; +defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">; + +//--- +// (register offset) +//--- + +// Integer +defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>; +defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>; +defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>; +defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>; + +// Floating-point +defm LDRB : Load8RO<0b00, 1, 0b01, FPR8, "ldr", untyped, load>; +defm LDRH : Load16RO<0b01, 1, 0b01, FPR16, "ldr", f16, load>; +defm LDRS : Load32RO<0b10, 1, 0b01, FPR32, "ldr", f32, load>; +defm LDRD : 
Load64RO<0b11, 1, 0b01, FPR64, "ldr", f64, load>; +defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>; + +// Load sign-extended half-word +defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>; +defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>; + +// Load sign-extended byte +defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>; +defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>; + +// Load sign-extended word +defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>; + +// Pre-fetch. +defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">; + +// For regular load, we do not have any alignment requirement. +// Thus, it is safe to directly map the vector loads with interesting +// addressing modes. +// FIXME: We could do the same for bitconvert to floating point vectors. +multiclass ScalToVecROLoadPat { + def : Pat<(VecTy (scalar_to_vector (ScalTy + (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))), + (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), + (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset), + sub)>; + + def : Pat<(VecTy (scalar_to_vector (ScalTy + (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))), + (INSERT_SUBREG (VecTy (IMPLICIT_DEF)), + (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset), + sub)>; } -def bitfield32_imm : Operand, - ImmLeaf= 0 && Imm < 32; }]> { - let ParserMatchClass = uimm5_asmoperand; +let AddedComplexity = 10 in { +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; - let DecoderMethod = "DecodeBitfield32ImmOperand"; -} +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; -def bitfield64_imm : Operand, - ImmLeaf= 0 && Imm < 64; }]> { - let ParserMatchClass = uimm6_asmoperand; +defm : ScalToVecROLoadPat; +defm : ScalToVecROLoadPat; - // Default decoder works in 64-bit case: the 6-bit field can take any value. -} +defm : ScalToVecROLoadPat; -multiclass A64I_bitfieldSizes opc, string asmop> { - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), - (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - } +defm : ScalToVecROLoadPat; - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), - (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - } -} -defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">; -defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">; - -// BFM instructions modify the destination register rather than defining it -// completely. 
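The ScalToVecROLoadPat multiclass above expresses that a scalar load feeding
scalar_to_vector can go straight into lane 0 of an FP/vector register, with the
remaining lanes left undefined (IMPLICIT_DEF), avoiding a separate GPR-to-FPR
copy. In scalar C++ terms (illustrative only; the zero-fill below is just C++
hygiene, the DAG merely leaves the lanes unspecified):

  struct V4f { float lane[4]; };   // stand-in for a v4f32 register

  V4f scalarToVectorLoad(const float *p) {
    V4f v{};          // other lanes: don't-care (IMPLICIT_DEF in the pattern)
    v.lane[0] = *p;   // one register-offset FP load, no realignment needed
    return v;
  }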
-def BFMwwii : - A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - let Constraints = "$src = $Rd"; -} +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend64:$extend))))), + (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>; -def BFMxxii : - A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS), - "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - let DecoderMethod = "DecodeBitfieldInstruction"; - let Constraints = "$src = $Rd"; +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend64:$extend))))), + (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>; } +// Match all load 64 bits width whose type is compatible with FPR64 +multiclass VecROLoadPat { -//===------------------------------- -// 2. Extend aliases to 64-bit dest -//===------------------------------- - -// Unfortunately the extensions that end up as 64-bits cannot be handled by an -// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs -// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is -// not capable of such a map as far as I'm aware - -// Note that these instructions are strictly more specific than the -// BFM ones (in ImmR) so they can handle their own decoding. -class A64I_bf_ext opc, RegisterClass GPRDest, ValueType dty, - string asmop, bits<6> imms, dag pattern> - : A64I_bitfield, - Sched<[WriteALU, ReadALU]> { - let ImmR = 0b000000; - let ImmS = imms; -} + def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; -// Signed extensions -def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7, - (sext_inreg (anyext i32:$Rn), i8)>; -def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7, - (sext_inreg i32:$Rn, i8)>; -def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15, - (sext_inreg (anyext i32:$Rn), i16)>; -def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15, - (sext_inreg i32:$Rn, i16)>; -def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>; - -// Unsigned extensions -def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7, - (and i32:$Rn, 255)>; -def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15, - (and i32:$Rn, 65535)>; - -// The 64-bit unsigned variants are not strictly architectural but recommended -// for consistency. -let isAsmParserOnly = 1 in { - def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7, - (and (anyext i32:$Rn), 255)>; - def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15, - (and (anyext i32:$Rn), 65535)>; + def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } -// Extra patterns for when the source register is actually 64-bits -// too. There's no architectural difference here, it's just LLVM -// shinanigans. There's no need for equivalent zero-extension patterns -// because they'll already be caught by logical (immediate) matching. 
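The ExtLoadTo64ROPat patterns above rely on the same W-register zeroing trick
as the MOVi32imm pseudo earlier: the 32-bit zero-extending load already clears
bits 63:32, so widening to i64 costs nothing and SUBREG_TO_REG merely relabels
the register. Sketch:

  #include <cstdint>

  // A byte load into a W register zero-extends; the X view is already the
  // correct i64 value, so no extra instruction is needed.
  uint64_t zextLoad8To64(const uint8_t *p) {
    uint32_t w = *p;   // the register-offset byte load
    return w;          // free widening: SUBREG_TO_REG (i64 0), ..., sub_32
  }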
-def : Pat<(sext_inreg i64:$Rn, i8), - (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>; -def : Pat<(sext_inreg i64:$Rn, i16), - (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>; -def : Pat<(sext_inreg i64:$Rn, i32), - (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>; - - -//===------------------------------- -// 3. Aliases for ASR and LSR (the simple shifts) -//===------------------------------- - -// These also handle their own decoding because ImmS being set makes -// them take precedence over BFM. -multiclass A64I_shift opc, string asmop, SDNode opnode> { - def wwi : A64I_bitfield<0b0, opc, 0b0, - (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), - [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let ImmS = 31; - } - - def xxi : A64I_bitfield<0b1, opc, 0b1, - (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR"), - [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))], - NoItinerary>, - Sched<[WriteALU, ReadALU]> { - let ImmS = 63; - } - +let AddedComplexity = 10 in { +let Predicates = [IsLE] in { + // We must do vector loads with LD1 in big-endian. + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; +} + +defm : VecROLoadPat; +defm : VecROLoadPat; + +// Match all load 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + // We must do vector loads with LD1 in big-endian. + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; + defm : VecROLoadPat; +} +} // AddedComplexity = 10 + +// zextload -> i64 +multiclass ExtLoadTo64ROPat { + def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))), + (SUBREG_TO_REG (i64 0), + (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend), + sub_32)>; + + def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))), + (SUBREG_TO_REG (i64 0), + (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend), + sub_32)>; } -defm ASR : A64I_shift<0b00, "asr", sra>; -defm LSR : A64I_shift<0b10, "lsr", srl>; - -//===------------------------------- -// 4. Aliases for LSL -//===------------------------------- +let AddedComplexity = 10 in { + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; + defm : ExtLoadTo64ROPat; -// Unfortunately LSL and subsequent aliases are much more complicated. We need -// to be able to say certain output instruction fields depend in a complex -// manner on combinations of input assembly fields). -// -// MIOperandInfo *might* have been able to do it, but at the cost of -// significantly more C++ code. - -// N.b. contrary to usual practice these operands store the shift rather than -// the machine bits in an MCInst. The complexity overhead of consistency -// outweighed the benefits in this case (custom asmparser, printer and selection -// vs custom encoder). 
-def bitfield32_lsl_imm : Operand,
-  ImmLeaf= 0 && Imm <= 31; }]> {
-  let ParserMatchClass = uimm5_asmoperand;
-  let EncoderMethod = "getBitfield32LSLOpValue";
-}
+  // zextloadi1 -> zextloadi8
+  defm : ExtLoadTo64ROPat;
-def bitfield64_lsl_imm : Operand,
-  ImmLeaf= 0 && Imm <= 63; }]> {
-  let ParserMatchClass = uimm6_asmoperand;
-  let EncoderMethod = "getBitfield64LSLOpValue";
-}
+  // extload -> zextload
+  defm : ExtLoadTo64ROPat;
+  defm : ExtLoadTo64ROPat;
+  defm : ExtLoadTo64ROPat;
-class A64I_bitfield_lsl
-  : A64I_bitfield,
-    Sched<[WriteALU, ReadALU]> {
-  bits<12> FullImm;
-  let ImmR = FullImm{5-0};
-  let ImmS = FullImm{11-6};
-
-  // No disassembler allowed because it would overlap with BFM which does the
-  // actual work.
-  let isAsmParserOnly = 1;
+  // extloadi1 -> zextloadi8
+  defm : ExtLoadTo64ROPat;
 }
-def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
-def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
-//===-------------------------------
-// 5. Aliases for bitfield extract instructions
-//===-------------------------------
-
-def bfx32_width_asmoperand : AsmOperandClass {
-  let Name = "BFX32Width";
-  let PredicateMethod = "isBitfieldWidth<32>";
-  let RenderMethod = "addBFXWidthOperands";
-  let DiagnosticType = "Width32";
-}
+
+// extload/zextload -> i32
+multiclass ExtLoadTo32ROPat {
+  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
+            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
-def bfx32_width : Operand, ImmLeaf {
-  let PrintMethod = "printBFXWidthOperand";
-  let ParserMatchClass = bfx32_width_asmoperand;
-}
+  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
+            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
-def bfx64_width_asmoperand : AsmOperandClass {
-  let Name = "BFX64Width";
-  let PredicateMethod = "isBitfieldWidth<64>";
-  let RenderMethod = "addBFXWidthOperands";
-  let DiagnosticType = "Width64";
 }
-def bfx64_width : Operand {
-  let PrintMethod = "printBFXWidthOperand";
-  let ParserMatchClass = bfx64_width_asmoperand;
+let AddedComplexity = 10 in {
+  // extload -> zextload
+  defm : ExtLoadTo32ROPat;
+  defm : ExtLoadTo32ROPat;
+  defm : ExtLoadTo32ROPat;
+
+  // zextloadi1 -> zextloadi8
+  defm : ExtLoadTo32ROPat;
+}
+
+//---
+// (unsigned immediate)
+//---
+defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr",
+                   [(set GPR64:$Rt,
+                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
+defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr",
+                   [(set GPR32:$Rt,
+                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
+defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr",
+                   [(set FPR8:$Rt,
+                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
+defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr",
+                   [(set (f16 FPR16:$Rt),
+                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
+defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr",
+                   [(set (f32 FPR32:$Rt),
+                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
+defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr",
+                   [(set (f64 FPR64:$Rt),
+                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
+defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr",
+                   [(set (f128 FPR128:$Rt),
+                         (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
+
+// For regular load, we do not have any alignment requirement.
+// Thus, it is safe to directly map the vector loads with interesting
+// addressing modes.
+// FIXME: We could do the same for bitconvert to floating point vectors.
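The patterns that follow implement this direct mapping. As a rough sketch of the payoff (an editor's illustration, not part of the patch; the function name is invented), a vector built from a memory operand can become a single register-offset FP/SIMD load instead of a GPR load plus a cross-file fmov:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Illustration only: the DAG for this function contains
     * (v2i32 (scalar_to_vector (load ...))), which patterns like the ones
     * below let the selector turn into one register-offset FP load, e.g.
     *   ldr s0, [x0, x1, lsl #2]
     * An "ldr s0" also zeroes the upper lanes of the vector register,
     * which here covers the zeroed lane from vdup_n_u32(0). */
    uint32x2_t lane0_from_mem(const uint32_t *p, int64_t i) {
      return vset_lane_u32(p[i], vdup_n_u32(0), 0);
    }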
+def : Pat <(v8i8 (scalar_to_vector (i32 + (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +def : Pat <(v16i8 (scalar_to_vector (i32 + (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>; +def : Pat <(v4i16 (scalar_to_vector (i32 + (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; +def : Pat <(v8i16 (scalar_to_vector (i32 + (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>; +def : Pat <(v2i32 (scalar_to_vector (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; +def : Pat <(v4i32 (scalar_to_vector (i32 + (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>; +def : Pat <(v1i64 (scalar_to_vector (i64 + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat <(v2i64 (scalar_to_vector (i64 + (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>; + +// Match all load 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + // We must use LD1 to perform vector loads in big-endian. + def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +} +def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))), + (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>; + +// Match all load 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + // We must use LD1 to perform vector loads in big-endian. 
+ def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; +} +def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))), + (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>; + +defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh", + [(set GPR32:$Rt, + (zextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; +defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb", + [(set GPR32:$Rt, + (zextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; +// zextload -> i64 +def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; + +// zextloadi1 -> zextloadi8 +def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; + +// extload -> zextload +def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>; +def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))), + (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>; +def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))), + (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>; +def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; +def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))), + (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>; + +// load sign-extended half-word +defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh", + [(set GPR32:$Rt, + (sextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; +defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh", + [(set GPR64:$Rt, + (sextloadi16 (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset)))]>; + +// load sign-extended byte +defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb", + [(set GPR32:$Rt, + (sextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; +defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb", + [(set GPR64:$Rt, + (sextloadi8 (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset)))]>; + +// load sign-extended word +defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw", + [(set GPR64:$Rt, + (sextloadi32 (am_indexed32 GPR64sp:$Rn, 
+                                     uimm12s4:$offset)))]>;
+
+// load zero-extended word
+def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
+      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+
+// Pre-fetch.
+def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
+                        [(AArch64Prefetch imm:$Rt,
+                                          (am_indexed64 GPR64sp:$Rn,
+                                                        uimm12s8:$offset))]>;
+
+def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
+
+//---
+// (literal)
+def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">;
+def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">;
+def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">;
+def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">;
+def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">;
+
+// load sign-extended word
+def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">;
+
+// prefetch
+def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
+// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
+
+//---
+// (unscaled immediate)
+defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur",
+                    [(set GPR64:$Rt,
+                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur",
+                    [(set GPR32:$Rt,
+                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur",
+                    [(set FPR8:$Rt,
+                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur",
+                    [(set FPR16:$Rt,
+                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur",
+                    [(set (f32 FPR32:$Rt),
+                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur",
+                    [(set (f64 FPR64:$Rt),
+                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur",
+                    [(set (f128 FPR128:$Rt),
+                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
+
+defm LDURHH
+    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
+          [(set GPR32:$Rt,
+                (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURBB
+    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
+          [(set GPR32:$Rt,
+                (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// Match all load 64 bits width whose type is compatible with FPR64
+let Predicates = [IsLE] in {
+  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+}
+def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+
+// Match all load 128 bits width whose type is compatible with FPR128
+let Predicates = [IsLE] in {
+  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+}
+
+// anyext -> zext
+def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+// unscaled zext
+def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+
+
+//---
+// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
+
+// Define new assembler match classes as we want to only match these when
+// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
+// associate a DiagnosticType either, as we want the diagnostic for the
+// canonical form (the scaled operand) to take precedence.
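Concretely, the fall-back is what lets the generic mnemonic accept an unscaled 9-bit signed offset. A minimal sketch, assuming a GNU-syntax inline assembler (an editor's illustration, not part of the patch):

    #include <stdint.h>

    /* Illustration only: #-8 is not encodable in the scaled,
     * unsigned-immediate LDR form, so the assembler matches the fall-back
     * operand and emits the equivalent LDUR encoding instead. */
    uint64_t peek_before(const uint64_t *p) {
      uint64_t v;
      __asm__("ldr %0, [%1, #-8]" : "=r"(v) : "r"(p)); /* encoded as ldur */
      return v;
    }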
+class SImm9OffsetOperand : AsmOperandClass { + let Name = "SImm9OffsetFB" # Width; + let PredicateMethod = "isSImm9OffsetFB<" # Width # ">"; + let RenderMethod = "addImmOperands"; } +def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>; +def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>; +def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>; +def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>; +def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>; + +def simm9_offset_fb8 : Operand { + let ParserMatchClass = SImm9OffsetFB8Operand; +} +def simm9_offset_fb16 : Operand { + let ParserMatchClass = SImm9OffsetFB16Operand; +} +def simm9_offset_fb32 : Operand { + let ParserMatchClass = SImm9OffsetFB32Operand; +} +def simm9_offset_fb64 : Operand { + let ParserMatchClass = SImm9OffsetFB64Operand; +} +def simm9_offset_fb128 : Operand { + let ParserMatchClass = SImm9OffsetFB128Operand; +} + +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"ldr $Rt, [$Rn, $offset]", + (LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; + +// zextload -> i64 +def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>; +def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))), + (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>; + +// load sign-extended half-word +defm LDURSHW + : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh", + [(set GPR32:$Rt, + (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURSHX + : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh", + [(set GPR64:$Rt, + (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>; + +// load sign-extended byte +defm LDURSBW + : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb", + [(set GPR32:$Rt, + (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; +defm LDURSBX + : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb", + [(set GPR64:$Rt, + (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>; + +// load sign-extended word +defm LDURSW + : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw", + [(set GPR64:$Rt, + (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>; + +// zero and sign extending aliases from generic LDR* mnemonics to LDUR*. 
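Before the alias list below, a minimal sketch of why the widening loads above need no separate extend instruction (an editor's illustration, not part of the patch; function names are invented):

    #include <stdint.h>

    /* Illustration only: any 32-bit register write on AArch64 zeroes bits
     * 63:32, so a zextload to i64 is just the 32-bit load re-labelled via
     * SUBREG_TO_REG, while the ldrsh/ldursh family sign-extends within
     * the load itself. */
    uint64_t u16_to_u64(const uint16_t *p) { return *p; } /* ldrh  w0, [x0] */
    int64_t  s16_to_s64(const int16_t  *p) { return *p; } /* ldrsh x0, [x0] */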
+def : InstAlias<"ldrb $Rt, [$Rn, $offset]", + (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrh $Rt, [$Rn, $offset]", + (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", + (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrsb $Rt, [$Rn, $offset]", + (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", + (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsh $Rt, [$Rn, $offset]", + (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"ldrsw $Rt, [$Rn, $offset]", + (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; + +// Pre-fetch. +defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum", + [(AArch64Prefetch imm:$Rt, + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; + +//--- +// (unscaled immediate, unprivileged) +defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; +defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; + +defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; +defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; + +// load sign-extended half-word +defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; +defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; + +// load sign-extended byte +defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; +defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; + +// load sign-extended word +defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; + +//--- +// (immediate pre-indexed) +def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">; +def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">; +def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">; +def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">; +def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">; +def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">; +def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">; + +// load sign-extended half-word +def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">; +def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">; + +// load sign-extended byte +def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">; +def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">; + +// load zero-extended byte +def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">; +def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">; + +// load sign-extended word +def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">; + +//--- +// (immediate post-indexed) +def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">; +def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">; +def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">; +def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">; +def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">; +def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">; +def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">; + +// load sign-extended half-word +def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">; +def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">; + +// load sign-extended byte +def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">; +def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">; + +// load zero-extended byte +def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">; +def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">; + +// load 
sign-extended word
+def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">;
-multiclass A64I_bitfield_extract opc, string asmop, SDNode op> {
-  def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
-                  (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
-                  !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
-                  [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
-                  NoItinerary>,
-             Sched<[WriteALU, ReadALU]> {
-    // As above, no disassembler allowed.
-    let isAsmParserOnly = 1;
-  }
-
-  def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
-                  (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
-                  !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
-                  [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
-                  NoItinerary>,
-             Sched<[WriteALU, ReadALU]> {
-    // As above, no disassembler allowed.
-    let isAsmParserOnly = 1;
-  }
-}
+//===----------------------------------------------------------------------===//
+// Store instructions.
+//===----------------------------------------------------------------------===//
-defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
-defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
-
-// Again, variants based on BFM modify Rd so need it as an input too.
-def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
-                (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
-                "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
-           Sched<[WriteALU, ReadALU, ReadALU]> {
-  // As above, no disassembler allowed.
-  let isAsmParserOnly = 1;
-  let Constraints = "$src = $Rd";
+// Pair (indexed, offset)
+// FIXME: Use dedicated range-checked addressing mode operand here.
+defm STPW : StorePairOffset<0b00, 0, GPR32, simm7s4, "stp">;
+defm STPX : StorePairOffset<0b10, 0, GPR64, simm7s8, "stp">;
+defm STPS : StorePairOffset<0b00, 1, FPR32, simm7s4, "stp">;
+defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">;
+defm STPQ : StorePairOffset<0b10, 1, FPR128, simm7s16, "stp">;
+
+// Pair (pre-indexed)
+def STPWpre : StorePairPreIdx<0b00, 0, GPR32, simm7s4, "stp">;
+def STPXpre : StorePairPreIdx<0b10, 0, GPR64, simm7s8, "stp">;
+def STPSpre : StorePairPreIdx<0b00, 1, FPR32, simm7s4, "stp">;
+def STPDpre : StorePairPreIdx<0b01, 1, FPR64, simm7s8, "stp">;
+def STPQpre : StorePairPreIdx<0b10, 1, FPR128, simm7s16, "stp">;
+
+// Pair (post-indexed)
+def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">;
+def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">;
+def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">;
+def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">;
+def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">;
+
+// Pair (no allocate)
+defm STNPW : StorePairNoAlloc<0b00, 0, GPR32, simm7s4, "stnp">;
+defm STNPX : StorePairNoAlloc<0b10, 0, GPR64, simm7s8, "stnp">;
+defm STNPS : StorePairNoAlloc<0b00, 1, FPR32, simm7s4, "stnp">;
+defm STNPD : StorePairNoAlloc<0b01, 1, FPR64, simm7s8, "stnp">;
+defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128, simm7s16, "stnp">;
+
+//---
+// (Register offset)
+
+// Integer
+defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
+defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
+defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
+defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
+
+
+// Floating-point
+defm STRB : Store8RO< 0b00, 1, 0b00, FPR8, "str", untyped, store>;
+defm STRH : Store16RO<0b01, 1, 0b00, FPR16, "str", f16, store>;
+defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>;
+defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>; +defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>; + +multiclass TruncStoreFrom64ROPat { + + def : Pat<(storeop GPR64:$Rt, + (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), + (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32), + GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; + + def : Pat<(storeop GPR64:$Rt, + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32), + GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } -def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS), - "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; +let AddedComplexity = 10 in { + // truncstore i64 + defm : TruncStoreFrom64ROPat; + defm : TruncStoreFrom64ROPat; + defm : TruncStoreFrom64ROPat; } -// SBFX instructions can do a 1-instruction sign-extension of boolean values. -def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>; -def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>; -def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)), - (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>; - -// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could -// use either 64-bit or 32-bit variant, but 32-bit might be more efficient. -def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), - sub_32)>; - -//===------------------------------- -// 6. Aliases for bitfield insert instructions -//===------------------------------- - -def bfi32_lsb_asmoperand : AsmOperandClass { - let Name = "BFI32LSB"; - let PredicateMethod = "isUImm<5>"; - let RenderMethod = "addBFILSBOperands<32>"; - let DiagnosticType = "UImm5"; -} +multiclass VecROStorePat { + def : Pat<(store (VecTy FPR:$Rt), + (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)), + (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>; -def bfi32_lsb : Operand, - ImmLeaf= 0 && Imm <= 31; }]> { - let PrintMethod = "printBFILSBOperand<32>"; - let ParserMatchClass = bfi32_lsb_asmoperand; + def : Pat<(store (VecTy FPR:$Rt), + (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)), + (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>; } -def bfi64_lsb_asmoperand : AsmOperandClass { - let Name = "BFI64LSB"; - let PredicateMethod = "isUImm<6>"; - let RenderMethod = "addBFILSBOperands<64>"; - let DiagnosticType = "UImm6"; -} +let AddedComplexity = 10 in { +// Match all store 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; +} + +defm : VecROStorePat; +defm : VecROStorePat; + +// Match all store 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. 
+ defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; + defm : VecROStorePat; +} +} // AddedComplexity = 10 + +//--- +// (unsigned immediate) +defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str", + [(store GPR64:$Rt, + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; +defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str", + [(store GPR32:$Rt, + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; +defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str", + [(store FPR8:$Rt, + (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>; +defm STRH : StoreUI<0b01, 1, 0b00, FPR16, uimm12s2, "str", + [(store (f16 FPR16:$Rt), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>; +defm STRS : StoreUI<0b10, 1, 0b00, FPR32, uimm12s4, "str", + [(store (f32 FPR32:$Rt), + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>; +defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str", + [(store (f64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>; +defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>; + +defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh", + [(truncstorei16 GPR32:$Rt, + (am_indexed16 GPR64sp:$Rn, + uimm12s2:$offset))]>; +defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb", + [(truncstorei8 GPR32:$Rt, + (am_indexed8 GPR64sp:$Rn, + uimm12s1:$offset))]>; + +// Match all store 64 bits width whose type is compatible with FPR64 +let AddedComplexity = 10 in { +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. + def : Pat<(store (v2f32 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v8i8 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v4i16 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + def : Pat<(store (v2i32 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; +} +def : Pat<(store (v1f64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; +def : Pat<(store (v1i64 FPR64:$Rt), + (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)), + (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>; + +// Match all store 128 bits width whose type is compatible with FPR128 +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. 
+ def : Pat<(store (v4f32 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v2f64 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v16i8 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v8i16 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v4i32 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + def : Pat<(store (v2i64 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; +} +def : Pat<(store (f128 FPR128:$Rt), + (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)), + (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>; + +// truncstore i64 +def : Pat<(truncstorei32 GPR64:$Rt, + (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)), + (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>; +def : Pat<(truncstorei16 GPR64:$Rt, + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)), + (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>; + +} // AddedComplexity = 10 + +//--- +// (unscaled immediate) +defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64, "stur", + [(store GPR64:$Rt, + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32, "stur", + [(store GPR32:$Rt, + (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; +defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8, "stur", + [(store FPR8:$Rt, + (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; +defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16, "stur", + [(store (f16 FPR16:$Rt), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; +defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32, "stur", + [(store (f32 FPR32:$Rt), + (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>; +defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64, "stur", + [(store (f64 FPR64:$Rt), + (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>; +defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128, "stur", + [(store (f128 FPR128:$Rt), + (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>; +defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32, "sturh", + [(truncstorei16 GPR32:$Rt, + (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>; +defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32, "sturb", + [(truncstorei8 GPR32:$Rt, + (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>; + +// Match all store 64 bits width whose type is compatible with FPR64 +let Predicates = [IsLE] in { + // We must use ST1 to store vectors in big-endian. 
+  def : Pat<(store (v2f32 FPR64:$Rt),
+                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v8i8 FPR64:$Rt),
+                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v4i16 FPR64:$Rt),
+                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v2i32 FPR64:$Rt),
+                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+}
+def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+
+// Match all store 128 bits width whose type is compatible with FPR128
+let Predicates = [IsLE] in {
+  // We must use ST1 to store vectors in big-endian.
+  def : Pat<(store (v4f32 FPR128:$Rt),
+                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v2f64 FPR128:$Rt),
+                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v16i8 FPR128:$Rt),
+                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v8i16 FPR128:$Rt),
+                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v4i32 FPR128:$Rt),
+                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+  def : Pat<(store (v2i64 FPR128:$Rt),
+                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+}
+
+// unscaled i64 truncating stores
+def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
+  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
+  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+
+//---
+// STR mnemonics fall back to STUR for negative or unaligned offsets.
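The alias list below mirrors the LDR/LDUR one. Separately, a minimal sketch of what the truncating-store patterns just above buy (an editor's illustration, not part of the patch; the function name is invented):

    #include <stdint.h>

    /* Illustration only: (truncstorei32 GPR64 ...) becomes a plain 32-bit
     * store of the sub_32 subregister (the W view of the X register), so
     * no explicit truncation instruction is emitted. */
    void store_low32(uint32_t *p, uint64_t v) {
      *p = (uint32_t)v; /* typically: str w1, [x0] */
    }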
+def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>; +def : InstAlias<"str $Rt, [$Rn, $offset]", + (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>; + +def : InstAlias<"strb $Rt, [$Rn, $offset]", + (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>; +def : InstAlias<"strh $Rt, [$Rn, $offset]", + (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>; + +//--- +// (unscaled immediate, unprivileged) +defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; +defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; + +defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; +defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; + +//--- +// (immediate pre-indexed) +def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str", pre_store, i32>; +def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str", pre_store, i64>; +def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str", pre_store, untyped>; +def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str", pre_store, f16>; +def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str", pre_store, f32>; +def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str", pre_store, f64>; +def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str", pre_store, f128>; + +def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb", pre_truncsti8, i32>; +def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh", pre_truncsti16, i32>; + +// truncstore i64 +def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; + +def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + +def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, 
simm9:$off)>; +def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; + +//--- +// (immediate post-indexed) +def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str", post_store, i32>; +def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str", post_store, i64>; +def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str", post_store, untyped>; +def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str", post_store, f16>; +def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str", post_store, f32>; +def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str", post_store, f64>; +def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str", post_store, f128>; + +def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb", post_truncsti8, i32>; +def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh", post_truncsti16, i32>; + +// truncstore i64 +def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; +def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off), + (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr, + simm9:$off)>; + +def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off), + (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>; + +def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; +def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off), + (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>; -def bfi64_lsb : Operand, - ImmLeaf= 0 && Imm <= 63; }]> { - let PrintMethod = "printBFILSBOperand<64>"; - let ParserMatchClass = bfi64_lsb_asmoperand; -} +//===----------------------------------------------------------------------===// +// Load/store exclusive instructions. +//===----------------------------------------------------------------------===// -// Width verification is performed during conversion so width operand can be -// shared between 32/64-bit cases. 
Still needed for the print method though -// because ImmR encodes "width - 1". -def bfi32_width_asmoperand : AsmOperandClass { - let Name = "BFI32Width"; - let PredicateMethod = "isBitfieldWidth<32>"; - let RenderMethod = "addBFIWidthOperands"; - let DiagnosticType = "Width32"; -} +def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">; +def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">; +def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">; +def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">; -def bfi32_width : Operand, - ImmLeaf= 1 && Imm <= 32; }]> { - let PrintMethod = "printBFIWidthOperand"; - let ParserMatchClass = bfi32_width_asmoperand; -} +def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">; +def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">; +def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">; +def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">; -def bfi64_width_asmoperand : AsmOperandClass { - let Name = "BFI64Width"; - let PredicateMethod = "isBitfieldWidth<64>"; - let RenderMethod = "addBFIWidthOperands"; - let DiagnosticType = "Width64"; -} +def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">; +def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">; +def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">; +def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">; -def bfi64_width : Operand, - ImmLeaf= 1 && Imm <= 64; }]> { - let PrintMethod = "printBFIWidthOperand"; - let ParserMatchClass = bfi64_width_asmoperand; -} +def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">; +def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">; +def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">; +def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">; -multiclass A64I_bitfield_insert opc, string asmop> { - def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd), - (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - } +def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">; +def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">; +def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">; +def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">; - def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd), - (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), - !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"), - [], NoItinerary>, - Sched<[WriteALU, ReadALU]> { - // As above, no disassembler allowed. 
- let isAsmParserOnly = 1; - } -} +def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">; +def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">; +def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">; +def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">; -defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">; -defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">; +def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">; +def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">; +def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">; +def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">; -def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd), - (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} +def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">; +def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">; -def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd), - (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS), - "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]> { - // As above, no disassembler allowed. - let isAsmParserOnly = 1; - let Constraints = "$src = $Rd"; -} +def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">; +def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">; //===----------------------------------------------------------------------===// -// Compare and branch (immediate) +// Scaled floating point to integer conversion instructions. //===----------------------------------------------------------------------===// -// Contains: CBZ, CBNZ - -class label_asmoperand : AsmOperandClass { - let Name = "Label" # width # "_" # scale; - let PredicateMethod = "isLabel<" # width # "," # scale # ">"; - let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">"; - let DiagnosticType = "Label"; -} -def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>; - -// All conditional immediate branches are the same really: 19 signed bits scaled -// by the instruction-size (4). -def bcc_target : Operand { - // This label is a 19-bit offset from PC, scaled by the instruction-width: 4. 
- let ParserMatchClass = label_wid19_scal4_asmoperand; - let PrintMethod = "printLabelOperand<19, 4>"; - let EncoderMethod = "getLabelOpValue"; - let OperandType = "OPERAND_PCREL"; -} - -multiclass cmpbr_sizes { - let isBranch = 1, isTerminator = 1 in { - def x : A64I_cmpbr<0b1, op, - (outs), - (ins GPR64:$Rt, bcc_target:$Label), - !strconcat(asmop,"\t$Rt, $Label"), - [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)], - NoItinerary>, - Sched<[WriteBr, ReadBr]>; - - def w : A64I_cmpbr<0b0, op, - (outs), - (ins GPR32:$Rt, bcc_target:$Label), - !strconcat(asmop,"\t$Rt, $Label"), - [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)], - NoItinerary>, - Sched<[WriteBr, ReadBr]>; - } +defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>; +defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>; +defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>; +defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>; +defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>; +defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>; +defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>; +defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>; +defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>; +defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>; +defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>; +defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>; +let isCodeGenOnly = 1 in { +defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; +defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; +defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>; +defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>; } -defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf >; -defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf >; - //===----------------------------------------------------------------------===// -// Conditional branch (immediate) instructions +// Scaled integer to floating point conversion instructions. //===----------------------------------------------------------------------===// -// Contains: B.cc - -def cond_code_asmoperand : AsmOperandClass { - let Name = "CondCode"; - let DiagnosticType = "CondCode"; -} - -def cond_code : Operand, ImmLeaf= 0 && Imm <= 15; -}]> { - let PrintMethod = "printCondCodeOperand"; - let ParserMatchClass = cond_code_asmoperand; -} -def Bcc : A64I_condbr<0b0, 0b0, (outs), - (ins cond_code:$Cond, bcc_target:$Label), - "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Uses = [NZCV]; - let isBranch = 1; - let isTerminator = 1; -} +defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>; +defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>; //===----------------------------------------------------------------------===// -// Conditional compare (immediate) instructions +// Unscaled integer to floating point conversion instruction. 
//===----------------------------------------------------------------------===// -// Contains: CCMN, CCMP -def uimm4_asmoperand : AsmOperandClass { - let Name = "UImm4"; - let PredicateMethod = "isUImm<4>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm4"; -} - -def uimm4 : Operand { - let ParserMatchClass = uimm4_asmoperand; -} +defm FMOV : UnscaledConversion<"fmov">; -def uimm5 : Operand { - let ParserMatchClass = uimm5_asmoperand; -} +def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>; +def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>; -// The only difference between this operand and the one for instructions like -// B.cc is that it's parsed manually. The other get parsed implicitly as part of -// the mnemonic handling. -def cond_code_op_asmoperand : AsmOperandClass { - let Name = "CondCodeOp"; - let RenderMethod = "addCondCodeOperands"; - let PredicateMethod = "isCondCode"; - let ParserMethod = "ParseCondCodeOperand"; - let DiagnosticType = "CondCode"; -} +//===----------------------------------------------------------------------===// +// Floating point conversion instruction. +//===----------------------------------------------------------------------===// -def cond_code_op : Operand { - let PrintMethod = "printCondCodeOperand"; - let ParserMatchClass = cond_code_op_asmoperand; -} +defm FCVT : FPConversion<"fcvt">; -class A64I_condcmpimmImpl - : A64I_condcmpimm, - Sched<[WriteCMP, ReadCMP]> { - let Defs = [NZCV]; -} +def : Pat<(f32_to_f16 FPR32:$Rn), + (i32 (COPY_TO_REGCLASS + (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)), + GPR32))>; -def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">; -def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">; -def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">; -def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">; +def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn), + [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>; //===----------------------------------------------------------------------===// -// Conditional compare (register) instructions +// Floating point single operand instructions. //===----------------------------------------------------------------------===// -// Contains: CCMN, CCMP - -class A64I_condcmpregImpl - : A64I_condcmpreg, - Sched<[WriteCMP, ReadCMP, ReadCMP]> { - let Defs = [NZCV]; -} -def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">; -def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">; -def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">; -def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">; +defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>; +defm FMOV : SingleOperandFPData<0b0000, "fmov">; +defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>; +defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>; +defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>; +defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>; +defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>; +defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>; -//===----------------------------------------------------------------------===// -// Conditional select instructions -//===----------------------------------------------------------------------===// -// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG - -// Condition code which is encoded as the inversion (semantically rather than -// bitwise) in the instruction. 
-//===----------------------------------------------------------------------===//
-// Conditional select instructions
-//===----------------------------------------------------------------------===//
-// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
-
-// Condition code which is encoded as the inversion (semantically rather than
-// bitwise) in the instruction.
-def inv_cond_code_op_asmoperand : AsmOperandClass {
-  let Name = "InvCondCodeOp";
-  let RenderMethod = "addInvCondCodeOperands";
-  let PredicateMethod = "isCondCode";
-  let ParserMethod = "ParseCondCodeOperand";
-  let DiagnosticType = "CondCode";
-}
+def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
+          (FRINTNDr FPR64:$Rn)>;
-def inv_cond_code_op : Operand<i32> {
-  let ParserMatchClass = inv_cond_code_op_asmoperand;
+// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
+// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
+//
+// TODO: We should really model the FPSR flags correctly. This is really ugly.
+let hasSideEffects = 1 in {
+defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
 }
-// Having a separate operand for the selectable use-case is debatable, but gives
-// consistency with cond_code.
-def inv_cond_XFORM : SDNodeXForm<imm, [{
-  A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
-  return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
-}]>;
+defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
-def inv_cond_code
-  : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
-
-
-multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
-                             SDPatternOperator select> {
-  let Uses = [NZCV] in {
-    def wwwc : A64I_condsel<0b0, op, 0b0, op2,
-                            (outs GPR32:$Rd),
-                            (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
-                            !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
-                            [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
-                            NoItinerary>,
-               Sched<[WriteCMP, ReadCMP, ReadCMP]>;
-
-
-    def xxxc : A64I_condsel<0b1, op, 0b0, op2,
-                            (outs GPR64:$Rd),
-                            (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
-                            !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
-                            [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
-                            NoItinerary>,
-               Sched<[WriteCMP, ReadCMP, ReadCMP]>;
-  }
+let SchedRW = [WriteFDiv] in {
+defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
 }
-def simple_select
-  : PatFrag<(ops node:$lhs, node:$rhs),
-            (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
-
-class complex_select<SDPatternOperator opnode>
-  : PatFrag<(ops node:$lhs, node:$rhs),
-            (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
-
-
-defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
-defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
-                 complex_select<PatFrag<(ops node:$val), (add node:$val, 1)>>>;
-defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
-defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
-
-// Now the instruction aliases, which fit nicely into LLVM's model:
-
-def : InstAlias<"cset $Rd, $Cond",
-                (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cset $Rd, $Cond",
-                (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
-def : InstAlias<"csetm $Rd, $Cond",
-                (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
-def : InstAlias<"csetm $Rd, $Cond",
-                (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cinc $Rd, $Rn, $Cond",
-                (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cinc $Rd, $Rn, $Cond",
-                (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cinv $Rd, $Rn, $Cond",
-                (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cinv $Rd, $Rn, $Cond",
-                (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cneg $Rd, $Rn, $Cond",
-                (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cneg $Rd, $Rn, $Cond",
-                (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
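The cset/csetm/cinc/cinv/cneg aliases removed above all encode pseudo-ops as csinc/csinv/csneg with repeated or zero sources. A small C++ illustration of the source pattern that typically ends up as cmp + cset (hedged: typical codegen, not asserted for any particular compiler version):

#include <cstdint>
int is_less(int32_t a, int32_t b) {
  return a < b;   // typically: cmp w0, w1 ; cset w0, lt
}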
-// Finally some helper patterns.
-
-// For CSET (a.k.a. zero-extension of icmp)
-def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
-          (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
-          (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
-
-def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
-          (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
-          (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
-
-// For CSETM (a.k.a. sign-extension of icmp)
-def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
-          (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
-          (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
-
-def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
-          (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
-          (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
-
-// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
-// commutativity. The instructions are too complex for isCommutable to be used,
-// so we have to create the patterns manually:
-
-// No commutable pattern for CSEL since the commuted version is isomorphic.
-
-// CSINC
-def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
-         (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
-         (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
-
-// CSINV
-def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
-         (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
-         (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
-
-// CSNEG
-def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
-         (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
-         (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
-
 //===----------------------------------------------------------------------===//
-// Data Processing (1 source) instructions
+// Floating point two operand instructions.
 //===----------------------------------------------------------------------===//
-// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
-
-// We define a unary operator which always fails. We will use this to
-// define unary operators that cannot be matched.
-
-class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
-                        list<dag> patterns, RegisterClass GPRrc,
-                        InstrItinClass itin>:
-      A64I_dp_1src,
-      Sched<[WriteALU, ReadALU]>;
-
-multiclass A64I_dp_1src<bits<6> opcode, string asmop> {
-  let hasSideEffects = 0 in {
-    def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
-    def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
-  }
-}
-defm RBIT : A64I_dp_1src<0b000000, "rbit">;
-defm CLS  : A64I_dp_1src<0b000101, "cls">;
-defm CLZ  : A64I_dp_1src<0b000100, "clz">;
-
-def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
-def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
-def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
-def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
-
-def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
-def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
-def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
-def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
-
-
-def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
-                              [(set i32:$Rd, (bswap i32:$Rn))],
-                              GPR32, NoItinerary>;
-def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
-                              [(set i64:$Rd, (bswap i64:$Rn))],
-                              GPR64, NoItinerary>;
-def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
-                                [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
-                                GPR64, NoItinerary>;
-def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
-                                [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
-                                GPR32,
-                                NoItinerary>;
-def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
+defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
+let SchedRW = [WriteFDiv] in {
+defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
+}
+defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>;
+defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>;
+let SchedRW = [WriteFMul] in {
+defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
+defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
+}
+defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
+
+def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
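The removed cttz patterns above rely on the identity cttz(x) == clz(rbit(x)): bit-reversal turns trailing zeros into leading zeros. A portable C++ model of the same trick (rbit32 is a hypothetical software stand-in for the RBIT instruction; __builtin_clz is undefined for 0, matching the *_zero_undef forms):

#include <cstdint>

uint32_t rbit32(uint32_t x) {   // software stand-in for RBIT
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r |= ((x >> i) & 1u) << (31 - i);
  return r;
}

int cttz32(uint32_t x) {        // requires x != 0
  return __builtin_clz(rbit32(x));
}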
 //===----------------------------------------------------------------------===//
-// Data Processing (2 sources) instructions
+// Floating point three operand instructions.
 //===----------------------------------------------------------------------===//
-// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
-// LSR, ASR, ROR
-
-
-class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
-                   RegisterClass GPRsp,
-                   InstrItinClass itin>:
-      A64I_dp_2src,
-      Sched<[WriteALU, ReadALU, ReadALU]>;
-
-multiclass dp_2src_crc<bit c, string asmop> {
-  def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
-                           !strconcat(asmop, "b"), [], GPR32, NoItinerary>;
-  def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
-                           !strconcat(asmop, "h"), [], GPR32, NoItinerary>;
-  def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
-                           !strconcat(asmop, "w"), [], GPR32, NoItinerary>;
-  def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
-                           !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
-                           (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
-                           NoItinerary>,
-              Sched<[WriteALU, ReadALU, ReadALU]>;
-}
-multiclass dp_2src_zext<bits<6> opcode, string asmop, SDPatternOperator op> {
-  def www : dp_2src_impl<0b0,
-                         opcode,
-                         asmop,
-                         [(set i32:$Rd,
-                               (op i32:$Rn, (i64 (zext i32:$Rm))))],
-                         GPR32,
-                         NoItinerary>;
-  def xxx : dp_2src_impl<0b1,
-                         opcode,
-                         asmop,
-                         [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
-                         GPR64,
-                         NoItinerary>;
-}
+defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;
+defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
+    TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
+defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
+    TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
+defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
+    TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
+// The following def pats catch the case where the LHS of an FMA is negated.
+// The TriOpFrag above catches the case where the middle operand is negated.
-multiclass dp_2src<bits<6> opcode, string asmop, SDPatternOperator op> {
-  def www : dp_2src_impl<0b0,
-                         opcode,
-                         asmop,
-                         [(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
-                         GPR32,
-                         NoItinerary>;
-  def xxx : dp_2src_impl<0b1,
-                         opcode,
-                         asmop,
-                         [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
-                         GPR64,
-                         NoItinerary>;
-}
+// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
+// the NEON variant.
+def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
+          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-// Here we define the data processing 2 source instructions.
-defm CRC32  : dp_2src_crc<0b0, "crc32">;
-defm CRC32C : dp_2src_crc<0b1, "crc32c">;
+def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
+          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-let SchedRW = [WriteDiv, ReadDiv, ReadDiv] in {
-  defm UDIV : dp_2src<0b000010, "udiv", udiv>;
-  defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
-}
+// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and
+// "(-a) + b*(-c)".
+def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
+          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-let SchedRW = [WriteALUs, ReadALU, ReadALU] in {
-  defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
-  defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
-  defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
-  defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
-}
+def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
+          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
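The negation-folding Pats above are exact identities on the fused operation, so they are always safe. In std::fma terms (a sketch; fmsub/fnmadd here name the AArch64 scalar instructions' semantics, not library functions):

#include <cmath>
double fmsub(double n, double m, double a)  { return std::fma(-n, m, a);  } //  a - n*m
double fnmadd(double n, double m, double a) { return std::fma(-n, m, -a); } // -a - n*m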
-// Extra patterns for an incoming 64-bit value for a 32-bit
-// operation. Since the LLVM operations are undefined (as in C) if the
-// RHS is out of range, it's perfectly permissible to discard the high
-// bits of the GPR64.
-def : Pat<(shl i32:$Rn, i64:$Rm),
-          (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
-def : Pat<(srl i32:$Rn, i64:$Rm),
-          (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
-def : Pat<(sra i32:$Rn, i64:$Rm),
-          (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
-def : Pat<(rotr i32:$Rn, i64:$Rm),
-          (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
-
-// Here we define the aliases for the data processing 2 source instructions.
-def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
-def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
-def ASR_menmonic : MnemonicAlias<"asrv", "asr">;
-def ROR_menmonic : MnemonicAlias<"rorv", "ror">;
+def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
+          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
+
+def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
+          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
 //===----------------------------------------------------------------------===//
-// Data Processing (3 sources) instructions
+// Floating point comparison instructions.
 //===----------------------------------------------------------------------===//
-// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
-// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
-
-class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
-                        ValueType AccTy, RegisterClass SrcReg,
-                        string asmop, dag pattern>
-  : A64I_dp3,
-    Sched<[WriteMAC, ReadMAC, ReadMAC, ReadMAC]> {
-  bits<5> Ra;
-  let Inst{14-10} = Ra;
-
-  RegisterClass AccGPR = AccReg;
-  RegisterClass SrcGPR = SrcReg;
-}
-
-def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
-                                 (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
-def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
-                                 (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
-
-def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
-                                 (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
-def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
-                                 (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
-
-def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
-                 (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
-def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
-                 (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
-
-def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
-                 (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
-def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
-                 (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
-
-let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
-  def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
-                          (ins GPR64:$Rn, GPR64:$Rm),
-                          "umulh\t$Rd, $Rn, $Rm",
-                          [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
-                          NoItinerary>,
-                Sched<[WriteMAC, ReadMAC, ReadMAC]>;
-
-  def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
-                          (ins GPR64:$Rn, GPR64:$Rm),
-                          "smulh\t$Rd, $Rn, $Rm",
-                          [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
-                          NoItinerary>,
-                Sched<[WriteMAC, ReadMAC, ReadMAC]>;
-}
-multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
-                             Register ZR, dag pattern> {
-  def : InstAlias<asmop # " $Rd, $Rn, $Rm",
-                  (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
+defm FCMPE : FPComparison<1, "fcmpe">;
+defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
-
-  def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
-}
+//===----------------------------------------------------------------------===//
+// Floating point conditional comparison instructions.
+//===----------------------------------------------------------------------===// -defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>; -defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>; +defm FCCMPE : FPCondComparison<1, "fccmpe">; +defm FCCMP : FPCondComparison<0, "fccmp">; -defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR, - (sub 0, (mul i32:$Rn, i32:$Rm))>; -defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR, - (sub 0, (mul i64:$Rn, i64:$Rm))>; +//===----------------------------------------------------------------------===// +// Floating point conditional select instruction. +//===----------------------------------------------------------------------===// -defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR, - (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>; -defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR, - (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>; +defm FCSEL : FPCondSelect<"fcsel">; -defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR, - (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>; -defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR, - (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>; +// CSEL instructions providing f128 types need to be handled by a +// pseudo-instruction since the eventual code will need to introduce basic +// blocks and control flow. +def F128CSEL : Pseudo<(outs FPR128:$Rd), + (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond), + [(set (f128 FPR128:$Rd), + (AArch64csel FPR128:$Rn, FPR128:$Rm, + (i32 imm:$cond), NZCV))]> { + let Uses = [NZCV]; + let usesCustomInserter = 1; +} //===----------------------------------------------------------------------===// -// Exception generation +// Floating point immediate move. //===----------------------------------------------------------------------===// -// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3 - -def uimm16_asmoperand : AsmOperandClass { - let Name = "UImm16"; - let PredicateMethod = "isUImm<16>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm16"; -} -def uimm16 : Operand { - let ParserMatchClass = uimm16_asmoperand; +let isReMaterializable = 1 in { +defm FMOV : FPMoveImmediate<"fmov">; } -class A64I_exceptImpl opc, bits<2> ll, string asmop> - : A64I_exception, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; -} +//===----------------------------------------------------------------------===// +// Advanced SIMD two vector instructions. 
+//===----------------------------------------------------------------------===//
+// Advanced SIMD two vector instructions.
+//===----------------------------------------------------------------------===//
-def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
-def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
-def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
-def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
-def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
+defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
+defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
+defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
+defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
+defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
+defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
+defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
+defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
+defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
+defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
+
+defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
+defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
+defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
+defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
+defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
+defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
+defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
+def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
+          (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
+                                                                (i64 4)))),
+          (FCVTLv8i16 V128:$Rn)>;
+def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
+def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
+                                                    (i64 2))))),
+          (FCVTLv4i32 V128:$Rn)>;
+
+defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
+defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
+defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
+defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
+defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
+def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
+          (FCVTNv4i16 V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd,
+                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
+          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
+          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
+defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
+defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
+                                        int_aarch64_neon_fcvtxn>;
+defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
+defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
+let isCodeGenOnly = 1 in {
+defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs",
+                                       int_aarch64_neon_fcvtzs>;
+defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu",
+                                       int_aarch64_neon_fcvtzu>;
+}
+defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
+defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
+defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>;
+defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
+defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
+defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
+defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
+defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
+defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
+defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
+defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
+                             UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
+defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
+// Aliases for MVN -> NOT.
+def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
+                (NOTv8i8 V64:$Vd, V64:$Vn)>;
+def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
+                (NOTv16i8 V128:$Vd, V128:$Vn)>;
+
+def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>;
+def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
+def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>;
+def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
+def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>;
+def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
+def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;
+
+def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+
+def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+
+defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
+defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
+defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
+defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
+defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
+       BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
+defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
+defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
+defm SHLL : SIMDVectorLShiftLongBySizeBHS;
+defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
+defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
+defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
+defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
+defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
+defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
+       BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >;
+defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
+                                int_aarch64_neon_uaddlp>;
+defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
+defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
+defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
+defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
+defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
+defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
+
+def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
+def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
+
+// Patterns for vector long shift (by element width). These need to match all
+// three of zext, sext and anyext so it's easier to pull the patterns out of the
+// definition.
+multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
+  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
+            (SHLLv8i8 V64:$Rn)>;
+  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
+            (SHLLv16i8 V128:$Rn)>;
+  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
+            (SHLLv4i16 V64:$Rn)>;
+  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
+            (SHLLv8i16 V128:$Rn)>;
+  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
+            (SHLLv2i32 V64:$Rn)>;
+  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
+            (SHLLv4i32 V128:$Rn)>;
+}
+
+defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
+defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
+defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
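shll is "widen each element, then shift left by the old element width", which is why the multiclass above has to match zext, sext and anyext alike: after shifting by the full source width, the extension bits have all been shifted out, so the three extensions are indistinguishable. One lane in C++ terms (a sketch):

#include <cstdint>
uint16_t shll_lane(uint8_t x) {
  return (uint16_t)((uint16_t)x << 8);  // same result for zero- or sign-extension
}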
-def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
-def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
-def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
+//===----------------------------------------------------------------------===//
+// Advanced SIMD three vector instructions.
+//===----------------------------------------------------------------------===//
-// The immediate is optional for the DCPS instructions, defaulting to 0.
-def : InstAlias<"dcps1", (DCPS1i 0)>;
-def : InstAlias<"dcps2", (DCPS2i 0)>;
-def : InstAlias<"dcps3", (DCPS3i 0)>;
+defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
+defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
+defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
+defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
+defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
+defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
+defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
+defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
+defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>;
+defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>;
+defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
+defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
+defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
+defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
+defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>;
+defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>;
+defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
+defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>;
+defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>;
+
+// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
+// instruction expects the addend first, while the fma intrinsic puts it last.
+defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla",
+      TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls",
+      TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
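On the TriOpFrag reordering just above: llvm.fma takes the addend last, but the tied-operand instruction computes acc + n*m with the accumulator first, so the fragment swaps $LHS into the addend slot. In scalar std::fma terms (sketch):

#include <cmath>
float fmla(float acc, float n, float m) {
  return std::fma(n, m, acc);  // fmla vd(acc), vn, vm  ==  acc + n*m
}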
+// The following def pats catch the case where the LHS of an FMA is negated.
+// The TriOpFrag above catches the case where the middle operand is negated.
+def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
+          (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
+
+def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
+          (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
+def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
+          (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
+defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>;
+defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>;
+defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>;
+defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
+      TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
+defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
+      TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
+defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
+defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
+defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
+      TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
+defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
+defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
+defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
+defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>;
+defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
+defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>;
+defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
+defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
+defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
+defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
+defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
+defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
+defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
+defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
+defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
+defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
+defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
+      TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
+defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
+defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
+defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
+defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>;
+defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
+defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>;
+defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
+defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
+defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
+defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
+defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
+defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
+defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
+
+defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
+defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
+                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
+defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
+defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
+defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
+    TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
+defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
+defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
+                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
+defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
+
+def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
+          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
+          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
+          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
+          (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+
+def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
+          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
+          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
+          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
+          (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
+def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
+                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
+def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
+                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
+                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
+                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+
+def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
+                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
+def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
+                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
+                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
+                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+
+def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
+                "|cmls.8b\t$dst, $src1, $src2}",
+                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
+                "|cmls.16b\t$dst, $src1, $src2}",
+                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
+                "|cmls.4h\t$dst, $src1, $src2}",
+                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
+                "|cmls.8h\t$dst, $src1, $src2}",
+                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
+                "|cmls.2s\t$dst, $src1, $src2}",
+                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
+                "|cmls.4s\t$dst, $src1, $src2}",
+                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
+                "|cmls.2d\t$dst, $src1, $src2}",
+                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
+                "|cmlo.8b\t$dst, $src1, $src2}",
+                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
+                "|cmlo.16b\t$dst, $src1, $src2}",
+                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
+                "|cmlo.4h\t$dst, $src1, $src2}",
+                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
+                "|cmlo.8h\t$dst, $src1, $src2}",
+                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
+                "|cmlo.2s\t$dst, $src1, $src2}",
+                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
+                "|cmlo.4s\t$dst, $src1, $src2}",
+                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
+                "|cmlo.2d\t$dst, $src1, $src2}",
+                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
+                "|cmle.8b\t$dst, $src1, $src2}",
+                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
+                "|cmle.16b\t$dst, $src1, $src2}",
+                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
+                "|cmle.4h\t$dst, $src1, $src2}",
+                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
+                "|cmle.8h\t$dst, $src1, $src2}",
+                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
+                "|cmle.2s\t$dst, $src1, $src2}",
+                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
+                "|cmle.4s\t$dst, $src1, $src2}",
+                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
+                "|cmle.2d\t$dst, $src1, $src2}",
+                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
+                "|cmlt.8b\t$dst, $src1, $src2}",
+                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
+                "|cmlt.16b\t$dst, $src1, $src2}",
+                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
+                "|cmlt.4h\t$dst, $src1, $src2}",
+                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
+                "|cmlt.8h\t$dst, $src1, $src2}",
+                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
+                "|cmlt.2s\t$dst, $src1, $src2}",
+                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
+                "|cmlt.4s\t$dst, $src1, $src2}",
+                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
+                "|cmlt.2d\t$dst, $src1, $src2}",
+                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
+                "|fcmle.2s\t$dst, $src1, $src2}",
+                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
+                "|fcmle.4s\t$dst, $src1, $src2}",
+                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
+                "|fcmle.2d\t$dst, $src1, $src2}",
+                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
+                "|fcmlt.2s\t$dst, $src1, $src2}",
+                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
+                "|fcmlt.4s\t$dst, $src1, $src2}",
+                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
+                "|fcmlt.2d\t$dst, $src1, $src2}",
+                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
+                "|facle.2s\t$dst, $src1, $src2}",
+                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
+                "|facle.4s\t$dst, $src1, $src2}",
+                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
+                "|facle.2d\t$dst, $src1, $src2}",
+                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
+                "|faclt.2s\t$dst, $src1, $src2}",
+                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
+                "|faclt.4s\t$dst, $src1, $src2}",
+                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
+                "|faclt.2d\t$dst, $src1, $src2}",
+                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
 //===----------------------------------------------------------------------===//
-// Extract (immediate)
+// Advanced SIMD three scalar instructions.
 //===----------------------------------------------------------------------===//
-// Contains: EXTR + alias ROR
-
-def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
-                            (outs GPR32:$Rd),
-                            (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
-                            "extr\t$Rd, $Rn, $Rm, $LSB",
-                            [(set i32:$Rd,
-                                  (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
-                            NoItinerary>,
-               Sched<[WriteALU, ReadALU, ReadALU]>;
-def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
-                            (outs GPR64:$Rd),
-                            (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
-                            "extr\t$Rd, $Rn, $Rm, $LSB",
-                            [(set i64:$Rd,
-                                  (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
-                            NoItinerary>,
-               Sched<[WriteALU, ReadALU, ReadALU]>;
-
-def : InstAlias<"ror $Rd, $Rs, $LSB",
-                (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
-def : InstAlias<"ror $Rd, $Rs, $LSB",
-                (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
-
-def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
-          (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
-def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
-          (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
+
+defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;
+defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
+defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
+defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
+defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
+defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
+defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
+defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>;
+def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
+defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge",
+                                  int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt",
+                                  int_aarch64_neon_facgt>;
+defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
+defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>;
+defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
+defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
+defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
+defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
+defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
+defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
+defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
+defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
+defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
+defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
+defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
+defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
+defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
+defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
+
+def : InstAlias<"cmls $dst, $src1, $src2",
+                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmle $dst, $src1, $src2",
+                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmlo $dst, $src1, $src2",
+                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmlt $dst, $src1, $src2",
+                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"fcmle $dst, $src1, $src2",
+                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"fcmle $dst, $src1, $src2",
+                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"fcmlt $dst, $src1, $src2",
+                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"fcmlt $dst, $src1, $src2",
+                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"facle $dst, $src1, $src2",
+                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"facle $dst, $src1, $src2",
+                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"faclt $dst, $src1, $src2",
+                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"faclt $dst, $src1, $src2",
+                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
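None of cmls/cmlo/cmlt/cmle (nor the fcm*/fac* "less" spellings) are real encodings: all the aliases in this section print and parse them by swapping the sources of the >=/> instructions, relying on a <= b being the same comparison as b >= a. The scalar statement of that identity, trivially true in C++ as well (sketch):

#include <cstdint>
bool cmls(uint64_t a, uint64_t b) { return b >= a; }  // "a lower-or-same b"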
 //===----------------------------------------------------------------------===//
-// Floating-point compare instructions
+// Advanced SIMD three scalar instructions (mixed operands).
 //===----------------------------------------------------------------------===//
-// Contains: FCMP, FCMPE
+defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
+                                      int_aarch64_neon_sqdmulls_scalar>;
+defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
+defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
+
+def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
+                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
+                                                          (i32 FPR32:$Rm))))),
+          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
+                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
+                                                          (i32 FPR32:$Rm))))),
+          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
-
-def fpzero_asmoperand : AsmOperandClass {
-  let Name = "FPZero";
-  let ParserMethod = "ParseFPImmOperand";
-  let DiagnosticType = "FPZero";
-}
+//===----------------------------------------------------------------------===//
+// Advanced SIMD two scalar instructions.
+//===----------------------------------------------------------------------===//
-def fpz32 : Operand<f32>,
-            ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
-  let ParserMatchClass = fpzero_asmoperand;
-  let PrintMethod = "printFPZeroOperand";
-  let DecoderMethod = "DecodeFPZeroOperand";
-}
-
-def fpz64 : Operand<f64>,
-            ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
-  let ParserMatchClass = fpzero_asmoperand;
-  let PrintMethod = "printFPZeroOperand";
-  let DecoderMethod = "DecodeFPZeroOperand";
-}
-
-def fpz64movi : Operand<i64>,
-            ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
-  let ParserMatchClass = fpzero_asmoperand;
-  let PrintMethod = "printFPZeroOperand";
-  let DecoderMethod = "DecodeFPZeroOperand";
-}
-
-multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
-  def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
-                          (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
-                          NoItinerary>,
-               Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
-    let Defs = [NZCV];
-  }
-
-  def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
-                        (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary>,
-             Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
-    let Defs = [NZCV];
-  }
-}
-
-defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
-                               (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
-defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
-                               (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
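The two Pats above fuse a saturating add/subtract of a saturating-doubling multiply into sqdmlal/sqdmlsl. Ignoring the saturation clamps for brevity, the arithmetic being matched is as follows (an unsaturated C++ model, not the full instruction semantics):

#include <cstdint>
int64_t sqdmlal_model(int64_t acc, int32_t n, int32_t m) {
  return acc + 2 * (int64_t)n * (int64_t)m;  // sqdmull = 2*n*m, then sqadd
}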
-// What would be Rm should be written as 0; note that even though it's called
-// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
-defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
-                               (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
-
-defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
-                               (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
-
-
-//===----------------------------------------------------------------------===//
-// Floating-point conditional compare instructions
-//===----------------------------------------------------------------------===//
-// Contains: FCCMP, FCCMPE
-
-class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
-  : A64I_fpccmp<0b0, 0b0, type, op,
-                (outs),
-                (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
-                !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
-                [], NoItinerary>,
-    Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
-  let Defs = [NZCV];
-}
-
-def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
-def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
-def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
-def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
-
-//===----------------------------------------------------------------------===//
-// Floating-point conditional select instructions
-//===----------------------------------------------------------------------===//
-// Contains: FCSEL
-
-let Uses = [NZCV] in {
-  def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
-                                 (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
-                                 "fcsel\t$Rd, $Rn, $Rm, $Cond",
-                                 [(set f32:$Rd,
-                                       (simple_select f32:$Rn, f32:$Rm))],
-                                 NoItinerary>,
-                  Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-
-  def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
-                                 (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
-                                 "fcsel\t$Rd, $Rn, $Rm, $Cond",
-                                 [(set f64:$Rd,
-                                       (simple_select f64:$Rn, f64:$Rm))],
-                                 NoItinerary>,
-                  Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Floating-point data-processing (1 source)
-//===----------------------------------------------------------------------===//
-// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
-
-def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
-                       [{ (void)N; return false; }]>;
-
-// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
-// syntax. Default to no pattern because most are odd enough not to have one.
-multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
-                           SDPatternOperator opnode = FPNoUnop> {
-  def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
-                      !strconcat(asmstr, "\t$Rd, $Rn"),
-                      [(set f32:$Rd, (opnode f32:$Rn))],
-                      NoItinerary>,
-           Sched<[WriteFPALU, ReadFPALU]>;
-
-  def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
-                      !strconcat(asmstr, "\t$Rd, $Rn"),
-                      [(set f64:$Rd, (opnode f64:$Rn))],
-                      NoItinerary>,
-           Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
-defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
-defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
-let SchedRW = [WriteFPSqrt, ReadFPSqrt] in {
-  defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
-}
-
-defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
-defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
-defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
-defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
-defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
-defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
-defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
-
-// The FCVT instructions have different source and destination register-types,
-// but the fields are uniform everywhere a D-register (say) crops up. Package
-// this information in a Record.
-class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
-  RegisterClass Class = rc;
-  ValueType VT = vt;
-  bit t1 = fld{1};
-  bit t0 = fld{0};
-}
-
-def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
-def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
-def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
-
-class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg,
-                      SDPatternOperator opnode>
-  : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
-               {0,0,0,1, DestReg.t1, DestReg.t0},
-               (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
-               "fcvt\t$Rd, $Rn",
-               [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>,
-    Sched<[WriteFPALU, ReadFPALU]>;
-
-def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
-def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
-def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
-def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
-def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
-def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
-
-
-//===----------------------------------------------------------------------===//
-// Floating-point data-processing (2 sources) instructions
-//===----------------------------------------------------------------------===//
-// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
-
-def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
-                        [{ (void)N; return false; }]>;
-
-multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
-                           SDPatternOperator opnode> {
-  def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
-                       (outs FPR32:$Rd),
-                       (ins FPR32:$Rn, FPR32:$Rm),
-                       !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
-                       [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
-                       NoItinerary>,
-            Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-  def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
-                       (outs FPR64:$Rd),
-                       (ins FPR64:$Rn, FPR64:$Rm),
-                       !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
-                       [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
-                       NoItinerary>,
-            Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-}
-
-let isCommutable = 1 in {
-  let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-    defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
-  }
-  defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
-
-  // No patterns for these.
-  defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
-  defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
-  defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
-  defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
-
-  let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-    defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
-                                 PatFrag<(ops node:$lhs, node:$rhs),
-                                         (fneg (fmul node:$lhs, node:$rhs))> >;
-  }
-}
-
-let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in {
-  defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
-}
-defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
-
-//===----------------------------------------------------------------------===//
-// Floating-point data-processing (3 sources) instructions
-//===----------------------------------------------------------------------===//
-// Contains: FMADD, FMSUB, FNMADD, FNMSUB
-
-def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
-                    (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
-def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
-                     (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
-def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
-                     (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
-
-class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
-                     bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
-  : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
-               (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
-               !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
-               [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
-               NoItinerary>,
-    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]>;
-
-def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
-def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
-def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
-def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
-
-def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
-def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
-def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
-def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
-// Extra patterns for when we're allowed to optimise separate multiplication and
-// addition.
-let Predicates = [HasFPARMv8, UseFusedMAC] in {
-def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
-          (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
-          (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
-          (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
-          (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-
-def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
-          (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
-          (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
-          (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
-          (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Floating-point <-> fixed-point conversion instructions
-//===----------------------------------------------------------------------===//
-// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
-
-// #1-#32 allowed, encoded as "64 - <immediate>"
-def fixedpos_asmoperand_i32 : AsmOperandClass {
-  let Name = "CVTFixedPos32";
-  let RenderMethod = "addCVTFixedPosOperands";
-  let PredicateMethod = "isCVTFixedPos<32>";
-  let DiagnosticType = "CVTFixedPos32";
-}
-
-// Also encoded as "64 - <immediate>" but #1-#64 allowed.
-def fixedpos_asmoperand_i64 : AsmOperandClass {
-  let Name = "CVTFixedPos64";
-  let RenderMethod = "addCVTFixedPosOperands";
-  let PredicateMethod = "isCVTFixedPos<64>";
-  let DiagnosticType = "CVTFixedPos64";
-}
-
-// We need the cartesian product of f32/f64 i32/i64 operands for
-// conversions:
-//   + Selection needs to use operands of correct floating type
-//   + Assembly parsing and decoding depend on integer width
-class cvtfix_i32_op<ValueType FloatVT>
-  : Operand<i32>,
-    ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
-  let ParserMatchClass = fixedpos_asmoperand_i32;
-  let DecoderMethod = "DecodeCVT32FixedPosOperand";
-  let PrintMethod = "printCVTFixedPosOperand";
-}
-
-class cvtfix_i64_op<ValueType FloatVT>
-  : Operand<i64>,
-    ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
-  let ParserMatchClass = fixedpos_asmoperand_i64;
-  let PrintMethod = "printCVTFixedPosOperand";
-}
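The fixed-point conversion operands being removed encode #fbits as "64 - <immediate>" in the instruction word. Semantically, scvtf with a scale divides the integer by 2^fbits after converting; a C++ model of one such conversion (a sketch, assuming fbits in 1..63):

#include <cstdint>
double scvtf_fixed(int64_t x, unsigned fbits) {
  return (double)x / (double)(1ull << fbits);  // scvtf d0, x0, #fbits
}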
-
-class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
-                   RegisterClass GPR, RegisterClass FPR,
-                   ValueType DstTy, ValueType SrcTy,
-                   Operand scale_op, string asmop, SDNode cvtop>
-  : A64I_fpfixed,
-    Sched<[WriteFPALU, ReadFPALU]>;
-
-def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
-                             cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
-def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
-                             cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
-def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
-                             cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
-def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
-                             cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
-
-def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
-                             cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
-def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
-                             cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
-def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
-                             cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
-def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
-                             cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
-
-
-class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
-                   RegisterClass FPR, RegisterClass GPR,
-                   ValueType DstTy, ValueType SrcTy,
-                   Operand scale_op, string asmop, SDNode cvtop>
-  : A64I_fpfixed,
-    Sched<[WriteFPALU, ReadFPALU]>;
-
-def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
-                            cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
-def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
-                            cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
-def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
-                            cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
-def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
-                            cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
-def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
-                            cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
-def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
-                            cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
-def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
-                            cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
-def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
-                            cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
-
-//===----------------------------------------------------------------------===//
-// Floating-point <-> integer conversion instructions
-//===----------------------------------------------------------------------===//
-// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
-
-class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
-                  RegisterClass DestPR, RegisterClass SrcPR, string asmop>
-  : A64I_fpint,
-    Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
-  def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0}, GPR32, FPR32, asmop # "s">;
-  def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0}, GPR64, FPR32, asmop # "s">;
-  def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1}, GPR32, FPR32, asmop # "u">;
-  def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1}, GPR64, FPR32, asmop # "u">;
-
-  def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0}, GPR32, FPR64, asmop # "s">;
-  def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0}, GPR64, FPR64, asmop # "s">;
-  def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1}, GPR32, FPR64, asmop # "u">;
-  def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1}, GPR64, FPR64, asmop # "u">;
-}
-
-defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
-defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
-defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
-defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
-defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
-
-let Predicates = [HasFPARMv8] in {
-def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
-def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
-def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
-def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
-def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
-def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
-def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
-def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
-}
-
-multiclass A64I_inttofp<bit o0, string asmop> {
-  def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
-  def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
-  def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
-  def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
-}
-
-defm S : A64I_inttofp<0b0, "scvtf">;
-defm U : A64I_inttofp<0b1, "ucvtf">;
-
-let Predicates = [HasFPARMv8] in {
-def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
-def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
-def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
-def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
-def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
-def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
-def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
-def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
-}
-
-def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
-def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
-def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
-def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
-
-let Predicates = [HasFPARMv8] in {
-def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
-def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
-def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
-def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
-}
-
-def lane1_asmoperand : AsmOperandClass {
-  let Name = "Lane1";
-  let RenderMethod = "addImmOperands";
-  let DiagnosticType = "Lane1";
-}
-
-def lane1 : Operand<i32> {
-  let ParserMatchClass = lane1_asmoperand;
-  let PrintMethod = "printBareImmOperand";
-}
-
-let DecoderMethod = "DecodeFMOVLaneInstruction" in {
-  def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
-                          (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
-                          "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>,
-               Sched<[WriteFPALU, ReadFPALU]>;
-
-  def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
-                          (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
-                          "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>,
-               Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-let Predicates = [HasFPARMv8] in {
-def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
-                (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
-
-def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
-                (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
-}
+defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", int_aarch64_neon_abs>;
+defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
+defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
+defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
+defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
+defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
+defm FCMEQ  : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
+defm FCMGE  : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
+defm FCMGT  : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
+defm FCMLE  : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
+defm FCMLT  : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDTwoScalarSD<   0, 0, 0b11100, "fcvtas">;
+defm FCVTAU : SIMDTwoScalarSD<   1, 0, 0b11100, "fcvtau">;
+defm FCVTMS : SIMDTwoScalarSD<   0, 0, 0b11011, "fcvtms">;
+defm FCVTMU : SIMDTwoScalarSD<   1, 0, 0b11011, "fcvtmu">;
+defm FCVTNS : SIMDTwoScalarSD<   0, 0, 0b11010, "fcvtns">;
+defm FCVTNU : SIMDTwoScalarSD<   1, 0, 0b11010, "fcvtnu">;
+defm FCVTPS : SIMDTwoScalarSD<   0, 1, 0b11010, "fcvtps">;
+defm FCVTPU : SIMDTwoScalarSD<   1, 1, 0b11010, "fcvtpu">;
+def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
+defm FCVTZS : SIMDTwoScalarSD<   0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDTwoScalarSD<   1, 1, 0b11011, "fcvtzu">;
+defm FRECPE : SIMDTwoScalarSD<   0, 1, 0b11101, "frecpe">;
+defm FRECPX : SIMDTwoScalarSD<   0, 1, 0b11111, "frecpx">;
+defm FRSQRTE : SIMDTwoScalarSD<  1, 1, 0b11101, "frsqrte">;
+defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
+                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
+defm SCVTF  : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>;
+defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
+defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
+defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
+defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
+defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
+                                     int_aarch64_neon_suqadd>;
+defm UCVTF  : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
+defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
+defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
+                                     int_aarch64_neon_usqadd>;
+
+def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;
+
+def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
+          (FCVTASv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
+          (FCVTAUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
+          (FCVTMSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
+          (FCVTMUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
+          (FCVTNSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
+          (FCVTNUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
+          (FCVTPSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
+          (FCVTPUv1i64 FPR64:$Rn)>;
+
+def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
+          (FRECPEv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
+          (FRECPEv1i64 FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
+          (FRECPEv1i64 FPR64:$Rn)>;
+
+def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
+          (FRECPXv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
+          (FRECPXv1i64 FPR64:$Rn)>;
+
+def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
+          (FRSQRTEv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
+          (FRSQRTEv1i64 FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
+          (FRSQRTEv1i64 FPR64:$Rn)>;
+
+// If an integer is about to be converted to a floating point value,
+// just load it on the floating point unit.
+// Here are the patterns for 8 and 16-bits to float.
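As a sketch of what this buys (illustrative C++, assuming an AArch64 target): the byte load goes straight into an FP register, the zero-extension comes for free, and no GPR-to-FPR move is needed.

    #include <cstdint>
    // Expected to select "ldr b0, [x0]" + "ucvtf s0, s0" via the patterns below.
    float byteToFloat(const uint8_t *p) {
      return static_cast<float>(*p);   // (uint_to_fp (zextloadi8 ...))
    }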
+// 8-bits -> float.
+multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
+                             SDPatternOperator loadop, Instruction UCVTF,
+                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
+                             SubRegIndex sub> {
+  def : Pat<(DstTy (uint_to_fp (SrcTy
+                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
+                                      ro.Wext:$extend))))),
+            (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
+                     (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
+                     sub))>;
+
+  def : Pat<(DstTy (uint_to_fp (SrcTy
+                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
+                                      ro.Xext:$extend))))),
+            (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
+                     (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
+                     sub))>;
+}
+
+defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
+                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
+def : Pat <(f32 (uint_to_fp (i32
+              (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+                         (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
+def : Pat <(f32 (uint_to_fp (i32
+              (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+                         (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
+// 16-bits -> float.
+defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
+                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
+def : Pat <(f32 (uint_to_fp (i32
+              (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
+def : Pat <(f32 (uint_to_fp (i32
+              (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+                         (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
+// 32-bits are handled in target specific dag combine:
+// performIntToFpCombine.
+// Converting a 64-bit integer to 32-bit floating point is not possible with a
+// single UCVTF on the floating-point registers, as source and destination
+// must have the same size.
+
+// Here are the patterns for 8, 16, 32, and 64-bits to double.
+// 8-bits -> double.
+defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
+                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
+def : Pat <(f64 (uint_to_fp (i32
+              (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                         (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
+def : Pat <(f64 (uint_to_fp (i32
+              (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                         (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
+// 16-bits -> double.
+defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
+                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
+def : Pat <(f64 (uint_to_fp (i32
+              (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                         (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
+def : Pat <(f64 (uint_to_fp (i32
+              (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                         (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
+// 32-bits -> double.
+defm : UIntToFPROLoadPat<f64, i32, load,
+                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
+def : Pat <(f64 (uint_to_fp (i32
+              (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                         (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
+def : Pat <(f64 (uint_to_fp (i32
+              (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                         (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
+// 64-bits -> double are handled in target specific dag combine:
+// performIntToFpCombine.
 //===----------------------------------------------------------------------===//
-// Floating-point immediate instructions
+// Advanced SIMD three different-sized vector instructions.
 //===----------------------------------------------------------------------===//
-// Contains: FMOV
-def fpimm_asmoperand : AsmOperandClass {
-  let Name = "FMOVImm";
-  let ParserMethod = "ParseFPImmOperand";
-  let DiagnosticType = "FPImm";
-}
-
-// The MCOperand for these instructions are the encoded 8-bit values.
-def SDXF_fpimm : SDNodeXForm<fpimm, [{
-  uint32_t Imm8;
-  A64Imms::isFPImm(N->getValueAPF(), Imm8);
-  return CurDAG->getTargetConstant(Imm8, MVT::i32);
-}]>;
+defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
+defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
+defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
+defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
+defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
+defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
+                                             int_aarch64_neon_sabd>;
+defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
+                                          int_aarch64_neon_sabd>;
+defm SADDL   : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
+                 BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
+defm SADDW   : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
+                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
+defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
+    TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
+    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
+defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
+                                               int_aarch64_neon_sqadd>;
+defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
+                                               int_aarch64_neon_sqsub>;
+defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
+                                     int_aarch64_neon_sqdmull>;
+defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
+                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
+defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
+                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
+defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
+                                              int_aarch64_neon_uabd>;
+defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
+                                          int_aarch64_neon_uabd>;
+defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
+                 BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
+defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
+                 BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
+defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
+    TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
+    TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
+defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
+                 BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
+defm USUBW   : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
+                 BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
+
+// Patterns for 64-bit pmull
+def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
+          (PMULLv1i64 V64:$Rn, V64:$Rm)>;
+def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)),
+                                    (vector_extract (v2i64 V128:$Rm), (i64 1))),
+          (PMULLv2i64 V128:$Rn, V128:$Rm)>;
+
+// CodeGen patterns for addhn and subhn instructions, which can actually be
+// written in LLVM IR without too much difficulty.
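A sketch of the IR shape involved (C++ with ACLE intrinsics, AArch64 only): shifting the sum right by half the element width and then truncating is the "written in LLVM IR" form of addhn, so even this intrinsic-free formulation should select the instruction via the patterns below.

    #include <arm_neon.h>
    // trunc(lshr(add(a, b), 8)) -> "addhn v0.8b, v0.8h, v1.8h"
    uint8x8_t add_high_narrow(uint16x8_t a, uint16x8_t b) {
      return vshrn_n_u16(vaddq_u16(a, b), 8);
    }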
+
+// ADDHN
+def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
+          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+                                             (i32 16))))),
+          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+                                             (i32 32))))),
+          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v8i8 V64:$Rd),
+                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+                                                      (i32 8))))),
+          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+                            V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v4i16 V64:$Rd),
+                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+                                                      (i32 16))))),
+          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+                            V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v2i32 V64:$Rd),
+                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+                                                      (i32 32))))),
+          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+                            V128:$Rn, V128:$Rm)>;
+
+// SUBHN
+def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
+          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+                                             (i32 16))))),
+          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+                                             (i32 32))))),
+          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v8i8 V64:$Rd),
+                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+                                                      (i32 8))))),
+          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+                            V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v4i16 V64:$Rd),
+                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+                                                      (i32 16))))),
+          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+                            V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v2i32 V64:$Rd),
+                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+                                                      (i32 32))))),
+          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+                            V128:$Rn, V128:$Rm)>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD bitwise extract from vector instruction.
+//----------------------------------------------------------------------------
+
+defm EXT : SIMDBitwiseExtract<"ext">;
+
+def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+          (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+          (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+
+// We use EXT to handle extract_subvector to copy the upper 64-bits of a
+// 128-bit vector.
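For illustration (a sketch using ACLE intrinsics): taking the upper half of a 128-bit vector is an extract_subvector with a non-zero index, which the patterns below implement with EXT.

    #include <arm_neon.h>
    // extract_subvector v, 8 -> "ext v0.16b, v0.16b, v0.16b, #8" (upper half)
    uint8x8_t upper_half(uint8x16_t v) {
      return vget_high_u8(v);
    }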
+def : Pat<(v8i8  (extract_subvector V128:$Rn, (i64 8))),
+          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
+          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
+          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
+          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
+          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
+          (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD zip vector
+//----------------------------------------------------------------------------
+
+defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
+defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
+defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
+defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
+defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
+defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD TBL/TBX instructions
+//----------------------------------------------------------------------------
+
+defm TBL : SIMDTableLookup<    0, "tbl">;
+defm TBX : SIMDTableLookupTied<1, "tbx">;
+
+def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
+          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
+def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
+          (TBLv16i8One V128:$Ri, V128:$Rn)>;
+
+def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
+                                       (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
+          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
+def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
+                                        (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
+          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar CPY instruction
+//----------------------------------------------------------------------------
+
+defm CPY : SIMDScalarCPY<"cpy">;
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar pairwise instructions
+//----------------------------------------------------------------------------
+
+defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
+defm FADDP   : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">;
+defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
+defm FMAXP   : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
+defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
+defm FMINP   : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
+def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
+          (ADDPv2i64p V128:$Rn)>;
+def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
+          (ADDPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
+          (FADDPv2i32p V64:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
+          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
+def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
+          (FADDPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
+          (FMAXNMPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
+          (FMAXNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
+          (FMAXPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
+          (FMAXPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
+          (FMINNMPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
+          (FMINNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
+          (FMINPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
+          (FMINPv2i64p V128:$Rn)>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD INS/DUP instructions
+//----------------------------------------------------------------------------
+
+def DUPv8i8gpr  : SIMDDupFromMain<0, 0b00001, ".8b",  v8i8,  V64,  GPR32>;
+def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
+def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h",  v4i16, V64,  GPR32>;
+def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h",  v8i16, V128, GPR32>;
+def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s",  v2i32, V64,  GPR32>;
+def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s",  v4i32, V128, GPR32>;
+def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d",  v2i64, V128, GPR64>;
+
+def DUPv2i64lane : SIMDDup64FromElement;
+def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
+def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
+def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
+def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
+def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8,  V64>;
+def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
+
+def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
+          (v2f32 (DUPv2i32lane
+            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
+            (i64 0)))>;
+def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
+          (v4f32 (DUPv4i32lane
+            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
+            (i64 0)))>;
+def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
+          (v2f64 (DUPv2i64lane
+            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
+            (i64 0)))>;
+
+def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
+          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
+def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
+          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
+def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
+          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
+
+// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
+// instruction even if the types don't match: we just have to remap the lane
+// carefully. N.b. this trick only applies to truncations.
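A small sketch of the remapping the SDNodeXForms below perform (plain C++; the function name is mine, not from the backend): viewing one wide lane as several narrow lanes just multiplies the lane index by the width ratio.

    #include <cstdint>
    // VecIndex_x2/_x4/_x8 compute ratio * idx as the narrow-element lane number.
    uint64_t remapLane(uint64_t idx, unsigned ratio) {
      return ratio * idx;   // e.g. x2: wide lane 1 becomes narrow lane 2
    }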
+def VecIndex_x2 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64);
+}]>;
-
-class fmov_operand<ValueType FT>
-  : Operand<FT>,
-    PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
-            SDXF_fpimm> {
-  let PrintMethod = "printFPImmOperand";
-  let ParserMatchClass = fpimm_asmoperand;
-}
-
-def fmov32_operand : fmov_operand<f32>;
-def fmov64_operand : fmov_operand<f64>;
-
-class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
-                      Operand fmov_operand>
-  : A64I_fpimm<0b0, 0b0, type, 0b00000,
-               (outs Reg:$Rd),
-               (ins fmov_operand:$Imm8),
-               "fmov\t$Rd, $Imm8",
-               [(set VT:$Rd, fmov_operand:$Imm8)],
-               NoItinerary>,
-    Sched<[WriteFPALU]>;
-
-def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
-def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
-
-//===----------------------------------------------------------------------===//
-// Load-register (literal) instructions
-//===----------------------------------------------------------------------===//
-// Contains: LDR, LDRSW, PRFM
-
-def ldrlit_label_asmoperand : AsmOperandClass {
-  let Name = "LoadLitLabel";
-  let RenderMethod = "addLabelOperands<19, 4>";
-  let DiagnosticType = "Label";
-}
-
-def ldrlit_label : Operand {
-  let EncoderMethod = "getLoadLitLabelOpValue";
-
-  // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
-  let PrintMethod = "printLabelOperand<19, 4>";
-  let ParserMatchClass = ldrlit_label_asmoperand;
-  let OperandType = "OPERAND_PCREL";
-}
-
-// Various instructions take an immediate value (which can always be used),
-// where some numbers have a symbolic name to make things easier. These operands
-// and the associated functions abstract away the differences.
-multiclass namedimm<string prefix, string mapper> {
-  def _asmoperand : AsmOperandClass {
-    let Name = "NamedImm" # prefix;
-    let PredicateMethod = "isUImm";
-    let RenderMethod = "addImmOperands";
-    let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
-    let DiagnosticType = "NamedImm_" # prefix;
-  }
-
-  def _op : Operand<i32> {
-    let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
-    let PrintMethod = "printNamedImmOperand<" # mapper # ">";
-    let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
-  }
-}
-
-defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
-
-class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
-                        list<dag> patterns = []>
-  : A64I_LDRlit,
-    Sched<[WriteLd]>;
-
-let mayLoad = 1 in {
-  def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
-  def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
-}
-
-let Predicates = [HasFPARMv8] in {
-def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
-def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
-}
-
-let mayLoad = 1 in {
-  let Predicates = [HasFPARMv8] in {
-    def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
-  }
-
-  def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
-                               (outs GPR64:$Rt),
-                               (ins ldrlit_label:$Imm19),
-                               "ldrsw\t$Rt, $Imm19",
-                               [], NoItinerary>,
-                   Sched<[WriteLd]>;
-
-  def PRFM_lit : A64I_LDRlit<0b11, 0b0,
-                             (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
-                             "prfm\t$Rt, $Imm19",
-                             [], NoItinerary>,
-                 Sched<[WriteLd, ReadLd]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Load-store exclusive instructions
-//===----------------------------------------------------------------------===//
-// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR,
-//           STXP, LDXP, STLXRB, STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR,
-//           STLXP, LDAXP, STLRB, STLRH, STLR, LDARB, LDARH, LDAR
-
-// Since these instructions have the undefined register bits set to 1 in
-// their canonical form, we need a post encoder method to set those bits
-// to 1 when encoding these instructions. We do this using the
-// fixLoadStoreExclusive function. This function has template parameters:
-//
-// fixLoadStoreExclusive<bool hasRs, bool hasRt2>
-//
-// hasRs indicates that the instruction uses the Rs field, so we won't set
-// it to 1 (and the same for Rt2). We don't need template parameters for
-// the other register fields since Rt and Rn are always used.
-
-// This operand parses a GPR64xsp register, followed by an optional immediate
-// #0.
-def GPR64xsp0_asmoperand : AsmOperandClass {
-  let Name = "GPR64xsp0";
-  let PredicateMethod = "isWrappedReg";
-  let RenderMethod = "addRegOperands";
-  let ParserMethod = "ParseLSXAddressOperand";
-  // Diagnostics are provided by ParserMethod
-}
-
-def GPR64xsp0 : RegisterOperand {
-  let ParserMatchClass = GPR64xsp0_asmoperand;
-}
-
-//===----------------------------------
-// Store-exclusive (releasing & normal)
-//===----------------------------------
-
-class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
-                      dag ins, list<dag> pat,
-                      InstrItinClass itin> :
-  A64I_LDSTex_stn {
-  let mayStore = 1;
-  let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
-  let Constraints = "@earlyclobber $Rs";
-}
-
-multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
-  def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
-                             (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                             [], NoItinerary>,
-             Sched<[WriteSt, ReadSt, ReadSt]>;
-
-  def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
-                              (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                              [], NoItinerary>,
-              Sched<[WriteSt, ReadSt, ReadSt]>;
-
-  def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
-                             (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                             [], NoItinerary>,
-             Sched<[WriteSt, ReadSt, ReadSt]>;
-
-  def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
-                              (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
-                              [], NoItinerary>,
-              Sched<[WriteSt, ReadSt, ReadSt]>;
-}
-
-defm STXR  : A64I_SRex<"stxr",  0b000, "STXR">;
-defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
-
-//===----------------------------------
-// Loads
-//===----------------------------------
-
-class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
-                      dag ins, list<dag> pat,
-                      InstrItinClass itin> :
-  A64I_LDSTex_tn {
-  let mayLoad = 1;
-  let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
-}
-
-multiclass A64I_LRex<string asmstr, bits<3> opcode> {
-  def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
-                             (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
-                             [], NoItinerary>,
-             Sched<[WriteLd]>;
-
-  def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
-                              (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
-                              [], NoItinerary>,
-              Sched<[WriteLd]>;
-
-  def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
-                             (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
-                             [], NoItinerary>,
-             Sched<[WriteLd]>;
-
-  def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
-                              (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
-                              [], NoItinerary>,
-              Sched<[WriteLd]>;
-}
-
-defm LDXR  : A64I_LRex<"ldxr",  0b000>;
-defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
-defm LDAR  : A64I_LRex<"ldar",  0b101>;
-
-class acquiring_load<PatFrag base>
-  : PatFrag<(ops node:$ptr), (base node:$ptr), [{
-  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
-  return Ordering == Acquire || Ordering == SequentiallyConsistent;
-}]>;
+def VecIndex_x4 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64);
+}]>;
-
-def atomic_load_acquire_8  : acquiring_load<atomic_load_8>;
-def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
-def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
-def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
-
-def : Pat<(atomic_load_acquire_8  i64:$Rn), (LDAR_byte  $Rn)>;
-def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
-def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word  $Rn)>;
-def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
-
-//===----------------------------------
-// Store-release (no exclusivity)
-//===----------------------------------
-
-class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
-                      dag ins, list<dag> pat,
-                      InstrItinClass itin> :
-  A64I_LDSTex_tn {
-  let mayStore = 1;
-  let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
-}
-
-class releasing_store<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
-  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
-  return Ordering == Release || Ordering == SequentiallyConsistent;
-}]>;
+def VecIndex_x8 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64);
+}]>;
-
-def atomic_store_release_8  : releasing_store<atomic_store_8>;
-def atomic_store_release_16 : releasing_store<atomic_store_16>;
-def atomic_store_release_32 : releasing_store<atomic_store_32>;
-def atomic_store_release_64 : releasing_store<atomic_store_64>;
-
-multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
-  def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
-                             (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                             [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
-                             NoItinerary>,
-             Sched<[WriteSt, ReadSt, ReadSt]>;
-
-  def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
-                              (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                              [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
-                              NoItinerary>,
-              Sched<[WriteSt, ReadSt, ReadSt]>;
-
-  def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
-                             (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
-                             [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
-                             NoItinerary>,
-             Sched<[WriteSt, ReadSt, ReadSt]>;
-
-  def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
-                              (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
-                              [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
-                              NoItinerary>,
-              Sched<[WriteSt, ReadSt, ReadSt]>;
-}
-
-defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
-
-//===----------------------------------
-// Store-exclusive pair (releasing & normal)
-//===----------------------------------
-
-class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
-                      dag ins, list<dag> pat,
-                      InstrItinClass itin> :
-  A64I_LDSTex_stt2n {
-  let mayStore = 1;
-}
-
-
-multiclass A64I_SPex<string asmstr, bits<3> opcode> {
-  def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
-                             (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
-                                  GPR64xsp0:$Rn),
-                             [], NoItinerary>,
-             Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
-
-  def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
-                              (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
-                                   GPR64xsp0:$Rn),
-                              [], NoItinerary>,
-              Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
-}
-
-defm STXP  : A64I_SPex<"stxp",  0b010>;
-defm STLXP : A64I_SPex<"stlxp", 0b011>;
-
-//===----------------------------------
-// Load-exclusive pair (acquiring & normal)
-//===----------------------------------
-
-class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
-                      dag ins, list<dag> pat,
-                      InstrItinClass itin> :
-  A64I_LDSTex_tt2n {
-  let mayLoad = 1;
-  let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
-  let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
-}
-
-multiclass A64I_LPex<string asmstr, bits<3> opcode> {
-  def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
-                             (outs GPR32:$Rt, GPR32:$Rt2),
-                             (ins GPR64xsp0:$Rn),
-                             [], NoItinerary>,
-             Sched<[WriteLd, WriteLd, ReadLd]>;
-
-  def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
-                              (outs GPR64:$Rt, GPR64:$Rt2),
-                              (ins GPR64xsp0:$Rn),
-                              [], NoItinerary>,
-              Sched<[WriteLd, WriteLd, ReadLd]>;
-}
-
-defm LDXP  : A64I_LPex<"ldxp",  0b010>;
-defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
+                            ValueType Src128VT, ValueType ScalVT,
+                            Instruction DUP, SDNodeXForm IdxXFORM> {
+  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
+                                                       imm:$idx)))),
+            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
+                                                       imm:$idx)))),
+            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTruncPats<v8i8,  v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
+defm : DUPWithTruncPats<v8i8,  v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
+defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
+
+defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
+defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
+defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
+
+multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
+                               SDNodeXForm IdxXFORM> {
+  def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn),
+                                                           imm:$idx))))),
+            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+  def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn),
+                                                           imm:$idx))))),
+            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,  VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;
+
+defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
+
+// SMOV and UMOV definitions, with some extra patterns for convenience
+defm SMOV : SMov;
+defm UMOV : UMov;
+
+def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
+          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
+          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
+          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), i16),
+          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
+          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
+
+// Extracting i8 or i16 elements will have the zero-extend transformed to
+// an 'and' mask by type legalization since neither i8 nor i16 are legal types
+// for AArch64. Match these patterns here since UMOV already zeroes out the high
+// bits of the destination register.
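Illustrative C++ (ACLE sketch): the extracted byte is zero-extended through an 'and' after legalization, and the patterns below fold that away because umov already clears the upper bits of the destination register.

    #include <arm_neon.h>
    #include <cstdint>
    // (and (vector_extract v, 3), 0xff) -> "umov w0, v0.b[3]", no extra uxtb.
    uint32_t lane3(uint8x16_t v) {
      return vgetq_lane_u8(v, 3);
    }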
+def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
+               (i32 0xff)),
+          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
+               (i32 0xffff)),
+          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
+
+defm INS : SIMDIns;
+
+def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
+          (SUBREG_TO_REG (i32 0),
+                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
+          (SUBREG_TO_REG (i32 0),
+                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+
+def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
+          (SUBREG_TO_REG (i32 0),
+                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
+          (SUBREG_TO_REG (i32 0),
+                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+
+def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
+          (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+                                (i32 FPR32:$Rn), ssub))>;
+def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
+          (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+                                (i32 FPR32:$Rn), ssub))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
+          (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+                                (i64 FPR64:$Rn), dsub))>;
+
+def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
+          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
+def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
+          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
+def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
+          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
+
+def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
+                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
+          (EXTRACT_SUBREG
+            (INSvi32lane
+              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+              VectorIndexS:$imm,
+              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
+              (i64 0)),
+            dsub)>;
+def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
+                                (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
+          (INSvi32lane
+            V128:$Rn, VectorIndexS:$imm,
+            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
+            (i64 0))>;
+def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
+                                (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
+          (INSvi64lane
+            V128:$Rn, VectorIndexD:$imm,
+            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
+            (i64 0))>;
+
+// Copy an element at a constant index in one vector into a constant indexed
+// element of another.
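For illustration (ACLE sketch; whether the frontend reaches these patterns via the vcopy_lane intrinsic or an equivalent shuffle is a frontend detail): a constant-index element copy between two vectors is a single ins.

    #include <arm_neon.h>
    // Copies lane 3 of s into lane 1 of d: "ins v0.s[1], v1.s[3]".
    uint32x4_t copy_lane(uint32x4_t d, uint32x4_t s) {
      return vcopyq_laneq_u32(d, 1, s, 3);
    }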
+// FIXME refactor to a shared class/dev parameterized on vector type, vector
+// index type and INS extension
+def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
+                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
+                   VectorIndexB:$idx2)),
+          (v16i8 (INSvi8lane
+                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
+          )>;
+def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
+                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
+                   VectorIndexH:$idx2)),
+          (v8i16 (INSvi16lane
+                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
+          )>;
+def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
+                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
+                   VectorIndexS:$idx2)),
+          (v4i32 (INSvi32lane
+                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
+          )>;
+def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
+                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
+                   VectorIndexD:$idx2)),
+          (v2i64 (INSvi64lane
+                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
+          )>;
+
+multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
+                                ValueType VTScal, Instruction INS> {
+  def : Pat<(VT128 (vector_insert V128:$src,
+                     (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
+                     imm:$Immd)),
+            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
+
+  def : Pat<(VT128 (vector_insert V128:$src,
+                     (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
+                     imm:$Immd)),
+            (INS V128:$src, imm:$Immd,
+                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
+
+  def : Pat<(VT64 (vector_insert V64:$src,
+                    (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
+                    imm:$Immd)),
+            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
+                                 imm:$Immd, V128:$Rn, imm:$Immn),
+                            dsub)>;
+
+  def : Pat<(VT64 (vector_insert V64:$src,
+                    (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
+                    imm:$Immd)),
+            (EXTRACT_SUBREG
+              (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
+                   (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
+              dsub)>;
+}
+
+defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
+defm : Neon_INS_elt_pattern<v16i8, v8i8,  i32, INSvi8lane>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
+
+
+// Floating point vector extractions are codegen'd as either a sequence of
+// subregister extractions, possibly fed by an INS if the lane number is
+// anything other than zero.
+def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
+          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
+def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
+          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
+def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
+          (f64 (EXTRACT_SUBREG
+                 (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
+                              V128:$Rn, VectorIndexD:$idx),
+                 dsub))>;
+def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
+          (f32 (EXTRACT_SUBREG
+                 (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
+                              V128:$Rn, VectorIndexS:$idx),
+                 ssub))>;
+
+// All concat_vectors operations are canonicalised to act on i64 vectors for
+// AArch64. In the general case we need an instruction, which might as well be
+// INS.
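A sketch of the canonical case (ACLE): concatenating two 64-bit halves is a concat_vectors node, which the ConcatPat definitions below select as an INS of the high half (the low half arrives via a subregister insert).

    #include <arm_neon.h>
    // concat_vectors lo, hi -> "ins v0.d[1], v1.d[0]"
    uint32x4_t combine(uint32x2_t lo, uint32x2_t hi) {
      return vcombine_u32(lo, hi);
    }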
+class ConcatPat<ValueType DstTy, ValueType SrcTy>
+  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
+        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
+                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
+
+def : ConcatPat<v2i64, v1i64>;
+def : ConcatPat<v4i32, v2i32>;
+def : ConcatPat<v8i16, v4i16>;
+def : ConcatPat<v16i8, v8i8>;
+def : ConcatPat<v4f32, v2f32>;
+def : ConcatPat<v2f64, v1f64>;
+
+// If the high lanes are undef, though, we can just ignore them:
+class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
+  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
+        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
+
+def : ConcatUndefPat<v2i64, v1i64>;
+def : ConcatUndefPat<v4i32, v2i32>;
+def : ConcatUndefPat<v8i16, v4i16>;
+def : ConcatUndefPat<v16i8, v8i8>;
+def : ConcatUndefPat<v4f32, v2f32>;
+def : ConcatUndefPat<v2f64, v1f64>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD across lanes instructions
+//----------------------------------------------------------------------------
+
+defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
+defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
+defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
+defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
+defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
+defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
+defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
+defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
+defm FMAXV   : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
+defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
+defm FMINV   : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
+// If there is a sign extension after this intrinsic, consume it as smov already
+// performed it
+  def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
+        (i32 (SMOVvi8to32
+          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+          (i64 0)))>;
+  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+        (i32 (SMOVvi8to32
+          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+          (i64 0)))>;
+// If there is a sign extension after this intrinsic, consume it as smov already
+// performed it
+def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
+      (i32 (SMOVvi8to32
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+        (i64 0)))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+      (i32 (SMOVvi8to32
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+        (i64 0)))>;
+// If there is a sign extension after this intrinsic, consume it as smov already
+// performed it
+def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
+      (i32 (SMOVvi16to32
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+        (i64 0)))>;
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+      (i32 (SMOVvi16to32
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+        (i64 0)))>;
+// If there is a sign extension after this intrinsic, consume it as smov already
+// performed it
+def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
+      (i32 (SMOVvi16to32
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+        (i64 0)))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+      (i32 (SMOVvi16to32
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+        (i64 0)))>;
+
+def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+        ssub))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
+// If there is a masking operation keeping only what has been actually
+// generated, consume it.
+  def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
+        (i32 (EXTRACT_SUBREG
+          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+          ssub))>;
+  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+        (i32 (EXTRACT_SUBREG
+          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+          ssub))>;
+// If there is a masking operation keeping only what has been actually
+// generated, consume it.
+def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+        ssub))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+        ssub))>;
+
+// If there is a masking operation keeping only what has been actually
+// generated, consume it.
+def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+        ssub))>;
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+        ssub))>;
+// If there is a masking operation keeping only what has been actually
+// generated, consume it.
+def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+        ssub))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+        ssub))>;
+
+def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+        ssub))>;
+
+}
+
+multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
+  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+        (i32 (SMOVvi16to32
+          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
+          (i64 0)))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+      (i32 (SMOVvi16to32
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
+        (i64 0)))>;
+
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
+        ssub))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
+        ssub))>;
+
+def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
+      (i64 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
+        dsub))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
+                                                Intrinsic intOp> {
+  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+        (i32 (EXTRACT_SUBREG
+          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
+          ssub))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
+        ssub))>;
+
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
+        ssub))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+      (i32 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
+        ssub))>;
+
+def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
+      (i64 (EXTRACT_SUBREG
+        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
+        dsub))>;
+}
+
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
+          (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
+          (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
+def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
+          (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
+def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
+          (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
+def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
+          (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
+def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
+          (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
+defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
+
+// The vaddlv_s32 intrinsic gets mapped to SADDLP.
+def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
+          (i64 (EXTRACT_SUBREG
+            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
+            dsub))>;
+// The vaddlv_u32 intrinsic gets mapped to UADDLP.
+def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
+          (i64 (EXTRACT_SUBREG
+            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
+            dsub))>;
+
+//------------------------------------------------------------------------------
+// AdvSIMD modified immediate instructions
+//------------------------------------------------------------------------------
+
+// AdvSIMD BIC
+defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
+// AdvSIMD ORR
+defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
+
+def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+
+def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
+def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
+def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+
+def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
+def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
+def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+// AdvSIMD FMOV
+def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8,
+                                                "fmov", ".2d",
+                     [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8,
+                                                "fmov", ".2s",
+                     [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8,
+                                                "fmov", ".4s",
+                     [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+
+// AdvSIMD MOVI
+
+// EDIT byte mask: scalar
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
+                                         [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
+// The movi_edit node has the immediate value already encoded, so we use
+// a plain imm0_255 here.
+def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
+          (MOVID imm0_255:$shift)>;
+
+def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v8i8  immAllZerosV), (MOVID (i32 0))>;
+
+def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v8i8  immAllOnesV), (MOVID (i32 255))>;
+
+// EDIT byte mask: 2d
+
+// The movi_edit node has the immediate value already encoded, so we use
+// a plain imm0_255 in the pattern
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128,
+                                              simdimmtype10,
+                                              "movi", ".2d",
+                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
+
+
+// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing.
+// Complexity is added to break a tie with a plain MOVI. +let AddedComplexity = 1 in { +def : Pat<(f32 fpimm0), + (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>, + Requires<[HasZCZ]>; +def : Pat<(f64 fpimm0), + (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>, + Requires<[HasZCZ]>; +} + +def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; +def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; + +def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; +def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; + +def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>; +def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>; + +// EDIT per word & halfword: 2s, 4h, 4s, & 8h +defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; + +def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))), + (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; + +// EDIT per word: 2s & 4s with MSL shifter +def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", + [(set (v2i32 V64:$Rd), + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", + [(set (v4i32 V128:$Rd), + (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + +// Per byte: 8b & 16b +def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, + "movi", ".8b", + [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; +def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, + "movi", ".16b", + [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; + +// AdvSIMD MVNI + +// EDIT per word & halfword: 2s, 4h, 4s, & 8h +defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; + +def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; +def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; +def : 
InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; + +def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; +def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), + (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; + +// EDIT per word: 2s & 4s with MSL shifter +def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", + [(set (v2i32 V64:$Rd), + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; +def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", + [(set (v4i32 V128:$Rd), + (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; + +//---------------------------------------------------------------------------- +// AdvSIMD indexed element +//---------------------------------------------------------------------------- + +let neverHasSideEffects = 1 in { + defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">; + defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">; +} + +// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the +// instruction expects the addend first, while the intrinsic expects it last. + +// On the other hand, there are quite a few valid combinatorial options due to +// the commutativity of multiplication and the fact that (-x) * y = x * (-y). +defm : SIMDFPIndexedSDTiedPatterns<"FMLA", + TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; +defm : SIMDFPIndexedSDTiedPatterns<"FMLA", + TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; + +defm : SIMDFPIndexedSDTiedPatterns<"FMLS", + TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; +defm : SIMDFPIndexedSDTiedPatterns<"FMLS", + TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; +defm : SIMDFPIndexedSDTiedPatterns<"FMLS", + TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; +defm : SIMDFPIndexedSDTiedPatterns<"FMLS", + TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; + +multiclass FMLSIndexedAfterNegPatterns { + // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit + // and DUP scalar. + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (v2f32 (AArch64duplane32 + (v4f32 (insert_subvector undef, + (v2f32 (fneg V64:$Rm)), + (i32 0))), + VectorIndexS:$idx)))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), + VectorIndexS:$idx)>; + def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), + (FMLSv2i32_indexed V64:$Rd, V64:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit + // and DUP scalar. 
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64duplane32 (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, + VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (v4f32 (AArch64duplane32 + (v4f32 (insert_subvector undef, + (v2f32 (fneg V64:$Rm)), + (i32 0))), + VectorIndexS:$idx)))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), + VectorIndexS:$idx)>; + def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), + (AArch64dup (f32 (fneg FPR32Op:$Rm))))), + (FMLSv4i32_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; + + // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar + // (DUPLANE from 64-bit would be trivial). + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64duplane64 (v2f64 (fneg V128:$Rm)), + VectorIndexD:$idx))), + (FMLSv2i64_indexed + V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), + (AArch64dup (f64 (fneg FPR64Op:$Rm))))), + (FMLSv2i64_indexed V128:$Rd, V128:$Rn, + (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; + + // 2 variants for 32-bit scalar version: extract from .2s or from .4s + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v4f32 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, + V128:$Rm, VectorIndexS:$idx)>; + def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), + (vector_extract (v2f32 (fneg V64:$Rm)), + VectorIndexS:$idx))), + (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, + (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; + + // 1 variant for 64-bit scalar version: extract from .1d or from .2d + def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), + (vector_extract (v2f64 (fneg V128:$Rm)), + VectorIndexS:$idx))), + (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, + V128:$Rm, VectorIndexS:$idx)>; +} + +defm : FMLSIndexedAfterNegPatterns< + TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; +defm : FMLSIndexedAfterNegPatterns< + TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; + +defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>; + +def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), + (FMULv2i32_indexed V64:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), + (i64 0))>; +def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), + (FMULv4i32_indexed V128:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), + (i64 0))>; +def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), + (FMULv2i64_indexed V128:$Rn, + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), + (i64 0))>; + +defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>; +defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>; +defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", + TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; +defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", + TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; +defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; +defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>; +defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, 
node:$RHS))>>; +defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", + int_aarch64_neon_smull>; +defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", + int_aarch64_neon_sqadd>; +defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", + int_aarch64_neon_sqsub>; +defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>; +defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", + TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", + TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>; +defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", + int_aarch64_neon_umull>; + +// A scalar sqdmull with the second operand being a vector lane can be +// handled directly with the indexed instruction encoding. +def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), + (vector_extract (v4i32 V128:$Vm), + VectorIndexS:$idx)), + (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; + +//---------------------------------------------------------------------------- +// AdvSIMD scalar shift instructions +//---------------------------------------------------------------------------- +defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">; +defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">; +defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">; +defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; +// Codegen patterns for the above. We don't put these directly on the +// instructions because TableGen's type inference can't handle the truth. +// Having the same base pattern for fp <--> int totally freaks it out. +def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), + (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; +def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), + (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), + (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), + (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), + vecshiftR64:$imm)), + (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), + vecshiftR64:$imm)), + (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), + (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; +def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), + (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; +def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), + (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), + (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), + vecshiftR64:$imm)), + (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; +def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), + vecshiftR64:$imm)), + (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; + +defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>; +defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; +defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", + int_aarch64_neon_sqrshrn>; +defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", + int_aarch64_neon_sqrshrun>; +defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : 
SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; +defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", + int_aarch64_neon_sqshrn>; +defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", + int_aarch64_neon_sqshrun>; +defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; +defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>; +defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", + TriOpFrag<(add node:$LHS, + (AArch64srshri node:$MHS, node:$RHS))>>; +defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>; +defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", + TriOpFrag<(add node:$LHS, + (AArch64vashr node:$MHS, node:$RHS))>>; +defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", + int_aarch64_neon_uqrshrn>; +defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>; +defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", + int_aarch64_neon_uqshrn>; +defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>; +defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", + TriOpFrag<(add node:$LHS, + (AArch64urshri node:$MHS, node:$RHS))>>; +defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>; +defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", + TriOpFrag<(add node:$LHS, + (AArch64vlshr node:$MHS, node:$RHS))>>; + +//---------------------------------------------------------------------------- +// AdvSIMD vector shift instructions +//---------------------------------------------------------------------------- +defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; +defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; +defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", + int_aarch64_neon_vcvtfxs2fp>; +defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", + int_aarch64_neon_rshrn>; +defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; +defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", + BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; +defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>; +def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + (i32 vecshiftL64:$imm))), + (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; +defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", + int_aarch64_neon_sqrshrn>; +defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", + int_aarch64_neon_sqrshrun>; +defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>; +defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>; +defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", + int_aarch64_neon_sqshrn>; +defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", + int_aarch64_neon_sqshrun>; +defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>; +def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), + (i32 vecshiftR64:$imm))), + (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; +defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; +defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", + TriOpFrag<(add node:$LHS, + (AArch64srshri node:$MHS, node:$RHS))> >; +defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", + BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>; + +defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; +defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", + TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, 
node:$RHS))>>;
+defm UCVTF   : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf",
+                        int_aarch64_neon_vcvtfxu2fp>;
+defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
+                          int_aarch64_neon_uqrshrn>;
+defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
+defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
+                          int_aarch64_neon_uqshrn>;
+defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
+defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
+                TriOpFrag<(add node:$LHS,
+                               (AArch64urshri node:$MHS, node:$RHS))> >;
+defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
+                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
+defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
+defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
+                TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
+
+// SHRN patterns for when a logical right shift was used instead of arithmetic
+// (the immediate guarantees no sign bits actually end up in the result so it
+// doesn't matter).
+def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
+          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
+def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
+          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
+          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
+
+def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
+                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
+                                                      vecshiftR16Narrow:$imm)))),
+          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+                           V128:$Rn, vecshiftR16Narrow:$imm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
+                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
+                                                      vecshiftR32Narrow:$imm)))),
+          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+                           V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
+                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
+                                                      vecshiftR64Narrow:$imm)))),
+          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+                           V128:$Rn, vecshiftR64Narrow:$imm)>;
+
+// Vector sign and zero extensions are implemented with SSHLL and USHLL.
+// Anyexts are implemented as zexts.
+def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
+def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
+def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
+// Also match an extend from the upper half of a 128 bit source register.
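[Illustrative aside, not part of the patch: a hedged C sketch of the extension lowerings above. Names are invented; the commented instructions are what these patterns are expected to select under plain codegen.

#include <arm_neon.h>

/* Low-half widenings use SSHLL/USHLL with shift #0, which the
   sxtl/uxtl aliases later in this section spell more readably. */
int16x8_t  sx(int8x8_t v)   { return vmovl_s8(v);  } /* sshll v0.8h, v0.8b, #0 */
uint32x4_t zx(uint16x4_t v) { return vmovl_u16(v); } /* ushll v0.4s, v0.4h, #0 */

/* Widening the high half of a 128-bit register uses the "2" (high)
   forms, matching the upper-half patterns directly below. */
int16x8_t sxh(int8x16_t v) { return vmovl_high_s8(v); } /* sshll2 v0.8h, v0.16b, #0 */
]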
+def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (USHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (USHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), + (SSHLLv16i8_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (USHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (USHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), + (SSHLLv8i16_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (USHLLv4i32_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (USHLLv4i32_shift V128:$Rn, (i32 0))>; +def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), + (SSHLLv4i32_shift V128:$Rn, (i32 0))>; + +// Vector shift sxtl aliases +def : InstAlias<"sxtl.8h $dst, $src1", + (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.8h, $src1.8b", + (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl.4s $dst, $src1", + (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.4s, $src1.4h", + (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl.2d $dst, $src1", + (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"sxtl $dst.2d, $src1.2s", + (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; + +// Vector shift sxtl2 aliases +def : InstAlias<"sxtl2.8h $dst, $src1", + (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.8h, $src1.16b", + (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2.4s $dst, $src1", + (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.4s, $src1.8h", + (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2.2d $dst, $src1", + (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"sxtl2 $dst.2d, $src1.4s", + (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; + +// Vector shift uxtl aliases +def : InstAlias<"uxtl.8h $dst, $src1", + (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.8h, $src1.8b", + (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl.4s $dst, $src1", + (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.4s, $src1.4h", + (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl.2d $dst, $src1", + (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; +def : InstAlias<"uxtl $dst.2d, $src1.2s", + (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; + +// Vector shift uxtl2 aliases +def : InstAlias<"uxtl2.8h $dst, $src1", + (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.8h, $src1.16b", + (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2.4s $dst, $src1", + (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.4s, $src1.8h", + (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2.2d $dst, $src1", + (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; +def : InstAlias<"uxtl2 $dst.2d, $src1.4s", + (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; + +// If an integer is about to be converted to a floating point value, +// just load it on the floating point unit. +// These patterns are more complex because floating point loads do not +// support sign extension. 
+// The sign extension has to be explicitly added and is only supported for
+// one step: byte-to-half, half-to-word, word-to-doubleword.
+// SCVTF GPR -> FPR is 9 cycles.
+// SCVTF FPR -> FPR is 4 cycles.
+// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
+// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
+// and still be faster.
+// However, this is not good for code size.
+// 8-bits -> float. 2 sizes step-up.
+class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
+  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
+        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
+                            (SSHLLv4i16_shift
+                              (f64
+                                (EXTRACT_SUBREG
+                                  (SSHLLv8i8_shift
+                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                                        INST,
+                                        bsub),
+                                    0),
+                                  dsub)),
+                              0),
+                            ssub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
+                          (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
+def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
+                          (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
+def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
+                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
+                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bits -> float. 1 size step-up.
+class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
+  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
+        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
+                            (SSHLLv4i16_shift
+                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                                  INST,
+                                  hsub),
+                                0),
+                            ssub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
+                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
+def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
+                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
+def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
+                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
+                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bits to 32-bits are handled in target specific dag combine:
+// performIntToFpCombine.
+// 64-bits integer to 32-bits floating point, not possible with
+// SCVTF on floating point registers (both source and destination
+// must have the same size).
+
+// Here are the patterns for 8, 16, 32, and 64-bits to double.
+// 8-bits -> double. 3 size step-up: give up.
+// 16-bits -> double. 2 size step.
+class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
+  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
+         (SCVTFv1i64 (f64 (EXTRACT_SUBREG
+                             (SSHLLv2i32_shift
+                                (f64
+                                  (EXTRACT_SUBREG
+                                    (SSHLLv4i16_shift
+                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+                                        INST,
+                                        hsub),
+                                     0),
+                                   dsub)),
+                               0),
+                             dsub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
+                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
+def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
+                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
+def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
+                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
+                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+// 32-bits -> double. 1 size step-up.
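[Illustrative aside, not part of the patch: a small C sketch to make the cost reasoning above concrete. Names are invented; the commented lowerings are what these patterns should produce when NotForCodeSize holds, i.e. when not optimizing for size.

#include <stdint.h>

/* Load straight into an FPR, widen on the SIMD side with SXTL
   (SSHLL #0), then use the cheaper FPR->FPR form of SCVTF, avoiding
   the more expensive GPR->FPR SCVTF. */
float  f_from_i16(const int16_t *p) { return (float)*p;  } /* ldr h0; sxtl; scvtf s0, s0 */
double d_from_i32(const int32_t *p) { return (double)*p; } /* ldr s0; sxtl; scvtf d0, d0 */
]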
+class SExtLoadi32CVTf64Pat + : Pat <(f64 (sint_to_fp (i32 (load addrmode)))), + (SCVTFv1i64 (f64 (EXTRACT_SUBREG + (SSHLLv2i32_shift + (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + INST, + ssub), + 0), + dsub)))>, Requires<[NotForCodeSize]>; + +def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext), + (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>; +def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext), + (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>; +def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), + (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>; +def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset), + (LDURSi GPR64sp:$Rn, simm9:$offset)>; + +// 64-bits -> double are handled in target specific dag combine: +// performIntToFpCombine. + + +//---------------------------------------------------------------------------- +// AdvSIMD Load-Store Structure +//---------------------------------------------------------------------------- +defm LD1 : SIMDLd1Multiple<"ld1">; +defm LD2 : SIMDLd2Multiple<"ld2">; +defm LD3 : SIMDLd3Multiple<"ld3">; +defm LD4 : SIMDLd4Multiple<"ld4">; + +defm ST1 : SIMDSt1Multiple<"st1">; +defm ST2 : SIMDSt2Multiple<"st2">; +defm ST3 : SIMDSt3Multiple<"st3">; +defm ST4 : SIMDSt4Multiple<"st4">; + +class Ld1Pat + : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>; + +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; +def : Ld1Pat; + +class St1Pat + : Pat<(store ty:$Vt, GPR64sp:$Rn), + (INST ty:$Vt, GPR64sp:$Rn)>; + +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; +def : St1Pat; + +//--- +// Single-element +//--- + +defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>; +defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>; +defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>; +defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>; +let mayLoad = 1, neverHasSideEffects = 1 in { +defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>; +defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>; +defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>; +defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>; +defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>; +defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>; +defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>; +defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>; +defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>; +defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>; +defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>; +defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>; +defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>; +defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>; +defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>; +defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>; +} + +def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), + (LD1Rv8b GPR64sp:$Rn)>; +def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))), + (LD1Rv16b GPR64sp:$Rn)>; +def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), + (LD1Rv4h GPR64sp:$Rn)>; +def : 
Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))), + (LD1Rv8h GPR64sp:$Rn)>; +def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; +// Grab the floating point version too +def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; + +class Ld1Lane128Pat + : Pat<(vector_insert (VTy VecListOne128:$Rd), + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>; + +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; + +class Ld1Lane64Pat + : Pat<(vector_insert (VTy VecListOne64:$Rd), + (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx), + (EXTRACT_SUBREG + (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), + VecIndex:$idx, GPR64sp:$Rn), + dsub)>; + +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; + + +defm LD1 : SIMDLdSt1SingleAliases<"ld1">; +defm LD2 : SIMDLdSt2SingleAliases<"ld2">; +defm LD3 : SIMDLdSt3SingleAliases<"ld3">; +defm LD4 : SIMDLdSt4SingleAliases<"ld4">; + +// Stores +defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; +defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; +defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; +defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; + +let AddedComplexity = 15 in +class St1Lane128Pat + : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>; + +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; + +let AddedComplexity = 15 in +class St1Lane64Pat + : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn)>; + +def : St1Lane64Pat; +def : St1Lane64Pat; +def : St1Lane64Pat; +def : St1Lane64Pat; + +multiclass St1LanePost64Pat { + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, offset), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn, XZR)>; + + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, GPR64:$Rm), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, GPR64sp:$Rn, $Rm)>; +} + +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; +defm : St1LanePost64Pat; + +multiclass St1LanePost128Pat { + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, offset), + (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>; + + def : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + GPR64sp:$Rn, GPR64:$Rm), + (ST1 VecListOne128:$Vt, 
VecIndex:$idx, GPR64sp:$Rn, $Rm)>; +} + +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; +defm : St1LanePost128Pat; + +let mayStore = 1, neverHasSideEffects = 1 in { +defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; +defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; +defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; +defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; +defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; +defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; +defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; +defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; +defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; +defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; +defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; +defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; +} + +defm ST1 : SIMDLdSt1SingleAliases<"st1">; +defm ST2 : SIMDLdSt2SingleAliases<"st2">; +defm ST3 : SIMDLdSt3SingleAliases<"st3">; +defm ST4 : SIMDLdSt4SingleAliases<"st4">; + +//---------------------------------------------------------------------------- +// Crypto extensions +//---------------------------------------------------------------------------- + +def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>; +def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>; +def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>; +def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>; + +def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>; +def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>; +def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>; +def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>; +def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>; +def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>; +def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>; + +def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>; +def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>; +def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>; + +//---------------------------------------------------------------------------- +// Compiler-pseudos +//---------------------------------------------------------------------------- +// FIXME: Like for X86, these should go in their own separate .td file. + +// Any instruction that defines a 32-bit result leaves the high half of the +// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may +// be copying from a truncate. But any other 32-bit operation will zero-extend +// up to 64 bits. +// FIXME: X86 also checks for CMOV here. Do we need something similar? 
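[Illustrative aside, not part of the patch: a brief C sketch of the distinction def32 draws. Names are invented; the expected lowerings follow from the patterns below and are not verified compiler output.

#include <stdint.h>

/* A genuine 32-bit producer (the add) already zeroes bits [63:32],
   so this widening should fold to SUBREG_TO_REG: no extra instruction. */
uint64_t zext_of_add(uint32_t a, uint32_t b) { return (uint64_t)(a + b); }

/* A truncate is excluded by def32, so this widening has to materialize
   an explicit zero-extension (e.g. via the UBFM-based pattern below). */
uint64_t zext_of_trunc(uint64_t x) { return (uint32_t)x; }
]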
+def def32 : PatLeaf<(i32 GPR32:$src), [{
+  return N->getOpcode() != ISD::TRUNCATE &&
+         N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+         N->getOpcode() != ISD::CopyFromReg;
+}]>;
-//===----------------------------------------------------------------------===//
-// Load-store register (unscaled immediate) instructions
-//===----------------------------------------------------------------------===//
-// Contains: LDURB, LDURH, LDRUSB, LDRUSH, LDRUSW, STUR, STURB, STURH and PRFUM
-//
-// and
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
+
+// For an anyext, we don't care what the high bits are, so we can perform an
+// INSERT_SUBREG into an IMPLICIT_DEF.
+def : Pat<(i64 (anyext GPR32:$src)),
+          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
+
+// When we need to explicitly zero-extend, we use an unsigned bitfield move
+// instruction (UBFM) on the enclosing super-reg.
+def : Pat<(i64 (zext GPR32:$src)),
+ (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
+
+// To sign extend, we use a signed bitfield move instruction (SBFM) on the
+// containing super-reg.
+def : Pat<(i64 (sext GPR32:$src)),
+   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
+
+def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
+          (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
+def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
+          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
+
+def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
+          (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
+def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
+          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
+
+def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
+          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+                   (i64 (i64shift_a imm0_63:$imm)),
+                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
+
+// sra patterns have an AddedComplexity of 10, so make sure we have a higher
+// AddedComplexity for the following patterns since we want to match sext + sra
+// patterns before we attempt to match a single sra node.
+let AddedComplexity = 20 in {
+// We support all sext + sra combinations which preserve at least one bit of the
+// original value which is to be sign extended. E.g. we support shifts up to
+// bitwidth-1 bits.
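[Illustrative aside, not part of the patch: a small C sketch of the sext + sra folding these higher-complexity patterns target. Names are invented; the expected single-instruction lowerings follow from the patterns directly below.

#include <stdint.h>

/* Sign-extend from bit 7, then arithmetic shift by 3: one SBFM,
   i.e. "sbfx w0, w0, #3, #5". */
int32_t sra_of_sext8(int32_t x) { return (int8_t)x >> 3; }

/* 32->64 sign extension combined with the shift:
   "sbfx x0, x0, #7, #25". */
int64_t sra_of_sext32(int32_t x) { return (int64_t)x >> 7; }
]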
+def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), + (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; +def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), + (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; + +def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), + (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; +def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), + (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; + +def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), + (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), + (i64 imm0_31:$imm), 31)>; +} // AddedComplexity = 20 + +// To truncate, we can simply extract from a subregister. +def : Pat<(i32 (trunc GPR64sp:$src)), + (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; + +// __builtin_trap() uses the BRK instruction on AArch64. +def : Pat<(trap), (BRK 1)>; + +// Conversions within AdvSIMD types in the same register size are free. +// But because we need a consistent lane ordering, in big endian many +// conversions require one or more REV instructions. // -//===----------------------------------------------------------------------===// -// Load-store register (register offset) instructions -//===----------------------------------------------------------------------===// -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register (unsigned immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM +// Consider a simple memory load followed by a bitconvert then a store. +// v0 = load v2i32 +// v1 = BITCAST v2i32 v0 to v4i16 +// store v4i16 v2 // -// and +// In big endian mode every memory access has an implicit byte swap. LDR and +// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that +// is, they treat the vector as a sequence of elements to be byte-swapped. +// The two pairs of instructions are fundamentally incompatible. We've decided +// to use LD1/ST1 only to simplify compiler implementation. // -//===----------------------------------------------------------------------===// -// Load-store register (immediate post-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW +// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes +// the original code sequence: +// v0 = load v2i32 +// v1 = REV v2i32 (implicit) +// v2 = BITCAST v2i32 v1 to v4i16 +// v3 = REV v4i16 v2 (implicit) +// store v4i16 v3 // -// and +// But this is now broken - the value stored is different to the value loaded +// due to lane reordering. To fix this, on every BITCAST we must perform two +// other REVs: +// v0 = load v2i32 +// v1 = REV v2i32 (implicit) +// v2 = REV v2i32 +// v3 = BITCAST v2i32 v2 to v4i16 +// v4 = REV v4i16 +// v5 = REV v4i16 v4 (implicit) +// store v4i16 v5 // -//===----------------------------------------------------------------------===// -// Load-store register (immediate pre-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW - -// Note that patterns are much later on in a completely separate section (they -// need ADRPxi to be defined). 
- -//===------------------------------- -// 1. Various operands needed -//===------------------------------- - -//===------------------------------- -// 1.1 Unsigned 12-bit immediate operands -//===------------------------------- -// The addressing mode for these instructions consists of an unsigned 12-bit -// immediate which is scaled by the size of the memory access. +// This means an extra two instructions, but actually in most cases the two REV +// instructions can be combined into one. For example: +// (REV64_2s (REV64_4h X)) === (REV32_4h X) // -// We represent this in the MC layer by two operands: -// 1. A base register. -// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]" -// would have '1' in this field. -// This means that separate functions are needed for converting representations -// which *are* aware of the intended access size. - -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to -// know the access size via some means. An isolated operand does not have this -// information unless told from here, which means we need separate tablegen -// Operands for each access size. This multiclass takes care of instantiating -// the correct template functions in the rest of the backend. - -//===------------------------------- -// 1.1 Unsigned 12-bit immediate operands -//===------------------------------- - -multiclass offsets_uimm12 { - def uimm12_asmoperand : AsmOperandClass { - let Name = "OffsetUImm12_" # MemSize; - let PredicateMethod = "isOffsetUImm12<" # MemSize # ">"; - let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreUImm12_" # MemSize; - } - - // Pattern is really no more than an ImmLeaf, but predicated on MemSize which - // complicates things beyond TableGen's ken. - def uimm12 : Operand, - ComplexPattern"> { - let ParserMatchClass - = !cast(prefix # uimm12_asmoperand); - - let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">"; - let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">"; - } -} - -defm byte_ : offsets_uimm12<1, "byte_">; -defm hword_ : offsets_uimm12<2, "hword_">; -defm word_ : offsets_uimm12<4, "word_">; -defm dword_ : offsets_uimm12<8, "dword_">; -defm qword_ : offsets_uimm12<16, "qword_">; - -//===------------------------------- -// 1.1 Signed 9-bit immediate operands -//===------------------------------- - -// The MCInst is expected to store the bit-wise encoding of the value, -// which amounts to lopping off the extended sign bits. -def SDXF_simm9 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32); -}]>; - -def simm9_asmoperand : AsmOperandClass { - let Name = "SImm9"; - let PredicateMethod = "isSImm<9>"; - let RenderMethod = "addSImmOperands<9>"; - let DiagnosticType = "LoadStoreSImm9"; -} - -def simm9 : Operand, - ImmLeaf= -0x100 && Imm <= 0xff; }], - SDXF_simm9> { - let PrintMethod = "printOffsetSImm9Operand"; - let ParserMatchClass = simm9_asmoperand; -} - - -//===------------------------------- -// 1.3 Register offset extensions -//===------------------------------- - -// The assembly-syntax for these addressing-modes is: -// [, {, {}}] +// There is also no 128-bit REV instruction. This must be synthesized with an +// EXT instruction. // -// The essential semantics are: -// + is a shift: # or #0 -// + can be W or X. -// + If is W, can be UXTW or SXTW -// + If is X, can be LSL or SXTX +// Most bitconverts require some sort of conversion. 
The only exceptions are: +// a) Identity conversions - vNfX <-> vNiX +// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX // -// The trickiest of those constraints is that Rm can be either GPR32 or GPR64, -// which will need separate instructions for LLVM type-consistency. We'll also -// need separate operands, of course. -multiclass regexts { - def regext_asmoperand : AsmOperandClass { - let Name = "AddrRegExtend_" # MemSize # "_" # Rm; - let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">"; - let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize; - } - - def regext : Operand { - let PrintMethod - = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">"; - - let DecoderMethod = "DecodeAddrRegExtendOperand"; - let ParserMatchClass - = !cast(prefix # regext_asmoperand); - } -} - -multiclass regexts_wx { - // Rm is an X-register if LSL or SXTX are specified as the shift. - defm Xm_ : regexts; - - // Rm is a W-register if UXTW or SXTW are specified as the shift. - defm Wm_ : regexts; -} - -defm byte_ : regexts_wx<1, "byte_">; -defm hword_ : regexts_wx<2, "hword_">; -defm word_ : regexts_wx<4, "word_">; -defm dword_ : regexts_wx<8, "dword_">; -defm qword_ : regexts_wx<16, "qword_">; - - -//===------------------------------ -// 2. The instructions themselves. -//===------------------------------ - -// We have the following instructions to implement: -// | | B | H | W | X | -// |-----------------+-------+-------+-------+--------| -// | unsigned str | STRB | STRH | STR | STR | -// | unsigned ldr | LDRB | LDRH | LDR | LDR | -// | signed ldr to W | LDRSB | LDRSH | - | - | -// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) | - -// This will instantiate the LDR/STR instructions you'd expect to use for an -// unsigned datatype (first two rows above) or floating-point register, which is -// reasonably uniform across all access sizes. - - -//===------------------------------ -// 2.1 Regular instructions -//===------------------------------ - -// This class covers the basic unsigned or irrelevantly-signed loads and stores, -// to general-purpose and floating-point registers. - -class AddrParams { - Operand uimm12 = !cast(prefix # "_uimm12"); - - Operand regextWm = !cast(prefix # "_Wm_regext"); - Operand regextXm = !cast(prefix # "_Xm_regext"); -} - -def byte_addrparams : AddrParams<"byte">; -def hword_addrparams : AddrParams<"hword">; -def word_addrparams : AddrParams<"word">; -def dword_addrparams : AddrParams<"dword">; -def qword_addrparams : AddrParams<"qword">; - -multiclass A64I_LDRSTR_unsigned size, bit v, - bit high_opc, string asmsuffix, - RegisterClass GPR, AddrParams params> { - // Unsigned immediate - def _STR : A64I_LSunsigimm, - Sched<[WriteSt, ReadSt, ReadSt]> { - let mayStore = 1; - } - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _LDR : A64I_LSunsigimm, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - // Register offset (four of these: load/store and Wm/Xm). 
- let mayLoad = 1 in { - def _Wm_RegOffset_LDR : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def _Xm_RegOffset_LDR : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - } - def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]", - (!cast(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - let mayStore = 1 in { - def _Wm_RegOffset_STR : A64I_LSregoff, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; - - def _Xm_RegOffset_STR : A64I_LSregoff, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>; - } - def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]", - (!cast(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - // Unaligned immediate - def _STUR : A64I_LSunalimm, - Sched<[WriteSt, ReadSt, ReadSt]> { - let mayStore = 1; - } - def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _LDUR : A64I_LSunalimm, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - // Post-indexed - def _PostInd_STR : A64I_LSpostind, - Sched<[WriteSt, ReadSt, ReadSt]> { - let Constraints = "$Rn = $Rn_wb"; - let mayStore = 1; - - // Decoder only needed for unpredictability checking (FIXME). - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def _PostInd_LDR : A64I_LSpostind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - // Pre-indexed - def _PreInd_STR : A64I_LSpreind, - Sched<[WriteSt, ReadSt, ReadSt]> { - let Constraints = "$Rn = $Rn_wb"; - let mayStore = 1; - - // Decoder only needed for unpredictability checking (FIXME). - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def _PreInd_LDR : A64I_LSpreind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - -} - -// STRB/LDRB: First define the instructions -defm LS8 - : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>; - -// STRH/LDRH -defm LS16 - : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>; - - -// STR/LDR to/from a W register -defm LS32 - : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>; - -// STR/LDR to/from an X register -defm LS64 - : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>; - -let Predicates = [HasFPARMv8] in { -// STR/LDR to/from a B register -defm LSFP8 - : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>; - -// STR/LDR to/from an H register -defm LSFP16 - : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>; - -// STR/LDR to/from an S register -defm LSFP32 - : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>; -// STR/LDR to/from a D register -defm LSFP64 - : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>; -// STR/LDR to/from a Q register -defm LSFP128 - : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, - qword_addrparams>; -} - -//===------------------------------ -// 2.3 Signed loads -//===------------------------------ - -// Byte and half-word signed loads can both go into either an X or a W register, -// so it's worth factoring out. Signed word loads don't fit because there is no -// W version. 
-multiclass A64I_LDR_signed size, string asmopcode, AddrParams params, - string prefix> { - // Unsigned offset - def w : A64I_LSunsigimm, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", - (!cast(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>; - - def x : A64I_LSunsigimm, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]", - (!cast(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>; - - // Register offset - let mayLoad = 1 in { - def w_Wm_RegOffset : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def w_Xm_RegOffset : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def x_Wm_RegOffset : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def x_Xm_RegOffset : A64I_LSregoff, - Sched<[WriteLd, ReadLd, ReadLd]>; - } - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", - (!cast(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]", - (!cast(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - - let mayLoad = 1 in { - // Unaligned offset - def w_U : A64I_LSunalimm, - Sched<[WriteLd, ReadLd]>; - - def x_U : A64I_LSunalimm, - Sched<[WriteLd, ReadLd]>; - - - // Post-indexed - def w_PostInd : A64I_LSpostind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def x_PostInd : A64I_LSpostind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - // Pre-indexed - def w_PreInd : A64I_LSpreind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - - def x_PreInd : A64I_LSpreind, - Sched<[WriteLd, WriteLd, ReadLd]> { - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; - } - } // let mayLoad = 1 -} - -// LDRSB -defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">; -// LDRSH -defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">; - -// LDRSW: load a 32-bit register, sign-extending to 64-bits. 
-def LDRSWx - : A64I_LSunsigimm<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, word_uimm12:$UImm12), - "ldrsw\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -let mayLoad = 1 in { - def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext), - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; - - def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext), - "ldrsw\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd, ReadLd]>; -} -def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]", - (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>; - - -def LDURSWx - : A64I_LSunalimm<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldursw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -def LDRSWx_PostInd - : A64I_LSpostind<0b10, 0b0, 0b10, - (outs GPR64:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrsw\t$Rt, [$Rn], $SImm9", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; -} - -def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10, - (outs GPR64:$Rt, GPR64xsp:$Rn_wb), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldrsw\t$Rt, [$Rn, $SImm9]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeSingleIndexedInstruction"; -} - -//===------------------------------ -// 2.4 Prefetch operations -//===------------------------------ - -def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12), - "prfm\t$Rt, [$Rn, $UImm12]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]> { - let mayLoad = 1; -} -def : InstAlias<"prfm $Rt, [$Rn]", - (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; - -let mayLoad = 1 in { - def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, - GPR32:$Rm, dword_Wm_regext:$Ext), - "prfm\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]>; - def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, dword_Xm_regext:$Ext), - "prfm\t$Rt, [$Rn, $Rm, $Ext]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]>; -} - -def : InstAlias<"prfm $Rt, [$Rn, $Rm]", - (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn, - GPR64:$Rm, 2)>; - - -def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs), - (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9), - "prfum\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WritePreLd, ReadPreLd]> { - let mayLoad = 1; -} -def : InstAlias<"prfum $Rt, [$Rn]", - (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>; - -//===----------------------------------------------------------------------===// -// Load-store register (unprivileged) instructions -//===----------------------------------------------------------------------===// -// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH - -// These instructions very much mirror the "unscaled immediate" loads, but since -// there are no floating-point 
variants we need to split them out into their own -// section to avoid instantiation of "ldtr d0, [sp]" etc. - -multiclass A64I_LDTRSTTR size, string asmsuffix, RegisterClass GPR, - string prefix> { - def _UnPriv_STR : A64I_LSunpriv, - Sched<[WriteLd, ReadLd]> { - let mayStore = 1; - } - - def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - - def _UnPriv_LDR : A64I_LSunpriv, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; - } - - def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]", - (!cast(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>; - -} - -// STTRB/LDTRB: First define the instructions -defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">; - -// STTRH/LDTRH -defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">; - -// STTR/LDTR to/from a W register -defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">; - -// STTR/LDTR to/from an X register -defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">; - -// Now a class for the signed instructions that can go to either 32 or 64 -// bits... -multiclass A64I_LDTR_signed size, string asmopcode, string prefix> { - let mayLoad = 1 in { - def w : A64I_LSunpriv, - Sched<[WriteLd, ReadLd]>; - - def x : A64I_LSunpriv, - Sched<[WriteLd, ReadLd]>; - } - - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", - (!cast(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>; - - def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]", - (!cast(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -} - -// LDTRSB -defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">; -// LDTRSH -defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">; - -// And finally LDTRSW which only goes to 64 bits. -def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10, - (outs GPR64:$Rt), - (ins GPR64xsp:$Rn, simm9:$SImm9), - "ldtrsw\t$Rt, [$Rn, $SImm9]", - [], NoItinerary>, - Sched<[WriteLd, ReadLd]> { - let mayLoad = 1; -} -def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>; - -//===----------------------------------------------------------------------===// -// Load-store register pair (offset) instructions -//===----------------------------------------------------------------------===// -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register pair (post-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STP, LDP, LDPSW -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store register pair (pre-indexed) instructions -//===----------------------------------------------------------------------===// -// Contains: STP, LDP, LDPSW -// -// and -// -//===----------------------------------------------------------------------===// -// Load-store non-temporal register pair (offset) instructions -//===----------------------------------------------------------------------===// -// Contains: STNP, LDNP - - -// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to -// know the access size via some means. An isolated operand does not have this -// information unless told from here, which means we need separate tablegen -// Operands for each access size. This multiclass takes care of instantiating -// the correct template functions in the rest of the backend. 
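// Illustrative note, not part of the patch: as a worked example of the
// scaling handled below, the 7-bit signed pair offset is multiplied by the
// access size, so for 8-byte registers:
//
//   stp x0, x1, [sp, #-16]    // SImm7 field encodes -16/8 = -2
//   ldp x2, x3, [x4, #504]    // SImm7 field encodes 504/8 = 63 (the maximum)
//
// dword_simm7 therefore accepts multiples of 8 in [-512, 504], word_simm7
// multiples of 4 in [-256, 252], and qword_simm7 multiples of 16 in
// [-1024, 1008].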
- -multiclass offsets_simm7 { - // The bare signed 7-bit immediate is used in post-indexed instructions, but - // because of the scaling performed a generic "simm7" operand isn't - // appropriate here either. - def simm7_asmoperand : AsmOperandClass { - let Name = "SImm7_Scaled" # MemSize; - let PredicateMethod = "isSImm7Scaled<" # MemSize # ">"; - let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">"; - let DiagnosticType = "LoadStoreSImm7_" # MemSize; - } - - def simm7 : Operand { - let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">"; - let ParserMatchClass = !cast(prefix # "simm7_asmoperand"); - } -} - -defm word_ : offsets_simm7<"4", "word_">; -defm dword_ : offsets_simm7<"8", "dword_">; -defm qword_ : offsets_simm7<"16", "qword_">; - -multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, - Operand simm7, string prefix> { - def _STR : A64I_LSPoffset, - Sched<[WriteLd, ReadLd]> { - let mayStore = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"stp $Rt, $Rt2, [$Rn]", - (!cast(prefix # "_STR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _LDR : A64I_LSPoffset, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"ldp $Rt, $Rt2, [$Rn]", - (!cast(prefix # "_LDR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _PostInd_STR : A64I_LSPpostind, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let Constraints = "$Rn = $Rn_wb"; - - // Decoder only needed for unpredictability checking (FIXME). - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PostInd_LDR : A64I_LSPpostind, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PreInd_STR : A64I_LSPpreind, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _PreInd_LDR : A64I_LSPpreind, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - - def _NonTemp_STR : A64I_LSPnontemp, - Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> { - let mayStore = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"stnp $Rt, $Rt2, [$Rn]", - (!cast(prefix # "_NonTemp_STR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - - def _NonTemp_LDR : A64I_LSPnontemp, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; - } - def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]", - (!cast(prefix # "_NonTemp_LDR") SomeReg:$Rt, - SomeReg:$Rt2, GPR64xsp:$Rn, 0)>; - -} - - -defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; -defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; - -let Predicates = [HasFPARMv8] in { -defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; -defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; -defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, - "LSFPPair128">; -} - - -def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>, - Sched<[WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} -def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]", 
- (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>; - -def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} - -def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1, - (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb), - (ins GPR64xsp:$Rn, word_simm7:$SImm7), - "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!", - [], NoItinerary>, - Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> { - let mayLoad = 1; - let Constraints = "$Rn = $Rn_wb"; - let DecoderMethod = "DecodeLDSTPairInstruction"; -} - -//===----------------------------------------------------------------------===// -// Logical (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV - -multiclass logical_imm_operands { - def _asmoperand : AsmOperandClass { - let Name = "LogicalImm" # note # size; - let PredicateMethod = "isLogicalImm" # note # "<" # size # ">"; - let RenderMethod = "addLogicalImmOperands<" # size # ">"; - let DiagnosticType = "LogicalSecondSource"; - } - - def _operand - : Operand, ComplexPattern { - let ParserMatchClass = !cast(prefix # "_asmoperand"); - let PrintMethod = "printLogicalImmOperand<" # size # ">"; - let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">"; - } -} - -defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>; -defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>; - -// The mov versions only differ in assembly parsing, where they -// exclude values representable with either MOVZ or MOVN. 
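// Illustrative note, not part of the patch: for instance, #0xaaaaaaaa is a
// valid 32-bit bitmask immediate (alternating bits) that neither MOVZ nor
// MOVN can produce, so the MOV-from-ORR alias may claim it:
//
//   mov w0, #0xaaaaaaaa    // assembles via orr w0, wzr, #0xaaaaaaaa
//   mov w0, #1             // excluded here; handled by the MOVZ-based alias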
-defm logical_imm32_mov - : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>; -defm logical_imm64_mov - : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>; - - -multiclass A64I_logimmSizes opc, string asmop, SDNode opnode> { - def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd), - (ins GPR32:$Rn, logical_imm32_operand:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [(set i32:$Rd, - (opnode i32:$Rn, logical_imm32_operand:$Imm))], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd), - (ins GPR64:$Rn, logical_imm64_operand:$Imm), - !strconcat(asmop, "\t$Rd, $Rn, $Imm"), - [(set i64:$Rd, - (opnode i64:$Rn, logical_imm64_operand:$Imm))], - NoItinerary>, - Sched<[WriteALU, ReadALU]>; -} - -defm AND : A64I_logimmSizes<0b00, "and", and>; -defm ORR : A64I_logimmSizes<0b01, "orr", or>; -defm EOR : A64I_logimmSizes<0b10, "eor", xor>; - -let Defs = [NZCV] in { - def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd), - (ins GPR32:$Rn, logical_imm32_operand:$Imm), - "ands\t$Rd, $Rn, $Imm", - [], NoItinerary>, - Sched<[WriteALU, ReadALU]>; - - def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd), - (ins GPR64:$Rn, logical_imm64_operand:$Imm), - "ands\t$Rd, $Rn, $Imm", - [], NoItinerary>, - Sched<[WriteALU, ReadALU]>; -} - - -def : InstAlias<"tst $Rn, $Imm", - (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>; -def : InstAlias<"tst $Rn, $Imm", - (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>; -def : InstAlias<"mov $Rd, $Imm", - (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>; -def : InstAlias<"mov $Rd, $Imm", - (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>; - -//===----------------------------------------------------------------------===// -// Logical (shifted register) instructions -//===----------------------------------------------------------------------===// -// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV - -// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS" -// behaves differently for unsigned comparisons, so we defensively only allow -// signed or n/a as the operand. In practice "unsigned greater than 0" is "not -// equal to 0" and LLVM gives us this. -def signed_cond : PatLeaf<(cond), [{ - return !isUnsignedIntSetCC(N->get()); -}]>; - - -// These instructions share their "shift" operands with add/sub (shifted -// register instructions). They are defined there. - -// N.b. the commutable parameter is just !N. It will be first against the wall -// when the revolution comes. 
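// Illustrative note, not part of the patch: concretely, AND (N = 0) is marked
// commutable because "and x0, x1, x2" equals "and x0, x2, x1", whereas BIC
// (N = 1) is not, since only the shifted source operand is inverted:
//
//   bic x0, x1, x2    // x0 = x1 & ~x2, not x2 & ~x1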
-multiclass logical_shifts opc, - bit N, bit commutable, - string asmop, SDPatternOperator opfrag, ValueType ty, - RegisterClass GPR, list defs> { - let isCommutable = commutable, Defs = defs in { - def _lsl : A64I_logicalshift("lsl_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _lsr : A64I_logicalshift("lsr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift("asr_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift("ror_operand_" # ty):$Imm6), - !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"), - [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm, - !cast("ror_operand_" # ty):$Imm6)) - )], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift - : InstAlias(prefix # "_lsl") GPR:$Rd, GPR:$Rn, - GPR:$Rm, 0)>; - - def : Pat<(opfrag ty:$Rn, ty:$Rm), - (!cast(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -multiclass logical_sizes opc, bit N, bit commutable, - string asmop, SDPatternOperator opfrag, - list defs> { - defm xxx : logical_shifts; - defm www : logical_shifts; -} - - -defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>; -defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>; -defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>; -defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands", - PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), - [{ (void)N; return false; }]>, - [NZCV]>; - -defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic", - PatFrag<(ops node:$lhs, node:$rhs), - (and node:$lhs, (not node:$rhs))>, []>; -defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn", - PatFrag<(ops node:$lhs, node:$rhs), - (or node:$lhs, (not node:$rhs))>, []>; -defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon", - PatFrag<(ops node:$lhs, node:$rhs), - (xor node:$lhs, (not node:$rhs))>, []>; -defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics", - PatFrag<(ops node:$lhs, node:$rhs), - (and node:$lhs, (not node:$rhs)), - [{ (void)N; return false; }]>, - [NZCV]>; - -multiclass tst_shifts { - let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in { - def _lsl : A64I_logicalshift("lsl_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - - def _lsr : A64I_logicalshift("lsr_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift("asr_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)), - 0, signed_cond))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift("ror_operand_" # ty):$Imm6), - "tst\t$Rn, $Rm, $Imm6", - [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm, - !cast("ror_operand_" # ty):$Imm6)), - 0, signed_cond))], - 
NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift : InstAlias<"tst $Rn, $Rm", - (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond), - (!cast(prefix # "_lsl") $Rn, $Rm, 0)>; -} - -defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>; -defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>; - - -multiclass mvn_shifts { - let isCommutable = 0, Rn = 0b11111 in { - def _lsl : A64I_logicalshift("lsl_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (shl ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - - def _lsr : A64I_logicalshift("lsr_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (srl ty:$Rm, - !cast("lsr_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _asr : A64I_logicalshift("asr_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (sra ty:$Rm, - !cast("asr_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - - def _ror : A64I_logicalshift("ror_operand_" # ty):$Imm6), - "mvn\t$Rd, $Rm, $Imm6", - [(set ty:$Rd, (not (rotr ty:$Rm, - !cast("lsl_operand_" # ty):$Imm6)))], - NoItinerary>, - Sched<[WriteALU, ReadALU, ReadALU]>; - } - - def _noshift : InstAlias<"mvn $Rn, $Rm", - (!cast(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>; - - def : Pat<(not ty:$Rm), - (!cast(prefix # "_lsl") $Rm, 0)>; -} - -defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>; -defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>; - -def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>; -def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>; - -//===----------------------------------------------------------------------===// -// Move wide (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: MOVN, MOVZ, MOVK + MOV aliases - -// A wide variety of different relocations are needed for variants of these -// instructions, so it turns out that we need a different operand for all of -// them. 
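// Illustrative note, not part of the patch: for example, the same MOVZ/MOVK
// bit layout is targeted by the :abs_gN: and :tprel_gN: families of ELF
// relocation specifiers, and a large-code-model address is materialized as
// (see the A64WrapperLarge pattern further down):
//
//   movz x0, #:abs_g3:var
//   movk x0, #:abs_g2_nc:var
//   movk x0, #:abs_g1_nc:var
//   movk x0, #:abs_g0_nc:var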
-multiclass movw_operands { - def _imm_asmoperand : AsmOperandClass { - let Name = instname # width # "Shifted" # shift; - let PredicateMethod = "is" # instname # width # "Imm"; - let RenderMethod = "addMoveWideImmOperands"; - let ParserMethod = "ParseImmWithLSLOperand"; - let DiagnosticType = "MOVWUImm16"; - } - - def _imm : Operand { - let ParserMatchClass = !cast(prefix # "_imm_asmoperand"); - let PrintMethod = "printMoveWideImmOperand"; - let EncoderMethod = "getMoveWideImmOpValue"; - let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">"; - - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); - } -} - -defm movn32 : movw_operands<"movn32", "MOVN", 32>; -defm movn64 : movw_operands<"movn64", "MOVN", 64>; -defm movz32 : movw_operands<"movz32", "MOVZ", 32>; -defm movz64 : movw_operands<"movz64", "MOVZ", 64>; -defm movk32 : movw_operands<"movk32", "MOVK", 32>; -defm movk64 : movw_operands<"movk64", "MOVK", 64>; - -multiclass A64I_movwSizes opc, string asmop, dag ins32bit, - dag ins64bit> { - - def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit, - !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary>, - Sched<[WriteALU]> { - bits<18> FullImm; - let UImm16 = FullImm{15-0}; - let Shift = FullImm{17-16}; - } - - def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit, - !strconcat(asmop, "\t$Rd, $FullImm"), - [], NoItinerary>, - Sched<[WriteALU]> { - bits<18> FullImm; - let UImm16 = FullImm{15-0}; - let Shift = FullImm{17-16}; - } -} - -let isMoveImm = 1, isReMaterializable = 1, - isAsCheapAsAMove = 1, hasSideEffects = 0 in { - defm MOVN : A64I_movwSizes<0b00, "movn", - (ins movn32_imm:$FullImm), - (ins movn64_imm:$FullImm)>; - - // Some relocations are able to convert between a MOVZ and a MOVN. If these - // are applied the instruction must be emitted with the corresponding bits as - // 0, which means a MOVZ needs to override that bit from the default. - let PostEncoderMethod = "fixMOVZ" in - defm MOVZ : A64I_movwSizes<0b10, "movz", - (ins movz32_imm:$FullImm), - (ins movz64_imm:$FullImm)>; -} - -let Constraints = "$src = $Rd", - SchedRW = [WriteALU, ReadALU] in -defm MOVK : A64I_movwSizes<0b11, "movk", - (ins GPR32:$src, movk32_imm:$FullImm), - (ins GPR64:$src, movk64_imm:$FullImm)>; - - -// And now the "MOV" aliases. These also need their own operands because what -// they accept is completely different to what the base instructions accept. -multiclass movalias_operand { - def _asmoperand : AsmOperandClass { - let Name = basename # width # "MovAlias"; - let PredicateMethod - = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">"; - let RenderMethod - = "addMoveWideMovAliasOperands<" # width # ", " - # "A64Imms::" # immpredicate # ">"; - } - - def _movimm : Operand { - let ParserMatchClass = !cast(prefix # "_asmoperand"); - - let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift); - } -} - -defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>; -defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>; -defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>; -defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>; - -// FIXME: these are officially canonical aliases, but TableGen is too limited to -// print them at the moment. I believe in this case an "AliasPredicate" method -// will need to be implemented. to allow it, as well as the more generally -// useful handling of non-register, non-constant operands. 
-class movalias - : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>; - -def : movalias; -def : movalias; -def : movalias; -def : movalias; - -def movw_addressref_g0 : ComplexPattern">; -def movw_addressref_g1 : ComplexPattern">; -def movw_addressref_g2 : ComplexPattern">; -def movw_addressref_g3 : ComplexPattern">; - -def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2, - movw_addressref_g1:$G1, movw_addressref_g0:$G0), - (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3), - movw_addressref_g2:$G2), - movw_addressref_g1:$G1), - movw_addressref_g0:$G0)>; - -//===----------------------------------------------------------------------===// -// PC-relative addressing instructions -//===----------------------------------------------------------------------===// -// Contains: ADR, ADRP - -def adr_label : Operand { - let EncoderMethod = "getLabelOpValue"; - - // This label is a 21-bit offset from PC, unscaled - let PrintMethod = "printLabelOperand<21, 1>"; - let ParserMatchClass = label_asmoperand<21, 1>; - let OperandType = "OPERAND_PCREL"; -} - -def adrp_label_asmoperand : AsmOperandClass { - let Name = "AdrpLabel"; - let RenderMethod = "addLabelOperands<21, 4096>"; - let DiagnosticType = "Label"; -} - -def adrp_label : Operand { - let EncoderMethod = "getAdrpLabelOpValue"; - - // This label is a 21-bit offset from PC, scaled by the page-size: 4096. - let PrintMethod = "printLabelOperand<21, 4096>"; - let ParserMatchClass = adrp_label_asmoperand; - let OperandType = "OPERAND_PCREL"; -} - -let hasSideEffects = 0 in { - def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label), - "adr\t$Rd, $Label", [], NoItinerary>, - Sched<[WriteALUs]>; - - def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label), - "adrp\t$Rd, $Label", [], NoItinerary>, - Sched<[WriteALUs]>; -} - -//===----------------------------------------------------------------------===// -// System instructions -//===----------------------------------------------------------------------===// -// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS -// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL - -// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values. -def uimm3_asmoperand : AsmOperandClass { - let Name = "UImm3"; - let PredicateMethod = "isUImm<3>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm3"; -} - -def uimm3 : Operand { - let ParserMatchClass = uimm3_asmoperand; -} - -// The HINT alias can accept a simple unsigned 7-bit immediate. -def uimm7_asmoperand : AsmOperandClass { - let Name = "UImm7"; - let PredicateMethod = "isUImm<7>"; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "UImm7"; -} - -def uimm7 : Operand { - let ParserMatchClass = uimm7_asmoperand; -} - -// Multiclass namedimm is defined with the prefetch operands. Most of these fit -// into the NamedImmMapper scheme well: they either accept a named operand or -// any immediate under a particular value (which may be 0, implying no immediate -// is allowed). 
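// Illustrative note, not part of the patch: for the barrier operand defined
// just below, for example, both spellings assemble to the same CRm field:
//
//   dmb ish    // named inner-shareable barrier, CRm = 0b1011
//   dmb #11    // identical encoding, written as a raw immediate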
-defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
-defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
-defm ic : namedimm<"ic", "A64IC::ICMapper">;
-defm dc : namedimm<"dc", "A64DC::DCMapper">;
-defm at : namedimm<"at", "A64AT::ATMapper">;
-defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
-
-// However, MRS and MSR are more complicated for a few reasons:
-// * There are ~1000 generic names S3_<op1>_<Cn>_<Cm>_<op2> which have an
-//   implementation-defined effect
-// * Most registers are shared, but some are read-only or write-only.
-// * There is a variant of MSR which accepts the same register name (SPSel),
-//   but which would have a different encoding.
-
-// In principle these could be resolved with more complicated subclasses of
-// NamedImmMapper, however that imposes an overhead on other "named
-// immediates", both in concrete terms (virtual tables) and in unnecessary
-// abstraction.
-
-// The solution adopted here is to take the MRS/MSR Mappers out of the usual
-// hierarchy (they're not derived from NamedImmMapper) and to add logic for
-// their special situation.
-def mrs_asmoperand : AsmOperandClass {
-  let Name = "MRS";
-  let ParserMethod = "ParseSysRegOperand";
-  let DiagnosticType = "MRS";
-}
-
-def mrs_op : Operand<i32> {
-  let ParserMatchClass = mrs_asmoperand;
-  let PrintMethod = "printMRSOperand";
-  let DecoderMethod = "DecodeMRSOperand";
-}
-
-def msr_asmoperand : AsmOperandClass {
-  let Name = "MSRWithReg";
-
-  // Note that SPSel is valid for both this and the pstate operands, but with
-  // different immediate encodings. This is why these operands provide a string
-  // AArch64Operand rather than an immediate. The overlap is small enough that
-  // it could be resolved with hackery now, but who can say in future?
-  let ParserMethod = "ParseSysRegOperand";
-  let DiagnosticType = "MSR";
-}
-
-def msr_op : Operand<i32> {
-  let ParserMatchClass = msr_asmoperand;
-  let PrintMethod = "printMSROperand";
-  let DecoderMethod = "DecodeMSROperand";
-}
-
-def pstate_asmoperand : AsmOperandClass {
-  let Name = "MSRPState";
-  // See comment above about parser.
-  let ParserMethod = "ParseSysRegOperand";
-  let DiagnosticType = "MSR";
-}
-
-def pstate_op : Operand<i32> {
-  let ParserMatchClass = pstate_asmoperand;
-  let PrintMethod = "printNamedImmOperand";
-  let DecoderMethod = "DecodeNamedImmOperand";
-}
-
-// When <CRx> is specified, an assembler should accept something like "C4", not
-// the usual "#4" immediate.
-def CRx_asmoperand : AsmOperandClass {
-  let Name = "CRx";
-  let PredicateMethod = "isUImm<4>";
-  let RenderMethod = "addImmOperands";
-  let ParserMethod = "ParseCRxOperand";
-  // Diagnostics are handled in all cases by ParseCRxOperand.
-}
-
-def CRx : Operand<i32> {
-  let ParserMatchClass = CRx_asmoperand;
-  let PrintMethod = "printCRxOperand";
-}
-
-
-// Finally, we can start defining the instructions.
-
-// HINT is straightforward, with a few aliases.
-def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
-                        [], NoItinerary> {
-  bits<7> UImm7;
-  let CRm = UImm7{6-3};
-  let Op2 = UImm7{2-0};
-
-  let Op0 = 0b00;
-  let Op1 = 0b011;
-  let CRn = 0b0010;
-  let Rt = 0b11111;
-}
-
-def : InstAlias<"nop", (HINTi 0)>;
-def : InstAlias<"yield", (HINTi 1)>;
-def : InstAlias<"wfe", (HINTi 2)>;
-def : InstAlias<"wfi", (HINTi 3)>;
-def : InstAlias<"sev", (HINTi 4)>;
-def : InstAlias<"sevl", (HINTi 5)>;
-
-// Quite a few instructions then follow a similar pattern of fixing common
-// fields in the bitpattern, so we'll define a helper-class for them.
-class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
-                 Operand operand, string asmop>
-  : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
-                [], NoItinerary> {
-  let Op0 = op0;
-  let Op1 = op1;
-  let CRn = crn;
-  let Op2 = op2;
-  let Rt = 0b11111;
-}
-
-
-def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
-def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
-def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
-def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
-
-def : InstAlias<"clrex", (CLREXi 0b1111)>;
-def : InstAlias<"isb", (ISBi 0b1111)>;
-
-// (DMBi 0xb) is a "DMB ISH" instruction, appropriate for Linux SMP
-// configurations at least.
-def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
-
-// Any SYS bitpattern can be represented with a complex and opaque "SYS"
-// instruction.
-def SYSiccix : A64I_system<0b0, (outs),
-                           (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
-                                uimm3:$Op2, GPR64:$Rt),
-                           "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
-                           [], NoItinerary> {
-  let Op0 = 0b01;
-}
-
-// You can skip the Xt argument whether it makes sense or not for the generic
-// SYS instruction.
-def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
-                (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
-
-
-// But many have aliases, which obviously don't fit into this scheme.
-class SYSalias<dag ins, string asmstring>
-  : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
-  let isAsmParserOnly = 1;
-
-  bits<14> SysOp;
-  let Op0 = 0b01;
-  let Op1 = SysOp{13-11};
-  let CRn = SysOp{10-7};
-  let CRm = SysOp{6-3};
-  let Op2 = SysOp{2-0};
-}
-
-def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
-
-def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
-  let Rt = 0b11111;
-}
-
-def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
-def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
-
-def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
-
-def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
-  let Rt = 0b11111;
-}
-
-
-def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
-                            (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
-                            "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
-                            [], NoItinerary> {
-  let Op0 = 0b01;
-}
-
-// The instructions themselves are rather simple for MSR and MRS.
-def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
-                        "msr\t$SysReg, $Rt", [], NoItinerary> {
-  bits<16> SysReg;
-  let Op0 = SysReg{15-14};
-  let Op1 = SysReg{13-11};
-  let CRn = SysReg{10-7};
-  let CRm = SysReg{6-3};
-  let Op2 = SysReg{2-0};
-}
-
-def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
-                        "mrs\t$Rt, $SysReg", [], NoItinerary> {
-  bits<16> SysReg;
-  let Op0 = SysReg{15-14};
-  let Op1 = SysReg{13-11};
-  let CRn = SysReg{10-7};
-  let CRm = SysReg{6-3};
-  let Op2 = SysReg{2-0};
-}
-
-def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
-                        "msr\t$PState, $CRm", [], NoItinerary> {
-  bits<6> PState;
-
-  let Op0 = 0b00;
-  let Op1 = PState{5-3};
-  let CRn = 0b0100;
-  let Op2 = PState{2-0};
-  let Rt = 0b11111;
-}
-
-//===----------------------------------------------------------------------===//
-// Test & branch (immediate) instructions
-//===----------------------------------------------------------------------===//
-// Contains: TBZ, TBNZ
-
-// The bit to test is a simple unsigned 6-bit immediate in the X-register
-// versions.
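// Illustrative note, not part of the patch: the X-register forms can name any
// of bits 0-63, while the W-register forms below force Imm{5} to zero and so
// reach only bits 0-31:
//
//   tbnz x0, #63, slow_path    // branch if the sign bit of x0 is set
//   tbz  w1, #3, aligned       // branch if bit 3 of w1 is clear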
-def uimm6 : Operand { - let ParserMatchClass = uimm6_asmoperand; -} - -def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>; - -def tbimm_target : Operand { - let EncoderMethod = "getLabelOpValue"; - - // This label is a 14-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<14, 4>"; - let ParserMatchClass = label_wid14_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -def A64eq : ImmLeaf; -def A64ne : ImmLeaf; - -// These instructions correspond to patterns involving "and" with a power of -// two, which we need to be able to select. -def tstb64_pat : ComplexPattern">; -def tstb32_pat : ComplexPattern">; - -let isBranch = 1, isTerminator = 1 in { - def TBZxii : A64I_TBimm<0b0, (outs), - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), - "tbz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), - A64eq, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]>; - - def TBNZxii : A64I_TBimm<0b1, (outs), - (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label), - "tbnz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0), - A64ne, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]>; - - - // Note, these instructions overlap with the above 64-bit patterns. This is - // intentional, "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" would both - // do the same thing and are both permitted assembly. They also both have - // sensible DAG patterns. - def TBZwii : A64I_TBimm<0b0, (outs), - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), - "tbz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), - A64eq, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Imm{5} = 0b0; - } - - def TBNZwii : A64I_TBimm<0b1, (outs), - (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label), - "tbnz\t$Rt, $Imm, $Label", - [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0), - A64ne, bb:$Label)], - NoItinerary>, - Sched<[WriteBr]> { - let Imm{5} = 0b0; - } -} - -//===----------------------------------------------------------------------===// -// Unconditional branch (immediate) instructions -//===----------------------------------------------------------------------===// -// Contains: B, BL - -def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>; - -def bimm_target : Operand { - let EncoderMethod = "getLabelOpValue"; - - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. - let PrintMethod = "printLabelOperand<26, 4>"; - let ParserMatchClass = label_wid26_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -def blimm_target : Operand { - let EncoderMethod = "getLabelOpValue"; - - // This label is a 26-bit offset from PC, scaled by the instruction-width: 4. 
- let PrintMethod = "printLabelOperand<26, 4>"; - let ParserMatchClass = label_wid26_scal4_asmoperand; - - let OperandType = "OPERAND_PCREL"; -} - -class A64I_BimmImpl patterns, Operand lbl_type> - : A64I_Bimm, - Sched<[WriteBr]>; - -let isBranch = 1 in { - def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> { - let isTerminator = 1; - let isBarrier = 1; - } - - let SchedRW = [WriteBrL] in { - def BLimm : A64I_BimmImpl<0b1, "bl", - [(AArch64Call tglobaladdr:$Label)], blimm_target> { - let isCall = 1; - let Defs = [X30]; - } - } -} - -def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (register) instructions -//===----------------------------------------------------------------------===// -// Contains: BR, BLR, RET, ERET, DRP. - -// Most of the notional opcode fields in the A64I_Breg format are fixed in A64 -// at the moment. -class A64I_BregImpl opc, - dag outs, dag ins, string asmstr, list patterns, - InstrItinClass itin = NoItinerary> - : A64I_Breg, - Sched<[WriteBr]> { - let isBranch = 1; - let isIndirectBranch = 1; -} - -// Note that these are not marked isCall or isReturn because as far as LLVM is -// concerned they're not. "ret" is just another jump unless it has been selected -// by LLVM as the function's return. - -let isBranch = 1 in { - def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn), - "br\t$Rn", [(brind i64:$Rn)]> { - let isBarrier = 1; - let isTerminator = 1; - } - - let SchedRW = [WriteBrL] in { - def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn), - "blr\t$Rn", [(AArch64Call i64:$Rn)]> { - let isBarrier = 0; - let isCall = 1; - let Defs = [X30]; - } - } - - def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn), - "ret\t$Rn", []> { - let isBarrier = 1; - let isTerminator = 1; - let isReturn = 1; - } - - // Create a separate pseudo-instruction for codegen to use so that we don't - // flag x30 as used in every function. It'll be restored before the RET by the - // epilogue if it's legitimately used. - def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> { - let isTerminator = 1; - let isBarrier = 1; - let isReturn = 1; - } - - def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> { - let Rn = 0b11111; - let isBarrier = 1; - let isTerminator = 1; - let isReturn = 1; - } - - def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> { - let Rn = 0b11111; - let isBarrier = 1; - } -} - -def RETAlias : InstAlias<"ret", (RETx X30)>; - - -//===----------------------------------------------------------------------===// -// Address generation patterns -//===----------------------------------------------------------------------===// - -// Primary method of address generation for the small/absolute memory model is -// an ADRP/ADR pair: -// ADRP x0, some_variable -// ADD x0, x0, #:lo12:some_variable -// -// The load/store elision of the ADD is accomplished when selecting -// addressing-modes. This just mops up the cases where that doesn't work and we -// really need an address in some register. - -// This wrapper applies a LO12 modifier to the address. Otherwise we could just -// use the same address. 
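// Illustrative note, not part of the patch: the split works because ADRP
// forms a 4KB-aligned, PC-relative page address and the :lo12: ADD supplies
// the remaining low 12 bits:
//
//   adrp x0, var               // x0 = (PC & ~0xfff) + (page delta of var)
//   add  x0, x0, #:lo12:var    // x0 = complete address of var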
- -class ADRP_ADD - : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)), - (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>; - -def : ADRP_ADD; -def : ADRP_ADD; -def : ADRP_ADD; -def : ADRP_ADD; -def : ADRP_ADD; -def : ADRP_ADD; - -//===----------------------------------------------------------------------===// -// GOT access patterns -//===----------------------------------------------------------------------===// - -class GOTLoadSmall - : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)), - (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>; - -def : GOTLoadSmall; -def : GOTLoadSmall; -def : GOTLoadSmall; - -//===----------------------------------------------------------------------===// -// Tail call handling -//===----------------------------------------------------------------------===// - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in { - def TC_RETURNdi - : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff), - [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>; - - def TC_RETURNxi - : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), - [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>; -} - -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, - Uses = [XSP] in { - def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [], - (Bimm bimm_target:$Label)>; - - def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [], - (BRx GPR64:$Rd)>; -} - +let Predicates = [IsLE] in { +def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; + +def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i8 (bitconvert GPR64:$Xn)), + (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v4i16 (bitconvert GPR64:$Xn)), + (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v2i32 (bitconvert GPR64:$Xn)), + (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; +def : Pat<(v2f32 (bitconvert GPR64:$Xn)), + (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>; + +def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), + (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), + (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), + (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), + (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>; +} +def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; +def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>; + +def : 
Pat<(f32 (bitconvert (i32 GPR32:$Xn))), + (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>; +def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), + (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>; +def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), + (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>; +def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), + (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>; +def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), + (COPY_TO_REGCLASS V64:$Vn, GPR64)>; + +let Predicates = [IsLE] in { +def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), + (v1i64 (REV64v2i32 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), + (v1i64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), + (v1i64 (REV64v8i8 FPR64:$src))>; +def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), + (v1i64 (REV64v2i32 FPR64:$src))>; +} +def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), + (v2i32 (REV32v4i16 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), + (v2i32 (REV32v8i8 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), + (v2i32 (REV64v2i32 FPR64:$src))>; +} +def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), + (v4i16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), + (v4i16 (REV16v8i8 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), + (v4i16 (REV32v4i16 FPR64:$src))>; +def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), + (v4i16 (REV64v4i16 FPR64:$src))>; +} + +let Predicates = [IsLE] in { +def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 
FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), + (v8i8 (REV32v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), + (v8i8 (REV16v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), + (v8i8 (REV32v8i8 FPR64:$src))>; +def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), + (v8i8 (REV64v8i8 FPR64:$src))>; +} + +let Predicates = [IsLE] in { +def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), + (f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), + (f64 (REV64v4i16 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), + (f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), + (f64 (REV64v8i8 FPR64:$src))>; +} +def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), + (v1f64 (REV64v2i32 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), + (v1f64 (REV64v4i16 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), + (v1f64 (REV64v8i8 FPR64:$src))>; +def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), + (v1f64 (REV64v2i32 FPR64:$src))>; +} +def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; +def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), + (v2f32 (REV32v4i16 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), + (v2f32 (REV32v8i8 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), + (v2f32 (REV64v2i32 FPR64:$src))>; +} +def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 
FPR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; +def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), + (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; +def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), + (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), + (f128 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), + (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>; +def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), + (f128 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), + (f128 (EXTv16i8 (REV64v16i8 FPR128:$src), + (REV64v16i8 FPR128:$src), (i32 8)))>; +} + +let Predicates = [IsLE] in { +def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; +def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), + (v2f64 (EXTv16i8 FPR128:$src, + FPR128:$src, (i32 8)))>; +def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), + (v2f64 (REV64v4i32 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), + (v2f64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), + (v2f64 (REV64v16i8 FPR128:$src))>; +def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), + (v2f64 (REV64v4i32 FPR128:$src))>; +} +def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), + (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), (i32 8)))>; +def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), + (v4f32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), + (v4f32 (REV32v16i8 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), + (v4f32 (REV64v4i32 FPR128:$src))>; +def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), + (v4f32 (REV64v4i32 FPR128:$src))>; +} +def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), + (v2i64 (EXTv16i8 FPR128:$src, 
+ FPR128:$src, (i32 8)))>; +def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), + (v2i64 (REV64v4i32 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), + (v2i64 (REV64v8i16 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), + (v2i64 (REV64v16i8 FPR128:$src))>; +def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), + (v2i64 (REV64v4i32 FPR128:$src))>; +} +def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), + (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src), + (REV64v4i32 FPR128:$src), + (i32 8)))>; +def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), + (v4i32 (REV64v4i32 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), + (v4i32 (REV32v8i16 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), + (v4i32 (REV32v16i8 FPR128:$src))>; +def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), + (v4i32 (REV64v4i32 FPR128:$src))>; +} +def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; + +let Predicates = [IsLE] in { +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), + (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src), + (REV64v8i16 FPR128:$src), + (i32 8)))>; +def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), + (v8i16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), + (v8i16 (REV32v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), + (v8i16 (REV16v16i8 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), + (v8i16 (REV64v8i16 FPR128:$src))>; +def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), + (v8i16 (REV32v8i16 FPR128:$src))>; +} + +let Predicates = [IsLE] in { +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; +} +let Predicates = [IsBE] in { +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), + (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src), + (REV64v16i8 FPR128:$src), + (i32 8)))>; +def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), + (v16i8 (REV64v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), + (v16i8 (REV32v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), + (v16i8 (REV16v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v2f64 
FPR128:$src))), + (v16i8 (REV64v16i8 FPR128:$src))>; +def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), + (v16i8 (REV32v16i8 FPR128:$src))>; +} + +def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; +def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), + (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; + +// A 64-bit subvector insert to the first 128-bit vector position +// is a subregister copy that needs no instruction. +def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; +def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; + +// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 +// or v2f32. +def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), + (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), + (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; +def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), + (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), + (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; + // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, + // so we match on v4f32 here, not v2f32. This will also catch adding + // the low two lanes of a true v4f32 vector. +def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), + (vector_extract (v4f32 FPR128:$Rn), (i64 1))), + (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; + +// Scalar 64-bit shifts in FPR64 registers. +def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), + (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; + +// Tail call return handling. These are all compiler pseudo-instructions, +// so no encoding information or anything like that. 
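// Illustrative note, not part of the patch: as a sketch of how these pseudos
// are reached, IR along the lines of
//
//   define i32 @f(i32 %x) {
//     %r = tail call i32 @g(i32 %x)
//     ret i32 %r
//   }
//
// selects to TCRETURNdi @g, 0, which is later expanded to a bare "b g" once
// the frame has been torn down.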
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff),[]>;
+  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>;
+}
+
+def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
+          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
+def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
+          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
 def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
-          (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
-
-//===----------------------------------------------------------------------===//
-// Thread local storage
-//===----------------------------------------------------------------------===//
-
-// This is a pseudo-instruction representing the ".tlsdesccall" directive in
-// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
-// current location. It should always be immediately followed by a BLR
-// instruction, and is intended solely for relaxation by the linker.
-
-def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
-
-def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
-  let hasSideEffects = 1;
-}
-
-def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
-                              [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
-  let isCall = 1;
-  let Defs = [X30];
-}
-
-def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
-          (TLSDESC_BLRx $Rn, texternalsym:$Var)>;
-
-//===----------------------------------------------------------------------===//
-// Bitfield patterns
-//===----------------------------------------------------------------------===//
-
-def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
-}]>;
-
-def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
-}]>;
-
-def bfi_width_to_imms : SDNodeXForm<imm, [{
-  return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
-}]>;
-
-
-// The simpler patterns deal with cases where no AND mask is actually needed
-// (either all bits are used or the low 32 bits are used).
-let AddedComplexity = 10 in {
-
-def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
-          (BFIxxii $src, $Rn,
-                   (bfi64_lsb_to_immr (i64 imm:$ImmR)),
-                   (bfi_width_to_imms (i64 imm:$ImmS)))>;
-
-def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
-          (BFIwwii $src, $Rn,
-                   (bfi32_lsb_to_immr (i64 imm:$ImmR)),
-                   (bfi_width_to_imms (i64 imm:$ImmS)))>;
-
-
-def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
-               (i64 4294967295)),
-          (SUBREG_TO_REG (i64 0),
-                         (BFIwwii (EXTRACT_SUBREG $src, sub_32),
-                                  (EXTRACT_SUBREG $Rn, sub_32),
-                                  (bfi32_lsb_to_immr (i64 imm:$ImmR)),
-                                  (bfi_width_to_imms (i64 imm:$ImmS))),
-                         sub_32)>;
-
-}
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous patterns
-//===----------------------------------------------------------------------===//
-
-// Truncation from 64 to 32-bits just involves renaming your register.
-def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
-
-// Similarly, extension where we don't care about the high bits is
-// just a rename.
-def : Pat<(i64 (anyext i32:$val)),
-          (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
-
-// SELECT instructions providing f128 types need to be handled by a
-// pseudo-instruction since the eventual code will need to introduce basic
-// blocks and control flow.
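-// (Worked example of the immediate mapping above: inserting an 8-bit field
-// at lsb 8 of a w-register uses immr = (32 - 8) % 32 = 24 and
-// imms = 8 - 1 = 7, i.e. "bfi w0, w1, #8, #8" encodes ImmR=24, ImmS=7.)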
-def F128CSEL : PseudoInst<(outs FPR128:$Rd),
-                          (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
-                          [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
-  let Uses = [NZCV];
-  let usesCustomInserter = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Load/store patterns
-//===----------------------------------------------------------------------===//
-
-// There are lots of patterns here, because we need to allow at least three
-// parameters to vary independently.
-//   1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
-//   2. LLVM source: zextloadi8, anyextloadi8, ...
-//   3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
-//
-// The biggest problem turns out to be the address-generation variable. At the
-// point of instantiation we need to produce two DAGs, one for the pattern and
-// one for the instruction. Doing this at the lowest level of classes doesn't
-// work.
-//
-// Consider the simple uimm12 addressing mode, and the desire to match both (add
-// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
-// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
-// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
-// operation, and PatFrags are for selection not output.
-//
-// As a result, the address-generation patterns are the final
-// instantiations. However, we do still need to vary the operand for the address
-// further down (At the point we're deciding A64WrapperSmall, we don't know
-// the memory width of the operation).
-
-//===------------------------------
-// 1. Basic infrastructural defs
-//===------------------------------
-
-// First, some simple classes for !foreach and !subst to use:
-class Decls {
-  dag pattern;
-}
-
-def decls : Decls;
-def ALIGN;
-def INST;
-def OFFSET;
-def SHIFT;
-
-// You can't use !subst on an actual immediate, but you *can* use it on an
-// operand record that happens to match a single immediate. So we do.
-def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
-def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
-def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
-def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
-def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
-
-// If the low bits of a pointer are known to be 0 then an "or" is just as good
-// as addition for computing an offset. This fragment forwards that check for
-// TableGen's use.
-def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
-[{
-  return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
-}]>;
-
-// Load/store (unsigned immediate) operations with relocations against global
-// symbols (for lo12) are only valid if those symbols have correct alignment
-// (since the immediate offset is divided by the access scale, it can't have a
-// remainder).
-//
-// The guaranteed alignment is provided as part of the WrapperSmall
-// operation, and checked against one of these.
-def any_align : ImmLeaf<i64, [{ (void)Imm; return true; }]>;
-def min_align2 : ImmLeaf<i64, [{ return Imm >= 2; }]>;
-def min_align4 : ImmLeaf<i64, [{ return Imm >= 4; }]>;
-def min_align8 : ImmLeaf<i64, [{ return Imm >= 8; }]>;
-def min_align16 : ImmLeaf<i64, [{ return Imm >= 16; }]>;
-
-// "Normal" load/store instructions can be used on atomic operations, provided
-// the ordering parameter is at most "monotonic". Anything above that needs
-// special handling with acquire/release instructions.
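-// For example
-//   %val = load atomic i32* %p monotonic, align 4
-// can be selected to a plain "ldr w0, [x0]", whereas an acquire load must
-// use LDAR and a release store must use STLR.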
-class simple_load - : PatFrag<(ops node:$ptr), (base node:$ptr), [{ - return cast(N)->getOrdering() <= Monotonic; -}]>; - -def atomic_load_simple_i8 : simple_load; -def atomic_load_simple_i16 : simple_load; -def atomic_load_simple_i32 : simple_load; -def atomic_load_simple_i64 : simple_load; - -class simple_store - : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ - return cast(N)->getOrdering() <= Monotonic; -}]>; - -def atomic_store_simple_i8 : simple_store; -def atomic_store_simple_i16 : simple_store; -def atomic_store_simple_i32 : simple_store; -def atomic_store_simple_i64 : simple_store; - -//===------------------------------ -// 2. UImm12 and SImm9 -//===------------------------------ - -// These instructions have two operands providing the address so they can be -// treated similarly for most purposes. - -//===------------------------------ -// 2.1 Base patterns covering extend/truncate semantics -//===------------------------------ - -// Atomic patterns can be shared between integer operations of all sizes, a -// quick multiclass here allows reuse. -multiclass ls_atomic_pats { - def : Pat<(!cast("atomic_load_simple_" # sty) address), - (LOAD Base, Offset)>; - - def : Pat<(!cast("atomic_store_simple_" # sty) address, transty:$Rt), - (STORE $Rt, Base, Offset)>; -} - -// Instructions accessing a memory chunk smaller than a register (or, in a -// pinch, the same size) have a characteristic set of patterns they want to -// match: extending loads and truncating stores. This class deals with the -// sign-neutral version of those patterns. -// -// It will be instantiated across multiple addressing-modes. -multiclass ls_small_pats - : ls_atomic_pats { - def : Pat<(!cast(zextload # sty) address), (LOAD Base, Offset)>; - - def : Pat<(!cast(extload # sty) address), (LOAD Base, Offset)>; - - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit - // register was actually set. - def : Pat<(i64 (!cast(zextload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; - - def : Pat<(i64 (!cast(extload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>; - - def : Pat<(!cast(truncstore # sty) i32:$Rt, address), - (STORE $Rt, Base, Offset)>; - - // For truncating store from 64-bits, we have to manually tell LLVM to - // ignore the high bits of the x register. - def : Pat<(!cast(truncstore # sty) i64:$Rt, address), - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>; -} - -// Next come patterns for sign-extending loads. -multiclass load_signed_pats { - def : Pat<(i32 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "w" # U) Base, Offset)>; - - def : Pat<(i64 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "x" # U) Base, Offset)>; - -} - -// and finally "natural-width" loads and stores come next. -multiclass ls_neutral_pats { - def : Pat<(sty (load address)), (LOAD Base, Offset)>; - def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>; -} - -// Integer operations also get atomic instructions to select for. -multiclass ls_int_neutral_pats - : ls_neutral_pats, - ls_atomic_pats; - -//===------------------------------ -// 2.2. 
Addressing-mode instantiations -//===------------------------------ - -multiclass uimm12_pats { - defm : ls_small_pats; - defm : ls_small_pats; - defm : ls_small_pats; - - defm : ls_int_neutral_pats; - - defm : ls_int_neutral_pats; - - defm : ls_neutral_pats; - - defm : ls_neutral_pats; - - defm : ls_neutral_pats; - - defm : ls_neutral_pats; - - defm : load_signed_pats<"B", "", Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, byte_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, byte_uimm12, - !subst(ALIGN, any_align, decls.pattern))), - i8>; - - defm : load_signed_pats<"H", "", Base, - !foreach(decls.pattern, Offset, - !subst(OFFSET, hword_uimm12, decls.pattern)), - !foreach(decls.pattern, address, - !subst(OFFSET, hword_uimm12, - !subst(ALIGN, min_align2, decls.pattern))), - i16>; - - def : Pat<(sextloadi32 !foreach(decls.pattern, address, - !subst(OFFSET, word_uimm12, - !subst(ALIGN, min_align4, decls.pattern)))), - (LDRSWx Base, !foreach(decls.pattern, Offset, - !subst(OFFSET, word_uimm12, decls.pattern)))>; -} - -// Straightforward patterns of last resort: a pointer with or without an -// appropriate offset. -defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>; -defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12), - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>; - -// The offset could be hidden behind an "or", of course: -defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12), - (i64 i64:$Rn), (i64 OFFSET:$UImm12)>; - -// Global addresses under the small-absolute model should use these -// instructions. There are ELF relocations specifically for it. -defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN), - (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>; - -defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12, - ALIGN), - (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>; - -// External symbols that make it this far should also get standard relocations. -defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12, - ALIGN), - (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>; - -defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN), - (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>; - -// We also want to use uimm12 instructions for local variables at the moment. -def tframeindex_XFORM : SDNodeXForm(N)->getIndex(); - return CurDAG->getTargetFrameIndex(FI, MVT::i64); -}]>; - -defm : uimm12_pats<(i64 frameindex:$Rn), - (tframeindex_XFORM tframeindex:$Rn), (i64 0)>; - -// These can be much simpler than uimm12 because we don't to change the operand -// type (e.g. LDURB and LDURH take the same operands). 
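-// (The unscaled forms take a signed byte offset in [-256, 255] regardless of
-// access size: "ldur w0, [x1, #-17]" is encodable, while the scaled
-// "ldr w0, [x1, #-17]" is not, since LDR needs a non-negative multiple of 4.)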
-multiclass simm9_pats { - defm : ls_small_pats; - defm : ls_small_pats; - - defm : ls_int_neutral_pats; - defm : ls_int_neutral_pats; - - defm : ls_neutral_pats; - defm : ls_neutral_pats; - defm : ls_neutral_pats; - defm : ls_neutral_pats; - - def : Pat<(i64 (zextloadi32 address)), - (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>; - - def : Pat<(truncstorei32 i64:$Rt, address), - (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>; - - defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>; - defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>; - def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>; -} - -defm : simm9_pats<(add i64:$Rn, simm9:$SImm9), - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>; - -defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9), - (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>; - - -//===------------------------------ -// 3. Register offset patterns -//===------------------------------ - -// Atomic patterns can be shared between integer operations of all sizes, a -// quick multiclass here allows reuse. -multiclass ro_atomic_pats { - def : Pat<(!cast("atomic_load_simple_" # sty) address), - (LOAD Base, Offset, Extend)>; - - def : Pat<(!cast("atomic_store_simple_" # sty) address, transty:$Rt), - (STORE $Rt, Base, Offset, Extend)>; -} - -// The register offset instructions take three operands giving the instruction, -// and have an annoying split between instructions where Rm is 32-bit and -// 64-bit. So we need a special hierarchy to describe them. Other than that the -// same operations should be supported as for simm9 and uimm12 addressing. - -multiclass ro_small_pats - : ro_atomic_pats { - def : Pat<(!cast(zextload # sty) address), - (LOAD Base, Offset, Extend)>; - - def : Pat<(!cast(extload # sty) address), - (LOAD Base, Offset, Extend)>; - - // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit - // register was actually set. - def : Pat<(i64 (!cast(zextload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; - - def : Pat<(i64 (!cast(extload # sty) address)), - (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>; - - def : Pat<(!cast(truncstore # sty) i32:$Rt, address), - (STORE $Rt, Base, Offset, Extend)>; - - // For truncating store from 64-bits, we have to manually tell LLVM to - // ignore the high bits of the x register. - def : Pat<(!cast(truncstore # sty) i64:$Rt, address), - (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>; - -} - -// Next come patterns for sign-extending loads. -multiclass ro_signed_pats { - def : Pat<(i32 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "w_" # Rm # "_RegOffset") - Base, Offset, Extend)>; - - def : Pat<(i64 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "x_" # Rm # "_RegOffset") - Base, Offset, Extend)>; -} - -// and finally "natural-width" loads and stores come next. 
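-// ("Natural-width" means the access is exactly as wide as the value type, so
-// no extension or truncation is involved: (f32 (load address)) maps straight
-// onto a single s-register load, (f64 ...) onto a d-register load, and so on.)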
-multiclass ro_neutral_pats { - def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>; - def : Pat<(store sty:$Rt, address), - (STORE $Rt, Base, Offset, Extend)>; -} - -multiclass ro_int_neutral_pats - : ro_neutral_pats, - ro_atomic_pats; - -multiclass regoff_pats { - defm : ro_small_pats("LS8_" # Rm # "_RegOffset_LDR"), - !cast("LS8_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq0, decls.pattern)), - i8>; - defm : ro_small_pats("LS16_" # Rm # "_RegOffset_LDR"), - !cast("LS16_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - i16>; - defm : ro_small_pats("LS32_" # Rm # "_RegOffset_LDR"), - !cast("LS32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - i32>; - - defm : ro_int_neutral_pats< - !cast("LS32_" # Rm # "_RegOffset_LDR"), - !cast("LS32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - i32>; - - defm : ro_int_neutral_pats< - !cast("LS64_" # Rm # "_RegOffset_LDR"), - !cast("LS64_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq3, decls.pattern)), - i64>; - - defm : ro_neutral_pats("LSFP16_" # Rm # "_RegOffset_LDR"), - !cast("LSFP16_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - f16>; - - defm : ro_neutral_pats("LSFP32_" # Rm # "_RegOffset_LDR"), - !cast("LSFP32_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern)), - f32>; - - defm : ro_neutral_pats("LSFP64_" # Rm # "_RegOffset_LDR"), - !cast("LSFP64_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq3, decls.pattern)), - f64>; - - defm : ro_neutral_pats("LSFP128_" # Rm # "_RegOffset_LDR"), - !cast("LSFP128_" # Rm # "_RegOffset_STR"), - Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq4, decls.pattern)), - f128>; - - defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq0, decls.pattern)), - i8>; - - defm : ro_signed_pats<"H", Rm, Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq1, decls.pattern)), - i16>; - - def : Pat<(sextloadi32 !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq2, decls.pattern))), - (!cast("LDRSWx_" # Rm # "_RegOffset") - Base, Offset, Extend)>; -} - - -// Finally we're in a position to tell LLVM exactly what addresses are reachable -// using register-offset instructions. Essentially a base plus a possibly -// extended, possibly shifted (by access size) offset. 
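-// As a sketch (not a literal def; the real ones come from the multiclass
-// expansions), once SHIFT has been !subst-ituted to imm_eq2, one of the "Wm"
-// 32-bit load patterns below is morally:
-//   def : Pat<(i32 (load (add i64:$Rn, (shl (sext i32:$Rm), 2)))),
-//             (LS32_Wm_RegOffset_LDR $Rn, $Rm, 7)>;
-// i.e. "ldr w0, [x0, w1, sxtw #2]".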
- -defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>; - -defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)), - (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>; - -defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm), - (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>; - -defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)), - (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD (NEON) Support -// + (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>; -include "AArch64InstrNEON.td" +include "AArch64InstrAtomics.td" diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td deleted file mode 100644 index 0b97e3b..0000000 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ /dev/null @@ -1,9476 +0,0 @@ -//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the AArch64 NEON instruction set. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// NEON-specific DAG Nodes. -//===----------------------------------------------------------------------===// - -// (outs Result), (ins Imm, OpCmode) -def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>; - -def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>; - -def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>; - -// (outs Result), (ins Imm) -def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1, - [SDTCisVec<0>, SDTCisVT<1, i32>]>>; - -// (outs Result), (ins LHS, RHS, CondCode) -def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; - -// (outs Result), (ins LHS, 0/0.0 constant, CondCode) -def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisVec<1>]>>; - -// (outs Result), (ins LHS, RHS) -def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; - -def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisVT<2, i32>]>; -def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; -def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; - -def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>; -def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>; -def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>; -def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>; -def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>; -def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>; -def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>; - -def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; -def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>; -def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>; -def Neon_rev16 : 
SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>; -def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1, - [SDTCisVec<0>]>>; -def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2, - [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>; -def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3, - [SDTCisVec<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>; - -//===----------------------------------------------------------------------===// -// Addressing-mode instantiations -//===----------------------------------------------------------------------===// - -multiclass ls_64_pats { -defm : ls_neutral_pats; -} - -multiclass ls_128_pats { -defm : ls_neutral_pats; -} - -multiclass uimm12_neon_pats { - defm : ls_64_pats; - defm : ls_64_pats; - defm : ls_64_pats; - defm : ls_64_pats; - defm : ls_64_pats; - defm : ls_64_pats; - - defm : ls_128_pats; - defm : ls_128_pats; - defm : ls_128_pats; - defm : ls_128_pats; - defm : ls_128_pats; - defm : ls_128_pats; -} - -defm : uimm12_neon_pats<(A64WrapperSmall - tconstpool:$Hi, tconstpool:$Lo12, ALIGN), - (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>; - -//===----------------------------------------------------------------------===// -// Multiclasses -//===----------------------------------------------------------------------===// - -multiclass NeonI_3VSame_B_sizes size, bits<5> opcode, - string asmop, SDPatternOperator opnode8B, - SDPatternOperator opnode16B, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8B : NeonI_3VSame<0b0, u, size, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _16B : NeonI_3VSame<0b1, u, size, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } - -} - -multiclass NeonI_3VSame_HS_sizes opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _4H : NeonI_3VSame<0b0, u, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h", - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _8H : NeonI_3VSame<0b1, u, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h", - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _2S : NeonI_3VSame<0b0, u, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _4S : NeonI_3VSame<0b1, u, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} -multiclass NeonI_3VSame_BHS_sizes opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> - : NeonI_3VSame_HS_sizes { - 
let isCommutable = Commutable in { - def _8B : NeonI_3VSame<0b0, u, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _16B : NeonI_3VSame<0b1, u, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -multiclass NeonI_3VSame_BHSD_sizes opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> - : NeonI_3VSame_BHS_sizes { - let isCommutable = Commutable in { - def _2D : NeonI_3VSame<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", - [(set (v2i64 VPR128:$Rd), - (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types, -// but Result types can be integer or floating point types. -multiclass NeonI_3VSame_SD_sizes opcode, - string asmop, SDPatternOperator opnode, - ValueType ResTy2S, ValueType ResTy4S, - ValueType ResTy2D, bit Commutable = 0> { - let isCommutable = Commutable in { - def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), - asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", - [(set (ResTy2S VPR64:$Rd), - (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", - [(set (ResTy4S VPR128:$Rd), - (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", - [(set (ResTy2D VPR128:$Rd), - (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -//===----------------------------------------------------------------------===// -// Instruction Definitions -//===----------------------------------------------------------------------===// - -// Vector Arithmetic Instructions - -// Vector Add (Integer and Floating-Point) - -defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>; -defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, - v2f32, v4f32, v2f64, 1>; - -// Patterns to match add of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 (add FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (add FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (add FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (ADDvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Sub (Integer and Floating-Point) - -defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>; -defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, - v2f32, 
v4f32, v2f64, 0>; - -// Patterns to match sub of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 (sub FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (sub FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (SUBvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Multiply (Integer and Floating-Point) - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>; -defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, - v2f32, v4f32, v2f64, 1>; -} - -// Patterns to match mul of v1i8/v1i16/v1i32 types -def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)), - (EXTRACT_SUBREG - (MULvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (mul FPR16:$Rn, FPR16:$Rm)), - (EXTRACT_SUBREG - (MULvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)), - (EXTRACT_SUBREG - (MULvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Vector Multiply (Polynomial) - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", - int_arm_neon_vmulp, int_arm_neon_vmulp, 1>; -} - -// Vector Multiply-accumulate and Multiply-subtract (Integer) - -// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and -// two operands constraints. 
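-// The "$src = $Rd" constraint below ties the accumulator to the destination
-// register, which is what makes "mla v0.4s, v1.4s, v2.4s" read v0, add the
-// product v1*v2 to it, and write the result back to v0.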
-class NeonI_3VSame_Constraint_impl size, - bits<5> opcode, SDPatternOperator opnode> - : NeonI_3VSame, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (mul node:$Rn, node:$Rm))>; - -def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (sub node:$Ra, (mul node:$Rn, node:$Rm))>; - - -let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in { -def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8, - 0b0, 0b0, 0b00, 0b10010, Neon_mla>; -def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8, - 0b1, 0b0, 0b00, 0b10010, Neon_mla>; -def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16, - 0b0, 0b0, 0b01, 0b10010, Neon_mla>; -def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16, - 0b1, 0b0, 0b01, 0b10010, Neon_mla>; -def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32, - 0b0, 0b0, 0b10, 0b10010, Neon_mla>; -def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32, - 0b1, 0b0, 0b10, 0b10010, Neon_mla>; - -def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b00, 0b10010, Neon_mls>; -def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b00, 0b10010, Neon_mls>; -def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16, - 0b0, 0b1, 0b01, 0b10010, Neon_mls>; -def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16, - 0b1, 0b1, 0b01, 0b10010, Neon_mls>; -def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32, - 0b0, 0b1, 0b10, 0b10010, Neon_mls>; -def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32, - 0b1, 0b1, 0b10, 0b10010, Neon_mls>; -} - -// Vector Multiply-accumulate and Multiply-subtract (Floating Point) - -def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>; - -def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>; - -let Predicates = [HasNEON, UseFusedMAC], - SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in { -def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32, - 0b0, 0b0, 0b00, 0b11001, Neon_fmla>; -def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32, - 0b1, 0b0, 0b00, 0b11001, Neon_fmla>; -def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64, - 0b1, 0b0, 0b01, 0b11001, Neon_fmla>; - -def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32, - 0b0, 0b0, 0b10, 0b11001, Neon_fmls>; -def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32, - 0b1, 0b0, 0b10, 0b11001, Neon_fmls>; -def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64, - 0b1, 0b0, 0b11, 0b11001, Neon_fmls>; -} - -// We're also allowed to match the fma instruction regardless of compile -// options. 
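-// (The @llvm.fma.* intrinsic requests an explicitly fused operation, so
-// selecting FMLA/FMLS for it is always correct; the Neon_fmla/Neon_fmls
-// fragments above instead fuse a separate fadd+fmul, which is why their
-// instructions sit under the UseFusedMAC predicate.)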
-def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)), - (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; -def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), - (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; -def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), - (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; - -def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)), - (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; -def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), - (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; -def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), - (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; - -// Vector Divide (Floating-Point) - -let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in { -defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, - v2f32, v4f32, v2f64, 0>; -} - -// Vector Bitwise Operations - -// Vector Bitwise AND - -defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>; - -// Vector Bitwise Exclusive OR - -defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>; - -// Vector Bitwise OR - -defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>; - -// ORR disassembled as MOV if Vn==Vm - -// Vector Move - register -// Alias for ORR if Vn=Vm. -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. -def : NeonInstAlias<"mov $Rd.8b, $Rn.8b", - (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>; -def : NeonInstAlias<"mov $Rd.16b, $Rn.16b", - (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>; - -// The MOVI instruction takes two immediate operands. The first is the -// immediate encoding, while the second is the cmode. A cmode of 14, or -// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC. 
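-// For example, Neon_AllOne below is (Neon_movi 255, 14): with cmode 0b1110
-// the 8-bit immediate 0xff is replicated into every byte lane, yielding an
-// all-ones vector, and immediate 0 yields all-zeros the same way.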
-def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>; -def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>; - -def Neon_not8B : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>; -def Neon_not16B : PatFrag<(ops node:$in), - (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>; - -def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm), - (or node:$Rn, (Neon_not8B node:$Rm))>; - -def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm), - (or node:$Rn, (Neon_not16B node:$Rm))>; - -def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm), - (and node:$Rn, (Neon_not8B node:$Rm))>; - -def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm), - (and node:$Rn, (Neon_not16B node:$Rm))>; - - -// Vector Bitwise OR NOT - register - -defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn", - Neon_orn8B, Neon_orn16B, 0>; - -// Vector Bitwise Bit Clear (AND NOT) - register - -defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic", - Neon_bic8B, Neon_bic16B, 0>; - -multiclass Neon_bitwise2V_patterns { - def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$Rn, VPR128:$Rm)>; -} - -// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN -defm : Neon_bitwise2V_patterns; -defm : Neon_bitwise2V_patterns; -defm : Neon_bitwise2V_patterns; -defm : Neon_bitwise2V_patterns; -defm : Neon_bitwise2V_patterns; - -// Vector Bitwise Select -def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b01, 0b00011, vselect>; - -def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b01, 0b00011, vselect>; - -multiclass Neon_bitwise3V_patterns { - // Disassociate type from instruction definition - def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2f64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4f32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, 
VPR128:$Rm)), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - - // Allow to match BSL instruction pattern with non-constant operand - def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd), - (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), - (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd), - (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), - (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; - - // Allow to match llvm.arm.* intrinsics. - def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src), - (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src), - (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src), - (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src), - (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src), - (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src), - (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))), - (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; - def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src), - (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src), - (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src), - (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src), - (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src), - (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; - def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src), - (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), - (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; -} - -// Additional patterns for bitwise instruction BSL -defm: Neon_bitwise3V_patterns; - -def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm), - (vselect node:$src, node:$Rn, node:$Rm), - [{ (void)N; return false; }]>; - -// Vector Bitwise Insert if True - -def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>; -def BITvvv_16B : 
NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>; - -// Vector Bitwise Insert if False - -def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>; -def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>; - -// Vector Absolute Difference and Accumulate (Signed, Unsigned) - -def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>; -def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>; - -// Vector Absolute Difference and Accumulate (Unsigned) -def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8, - 0b0, 0b1, 0b00, 0b01111, Neon_uaba>; -def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8, - 0b1, 0b1, 0b00, 0b01111, Neon_uaba>; -def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16, - 0b0, 0b1, 0b01, 0b01111, Neon_uaba>; -def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16, - 0b1, 0b1, 0b01, 0b01111, Neon_uaba>; -def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32, - 0b0, 0b1, 0b10, 0b01111, Neon_uaba>; -def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32, - 0b1, 0b1, 0b10, 0b01111, Neon_uaba>; - -// Vector Absolute Difference and Accumulate (Signed) -def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8, - 0b0, 0b0, 0b00, 0b01111, Neon_saba>; -def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8, - 0b1, 0b0, 0b00, 0b01111, Neon_saba>; -def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16, - 0b0, 0b0, 0b01, 0b01111, Neon_saba>; -def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16, - 0b1, 0b0, 0b01, 0b01111, Neon_saba>; -def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32, - 0b0, 0b0, 0b10, 0b01111, Neon_saba>; -def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32, - 0b1, 0b0, 0b10, 0b01111, Neon_saba>; - - -// Vector Absolute Difference (Signed, Unsigned) -defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>; -defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>; - -// Vector Absolute Difference (Floating Point) -defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd", - int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>; - -// Vector Reciprocal Step (Floating Point) -defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps", - int_arm_neon_vrecps, - v2f32, v4f32, v2f64, 0>; - -// Vector Reciprocal Square Root Step (Floating Point) -defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", - int_arm_neon_vrsqrts, - v2f32, v4f32, v2f64, 0>; - -// Vector Comparisons - -def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETEQ)>; -def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETUGE)>; -def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETGE)>; -def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETUGT)>; -def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs), - (Neon_cmp node:$lhs, node:$rhs, SETGT)>; - -// NeonI_compare_aliases class: swaps register operands to implement -// comparison aliases, e.g., CMLE is alias for CMGE 
with operands reversed. -class NeonI_compare_aliases - : NeonInstAlias; - -// Vector Comparisons (Integer) - -// Vector Compare Mask Equal (Integer) -let isCommutable =1 in { -defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>; -} - -// Vector Compare Mask Higher or Same (Unsigned Integer) -defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>; - -// Vector Compare Mask Greater Than or Equal (Integer) -defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>; - -// Vector Compare Mask Higher (Unsigned Integer) -defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>; - -// Vector Compare Mask Greater Than (Integer) -defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>; - -// Vector Compare Mask Bitwise Test (Integer) -defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>; - -// Vector Compare Mask Less or Same (Unsigned Integer) -// CMLS is alias for CMHS with operands reversed. -def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>; -def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>; -def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>; -def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>; -def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>; -def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>; -def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>; - -// Vector Compare Mask Less Than or Equal (Integer) -// CMLE is alias for CMGE with operands reversed. -def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>; -def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>; -def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>; -def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>; -def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>; -def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>; -def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>; - -// Vector Compare Mask Lower (Unsigned Integer) -// CMLO is alias for CMHI with operands reversed. -def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>; -def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>; -def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>; -def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>; -def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>; -def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>; -def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>; - -// Vector Compare Mask Less Than (Integer) -// CMLT is alias for CMGT with operands reversed. 
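-// For example, "cmlt v0.4s, v1.4s, v2.4s" is accepted by the assembler and
-// encoded as "cmgt v0.4s, v2.4s, v1.4s", i.e. the same instruction with the
-// source registers swapped.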
-def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>; -def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>; -def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>; -def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>; -def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>; -def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>; -def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>; - - -def neon_uimm0_asmoperand : AsmOperandClass -{ - let Name = "UImm0"; - let PredicateMethod = "isUImm<0>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm0 : Operand, ImmLeaf { - let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printNeonUImm0Operand"; - -} - -multiclass NeonI_cmpz_sizes opcode, string asmop, CondCode CC> -{ - def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.8b, $Rn.8b, $Imm", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.16b, $Rn.16b, $Imm", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.4h, $Rn.4h, $Imm", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.8h, $Rn.8h, $Imm", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.2s, $Rn.2s, $Imm", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.4s, $Rn.4s, $Imm", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), - asmop # "\t$Rd.2d, $Rn.2d, $Imm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -// Vector Compare Mask Equal to Zero (Integer) -defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>; - -// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer) -defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>; - -// Vector Compare Mask Greater Than Zero (Signed Integer) -defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>; - -// Vector Compare Mask Less Than or Equal To Zero (Signed Integer) -defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>; - -// Vector Compare Mask Less Than Zero (Signed Integer) -defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>; - -// Vector Comparisons (Floating Point) - -// Vector Compare 
Mask Equal (Floating Point) -let isCommutable =1 in { -defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq, - v2i32, v4i32, v2i64, 0>; -} - -// Vector Compare Mask Greater Than Or Equal (Floating Point) -defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge, - v2i32, v4i32, v2i64, 0>; - -// Vector Compare Mask Greater Than (Floating Point) -defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt, - v2i32, v4i32, v2i64, 0>; - -// Vector Compare Mask Less Than Or Equal (Floating Point) -// FCMLE is alias for FCMGE with operands reversed. -def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>; -def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>; -def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>; - -// Vector Compare Mask Less Than (Floating Point) -// FCMLT is alias for FCMGT with operands reversed. -def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>; -def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>; -def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>; - -def fpzero_izero_asmoperand : AsmOperandClass { - let Name = "FPZeroIZero"; - let ParserMethod = "ParseFPImm0AndImm0Operand"; - let DiagnosticType = "FPZero"; -} - -def fpzz32 : Operand, - ComplexPattern { - let ParserMatchClass = fpzero_izero_asmoperand; - let PrintMethod = "printFPZeroOperand"; - let DecoderMethod = "DecodeFPZeroOperand"; -} - -multiclass NeonI_fpcmpz_sizes opcode, - string asmop, CondCode CC> -{ - def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.2s, $Rn.2s, $FPImm", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.4s, $Rn.4s, $FPImm", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm), - asmop # "\t$Rd.2d, $Rn.2d, $FPImm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -// Vector Compare Mask Equal to Zero (Floating Point) -defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>; - -// Vector Compare Mask Greater Than or Equal to Zero (Floating Point) -defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>; - -// Vector Compare Mask Greater Than Zero (Floating Point) -defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>; - -// Vector Compare Mask Less Than or Equal To Zero (Floating Point) -defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>; - -// Vector Compare Mask Less Than Zero (Floating Point) -defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>; - -// Vector Absolute Comparisons (Floating Point) - -// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point) -defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge", - int_arm_neon_vacge, - v2i32, v4i32, v2i64, 0>; - -// Vector Absolute Compare Mask Greater Than (Floating Point) -defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt", - 
int_arm_neon_vacgt, - v2i32, v4i32, v2i64, 0>; - -// Vector Absolute Compare Mask Less Than Or Equal (Floating Point) -// FACLE is alias for FACGE with operands reversed. -def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>; -def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>; -def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>; - -// Vector Absolute Compare Mask Less Than (Floating Point) -// FACLT is alias for FACGT with operands reversed. -def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>; -def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>; -def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>; - -// Vector halving add (Integer Signed, Unsigned) -defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd", - int_arm_neon_vhadds, 1>; -defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd", - int_arm_neon_vhaddu, 1>; - -// Vector halving sub (Integer Signed, Unsigned) -defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub", - int_arm_neon_vhsubs, 0>; -defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub", - int_arm_neon_vhsubu, 0>; - -// Vector rouding halving add (Integer Signed, Unsigned) -defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd", - int_arm_neon_vrhadds, 1>; -defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd", - int_arm_neon_vrhaddu, 1>; - -// Vector Saturating add (Integer Signed, Unsigned) -defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd", - int_arm_neon_vqadds, 1>; -defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd", - int_arm_neon_vqaddu, 1>; - -// Vector Saturating sub (Integer Signed, Unsigned) -defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub", - int_arm_neon_vqsubs, 1>; -defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub", - int_arm_neon_vqsubu, 1>; - -// Vector Shift Left (Signed and Unsigned Integer) -defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl", - int_arm_neon_vshifts, 1>; -defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl", - int_arm_neon_vshiftu, 1>; - -// Vector Saturating Shift Left (Signed and Unsigned Integer) -defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl", - int_arm_neon_vqshifts, 1>; -defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl", - int_arm_neon_vqshiftu, 1>; - -// Vector Rouding Shift Left (Signed and Unsigned Integer) -defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl", - int_arm_neon_vrshifts, 1>; -defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl", - int_arm_neon_vrshiftu, 1>; - -// Vector Saturating Rouding Shift Left (Signed and Unsigned Integer) -defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl", - int_arm_neon_vqrshifts, 1>; -defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl", - int_arm_neon_vqrshiftu, 1>; - -// Vector Maximum (Signed and Unsigned Integer) -defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>; -defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>; - -// Vector Minimum (Signed and Unsigned Integer) -defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>; -defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>; - -// Vector Maximum (Floating Point) -defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax", - int_arm_neon_vmaxs, - v2f32, v4f32, v2f64, 1>; - -// Vector Minimum 
(Floating Point) -defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin", - int_arm_neon_vmins, - v2f32, v4f32, v2f64, 1>; - -// Vector maxNum (Floating Point) - prefer a number over a quiet NaN -defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm", - int_aarch64_neon_vmaxnm, - v2f32, v4f32, v2f64, 1>; - -// Vector minNum (Floating Point) - prefer a number over a quiet NaN -defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm", - int_aarch64_neon_vminnm, - v2f32, v4f32, v2f64, 1>; - -// Vector Maximum Pairwise (Signed and Unsigned Integer) -defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>; -defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>; - -// Vector Minimum Pairwise (Signed and Unsigned Integer) -defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>; -defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>; - -// Vector Maximum Pairwise (Floating Point) -defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp", - int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>; - -// Vector Minimum Pairwise (Floating Point) -defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp", - int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>; - -// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN -defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp", - int_aarch64_neon_vpmaxnm, - v2f32, v4f32, v2f64, 1>; - -// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN -defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp", - int_aarch64_neon_vpminnm, - v2f32, v4f32, v2f64, 1>; - -// Vector Addition Pairwise (Integer) -defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>; - -// Vector Addition Pairwise (Floating Point) -defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp", - int_arm_neon_vpadd, - v2f32, v4f32, v2f64, 1>; - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -// Vector Saturating Doubling Multiply High -defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh", - int_arm_neon_vqdmulh, 1>; - -// Vector Saturating Rounding Doubling Multiply High -defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh", - int_arm_neon_vqrdmulh, 1>; - -// Vector Multiply Extended (Floating Point) -defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx", - int_aarch64_neon_vmulx, - v2f32, v4f32, v2f64, 1>; -} - -// Patterns to match llvm.aarch64.* intrinsic for -// ADDP, SMINP, UMINP, SMAXP, UMAXP having i32 as output -class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST> - : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))), - (EXTRACT_SUBREG - (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))), - sub_32)>; - -def : Neon_VectorPair_v2i32_pattern; -def : Neon_VectorPair_v2i32_pattern; -def : Neon_VectorPair_v2i32_pattern; -def : Neon_VectorPair_v2i32_pattern; -def : Neon_VectorPair_v2i32_pattern; - -// Vector Immediate Instructions - -multiclass neon_mov_imm_shift_asmoperands<string PREFIX> -{ - def _asmoperand : AsmOperandClass - { - let Name = "NeonMovImmShift" # PREFIX; - let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands"; - let PredicateMethod = "isNeonMovImmShift" # PREFIX; - } -} - -// Definition of vector immediates shift operands - -// The selectable use-cases extract the shift operation -// information from the OpCmode fields encoded in the immediate. 
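As an aside for readers of this deleted code: the modified-immediate operands defined below carry their shift information inside the 4-bit OpCmode/cmode field, and the predicates call a helper named A64Imms::decodeNeonModShiftImm to recover it. The following is a minimal, self-contained C++ sketch of that decoding under the ARMv8 AdvSIMD expand-immediate rules; the helper's name and out-parameters are taken from the call sites below, not reproduced from its actual implementation.

#include <cstdint>

// Sketch: given the 4-bit OpCmode field of a NEON modified immediate,
// report whether it carries a shift, the shift amount, and whether the
// shift fills with ones (MSL) or with zeros (LSL).
static bool decodeNeonModShiftImmSketch(unsigned OpCmode, unsigned &ShiftImm,
                                        unsigned &ShiftOnesIn) {
  ShiftImm = 0;
  ShiftOnesIn = 0;
  if ((OpCmode & 0x9) == 0x0) {        // cmode 0xx0: 32-bit, LSL #0/8/16/24
    ShiftImm = ((OpCmode >> 1) & 0x3) * 8;
  } else if ((OpCmode & 0xd) == 0x8) { // cmode 10x0: 16-bit, LSL #0/8
    ShiftImm = ((OpCmode >> 1) & 0x1) * 8;
  } else if ((OpCmode & 0xe) == 0xc) { // cmode 110x: 32-bit, MSL #8/16
    ShiftImm = 8 << (OpCmode & 0x1);
    ShiftOnesIn = 1;
  } else {                             // cmode 111x: per-byte, bytemask, fmov
    return false;                      // no shift field in these forms
  }
  return true;
}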
-def neon_mod_shift_imm_XFORM : SDNodeXFormgetZExtValue(); - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); - if (!HasShift) return SDValue(); - return CurDAG->getTargetConstant(ShiftImm, MVT::i32); -}]>; - -// Vector immediates shift operands which accept LSL and MSL -// shift operators with shift value in the range of 0, 8, 16, 24 (LSL), -// or 0, 8 (LSLH) or 8, 16 (MSL). -defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">; -defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">; -// LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24 -defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">; - -multiclass neon_mov_imm_shift_operands -{ - def _operand : Operand, ImmLeaf - { - let PrintMethod = - "printNeonMovImmShiftOperand"; - let DecoderMethod = - "DecodeNeonMovImmShiftOperand"; - let ParserMatchClass = - !cast("neon_mov_imm_" # PREFIX # HALF # "_asmoperand"); - } -} - -defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && !ShiftOnesIn); -}]>; - -defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && ShiftOnesIn); -}]>; - -defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && !ShiftOnesIn); -}]>; - -def neon_uimm1_asmoperand : AsmOperandClass -{ - let Name = "UImm1"; - let PredicateMethod = "isUImm<1>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm2_asmoperand : AsmOperandClass -{ - let Name = "UImm2"; - let PredicateMethod = "isUImm<2>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm8_asmoperand : AsmOperandClass -{ - let Name = "UImm8"; - let PredicateMethod = "isUImm<8>"; - let RenderMethod = "addImmOperands"; -} - -def neon_uimm8 : Operand, ImmLeaf { - let ParserMatchClass = neon_uimm8_asmoperand; - let PrintMethod = "printUImmHexOperand"; -} - -def neon_uimm64_mask_asmoperand : AsmOperandClass -{ - let Name = "NeonUImm64Mask"; - let PredicateMethod = "isNeonUImm64Mask"; - let RenderMethod = "addNeonUImm64MaskOperands"; -} - -// MCOperand for 64-bit bytemask with each byte having only the -// value 0x00 and 0xff is encoded as an unsigned 8-bit value -def neon_uimm64_mask : Operand, ImmLeaf { - let ParserMatchClass = neon_uimm64_mask_asmoperand; - let PrintMethod = "printNeonUImm64MaskOperand"; -} - -multiclass NeonI_mov_imm_lsl_sizes -{ - // shift zeros, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (timm:$Imm), - (neon_mov_imm_LSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (timm:$Imm), - (neon_mov_imm_LSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bits<2> Simm; - let cmode = 
{0b0, Simm{1}, Simm{0}, 0b0}; - } - - // shift zeros, per halfword - def _4H : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (timm:$Imm), - (neon_mov_imm_LSLH_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b0}; - } - - def _8H : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (timm:$Imm), - (neon_mov_imm_LSLH_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b0}; - } -} - -multiclass NeonI_mov_imm_with_constraint_lsl_sizes -{ - let Constraints = "$src = $Rd" in { - // shift zeros, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (v2i32 VPR64:$src), - (v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (v4i32 VPR128:$src), - (v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bits<2> Simm; - let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; - } - - // shift zeros, per halfword - def _4H : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), - [(set (v4i16 VPR64:$Rd), - (v4i16 (opnode (v4i16 VPR64:$src), - (v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b1}; - } - - def _8H : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm), - !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), - [(set (v8i16 VPR128:$Rd), - (v8i16 (opnode (v8i16 VPR128:$src), - (v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSL_operand:$Simm)))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]> { - bit Simm; - let cmode = {0b1, 0b0, Simm, 0b1}; - } - } -} - -multiclass NeonI_mov_imm_msl_sizes -{ - // shift ones, per word - def _2S : NeonI_1VModImm<0b0, op, - (outs VPR64:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_MSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), - [(set (v2i32 VPR64:$Rd), - (v2i32 (opnode (timm:$Imm), - (neon_mov_imm_MSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b1, 0b0, Simm}; - } - - def _4S : NeonI_1VModImm<0b1, op, - (outs VPR128:$Rd), - (ins neon_uimm8:$Imm, - neon_mov_imm_MSL_operand:$Simm), - !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), - [(set (v4i32 VPR128:$Rd), - (v4i32 (opnode (timm:$Imm), - (neon_mov_imm_MSL_operand:$Simm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - bit Simm; - let cmode = {0b1, 0b1, 0b0, Simm}; - } -} - -// Vector Move Immediate Shifted -let isReMaterializable = 1 in { -defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>; -} - -// Vector Move 
Inverted Immediate Shifted -let isReMaterializable = 1 in { -defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>; -} - -// Vector Bitwise Bit Clear (AND NOT) - immediate -let isReMaterializable = 1 in { -defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1, - and, Neon_mvni>; -} - -// Vector Bitwise OR - immediate - -let isReMaterializable = 1 in { -defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0, - or, Neon_movi>; -} - -// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate -// LowerBUILD_VECTOR favors lowering MOVI over MVNI. -// BIC immediate instructions selection requires additional patterns to -// transform Neon_movi operands into BIC immediate operands - -def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{ - unsigned OpCmode = N->getZExtValue(); - unsigned ShiftImm; - unsigned ShiftOnesIn; - (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); - // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1 - // Transform encoded shift amount 0 to 1 and 1 to 0. - return CurDAG->getTargetConstant(!ShiftImm, MVT::i32); -}]>; - -def neon_mov_imm_LSLH_transform_operand - : ImmLeaf<i32, [{ - unsigned ShiftImm; - unsigned ShiftOnesIn; - unsigned HasShift = - A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); - return (HasShift && !ShiftOnesIn); }], - neon_mov_imm_LSLH_transform_XFORM>; - -// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL 8) -// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff) -def : Pat<(v4i16 (and VPR64:$src, - (v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm)))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -// Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0xff, LSL 8) -// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff) -def : Pat<(v8i16 (and VPR128:$src, - (v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm)))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -def : Pat<(v8i8 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v2i32 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v1i64 (and VPR64:$src, - (bitconvert(v4i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_4H VPR64:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -def : Pat<(v16i8 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v4i32 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; -def : Pat<(v2i64 (and VPR128:$src, - (bitconvert(v8i16 (Neon_movi 255, - neon_mov_imm_LSLH_transform_operand:$Simm))))), - (BICvi_lsl_8H VPR128:$src, 255, - neon_mov_imm_LSLH_transform_operand:$Simm)>; - -multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode, - SDPatternOperator neonopnode, - Instruction INST4H, Instruction INST8H, - Instruction INST2S, Instruction INST4S> { - def : Pat<(v8i8 (opnode VPR64:$src, - (bitconvert(v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i32 (opnode VPR64:$src, - (bitconvert(v4i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v1i64 (opnode VPR64:$src, - (bitconvert(v4i16 
(neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4H VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v16i8 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v4i32 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i64 (opnode VPR128:$src, - (bitconvert(v8i16 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST8H VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v8i8 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v4i16 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v1i64 (opnode VPR64:$src, - (bitconvert(v2i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST2S VPR64:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - - def : Pat<(v16i8 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v8i16 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; - def : Pat<(v2i64 (opnode VPR128:$src, - (bitconvert(v4i32 (neonopnode timm:$Imm, - neon_mov_imm_LSLH_operand:$Simm))))), - (INST4S VPR128:$src, neon_uimm8:$Imm, - neon_mov_imm_LSLH_operand:$Simm)>; -} - -// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate -defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H, - BICvi_lsl_2S, BICvi_lsl_4S>; - -// Additional patterns for Vector Bitwise OR - immediate -defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H, - ORRvi_lsl_2S, ORRvi_lsl_4S>; - - -// Vector Move Immediate Masked -let isReMaterializable = 1 in { -defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>; -} - -// Vector Move Inverted Immediate Masked -let isReMaterializable = 1 in { -defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>; -} - -class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane, - Instruction inst, RegisterOperand VPRC> - : NeonInstAlias; - -// Aliases for Vector Move Immediate Shifted -def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>; - -// Aliases for Vector Move Inverted Immediate Shifted -def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>; - -// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate -def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>; - -// Aliases for Vector Bitwise OR - 
immediate -def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>; -def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>; - -// Vector Move Immediate - per byte -let isReMaterializable = 1 in { -def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0, - (outs VPR64:$Rd), (ins neon_uimm8:$Imm), - "movi\t$Rd.8b, $Imm", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} - -def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0, - (outs VPR128:$Rd), (ins neon_uimm8:$Imm), - "movi\t$Rd.16b, $Imm", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Move Immediate - bytemask, per double word -let isReMaterializable = 1 in { -def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1, - (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm), - "movi\t$Rd.2d, $Imm", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Move Immediate - bytemask, one doubleword - -let isReMaterializable = 1 in { -def MOVIdi : NeonI_1VModImm<0b0, 0b1, - (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm), - "movi\t$Rd, $Imm", - [(set (v1i64 FPR64:$Rd), - (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))], - NoItinerary>, - Sched<[WriteFPALU]> { - let cmode = 0b1110; -} -} - -// Vector Floating Point Move Immediate - -class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy, - Operand immOpType, bit q, bit op> - : NeonI_1VModImm, - Sched<[WriteFPALU]> { - let cmode = 0b1111; - } - -let isReMaterializable = 1 in { -def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>; -def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>; -def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>; -} - -// Vector Shift (Immediate) - -// Shift Right/Left Immediate - The immh:immb field of these shifts is encoded -// as follows: -// -// Offset Encoding -// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0> -// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0> -// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0> -// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0> -// -// The shift right immediate amount, in the range 1 to element bits, is computed -// as (2 * Offset) - UInt(immh:immb). The shift left immediate amount, in the range 0 -// to element bits - 1, is computed as UInt(immh:immb) - Offset. 
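To make the arithmetic above concrete, here is a small illustrative C++ model of the immh:immb mapping (Offset/Esize is the element width in bits; the helper names are invented for this sketch and are not the EncoderMethod/DecoderMethod hooks referenced below):

#include <cassert>

// Encode/decode a shift-right amount (1..esize) and a shift-left amount
// (0..esize-1) into the 7-bit immh:immb field, per the table above.
static unsigned encodeShiftRight(unsigned Esize, unsigned Shift) {
  assert(Shift >= 1 && Shift <= Esize);
  return 2 * Esize - Shift; // esize=8: shift 8 -> 0b0001000, shift 1 -> 0b0001111
}
static unsigned decodeShiftRight(unsigned Esize, unsigned ImmHB) {
  return 2 * Esize - ImmHB;
}
static unsigned encodeShiftLeft(unsigned Esize, unsigned Shift) {
  assert(Shift < Esize);
  return Esize + Shift;     // the esize term doubles as the leading '1' marker
}
static unsigned decodeShiftLeft(unsigned Esize, unsigned ImmHB) {
  return ImmHB - Esize;
}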
- -class shr_imm_asmoperands : AsmOperandClass { - let Name = "ShrImm" # OFFSET; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "ShrImm" # OFFSET; -} - -class shr_imm : Operand { - let EncoderMethod = "getShiftRightImm" # OFFSET; - let DecoderMethod = "DecodeShiftRightImm" # OFFSET; - let ParserMatchClass = - !cast("shr_imm" # OFFSET # "_asmoperand"); -} - -def shr_imm8_asmoperand : shr_imm_asmoperands<"8">; -def shr_imm16_asmoperand : shr_imm_asmoperands<"16">; -def shr_imm32_asmoperand : shr_imm_asmoperands<"32">; -def shr_imm64_asmoperand : shr_imm_asmoperands<"64">; - -def shr_imm8 : shr_imm<"8">, ImmLeaf 0 && Imm <= 8;}]>; -def shr_imm16 : shr_imm<"16">, ImmLeaf 0 && Imm <= 16;}]>; -def shr_imm32 : shr_imm<"32">, ImmLeaf 0 && Imm <= 32;}]>; -def shr_imm64 : shr_imm<"64">, ImmLeaf 0 && Imm <= 64;}]>; - -class shl_imm_asmoperands : AsmOperandClass { - let Name = "ShlImm" # OFFSET; - let RenderMethod = "addImmOperands"; - let DiagnosticType = "ShlImm" # OFFSET; -} - -class shl_imm : Operand { - let EncoderMethod = "getShiftLeftImm" # OFFSET; - let DecoderMethod = "DecodeShiftLeftImm" # OFFSET; - let ParserMatchClass = - !cast("shl_imm" # OFFSET # "_asmoperand"); -} - -def shl_imm8_asmoperand : shl_imm_asmoperands<"8">; -def shl_imm16_asmoperand : shl_imm_asmoperands<"16">; -def shl_imm32_asmoperand : shl_imm_asmoperands<"32">; -def shl_imm64_asmoperand : shl_imm_asmoperands<"64">; - -def shl_imm8 : shl_imm<"8">, ImmLeaf= 0 && Imm < 8;}]>; -def shl_imm16 : shl_imm<"16">, ImmLeaf= 0 && Imm < 16;}]>; -def shl_imm32 : shl_imm<"32">, ImmLeaf= 0 && Imm < 32;}]>; -def shl_imm64 : shl_imm<"64">, ImmLeaf= 0 && Imm < 64;}]>; - -class N2VShift opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VShL opcode, string asmop> { - // 64-bit vector types. - def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // 128-bit vector types. 
- def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> { - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - } -} - -multiclass NeonI_N2VShR opcode, string asmop, SDNode OpNode> { - def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Shift left - -defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; - -// Additional patterns to match vector shift left by immediate. -// (v1i8/v1i16/v1i32 types) -def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shl_imm8:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shl_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shl_imm16:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shl_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shl_imm32:$Imm)))))), - (EXTRACT_SUBREG - (SHLvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shl_imm32:$Imm), - sub_32)>; - -// Shift right -defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>; -defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>; - -// Additional patterns to match vector shift right by immediate. 
-// (v1i8/v1i16/v1i32 types) -def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shr_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shr_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))), - (EXTRACT_SUBREG - (SSHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shr_imm32:$Imm), - sub_32)>; -def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), - (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - shr_imm8:$Imm), - sub_8)>; -def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), - (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - shr_imm16:$Imm), - sub_16)>; -def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), - (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))), - (EXTRACT_SUBREG - (USHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - shr_imm32:$Imm), - sub_32)>; - -def Neon_High16B : PatFrag<(ops node:$in), - (extract_subvector (v16i8 node:$in), (iPTR 8))>; -def Neon_High8H : PatFrag<(ops node:$in), - (extract_subvector (v8i16 node:$in), (iPTR 4))>; -def Neon_High4S : PatFrag<(ops node:$in), - (extract_subvector (v4i32 node:$in), (iPTR 2))>; -def Neon_High2D : PatFrag<(ops node:$in), - (extract_subvector (v2i64 node:$in), (iPTR 1))>; -def Neon_High4float : PatFrag<(ops node:$in), - (extract_subvector (v4f32 node:$in), (iPTR 2))>; -def Neon_High2double : PatFrag<(ops node:$in), - (extract_subvector (v2f64 node:$in), (iPTR 1))>; - -def Neon_Low16B : PatFrag<(ops node:$in), - (v8i8 (extract_subvector (v16i8 node:$in), - (iPTR 0)))>; -def Neon_Low8H : PatFrag<(ops node:$in), - (v4i16 (extract_subvector (v8i16 node:$in), - (iPTR 0)))>; -def Neon_Low4S : PatFrag<(ops node:$in), - (v2i32 (extract_subvector (v4i32 node:$in), - (iPTR 0)))>; -def Neon_Low2D : PatFrag<(ops node:$in), - (v1i64 (extract_subvector (v2i64 node:$in), - (iPTR 0)))>; -def Neon_Low4float : PatFrag<(ops node:$in), - (v2f32 (extract_subvector (v4f32 node:$in), - (iPTR 0)))>; -def Neon_Low2double : PatFrag<(ops node:$in), - (v1f64 (extract_subvector (v2f64 node:$in), - (iPTR 0)))>; - -class N2VShiftLong opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, - Operand ImmTy, SDPatternOperator ExtOp> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -class N2VShiftLongHigh opcode, string asmop, string DestT, - string SrcT, ValueType DestTy, ValueType SrcTy, - int StartIndex, Operand ImmTy, - SDPatternOperator ExtOp, PatFrag getTop> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VShLL opcode, string asmop, - SDNode ExtOp> { - // 64-bit vector types. 
- def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, - shl_imm8, ExtOp> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, - shl_imm16, ExtOp> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, - shl_imm32, ExtOp> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // 128-bit vector types - def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8, - 8, shl_imm8, ExtOp, Neon_High16B> { - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - } - - def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16, - 4, shl_imm16, ExtOp, Neon_High8H> { - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - } - - def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32, - 2, shl_imm32, ExtOp, Neon_High4S> { - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - } - - // Use other patterns to match when the immediate is 0. - def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))), - (!cast(prefix # "_8B") VPR64:$Rn, 0)>; - - def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))), - (!cast(prefix # "_4H") VPR64:$Rn, 0)>; - - def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))), - (!cast(prefix # "_2S") VPR64:$Rn, 0)>; - - def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))), - (!cast(prefix # "_16B") VPR128:$Rn, 0)>; - - def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))), - (!cast(prefix # "_8H") VPR128:$Rn, 0)>; - - def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))), - (!cast(prefix # "_4S") VPR128:$Rn, 0)>; -} - -// Shift left long -defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; -defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; - -class NeonI_ext_len_alias - : NeonInstAlias; - -// Signed integer lengthen (vector) is alias for SSHLL Vd, Vn, #0 -// Signed integer lengthen (vector, second part) is alias for SSHLL2 Vd, Vn, #0 -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. -def SXTLvv_8B : NeonI_ext_len_alias<"sxtl", ".8h", ".8b", SSHLLvvi_8B, VPR128, VPR64>; -def SXTLvv_4H : NeonI_ext_len_alias<"sxtl", ".4s", ".4h", SSHLLvvi_4H, VPR128, VPR64>; -def SXTLvv_2S : NeonI_ext_len_alias<"sxtl", ".2d", ".2s", SSHLLvvi_2S, VPR128, VPR64>; -def SXTL2vv_16B : NeonI_ext_len_alias<"sxtl2", ".8h", ".16b", SSHLLvvi_16B, VPR128, VPR128>; -def SXTL2vv_8H : NeonI_ext_len_alias<"sxtl2", ".4s", ".8h", SSHLLvvi_8H, VPR128, VPR128>; -def SXTL2vv_4S : NeonI_ext_len_alias<"sxtl2", ".2d", ".4s", SSHLLvvi_4S, VPR128, VPR128>; - -// Unsigned integer lengthen (vector) is alias for USHLL Vd, Vn, #0 -// Unsigned integer lengthen (vector, second part) is alias for USHLL2 Vd, Vn, #0 -// FIXME: This is actually the preferred syntax but TableGen can't deal with -// custom printing of aliases. 
-def UXTLvv_8B : NeonI_ext_len_alias<"uxtl", ".8h", ".8b", USHLLvvi_8B, VPR128, VPR64>; -def UXTLvv_4H : NeonI_ext_len_alias<"uxtl", ".4s", ".4h", USHLLvvi_4H, VPR128, VPR64>; -def UXTLvv_2S : NeonI_ext_len_alias<"uxtl", ".2d", ".2s", USHLLvvi_2S, VPR128, VPR64>; -def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b", USHLLvvi_16B, VPR128, VPR128>; -def UXTL2vv_8H : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h", USHLLvvi_8H, VPR128, VPR128>; -def UXTL2vv_4S : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s", USHLLvvi_4S, VPR128, VPR128>; - -def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>; -def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>; -def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>; - -// Rounding/Saturating shift -class N2VShift_RQ opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -// shift right (vector by immediate) -multiclass NeonI_N2VShR_RQ opcode, string asmop, - SDPatternOperator OpNode> { - def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -multiclass NeonI_N2VShL_Q opcode, string asmop, - SDPatternOperator OpNode> { - // 64-bit vector types. - def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types. 
- def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Rounding shift right -defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr", - int_aarch64_neon_vsrshr>; -defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr", - int_aarch64_neon_vurshr>; - -// Saturating shift left unsigned -defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>; - -// Saturating shift left -defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>; -defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; - -class N2VShiftAdd opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDNode OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// Shift Right accumulate -multiclass NeonI_N2VShRAdd opcode, string asmop, SDNode OpNode> { - def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// Shift right and accumulate -defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>; -defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>; - -// Rounding shift accumulate -class N2VShiftAdd_R opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_N2VShRAdd_R opcode, string asmop, - SDPatternOperator OpNode> { - def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - OpNode> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - OpNode> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - OpNode> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - OpNode> { - let Inst{22} = 0b1; - } -} - -// 
Rounding shift right and accumulate -defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>; -defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>; - -// Shift insert by immediate -class N2VShiftIns opcode, string asmop, string T, - RegisterOperand VPRC, ValueType Ty, Operand ImmTy, - SDPatternOperator OpNode> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// shift left insert (vector by immediate) -multiclass NeonI_N2VShLIns opcode, string asmop> { - def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, - int_aarch64_neon_vsli> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, - int_aarch64_neon_vsli> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, - int_aarch64_neon_vsli> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types - def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, - int_aarch64_neon_vsli> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, - int_aarch64_neon_vsli> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, - int_aarch64_neon_vsli> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, - int_aarch64_neon_vsli> { - let Inst{22} = 0b1; - } -} - -// shift right insert (vector by immediate) -multiclass NeonI_N2VShRIns opcode, string asmop> { - // 64-bit vector types. - def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, - int_aarch64_neon_vsri> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, - int_aarch64_neon_vsri> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, - int_aarch64_neon_vsri> { - let Inst{22-21} = 0b01; - } - - // 128-bit vector types - def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, - int_aarch64_neon_vsri> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, - int_aarch64_neon_vsri> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, - int_aarch64_neon_vsri> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, - int_aarch64_neon_vsri> { - let Inst{22} = 0b1; - } -} - -// Shift left and insert -defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">; - -// Shift right and insert -defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">; - -class N2VShR_Narrow opcode, string asmop, string DestT, - string SrcT, Operand ImmTy> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -class N2VShR_Narrow_Hi opcode, string asmop, string DestT, - string SrcT, Operand ImmTy> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// left long shift by immediate -multiclass NeonI_N2VShR_Narrow opcode, string asmop> { - def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> { - let Inst{22-19} = 0b0001; - } - - def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> { - let Inst{22-20} = 0b001; - } - - def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", 
shr_imm32> { - let Inst{22-21} = 0b01; - } - - // Shift Narrow High - def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h", - shr_imm8> { - let Inst{22-19} = 0b0001; - } - - def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s", - shr_imm16> { - let Inst{22-20} = 0b001; - } - - def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d", - shr_imm32> { - let Inst{22-21} = 0b01; - } -} - -// Shift right narrow -defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">; - -// Shift right narrow (prefix Q is saturating, prefix R is rounding) -defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">; -defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">; -defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">; -defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">; -defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">; -defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">; -defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">; - -def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn), - (v2i64 (concat_vectors (v1i64 node:$Rm), - (v1i64 node:$Rn)))>; -def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn), - (v8i16 (concat_vectors (v4i16 node:$Rm), - (v4i16 node:$Rn)))>; -def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn), - (v4i32 (concat_vectors (v2i32 node:$Rm), - (v2i32 node:$Rn)))>; -def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn), - (v4f32 (concat_vectors (v2f32 node:$Rm), - (v2f32 node:$Rn)))>; -def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn), - (v2f64 (concat_vectors (v1f64 node:$Rm), - (v1f64 node:$Rn)))>; - -def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs), - (v8i16 (srl (v8i16 node:$lhs), - (v8i16 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs), - (v4i32 (srl (v4i32 node:$lhs), - (v4i32 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs), - (v2i64 (srl (v2i64 node:$lhs), - (v2i64 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs), - (v8i16 (sra (v8i16 node:$lhs), - (v8i16 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs), - (v4i32 (sra (v4i32 node:$lhs), - (v4i32 (Neon_vdup (i32 node:$rhs)))))>; -def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs), - (v2i64 (sra (v2i64 node:$lhs), - (v2i64 (Neon_vdup (i32 node:$rhs)))))>; - -// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors) -multiclass Neon_shiftNarrow_patterns { - def : Pat<(v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") VPR128:$Rn, - (i32 shr_imm8:$Imm)))), - (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>; - def : Pat<(v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") VPR128:$Rn, - (i32 shr_imm16:$Imm)))), - (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>; - def : Pat<(v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") VPR128:$Rn, - (i32 shr_imm32:$Imm)))), - (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>; - - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") - VPR128:$Rn, (i32 shr_imm8:$Imm))))))), - (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") - VPR128:$Rn, (i32 shr_imm16:$Imm))))))), - (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert - (v2i32 (trunc 
(!cast("Neon_" # shr # "Imm2D") - VPR128:$Rn, (i32 shr_imm32:$Imm))))))), - (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; -} - -multiclass Neon_shiftNarrow_QR_patterns { - def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)), - (!cast(prefix # "_8B") VPR128:$Rn, imm:$Imm)>; - def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)), - (!cast(prefix # "_4H") VPR128:$Rn, imm:$Imm)>; - def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)), - (!cast(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; - - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v8i8 - (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))), - (!cast(prefix # "_16B") - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v4i16 - (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))), - (!cast(prefix # "_8H") - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; - def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (v2i32 - (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))), - (!cast(prefix # "_4S") - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, imm:$Imm)>; -} - -defm : Neon_shiftNarrow_patterns<"lshr">; -defm : Neon_shiftNarrow_patterns<"ashr">; - -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; -defm : Neon_shiftNarrow_QR_patterns; - -// Convert fix-point and float-pointing -class N2VCvt_Fx opcode, string asmop, string T, - RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy, - Operand ImmTy, SDPatternOperator IntOp> - : NeonI_2VShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_N2VCvt_Fx2fp opcode, string asmop, - SDPatternOperator IntOp> { - def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64, - shr_imm64, IntOp> { - let Inst{22} = 0b1; - } -} - -multiclass NeonI_N2VCvt_Fp2fx opcode, string asmop, - SDPatternOperator IntOp> { - def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32, - shr_imm32, IntOp> { - let Inst{22-21} = 0b01; - } - - def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64, - shr_imm64, IntOp> { - let Inst{22} = 0b1; - } -} - -// Convert fixed-point to floating-point -defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf", - int_arm_neon_vcvtfxs2fp>; -defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf", - int_arm_neon_vcvtfxu2fp>; - -// Convert floating-point to fixed-point -defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs", - int_arm_neon_vcvtfp2fxs>; -defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu", - int_arm_neon_vcvtfp2fxu>; - -multiclass Neon_sshll2_0 -{ - def _v8i8 : PatFrag<(ops node:$Rn), - (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>; - def _v4i16 : PatFrag<(ops node:$Rn), - (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>; - def _v2i32 : PatFrag<(ops node:$Rn), - (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>; -} - -defm NI_sext_high : Neon_sshll2_0; -defm NI_zext_high : Neon_sshll2_0; - - 
-//===----------------------------------------------------------------------===// -// Multiclasses for NeonI_Across -//===----------------------------------------------------------------------===// - -// Variant 1 - -multiclass NeonI_2VAcross_1 opcode, - string asmop, SDPatternOperator opnode> -{ - def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode, - (outs FPR16:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.8b", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode, - (outs FPR16:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.16b", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode, - (outs FPR32:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.4h", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.8h", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - // _1d2s doesn't exist! - - def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode, - (outs FPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (v1i64 FPR64:$Rd), - (v1i64 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>; -defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>; - -// Variant 2 - -multiclass NeonI_2VAcross_2 opcode, - string asmop, SDPatternOperator opnode> -{ - def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode, - (outs FPR8:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.8b", - [(set (v1i8 FPR8:$Rd), - (v1i8 (opnode (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode, - (outs FPR8:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.16b", - [(set (v1i8 FPR8:$Rd), - (v1i8 (opnode (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode, - (outs FPR16:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd, $Rn.4h", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode, - (outs FPR16:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.8h", - [(set (v1i16 FPR16:$Rd), - (v1i16 (opnode (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - // _1s2s doesn't exist! 
- - def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (v1i32 FPR32:$Rd), - (v1i32 (opnode (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>; -defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>; - -defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>; -defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>; - -defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>; - -// Variant 3 - -multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size, - string asmop, SDPatternOperator opnode> { - def _1s4s: NeonI_2VAcross<0b1, u, size, opcode, - (outs FPR32:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd, $Rn.4s", - [(set (f32 FPR32:$Rd), - (f32 (opnode (v4f32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv", - int_aarch64_neon_vmaxnmv>; -defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv", - int_aarch64_neon_vminnmv>; - -defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv", - int_aarch64_neon_vmaxv>; -defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv", - int_aarch64_neon_vminv>; - -// The following are for instruction class (Perm) - -class NeonI_Permute<bit q, bits<2> size, bits<3> opcode, - string asmop, RegisterOperand OpVPR, string OpS, - SDPatternOperator opnode, ValueType Ty> - : NeonI_Perm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Perm_pat<bits<3> opcode, string asmop, - SDPatternOperator opnode> { - def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, - VPR64, "8b", opnode, v8i8>; - def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, - VPR128, "16b", opnode, v16i8>; - def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, - VPR64, "4h", opnode, v4i16>; - def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, - VPR128, "8h", opnode, v8i16>; - def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, - VPR64, "2s", opnode, v2i32>; - def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, - VPR128, "4s", opnode, v4i32>; - def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, - VPR128, "2d", opnode, v2i64>; -} - -defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>; -defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>; -defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>; -defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>; -defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>; -defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>; - -multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> { - def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), - (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>; - - def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), - (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>; - - def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), - (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>; -} - -defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>; -defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>; -defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>; -defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>; -defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>; -defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>; - -// The following are for instruction class (3V Diff) - -// normal long/long2 pattern -class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator ext, - 
RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDL_s opcode, - string asmop, SDPatternOperator opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, sext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, sext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, sext, VPR64, v2i64, v2i32>; - } -} - -multiclass NeonI_3VDL2_s opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; - } -} - -multiclass NeonI_3VDL_u opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, zext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, zext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, zext, VPR64, v2i64, v2i32>; - } -} - -multiclass NeonI_3VDL2_u opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; - } -} - -defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>; -defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>; - -defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>; -defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>; - -defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>; -defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>; - -defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>; -defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>; - -// normal wide/wide2 pattern -class NeonI_3VDW size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator ext, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDW_s opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, sext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, sext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, sext, VPR64, v2i64, v2i32>; -} - -defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>; -defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>; - -multiclass NeonI_3VDW2_s opcode, string asmop, - SDPatternOperator opnode> { - def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : 
NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; -} - -defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>; -defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>; - -multiclass NeonI_3VDW_u opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, zext, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, zext, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, zext, VPR64, v2i64, v2i32>; -} - -defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>; -defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>; - -multiclass NeonI_3VDW2_u opcode, string asmop, - SDPatternOperator opnode> { - def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; -} - -defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>; -defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>; - -// Get the high half part of the vector element. -multiclass NeonI_get_high { - def _8h : PatFrag<(ops node:$Rn), - (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn), - (v8i16 (Neon_vdup (i32 8)))))))>; - def _4s : PatFrag<(ops node:$Rn), - (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn), - (v4i32 (Neon_vdup (i32 16)))))))>; - def _2d : PatFrag<(ops node:$Rn), - (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn), - (v2i64 (Neon_vdup (i32 32)))))))>; -} - -defm NI_get_hi : NeonI_get_high; - -// pattern for addhn/subhn with 2 operands -class NeonI_3VDN_addhn_2Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator get_hi, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDN_addhn_2Op opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", - opnode, NI_get_hi_8h, v8i8, v8i16>; - def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", - opnode, NI_get_hi_4s, v4i16, v4i32>; - def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", - opnode, NI_get_hi_2d, v2i32, v2i64>; - } -} - -defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>; -defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; - -// pattern for operation with 2 operands -class NeonI_3VD_2Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - RegisterOperand ResVPR, RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -// normal narrow pattern -multiclass NeonI_3VDN_2Op opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", - opnode, VPR64, VPR128, v8i8, v8i16>; - def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", - opnode, VPR64, VPR128, v4i16, v4i32>; - def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, 
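
In scalar terms, the long ("L") forms above widen both source vectors before operating, the wide ("W") forms widen only the second source, and the "2" forms read the high half of a 128-bit source (the NI_sext_high_*/NI_zext_high_* fragments). A C sketch of the signed 8h/8b cases, with illustrative function names that are not part of this patch:

    #include <stdint.h>

    /* saddl v0.8h, v1.8b, v2.8b: widen both operands, then add. */
    void saddl_8h8b(int16_t d[8], const int8_t a[8], const int8_t b[8]) {
      for (int i = 0; i < 8; i++)
        d[i] = (int16_t)a[i] + (int16_t)b[i];
    }

    /* saddw v0.8h, v1.8h, v2.8b: only the second operand is widened. */
    void saddw_8h8b(int16_t d[8], const int16_t a[8], const int8_t b[8]) {
      for (int i = 0; i < 8; i++)
        d[i] = a[i] + (int16_t)b[i];
    }

    /* saddl2 v0.8h, v1.16b, v2.16b: as saddl, but on lanes 8..15. */
    void saddl2_8h16b(int16_t d[8], const int8_t a[16], const int8_t b[16]) {
      for (int i = 0; i < 8; i++)
        d[i] = (int16_t)a[i + 8] + (int16_t)b[i + 8];
    }
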
opcode, asmop, "2s", "2d", - opnode, VPR64, VPR128, v2i32, v2i64>; - } -} - -defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; -defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; - -// pattern for acle intrinsic with 3 operands -class NeonI_3VDN_3Op size, bits<4> opcode, - string asmop, string ResS, string OpS> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let neverHasSideEffects = 1; -} - -multiclass NeonI_3VDN_3Op_v1 opcode, string asmop> { - def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">; - def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">; - def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">; -} - -defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">; -defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">; - -defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">; -defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">; - -// Patterns have to be separate because there's a SUBREG_TO_REG in the output -// part. -class NarrowHighHalfPat - : Pat<(Neon_combine_2D (v1i64 VPR64:$src), - (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn), - (SrcTy VPR128:$Rm)))))), - (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - VPR128:$Rn, VPR128:$Rm)>; - -// addhn2 patterns -def : NarrowHighHalfPat>; -def : NarrowHighHalfPat>; -def : NarrowHighHalfPat>; - -// subhn2 patterns -def : NarrowHighHalfPat>; -def : NarrowHighHalfPat>; -def : NarrowHighHalfPat>; - -// raddhn2 patterns -def : NarrowHighHalfPat; -def : NarrowHighHalfPat; -def : NarrowHighHalfPat; - -// rsubhn2 patterns -def : NarrowHighHalfPat; -def : NarrowHighHalfPat; -def : NarrowHighHalfPat; - -// pattern that need to extend result -class NeonI_3VDL_Ext size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, ValueType OpSTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_3VDL_zext opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, VPR64, v8i16, v8i8, v8i8>; - def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, VPR64, v4i32, v4i16, v4i16>; - def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR64, v2i64, v2i32, v2i32>; - } -} - -defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>; -defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; - -multiclass NeonI_Op_High { - def _16B : PatFrag<(ops node:$Rn, node:$Rm), - (op (v8i8 (Neon_High16B node:$Rn)), - (v8i8 (Neon_High16B node:$Rm)))>; - def _8H : PatFrag<(ops node:$Rn, node:$Rm), - (op (v4i16 (Neon_High8H node:$Rn)), - (v4i16 (Neon_High8H node:$Rm)))>; - def _4S : PatFrag<(ops node:$Rn, node:$Rm), - (op (v2i32 (Neon_High4S node:$Rn)), - (v2i32 (Neon_High4S node:$Rm)))>; -} - -defm NI_sabdl_hi : NeonI_Op_High; -defm NI_uabdl_hi : NeonI_Op_High; -defm NI_smull_hi : NeonI_Op_High; -defm NI_umull_hi : NeonI_Op_High; -defm NI_qdmull_hi : NeonI_Op_High; -defm NI_pmull_hi : NeonI_Op_High; - -multiclass NeonI_3VDL_Abd_u opcode, string asmop, string opnode, - bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast(opnode # "_16B"), - VPR128, v8i16, v16i8, 
v8i8>; - def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast(opnode # "_8H"), - VPR128, v4i32, v8i16, v4i16>; - def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast(opnode # "_4S"), - VPR128, v2i64, v4i32, v2i32>; - } -} - -defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>; -defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>; - -// For pattern that need two operators being chained. -class NeonI_3VDL_Aba size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, SDPatternOperator subop, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy, ValueType OpSTy> - : NeonI_3VDiff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL_Aba_v1 opcode, string asmop, - SDPatternOperator opnode, SDPatternOperator subop>{ - def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, subop, VPR64, v8i16, v8i8, v8i8>; - def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, subop, VPR64, v4i32, v4i16, v4i16>; - def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, subop, VPR64, v2i64, v2i32, v2i32>; -} - -defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal", - add, int_arm_neon_vabds>; -defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal", - add, int_arm_neon_vabdu>; - -multiclass NeonI_3VDL2_Aba_v1 opcode, string asmop, - SDPatternOperator opnode, string subop> { - def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b", - opnode, !cast(subop # "_16B"), - VPR128, v8i16, v16i8, v8i8>; - def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, !cast(subop # "_8H"), - VPR128, v4i32, v8i16, v4i16>; - def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, !cast(subop # "_4S"), - VPR128, v2i64, v4i32, v2i32>; -} - -defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add, - "NI_sabdl_hi">; -defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add, - "NI_uabdl_hi">; - -// Long pattern with 2 operands -multiclass NeonI_3VDL_2Op opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable, - SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { - def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, VPR128, VPR64, v8i16, v8i8>; - def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, VPR128, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR128, VPR64, v2i64, v2i32>; - } -} - -defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>; -defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>; - -class NeonI_3VDL2_2Op_mull size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; - -multiclass NeonI_3VDL2_2Op_mull_v1 opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast(opnode # "_16B"), - v8i16, v16i8>; - def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast(opnode # "_8H"), - v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast(opnode # "_4S"), - v2i64, v4i32>; - } -} - -defm 
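
The absolute-difference family reads the same way: sabdl widens |a - b|, and sabal is the chained two-operator form above (note the tied $src = $Rd constraint) that accumulates it. A scalar sketch under the same illustrative naming:

    #include <stdint.h>
    #include <stdlib.h>

    /* sabdl v0.8h, v1.8b, v2.8b: widened absolute difference. */
    void sabdl_8h8b(int16_t d[8], const int8_t a[8], const int8_t b[8]) {
      for (int i = 0; i < 8; i++)
        d[i] = (int16_t)abs((int)a[i] - (int)b[i]);
    }

    /* sabal v0.8h, v1.8b, v2.8b: same, accumulated into the destination
       (the add-of-abd chain in NeonI_3VDL_Aba). */
    void sabal_8h8b(int16_t d[8], const int8_t a[8], const int8_t b[8]) {
      for (int i = 0; i < 8; i++)
        d[i] += (int16_t)abs((int)a[i] - (int)b[i]);
    }
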
SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2", - "NI_smull_hi", 1>; -defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2", - "NI_umull_hi", 1>; - -// Long pattern with 3 operands -class NeonI_3VDL_3Op size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator opnode, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL_3Op_v1 opcode, string asmop, - SDPatternOperator opnode> { - def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode, v8i16, v8i8>; - def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, v4i32, v4i16>; - def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, v2i64, v2i32>; -} - -def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (add node:$Rd, - (int_arm_neon_vmulls node:$Rn, node:$Rm))>; - -def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (add node:$Rd, - (int_arm_neon_vmullu node:$Rn, node:$Rm))>; - -def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (sub node:$Rd, - (int_arm_neon_vmulls node:$Rn, node:$Rm))>; - -def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), - (sub node:$Rd, - (int_arm_neon_vmullu node:$Rn, node:$Rm))>; - -defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>; -defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>; - -defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>; -defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>; - -class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, - string asmop, string ResS, string OpS, - SDPatternOperator subop, SDPatternOperator opnode, - RegisterOperand OpVPR, - ValueType ResTy, ValueType OpTy> - : NeonI_3VDiff, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - let Constraints = "$src = $Rd"; -} - -multiclass NeonI_3VDL2_3Op_mlas_v1 opcode, string asmop, - SDPatternOperator subop, string opnode> { - def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b", - subop, !cast(opnode # "_16B"), - VPR128, v8i16, v16i8>; - def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", - subop, !cast(opnode # "_8H"), - VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", - subop, !cast(opnode # "_4S"), - VPR128, v2i64, v4i32>; -} - -defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2", - add, "NI_smull_hi">; -defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2", - add, "NI_umull_hi">; - -defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2", - sub, "NI_smull_hi">; -defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2", - sub, "NI_umull_hi">; - -multiclass NeonI_3VDL_qdmlal_3Op_v2 opcode, string asmop, - SDPatternOperator opnode> { - def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h", - opnode, int_arm_neon_vqdmull, - VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, int_arm_neon_vqdmull, - VPR64, v2i64, v2i32>; -} - -defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal", - int_arm_neon_vqadds>; -defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl", - int_arm_neon_vqsubs>; - -multiclass NeonI_3VDL_v2 opcode, string asmop, - SDPatternOperator opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", 
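
The Neon_smlal/Neon_umlal fragments above are plain widening multiply-accumulate; the saturating sqdmull/sqdmlal family additionally doubles the widened product and saturates both the product and the accumulate. A C sketch of the 4s/4h lanes (the saturation helper is illustrative, not from this patch):

    #include <stdint.h>

    /* smlal v0.4s, v1.4h, v2.4h: widening multiply-accumulate,
       i.e. the (add $Rd, (vmulls $Rn, $Rm)) fragment above. */
    void smlal_4s4h(int32_t acc[4], const int16_t a[4], const int16_t b[4]) {
      for (int i = 0; i < 4; i++)
        acc[i] += (int32_t)a[i] * (int32_t)b[i];
    }

    static int32_t sat_add32(int64_t x) {
      if (x > INT32_MAX) return INT32_MAX;
      if (x < INT32_MIN) return INT32_MIN;
      return (int32_t)x;
    }

    /* sqdmlal doubles the widened product and saturates each step. */
    void sqdmlal_4s4h(int32_t acc[4], const int16_t a[4], const int16_t b[4]) {
      for (int i = 0; i < 4; i++) {
        int64_t p = 2 * (int64_t)a[i] * (int64_t)b[i]; /* doubling multiply */
        acc[i] = sat_add32((int64_t)acc[i] + sat_add32(p));
      }
    }
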
"4h", - opnode, VPR128, VPR64, v4i32, v4i16>; - def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", - opnode, VPR128, VPR64, v2i64, v2i32>; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", - int_arm_neon_vqdmull, 1>; -} - -multiclass NeonI_3VDL2_2Op_mull_v2 opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", - !cast(opnode # "_8H"), - v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", - !cast(opnode # "_4S"), - v2i64, v4i32>; - } -} - -defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2", - "NI_qdmull_hi", 1>; - -multiclass NeonI_3VDL2_3Op_qdmlal_v2 opcode, string asmop, - SDPatternOperator opnode> { - def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", - opnode, NI_qdmull_hi_8H, - VPR128, v4i32, v8i16>; - def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", - opnode, NI_qdmull_hi_4S, - VPR128, v2i64, v4i32>; -} - -defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2", - int_arm_neon_vqadds>; -defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2", - int_arm_neon_vqsubs>; - -multiclass NeonI_3VDL_v3 opcode, string asmop, - SDPatternOperator opnode_8h8b, - SDPatternOperator opnode_1q1d, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", - opnode_8h8b, VPR128, VPR64, v8i16, v8i8>; - - def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d", - opnode_1q1d, VPR128, VPR64, v16i8, v1i64>; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in -defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, - int_aarch64_neon_vmull_p64, 1>; - -multiclass NeonI_3VDL2_2Op_mull_v3 opcode, string asmop, - string opnode, bit Commutable = 0> { - let isCommutable = Commutable in { - def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", - !cast(opnode # "_16B"), - v8i16, v16i8>; - - def _1q2d : - NeonI_3VDiff<0b1, u, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), - asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d", - [(set (v16i8 VPR128:$Rd), - (v16i8 (int_aarch64_neon_vmull_p64 - (v1i64 (scalar_to_vector - (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))), - (v1i64 (scalar_to_vector - (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))], - NoItinerary>, - Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>; - } - - def : Pat<(v16i8 (int_aarch64_neon_vmull_p64 - (v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 1))), - (v1i64 (extract_subvector (v2i64 VPR128:$Rm), (i64 1))))), - (!cast(NAME # "_1q2d") VPR128:$Rn, VPR128:$Rm)>; -} - -defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi", - 1>; - -// End of implementation for instruction class (3V Diff) - -// The followings are vector load/store multiple N-element structure -// (class SIMD lselem). - -// ld1: load multiple 1-element structure to 1/2/3/4 registers. -// ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4). -// The structure consists of a sequence of sets of N values. -// The first element of the structure is placed in the first lane -// of the first first vector, the second element in the first lane -// of the second vector, and so on. -// E.g. 
LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into -// the three 64-bit vectors list {BA, DC, FE}. -// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three -// 64-bit vectors list {DA, EB, FC}. -// Store instructions store multiple structure to N registers like load. - - -class NeonI_LDVList opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdStMult, - Sched<[WriteVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; -} - -multiclass LDVList_BHSD opcode, string List, string asmop> { - def _8B : NeonI_LDVList<0, opcode, 0b00, - !cast(List # "8B_operand"), asmop>; - - def _4H : NeonI_LDVList<0, opcode, 0b01, - !cast(List # "4H_operand"), asmop>; - - def _2S : NeonI_LDVList<0, opcode, 0b10, - !cast(List # "2S_operand"), asmop>; - - def _16B : NeonI_LDVList<1, opcode, 0b00, - !cast(List # "16B_operand"), asmop>; - - def _8H : NeonI_LDVList<1, opcode, 0b01, - !cast(List # "8H_operand"), asmop>; - - def _4S : NeonI_LDVList<1, opcode, 0b10, - !cast(List # "4S_operand"), asmop>; - - def _2D : NeonI_LDVList<1, opcode, 0b11, - !cast(List # "2D_operand"), asmop>; -} - -// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4) -defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">; -def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">; - -defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">; - -defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">; - -defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">; - -// Load multiple 1-element structure to N consecutive registers (N = 2,3,4) -defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">; -def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">; - -defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">; -def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">; - -defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">; -def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">; - -class NeonI_STVList opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdStMult, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let mayStore = 1; - let neverHasSideEffects = 1; -} - -multiclass STVList_BHSD opcode, string List, string asmop> { - def _8B : NeonI_STVList<0, opcode, 0b00, - !cast(List # "8B_operand"), asmop>; - - def _4H : NeonI_STVList<0, opcode, 0b01, - !cast(List # "4H_operand"), asmop>; - - def _2S : NeonI_STVList<0, opcode, 0b10, - !cast(List # "2S_operand"), asmop>; - - def _16B : NeonI_STVList<1, opcode, 0b00, - !cast(List # "16B_operand"), asmop>; - - def _8H : NeonI_STVList<1, opcode, 0b01, - !cast(List # "8H_operand"), asmop>; - - def _4S : NeonI_STVList<1, opcode, 0b10, - !cast(List # "4S_operand"), asmop>; - - def _2D : NeonI_STVList<1, opcode, 0b11, - !cast(List # "2D_operand"), asmop>; -} - -// Store multiple N-element structures from N registers (N = 1,2,3,4) -defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">; -def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">; - -defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">; - -defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">; - -defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">; - -// Store multiple 1-element structures from N consecutive registers (N = 2,3,4) -defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">; -def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">; - -defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">; -def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">; - -defm ST1x4 : STVList_BHSD<0b0010, "VQuad", 
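
The interleaving described in the comment above is the whole difference between ld1 with multiple registers and ld2/ld3/ld4: ldN de-interleaves each group of N consecutive elements across N registers, while ld1 keeps memory order. In C (illustrative names, matching the {BA, DC, FE} and {DA, EB, FC} examples):

    #include <stdint.h>

    /* ld3 {v0.2s, v1.2s, v2.2s}, [x0]: de-interleave A,B,C,D,E,F into
       v0 = {A, D}, v1 = {B, E}, v2 = {C, F}. */
    void ld3_2s(uint32_t v[3][2], const uint32_t *mem) {
      for (int lane = 0; lane < 2; lane++)
        for (int reg = 0; reg < 3; reg++)
          v[reg][lane] = mem[lane * 3 + reg];
    }

    /* ld1 {v0.2s, v1.2s, v2.2s}, [x0]: plain memory order,
       v0 = {A, B}, v1 = {C, D}, v2 = {E, F}. */
    void ld1_x3_2s(uint32_t v[3][2], const uint32_t *mem) {
      for (int reg = 0; reg < 3; reg++)
        for (int lane = 0; lane < 2; lane++)
          v[reg][lane] = mem[reg * 2 + lane];
    }
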
"st1">; -def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">; - -def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>; -def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>; - -def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; -def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; - -def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>; -def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>; - -def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; -def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>; - -def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; -def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>; - -def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>; -def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>; - -def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr), - (ST1_2D GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr), - (ST1_2D GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr), - (ST1_4S GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr), - (ST1_4S GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr), - (ST1_8H GPR64xsp:$addr, VPR128:$value)>; -def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr), - (ST1_16B GPR64xsp:$addr, VPR128:$value)>; - -def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr), - (ST1_1D GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr), - (ST1_1D GPR64xsp:$addr, VPR64:$value)>; - -def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr), - (ST1_2S GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr), - (ST1_2S GPR64xsp:$addr, VPR64:$value)>; - -def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr), - (ST1_4H GPR64xsp:$addr, VPR64:$value)>; -def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr), - (ST1_8B GPR64xsp:$addr, VPR64:$value)>; - -// Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store. -// FIXME: for now we have v1i8, v1i16, v1i32 legal types, if they are illegal, -// these patterns are not needed any more. 
-def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
-def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
-def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
-
-def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
-          (LSFP8_STR $value, $addr, 0)>;
-def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
-          (LSFP16_STR $value, $addr, 0)>;
-def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
-          (LSFP32_STR $value, $addr, 0)>;
-
-
-// End of vector load/store multiple N-element structure (class SIMD lselem)
-
-// The following are post-index vector load/store multiple N-element
-// structure (class SIMD lselem-post)
-def exact1_asmoperand : AsmOperandClass {
-  let Name = "Exact1";
-  let PredicateMethod = "isExactImm<1>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
-  let ParserMatchClass = exact1_asmoperand;
-}
-
-def exact2_asmoperand : AsmOperandClass {
-  let Name = "Exact2";
-  let PredicateMethod = "isExactImm<2>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
-  let ParserMatchClass = exact2_asmoperand;
-}
-
-def exact3_asmoperand : AsmOperandClass {
-  let Name = "Exact3";
-  let PredicateMethod = "isExactImm<3>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
-  let ParserMatchClass = exact3_asmoperand;
-}
-
-def exact4_asmoperand : AsmOperandClass {
-  let Name = "Exact4";
-  let PredicateMethod = "isExactImm<4>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
-  let ParserMatchClass = exact4_asmoperand;
-}
-
-def exact6_asmoperand : AsmOperandClass {
-  let Name = "Exact6";
-  let PredicateMethod = "isExactImm<6>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
-  let ParserMatchClass = exact6_asmoperand;
-}
-
-def exact8_asmoperand : AsmOperandClass {
-  let Name = "Exact8";
-  let PredicateMethod = "isExactImm<8>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
-  let ParserMatchClass = exact8_asmoperand;
-}
-
-def exact12_asmoperand : AsmOperandClass {
-  let Name = "Exact12";
-  let PredicateMethod = "isExactImm<12>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
-  let ParserMatchClass = exact12_asmoperand;
-}
-
-def exact16_asmoperand : AsmOperandClass {
-  let Name = "Exact16";
-  let PredicateMethod = "isExactImm<16>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
-  let ParserMatchClass = exact16_asmoperand;
-}
-
-def exact24_asmoperand : AsmOperandClass {
-  let Name = "Exact24";
-  let PredicateMethod = "isExactImm<24>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
-  let ParserMatchClass = exact24_asmoperand;
-}
-
-def exact32_asmoperand : AsmOperandClass {
-  let Name = "Exact32";
-  let PredicateMethod = "isExactImm<32>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
-  let ParserMatchClass = exact32_asmoperand;
-}
-
-def exact48_asmoperand : AsmOperandClass {
-  let Name = "Exact48";
-  let PredicateMethod = "isExactImm<48>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
-  let ParserMatchClass = exact48_asmoperand;
-}
-
-def exact64_asmoperand : AsmOperandClass {
-  let Name = "Exact64";
-  let PredicateMethod = "isExactImm<64>";
-  let RenderMethod = "addImmOperands";
-}
-def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
-  let ParserMatchClass = exact64_asmoperand;
-}
-
-multiclass NeonI_LDWB_VList
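
Each uimm_exactN operand above accepts exactly one value because the post-index immediate form of the writeback instructions defined next can only step the base register by the total number of bytes transferred; the register form (Rm) allows an arbitrary stride. In C terms (illustrative names, not from this patch):

    #include <stdint.h>
    #include <string.h>

    /* ld1 {v0.16b}, [x0], #16: load, then bump the base by the transfer
       size (16 is the only immediate uimm_exact16 will accept). */
    const uint8_t *ld1_16b_post(uint8_t v[16], const uint8_t *base) {
      memcpy(v, base, 16);
      return base + 16;  /* value written back to the base register */
    }

    /* ld1 {v0.16b}, [x0], x1: register form allows any step. */
    const uint8_t *ld1_16b_post_reg(uint8_t v[16], const uint8_t *base,
                                    int64_t step) {
      memcpy(v, base, 16);
      return base + step;
    }
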
opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1, - DecoderMethod = "DecodeVLDSTPostInstruction" in { - def _fixed : NeonI_LdStMult_Post, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdStMult_Post, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; - } -} - -multiclass LDWB_VList_BHSD opcode, string List, Operand ImmTy, - Operand ImmTy2, string asmop> { - defm _8B : NeonI_LDWB_VList<0, opcode, 0b00, - !cast(List # "8B_operand"), - ImmTy, asmop>; - - defm _4H : NeonI_LDWB_VList<0, opcode, 0b01, - !cast(List # "4H_operand"), - ImmTy, asmop>; - - defm _2S : NeonI_LDWB_VList<0, opcode, 0b10, - !cast(List # "2S_operand"), - ImmTy, asmop>; - - defm _16B : NeonI_LDWB_VList<1, opcode, 0b00, - !cast(List # "16B_operand"), - ImmTy2, asmop>; - - defm _8H : NeonI_LDWB_VList<1, opcode, 0b01, - !cast(List # "8H_operand"), - ImmTy2, asmop>; - - defm _4S : NeonI_LDWB_VList<1, opcode, 0b10, - !cast(List # "4S_operand"), - ImmTy2, asmop>; - - defm _2D : NeonI_LDWB_VList<1, opcode, 0b11, - !cast(List # "2D_operand"), - ImmTy2, asmop>; -} - -// Post-index load multiple N-element structures from N registers (N = 1,2,3,4) -defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">; -defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8, - "ld1">; - -defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">; - -defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48, - "ld3">; - -defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">; - -// Post-index load multiple 1-element structures from N consecutive registers -// (N = 2,3,4) -defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32, - "ld1">; -defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand, - uimm_exact16, "ld1">; - -defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48, - "ld1">; -defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand, - uimm_exact24, "ld1">; - -defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64, - "ld1">; -defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand, - uimm_exact32, "ld1">; - -multiclass NeonI_STWB_VList opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1, - DecoderMethod = "DecodeVLDSTPostInstruction" in { - def _fixed : NeonI_LdStMult_Post, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdStMult_Post, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; - } -} - -multiclass STWB_VList_BHSD opcode, string List, Operand ImmTy, - Operand ImmTy2, string asmop> { - defm _8B : NeonI_STWB_VList<0, opcode, 0b00, - !cast(List # "8B_operand"), ImmTy, asmop>; - - defm _4H : NeonI_STWB_VList<0, opcode, 0b01, - !cast(List # "4H_operand"), - ImmTy, asmop>; - - defm _2S : NeonI_STWB_VList<0, opcode, 0b10, - !cast(List # "2S_operand"), - ImmTy, asmop>; - - defm _16B : NeonI_STWB_VList<1, opcode, 0b00, - !cast(List # "16B_operand"), - ImmTy2, asmop>; - - defm _8H : NeonI_STWB_VList<1, opcode, 0b01, - !cast(List # "8H_operand"), - ImmTy2, asmop>; - - defm _4S : NeonI_STWB_VList<1, opcode, 0b10, - !cast(List # "4S_operand"), - ImmTy2, asmop>; - - defm _2D : NeonI_STWB_VList<1, opcode, 0b11, - !cast(List # 
"2D_operand"), - ImmTy2, asmop>; -} - -// Post-index load multiple N-element structures from N registers (N = 1,2,3,4) -defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">; -defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8, - "st1">; - -defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">; - -defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48, - "st3">; - -defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">; - -// Post-index load multiple 1-element structures from N consecutive registers -// (N = 2,3,4) -defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32, - "st1">; -defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand, - uimm_exact16, "st1">; - -defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48, - "st1">; -defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand, - uimm_exact24, "st1">; - -defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64, - "st1">; -defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand, - uimm_exact32, "st1">; - -// End of post-index vector load/store multiple N-element structure -// (class SIMD lselem-post) - -// The followings are vector load/store single N-element structure -// (class SIMD lsone). -def neon_uimm0_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm0_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm1_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm1_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm2_bare : Operand, - ImmLeaf { - let ParserMatchClass = neon_uimm2_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm3_bare : Operand, - ImmLeaf { - let ParserMatchClass = uimm3_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -def neon_uimm4_bare : Operand, - ImmLeaf { - let ParserMatchClass = uimm4_asmoperand; - let PrintMethod = "printUImmBareOperand"; -} - -class NeonI_LDN_Dup opcode, bits<2> size, - RegisterOperand VecList, string asmop> - : NeonI_LdOne_Dup, - Sched<[WriteVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; -} - -multiclass LDN_Dup_BHSD opcode, string List, string asmop> { - def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00, - !cast(List # "8B_operand"), asmop>; - - def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01, - !cast(List # "4H_operand"), asmop>; - - def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10, - !cast(List # "2S_operand"), asmop>; - - def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11, - !cast(List # "1D_operand"), asmop>; - - def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00, - !cast(List # "16B_operand"), asmop>; - - def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01, - !cast(List # "8H_operand"), asmop>; - - def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10, - !cast(List # "4S_operand"), asmop>; - - def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11, - !cast(List # "2D_operand"), asmop>; -} - -// Load single 1-element structure to all lanes of 1 register -defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">; - -// Load single N-element structure to all lanes of N consecutive -// registers (N = 2,3,4) -defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">; -defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">; -defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">; - - -class LD1R_pattern - : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))), - (VTy (INST GPR64xsp:$Rn))>; - -// Match all LD1R 
instructions -def : LD1R_pattern; - -def : LD1R_pattern; - -def : LD1R_pattern; - -def : LD1R_pattern; - -def : LD1R_pattern; -def : LD1R_pattern; - -def : LD1R_pattern; -def : LD1R_pattern; - -def : LD1R_pattern; -def : LD1R_pattern; - -class LD1R_pattern_v1 - : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))), - (VTy (INST GPR64xsp:$Rn))>; - -def : LD1R_pattern_v1; -def : LD1R_pattern_v1; - -multiclass VectorList_Bare_BHSD { - defm B : VectorList_operands; - defm H : VectorList_operands; - defm S : VectorList_operands; - defm D : VectorList_operands; -} - -// Special vector list operand of 128-bit vectors with bare layout. -// i.e. only show ".b", ".h", ".s", ".d" -defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>; -defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>; -defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>; -defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>; - -class NeonI_LDN_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane<1, r, op2_1, op0, - (outs VList:$Rt), - (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecLd, ReadVecLd, ReadVecLd]> { - let mayLoad = 1; - let neverHasSideEffects = 1; - let hasExtraDefRegAllocReq = 1; - let Constraints = "$src = $Rt"; -} - -multiclass LDN_Lane_BHSD { - def _B : NeonI_LDN_Lane(List # "B_operand"), - neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H : NeonI_LDN_Lane(List # "H_operand"), - neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S : NeonI_LDN_Lane(List # "S_operand"), - neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D : NeonI_LDN_Lane(List # "D_operand"), - neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Load single 1-element structure to one lane of 1 register. 
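
Before the lane forms that follow, note what the LD1R patterns just matched reduce to: one element loaded and replicated to every lane (the Neon_vdup-of-load shape). A C sketch (name illustrative, not from this patch):

    #include <stdint.h>

    /* ld1r {v0.4s}, [x0]: load one element, broadcast to all lanes. */
    void ld1r_4s(uint32_t v[4], const uint32_t *p) {
      for (int i = 0; i < 4; i++)
        v[i] = *p;
    }

    /* ld2r does the same with two registers from two consecutive
       elements: v0[i] = p[0], v1[i] = p[1] for every lane i. */
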
-defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">; - -// Load single N-element structure to one lane of N consecutive registers -// (N = 2,3,4) -defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">; -defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">; -defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">; - -multiclass LD1LN_patterns { - def : Pat<(VTy (vector_insert (VTy VPR64:$src), - (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))), - (VTy (EXTRACT_SUBREG - (INST GPR64xsp:$Rn, - (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), - ImmOp:$lane), - sub_64))>; - - def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src), - (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))), - (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>; -} - -// Match all LD1LN instructions -defm : LD1LN_patterns; - -defm : LD1LN_patterns; - -defm : LD1LN_patterns; -defm : LD1LN_patterns; - -defm : LD1LN_patterns; -defm : LD1LN_patterns; - -class NeonI_STN_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane<0, r, op2_1, op0, - (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn]", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let mayStore = 1; - let neverHasSideEffects = 1; - let hasExtraDefRegAllocReq = 1; -} - -multiclass STN_Lane_BHSD { - def _B : NeonI_STN_Lane(List # "B_operand"), - neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H : NeonI_STN_Lane(List # "H_operand"), - neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S : NeonI_STN_Lane(List # "S_operand"), - neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D : NeonI_STN_Lane(List # "D_operand"), - neon_uimm1_bare, asmop>{ - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Store single 1-element structure from one lane of 1 register. -defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">; - -// Store single N-element structure from one lane of N consecutive registers -// (N = 2,3,4) -defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">; -defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">; -defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">; - -multiclass ST1LN_patterns { - def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)), - GPR64xsp:$Rn), - (INST GPR64xsp:$Rn, - (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64), - ImmOp:$lane)>; - - def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)), - GPR64xsp:$Rn), - (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>; -} - -// Match all ST1LN instructions -defm : ST1LN_patterns; - -defm : ST1LN_patterns; - -defm : ST1LN_patterns; -defm : ST1LN_patterns; - -defm : ST1LN_patterns; -defm : ST1LN_patterns; - -// End of vector load/store single N-element structure (class SIMD lsone). 
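
The lsone class that just closed is symmetric: a lane load is a vector element insert and a lane store an element extract, which is why only the load side carries the tied $src = $Rt constraint. A minimal C model (names illustrative, not from this patch):

    #include <stdint.h>

    /* ld1 {v0.s}[lane], [x0]: insert into one lane, leave the others
       (hence the tied $src = $Rt on NeonI_LDN_Lane). */
    void ld1_lane_s(uint32_t v[4], const uint32_t *p, int lane) {
      v[lane] = *p;
    }

    /* st1 {v0.s}[lane], [x0]: extract one lane to memory; no tie is
       needed, so NeonI_STN_Lane has no $src operand. */
    void st1_lane_s(uint32_t *p, const uint32_t v[4], int lane) {
      *p = v[lane];
    }
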
- - -// The following are post-index load/store single N-element instructions -// (class SIMD lsone-post) - -multiclass NeonI_LDN_WB_Dup opcode, bits<2> size, - RegisterOperand VecList, Operand ImmTy, - string asmop> { - let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - def _fixed : NeonI_LdOne_Dup_Post, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - def _register : NeonI_LdOne_Dup_Post, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>; - } -} - -multiclass LDWB_Dup_BHSD opcode, string List, string asmop, - Operand uimm_b, Operand uimm_h, - Operand uimm_s, Operand uimm_d> { - defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00, - !cast(List # "8B_operand"), - uimm_b, asmop>; - - defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01, - !cast(List # "4H_operand"), - uimm_h, asmop>; - - defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10, - !cast(List # "2S_operand"), - uimm_s, asmop>; - - defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11, - !cast(List # "1D_operand"), - uimm_d, asmop>; - - defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00, - !cast(List # "16B_operand"), - uimm_b, asmop>; - - defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01, - !cast(List # "8H_operand"), - uimm_h, asmop>; - - defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10, - !cast(List # "4S_operand"), - uimm_s, asmop>; - - defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11, - !cast(List # "2D_operand"), - uimm_d, asmop>; -} - -// Post-index load single 1-element structure to all lanes of 1 register -defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index load single N-element structure to all lanes of N consecutive -// registers (N = 2,3,4) -defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, - Constraints = "$Rn = $wb, $Rt = $src", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - class LDN_WBFx_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0, - (outs VList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt, - VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]> { - let Rm = 0b11111; - } - - class LDN_WBReg_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0, - (outs VList:$Rt, GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, - VList:$src, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd, ReadVecLd]>; -} - -multiclass LD_Lane_WB_BHSD { - def _B_fixed : LDN_WBFx_Lane(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _B_register : LDN_WBReg_Lane(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H_fixed : LDN_WBFx_Lane(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 
0b0}; - let Inst{30} = lane{2}; - } - - def _H_register : LDN_WBReg_Lane(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S_fixed : LDN_WBFx_Lane(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _S_register : LDN_WBReg_Lane(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D_fixed : LDN_WBFx_Lane(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } - - def _D_register : LDN_WBReg_Lane(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Post-index load single 1-element structure to one lane of 1 register. -defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index load single N-element structure to one lane of N consecutive -// registers -// (N = 2,3,4) -defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -let mayStore = 1, neverHasSideEffects = 1, - hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb", - DecoderMethod = "DecodeVLDSTLanePostInstruction" in { - class STN_WBFx_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0, - (outs GPR64xsp:$wb), - (ins GPR64xsp:$Rn, ImmTy:$amt, - VList:$Rt, ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $amt", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> { - let Rm = 0b11111; - } - - class STN_WBReg_Lane op2_1, bit op0, RegisterOperand VList, - Operand ImmTy, Operand ImmOp, string asmop> - : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0, - (outs GPR64xsp:$wb), - (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt, - ImmOp:$lane), - asmop # "\t$Rt[$lane], [$Rn], $Rm", - [], - NoItinerary>, - Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>; -} - -multiclass ST_Lane_WB_BHSD { - def _B_fixed : STN_WBFx_Lane(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _B_register : STN_WBReg_Lane(List # "B_operand"), - uimm_b, neon_uimm4_bare, asmop> { - let Inst{12-10} = lane{2-0}; - let Inst{30} = lane{3}; - } - - def _H_fixed : STN_WBFx_Lane(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _H_register : STN_WBReg_Lane(List # "H_operand"), - uimm_h, neon_uimm3_bare, asmop> { - let Inst{12-10} = {lane{1}, lane{0}, 0b0}; - let Inst{30} = lane{2}; - } - - def _S_fixed : STN_WBFx_Lane(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _S_register : STN_WBReg_Lane(List # "S_operand"), - uimm_s, neon_uimm2_bare, asmop> { - let Inst{12-10} = {lane{0}, 0b0, 0b0}; - let Inst{30} = lane{1}; - } - - def _D_fixed : STN_WBFx_Lane(List # "D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } - - def _D_register : STN_WBReg_Lane(List # 
"D_operand"), - uimm_d, neon_uimm1_bare, asmop> { - let Inst{12-10} = 0b001; - let Inst{30} = lane{0}; - } -} - -// Post-index store single 1-element structure from one lane of 1 register. -defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1, - uimm_exact2, uimm_exact4, uimm_exact8>; - -// Post-index store single N-element structure from one lane of N consecutive -// registers (N = 2,3,4) -defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2, - uimm_exact4, uimm_exact8, uimm_exact16>; -defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3, - uimm_exact6, uimm_exact12, uimm_exact24>; -defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4, - uimm_exact8, uimm_exact16, uimm_exact32>; - -// End of post-index load/store single N-element instructions -// (class SIMD lsone-post) - -// Neon Scalar instructions implementation -// Scalar Three Same - -class NeonI_Scalar3Same_size size, bits<5> opcode, string asmop, - RegisterClass FPRC> - : NeonI_Scalar3Same, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -class NeonI_Scalar3Same_D_size opcode, string asmop> - : NeonI_Scalar3Same_size; - -multiclass NeonI_Scalar3Same_HS_sizes opcode, string asmop, - bit Commutable = 0> { - let isCommutable = Commutable in { - def hhh : NeonI_Scalar3Same_size; - def sss : NeonI_Scalar3Same_size; - } -} - -multiclass NeonI_Scalar3Same_SD_sizes opcode, - string asmop, bit Commutable = 0> { - let isCommutable = Commutable in { - def sss : NeonI_Scalar3Same_size; - def ddd : NeonI_Scalar3Same_size; - } -} - -multiclass NeonI_Scalar3Same_BHSD_sizes opcode, - string asmop, bit Commutable = 0> { - let isCommutable = Commutable in { - def bbb : NeonI_Scalar3Same_size; - def hhh : NeonI_Scalar3Same_size; - def sss : NeonI_Scalar3Same_size; - def ddd : NeonI_Scalar3Same_size; - } -} - -multiclass Neon_Scalar3Same_D_size_patterns { - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - -multiclass Neon_Scalar3Same_BHSD_size_patterns - : Neon_Scalar3Same_D_size_patterns { - def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (INSTB FPR8:$Rn, FPR8:$Rm)>; - def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Same_HS_size_patterns { - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Same_SD_size_patterns { - def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; - def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))), - (INSTD FPR64:$Rn, FPR64:$Rm)>; -} - -class Neon_Scalar3Same_cmp_V1_D_size_patterns - : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)), - (INSTD FPR64:$Rn, FPR64:$Rm)>; - -// Scalar Three Different - -class NeonI_Scalar3Diff_size size, bits<4> opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS> - : NeonI_Scalar3Diff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar3Diff_HS_size opcode, string asmop> { - def shh : NeonI_Scalar3Diff_size; - def dss : NeonI_Scalar3Diff_size; -} - -multiclass NeonI_Scalar3Diff_ml_HS_size opcode, string asmop> { - let Constraints = "$Src = $Rd" in { - def shh : NeonI_Scalar3Diff, - Sched<[WriteFPALU, ReadFPALU, 
ReadFPALU, ReadFPALU]>; - def dss : NeonI_Scalar3Diff, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>; - } -} - -multiclass Neon_Scalar3Diff_HS_size_patterns { - def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass Neon_Scalar3Diff_ml_HS_size_patterns { - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>; - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>; -} - -// Scalar Two Registers Miscellaneous - -class NeonI_Scalar2SameMisc_size size, bits<5> opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS> - : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_SD_size opcode, - string asmop> { - def ss : NeonI_Scalar2SameMisc_size; - def dd : NeonI_Scalar2SameMisc_size; -} - -multiclass NeonI_Scalar2SameMisc_D_size opcode, string asmop> { - def dd : NeonI_Scalar2SameMisc_size; -} - -multiclass NeonI_Scalar2SameMisc_BHSD_size opcode, string asmop> - : NeonI_Scalar2SameMisc_D_size { - def bb : NeonI_Scalar2SameMisc_size; - def hh : NeonI_Scalar2SameMisc_size; - def ss : NeonI_Scalar2SameMisc_size; -} - -class NeonI_Scalar2SameMisc_fcvtxn_D_size opcode, string asmop> - : NeonI_Scalar2SameMisc_size; - -multiclass NeonI_Scalar2SameMisc_narrow_HSD_size opcode, - string asmop> { - def bh : NeonI_Scalar2SameMisc_size; - def hs : NeonI_Scalar2SameMisc_size; - def sd : NeonI_Scalar2SameMisc_size; -} - -class NeonI_Scalar2SameMisc_accum_size size, bits<5> opcode, - string asmop, RegisterClass FPRC> - : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_accum_BHSD_size opcode, - string asmop> { - - let Constraints = "$Src = $Rd" in { - def bb : NeonI_Scalar2SameMisc_accum_size; - def hh : NeonI_Scalar2SameMisc_accum_size; - def ss : NeonI_Scalar2SameMisc_accum_size; - def dd : NeonI_Scalar2SameMisc_accum_size; - } -} - -class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns - : Pat<(f32 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns { - def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -class Neon_Scalar2SameMisc_vcvt_D_size_patterns - : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns { - def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_SD_size_patterns { - def : Pat<(f32 (opnode (f32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(f64 (opnode (f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -class Neon_Scalar2SameMisc_V1_D_size_patterns - : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -class NeonI_Scalar2SameMisc_cmpz_D_size opcode, string asmop> - : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_Scalar2SameMisc_cmpz_SD_size opcode, - string asmop> { - def ssi : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU]>; - def ddi : NeonI_Scalar2SameMisc, - Sched<[WriteFPALU, ReadFPALU]>; -} - -class Neon_Scalar2SameMisc_cmpz_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (bitconvert 
(v8i8 Neon_AllZero))))), - (INSTD FPR64:$Rn, 0)>; - -class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns - : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn), - (i32 neon_uimm0:$Imm), CC)), - (INSTD FPR64:$Rn, neon_uimm0:$Imm)>; - -multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns { - def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpzz32:$FPImm))), - (INSTS FPR32:$Rn, fpzz32:$FPImm)>; - def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpzz32:$FPImm))), - (INSTD FPR64:$Rn, fpzz32:$FPImm)>; - def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpzz32:$FPImm), CC)), - (INSTD FPR64:$Rn, fpzz32:$FPImm)>; -} - -multiclass Neon_Scalar2SameMisc_D_size_patterns { - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_BHSD_size_patterns - : Neon_Scalar2SameMisc_D_size_patterns { - def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))), - (INSTB FPR8:$Rn)>; - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Rn)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; -} - -multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Rn)>; - def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Rn)>; - def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Rn)>; - -} - -multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTB, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))), - (INSTB FPR8:$Src, FPR8:$Rn)>; - def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))), - (INSTH FPR16:$Src, FPR16:$Rn)>; - def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))), - (INSTS FPR32:$Src, FPR32:$Rn)>; - def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))), - (INSTD FPR64:$Src, FPR64:$Rn)>; -} - -// Scalar Shift By Immediate - -class NeonI_ScalarShiftImm_size opcode, string asmop, - RegisterClass FPRC, Operand ImmTy> - : NeonI_ScalarShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_ScalarShiftRightImm_D_size opcode, - string asmop> { - def ddi : NeonI_ScalarShiftImm_size { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftRightImm_BHSD_size opcode, - string asmop> - : NeonI_ScalarShiftRightImm_D_size { - def bbi : NeonI_ScalarShiftImm_size { - bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hhi : NeonI_ScalarShiftImm_size { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def ssi : NeonI_ScalarShiftImm_size { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftLeftImm_D_size opcode, - string asmop> { - def ddi : NeonI_ScalarShiftImm_size { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftLeftImm_BHSD_size opcode, - string asmop> - : NeonI_ScalarShiftLeftImm_D_size { - def bbi : NeonI_ScalarShiftImm_size { - bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hhi : NeonI_ScalarShiftImm_size { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def ssi : NeonI_ScalarShiftImm_size { - bits<5> Imm; - let Inst{22-21} = 0b01; 
// immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -class NeonI_ScalarShiftRightImm_accum_D_size opcode, string asmop> - : NeonI_ScalarShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - let Constraints = "$Src = $Rd"; -} - -class NeonI_ScalarShiftLeftImm_accum_D_size opcode, string asmop> - : NeonI_ScalarShiftImm, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - let Constraints = "$Src = $Rd"; -} - -class NeonI_ScalarShiftImm_narrow_size opcode, string asmop, - RegisterClass FPRCD, RegisterClass FPRCS, - Operand ImmTy> - : NeonI_ScalarShiftImm, - Sched<[WriteFPALU, ReadFPALU]>; - -multiclass NeonI_ScalarShiftImm_narrow_HSD_size opcode, - string asmop> { - def bhi : NeonI_ScalarShiftImm_narrow_size { - bits<3> Imm; - let Inst{22-19} = 0b0001; // immh:immb = 0001xxx - let Inst{18-16} = Imm; - } - def hsi : NeonI_ScalarShiftImm_narrow_size { - bits<4> Imm; - let Inst{22-20} = 0b001; // immh:immb = 001xxxx - let Inst{19-16} = Imm; - } - def sdi : NeonI_ScalarShiftImm_narrow_size { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } -} - -multiclass NeonI_ScalarShiftImm_cvt_SD_size opcode, string asmop> { - def ssi : NeonI_ScalarShiftImm_size { - bits<5> Imm; - let Inst{22-21} = 0b01; // immh:immb = 01xxxxx - let Inst{20-16} = Imm; - } - def ddi : NeonI_ScalarShiftImm_size { - bits<6> Imm; - let Inst{22} = 0b1; // immh:immb = 1xxxxxx - let Inst{21-16} = Imm; - } -} - -multiclass Neon_ScalarShiftRImm_D_size_patterns { - def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -multiclass Neon_ScalarShiftLImm_D_size_patterns { - def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -class Neon_ScalarShiftLImm_V1_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))), - (INSTD FPR64:$Rn, imm:$Imm)>; - -class Neon_ScalarShiftRImm_V1_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), - (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))), - (INSTD FPR64:$Rn, imm:$Imm)>; - -multiclass Neon_ScalarShiftLImm_BHSD_size_patterns - : Neon_ScalarShiftLImm_D_size_patterns { - def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))), - (INSTB FPR8:$Rn, imm:$Imm)>; - def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))), - (INSTH FPR16:$Rn, imm:$Imm)>; - def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; -} - -class Neon_ScalarShiftLImm_accum_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), - (i32 shl_imm64:$Imm))), - (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; - -class Neon_ScalarShiftRImm_accum_D_size_patterns - : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), - (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; - -multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns< - SDPatternOperator opnode, - Instruction INSTH, - Instruction INSTS, - Instruction INSTD> { - def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))), - (INSTH FPR16:$Rn, imm:$Imm)>; - def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), - (INSTS FPR32:$Rn, imm:$Imm)>; - def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), - (INSTD FPR64:$Rn, imm:$Imm)>; -} - -multiclass 
-multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
-                                                      Instruction INSTS,
-                                                      Instruction INSTD> {
-  def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
-                (INSTS FPR32:$Rn, imm:$Imm)>;
-  def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
-                (INSTD FPR64:$Rn, imm:$Imm)>;
-}
-
-multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
-                                                      Instruction INSTS,
-                                                      Instruction INSTD> {
-  def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
-                (INSTS FPR32:$Rn, imm:$Imm)>;
-  def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
-                (INSTD FPR64:$Rn, imm:$Imm)>;
-}
-
-// Scalar Signed Shift Right (Immediate)
-defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
-defm : Neon_ScalarShiftRImm_D_size_patterns;
-// Pattern to match llvm.arm.* intrinsic.
-def : Neon_ScalarShiftRImm_V1_D_size_patterns;
-
-// Scalar Unsigned Shift Right (Immediate)
-defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
-defm : Neon_ScalarShiftRImm_D_size_patterns;
-// Pattern to match llvm.arm.* intrinsic.
-def : Neon_ScalarShiftRImm_V1_D_size_patterns;
-
-// Scalar Signed Rounding Shift Right (Immediate)
-defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
-defm : Neon_ScalarShiftRImm_D_size_patterns;
-
-// Scalar Unsigned Rounding Shift Right (Immediate)
-defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
-defm : Neon_ScalarShiftRImm_D_size_patterns;
-
-// Scalar Signed Shift Right and Accumulate (Immediate)
-def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
-          ;
-
-// Scalar Unsigned Shift Right and Accumulate (Immediate)
-def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
-          ;
-
-// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
-def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
-          ;
-
-// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
-def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
-          ;
-
-// Scalar Shift Left (Immediate)
-defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
-defm : Neon_ScalarShiftLImm_D_size_patterns;
-// Pattern to match llvm.arm.* intrinsic.
-def : Neon_ScalarShiftLImm_V1_D_size_patterns;
-
-// Signed Saturating Shift Left (Immediate)
-defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
-defm : Neon_ScalarShiftLImm_BHSD_size_patterns;
-// Pattern to match llvm.arm.* intrinsic.
-defm : Neon_ScalarShiftLImm_D_size_patterns;
-
-// Unsigned Saturating Shift Left (Immediate)
-defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
-defm : Neon_ScalarShiftLImm_BHSD_size_patterns;
-// Pattern to match llvm.arm.* intrinsic.
-defm : Neon_ScalarShiftLImm_D_size_patterns;
-
-// Signed Saturating Shift Left Unsigned (Immediate)
-defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
-defm : Neon_ScalarShiftLImm_BHSD_size_patterns;
-
-// Shift Right And Insert (Immediate)
-def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
-          ;
-
-// Shift Left And Insert (Immediate)
-def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
-def : Neon_ScalarShiftLImm_accum_D_size_patterns
-          ;
-
-// Signed Saturating Shift Right Narrow (Immediate)
-defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns;
-
-// Unsigned Saturating Shift Right Narrow (Immediate)
-defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns;
-
-// Signed Saturating Rounded Shift Right Narrow (Immediate)
-defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns;
-
-// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
-defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns;
-
-// Signed Saturating Shift Right Unsigned Narrow (Immediate)
-defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns;
-
-// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
-defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns;
-
-// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
-defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
-defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns;
-
-// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
-defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
-defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns;
-
-// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
-defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
-defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns;
-
-// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
-defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
-defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns;
-
-// Patterns For Convert Instructions Between v1f64 and v1i64
-class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
-                                             Instruction INST>
-  : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
-        (INST FPR64:$Rn, imm:$Imm)>;
-
-class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
-                                             Instruction INST>
-  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
-        (INST FPR64:$Rn, imm:$Imm)>;
-
-def : Neon_ScalarShiftImm_cvtf_v1f64_pattern;
-
-def : Neon_ScalarShiftImm_cvtf_v1f64_pattern;
-
-def : Neon_ScalarShiftImm_fcvt_v1f64_pattern;
-
-def : Neon_ScalarShiftImm_fcvt_v1f64_pattern;
-
-// Scalar Integer Add
-let isCommutable = 1 in {
-def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
-}
-
-// Scalar Integer Sub
-def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
-
-// Pattern for Scalar Integer Add and Sub with D register only
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-// Scalar Integer Saturating Add (Signed, Unsigned)
-defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
-defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
-
-// Scalar Integer Saturating Sub (Signed, Unsigned)
-defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
-defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
-
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
-defm : Neon_Scalar3Same_BHSD_size_patterns;
-defm : Neon_Scalar3Same_BHSD_size_patterns;
-defm : Neon_Scalar3Same_BHSD_size_patterns;
-defm : Neon_Scalar3Same_BHSD_size_patterns;
-
-// Scalar Integer Saturating Doubling Multiply Half High
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in
-defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
-
-// Scalar Integer Saturating Rounding Doubling Multiply Half High
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
-}
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Saturating Doubling Multiply Half High and
-// Scalar Integer Saturating Rounding Doubling Multiply Half High
-defm : Neon_Scalar3Same_HS_size_patterns;
-defm : Neon_Scalar3Same_HS_size_patterns;
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in {
-// Scalar Floating-point Multiply Extended
-defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
-}
-
-// Scalar Floating-point Reciprocal Step
-defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
-defm : Neon_Scalar3Same_SD_size_patterns;
-def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Floating-point Reciprocal Square Root Step
-defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
-defm : Neon_Scalar3Same_SD_size_patterns;
-def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Floating-point Multiply Extended,
-multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
-                                                  Instruction INSTS,
-                                                  Instruction INSTD> {
-  def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
-            (INSTS FPR32:$Rn, FPR32:$Rm)>;
-  def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
-            (INSTD FPR64:$Rn, FPR64:$Rm)>;
-}
-
-defm : Neon_Scalar3Same_MULX_SD_size_patterns;
-def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Integer Shift Left (Signed, Unsigned)
-def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
-def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-// Scalar Integer Saturating Shift Left (Signed, Unsigned)
-defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
-defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
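As noted earlier, the Neon_Scalar3Same_*_patterns instantiations in this stretch lost their argument lists to the extraction. Each originally paired an intrinsic or ISD node with the instruction that implements it, roughly as sketched here; the operator name is illustrative, not recovered from the patch.

    // Hypothetical shape of a stripped instantiation (names illustrative):
    // defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;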
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Saturating Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_BHSD_size_patterns;
-defm : Neon_Scalar3Same_BHSD_size_patterns;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Saturating Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-// Scalar Integer Rounding Shift Left (Signed, Unsigned)
-def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
-def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
-defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
-defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_BHSD_size_patterns;
-defm : Neon_Scalar3Same_BHSD_size_patterns;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
-// Signed Saturating Doubling Multiply-Add Long
-defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
-}
-defm : Neon_Scalar3Diff_ml_HS_size_patterns;
-
-// Signed Saturating Doubling Multiply-Subtract Long
-let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
-defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
-}
-defm : Neon_Scalar3Diff_ml_HS_size_patterns;
-
-// Signed Saturating Doubling Multiply Long
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in {
-defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
-}
-defm : Neon_Scalar3Diff_HS_size_patterns;
-
-// Scalar Signed Integer Convert To Floating-point
-defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
-defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns;
-
-// Scalar Unsigned Integer Convert To Floating-point
-defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
-defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns;
-
-// Scalar Floating-point Converts
-def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
-def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns;
-
-defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns;
-
-// Patterns For Convert Instructions Between v1f64 and v1i64
-class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
-                                              Instruction INST>
-  : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
-
-class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
-                                              Instruction INST>
-  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
-
-def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern;
-def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern;
-
-def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern;
-def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern;
-
-// Scalar Floating-point Reciprocal Estimate
-defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
-defm : Neon_Scalar2SameMisc_SD_size_patterns;
-def : Neon_Scalar2SameMisc_V1_D_size_patterns;
-
-// Scalar Floating-point Reciprocal Exponent
-defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
-defm : Neon_Scalar2SameMisc_SD_size_patterns;
-
-// Scalar Floating-point Reciprocal Square Root Estimate
-defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
-defm : Neon_Scalar2SameMisc_SD_size_patterns;
-def : Neon_Scalar2SameMisc_V1_D_size_patterns;
-
-// Scalar Floating-point Round
-class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
-  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
-
-def : Neon_ScalarFloatRound_pattern;
-def : Neon_ScalarFloatRound_pattern;
-def : Neon_ScalarFloatRound_pattern;
-def : Neon_ScalarFloatRound_pattern;
-def : Neon_ScalarFloatRound_pattern;
-def : Neon_ScalarFloatRound_pattern;
-def : Neon_ScalarFloatRound_pattern;
-
-// Scalar Integer Compare
-
-// Scalar Compare Bitwise Equal
-def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
-                                              Instruction INSTD,
-                                              CondCode CC>
-  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
-        (INSTD FPR64:$Rn, FPR64:$Rm)>;
-
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
-
-// Scalar Compare Signed Greater Than Or Equal
-def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
-defm : Neon_Scalar3Same_D_size_patterns;
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
-
-// Scalar Compare Unsigned Higher Or Same
-def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
-defm : Neon_Scalar3Same_D_size_patterns;
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
-
-// Scalar Compare Unsigned Higher
-def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
-defm : Neon_Scalar3Same_D_size_patterns;
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
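The v1 compare pattern class above takes a condition code alongside the instruction, so each stripped def in this area originally read roughly as follows. The argument order is assumed from the class body and the names are illustrative, not recovered from the patch.

    // Hypothetical shape of a stripped instantiation (order assumed):
    // def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;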
-// Scalar Compare Signed Greater Than
-def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
-defm : Neon_Scalar3Same_D_size_patterns;
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
-
-// Scalar Compare Bitwise Test Bits
-def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
-defm : Neon_Scalar3Same_D_size_patterns;
-defm : Neon_Scalar3Same_D_size_patterns;
-
-// Scalar Compare Bitwise Equal To Zero
-def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
-
-// Scalar Compare Signed Greater Than Or Equal To Zero
-def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
-
-// Scalar Compare Signed Greater Than Zero
-def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
-
-// Scalar Compare Signed Less Than Or Equal To Zero
-def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
-
-// Scalar Compare Less Than Zero
-def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
-
-// Scalar Floating-point Compare
-
-// Scalar Floating-point Compare Mask Equal
-defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
-defm : Neon_Scalar3Same_SD_size_patterns;
-def : Neon_Scalar3Same_cmp_V1_D_size_patterns;
-
-// Scalar Floating-point Compare Mask Equal To Zero
-defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
-
-// Scalar Floating-point Compare Mask Greater Than Or Equal
-defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
-defm : Neon_Scalar3Same_SD_size_patterns;
-def : Neon_Scalar3Same_cmp_V1_D_size_patterns;
-
-// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
-defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
-
-// Scalar Floating-point Compare Mask Greater Than
-defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
-defm : Neon_Scalar3Same_SD_size_patterns;
-def : Neon_Scalar3Same_cmp_V1_D_size_patterns;
-
-// Scalar Floating-point Compare Mask Greater Than Zero
-defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
-
-// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
-defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
-
-// Scalar Floating-point Compare Mask Less Than Zero
-defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
-
-// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
-defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
-defm : Neon_Scalar3Same_SD_size_patterns;
-def : Pat<(v1i64 (int_arm_neon_vacge (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FACGEddd FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Floating-point Absolute Compare Mask Greater Than
-defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
-defm : Neon_Scalar3Same_SD_size_patterns;
-def : Pat<(v1i64 (int_arm_neon_vacgt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FACGTddd FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Floating-point Absolute Difference
-defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
-defm : Neon_Scalar3Same_SD_size_patterns;
-
-// Scalar Absolute Value
-defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
-defm : Neon_Scalar2SameMisc_D_size_patterns;
-
-// Scalar Signed Saturating Absolute Value
-defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
-defm : Neon_Scalar2SameMisc_BHSD_size_patterns;
-
-// Scalar Negate
-defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
-defm : Neon_Scalar2SameMisc_D_size_patterns;
-
-// Scalar Signed Saturating Negate
-defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
-defm : Neon_Scalar2SameMisc_BHSD_size_patterns;
-
-// Scalar Signed Saturating Accumulated of Unsigned Value
-defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
-defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns;
-
-// Scalar Unsigned Saturating Accumulated of Signed Value
-defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
-defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns;
-
-def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
-                                          (v1i64 FPR64:$Rn))),
-          (SUQADDdd FPR64:$Src, FPR64:$Rn)>;
-
-def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
-                                          (v1i64 FPR64:$Rn))),
-          (USQADDdd FPR64:$Src, FPR64:$Rn)>;
-
-def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
-          (ABSdd FPR64:$Rn)>;
-
-def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
-          (SQABSdd FPR64:$Rn)>;
-
-def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
-          (SQNEGdd FPR64:$Rn)>;
-
-def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
-                      (v1i64 FPR64:$Rn))),
-          (NEGdd FPR64:$Rn)>;
-
-// Scalar Signed Saturating Extract Unsigned Narrow
-defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
-defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns;
-
-// Scalar Signed Saturating Extract Narrow
-defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
-defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns;
-
-// Scalar Unsigned Saturating Extract Narrow
-defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
-defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns;
-
-// Scalar Reduce Pairwise
-
-multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
-                                    string asmop, bit Commutable = 0> {
-  let isCommutable = Commutable in {
-    def _D_2D : NeonI_ScalarPair,
-                Sched<[WriteFPALU, ReadFPALU]>;
-  }
-}
-
-multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
-                                     string asmop, bit Commutable = 0>
-  : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
-  let isCommutable = Commutable in {
-    def _S_2S : NeonI_ScalarPair,
-                Sched<[WriteFPALU, ReadFPALU]>;
-  }
-}
-
-// Scalar Reduce Addition Pairwise (Integer) with
-// Pattern to match llvm.arm.* intrinsic
-defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
-
-// Pattern to match llvm.aarch64.* intrinsic for
-// Scalar Reduce Addition Pairwise (Integer)
-def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
-          (ADDPvv_D_2D VPR128:$Rn)>;
-def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
-          (ADDPvv_D_2D VPR128:$Rn)>;
-
-// Scalar Reduce Addition Pairwise (Floating Point)
-defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
-
-// Scalar Reduce Maximum Pairwise (Floating Point)
-defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
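A brief aside on the pairwise forms defined above, not part of the deleted source: they reduce the two lanes of the source vector into one scalar, which is why the _D_2D defs pair a VPR128 source with an FPR64 result.

    // Illustrative semantics of the pairwise reduction:
    //   faddp d0, v1.2d   ==>   d0 = v1.d[0] + v1.d[1]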
-
-// Scalar Reduce Minimum Pairwise (Floating Point)
-defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
-
-// Scalar Reduce maxNum Pairwise (Floating Point)
-defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
-
-// Scalar Reduce minNum Pairwise (Floating Point)
-defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
-
-multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
-                                            Instruction INSTS,
-                                            Instruction INSTD> {
-  def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
-            (INSTS VPR64:$Rn)>;
-  def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
-            (INSTD VPR128:$Rn)>;
-}
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
-defm : Neon_ScalarPair_SD_size_patterns;
-
-defm : Neon_ScalarPair_SD_size_patterns;
-
-defm : Neon_ScalarPair_SD_size_patterns;
-
-defm : Neon_ScalarPair_SD_size_patterns;
-
-defm : Neon_ScalarPair_SD_size_patterns;
-
-def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
-          (FADDPvv_S_2S (v2f32
-            (EXTRACT_SUBREG
-              (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
-              sub_64)))>;
-
-// Scalar by element Arithmetic
-
-class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
-                                    string rmlane, bit u, bit szhi, bit szlo,
-                                    RegisterClass ResFPR, RegisterClass OpFPR,
-                                    RegisterOperand OpVPR, Operand OpImm>
-  : NeonI_ScalarXIndexedElem,
-    Sched<[WriteFPMul, ReadFPMul, ReadFPMul]> {
-  bits<3> Imm;
-  bits<5> MRm;
-}
-
-class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop,
-                                                    bits<4> opcode,
-                                                    string rmlane,
-                                                    bit u, bit szhi, bit szlo,
-                                                    RegisterClass ResFPR,
-                                                    RegisterClass OpFPR,
-                                                    RegisterOperand OpVPR,
-                                                    Operand OpImm>
-  : NeonI_ScalarXIndexedElem,
-    Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
-  let Constraints = "$src = $Rd";
-  bits<3> Imm;
-  bits<5> MRm;
-}
-
-// Scalar Floating Point multiply (scalar, by element)
-def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
-  0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
-  0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
-  let Inst{11} = Imm{0}; // h
-  let Inst{21} = 0b0;    // l
-  let Inst{20-16} = MRm;
-}
-
-// Scalar Floating Point multiply extended (scalar, by element)
-def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
-  0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
-  0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
-  let Inst{11} = Imm{0}; // h
-  let Inst{21} = 0b0;    // l
-  let Inst{20-16} = MRm;
-}
-
-multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
-                SDPatternOperator opnode,
-                Instruction INST,
-                ValueType ResTy, RegisterClass FPRC, ValueType OpTy,
-                Operand OpImm, ValueType OpNTy, ValueType ExTy,
-                Operand OpNImm> {
-
-  def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
-              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
-            (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
-              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
-            (ResTy (INST (ResTy FPRC:$Rn),
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
-              OpNImm:$Imm))>;
-
-  // swapped operands
-  def : Pat<(ResTy (opnode
-              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
-              (ResTy FPRC:$Rn))),
-            (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (opnode
-              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
-              (ResTy FPRC:$Rn))),
-            (ResTy (INST (ResTy FPRC:$Rn),
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
-              OpNImm:$Imm))>;
-}
-
-// Patterns for Scalar Floating Point multiply (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns;
-
-// Patterns for Scalar Floating Point multiply extended (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns;
-
-// Scalar Floating Point fused multiply-add (scalar, by element)
-def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
-  0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
-  0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
-  let Inst{11} = Imm{0}; // h
-  let Inst{21} = 0b0;    // l
-  let Inst{20-16} = MRm;
-}
-
-// Scalar Floating Point fused multiply-subtract (scalar, by element)
-def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
-  0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
-  0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
-  let Inst{11} = Imm{0}; // h
-  let Inst{21} = 0b0;    // l
-  let Inst{20-16} = MRm;
-}
-// We are allowed to match the fma instruction regardless of compile options.
-multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
-                Instruction FMLAI, Instruction FMLSI,
-                ValueType ResTy, RegisterClass FPRC, ValueType OpTy,
-                Operand OpImm, ValueType OpNTy, ValueType ExTy,
-                Operand OpNImm> {
-  // fmla
-  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
-              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
-              (ResTy FPRC:$Ra))),
-            (ResTy (FMLAI (ResTy FPRC:$Ra),
-              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
-              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
-              (ResTy FPRC:$Ra))),
-            (ResTy (FMLAI (ResTy FPRC:$Ra),
-              (ResTy FPRC:$Rn),
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
-              OpNImm:$Imm))>;
-
-  // swapped fmla operands
-  def : Pat<(ResTy (fma
-              (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
-              (ResTy FPRC:$Rn),
-              (ResTy FPRC:$Ra))),
-            (ResTy (FMLAI (ResTy FPRC:$Ra),
-              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (fma
-              (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
-              (ResTy FPRC:$Rn),
-              (ResTy FPRC:$Ra))),
-            (ResTy (FMLAI (ResTy FPRC:$Ra),
-              (ResTy FPRC:$Rn),
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
-              OpNImm:$Imm))>;
-
-  // fmls
-  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
-              (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
-              (ResTy FPRC:$Ra))),
-            (ResTy (FMLSI (ResTy FPRC:$Ra),
-              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
-              (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
-              (ResTy FPRC:$Ra))),
-            (ResTy (FMLSI (ResTy FPRC:$Ra),
-              (ResTy FPRC:$Rn),
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
-              OpNImm:$Imm))>;
-
-  // swapped fmls operands
-  def : Pat<(ResTy (fma
-              (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
-              (ResTy FPRC:$Rn),
-              (ResTy FPRC:$Ra))),
-            (ResTy (FMLSI (ResTy FPRC:$Ra),
-              (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (fma
-              (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
-              (ResTy FPRC:$Rn),
-              (ResTy FPRC:$Ra))),
-            (ResTy (FMLSI (ResTy FPRC:$Ra),
-              (ResTy FPRC:$Rn),
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
-              OpNImm:$Imm))>;
-}
-
-// Scalar Floating Point fused multiply-add and
-// multiply-subtract (scalar, by element)
-defm : Neon_ScalarXIndexedElem_FMA_Patterns;
-defm : Neon_ScalarXIndexedElem_FMA_Patterns;
-defm : Neon_ScalarXIndexedElem_FMA_Patterns;
-
-// Scalar Signed saturating doubling multiply long (scalar, by element)
-def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
-  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
-  0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
-  let Inst{11} = Imm{2}; // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
-  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
-  0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-
-multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
-                SDPatternOperator opnode,
-                Instruction INST,
-                ValueType ResTy, RegisterClass FPRC,
-                ValueType OpVTy, ValueType OpTy,
-                ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC,
-                Operand OpImm> {
-
-  def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
-              (OpVTy (scalar_to_vector
-                (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
-            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
-              (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
-            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
-
-  //swapped operands
-  def : Pat<(ResTy (opnode
-              (OpVTy (scalar_to_vector
-                (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
-              (OpVTy FPRC:$Rn))),
-            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (opnode
-              (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)),
-              (OpVTy FPRC:$Rn))),
-            (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
-}
-
-
-// Patterns for Scalar Signed saturating doubling
-// multiply long (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-
-// Scalar Signed saturating doubling multiply-add long (scalar, by element)
-def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
-  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
-  0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
-  let Inst{11} = Imm{2}; // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
-  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
-  0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-
-// Scalar Signed saturating doubling
-// multiply-subtract long (scalar, by element)
-def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
-  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
-  0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
-  let Inst{11} = Imm{2}; // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
-  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
-  0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-
-multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
-                SDPatternOperator opnode,
-                SDPatternOperator coreopnode,
-                Instruction INST,
-                ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
-                ValueType OpTy,
-                ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC,
-                Operand OpImm> {
-
-  def : Pat<(ResTy (opnode
-              (ResTy ResFPRC:$Ra),
-              (ResTy (coreopnode (OpTy FPRC:$Rn),
-                (OpTy (scalar_to_vector
-                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
-            (ResTy (INST (ResTy ResFPRC:$Ra),
-              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (opnode
-              (ResTy ResFPRC:$Ra),
-              (ResTy (coreopnode (OpTy FPRC:$Rn),
-                (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)))))),
-            (ResTy (INST (ResTy ResFPRC:$Ra),
-              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
-
-  // swapped operands
-  def : Pat<(ResTy (opnode
-              (ResTy ResFPRC:$Ra),
-              (ResTy (coreopnode
-                (OpTy (scalar_to_vector
-                  (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
-                (OpTy FPRC:$Rn))))),
-            (ResTy (INST (ResTy ResFPRC:$Ra),
-              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (opnode
-              (ResTy ResFPRC:$Ra),
-              (ResTy (coreopnode
-                (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)),
-                (OpTy FPRC:$Rn))))),
-            (ResTy (INST (ResTy ResFPRC:$Ra),
-              (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
-}
-
-// Patterns for Scalar Signed saturating
-// doubling multiply-add long (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
-
-// Patterns for Scalar Signed saturating
-// doubling multiply-sub long (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns;
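An illustrative aside on the lane-index encodings used by the by-element defs in this stretch, matching the let Inst{...} assignments above: for a .s lane on a 128-bit operand the two index bits split as h:l, and for a .h lane the three bits split as h:l:m.

    // Worked example: lane 3 of a .s operand -> Imm = 0b11,
    //   Inst{11} (h) = 1, Inst{21} (l) = 1;
    // lane 5 of a .h operand -> Imm = 0b101,
    //   Inst{11} (h) = 1, Inst{21} (l) = 0, Inst{20} (m) = 1.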
-// Scalar Signed saturating doubling multiply returning
-// high half (scalar, by element)
-def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
-  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
-  0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
-  let Inst{11} = Imm{2}; // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
-  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
-  0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-
-// Patterns for Scalar Signed saturating doubling multiply returning
-// high half (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-
-// Scalar Signed saturating rounding doubling multiply
-// returning high half (scalar, by element)
-def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
-  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
-  0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
-  let Inst{11} = Imm{2}; // h
-  let Inst{21} = Imm{1}; // l
-  let Inst{20} = Imm{0}; // m
-  let Inst{19-16} = MRm{3-0};
-}
-def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
-  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
-  let Inst{11} = 0b0;    // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
-  0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{11} = Imm{1}; // h
-  let Inst{21} = Imm{0}; // l
-  let Inst{20-16} = MRm;
-}
-
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns;
-
-// Scalar general arithmetic operation
-class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
-                                        Instruction INST>
-  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
-
-class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
-                                        Instruction INST>
-  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-        (INST FPR64:$Rn, FPR64:$Rm)>;
-
-class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
-                                        Instruction INST>
-  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
-                       (v1f64 FPR64:$Ra))),
-        (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-
-def : Neon_Scalar_GeneralMath3D_pattern;
-def : Neon_Scalar_GeneralMath3D_pattern;
-def : Neon_Scalar_GeneralMath3D_pattern;
-def : Neon_Scalar_GeneralMath3D_pattern;
-def : Neon_Scalar_GeneralMath3D_pattern;
-def : Neon_Scalar_GeneralMath3D_pattern;
-def : Neon_Scalar_GeneralMath3D_pattern;
-def : Neon_Scalar_GeneralMath3D_pattern;
-def : Neon_Scalar_GeneralMath3D_pattern;
-
-def : Neon_Scalar_GeneralMath2D_pattern;
-def : Neon_Scalar_GeneralMath2D_pattern;
-
-def : Neon_Scalar_GeneralMath4D_pattern;
-def : Neon_Scalar_GeneralMath4D_pattern;
-
-// Scalar Copy - DUP element to scalar
-class NeonI_Scalar_DUP<string asmop, string asmlane,
-                       RegisterClass ResRC, RegisterOperand VPRC,
-                       Operand OpImm>
-  : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
-                     asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
-                     [],
-                     NoItinerary>,
-    Sched<[WriteFPALU, ReadFPALU]> {
-  bits<4> Imm;
-}
-
-def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
-  let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
-}
-def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
-  let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
-}
-def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
-  let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
-}
-def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
-  let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
-}
-
-def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)),
-          (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>;
-def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)),
-          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>;
-def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)),
-          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>;
-def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)),
-          (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>;
-
-def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)),
-          (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>;
-def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)),
-          (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>;
-
-def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)),
-          (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>;
-def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)),
-          (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
-            1))>;
-
-def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)),
-          (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>;
-
-multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
-                                            ValueType ResTy, ValueType OpTy,
-                                            Operand OpLImm, ValueType NOpTy,
-                                            Operand OpNImm, ValueType ExTy> {
-
-  def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
-            (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;
-
-  def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
-            (ResTy (DUPI
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
-              OpNImm:$Imm))>;
-}
-
-// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
-defm : NeonI_Scalar_DUP_Ext_Vec_pattern;
-defm : NeonI_Scalar_DUP_Ext_Vec_pattern;
-defm : NeonI_Scalar_DUP_Ext_Vec_pattern;
-
-multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI,
-                                          ValueType ResTy, ValueType ElemTy,
-                                          ValueType OpTy, Operand OpImm,
-                                          ValueType OpNTy, Operand OpNImm,
-                                          ValueType ExTy> {
-
-  def : Pat<(ResTy (vector_insert (ResTy undef),
-              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
-              (neon_uimm0_bare:$Imm))),
-            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (vector_insert (ResTy undef),
-              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
-              (OpNImm:$Imm))),
-            (ResTy (DUPI
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
-              OpNImm:$Imm))>;
-}
-
-multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI,
-                                          ValueType ResTy, ValueType ElemTy,
-                                          ValueType OpTy, Operand OpImm,
-                                          ValueType OpNTy, Operand OpNImm,
-                                          ValueType ExTy> {
-
-  def : Pat<(ResTy (scalar_to_vector
-              (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
-            (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
-
-  def : Pat<(ResTy (scalar_to_vector
-              (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
-            (ResTy (DUPI
-              (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
-              OpNImm:$Imm))>;
-}
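As an aside (not part of the deleted source), the DUP defs above pack both the element size and the lane index into imm5, which the let Inst{20-16} assignments spell out; one worked example:

    // Worked example of the imm5 lane encoding:
    //   "dup s0, v1.s[2]"  ->  Imm = 0b10, imm5 = {1,0,1,0,0} = 0b10100.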
-// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
-// instructions.
-defm : NeonI_Scalar_DUP_Copy_pattern1;
-defm : NeonI_Scalar_DUP_Copy_pattern1;
-defm : NeonI_Scalar_DUP_Copy_pattern1;
-defm : NeonI_Scalar_DUP_Copy_pattern1;
-defm : NeonI_Scalar_DUP_Copy_pattern2;
-defm : NeonI_Scalar_DUP_Copy_pattern2;
-defm : NeonI_Scalar_DUP_Copy_pattern2;
-defm : NeonI_Scalar_DUP_Copy_pattern2;
-
-multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
-                                  Instruction DUPI, Operand OpImm,
-                                  RegisterClass ResRC> {
-  def : NeonInstAlias;
-}
-
-// Aliases for Scalar copy - DUP element (scalar)
-// FIXME: This is actually the preferred syntax but TableGen can't deal with
-// custom printing of aliases.
-defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
-defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
-defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
-defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
-
-multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh,
-                      ValueType ResTy, ValueType OpTy> {
-  def : Pat<(ResTy (GetLow VPR128:$Rn)),
-            (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
-  def : Pat<(ResTy (GetHigh VPR128:$Rn)),
-            (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
-}
-
-defm : NeonI_SDUP;
-defm : NeonI_SDUP;
-defm : NeonI_SDUP;
-defm : NeonI_SDUP;
-defm : NeonI_SDUP;
-defm : NeonI_SDUP;
-
-// The following is for sext/zext from v1xx to v1xx
-multiclass NeonI_ext<string prefix, SDNode ExtOp> {
-  // v1i32 -> v1i64
-  def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))),
-            (EXTRACT_SUBREG
-              (v2i64 (!cast<Instruction>(prefix # "_2S")
-                (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)),
-              sub_64)>;
-
-  // v1i16 -> v1i32
-  def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))),
-            (EXTRACT_SUBREG
-              (v4i32 (!cast<Instruction>(prefix # "_4H")
-                (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
-              sub_32)>;
-
-  // v1i8 -> v1i16
-  def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))),
-            (EXTRACT_SUBREG
-              (v8i16 (!cast<Instruction>(prefix # "_8B")
-                (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
-              sub_16)>;
-}
-
-defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
-defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
-
-// zext v1i8 -> v1i32
-def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))),
-          (v1i32 (EXTRACT_SUBREG
-            (v1i64 (SUBREG_TO_REG (i64 0),
-              (v1i8 (DUPbv_B
-                (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
-                0)),
-              sub_8)),
-            sub_32))>;
-
-// zext v1i8 -> v1i64
-def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
-          (v1i64 (SUBREG_TO_REG (i64 0),
-            (v1i8 (DUPbv_B
-              (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
-              0)),
-            sub_8))>;
-
-// zext v1i16 -> v1i64
-def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))),
-          (v1i64 (SUBREG_TO_REG (i64 0),
-            (v1i16 (DUPhv_H
-              (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)),
-              0)),
-            sub_16))>;
-
-// sext v1i8 -> v1i32
-def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))),
-          (EXTRACT_SUBREG
-            (v4i32 (SSHLLvvi_4H
-              (v4i16 (SUBREG_TO_REG (i64 0),
-                (v1i16 (EXTRACT_SUBREG
-                  (v8i16 (SSHLLvvi_8B
-                    (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
-                  sub_16)),
-                sub_16)), 0)),
-            sub_32)>;
-
-// sext v1i8 -> v1i64
-def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
-          (EXTRACT_SUBREG
-            (v2i64 (SSHLLvvi_2S
-              (v2i32 (SUBREG_TO_REG (i64 0),
-                (v1i32 (EXTRACT_SUBREG
-                  (v4i32 (SSHLLvvi_4H
-                    (v4i16 (SUBREG_TO_REG (i64 0),
-                      (v1i16 (EXTRACT_SUBREG
-                        (v8i16 (SSHLLvvi_8B
-                          (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
-                        sub_16)),
-                      sub_16)), 0)),
-                  sub_32)),
-                sub_32)), 0)),
-            sub_64)>;
-
-
-// sext v1i16 -> v1i64
-def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))),
-          (EXTRACT_SUBREG
-            (v2i64 (SSHLLvvi_2S
-              (v2i32 (SUBREG_TO_REG (i64 0),
-                (v1i32 (EXTRACT_SUBREG
-                  (v4i32 (SSHLLvvi_4H
-                    (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
-                  sub_32)),
-                sub_32)), 0)),
-            sub_64)>;
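A short aside on the extension patterns above, not part of the deleted source: with no direct d-register sign-extend, the sext chains are built from widening shift-left-long-by-zero steps, extracting the low lane between hops.

    // Illustrative lowering of sext v1i8 -> v1i64 via the patterns above:
    //   sshll v.8h, v.8b, #0   // i8  -> i16
    //   sshll v.4s, v.4h, #0   // i16 -> i32
    //   sshll v.2d, v.2s, #0   // i32 -> i64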
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// 64-bit vector bitcasts...
-
-def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v1f64 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v1f64 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1f64 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1f64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v1f64 VPR64:$src))), (v8i8 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1f64 VPR64:$src))), (f64 VPR64:$src)>;
-
-def : Pat<(v1f64 (bitconvert (v1i64 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v2f32 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v2i32 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v4i16 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v8i8 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (f64 VPR64:$src))), (v1f64 VPR64:$src)>;
-
-// ..and 128-bit vector bitcasts...
-
-def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
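An aside on the bitcast tables, not part of the deleted source: every same-width vector bitconvert above maps a register to itself and so lowers to no instruction; only the GPR-to-FPR reinterpretations in the scalar hunk that follows need a real move (FMOVxd, FMOVdx and friends).

    // The scheme in one line: source and destination share the register, e.g.
    //   def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;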
-// ...and scalar bitcasts...
-def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
-def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
-
-def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;
-
-def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
-
-def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
-
-def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
-
-def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
-
-def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
-def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-
-def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
-
-def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-
-def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-// Scalar Three Same
-
-def neon_uimm3 : Operand<i64>,
-                 ImmLeaf<i64, [{ return Imm < 8; }]> {
-  let ParserMatchClass = uimm3_asmoperand;
-  let PrintMethod = "printUImmHexOperand";
-}
= "printUImmHexOperand"; -} - -// Bitwise Extract -class NeonI_Extract op2, string asmop, - string OpS, RegisterOperand OpVPR, Operand OpImm> - : NeonI_BitExtract, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>{ - bits<4> Index; -} - -def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b", - VPR64, neon_uimm3> { - let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}}; -} - -def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b", - VPR128, neon_uimm4> { - let Inst{14-11} = Index; -} - -class NI_Extract - : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm), - (i64 OpImm:$Imm))), - (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>; - -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; -def : NI_Extract; - -// Table lookup -class NI_TBL op2, bits<2> len, bit op, - string asmop, string OpS, RegisterOperand OpVPR, - RegisterOperand VecList> - : NeonI_TBL, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - -// The vectors in look up table are always 16b -multiclass NI_TBL_pat len, bit op, string asmop, string List> { - def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64, - !cast(List # "16B_operand")>; - - def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128, - !cast(List # "16B_operand")>; -} - -defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">; -defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">; -defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">; -defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">; - -// Table lookup extension -class NI_TBX op2, bits<2> len, bit op, - string asmop, string OpS, RegisterOperand OpVPR, - RegisterOperand VecList> - : NeonI_TBL, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; -} - -// The vectors in look up table are always 16b -multiclass NI_TBX_pat len, bit op, string asmop, string List> { - def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64, - !cast(List # "16B_operand")>; - - def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128, - !cast(List # "16B_operand")>; -} - -defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">; -defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">; -defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">; -defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">; - -class NeonI_INS_main - : NeonI_copy<0b1, 0b0, 0b0011, - (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm), - asmop # "\t$Rd." 
# Res # "[$Imm], $Rn", - [(set (ResTy VPR128:$Rd), - (ResTy (vector_insert - (ResTy VPR128:$src), - (OpTy OpGPR:$Rn), - (OpImm:$Imm))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<4> Imm; - let Constraints = "$src = $Rd"; -} - -//Insert element (vector, from main) -def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} -def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64, - neon_uimm1_bare> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn", - (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn", - (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn", - (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn", - (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>; - -class Neon_INS_main_pattern - : Pat<(ResTy (vector_insert - (ResTy VPR64:$src), - (OpTy OpGPR:$Rn), - (OpImm:$Imm))), - (ResTy (EXTRACT_SUBREG - (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - OpGPR:$Rn, OpImm:$Imm)), sub_64))>; - -def INSbw_pattern : Neon_INS_main_pattern; -def INShw_pattern : Neon_INS_main_pattern; -def INSsw_pattern : Neon_INS_main_pattern; -def INSdx_pattern : Neon_INS_main_pattern; - -class NeonI_INS_element - : NeonI_insert<0b1, 0b1, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, - ResImm:$Immd, ResImm:$Immn), - asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]", - [], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - bits<4> Immd; - bits<4> Immn; -} - -//Insert element (vector, from element) -def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> { - let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1}; - let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}}; -} -def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> { - let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0}; - let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0}; - // bit 11 is unspecified, but should be set to zero. -} -def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> { - let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0}; - let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0}; - // bits 11-12 are unspecified, but should be set to zero. -} -def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> { - let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0}; - let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0}; - // bits 11-13 are unspecified, but should be set to zero. 
-} - -def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]", - (INSELb VPR128:$Rd, VPR128:$Rn, - neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]", - (INSELh VPR128:$Rd, VPR128:$Rn, - neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]", - (INSELs VPR128:$Rd, VPR128:$Rn, - neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>; -def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]", - (INSELd VPR128:$Rd, VPR128:$Rn, - neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>; - -multiclass Neon_INS_elt_pattern { -def : Pat<(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy (vector_extract - (ResTy VPR128:$Rn), - (StImm:$Immn))), - (StImm:$Immd))), - (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn), - StImm:$Immd, StImm:$Immn)>; - -def : Pat <(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy (vector_extract - (NaTy VPR64:$Rn), - (NaImm:$Immn))), - (StImm:$Immd))), - (INS (ResTy VPR128:$src), - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), - StImm:$Immd, NaImm:$Immn)>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy (vector_extract - (ResTy VPR128:$Rn), - (StImm:$Immn))), - (NaImm:$Immd))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy VPR128:$Rn), - NaImm:$Immd, StImm:$Immn)), - sub_64))>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy (vector_extract - (NaTy VPR64:$Rn), - (NaImm:$Immn))), - (NaImm:$Immd))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), - NaImm:$Immd, NaImm:$Immn)), - sub_64))>; -} - -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; -defm : Neon_INS_elt_pattern; - -multiclass Neon_INS_elt_float_pattern { -def : Pat <(ResTy (vector_insert - (ResTy VPR128:$src), - (MidTy OpFPR:$Rn), - (ResImm:$Imm))), - (INS (ResTy VPR128:$src), - (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)), - ResImm:$Imm, - (i64 0))>; - -def : Pat <(NaTy (vector_insert - (NaTy VPR64:$src), - (MidTy OpFPR:$Rn), - (ResImm:$Imm))), - (NaTy (EXTRACT_SUBREG - (ResTy (INS - (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), - (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)), - ResImm:$Imm, - (i64 0))), - sub_64))>; -} - -defm : Neon_INS_elt_float_pattern; -defm : Neon_INS_elt_float_pattern; - -class NeonI_SMOV - : NeonI_copy, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -//Signed integer move (main, from element) -def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare, - GPR64, i64> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -multiclass Neon_SMOVx_pattern { - def : Pat<(i64 (sext_inreg - (i64 (anyext - (i32 (vector_extract - (StTy VPR128:$Rn), 
(StImm:$Imm))))), - eleTy)), - (SMOVI VPR128:$Rn, StImm:$Imm)>; - - def : Pat<(i64 (sext - (i32 (vector_extract - (StTy VPR128:$Rn), (StImm:$Imm))))), - (SMOVI VPR128:$Rn, StImm:$Imm)>; - - def : Pat<(i64 (sext_inreg - (i64 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - - def : Pat<(i64 (sext_inreg - (i64 (anyext - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - - def : Pat<(i64 (sext - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))))), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; -} - -defm : Neon_SMOVx_pattern; -defm : Neon_SMOVx_pattern; -defm : Neon_SMOVx_pattern; - -class Neon_SMOVw_pattern - : Pat<(i32 (sext_inreg - (i32 (vector_extract - (NaTy VPR64:$Rn), (NaImm:$Imm))), - eleTy)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - -def : Neon_SMOVw_pattern; -def : Neon_SMOVw_pattern; - -class NeonI_UMOV - : NeonI_copy, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -//Unsigned integer move (main, from element) -def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} -def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} -def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare, - GPR32, i32> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} -def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare, - GPR64, i64> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]", - (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>; -def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]", - (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>; - -class Neon_UMOV_pattern - : Pat<(ResTy (vector_extract - (NaTy VPR64:$Rn), NaImm:$Imm)), - (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - NaImm:$Imm)>; - -def : Neon_UMOV_pattern; -def : Neon_UMOV_pattern; -def : Neon_UMOV_pattern; - -def : Pat<(i32 (and - (i32 (vector_extract - (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))), - 255)), - (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))), - 65535)), - (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>; - -def : Pat<(i64 (zext - (i32 (vector_extract - (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))), - (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))), - 255)), - (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm3_bare:$Imm)>; - -def : Pat<(i32 (and - (i32 (vector_extract - (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))), - 65535)), - (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm2_bare:$Imm)>; - -def : Pat<(i64 (zext - (i32 (vector_extract - (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))), - (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), - neon_uimm0_bare:$Imm)>; - -// Additional copy patterns for scalar types -def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))), - (UMOVwb (v16i8 - (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>; - -def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))), - (UMOVwh (v8i16 - (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>; - -def : Pat<(i32 
(vector_extract (v1i32 FPR32:$Rn), (i64 0))), - (FMOVws FPR32:$Rn)>; - -def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))), - (FMOVxd FPR64:$Rn)>; - -def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))), - (f64 FPR64:$Rn)>; - -def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)), - (v1i8 (EXTRACT_SUBREG (v16i8 - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_8))>; - -def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)), - (v1i16 (EXTRACT_SUBREG (v8i16 - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_16))>; - -def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), - (FMOVsw $src)>; - -def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), - (FMOVdx $src)>; - -def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), - (v8i8 (EXTRACT_SUBREG (v16i8 - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), - (v4i16 (EXTRACT_SUBREG (v8i16 - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)), - (v2i32 (EXTRACT_SUBREG (v16i8 - (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))), - sub_64))>; - -def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), - (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), - (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)), - (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)), - (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>; - -def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>; -def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>; - -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), - (v1f64 FPR64:$Rn)>; - -def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), - (f64 FPR64:$src), sub_64)>; - -class NeonI_DUP_Elt - : NeonI_copy, - Sched<[WriteFPALU, ReadFPALU]> { - bits<4> Imm; -} - -def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} - -def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} - -def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, - neon_uimm1_bare> { - let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; -} - -def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, - neon_uimm4_bare> { - let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; -} - -def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, - neon_uimm3_bare> { - let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; -} - -def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, - neon_uimm2_bare> { - let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; -} - -multiclass NeonI_DUP_Elt_pattern { -def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)), - (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>; - -def : Pat<(ResTy (Neon_vduplane - (NaTy VPR64:$Rn), OpNImm:$Imm)), - (ResTy (DUPELT - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>; -} -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : 
NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; -defm : NeonI_DUP_Elt_pattern; - -def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))), - (v2f32 (DUPELT2s - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (i64 0)))>; -def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))), - (v4f32 (DUPELT4s - (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (i64 0)))>; -def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))), - (v2f64 (DUPELT2d - (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64), - (i64 0)))>; - -multiclass NeonI_DUP_pattern { -def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)), - (ResTy (DUPELT - (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>; -} - -defm : NeonI_DUP_pattern; -defm : NeonI_DUP_pattern; -defm : NeonI_DUP_pattern; -defm : NeonI_DUP_pattern; -defm : NeonI_DUP_pattern; - -class NeonI_DUP - : NeonI_copy, - Sched<[WriteFPALU, ReadFPALU]>; - -def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> { - let Inst{20-16} = 0b00001; - // bits 17-20 are unspecified, but should be set to zero. -} - -def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> { - let Inst{20-16} = 0b00010; - // bits 18-20 are unspecified, but should be set to zero. -} - -def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> { - let Inst{20-16} = 0b00100; - // bits 19-20 are unspecified, but should be set to zero. -} - -def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> { - let Inst{20-16} = 0b01000; - // bit 20 is unspecified, but should be set to zero. -} - -def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> { - let Inst{20-16} = 0b00001; - // bits 17-20 are unspecified, but should be set to zero. -} - -def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> { - let Inst{20-16} = 0b00010; - // bits 18-20 are unspecified, but should be set to zero. -} - -def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> { - let Inst{20-16} = 0b00100; - // bits 19-20 are unspecified, but should be set to zero. 
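The DUP (element) and DUP (general) definitions above broadcast either a vector lane or a GPR value across all lanes. A hedged ACLE sketch of the two shapes (illustration only, not from the patch; assumes <arm_neon.h> on AArch64):

    #include <arm_neon.h>

    /* Broadcast lane 1 across a 128-bit vector; the Neon_vduplane
       patterns would normally select DUPELT4s: dup v0.4s, v1.s[1] */
    float32x4_t dup_lane(float32x4_t v) {
      return vdupq_laneq_f32(v, 1);
    }

    /* Broadcast a GPR value; would normally select DUP4s:
       dup v0.4s, w0 */
    int32x4_t dup_gpr(int32_t x) {
      return vdupq_n_s32(x);
    }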
-} - -// Patterns for CONCAT_VECTORS -multiclass Concat_Vector_Pattern { -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)), - (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>; -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))), - (INSELd - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)), - (i64 1), - (i64 0))>; -def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))), - (DUPELT2d - (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (i64 0))>; -} - -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; -defm : Concat_Vector_Pattern; - -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), undef)), - (v2i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32))>; -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (v4i32 (INSELs - (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)), - (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - (i64 1), - (i64 0))), - sub_64)>; -def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))), - (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>; - -// Patterns for EXTRACT_SUBVECTOR -def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))), - (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))), - (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))), - (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))), - (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))), - (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; -def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))), - (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>; - -// The following are for instruction class (3V Elem) - -// Variant 1 - -class NI_2VE size, bits<4> opcode, - string asmop, string ResS, string OpS, string EleOpS, - Operand OpImm, RegisterOperand ResVPR, - RegisterOperand OpVPR, RegisterOperand EleOpVPR> - : NeonI_2VElem, - Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> { - bits<3> Index; - bits<5> Re; - - let Constraints = "$src = $Rd"; -} - -multiclass NI_2VE_v1 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15.
- def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", - neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">; -defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">; - -// Pattern for lane in 128-bit vector -class NI_2VE_laneq - : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VE_lane - : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST ResVPR:$src, OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VE_v1_pat -{ - def : NI_2VE_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>; - - def : NI_2VE_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>; - - def : NI_2VE_laneq(subop # "_4h8h"), neon_uimm3_bare, - op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_8h8h"), neon_uimm3_bare, - op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>; - - def : NI_2VE_lane(subop # "_4h8h"), neon_uimm2_bare, - op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; -} - -defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>; -defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>; - -class NI_2VE_2op size, bits<4> opcode, - string asmop, string ResS, string OpS, string EleOpS, - Operand OpImm, RegisterOperand ResVPR, - RegisterOperand OpVPR, RegisterOperand EleOpVPR> - : NeonI_2VElem, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - bits<3> Index; - bits<5> Re; -} - -multiclass NI_2VE_v1_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
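The MLAvve/MLSvve patterns above fold a Neon_vduplane of the element operand into the by-element form of the instruction. An illustrative ACLE sketch (not part of this patch; assumes <arm_neon.h> on AArch64):

    #include <arm_neon.h>

    /* a + b * c[3]; the lane broadcast is folded into MLA (by element):
         mla v0.4s, v1.4s, v2.s[3] */
    int32x4_t mla_by_lane(int32x4_t a, int32x4_t b, int32x4_t c) {
      return vmlaq_laneq_s32(a, b, c, 3);
    }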
- def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", - neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; -defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; -defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; -} - -// Pattern for lane in 128-bit vector -class NI_2VE_mul_laneq - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VE_mul_lane - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -multiclass NI_2VE_mul_v1_pat { - def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR128, v2i32, v2i32, v4i32>; - - def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, v4i32, v4i32, v4i32>; - - def : NI_2VE_mul_laneq(subop # "_4h8h"), neon_uimm3_bare, - op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; - - def : NI_2VE_mul_laneq(subop # "_8h8h"), neon_uimm3_bare, - op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, v2i32, v2i32, v2i32>; - - def : NI_2VE_mul_lane(subop # "_4h8h"), neon_uimm2_bare, - op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; -} - -defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>; -defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>; -defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>; - -// Variant 2 - -multiclass NI_2VE_v2_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // _1d2d doesn't exist! 
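MULve/SQDMULHve/SQRDMULHve above are the two-operand by-element forms; note that the 16-bit variants take VPR128Lo because the encoding can only reach v0-v15 for H elements. An ACLE sketch (illustration only, not from the patch; assumes <arm_neon.h> on AArch64):

    #include <arm_neon.h>

    /* b * c[1] by element: mul v0.4s, v1.4s, v2.s[1] */
    int32x4_t mul_by_lane(int32x4_t b, int32x4_t c) {
      return vmulq_laneq_s32(b, c, 1);
    }

    /* Saturating doubling multiply-high by element; for H elements the
       element register is limited to v0-v15 (VPR128Lo above):
         sqdmulh v0.8h, v1.8h, v2.h[7] */
    int16x8_t sqdmulh_by_lane(int16x8_t b, int16x8_t c) {
      return vqdmulhq_laneq_s16(b, c, 7);
    }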
- - def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", - neon_uimm1_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{0}}; - let Inst{21} = 0b0; - let Inst{20-16} = Re; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">; -defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">; -} - -class NI_2VE_mul_lane_2d - : Pat<(ResTy (op (OpTy OpVPR:$Rn), - (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))), - (INST OpVPR:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>; - -multiclass NI_2VE_mul_v2_pat { - def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, - op, VPR64, VPR128, v2f32, v2f32, v4f32>; - - def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, - op, VPR128, VPR128, v4f32, v4f32, v4f32>; - - def : NI_2VE_mul_laneq(subop # "_2d2d"), neon_uimm1_bare, - op, VPR128, VPR128, v2f64, v2f64, v2f64>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, - op, VPR64, VPR64, v2f32, v2f32, v2f32>; - - def : NI_2VE_mul_lane_2d(subop # "_2d2d"), neon_uimm1_bare, - op, VPR128, VPR64, v2f64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; -} - -defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>; -defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>; - -def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))), - (v2f32 VPR64:$Rn))), - (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))), - (v4f32 VPR128:$Rn))), - (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))), - (v2f64 VPR128:$Rn))), - (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>; - -// The following are patterns using fma -// -ffp-contract=fast generates fma - -multiclass NI_2VE_v2 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", - neon_uimm2_bare, VPR64, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // _1d2d doesn't exist!
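As the comment above notes, these FMLA/FMLS by-element patterns match fma nodes, which front ends emit for the vfma intrinsics or under -ffp-contract=fast. A hedged ACLE sketch (not part of this patch; assumes <arm_neon.h> on AArch64):

    #include <arm_neon.h>

    /* a + b * n[2] as a single fused multiply-add; an fma node whose
       multiplicand is a duplicated lane would normally select
       FMLA (by element): fmla v0.4s, v1.4s, v2.s[2] */
    float32x4_t fmla_by_lane(float32x4_t a, float32x4_t b, float32x4_t n) {
      return vfmaq_laneq_f32(a, b, n, 2);
    }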
- - def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", - neon_uimm1_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{0}}; - let Inst{21} = 0b0; - let Inst{20-16} = Re; - } -} - -defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">; -defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">; - -// Pattern for lane in 128-bit vector -class NI_2VEswap_laneq - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane 0 -class NI_2VEfma_lane0 - : Pat<(ResTy (op (ResTy ResVPR:$Rn), - (ResTy (Neon_vdup (f32 FPR32:$Re))), - (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -// Pattern for lane in 64-bit vector -class NI_2VEswap_lane - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEswap_lane_2d2d - : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))), - (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>; - - -multiclass NI_2VE_fma_v2_pat { - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEfma_lane0(subop # "_2s4s"), - op, VPR64, v2f32>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEfma_lane0(subop # "_4s4s"), - op, VPR128, v4f32>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; -} - -defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>; - -// Pattern for lane 0 -class NI_2VEfms_lane0 - : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)), - (ResTy (Neon_vdup (f32 FPR32:$Re))), - (ResTy ResVPR:$src))), - (INST ResVPR:$src, ResVPR:$Rn, - (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; - -multiclass NI_2VE_fms_v2_pat -{ - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_2s4s"), - neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEfms_lane0(subop # "_2s4s"), - op, VPR64, v2f32>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_4s4s"), - neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEfms_lane0(subop # "_4s4s"), - op, VPR128, v4f32>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, 
node:$RHS))>>; - - def : NI_2VEswap_laneq(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(fneg (Neon_vduplane - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane(subop # "_2s4s"), - neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, - BinOpFrag<(Neon_vduplane - (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_lane(subop # "_4s4s"), - neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, - BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane(subop # "_4s4s"), - neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, - BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(fneg (Neon_combine_2d - node:$LHS, node:$RHS))>>; - - def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), - neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, - BinOpFrag<(Neon_combine_2d - (fneg node:$LHS), (fneg node:$RHS))>>; -} - -defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>; - -// Variant 3: Long type -// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S -// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S - -multiclass NI_2VE_v3 opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", - neon_uimm2_bare, VPR128, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. - def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", - neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">; -defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">; -defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">; -defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">; -defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">; -defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">; - -multiclass NI_2VE_v3_2op opcode, string asmop> { - // vector register class for element is always 128-bit to cover the max index - def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", - neon_uimm2_bare, VPR128, VPR64, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", - neon_uimm2_bare, VPR128, VPR128, VPR128> { - let Inst{11} = {Index{1}}; - let Inst{21} = {Index{0}}; - let Inst{20-16} = Re; - } - - // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
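SMLALvve and the related defms above are the widening (long) by-element forms; the asmop # "2" variants consume the high half of the source vector via the hiop/Neon_High* fragments. An ACLE sketch (illustration only; assumes <arm_neon.h> on AArch64 and the availability of the vmlal_high_* intrinsic family):

    #include <arm_neon.h>

    /* Widening multiply-accumulate by element:
         smlal v0.4s, v1.4h, v2.h[3] */
    int32x4_t smlal_by_lane(int32x4_t acc, int16x4_t b, int16x4_t c) {
      return vmlal_lane_s16(acc, b, c, 3);
    }

    /* High-half variant, matching the hiop patterns:
         smlal2 v0.4s, v1.8h, v2.h[3] */
    int32x4_t smlal2_by_lane(int32x4_t acc, int16x8_t b, int16x4_t c) {
      return vmlal_high_lane_s16(acc, b, c, 3);
    }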
- def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", - neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } - - def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", - neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { - let Inst{11} = {Index{2}}; - let Inst{21} = {Index{1}}; - let Inst{20} = {Index{0}}; - let Inst{19-16} = Re{3-0}; - } -} - -let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in { -defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">; -defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">; -defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">; -} - -def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), - (FMOVdd $src)>; - -// Pattern for lane in 128-bit vector -class NI_2VEL2_laneq - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEL2_lane - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$src, VPR128:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; - -class NI_2VEL2_lane0 - : Pat<(ResTy (op (ResTy VPR128:$src), - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), - (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>; - -multiclass NI_2VEL_v3_pat { - def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, - op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, - op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>; - - def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, - op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, - op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_lane0(subop # "_4s8h"), - op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_lane0(subop # "_2d4s"), - op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, - op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; - - def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, - op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>; - - def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, - op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, - op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>; -defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>; -defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>; -defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>; - -// Pattern for lane in 128-bit vector -class NI_2VEL2_mul_laneq - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; - -// Pattern for lane in 64-bit vector -class NI_2VEL2_mul_lane - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vduplane - (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), - (INST VPR128:$Rn, - (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 
OpImm:$Index)>; - -// Pattern for fixed lane 0 -class NI_2VEL2_mul_lane0 - : Pat<(ResTy (op - (HalfOpTy (hiop (OpTy VPR128:$Rn))), - (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), - (INST VPR128:$Rn, (DupInst $Re), 0)>; - -multiclass NI_2VEL_mul_v3_pat { - def : NI_2VE_mul_laneq(subop # "_4s4h"), neon_uimm3_bare, - op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; - - def : NI_2VE_mul_laneq(subop # "_2d2s"), neon_uimm2_bare, - op, VPR64, VPR128, v2i64, v2i32, v4i32>; - - def : NI_2VEL2_mul_laneq(subop # "_4s8h"), neon_uimm3_bare, - op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_mul_laneq(subop # "_2d4s"), neon_uimm2_bare, - op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_mul_lane0(subop # "_4s8h"), - op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_mul_lane0(subop # "_2d4s"), - op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_mul_lane(subop # "_4s4h"), neon_uimm2_bare, - op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; - - def : NI_2VE_mul_lane(subop # "_2d2s"), neon_uimm1_bare, - op, VPR64, VPR64, v2i64, v2i32, v2i32>; - - def : NI_2VEL2_mul_lane(subop # "_4s8h"), neon_uimm2_bare, - op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_mul_lane(subop # "_2d4s"), neon_uimm1_bare, - op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>; -defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>; -defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>; - -multiclass NI_qdma { - def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (op node:$Ra, - (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; - - def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), - (op node:$Ra, - (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; -} - -defm Neon_qdmlal : NI_qdma; -defm Neon_qdmlsl : NI_qdma; - -multiclass NI_2VEL_v3_qdma_pat { - def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, - !cast(op # "_4s"), VPR128, VPR64, VPR128Lo, - v4i32, v4i16, v8i16>; - - def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, - !cast(op # "_2d"), VPR128, VPR64, VPR128, - v2i64, v2i32, v4i32>; - - def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, - !cast(op # "_4s"), VPR128Lo, - v4i32, v8i16, v8i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, - !cast(op # "_2d"), VPR128, - v2i64, v4i32, v4i32, v2i32, Neon_High4S>; - - def : NI_2VEL2_lane0(subop # "_4s8h"), - !cast(op # "_4s"), - v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; - - def : NI_2VEL2_lane0(subop # "_2d4s"), - !cast(op # "_2d"), - v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; - - // Index can only be half of the max value for lane in 64-bit vector - - def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, - !cast(op # "_4s"), VPR128, VPR64, VPR64Lo, - v4i32, v4i16, v4i16>; - - def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, - !cast(op # "_2d"), VPR128, VPR64, VPR64, - v2i64, v2i32, v2i32>; - - def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, - !cast(op # "_4s"), VPR64Lo, - v4i32, v8i16, v4i16, v4i16, Neon_High8H>; - - def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, - !cast(op # "_2d"), VPR64, - v2i64, v4i32, v2i32, v2i32, Neon_High4S>; -} - -defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">; -defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">; - -// End of implementation for instruction class (3V 
Elem) - -class NeonI_REV size, bit Q, bit U, - bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy, - SDPatternOperator Neon_Rev> - : NeonI_2VMisc, - Sched<[WriteFPALU, ReadFPALU]>; - -def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128, - v16i8, Neon_rev64>; -def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128, - v8i16, Neon_rev64>; -def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128, - v4i32, Neon_rev64>; -def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64, - v8i8, Neon_rev64>; -def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64, - v4i16, Neon_rev64>; -def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64, - v2i32, Neon_rev64>; - -def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>; -def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>; - -def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128, - v16i8, Neon_rev32>; -def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128, - v8i16, Neon_rev32>; -def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64, - v8i8, Neon_rev32>; -def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64, - v4i16, Neon_rev32>; - -def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128, - v16i8, Neon_rev16>; -def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64, - v8i8, Neon_rev16>; - -multiclass NeonI_PairwiseAdd opcode, - SDPatternOperator Neon_Padd> { - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.16b", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.8b", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.8h", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.4h", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.4s", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.1d, $Rn.2s", - [(set (v1i64 VPR64:$Rd), - (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010, - int_arm_neon_vpaddls>; -defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010, - int_arm_neon_vpaddlu>; - -def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))), - (SADDLP2s1d $Rn)>; -def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))), - (UADDLP2s1d $Rn)>; - -multiclass NeonI_PairwiseAddAcc opcode, - SDPatternOperator Neon_Padd> { - let Constraints = "$src = $Rd" in { - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), 
- asmop # "\t$Rd.8h, $Rn.16b", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Padd - (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.8b", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Padd - (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.8h", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Padd - (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.4h", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Padd - (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.4s", - [(set (v2i64 VPR128:$Rd), - (v2i64 (Neon_Padd - (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.1d, $Rn.2s", - [(set (v1i64 VPR64:$Rd), - (v1i64 (Neon_Padd - (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110, - int_arm_neon_vpadals>; -defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110, - int_arm_neon_vpadalu>; - -multiclass NeonI_2VMisc_BHSDsize_1Arg opcode> { - def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>; -defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>; -defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>; -defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>; - -multiclass NeonI_2VMisc_BHSD_1Arg_Pattern { - def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))), - (v16i8 (!cast(Prefix # 16b) (v16i8 VPR128:$Rn)))>; - - def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))), - (v8i16 (!cast(Prefix # 8h) (v8i16 VPR128:$Rn)))>; - - def : 
Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))), - (v4i32 (!cast(Prefix # 4s) (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))), - (v2i64 (!cast(Prefix # 2d) (v2i64 VPR128:$Rn)))>; - - def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))), - (v8i8 (!cast(Prefix # 8b) (v8i8 VPR64:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))), - (v4i16 (!cast(Prefix # 4h) (v4i16 VPR64:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))), - (v2i32 (!cast(Prefix # 2s) (v2i32 VPR64:$Rn)))>; -} - -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>; -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>; -defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>; - -def : Pat<(v16i8 (sub - (v16i8 Neon_AllZero), - (v16i8 VPR128:$Rn))), - (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (sub - (v8i8 Neon_AllZero), - (v8i8 VPR64:$Rn))), - (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>; -def : Pat<(v8i16 (sub - (v8i16 (bitconvert (v16i8 Neon_AllZero))), - (v8i16 VPR128:$Rn))), - (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>; -def : Pat<(v4i16 (sub - (v4i16 (bitconvert (v8i8 Neon_AllZero))), - (v4i16 VPR64:$Rn))), - (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>; -def : Pat<(v4i32 (sub - (v4i32 (bitconvert (v16i8 Neon_AllZero))), - (v4i32 VPR128:$Rn))), - (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>; -def : Pat<(v2i32 (sub - (v2i32 (bitconvert (v8i8 Neon_AllZero))), - (v2i32 VPR64:$Rn))), - (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>; -def : Pat<(v2i64 (sub - (v2i64 (bitconvert (v16i8 Neon_AllZero))), - (v2i64 VPR128:$Rn))), - (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>; - -multiclass NeonI_2VMisc_BHSDsize_2Args opcode> { - let Constraints = "$src = $Rd" in { - def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>; -defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>; - -multiclass NeonI_2VMisc_BHSD_2Args_Pattern { - def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))), - (v16i8 (!cast(Prefix # 16b) - (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>; - - def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))), - (v8i16 (!cast(Prefix # 8h) - (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>; - - def : Pat<(v4i32 (Neon_Op (v4i32 
VPR128:$src), (v4i32 VPR128:$Rn))), - (v4i32 (!cast(Prefix # 4s) - (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))), - (v2i64 (!cast(Prefix # 2d) - (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>; - - def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))), - (v8i8 (!cast(Prefix # 8b) - (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))), - (v4i16 (!cast(Prefix # 4h) - (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))), - (v2i32 (!cast(Prefix # 2s) - (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>; -} - -defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>; -defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>; - -multiclass NeonI_2VMisc_BHSsizes { - def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [(set (v16i8 VPR128:$Rd), - (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8h, $Rn.8h", - [(set (v8i16 VPR128:$Rd), - (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [(set (v8i8 VPR64:$Rd), - (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4h, $Rn.4h", - [(set (v4i16 VPR64:$Rd), - (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>; -defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>; - -multiclass NeonI_2VMisc_Bsize size, - bits<5> Opcode> { - def 16b : NeonI_2VMisc<0b1, U, size, Opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.16b, $Rn.16b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8b : NeonI_2VMisc<0b0, U, size, Opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.8b, $Rn.8b", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>; -defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>; -defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>; - -def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b", - (NOT16b VPR128:$Rd, VPR128:$Rn), 0>; -def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b", - (NOT8b VPR64:$Rd, VPR64:$Rn), 0>; - -def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))), - (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))), - (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>; - -def : Pat<(v16i8 (xor - (v16i8 VPR128:$Rn), - (v16i8 Neon_AllOne))), - (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (xor - (v8i8 VPR64:$Rn), - (v8i8 Neon_AllOne))), - (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>; -def 
: Pat<(v8i16 (xor - (v8i16 VPR128:$Rn), - (v8i16 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; -def : Pat<(v4i16 (xor - (v4i16 VPR64:$Rn), - (v4i16 (bitconvert (v8i8 Neon_AllOne))))), - (NOT8b VPR64:$Rn)>; -def : Pat<(v4i32 (xor - (v4i32 VPR128:$Rn), - (v4i32 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; -def : Pat<(v2i32 (xor - (v2i32 VPR64:$Rn), - (v2i32 (bitconvert (v8i8 Neon_AllOne))))), - (NOT8b VPR64:$Rn)>; -def : Pat<(v2i64 (xor - (v2i64 VPR128:$Rn), - (v2i64 (bitconvert (v16i8 Neon_AllOne))))), - (NOT16b VPR128:$Rn)>; - -def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))), - (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>; -def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))), - (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>; - -multiclass NeonI_2VMisc_SDsizes opcode, - SDPatternOperator Neon_Op> { - def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4f32 VPR128:$Rd), - (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [(set (v2f64 VPR128:$Rd), - (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2f32 VPR64:$Rd), - (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>; -defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>; - -multiclass NeonI_2VMisc_HSD_Narrow opcode> { - def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.8b, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - let Constraints = "$Rd = $src" in { - def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.16b, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>; -defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>; -defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>; -defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>; - -multiclass NeonI_2VMisc_Narrow_Patterns { - def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))), - (v8i8 (!cast(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>; - - def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))), - (v4i16 (!cast(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>; - - def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))), - (v2i32 (!cast(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>; - - def : Pat<(v16i8 (concat_vectors - (v8i8 VPR64:$src), - (v8i8 
(Neon_Op (v8i16 VPR128:$Rn))))), - (!cast(Prefix # 8h16b) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; - - def : Pat<(v8i16 (concat_vectors - (v4i16 VPR64:$src), - (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))), - (!cast(Prefix # 4s8h) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; - - def : Pat<(v4i32 (concat_vectors - (v2i32 VPR64:$src), - (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))), - (!cast(Prefix # 2d4s) - (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), - VPR128:$Rn)>; -} - -defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>; -defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>; -defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>; -defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>; - -multiclass NeonI_2VMisc_SHIFT opcode> { - let DecoderMethod = "DecodeSHLLInstruction" in { - def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact8:$Imm), - asmop # "\t$Rd.8h, $Rn.8b, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact16:$Imm), - asmop # "\t$Rd.4s, $Rn.4h, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode, - (outs VPR128:$Rd), - (ins VPR64:$Rn, uimm_exact32:$Imm), - asmop # "\t$Rd.2d, $Rn.2s, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact8:$Imm), - asmop # "2\t$Rd.8h, $Rn.16b, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact16:$Imm), - asmop # "2\t$Rd.4s, $Rn.8h, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, - (outs VPR128:$Rd), - (ins VPR128:$Rn, uimm_exact32:$Imm), - asmop # "2\t$Rd.2d, $Rn.4s, $Imm", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - } -} - -defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>; - -class NeonI_SHLL_Patterns - : Pat<(DesTy (shl - (DesTy (ExtOp (OpTy VPR64:$Rn))), - (DesTy (Neon_vdup - (i32 Neon_Imm:$Imm))))), - (!cast("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>; - -class NeonI_SHLL_High_Patterns - : Pat<(DesTy (shl - (DesTy (ExtOp - (OpTy (GetHigh VPR128:$Rn)))), - (DesTy (Neon_vdup - (i32 Neon_Imm:$Imm))))), - (!cast("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>; - -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; -def : NeonI_SHLL_High_Patterns; - -multiclass NeonI_2VMisc_SD_Narrow opcode> { - def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - let Constraints = "$src = $Rd" in { - def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.8h, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - 
(outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>; - } -} - -defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>; - -multiclass NeonI_2VMisc_Narrow_Pattern { - - def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))), - (!cast(prefix # "4s4h") (v4f32 VPR128:$Rn))>; - - def : Pat<(v8i16 (concat_vectors - (v4i16 VPR64:$src), - (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))), - (!cast(prefix # "4s8h") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - (v4f32 VPR128:$Rn))>; - - def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))), - (!cast(prefix # "2d2s") (v2f64 VPR128:$Rn))>; - - def : Pat<(v4f32 (concat_vectors - (v2f32 VPR64:$src), - (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))), - (!cast(prefix # "2d4s") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - (v2f64 VPR128:$Rn))>; -} - -defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>; - -multiclass NeonI_2VMisc_D_Narrow opcode> { - def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR64:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.2d", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - } - - def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))), - (!cast(prefix # "2d2s") VPR128:$Rn)>; - - def : Pat<(v4f32 (concat_vectors - (v2f32 VPR64:$src), - (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))), - (!cast(prefix # "2d4s") - (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), - VPR128:$Rn)>; -} - -defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>; - -def Neon_High4Float : PatFrag<(ops node:$in), - (extract_subvector (v4f32 node:$in), (iPTR 2))>; - -multiclass NeonI_2VMisc_HS_Extend opcode> { - def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.4s, $Rn.4h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2d, $Rn.2s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "2\t$Rd.4s, $Rn.8h", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "2\t$Rd.2d, $Rn.4s", - [], NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>; - -multiclass NeonI_2VMisc_Extend_Pattern { - def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))), - (!cast(prefix # "4h4s") VPR64:$Rn)>; - - def : Pat<(v4f32 (int_arm_neon_vcvthf2fp - (v4i16 (Neon_High8H - (v8i16 VPR128:$Rn))))), - (!cast(prefix # "8h4s") VPR128:$Rn)>; - - def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))), - (!cast(prefix # "2s2d") VPR64:$Rn)>; - - def : Pat<(v2f64 (fextend - (v2f32 (Neon_High4Float - (v4f32 VPR128:$Rn))))), - (!cast(prefix # "4s2d") VPR128:$Rn)>; -} - -defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">; - -multiclass NeonI_2VMisc_SD_Conv opcode, - ValueType ResTy4s, ValueType OpTy4s, - ValueType ResTy2d, ValueType OpTy2d, - ValueType ResTy2s, ValueType OpTy2s, - SDPatternOperator Neon_Op> { - - def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, - (outs VPR128:$Rd), (ins 
VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (ResTy4s VPR128:$Rd), - (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.2d, $Rn.2d", - [(set (ResTy2d VPR128:$Rd), - (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (ResTy2s VPR64:$Rd), - (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -multiclass NeonI_2VMisc_fp_to_int opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv; -} - -defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010, - int_arm_neon_vcvtns>; -defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010, - int_arm_neon_vcvtnu>; -defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010, - int_arm_neon_vcvtps>; -defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010, - int_arm_neon_vcvtpu>; -defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011, - int_arm_neon_vcvtms>; -defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011, - int_arm_neon_vcvtmu>; -defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>; -defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>; -defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100, - int_arm_neon_vcvtas>; -defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100, - int_arm_neon_vcvtau>; - -multiclass NeonI_2VMisc_int_to_fp opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv; -} - -defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>; -defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>; - -multiclass NeonI_2VMisc_fp_to_fp opcode, SDPatternOperator Neon_Op> { - defm _ : NeonI_2VMisc_SD_Conv; -} - -defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000, - int_aarch64_neon_frintn>; -defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>; -defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>; -defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>; -defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>; -defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>; -defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>; -defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101, - int_arm_neon_vrecpe>; -defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101, - int_arm_neon_vrsqrte>; -let SchedRW = [WriteFPSqrt, ReadFPSqrt] in { -defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>; -} - -multiclass NeonI_2VMisc_S_Conv opcode, SDPatternOperator Neon_Op> { - def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, - (outs VPR128:$Rd), (ins VPR128:$Rn), - asmop # "\t$Rd.4s, $Rn.4s", - [(set (v4i32 VPR128:$Rd), - (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; - - def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, - (outs VPR64:$Rd), (ins VPR64:$Rn), - asmop # "\t$Rd.2s, $Rn.2s", - [(set (v2i32 VPR64:$Rd), - (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], - NoItinerary>, - Sched<[WriteFPALU, ReadFPALU]>; -} - -defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100, - int_arm_neon_vrecpe>; -defm URSQRTE : 
NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100, - int_arm_neon_vrsqrte>; - -// Crypto Class -class NeonI_Cryptoaes_2v size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_AES, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>; -def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>; - -class NeonI_Cryptoaes size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_AES, - Sched<[WriteFPALU, ReadFPALU]>; - -def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>; -def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>; - -class NeonI_Cryptosha_vv size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_SHA, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1", - int_arm_neon_sha1su1>; -def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0", - int_arm_neon_sha256su0>; - -class NeonI_Cryptosha_ss size, bits<5> opcode, - string asmop, SDPatternOperator opnode> - : NeonI_Crypto_SHA, - Sched<[WriteFPALU, ReadFPALU]> { - let Predicates = [HasNEON, HasCrypto]; - let hasSideEffects = 0; -} - -def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>; -def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)), - (COPY_TO_REGCLASS (SHA1H (COPY_TO_REGCLASS i32:$Rn, FPR32)), GPR32)>; - - -class NeonI_Cryptosha3_vvv size, bits<3> opcode, string asmop, - SDPatternOperator opnode> - : NeonI_Crypto_3VSHA, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0", - int_arm_neon_sha1su0>; -def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1", - int_arm_neon_sha256su1>; - -class NeonI_Cryptosha3_qqv size, bits<3> opcode, string asmop, - SDPatternOperator opnode> - : NeonI_Crypto_3VSHA, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h", - int_arm_neon_sha256h>; -def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2", - int_arm_neon_sha256h2>; - -class NeonI_Cryptosha3_qsv size, bits<3> opcode, string asmop> - : NeonI_Crypto_3VSHA, - Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> { - let Constraints = "$src = $Rd"; - let hasSideEffects = 0; - let Predicates = [HasNEON, HasCrypto]; -} - -def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c">; -def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p">; -def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m">; - -def : Pat<(int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1C v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; -def : Pat<(int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1M v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; -def : Pat<(int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk), - (SHA1P v4i32:$hash_abcd, - (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>; - -// Additional patterns to match shl to USHL. 
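The comment above introduces extra patterns that select USHL for IR-level shl; the sra/srl patterns a little further below reuse the same instructions by first negating the shift-amount vector, because SSHL/USHL interpret a negative per-lane shift amount as a right shift. A minimal scalar model of one unsigned 8-bit lane, for intuition only (this helper is invented and is not part of the patch):

```cpp
#include <cstdint>

// One 8-bit lane of USHL: positive amounts shift left, negative amounts
// shift right (logically); amounts at or beyond the element width yield 0.
static uint8_t ushl_lane(uint8_t Value, int8_t Shift) {
  if (Shift >= 8 || Shift <= -8)
    return 0;
  return Shift >= 0 ? static_cast<uint8_t>(Value << Shift)
                    : static_cast<uint8_t>(Value >> -Shift);
}
```

This is why the srl patterns below wrap $Rm in NEG8b/NEG4h/etc. before feeding it to USHL, and the sra patterns do the same with SSHL.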
-def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (USHLvvv_8B $Rn, $Rm)>; -def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (USHLvvv_4H $Rn, $Rm)>; -def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (USHLvvv_2S $Rn, $Rm)>; -def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (USHLddd $Rn, $Rm)>; -def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (USHLvvv_16B $Rn, $Rm)>; -def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (USHLvvv_8H $Rn, $Rm)>; -def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (USHLvvv_4S $Rn, $Rm)>; -def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (USHLvvv_2D $Rn, $Rm)>; - -def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)), - sub_8)>; -def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)), - sub_16)>; -def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)), - sub_32)>; - -// Additional patterns to match sra, srl. -// For a vector right shift by vector, the shift amounts of SSHL/USHL are -// negative. Negate the vector of shift amount first. -def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (USHLvvv_8B $Rn, (NEG8b $Rm))>; -def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (USHLvvv_4H $Rn, (NEG4h $Rm))>; -def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (USHLvvv_2S $Rn, (NEG2s $Rm))>; -def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (USHLddd $Rn, (NEGdd $Rm))>; -def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (USHLvvv_16B $Rn, (NEG16b $Rm))>; -def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (USHLvvv_8H $Rn, (NEG8h $Rm))>; -def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (USHLvvv_4S $Rn, (NEG4s $Rm))>; -def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (USHLvvv_2D $Rn, (NEG2d $Rm))>; - -def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))), - sub_8)>; -def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))), - sub_16)>; -def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))), - sub_32)>; - -def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), - (SSHLvvv_8B $Rn, (NEG8b $Rm))>; -def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), - (SSHLvvv_4H $Rn, (NEG4h $Rm))>; -def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), - (SSHLvvv_2S $Rn, (NEG2s $Rm))>; -def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), - (SSHLddd $Rn, (NEGdd $Rm))>; -def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), - (SSHLvvv_16B $Rn, (NEG16b $Rm))>; -def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), - (SSHLvvv_8H $Rn, (NEG8h $Rm))>; -def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), - (SSHLvvv_4S $Rn, (NEG4s $Rm))>; -def 
: Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), - (SSHLvvv_2D $Rn, (NEG2d $Rm))>; - -def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8), - (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))), - sub_8)>; -def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16), - (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))), - sub_16)>; -def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), - (EXTRACT_SUBREG - (SSHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), - (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))), - sub_32)>; - -// -// Patterns for handling half-precision values -// - -// Convert between f16 value and f32 value -def : Pat<(f32 (f16_to_f32 (i32 GPR32:$Rn))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw $Rn), sub_16))>; -def : Pat<(i32 (f32_to_f16 (f32 FPR32:$Rn))), - (FMOVws (SUBREG_TO_REG (i64 0), (f16 (FCVThs $Rn)), sub_16))>; - -// Convert f16 value coming in as i16 value to f32 -def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; -def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))), - (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; - -def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 ( - f32_to_f16 (f32 FPR32:$Rn))))))), - (f32 FPR32:$Rn)>; - -// Patterns for vector extract of half-precision FP value in i16 storage type -def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract - (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))), - (FCVTsh (f16 (DUPhv_H - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - neon_uimm2_bare:$Imm)))>; - -def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract - (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))), - (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>; - -// Patterns for vector insert of half-precision FP value 0 in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), - (neon_uimm3_bare:$Imm))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), - sub_16)), - neon_uimm3_bare:$Imm, 0))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), - (neon_uimm2_bare:$Imm))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), - sub_16)), - neon_uimm2_bare:$Imm, 0)), - sub_64))>; - -// Patterns for vector insert of half-precision FP value in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint - (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), - (neon_uimm3_bare:$Imm))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), - sub_16)), - neon_uimm3_bare:$Imm, 0))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint - (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), - (neon_uimm2_bare:$Imm))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), - (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), - sub_16)), - neon_uimm2_bare:$Imm, 
0)), - sub_64))>; - -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), - (neon_uimm3_bare:$Imm1))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; - -// Patterns for vector copy of half-precision FP value in i16 storage type -def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 - (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), - 65535)))))))), - (neon_uimm3_bare:$Imm1))), - (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; - -def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), - (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 - (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)), - 65535)))))))), - (neon_uimm3_bare:$Imm1))), - (v4i16 (EXTRACT_SUBREG - (v8i16 (INSELh - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), - neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)), - sub_64))>; - - diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp new file mode 100644 index 0000000..e7454be --- /dev/null +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -0,0 +1,942 @@ +//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that performs load / store related peephole +// optimizations. This pass should be run after register allocation. +// +//===----------------------------------------------------------------------===// + +#include "AArch64InstrInfo.h" +#include "MCTargetDesc/AArch64AddressingModes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +#define DEBUG_TYPE "aarch64-ldst-opt" + +/// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine +/// load / store instructions to form ldp / stp instructions. 
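The core question the pass asks when pairing is whether two accesses hit adjacent slots off the same base register within the LDP/STP immediate range (a 7-bit signed, element-scaled field). A self-contained sketch of that predicate, with invented names and assuming both accesses have the same element size:

```cpp
#include <cstdlib>

struct MemAccess {
  unsigned BaseReg;
  int Offset; // in element-sized (scaled) units
};

// Can A and B be rewritten as a single LDP/STP? Mirrors the adjacency
// and range checks the pass performs; illustrative only.
static bool canFormPair(const MemAccess &A, const MemAccess &B) {
  if (A.BaseReg != B.BaseReg)
    return false;
  if (std::abs(A.Offset - B.Offset) != 1) // must be adjacent elements
    return false;
  int Min = A.Offset < B.Offset ? A.Offset : B.Offset;
  return Min >= -64 && Min <= 63; // LDP/STP 7-bit signed immediate
}
```

The real pass additionally rejects volatile accesses, suppressed pairs, and loads with identical destination registers, as the code below spells out.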
+ +STATISTIC(NumPairCreated, "Number of load/store pair instructions generated"); +STATISTIC(NumPostFolded, "Number of post-index updates folded"); +STATISTIC(NumPreFolded, "Number of pre-index updates folded"); +STATISTIC(NumUnscaledPairCreated, + "Number of load/store from unscaled generated"); + +static cl::opt ScanLimit("aarch64-load-store-scan-limit", cl::init(20), + cl::Hidden); + +// Place holder while testing unscaled load/store combining +static cl::opt +EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden, + cl::desc("Allow AArch64 unscaled load/store combining"), + cl::init(true)); + +namespace { +struct AArch64LoadStoreOpt : public MachineFunctionPass { + static char ID; + AArch64LoadStoreOpt() : MachineFunctionPass(ID) {} + + const AArch64InstrInfo *TII; + const TargetRegisterInfo *TRI; + + // Scan the instructions looking for a load/store that can be combined + // with the current instruction into a load/store pair. + // Return the matching instruction if one is found, else MBB->end(). + // If a matching instruction is found, mergeForward is set to true if the + // merge is to remove the first instruction and replace the second with + // a pair-wise insn, and false if the reverse is true. + MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, + bool &mergeForward, + unsigned Limit); + // Merge the two instructions indicated into a single pair-wise instruction. + // If mergeForward is true, erase the first instruction and fold its + // operation into the second. If false, the reverse. Return the instruction + // following the first instruction (which may change during processing). + MachineBasicBlock::iterator + mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, bool mergeForward); + + // Scan the instruction list to find a base register update that can + // be combined with the current instruction (a load or store) using + // pre or post indexed addressing with writeback. Scan forwards. + MachineBasicBlock::iterator + findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, + int Value); + + // Scan the instruction list to find a base register update that can + // be combined with the current instruction (a load or store) using + // pre or post indexed addressing with writeback. Scan backwards. + MachineBasicBlock::iterator + findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); + + // Merge a pre-index base register update into a ld/st instruction. + MachineBasicBlock::iterator + mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update); + + // Merge a post-index base register update into a ld/st instruction. 
+ MachineBasicBlock::iterator + mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update); + + bool optimizeBlock(MachineBasicBlock &MBB); + + bool runOnMachineFunction(MachineFunction &Fn) override; + + const char *getPassName() const override { + return "AArch64 load / store optimization pass"; + } + +private: + int getMemSize(MachineInstr *MemMI); +}; +char AArch64LoadStoreOpt::ID = 0; +} + +static bool isUnscaledLdst(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::STURSi: + return true; + case AArch64::STURDi: + return true; + case AArch64::STURQi: + return true; + case AArch64::STURWi: + return true; + case AArch64::STURXi: + return true; + case AArch64::LDURSi: + return true; + case AArch64::LDURDi: + return true; + case AArch64::LDURQi: + return true; + case AArch64::LDURWi: + return true; + case AArch64::LDURXi: + return true; + } +} + +// Size in bytes of the data moved by a load or store +int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { + switch (MemMI->getOpcode()) { + default: + llvm_unreachable("Opcode has unknown size!"); + case AArch64::STRSui: + case AArch64::STURSi: + return 4; + case AArch64::STRDui: + case AArch64::STURDi: + return 8; + case AArch64::STRQui: + case AArch64::STURQi: + return 16; + case AArch64::STRWui: + case AArch64::STURWi: + return 4; + case AArch64::STRXui: + case AArch64::STURXi: + return 8; + case AArch64::LDRSui: + case AArch64::LDURSi: + return 4; + case AArch64::LDRDui: + case AArch64::LDURDi: + return 8; + case AArch64::LDRQui: + case AArch64::LDURQi: + return 16; + case AArch64::LDRWui: + case AArch64::LDURWi: + return 4; + case AArch64::LDRXui: + case AArch64::LDURXi: + return 8; + } +} + +static unsigned getMatchingPairOpcode(unsigned Opc) { + switch (Opc) { + default: + llvm_unreachable("Opcode has no pairwise equivalent!"); + case AArch64::STRSui: + case AArch64::STURSi: + return AArch64::STPSi; + case AArch64::STRDui: + case AArch64::STURDi: + return AArch64::STPDi; + case AArch64::STRQui: + case AArch64::STURQi: + return AArch64::STPQi; + case AArch64::STRWui: + case AArch64::STURWi: + return AArch64::STPWi; + case AArch64::STRXui: + case AArch64::STURXi: + return AArch64::STPXi; + case AArch64::LDRSui: + case AArch64::LDURSi: + return AArch64::LDPSi; + case AArch64::LDRDui: + case AArch64::LDURDi: + return AArch64::LDPDi; + case AArch64::LDRQui: + case AArch64::LDURQi: + return AArch64::LDPQi; + case AArch64::LDRWui: + case AArch64::LDURWi: + return AArch64::LDPWi; + case AArch64::LDRXui: + case AArch64::LDURXi: + return AArch64::LDPXi; + } +} + +static unsigned getPreIndexedOpcode(unsigned Opc) { + switch (Opc) { + default: + llvm_unreachable("Opcode has no pre-indexed equivalent!"); + case AArch64::STRSui: return AArch64::STRSpre; + case AArch64::STRDui: return AArch64::STRDpre; + case AArch64::STRQui: return AArch64::STRQpre; + case AArch64::STRWui: return AArch64::STRWpre; + case AArch64::STRXui: return AArch64::STRXpre; + case AArch64::LDRSui: return AArch64::LDRSpre; + case AArch64::LDRDui: return AArch64::LDRDpre; + case AArch64::LDRQui: return AArch64::LDRQpre; + case AArch64::LDRWui: return AArch64::LDRWpre; + case AArch64::LDRXui: return AArch64::LDRXpre; + } +} + +static unsigned getPostIndexedOpcode(unsigned Opc) { + switch (Opc) { + default: + llvm_unreachable("Opcode has no post-indexed equivalent!"); + case AArch64::STRSui: + return AArch64::STRSpost; + case AArch64::STRDui: + return AArch64::STRDpost; + case AArch64::STRQui: + 
return AArch64::STRQpost; + case AArch64::STRWui: + return AArch64::STRWpost; + case AArch64::STRXui: + return AArch64::STRXpost; + case AArch64::LDRSui: + return AArch64::LDRSpost; + case AArch64::LDRDui: + return AArch64::LDRDpost; + case AArch64::LDRQui: + return AArch64::LDRQpost; + case AArch64::LDRWui: + return AArch64::LDRWpost; + case AArch64::LDRXui: + return AArch64::LDRXpost; + } +} + +MachineBasicBlock::iterator +AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, + bool mergeForward) { + MachineBasicBlock::iterator NextI = I; + ++NextI; + // If NextI is the second of the two instructions to be merged, we need + // to skip one further. Either way, the merge will invalidate the iterator, + // and we don't need to scan the new instruction, as it's a pairwise + // instruction, which we're not considering for further action anyway. + if (NextI == Paired) + ++NextI; + + bool IsUnscaled = isUnscaledLdst(I->getOpcode()); + int OffsetStride = + IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1; + + unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); + // Insert our new paired instruction after whichever of the paired + // instructions mergeForward indicates. + MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I; + // mergeForward also determines which instruction we copy the base register + // operand from, so that its flags stay compatible with the input code. + MachineOperand &BaseRegOp = + mergeForward ? Paired->getOperand(1) : I->getOperand(1); + + // Which register is Rt and which is Rt2 depends on the offset order. + MachineInstr *RtMI, *Rt2MI; + if (I->getOperand(2).getImm() == + Paired->getOperand(2).getImm() + OffsetStride) { + RtMI = Paired; + Rt2MI = I; + } else { + RtMI = I; + Rt2MI = Paired; + } + // Handle unscaled offsets. + int OffsetImm = RtMI->getOperand(2).getImm(); + if (IsUnscaled && EnableAArch64UnscaledMemOp) + OffsetImm /= OffsetStride; + + // Construct the new instruction. + MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, + I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(RtMI->getOperand(0)) + .addOperand(Rt2MI->getOperand(0)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + (void)MIB; + + // FIXME: Do we need/want to copy the mem operands from the source + // instructions? Probably. What uses them after this? + + DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); + DEBUG(I->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(Paired->print(dbgs())); + DEBUG(dbgs() << " with instruction:\n "); + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + + // Erase the old instructions. + I->eraseFromParent(); + Paired->eraseFromParent(); + + return NextI; +} + +/// trackRegDefsUses - Remember what registers the specified instruction uses +/// and modifies. 
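One subtlety in mergePairedInsns above, before the trackRegDefsUses helper that follows: which original instruction supplies Rt and which supplies Rt2 depends only on the offsets, never on program order. A toy model of that selection (names invented, illustrative only):

```cpp
#include <utility>

struct MemOp {
  unsigned Rt;
  int Offset; // in scaled units; OffsetStride is 1 for scaled forms
};

// The access at the lower offset becomes Rt, its neighbour becomes Rt2,
// mirroring the RtMI/Rt2MI selection in mergePairedInsns.
static std::pair<unsigned, unsigned> pairRegisters(const MemOp &I,
                                                   const MemOp &Paired) {
  return I.Offset < Paired.Offset ? std::make_pair(I.Rt, Paired.Rt)
                                  : std::make_pair(Paired.Rt, I.Rt);
}
```

So "ldr x1, [x2, #8]" followed by "ldr x0, [x2]" still merges to "ldp x0, x1, [x2]". Relatedly, the alignTo helper defined a little further down rounds up to a power-of-two multiple, e.g. alignTo(17, 8) == (17 + 7) & ~7 == 24.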
+static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, + BitVector &UsedRegs, + const TargetRegisterInfo *TRI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + ModifiedRegs.setBitsNotInMask(MO.getRegMask()); + + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (MO.isDef()) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + ModifiedRegs.set(*AI); + } else { + assert(MO.isUse() && "Reg operand not a def and not a use?!?"); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + UsedRegs.set(*AI); + } + } +} + +static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { + if (!IsUnscaled && (Offset > 63 || Offset < -64)) + return false; + if (IsUnscaled) { + // Convert the byte-offset used by unscaled into an "element" offset used + // by the scaled pair load/store instructions. + int elemOffset = Offset / OffsetStride; + if (elemOffset > 63 || elemOffset < -64) + return false; + } + return true; +} + +// Do alignment, specialized to power of 2 and for signed ints, +// avoiding having to do a C-style cast from uint64_t to int when +// using RoundUpToAlignment from include/llvm/Support/MathExtras.h. +// FIXME: Move this function to include/MathExtras.h? +static int alignTo(int Num, int PowOf2) { + return (Num + PowOf2 - 1) & ~(PowOf2 - 1); +} + +/// findMatchingInsn - Scan the instructions looking for a load/store that can +/// be combined with the current instruction into a load/store pair. +MachineBasicBlock::iterator +AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, + bool &mergeForward, unsigned Limit) { + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator MBBI = I; + MachineInstr *FirstMI = I; + ++MBBI; + + int Opc = FirstMI->getOpcode(); + bool mayLoad = FirstMI->mayLoad(); + bool IsUnscaled = isUnscaledLdst(Opc); + unsigned Reg = FirstMI->getOperand(0).getReg(); + unsigned BaseReg = FirstMI->getOperand(1).getReg(); + int Offset = FirstMI->getOperand(2).getImm(); + + // Early exit if the first instruction modifies the base register. + // e.g., ldr x0, [x0] + // Early exit if the offset is not possible to match (6 bits of positive + // range, plus allow an extra one in case we find a later insn that matches + // with Offset-1). + if (FirstMI->modifiesRegister(BaseReg, TRI)) + return E; + int OffsetStride = + IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1; + if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) + return E; + + // Track which registers have been modified and used between the first insn + // (inclusive) and the second insn. + BitVector ModifiedRegs, UsedRegs; + ModifiedRegs.resize(TRI->getNumRegs()); + UsedRegs.resize(TRI->getNumRegs()); + for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { + MachineInstr *MI = MBBI; + // Skip DBG_VALUE instructions. Otherwise debug info can affect the + // optimization by changing how far we scan. + if (MI->isDebugValue()) + continue; + + // Now that we know this is a real instruction, count it. + ++Count; + + if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) { + // If we've found another instruction with the same opcode, check to see + // if the base and offset are compatible with our starting instruction. + // These instructions all have scaled immediate operands, so we just + // check for +1/-1. 
Make sure to check the new instruction offset is + // actually an immediate and not a symbolic reference destined for + // a relocation. + // + // Pairwise instructions have a 7-bit signed offset field. Single insns + // have a 12-bit unsigned offset field. To be a valid combine, the + // final offset must be in range. + unsigned MIBaseReg = MI->getOperand(1).getReg(); + int MIOffset = MI->getOperand(2).getImm(); + if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || + (Offset + OffsetStride == MIOffset))) { + int MinOffset = Offset < MIOffset ? Offset : MIOffset; + // If this is a volatile load/store that otherwise matched, stop looking + // as something is going on that we don't have enough information to + // safely transform. Similarly, stop if we see a hint to avoid pairs. + if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) + return E; + // If the resultant immediate offset of merging these instructions + // is out of range for a pairwise instruction, bail and keep looking. + bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); + if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + // If the alignment requirements of the paired (scaled) instruction + // can't express the offset of the unscaled input, bail and keep + // looking. + if (IsUnscaled && EnableAArch64UnscaledMemOp && + (alignTo(MinOffset, OffsetStride) != MinOffset)) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + // If the destination register of the loads is the same register, bail + // and keep looking. A load-pair instruction with both destination + // registers the same is UNPREDICTABLE and will result in an exception. + if (mayLoad && Reg == MI->getOperand(0).getReg()) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + continue; + } + + // If the Rt of the second instruction was not modified or used between + // the two instructions, we can combine the second into the first. + if (!ModifiedRegs[MI->getOperand(0).getReg()] && + !UsedRegs[MI->getOperand(0).getReg()]) { + mergeForward = false; + return MBBI; + } + + // Likewise, if the Rt of the first instruction is not modified or used + // between the two instructions, we can combine the first into the + // second. + if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && + !UsedRegs[FirstMI->getOperand(0).getReg()]) { + mergeForward = true; + return MBBI; + } + // Unable to combine these instructions due to interference in between. + // Keep looking. + } + } + + // If the instruction wasn't a matching load or store, but does (or can) + // modify memory, stop searching, as we don't have alias analysis or + // anything like that to tell us whether the access is tromping on the + // locations we care about. The big one we want to catch is calls. + // + // FIXME: Theoretically, we can do better than that for SP and FP based + // references since we can effectively know where those are touching. It's + // unclear if it's worth the extra code, though. Most paired instructions + // will be sequential, perhaps with a few intervening non-memory related + // instructions. + if (MI->mayStore() || MI->isCall()) + return E; + // Likewise, if we're matching a store instruction, we don't want to + // move across a load, as it may be reading the same location. + if (FirstMI->mayStore() && MI->mayLoad()) + return E; + + // Update modified / uses register lists. 
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + + // Otherwise, if the base register is modified, we have no match, so + // return early. + if (ModifiedRegs[BaseReg]) + return E; + } + return E; +} + +MachineBasicBlock::iterator +AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update) { + assert((Update->getOpcode() == AArch64::ADDXri || + Update->getOpcode() == AArch64::SUBXri) && + "Unexpected base register update instruction to merge!"); + MachineBasicBlock::iterator NextI = I; + // Return the instruction following the merged instruction, which is + // the instruction following our unmerged load. Unless that's the add/sub + // instruction we're merging, in which case it's the one after that. + if (++NextI == Update) + ++NextI; + + int Value = Update->getOperand(2).getImm(); + assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && + "Can't merge 1 << 12 offset into pre-indexed load / store"); + if (Update->getOpcode() == AArch64::SUBXri) + Value = -Value; + + unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); + MachineInstrBuilder MIB = + BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(Update->getOperand(0)) + .addOperand(I->getOperand(0)) + .addOperand(I->getOperand(1)) + .addImm(Value); + (void)MIB; + + DEBUG(dbgs() << "Creating pre-indexed load/store."); + DEBUG(dbgs() << " Replacing instructions:\n "); + DEBUG(I->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(Update->print(dbgs())); + DEBUG(dbgs() << " with instruction:\n "); + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + + // Erase the old instructions for the block. + I->eraseFromParent(); + Update->eraseFromParent(); + + return NextI; +} + +MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn( + MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) { + assert((Update->getOpcode() == AArch64::ADDXri || + Update->getOpcode() == AArch64::SUBXri) && + "Unexpected base register update instruction to merge!"); + MachineBasicBlock::iterator NextI = I; + // Return the instruction following the merged instruction, which is + // the instruction following our unmerged load. Unless that's the add/sub + // instruction we're merging, in which case it's the one after that. + if (++NextI == Update) + ++NextI; + + int Value = Update->getOperand(2).getImm(); + assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && + "Can't merge 1 << 12 offset into post-indexed load / store"); + if (Update->getOpcode() == AArch64::SUBXri) + Value = -Value; + + unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); + MachineInstrBuilder MIB = + BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(Update->getOperand(0)) + .addOperand(I->getOperand(0)) + .addOperand(I->getOperand(1)) + .addImm(Value); + (void)MIB; + + DEBUG(dbgs() << "Creating post-indexed load/store."); + DEBUG(dbgs() << " Replacing instructions:\n "); + DEBUG(I->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(Update->print(dbgs())); + DEBUG(dbgs() << " with instruction:\n "); + DEBUG(((MachineInstr *)MIB)->print(dbgs())); + DEBUG(dbgs() << "\n"); + + // Erase the old instructions for the block. + I->eraseFromParent(); + Update->eraseFromParent(); + + return NextI; +} + +static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, + int Offset) { + switch (MI->getOpcode()) { + default: + break; + case AArch64::SUBXri: + // Negate the offset for a SUB instruction. 
+ Offset *= -1; + // FALLTHROUGH + case AArch64::ADDXri: + // Make sure it's a vanilla immediate operand, not a relocation or + // anything else we can't handle. + if (!MI->getOperand(2).isImm()) + break; + // Watch out for 1 << 12 shifted value. + if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) + break; + // If the instruction has the base register as source and dest and the + // immediate will fit in a signed 9-bit integer, then we have a match. + if (MI->getOperand(0).getReg() == BaseReg && + MI->getOperand(1).getReg() == BaseReg && + MI->getOperand(2).getImm() <= 255 && + MI->getOperand(2).getImm() >= -256) { + // If we have a non-zero Offset, we check that it matches the amount + // we're adding to the register. + if (!Offset || Offset == MI->getOperand(2).getImm()) + return true; + } + break; + } + return false; +} + +MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( + MachineBasicBlock::iterator I, unsigned Limit, int Value) { + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineInstr *MemMI = I; + MachineBasicBlock::iterator MBBI = I; + const MachineFunction &MF = *MemMI->getParent()->getParent(); + + unsigned DestReg = MemMI->getOperand(0).getReg(); + unsigned BaseReg = MemMI->getOperand(1).getReg(); + int Offset = MemMI->getOperand(2).getImm() * + TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); + + // If the base register overlaps the destination register, we can't + // merge the update. + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + + // Scan forward looking for post-index opportunities. + // Updating instructions can't be formed if the memory insn already + // has an offset other than the value we're looking for. + if (Offset != Value) + return E; + + // Track which registers have been modified and used between the first insn + // (inclusive) and the second insn. + BitVector ModifiedRegs, UsedRegs; + ModifiedRegs.resize(TRI->getNumRegs()); + UsedRegs.resize(TRI->getNumRegs()); + ++MBBI; + for (unsigned Count = 0; MBBI != E; ++MBBI) { + MachineInstr *MI = MBBI; + // Skip DBG_VALUE instructions. Otherwise debug info can affect the + // optimization by changing how far we scan. + if (MI->isDebugValue()) + continue; + + // Now that we know this is a real instruction, count it. + ++Count; + + // If we found a match, return it. + if (isMatchingUpdateInsn(MI, BaseReg, Value)) + return MBBI; + + // Update the status of what the instruction clobbered and used. + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + + // Otherwise, if the base register is used or modified, we have no match, so + // return early. + if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) + return E; + } + return E; +} + +MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( + MachineBasicBlock::iterator I, unsigned Limit) { + MachineBasicBlock::iterator B = I->getParent()->begin(); + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineInstr *MemMI = I; + MachineBasicBlock::iterator MBBI = I; + const MachineFunction &MF = *MemMI->getParent()->getParent(); + + unsigned DestReg = MemMI->getOperand(0).getReg(); + unsigned BaseReg = MemMI->getOperand(1).getReg(); + int Offset = MemMI->getOperand(2).getImm(); + unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); + + // If the load/store is the first instruction in the block, there's obviously + // not any matching update. Ditto if the memory offset isn't zero. 
+ if (MBBI == B || Offset != 0) + return E; + // If the base register overlaps the destination register, we can't + // merge the update. + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + + // Track which registers have been modified and used between the first insn + // (inclusive) and the second insn. + BitVector ModifiedRegs, UsedRegs; + ModifiedRegs.resize(TRI->getNumRegs()); + UsedRegs.resize(TRI->getNumRegs()); + --MBBI; + for (unsigned Count = 0; MBBI != B; --MBBI) { + MachineInstr *MI = MBBI; + // Skip DBG_VALUE instructions. Otherwise debug info can affect the + // optimization by changing how far we scan. + if (MI->isDebugValue()) + continue; + + // Now that we know this is a real instruction, count it. + ++Count; + + // If we found a match, return it. + if (isMatchingUpdateInsn(MI, BaseReg, RegSize)) + return MBBI; + + // Update the status of what the instruction clobbered and used. + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + + // Otherwise, if the base register is used or modified, we have no match, so + // return early. + if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) + return E; + } + return E; +} + +bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { + bool Modified = false; + // Two transformations to do here: + // 1) Find loads and stores that can be merged into a single load or store + // pair instruction. + // e.g., + // ldr x0, [x2] + // ldr x1, [x2, #8] + // ; becomes + // ldp x0, x1, [x2] + // 2) Find base register updates that can be merged into the load or store + // as a base-reg writeback. + // e.g., + // ldr x0, [x2] + // add x2, x2, #4 + // ; becomes + // ldr x0, [x2], #4 + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E;) { + MachineInstr *MI = MBBI; + switch (MI->getOpcode()) { + default: + // Just move on to the next instruction. + ++MBBI; + break; + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: + // Do the unscaled versions as well. + case AArch64::STURSi: + case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURWi: + case AArch64::STURXi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURWi: + case AArch64::LDURXi: { + // If this is a volatile load/store, don't mess with it. + if (MI->hasOrderedMemoryRef()) { + ++MBBI; + break; + } + // Make sure this is a reg+imm (as opposed to an address reloc). + if (!MI->getOperand(2).isImm()) { + ++MBBI; + break; + } + // Check if this load/store has a hint to avoid pair formation. + // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. + if (TII->isLdStPairSuppressed(MI)) { + ++MBBI; + break; + } + // Look ahead up to ScanLimit instructions for a pairable instruction. + bool mergeForward = false; + MachineBasicBlock::iterator Paired = + findMatchingInsn(MBBI, mergeForward, ScanLimit); + if (Paired != E) { + // Merge the loads into a pair. Keeping the iterator straight is a + // pain, so we let the merge routine tell us what the next instruction + // is after it's done mucking about. + MBBI = mergePairedInsns(MBBI, Paired, mergeForward); + + Modified = true; + ++NumPairCreated; + if (isUnscaledLdst(MI->getOpcode())) + ++NumUnscaledPairCreated; + break; + } + ++MBBI; + break; + } + // FIXME: Do the other instructions. 
+ } + } + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E;) { + MachineInstr *MI = MBBI; + // Do update merging. It's simpler to keep this separate from the above + // switch, though not strictly necessary. + int Opc = MI->getOpcode(); + switch (Opc) { + default: + // Just move on to the next instruction. + ++MBBI; + break; + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STRQui: + case AArch64::STRXui: + case AArch64::STRWui: + case AArch64::LDRSui: + case AArch64::LDRDui: + case AArch64::LDRQui: + case AArch64::LDRXui: + case AArch64::LDRWui: + // Do the unscaled versions as well. + case AArch64::STURSi: + case AArch64::STURDi: + case AArch64::STURQi: + case AArch64::STURWi: + case AArch64::STURXi: + case AArch64::LDURSi: + case AArch64::LDURDi: + case AArch64::LDURQi: + case AArch64::LDURWi: + case AArch64::LDURXi: { + // Make sure this is a reg+imm (as opposed to an address reloc). + if (!MI->getOperand(2).isImm()) { + ++MBBI; + break; + } + // Look ahead up to ScanLimit instructions for a mergeable instruction. + MachineBasicBlock::iterator Update = + findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); + if (Update != E) { + // Merge the update into the ld/st. + MBBI = mergePostIdxUpdateInsn(MBBI, Update); + Modified = true; + ++NumPostFolded; + break; + } + // Don't know how to handle pre/post-index versions, so move to the next + // instruction. + if (isUnscaledLdst(Opc)) { + ++MBBI; + break; + } + + // Look back to try to find a pre-index instruction. For example, + // add x0, x0, #8 + // ldr x1, [x0] + // merged into: + // ldr x1, [x0, #8]! + Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); + if (Update != E) { + // Merge the update into the ld/st. + MBBI = mergePreIdxUpdateInsn(MBBI, Update); + Modified = true; + ++NumPreFolded; + break; + } + + // Look forward for a base register update matching the memory offset, + // which can be folded as a pre-indexed (writeback) form. For example, + // ldr x1, [x0, #64] + // add x0, x0, #64 + // merged into: + // ldr x1, [x0, #64]! + + // The immediate in the load/store is scaled by the size of the register + // being loaded. The immediate in the add we're looking for, + // however, is not, so adjust here. + int Value = MI->getOperand(2).getImm() * + TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) + ->getSize(); + Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); + if (Update != E) { + // Merge the update into the ld/st. + MBBI = mergePreIdxUpdateInsn(MBBI, Update); + Modified = true; + ++NumPreFolded; + break; + } + + // Nothing found. Just move to the next instruction. + ++MBBI; + break; + } + // FIXME: Do the other instructions. + } + } + + return Modified; +} + +bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + const TargetMachine &TM = Fn.getTarget(); + TII = static_cast<const AArch64InstrInfo *>(TM.getInstrInfo()); + TRI = TM.getRegisterInfo(); + + bool Modified = false; + for (auto &MBB : Fn) + Modified |= optimizeBlock(MBB); + + return Modified; +} + +// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep +// loads and stores near one another? + +/// createAArch64LoadStoreOptimizationPass - returns an instance of the load / store +/// optimization pass. 
+/// createAArch64LoadStoreOptimizationPass - returns an instance of the
+/// load / store optimization pass.
+FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
+  return new AArch64LoadStoreOpt();
+}
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 3842bfd..ab6d375 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==//
+//==-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst --==//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -12,146 +12,191 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "AArch64AsmPrinter.h"
-#include "AArch64TargetMachine.h"
+#include "AArch64MCInstLower.h"
 #include "MCTargetDesc/AArch64MCExpr.h"
 #include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/IR/Mangler.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
-MCOperand
-AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
-                                      const MCSymbol *Sym) const {
-  const MCExpr *Expr = 0;
+AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, Mangler &mang,
+                                       AsmPrinter &printer)
+    : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
 
-  Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext);
+MCSymbol *
+AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
+  return Printer.getSymbol(MO.getGlobal());
+}
 
-  switch (MO.getTargetFlags()) {
-  case AArch64II::MO_GOT:
-    Expr = AArch64MCExpr::CreateGOT(Expr, OutContext);
-    break;
-  case AArch64II::MO_GOT_LO12:
-    Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext);
-    break;
-  case AArch64II::MO_LO12:
-    Expr = AArch64MCExpr::CreateLo12(Expr, OutContext);
-    break;
-  case AArch64II::MO_DTPREL_G1:
-    Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext);
-    break;
-  case AArch64II::MO_DTPREL_G0_NC:
-    Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext);
-    break;
-  case AArch64II::MO_GOTTPREL:
-    Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext);
-    break;
-  case AArch64II::MO_GOTTPREL_LO12:
-    Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext);
-    break;
-  case AArch64II::MO_TLSDESC:
-    Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext);
-    break;
-  case AArch64II::MO_TLSDESC_LO12:
-    Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext);
-    break;
-  case AArch64II::MO_TPREL_G1:
-    Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext);
-    break;
-  case AArch64II::MO_TPREL_G0_NC:
-    Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
-    break;
-  case AArch64II::MO_ABS_G3:
-    Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext);
-    break;
-  case AArch64II::MO_ABS_G2_NC:
-    Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext);
-    break;
-  case AArch64II::MO_ABS_G1_NC:
-    Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext);
-    break;
-  case AArch64II::MO_ABS_G0_NC:
-    Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext);
-    break;
-  case AArch64II::MO_NO_FLAG:
-    // Expr is already correct
-    break;
-  default:
-    llvm_unreachable("Unexpected MachineOperand flag");
+MCSymbol *
+AArch64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const {
+  return
Printer.GetExternalSymbolSymbol(MO.getSymbolName()); +} + +MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, + MCSymbol *Sym) const { + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; + if ((MO.getTargetFlags() & AArch64II::MO_GOT) != 0) { + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) + RefKind = MCSymbolRefExpr::VK_GOTPAGE; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) + RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; + else + assert(0 && "Unexpected target flags with MO_GOT on GV operand"); + } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) { + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) + RefKind = MCSymbolRefExpr::VK_TLVPPAGE; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) + RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF; + else + llvm_unreachable("Unexpected target flags with MO_TLS on GV operand"); + } else { + if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) + RefKind = MCSymbolRefExpr::VK_PAGE; + else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == + AArch64II::MO_PAGEOFF) + RefKind = MCSymbolRefExpr::VK_PAGEOFF; } + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd( + Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); + return MCOperand::CreateExpr(Expr); +} + +MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, + MCSymbol *Sym) const { + uint32_t RefFlags = 0; + if (MO.getTargetFlags() & AArch64II::MO_GOT) + RefFlags |= AArch64MCExpr::VK_GOT; + else if (MO.getTargetFlags() & AArch64II::MO_TLS) { + TLSModel::Model Model; + if (MO.isGlobal()) { + const GlobalValue *GV = MO.getGlobal(); + Model = Printer.TM.getTLSModel(GV); + } else { + assert(MO.isSymbol() && + StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" && + "unexpected external TLS symbol"); + Model = TLSModel::GeneralDynamic; + } + switch (Model) { + case TLSModel::InitialExec: + RefFlags |= AArch64MCExpr::VK_GOTTPREL; + break; + case TLSModel::LocalExec: + RefFlags |= AArch64MCExpr::VK_TPREL; + break; + case TLSModel::LocalDynamic: + RefFlags |= AArch64MCExpr::VK_DTPREL; + break; + case TLSModel::GeneralDynamic: + RefFlags |= AArch64MCExpr::VK_TLSDESC; + break; + } + } else { + // No modifier means this is a generic reference, classified as absolute for + // the cases where it matters (:abs_g0: etc). 
+    RefFlags |= AArch64MCExpr::VK_ABS;
+  }
+
+  if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
+    RefFlags |= AArch64MCExpr::VK_PAGE;
+  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
+           AArch64II::MO_PAGEOFF)
+    RefFlags |= AArch64MCExpr::VK_PAGEOFF;
+  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3)
+    RefFlags |= AArch64MCExpr::VK_G3;
+  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G2)
+    RefFlags |= AArch64MCExpr::VK_G2;
+  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G1)
+    RefFlags |= AArch64MCExpr::VK_G1;
+  else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
+    RefFlags |= AArch64MCExpr::VK_G0;
+
+  if (MO.getTargetFlags() & AArch64II::MO_NC)
+    RefFlags |= AArch64MCExpr::VK_NC;
+
+  const MCExpr *Expr =
+      MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
   if (!MO.isJTI() && MO.getOffset())
-    Expr = MCBinaryExpr::CreateAdd(Expr,
-                                   MCConstantExpr::Create(MO.getOffset(),
-                                                          OutContext),
-                                   OutContext);
+    Expr = MCBinaryExpr::CreateAdd(
+        Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
+
+  AArch64MCExpr::VariantKind RefKind;
+  RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
+  Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx);
 
   return MCOperand::CreateExpr(Expr);
 }
 
-bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
-                                     MCOperand &MCOp) const {
+MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+                                                 MCSymbol *Sym) const {
+  if (TargetTriple.isOSDarwin())
+    return lowerSymbolOperandDarwin(MO, Sym);
+
+  assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target");
+  return lowerSymbolOperandELF(MO, Sym);
+}
+
+bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
+                                      MCOperand &MCOp) const {
   switch (MO.getType()) {
-  default: llvm_unreachable("unknown operand type");
+  default:
+    assert(0 && "unknown operand type");
   case MachineOperand::MO_Register:
+    // Ignore all implicit register operands.
     if (MO.isImplicit())
      return false;
-    assert(!MO.getSubReg() && "Subregs should be eliminated!");
     MCOp = MCOperand::CreateReg(MO.getReg());
     break;
+  case MachineOperand::MO_RegisterMask:
+    // Regmasks are like implicit defs.
+    return false;
   case MachineOperand::MO_Immediate:
     MCOp = MCOperand::CreateImm(MO.getImm());
     break;
-  case MachineOperand::MO_FPImmediate: {
-    assert(MO.getFPImm()->isZero() && "Only fp imm 0.0 is supported");
-    MCOp = MCOperand::CreateFPImm(0.0);
-    break;
-  }
-  case MachineOperand::MO_BlockAddress:
-    MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
-    break;
-  case MachineOperand::MO_ExternalSymbol:
-    MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+  case MachineOperand::MO_MachineBasicBlock:
+    MCOp = MCOperand::CreateExpr(
+        MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
     break;
   case MachineOperand::MO_GlobalAddress:
-    MCOp = lowerSymbolOperand(MO, getSymbol(MO.getGlobal()));
+    MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
     break;
-  case MachineOperand::MO_MachineBasicBlock:
-    MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-        MO.getMBB()->getSymbol(), OutContext));
+  case MachineOperand::MO_ExternalSymbol:
+    MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
     break;
   case MachineOperand::MO_JumpTableIndex:
-    MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()));
+    MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
     break;
   case MachineOperand::MO_ConstantPoolIndex:
-    MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()));
+    MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
+    break;
+  case MachineOperand::MO_BlockAddress:
+    MCOp = LowerSymbolOperand(
+        MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
     break;
-  case MachineOperand::MO_RegisterMask:
-    // Ignore call clobbers
-    return false;
-
   }
-
   return true;
 }
 
-void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI,
-                                            MCInst &OutMI,
-                                            AArch64AsmPrinter &AP) {
+void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   OutMI.setOpcode(MI->getOpcode());
 
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
     MCOperand MCOp;
-    if (AP.lowerOperand(MO, MCOp))
+    if (lowerOperand(MI->getOperand(i), MCOp))
       OutMI.addOperand(MCOp);
   }
 }
diff --git a/lib/Target/AArch64/AArch64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
new file mode 100644
index 0000000..ba50ba9
--- /dev/null
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -0,0 +1,52 @@
+//===-- AArch64MCInstLower.h - Lower MachineInstr to MCInst ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AArch64_MCINSTLOWER_H
+#define AArch64_MCINSTLOWER_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCAsmInfo;
+class MCContext;
+class MCInst;
+class MCOperand;
+class MCSymbol;
+class MachineInstr;
+class MachineModuleInfoMachO;
+class MachineOperand;
+class Mangler;
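For a concrete picture of what the symbol lowering above produces: on ELF, a global accessed through the GOT carries MO_GOT plus a fragment flag, and lowerSymbolOperandELF turns that into a VK_GOT | VK_PAGE (or VK_PAGEOFF) AArch64MCExpr, printed as the ':got:' and ':got_lo12:' modifiers of the usual adrp/ldr pair. A small self-contained mirror of that classification follows; the enum values are illustrative placeholders, not the real AArch64II or AArch64MCExpr encodings.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the target-flag and variant-kind bits used in
// lowerSymbolOperandELF; only the classification logic is meant to match.
enum : uint32_t { MO_PAGE = 1, MO_PAGEOFF = 2, MO_FRAGMENT = 0x7, MO_GOT = 0x8 };
enum : uint32_t { VK_ABS = 0x10, VK_GOT = 0x20, VK_PAGE = 0x40, VK_PAGEOFF = 0x80 };

static uint32_t classifyELF(uint32_t TargetFlags) {
  // Base relocation class: GOT-indirect when MO_GOT is set, absolute otherwise.
  uint32_t RefFlags = (TargetFlags & MO_GOT) ? VK_GOT : VK_ABS;
  // Add the address fragment this operand carries (page vs. page offset).
  if ((TargetFlags & MO_FRAGMENT) == MO_PAGE)
    RefFlags |= VK_PAGE;
  else if ((TargetFlags & MO_FRAGMENT) == MO_PAGEOFF)
    RefFlags |= VK_PAGEOFF;
  return RefFlags;
}

int main() {
  // The "adrp x0, :got:var" half of a GOT load classifies as GOT | PAGE.
  std::printf("%#x\n", classifyELF(MO_GOT | MO_PAGE));
  return 0;
}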
+/// AArch64MCInstLower - This class is used to lower a MachineInstr into an
+/// MCInst.
+class LLVM_LIBRARY_VISIBILITY AArch64MCInstLower {
+  MCContext &Ctx;
+  AsmPrinter &Printer;
+  Triple TargetTriple;
+
+public:
+  AArch64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer);
+
+  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+  void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+
+  MCOperand lowerSymbolOperandDarwin(const MachineOperand &MO,
+                                     MCSymbol *Sym) const;
+  MCOperand lowerSymbolOperandELF(const MachineOperand &MO,
+                                  MCSymbol *Sym) const;
+  MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+  MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
+  MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+};
+}
+
+#endif
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
deleted file mode 100644
index f45d8f7..0000000
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file just contains the anchor for the AArch64MachineFunctionInfo to
-// force vtable emission.
-//
-//===----------------------------------------------------------------------===//
-#include "AArch64MachineFunctionInfo.h"
-
-using namespace llvm;
-
-void AArch64MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 33da54f..7c257ba 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==//
+//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -11,17 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef AARCH64MACHINEFUNCTIONINFO_H
-#define AARCH64MACHINEFUNCTIONINFO_H
+#ifndef AArch64MACHINEFUNCTIONINFO_H
+#define AArch64MACHINEFUNCTIONINFO_H
 
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCLinkerOptimizationHint.h"
 
 namespace llvm {
 
-/// This class is derived from MachineFunctionInfo and contains private AArch64
-/// target-specific information for each MachineFunction.
-class AArch64MachineFunctionInfo : public MachineFunctionInfo {
-  virtual void anchor();
+/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private AArch64-specific information for each MachineFunction.
+class AArch64FunctionInfo : public MachineFunctionInfo {
 
   /// Number of bytes of arguments this function has on the stack. If the callee
   /// is expected to restore the argument stack this should be a multiple of 16,
@@ -39,111 +41,123 @@ class AArch64MachineFunctionInfo : public MachineFunctionInfo {
   /// callee is expected to pop the args.
   unsigned ArgumentStackToRestore;
 
-  /// If the stack needs to be adjusted on frame entry in two stages, this
-  /// records the size of the first adjustment just prior to storing
-  /// callee-saved registers. The callee-saved slots are addressed assuming
-  /// SP == - InitialStackAdjust.
- unsigned InitialStackAdjust; + /// HasStackFrame - True if this function has a stack frame. Set by + /// processFunctionBeforeCalleeSavedScan(). + bool HasStackFrame; - /// Number of local-dynamic TLS accesses. - unsigned NumLocalDynamics; + /// \brief Amount of stack frame size, not including callee-saved registers. + unsigned LocalStackSize; - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of the area where LowerFormalArguments puts the - /// general-purpose registers that might contain variadic parameters. - int VariadicGPRIdx; + /// \brief Number of TLS accesses using the special (combinable) + /// _TLS_MODULE_BASE_ symbol. + unsigned NumLocalDynamicTLSAccesses; - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The size of the frame object used to store the general-purpose registers - /// which might contain variadic arguments. This is the offset from - /// VariadicGPRIdx to what's stored in __gr_top. - unsigned VariadicGPRSize; + /// \brief FrameIndex for start of varargs area for arguments passed on the + /// stack. + int VarArgsStackIndex; - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of the area where LowerFormalArguments puts the - /// floating-point registers that might contain variadic parameters. - int VariadicFPRIdx; + /// \brief FrameIndex for start of varargs area for arguments passed in + /// general purpose registers. + int VarArgsGPRIndex; - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The size of the frame object used to store the floating-point registers - /// which might contain variadic arguments. This is the offset from - /// VariadicFPRIdx to what's stored in __vr_top. - unsigned VariadicFPRSize; + /// \brief Size of the varargs area for arguments passed in general purpose + /// registers. + unsigned VarArgsGPRSize; - /// @see AArch64 Procedure Call Standard, B.3 - /// - /// The Frame index of an object pointing just past the last known stacked - /// argument on entry to a variadic function. This goes into the __stack field - /// of the va_list type. - int VariadicStackIdx; + /// \brief FrameIndex for start of varargs area for arguments passed in + /// floating-point registers. + int VarArgsFPRIndex; - /// The offset of the frame pointer from the stack pointer on function - /// entry. This is expected to be negative. - int FramePointerOffset; + /// \brief Size of the varargs area for arguments passed in floating-point + /// registers. 
+ unsigned VarArgsFPRSize; public: - AArch64MachineFunctionInfo() - : BytesInStackArgArea(0), - ArgumentStackToRestore(0), - InitialStackAdjust(0), - NumLocalDynamics(0), - VariadicGPRIdx(0), - VariadicGPRSize(0), - VariadicFPRIdx(0), - VariadicFPRSize(0), - VariadicStackIdx(0), - FramePointerOffset(0) {} - - explicit AArch64MachineFunctionInfo(MachineFunction &MF) - : BytesInStackArgArea(0), - ArgumentStackToRestore(0), - InitialStackAdjust(0), - NumLocalDynamics(0), - VariadicGPRIdx(0), - VariadicGPRSize(0), - VariadicFPRIdx(0), - VariadicFPRSize(0), - VariadicStackIdx(0), - FramePointerOffset(0) {} + AArch64FunctionInfo() + : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), + NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), + VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {} + + explicit AArch64FunctionInfo(MachineFunction &MF) + : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), + NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), + VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) { + (void)MF; + } unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; } - void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;} + void setBytesInStackArgArea(unsigned bytes) { BytesInStackArgArea = bytes; } unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; } void setArgumentStackToRestore(unsigned bytes) { ArgumentStackToRestore = bytes; } - unsigned getInitialStackAdjust() const { return InitialStackAdjust; } - void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; } + bool hasStackFrame() const { return HasStackFrame; } + void setHasStackFrame(bool s) { HasStackFrame = s; } - unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } - void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } + unsigned getLocalStackSize() const { return LocalStackSize; } - int getVariadicGPRIdx() const { return VariadicGPRIdx; } - void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; } + unsigned getNumLocalDynamicTLSAccesses() const { + return NumLocalDynamicTLSAccesses; + } - unsigned getVariadicGPRSize() const { return VariadicGPRSize; } - void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; } + int getVarArgsStackIndex() const { return VarArgsStackIndex; } + void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; } - int getVariadicFPRIdx() const { return VariadicFPRIdx; } - void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; } + int getVarArgsGPRIndex() const { return VarArgsGPRIndex; } + void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; } - unsigned getVariadicFPRSize() const { return VariadicFPRSize; } - void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; } + unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; } + void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; } - int getVariadicStackIdx() const { return VariadicStackIdx; } - void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; } + int getVarArgsFPRIndex() const { return VarArgsFPRIndex; } + void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; } - int getFramePointerOffset() const { return FramePointerOffset; } - void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; } + unsigned getVarArgsFPRSize() const { return 
VarArgsFPRSize; }
+  void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
 
-};
+  typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions;
+
+  const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
+
+  // Shortcuts for LOH related types.
+  class MILOHDirective {
+    MCLOHType Kind;
+
+    /// Arguments of this directive. Order matters.
+    SmallVector<const MachineInstr *, 3> Args;
+
+  public:
+    typedef SmallVectorImpl<const MachineInstr *> LOHArgs;
+
+    MILOHDirective(MCLOHType Kind, const LOHArgs &Args)
+        : Kind(Kind), Args(Args.begin(), Args.end()) {
+      assert(isValidMCLOHType(Kind) && "Invalid LOH directive type!");
+    }
+
+    MCLOHType getKind() const { return Kind; }
+    const LOHArgs &getArgs() const { return Args; }
+  };
+
+  typedef MILOHDirective::LOHArgs MILOHArgs;
+  typedef SmallVector<MILOHDirective, 32> MILOHContainer;
+
+  const MILOHContainer &getLOHContainer() const { return LOHContainerSet; }
+
+  /// Add a LOH directive of this @p Kind and this @p Args.
+  void addLOHDirective(MCLOHType Kind, const MILOHArgs &Args) {
+    LOHContainerSet.push_back(MILOHDirective(Kind, Args));
+    LOHRelated.insert(Args.begin(), Args.end());
+  }
+
+private:
+  // Hold the lists of LOHs.
+  MILOHContainer LOHContainerSet;
+  SetOfInstructions LOHRelated;
+};
 
 } // End llvm namespace
 
-#endif
+#endif // AArch64MACHINEFUNCTIONINFO_H
diff --git a/lib/Target/AArch64/AArch64PerfectShuffle.h b/lib/Target/AArch64/AArch64PerfectShuffle.h
new file mode 100644
index 0000000..b22fa24
--- /dev/null
+++ b/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- AArch64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle using AdvSIMD instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 242 entries have cost 1
+// 1447 entries have cost 2
+// 3602 entries have cost 3
+// 1237 entries have cost 4
+// 2 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
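The header comments give the table's cost distribution but not its entry encoding. In the LLVM targets that consume llvm-PerfectShuffle output, each 32-bit entry packs a saturated cost, an opcode number, and two 13-bit operand ids, and the table is indexed by the four result-lane selectors. The decode sketch below reflects that convention; the field layout is an assumption carried over from the generator's other consumers, not something stated in this file.

#include <cstdint>
#include <cstdio>

// Assumed layout: cost in bits 31-30 (saturated), opcode number in bits
// 29-26, LHS operand id in bits 25-13, RHS operand id in bits 12-0.
struct PFFields {
  unsigned Cost, OpNum, LHSID, RHSID;
};

static PFFields decodePFEntry(uint32_t Entry) {
  return {Entry >> 30, (Entry >> 26) & 0x0F, (Entry >> 13) & 0x1FFF,
          Entry & 0x1FFF};
}

// Four selectors, each 0..8 (8 meaning undef), hence 9^4 == 6561 entries.
static unsigned pfTableIndex(unsigned L0, unsigned L1, unsigned L2,
                             unsigned L3) {
  return ((L0 * 9 + L1) * 9 + L2) * 9 + L3;
}

int main() {
  // 835584U is the "<0,1,2,3>: Cost 0 copy LHS" entry in the table below;
  // it decodes to cost 0 with opcode number 0 (a plain copy).
  PFFields F = decodePFEntry(835584u);
  std::printf("cost=%u op=%u lhs=%u rhs=%u\n", F.Cost, F.OpNum, F.LHSID,
              F.RHSID);
  return 0;
}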
+static const unsigned PerfectShuffleTable[6561+1] = { + 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS + 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS + 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0> + 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> + 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS + 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3> + 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3> + 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> + 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS + 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0> + 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS + 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS + 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0> + 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5> + 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7> + 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1> + 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1> + 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS + 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0> + 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1> + 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS + 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0> + 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6> + 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6> + 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7> + 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> + 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS + 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> + 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0> + 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0> + 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3> + 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6> + 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6> + 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7> + 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0> + 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> + 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1> + 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS + 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS + 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4> + 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6> + 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS + 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS + 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5> + 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS + 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7> + 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3> + 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7> + 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5> + 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6> + 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5> + 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> + 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7> + 2720039396U, // <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7> + 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7> + 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS + 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3> + 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7> + 
3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS + 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0> + 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6> + 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0> + 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0> + 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> + 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0> + 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7> + 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0> + 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6> + 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0> + 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7> + 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7> + 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> + 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS + 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS + 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS + 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u> + 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS + 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS + 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS + 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u> + 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS + 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1> + 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS + 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1> + 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0> + 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5> + 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7> + 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1> + 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0> + 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS + 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1> + 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1> + 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0> + 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3> + 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS + 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7> + 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3> + 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1> + 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS + 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS + 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1> + 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2> + 835584U, // <0,1,2,3>: Cost 0 copy LHS + 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS + 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7> + 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7> + 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2> + 835584U, // <0,1,2,u>: Cost 0 copy LHS + 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0> + 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3> + 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0> + 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0> + 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS + 2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7> + 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0> + 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1> + 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3> + 2568159334U, // <0,1,4,0>: Cost 3 vext1 
<3,0,1,4>, LHS + 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1> + 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1> + 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4> + 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS + 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS + 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS + 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4> + 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS + 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1> + 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1> + 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0> + 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7> + 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6> + 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1> + 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1> + 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1> + 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7> + 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS + 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7> + 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1> + 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7> + 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS + 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7> + 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1> + 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1> + 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1> + 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0> + 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1> + 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0> + 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1> + 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6> + 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0> + 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0> + 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7> + 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2> + 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS + 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS + 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS + 835584U, // <0,1,u,3>: Cost 0 copy LHS + 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS + 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS + 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS + 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u> + 835584U, // <0,1,u,u>: Cost 0 copy LHS + 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0> + 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS + 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS + 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0> + 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6> + 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7> + 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7> + 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0> + 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS + 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2> + 2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1> + 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2> + 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> + 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS + 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7> + 
2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7> + 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7> + 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2> + 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2> + 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2> + 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2> + 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3> + 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS + 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3> + 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7> + 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2> + 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS + 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2> + 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> + 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3> + 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> + 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6> + 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> + 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3> + 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0> + 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> + 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS + 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3> + 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4> + 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4> + 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS + 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS + 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS + 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> + 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS + 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7> + 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3> + 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7> + 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6> + 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6> + 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5> + 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0> + 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS + 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS + 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1> + 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2> + 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3> + 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7> + 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5> + 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6> + 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6> + 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2> + 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7> + 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> + 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2> + 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2> + 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0> + 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6> + 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2> + 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, <7,6,0,2> + 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7> + 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> + 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u> + 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS + 1678563118U, // <0,2,u,2>: Cost 2 
vuzpl LHS, LHS + 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3> + 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS + 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS + 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS + 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS + 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS + 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0> + 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2> + 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0> + 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3> + 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS + 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6> + 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7> + 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0> + 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS + 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2> + 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1> + 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3> + 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3> + 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6> + 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6> + 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1> + 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3> + 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2> + 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS + 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2> + 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2> + 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3> + 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS + 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6> + 2598154746U, // <0,3,2,6>: Cost 3 vext1 , <6,2,7,3> + 2598155258U, // <0,3,2,7>: Cost 3 vext1 , <7,0,1,2> + 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS + 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2> + 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3> + 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3> + 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3> + 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6> + 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6> + 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7> + 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7> + 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3> + 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2> + 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4> + 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4> + 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3> + 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6> + 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6> + 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS + 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4> + 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6> + 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS + 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2> + 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2> + 3769600578U, // <0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7> + 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> + 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7> + 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7> + 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0> + 
2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5> + 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7> + 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3> + 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7> + 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7> + 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7> + 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6> + 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6> + 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0> + 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0> + 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1> + 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> + 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7> + 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7> + 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5> + 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7> + 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7> + 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0> + 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3> + 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS + 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u> + 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2> + 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3> + 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS + 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6> + 2598203898U, // <0,3,u,6>: Cost 3 vext1 , <6,2,7,3> + 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0> + 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS + 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4> + 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS + 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4> + 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0> + 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6> + 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1> + 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS + 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0> + 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS + 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS + 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1> + 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0> + 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1> + 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS + 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS + 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS + 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> + 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS + 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS + 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2> + 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2> + 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4> + 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS + 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS + 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS + 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> + 1946996040U, // <0,4,2,u>: Cost 2 vtrnl LHS, RHS + 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2> + 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2> + 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4> + 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3> + 3692349954U, 
// <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6> + 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6> + 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS + 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4> + 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2> + 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4> + 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0> + 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3> + 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4> + 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4> + 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS + 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS + 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4> + 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS + 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS + 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> + 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5> + 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5> + 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS + 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6> + 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS + 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5> + 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS + 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6> + 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2> + 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6> + 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0> + 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6> + 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6> + 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0> + 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4> + 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2> + 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS + 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1> + 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4> + 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4> + 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS + 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> + 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0> + 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2> + 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> + 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS + 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS + 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS + 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u> + 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS + 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS + 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS + 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u> + 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS + 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0> + 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS + 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2> + 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5> + 3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS + 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1> + 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1> + 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS + 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, 
LHS + 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS + 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3> + 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2> + 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2> + 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1> + 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5> + 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0> + 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3> + 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS + 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS + 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2> + 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7> + 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2> + 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS + 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5> + 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6> + 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS + 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS + 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2> + 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3> + 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1> + 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3> + 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> + 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0> + 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7> + 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0> + 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0> + 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1> + 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4> + 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4> + 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5> + 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6> + 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS + 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5> + 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6> + 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS + 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0> + 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0> + 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5> + 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0> + 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5> + 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5> + 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> + 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7> + 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7> + 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS + 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0> + 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3> + 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4> + 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS + 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0> + 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7> + 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0> + 1645047915U, // <0,5,6,u>: Cost 2 vext3 <5,6,u,0>, <5,6,u,0> + 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS + 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> + 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> + 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2> + 2562510134U, // 
<0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS + 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> + 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> + 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0> + 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS + 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS + 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS + 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0> + 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u> + 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u> + 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS + 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u> + 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0> + 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0> + 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS + 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS + 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2> + 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4> + 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> + 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> + 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0> + 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS + 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS + 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS + 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1> + 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3> + 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3> + 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS + 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1> + 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6> + 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS + 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS + 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS + 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2> + 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2> + 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1> + 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2> + 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3> + 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6> + 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS + 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS + 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2> + 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7> + 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0> + 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3> + 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6> + 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7> + 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0> + 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0> + 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0> + 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS + 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2> + 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4> + 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2> + 2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS + 2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS + 2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0> + 4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS + 2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS + 
2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> + 3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0> + 3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7> + 3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0> + 3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5> + 3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0> + 3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7> + 4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS + 2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7> + 3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0> + 3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3> + 3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6> + 3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0> + 3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4> + 3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6> + 2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6> + 2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7> + 2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7> + 2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1> + 2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0> + 3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7> + 2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0> + 2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5> + 2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6> + 3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2> + 2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0> + 2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1> + 1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS + 2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS + 2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u> + 2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0> + 1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u> + 2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS + 2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0> + 2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS + 1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS + 2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0> + 2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS + 2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0> + 3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0> + 2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5> + 2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6> + 2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7> + 3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7> + 2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS + 2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1> + 3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1> + 2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0> + 3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5> + 2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1> + 3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3> + 3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7> + 2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7> + 2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1> + 2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS + 1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2> + 3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2> + 2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0> + 2586504502U, // <0,7,2,4>: Cost 
3 vext1 <6,0,7,2>, RHS + 2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7> + 2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2> + 3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7> + 1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2> + 3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2> + 3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3> + 3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3> + 3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3> + 3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6> + 3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7> + 2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> + 3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0> + 2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7> + 3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS + 2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4> + 2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4> + 3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7> + 3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6> + 2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS + 3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7> + 3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5> + 2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS + 3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0> + 3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7> + 3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5> + 3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7> + 2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5> + 3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7> + 2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7> + 2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0> + 2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7> + 3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0> + 2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6> + 3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7> + 3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7> + 3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS + 2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7> + 3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6> + 2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0> + 2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7> + 3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1> + 3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0> + 3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7> + 3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0> + 3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS + 3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7> + 3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7> + 2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7> + 2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7> + 2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u> + 1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u> + 2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u> + 2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0> + 2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u> + 2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS + 2669066421U, // <0,7,u,6>: Cost 3 vext2 , + 2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0> + 1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u> + 135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS + 1544896614U, // <0,u,0,1>: Cost 2 
vext2 <0,2,0,u>, LHS + 1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS + 2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>, + 1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS + 2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>, + 3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS + 2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0> + 135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS + 1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1> + 1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS + 1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS + 2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> + 1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS + 1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS + 2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS, + 2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS + 1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS + 1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS + 1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2> + 1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS + 835584U, // <0,u,2,3>: Cost 0 copy LHS + 1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS + 3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS, + 1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS + 1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2> + 835584U, // <0,u,2,u>: Cost 0 copy LHS + 2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2> + 2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> + 2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u> + 2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> + 2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6> + 2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> + 2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7> + 2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u> + 2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> + 2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS + 2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS + 3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS + 2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4> + 2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS + 1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS + 1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS + 2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>, + 1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS + 2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS + 2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0> + 3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>, + 2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>, + 2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS + 2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u> + 1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS + 2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS + 1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS + 2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS + 2262496983U, // <0,u,6,1>: Cost 3 vrev + 2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u> + 2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>, + 2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS + 2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u> + 2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u> + 1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u> + 1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u> + 2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS + 2562728854U, // <0,u,7,1>: Cost 3 vext1 
<2,0,u,7>, <1,2,3,0> + 2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7> + 2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u> + 2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS + 2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>, + 2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7> + 2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7> + 2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS + 135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS + 1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS + 1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS + 835584U, // <0,u,u,3>: Cost 0 copy LHS + 1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS + 1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS + 1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS + 1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u> + 835584U, // <0,u,u,u>: Cost 0 copy LHS + 2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0> + 1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1> + 2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2> + 2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0> + 2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1> + 2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0> + 3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7> + 3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0> + 1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1> + 2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS + 2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1> + 1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS + 3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3> + 2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS + 2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1> + 2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7> + 3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2> + 1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS + 2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1> + 2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1> + 2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0> + 2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1> + 2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6> + 3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7> + 2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0> + 2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2> + 2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1> + 3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0> + 3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1> + 67944550U, // <1,0,3,2>: Cost 1 vrev LHS + 2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3> + 2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS + 4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7> + 3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7> + 2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3> + 68386972U, // <1,0,3,u>: Cost 1 vrev LHS + 2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1> + 2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5> + 2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6> + 3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1> + 3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1> + 2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS + 3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1> + 3666383940U, // <1,0,4,7>: 
Cost 4 vext1 <7,1,0,4>, <7,1,0,4> + 2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS + 4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0> + 2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS + 3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS + 3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5> + 2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5> + 3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0> + 3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0> + 3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS + 3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS + 3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1> + 2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7> + 3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7> + 3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6> + 3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1> + 3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0> + 3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0> + 2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0> + 2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0> + 2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0> + 4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1> + 2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7> + 3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0> + 3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6> + 3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0> + 2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0> + 3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7> + 2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0> + 3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0> + 1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1> + 67985515U, // <1,0,u,2>: Cost 1 vrev LHS + 2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1> + 2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6> + 2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS + 2669082807U, // <1,0,u,6>: Cost 3 vext2 , + 2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u> + 68427937U, // <1,0,u,u>: Cost 1 vrev LHS + 1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1> + 1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS + 2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1> + 2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2> + 2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5> + 2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1> + 3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7> + 3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0> + 1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1> + 1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS + 2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0> + 2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3> + 1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS + 2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7> + 2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7> + 2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1> + 202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS + 2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2> + 2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1> + 2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2> + 2622637734U, // <1,1,2,3>: Cost 3 vext2 
<0,u,1,1>, <2,3,0,1> + 2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS + 3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7> + 2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7> + 3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0> + 2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1> + 2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2> + 3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1> + 4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2> + 2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS + 2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6> + 2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7> + 3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7> + 2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3> + 2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS + 2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS + 2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4> + 3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0> + 3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5> + 2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS + 1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS + 2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS + 3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4> + 1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS + 2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1> + 2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3> + 4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2> + 2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7> + 2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5> + 2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5> + 2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0> + 2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS + 2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7> + 3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2> + 2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7> + 2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3> + 3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7> + 3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6> + 4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5> + 2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6> + 2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0> + 2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0> + 2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1> + 2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> + 3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3> + 4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS + 2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6> + 3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7> + 3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0> + 2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7> + 2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1> + 1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS + 2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>, + 2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS + 1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS + 1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS + 2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>, + 2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS + 202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS + 2635251712U, // <1,2,0,0>: 
Cost 3 vext2 <3,0,1,2>, <0,0,0,0> + 1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS + 2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2> + 2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1> + 2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5> + 3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7> + 2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2> + 2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1> + 1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS + 2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2> + 2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1> + 2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0> + 2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS + 2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS + 2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7> + 2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7> + 3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0> + 2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS + 2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2> + 2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2> + 2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2> + 2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3> + 3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5> + 3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7> + 2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7> + 2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1> + 2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3> + 403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS + 1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3> + 403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS + 1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3> + 1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3> + 1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2> + 403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS + 2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2> + 3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6> + 3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4> + 2598759198U, // <1,2,4,3>: Cost 3 vext1 , <3,u,1,2> + 2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4> + 1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS + 2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6> + 2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0> + 1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS + 2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS + 2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7> + 3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2> + 2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS + 2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS + 2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5> + 2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0> + 2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7> + 2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS + 2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1> + 2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2> + 2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3> + 2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7> + 3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5> + 3763488716U, // <1,2,6,5>: 
Cost 4 vext3 <0,u,1,1>, <2,6,5,7> + 2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6> + 2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2> + 2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7> + 1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2> + 2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2> + 3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3> + 2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1> + 2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6> + 3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0> + 3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1> + 2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1> + 1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2> + 403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS + 1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2> + 1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2> + 1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2> + 403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS + 1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS + 1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3> + 1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2> + 403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS + 2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0> + 1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS + 2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2> + 2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3> + 2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5> + 3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6> + 3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7> + 3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1> + 1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS + 2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2> + 2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1> + 2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3> + 1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS + 2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS + 2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7> + 3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7> + 2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1> + 1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS + 2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> + 2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3> + 2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> + 2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> + 2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> + 3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5> + 2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> + 3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3> + 2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> + 1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS + 1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3> + 2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2> + 2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3> + 1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS + 2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5> + 2598826490U, // <1,3,3,6>: Cost 3 vext1 , <6,2,7,3> + 3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7> + 1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS + 2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS + 2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3> + 3832761290U, // <1,3,4,2>: 
Cost 4 vuzpl <1,2,3,4>, <4,1,2,3> + 2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4> + 2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS + 1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS + 2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6> + 3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4> + 1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS + 2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS + 2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7> + 2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5> + 2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5> + 2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS + 2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5> + 2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4> + 1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS + 1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS + 2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0> + 3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1> + 2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3> + 3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7> + 2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3> + 3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>, + 2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3> + 2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> + 2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> + 2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1> + 2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3> + 3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7> + 2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3> + 2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6> + 2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5> + 3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1> + 2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7> + 2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1> + 1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS + 1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u> + 2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2> + 1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS + 1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS + 1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS + 2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6> + 1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS + 1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS + 2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4> + 2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS + 3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4> + 2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4> + 2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5> + 1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1> + 2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2> + 3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1> + 1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1> + 3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2> + 2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4> + 3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0> + 3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6> + 3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1> + 2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS + 3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS + 3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0> + 2892402217U, // 
<1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS + 3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2> + 3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3> + 3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2> + 2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1> + 3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4> + 2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS + 3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7> + 3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2> + 2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS + 2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS + 2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4> + 2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3> + 3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3> + 2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6> + 3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5> + 3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6> + 3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3> + 3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u> + 2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1> + 3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4> + 3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4> + 3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4> + 2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4> + 2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS + 2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6> + 3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1> + 2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS + 2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS + 2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5> + 2598913774U, // <1,4,5,2>: Cost 3 vext1 , <2,3,u,1> + 3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2> + 2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS + 2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS + 1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS + 3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS + 1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS + 2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS + 2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1> + 3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2> + 2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2> + 2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS + 3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7> + 3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7> + 2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> + 2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS + 2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4> + 3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1> + 3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4> + 3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4> + 3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS + 3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0> + 2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1> + 3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2> + 2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4> + 2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS + 2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS + 2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u> + 2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1> + 
2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>, + 1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1> + 1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS + 2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1> + 1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS + 2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0> + 1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS + 2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5> + 3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4> + 1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5> + 2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5> + 2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1> + 3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1> + 1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS + 2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2> + 2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1> + 2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0> + 2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7> + 2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5> + 2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1> + 2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7> + 3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS + 2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3> + 3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1> + 2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5> + 2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2> + 2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1> + 3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5> + 3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7> + 2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7> + 4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS + 2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1> + 2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2> + 2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7> + 3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2> + 2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3> + 2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6> + 3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5> + 4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6> + 2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS + 2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS + 2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1> + 2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0> + 3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3> + 3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4> + 2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4> + 1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS + 2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS + 2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6> + 1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS + 2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS + 2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5> + 3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5> + 4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3> + 2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS + 2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5> + 2667794530U, // <1,5,5,6>: Cost 3 vext2 , <5,6,7,0> + 2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7> + 2557187886U, // 
<1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS + 2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1> + 2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5> + 2667794938U, // <1,5,6,2>: Cost 3 vext2 , <6,2,7,3> + 3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4> + 3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6> + 3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6> + 2667795256U, // <1,5,6,6>: Cost 3 vext2 , <6,6,6,6> + 2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0> + 2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0> + 2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS + 2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1> + 2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1> + 2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7> + 2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS + 2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7> + 3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0> + 2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1> + 2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS + 2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>, + 1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS + 2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>, + 2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>, + 1594054682U, // <1,5,u,4>: Cost 2 vext2 , + 1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS + 2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>, + 2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS + 2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS + 3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0> + 2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS + 2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6> + 3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1> + 2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5> + 2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6> + 2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6> + 4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS + 2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS + 3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2> + 2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1> + 3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0> + 3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3> + 2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6> + 3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7> + 3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1> + 2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS + 2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS + 2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1> + 3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0> + 3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2> + 3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1> + 3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS + 3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7> + 2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7> + 2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS + 2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS + 2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS + 3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1> + 2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3> + 3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3> + 2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS + 
4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>, + 2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3> + 3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7> + 3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u> + 3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1> + 3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0> + 3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4> + 3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4> + 3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5> + 2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS + 3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS + 4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS + 2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS + 3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2> + 3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5> + 3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5> + 3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5> + 3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6> + 3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5> + 2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0> + 2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS + 2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS + 2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1> + 3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6> + 3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7> + 3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0> + 3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS + 3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7> + 2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6> + 2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7> + 2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7> + 1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1> + 2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1> + 2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3> + 3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7> + 2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5> + 2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1> + 2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7> + 4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS + 1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1> + 1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1> + 2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS + 3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2> + 3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>, + 2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5> + 2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS + 2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>, + 3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7> + 1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1> + 2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1> + 2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS + 3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1> + 3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1> + 2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1> + 3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0> + 2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7> + 2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7> + 2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS + 2593128550U, // <1,7,1,0>: Cost 3 
vext1 <7,1,7,1>, LHS + 2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1> + 3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0> + 2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1> + 2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS + 2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7> + 2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7> + 2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1> + 2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7> + 2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS + 3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7> + 3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2> + 2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1> + 2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS + 3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7> + 2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2> + 3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2> + 2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS + 1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS + 2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7> + 2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2> + 2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3> + 1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS + 1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3> + 2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3> + 2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2> + 1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS + 2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1> + 3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1> + 3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4> + 3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4> + 3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4> + 2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS + 3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0> + 3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6> + 2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS + 2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS + 2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7> + 3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2> + 2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5> + 2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS + 4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6> + 2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7> + 3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS + 2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS + 2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1> + 2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0> + 3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7> + 3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6> + 3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS + 3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6> + 3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6> + 2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0> + 2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0> + 2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1> + 3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1> + 3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1> + 3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0> + 3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS + 
2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7> + 3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7> + 2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7> + 2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1> + 1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS + 2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS + 2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2> + 2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u> + 1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS + 1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u> + 2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3> + 2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2> + 1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS + 1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u> + 1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS + 2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2> + 2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u> + 1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u> + 1658771190U, // <1,u,0,5>: Cost 2 vext3 , + 2736789248U, // <1,u,0,6>: Cost 3 vext3 , + 2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1> + 1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS + 1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS + 202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS + 1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS + 1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS + 1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS + 2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7> + 3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS + 2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS + 202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS + 2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0> + 2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS + 2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2> + 2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3> + 2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4> + 2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS + 2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6> + 2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS + 2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u> + 403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS + 1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2> + 115726126U, // <1,u,3,2>: Cost 1 vrev LHS + 2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS + 403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS + 1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3> + 1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3> + 2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS + 403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS + 2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1> + 2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0> + 2263217967U, // <1,u,4,2>: Cost 3 vrev + 2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4> + 2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS + 1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS + 2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6> + 2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>, + 1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS + 2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS + 2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7> + 3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS + 2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS + 2551434550U, // <1,u,5,4>: 
Cost 3 vext1 <0,1,u,5>, RHS + 2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS + 1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS + 1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS + 1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS + 2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u> + 2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>, + 2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3> + 2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>, + 2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4> + 3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>, + 2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u> + 2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7> + 2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u> + 1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u> + 2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1> + 2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1> + 2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7> + 2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS + 2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u> + 2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>, + 2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7> + 1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u> + 403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS + 202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS + 115767091U, // <1,u,u,2>: Cost 1 vrev LHS + 1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS + 403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS + 1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS + 1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS + 1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS + 403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS + 2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0> + 2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1> + 1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2> + 2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0> + 2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS + 3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5> + 2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0> + 4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7> + 1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2> + 2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1> + 2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0> + 1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS + 2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS + 2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS + 3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7> + 2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1> + 3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2> + 1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS + 1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS + 2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2> + 2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0> + 2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2> + 1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS + 2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3> + 2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2> + 2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2> + 1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS + 2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0> + 2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1> + 2953625764U, // 
<2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2> + 4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3> + 3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS + 3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5> + 4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6> + 3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7> + 2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u> + 2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS + 2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5> + 2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6> + 3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2> + 2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS + 2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS + 2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS + 3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5> + 2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6> + 3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS + 3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5> + 2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7> + 3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5> + 2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5> + 3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5> + 3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0> + 2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS + 2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5> + 4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0> + 2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS + 2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7> + 3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6> + 2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6> + 3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0> + 3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6> + 3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7> + 2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6> + 2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> + 3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3> + 2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1> + 3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0> + 3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6> + 3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2> + 3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0> + 3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7> + 2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0> + 1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS + 2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1> + 1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS + 2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2> + 1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS + 2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS + 2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS + 2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS + 1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS + 2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS + 2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS + 3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0> + 1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2> + 2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS + 2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2> + 3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0> + 2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1> + 
1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2> + 2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS + 2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1> + 2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0> + 2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3> + 2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS + 3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7> + 3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1> + 3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2> + 2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3> + 2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS + 3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1> + 2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2> + 2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0> + 2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS + 3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3> + 3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7> + 2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0> + 2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0> + 2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS + 2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1> + 2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2> + 2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3> + 2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS + 2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5> + 2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> + 4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7> + 2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u> + 2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5> + 3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6> + 3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5> + 1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4> + 3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS + 2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS + 3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS + 3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6> + 1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4> + 2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS + 2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7> + 3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1> + 2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7> + 2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS + 3740913668U, // <2,1,5,5>: Cost 4 vext2 , <5,5,5,5> + 3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5> + 3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS + 2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7> + 2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS + 3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2> + 2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7> + 3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS + 2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS + 4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5> + 3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6> + 3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1> + 2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS + 2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2> + 3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1> + 3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0> + 2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7> + 3740915046U, 
// <2,1,7,4>: Cost 4 vext2 , <7,4,5,6> + 3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7> + 3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1> + 3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u> + 2669827714U, // <2,1,7,u>: Cost 3 vext2 , <7,u,1,2> + 2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS + 2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1> + 2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2> + 1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u> + 2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS + 2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5> + 2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3> + 2669828370U, // <2,1,u,7>: Cost 3 vext2 , + 1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u> + 1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2> + 1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS + 2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0> + 2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2> + 2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS + 3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7> + 3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6> + 3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2> + 1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2> + 2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2> + 2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1> + 2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0> + 2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS + 3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS + 2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7> + 3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3> + 3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1> + 2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS + 1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS + 2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2> + 269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS + 2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3> + 1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS + 2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7> + 2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7> + 2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2> + 269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS + 2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1> + 2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0> + 2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2> + 1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS + 2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5> + 3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5> + 2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6> + 2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3> + 1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS + 2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS + 3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2> + 2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5> + 3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5> + 2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS + 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS + 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS + 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4> + 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS + 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2> + 2617413328U, // <2,2,5,1>: Cost 3 
vext2 <0,0,2,2>, <5,1,7,3> + 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7> + 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS + 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5> + 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5> + 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0> + 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS + 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS + 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6> + 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3> + 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3> + 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7> + 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS + 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7> + 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6> + 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2> + 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7> + 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1> + 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5> + 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2> + 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS + 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6> + 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7> + 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7> + 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7> + 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1> + 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS + 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS + 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS + 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS + 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS + 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS + 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6> + 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS + 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS + 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS + 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3> + 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7> + 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> + 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0> + 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS + 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS + 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7> + 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0> + 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> + 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2> + 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1> + 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, <2,2,2,2> + 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> + 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5> + 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7> + 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1> + 1548986427U, // <2,3,2,u>: Cost 2 vext2 
LHS, <2,u,0,1> + 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> + 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3> + 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3> + 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3> + 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> + 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7> + 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3> + 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> + 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2> + 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS + 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4> + 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> + 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2> + 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS + 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS + 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> + 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4> + 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS + 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS + 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3> + 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4> + 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6> + 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> + 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7> + 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7> + 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> + 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6> + 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3> + 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5> + 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> + 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7> + 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> + 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> + 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1> + 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2> + 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3> + 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3> + 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1> + 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6> + 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2> + 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1> + 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2> + 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, + 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS + 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, + 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, + 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, + 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS + 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, + 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, + 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS + 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4> + 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS + 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, <0,2,2,4> + 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4> + 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2> + 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1> + 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> + 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1> + 1634962332U, 
// <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> + 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2> + 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1> + 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4> + 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS + 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4> + 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7> + 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3> + 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1> + 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS + 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4> + 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3> + 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4> + 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5> + 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4> + 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS + 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS + 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0> + 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS + 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4> + 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1> + 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4> + 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3> + 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4> + 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> + 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> + 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7> + 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> + 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2> + 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1> + 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2> + 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3> + 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4> + 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS + 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS + 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7> + 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS + 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5> + 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2> + 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5> + 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5> + 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS + 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5> + 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS + 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS + 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS + 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS + 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2> + 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4> + 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2> + 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS + 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS + 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6> + 2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1> + 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS + 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> + 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2> + 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4> + 3734967544U, // <2,4,7,3>: Cost 4 vext2 
<7,3,2,4>, <7,3,2,4> + 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6> + 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0> + 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1> + 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7> + 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> + 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS + 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS + 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2> + 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2> + 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS + 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5> + 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS + 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS + 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS + 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0> + 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS + 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2> + 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5> + 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5> + 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5> + 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7> + 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS + 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS + 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2> + 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1> + 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0> + 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7> + 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5> + 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7> + 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2> + 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3> + 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5> + 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2> + 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1> + 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2> + 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1> + 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5> + 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1> + 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7> + 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS + 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS + 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS + 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3> + 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5> + 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3> + 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6> + 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3> + 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> + 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7> + 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u> + 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS + 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4> + 2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5> + 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4> + 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS + 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS + 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6> + 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS 
+ 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS + 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS + 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7> + 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5> + 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5> + 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS + 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5> + 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6> + 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7> + 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7> + 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS + 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6> + 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3> + 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6> + 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5> + 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7> + 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6> + 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS + 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS + 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS + 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2> + 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7> + 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2> + 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS + 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7> + 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2> + 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS + 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS + 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS + 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS + 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u> + 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u> + 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, + 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS + 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> + 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7> + 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS + 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0> + 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS + 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2> + 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0> + 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6> + 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6> + 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1> + 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2> + 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS + 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2> + 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1> + 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0> + 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS + 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6> + 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7> + 3693839585U, // <2,6,1,6>: Cost 4 vext2 <0,4,2,6>, <1,6,3,7> + 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2> + 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS + 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1> + 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3> + 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2> + 2620098214U, // <2,6,2,3>: 
Cost 3 vext2 <0,4,2,6>, <2,3,0,1> + 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6> + 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7> + 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6> + 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS + 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1> + 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2> + 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1> + 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7> + 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3> + 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6> + 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5> + 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> + 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS + 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS + 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2> + 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3> + 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0> + 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4> + 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4> + 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS + 2667875700U, // <2,6,4,6>: Cost 3 vext2 , <4,6,4,6> + 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS + 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS + 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2> + 2667876048U, // <2,6,5,1>: Cost 3 vext2 , <5,1,7,3> + 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7> + 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5> + 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5> + 2667876356U, // <2,6,5,5>: Cost 3 vext2 , <5,5,5,5> + 2667876450U, // <2,6,5,6>: Cost 3 vext2 , <5,6,7,0> + 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS + 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS + 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS + 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1> + 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6> + 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3> + 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS + 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5> + 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6> + 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS + 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS + 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1> + 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2> + 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2> + 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0> + 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5> + 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7> + 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2> + 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0> + 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1> + 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, + 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS + 2620102536U, // <2,6,u,2>: Cost 3 vext2 <0,4,2,6>, + 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS + 1594136612U, // <2,6,u,4>: Cost 2 vext2 , + 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS + 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, + 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS + 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS + 2726179825U, // <2,7,0,0>: 
Cost 3 vext3 <7,0,0,2>, <7,0,0,2> + 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2> + 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2> + 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0> + 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2> + 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7> + 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7> + 2599760953U, // <2,7,0,7>: Cost 3 vext1 , <7,0,u,2> + 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2> + 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> + 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1> + 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0> + 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7> + 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5> + 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7> + 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7> + 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0> + 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> + 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2> + 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7> + 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2> + 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1> + 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6> + 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7> + 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7> + 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1> + 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7> + 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS + 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7> + 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2> + 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3> + 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS + 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3> + 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3> + 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2> + 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS + 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6> + 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4> + 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7> + 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4> + 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4> + 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS + 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u> + 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0> + 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS + 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2> + 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7> + 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7> + 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5> + 2599800118U, // <2,7,5,4>: Cost 3 vext1 , RHS + 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5> + 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7> + 2599802214U, // <2,7,5,7>: Cost 3 vext1 , <7,4,5,6> + 2599802670U, // <2,7,5,u>: Cost 3 vext1 , LHS + 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS + 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7> + 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3> + 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6> + 2581892406U, // <2,7,6,4>: Cost 3 vext1 
<5,2,7,6>, RHS + 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6> + 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6> + 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0> + 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS + 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1> + 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2> + 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7> + 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7> + 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS + 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5> + 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7> + 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7> + 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1> + 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS + 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2> + 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2> + 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u> + 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS + 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS + 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u> + 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2> + 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS + 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0> + 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS + 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> + 1658631909U, // <2,u,0,3>: Cost 2 vext3 , + 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> + 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7> + 1658853120U, // <2,u,0,6>: Cost 2 vext3 , + 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS + 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS + 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> + 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1> + 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0> + 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> + 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5> + 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> + 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7> + 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, + 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3> + 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS + 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1> + 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS + 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1> + 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS + 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> + 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> + 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS + 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS + 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2> + 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1> + 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3> + 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS + 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6> + 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5> + 1514172931U, // <2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3> + 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS + 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS + 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS + 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4> + 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5> + 1190213513U, // <2,u,4,3>: 
Cost 2 vrev + 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS + 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS + 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6> + 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS + 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS + 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5> + 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3> + 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5> + 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, + 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6> + 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5> + 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS + 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7> + 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS + 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS + 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2> + 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3> + 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, + 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS + 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS + 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6> + 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1> + 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS + 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2> + 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u> + 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7> + 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1> + 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6> + 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2> + 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7> + 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7> + 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2> + 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS + 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS + 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS + 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS + 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS + 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS + 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS + 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS + 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS + 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0> + 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> + 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2> + 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1> + 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1> + 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7> + 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1> + 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0> + 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2> + 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS + 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0> + 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS + 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3> + 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS + 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7> + 2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7> + 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1> + 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS + 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> + 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> + 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0> + 2685632702U, // 
<3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> + 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> + 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7> + 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7> + 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7> + 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> + 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2> + 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> + 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1> + 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3> + 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6> + 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7> + 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7> + 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1> + 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3> + 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4> + 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> + 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> + 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1> + 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6> + 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS + 2666573172U, // <3,0,4,6>: Cost 3 vext2 , <4,6,4,6> + 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4> + 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6> + 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7> + 2666573520U, // <3,0,5,1>: Cost 3 vext2 , <5,1,7,3> + 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS + 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6> + 2666573766U, // <3,0,5,4>: Cost 3 vext2 , <5,4,7,6> + 2666573828U, // <3,0,5,5>: Cost 3 vext2 , <5,5,5,5> + 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7> + 2666573992U, // <3,0,5,7>: Cost 3 vext2 , <5,7,5,7> + 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS + 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7> + 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7> + 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7> + 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2> + 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1> + 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0> + 2666574648U, // <3,0,6,6>: Cost 3 vext2 , <6,6,6,6> + 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0> + 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7> + 2666574842U, // <3,0,7,0>: Cost 3 vext2 , <7,0,1,2> + 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7> + 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0> + 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7> + 2666575206U, // <3,0,7,4>: Cost 3 vext2 , <7,4,5,6> + 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7> + 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3> + 2666575468U, // <3,0,7,7>: Cost 3 vext2 , <7,7,7,7> + 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0> + 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2> + 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> + 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS + 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> + 1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> + 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS + 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, + 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u> + 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS + 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS + 2618802278U, // <3,1,0,1>: 
Cost 3 vext2 <0,2,3,1>, LHS + 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1> + 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> + 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS + 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1> + 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6> + 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0> + 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> + 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1> + 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1> + 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1> + 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> + 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5> + 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> + 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5> + 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3> + 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3> + 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1> + 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> + 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2> + 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0> + 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS + 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> + 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7> + 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0> + 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0> + 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS + 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> + 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> + 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1> + 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS + 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> + 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> + 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3> + 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> + 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS + 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5> + 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5> + 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5> + 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS + 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS + 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS + 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4> + 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5> + 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1> + 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7> + 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5> + 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> + 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5> + 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7> + 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0> + 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS + 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> + 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1> + 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> + 2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7> + 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> + 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5> + 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> + 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7> + 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0> + 2685633806U, // <3,1,6,u>: Cost 
3 vext3 LHS, <1,6,u,7> + 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS + 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1> + 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2> + 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS + 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS + 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7> + 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7> + 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7> + 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS + 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS + 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3> + 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> + 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0> + 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS + 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> + 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> + 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS + 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3> + 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0> + 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS + 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0> + 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> + 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5> + 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7> + 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> + 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0> + 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS + 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2> + 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1> + 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0> + 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> + 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS + 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7> + 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> + 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1> + 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1> + 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1> + 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3> + 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2> + 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> + 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5> + 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> + 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6> + 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3> + 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3> + 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1> + 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0> + 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2> + 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3> + 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5> + 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1> + 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2> + 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0> + 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1> + 2558279782U, // <3,2,4,0>: Cost 3 vext1 <1,3,2,4>, LHS + 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4> + 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4> + 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> + 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS + 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS + 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0> + 
2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4> + 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS + 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS + 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5> + 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> + 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> + 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5> + 2665263108U, // <3,2,5,5>: Cost 3 vext2 , <5,5,5,5> + 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> + 2665263272U, // <3,2,5,7>: Cost 3 vext2 , <5,7,5,7> + 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5> + 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1> + 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3> + 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6> + 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> + 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5> + 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> + 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> + 2665263950U, // <3,2,6,7>: Cost 3 vext2 , <6,7,0,1> + 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> + 2665264122U, // <3,2,7,0>: Cost 3 vext2 , <7,0,1,2> + 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3> + 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2> + 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS + 2665264486U, // <3,2,7,4>: Cost 3 vext2 , <7,4,5,6> + 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7> + 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7> + 2665264748U, // <3,2,7,7>: Cost 3 vext2 , <7,7,7,7> + 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS + 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1> + 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS + 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2> + 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> + 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5> + 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS + 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0> + 2665265408U, // <3,2,u,7>: Cost 3 vext2 , + 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1> + 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0> + 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2> + 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0> + 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2> + 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1> + 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2> + 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2> + 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7> + 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2> + 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3> + 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3> + 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3> + 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1> + 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS + 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3> + 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3> + 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3> + 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3> + 2564309094U, // <3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS + 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3> + 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3> + 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0> + 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS + 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4> + 2685634918U, // <3,3,2,6>: Cost 3 
vext3 LHS, <3,2,6,3> + 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3> + 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3> + 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS + 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3> + 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3> + 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS + 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS + 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5> + 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7> + 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3> + 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS + 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS + 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4> + 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4> + 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6> + 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4> + 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6> + 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS + 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7> + 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6> + 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS + 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5> + 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5> + 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5> + 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS + 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5> + 2665934946U, // <3,3,5,6>: Cost 3 vext2 , <5,6,7,0> + 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS + 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS + 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7> + 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7> + 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7> + 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7> + 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7> + 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7> + 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6> + 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3> + 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3> + 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS + 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7> + 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7> + 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3> + 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS + 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7> + 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3> + 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7> + 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS + 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS + 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2> + 1592858504U, // <3,3,u,2>: Cost 2 vext2 , + 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS + 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS + 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6> + 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3> + 2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS + 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS + 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0> + 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS + 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2> + 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4> + 2624799058U, // <3,4,0,4>: Cost 
3 vext2 <1,2,3,4>, <0,4,1,5> + 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1> + 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2> + 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0> + 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS + 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2> + 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1> + 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4> + 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3> + 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS + 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0> + 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> + 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4> + 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4> + 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS + 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3> + 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2> + 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1> + 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3> + 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3> + 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0> + 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4> + 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0> + 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2> + 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4> + 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4> + 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3> + 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1> + 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS + 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS + 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1> + 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2> + 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS + 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4> + 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4> + 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> + 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4> + 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS + 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6> + 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4> + 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS + 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS + 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> + 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5> + 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2> + 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS + 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7> + 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS + 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5> + 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS + 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1> + 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6> + 2666607098U, // <3,4,6,2>: Cost 3 vext2 , <6,2,7,3> + 2558446082U, // <3,4,6,3>: Cost 3 vext1 <1,3,4,6>, <3,4,5,6> + 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6> + 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> + 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7> + 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4> + 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2> + 2666607610U, // <3,4,7,0>: Cost 3 vext2 , <7,0,1,2> + 
3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5> + 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4> + 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7> + 2666607974U, // <3,4,7,4>: Cost 3 vext2 , <7,4,5,6> + 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0> + 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0> + 2666608236U, // <3,4,7,7>: Cost 3 vext2 , <7,7,7,7> + 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4> + 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS + 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS + 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u> + 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, + 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS + 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS + 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS + 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u> + 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS + 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0> + 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS + 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5> + 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4> + 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1> + 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> + 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1> + 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0> + 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS + 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS + 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1> + 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5> + 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5> + 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5> + 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3> + 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7> + 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3> + 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3> + 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3> + 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5> + 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2> + 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5> + 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5> + 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3> + 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7> + 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3> + 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5> + 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2> + 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5> + 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4> + 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3> + 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6> + 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5> + 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6> + 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS + 3095072055U, // <3,5,3,u>: Cost 3 vtrnr <1,3,1,3>, RHS + 2600304742U, // <3,5,4,0>: Cost 3 vext1 , LHS + 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5> + 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4> + 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0> + 2600308022U, // <3,5,4,4>: Cost 3 vext1 , RHS + 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS + 2772454710U, // <3,5,4,6>: 
Cost 3 vuzpl <3,4,5,6>, RHS + 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6> + 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6> + 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS + 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3> + 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5> + 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5> + 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS + 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5> + 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6> + 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7> + 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7> + 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1> + 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7> + 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6> + 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4> + 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5> + 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7> + 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7> + 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0> + 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0> + 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS + 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7> + 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2> + 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2> + 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS + 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7> + 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0> + 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7> + 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS + 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS + 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u> + 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2> + 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2> + 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS + 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7> + 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS + 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3> + 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS + 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS + 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2> + 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4> + 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4> + 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2> + 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7> + 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0> + 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS + 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2> + 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3> + 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1> + 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6> + 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1> + 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6> + 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3> + 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3> + 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3> + 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6> + 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS + 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3> + 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6> + 3763581395U, // <3,6,2,3>: Cost 4 vext3 
LHS, <6,2,3,0> + 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6> + 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6> + 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3> + 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3> + 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3> + 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2> + 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3> + 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3> + 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3> + 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6> + 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6> + 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2> + 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS + 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS + 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6> + 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3> + 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5> + 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6> + 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6> + 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6> + 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0> + 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS + 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6> + 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS + 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2> + 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7> + 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6> + 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5> + 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6> + 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6> + 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5> + 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS + 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1> + 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3> + 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3> + 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6> + 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4> + 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7> + 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7> + 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7> + 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1> + 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7> + 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7> + 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3> + 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5> + 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1> + 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2> + 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS + 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1> + 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1> + 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2> + 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u> + 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6> + 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5> + 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6> + 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3> + 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, 
<6,u,u,1> + 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0> + 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS + 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2> + 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0> + 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5> + 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0> + 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0> + 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1> + 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS + 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2> + 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1> + 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0> + 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7> + 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS + 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7> + 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7> + 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3> + 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7> + 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS + 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3> + 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2> + 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1> + 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS + 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7> + 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7> + 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3> + 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7> + 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2> + 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3> + 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3> + 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3> + 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6> + 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7> + 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7> + 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7> + 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2> + 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS + 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7> + 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7> + 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7> + 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS + 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS + 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4> + 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6> + 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS + 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2> + 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3> + 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3> + 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0> + 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5> + 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5> + 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7> + 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS + 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS + 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1> + 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0> + 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3> + 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0> + 
2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5> + 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4> + 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6> + 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0> + 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7> + 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS + 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7> + 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7> + 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7> + 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS + 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7> + 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7> + 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7> + 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>, + 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS + 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>, + 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS + 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>, + 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS + 1595545808U, // <3,7,u,6>: Cost 2 vext2 , + 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS + 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0> + 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS, + 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS, + 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS, + 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS, + 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS, + 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS, + 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS + 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS, + 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u> + 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u> + 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS + 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS, + 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS + 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u> + 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u> + 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS, + 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS + 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2> + 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3> + 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u> + 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS, + 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6> + 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS, + 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u> + 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS, + 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS, + 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS, + 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3> + 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS, + 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS + 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS, + 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS, + 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS, + 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS + 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS + 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS + 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS, + 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS, + 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS, + 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4> + 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS, + 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS, + 
1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS, + 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS, + 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS + 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS, + 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5> + 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS, + 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS + 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5> + 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS + 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS, + 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS + 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS, + 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6> + 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS, + 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS, + 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6> + 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS, + 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6> + 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS, + 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS, + 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS + 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7> + 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2> + 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS + 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS + 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7> + 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>, + 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7> + 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS + 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS, + 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS, + 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS + 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS + 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS, + 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS, + 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS + 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS, + 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS + 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0> + 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1> + 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2> + 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4> + 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4> + 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0> + 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0> + 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0> + 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1> + 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS + 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4> + 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1> + 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS + 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4> + 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1> + 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1> + 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4> + 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4> + 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4> + 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0> + 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6> + 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7> + 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> + 3668358453U, // <4,0,2,7>: Cost 4 
vext1 <7,4,0,2>, <7,4,0,2> + 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4> + 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS + 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> + 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4> + 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4> + 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS + 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5> + 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7> + 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0> + 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4> + 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS + 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5> + 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6> + 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2> + 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS + 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS + 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2> + 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4> + 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS + 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS + 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS + 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2> + 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5> + 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS + 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7> + 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7> + 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5> + 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS + 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS + 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1> + 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS + 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6> + 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS + 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6> + 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6> + 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0> + 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS + 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2> + 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS + 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS + 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0> + 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5> + 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5> + 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0> + 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7> + 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS + 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS + 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS + 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u> + 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS + 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS + 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4> + 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u> + 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1> + 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS + 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6> + 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2> + 2622857554U, // 
<4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5> + 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4> + 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1> + 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4> + 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS + 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2> + 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4> + 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4> + 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3> + 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5> + 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1> + 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7> + 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1> + 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3> + 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS + 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4> + 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2> + 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4> + 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS + 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3> + 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7> + 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2> + 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4> + 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS + 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3> + 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4> + 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4> + 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS + 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7> + 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2> + 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3> + 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1> + 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0> + 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5> + 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS + 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5> + 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS + 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4> + 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0> + 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS + 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS + 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2> + 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5> + 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2> + 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS + 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7> + 2600686074U, // <4,1,5,6>: Cost 3 vext1 , <6,2,7,3> + 2600686586U, // <4,1,5,7>: Cost 3 vext1 , <7,0,1,2> + 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS + 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS + 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1> + 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2> + 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS + 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS + 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6> + 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7> + 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1> + 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS 
+ 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> + 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1> + 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1> + 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4> + 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6> + 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4> + 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7> + 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7> + 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1> + 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS + 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2> + 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2> + 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4> + 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS + 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS + 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3> + 2600686586U, // <4,1,u,7>: Cost 3 vext1 , <7,0,1,2> + 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS + 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2> + 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS + 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6> + 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2> + 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6> + 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7> + 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4> + 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2> + 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS + 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2> + 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1> + 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0> + 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS + 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> + 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7> + 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3> + 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3> + 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3> + 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4> + 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3> + 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2> + 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3> + 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0> + 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7> + 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6> + 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2> + 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3> + 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1> + 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1> + 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2> + 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4> + 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5> + 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4> + 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4> + 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4> + 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1> + 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS + 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4> + 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4> + 2699208469U, // 
<4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4> + 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS + 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS + 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4> + 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0> + 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS + 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS + 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0> + 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2> + 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS + 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS + 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7> + 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7> + 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS + 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS + 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS + 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2> + 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2> + 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS + 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS + 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6> + 2600767994U, // <4,2,6,6>: Cost 3 vext1 , <6,2,7,3> + 2600768506U, // <4,2,6,7>: Cost 3 vext1 , <7,0,1,2> + 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS + 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> + 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2> + 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2> + 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4> + 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4> + 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7> + 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4> + 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7> + 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2> + 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS + 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2> + 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2> + 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS + 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS + 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS + 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u> + 2600784890U, // <4,2,u,7>: Cost 3 vext1 , <7,0,1,2> + 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS + 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0> + 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2> + 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4> + 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3> + 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1> + 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0> + 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0> + 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0> + 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2> + 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1> + 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1> + 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4> + 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4> + 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0> + 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3> + 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1> + 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3> + 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4> + 3626770534U, // <4,3,2,0>: 
Cost 4 vext1 <0,4,3,2>, LHS + 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3> + 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2> + 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4> + 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS + 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4> + 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3> + 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3> + 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3> + 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1> + 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1> + 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3> + 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> + 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4> + 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7> + 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7> + 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7> + 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3> + 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1> + 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2> + 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4> + 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0> + 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5> + 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6> + 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4> + 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2> + 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1> + 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS + 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5> + 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5> + 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3> + 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS + 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5> + 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5> + 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4> + 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS + 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS + 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6> + 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6> + 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3> + 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS + 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6> + 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6> + 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4> + 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6> + 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1> + 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5> + 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7> + 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7> + 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5> + 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7> + 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7> + 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4> + 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1> + 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS + 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2> + 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u> + 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4> + 2559053110U, // 
<4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS + 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u> + 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u> + 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4> + 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u> + 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4> + 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS + 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS + 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1> + 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0> + 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1> + 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2> + 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0> + 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS + 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2> + 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1> + 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3> + 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3> + 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3> + 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4> + 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3> + 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3> + 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3> + 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4> + 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4> + 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2> + 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4> + 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4> + 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7> + 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4> + 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4> + 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4> + 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2> + 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4> + 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3> + 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> + 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4> + 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5> + 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4> + 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4> + 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4> + 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS + 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4> + 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2> + 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4> + 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS + 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS + 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS + 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4> + 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS + 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS + 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4> + 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5> + 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5> + 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS + 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS + 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS + 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5> + 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS + 
2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS + 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2> + 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5> + 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6> + 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS + 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6> + 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS + 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4> + 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS + 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2> + 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4> + 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7> + 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4> + 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> + 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4> + 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4> + 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7> + 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4> + 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS + 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS + 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS + 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u> + 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS + 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS + 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS + 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u> + 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS + 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0> + 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS + 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5> + 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0> + 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5> + 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7> + 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5> + 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0> + 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS + 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2> + 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5> + 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0> + 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS + 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4> + 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5> + 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6> + 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3> + 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS + 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS + 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3> + 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5> + 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5> + 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5> + 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7> + 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7> + 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS + 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5> + 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2> + 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1> + 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4> + 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3> + 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, 
<3,4,5,0> + 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5> + 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4> + 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5> + 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2> + 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS + 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4> + 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5> + 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4> + 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS + 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS + 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5> + 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6> + 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS + 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS + 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3> + 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4> + 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2> + 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5> + 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5> + 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0> + 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS + 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS + 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS + 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6> + 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6> + 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6> + 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS + 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5> + 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6> + 27705344U, // <4,5,6,7>: Cost 0 copy RHS + 27705344U, // <4,5,6,u>: Cost 0 copy RHS + 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS + 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4> + 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7> + 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5> + 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS + 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7> + 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4> + 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4> + 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS + 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS + 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS + 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>, + 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u> + 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS + 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS + 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>, + 27705344U, // <4,5,u,7>: Cost 0 copy RHS + 27705344U, // <4,5,u,u>: Cost 0 copy RHS + 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0> + 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS + 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6> + 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0> + 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5> + 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7> + 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0> + 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS + 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS + 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2> + 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1> + 2618917782U, 
// <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0> + 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3> + 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5> + 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7> + 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7> + 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS + 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3> + 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4> + 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3> + 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2> + 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1> + 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6> + 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7> + 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7> + 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3> + 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1> + 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2> + 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6> + 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2> + 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3> + 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6> + 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6> + 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6> + 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4> + 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2> + 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS + 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3> + 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4> + 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4> + 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS + 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS + 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS + 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4> + 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS + 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS + 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3> + 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3> + 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3> + 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6> + 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5> + 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6> + 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS + 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS + 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS + 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2> + 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3> + 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2> + 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6> + 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6> + 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6> + 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS + 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS + 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2> + 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2> + 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7> + 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4> + 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6> + 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6> + 3852915914U, // <4,6,7,6>: Cost 4 
vuzpl RHS, <7,2,6,3> + 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7> + 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2> + 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS + 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS + 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS + 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>, + 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u> + 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS + 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS + 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS + 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS + 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0> + 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS + 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4> + 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4> + 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5> + 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0> + 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7> + 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4> + 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS + 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1> + 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1> + 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4> + 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5> + 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS + 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7> + 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7> + 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3> + 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1> + 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS + 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3> + 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2> + 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4> + 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7> + 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7> + 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7> + 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3> + 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7> + 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2> + 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4> + 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4> + 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3> + 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6> + 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7> + 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7> + 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4> + 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7> + 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1> + 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3> + 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7> + 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5> + 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4> + 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS + 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4> + 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7> + 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS + 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2> + 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7> + 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, 
<2,3,4,5> + 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5> + 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6> + 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5> + 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5> + 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7> + 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2> + 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS + 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2> + 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2> + 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2> + 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS + 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6> + 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3> + 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7> + 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS + 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS + 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4> + 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7> + 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4> + 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7> + 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7> + 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7> + 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7> + 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7> + 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS + 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS + 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2> + 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2> + 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS + 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u> + 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3> + 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7> + 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS + 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0> + 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS + 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u> + 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>, + 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5> + 2265397305U, // <4,u,0,5>: Cost 3 vrev + 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u> + 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0> + 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS + 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2> + 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1> + 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS + 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3> + 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>, + 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7> + 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7> + 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>, + 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS + 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS + 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u> + 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2> + 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u> + 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u> + 2733864859U, // <4,u,2,5>: Cost 3 vext3 , + 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7> + 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>, + 1561118822U, // 
<4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
+ 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
+ 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
+ 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
+ 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
+ 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
+ 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>,
+ 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
+ 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
+ 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
+ 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
+ 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>,
+ 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
+ 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
+ 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
+ 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
+ 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>,
+ 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
+ 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
+ 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
+ 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
+ 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
+ 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
+ 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
+ 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
+ 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
+ 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
+ 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
+ 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
+ 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
+ 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
+ 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
+ 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
+ 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
+ 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
+ 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
+ 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
+ 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
+ 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
+ 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
+ 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
+ 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
+ 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
+ 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
+ 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
+ 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
+ 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
+ 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
+ 2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1>
+ 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2>
+ 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5>
+ 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5>
+ 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0>
+ 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0>
+ 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0>
+ 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2>
+ 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS
+ 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1>
+ 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7>
+ 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS
+ 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1>
+ 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7>
+ 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2>
+ 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2>
+ 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4>
+ 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5>
+ 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5>
+ 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5>
+ 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4>
+ 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5>
+ 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5>
+ 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4>
+ 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5>
+ 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5>
+ 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0>
+ 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4>
+ 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0>
+ 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7>
+ 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5>
+ 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS
+ 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6>
+ 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5>
+ 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS
+ 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5>
+ 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0>
+ 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5>
+ 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1>
+ 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS
+ 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS
+ 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0>
+ 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5>
+ 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0>
+ 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0>
+ 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS
+ 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS
+ 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0>
+ 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7>
+ 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6>
+ 3762414095U, // <5,0,6,4>: Cost 4 vext3 <0,6,4,5>, <0,6,4,5>
+ 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7>
+ 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6>
+ 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5>
+ 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0>
+ 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7>
+ 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2>
+ 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS
+ 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0>
+ 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7>
+ 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7>
+ 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS
+ 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2>
+ 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5>
+ 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5>
+ 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6>
+ 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS
+ 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u>
+ 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6>
+ 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0>
+ 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2>
+ 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2>
+ 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5>
+ 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0>
+ 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7>
+ 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0>
+ 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1>
+ 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1>
+ 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0>
+ 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3>
+ 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5>
+ 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5>
+ 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5>
+ 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5>
+ 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3>
+ 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2>
+ 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3>
+ 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2>
+ 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0>
+ 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5>
+ 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3>
+ 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7>
+ 3808199610U, // <5,1,2,7>: Cost 4 vext3 , <1,2,7,0>
+ 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0>
+ 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS
+ 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3>
+ 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5>
+ 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5>
+ 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5>
+ 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7>
+ 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7>
+ 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5>
+ 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, <1,3,u,3>
+ 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1>
+ 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5>
+ 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5>
+ 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5>
+ 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4>
+ 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6>
+ 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4>
+ 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1>
+ 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1>
+ 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1>
+ 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1>
+ 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7>
+ 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5>
+ 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5>
+ 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0>
+ 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7>
+ 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1>
+ 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS
+ 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7>
+ 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3>
+ 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7>
+ 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS
+ 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7>
+ 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6>
+ 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1>
+ 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1>
+ 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS
+ 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1>
+ 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1>
+ 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS
+ 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS
+ 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3>
+ 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6>
+ 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7>
+ 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS
+ 1591662326U, // <5,1,u,0>: Cost 2 vext2 ,
+ 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS
+ 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5>
+ 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS
+ 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5>
+ 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS
+ 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>,
+ 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>,
+ 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS
+ 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0>
+ 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2>
+ 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2>
+ 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1>
+ 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1>
+ 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4>
+ 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0>
+ 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2>
+ 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2>
+ 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5>
+ 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5>
+ 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS
+ 3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0>
+ 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3>
+ 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1>
+ 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5>
+ 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS
+ 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3>
+ 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2>
+ 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3>
+ 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5>
+ 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7>
+ 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6>
+ 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5>
+ 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3>
+ 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1>
+ 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5>
+ 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5>
+ 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5>
+ 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5>
+ 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5>
+ 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5>
+ 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7>
+ 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5>
+ 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2>
+ 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3>
+ 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5>
+ 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5>
+ 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6>
+ 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2>
+ 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4>
+ 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS
+ 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3>
+ 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7>
+ 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS
+ 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS
+ 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5>
+ 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7>
+ 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1>
+ 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS
+ 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS
+ 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3>
+ 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3>
+ 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7>
+ 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5>
+ 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7>
+ 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7>
+ 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1>
+ 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7>
+ 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS
+ 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2>
+ 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7>
+ 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS
+ 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5>
+ 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6>
+ 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7>
+ 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS
+ 2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1>
+ 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS
+ 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u>
+ 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3>
+ 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5>
+ 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS
+ 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5>
+ 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u>
+ 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5>
+ 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0>
+ 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2>
+ 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0>
+ 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2>
+ 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1>
+ 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2>
+ 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0>
+ 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0>
+ 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2>
+ 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3>
+ 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1>
+ 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3>
+ 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5>
+ 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3>
+ 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7>
+ 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7>
+ 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5>
+ 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3>
+ 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1>
+ 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5>
+ 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2>
+ 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4>
+ 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5>
+ 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4>
+ 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3>
+ 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3>
+ 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4>
+ 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1>
+ 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3>
+ 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2>
+ 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3>
+ 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5>
+ 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5>
+ 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7>
+ 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5>
+ 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5>
+ 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5>
+ 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0>
+ 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3>
+ 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5>
+ 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5>
+ 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6>
+ 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5>
+ 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4>
+ 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6>
+ 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5>
+ 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5>
+ 2571635264U, // <5,3,5,3>: Cost 3 vext1 <3,5,3,5>, <3,5,3,5>
+ 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS
+ 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5>
+ 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0>
+ 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5>
+ 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS
+ 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS
+ 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6>
+ 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6>
+ 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6>
+ 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS
+ 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0>
+ 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6>
+ 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4>
+ 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS
+ 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7>
+ 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2>
+ 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2>
+ 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS
+ 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3>
+ 2601513466U, // <5,3,7,6>: Cost 3 vext1 , <6,2,7,3>
+ 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7>
+ 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS
+ 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS
+ 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u>
+ 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2>
+ 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2>
+ 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS
+ 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6>
+ 2601521658U, // <5,3,u,6>: Cost 3 vext1 , <6,2,7,3>
+ 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u>
+ 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS
+ 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS
+ 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS
+ 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2>
+ 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5>
+ 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5>
+ 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1>
+ 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0>
+ 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0>
+ 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS
+ 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1>
+ 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4>
+ 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4>
+ 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7>
+ 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4>
+ 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0>
+ 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5>
+ 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1>
+ 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1>
+ 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4>
+ 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4>
+ 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4>
+ 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5>
+ 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4>
+ 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3>
+ 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3>
+ 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5>
+ 2666752099U, // <5,4,2,u>: Cost 3 vext2 , <2,u,4,5>
+ 3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS
+ 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4>
+ 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4>
+ 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3>
+ 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4>
+ 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0>
+ 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5>
+ 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7>
+ 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4>
+ 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS
+ 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4>
+ 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3>
+ 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4>
+ 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4>
+ 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5>
+ 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4>
+ 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4>
+ 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5>
+ 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS
+ 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5>
+ 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3>
+ 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2>
+ 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS
+ 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5>
+ 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS
+ 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS
+ 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7>
+ 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2>
+ 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6>
+ 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS
+ 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5>
+ 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7>
+ 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5>
+ 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS
+ 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS
+ 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4>
+ 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7>
+ 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7>
+ 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS
+ 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5>
+ 94817590U, // <5,4,7,6>: Cost 1 vrev RHS
+ 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7>
+ 94965064U, // <5,4,7,u>: Cost 1 vrev RHS
+ 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS
+ 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u>
+ 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u>
+ 2667419628U, // <5,4,u,3>: Cost 3 vext2 ,
+ 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS
+ 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5>
+ 94825783U, // <5,4,u,6>: Cost 1 vrev RHS
+ 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5>
+ 94973257U, // <5,4,u,u>: Cost 1 vrev RHS
+ 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0>
+ 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2>
+ 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2>
+ 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1>
+ 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0>
+ 3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7>
+ 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS
+ 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2>
+ 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5>
+ 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0>
+ 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3>
+ 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5>
+ 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7>
+ 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7>
+ 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3>
+ 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5>
+ 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS
+ 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3>
+ 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2>
+ 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4>
+ 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3>
+ 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3>
+ 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7>
+ 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7>
+ 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4>
+ 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2>
+ 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5>
+ 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3>
+ 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5>
+ 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6>
+ 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5>
+ 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7>
+ 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1>
+ 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2>
+ 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1>
+ 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5>
+ 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3>
+ 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4>
+ 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS
+ 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5>
+ 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6>
+ 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5>
+ 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3>
+ 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2>
+ 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2>
+ 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS
+ 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0>
+ 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7>
+ 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS
+ 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS
+ 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6>
+ 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3>
+ 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6>
+ 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5>
+ 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5>
+ 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6>
+ 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1>
+ 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1>
+ 2571796582U, // <5,5,7,0>: Cost 3 vext1 <3,5,5,7>, LHS
+ 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7>
+ 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5>
+ 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7>
+ 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS
+ 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5>
+ 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6>
+ 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS
+ 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS
+ 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS
+ 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5>
+ 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u>
+ 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS
+ 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>,
+ 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS
+ 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS
+ 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0>
+ 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2>
+ 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4>
+ 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5>
+ 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6>
+ 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7>
+ 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS
+ 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS
+ 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2>
+ 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1>
+ 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0>
+ 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3>
+ 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6>
+ 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7>
+ 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7>
+ 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6>
+ 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2>
+ 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3>
+ 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2>
+ 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1>
+ 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6>
+ 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6>
+ 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7>
+ 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3>
+ 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6>
+ 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2>
+ 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3>
+ 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6>
+ 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3>
+ 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6>
+ 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6>
+ 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7>
+ 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS
+ 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6>
+ 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS
+ 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5>
+ 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5>
+ 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5>
+ 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6>
+ 1564495158U, // <5,6,4,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6>
+ 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5>
+ 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS
+ 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS
+ 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3>
+ 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6>
+ 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4>
+ 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6>
+ 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5>
+ 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1>
+ 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS
+ 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS
+ 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4>
+ 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3>
+ 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6>
+ 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS
+ 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6>
+ 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6>
+ 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS
+ 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS
+ 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2>
+ 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS
+ 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6>
+ 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6>
+ 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7>
+ 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS
+ 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS
+ 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS
+ 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2>
+ 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS
+ 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS
+ 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3>
+ 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2>
+ 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS
+ 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0>
+ 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2>
+ 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0>
+ 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5>
+ 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7>
+ 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7>
+ 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0>
+ 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2>
+ 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1>
+ 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0>
+ 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7>
+ 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS
+ 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7>
+ 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7>
+ 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7>
+ 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7>
+ 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7>
+ 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3>
+ 2625635944U, // <5,7,2,2>: Cost 3 vext2 <1,3,5,7>, <2,2,2,2>
+ 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1>
+ 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7>
+ 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7>
+ 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7>
+ 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7>
+ 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1>
+ 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2>
+ 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5>
+ 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1>
+ 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3>
+ 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6>
+ 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0>
+ 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7>
+ 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7>
+ 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2>
+ 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS
+ 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7>
+ 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0>
+ 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4>
+ 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS
+ 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7>
+ 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS
+ 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3>
+ 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3>
+ 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7>
+ 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS
+ 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5>
+ 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7>
+ 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS
+ 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS
+ 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0>
+ 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5>
+ 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2>
+ 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6>
+ 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4>
+ 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u>
+ 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7>
+ 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u>
+ 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS
+ 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1>
+ 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2>
+ 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3>
+ 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS
+ 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7>
+ 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3>
+ 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7>
+ 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS
+ 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS
+ 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>,
+ 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS
+ 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS
+ 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS
+ 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>,
+ 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS
+ 1551898981U, // <5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS
+ 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0>
+ 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2>
+ 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>,
+ 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>,
+ 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>,
+ 2266134675U, // <5,u,0,6>: Cost 3 vrev
+ 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0>
+ 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS
+ 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2>
+ 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1>
+ 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u>
+ 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u>
+ 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>,
+ 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u>
+ 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS
+ 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS
+ 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0>
+ 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5>
+ 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2>
+ 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>,
+ 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u>
+ 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u>
+ 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7>
+ 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>,
+ 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1>
+ 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>,
+ 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u>
+ 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u>
+ 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3>
+ 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u>
+ 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0>
+ 2734610422U, // <5,u,3,6>: Cost 3 vext3 ,
+ 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u>
+ 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u>
+ 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u>
+ 1661163546U, // <5,u,4,1>: Cost 2 vext3 ,
+ 2734463012U, // <5,u,4,2>: Cost 3 vext3 ,
+ 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>,
+ 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5>
+ 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS
+ 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6>
+ 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u>
+ 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS
+ 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS
+ 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5>
+ 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>,
+ 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>,
+ 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS
+ 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS
+ 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS
+ 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS
+ 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS
+ 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS
+ 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS
+ 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6>
+ 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>,
+ 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS
+ 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS
+ 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6>
+ 2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS
+ 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>,
+ 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS
+ 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7>
+ 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2>
+ 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS
+ 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS
+ 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+ 118708378U, // <5,u,7,6>: Cost 1 vrev RHS
+ 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS
+ 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS
+ 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS
+ 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS
+ 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS
+ 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS
+ 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS
+ 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS
+ 118716571U, // <5,u,u,6>: Cost 1 vrev RHS
+ 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS
+ 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS
+ 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0>
+ 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1>
+ 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2>
+ 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5>
+ 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6>
+ 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0>
+ 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6>
+ 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7>
+ 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2>
+ 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS
+ 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0>
+ 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6>
+ 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS
+ 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1>
+ 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1>
+ 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1>
+ 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2>
+ 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6>
+ 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6>
+ 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5>
+ 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6>
+ 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7>
+ 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6>
+ 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2>
+ 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6>
+ 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2>
+ 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4>
+ 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5>
+ 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3>
+ 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6>
+ 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6>
+ 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6>
+ 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7>
+ 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5>
+ 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6>
+ 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5>
+ 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6>
+ 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6>
+ 2687353198U, // <6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6>
+ 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0>
+ 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0>
+ 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6>
+ 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS
+ 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6>
+ 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6>
+ 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0>
+ 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6>
+ 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6>
+ 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0>
+ 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0>
+ 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS
+ 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS
+ 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5>
+ 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0>
+ 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7>
+ 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0>
+ 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1>
+ 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS
+ 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0>
+ 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2>
+ 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7>
+ 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS
+ 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5>
+ 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0>
+ 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7>
+ 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS
+ 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2>
+ 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1>
+ 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5>
+ 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6>
+ 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS
+ 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u>
+ 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS
+ 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS
+ 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS
+ 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS
+ 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6>
+ 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2>
+ 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS
+ 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2>
+ 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1>
+ 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0>
+ 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2>
+ 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1>
+ 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1>
+ 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6>
+ 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3>
+ 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6>
+ 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5>
+ 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6>
+ 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1>
+ 2686026613U, // <6,1,1,u>: Cost 3 vext3 <0,2,4,6>, <1,1,u,3>
+ 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS
+ 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3>
+ 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2>
+ 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0>
+ 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS
+ 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3>
+ 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3>
+ 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0>
+ 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0>
+ 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS
+ 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3>
+ 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6>
+ 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1>
+ 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6>
+ 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7>
+ 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3>
+ 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2>
+ 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3>
+ 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1>
+ 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6>
+ 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4>
+ 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6>
+ 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS
+ 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6>
+ 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0>
+ 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1>
+ 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6>
+ 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1>
+ 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7>
+ 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6>
+ 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7>
+ 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6>
+ 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6>
+ 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0>
+ 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS
+ 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7>
+ 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS
+ 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7>
+ 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6>
+ 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS
+ 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7>
+ 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6>
+ 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1>
+ 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS
+ 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7>
+ 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2>
+ 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS
+ 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS
+ 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5>
+ 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0>
+ 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7>
+ 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS
+ 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS
+ 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3>
+ 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6>
+ 2686027132U, // <6,1,u,3>: Cost 3 vext3 <0,2,4,6>, <1,u,3,0>
+ 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6>
+ 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7>
+ 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u>
+ 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u>
+ 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0>
+ 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0>
+ 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2>
+ 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0>
+ 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6>
+ 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3>
+ 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4>
+ 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0>
+ 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1>
+ 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1>
+ 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0>
+ 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS
+ 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6>
+ 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7>
+ 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3>
+ 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1>
+ 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1>
+ 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1>
+ 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3>
+ 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2>
+ 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3>
+ 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6>
+ 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7>
+ 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6>
+ 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7>
+ 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3>
+ 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1>
+ 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0>
+ 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6>
+ 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4>
+ 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5>
+ 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6>
+ 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6>
+ 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4>
+ 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1>
+ 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2>
+ 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u>
+ 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6>
+ 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6>
+ 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6>
+ 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0>
+ 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2>
+ 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2>
+ 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3>
+ 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3>
+ 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7>
+ 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6>
+ 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5>
+ 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5>
+ 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, <5,6,7,0>
+ 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6>
+ 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1>
+ 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3>
+ 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6>
+ 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7>
+ 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5>
+ 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7>
+ 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6>
+ 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1>
+ 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7>
+ 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS
+ 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2>
+ 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7>
+ 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS
+ 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS
+ 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7>
+ 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6>
+ 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7>
+ 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS
+ 1591744256U, // <6,2,u,0>: Cost 2 vext2 ,
+ 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS
+ 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6>
+ 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS
+ 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5>
+ 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS
+ 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0>
+ 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS
+ 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS
+ 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0>
+ 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2>
+ 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4>
+ 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2>
+ 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2>
+ 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2>
+ 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0>
+ 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0>
+ 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2>
+ 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3>
+ 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1>
+ 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3>
+ 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1>
+ 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3>
+ 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0>
+ 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3>
+ 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6>
+ 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4>
+ 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2>
+ 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0>
+ 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6>
+ 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7>
+ 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7>
+ 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6>
+ 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3>
+ 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1>
+ 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3>
+ 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
+ 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
+ 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
+ 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
+ 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
+ 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
+ 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
+ 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
+ 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
+ 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
+ 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
+ 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
+ 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
+ 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
+ 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
+ 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
+ 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
+ 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
+ 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
+ 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
+ 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
+ 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
+ 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
+ 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
+ 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
+ 2602164326U, // <6,3,6,0>: Cost 3 vext1 , LHS
+ 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
+ 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
+ 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
+ 2602167524U, // <6,3,6,4>: Cost 3 vext1 , <4,4,6,6>
+ 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
+ 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
+ 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
+ 2602170158U, // <6,3,6,u>: Cost 3 vext1 , LHS
+ 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
+ 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
+ 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
+ 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
+ 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
+ 2602176208U, // <6,3,7,5>: Cost 3 vext1 , <5,1,7,3>
+ 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
+ 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
+ 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
+ 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
+ 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
+ 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
+ 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
+ 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
+ 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
+ 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
+ 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
+ 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
+ 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
+ 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
+ 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
+ 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
+ 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
+ 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
+ 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
+ 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
+ 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
+ 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
+ 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
+ 3628166454U, // <6,4,1,4>: Cost
4 vext1 <0,6,4,1>, RHS + 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0> + 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3> + 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1> + 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1> + 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4> + 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3> + 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2> + 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1> + 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4> + 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS + 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0> + 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0> + 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4> + 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2> + 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3> + 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6> + 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3> + 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6> + 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6> + 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5> + 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7> + 2668817222U, // <6,4,3,u>: Cost 3 vext2 , <3,u,5,6> + 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS + 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4> + 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4> + 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4> + 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4> + 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS + 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> + 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4> + 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6> + 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS + 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3> + 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5> + 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6> + 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS + 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5> + 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS + 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS + 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS + 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS + 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2> + 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2> + 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2> + 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS + 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3> + 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3> + 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2> + 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS + 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS + 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4> + 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5> + 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7> + 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS + 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5> + 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6> + 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7> + 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS + 1504608358U, // 
<6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS + 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS + 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u> + 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6> + 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6> + 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS + 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS + 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS + 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS + 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0> + 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS + 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6> + 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2> + 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1> + 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5> + 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7> + 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0> + 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0> + 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1> + 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5> + 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0> + 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS + 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6> + 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7> + 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4> + 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3> + 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3> + 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS + 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5> + 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2> + 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5> + 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS + 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6> + 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7> + 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS + 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5> + 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2> + 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5> + 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3> + 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3> + 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6> + 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6> + 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0> + 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3> + 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3> + 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS + 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5> + 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5> + 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5> + 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS + 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS + 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0> + 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6> + 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6> + 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS + 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2> + 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2> + 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2> + 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS + 
2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5> + 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6> + 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7> + 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7> + 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1> + 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4> + 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4> + 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4> + 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5> + 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5> + 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6> + 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0> + 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1> + 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS + 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7> + 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7> + 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2> + 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS + 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5> + 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6> + 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS + 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS + 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS + 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u> + 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u> + 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>, + 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS + 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS + 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6> + 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u> + 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u> + 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS + 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS + 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4> + 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1> + 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2> + 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3> + 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0> + 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS + 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS + 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2> + 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1> + 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0> + 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS + 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3> + 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7> + 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3> + 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3> + 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS + 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6> + 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3> + 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6> + 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1> + 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3> + 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7> + 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7> + 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3> + 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3> + 2646935702U, // <6,6,3,0>: Cost 3 vext2 
<4,u,6,6>, <3,0,1,2> + 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4> + 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6> + 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3> + 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5> + 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4> + 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6> + 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS + 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5> + 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS + 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2> + 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6> + 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6> + 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6> + 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS + 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0> + 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6> + 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6> + 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS + 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3> + 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5> + 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0> + 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6> + 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5> + 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0> + 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS + 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS + 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS + 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2> + 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3> + 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2> + 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS + 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3> + 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS + 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7> + 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS + 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS + 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7> + 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7> + 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6> + 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS + 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4> + 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6> + 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS + 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS + 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS + 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS + 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u> + 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS + 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS + 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS + 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS + 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS + 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS + 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS + 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0> + 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0> + 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7> 
+ 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2> + 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS + 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0> + 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3> + 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5> + 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7> + 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7> + 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2> + 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3> + 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2> + 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3> + 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2> + 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1> + 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6> + 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7> + 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7> + 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1> + 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1> + 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3> + 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1> + 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7> + 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7> + 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1> + 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3> + 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3> + 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5> + 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS + 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6> + 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7> + 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS + 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2> + 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4> + 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0> + 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0> + 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7> + 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7> + 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2> + 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3> + 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5> + 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6> + 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7> + 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6> + 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1> + 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1> + 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2> + 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7> + 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3> + 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7> + 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6> + 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7> + 
1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7> + 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7> + 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2> + 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS, + 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS + 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS, + 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS, + 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS, + 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS + 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS, + 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS, + 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS + 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0> + 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS + 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2> + 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>, + 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5> + 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0> + 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7> + 1193130221U, // <6,u,0,7>: Cost 2 vrev + 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS + 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2> + 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1> + 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3> + 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS + 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7> + 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7> + 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>, + 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS + 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS + 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3> + 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2> + 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1> + 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS + 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u> + 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7> + 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>, + 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1> + 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2> + 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3> + 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3> + 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3> + 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6> + 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>, + 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>, + 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1> + 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2> + 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1> + 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3> + 1661245476U, // <6,u,4,2>: Cost 2 vext3 , + 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>, + 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4> + 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS + 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6> + 1661614161U, // <6,u,4,7>: Cost 2 vext3 , + 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS + 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS + 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3> + 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5> + 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>, + 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6> + 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5> + 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS + 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7> 
+ 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS + 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS + 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2> + 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3> + 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>, + 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS + 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5> + 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS + 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1> + 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS + 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS + 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7> + 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7> + 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS + 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS + 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5> + 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7> + 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS + 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS + 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS + 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS + 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS + 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS, + 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS + 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS + 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS + 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS + 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS + 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0> + 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1> + 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2> + 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0> + 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1> + 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6> + 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0> + 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7> + 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1> + 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS + 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5> + 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS + 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7> + 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS + 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7> + 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1> + 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0> + 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS + 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2> + 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5> + 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0> + 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1> + 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6> + 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7> + 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7> + 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7> + 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2> + 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2> + 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0> + 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0> + 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3> + 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6> + 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0> + 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0> + 
3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7> + 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0> + 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4> + 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5> + 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6> + 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4> + 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6> + 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS + 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6> + 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5> + 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5> + 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS + 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3> + 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7> + 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0> + 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6> + 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5> + 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0> + 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7> + 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0> + 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7> + 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7> + 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7> + 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0> + 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7> + 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7> + 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6> + 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1> + 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7> + 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2> + 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS + 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS + 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7> + 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6> + 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7> + 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0> + 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7> + 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2> + 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2> + 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1> + 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS + 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u> + 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6> + 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS + 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u> + 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>, + 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS + 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS + 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS + 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS + 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2> + 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS + 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1> + 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0> + 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1> + 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2> + 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1> + 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1> + 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6> + 1638318920U, // <7,1,1,3>: 
Cost 2 vext3 RHS, <1,1,3,3> + 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5> + 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7> + 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1> + 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5> + 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3> + 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1> + 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3> + 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2> + 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0> + 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5> + 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3> + 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2> + 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0> + 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0> + 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0> + 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3> + 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0> + 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7> + 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5> + 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7> + 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7> + 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3> + 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7> + 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5> + 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5> + 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5> + 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5> + 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5> + 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS + 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1> + 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0> + 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS + 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS + 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7> + 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6> + 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7> + 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS + 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7> + 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1> + 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1> + 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7> + 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7> + 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7> + 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7> + 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7> + 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7> + 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7> + 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7> + 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1> + 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7> + 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2> + 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1> + 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3> + 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS + 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS + 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7> + 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0> + 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7> + 3121938539U, // <7,1,7,u>: 
Cost 3 vtrnr <5,7,5,7>, LHS + 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS + 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3> + 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0> + 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7> + 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS + 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7> + 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7> + 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS + 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7> + 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2> + 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2> + 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0> + 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0> + 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6> + 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7> + 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1> + 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2> + 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7> + 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3> + 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0> + 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0> + 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1> + 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3> + 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0> + 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3> + 2602718850U, // <7,2,1,7>: Cost 3 vext1 , <7,u,1,2> + 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1> + 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1> + 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3> + 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3> + 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5> + 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7> + 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6> + 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5> + 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3> + 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1> + 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5> + 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6> + 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7> + 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5> + 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7> + 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6> + 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0> + 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1> + 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6> + 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3> + 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4> + 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5> + 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6> + 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7> + 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0> + 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0> + 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7> + 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7> + 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3> + 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7> + 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7> + 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7> + 3766486907U, // <7,2,5,5>: Cost 4 vext3 
<1,3,5,7>, <2,5,5,7> + 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7> + 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0> + 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7> + 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS + 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3> + 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7> + 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7> + 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS + 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7> + 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7> + 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7> + 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7> + 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1> + 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0> + 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5> + 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS + 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS + 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7> + 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6> + 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7> + 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS + 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1> + 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5> + 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7> + 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5> + 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7> + 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7> + 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0> + 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7> + 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0> + 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2> + 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0> + 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2> + 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1> + 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2> + 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0> + 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0> + 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2> + 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1> + 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1> + 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3> + 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5> + 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6> + 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3> + 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1> + 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5> + 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5> + 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1> + 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0> + 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2> + 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0> + 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5> + 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4> + 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3> + 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3> + 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0> + 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1> + 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3> + 2637031791U, 
// <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3> + 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3> + 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4> + 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7> + 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3> + 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7> + 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3> + 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1> + 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2> + 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4> + 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5> + 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5> + 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6> + 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6> + 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4> + 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6> + 2602819686U, // <7,3,5,0>: Cost 3 vext1 , LHS + 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3> + 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3> + 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7> + 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5> + 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7> + 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0> + 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0> + 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3> + 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7> + 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3> + 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3> + 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7> + 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7> + 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7> + 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6> + 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7> + 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3> + 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1> + 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5> + 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6> + 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7> + 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5> + 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7> + 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7> + 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7> + 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1> + 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u> + 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2> + 1593153452U, // <7,3,u,2>: Cost 2 vext2 , + 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3> + 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u> + 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6> + 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3> + 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0> + 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2> + 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0> + 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS + 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2> + 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1> + 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5> + 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1> + 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2> + 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, 
<4,0,7,1> + 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1> + 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2> + 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1> + 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3> + 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7> + 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3> + 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0> + 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3> + 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3> + 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0> + 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1> + 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3> + 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2> + 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1> + 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7> + 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3> + 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0> + 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5> + 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0> + 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2> + 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5> + 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1> + 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3> + 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6> + 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4> + 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5> + 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7> + 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4> + 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2> + 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1> + 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4> + 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3> + 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4> + 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5> + 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6> + 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7> + 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5> + 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS + 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7> + 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3> + 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5> + 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS + 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7> + 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS + 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7> + 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS + 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1> + 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5> + 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3> + 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4> + 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6> + 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7> + 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7> + 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1> + 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2> + 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2> + 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2> + 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5> + 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6> + 2651641140U, // <7,4,7,4>: Cost 
3 vext2 <5,6,7,4>, <7,4,0,1> + 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0> + 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1> + 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7> + 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7> + 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS + 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS + 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3> + 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u> + 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6> + 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1> + 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS + 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5> + 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS + 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS + 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS + 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS + 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0> + 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1> + 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1> + 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1> + 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2> + 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS + 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1> + 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1> + 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0> + 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7> + 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5> + 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3> + 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1> + 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3> + 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3> + 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1> + 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0> + 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7> + 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4> + 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7> + 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3> + 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3> + 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3> + 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4> + 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS + 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5> + 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5> + 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3> + 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS + 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5> + 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5> + 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0> + 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0> + 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS + 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7> + 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5> + 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4> + 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS + 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS + 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5> + 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6> + 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6> + 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1> + 2652974800U, // <7,5,5,1>: Cost 3 
vext2 <5,u,7,5>, <5,1,7,3> + 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3> + 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3> + 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4> + 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5> + 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6> + 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7> + 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7> + 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1> + 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7> + 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2> + 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4> + 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5> + 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7> + 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7> + 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0> + 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0> + 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS + 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3> + 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2> + 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7> + 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS + 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7> + 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0> + 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1> + 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3> + 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS + 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS + 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0> + 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u> + 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS + 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7> + 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7> + 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0> + 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3> + 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0> + 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS + 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4> + 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0> + 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2> + 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0> + 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0> + 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2> + 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS + 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1> + 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1> + 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0> + 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3> + 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS + 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7> + 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3> + 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3> + 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1> + 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1> + 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3> + 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2> + 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1> + 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5> + 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7> + 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7> + 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3> + 1638470147U, // <7,6,2,u>: Cost 2 vext3 
RHS, <6,2,u,3> + 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2> + 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3> + 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3> + 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3> + 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5> + 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7> + 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6> + 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0> + 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0> + 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1> + 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3> + 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5> + 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4> + 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6> + 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS + 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0> + 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5> + 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS + 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS + 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3> + 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7> + 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4> + 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6> + 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5> + 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0> + 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7> + 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5> + 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1> + 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3> + 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3> + 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4> + 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4> + 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7> + 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6> + 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7> + 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7> + 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1> + 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0> + 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2> + 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0> + 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5> + 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4> + 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6> + 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7> + 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1> + 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1> + 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS + 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2> + 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0> + 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5> + 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS + 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6> + 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3> + 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1> + 2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0> + 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2> + 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0> + 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0> + 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1> + 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0> + 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0> + 
2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2> + 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2> + 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3> + 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1> + 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3> + 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5> + 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS + 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3> + 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1> + 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1> + 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3> + 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5> + 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0> + 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2> + 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0> + 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5> + 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3> + 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3> + 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3> + 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3> + 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1> + 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3> + 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6> + 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7> + 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5> + 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3> + 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7> + 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7> + 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7> + 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5> + 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7> + 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3> + 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4> + 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4> + 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6> + 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4> + 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6> + 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6> + 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS + 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7> + 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3> + 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7> + 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7> + 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7> + 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7> + 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5> + 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7> + 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS + 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0> + 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7> + 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7> + 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS + 2656973553U, // <7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7> + 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7> + 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0> + 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7> + 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS + 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2> + 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2> + 
2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2> + 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS + 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7> + 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7> + 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS + 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS + 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS + 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2> + 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3> + 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0> + 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS + 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6> + 1595840756U, // <7,7,u,6>: Cost 2 vext2 , + 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS + 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS + 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0> + 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, + 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, + 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, + 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, + 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, + 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, + 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS + 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, + 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS + 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1> + 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS + 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, + 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS + 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, + 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1> + 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, + 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS + 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2> + 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, + 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2> + 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, + 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6> + 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, + 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, + 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, + 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, + 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, + 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3> + 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, + 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3> + 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, + 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, + 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, + 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, + 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, + 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, + 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, + 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, + 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, + 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4> + 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, + 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, + 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, + 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, + 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, LHS + 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u> + 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, + 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, + 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u> + 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u> + 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS + 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, + 564582572U, // <7,u,5,u>: 
Cost 1 vext3 RHS, RHS + 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, + 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, + 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u> + 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, + 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6> + 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, + 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u> + 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, + 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, + 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, + 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, + 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, + 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7> + 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, + 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7> + 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, + 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS + 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS + 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, + 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, + 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS + 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, + 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, + 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, + 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS + 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS + 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS + 135053414U, // : Cost 1 vdup0 LHS + 1611489290U, // : Cost 2 vext3 LHS, <0,0,1,1> + 1611489300U, // : Cost 2 vext3 LHS, <0,0,2,2> + 2568054923U, // : Cost 3 vext1 <3,0,0,0>, <3,0,0,0> + 1481706806U, // : Cost 2 vext1 <0,u,0,0>, RHS + 2555449040U, // : Cost 3 vext1 <0,u,0,0>, <5,1,7,3> + 2591282078U, // : Cost 3 vext1 <6,u,0,0>, <6,u,0,0> + 2591945711U, // : Cost 3 vext1 <7,0,0,0>, <7,0,0,0> + 135053414U, // : Cost 1 vdup0 LHS + 1493655654U, // : Cost 2 vext1 <2,u,0,1>, LHS + 1860550758U, // : Cost 2 vzipl LHS, LHS + 537747563U, // : Cost 1 vext3 LHS, LHS + 2625135576U, // : Cost 3 vext2 <1,2,u,0>, <1,3,1,3> + 1493658934U, // : Cost 2 vext1 <2,u,0,1>, RHS + 2625135760U, // : Cost 3 vext2 <1,2,u,0>, <1,5,3,7> + 1517548447U, // : Cost 2 vext1 <6,u,0,1>, <6,u,0,1> + 2591290362U, // : Cost 3 vext1 <6,u,0,1>, <7,0,1,2> + 537747612U, // : Cost 1 vext3 LHS, LHS + 1611489444U, // : Cost 2 vext3 LHS, <0,2,0,2> + 2685231276U, // : Cost 3 vext3 LHS, <0,2,1,1> + 1994768486U, // : Cost 2 vtrnl LHS, LHS + 2685231294U, // : Cost 3 vext3 LHS, <0,2,3,1> + 1611489484U, // : Cost 2 vext3 LHS, <0,2,4,6> + 2712068310U, // : Cost 3 vext3 RHS, <0,2,5,7> + 2625136570U, // : Cost 3 vext2 <1,2,u,0>, <2,6,3,7> + 2591962097U, // : Cost 3 vext1 <7,0,0,2>, <7,0,0,2> + 1611489516U, // : Cost 2 vext3 LHS, <0,2,u,2> + 2954067968U, // : Cost 3 vzipr LHS, <0,0,0,0> + 2685231356U, // : Cost 3 vext3 LHS, <0,3,1,0> + 72589981U, // : Cost 1 vrev LHS + 2625137052U, // : Cost 3 vext2 <1,2,u,0>, <3,3,3,3> + 2625137154U, // : Cost 3 vext2 <1,2,u,0>, <3,4,5,6> + 2639071848U, // : Cost 3 vext2 <3,5,u,0>, <3,5,u,0> + 2639735481U, // : Cost 3 vext2 <3,6,u,0>, <3,6,u,0> + 2597279354U, // : Cost 3 vext1 <7,u,0,3>, <7,u,0,3> + 73032403U, // : Cost 1 vrev LHS + 2687074636U, // : Cost 3 vext3 <0,4,0,u>, <0,4,0,u> + 1611489618U, // : Cost 2 vext3 LHS, <0,4,1,5> + 1611489628U, // : Cost 2 vext3 LHS, <0,4,2,6> + 3629222038U, // : Cost 4 vext1 <0,u,0,4>, <3,0,1,2> + 2555481398U, // : Cost 3 vext1 <0,u,0,4>, RHS + 1551396150U, // : Cost 2 vext2 <1,2,u,0>, RHS + 2651680116U, // : Cost 3 vext2 <5,6,u,0>, <4,6,4,6> + 2646150600U, // : Cost 3 vext2 <4,7,5,0>, <4,7,5,0> + 1611932050U, // : Cost 2 
vext3 LHS, <0,4,u,6> + 2561458278U, // : Cost 3 vext1 <1,u,0,5>, LHS + 1863532646U, // : Cost 2 vzipl RHS, LHS + 2712068526U, // : Cost 3 vext3 RHS, <0,5,2,7> + 2649689976U, // : Cost 3 vext2 <5,3,u,0>, <5,3,u,0> + 2220237489U, // : Cost 3 vrev <0,u,4,5> + 2651680772U, // : Cost 3 vext2 <5,6,u,0>, <5,5,5,5> + 1577939051U, // : Cost 2 vext2 <5,6,u,0>, <5,6,u,0> + 2830077238U, // : Cost 3 vuzpr <1,u,3,0>, RHS + 1579266317U, // : Cost 2 vext2 <5,u,u,0>, <5,u,u,0> + 2555494502U, // : Cost 3 vext1 <0,u,0,6>, LHS + 2712068598U, // : Cost 3 vext3 RHS, <0,6,1,7> + 1997750374U, // : Cost 2 vtrnl RHS, LHS + 2655662673U, // : Cost 3 vext2 <6,3,u,0>, <6,3,u,0> + 2555497782U, // : Cost 3 vext1 <0,u,0,6>, RHS + 2651681459U, // : Cost 3 vext2 <5,6,u,0>, <6,5,0,u> + 2651681592U, // : Cost 3 vext2 <5,6,u,0>, <6,6,6,6> + 2651681614U, // : Cost 3 vext2 <5,6,u,0>, <6,7,0,1> + 1997750428U, // : Cost 2 vtrnl RHS, LHS + 2567446630U, // : Cost 3 vext1 <2,u,0,7>, LHS + 2567447446U, // : Cost 3 vext1 <2,u,0,7>, <1,2,3,0> + 2567448641U, // : Cost 3 vext1 <2,u,0,7>, <2,u,0,7> + 2573421338U, // : Cost 3 vext1 <3,u,0,7>, <3,u,0,7> + 2567449910U, // : Cost 3 vext1 <2,u,0,7>, RHS + 2651682242U, // : Cost 3 vext2 <5,6,u,0>, <7,5,6,u> + 2591339429U, // : Cost 3 vext1 <6,u,0,7>, <6,u,0,7> + 2651682412U, // : Cost 3 vext2 <5,6,u,0>, <7,7,7,7> + 2567452462U, // : Cost 3 vext1 <2,u,0,7>, LHS + 135053414U, // : Cost 1 vdup0 LHS + 1611489938U, // : Cost 2 vext3 LHS, <0,u,1,1> + 537748125U, // : Cost 1 vext3 LHS, LHS + 2685674148U, // : Cost 3 vext3 LHS, <0,u,3,1> + 1611932338U, // : Cost 2 vext3 LHS, <0,u,4,6> + 1551399066U, // : Cost 2 vext2 <1,2,u,0>, RHS + 1517605798U, // : Cost 2 vext1 <6,u,0,u>, <6,u,0,u> + 2830077481U, // : Cost 3 vuzpr <1,u,3,0>, RHS + 537748179U, // : Cost 1 vext3 LHS, LHS + 1544101961U, // : Cost 2 vext2 <0,0,u,1>, <0,0,u,1> + 1558036582U, // : Cost 2 vext2 <2,3,u,1>, LHS + 2619171051U, // : Cost 3 vext2 <0,2,u,1>, <0,2,u,1> + 1611490038U, // : Cost 2 vext3 LHS, <1,0,3,2> + 2555522358U, // : Cost 3 vext1 <0,u,1,0>, RHS + 2712068871U, // : Cost 3 vext3 RHS, <1,0,5,1> + 2591355815U, // : Cost 3 vext1 <6,u,1,0>, <6,u,1,0> + 2597328512U, // : Cost 3 vext1 <7,u,1,0>, <7,u,1,0> + 1611490083U, // : Cost 2 vext3 LHS, <1,0,u,2> + 1481785446U, // : Cost 2 vext1 <0,u,1,1>, LHS + 202162278U, // : Cost 1 vdup1 LHS + 2555528808U, // : Cost 3 vext1 <0,u,1,1>, <2,2,2,2> + 1611490120U, // : Cost 2 vext3 LHS, <1,1,3,3> + 1481788726U, // : Cost 2 vext1 <0,u,1,1>, RHS + 2689876828U, // : Cost 3 vext3 LHS, <1,1,5,5> + 2591364008U, // : Cost 3 vext1 <6,u,1,1>, <6,u,1,1> + 2592691274U, // : Cost 3 vext1 <7,1,1,1>, <7,1,1,1> + 202162278U, // : Cost 1 vdup1 LHS + 1499709542U, // : Cost 2 vext1 <3,u,1,2>, LHS + 2689876871U, // : Cost 3 vext3 LHS, <1,2,1,3> + 2631116445U, // : Cost 3 vext2 <2,2,u,1>, <2,2,u,1> + 835584U, // : Cost 0 copy LHS + 1499712822U, // : Cost 2 vext1 <3,u,1,2>, RHS + 2689876907U, // : Cost 3 vext3 LHS, <1,2,5,3> + 2631780282U, // : Cost 3 vext2 <2,3,u,1>, <2,6,3,7> + 1523603074U, // : Cost 2 vext1 <7,u,1,2>, <7,u,1,2> + 835584U, // : Cost 0 copy LHS + 1487773798U, // : Cost 2 vext1 <1,u,1,3>, LHS + 1611490264U, // : Cost 2 vext3 LHS, <1,3,1,3> + 2685232094U, // : Cost 3 vext3 LHS, <1,3,2,0> + 2018746470U, // : Cost 2 vtrnr LHS, LHS + 1487777078U, // : Cost 2 vext1 <1,u,1,3>, RHS + 1611490304U, // : Cost 2 vext3 LHS, <1,3,5,7> + 2685674505U, // : Cost 3 vext3 LHS, <1,3,6,7> + 2640407307U, // : Cost 3 vext2 <3,7,u,1>, <3,7,u,1> + 1611490327U, // : Cost 2 vext3 LHS, <1,3,u,3> + 1567992749U, // : Cost 2 vext2 
<4,0,u,1>, <4,0,u,1> + 2693121070U, // : Cost 3 vext3 <1,4,1,u>, <1,4,1,u> + 2693194807U, // : Cost 3 vext3 <1,4,2,u>, <1,4,2,u> + 1152386432U, // : Cost 2 vrev <1,u,3,4> + 2555555126U, // : Cost 3 vext1 <0,u,1,4>, RHS + 1558039862U, // : Cost 2 vext2 <2,3,u,1>, RHS + 2645716371U, // : Cost 3 vext2 <4,6,u,1>, <4,6,u,1> + 2597361284U, // : Cost 3 vext1 <7,u,1,4>, <7,u,1,4> + 1152755117U, // : Cost 2 vrev <1,u,u,4> + 1481818214U, // : Cost 2 vext1 <0,u,1,5>, LHS + 2555560694U, // : Cost 3 vext1 <0,u,1,5>, <1,0,3,2> + 2555561576U, // : Cost 3 vext1 <0,u,1,5>, <2,2,2,2> + 1611490448U, // : Cost 2 vext3 LHS, <1,5,3,7> + 1481821494U, // : Cost 2 vext1 <0,u,1,5>, RHS + 2651025435U, // : Cost 3 vext2 <5,5,u,1>, <5,5,u,1> + 2651689068U, // : Cost 3 vext2 <5,6,u,1>, <5,6,u,1> + 2823966006U, // : Cost 3 vuzpr <0,u,1,1>, RHS + 1611932861U, // : Cost 2 vext3 LHS, <1,5,u,7> + 2555568230U, // : Cost 3 vext1 <0,u,1,6>, LHS + 2689877199U, // : Cost 3 vext3 LHS, <1,6,1,7> + 2712069336U, // : Cost 3 vext3 RHS, <1,6,2,7> + 2685232353U, // : Cost 3 vext3 LHS, <1,6,3,7> + 2555571510U, // : Cost 3 vext1 <0,u,1,6>, RHS + 2689877235U, // : Cost 3 vext3 LHS, <1,6,5,7> + 2657661765U, // : Cost 3 vext2 <6,6,u,1>, <6,6,u,1> + 1584583574U, // : Cost 2 vext2 <6,7,u,1>, <6,7,u,1> + 1585247207U, // : Cost 2 vext2 <6,u,u,1>, <6,u,u,1> + 2561548390U, // : Cost 3 vext1 <1,u,1,7>, LHS + 2561549681U, // : Cost 3 vext1 <1,u,1,7>, <1,u,1,7> + 2573493926U, // : Cost 3 vext1 <3,u,1,7>, <2,3,0,1> + 2042962022U, // : Cost 2 vtrnr RHS, LHS + 2561551670U, // : Cost 3 vext1 <1,u,1,7>, RHS + 2226300309U, // : Cost 3 vrev <1,u,5,7> + 2658325990U, // : Cost 3 vext2 <6,7,u,1>, <7,6,1,u> + 2658326124U, // : Cost 3 vext2 <6,7,u,1>, <7,7,7,7> + 2042962027U, // : Cost 2 vtrnr RHS, LHS + 1481842790U, // : Cost 2 vext1 <0,u,1,u>, LHS + 202162278U, // : Cost 1 vdup1 LHS + 2685674867U, // : Cost 3 vext3 LHS, <1,u,2,0> + 835584U, // : Cost 0 copy LHS + 1481846070U, // : Cost 2 vext1 <0,u,1,u>, RHS + 1611933077U, // : Cost 2 vext3 LHS, <1,u,5,7> + 2685674910U, // : Cost 3 vext3 LHS, <1,u,6,7> + 1523652232U, // : Cost 2 vext1 <7,u,1,u>, <7,u,1,u> + 835584U, // : Cost 0 copy LHS + 1544110154U, // : Cost 2 vext2 <0,0,u,2>, <0,0,u,2> + 1545437286U, // : Cost 2 vext2 <0,2,u,2>, LHS + 1545437420U, // : Cost 2 vext2 <0,2,u,2>, <0,2,u,2> + 2685232589U, // : Cost 3 vext3 LHS, <2,0,3,0> + 2619179346U, // : Cost 3 vext2 <0,2,u,2>, <0,4,1,5> + 2712069606U, // : Cost 3 vext3 RHS, <2,0,5,7> + 2689877484U, // : Cost 3 vext3 LHS, <2,0,6,4> + 2659656273U, // : Cost 3 vext2 <7,0,u,2>, <0,7,2,u> + 1545437853U, // : Cost 2 vext2 <0,2,u,2>, LHS + 1550082851U, // : Cost 2 vext2 <1,0,u,2>, <1,0,u,2> + 2619179828U, // : Cost 3 vext2 <0,2,u,2>, <1,1,1,1> + 2619179926U, // : Cost 3 vext2 <0,2,u,2>, <1,2,3,0> + 2685232671U, // : Cost 3 vext3 LHS, <2,1,3,1> + 2555604278U, // : Cost 3 vext1 <0,u,2,1>, RHS + 2619180176U, // : Cost 3 vext2 <0,2,u,2>, <1,5,3,7> + 2689877564U, // : Cost 3 vext3 LHS, <2,1,6,3> + 2602718850U, // : Cost 3 vext1 , <7,u,1,2> + 1158703235U, // : Cost 2 vrev <2,u,u,1> + 1481867366U, // : Cost 2 vext1 <0,u,2,2>, LHS + 2555609846U, // : Cost 3 vext1 <0,u,2,2>, <1,0,3,2> + 269271142U, // : Cost 1 vdup2 LHS + 1611490930U, // : Cost 2 vext3 LHS, <2,2,3,3> + 1481870646U, // : Cost 2 vext1 <0,u,2,2>, RHS + 2689877640U, // : Cost 3 vext3 LHS, <2,2,5,7> + 2619180986U, // : Cost 3 vext2 <0,2,u,2>, <2,6,3,7> + 2593436837U, // : Cost 3 vext1 <7,2,2,2>, <7,2,2,2> + 269271142U, // : Cost 1 vdup2 LHS + 408134301U, // : Cost 1 vext1 LHS, LHS + 1481876214U, // : Cost 2 
vext1 LHS, <1,0,3,2> + 1481877096U, // : Cost 2 vext1 LHS, <2,2,2,2> + 1880326246U, // : Cost 2 vzipr LHS, LHS + 408137014U, // : Cost 1 vext1 LHS, RHS + 1529654992U, // : Cost 2 vext1 LHS, <5,1,7,3> + 1529655802U, // : Cost 2 vext1 LHS, <6,2,7,3> + 1529656314U, // : Cost 2 vext1 LHS, <7,0,1,2> + 408139566U, // : Cost 1 vext1 LHS, LHS + 1567853468U, // : Cost 2 vext2 <4,0,6,2>, <4,0,6,2> + 2561598362U, // : Cost 3 vext1 <1,u,2,4>, <1,2,3,4> + 2555627214U, // : Cost 3 vext1 <0,u,2,4>, <2,3,4,5> + 2685232918U, // : Cost 3 vext3 LHS, <2,4,3,5> + 2555628854U, // : Cost 3 vext1 <0,u,2,4>, RHS + 1545440566U, // : Cost 2 vext2 <0,2,u,2>, RHS + 1571982740U, // : Cost 2 vext2 <4,6,u,2>, <4,6,u,2> + 2592125957U, // : Cost 3 vext1 <7,0,2,4>, <7,0,2,4> + 1545440809U, // : Cost 2 vext2 <0,2,u,2>, RHS + 2555633766U, // : Cost 3 vext1 <0,u,2,5>, LHS + 2561606550U, // : Cost 3 vext1 <1,u,2,5>, <1,2,3,0> + 2689877856U, // : Cost 3 vext3 LHS, <2,5,2,7> + 2685233000U, // : Cost 3 vext3 LHS, <2,5,3,6> + 1158441059U, // : Cost 2 vrev <2,u,4,5> + 2645725188U, // : Cost 3 vext2 <4,6,u,2>, <5,5,5,5> + 2689877892U, // : Cost 3 vext3 LHS, <2,5,6,7> + 2823900470U, // : Cost 3 vuzpr <0,u,0,2>, RHS + 1158736007U, // : Cost 2 vrev <2,u,u,5> + 1481900134U, // : Cost 2 vext1 <0,u,2,6>, LHS + 2555642614U, // : Cost 3 vext1 <0,u,2,6>, <1,0,3,2> + 2555643496U, // : Cost 3 vext1 <0,u,2,6>, <2,2,2,2> + 1611491258U, // : Cost 2 vext3 LHS, <2,6,3,7> + 1481903414U, // : Cost 2 vext1 <0,u,2,6>, RHS + 2689877964U, // : Cost 3 vext3 LHS, <2,6,5,7> + 2689877973U, // : Cost 3 vext3 LHS, <2,6,6,7> + 2645726030U, // : Cost 3 vext2 <4,6,u,2>, <6,7,0,1> + 1611933671U, // : Cost 2 vext3 LHS, <2,6,u,7> + 1585919033U, // : Cost 2 vext2 <7,0,u,2>, <7,0,u,2> + 2573566710U, // : Cost 3 vext1 <3,u,2,7>, <1,0,3,2> + 2567596115U, // : Cost 3 vext1 <2,u,2,7>, <2,u,2,7> + 1906901094U, // : Cost 2 vzipr RHS, LHS + 2555653430U, // : Cost 3 vext1 <0,u,2,7>, RHS + 2800080230U, // : Cost 3 vuzpl LHS, <7,4,5,6> + 2980643164U, // : Cost 3 vzipr RHS, <0,4,2,6> + 2645726828U, // : Cost 3 vext2 <4,6,u,2>, <7,7,7,7> + 1906901099U, // : Cost 2 vzipr RHS, LHS + 408175266U, // : Cost 1 vext1 LHS, LHS + 1545443118U, // : Cost 2 vext2 <0,2,u,2>, LHS + 269271142U, // : Cost 1 vdup2 LHS + 1611491416U, // : Cost 2 vext3 LHS, <2,u,3,3> + 408177974U, // : Cost 1 vext1 LHS, RHS + 1545443482U, // : Cost 2 vext2 <0,2,u,2>, RHS + 1726339226U, // : Cost 2 vuzpl LHS, RHS + 1529697274U, // : Cost 2 vext1 LHS, <7,0,1,2> + 408180526U, // : Cost 1 vext1 LHS, LHS + 1544781824U, // : Cost 2 vext2 LHS, <0,0,0,0> + 471040156U, // : Cost 1 vext2 LHS, LHS + 1544781988U, // : Cost 2 vext2 LHS, <0,2,0,2> + 2618523900U, // : Cost 3 vext2 LHS, <0,3,1,0> + 1544782162U, // : Cost 2 vext2 LHS, <0,4,1,5> + 2238188352U, // : Cost 3 vrev <3,u,5,0> + 2623169023U, // : Cost 3 vext2 LHS, <0,6,2,7> + 2238335826U, // : Cost 3 vrev <3,u,7,0> + 471040669U, // : Cost 1 vext2 LHS, LHS + 1544782582U, // : Cost 2 vext2 LHS, <1,0,3,2> + 1544782644U, // : Cost 2 vext2 LHS, <1,1,1,1> + 1544782742U, // : Cost 2 vext2 LHS, <1,2,3,0> + 1544782808U, // : Cost 2 vext2 LHS, <1,3,1,3> + 2618524733U, // : Cost 3 vext2 LHS, <1,4,3,5> + 1544782992U, // : Cost 2 vext2 LHS, <1,5,3,7> + 2618524897U, // : Cost 3 vext2 LHS, <1,6,3,7> + 2703517987U, // : Cost 3 vext3 <3,1,7,u>, <3,1,7,u> + 1544783213U, // : Cost 2 vext2 LHS, <1,u,1,3> + 1529716838U, // : Cost 2 vext1 , LHS + 1164167966U, // : Cost 2 vrev <3,u,1,2> + 1544783464U, // : Cost 2 vext2 LHS, <2,2,2,2> + 1544783526U, // : Cost 2 vext2 LHS, <2,3,0,1> + 1529720118U, 
// : Cost 2 vext1 , RHS + 2618525544U, // : Cost 3 vext2 LHS, <2,5,3,6> + 1544783802U, // : Cost 2 vext2 LHS, <2,6,3,7> + 2704181620U, // : Cost 3 vext3 <3,2,7,u>, <3,2,7,u> + 1544783931U, // : Cost 2 vext2 LHS, <2,u,0,1> + 1544784022U, // : Cost 2 vext2 LHS, <3,0,1,2> + 1487922559U, // : Cost 2 vext1 <1,u,3,3>, <1,u,3,3> + 1493895256U, // : Cost 2 vext1 <2,u,3,3>, <2,u,3,3> + 336380006U, // : Cost 1 vdup3 LHS + 1544784386U, // : Cost 2 vext2 LHS, <3,4,5,6> + 2824054478U, // : Cost 3 vuzpr LHS, <2,3,4,5> + 2238286668U, // : Cost 3 vrev <3,u,6,3> + 2954069136U, // : Cost 3 vzipr LHS, <1,5,3,7> + 336380006U, // : Cost 1 vdup3 LHS + 1487929446U, // : Cost 2 vext1 <1,u,3,4>, LHS + 1487930752U, // : Cost 2 vext1 <1,u,3,4>, <1,u,3,4> + 2623171644U, // : Cost 3 vext2 LHS, <4,2,6,0> + 2561673366U, // : Cost 3 vext1 <1,u,3,4>, <3,0,1,2> + 1487932726U, // : Cost 2 vext1 <1,u,3,4>, RHS + 471043382U, // : Cost 1 vext2 LHS, RHS + 1592561012U, // : Cost 2 vext2 LHS, <4,6,4,6> + 2238368598U, // : Cost 3 vrev <3,u,7,4> + 471043625U, // : Cost 1 vext2 LHS, RHS + 2555707494U, // : Cost 3 vext1 <0,u,3,5>, LHS + 1574645465U, // : Cost 2 vext2 <5,1,u,3>, <5,1,u,3> + 2567653106U, // : Cost 3 vext1 <2,u,3,5>, <2,3,u,5> + 2555709954U, // : Cost 3 vext1 <0,u,3,5>, <3,4,5,6> + 1592561606U, // : Cost 2 vext2 LHS, <5,4,7,6> + 1592561668U, // : Cost 2 vext2 LHS, <5,5,5,5> + 1592561762U, // : Cost 2 vext2 LHS, <5,6,7,0> + 1750314294U, // : Cost 2 vuzpr LHS, RHS + 1750314295U, // : Cost 2 vuzpr LHS, RHS + 2623172897U, // : Cost 3 vext2 LHS, <6,0,1,2> + 2561688962U, // : Cost 3 vext1 <1,u,3,6>, <1,u,3,6> + 1581281795U, // : Cost 2 vext2 <6,2,u,3>, <6,2,u,3> + 2706541204U, // : Cost 3 vext3 <3,6,3,u>, <3,6,3,u> + 2623173261U, // : Cost 3 vext2 LHS, <6,4,5,6> + 1164495686U, // : Cost 2 vrev <3,u,5,6> + 1592562488U, // : Cost 2 vext2 LHS, <6,6,6,6> + 1592562510U, // : Cost 2 vext2 LHS, <6,7,0,1> + 1164716897U, // : Cost 2 vrev <3,u,u,6> + 1487954022U, // : Cost 2 vext1 <1,u,3,7>, LHS + 1487955331U, // : Cost 2 vext1 <1,u,3,7>, <1,u,3,7> + 1493928028U, // : Cost 2 vext1 <2,u,3,7>, <2,u,3,7> + 2561697942U, // : Cost 3 vext1 <1,u,3,7>, <3,0,1,2> + 1487957302U, // : Cost 2 vext1 <1,u,3,7>, RHS + 2707352311U, // : Cost 3 vext3 <3,7,5,u>, <3,7,5,u> + 2655024623U, // : Cost 3 vext2 <6,2,u,3>, <7,6,2,u> + 1592563308U, // : Cost 2 vext2 LHS, <7,7,7,7> + 1487959854U, // : Cost 2 vext1 <1,u,3,7>, LHS + 1544787667U, // : Cost 2 vext2 LHS, + 471045934U, // : Cost 1 vext2 LHS, LHS + 1549432709U, // : Cost 2 vext2 LHS, + 336380006U, // : Cost 1 vdup3 LHS + 1544788031U, // : Cost 2 vext2 LHS, + 471046298U, // : Cost 1 vext2 LHS, RHS + 1549433040U, // : Cost 2 vext2 LHS, + 1750314537U, // : Cost 2 vuzpr LHS, RHS + 471046501U, // : Cost 1 vext2 LHS, LHS + 2625167360U, // : Cost 3 vext2 <1,2,u,4>, <0,0,0,0> + 1551425638U, // : Cost 2 vext2 <1,2,u,4>, LHS + 2619195630U, // : Cost 3 vext2 <0,2,u,4>, <0,2,u,4> + 2619343104U, // : Cost 3 vext2 <0,3,1,4>, <0,3,1,4> + 2625167698U, // : Cost 3 vext2 <1,2,u,4>, <0,4,1,5> + 1638329234U, // : Cost 2 vext3 RHS, <4,0,5,1> + 1638329244U, // : Cost 2 vext3 RHS, <4,0,6,2> + 3787803556U, // : Cost 4 vext3 RHS, <4,0,7,1> + 1551426205U, // : Cost 2 vext2 <1,2,u,4>, LHS + 2555748454U, // : Cost 3 vext1 <0,u,4,1>, LHS + 2625168180U, // : Cost 3 vext2 <1,2,u,4>, <1,1,1,1> + 1551426503U, // : Cost 2 vext2 <1,2,u,4>, <1,2,u,4> + 2625168344U, // : Cost 3 vext2 <1,2,u,4>, <1,3,1,3> + 2555751734U, // : Cost 3 vext1 <0,u,4,1>, RHS + 1860554038U, // : Cost 2 vzipl LHS, RHS + 2689879022U, // : Cost 3 vext3 LHS, <4,1,6,3> 
+ 2592248852U, // : Cost 3 vext1 <7,0,4,1>, <7,0,4,1> + 1555408301U, // : Cost 2 vext2 <1,u,u,4>, <1,u,u,4> + 2555756646U, // : Cost 3 vext1 <0,u,4,2>, LHS + 2625168943U, // : Cost 3 vext2 <1,2,u,4>, <2,1,4,u> + 2625169000U, // : Cost 3 vext2 <1,2,u,4>, <2,2,2,2> + 2619197134U, // : Cost 3 vext2 <0,2,u,4>, <2,3,4,5> + 2555759926U, // : Cost 3 vext1 <0,u,4,2>, RHS + 2712071222U, // : Cost 3 vext3 RHS, <4,2,5,3> + 1994771766U, // : Cost 2 vtrnl LHS, RHS + 2592257045U, // : Cost 3 vext1 <7,0,4,2>, <7,0,4,2> + 1994771784U, // : Cost 2 vtrnl LHS, RHS + 2625169558U, // : Cost 3 vext2 <1,2,u,4>, <3,0,1,2> + 2567709594U, // : Cost 3 vext1 <2,u,4,3>, <1,2,3,4> + 2567710817U, // : Cost 3 vext1 <2,u,4,3>, <2,u,4,3> + 2625169820U, // : Cost 3 vext2 <1,2,u,4>, <3,3,3,3> + 2625169922U, // : Cost 3 vext2 <1,2,u,4>, <3,4,5,6> + 2954069710U, // : Cost 3 vzipr LHS, <2,3,4,5> + 2954068172U, // : Cost 3 vzipr LHS, <0,2,4,6> + 3903849472U, // : Cost 4 vuzpr <1,u,3,4>, <1,3,5,7> + 2954068174U, // : Cost 3 vzipr LHS, <0,2,4,u> + 1505919078U, // : Cost 2 vext1 <4,u,4,4>, LHS + 2567717831U, // : Cost 3 vext1 <2,u,4,4>, <1,2,u,4> + 2567719010U, // : Cost 3 vext1 <2,u,4,4>, <2,u,4,4> + 2570373542U, // : Cost 3 vext1 <3,3,4,4>, <3,3,4,4> + 161926454U, // : Cost 1 vdup0 RHS + 1551428918U, // : Cost 2 vext2 <1,2,u,4>, RHS + 1638329572U, // : Cost 2 vext3 RHS, <4,4,6,6> + 2594927963U, // : Cost 3 vext1 <7,4,4,4>, <7,4,4,4> + 161926454U, // : Cost 1 vdup0 RHS + 1493983334U, // : Cost 2 vext1 <2,u,4,5>, LHS + 2689879301U, // : Cost 3 vext3 LHS, <4,5,1,3> + 1493985379U, // : Cost 2 vext1 <2,u,4,5>, <2,u,4,5> + 2567727254U, // : Cost 3 vext1 <2,u,4,5>, <3,0,1,2> + 1493986614U, // : Cost 2 vext1 <2,u,4,5>, RHS + 1863535926U, // : Cost 2 vzipl RHS, RHS + 537750838U, // : Cost 1 vext3 LHS, RHS + 2830110006U, // : Cost 3 vuzpr <1,u,3,4>, RHS + 537750856U, // : Cost 1 vext3 LHS, RHS + 1482047590U, // : Cost 2 vext1 <0,u,4,6>, LHS + 2555790070U, // : Cost 3 vext1 <0,u,4,6>, <1,0,3,2> + 2555790952U, // : Cost 3 vext1 <0,u,4,6>, <2,2,2,2> + 2555791510U, // : Cost 3 vext1 <0,u,4,6>, <3,0,1,2> + 1482050870U, // : Cost 2 vext1 <0,u,4,6>, RHS + 2689879422U, // : Cost 3 vext3 LHS, <4,6,5,7> + 1997753654U, // : Cost 2 vtrnl RHS, RHS + 2712071562U, // : Cost 3 vext3 RHS, <4,6,7,1> + 1482053422U, // : Cost 2 vext1 <0,u,4,6>, LHS + 2567741542U, // : Cost 3 vext1 <2,u,4,7>, LHS + 2567742362U, // : Cost 3 vext1 <2,u,4,7>, <1,2,3,4> + 2567743589U, // : Cost 3 vext1 <2,u,4,7>, <2,u,4,7> + 2573716286U, // : Cost 3 vext1 <3,u,4,7>, <3,u,4,7> + 2567744822U, // : Cost 3 vext1 <2,u,4,7>, RHS + 2712071624U, // : Cost 3 vext3 RHS, <4,7,5,0> + 96808489U, // : Cost 1 vrev RHS + 2651715180U, // : Cost 3 vext2 <5,6,u,4>, <7,7,7,7> + 96955963U, // : Cost 1 vrev RHS + 1482063974U, // : Cost 2 vext1 <0,u,4,u>, LHS + 1551431470U, // : Cost 2 vext2 <1,2,u,4>, LHS + 1494009958U, // : Cost 2 vext1 <2,u,4,u>, <2,u,4,u> + 2555807894U, // : Cost 3 vext1 <0,u,4,u>, <3,0,1,2> + 161926454U, // : Cost 1 vdup0 RHS + 1551431834U, // : Cost 2 vext2 <1,2,u,4>, RHS + 537751081U, // : Cost 1 vext3 LHS, RHS + 2830110249U, // : Cost 3 vuzpr <1,u,3,4>, RHS + 537751099U, // : Cost 1 vext3 LHS, RHS + 2631811072U, // : Cost 3 vext2 <2,3,u,5>, <0,0,0,0> + 1558069350U, // : Cost 2 vext2 <2,3,u,5>, LHS + 2619203823U, // : Cost 3 vext2 <0,2,u,5>, <0,2,u,5> + 2619867456U, // : Cost 3 vext2 <0,3,u,5>, <0,3,u,5> + 1546273106U, // : Cost 2 vext2 <0,4,1,5>, <0,4,1,5> + 2733010539U, // : Cost 3 vext3 LHS, <5,0,5,1> + 2597622682U, // : Cost 3 vext1 <7,u,5,0>, <6,7,u,5> + 1176539396U, // : 
Cost 2 vrev <5,u,7,0> + 1558069917U, // : Cost 2 vext2 <2,3,u,5>, LHS + 1505968230U, // : Cost 2 vext1 <4,u,5,1>, LHS + 2624512887U, // : Cost 3 vext2 <1,1,u,5>, <1,1,u,5> + 2631811990U, // : Cost 3 vext2 <2,3,u,5>, <1,2,3,0> + 2618541056U, // : Cost 3 vext2 <0,1,u,5>, <1,3,5,7> + 1505971510U, // : Cost 2 vext1 <4,u,5,1>, RHS + 2627167419U, // : Cost 3 vext2 <1,5,u,5>, <1,5,u,5> + 2579714554U, // : Cost 3 vext1 <4,u,5,1>, <6,2,7,3> + 1638330064U, // : Cost 2 vext3 RHS, <5,1,7,3> + 1638477529U, // : Cost 2 vext3 RHS, <5,1,u,3> + 2561802342U, // : Cost 3 vext1 <1,u,5,2>, LHS + 2561803264U, // : Cost 3 vext1 <1,u,5,2>, <1,3,5,7> + 2631149217U, // : Cost 3 vext2 <2,2,u,5>, <2,2,u,5> + 1558071026U, // : Cost 2 vext2 <2,3,u,5>, <2,3,u,5> + 2561805622U, // : Cost 3 vext1 <1,u,5,2>, RHS + 2714062607U, // : Cost 3 vext3 RHS, <5,2,5,3> + 2631813050U, // : Cost 3 vext2 <2,3,u,5>, <2,6,3,7> + 3092335926U, // : Cost 3 vtrnr <0,u,0,2>, RHS + 1561389191U, // : Cost 2 vext2 <2,u,u,5>, <2,u,u,5> + 2561810534U, // : Cost 3 vext1 <1,u,5,3>, LHS + 2561811857U, // : Cost 3 vext1 <1,u,5,3>, <1,u,5,3> + 2631813474U, // : Cost 3 vext2 <2,3,u,5>, <3,2,5,u> + 2631813532U, // : Cost 3 vext2 <2,3,u,5>, <3,3,3,3> + 2619869698U, // : Cost 3 vext2 <0,3,u,5>, <3,4,5,6> + 3001847002U, // : Cost 3 vzipr LHS, <4,4,5,5> + 2954070530U, // : Cost 3 vzipr LHS, <3,4,5,6> + 2018749750U, // : Cost 2 vtrnr LHS, RHS + 2018749751U, // : Cost 2 vtrnr LHS, RHS + 2573762662U, // : Cost 3 vext1 <3,u,5,4>, LHS + 2620017634U, // : Cost 3 vext2 <0,4,1,5>, <4,1,5,0> + 2573764338U, // : Cost 3 vext1 <3,u,5,4>, <2,3,u,5> + 2573765444U, // : Cost 3 vext1 <3,u,5,4>, <3,u,5,4> + 1570680053U, // : Cost 2 vext2 <4,4,u,5>, <4,4,u,5> + 1558072630U, // : Cost 2 vext2 <2,3,u,5>, RHS + 2645749143U, // : Cost 3 vext2 <4,6,u,5>, <4,6,u,5> + 1638330310U, // : Cost 2 vext3 RHS, <5,4,7,6> + 1558072873U, // : Cost 2 vext2 <2,3,u,5>, RHS + 1506000998U, // : Cost 2 vext1 <4,u,5,5>, LHS + 2561827984U, // : Cost 3 vext1 <1,u,5,5>, <1,5,3,7> + 2579744360U, // : Cost 3 vext1 <4,u,5,5>, <2,2,2,2> + 2579744918U, // : Cost 3 vext1 <4,u,5,5>, <3,0,1,2> + 1506004278U, // : Cost 2 vext1 <4,u,5,5>, RHS + 229035318U, // : Cost 1 vdup1 RHS + 2712072206U, // : Cost 3 vext3 RHS, <5,5,6,6> + 1638330392U, // : Cost 2 vext3 RHS, <5,5,7,7> + 229035318U, // : Cost 1 vdup1 RHS + 1500037222U, // : Cost 2 vext1 <3,u,5,6>, LHS + 2561836436U, // : Cost 3 vext1 <1,u,5,6>, <1,u,5,6> + 2567809133U, // : Cost 3 vext1 <2,u,5,6>, <2,u,5,6> + 1500040006U, // : Cost 2 vext1 <3,u,5,6>, <3,u,5,6> + 1500040502U, // : Cost 2 vext1 <3,u,5,6>, RHS + 2714062935U, // : Cost 3 vext3 RHS, <5,6,5,7> + 2712072288U, // : Cost 3 vext3 RHS, <5,6,6,7> + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 1488101478U, // : Cost 2 vext1 <1,u,5,7>, LHS + 1488102805U, // : Cost 2 vext1 <1,u,5,7>, <1,u,5,7> + 2561844840U, // : Cost 3 vext1 <1,u,5,7>, <2,2,2,2> + 2561845398U, // : Cost 3 vext1 <1,u,5,7>, <3,0,1,2> + 1488104758U, // : Cost 2 vext1 <1,u,5,7>, RHS + 1638330536U, // : Cost 2 vext3 RHS, <5,7,5,7> + 2712072362U, // : Cost 3 vext3 RHS, <5,7,6,0> + 2042965302U, // : Cost 2 vtrnr RHS, RHS + 1488107310U, // : Cost 2 vext1 <1,u,5,7>, LHS + 1488109670U, // : Cost 2 vext1 <1,u,5,u>, LHS + 1488110998U, // : Cost 2 vext1 <1,u,5,u>, <1,u,5,u> + 2561853032U, // : Cost 3 vext1 <1,u,5,u>, <2,2,2,2> + 1500056392U, // : Cost 2 vext1 <3,u,5,u>, <3,u,5,u> + 1488112950U, // : Cost 2 vext1 <1,u,5,u>, RHS + 229035318U, // : Cost 1 vdup1 RHS + 2954111490U, // : Cost 3 vzipr LHS, <3,4,5,6> + 27705344U, // : 
Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 2619211776U, // : Cost 3 vext2 <0,2,u,6>, <0,0,0,0> + 1545470054U, // : Cost 2 vext2 <0,2,u,6>, LHS + 1545470192U, // : Cost 2 vext2 <0,2,u,6>, <0,2,u,6> + 2255958969U, // : Cost 3 vrev <6,u,3,0> + 1546797458U, // : Cost 2 vext2 <0,4,u,6>, <0,4,u,6> + 2720624971U, // : Cost 3 vext3 <6,0,5,u>, <6,0,5,u> + 2256180180U, // : Cost 3 vrev <6,u,6,0> + 2960682294U, // : Cost 3 vzipr <1,2,u,0>, RHS + 1545470621U, // : Cost 2 vext2 <0,2,u,6>, LHS + 1182004127U, // : Cost 2 vrev <6,u,0,1> + 2619212596U, // : Cost 3 vext2 <0,2,u,6>, <1,1,1,1> + 2619212694U, // : Cost 3 vext2 <0,2,u,6>, <1,2,3,0> + 2619212760U, // : Cost 3 vext2 <0,2,u,6>, <1,3,1,3> + 2626511979U, // : Cost 3 vext2 <1,4,u,6>, <1,4,u,6> + 2619212944U, // : Cost 3 vext2 <0,2,u,6>, <1,5,3,7> + 2714063264U, // : Cost 3 vext3 RHS, <6,1,6,3> + 2967326006U, // : Cost 3 vzipr <2,3,u,1>, RHS + 1182594023U, // : Cost 2 vrev <6,u,u,1> + 1506050150U, // : Cost 2 vext1 <4,u,6,2>, LHS + 2579792630U, // : Cost 3 vext1 <4,u,6,2>, <1,0,3,2> + 2619213416U, // : Cost 3 vext2 <0,2,u,6>, <2,2,2,2> + 2619213478U, // : Cost 3 vext2 <0,2,u,6>, <2,3,0,1> + 1506053430U, // : Cost 2 vext1 <4,u,6,2>, RHS + 2633148309U, // : Cost 3 vext2 <2,5,u,6>, <2,5,u,6> + 2619213754U, // : Cost 3 vext2 <0,2,u,6>, <2,6,3,7> + 1638330874U, // : Cost 2 vext3 RHS, <6,2,7,3> + 1638478339U, // : Cost 2 vext3 RHS, <6,2,u,3> + 2619213974U, // : Cost 3 vext2 <0,2,u,6>, <3,0,1,2> + 2255836074U, // : Cost 3 vrev <6,u,1,3> + 2255909811U, // : Cost 3 vrev <6,u,2,3> + 2619214236U, // : Cost 3 vext2 <0,2,u,6>, <3,3,3,3> + 1564715549U, // : Cost 2 vext2 <3,4,u,6>, <3,4,u,6> + 2639121006U, // : Cost 3 vext2 <3,5,u,6>, <3,5,u,6> + 3001847012U, // : Cost 3 vzipr LHS, <4,4,6,6> + 1880329526U, // : Cost 2 vzipr LHS, RHS + 1880329527U, // : Cost 2 vzipr LHS, RHS + 2567864422U, // : Cost 3 vext1 <2,u,6,4>, LHS + 2733011558U, // : Cost 3 vext3 LHS, <6,4,1,3> + 2567866484U, // : Cost 3 vext1 <2,u,6,4>, <2,u,6,4> + 2638458005U, // : Cost 3 vext2 <3,4,u,6>, <4,3,6,u> + 1570540772U, // : Cost 2 vext2 <4,4,6,6>, <4,4,6,6> + 1545473334U, // : Cost 2 vext2 <0,2,u,6>, RHS + 1572015512U, // : Cost 2 vext2 <4,6,u,6>, <4,6,u,6> + 2960715062U, // : Cost 3 vzipr <1,2,u,4>, RHS + 1545473577U, // : Cost 2 vext2 <0,2,u,6>, RHS + 2567872614U, // : Cost 3 vext1 <2,u,6,5>, LHS + 2645757648U, // : Cost 3 vext2 <4,6,u,6>, <5,1,7,3> + 2567874490U, // : Cost 3 vext1 <2,u,6,5>, <2,6,3,7> + 2576501250U, // : Cost 3 vext1 <4,3,6,5>, <3,4,5,6> + 1576660943U, // : Cost 2 vext2 <5,4,u,6>, <5,4,u,6> + 2645757956U, // : Cost 3 vext2 <4,6,u,6>, <5,5,5,5> + 2645758050U, // : Cost 3 vext2 <4,6,u,6>, <5,6,7,0> + 2824080694U, // : Cost 3 vuzpr <0,u,2,6>, RHS + 1182626795U, // : Cost 2 vrev <6,u,u,5> + 1506082918U, // : Cost 2 vext1 <4,u,6,6>, LHS + 2579825398U, // : Cost 3 vext1 <4,u,6,6>, <1,0,3,2> + 2645758458U, // : Cost 3 vext2 <4,6,u,6>, <6,2,7,3> + 2579826838U, // : Cost 3 vext1 <4,u,6,6>, <3,0,1,2> + 1506086198U, // : Cost 2 vext1 <4,u,6,6>, RHS + 2579828432U, // : Cost 3 vext1 <4,u,6,6>, <5,1,7,3> + 296144182U, // : Cost 1 vdup2 RHS + 1638331202U, // : Cost 2 vext3 RHS, <6,6,7,7> + 296144182U, // : Cost 1 vdup2 RHS + 432349286U, // : Cost 1 vext1 RHS, LHS + 1506091766U, // : Cost 2 vext1 RHS, <1,0,3,2> + 1506092648U, // : Cost 2 vext1 RHS, <2,2,2,2> + 1506093206U, // : Cost 2 vext1 RHS, <3,0,1,2> + 432352809U, // : Cost 1 vext1 RHS, RHS + 1506094800U, // : Cost 2 vext1 RHS, <5,1,7,3> + 1506095610U, // : Cost 2 vext1 RHS, <6,2,7,3> + 1906904374U, // : Cost 2 vzipr RHS, RHS + 
432355118U, // : Cost 1 vext1 RHS, LHS + 432357478U, // : Cost 1 vext1 RHS, LHS + 1545475886U, // : Cost 2 vext2 <0,2,u,6>, LHS + 1506100840U, // : Cost 2 vext1 RHS, <2,2,2,2> + 1506101398U, // : Cost 2 vext1 RHS, <3,0,1,2> + 432361002U, // : Cost 1 vext1 RHS, RHS + 1545476250U, // : Cost 2 vext2 <0,2,u,6>, RHS + 296144182U, // : Cost 1 vdup2 RHS + 1880370486U, // : Cost 2 vzipr LHS, RHS + 432363310U, // : Cost 1 vext1 RHS, LHS + 1571356672U, // : Cost 2 vext2 RHS, <0,0,0,0> + 497614950U, // : Cost 1 vext2 RHS, LHS + 1571356836U, // : Cost 2 vext2 RHS, <0,2,0,2> + 2573880146U, // : Cost 3 vext1 <3,u,7,0>, <3,u,7,0> + 1571357010U, // : Cost 2 vext2 RHS, <0,4,1,5> + 1512083716U, // : Cost 2 vext1 <5,u,7,0>, <5,u,7,0> + 2621874741U, // : Cost 3 vext2 <0,6,u,7>, <0,6,u,7> + 2585826298U, // : Cost 3 vext1 <5,u,7,0>, <7,0,1,2> + 497615517U, // : Cost 1 vext2 RHS, LHS + 1571357430U, // : Cost 2 vext2 RHS, <1,0,3,2> + 1571357492U, // : Cost 2 vext2 RHS, <1,1,1,1> + 1571357590U, // : Cost 2 vext2 RHS, <1,2,3,0> + 1552114715U, // : Cost 2 vext2 <1,3,u,7>, <1,3,u,7> + 2573888822U, // : Cost 3 vext1 <3,u,7,1>, RHS + 1553441981U, // : Cost 2 vext2 <1,5,u,7>, <1,5,u,7> + 2627847438U, // : Cost 3 vext2 <1,6,u,7>, <1,6,u,7> + 2727408775U, // : Cost 3 vext3 <7,1,7,u>, <7,1,7,u> + 1555432880U, // : Cost 2 vext2 <1,u,u,7>, <1,u,u,7> + 2629838337U, // : Cost 3 vext2 <2,0,u,7>, <2,0,u,7> + 1188058754U, // : Cost 2 vrev <7,u,1,2> + 1571358312U, // : Cost 2 vext2 RHS, <2,2,2,2> + 1571358374U, // : Cost 2 vext2 RHS, <2,3,0,1> + 2632492869U, // : Cost 3 vext2 <2,4,u,7>, <2,4,u,7> + 2633156502U, // : Cost 3 vext2 <2,5,u,7>, <2,5,u,7> + 1560078311U, // : Cost 2 vext2 <2,6,u,7>, <2,6,u,7> + 2728072408U, // : Cost 3 vext3 <7,2,7,u>, <7,2,7,u> + 1561405577U, // : Cost 2 vext2 <2,u,u,7>, <2,u,u,7> + 1571358870U, // : Cost 2 vext2 RHS, <3,0,1,2> + 2627184913U, // : Cost 3 vext2 <1,5,u,7>, <3,1,5,u> + 2633820523U, // : Cost 3 vext2 <2,6,u,7>, <3,2,6,u> + 1571359132U, // : Cost 2 vext2 RHS, <3,3,3,3> + 1571359234U, // : Cost 2 vext2 RHS, <3,4,5,6> + 1512108295U, // : Cost 2 vext1 <5,u,7,3>, <5,u,7,3> + 1518080992U, // : Cost 2 vext1 <6,u,7,3>, <6,u,7,3> + 2640456465U, // : Cost 3 vext2 <3,7,u,7>, <3,7,u,7> + 1571359518U, // : Cost 2 vext2 RHS, <3,u,1,2> + 1571359634U, // : Cost 2 vext2 RHS, <4,0,5,1> + 2573911067U, // : Cost 3 vext1 <3,u,7,4>, <1,3,u,7> + 2645101622U, // : Cost 3 vext2 RHS, <4,2,5,3> + 2573912918U, // : Cost 3 vext1 <3,u,7,4>, <3,u,7,4> + 1571359952U, // : Cost 2 vext2 RHS, <4,4,4,4> + 497618248U, // : Cost 1 vext2 RHS, RHS + 1571360116U, // : Cost 2 vext2 RHS, <4,6,4,6> + 2645102024U, // : Cost 3 vext2 RHS, <4,7,5,0> + 497618473U, // : Cost 1 vext2 RHS, RHS + 2645102152U, // : Cost 3 vext2 RHS, <5,0,1,2> + 1571360464U, // : Cost 2 vext2 RHS, <5,1,7,3> + 2645102334U, // : Cost 3 vext2 RHS, <5,2,3,4> + 2645102447U, // : Cost 3 vext2 RHS, <5,3,7,0> + 1571360710U, // : Cost 2 vext2 RHS, <5,4,7,6> + 1571360772U, // : Cost 2 vext2 RHS, <5,5,5,5> + 1571360866U, // : Cost 2 vext2 RHS, <5,6,7,0> + 1571360936U, // : Cost 2 vext2 RHS, <5,7,5,7> + 1571361017U, // : Cost 2 vext2 RHS, <5,u,5,7> + 1530044518U, // : Cost 2 vext1 , LHS + 2645103016U, // : Cost 3 vext2 RHS, <6,1,7,2> + 1571361274U, // : Cost 2 vext2 RHS, <6,2,7,3> + 2645103154U, // : Cost 3 vext2 RHS, <6,3,4,5> + 1530047798U, // : Cost 2 vext1 , RHS + 1188386474U, // : Cost 2 vrev <7,u,5,6> + 1571361592U, // : Cost 2 vext2 RHS, <6,6,6,6> + 1571361614U, // : Cost 2 vext2 RHS, <6,7,0,1> + 1571361695U, // : Cost 2 vext2 RHS, <6,u,0,1> + 1571361786U, // : Cost 
2 vext2 RHS, <7,0,1,2> + 2573935616U, // : Cost 3 vext1 <3,u,7,7>, <1,3,5,7> + 2645103781U, // : Cost 3 vext2 RHS, <7,2,2,2> + 2573937497U, // : Cost 3 vext1 <3,u,7,7>, <3,u,7,7> + 1571362150U, // : Cost 2 vext2 RHS, <7,4,5,6> + 1512141067U, // : Cost 2 vext1 <5,u,7,7>, <5,u,7,7> + 1518113764U, // : Cost 2 vext1 <6,u,7,7>, <6,u,7,7> + 363253046U, // : Cost 1 vdup3 RHS + 363253046U, // : Cost 1 vdup3 RHS + 1571362515U, // : Cost 2 vext2 RHS, + 497620782U, // : Cost 1 vext2 RHS, LHS + 1571362693U, // : Cost 2 vext2 RHS, + 1571362748U, // : Cost 2 vext2 RHS, + 1571362879U, // : Cost 2 vext2 RHS, + 497621146U, // : Cost 1 vext2 RHS, RHS + 1571363024U, // : Cost 2 vext2 RHS, + 363253046U, // : Cost 1 vdup3 RHS + 497621349U, // : Cost 1 vext2 RHS, LHS + 135053414U, // : Cost 1 vdup0 LHS + 471081121U, // : Cost 1 vext2 LHS, LHS + 1544822948U, // : Cost 2 vext2 LHS, <0,2,0,2> + 1616140005U, // : Cost 2 vext3 LHS, + 1544823122U, // : Cost 2 vext2 LHS, <0,4,1,5> + 1512157453U, // : Cost 2 vext1 <5,u,u,0>, <5,u,u,0> + 1662220032U, // : Cost 2 vext3 RHS, + 1194457487U, // : Cost 2 vrev + 471081629U, // : Cost 1 vext2 LHS, LHS + 1544823542U, // : Cost 2 vext2 LHS, <1,0,3,2> + 202162278U, // : Cost 1 vdup1 LHS + 537753390U, // : Cost 1 vext3 LHS, LHS + 1544823768U, // : Cost 2 vext2 LHS, <1,3,1,3> + 1494248758U, // : Cost 2 vext1 <2,u,u,1>, RHS + 1544823952U, // : Cost 2 vext2 LHS, <1,5,3,7> + 1518138343U, // : Cost 2 vext1 <6,u,u,1>, <6,u,u,1> + 1640322907U, // : Cost 2 vext3 RHS, + 537753444U, // : Cost 1 vext3 LHS, LHS + 1482309734U, // : Cost 2 vext1 <0,u,u,2>, LHS + 1194031451U, // : Cost 2 vrev + 269271142U, // : Cost 1 vdup2 LHS + 835584U, // : Cost 0 copy LHS + 1482313014U, // : Cost 2 vext1 <0,u,u,2>, RHS + 2618566504U, // : Cost 3 vext2 LHS, <2,5,3,6> + 1544824762U, // : Cost 2 vext2 LHS, <2,6,3,7> + 1638479788U, // : Cost 2 vext3 RHS, + 835584U, // : Cost 0 copy LHS + 408576723U, // : Cost 1 vext1 LHS, LHS + 1482318582U, // : Cost 2 vext1 LHS, <1,0,3,2> + 120371557U, // : Cost 1 vrev LHS + 336380006U, // : Cost 1 vdup3 LHS + 408579382U, // : Cost 1 vext1 LHS, RHS + 1616140271U, // : Cost 2 vext3 LHS, + 1530098170U, // : Cost 2 vext1 LHS, <6,2,7,3> + 1880329544U, // : Cost 2 vzipr LHS, RHS + 408581934U, // : Cost 1 vext1 LHS, LHS + 1488298086U, // : Cost 2 vext1 <1,u,u,4>, LHS + 1488299437U, // : Cost 2 vext1 <1,u,u,4>, <1,u,u,4> + 1659271204U, // : Cost 2 vext3 LHS, + 1194195311U, // : Cost 2 vrev + 161926454U, // : Cost 1 vdup0 RHS + 471084342U, // : Cost 1 vext2 LHS, RHS + 1571368308U, // : Cost 2 vext2 RHS, <4,6,4,6> + 1640323153U, // : Cost 2 vext3 RHS, + 471084585U, // : Cost 1 vext2 LHS, RHS + 1494278246U, // : Cost 2 vext1 <2,u,u,5>, LHS + 1571368656U, // : Cost 2 vext2 RHS, <5,1,7,3> + 1494280327U, // : Cost 2 vext1 <2,u,u,5>, <2,u,u,5> + 1616140415U, // : Cost 2 vext3 LHS, + 1494281526U, // : Cost 2 vext1 <2,u,u,5>, RHS + 229035318U, // : Cost 1 vdup1 RHS + 537753754U, // : Cost 1 vext3 LHS, RHS + 1750355254U, // : Cost 2 vuzpr LHS, RHS + 537753772U, // : Cost 1 vext3 LHS, RHS + 1482342502U, // : Cost 2 vext1 <0,u,u,6>, LHS + 2556084982U, // : Cost 3 vext1 <0,u,u,6>, <1,0,3,2> + 1571369466U, // : Cost 2 vext2 RHS, <6,2,7,3> + 1611938000U, // : Cost 2 vext3 LHS, + 1482345782U, // : Cost 2 vext1 <0,u,u,6>, RHS + 1194359171U, // : Cost 2 vrev + 296144182U, // : Cost 1 vdup2 RHS + 27705344U, // : Cost 0 copy RHS + 27705344U, // : Cost 0 copy RHS + 432496742U, // : Cost 1 vext1 RHS, LHS + 1488324016U, // : Cost 2 vext1 <1,u,u,7>, <1,u,u,7> + 1494296713U, // : Cost 2 vext1 <2,u,u,7>, 
<2,u,u,7>
+  1906901148U, // <u,u,7,3>: Cost 2 vzipr RHS, LHS
+  432500283U, // <u,u,7,4>: Cost 1 vext1 RHS, RHS
+  1506242256U, // <u,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3>
+  120699277U, // <u,u,7,6>: Cost 1 vrev RHS
+  363253046U, // <u,u,7,7>: Cost 1 vdup3 RHS
+  432502574U, // <u,u,7,u>: Cost 1 vext1 RHS, LHS
+  408617688U, // <u,u,u,0>: Cost 1 vext1 LHS, LHS
+  471086894U, // <u,u,u,1>: Cost 1 vext2 LHS, LHS
+  537753957U, // <u,u,u,2>: Cost 1 vext3 LHS, LHS
+  835584U, // <u,u,u,3>: Cost 0 copy LHS
+  408620342U, // <u,u,u,4>: Cost 1 vext1 LHS, RHS
+  471087258U, // <u,u,u,5>: Cost 1 vext2 LHS, RHS
+  537753997U, // <u,u,u,6>: Cost 1 vext3 LHS, RHS
+  27705344U, // <u,u,u,7>: Cost 0 copy RHS
+  835584U, // <u,u,u,u>: Cost 0 copy LHS
+  0
+};
diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
new file mode 100644
index 0000000..4723cc4
--- /dev/null
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -0,0 +1,578 @@
+//=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AArch64PromoteConstant pass which promotes constants
+// to global variables when this is likely to be more efficient. Currently only
+// types related to constant vectors (i.e., constant vector, array of constant
+// vectors, constant structure with a constant vector field, etc.) are promoted
+// to global variables. Constant vectors are likely to be lowered in target
+// constant pool during instruction selection already; therefore, the access
+// will remain the same (memory load), but the structure types are not split
+// into different constant pool accesses for each field. A bonus side effect is
+// that created globals may be merged by the global merge pass.
+//
+// FIXME: This pass may be useful for other targets too.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-promote-const"
+
+// Stress testing mode - disable heuristics.
+static cl::opt<bool> Stress("aarch64-stress-promote-const", cl::Hidden,
+                            cl::desc("Promote all vector constants"));
+
+STATISTIC(NumPromoted, "Number of promoted constants");
+STATISTIC(NumPromotedUses, "Number of promoted constant uses");
+
+//===----------------------------------------------------------------------===//
+//                       AArch64PromoteConstant
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// Promotes interesting constants into global variables.
+/// The motivating example is:
+/// static const uint16_t TableA[32] = {
+///    41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768,
+///    31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215,
+///    25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846,
+///    21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725,
+/// };
+///
+/// uint8x16x4_t LoadStatic(void) {
+///    uint8x16x4_t ret;
+///    ret.val[0] = vld1q_u16(TableA +  0);
+///    ret.val[1] = vld1q_u16(TableA +  8);
+///    ret.val[2] = vld1q_u16(TableA + 16);
+///    ret.val[3] = vld1q_u16(TableA + 24);
+///    return ret;
+/// }
+///
+/// The constants in this example are folded into the uses. Thus, 4 different
+/// constants are created.
+///
+/// As their type is vector, the cheapest way to create them is to load them
+/// from memory.
+///
+/// Therefore the final assembly has 4 different loads. With this pass
+/// enabled, only one load is issued for the constants.
+class AArch64PromoteConstant : public ModulePass {
+
+public:
+  static char ID;
+  AArch64PromoteConstant() : ModulePass(ID) {}
+
+  const char *getPassName() const override { return "AArch64 Promote Constant"; }
+
+  /// Iterate over the functions and promote the interesting constants into
+  /// global variables with module scope.
+  bool runOnModule(Module &M) override {
+    DEBUG(dbgs() << getPassName() << '\n');
+    bool Changed = false;
+    for (auto &MF : M) {
+      Changed |= runOnFunction(MF);
+    }
+    return Changed;
+  }
+
+private:
+  /// Look for interesting constants used within the given function.
+  /// Promote them into global variables, and load these global variables
+  /// within the related function, so that the number of inserted loads is
+  /// minimal.
+  bool runOnFunction(Function &F);
+
+  // This transformation requires dominator info.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+
+  /// Type to store a list of Users.
+  typedef SmallVector<Value::user_iterator, 4> Users;
+  /// Map an insertion point to all the uses it dominates.
+  typedef DenseMap<Instruction *, Users> InsertionPoints;
+  /// Map a function to the required insertion points of the loads for a
+  /// global variable.
+  typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
+
+  /// Find the closest point that dominates the given Use.
+  Instruction *findInsertionPoint(Value::user_iterator &Use);
+
+  /// Check if the given insertion point is dominated by an existing
+  /// insertion point.
+  /// If true, the given use is added to the list of dominated uses for
+  /// the related existing point.
+  /// \param NewPt the insertion point to be checked
+  /// \param UseIt the use to be added into the list of dominated uses
+  /// \param InsertPts existing insertion points
+  /// \pre NewPt and all instructions in InsertPts belong to the same function
+  /// \return true if one of the insertion points in InsertPts dominates NewPt,
+  ///         false otherwise
+  bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt,
+                   InsertionPoints &InsertPts);
+
+  /// Check if the given insertion point can be merged with an existing
+  /// insertion point in a common dominator.
+  /// If true, the given use is added to the list of the created insertion
+  /// point.
+  /// \param NewPt the insertion point to be checked
+  /// \param UseIt the use to be added into the list of dominated uses
+  /// \param InsertPts existing insertion points
+  /// \pre NewPt and all instructions in InsertPts belong to the same function
+  /// \pre isDominated returns false for the exact same parameters.
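+  /// (Illustrative scenario, not from this patch: if NewPt lives in %bb2 and
+  /// an existing insertion point lives in %bb3, and %bb1 is their nearest
+  /// common dominator, both uses are re-attached to the terminator of %bb1
+  /// so that a single load covers them.)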
+  /// \return true if there exists an insertion point in InsertPts that could
+  ///         have been merged with NewPt in a common dominator,
+  ///         false otherwise
+  bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt,
+                   InsertionPoints &InsertPts);
+
+  /// Compute the minimal insertion points to dominate all the interesting
+  /// uses of the value.
+  /// Insertion points are grouped per function, and each insertion point
+  /// contains a list of all the uses it dominates within the related function.
+  /// \param Val constant to be examined
+  /// \param[out] InsPtsPerFunc output storage of the analysis
+  void computeInsertionPoints(Constant *Val,
+                              InsertionPointsPerFunc &InsPtsPerFunc);
+
+  /// Insert a definition of a new global variable at each point contained in
+  /// InsPtsPerFunc and update the related uses (also contained in
+  /// InsPtsPerFunc).
+  bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc);
+
+  /// Compute the minimal insertion points to dominate all the interesting
+  /// uses of Val and insert a definition of a new global variable
+  /// at these points.
+  /// Also update the uses of Val accordingly.
+  /// Currently a use of Val is considered interesting if:
+  /// - Val is not UndefValue
+  /// - Val is not zeroinitialized
+  /// - Replacing Val by a load of a global variable is valid.
+  /// \see shouldConvert for more details
+  bool computeAndInsertDefinitions(Constant *Val);
+
+  /// Promote the given constant into a global variable if it is expected to
+  /// be profitable.
+  /// \return true if Cst has been promoted
+  bool promoteConstant(Constant *Cst);
+
+  /// Transfer the list of dominated uses of IPI to NewPt in InsertPts.
+  /// Append UseIt to this list and delete the entry of IPI in InsertPts.
+  static void appendAndTransferDominatedUses(Instruction *NewPt,
+                                             Value::user_iterator &UseIt,
+                                             InsertionPoints::iterator &IPI,
+                                             InsertionPoints &InsertPts) {
+    // Record the dominated use.
+    IPI->second.push_back(UseIt);
+    // Transfer the dominated uses of IPI to NewPt.
+    // Inserting into the DenseMap may invalidate existing iterators.
+    // Keep a copy of the key to find the iterator to erase.
+    Instruction *OldInstr = IPI->first;
+    InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second));
+    // Erase IPI.
+    IPI = InsertPts.find(OldInstr);
+    InsertPts.erase(IPI);
+  }
+};
+} // end anonymous namespace
+
+char AArch64PromoteConstant::ID = 0;
+
+namespace llvm {
+void initializeAArch64PromoteConstantPass(PassRegistry &);
+}
+
+INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const",
+                      "AArch64 Promote Constant Pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const",
+                    "AArch64 Promote Constant Pass", false, false)
+
+ModulePass *llvm::createAArch64PromoteConstantPass() {
+  return new AArch64PromoteConstant();
+}
+
+/// Check if the given type uses a vector type.
+static bool isConstantUsingVectorTy(const Type *CstTy) {
+  if (CstTy->isVectorTy())
+    return true;
+  if (CstTy->isStructTy()) {
+    for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements();
+         EltIdx < EndEltIdx; ++EltIdx)
+      if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx)))
+        return true;
+  } else if (CstTy->isArrayTy())
+    return isConstantUsingVectorTy(CstTy->getArrayElementType());
+  return false;
+}
+
+/// Check if the given use (Instruction + OpIdx) of Cst should be converted
+/// into a load of a global variable initialized with Cst.
+/// A use should be converted if it is legal to do so.
+/// For instance, it is not legal to turn the mask operand of a shuffle vector
+/// into a load of a global variable.
+static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
+                             unsigned OpIdx) {
+  // shufflevector instruction expects a const for the mask argument, i.e., the
+  // third argument. Do not promote this use in that case.
+  if (isa<ShuffleVectorInst>(Instr) && OpIdx == 2)
+    return false;
+
+  // extractvalue instruction expects a const idx.
+  if (isa<ExtractValueInst>(Instr) && OpIdx > 0)
+    return false;
+
+  // insertvalue instruction expects a const idx.
+  if (isa<InsertValueInst>(Instr) && OpIdx > 1)
+    return false;
+
+  // Array size of an alloca must be constant.
+  if (isa<AllocaInst>(Instr) && OpIdx > 0)
+    return false;
+
+  // Alignment argument must be constant.
+  if (isa<LoadInst>(Instr) && OpIdx > 0)
+    return false;
+
+  // Alignment argument must be constant.
+  if (isa<StoreInst>(Instr) && OpIdx > 1)
+    return false;
+
+  // Index must be constant.
+  if (isa<GetElementPtrInst>(Instr) && OpIdx > 0)
+    return false;
+
+  // Personality function and filters must be constant.
+  // Give up on that instruction.
+  if (isa<LandingPadInst>(Instr))
+    return false;
+
+  // Switch instruction expects constants to compare to.
+  if (isa<SwitchInst>(Instr))
+    return false;
+
+  // Expected address must be a constant.
+  if (isa<IndirectBrInst>(Instr))
+    return false;
+
+  // Do not mess with intrinsics.
+  if (isa<IntrinsicInst>(Instr))
+    return false;
+
+  // Do not mess with inline asm.
+  const CallInst *CI = dyn_cast<CallInst>(Instr);
+  if (CI && isa<InlineAsm>(CI->getCalledValue()))
+    return false;
+
+  return true;
+}
+
+/// Check if the given Cst should be converted into
+/// a load of a global variable initialized with Cst.
+/// A constant should be converted if it is likely that the materialization of
+/// the constant will be tricky. Thus, we give up on zero or undef values.
+///
+/// \todo Currently, accept only vector related types.
+/// Also we give up on all simple vector types to keep the existing
+/// behavior. Otherwise, we should push here all the checks of the lowering of
+/// BUILD_VECTOR. By giving up, we lose the potential benefit of merging
+/// constants via global merge and the fact that the same constant is stored
+/// only once with this method (versus once per function that uses the
+/// constant with the regular approach, even for float).
+/// Again, the simplest solution would be to promote every
+/// constant and rematerialize them when they are actually cheap to create.
+static bool shouldConvert(const Constant *Cst) {
+  if (isa<UndefValue>(Cst))
+    return false;
+
+  // FIXME: In some cases, it may be interesting to promote in memory
+  // a zero initialized constant.
+  // E.g., when the type of Cst requires more instructions than the
+  // adrp/add/load sequence or when this sequence can be shared by several
+  // instances of Cst.
+  // Ideally, we could promote this into a global and rematerialize the
+  // constant if that turns out to be a bad idea.
+  if (Cst->isZeroValue())
+    return false;
+
+  if (Stress)
+    return true;
+
+  // FIXME: see function \todo
+  if (Cst->getType()->isVectorTy())
+    return false;
+  return isConstantUsingVectorTy(Cst->getType());
+}
+
+Instruction *
+AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
+  // If this user is a phi, the insertion point is in the related
+  // incoming basic block.
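+  // Illustrative IR (block and value names are hypothetical, not from this
+  // patch):
+  //   bb1:
+  //     br label %merge
+  //   merge:
+  //     %p = phi <4 x i16> [ <i16 1, i16 2, i16 3, i16 4>, %bb1 ], ...
+  // The load of the promoted global must dominate the incoming edge, so it
+  // is inserted before the terminator of %bb1 rather than next to the phi.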
+  PHINode *PhiInst = dyn_cast<PHINode>(*Use);
+  Instruction *InsertionPoint;
+  if (PhiInst)
+    InsertionPoint =
+        PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
+  else
+    InsertionPoint = dyn_cast<Instruction>(*Use);
+  assert(InsertionPoint && "User is not an instruction!");
+  return InsertionPoint;
+}
+
+bool AArch64PromoteConstant::isDominated(Instruction *NewPt,
+                                         Value::user_iterator &UseIt,
+                                         InsertionPoints &InsertPts) {
+
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
+      *NewPt->getParent()->getParent()).getDomTree();
+
+  // Traverse all the existing insertion points and check if one is dominating
+  // NewPt. If it is, remember that.
+  for (auto &IPI : InsertPts) {
+    if (NewPt == IPI.first || DT.dominates(IPI.first, NewPt) ||
+        // When IPI.first is a terminator instruction, DT may think that
+        // the result is defined on the edge.
+        // Here we are testing the insertion point, not the definition.
+        (IPI.first->getParent() != NewPt->getParent() &&
+         DT.dominates(IPI.first->getParent(), NewPt->getParent()))) {
+      // No need to insert this point. Just record the dominated use.
+      DEBUG(dbgs() << "Insertion point dominated by:\n");
+      DEBUG(IPI.first->print(dbgs()));
+      DEBUG(dbgs() << '\n');
+      IPI.second.push_back(UseIt);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
+                                         Value::user_iterator &UseIt,
+                                         InsertionPoints &InsertPts) {
+  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
+      *NewPt->getParent()->getParent()).getDomTree();
+  BasicBlock *NewBB = NewPt->getParent();
+
+  // Traverse all the existing insertion points and check if one is dominated
+  // by NewPt and thus useless, or can be combined with NewPt into a common
+  // dominator.
+  for (InsertionPoints::iterator IPI = InsertPts.begin(),
+                                 EndIPI = InsertPts.end();
+       IPI != EndIPI; ++IPI) {
+    BasicBlock *CurBB = IPI->first->getParent();
+    if (NewBB == CurBB) {
+      // Instructions are in the same block.
+      // By construction, NewPt is dominating the other.
+      // Indeed, isDominated returned false with the exact same arguments.
+      DEBUG(dbgs() << "Merge insertion point with:\n");
+      DEBUG(IPI->first->print(dbgs()));
+      DEBUG(dbgs() << "\nat considered insertion point.\n");
+      appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+      return true;
+    }
+
+    // Look for a common dominator.
+    BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB);
+    // If none exists, we cannot merge these two points.
+    if (!CommonDominator)
+      continue;
+
+    if (CommonDominator != NewBB) {
+      // By construction, the CommonDominator cannot be CurBB.
+      assert(CommonDominator != CurBB &&
+             "Instruction has not been rejected during isDominated check!");
+      // Take the last instruction of the CommonDominator as insertion point.
+      NewPt = CommonDominator->getTerminator();
+    }
+    // else, CommonDominator is the block of NewBB, hence NewBB is the last
+    // possible insertion point in that block.
+    DEBUG(dbgs() << "Merge insertion point with:\n");
+    DEBUG(IPI->first->print(dbgs()));
+    DEBUG(dbgs() << '\n');
+    DEBUG(NewPt->print(dbgs()));
+    DEBUG(dbgs() << '\n');
+    appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
+    return true;
+  }
+  return false;
+}
+
+void AArch64PromoteConstant::computeInsertionPoints(
+    Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
+  DEBUG(dbgs() << "** Compute insertion points **\n");
+  for (Value::user_iterator UseIt = Val->user_begin(),
+                            EndUseIt = Val->user_end();
+       UseIt != EndUseIt; ++UseIt) {
+    // If the user is not an Instruction, we cannot modify it.
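+    // (Typical non-instruction users are ConstantExprs or other constants;
+    // they are skipped here, and runOnFunction separately refuses to promote
+    // through constant expressions.)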
+void AArch64PromoteConstant::computeInsertionPoints(
+    Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
+  DEBUG(dbgs() << "** Compute insertion points **\n");
+  for (Value::user_iterator UseIt = Val->user_begin(),
+                            EndUseIt = Val->user_end();
+       UseIt != EndUseIt; ++UseIt) {
+    // If the user is not an Instruction, we cannot modify it.
+    if (!isa<Instruction>(*UseIt))
+      continue;
+
+    // Filter out uses that should not be converted.
+    if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo()))
+      continue;
+
+    DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
+    DEBUG((*UseIt)->print(dbgs()));
+    DEBUG(dbgs() << '\n');
+
+    Instruction *InsertionPoint = findInsertionPoint(UseIt);
+
+    DEBUG(dbgs() << "Considered insertion point:\n");
+    DEBUG(InsertionPoint->print(dbgs()));
+    DEBUG(dbgs() << '\n');
+
+    // Check if the current insertion point is useless, i.e., it is dominated
+    // by another one.
+    InsertionPoints &InsertPts =
+        InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
+    if (isDominated(InsertionPoint, UseIt, InsertPts))
+      continue;
+    // This insertion point is useful; check if we can merge some insertion
+    // points in a common dominator or if NewPt dominates an existing one.
+    if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
+      continue;
+
+    DEBUG(dbgs() << "Keep considered insertion point\n");
+
+    // It is definitely useful on its own.
+    InsertPts[InsertionPoint].push_back(UseIt);
+  }
+}
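The merge step that computeInsertionPoints delegates to tryAndMerge is a
nearest-common-dominator computation: two insertion points are hoisted to the
deepest block dominating both. A rough standalone sketch, using depth-based
lifting on the same kind of toy tree as before (all names hypothetical):

// Illustrative only: nearest common dominator by walking the deeper node up.
#include <cstdio>
#include <map>
#include <string>

using DomTree = std::map<std::string, std::string>; // block -> immediate dom

static int depth(const DomTree &T, std::string N) {
  int D = 0;
  for (auto It = T.find(N); It != T.end(); It = T.find(N)) {
    N = It->second;
    ++D;
  }
  return D;
}

static std::string nearestCommonDominator(const DomTree &T, std::string A,
                                          std::string B) {
  while (depth(T, A) > depth(T, B)) A = T.at(A); // lift A to B's depth
  while (depth(T, B) > depth(T, A)) B = T.at(B); // lift B to A's depth
  while (A != B) { A = T.at(A); B = T.at(B); }   // lift both until they meet
  return A; // merging two insertion points hoists both here
}

int main() {
  DomTree T{{"left", "entry"}, {"right", "entry"}, {"merge", "entry"}};
  std::printf("NCA(left, right) = %s\n",
              nearestCommonDominator(T, "left", "right").c_str()); // entry
  return 0;
}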
+bool AArch64PromoteConstant::insertDefinitions(
+    Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc) {
+  // We will create one global variable per Module.
+  DenseMap<Module *, GlobalVariable *> ModuleToMergedGV;
+  bool HasChanged = false;
+
+  // Traverse all insertion points in all the functions.
+  for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
+                                        EndIt = InsPtsPerFunc.end();
+       FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
+    InsertionPoints &InsertPts = FctToInstPtsIt->second;
+// Do more checking for debug purposes.
+#ifndef NDEBUG
+    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
+        *FctToInstPtsIt->first).getDomTree();
+#endif
+    GlobalVariable *PromotedGV;
+    assert(!InsertPts.empty() && "Empty uses does not need a definition");
+
+    Module *M = FctToInstPtsIt->first->getParent();
+    DenseMap<Module *, GlobalVariable *>::iterator MapIt =
+        ModuleToMergedGV.find(M);
+    if (MapIt == ModuleToMergedGV.end()) {
+      PromotedGV = new GlobalVariable(
+          *M, Cst->getType(), true, GlobalValue::InternalLinkage, nullptr,
+          "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal);
+      PromotedGV->setInitializer(Cst);
+      ModuleToMergedGV[M] = PromotedGV;
+      DEBUG(dbgs() << "Global replacement: ");
+      DEBUG(PromotedGV->print(dbgs()));
+      DEBUG(dbgs() << '\n');
+      ++NumPromoted;
+      HasChanged = true;
+    } else {
+      PromotedGV = MapIt->second;
+    }
+
+    for (InsertionPoints::iterator IPI = InsertPts.begin(),
+                                   EndIPI = InsertPts.end();
+         IPI != EndIPI; ++IPI) {
+      // Create the load of the global variable.
+      IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
+      LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
+      DEBUG(dbgs() << "**********\n");
+      DEBUG(dbgs() << "New def: ");
+      DEBUG(LoadedCst->print(dbgs()));
+      DEBUG(dbgs() << '\n');
+
+      // Update the dominated uses.
+      Users &DominatedUsers = IPI->second;
+      for (Value::user_iterator Use : DominatedUsers) {
+#ifndef NDEBUG
+        assert((DT.dominates(LoadedCst, cast<Instruction>(*Use)) ||
+                (isa<PHINode>(*Use) &&
+                 DT.dominates(LoadedCst, findInsertionPoint(Use)))) &&
+               "Inserted definition does not dominate all its uses!");
+#endif
+        DEBUG(dbgs() << "Use to update " << Use.getOperandNo() << ":");
+        DEBUG(Use->print(dbgs()));
+        DEBUG(dbgs() << '\n');
+        Use->setOperand(Use.getOperandNo(), LoadedCst);
+        ++NumPromotedUses;
+      }
+    }
+  }
+  return HasChanged;
+}
+
+bool AArch64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
+  InsertionPointsPerFunc InsertPtsPerFunc;
+  computeInsertionPoints(Val, InsertPtsPerFunc);
+  return insertDefinitions(Val, InsertPtsPerFunc);
+}
+
+bool AArch64PromoteConstant::promoteConstant(Constant *Cst) {
+  assert(Cst && "Given variable is not a valid constant.");
+
+  if (!shouldConvert(Cst))
+    return false;
+
+  DEBUG(dbgs() << "******************************\n");
+  DEBUG(dbgs() << "Candidate constant: ");
+  DEBUG(Cst->print(dbgs()));
+  DEBUG(dbgs() << '\n');
+
+  return computeAndInsertDefinitions(Cst);
+}
+
+bool AArch64PromoteConstant::runOnFunction(Function &F) {
+  // Look for instructions using constant vectors. Promote such a constant to
+  // a global variable. Create as few loads of this variable as possible and
+  // update the uses accordingly.
+  bool LocalChange = false;
+  SmallSet<Constant *, 8> AlreadyChecked;
+
+  for (auto &MBB : F) {
+    for (auto &MI : MBB) {
+      // Traverse the operands, looking for constant vectors. Replace them by a
+      // load of a global variable of constant vector type.
+      for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands();
+           OpIdx != EndOpIdx; ++OpIdx) {
+        Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx));
+        // There is no point in promoting global values as they are already
+        // global. Do not promote constant expressions either, as they may
+        // require some code expansion.
+        if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
+            AlreadyChecked.insert(Cst))
+          LocalChange |= promoteConstant(Cst);
+      }
+    }
+  }
+  return LocalChange;
+}
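The AlreadyChecked set above relies on insert() reporting whether the element
was actually new, so each distinct constant is considered at most once per
function. The same visit-once pattern with the standard library, as a quick
sketch (the operand values are made up):

// Illustrative only: process each distinct value exactly once via set::insert.
#include <cstdio>
#include <set>

int main() {
  const int Operands[] = {7, 3, 7, 7, 3, 9};
  std::set<int> AlreadyChecked;
  for (int Op : Operands)
    if (AlreadyChecked.insert(Op).second)        // true only on first sighting
      std::printf("promote candidate %d\n", Op); // prints 7, 3, 9 once each
  return 0;
}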
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 06e1ffb..01b9587 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -12,175 +12,393 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #include "AArch64RegisterInfo.h"
 #include "AArch64FrameLowering.h"
-#include "AArch64MachineFunctionInfo.h"
-#include "AArch64TargetMachine.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
 
 #define GET_REGINFO_TARGET_DESC
 #include "AArch64GenRegisterInfo.inc"
 
-using namespace llvm;
+AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
+                                         const AArch64Subtarget *sti)
+    : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
 
-AArch64RegisterInfo::AArch64RegisterInfo()
-  : AArch64GenRegisterInfo(AArch64::X30) {
-}
-
-const uint16_t *
+const MCPhysReg *
 AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  return CSR_PCS_SaveList;
-}
-
-const uint32_t*
-AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
-  return CSR_PCS_RegMask;
+  assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
+    return CSR_AArch64_AllRegs_SaveList;
+  else
+    return CSR_AArch64_AAPCS_SaveList;
 }
 
-const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
-  return TLSDesc_RegMask;
+const uint32_t *
+AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+  if (CC == CallingConv::AnyReg)
+    return CSR_AArch64_AllRegs_RegMask;
+  else
+    return CSR_AArch64_AAPCS_RegMask;
 }
 
-const TargetRegisterClass *
-AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
-  if (RC == &AArch64::FlagClassRegClass)
-    return &AArch64::GPR64RegClass;
+const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
+  if (STI->isTargetDarwin())
+    return CSR_AArch64_TLS_Darwin_RegMask;
 
-  return RC;
+  assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
+  return CSR_AArch64_TLS_ELF_RegMask;
 }
-
+const uint32_t *
+AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+  // This should return a register mask that is the same as that returned by
+  // getCallPreservedMask but that additionally preserves the register used for
+  // the first i64 argument (which must also be the register used to return a
+  // single i64 return value)
+  //
+  // In case that the calling convention does not use the same register for
+  // both, the function should return NULL (does not currently apply)
+  return CSR_AArch64_AAPCS_ThisReturn_RegMask;
+}
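The calling-convention dispatch above is essentially a table lookup: anyreg
preserves everything, AAPCS only the usual callee-saved set. A hypothetical
sketch of that selection follows; the register names are real AAPCS64
callee-saved registers, but the tables themselves are illustrative and
abbreviated, not the TableGen-generated CSR lists:

// Illustrative only: picking a callee-saved list by calling convention.
#include <cstdio>

enum class CallConv { C, AnyReg };

static const char *const AAPCSSaved[] = {"x19", "x20", "x21", "x22", "x23",
                                         "x24", "x25", "x26", "x27", "x28",
                                         "fp",  "lr",  nullptr};
static const char *const AllRegsSaved[] = {"x0", "x1", "/* ...all regs... */",
                                           nullptr};

static const char *const *calleeSavedRegs(CallConv CC) {
  return CC == CallConv::AnyReg ? AllRegsSaved : AAPCSSaved;
}

int main() {
  for (const char *const *R = calleeSavedRegs(CallConv::C); *R; ++R)
    std::printf("%s ", *R);
  std::printf("\n");
  return 0;
}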
 BitVector
 AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
-  BitVector Reserved(getNumRegs());
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
-  Reserved.set(AArch64::XSP);
-  Reserved.set(AArch64::WSP);
-
+  // FIXME: avoid re-calculating this every time.
+  BitVector Reserved(getNumRegs());
+  Reserved.set(AArch64::SP);
   Reserved.set(AArch64::XZR);
+  Reserved.set(AArch64::WSP);
   Reserved.set(AArch64::WZR);
 
-  if (TFI->hasFP(MF)) {
-    Reserved.set(AArch64::X29);
+  if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
+    Reserved.set(AArch64::FP);
     Reserved.set(AArch64::W29);
   }
 
+  if (STI->isTargetDarwin()) {
+    Reserved.set(AArch64::X18); // Platform register
+    Reserved.set(AArch64::W18);
+  }
+
+  if (hasBasePointer(MF)) {
+    Reserved.set(AArch64::X19);
+    Reserved.set(AArch64::W19);
+  }
+
   return Reserved;
 }
 
-static bool hasFrameOffset(int opcode) {
-  return opcode != AArch64::LD1x2_8B && opcode != AArch64::LD1x3_8B &&
-         opcode != AArch64::LD1x4_8B && opcode != AArch64::ST1x2_8B &&
-         opcode != AArch64::ST1x3_8B && opcode != AArch64::ST1x4_8B &&
-         opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B &&
-         opcode != AArch64::LD1x4_16B && opcode != AArch64::ST1x2_16B &&
-         opcode != AArch64::ST1x3_16B && opcode != AArch64::ST1x4_16B;
-}
+bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
+                                        unsigned Reg) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
 
-void
-AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
-                                         int SPAdj,
-                                         unsigned FIOperandNum,
-                                         RegScavenger *RS) const {
-  assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
-  MachineInstr &MI = *MBBI;
-  MachineBasicBlock &MBB = *MI.getParent();
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  const AArch64FrameLowering *TFI =
-    static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
-
-  // In order to work out the base and offset for addressing, the FrameLowering
-  // code needs to know (sometimes) whether the instruction is storing/loading a
-  // callee-saved register, or whether it's a more generic
-  // operation. Fortunately the frame indices are used *only* for that purpose
-  // and are contiguous, so we can check here.
-  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  int MinCSFI = 0;
-  int MaxCSFI = -1;
-
-  if (CSI.size()) {
-    MinCSFI = CSI[0].getFrameIdx();
-    MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+  switch (Reg) {
+  default:
+    break;
+  case AArch64::SP:
+  case AArch64::XZR:
+  case AArch64::WSP:
+  case AArch64::WZR:
+    return true;
+  case AArch64::X18:
+  case AArch64::W18:
+    return STI->isTargetDarwin();
+  case AArch64::FP:
+  case AArch64::W29:
+    return TFI->hasFP(MF) || STI->isTargetDarwin();
+  case AArch64::W19:
+  case AArch64::X19:
+    return hasBasePointer(MF);
   }
 
-  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
-  bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
+  return false;
+}
 
-  unsigned FrameReg;
-  int64_t Offset;
-  Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
-                                           IsCalleeSaveOp);
-  // A vector load/store instruction doesn't have an offset operand.
-  bool HasOffsetOp = hasFrameOffset(MI.getOpcode());
-  if (HasOffsetOp)
-    Offset += MI.getOperand(FIOperandNum + 1).getImm();
+const TargetRegisterClass *
+AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                        unsigned Kind) const {
+  return &AArch64::GPR64RegClass;
+}
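The reservation logic above composes a bit vector from a handful of target
facts. A compact model of the same decisions, as a sketch (the enum indices
and helper are hypothetical; the registers and conditions mirror the code
above):

// Illustrative only: which AArch64 registers end up reserved and why.
#include <bitset>
#include <cstdio>

enum Reg { SP, XZR, FP, X18, X19, NumRegs };

static std::bitset<NumRegs> reservedRegs(bool HasFP, bool IsDarwin,
                                         bool HasBasePtr) {
  std::bitset<NumRegs> R;
  R.set(SP);                         // stack pointer, always reserved
  R.set(XZR);                        // zero register, always reserved
  if (HasFP || IsDarwin) R.set(FP);  // frame pointer
  if (IsDarwin)          R.set(X18); // platform register on Darwin
  if (HasBasePtr)        R.set(X19); // base pointer when needed
  return R;
}

int main() {
  auto R = reservedRegs(/*HasFP=*/true, /*IsDarwin=*/true, /*HasBasePtr=*/false);
  std::printf("reserved count: %zu (SP, XZR, FP, X18)\n", R.count());
  return 0;
}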
-  // DBG_VALUE instructions have no real restrictions so they can be handled
-  // easily.
-  if (MI.isDebugValue()) {
-    MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false);
-    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
-    return;
-  }
+const TargetRegisterClass *
+AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+  if (RC == &AArch64::CCRRegClass)
+    return &AArch64::GPR64RegClass; // Only MSR & MRS copy NZCV.
+  return RC;
+}
 
-  const AArch64InstrInfo &TII =
-    *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
-  int MinOffset, MaxOffset, OffsetScale;
-  if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s || !HasOffsetOp) {
-    MinOffset = 0;
-    MaxOffset = 0xfff;
-    OffsetScale = 1;
-  } else {
-    // Load/store of a stack object
-    TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
-  }
+unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
 
-  // There are two situations we don't use frame + offset directly in the
-  // instruction:
-  // (1) The offset can't really be scaled
-  // (2) Can't encode offset as it doesn't have an offset operand
-  if ((Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) ||
-      (!HasOffsetOp && Offset != 0)) {
-    unsigned BaseReg =
-      MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
-    emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
-                  BaseReg, FrameReg, BaseReg, Offset);
-    FrameReg = BaseReg;
-    Offset = 0;
-  }
+bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
 
-  // Negative offsets are expected if we address from FP, but for
-  // now this checks nothing has gone horribly wrong.
-  assert(Offset >= 0 && "Unexpected negative offset from SP");
+  // In the presence of variable sized objects, if the fixed stack size is
+  // large enough that referencing from the FP won't result in things being
+  // in range relatively often, we can use a base pointer to allow access
+  // from the other direction like the SP normally works.
+  if (MFI->hasVarSizedObjects()) {
+    // Conservatively estimate whether the negative offset from the frame
+    // pointer will be sufficient to reach. If a function has a smallish
+    // frame, it's less likely to have lots of spills and callee saved
+    // space, so it's all more likely to be within range of the frame pointer.
+    // If it's wrong, we'll materialize the constant and still get to the
+    // object; it's just suboptimal. Negative offsets use the unscaled
+    // load/store instructions, which have a 9-bit signed immediate.
+    if (MFI->getLocalFrameSize() < 256)
+      return false;
+    return true;
+  }
 
-  MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true);
-  if (HasOffsetOp)
-    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
+  return false;
 }
 
 unsigned
 AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-  if (TFI->hasFP(MF))
-    return AArch64::X29;
-  else
-    return AArch64::XSP;
+  return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
+}
+
+bool AArch64RegisterInfo::requiresRegisterScavenging(
+    const MachineFunction &MF) const {
+  return true;
+}
+
+bool AArch64RegisterInfo::requiresVirtualBaseRegisters(
+    const MachineFunction &MF) const {
+  return true;
+}
 
 bool
 AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // AArch64FrameLowering::resolveFrameIndexReference() can always fall back
+  // to the stack pointer, so only put the emergency spill slot next to the
+  // FP when there's no better way to access it (SP or base pointer).
+  return MFI->hasVarSizedObjects() && !hasBasePointer(MF);
+}
+
+bool AArch64RegisterInfo::requiresFrameIndexScavenging(
+    const MachineFunction &MF) const {
+  return true;
+}
+
+bool
+AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Only consider eliminating leaf frames.
+  if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) &&
+                          MFI->adjustsStack()))
+    return true;
+  return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
+                                            int64_t Offset) const {
+  for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
+    assert(i < MI->getNumOperands() &&
+           "Instr doesn't have FrameIndex operand!");
+
+  // It's the load/store FI references that cause issues, as it can be
+  // difficult to materialize the offset if it won't fit in the literal field.
+  // Estimate, based on the size of the local frame and some conservative
+  // assumptions about the rest of the stack frame (note, this is pre-regalloc,
+  // so we don't know everything for certain yet), whether this offset is
+  // likely to be out of range of the immediate. Return true if so.
+
+  // We only generate virtual base registers for loads and stores, so
+  // return false for everything else.
+  if (!MI->mayLoad() && !MI->mayStore())
+    return false;
+
+  // Without a virtual base register, if the function has variable sized
+  // objects, all fixed-size local references will be via the frame pointer.
+  // Approximate the offset and see if it's legal for the instruction.
+  // Note that the incoming offset is based on the SP value at function entry,
+  // so it'll be negative.
+  MachineFunction &MF = *MI->getParent()->getParent();
   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-  const AArch64FrameLowering *AFI
-    = static_cast<const AArch64FrameLowering *>(TFI);
-  return AFI->useFPForAddressing(MF);
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Estimate an offset from the frame pointer.
+  // Conservatively assume all GPR callee-saved registers get pushed.
+  // FP, LR, X19-X28, D8-D15. 64-bits each.
+  int64_t FPOffset = Offset - 16 * 20;
+  // Estimate an offset from the stack pointer.
+  // The incoming offset is relative to the SP at the start of the function,
+  // but when we access the local it'll be relative to the SP after local
+  // allocation, so adjust our SP-relative offset by that allocation size.
+  Offset += MFI->getLocalFrameSize();
+  // Assume that we'll have at least some spill slots allocated.
+  // FIXME: This is a total SWAG number. We should run some statistics
+  //        and pick a real one.
+  Offset += 128; // 128 bytes of spill slots
+
+  // If there is a frame pointer, try using it.
+  // The FP is only available if there is no dynamic realignment. We
+  // don't know for sure yet whether we'll need that, so we guess based
+  // on whether there are any local variables that would trigger it.
+  if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
+    return false;
+
+  // If we can reference via the stack pointer or base pointer, try that.
+  // FIXME: This (and the code that resolves the references) can be improved
+  //        to only disallow SP relative references in the live range of
+  //        the VLA(s). In practice, it's unclear how much difference that
+  //        would make, but it may be worth doing.
+  if (isFrameOffsetLegal(MI, Offset))
+    return false;
+
+  // The offset likely isn't legal; we want to allocate a virtual base
+  // register.
+  return true;
+}
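To make the estimates in needsFrameBaseReg concrete: for an assumed incoming
SP-relative offset of -1000 and a 512-byte local frame, the FP guess is
-1000 - 320 = -1320 and the SP guess is -1000 + 512 + 128 = -360. Both are
outside the 9-bit signed unscaled range [-256, 255] mentioned in
hasBasePointer, so a virtual base register would be requested. A worked
sketch under those assumptions (the real legality check is per-instruction
via isAArch64FrameOffsetLegal; the 9-bit range is just one relevant case):

// Illustrative only: the offset arithmetic behind needsFrameBaseReg.
#include <cstdint>
#include <cstdio>

static bool fitsUnscaled9Bit(int64_t Off) { return Off >= -256 && Off <= 255; }

int main() {
  const int64_t Incoming = -1000;  // assumed SP-relative offset at entry
  const int64_t LocalFrame = 512;  // assumed local frame size

  // FP, LR, X19-X28, D8-D15: 20 registers at 8 bytes each = 320 bytes.
  int64_t FPOffset = Incoming - 16 * 20;
  // SP after local allocation, plus a guessed 128 bytes of spill slots.
  int64_t SPOffset = Incoming + LocalFrame + 128;

  std::printf("FP estimate %lld legal: %d\n", (long long)FPOffset,
              fitsUnscaled9Bit(FPOffset)); // -1320 -> 0
  std::printf("SP estimate %lld legal: %d\n", (long long)SPOffset,
              fitsUnscaled9Bit(SPOffset)); // -360 -> 0
  return 0;
}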
+bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+                                             int64_t Offset) const {
+  assert(Offset <= INT_MAX && "Offset too big to fit in int.");
+  assert(MI && "Unable to get the legal offset for nil instruction.");
+  int SaveOffset = Offset;
+  return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
+}
+
+/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
+/// at the beginning of the basic block.
+void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                                                       unsigned BaseReg,
+                                                       int FrameIdx,
+                                                       int64_t Offset) const {
+  MachineBasicBlock::iterator Ins = MBB->begin();
+  DebugLoc DL; // Defaults to "unknown"
+  if (Ins != MBB->end())
+    DL = Ins->getDebugLoc();
+
+  const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const MachineFunction &MF = *MBB->getParent();
+  MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
+  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
+
+  BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+      .addFrameIndex(FrameIdx)
+      .addImm(Offset)
+      .addImm(Shifter);
+}
+
+void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+                                            int64_t Offset) const {
+  int Off = Offset; // ARM doesn't need the general 64-bit offsets
+  unsigned i = 0;
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+  bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
+  assert(Done && "Unable to resolve frame index!");
+  (void)Done;
+}
+
+void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                              int SPAdj, unsigned FIOperandNum,
+                                              RegScavenger *RS) const {
+  assert(SPAdj == 0 && "Unexpected");
+
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
+      MF.getTarget().getFrameLowering());
+
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  unsigned FrameReg;
+  int Offset;
+
+  // Special handling of dbg_value, stackmap and patchpoint instructions.
+ if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP || + MI.getOpcode() == TargetOpcode::PATCHPOINT) { + Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, + /*PreferFP=*/true); + Offset += MI.getOperand(FIOperandNum + 1).getImm(); + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); + return; + } + + // Modify MI as necessary to handle as much of 'Offset' as possible + Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg); + if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) + return; + + assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) && + "Emergency spill slot is out of reach"); + + // If we get here, the immediate doesn't fit into the instruction. We folded + // as much as possible above. Handle the rest, providing a register that is + // SP+LargeImm. + unsigned ScratchReg = + MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); + MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); +} + +namespace llvm { + +unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + switch (RC->getID()) { + default: + return 0; + case AArch64::GPR32RegClassID: + case AArch64::GPR32spRegClassID: + case AArch64::GPR32allRegClassID: + case AArch64::GPR64spRegClassID: + case AArch64::GPR64allRegClassID: + case AArch64::GPR64RegClassID: + case AArch64::GPR32commonRegClassID: + case AArch64::GPR64commonRegClassID: + return 32 - 1 // XZR/SP + - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP + - STI->isTargetDarwin() // X18 reserved as platform register + - hasBasePointer(MF); // X19 + case AArch64::FPR8RegClassID: + case AArch64::FPR16RegClassID: + case AArch64::FPR32RegClassID: + case AArch64::FPR64RegClassID: + case AArch64::FPR128RegClassID: + return 32; + + case AArch64::DDRegClassID: + case AArch64::DDDRegClassID: + case AArch64::DDDDRegClassID: + case AArch64::QQRegClassID: + case AArch64::QQQRegClassID: + case AArch64::QQQQRegClassID: + return 32; + + case AArch64::FPR128_loRegClassID: + return 16; + } +} + +} // namespace llvm diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index 4d67943..76af1ed 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -1,4 +1,4 @@ -//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===// +//==- AArch64RegisterInfo.h - AArch64 Register Information Impl --*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -7,14 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file contains the AArch64 implementation of the MCRegisterInfo class. +// This file contains the AArch64 implementation of the MRegisterInfo class. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H -#define LLVM_TARGET_AARCH64REGISTERINFO_H - -#include "llvm/Target/TargetRegisterInfo.h" +#ifndef LLVM_TARGET_AArch64REGISTERINFO_H +#define LLVM_TARGET_AArch64REGISTERINFO_H #define GET_REGINFO_HEADER #include "AArch64GenRegisterInfo.inc" @@ -23,49 +21,81 @@ namespace llvm { class AArch64InstrInfo; class AArch64Subtarget; +class MachineFunction; +class RegScavenger; +class TargetRegisterClass; struct AArch64RegisterInfo : public AArch64GenRegisterInfo { - AArch64RegisterInfo(); +private: + const AArch64InstrInfo *TII; + const AArch64Subtarget *STI; - const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const uint32_t *getCallPreservedMask(CallingConv::ID) const; +public: + AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti); - const uint32_t *getTLSDescCallPreservedMask() const; + bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - BitVector getReservedRegs(const MachineFunction &MF) const; - unsigned getFrameRegister(const MachineFunction &MF) const; + /// Code Generation virtual methods... + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const uint32_t *getCallPreservedMask(CallingConv::ID) const override; - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - unsigned FIOperandNum, - RegScavenger *Rs = NULL) const; + unsigned getCSRFirstUseCost() const override { + // The cost will be compared against BlockFrequency where entry has the + // value of 1 << 14. A value of 5 will choose to spill or split really + // cold path instead of using a callee-saved register. + return 5; + } - /// getCrossCopyRegClass - Returns a legal register class to copy a register - /// in the specified class to or from. Returns original class if it is - /// possible to copy between a two registers of the specified class. + // Calls involved in thread-local variable lookup save more registers than + // normal calls, so they need a different mask to represent this. + const uint32_t *getTLSCallPreservedMask() const; + + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the + /// case that 'returned' is on an i64 first argument if the calling convention + /// is one that can (partially) model this attribute with a preserved mask + /// (i.e. it is a calling convention that uses the same register for the first + /// i64 argument and an i64 return value) + /// + /// Should return NULL in the case that the calling convention does not have + /// this property + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + + BitVector getReservedRegs(const MachineFunction &MF) const override; const TargetRegisterClass * - getCrossCopyRegClass(const TargetRegisterClass *RC) const; - - /// getLargestLegalSuperClass - Returns the largest super class of RC that is - /// legal to use in the current sub-target and has the same spill size. 
- const TargetRegisterClass* - getLargestLegalSuperClass(const TargetRegisterClass *RC) const { - if (RC == &AArch64::tcGPR64RegClass) - return &AArch64::GPR64RegClass; - - return RC; - } + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override; + const TargetRegisterClass * + getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + bool useFPForScavengingIndex(const MachineFunction &MF) const override; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; + bool isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const override; + void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, + int FrameIdx, + int64_t Offset) const override; + void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, + int64_t Offset) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + bool cannotEliminateFrame(const MachineFunction &MF) const; - bool requiresRegisterScavenging(const MachineFunction &MF) const { - return true; - } + bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override; + bool hasBasePointer(const MachineFunction &MF) const; + unsigned getBaseRegister() const; - bool requiresFrameIndexScavenging(const MachineFunction &MF) const { - return true; - } + // Debug information queries. + unsigned getFrameRegister(const MachineFunction &MF) const override; - bool useFPForScavengingIndex(const MachineFunction &MF) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const override; }; } // end namespace llvm -#endif // LLVM_TARGET_AARCH64REGISTERINFO_H +#endif // LLVM_TARGET_AArch64REGISTERINFO_H diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index 9de7abd..21c927f 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -1,4 +1,4 @@ -//===- AArch64RegisterInfo.td - ARM Register defs ----------*- tablegen -*-===// +//=- AArch64RegisterInfo.td - Describe the AArch64 Regisers --*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,284 +7,587 @@ // //===----------------------------------------------------------------------===// // -// This file contains declarations that describe the AArch64 register file // //===----------------------------------------------------------------------===// -let Namespace = "AArch64" in { -def sub_128 : SubRegIndex<128>; -def sub_64 : SubRegIndex<64>; -def sub_32 : SubRegIndex<32>; -def sub_16 : SubRegIndex<16>; -def sub_8 : SubRegIndex<8>; - -// Note: Code depends on these having consecutive numbers. -def qqsub : SubRegIndex<256, 256>; - -def qsub_0 : SubRegIndex<128>; -def qsub_1 : SubRegIndex<128, 128>; -def qsub_2 : ComposedSubRegIndex; -def qsub_3 : ComposedSubRegIndex; - -def dsub_0 : SubRegIndex<64>; -def dsub_1 : SubRegIndex<64, 64>; -def dsub_2 : ComposedSubRegIndex; -def dsub_3 : ComposedSubRegIndex; -} -// Registers are identified with 5-bit ID numbers. 
-class AArch64Reg enc, string n> : Register { +class AArch64Reg enc, string n, list subregs = [], + list altNames = []> + : Register { let HWEncoding = enc; let Namespace = "AArch64"; + let SubRegs = subregs; } -class AArch64RegWithSubs enc, string n, list subregs = [], - list inds = []> - : AArch64Reg { - let SubRegs = subregs; - let SubRegIndices = inds; +let Namespace = "AArch64" in { + def sub_32 : SubRegIndex<32>; + + def bsub : SubRegIndex<8>; + def hsub : SubRegIndex<16>; + def ssub : SubRegIndex<32>; + def dsub : SubRegIndex<32>; + def qhisub : SubRegIndex<64>; + def qsub : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def dsub0 : SubRegIndex<64>; + def dsub1 : SubRegIndex<64>; + def dsub2 : SubRegIndex<64>; + def dsub3 : SubRegIndex<64>; + // Note: Code depends on these having consecutive numbers + def qsub0 : SubRegIndex<128>; + def qsub1 : SubRegIndex<128>; + def qsub2 : SubRegIndex<128>; + def qsub3 : SubRegIndex<128>; +} + +let Namespace = "AArch64" in { + def vreg : RegAltNameIndex; + def vlist1 : RegAltNameIndex; } //===----------------------------------------------------------------------===// -// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp +// Registers //===----------------------------------------------------------------------===// - -foreach Index = 0-30 in { - def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>; +def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>; +def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>; +def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>; +def W3 : AArch64Reg<3, "w3" >, DwarfRegNum<[3]>; +def W4 : AArch64Reg<4, "w4" >, DwarfRegNum<[4]>; +def W5 : AArch64Reg<5, "w5" >, DwarfRegNum<[5]>; +def W6 : AArch64Reg<6, "w6" >, DwarfRegNum<[6]>; +def W7 : AArch64Reg<7, "w7" >, DwarfRegNum<[7]>; +def W8 : AArch64Reg<8, "w8" >, DwarfRegNum<[8]>; +def W9 : AArch64Reg<9, "w9" >, DwarfRegNum<[9]>; +def W10 : AArch64Reg<10, "w10">, DwarfRegNum<[10]>; +def W11 : AArch64Reg<11, "w11">, DwarfRegNum<[11]>; +def W12 : AArch64Reg<12, "w12">, DwarfRegNum<[12]>; +def W13 : AArch64Reg<13, "w13">, DwarfRegNum<[13]>; +def W14 : AArch64Reg<14, "w14">, DwarfRegNum<[14]>; +def W15 : AArch64Reg<15, "w15">, DwarfRegNum<[15]>; +def W16 : AArch64Reg<16, "w16">, DwarfRegNum<[16]>; +def W17 : AArch64Reg<17, "w17">, DwarfRegNum<[17]>; +def W18 : AArch64Reg<18, "w18">, DwarfRegNum<[18]>; +def W19 : AArch64Reg<19, "w19">, DwarfRegNum<[19]>; +def W20 : AArch64Reg<20, "w20">, DwarfRegNum<[20]>; +def W21 : AArch64Reg<21, "w21">, DwarfRegNum<[21]>; +def W22 : AArch64Reg<22, "w22">, DwarfRegNum<[22]>; +def W23 : AArch64Reg<23, "w23">, DwarfRegNum<[23]>; +def W24 : AArch64Reg<24, "w24">, DwarfRegNum<[24]>; +def W25 : AArch64Reg<25, "w25">, DwarfRegNum<[25]>; +def W26 : AArch64Reg<26, "w26">, DwarfRegNum<[26]>; +def W27 : AArch64Reg<27, "w27">, DwarfRegNum<[27]>; +def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>; +def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>; +def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>; +def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; +def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias; + +let SubRegIndices = [sub_32] in { +def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias; +def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias; +def X2 : AArch64Reg<2, "x2", [W2]>, DwarfRegAlias; +def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias; +def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias; +def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias; +def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias; +def X7 : AArch64Reg<7, 
"x7", [W7]>, DwarfRegAlias; +def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias; +def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias; +def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias; +def X11 : AArch64Reg<11, "x11", [W11]>, DwarfRegAlias; +def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias; +def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias; +def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias; +def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias; +def X16 : AArch64Reg<16, "x16", [W16]>, DwarfRegAlias; +def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias; +def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias; +def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias; +def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias; +def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias; +def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias; +def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias; +def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias; +def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias; +def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias; +def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias; +def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias; +def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias; +def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias; +def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias; +def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias; } -def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>; -def WZR : AArch64Reg<31, "wzr">; +// Condition code register. +def NZCV : AArch64Reg<0, "nzcv">; -// Could be combined with previous loop, but this way leaves w and x registers -// consecutive as LLVM register numbers, which makes for easier debugging. -foreach Index = 0-30 in { - def X#Index : AArch64RegWithSubs("W"#Index)], [sub_32]>, - DwarfRegNum<[Index]>; +// GPR register classes with the intersections of GPR32/GPR32sp and +// GPR64/GPR64sp for use by the coalescer. +def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> { + let AltOrders = [(rotl GPR32common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64common : RegisterClass<"AArch64", [i64], 64, + (add (sequence "X%u", 0, 28), FP, LR)> { + let AltOrders = [(rotl GPR64common, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +// GPR register classes which exclude SP/WSP. +def GPR32 : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR)> { + let AltOrders = [(rotl GPR32, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64 : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR)> { + let AltOrders = [(rotl GPR64, 8)]; + let AltOrderSelect = [{ return 1; }]; } -def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>; -def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>; +// GPR register classes which include SP/WSP. +def GPR32sp : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WSP)> { + let AltOrders = [(rotl GPR32sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} +def GPR64sp : RegisterClass<"AArch64", [i64], 64, (add GPR64common, SP)> { + let AltOrders = [(rotl GPR64sp, 8)]; + let AltOrderSelect = [{ return 1; }]; +} -// Most instructions treat register 31 as zero for reads and a black-hole for -// writes. 
+def GPR32sponly : RegisterClass<"AArch64", [i32], 32, (add WSP)>; +def GPR64sponly : RegisterClass<"AArch64", [i64], 64, (add SP)>; -// Note that the order of registers is important for the Disassembler here: -// tablegen uses it to form MCRegisterClass::getRegister, which we assume can -// take an encoding value. -def GPR32 : RegisterClass<"AArch64", [i32], 32, - (add (sequence "W%u", 0, 30), WZR)> { +def GPR64spPlus0Operand : AsmOperandClass { + let Name = "GPR64sp0"; + let RenderMethod = "addRegOperands"; + let ParserMethod = "tryParseGPR64sp0Operand"; } -def GPR64 : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 30), XZR)> { +def GPR64sp0 : RegisterOperand { + let ParserMatchClass = GPR64spPlus0Operand; } -def GPR32nowzr : RegisterClass<"AArch64", [i32], 32, - (sequence "W%u", 0, 30)> { +// GPR register classes which include WZR/XZR AND SP/WSP. This is not a +// constraint used by any instructions, it is used as a common super-class. +def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>; +def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>; + +// For tail calls, we can't use callee-saved registers, as they are restored +// to the saved value before the tail call, which would clobber a call address. +// This is for indirect tail calls to store the address of the destination. +def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21, + X22, X23, X24, X25, X26, + X27, X28)>; + +// GPR register classes for post increment amount of vector load/store that +// has alternate printing when Rm=31 and prints a constant immediate value +// equal to the total number of bytes transferred. + +// FIXME: TableGen *should* be able to do these itself now. There appears to be +// a bug in counting how many operands a Post-indexed MCInst should have which +// means the aliases don't trigger. +def GPR64pi1 : RegisterOperand">; +def GPR64pi2 : RegisterOperand">; +def GPR64pi3 : RegisterOperand">; +def GPR64pi4 : RegisterOperand">; +def GPR64pi6 : RegisterOperand">; +def GPR64pi8 : RegisterOperand">; +def GPR64pi12 : RegisterOperand">; +def GPR64pi16 : RegisterOperand">; +def GPR64pi24 : RegisterOperand">; +def GPR64pi32 : RegisterOperand">; +def GPR64pi48 : RegisterOperand">; +def GPR64pi64 : RegisterOperand">; + +// Condition code regclass. +def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { + let CopyCost = -1; // Don't allow copying of status registers. + + // CCR is not allocatable. + let isAllocatable = 0; } -def GPR64noxzr : RegisterClass<"AArch64", [i64], 64, - (sequence "X%u", 0, 30)> { -} +//===----------------------------------------------------------------------===// +// Floating Point Scalar Registers +//===----------------------------------------------------------------------===// -// For tail calls, we can't use callee-saved registers or the structure-return -// register, as they are supposed to be live across function calls and may be -// clobbered by the epilogue. 
-def tcGPR64 : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 7), - (sequence "X%u", 9, 18))> { +def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>; +def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>; +def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>; +def B3 : AArch64Reg<3, "b3">, DwarfRegNum<[67]>; +def B4 : AArch64Reg<4, "b4">, DwarfRegNum<[68]>; +def B5 : AArch64Reg<5, "b5">, DwarfRegNum<[69]>; +def B6 : AArch64Reg<6, "b6">, DwarfRegNum<[70]>; +def B7 : AArch64Reg<7, "b7">, DwarfRegNum<[71]>; +def B8 : AArch64Reg<8, "b8">, DwarfRegNum<[72]>; +def B9 : AArch64Reg<9, "b9">, DwarfRegNum<[73]>; +def B10 : AArch64Reg<10, "b10">, DwarfRegNum<[74]>; +def B11 : AArch64Reg<11, "b11">, DwarfRegNum<[75]>; +def B12 : AArch64Reg<12, "b12">, DwarfRegNum<[76]>; +def B13 : AArch64Reg<13, "b13">, DwarfRegNum<[77]>; +def B14 : AArch64Reg<14, "b14">, DwarfRegNum<[78]>; +def B15 : AArch64Reg<15, "b15">, DwarfRegNum<[79]>; +def B16 : AArch64Reg<16, "b16">, DwarfRegNum<[80]>; +def B17 : AArch64Reg<17, "b17">, DwarfRegNum<[81]>; +def B18 : AArch64Reg<18, "b18">, DwarfRegNum<[82]>; +def B19 : AArch64Reg<19, "b19">, DwarfRegNum<[83]>; +def B20 : AArch64Reg<20, "b20">, DwarfRegNum<[84]>; +def B21 : AArch64Reg<21, "b21">, DwarfRegNum<[85]>; +def B22 : AArch64Reg<22, "b22">, DwarfRegNum<[86]>; +def B23 : AArch64Reg<23, "b23">, DwarfRegNum<[87]>; +def B24 : AArch64Reg<24, "b24">, DwarfRegNum<[88]>; +def B25 : AArch64Reg<25, "b25">, DwarfRegNum<[89]>; +def B26 : AArch64Reg<26, "b26">, DwarfRegNum<[90]>; +def B27 : AArch64Reg<27, "b27">, DwarfRegNum<[91]>; +def B28 : AArch64Reg<28, "b28">, DwarfRegNum<[92]>; +def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>; +def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>; +def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>; + +let SubRegIndices = [bsub] in { +def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias; +def H1 : AArch64Reg<1, "h1", [B1]>, DwarfRegAlias; +def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias; +def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias; +def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias; +def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias; +def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias; +def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias; +def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias; +def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias; +def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias; +def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias; +def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias; +def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias; +def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias; +def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias; +def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias; +def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias; +def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias; +def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias; +def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias; +def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias; +def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias; +def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias; +def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias; +def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias; +def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias; +def H27 : AArch64Reg<27, "h27", [B27]>, DwarfRegAlias; +def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias; +def H29 : AArch64Reg<29, "h29", [B29]>, DwarfRegAlias; +def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias; +def H31 : AArch64Reg<31, "h31", [B31]>, 
DwarfRegAlias; } +let SubRegIndices = [hsub] in { +def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias; +def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias; +def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias; +def S3 : AArch64Reg<3, "s3", [H3]>, DwarfRegAlias; +def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias; +def S5 : AArch64Reg<5, "s5", [H5]>, DwarfRegAlias; +def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias; +def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias; +def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias; +def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias; +def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias; +def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias; +def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias; +def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias; +def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias; +def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias; +def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias; +def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias; +def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias; +def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias; +def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias; +def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias; +def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias; +def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias; +def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias; +def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias; +def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias; +def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias; +def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias; +def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias; +def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias; +def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias; +} -// Certain addressing-useful instructions accept sp directly. Again the order of -// registers is important to the Disassembler. 
-def GPR32wsp : RegisterClass<"AArch64", [i32], 32, - (add (sequence "W%u", 0, 30), WSP)> { +let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { +def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; +def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; +def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; +def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; +def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; +def D5 : AArch64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias; +def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias; +def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; +def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; +def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; +def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; +def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias; +def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; +def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; +def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; +def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; +def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; +def D17 : AArch64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; +def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; +def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; +def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; +def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; +def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; +def D23 : AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; +def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; +def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; +def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias; +def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; +def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; +def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; +def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; +def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; } -def GPR64xsp : RegisterClass<"AArch64", [i64], 64, - (add (sequence "X%u", 0, 30), XSP)> { +let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { +def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; +def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias; +def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; +def Q3 : AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; +def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; +def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; +def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; +def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; +def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; +def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; +def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; +def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; +def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; +def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; +def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; +def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; +def Q16 : 
AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; +def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; +def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; +def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; +def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; +def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; +def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; +def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; +def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; +def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; +def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; +def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; +def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; +def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; +def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; +def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; } -// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and -// non-SP variants). We can't use a bare register in those patterns because -// TableGen doesn't like it, so we need a class containing just stack registers -def Rxsp : RegisterClass<"AArch64", [i64], 64, - (add XSP)> { +def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> { + let Size = 8; } +def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> { + let Size = 16; +} +def FPR32 : RegisterClass<"AArch64", [f32, i32], 32,(sequence "S%u", 0, 31)>; +def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, + v1i64], + 64, (sequence "D%u", 0, 31)>; +// We don't (yet) have an f128 legal type, so don't use that here. We +// normalize 128-bit vectors to v2f64 for arg passing and such, so use +// that here. +def FPR128 : RegisterClass<"AArch64", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128], + 128, (sequence "Q%u", 0, 31)>; -def Rwsp : RegisterClass<"AArch64", [i32], 32, - (add WSP)> { +// The lower 16 vector registers. Some instructions can only take registers +// in this range. +def FPR128_lo : RegisterClass<"AArch64", + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + 128, (trunc FPR128, 16)>; + +// Pairs, triples, and quads of 64-bit vector registers. +def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; +def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2)]>; +def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2), (rotl FPR64, 3)]>; +def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> { + let Size = 128; +} +def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> { + let Size = 196; +} +def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> { + let Size = 256; } -//===----------------------------------------------------------------------===// -// Scalar registers in the vector unit: -// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31 -//===----------------------------------------------------------------------===// +// Pairs, triples, and quads of 128-bit vector registers. 
+def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; +def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2)]>; +def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2), (rotl FPR128, 3)]>; +def QQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqPairs)> { + let Size = 256; +} +def QQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqTriples)> { + let Size = 384; +} +def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> { + let Size = 512; +} -foreach Index = 0-31 in { - def B # Index : AArch64Reg< Index, "b" # Index>, - DwarfRegNum<[!add(Index, 64)]>; - def H # Index : AArch64RegWithSubs("B" # Index)], [sub_8]>, - DwarfRegNum<[!add(Index, 64)]>; +// Vector operand versions of the FP registers. Alternate name printing and +// assmebler matching. +def VectorReg64AsmOperand : AsmOperandClass { + let Name = "VectorReg64"; + let PredicateMethod = "isVectorReg"; +} +def VectorReg128AsmOperand : AsmOperandClass { + let Name = "VectorReg128"; + let PredicateMethod = "isVectorReg"; +} - def S # Index : AArch64RegWithSubs("H" # Index)], [sub_16]>, - DwarfRegNum<[!add(Index, 64)]>; +def V64 : RegisterOperand { + let ParserMatchClass = VectorReg64AsmOperand; +} - def D # Index : AArch64RegWithSubs("S" # Index)], [sub_32]>, - DwarfRegNum<[!add(Index, 64)]>; +def V128 : RegisterOperand { + let ParserMatchClass = VectorReg128AsmOperand; +} - def Q # Index : AArch64RegWithSubs("D" # Index)], [sub_64]>, - DwarfRegNum<[!add(Index, 64)]>; +def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; } +def V128_lo : RegisterOperand { + let ParserMatchClass = VectorRegLoAsmOperand; } +class TypedVecListAsmOperand + : AsmOperandClass { + let Name = "TypedVectorList" # count # "_" # lanes # kind; -def FPR8 : RegisterClass<"AArch64", [v1i8], 8, - (sequence "B%u", 0, 31)> { + let PredicateMethod + = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>"; + let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">"; } -def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16, - (sequence "H%u", 0, 31)> { -} +class TypedVecListRegOperand + : RegisterOperand">; -def FPR32 : RegisterClass<"AArch64", [f32, v1i32], 32, - (sequence "S%u", 0, 31)> { -} +multiclass VectorList { + // With implicit types (probably on instruction instead). E.g. { v0, v1 } + def _64AsmOperand : AsmOperandClass { + let Name = NAME # "64"; + let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; + let RenderMethod = "addVectorList64Operands<" # count # ">"; + } -def FPR64 : RegisterClass<"AArch64", - [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], - 64, (sequence "D%u", 0, 31)>; + def "64" : RegisterOperand { + let ParserMatchClass = !cast(NAME # "_64AsmOperand"); + } -def FPR128 : RegisterClass<"AArch64", - [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], - 128, (sequence "Q%u", 0, 31)>; + def _128AsmOperand : AsmOperandClass { + let Name = NAME # "128"; + let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; + let RenderMethod = "addVectorList128Operands<" # count # ">"; + } + + def "128" : RegisterOperand { + let ParserMatchClass = !cast(NAME # "_128AsmOperand"); + } -def FPR64Lo : RegisterClass<"AArch64", - [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], - 64, (sequence "D%u", 0, 15)>; + // 64-bit register lists with explicit type. 
-def FPR128Lo : RegisterClass<"AArch64", - [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], - 128, (sequence "Q%u", 0, 15)>; + // { v0.8b, v1.8b } + def _8bAsmOperand : TypedVecListAsmOperand; + def "8b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_8bAsmOperand"); + } -//===----------------------------------------------------------------------===// -// Vector registers: -//===----------------------------------------------------------------------===// + // { v0.4h, v1.4h } + def _4hAsmOperand : TypedVecListAsmOperand; + def "4h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_4hAsmOperand"); + } -def VPR64AsmOperand : AsmOperandClass { - let Name = "VPR"; - let PredicateMethod = "isReg"; - let RenderMethod = "addRegOperands"; -} + // { v0.2s, v1.2s } + def _2sAsmOperand : TypedVecListAsmOperand; + def "2s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_2sAsmOperand"); + } + + // { v0.1d, v1.1d } + def _1dAsmOperand : TypedVecListAsmOperand; + def "1d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_1dAsmOperand"); + } -def VPR64 : RegisterOperand; + // 128-bit register lists with explicit type -def VPR128 : RegisterOperand; + // { v0.16b, v1.16b } + def _16bAsmOperand : TypedVecListAsmOperand; + def "16b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_16bAsmOperand"); + } -def VPR64Lo : RegisterOperand; + // { v0.8h, v1.8h } + def _8hAsmOperand : TypedVecListAsmOperand; + def "8h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_8hAsmOperand"); + } -def VPR128Lo : RegisterOperand; + // { v0.4s, v1.4s } + def _4sAsmOperand : TypedVecListAsmOperand; + def "4s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_4sAsmOperand"); + } -// Flags register -def NZCV : Register<"nzcv"> { - let Namespace = "AArch64"; -} + // { v0.2d, v1.2d } + def _2dAsmOperand : TypedVecListAsmOperand; + def "2d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_2dAsmOperand"); + } -def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { - let CopyCost = -1; - let isAllocatable = 0; -} + // { v0.b, v1.b } + def _bAsmOperand : TypedVecListAsmOperand; + def "b" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_bAsmOperand"); + } -//===----------------------------------------------------------------------===// -// Consecutive vector registers -//===----------------------------------------------------------------------===// -// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D31_D0 -def Tuples2D : RegisterTuples<[dsub_0, dsub_1], - [(rotl FPR64, 0), (rotl FPR64, 1)]>; - -// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1 -def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; - -// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2 -def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; - -// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31 -def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], - [(rotl FPR128, 0), (rotl FPR128, 1)]>; - -// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1 -def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; - -// 4 Consecutive 128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2 -def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3], - [(rotl FPR128, 0), (rotl 
FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; - -// The followings are super register classes to model 2/3/4 consecutive -// 64-bit/128-bit registers. - -def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>; - -def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> { - let Size = 192; // 3 x 64 bits, we have no predefined type of that size. -} - -def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>; - -def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>; - -def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> { - let Size = 384; // 3 x 128 bits, we have no predefined type of that size. -} - -def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>; - - -// The followings are vector list operands -multiclass VectorList_operands { - def _asmoperand : AsmOperandClass { - let Name = PREFIX # LAYOUT # Count; - let RenderMethod = "addVectorListOperands"; - let PredicateMethod = - "isVectorList"; - let ParserMethod = "ParseVectorList"; + // { v0.h, v1.h } + def _hAsmOperand : TypedVecListAsmOperand; + def "h" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_hAsmOperand"); } - def _operand : RegisterOperand"> { - let ParserMatchClass = - !cast(PREFIX # LAYOUT # "_asmoperand"); + // { v0.s, v1.s } + def _sAsmOperand : TypedVecListAsmOperand; + def "s" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_sAsmOperand"); } -} -multiclass VectorList_BHSD { - defm 8B : VectorList_operands; - defm 4H : VectorList_operands; - defm 2S : VectorList_operands; - defm 1D : VectorList_operands; - defm 16B : VectorList_operands; - defm 8H : VectorList_operands; - defm 4S : VectorList_operands; - defm 2D : VectorList_operands; + // { v0.d, v1.d } + def _dAsmOperand : TypedVecListAsmOperand; + def "d" : TypedVecListRegOperand { + let ParserMatchClass = !cast(NAME # "_dAsmOperand"); + } + + } -// Vector list operand with 1/2/3/4 registers: VOne8B_operand,..., VQuad2D_operand -defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>; -defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>; -defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>; -defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>; +defm VecListOne : VectorList<1, FPR64, FPR128>; +defm VecListTwo : VectorList<2, DD, QQ>; +defm VecListThree : VectorList<3, DDD, QQQ>; +defm VecListFour : VectorList<4, DDDD, QQQQ>; + + +// Register operand versions of the scalar FP registers. +def FPR16Op : RegisterOperand; +def FPR32Op : RegisterOperand; +def FPR64Op : RegisterOperand; +def FPR128Op : RegisterOperand; diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td new file mode 100644 index 0000000..0c3949e --- /dev/null +++ b/lib/Target/AArch64/AArch64SchedA53.td @@ -0,0 +1,291 @@ +//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the itinerary class data for the ARM Cortex A53 processors. +// +//===----------------------------------------------------------------------===// + +// ===---------------------------------------------------------------------===// +// The following definitions describe the simpler per-operand machine model. +// This works with MachineScheduler. 
See MCSchedModel.h for details. + +// Cortex-A53 machine model for scheduling and other instruction cost heuristics. +def CortexA53Model : SchedMachineModel { + let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order. + let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. + let MinLatency = 1; // OperandCycles are interpreted as MinLatency. + let LoadLatency = 3; // Optimistic load latency assuming bypass. + // This is overridden by OperandCycles if the + // Itineraries are queried instead. + let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation + // Specification - Instruction Timings" + // v 1.0 Spreadsheet +} + + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available. + +// Modeling each pipeline as a ProcResource using BufferSize = 0 since +// Cortex-A53 is in-order. + +def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU +def A53UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC +def A53UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division +def A53UnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store +def A53UnitB : ProcResource<1> { let BufferSize = 0; } // Branch +def A53UnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU +def A53UnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt + + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedWrite types which both map the ProcResources and +// set the latency. + +let SchedModel = CortexA53Model in { + +// ALU - Despite having a full latency of 4, most of the ALU instructions can +// forward a cycle earlier and then two cycles earlier in the case of a +// shift-only instruction. These latencies will be incorrect when the +// result cannot be forwarded, but modeling isn't rocket surgery. +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 3; } +def : WriteRes { let Latency = 2; } +def : WriteRes { let Latency = 3; } + +// MAC +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Div +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Load +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD +// below, choosing the median of 3 makes the latency 6. +// May model this more carefully in the future. The remaining +// A53WriteVLD# types represent the 1-5 cycle issues explicitly.
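+// Illustrative reading of the A53WriteVLD defs below (a sketch of the model's +// semantics, not vendor data): with BufferSize = 0 on A53UnitLdSt, a +// SchedWriteRes with ResourceCycles = [3] holds the in-order load/store pipe +// for three cycles, so a following load cannot issue until the pipe frees.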
+def : WriteRes { let Latency = 6; + let ResourceCycles = [3]; } +def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } +def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; + let ResourceCycles = [2]; } +def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; + let ResourceCycles = [3]; } +def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7; + let ResourceCycles = [4]; } +def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8; + let ResourceCycles = [5]; } + +// Pre/Post Indexing - Performed as part of address generation which is already +// accounted for in the WriteST* latencies below +def : WriteRes { let Latency = 0; } + +// Store +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 4; } + +// Vector Store - Similar to vector loads, can take 1-3 cycles to issue. +def : WriteRes { let Latency = 5; + let ResourceCycles = [2];} +def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; } +def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5; + let ResourceCycles = [2]; } +def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6; + let ResourceCycles = [3]; } + +// Branch +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// FP ALU +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 6; } + +// FP Mul, Div, Sqrt +def : WriteRes { let Latency = 6; } +def : WriteRes { let Latency = 33; + let ResourceCycles = [29]; } +def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; } +def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18; + let ResourceCycles = [14]; } +def A53WriteFDivDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33; + let ResourceCycles = [29]; } +def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17; + let ResourceCycles = [13]; } +def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32; + let ResourceCycles = [28]; } + +//===----------------------------------------------------------------------===// +// Subtarget-specific SchedRead types. + +// No forwarding for these reads. +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable +// operands are needed one cycle later if and only if they are to be +// shifted. Otherwise, they too are needed two cycles later. This same +// ReadAdvance applies to Extended registers as well, even though there is +// a separate SchedPredicate for them. +def : ReadAdvance; +def A53ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def A53ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI, + WriteISReg, WriteIEReg,WriteIS, + WriteID32,WriteID64, + WriteIM32,WriteIM64]>; +def A53ReadISReg : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +def A53ReadIEReg : SchedReadVariant<[ + SchedVar, + SchedVar]>; +def : SchedAlias; + +// MAC - Operands are generally needed one cycle later in the MAC pipe. +// Accumulator operands are needed two cycles later.
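+// Worked example of the forwarding arithmetic (illustrative): a MAC result +// with Latency = 4 feeding the accumulator operand of the next MAC, which is +// read two cycles late, yields an effective chain latency of 4 - 2 = 2 cycles.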
+def : ReadAdvance; +def : ReadAdvance; + +// Div +def : ReadAdvance; + +//===----------------------------------------------------------------------===// +// Subtarget-specific InstRWs. + +//--- +// Miscellaneous +//--- +def : InstRW<[WriteI], (instrs COPY)>; + +//--- +// Vector Loads +//--- +def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; +def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; + +def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +//--- +// Vector Stores +//--- +def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def 
: InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +//--- +// Floating Point MAC, DIV, SQRT +//--- +def : InstRW<[A53WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; +def : InstRW<[A53WriteFMAC], (instregex "^FML(A|S).*")>; +def : InstRW<[A53WriteFDivSP], (instrs FDIVSrr)>; +def : InstRW<[A53WriteFDivDP], (instrs FDIVDrr)>; +def : InstRW<[A53WriteFDivSP], (instregex "^FDIVv.*32$")>; +def : InstRW<[A53WriteFDivDP], (instregex "^FDIVv.*64$")>; +def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; +def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; + +} diff --git a/lib/Target/AArch64/AArch64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td new file mode 100644 index 0000000..a2a1802 --- /dev/null +++ b/lib/Target/AArch64/AArch64SchedCyclone.td @@ -0,0 +1,865 @@ +//=- AArch64SchedCyclone.td - AArch64 Cyclone Scheduling Defs -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for AArch64 Cyclone to support +// instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +def CycloneModel : SchedMachineModel { + let IssueWidth = 6; // 6 micro-ops are dispatched per cycle. + let MicroOpBufferSize = 192; // Based on the reorder buffer. + let LoadLatency = 4; // Optimistic load latency. + let MispredictPenalty = 16; // 14-19 cycles are typical.
+} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cyclone. + +// 4 integer pipes +def CyUnitI : ProcResource<4> { + let BufferSize = 48; +} + +// 2 branch units: I[0..1] +def CyUnitB : ProcResource<2> { + let Super = CyUnitI; + let BufferSize = 24; +} + +// 1 indirect-branch unit: I[0] +def CyUnitBR : ProcResource<1> { + let Super = CyUnitB; +} + +// 2 shifter pipes: I[2..3] +// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI +def CyUnitIS : ProcResource<2> { + let Super = CyUnitI; + let BufferSize = 24; +} + +// 1 mul pipe: I[0] +def CyUnitIM : ProcResource<1> { + let Super = CyUnitBR; + let BufferSize = 32; +} + +// 1 div pipe: I[1] +def CyUnitID : ProcResource<1> { + let Super = CyUnitB; + let BufferSize = 16; +} + +// 1 integer division unit. This is driven by the ID pipe, but only +// consumes the pipe for one cycle at issue and another cycle at writeback. +def CyUnitIntDiv : ProcResource<1>; + +// 2 ld/st pipes. +def CyUnitLS : ProcResource<2> { + let BufferSize = 28; +} + +// 3 fp/vector pipes. +def CyUnitV : ProcResource<3> { + let BufferSize = 48; +} +// 2 fp/vector arithmetic and multiply pipes: V[0-1] +def CyUnitVM : ProcResource<2> { + let Super = CyUnitV; + let BufferSize = 32; +} +// 1 fp/vector division/sqrt pipe: V[2] +def CyUnitVD : ProcResource<1> { + let Super = CyUnitV; + let BufferSize = 16; +} +// 1 fp compare pipe: V[0] +def CyUnitVC : ProcResource<1> { + let Super = CyUnitVM; + let BufferSize = 16; +} + +// 2 fp division/square-root units. These are driven by the VD pipe, +// but only consume the pipe for one cycle at issue and a cycle at writeback. +def CyUnitFloatDiv : ProcResource<2>; + +//===----------------------------------------------------------------------===// +// Define scheduler read/write resources and latency on Cyclone. +// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1. + +let SchedModel = CycloneModel in { + +//--- +// 7.8.1. Moves +//--- + +// A single nop micro-op (uX). +def WriteX : SchedWriteRes<[]> { let Latency = 0; } + +// Move zero is a register rename (to machine register zero). +// The move is replaced by a single nop micro-op. +// MOVZ Rd, #0 +// AND Rd, Rzr, #imm +def WriteZPred : SchedPredicate<[{TII->isGPRZero(MI)}]>; +def WriteImmZ : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>; + +// Move GPR is a register rename and single nop micro-op. +// ORR Xd, XZR, Xm +// ADD Xd, Xn, #0 +def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(MI)}]>; +def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(MI)}]>; +def WriteMov : SchedWriteVariant<[ + SchedVar, + SchedVar, + SchedVar]>; +def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>; + +// Move non-zero immediate is an integer ALU op. +// MOVN,MOVZ,MOVK +def : WriteRes; + +//--- +// 7.8.2-7.8.5. Arithmetic and Logical, Comparison, Conditional, +// Shifts and Bitfield Operations +//--- + +// ADR,ADRP +// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri +// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr +// ADC(S),SBC(S) +// Aliases: CMN, CMP, TST +// +// Conditional operations. +// CCMNi,CCMPi,CCMNr,CCMPr, +// CSEL,CSINC,CSINV,CSNEG +// +// Bit counting and reversal operations. +// CLS,CLZ,RBIT,REV,REV16,REV32 +def : WriteRes; + +// ADD with shifted register operand is a single micro-op that +// consumes a shift pipeline for two cycles. 
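+// Illustrative throughput consequence: each such op holds one of the two +// CyUnitIS shifter pipes for 2 cycles, so a stream of shifted-operand ALU ops +// sustains at most one issue per cycle even though each op is a single micro-op.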
+// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs +// EXAMPLE: ADDrs Xn, Xm LSL #imm +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} + +// ADD with extended register operand is the same as shifted reg operand. +// ADD(S)re,SUB(S)re +// EXAMPLE: ADDXre Xn, Xm, UXTB #1 +def : WriteRes { + let Latency = 2; + let ResourceCycles = [2]; +} + +// Variable shift and bitfield operations. +// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM +def : WriteRes; + +// EXTR Shifts a pair of registers and requires two micro-ops. +// The second micro-op is delayed, as modeled by ReadExtrHi. +// EXTR Xn, Xm, #imm +def : WriteRes { + let Latency = 2; + let NumMicroOps = 2; +} + +// EXTR's first register read is delayed by one cycle, effectively +// shortening its writer's latency. +// EXTR Xn, Xm, #imm +def : ReadAdvance; + +//--- +// 7.8.6. Multiplies +//--- + +// MUL/MNEG are aliases for MADD/MSUB. +// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL +def : WriteRes { + let Latency = 4; +} +// MADDX,MSUBX,SMULH,UMULH +def : WriteRes { + let Latency = 5; +} + +//--- +// 7.8.7. Divide +//--- + +// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient. +// The ID pipe is consumed for 2 cycles: issue and writeback. +// SDIVW,UDIVW +def : WriteRes { + let Latency = 10; + let ResourceCycles = [2, 10]; +} +// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient. +// The ID pipe is consumed for 2 cycles: issue and writeback. +// SDIVX,UDIVX +def : WriteRes { + let Latency = 13; + let ResourceCycles = [2, 13]; +} + +//--- +// 7.8.8,7.8.10. Load/Store, single element +//--- + +// Integer loads take 4 cycles and use one LS unit for one cycle. +def : WriteRes { + let Latency = 4; +} + +// Store-load forwarding is 4 cycles. +// +// Note: The store-exclusive sequence incorporates this +// latency. However, general heuristics should not model the +// dependence between a store and subsequent may-alias load because +// hardware speculation works. +def : WriteRes { + let Latency = 4; +} + +// Load from base address plus an optionally scaled register offset. +// Rt latency is latency WriteIS + WriteLD. +// EXAMPLE: LDR Xn, Xm [, lsl 3] +def CyWriteLDIdx : SchedWriteVariant<[ + SchedVar, // Load from scaled register. + SchedVar]>; // Load from register offset. +def : SchedAlias; // Map AArch64->Cyclone type. + +// EXAMPLE: STR Xn, Xm [, lsl 3] +def CyWriteSTIdx : SchedWriteVariant<[ + SchedVar, // Store to scaled register. + SchedVar]>; // Store to register offset. +def : SchedAlias; // Map AArch64->Cyclone type. + +// Read the (unshifted) base register Xn in the second micro-op one cycle later. +// EXAMPLE: LDR Xn, Xm [, lsl 3] +def ReadBaseRS : SchedReadAdvance<1>; +def CyReadAdrBase : SchedReadVariant<[ + SchedVar, // Read base reg after shifting offset. + SchedVar]>; // Read base reg with no shift. +def : SchedAlias; // Map AArch64->Cyclone type. + +//--- +// 7.8.9,7.8.11. Load/Store, paired +//--- + +// Address pre/post increment is a simple ALU op with one cycle latency. +def : WriteRes; + +// LDP high register write is fused with the load, but a nop micro-op remains. +def : WriteRes { + let Latency = 4; +} + +// STP is a vector op and store, except for QQ, which is just two stores. +def : SchedAlias; +def : InstRW<[WriteST, WriteST], (instrs STPQi)>; + +//--- +// 7.8.13. Branches +//--- + +// Branches take a single micro-op. +// The misprediction penalty is defined as a SchedMachineModel property. 
+def : WriteRes {let Latency = 0;} +def : WriteRes {let Latency = 0;} + +//--- +// 7.8.14. Never-issued Instructions, Barrier and Hint Operations +//--- + +// NOP,SEV,SEVL,WFE,WFI,YIELD +def : WriteRes {let Latency = 0;} +// ISB +def : InstRW<[WriteI], (instrs ISB)>; +// SLREX,DMB,DSB +def : WriteRes; + +// System instructions get an invalid latency because the latency of +// other operations across them is meaningless. +def : WriteRes {let Latency = -1;} + +//===----------------------------------------------------------------------===// +// 7.9 Vector Unit Instructions + +// Simple vector operations take 2 cycles. +def : WriteRes {let Latency = 2;} + +// Define some longer latency vector op types for Cyclone. +def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;} +def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;} +def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;} +def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;} + +// Simple floating-point operations take 2 cycles. +def : WriteRes {let Latency = 2;} + +//--- +// 7.9.1 Vector Moves +//--- + +// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently +// generates expensive int-float conversion instead: +// FMOVDi Dd, #0.0 +// FMOVv2f64ns Vd.2d, #0.0 + +// FMOVSi,FMOVDi +def : WriteRes {let Latency = 2;} + +// MOVI,MVNI are WriteV +// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV + +// Move FPR is a register rename and single nop micro-op. +// ORR.16b Vd,Vn,Vn +// COPY is handled above in the WriteMov Variant. +def WriteVMov : SchedWriteVariant<[ + SchedVar, + SchedVar]>; +def : InstRW<[WriteVMov], (instrs ORRv16i8)>; + +// FMOVSr,FMOVDr are WriteF. + +// MOV V,V is a WriteV. + +// CPY D,V[x] is a WriteV + +// INS V[x],V[y] is a WriteV. + +// FMOVWSr,FMOVXDr,FMOVXDHighr +def : WriteRes { + let Latency = 5; +} + +// FMOVSWr,FMOVDXr +def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>; + +// INS V[x],R +def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>; +def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>; + +// SMOV,UMOV R,V[x] +def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>; +def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>; + +// DUP V,R +def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>; + +// DUP V,V[x] is a WriteV. + +//--- +// 7.9.2 Integer Arithmetic, Logical, and Comparisons +//--- + +// BIC,ORR V,#imm are WriteV + +def : InstRW<[CyWriteV3], (instregex "ABSv")>; + +// MVN,NEG,NOT are WriteV + +def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>; + +// ADDP is a WriteV. +def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;} +def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>; + +def : InstRW<[CyWriteV3], + (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>; + +def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>; + +// ADD,SUB are WriteV + +// Forward declare. +def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;} + +// Add/Diff and accumulate uses the vector multiply unit. 
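+// Illustrative effect of the read-advance defined below: in a dependent chain +// such as SABA v0.8b, v1.8b, v2.8b feeding the accumulator of another SABA, +// the consumer sees the producer's 3-cycle latency as 3 - 1 = 2 cycles.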
+def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} +def CyReadVAccum : SchedReadAdvance<1, + [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>; + +def : InstRW<[CyWriteVAccum, CyReadVAccum], + (instregex "SADALP","UADALP")>; + +def : InstRW<[CyWriteVAccum, CyReadVAccum], + (instregex "SABAv","UABAv","SABALv","UABALv")>; + +def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>; + +def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>; + +def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>; + +// WriteV includes: +// AND,BIC,CMTST,EOR,ORN,ORR +// ADDP +// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD +// SADDL,SSUBL,UADDL,USUBL +// SADDW,SSUBW,UADDW,USUBW + +def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv", + "CMLEv","CMLTv", + "CMHIv","CMHSv")>; + +def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv", + "SMAXPv","SMINPv","UMAXPv","UMINPv")>; + +def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv", + "SABDLv","UABDLv")>; + +//--- +// 7.9.3 Floating Point Arithmetic and Comparisons +//--- + +// FABS,FNEG are WriteF + +def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>; +def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>; + +def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i", + "FMINPv2i","FMINNMPv2i")>; + +def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>; + +def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32, + FSUBSrr,FSUBv2f32,FSUBv4f32, + FADDPv2f32,FADDPv4f32, + FABD32,FABDv2f32,FABDv4f32)>; +def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64, + FSUBDrr,FSUBv2f64, + FADDPv2f64, + FABD64,FABDv2f64)>; + +def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>; + +def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT", + "FMAXS","FMAXD","FMAXv", + "FMINS","FMIND","FMINv", + "FMAXNMS","FMAXNMD","FMAXNMv", + "FMINNMS","FMINNMD","FMINNMv", + "FMAXPv2f","FMAXPv4f", + "FMINPv2f","FMINPv4f", + "FMAXNMPv2f","FMAXNMPv4f", + "FMINNMPv2f","FMINNMPv4f")>; + +// FCMP,FCMPE,FCCMP,FCCMPE +def : WriteRes {let Latency = 4;} + +// FCSEL is a WriteF. + +//--- +// 7.9.4 Shifts and Bitfield Operations +//--- + +// SHL is a WriteV + +def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;} +def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>; + +def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;} +def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>; + +// Shift and accumulate uses the vector multiply unit. +def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} +def CyReadVShiftAcc : SchedReadAdvance<1, + [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>; +def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc], + (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; + +// SSHL,USHL are WriteV. + +def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>; + +// SQSHL,SQSHLU,UQSHL are WriteV. + +def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>; + +// WriteV includes: +// SHLL,SSHLL,USHLL +// SLI,SRI +// BIF,BIT,BSL +// EXT +// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN +// XTN2 + +def : InstRW<[CyWriteV4], + (instregex "RSHRNv","SHRNv", + "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv", + "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; + +//--- +// 7.9.5 Multiplication +//--- + +def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;} +def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv", + "SQDMULLv","SQDMULHv","SQRDMULHv")>; + +// FMUL,FMULX,FNMUL default to WriteFMul. 
+def : WriteRes { let Latency = 4;} + +def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;} +def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed, + FNMULDrr,FMULX64,FMULXv2f64,FMULXv2i64_indexed)>; + +def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>; +def : InstRW<[CyWriteVMul, CyReadVMulAcc], + (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL", + "SQDMLAL","SQDMLSL")>; + +def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;} +def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;} +def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>; +def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>; + +def : InstRW<[CyWriteSMul, CyReadSMul], + (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr, + FMLAv2f32,FMLAv4f32, + FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>; +def : InstRW<[CyWriteDMul, CyReadDMul], + (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr, + FMLAv2f64,FMLAv2i64_indexed, + FMLSv2f64,FMLSv2i64_indexed)>; + +def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; } +def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>; + +//--- +// 7.9.6 Divide and Square Root +//--- + +// FDIV,FSQRT +// TODO: Add 64-bit variant with 19 cycle latency. +// TODO: Specialize FSQRT for longer latency. +def : WriteRes { + let Latency = 17; + let ResourceCycles = [2, 17]; +} + +def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>; + +def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; } +def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>; + +def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; } +def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; } +def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>; +def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>; + +//--- +// 7.9.7 Integer-FP Conversions +//--- + +// FCVT lengthen f16/s32 +def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>; + +// FCVT,FCVTN,FCVTXN +// SCVTF,UCVTF V,V +// FRINT(AIMNPXZ) V,V +def : WriteRes {let Latency = 4;} + +// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles. +def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>; +def : InstRW<[CyWriteCopyToFPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>; + +// FCVT Rd, S/D = V6+LD4: 10 cycles +def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>; +def : InstRW<[CyWriteCvtToGPR], (instregex "[SU]CVTF[SU][WX][SD]r")>; + +// FCVTL is a WriteV + +//--- +// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup +//--- + +def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;} +def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr, + AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr, + SHA1SU0rrr)>; + +def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;} +def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>; + +def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;} +def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr, + SHA256Hrrr,SHA256H2rrr)>; + +// TRN,UZP,ZUP are WriteV. + +// TBL,TBX are WriteV. + +//--- +// 7.9.11-7.9.14 Load/Store, single element and paired +//--- + +// Loading into the vector unit takes 5 cycles vs 4 for integer loads. +def : WriteRes { + let Latency = 5; +} + +// Store-load forwarding is 4 cycles. +def : WriteRes { + let Latency = 4; +} + +// WriteVLDPair/VSTPair sequences are expanded by the target description. + +//--- +// 7.9.15 Load, element operations +//--- + +// Only the first WriteVLD and WriteAdr for writeback matches def operands. 
+// Subsequent WriteVLDs consume resources. Since all loaded values have the +// same latency, this is acceptable. + +// Vd is read 5 cycles after issuing the vector load. +def : ReadAdvance; + +def : InstRW<[WriteVLD], + (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr], + (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +// Register writes from the load's high half are fused micro-ops. +def : InstRW<[WriteVLD], + (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr], + (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD], + (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLD, WriteVLD], + (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD], + (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD], + (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD], + (instregex "LD1i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr], + (instregex "LD1i(8|16|32)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>; + +def : InstRW<[WriteVLDShuffle], + (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr], + (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[WriteVLDShuffle, WriteV], + (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], + (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle], + (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], + (instregex "LD2i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], + (instregex "LD2i(8|16|32)_POST")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], + (instregex "LD2i64$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], + (instregex "LD2i64_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteV], + (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], + (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], + (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], + (instregex "LD3Threev(8b|4h|2s)_POST")>; +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle], + (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV], + (instregex "LD3i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV], + (instregex "LD3i(8|16|32)_POST")>; + +def : 
InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV], + (instregex "LD3i64$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], + (instregex "LD3i64_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteV, WriteV], + (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV], + (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], + (instrs LD3Rv1d,LD3Rv2d)>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], + (instrs LD3Rv1d_POST,LD3Rv2d_POST)>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], + (instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], + (instregex "LD4Fourv(8b|4h|2s)_POST")>; +def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle, + WriteVLDPairShuffle, WriteVLDPairShuffle], + (instregex "LD4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle, + WriteVLDPairShuffle, WriteVLDPairShuffle], + (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV], + (instregex "LD4i(8|16|32)$")>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV], + (instregex "LD4i(8|16|32)_POST")>; + + +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4i64)>; +def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV], + (instrs LD4i64_POST)>; + +def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV], + (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV], + (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>; + +def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4Rv1d,LD4Rv2d)>; +def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV], + (instrs LD4Rv1d_POST,LD4Rv2d_POST)>; + +//--- +// 7.9.16 Store, element operations +//--- + +// Only the WriteAdr for writeback matches a def operand. +// Subsequent WriteVSTs only consume resources.
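+// Worked example (illustrative): ST2Twov16b_POST below gets the writer list +// [WriteAdr, WriteVSTShuffle, WriteVSTShuffle]; the WriteAdr matches the +// updated base-register def, while the two WriteVSTShuffle entries only +// consume shuffle/store resources.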
+ +def : InstRW<[WriteVST], + (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST], + (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], + (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], + (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST], + (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST], + (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVST], + (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST], + (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST, WriteVST], + (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST], + (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], + (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST], + (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>; + +def : InstRW<[WriteVSTShuffle], + (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], + (instregex "ST2Twov(8b|4h|2s)_POST")>; +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>; +def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(8b|4h|2s)_POST")>; +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], + (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>; + +def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>; +def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>; + +def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>; +def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle, + WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle, + WriteVSTPairShuffle, WriteVSTPairShuffle], + (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; + +def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>; +def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex 
"ST4i(8|16|32)_POST")>; + +def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>; +def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>; + +//--- +// Unused SchedRead types +//--- + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +} // SchedModel = CycloneModel diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td index ec8450b..eaa9110 100644 --- a/lib/Target/AArch64/AArch64Schedule.td +++ b/lib/Target/AArch64/AArch64Schedule.td @@ -1,4 +1,4 @@ -//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// +//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,74 +7,98 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Generic processor itineraries for legacy compatibility. - -def GenericItineraries : ProcessorItineraries<[], [], []>; - - -//===----------------------------------------------------------------------===// -// Base SchedReadWrite types - -// Basic ALU -def WriteALU : SchedWrite; // Generic: may contain shift and/or ALU operation -def WriteALUs : SchedWrite; // Shift only with no ALU operation -def ReadALU : SchedRead; // Operand not needed for shifting -def ReadALUs : SchedRead; // Operand needed for shifting - -// Multiply with optional accumulate -def WriteMAC : SchedWrite; -def ReadMAC : SchedRead; - -// Compares -def WriteCMP : SchedWrite; -def ReadCMP : SchedRead; - -// Division -def WriteDiv : SchedWrite; -def ReadDiv : SchedRead; - -// Loads -def WriteLd : SchedWrite; -def WritePreLd : SchedWrite; -def WriteVecLd : SchedWrite; -def ReadLd : SchedRead; -def ReadPreLd : SchedRead; -def ReadVecLd : SchedRead; - -// Stores -def WriteSt : SchedWrite; -def WriteVecSt : SchedWrite; -def ReadSt : SchedRead; -def ReadVecSt : SchedRead; - -// Branches -def WriteBr : SchedWrite; -def WriteBrL : SchedWrite; -def ReadBr : SchedRead; - -// Floating Point ALU -def WriteFPALU : SchedWrite; -def ReadFPALU : SchedRead; - -// Floating Point MAC, Mul, Div, Sqrt -// Most processors will simply send all of these down a dedicated pipe, but -// they're explicitly seperated here for flexibility of modeling later. May -// consider consolidating them into a single WriteFPXXXX type in the future. -def WriteFPMAC : SchedWrite; -def WriteFPMul : SchedWrite; -def WriteFPDiv : SchedWrite; -def WriteFPSqrt : SchedWrite; -def ReadFPMAC : SchedRead; -def ReadFPMul : SchedRead; -def ReadFPDiv : SchedRead; -def ReadFPSqrt : SchedRead; - -// Noop -def WriteNoop : SchedWrite; - - -//===----------------------------------------------------------------------===// -// Subtarget specific Machine Models. - -include "AArch64ScheduleA53.td" +// Define TII for use in SchedVariant Predicates. +// const MachineInstr *MI and const TargetSchedModel *SchedModel +// are defined by default. +def : PredicateProlog<[{ + const AArch64InstrInfo *TII = + static_cast(SchedModel->getInstrInfo()); + (void)TII; +}]>; + +// AArch64 Scheduler Definitions + +def WriteImm : SchedWrite; // MOVN, MOVZ +// TODO: Provide variants for MOV32/64imm Pseudos that dynamically +// select the correct sequence of WriteImms. 
+ +def WriteI : SchedWrite; // ALU +def WriteISReg : SchedWrite; // ALU of Shifted-Reg +def WriteIEReg : SchedWrite; // ALU of Extended-Reg +def ReadI : SchedRead; // ALU +def ReadISReg : SchedRead; // ALU of Shifted-Reg +def ReadIEReg : SchedRead; // ALU of Extended-Reg +def WriteExtr : SchedWrite; // EXTR shifts a reg pair +def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair +def WriteIS : SchedWrite; // Shift/Scale +def WriteID32 : SchedWrite; // 32-bit Divide +def WriteID64 : SchedWrite; // 64-bit Divide +def ReadID : SchedRead; // 32/64-bit Divide +def WriteIM32 : SchedWrite; // 32-bit Multiply +def WriteIM64 : SchedWrite; // 64-bit Multiply +def ReadIM : SchedRead; // 32/64-bit Multiply +def ReadIMA : SchedRead; // 32/64-bit Multiply Accumulate +def WriteBr : SchedWrite; // Branch +def WriteBrReg : SchedWrite; // Indirect Branch + +def WriteLD : SchedWrite; // Load from base addr plus immediate offset +def WriteST : SchedWrite; // Store to base addr plus immediate offset +def WriteSTP : SchedWrite; // Store a register pair. +def WriteAdr : SchedWrite; // Address pre/post increment. + +def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled). +def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled). +def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST. + +// Predicate for determining when a shiftable register is shifted. +def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(MI)}]>; + +// Predicate for determining when an extendable register is extended. +def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(MI)}]>; + +// ScaledIdxPred is true if a WriteLDIdx operand will be +// scaled. Subtargets can use this to dynamically select resources and +// latency for WriteLDIdx and ReadAdrBase. +def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>; + +// Serialized two-level address load. +// EXAMPLE: LOADGot +def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>; + +// Serialized two-level address lookup. +// EXAMPLE: MOVaddr... +def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>; + +// The second register of a load-pair. +// LDP,LDPSW,LDNP,LDXP,LDAXP +def WriteLDHi : SchedWrite; + +// Store-exclusive is a store followed by a dependent load. +def WriteSTX : WriteSequence<[WriteST, WriteLD]>; + +def WriteSys : SchedWrite; // Long, variable latency system ops. +def WriteBarrier : SchedWrite; // Memory barrier. +def WriteHint : SchedWrite; // Hint instruction. + +def WriteF : SchedWrite; // General floating-point ops. +def WriteFCmp : SchedWrite; // Floating-point compare. +def WriteFCvt : SchedWrite; // Float conversion. +def WriteFCopy : SchedWrite; // Float-int register copy. +def WriteFImm : SchedWrite; // Floating-point immediate. +def WriteFMul : SchedWrite; // Floating-point multiply. +def WriteFDiv : SchedWrite; // Floating-point division. + +def WriteV : SchedWrite; // Vector ops. +def WriteVLD : SchedWrite; // Vector loads. +def WriteVST : SchedWrite; // Vector stores. + +// Read the unwritten lanes of the VLD's destination registers. +def ReadVLD : SchedRead; + +// Sequential vector load and shuffle. +def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>; +def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>; + +// Store a shuffled vector.
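+// Note on WriteSequence (illustrative): a sequence's latency is the sum of its +// parts, so a subtarget costing WriteV at 2 cycles and WriteVST at 4 would see +// WriteVSTShuffle below as a 6-cycle shuffle-then-store chain.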
+def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>; +def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>; diff --git a/lib/Target/AArch64/AArch64ScheduleA53.td b/lib/Target/AArch64/AArch64ScheduleA53.td deleted file mode 100644 index 20a14e7..0000000 --- a/lib/Target/AArch64/AArch64ScheduleA53.td +++ /dev/null @@ -1,144 +0,0 @@ -//=- AArch64ScheduleA53.td - ARM Cortex-A53 Scheduling Definitions -*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the itinerary class data for the ARM Cortex A53 processors. -// -//===----------------------------------------------------------------------===// - -// ===---------------------------------------------------------------------===// -// The following definitions describe the simpler per-operand machine model. -// This works with MachineScheduler. See MCSchedModel.h for details. - -// Cortex-A53 machine model for scheduling and other instruction cost heuristics. -def CortexA53Model : SchedMachineModel { - let IssueWidth = 2; // 2 micro-ops are dispatched per cycle. - let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency. - let LoadLatency = 2; // Optimistic load latency assuming bypass. - // This is overriden by OperandCycles if the - // Itineraries are queried instead. - let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation - // Specification - Instruction Timings" - // v 1.0 Spreadsheet -} - - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available. - -// Modeling each pipeline as a ProcResource using the default BufferSize = -1. -// Cortex-A53 is in-order and therefore should be using BufferSize = 0. The -// current configuration performs better with the basic latencies provided so -// far. Will revisit BufferSize once the latency information is more accurate. - -let SchedModel = CortexA53Model in { - -def A53UnitALU : ProcResource<2>; // Int ALU -def A53UnitMAC : ProcResource<1>; // Int MAC -def A53UnitDiv : ProcResource<1>; // Int Division -def A53UnitLdSt : ProcResource<1>; // Load/Store -def A53UnitB : ProcResource<1>; // Branch -def A53UnitFPALU : ProcResource<1>; // FP ALU -def A53UnitFPMDS : ProcResource<1>; // FP Mult/Div/Sqrt - - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedWrite types which both map the ProcResources and -// set the latency. - -// Issue - Every instruction must consume an A53WriteIssue. Optionally, -// instructions that cannot be dual-issued will also include the -// A53WriteIssue2nd in their SchedRW list. That second WriteRes will -// ensure that a second issue slot is consumed. -def A53WriteIssue : SchedWriteRes<[]>; -def A53WriteIssue2nd : SchedWriteRes<[]> { let Latency = 0; } - -// ALU - These are reduced to 1 despite a true latency of 4 in order to easily -// model forwarding logic. Once forwarding is properly modelled, then -// they'll be corrected. -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -// MAC -def : WriteRes { let Latency = 4; } - -// Div -def : WriteRes { let Latency = 4; } - -// Load - Note: Vector loads take 1-5 cycles to issue. 
For the WriteVecLd below, -// choosing the median of 3 which makes the latency 6. May model this more -// carefully in the future. -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 6; } - -// Store - Note: Vector stores take 1-3 cycles to issue. For the ReadVecSt below, -// choosing the median of 2 which makes the latency 5. May model this more -// carefully in the future. -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 5; } - -// Branch -def : WriteRes; -def : WriteRes; - -// FP ALU -def : WriteRes {let Latency = 6; } - -// FP MAC, Mul, Div, Sqrt -// Using Double Precision numbers for now as a worst case. Additionally, not -// modeling the exact hazard but instead treating the whole pipe as a hazard. -// As an example VMUL, VMLA, and others are actually pipelined. VDIV and VSQRT -// have a total latency of 33 and 32 respectively but only a hazard of 29 and -// 28 (double-prescion example). -def : WriteRes { let Latency = 10; } -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 33; - let ResourceCycles = [29]; } -def : WriteRes { let Latency = 32; - let ResourceCycles = [28]; } - - -//===----------------------------------------------------------------------===// -// Subtarget-specific SchedRead types. - -// No forwarding defined for ReadALU yet. -def : ReadAdvance; - -// No forwarding defined for ReadCMP yet. -def : ReadAdvance; - -// No forwarding defined for ReadBr yet. -def : ReadAdvance; - -// No forwarding defined for ReadMAC yet. -def : ReadAdvance; - -// No forwarding defined for ReadDiv yet. -def : ReadAdvance; - -// No forwarding defined for ReadLd, ReadPreLd, ReadVecLd yet. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -// No forwarding defined for ReadSt and ReadVecSt yet. -def : ReadAdvance; -def : ReadAdvance; - -// No forwarding defined for ReadFPALU yet. -def : ReadAdvance; - -// No forwarding defined for ReadFPMAC/Mul/Div/Sqrt yet. -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; -def : ReadAdvance; - -} diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 6bbe075..5c65b75 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -11,15 +11,49 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-selectiondag-info" #include "AArch64TargetMachine.h" -#include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; -AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget()) { -} +#define DEBUG_TYPE "aarch64-selectiondag-info" + +AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM) + : TargetSelectionDAGInfo(TM), + Subtarget(&TM.getSubtarget()) {} + +AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {} + +SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { + // Check to see if there is a specialized entry-point for memory zeroing. + ConstantSDNode *V = dyn_cast(Src); + ConstantSDNode *SizeValue = dyn_cast(Size); + const char *bzeroEntry = + (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : nullptr; + // For small size (< 256), it is not beneficial to use bzero + // instead of memset. 
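+ // Illustrative cases (a sketch, not from this patch): on a subtarget that + // provides a bzero entry point, memset(p, 0, 1024) and memset(p, 0, n) with + // n unknown at compile time take the bzero path below, while memset(p, 0, 64) + // and memset(p, 1, 1024) return SDValue() so the generic expansion handles them.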
+ if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { + const AArch64TargetLowering &TLI = + *static_cast( + DAG.getTarget().getTargetLowering()); -AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() { + EVT IntPtr = TLI.getPointerTy(); + Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; + Entry.Ty = IntPtrTy; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0) + .setDiscardResult(); + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; + } + return SDValue(); } diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h index d412ed2..8381f99 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -11,22 +11,27 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_AARCH64SELECTIONDAGINFO_H -#define LLVM_AARCH64SELECTIONDAGINFO_H +#ifndef AArch64SELECTIONDAGINFO_H +#define AArch64SELECTIONDAGINFO_H #include "llvm/Target/TargetSelectionDAGInfo.h" namespace llvm { -class AArch64TargetMachine; - class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { + /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can + /// make the right decision when generating code for different targets. const AArch64Subtarget *Subtarget; + public: - explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM); + explicit AArch64SelectionDAGInfo(const TargetMachine &TM); ~AArch64SelectionDAGInfo(); -}; + SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, + SDValue Dst, SDValue Src, SDValue Size, + unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const override; +}; } #endif diff --git a/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp new file mode 100644 index 0000000..45f8ddb --- /dev/null +++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -0,0 +1,168 @@ +//===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass identifies floating point stores that should not be combined into +// store pairs. Later we may do the same for floating point loads.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-stp-suppress"
+
+namespace {
+class AArch64StorePairSuppress : public MachineFunctionPass {
+  const AArch64InstrInfo *TII;
+  const TargetRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+  MachineFunction *MF;
+  TargetSchedModel SchedModel;
+  MachineTraceMetrics *Traces;
+  MachineTraceMetrics::Ensemble *MinInstr;
+
+public:
+  static char ID;
+  AArch64StorePairSuppress() : MachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "AArch64 Store Pair Suppression";
+  }
+
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+private:
+  bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
+
+  bool isNarrowFPStore(const MachineInstr &MI);
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineTraceMetrics>();
+    AU.addPreserved<MachineTraceMetrics>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+char AArch64StorePairSuppress::ID = 0;
+} // end anonymous namespace
+
+FunctionPass *llvm::createAArch64StorePairSuppressPass() {
+  return new AArch64StorePairSuppress();
+}
+
+/// Return true if an STP can be added to this block without increasing the
+/// critical resource height. STP is good to form in Ld/St limited blocks and
+/// bad to form in floating-point limited blocks. This is true independent of
+/// the critical path. If the critical path is longer than the resource height,
+/// the extra vector ops can limit physreg renaming. Otherwise, it could simply
+/// oversaturate the vector units.
+bool
+AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
+  if (!MinInstr)
+    MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+
+  MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
+  unsigned ResLength = BBTrace.getResourceLength();
+
+  // Get the machine model's scheduling class for STPDi.
+  // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
+  unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
+  const MCSchedClassDesc *SCDesc =
+      SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
+
+  // If a subtarget does not define resources for STPDi, bail here.
+  if (SCDesc->isValid() && !SCDesc->isVariant()) {
+    unsigned ResLenWithSTP = BBTrace.getResourceLength(
+        ArrayRef<const MachineBasicBlock *>(), SCDesc);
+    if (ResLenWithSTP > ResLength) {
+      DEBUG(dbgs() << "  Suppress STP in BB: " << BB->getNumber()
+                   << " resources " << ResLength << " -> " << ResLenWithSTP
+                   << "\n");
+      return false;
+    }
+  }
+  return true;
+}
+
+/// Return true if this is a floating-point store smaller than the V reg. On
+/// Cyclone, these require a vector shuffle before storing a pair.
+/// Ideally we would call getMatchingPairOpcode() and have the machine model
+/// tell us if it's profitable with no CPU knowledge here.
+///
+/// FIXME: We plan to develop a decent Target abstraction for simple loads and
+/// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
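+///
+/// For example, two adjacent narrow stores such as
+///   str s0, [x8]
+///   str s1, [x8, #4]
+/// could later be paired into "stp s0, s1, [x8]"; on Cyclone that pairing
+/// needs an extra vector shuffle, so this pass may mark the stores unpairable.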
+bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case AArch64::STRSui: + case AArch64::STRDui: + case AArch64::STURSi: + case AArch64::STURDi: + return true; + } +} + +bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + TII = static_cast(MF->getTarget().getInstrInfo()); + TRI = MF->getTarget().getRegisterInfo(); + MRI = &MF->getRegInfo(); + const TargetSubtargetInfo &ST = + MF->getTarget().getSubtarget(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); + + Traces = &getAnalysis(); + MinInstr = nullptr; + + DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n'); + + if (!SchedModel.hasInstrSchedModel()) { + DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); + return false; + } + + // Check for a sequence of stores to the same base address. We don't need to + // precisely determine whether a store pair can be formed. But we do want to + // filter out most situations where we can't form store pairs to avoid + // computing trace metrics in those cases. + for (auto &MBB : *MF) { + bool SuppressSTP = false; + unsigned PrevBaseReg = 0; + for (auto &MI : MBB) { + if (!isNarrowFPStore(MI)) + continue; + unsigned BaseReg; + unsigned Offset; + if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) { + if (PrevBaseReg == BaseReg) { + // If this block can take STPs, skip ahead to the next block. + if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) + break; + // Otherwise, continue unpairing the stores in this block. + DEBUG(dbgs() << "Unpairing store " << MI << "\n"); + SuppressSTP = true; + TII->suppressLdStPair(&MI); + } + PrevBaseReg = BaseReg; + } else + PrevBaseReg = 0; + } + } + // This pass just sets some internal MachineMemOperand flags. It can't really + // invalidate anything. + return false; +} diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 9140bbd..cd69994 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -1,4 +1,4 @@ -//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===// +//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,57 +7,110 @@ // //===----------------------------------------------------------------------===// // -// This file implements the AArch64 specific subclass of TargetSubtargetInfo. +// This file implements the AArch64 specific subclass of TargetSubtarget. // //===----------------------------------------------------------------------===// +#include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" -#include "AArch64RegisterInfo.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-subtarget" -#define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC #include "AArch64GenSubtargetInfo.inc" -using namespace llvm; - -// Pin the vtable to this file. 
-void AArch64Subtarget::anchor() {} +static cl::opt +EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " + "converter pass"), cl::init(true), cl::Hidden); -AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS, - bool LittleEndian) +AArch64Subtarget::AArch64Subtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - HasFPARMv8(false), HasNEON(false), HasCrypto(false), TargetTriple(TT), - CPUString(CPU), IsLittleEndian(LittleEndian) { + HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU), + TargetTriple(TT), IsLittleEndian(LittleEndian) { + // Determine default and user-specified characteristics + + if (CPUString.empty()) + CPUString = "generic"; - initializeSubtargetFeatures(CPU, FS); + ParseSubtargetFeatures(CPUString, FS); } -void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU, - StringRef FS) { - if (CPU.empty()) - CPUString = "generic"; +/// ClassifyGlobalReference - Find the target operand flags that describe +/// how a global value should be referenced for the current subtarget. +unsigned char +AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, + const TargetMachine &TM) const { + + // Determine whether this is a reference to a definition or a declaration. + // Materializable GVs (in JIT lazy compilation mode) do not require an extra + // load from stub. + bool isDecl = GV->hasAvailableExternallyLinkage(); + if (GV->isDeclaration() && !GV->isMaterializable()) + isDecl = true; + + // MachO large model always goes via a GOT, simply to get a single 8-byte + // absolute relocation on all global addresses. + if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) + return AArch64II::MO_GOT; + + // The small code mode's direct accesses use ADRP, which cannot necessarily + // produce the value 0 (if the code is above 4GB). Therefore they must use the + // GOT. + if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl) + return AArch64II::MO_GOT; + + // If symbol visibility is hidden, the extra load is not needed if + // the symbol is definitely defined in the current translation unit. - std::string FullFS = FS; - if (CPUString == "generic") { - // Enable FP by default. - if (FullFS.empty()) - FullFS = "+fp-armv8"; + // The handling of non-hidden symbols in PIC mode is rather target-dependent: + // + On MachO, if the symbol is defined in this module the GOT can be + // skipped. + // + On ELF, the R_AARCH64_COPY relocation means that even symbols actually + // defined could end up in unexpected places. Use a GOT. + if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) { + if (isTargetMachO()) + return (isDecl || GV->isWeakForLinker()) ? AArch64II::MO_GOT + : AArch64II::MO_NO_FLAG; else - FullFS = "+fp-armv8," + FullFS; + // No need to go through the GOT for local symbols on ELF. + return GV->hasLocalLinkage() ? 
AArch64II::MO_NO_FLAG : AArch64II::MO_GOT;
+  }
 
-  ParseSubtargetFeatures(CPU, FullFS);
+  return AArch64II::MO_NO_FLAG;
 }
 
-bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
-                                          Reloc::Model RelocM) const {
-  if (RelocM == Reloc::Static)
-    return false;
+/// This function returns the name of a function which has an interface
+/// like the non-standard bzero function, if such a function exists on
+/// the current subtarget and it is considered preferable over
+/// memset with zero passed as the second argument. Otherwise it
+/// returns null.
+const char *AArch64Subtarget::getBZeroEntry() const {
+  // Prefer bzero on Darwin only.
+  if (isTargetDarwin())
+    return "bzero";
+
+  return nullptr;
+}
+
+void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+                                           MachineInstr *begin,
+                                           MachineInstr *end,
+                                           unsigned NumRegionInstrs) const {
+  // LNT runs (at least on Cyclone) showed reasonably significant gains for
+  // bi-directional scheduling (e.g. on 253.perlbmk).
+  Policy.OnlyTopDown = false;
+  Policy.OnlyBottomUp = false;
+}
 
-  return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
+bool AArch64Subtarget::enableEarlyIfConversion() const {
+  return EnableEarlyIfConvert;
 }
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 68c6c4b..590ea05 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -1,4 +1,4 @@
-//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
+//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,29 +7,27 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
+// This file declares the AArch64 specific subclass of TargetSubtarget.
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
-#define LLVM_TARGET_AARCH64_SUBTARGET_H
+#ifndef AArch64SUBTARGET_H
+#define AArch64SUBTARGET_H
 
-#include "llvm/ADT/Triple.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "AArch64RegisterInfo.h"
+#include <string>
 
 #define GET_SUBTARGETINFO_HEADER
 #include "AArch64GenSubtargetInfo.inc"
 
-#include <string>
-
 namespace llvm {
-class StringRef;
 class GlobalValue;
+class StringRef;
 
 class AArch64Subtarget : public AArch64GenSubtargetInfo {
-  virtual void anchor();
 protected:
-  enum ARMProcFamilyEnum {Others, CortexA53, CortexA57};
+  enum ARMProcFamilyEnum {Others, CortexA53, CortexA57, Cyclone};
 
   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
   ARMProcFamilyEnum ARMProcFamily;
@@ -37,47 +35,76 @@ protected:
   bool HasFPARMv8;
   bool HasNEON;
   bool HasCrypto;
+  bool HasCRC;
 
-  /// TargetTriple - What processor and OS we're targeting.
-  Triple TargetTriple;
+  // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
+  bool HasZeroCycleRegMove;
+
+  // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
+  bool HasZeroCycleZeroing;
 
   /// CPUString - String name of used CPU.
   std::string CPUString;
 
-  /// IsLittleEndian - The target is Little Endian
-  bool IsLittleEndian;
+  /// TargetTriple - What processor and OS we're targeting.
+  Triple TargetTriple;
 
-private:
-  void initializeSubtargetFeatures(StringRef CPU, StringRef FS);
+  /// IsLittleEndian - Is the target little endian?
+  bool IsLittleEndian;
 
 public:
   /// This constructor initializes the data members to match that
   /// of the specified triple.
-  ///
-  AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS,
-                   bool LittleEndian);
+  AArch64Subtarget(const std::string &TT, const std::string &CPU,
+                   const std::string &FS, bool LittleEndian);
 
-  virtual bool enableMachineScheduler() const {
-    return true;
-  }
-
-  /// ParseSubtargetFeatures - Parses features string setting specified
-  /// subtarget options.  Definition of function is auto generated by tblgen.
-  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+  bool enableMachineScheduler() const override { return true; }
 
-  bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+  bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
 
-  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
-  bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+  bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
 
   bool hasFPARMv8() const { return HasFPARMv8; }
   bool hasNEON() const { return HasNEON; }
   bool hasCrypto() const { return HasCrypto; }
+  bool hasCRC() const { return HasCRC; }
+
+  bool isLittleEndian() const { return IsLittleEndian; }
+
+  bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+
+  bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+
+  bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
+  bool isCyclone() const { return CPUString == "cyclone"; }
+
+  /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+  /// that still makes it profitable to inline the call.
+  unsigned getMaxInlineSizeThreshold() const { return 64; }
+
+  /// ParseSubtargetFeatures - Parses features string setting specified
+  /// subtarget options.  Definition of function is auto-generated by tblgen.
+  void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+  /// ClassifyGlobalReference - Find the target operand flags that describe
+  /// how a global value should be referenced for the current subtarget.
+  unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+                                        const TargetMachine &TM) const;
+
+  /// This function returns the name of a function which has an interface
+  /// like the non-standard bzero function, if such a function exists on
+  /// the current subtarget and it is considered preferable over
+  /// memset with zero passed as the second argument. Otherwise it
+  /// returns null.
+  const char *getBZeroEntry() const;
 
-  bool isLittle() const { return IsLittleEndian; }
+  void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin,
+                           MachineInstr *end,
+                           unsigned NumRegionInstrs) const override;
 
-  const std::string & getCPUString() const { return CPUString; }
+  bool enableEarlyIfConversion() const override;
 };
 } // End llvm namespace
 
-#endif  // LLVM_TARGET_AARCH64_SUBTARGET_H
+#endif  // AArch64SUBTARGET_H
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d9c990d..0b5dd2f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -7,41 +7,80 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the implementation of the AArch64TargetMachine
-// methods. Principally just setting up the passes needed to generate correct
-// code on this architecture.
 //
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
 #include "AArch64TargetMachine.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetRegistry.h"
-
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
+static cl::opt<bool>
+EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
+           cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"),
+                     cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableAdvSIMDScalar("aarch64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar"
+                    " integer instructions"), cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnablePromoteConstant("aarch64-promote-const", cl::desc("Enable the promote "
+                      "constant pass"), cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableCollectLOH("aarch64-collect-loh", cl::desc("Enable the pass that emits the"
+                 " linker optimization hints (LOH)"), cl::init(true),
+                 cl::Hidden);
+
+static cl::opt<bool>
+EnableDeadRegisterElimination("aarch64-dead-def-elimination", cl::Hidden,
+                              cl::desc("Enable the pass that removes dead"
+                                       " definitions and replaces stores to"
+                                       " them with stores to the zero"
+                                       " register"),
+                              cl::init(true));
+
+static cl::opt<bool>
+EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
+                   " optimization pass"), cl::init(true), cl::Hidden);
+
 extern "C" void LLVMInitializeAArch64Target() {
+  // Register the target.
   RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
   RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget);
+
+  RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64leTarget);
+  RegisterTargetMachine<AArch64beTargetMachine> W(TheARM64beTarget);
 }
 
+/// TargetMachine ctor - Create an AArch64 architecture model.
+///
 AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
                                            StringRef CPU, StringRef FS,
                                            const TargetOptions &Options,
                                            Reloc::Model RM, CodeModel::Model CM,
                                            CodeGenOpt::Level OL,
                                            bool LittleEndian)
-  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    Subtarget(TT, CPU, FS, LittleEndian),
-    InstrInfo(Subtarget),
-    DL(LittleEndian ?
-       "e-m:e-i64:64-i128:128-n32:64-S128" :
-       "E-m:e-i64:64-i128:128-n32:64-S128"),
-    TLInfo(*this),
-    TSInfo(*this),
-    FrameLowering(Subtarget) {
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+      Subtarget(TT, CPU, FS, LittleEndian),
+      // This nested ternary is horrible, but DL needs to be properly
+      // initialized before TLInfo is constructed.
+      DL(Subtarget.isTargetMachO()
+             ? "e-m:o-i64:64-i128:128-n32:64-S128"
+             : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
+                             : "E-m:e-i64:64-i128:128-n32:64-S128")),
+      InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
+      TSInfo(*this) {
   initAsmInfo();
 }
 
@@ -63,50 +102,107 @@ AArch64beTargetMachine(const Target &T, StringRef TT,
                        CodeGenOpt::Level OL)
     : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
 
-void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
-  // Add first the target-independent BasicTTI pass, then our AArch64 pass. This
-  // allows the AArch64 pass to delegate to the target independent layer when
-  // appropriate.
-  PM.add(createBasicTargetTransformInfoPass(this));
-  PM.add(createAArch64TargetTransformInfoPass(this));
-}
-
 namespace {
 /// AArch64 Code Generator Pass Configuration Options.
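+/// The overrides below arrange the AArch64-specific passes roughly as:
+/// pre-ISel (promote-const, global merge, atomic expansion), instruction
+/// selection, ILP opts (CCMP formation, early if-conversion, STP
+/// suppression), pre/post-RA cleanups, and finally branch relaxation plus
+/// LOH collection.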
 class AArch64PassConfig : public TargetPassConfig {
 public:
   AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
-    : TargetPassConfig(TM, PM) {}
+      : TargetPassConfig(TM, PM) {}
 
   AArch64TargetMachine &getAArch64TargetMachine() const {
     return getTM<AArch64TargetMachine>();
   }
 
-  const AArch64Subtarget &getAArch64Subtarget() const {
-    return *getAArch64TargetMachine().getSubtargetImpl();
-  }
-
-  virtual bool addInstSelector();
-  virtual bool addPreEmitPass();
+  bool addPreISel() override;
+  bool addInstSelector() override;
+  bool addILPOpts() override;
+  bool addPreRegAlloc() override;
+  bool addPostRegAlloc() override;
+  bool addPreSched2() override;
+  bool addPreEmitPass() override;
 };
 } // namespace
 
+void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+  // Add first the target-independent BasicTTI pass, then our AArch64 pass. This
+  // allows the AArch64 pass to delegate to the target independent layer when
+  // appropriate.
+  PM.add(createBasicTargetTransformInfoPass(this));
+  PM.add(createAArch64TargetTransformInfoPass(this));
+}
+
 TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
   return new AArch64PassConfig(this, PM);
 }
 
-bool AArch64PassConfig::addPreEmitPass() {
-  addPass(&UnpackMachineBundlesID);
-  addPass(createAArch64BranchFixupPass());
-  return true;
+// Pass Pipeline Configuration
+bool AArch64PassConfig::addPreISel() {
+  // Run promote constant before global merge, so that the promoted constants
+  // get a chance to be merged.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
+    addPass(createAArch64PromoteConstantPass());
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createGlobalMergePass(TM));
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createAArch64AddressTypePromotionPass());
+
+  // Always expand atomic operations; we don't deal with atomicrmw or cmpxchg
+  // ourselves.
+  addPass(createAtomicExpandLoadLinkedPass(TM));
+
+  return false;
 }
 
 bool AArch64PassConfig::addInstSelector() {
-  addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
 
-  // For ELF, cleanup any local-dynamic TLS accesses.
-  if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+  // For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
+  // references to _TLS_MODULE_BASE_ as possible).
+  if (TM->getSubtarget<AArch64Subtarget>().isTargetELF() &&
+      getOptLevel() != CodeGenOpt::None)
     addPass(createAArch64CleanupLocalDynamicTLSPass());
 
   return false;
 }
+
+bool AArch64PassConfig::addILPOpts() {
+  if (EnableCCMP)
+    addPass(createAArch64ConditionalCompares());
+  addPass(&EarlyIfConverterID);
+  if (EnableStPairSuppress)
+    addPass(createAArch64StorePairSuppressPass());
+  return true;
+}
+
+bool AArch64PassConfig::addPreRegAlloc() {
+  // Use AdvSIMD scalar instructions whenever profitable.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar)
+    addPass(createAArch64AdvSIMDScalar());
+  return true;
+}
+
+bool AArch64PassConfig::addPostRegAlloc() {
+  // Change dead register definitions to refer to the zero register.
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
+    addPass(createAArch64DeadRegisterDefinitions());
+  return true;
+}
+
+bool AArch64PassConfig::addPreSched2() {
+  // Expand some pseudo instructions to allow proper scheduling.
+  addPass(createAArch64ExpandPseudoPass());
+  // Use load/store pair instructions when possible.
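+  // For example, the optimizer can rewrite the adjacent pair
+  //   str x0, [sp, #16]
+  //   str x1, [sp, #24]
+  // into the single instruction "stp x0, x1, [sp, #16]".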
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt) + addPass(createAArch64LoadStoreOptimizationPass()); + return true; +} + +bool AArch64PassConfig::addPreEmitPass() { + // Relax conditional branch instructions if they're otherwise out of + // range of their destination. + addPass(createAArch64BranchRelaxation()); + if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH && + TM->getSubtarget().isTargetMachO()) + addPass(createAArch64CollectLOHPass()); + return true; +} diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 4297c92..079b19b 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -1,4 +1,4 @@ -//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===// +//==-- AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -11,60 +11,60 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_AARCH64TARGETMACHINE_H -#define LLVM_AARCH64TARGETMACHINE_H +#ifndef AArch64TARGETMACHINE_H +#define AArch64TARGETMACHINE_H -#include "AArch64FrameLowering.h" -#include "AArch64ISelLowering.h" #include "AArch64InstrInfo.h" -#include "AArch64SelectionDAGInfo.h" +#include "AArch64ISelLowering.h" #include "AArch64Subtarget.h" +#include "AArch64FrameLowering.h" +#include "AArch64SelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCStreamer.h" namespace llvm { class AArch64TargetMachine : public LLVMTargetMachine { - AArch64Subtarget Subtarget; - AArch64InstrInfo InstrInfo; - const DataLayout DL; - AArch64TargetLowering TLInfo; - AArch64SelectionDAGInfo TSInfo; - AArch64FrameLowering FrameLowering; +protected: + AArch64Subtarget Subtarget; + +private: + const DataLayout DL; + AArch64InstrInfo InstrInfo; + AArch64TargetLowering TLInfo; + AArch64FrameLowering FrameLowering; + AArch64SelectionDAGInfo TSInfo; public: AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool LittleEndian); + CodeGenOpt::Level OL, bool IsLittleEndian); - const AArch64InstrInfo *getInstrInfo() const { - return &InstrInfo; + const AArch64Subtarget *getSubtargetImpl() const override { + return &Subtarget; } - - const AArch64FrameLowering *getFrameLowering() const { + const AArch64TargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const DataLayout *getDataLayout() const override { return &DL; } + const AArch64FrameLowering *getFrameLowering() const override { return &FrameLowering; } - - const AArch64TargetLowering *getTargetLowering() const { - return &TLInfo; + const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } + const AArch64RegisterInfo *getRegisterInfo() const override { + return &InstrInfo.getRegisterInfo(); } - - const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } - const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; } - - const DataLayout *getDataLayout() const { return &DL; } - - const TargetRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - TargetPassConfig *createPassConfig(PassManagerBase &PM); + // Pass Pipeline Configuration + TargetPassConfig *createPassConfig(PassManagerBase &PM) 
override; - virtual void addAnalysisPasses(PassManagerBase &PM); + /// \brief Register AArch64 analysis passes with a pass manager. + void addAnalysisPasses(PassManagerBase &PM) override; }; // AArch64leTargetMachine - AArch64 little endian target machine. @@ -72,8 +72,8 @@ public: class AArch64leTargetMachine : public AArch64TargetMachine { virtual void anchor(); public: - AArch64leTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, + AArch64leTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; @@ -83,12 +83,12 @@ public: class AArch64beTargetMachine : public AArch64TargetMachine { virtual void anchor(); public: - AArch64beTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, + AArch64beTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; -} // End llvm namespace +} // end namespace llvm #endif diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp index 663d619..4069038 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp +++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp @@ -6,19 +6,47 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file deals with any AArch64 specific requirements on object files. -// -//===----------------------------------------------------------------------===// - #include "AArch64TargetObjectFile.h" - +#include "AArch64TargetMachine.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Dwarf.h" using namespace llvm; +using namespace dwarf; -void -AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { +void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); } + +const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference( + const GlobalValue *GV, unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, MachineModuleInfo *MMI, + MCStreamer &Streamer) const { + // On Darwin, we can reference dwarf symbols with foo@GOT-., which + // is an indirect pc-relative reference. The default implementation + // won't reference using the GOT, so we need this target-specific + // version. 
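+  // Concretely, the expression built below has the form
+  //   Lfoo@GOT - Ltmp
+  // where Ltmp is a temporary label emitted at the current position.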
+ if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + const MCSymbol *Sym = TM.getSymbol(GV, Mang); + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); + MCSymbol *PCSym = getContext().CreateTempSymbol(); + Streamer.EmitLabel(PCSym); + const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); + return MCBinaryExpr::CreateSub(Res, PC, getContext()); + } + + return TargetLoweringObjectFileMachO::getTTypeGlobalReference( + GV, Encoding, Mang, TM, MMI, Streamer); +} + +MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol( + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + MachineModuleInfo *MMI) const { + return TM.getSymbol(GV, Mang); +} diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h index 0f00a78..de63cb4 100644 --- a/lib/Target/AArch64/AArch64TargetObjectFile.h +++ b/lib/Target/AArch64/AArch64TargetObjectFile.h @@ -1,4 +1,4 @@ -//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===// +//===-- AArch64TargetObjectFile.h - AArch64 Object Info -*- C++ ---------*-===// // // The LLVM Compiler Infrastructure // @@ -6,25 +6,34 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file deals with any AArch64 specific requirements on object files. -// -//===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H -#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H +#ifndef LLVM_TARGET_AArch64_TARGETOBJECTFILE_H +#define LLVM_TARGET_AArch64_TARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" namespace llvm { +class AArch64TargetMachine; + +/// This implementation is used for AArch64 ELF targets (Linux in particular). +class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; +}; + +/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. +class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { +public: + const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, + unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, + MachineModuleInfo *MMI, + MCStreamer &Streamer) const override; - /// AArch64ElfTargetObjectFile - This implementation is used for ELF - /// AArch64 targets. 
- class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF { - virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); - }; + MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM, + MachineModuleInfo *MMI) const override; +}; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e2a1647..33e482a 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass ---------===// +//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===// // // The LLVM Compiler Infrastructure // @@ -14,15 +14,18 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "aarch64tti" #include "AArch64.h" #include "AArch64TargetMachine.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" +#include using namespace llvm; +#define DEBUG_TYPE "aarch64tti" + // Declare the pass initialization routine locally as target-specific passes // don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. @@ -33,25 +36,28 @@ void initializeAArch64TTIPass(PassRegistry &); namespace { class AArch64TTI final : public ImmutablePass, public TargetTransformInfo { + const AArch64TargetMachine *TM; const AArch64Subtarget *ST; const AArch64TargetLowering *TLI; + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + public: - AArch64TTI() : ImmutablePass(ID), ST(0), TLI(0) { + AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } AArch64TTI(const AArch64TargetMachine *TM) - : ImmutablePass(ID), ST(TM->getSubtargetImpl()), + : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), TLI(TM->getTargetLowering()) { initializeAArch64TTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() override { - pushTTIStack(this); - } + void initializePass() override { pushTTIStack(this); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -59,31 +65,37 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. 
-  virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+  void *getAdjustedAnalysisPointer(const void *ID) override {
     if (ID == &TargetTransformInfo::ID)
-      return (TargetTransformInfo*)this;
+      return (TargetTransformInfo *)this;
     return this;
   }
 
   /// \name Scalar TTI Implementations
   /// @{
+  unsigned getIntImmCost(int64_t Val) const;
+  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
+  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                         Type *Ty) const override;
+  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+                         Type *Ty) const override;
+  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
 
   /// @}
-
   /// \name Vector TTI Implementations
   /// @{
 
-  unsigned getNumberOfRegisters(bool Vector) const {
+  unsigned getNumberOfRegisters(bool Vector) const override {
     if (Vector) {
       if (ST->hasNEON())
         return 32;
       return 0;
     }
-    return 32;
+    return 31;
   }
 
-  unsigned getRegisterBitWidth(bool Vector) const {
+  unsigned getRegisterBitWidth(bool Vector) const override {
     if (Vector) {
       if (ST->hasNEON())
         return 128;
@@ -92,6 +104,26 @@ public:
     return 64;
   }
 
+  unsigned getMaximumUnrollFactor() const override { return 2; }
+
+  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
+      override;
+
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
+      override;
+
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  OperandValueKind Opd1Info = OK_AnyValue,
+                                  OperandValueKind Opd2Info = OK_AnyValue) const
+      override;
+
+  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
+
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
+      override;
+
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                           unsigned AddressSpace) const override;
   /// @}
 };
 
@@ -105,3 +137,328 @@
 ImmutablePass *
 llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
   return new AArch64TTI(TM);
 }
+
+/// \brief Calculate the cost of materializing a 64-bit value. This helper
+/// method might only calculate a fraction of a larger immediate. Therefore it
+/// is valid to return a cost of ZERO.
+unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
+  // Check if the immediate can be encoded within an instruction.
+  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
+    return 0;
+
+  if (Val < 0)
+    Val = ~Val;
+
+  // Calculate how many moves we will need to materialize this constant.
+  unsigned LZ = countLeadingZeros((uint64_t)Val);
+  return (64 - LZ + 15) / 16;
+}
+
+/// \brief Calculate the cost of materializing the given constant.
+unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+  assert(Ty->isIntegerTy());
+
+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  if (BitSize == 0)
+    return ~0U;
+
+  // Sign-extend all constants to a multiple of 64 bits.
+  APInt ImmVal = Imm;
+  if (BitSize & 0x3f)
+    ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+
+  // Split the constant into 64-bit chunks and calculate the cost for each
+  // chunk.
+  unsigned Cost = 0;
+  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
+    APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
+    int64_t Val = Tmp.getSExtValue();
+    Cost += getIntImmCost(Val);
+  }
+  // We need at least one instruction to materialize the constant.
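+  // Worked example: 0x1234567890ABCDEF has 3 leading zeros, so the chunk
+  // above costs (64 - 3 + 15) / 16 = 4 instructions (one MOVZ plus three
+  // MOVKs).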
+ return std::max(1U, Cost); +} + +unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TCC_Free; + + unsigned ImmIdx = ~0U; + switch (Opcode) { + default: + return TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. + if (Idx == 0) + return 2 * TCC_Basic; + return TCC_Free; + case Instruction::Store: + ImmIdx = 0; + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + ImmIdx = 1; + break; + // Always return TCC_Free for the shift value of a shift instruction. + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + if (Idx == 1) + return TCC_Free; + break; + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + if (Idx == ImmIdx) { + unsigned NumConstants = (BitSize + 63) / 64; + unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TCC_Basic) + ? static_cast(TCC_Free) : Cost; + } + return AArch64TTI::getIntImmCost(Imm, Ty); +} + +unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + // There is no cost model for constants with a bit size of 0. Return TCC_Free + // here, so that constant hoisting will ignore this constant. + if (BitSize == 0) + return TCC_Free; + + switch (IID) { + default: + return TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + if (Idx == 1) { + unsigned NumConstants = (BitSize + 63) / 64; + unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TCC_Basic) + ? static_cast(TCC_Free) : Cost; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TCC_Free; + break; + } + return AArch64TTI::getIntImmCost(Imm, Ty); +} + +AArch64TTI::PopcntSupportKind +AArch64TTI::getPopcntSupport(unsigned TyWidth) const { + assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); + if (TyWidth == 32 || TyWidth == 64) + return PSK_FastHardware; + // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount. 
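+  // 32- and 64-bit popcounts are custom-lowered via the vector CNT
+  // instruction (FMOV to an FPR, CNT, then an add across lanes), which is
+  // why they report PSK_FastHardware above.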
+ return PSK_Software; +} + +unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + EVT SrcTy = TLI->getValueType(Src); + EVT DstTy = TLI->getValueType(Dst); + + if (!SrcTy.isSimple() || !DstTy.isSimple()) + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); + + static const TypeConversionCostTblEntry ConversionTbl[] = { + // LowerVectorINT_TO_FP: + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, + // LowerVectorFP_TO_INT + { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 }, + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 }, + { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 }, + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 }, + { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 }, + }; + + int Idx = ConvertCostTableLookup( + ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT()); + if (Idx != -1) + return ConversionTbl[Idx].Cost; + + return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); +} + +unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + assert(Val->isVectorTy() && "This must be a vector type"); + + if (Index != -1U) { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(Val); + + // This type is legalized to a scalar type. + if (!LT.second.isVector()) + return 0; + + // The type may be split. Normalize the index to the new type. + unsigned Width = LT.second.getVectorNumElements(); + Index = Index % Width; + + // The element at index zero is already inside the vector. + if (Index == 0) + return 0; + } + + // All other insert/extracts cost this much. + return 2; +} + +unsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind Opd1Info, + OperandValueKind Opd2Info) const { + // Legalize the type. + std::pair LT = TLI->getTypeLegalizationCost(Ty); + + int ISD = TLI->InstructionOpcodeToISD(Opcode); + + switch (ISD) { + default: + return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info, + Opd2Info); + case ISD::ADD: + case ISD::MUL: + case ISD::XOR: + case ISD::OR: + case ISD::AND: + // These nodes are marked as 'custom' for combining purposes only. + // We know that they are legal. See LowerAdd in ISelLowering. + return 1 * LT.first; + } +} + +unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { + // Address computations in vectorized code with non-consecutive addresses will + // likely result in more instructions compared to scalar code where the + // computation can more often be merged into the index mode. The resulting + // extra micro-ops can significantly decrease throughput. 
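+  // "IsComplex" typically means a non-consecutive access, e.g. a strided
+  // a[i * 7], where every vector lane needs its own address arithmetic.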
+  unsigned NumVectorInstToHideOverhead = 10;
+
+  if (Ty->isVectorTy() && IsComplex)
+    return NumVectorInstToHideOverhead;
+
+  // In many cases the address computation is not merged into the instruction
+  // addressing mode.
+  return 1;
+}
+
+unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+                                        Type *CondTy) const {
+
+  int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  // We don't handle vector selects wider than the register width well.
+  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
+    // We would need this many instructions to hide the scalarization happening.
+    unsigned AmortizationCost = 20;
+    static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+    VectorSelectTbl[] = {
+      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
+      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
+      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
+      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
+      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
+      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
+    };
+
+    EVT SelCondTy = TLI->getValueType(CondTy);
+    EVT SelValTy = TLI->getValueType(ValTy);
+    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
+      int Idx =
+          ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
+                                 SelValTy.getSimpleVT());
+      if (Idx != -1)
+        return VectorSelectTbl[Idx].Cost;
+    }
+  }
+  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                     unsigned Alignment,
+                                     unsigned AddressSpace) const {
+  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+  if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
+      Src->getVectorElementType()->isIntegerTy(64)) {
+    // Unaligned stores are extremely inefficient. We don't split unaligned
+    // v2i64 stores because of the negative impact that has been observed in
+    // practice on inlined memcpy code.
+    // We make v2i64 stores expensive so that we will only vectorize if there
+    // are 6 other instructions getting vectorized.
+    unsigned AmortizationCost = 6;
+
+    return LT.first * 2 * AmortizationCost;
+  }
+
+  if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
+      Src->getVectorNumElements() < 8) {
+    // We scalarize the loads/stores because there is no v.4b register and we
+    // have to promote the elements to v.4h.
+    unsigned NumVecElts = Src->getVectorNumElements();
+    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
+    // We generate 2 instructions per vector element.
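+    // For example, a v4i8 store has NumVecElts = 4 and thus reports a cost
+    // of 8 * 4 * 2 = 64, strongly discouraging vectorization at this width.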
+ return NumVectorizableInstsToAmortize * NumVecElts * 2; + } + + return LT.first; +} diff --git a/lib/Target/AArch64/Android.mk b/lib/Target/AArch64/Android.mk index 144c2d3..d0a50da 100644 --- a/lib/Target/AArch64/Android.mk +++ b/lib/Target/AArch64/Android.mk @@ -3,31 +3,41 @@ LOCAL_PATH := $(call my-dir) arm64_codegen_TBLGEN_TABLES := \ AArch64GenRegisterInfo.inc \ AArch64GenInstrInfo.inc \ - AArch64GenCodeEmitter.inc \ - AArch64GenMCCodeEmitter.inc \ - AArch64GenMCPseudoLowering.inc \ AArch64GenAsmWriter.inc \ - AArch64GenAsmMatcher.inc \ + AArch64GenAsmWriter1.inc \ AArch64GenDAGISel.inc \ - AArch64GenFastISel.inc \ AArch64GenCallingConv.inc \ + AArch64GenAsmMatcher.inc \ AArch64GenSubtargetInfo.inc \ - AArch64GenDisassemblerTables.inc + AArch64GenMCCodeEmitter.inc \ + AArch64GenFastISel.inc \ + AArch64GenDisassemblerTables.inc \ + AArch64GenMCPseudoLowering.inc \ arm64_codegen_SRC_FILES := \ + AArch64AddressTypePromotion.cpp \ + AArch64AdvSIMDScalarPass.cpp \ AArch64AsmPrinter.cpp \ + AArch64BranchRelaxation.cpp \ + AArch64CleanupLocalDynamicTLSPass.cpp \ + AArch64CollectLOH.cpp \ + AArch64ConditionalCompares.cpp \ + AArch64DeadRegisterDefinitionsPass.cpp \ + AArch64ExpandPseudoInsts.cpp \ + AArch64FastISel.cpp \ AArch64FrameLowering.cpp \ - AArch64ISelDAGToDAG.cpp \ - AArch64MachineFunctionInfo.cpp \ - AArch64RegisterInfo.cpp \ - AArch64Subtarget.cpp \ - AArch64TargetObjectFile.cpp \ - AArch64BranchFixupPass.cpp \ AArch64InstrInfo.cpp \ + AArch64ISelDAGToDAG.cpp \ AArch64ISelLowering.cpp \ + AArch64LoadStoreOptimizer.cpp \ AArch64MCInstLower.cpp \ + AArch64PromoteConstant.cpp \ + AArch64RegisterInfo.cpp \ AArch64SelectionDAGInfo.cpp \ + AArch64StorePairSuppress.cpp \ + AArch64Subtarget.cpp \ AArch64TargetMachine.cpp \ + AArch64TargetObjectFile.cpp \ AArch64TargetTransformInfo.cpp # For the host diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index e933ec1..65b77c5 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -6,34 +6,31 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file contains the (GNU-style) assembly parser for the AArch64 -// architecture. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64MCExpr.h" #include "Utils/AArch64BaseInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" - +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include using namespace llvm; namespace { @@ -41,21 +38,74 @@ namespace { class AArch64Operand; class AArch64AsmParser : public MCTargetAsmParser { +public: + typedef SmallVectorImpl OperandVector; + +private: + StringRef Mnemonic; ///< Instruction mnemonic. MCSubtargetInfo &STI; MCAsmParser &Parser; + MCAsmParser &getParser() const { return Parser; } + MCAsmLexer &getLexer() const { return Parser.getLexer(); } + + SMLoc getLoc() const { return Parser.getTok().getLoc(); } + + bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); + AArch64CC::CondCode parseCondCodeString(StringRef Cond); + bool parseCondCode(OperandVector &Operands, bool invertCondCode); + int tryParseRegister(); + int tryMatchVectorRegister(StringRef &Kind, bool expected); + bool parseRegister(OperandVector &Operands); + bool parseSymbolicImmVal(const MCExpr *&ImmVal); + bool parseVectorList(OperandVector &Operands); + bool parseOperand(OperandVector &Operands, bool isCondCode, + bool invertCondCode); + + void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } + bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } + bool showMatchError(SMLoc Loc, unsigned ErrCode); + + bool parseDirectiveWord(unsigned Size, SMLoc L); + bool parseDirectiveTLSDescCall(SMLoc L); + + bool parseDirectiveLOH(StringRef LOH, SMLoc L); + + bool validateInstruction(MCInst &Inst, SmallVectorImpl &Loc); + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, + bool MatchingInlineAsm) override; +/// @name Auto-generated Match Functions +/// { + #define GET_ASSEMBLER_HEADER #include "AArch64GenAsmMatcher.inc" + /// } + + OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands); + OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands); + OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands); + OperandMatchResultTy tryParseSysReg(OperandVector &Operands); + OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands); + OperandMatchResultTy tryParsePrefetch(OperandVector &Operands); + OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands); + OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands); + OperandMatchResultTy 
tryParseFPImm(OperandVector &Operands); + OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands); + OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands); + bool tryParseVectorRegister(OperandVector &Operands); + public: enum AArch64MatchResultTy { - Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY, + Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "AArch64GenAsmMatcher.inc" }; - AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII) + const MCInstrInfo &MII, + const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); @@ -63,191 +113,197 @@ public: setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } - // These are the public interface of the MCTargetAsmParser - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl &Operands); - - bool ParseDirective(AsmToken DirectiveID); - bool ParseDirectiveTLSDescCall(SMLoc L); - bool ParseDirectiveWord(unsigned Size, SMLoc L); - - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl &Operands, - MCStreamer&Out, unsigned &ErrorInfo, - bool MatchingInlineAsm); - - // The rest of the sub-parsers have more freedom over interface: they return - // an OperandMatchResultTy because it's less ambiguous than true/false or - // -1/0/1 even if it is more verbose - OperandMatchResultTy - ParseOperand(SmallVectorImpl &Operands, - StringRef Mnemonic); - - OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal); - - OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind); - - OperandMatchResultTy - ParseNEONLane(SmallVectorImpl &Operands, - uint32_t NumLanes); - - OperandMatchResultTy - ParseRegister(SmallVectorImpl &Operands, - uint32_t &NumLanes); - - OperandMatchResultTy - ParseImmWithLSLOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseCondCodeOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseCRxOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseFPImmOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseFPImm0AndImm0Operand( SmallVectorImpl &Operands); - - template OperandMatchResultTy - ParseNamedImmOperand(SmallVectorImpl &Operands) { - return ParseNamedImmOperand(SomeNamedImmMapper(), Operands); - } - - OperandMatchResultTy - ParseNamedImmOperand(const NamedImmMapper &Mapper, - SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseLSXAddressOperand(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseShiftExtend(SmallVectorImpl &Operands); - - OperandMatchResultTy - ParseSysRegOperand(SmallVectorImpl &Operands); - - bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout, - SMLoc &LayoutLoc); - - OperandMatchResultTy ParseVectorList(SmallVectorImpl &); - - bool validateInstruction(MCInst &Inst, - const SmallVectorImpl &Operands); - - /// Scan the next token (which had better be an identifier) and determine - /// whether it represents a general-purpose or vector register. It returns - /// true if an identifier was found and populates its reference arguments. It - /// does not consume the token. 
- bool - IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec, - SMLoc &LayoutLoc) const; - + SMLoc NameLoc, OperandVector &Operands) override; + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; + bool ParseDirective(AsmToken DirectiveID) override; + unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned Kind) override; + + static bool classifySymbolRef(const MCExpr *Expr, + AArch64MCExpr::VariantKind &ELFRefKind, + MCSymbolRefExpr::VariantKind &DarwinRefKind, + int64_t &Addend); }; - -} +} // end anonymous namespace namespace { -/// Instances of this class represent a parsed AArch64 machine instruction. +/// AArch64Operand - Instances of this class represent a parsed AArch64 machine +/// instruction. class AArch64Operand : public MCParsedAsmOperand { private: enum KindTy { - k_ImmWithLSL, // #uimm {, LSL #amt } - k_CondCode, // eq/ne/... - k_FPImmediate, // Limited-precision floating-point imm - k_Immediate, // Including expressions referencing symbols + k_Immediate, + k_ShiftedImm, + k_CondCode, k_Register, + k_VectorList, + k_VectorIndex, + k_Token, + k_SysReg, + k_SysCR, + k_Prefetch, k_ShiftExtend, - k_VectorList, // A sequential list of 1 to 4 registers. - k_SysReg, // The register operand of MRS and MSR instructions - k_Token, // The mnemonic; other raw tokens the auto-generated - k_WrappedRegister // Load/store exclusive permit a wrapped register. + k_FPImm, + k_Barrier } Kind; SMLoc StartLoc, EndLoc; - struct ImmWithLSLOp { - const MCExpr *Val; - unsigned ShiftAmount; - bool ImplicitAmount; + struct TokOp { + const char *Data; + unsigned Length; + bool IsSuffix; // Is the operand actually a suffix on the mnemonic. }; - struct CondCodeOp { - A64CC::CondCodes Code; + struct RegOp { + unsigned RegNum; + bool isVector; }; - struct FPImmOp { - double Val; + struct VectorListOp { + unsigned RegNum; + unsigned Count; + unsigned NumElements; + unsigned ElementKind; + }; + + struct VectorIndexOp { + unsigned Val; }; struct ImmOp { const MCExpr *Val; }; - struct RegOp { - unsigned RegNum; + struct ShiftedImmOp { + const MCExpr *Val; + unsigned ShiftAmount; }; - struct ShiftExtendOp { - A64SE::ShiftExtSpecifiers ShiftType; - unsigned Amount; - bool ImplicitAmount; + struct CondCodeOp { + AArch64CC::CondCode Code; }; - // A vector register list is a sequential list of 1 to 4 registers. - struct VectorListOp { - unsigned RegNum; - unsigned Count; - A64Layout::VectorLayout Layout; + struct FPImmOp { + unsigned Val; // Encoded 8-bit representation. + }; + + struct BarrierOp { + unsigned Val; // Not the enum since not all values have names. }; struct SysRegOp { const char *Data; unsigned Length; + uint64_t FeatureBits; // We need to pass through information about which + // core we are compiling for so that the SysReg + // Mappers can appropriately conditionalize. 
 };

-  struct TokOp {
-    const char *Data;
-    unsigned Length;
+  struct SysCRImmOp {
+    unsigned Val;
+  };
+
+  struct PrefetchOp {
+    unsigned Val;
+  };
+
+  struct ShiftExtendOp {
+    AArch64_AM::ShiftExtendType Type;
+    unsigned Amount;
+    bool HasExplicitAmount;
+  };
+
+  struct ExtendOp {
+    unsigned Val;
   };

   union {
-    struct ImmWithLSLOp ImmWithLSL;
-    struct CondCodeOp CondCode;
-    struct FPImmOp FPImm;
-    struct ImmOp Imm;
+    struct TokOp Tok;
     struct RegOp Reg;
-    struct ShiftExtendOp ShiftExtend;
     struct VectorListOp VectorList;
+    struct VectorIndexOp VectorIndex;
+    struct ImmOp Imm;
+    struct ShiftedImmOp ShiftedImm;
+    struct CondCodeOp CondCode;
+    struct FPImmOp FPImm;
+    struct BarrierOp Barrier;
     struct SysRegOp SysReg;
-    struct TokOp Tok;
+    struct SysCRImmOp SysCRImm;
+    struct PrefetchOp Prefetch;
+    struct ShiftExtendOp ShiftExtend;
   };

-  AArch64Operand(KindTy K, SMLoc S, SMLoc E)
-    : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+  // Keep the MCContext around, as the MCExprs may need to be manipulated
+  // during the add<>Operands() calls.
+  MCContext &Ctx;
+
+  AArch64Operand(KindTy K, MCContext &_Ctx)
+      : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}

 public:
-  AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() {
+  AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
+    Kind = o.Kind;
+    StartLoc = o.StartLoc;
+    EndLoc = o.EndLoc;
+    switch (Kind) {
+    case k_Token:
+      Tok = o.Tok;
+      break;
+    case k_Immediate:
+      Imm = o.Imm;
+      break;
+    case k_ShiftedImm:
+      ShiftedImm = o.ShiftedImm;
+      break;
+    case k_CondCode:
+      CondCode = o.CondCode;
+      break;
+    case k_FPImm:
+      FPImm = o.FPImm;
+      break;
+    case k_Barrier:
+      Barrier = o.Barrier;
+      break;
+    case k_Register:
+      Reg = o.Reg;
+      break;
+    case k_VectorList:
+      VectorList = o.VectorList;
+      break;
+    case k_VectorIndex:
+      VectorIndex = o.VectorIndex;
+      break;
+    case k_SysReg:
+      SysReg = o.SysReg;
+      break;
+    case k_SysCR:
+      SysCRImm = o.SysCRImm;
+      break;
+    case k_Prefetch:
+      Prefetch = o.Prefetch;
+      break;
+    case k_ShiftExtend:
+      ShiftExtend = o.ShiftExtend;
+      break;
+    }
   }

-  SMLoc getStartLoc() const { return StartLoc; }
-  SMLoc getEndLoc() const { return EndLoc; }
-  void print(raw_ostream&) const;
-  void dump() const;
+  /// getStartLoc - Get the location of the first token of this operand.
+  SMLoc getStartLoc() const override { return StartLoc; }
+  /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const override { return EndLoc; } StringRef getToken() const { assert(Kind == k_Token && "Invalid access!"); return StringRef(Tok.Data, Tok.Length); } - unsigned getReg() const { - assert((Kind == k_Register || Kind == k_WrappedRegister) - && "Invalid access!"); - return Reg.RegNum; + bool isTokenSuffix() const { + assert(Kind == k_Token && "Invalid access!"); + return Tok.IsSuffix; } const MCExpr *getImm() const { @@ -255,731 +311,779 @@ public: return Imm.Val; } - A64CC::CondCodes getCondCode() const { - assert(Kind == k_CondCode && "Invalid access!"); - return CondCode.Code; + const MCExpr *getShiftedImmVal() const { + assert(Kind == k_ShiftedImm && "Invalid access!"); + return ShiftedImm.Val; } - static bool isNonConstantExpr(const MCExpr *E, - AArch64MCExpr::VariantKind &Variant) { - if (const AArch64MCExpr *A64E = dyn_cast(E)) { - Variant = A64E->getKind(); - return true; - } else if (!isa(E)) { - Variant = AArch64MCExpr::VK_AARCH64_None; - return true; - } - - return false; + unsigned getShiftedImmShift() const { + assert(Kind == k_ShiftedImm && "Invalid access!"); + return ShiftedImm.ShiftAmount; } - bool isCondCode() const { return Kind == k_CondCode; } - bool isToken() const { return Kind == k_Token; } - bool isReg() const { return Kind == k_Register; } - bool isImm() const { return Kind == k_Immediate; } - bool isMem() const { return false; } - bool isFPImm() const { return Kind == k_FPImmediate; } - bool isShiftOrExtend() const { return Kind == k_ShiftExtend; } - bool isSysReg() const { return Kind == k_SysReg; } - bool isImmWithLSL() const { return Kind == k_ImmWithLSL; } - bool isWrappedReg() const { return Kind == k_WrappedRegister; } - - bool isAddSubImmLSL0() const { - if (!isImmWithLSL()) return false; - if (ImmWithLSL.ShiftAmount != 0) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12 - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC - || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12; - } - - // Otherwise it should be a real immediate in range: - const MCConstantExpr *CE = cast(ImmWithLSL.Val); - return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + AArch64CC::CondCode getCondCode() const { + assert(Kind == k_CondCode && "Invalid access!"); + return CondCode.Code; } - bool isAddSubImmLSL12() const { - if (!isImmWithLSL()) return false; - if (ImmWithLSL.ShiftAmount != 12) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(ImmWithLSL.Val, Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12 - || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12; - } - - // Otherwise it should be a real immediate in range: - const MCConstantExpr *CE = cast(ImmWithLSL.Val); - return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + unsigned getFPImm() const { + assert(Kind == k_FPImm && "Invalid access!"); + return FPImm.Val; } - template bool isAddrRegExtend() const { - if (!isShiftOrExtend()) return false; - - A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType; - if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW)) - return false; - - if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX)) - return false; - - return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0; + unsigned getBarrier() const { + assert(Kind 
== k_Barrier && "Invalid access!"); + return Barrier.Val; } - bool isAdrpLabel() const { - if (!isImm()) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(getImm(), Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_None - || Variant == AArch64MCExpr::VK_AARCH64_GOT - || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL - || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC; - } - - return isLabel<21, 4096>(); + unsigned getReg() const override { + assert(Kind == k_Register && "Invalid access!"); + return Reg.RegNum; } - template bool isBitfieldWidth() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - return CE->getValue() >= 1 && CE->getValue() <= RegWidth; + unsigned getVectorListStart() const { + assert(Kind == k_VectorList && "Invalid access!"); + return VectorList.RegNum; } - template - bool isCVTFixedPos() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - return CE->getValue() >= 1 && CE->getValue() <= RegWidth; + unsigned getVectorListCount() const { + assert(Kind == k_VectorList && "Invalid access!"); + return VectorList.Count; } - bool isFMOVImm() const { - if (!isFPImm()) return false; - - APFloat RealVal(FPImm.Val); - uint32_t ImmVal; - return A64Imms::isFPImm(RealVal, ImmVal); + unsigned getVectorIndex() const { + assert(Kind == k_VectorIndex && "Invalid access!"); + return VectorIndex.Val; } - bool isFPZero() const { - if (!isFPImm()) return false; - - APFloat RealVal(FPImm.Val); - return RealVal.isPosZero(); + StringRef getSysReg() const { + assert(Kind == k_SysReg && "Invalid access!"); + return StringRef(SysReg.Data, SysReg.Length); } - template - bool isLabel() const { - if (!isImm()) return false; - - if (dyn_cast(Imm.Val)) { - return true; - } else if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) { - int64_t Val = CE->getValue(); - int64_t Min = - (scale * (1LL << (field_width - 1))); - int64_t Max = scale * ((1LL << (field_width - 1)) - 1); - return (Val % scale) == 0 && Val >= Min && Val <= Max; - } - - // N.b. this disallows explicit relocation specifications via an - // AArch64MCExpr. Users needing that behaviour - return false; + uint64_t getSysRegFeatureBits() const { + assert(Kind == k_SysReg && "Invalid access!"); + return SysReg.FeatureBits; } - bool isLane1() const { - if (!isImm()) return false; - - // Because it's come through custom assembly parsing, it must always be a - // constant expression. 
- return cast(getImm())->getValue() == 1; + unsigned getSysCR() const { + assert(Kind == k_SysCR && "Invalid access!"); + return SysCRImm.Val; } - bool isLoadLitLabel() const { - if (!isImm()) return false; - - AArch64MCExpr::VariantKind Variant; - if (isNonConstantExpr(getImm(), Variant)) { - return Variant == AArch64MCExpr::VK_AARCH64_None - || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL; - } - - return isLabel<19, 4>(); + unsigned getPrefetch() const { + assert(Kind == k_Prefetch && "Invalid access!"); + return Prefetch.Val; } - template bool isLogicalImm() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(Imm.Val); - if (!CE) return false; - - uint32_t Bits; - return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); + AArch64_AM::ShiftExtendType getShiftExtendType() const { + assert(Kind == k_ShiftExtend && "Invalid access!"); + return ShiftExtend.Type; } - template bool isLogicalImmMOV() const { - if (!isLogicalImm()) return false; - - const MCConstantExpr *CE = cast(Imm.Val); - - // The move alias for ORR is only valid if the immediate cannot be - // represented with a move (immediate) instruction; they take priority. - int UImm16, Shift; - return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift) - && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift); + unsigned getShiftExtendAmount() const { + assert(Kind == k_ShiftExtend && "Invalid access!"); + return ShiftExtend.Amount; } - template - bool isOffsetUImm12() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - - // Assume they know what they're doing for now if they've given us a - // non-constant expression. In principle we could check for ridiculous - // things that can't possibly work or relocations that would almost - // certainly break resulting code. - if (!CE) - return true; - - int64_t Val = CE->getValue(); - - // Must be a multiple of the access size in bytes. - if ((Val & (MemSize - 1)) != 0) return false; - - // Must be 12-bit unsigned - return Val >= 0 && Val <= 0xfff * MemSize; + bool hasShiftExtendAmount() const { + assert(Kind == k_ShiftExtend && "Invalid access!"); + return ShiftExtend.HasExplicitAmount; } - template - bool isShift() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != SHKind) + bool isImm() const override { return Kind == k_Immediate; } + bool isMem() const override { return false; } + bool isSImm9() const { + if (!isImm()) return false; - - return is64Bit ? 
ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31; - } - - bool isMOVN32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVN64Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G2, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, PermittedModifiers, NumModifiers); + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= -256 && Val < 256); } - - - bool isMOVZ32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0, - AArch64MCExpr::VK_AARCH64_ABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVZ64Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0, - AArch64MCExpr::VK_AARCH64_ABS_G1, - AArch64MCExpr::VK_AARCH64_ABS_G2, - AArch64MCExpr::VK_AARCH64_ABS_G3, - AArch64MCExpr::VK_AARCH64_SABS_G0, - AArch64MCExpr::VK_AARCH64_SABS_G1, - AArch64MCExpr::VK_AARCH64_SABS_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G2, - AArch64MCExpr::VK_AARCH64_DTPREL_G1, - AArch64MCExpr::VK_AARCH64_DTPREL_G0, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G2, - AArch64MCExpr::VK_AARCH64_TPREL_G1, - AArch64MCExpr::VK_AARCH64_TPREL_G0, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, PermittedModifiers, NumModifiers); + bool isSImm7s4() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= -256 && Val <= 252 && (Val & 3) == 0); } - - bool isMOVK32Imm() const { - static const AArch64MCExpr::VariantKind PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0_NC, - AArch64MCExpr::VK_AARCH64_ABS_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(32, PermittedModifiers, NumModifiers); - } - - bool isMOVK64Imm() const { - static const AArch64MCExpr::VariantKind 
PermittedModifiers[] = { - AArch64MCExpr::VK_AARCH64_ABS_G0_NC, - AArch64MCExpr::VK_AARCH64_ABS_G1_NC, - AArch64MCExpr::VK_AARCH64_ABS_G2_NC, - AArch64MCExpr::VK_AARCH64_ABS_G3, - AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, - AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, - }; - const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); - - return isMoveWideImm(64, PermittedModifiers, NumModifiers); + bool isSImm7s8() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= -512 && Val <= 504 && (Val & 7) == 0); } - - bool isMoveWideImm(unsigned RegWidth, - const AArch64MCExpr::VariantKind *PermittedModifiers, - unsigned NumModifiers) const { - if (!isImmWithLSL()) return false; - - if (ImmWithLSL.ShiftAmount % 16 != 0) return false; - if (ImmWithLSL.ShiftAmount >= RegWidth) return false; - - AArch64MCExpr::VariantKind Modifier; - if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) { - // E.g. "#:abs_g0:sym, lsl #16" makes no sense. - if (!ImmWithLSL.ImplicitAmount) return false; - - for (unsigned i = 0; i < NumModifiers; ++i) - if (PermittedModifiers[i] == Modifier) return true; - + bool isSImm7s16() const { + if (!isImm()) return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0); + } + + bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const { + AArch64MCExpr::VariantKind ELFRefKind; + MCSymbolRefExpr::VariantKind DarwinRefKind; + int64_t Addend; + if (!AArch64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, + Addend)) { + // If we don't understand the expression, assume the best and + // let the fixup and relocation code deal with it. + return true; } - const MCConstantExpr *CE = dyn_cast(ImmWithLSL.Val); - return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff; - } - - template - bool isMoveWideMovAlias() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; - - int UImm16, Shift; - uint64_t Value = CE->getValue(); - - // If this is a 32-bit instruction then all bits above 32 should be the - // same: either of these is fine because signed/unsigned values should be - // permitted. - if (RegWidth == 32) { - if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff) - return false; - - Value &= 0xffffffffULL; + if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || + ELFRefKind == AArch64MCExpr::VK_LO12 || + ELFRefKind == AArch64MCExpr::VK_GOT_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_GOTTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) { + // Note that we don't range-check the addend. It's adjusted modulo page + // size when converted, so there is no "out of range" condition when using + // @pageoff. + return Addend >= 0 && (Addend % Scale) == 0; + } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF || + DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) { + // @gotpageoff/@tlvppageoff can only be used directly, not with an addend. 
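// A minimal self-contained sketch of the modifier rule above (the SymRef
// type is hypothetical, standing in for classifySymbolRef's outputs; not
// LLVM code): @pageoff-style references may carry a non-negative, scaled
// addend, while @gotpageoff/@tlvppageoff must name the symbol directly.
struct SymRef { enum { PAGEOFF, GOTPAGEOFF, TLVPPAGEOFF } Mod; long Addend; };
static bool uimm12OffsetOK(const SymRef &S, unsigned Scale) {
  switch (S.Mod) {
  case SymRef::PAGEOFF:
    return S.Addend >= 0 && S.Addend % Scale == 0; // folded into the lo12 bits
  case SymRef::GOTPAGEOFF:
  case SymRef::TLVPPAGEOFF:
    return S.Addend == 0; // the GOT/TLVP slot holds a pointer; no addend
  }
  return false;
}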
+ return Addend == 0; } - return isValidImm(RegWidth, Value, UImm16, Shift); + return false; } - bool isMSRWithReg() const { - if (!isSysReg()) return false; + template bool isUImm12Offset() const { + if (!isImm()) + return false; - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64SysReg::MSRMapper().fromString(Name, IsKnownRegister); + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return isSymbolicUImm12Offset(getImm(), Scale); - return IsKnownRegister; + int64_t Val = MCE->getValue(); + return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000; } - bool isMSRPState() const { - if (!isSysReg()) return false; - - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64PState::PStateMapper().fromString(Name, IsKnownRegister); - - return IsKnownRegister; + bool isImm0_7() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 8); } - - bool isMRS() const { - if (!isSysReg()) return false; - - // First check against specific MSR-only (write-only) registers - bool IsKnownRegister; - StringRef Name(SysReg.Data, SysReg.Length); - A64SysReg::MRSMapper().fromString(Name, IsKnownRegister); - - return IsKnownRegister; + bool isImm1_8() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val > 0 && Val < 9); } - - bool isPRFM() const { - if (!isImm()) return false; - - const MCConstantExpr *CE = dyn_cast(getImm()); - - if (!CE) + bool isImm0_15() const { + if (!isImm()) return false; - - return CE->getValue() >= 0 && CE->getValue() <= 31; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 16); } - - template bool isRegExtend() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != SHKind) + bool isImm1_16() const { + if (!isImm()) return false; - - return ShiftExtend.Amount <= 4; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val > 0 && Val < 17); } - - bool isRegExtendLSL() const { - if (!isShiftOrExtend()) return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) + bool isImm0_31() const { + if (!isImm()) return false; - - return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 32); } - - // if 0 < value <= w, return true - bool isShrFixedWidth(int w) const { + bool isImm1_31() const { if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) return false; - int64_t Value = CE->getValue(); - return Value > 0 && Value <= w; + int64_t Val = MCE->getValue(); + return (Val >= 1 && Val < 32); } - - bool isShrImm8() const { return isShrFixedWidth(8); } - - bool isShrImm16() const { return isShrFixedWidth(16); } - - bool isShrImm32() const { return isShrFixedWidth(32); } - - bool isShrImm64() const { return isShrFixedWidth(64); } - - // if 0 <= value < w, return true - bool isShlFixedWidth(int w) const { + bool isImm1_32() const { if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) return false; - 
int64_t Value = CE->getValue(); - return Value >= 0 && Value < w; + int64_t Val = MCE->getValue(); + return (Val >= 1 && Val < 33); } + bool isImm0_63() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 64); + } + bool isImm1_63() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 1 && Val < 64); + } + bool isImm1_64() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 1 && Val < 65); + } + bool isImm0_127() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 128); + } + bool isImm0_255() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 256); + } + bool isImm0_65535() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 65536); + } + bool isImm32_63() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 32 && Val < 64); + } + bool isLogicalImm32() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32); + } + bool isLogicalImm64() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64); + } + bool isShiftedImm() const { return Kind == k_ShiftedImm; } + bool isAddSubImm() const { + if (!isShiftedImm() && !isImm()) + return false; - bool isShlImm8() const { return isShlFixedWidth(8); } - - bool isShlImm16() const { return isShlFixedWidth(16); } + const MCExpr *Expr; - bool isShlImm32() const { return isShlFixedWidth(32); } + // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'. 
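// A minimal sketch of that 'lsl #0'/'lsl #12' rule, assuming (as
// isAddSubImm above does) that the payload must reduce to a 12-bit value:
// the assembler either takes the immediate as-is or shifts it up by 12.
#include <cstdint>
static bool splitAddSubImm(uint64_t Imm, unsigned &Imm12, unsigned &Shift) {
  if (Imm <= 0xfff) {
    Imm12 = Imm; Shift = 0;               // add x0, x1, #0x123
    return true;
  }
  if ((Imm & 0xfff) == 0 && (Imm >> 12) <= 0xfff) {
    Imm12 = Imm >> 12; Shift = 12;        // add x0, x1, #0x123, lsl #12
    return true;
  }
  return false;                           // needs a MOV into a scratch register
}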
+ if (isShiftedImm()) { + unsigned Shift = ShiftedImm.ShiftAmount; + Expr = ShiftedImm.Val; + if (Shift != 0 && Shift != 12) + return false; + } else { + Expr = getImm(); + } - bool isShlImm64() const { return isShlFixedWidth(64); } + AArch64MCExpr::VariantKind ELFRefKind; + MCSymbolRefExpr::VariantKind DarwinRefKind; + int64_t Addend; + if (AArch64AsmParser::classifySymbolRef(Expr, ELFRefKind, + DarwinRefKind, Addend)) { + return DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF + || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF + || (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF && Addend == 0) + || ELFRefKind == AArch64MCExpr::VK_LO12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 + || ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC + || ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 + || ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 + || ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC + || ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12; + } - bool isNeonMovImmShiftLSL() const { - if (!isShiftOrExtend()) + // Otherwise it should be a real immediate in range: + const MCConstantExpr *CE = cast(Expr); + return CE->getValue() >= 0 && CE->getValue() <= 0xfff; + } + bool isCondCode() const { return Kind == k_CondCode; } + bool isSIMDImmType10() const { + if (!isImm()) return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) return false; - - // Valid shift amount is 0, 8, 16 and 24. - return ShiftExtend.Amount % 8 == 0 && ShiftExtend.Amount <= 24; + return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue()); } - - bool isNeonMovImmShiftLSLH() const { - if (!isShiftOrExtend()) + bool isBranchTarget26() const { + if (!isImm()) return false; - - if (ShiftExtend.ShiftType != A64SE::LSL) + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return true; + int64_t Val = MCE->getValue(); + if (Val & 0x3) return false; - - // Valid shift amount is 0 and 8. - return ShiftExtend.Amount == 0 || ShiftExtend.Amount == 8; + return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2)); + } + bool isPCRelLabel19() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return true; + int64_t Val = MCE->getValue(); + if (Val & 0x3) + return false; + return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2)); + } + bool isBranchTarget14() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return true; + int64_t Val = MCE->getValue(); + if (Val & 0x3) + return false; + return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2)); } - bool isNeonMovImmShiftMSL() const { - if (!isShiftOrExtend()) + bool + isMovWSymbol(ArrayRef AllowedModifiers) const { + if (!isImm()) return false; - if (ShiftExtend.ShiftType != A64SE::MSL) + AArch64MCExpr::VariantKind ELFRefKind; + MCSymbolRefExpr::VariantKind DarwinRefKind; + int64_t Addend; + if (!AArch64AsmParser::classifySymbolRef(getImm(), ELFRefKind, + DarwinRefKind, Addend)) { + return false; + } + if (DarwinRefKind != MCSymbolRefExpr::VK_None) return false; - // Valid shift amount is 8 and 16. 
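// The MSL form the removed predicate guarded is a "shifting ones" modifier:
// unlike LSL, it fills the vacated low bits with ones. A tiny sketch (the
// applyMSL helper is illustrative, not an LLVM API):
#include <cstdint>
static uint64_t applyMSL(uint8_t Imm8, unsigned Shift) { // Shift is 8 or 16
  return ((uint64_t)Imm8 << Shift) | ((1ULL << Shift) - 1);
}
// applyMSL(0xab, 8) == 0xabff, as used by "movi v0.2s, #0xab, msl #8".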
- return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16; - } + for (unsigned i = 0; i != AllowedModifiers.size(); ++i) { + if (ELFRefKind == AllowedModifiers[i]) + return Addend == 0; + } - template - bool isVectorList() const { - return Kind == k_VectorList && VectorList.Layout == Layout && - VectorList.Count == Count; + return false; } - template bool isSImm7Scaled() const { - if (!isImm()) - return false; + bool isMovZSymbolG3() const { + static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; + return isMovWSymbol(Variants); + } - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; + bool isMovZSymbolG2() const { + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S, + AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2}; + return isMovWSymbol(Variants); + } - int64_t Val = CE->getValue(); - if (Val % MemSize != 0) return false; + bool isMovZSymbolG1() const { + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S, + AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1, + AArch64MCExpr::VK_DTPREL_G1, + }; + return isMovWSymbol(Variants); + } - Val /= MemSize; + bool isMovZSymbolG0() const { + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S, + AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0}; + return isMovWSymbol(Variants); + } - return Val >= -64 && Val < 64; + bool isMovKSymbolG3() const { + static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 }; + return isMovWSymbol(Variants); } - template - bool isSImm() const { - if (!isImm()) return false; + bool isMovKSymbolG2() const { + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G2_NC}; + return isMovWSymbol(Variants); + } - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; + bool isMovKSymbolG1() const { + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC, + AArch64MCExpr::VK_DTPREL_G1_NC + }; + return isMovWSymbol(Variants); + } - return CE->getValue() >= -(1LL << (BitWidth - 1)) - && CE->getValue() < (1LL << (BitWidth - 1)); + bool isMovKSymbolG0() const { + static AArch64MCExpr::VariantKind Variants[] = { + AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC, + AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC + }; + return isMovWSymbol(Variants); } - template - bool isUImm() const { + template + bool isMOVZMovAlias() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast(getImm()); if (!CE) return false; + uint64_t Value = CE->getValue(); - return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth); - } + if (RegWidth == 32) + Value &= 0xffffffffULL; - bool isUImm() const { - if (!isImm()) return false; + // "lsl #0" takes precedence: in practice this only affects "#0, lsl #0". + if (Value == 0 && Shift != 0) + return false; - return isa(getImm()); + return (Value & ~(0xffffULL << Shift)) == 0; } - bool isNeonUImm64Mask() const { - if (!isImm()) - return false; + template + bool isMOVNMovAlias() const { + if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) - return false; - + if (!CE) return false; uint64_t Value = CE->getValue(); - // i64 value with each byte being either 0x00 or 0xff. 
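// That byte-granular mask is the property the new code checks via
// AArch64_AM::isAdvSIMDModImmType10: a 64-bit value is encodable iff every
// byte is 0x00 or 0xff (one immediate bit per byte). Sketch of the test:
#include <cstdint>
static bool isByteMask(uint64_t V) {
  for (int i = 0; i < 8; ++i, V >>= 8)
    if ((V & 0xff) != 0 && (V & 0xff) != 0xff)
      return false; // some byte is neither all-zeros nor all-ones
  return true;
}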
- for (unsigned i = 0; i < 8; ++i, Value >>= 8) - if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) + // MOVZ takes precedence over MOVN. + for (int MOVZShift = 0; MOVZShift <= 48; MOVZShift += 16) + if ((Value & ~(0xffffULL << MOVZShift)) == 0) return false; - return true; + + Value = ~Value; + if (RegWidth == 32) + Value &= 0xffffffffULL; + + return (Value & ~(0xffffULL << Shift)) == 0; } - // if value == N, return true - template - bool isExactImm() const { - if (!isImm()) return false; + bool isFPImm() const { return Kind == k_FPImm; } + bool isBarrier() const { return Kind == k_Barrier; } + bool isSysReg() const { return Kind == k_SysReg; } + bool isMRSSystemRegister() const { + if (!isSysReg()) return false; - const MCConstantExpr *CE = dyn_cast(getImm()); - if (!CE) return false; + bool IsKnownRegister; + auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits()); + Mapper.fromString(getSysReg(), IsKnownRegister); - return CE->getValue() == N; + return IsKnownRegister; } + bool isMSRSystemRegister() const { + if (!isSysReg()) return false; + + bool IsKnownRegister; + auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits()); + Mapper.fromString(getSysReg(), IsKnownRegister); - bool isFPZeroIZero() const { - return isFPZero(); + return IsKnownRegister; } + bool isSystemPStateField() const { + if (!isSysReg()) return false; - static AArch64Operand *CreateImmWithLSL(const MCExpr *Val, - unsigned ShiftAmount, - bool ImplicitAmount, - SMLoc S,SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E); - Op->ImmWithLSL.Val = Val; - Op->ImmWithLSL.ShiftAmount = ShiftAmount; - Op->ImmWithLSL.ImplicitAmount = ImplicitAmount; - return Op; + bool IsKnownRegister; + AArch64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister); + + return IsKnownRegister; + } + bool isReg() const override { return Kind == k_Register && !Reg.isVector; } + bool isVectorReg() const { return Kind == k_Register && Reg.isVector; } + bool isVectorRegLo() const { + return Kind == k_Register && Reg.isVector && + AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains( + Reg.RegNum); + } + bool isGPR32as64() const { + return Kind == k_Register && !Reg.isVector && + AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum); } - static AArch64Operand *CreateCondCode(A64CC::CondCodes Code, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E); - Op->CondCode.Code = Code; - return Op; + bool isGPR64sp0() const { + return Kind == k_Register && !Reg.isVector && + AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum); } - static AArch64Operand *CreateFPImm(double Val, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E); - Op->FPImm.Val = Val; - return Op; + /// Is this a vector list with the type implicit (presumably attached to the + /// instruction itself)? 
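// For example, "{ v4.8b, v5.8b }" spells the element type on the list
// itself, whereas a bare "{ v4, v5 }" leaves the type to be implied by the
// instruction. A rough model of what VectorListOp above records
// (illustrative only, not the LLVM type):
struct VecList { unsigned FirstReg, Count, NumElements; char ElementKind; };
static bool isImplicitlyTyped(const VecList &L) { return L.ElementKind == 0; }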
+ template bool isImplicitlyTypedVectorList() const { + return Kind == k_VectorList && VectorList.Count == NumRegs && + !VectorList.ElementKind; } - static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E); - Op->Imm.Val = Val; - return Op; + template + bool isTypedVectorList() const { + if (Kind != k_VectorList) + return false; + if (VectorList.Count != NumRegs) + return false; + if (VectorList.ElementKind != ElementKind) + return false; + return VectorList.NumElements == NumElements; } - static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_Register, S, E); - Op->Reg.RegNum = RegNum; - return Op; + bool isVectorIndex1() const { + return Kind == k_VectorIndex && VectorIndex.Val == 1; } + bool isVectorIndexB() const { + return Kind == k_VectorIndex && VectorIndex.Val < 16; + } + bool isVectorIndexH() const { + return Kind == k_VectorIndex && VectorIndex.Val < 8; + } + bool isVectorIndexS() const { + return Kind == k_VectorIndex && VectorIndex.Val < 4; + } + bool isVectorIndexD() const { + return Kind == k_VectorIndex && VectorIndex.Val < 2; + } + bool isToken() const override { return Kind == k_Token; } + bool isTokenEqual(StringRef Str) const { + return Kind == k_Token && getToken() == Str; + } + bool isSysCR() const { return Kind == k_SysCR; } + bool isPrefetch() const { return Kind == k_Prefetch; } + bool isShiftExtend() const { return Kind == k_ShiftExtend; } + bool isShifter() const { + if (!isShiftExtend()) + return false; - static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E); - Op->Reg.RegNum = RegNum; - return Op; + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR || ST == AArch64_AM::ROR || + ST == AArch64_AM::MSL); } + bool isExtend() const { + if (!isShiftExtend()) + return false; - static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp, - unsigned Amount, - bool ImplicitAmount, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E); - Op->ShiftExtend.ShiftType = ShiftTyp; - Op->ShiftExtend.Amount = Amount; - Op->ShiftExtend.ImplicitAmount = ImplicitAmount; - return Op; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB || + ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH || + ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW || + ET == AArch64_AM::UXTX || ET == AArch64_AM::SXTX || + ET == AArch64_AM::LSL) && + getShiftExtendAmount() <= 4; } - static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) { - AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - return Op; + bool isExtend64() const { + if (!isExtend()) + return false; + // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class). 
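// Sketch of the width rule stated above: "add x0, x1, w2, uxtw #2" extends
// a 32-bit source, while "add x0, x1, x2, sxtx" (or a plain lsl) takes a
// 64-bit one. (Illustrative enum; the real code uses
// AArch64_AM::ShiftExtendType.)
enum Ext { UXTB, UXTH, UXTW, UXTX, SXTB, SXTH, SXTW, SXTX, LSL_EXT };
static bool needs64BitSource(Ext E) {
  return E == UXTX || E == SXTX || E == LSL_EXT; // the ExtendLSL64 cases
}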
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX; + } + bool isExtendLSL64() const { + if (!isExtend()) + return false; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTX || ET == AArch64_AM::SXTX || + ET == AArch64_AM::LSL) && + getShiftExtendAmount() <= 4; } - static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, - A64Layout::VectorLayout Layout, - SMLoc S, SMLoc E) { - AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E); - Op->VectorList.RegNum = RegNum; - Op->VectorList.Count = Count; - Op->VectorList.Layout = Layout; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; + template bool isMemXExtend() const { + if (!isExtend()) + return false; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::LSL || ET == AArch64_AM::SXTX) && + (getShiftExtendAmount() == Log2_32(Width / 8) || + getShiftExtendAmount() == 0); } - static AArch64Operand *CreateToken(StringRef Str, SMLoc S) { - AArch64Operand *Op = new AArch64Operand(k_Token, S, S); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - return Op; + template bool isMemWExtend() const { + if (!isExtend()) + return false; + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + return (ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW) && + (getShiftExtendAmount() == Log2_32(Width / 8) || + getShiftExtendAmount() == 0); } + template + bool isArithmeticShifter() const { + if (!isShifter()) + return false; - void addExpr(MCInst &Inst, const MCExpr *Expr) const { - // Add as immediates when possible. - if (const MCConstantExpr *CE = dyn_cast(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - else - Inst.addOperand(MCOperand::CreateExpr(Expr)); + // An arithmetic shifter is LSL, LSR, or ASR. + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR) && getShiftExtendAmount() < width; } - template - void addBFILSBOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = cast(getImm()); - unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth; - Inst.addOperand(MCOperand::CreateImm(EncodedVal)); - } + template + bool isLogicalShifter() const { + if (!isShifter()) + return false; - void addBFIWidthOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *CE = cast(getImm()); - Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); + // A logical shifter is LSL, LSR, ASR or ROR. + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR || + ST == AArch64_AM::ASR || ST == AArch64_AM::ROR) && + getShiftExtendAmount() < width; } - void addBFXWidthOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); + bool isMovImm32Shifter() const { + if (!isShifter()) + return false; - uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm(); - const MCConstantExpr *CE = cast(getImm()); + // A MOVi shifter is LSL of 0, 16, 32, or 48. 
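// Taken together, the two isMovImm*Shifter predicates implement one rule:
// the MOVZ/MOVN/MOVK shift must be a multiple of 16 that is less than the
// register width (0/16 for W registers, 0/16/32/48 for X). A sketch:
static bool validMovWideShift(unsigned Amount, unsigned RegWidth) {
  return (Amount % 16) == 0 && Amount < RegWidth;
}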
+ AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + if (ST != AArch64_AM::LSL) + return false; + uint64_t Val = getShiftExtendAmount(); + return (Val == 0 || Val == 16); + } + + bool isMovImm64Shifter() const { + if (!isShifter()) + return false; - Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1)); + // A MOVi shifter is LSL of 0 or 16. + AArch64_AM::ShiftExtendType ST = getShiftExtendType(); + if (ST != AArch64_AM::LSL) + return false; + uint64_t Val = getShiftExtendAmount(); + return (Val == 0 || Val == 16 || Val == 32 || Val == 48); } - void addCondCodeOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getCondCode())); + bool isLogicalVecShifter() const { + if (!isShifter()) + return false; + + // A logical vector shifter is a left shift by 0, 8, 16, or 24. + unsigned Shift = getShiftExtendAmount(); + return getShiftExtendType() == AArch64_AM::LSL && + (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24); } - void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); + bool isLogicalVecHalfWordShifter() const { + if (!isLogicalVecShifter()) + return false; - const MCConstantExpr *CE = cast(getImm()); - Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue())); + // A logical vector shifter is a left shift by 0 or 8. + unsigned Shift = getShiftExtendAmount(); + return getShiftExtendType() == AArch64_AM::LSL && + (Shift == 0 || Shift == 8); } - void addFMOVImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); + bool isMoveVecShifter() const { + if (!isShiftExtend()) + return false; - APFloat RealVal(FPImm.Val); - uint32_t ImmVal; - A64Imms::isFPImm(RealVal, ImmVal); + // A logical vector shifter is a left shift by 8 or 16. + unsigned Shift = getShiftExtendAmount(); + return getShiftExtendType() == AArch64_AM::MSL && + (Shift == 8 || Shift == 16); + } - Inst.addOperand(MCOperand::CreateImm(ImmVal)); + // Fallback unscaled operands are for aliases of LDR/STR that fall back + // to LDUR/STUR when the offset is not legal for the former but is for + // the latter. As such, in addition to checking for being a legal unscaled + // address, also check that it is not a legal scaled address. This avoids + // ambiguity in the matcher. + template + bool isSImm9OffsetFB() const { + return isSImm9() && !isUImm12Offset(); } - void addFPZeroOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands"); - Inst.addOperand(MCOperand::CreateImm(0)); + bool isAdrpLabel() const { + // Validation was handled during parsing, so we just sanity check that + // something didn't go haywire. + if (!isImm()) + return false; + + if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) { + int64_t Val = CE->getValue(); + int64_t Min = - (4096 * (1LL << (21 - 1))); + int64_t Max = 4096 * ((1LL << (21 - 1)) - 1); + return (Val % 4096) == 0 && Val >= Min && Val <= Max; + } + + return true; } - void addFPZeroIZeroOperands(MCInst &Inst, unsigned N) const { - addFPZeroOperands(Inst, N); + bool isAdrLabel() const { + // Validation was handled during parsing, so we just sanity check that + // something didn't go haywire. 
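// The ranges being sanity-checked here and in isAdrLabel below: ADRP covers
// +/-4 GiB in 4 KiB pages, ADR +/-1 MiB in bytes (both 21-bit signed
// fields). A self-contained sketch of the arithmetic:
#include <cstdint>
static bool adrpInRange(int64_t Off) {            // page-aligned byte offset
  return (Off % 4096) == 0 && Off >= -(4096LL << 20) &&
         Off <= (4096LL << 20) - 4096;
}
static bool adrInRange(int64_t Off) {             // plain byte offset
  return Off >= -(1LL << 20) && Off <= (1LL << 20) - 1;
}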
+ if (!isImm()) + return false; + + if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) { + int64_t Val = CE->getValue(); + int64_t Min = - (1LL << (21 - 1)); + int64_t Max = ((1LL << (21 - 1)) - 1); + return Val >= Min && Val <= Max; + } + + return true; } - void addInvCondCodeOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - unsigned Encoded = A64InvertCondCode(getCondCode()); - Inst.addOperand(MCOperand::CreateImm(Encoded)); + void addExpr(MCInst &Inst, const MCExpr *Expr) const { + // Add as immediates when possible. Null MCExpr = 0. + if (!Expr) + Inst.addOperand(MCOperand::CreateImm(0)); + else if (const MCConstantExpr *CE = dyn_cast(Expr)) + Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + else + Inst.addOperand(MCOperand::CreateExpr(Expr)); } void addRegOperands(MCInst &Inst, unsigned N) const { @@ -987,1330 +1091,2026 @@ public: Inst.addOperand(MCOperand::CreateReg(getReg())); } - void addImmOperands(MCInst &Inst, unsigned N) const { + void addGPR32as64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } + assert( + AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(getReg())); - template - void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); + const MCRegisterInfo *RI = Ctx.getRegisterInfo(); + uint32_t Reg = RI->getRegClass(AArch64::GPR32RegClassID).getRegister( + RI->getEncodingValue(getReg())); - const MCConstantExpr *CE = cast(getImm()); - uint64_t Val = CE->getValue() / MemSize; - Inst.addOperand(MCOperand::CreateImm(Val & 0x7f)); + Inst.addOperand(MCOperand::CreateReg(Reg)); } - template - void addSImmOperands(MCInst &Inst, unsigned N) const { + void addVectorReg64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - - const MCConstantExpr *CE = cast(getImm()); - uint64_t Val = CE->getValue(); - Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1))); + assert( + AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg())); + Inst.addOperand(MCOperand::CreateReg(AArch64::D0 + getReg() - AArch64::Q0)); } - void addImmWithLSLOperands(MCInst &Inst, unsigned N) const { - assert (N == 1 && "Invalid number of operands!"); - - addExpr(Inst, ImmWithLSL.Val); + void addVectorReg128Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + assert( + AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg())); + Inst.addOperand(MCOperand::CreateReg(getReg())); } - template - void addLabelOperands(MCInst &Inst, unsigned N) const { + void addVectorRegLoOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getReg())); + } - const MCConstantExpr *CE = dyn_cast(Imm.Val); + template + void addVectorList64Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + static unsigned FirstRegs[] = { AArch64::D0, AArch64::D0_D1, + AArch64::D0_D1_D2, AArch64::D0_D1_D2_D3 }; + unsigned FirstReg = FirstRegs[NumRegs - 1]; - if (!CE) { - addExpr(Inst, Imm.Val); - return; - } + Inst.addOperand( + MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0)); + } - int64_t Val = CE->getValue(); - assert(Val % scale == 0 && "Unaligned immediate in instruction"); - Val /= scale; + template + void addVectorList128Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number 
of operands!"); + static unsigned FirstRegs[] = { AArch64::Q0, AArch64::Q0_Q1, + AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 }; + unsigned FirstReg = FirstRegs[NumRegs - 1]; - Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1))); + Inst.addOperand( + MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0)); } - template - void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const { + void addVectorIndex1Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } - if (const MCConstantExpr *CE = dyn_cast(getImm())) { - Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize)); + void addVectorIndexBOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + + void addVectorIndexHOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + + void addVectorIndexSOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + + void addVectorIndexDOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // If this is a pageoff symrefexpr with an addend, adjust the addend + // to be only the page-offset portion. Otherwise, just add the expr + // as-is. + addExpr(Inst, getImm()); + } + + void addAddSubImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + if (isShiftedImm()) { + addExpr(Inst, getShiftedImmVal()); + Inst.addOperand(MCOperand::CreateImm(getShiftedImmShift())); } else { - Inst.addOperand(MCOperand::CreateExpr(getImm())); + addExpr(Inst, getImm()); + Inst.addOperand(MCOperand::CreateImm(0)); } } - template - void addLogicalImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands"); - const MCConstantExpr *CE = cast(Imm.Val); + void addCondCodeOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getCondCode())); + } - uint32_t Bits; - A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits); + void addAdrpLabelOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + addExpr(Inst, getImm()); + else + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 12)); + } - Inst.addOperand(MCOperand::CreateImm(Bits)); + void addAdrLabelOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); } - void addMRSOperands(MCInst &Inst, unsigned N) const { + template + void addUImm12OffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid); + if (!MCE) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + return; + } + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / Scale)); + } - Inst.addOperand(MCOperand::CreateImm(Bits)); + void addSImm9Operands(MCInst &Inst, unsigned N) const { + assert(N 
== 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); } - void addMSRWithRegOperands(MCInst &Inst, unsigned N) const { + void addSImm7s4Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4)); + } - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid); + void addSImm7s8Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8)); + } - Inst.addOperand(MCOperand::CreateImm(Bits)); + void addSImm7s16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16)); } - void addMSRPStateOperands(MCInst &Inst, unsigned N) const { + void addImm0_7Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - bool Valid; - StringRef Name(SysReg.Data, SysReg.Length); - uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid); + void addImm1_8Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - Inst.addOperand(MCOperand::CreateImm(Bits)); + void addImm0_15Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); } - void addMoveWideImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && "Invalid number of operands!"); + void addImm1_16Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - addExpr(Inst, ImmWithLSL.Val); + void addImm0_31Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - AArch64MCExpr::VariantKind Variant; - if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) { - Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16)); - return; - } + void addImm1_31Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - // We know it's relocated - switch 
(Variant) { - case AArch64MCExpr::VK_AARCH64_ABS_G0: - case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G0: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: - case AArch64MCExpr::VK_AARCH64_TPREL_G0: - case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: - Inst.addOperand(MCOperand::CreateImm(0)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G1: - case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G1: - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: - Inst.addOperand(MCOperand::CreateImm(1)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G2: - case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: - case AArch64MCExpr::VK_AARCH64_SABS_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - Inst.addOperand(MCOperand::CreateImm(2)); - break; - case AArch64MCExpr::VK_AARCH64_ABS_G3: - Inst.addOperand(MCOperand::CreateImm(3)); - break; - default: llvm_unreachable("Inappropriate move wide relocation"); - } + void addImm1_32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); } - template - void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && "Invalid number of operands!"); - int UImm16, Shift; + void addImm0_63Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - const MCConstantExpr *CE = cast(getImm()); - uint64_t Value = CE->getValue(); + void addImm1_63Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - if (RegWidth == 32) { - Value &= 0xffffffffULL; - } + void addImm1_64Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - bool Valid = isValidImm(RegWidth, Value, UImm16, Shift); - (void)Valid; - assert(Valid && "Invalid immediates should have been weeded out by now"); + void addImm0_127Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - Inst.addOperand(MCOperand::CreateImm(UImm16)); - Inst.addOperand(MCOperand::CreateImm(Shift)); + void addImm0_255Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); } - void addPRFMOperands(MCInst &Inst, unsigned N) const { + void 
addImm0_65535Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - const MCConstantExpr *CE = cast(getImm()); - assert(CE->getValue() >= 0 && CE->getValue() <= 31 - && "PRFM operand should be 5-bits"); + void addImm32_63Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); + } - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); + void addLogicalImm32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid logical immediate operand!"); + uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32); + Inst.addOperand(MCOperand::CreateImm(encoding)); } - // For Add-sub (extended register) operands. - void addRegExtendOperands(MCInst &Inst, unsigned N) const { + void addLogicalImm64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid logical immediate operand!"); + uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 64); + Inst.addOperand(MCOperand::CreateImm(encoding)); + } - Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + assert(MCE && "Invalid immediate operand!"); + uint64_t encoding = AArch64_AM::encodeAdvSIMDModImmType10(MCE->getValue()); + Inst.addOperand(MCOperand::CreateImm(encoding)); } - // For Vector Immediates shifted imm operands. - void addNeonMovImmShiftLSLOperands(MCInst &Inst, unsigned N) const { + void addBranchTarget26Operands(MCInst &Inst, unsigned N) const { + // Branch operands don't encode the low bits, so shift them off + // here. If it's a label, however, just put it on directly as there's + // not enough information now to do anything. assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) { + addExpr(Inst, getImm()); + return; + } + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); + } - if (ShiftExtend.Amount % 8 != 0 || ShiftExtend.Amount > 24) - llvm_unreachable("Invalid shift amount for vector immediate inst."); + void addPCRelLabel19Operands(MCInst &Inst, unsigned N) const { + // Branch operands don't encode the low bits, so shift them off + // here. If it's a label, however, just put it on directly as there's + // not enough information now to do anything. + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) { + addExpr(Inst, getImm()); + return; + } + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); + } - // Encode LSL shift amount 0, 8, 16, 24 as 0, 1, 2, 3. 
- int64_t Imm = ShiftExtend.Amount / 8; - Inst.addOperand(MCOperand::CreateImm(Imm)); + void addBranchTarget14Operands(MCInst &Inst, unsigned N) const { + // Branch operands don't encode the low bits, so shift them off + // here. If it's a label, however, just put it on directly as there's + // not enough information now to do anything. + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) { + addExpr(Inst, getImm()); + return; + } + assert(MCE && "Invalid constant immediate operand!"); + Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); } - void addNeonMovImmShiftLSLHOperands(MCInst &Inst, unsigned N) const { + void addFPImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getFPImm())); + } - if (ShiftExtend.Amount != 0 && ShiftExtend.Amount != 8) - llvm_unreachable("Invalid shift amount for vector immediate inst."); + void addBarrierOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getBarrier())); + } - // Encode LSLH shift amount 0, 8 as 0, 1. - int64_t Imm = ShiftExtend.Amount / 8; - Inst.addOperand(MCOperand::CreateImm(Imm)); + void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits()); + uint32_t Bits = Mapper.fromString(getSysReg(), Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); } - void addNeonMovImmShiftMSLOperands(MCInst &Inst, unsigned N) const { + void addMSRSystemRegisterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - if (ShiftExtend.Amount != 8 && ShiftExtend.Amount != 16) - llvm_unreachable("Invalid shift amount for vector immediate inst."); + bool Valid; + auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits()); + uint32_t Bits = Mapper.fromString(getSysReg(), Valid); - // Encode MSL shift amount 8, 16 as 0, 1. - int64_t Imm = ShiftExtend.Amount / 8 - 1; - Inst.addOperand(MCOperand::CreateImm(Imm)); + Inst.addOperand(MCOperand::CreateImm(Bits)); } - // For the extend in load-store (register offset) instructions. 
- template - void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { - addAddrRegExtendOperands(Inst, N, MemSize); + void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + bool Valid; + uint32_t Bits = + AArch64PState::PStateMapper().fromString(getSysReg(), Valid); + + Inst.addOperand(MCOperand::CreateImm(Bits)); } - void addAddrRegExtendOperands(MCInst &Inst, unsigned N, - unsigned MemSize) const { + void addSysCROperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getSysCR())); + } - // First bit of Option is set in instruction classes, the high two bits are - // as follows: - unsigned OptionHi = 0; - switch (ShiftExtend.ShiftType) { - case A64SE::UXTW: - case A64SE::LSL: - OptionHi = 1; - break; - case A64SE::SXTW: - case A64SE::SXTX: - OptionHi = 3; - break; - default: - llvm_unreachable("Invalid extend type for register offset"); - } + void addPrefetchOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(getPrefetch())); + } - unsigned S = 0; - if (MemSize == 1 && !ShiftExtend.ImplicitAmount) - S = 1; - else if (MemSize != 1 && ShiftExtend.Amount != 0) - S = 1; + void addShifterOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + unsigned Imm = + AArch64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount()); + Inst.addOperand(MCOperand::CreateImm(Imm)); + } - Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S)); + void addExtendOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTW; + unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount()); + Inst.addOperand(MCOperand::CreateImm(Imm)); } - void addShiftOperands(MCInst &Inst, unsigned N) const { + + void addExtend64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTX; + unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount()); + Inst.addOperand(MCOperand::CreateImm(Imm)); + } - Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); + void addMemExtendOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX; + Inst.addOperand(MCOperand::CreateImm(IsSigned)); + Inst.addOperand(MCOperand::CreateImm(getShiftExtendAmount() != 0)); } - void addNeonUImm64MaskOperands(MCInst &Inst, unsigned N) const { + // For 8-bit load/store instructions with a register offset, both the + // "DoShift" and "NoShift" variants have a shift of 0. Because of this, + // they're disambiguated by whether the shift was explicit or implicit rather + // than its size. 
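// Illustration (not from the patch): a minimal sketch of the two-immediate
// encoding used by addMemExtendOperands above and the 8-bit variant below;
// the helper name encodeMemExtend is hypothetical. For
// "ldr x0, [x1, w2, sxtw #3]" the pair is (IsSigned=1, DoShift=1); for
// "ldrb w0, [x1, x2]" the LSL #0 is implicit, so the 8-bit form emits (0, 0).
static void encodeMemExtend(AArch64_AM::ShiftExtendType ET, unsigned Amount,
                            bool HasExplicitAmount, bool Is8Bit, MCInst &Inst) {
  bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX;
  Inst.addOperand(MCOperand::CreateImm(IsSigned));
  // 8-bit accesses distinguish an explicit "#0" from no amount at all;
  // wider accesses only care whether the amount is non-zero.
  Inst.addOperand(MCOperand::CreateImm(Is8Bit ? HasExplicitAmount
                                              : (Amount != 0)));
}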
+ void addMemExtend8Operands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands!"); + AArch64_AM::ShiftExtendType ET = getShiftExtendType(); + bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX; + Inst.addOperand(MCOperand::CreateImm(IsSigned)); + Inst.addOperand(MCOperand::CreateImm(hasShiftExtendAmount())); + } + + template + void addMOVZMovAliasOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - // A bit from each byte in the constant forms the encoded immediate - const MCConstantExpr *CE = dyn_cast(getImm()); + const MCConstantExpr *CE = cast(getImm()); uint64_t Value = CE->getValue(); - - unsigned Imm = 0; - for (unsigned i = 0; i < 8; ++i, Value >>= 8) { - Imm |= (Value & 1) << i; - } - Inst.addOperand(MCOperand::CreateImm(Imm)); + Inst.addOperand(MCOperand::CreateImm((Value >> Shift) & 0xffff)); } - void addVectorListOperands(MCInst &Inst, unsigned N) const { + template + void addMOVNMovAliasOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + + const MCConstantExpr *CE = cast(getImm()); + uint64_t Value = CE->getValue(); + Inst.addOperand(MCOperand::CreateImm((~Value >> Shift) & 0xffff)); } -}; -} // end anonymous namespace. + void print(raw_ostream &OS) const override; -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseOperand(SmallVectorImpl &Operands, - StringRef Mnemonic) { + static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, + MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_Token, Ctx); + Op->Tok.Data = Str.data(); + Op->Tok.Length = Str.size(); + Op->Tok.IsSuffix = IsSuffix; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } - // See if the operand has a custom parser - OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); + static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, + SMLoc E, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_Register, Ctx); + Op->Reg.RegNum = RegNum; + Op->Reg.isVector = isVector; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } - // It could either succeed, fail or just not care. - if (ResTy != MatchOperand_NoMatch) - return ResTy; + static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, + unsigned NumElements, char ElementKind, + SMLoc S, SMLoc E, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->VectorList.NumElements = NumElements; + Op->VectorList.ElementKind = ElementKind; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } - switch (getLexer().getKind()) { - default: - Error(Parser.getTok().getLoc(), "unexpected token in operand"); - return MatchOperand_ParseFail; - case AsmToken::Identifier: { - // It might be in the LSL/UXTB family ... - OperandMatchResultTy GotShift = ParseShiftExtend(Operands); + static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, + MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_VectorIndex, Ctx); + Op->VectorIndex.Val = Idx; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } - // We can only continue if no tokens were eaten. 
- if (GotShift != MatchOperand_NoMatch) - return GotShift; + static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, + MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx); + Op->Imm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } - // ... or it might be a register ... - uint32_t NumLanes = 0; - OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes); - assert(GotReg != MatchOperand_ParseFail - && "register parsing shouldn't partially succeed"); - - if (GotReg == MatchOperand_Success) { - if (Parser.getTok().is(AsmToken::LBrac)) - return ParseNEONLane(Operands, NumLanes); - else - return MatchOperand_Success; - } - // ... or it might be a symbolish thing - } - // Fall through - case AsmToken::LParen: // E.g. (strcmp-4) - case AsmToken::Integer: // 1f, 2b labels - case AsmToken::String: // quoted labels - case AsmToken::Dot: // . is Current location - case AsmToken::Dollar: // $ is PC - case AsmToken::Colon: { - SMLoc StartLoc = Parser.getTok().getLoc(); - SMLoc EndLoc; - const MCExpr *ImmVal = 0; - - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; + static AArch64Operand *CreateShiftedImm(const MCExpr *Val, + unsigned ShiftAmount, SMLoc S, + SMLoc E, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx); + Op->ShiftedImm .Val = Val; + Op->ShiftedImm.ShiftAmount = ShiftAmount; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } - EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); - return MatchOperand_Success; + static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S, + SMLoc E, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx); + Op->CondCode.Code = Code; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; } - case AsmToken::Hash: { // Immediates - SMLoc StartLoc = Parser.getTok().getLoc(); - SMLoc EndLoc; - const MCExpr *ImmVal = 0; - Parser.Lex(); - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; + static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx); + Op->FPImm.Val = Val; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } - EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc)); - return MatchOperand_Success; + static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { + AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx); + Op->Barrier.Val = Val; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; } - case AsmToken::LBrac: { - SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("[", Loc)); - Parser.Lex(); // Eat '[' - // There's no comma after a '[', so we can parse the next operand - // immediately. 
-    return ParseOperand(Operands, Mnemonic);
+  static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S,
+                                      uint64_t FeatureBits, MCContext &Ctx) {
+    AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx);
+    Op->SysReg.Data = Str.data();
+    Op->SysReg.Length = Str.size();
+    Op->SysReg.FeatureBits = FeatureBits;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
   }
-  // The following will likely be useful later, but not in very early cases
-  case AsmToken::LCurly: // SIMD vector list is not parsed here
-    llvm_unreachable("Don't know how to deal with '{' in operand");
-    return MatchOperand_ParseFail;
+
+  static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E,
+                                     MCContext &Ctx) {
+    AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx);
+    Op->SysCRImm.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
   }
-}
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) {
-  if (getLexer().is(AsmToken::Colon)) {
-    AArch64MCExpr::VariantKind RefKind;
+  static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) {
+    AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx);
+    Op->Prefetch.Val = Val;
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return Op;
+  }
-    OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind);
-    if (ResTy != MatchOperand_Success)
-      return ResTy;
+  static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp,
+                                           unsigned Val, bool HasExplicitAmount,
+                                           SMLoc S, SMLoc E, MCContext &Ctx) {
+    AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx);
+    Op->ShiftExtend.Type = ShOp;
+    Op->ShiftExtend.Amount = Val;
+    Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+};
-    const MCExpr *SubExprVal;
-    if (getParser().parseExpression(SubExprVal))
-      return MatchOperand_ParseFail;
+} // end anonymous namespace.
-    ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext());
-    return MatchOperand_Success;
+void AArch64Operand::print(raw_ostream &OS) const {
+  switch (Kind) {
+  case k_FPImm:
+    OS << "<fpimm " << getFPImm() << "("
+       << AArch64_AM::getFPImmFloat(getFPImm()) << ") >";
+    break;
+  case k_Barrier: {
+    bool Valid;
+    StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid);
+    if (Valid)
+      OS << "<barrier " << Name << ">";
+    else
+      OS << "<barrier invalid #" << getBarrier() << ">";
+    break;
+  }
+  case k_Immediate:
+    getImm()->print(OS);
+    break;
+  case k_ShiftedImm: {
+    unsigned Shift = getShiftedImmShift();
+    OS << "<shiftedimm ";
+    getImm()->print(OS);
+    OS << ", lsl #" << AArch64_AM::getShiftValue(Shift) << ">";
+    break;
+  }
+  case k_CondCode:
+    OS << "<condcode " << getCondCode() << ">";
+    break;
+  case k_Register:
+    OS << "<register " << getReg() << ">";
+    break;
+  case k_VectorList: {
+    OS << "<vectorlist ";
+    unsigned Reg = getVectorListStart();
+    for (unsigned i = 0, e = getVectorListCount(); i != e; ++i)
+      OS << Reg + i << " ";
+    OS << ">";
+    break;
+  }
+  case k_VectorIndex:
+    OS << "<vectorindex " << getVectorIndex() << ">";
+    break;
+  case k_SysReg:
+    OS << "<sysreg: " << getSysReg() << '>';
+    break;
+  case k_Token:
+    OS << "'" << getToken() << "'";
+    break;
+  case k_SysCR:
+    OS << "c" << getSysCR();
+    break;
+  case k_Prefetch: {
+    bool Valid;
+    StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid);
+    if (Valid)
+      OS << "<prfop " << Name << ">";
+    else
+      OS << "<prfop invalid #" << getPrefetch() << ">";
+    break;
+  }
+  case k_ShiftExtend: {
+    OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
+       << getShiftExtendAmount();
+    if (!hasShiftExtendAmount())
+      OS << "<imp>";
+    OS << '>';
+    break;
+  }
+  }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
-  // No weird AArch64MCExpr prefix
-  return getParser().parseExpression(ExprVal)
-    ? MatchOperand_ParseFail : MatchOperand_Success;
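// Illustration (not from the patch): how the addMOVZMovAliasOperands and
// addMOVNMovAliasOperands templates above extract the 16-bit chunk for a
// "mov" alias; movzChunk/movnChunk are hypothetical names. For
// "mov x0, #0x12340000" (MOVZ, Shift == 16) the chunk is 0x1234; for
// "mov x0, #-2" (MOVN, Shift == 0) the inverted chunk is 0x0001, since
// MOVN x0, #1 writes ~1 == -2.
static uint64_t movzChunk(uint64_t Value, int Shift) {
  return (Value >> Shift) & 0xffff; // the bits MOVZ places at position Shift
}
static uint64_t movnChunk(uint64_t Value, int Shift) {
  return (~Value >> Shift) & 0xffff; // MOVN stores the complemented bits
}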
+/// }
+
+static unsigned matchVectorRegName(StringRef Name) {
+  return StringSwitch<unsigned>(Name)
+      .Case("v0", AArch64::Q0)
+      .Case("v1", AArch64::Q1)
+      .Case("v2", AArch64::Q2)
+      .Case("v3", AArch64::Q3)
+      .Case("v4", AArch64::Q4)
+      .Case("v5", AArch64::Q5)
+      .Case("v6", AArch64::Q6)
+      .Case("v7", AArch64::Q7)
+      .Case("v8", AArch64::Q8)
+      .Case("v9", AArch64::Q9)
+      .Case("v10", AArch64::Q10)
+      .Case("v11", AArch64::Q11)
+      .Case("v12", AArch64::Q12)
+      .Case("v13", AArch64::Q13)
+      .Case("v14", AArch64::Q14)
+      .Case("v15", AArch64::Q15)
+      .Case("v16", AArch64::Q16)
+      .Case("v17", AArch64::Q17)
+      .Case("v18", AArch64::Q18)
+      .Case("v19", AArch64::Q19)
+      .Case("v20", AArch64::Q20)
+      .Case("v21", AArch64::Q21)
+      .Case("v22", AArch64::Q22)
+      .Case("v23", AArch64::Q23)
+      .Case("v24", AArch64::Q24)
+      .Case("v25", AArch64::Q25)
+      .Case("v26", AArch64::Q26)
+      .Case("v27", AArch64::Q27)
+      .Case("v28", AArch64::Q28)
+      .Case("v29", AArch64::Q29)
+      .Case("v30", AArch64::Q30)
+      .Case("v31", AArch64::Q31)
+      .Default(0);
+}
-// A lane attached to a NEON register. "[N]", which should yield three tokens:
-// '[', N, ']'. A hash is not allowed to precede the immediate here.
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                                uint32_t NumLanes) {
-  SMLoc Loc = Parser.getTok().getLoc();
+static bool isValidVectorKind(StringRef Name) {
+  return StringSwitch<bool>(Name.lower())
+      .Case(".8b", true)
+      .Case(".16b", true)
+      .Case(".4h", true)
+      .Case(".8h", true)
+      .Case(".2s", true)
+      .Case(".4s", true)
+      .Case(".1d", true)
+      .Case(".2d", true)
+      .Case(".1q", true)
+      // Accept the width neutral ones, too, for verbose syntax. If those
+      // aren't used in the right places, the token operand won't match so
+      // all will work out.
+      .Case(".b", true)
+      .Case(".h", true)
+      .Case(".s", true)
+      .Case(".d", true)
+      .Default(false);
+}
-  assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand");
-  Operands.push_back(AArch64Operand::CreateToken("[", Loc));
-  Parser.Lex(); // Eat '['
+static void parseValidVectorKind(StringRef Name, unsigned &NumElements,
+                                 char &ElementKind) {
+  assert(isValidVectorKind(Name));
-  if (Parser.getTok().isNot(AsmToken::Integer)) {
-    Error(Parser.getTok().getLoc(), "expected lane number");
-    return MatchOperand_ParseFail;
-  }
+  ElementKind = Name.lower()[Name.size() - 1];
+  NumElements = 0;
-  if (Parser.getTok().getIntVal() >= NumLanes) {
-    Error(Parser.getTok().getLoc(), "lane number incompatible with layout");
-    return MatchOperand_ParseFail;
+  if (Name.size() == 2)
+    return;
+
+  // Parse the lane count
+  Name = Name.drop_front();
+  while (isdigit(Name.front())) {
+    NumElements = 10 * NumElements + (Name.front() - '0');
+    Name = Name.drop_front();
   }
+}
-  const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(),
-                                              getContext());
-  SMLoc S = Parser.getTok().getLoc();
-  Parser.Lex(); // Eat actual lane
-  SMLoc E = Parser.getTok().getLoc();
-  Operands.push_back(AArch64Operand::CreateImm(Lane, S, E));
+bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                     SMLoc &EndLoc) {
+  StartLoc = getLoc();
+  RegNo = tryParseRegister();
+  EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+  return (RegNo == (unsigned)-1);
+}
+/// tryParseRegister - Try to parse a register name. The token must be an
+/// Identifier when called, and if it is a register name the token is eaten and
+/// the register is added to the operand list.
+int AArch64AsmParser::tryParseRegister() { + const AsmToken &Tok = Parser.getTok(); + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + + std::string lowerCase = Tok.getString().lower(); + unsigned RegNum = MatchRegisterName(lowerCase); + // Also handle a few aliases of registers. + if (RegNum == 0) + RegNum = StringSwitch(lowerCase) + .Case("fp", AArch64::FP) + .Case("lr", AArch64::LR) + .Case("x31", AArch64::XZR) + .Case("w31", AArch64::WZR) + .Default(0); + + if (RegNum == 0) + return -1; + + Parser.Lex(); // Eat identifier token. + return RegNum; +} - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(Parser.getTok().getLoc(), "expected ']' after lane"); - return MatchOperand_ParseFail; +/// tryMatchVectorRegister - Try to parse a vector register name with optional +/// kind specifier. If it is a register specifier, eat the token and return it. +int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { + if (Parser.getTok().isNot(AsmToken::Identifier)) { + TokError("vector register expected"); + return -1; + } + + StringRef Name = Parser.getTok().getString(); + // If there is a kind specifier, it's separated from the register name by + // a '.'. + size_t Start = 0, Next = Name.find('.'); + StringRef Head = Name.slice(Start, Next); + unsigned RegNum = matchVectorRegName(Head); + if (RegNum) { + if (Next != StringRef::npos) { + Kind = Name.slice(Next, StringRef::npos); + if (!isValidVectorKind(Kind)) { + TokError("invalid vector kind qualifier"); + return -1; + } + } + Parser.Lex(); // Eat the register token. + return RegNum; } - Operands.push_back(AArch64Operand::CreateToken("]", Loc)); - Parser.Lex(); // Eat ']' - - return MatchOperand_Success; + if (expected) + TokError("vector register expected"); + return -1; } +/// tryParseSysCROperand - Try to parse a system instruction CR operand name. 
AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) { - assert(getLexer().is(AsmToken::Colon) && "expected a ':'"); - Parser.Lex(); +AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) { + SMLoc S = getLoc(); - if (getLexer().isNot(AsmToken::Identifier)) { - Error(Parser.getTok().getLoc(), - "expected relocation specifier in operand after ':'"); + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(S, "Expected cN operand where 0 <= N <= 15"); return MatchOperand_ParseFail; } - std::string LowerCase = Parser.getTok().getIdentifier().lower(); - RefKind = StringSwitch(LowerCase) - .Case("got", AArch64MCExpr::VK_AARCH64_GOT) - .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12) - .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12) - .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0) - .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC) - .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1) - .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC) - .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2) - .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC) - .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3) - .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0) - .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1) - .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2) - .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2) - .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1) - .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC) - .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0) - .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC) - .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12) - .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12) - .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC) - .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1) - .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC) - .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL) - .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12) - .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2) - .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1) - .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC) - .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0) - .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC) - .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12) - .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12) - .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC) - .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC) - .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12) - .Default(AArch64MCExpr::VK_AARCH64_None); - - if (RefKind == AArch64MCExpr::VK_AARCH64_None) { - Error(Parser.getTok().getLoc(), - "expected relocation specifier in operand after ':'"); + StringRef Tok = Parser.getTok().getIdentifier(); + if (Tok[0] != 'c' && Tok[0] != 'C') { + Error(S, "Expected cN operand where 0 <= N <= 15"); return MatchOperand_ParseFail; } - Parser.Lex(); // Eat identifier - if (getLexer().isNot(AsmToken::Colon)) { - Error(Parser.getTok().getLoc(), - "expected ':' after relocation specifier"); + uint32_t CRNum; + bool BadNum = Tok.drop_front().getAsInteger(10, CRNum); + if (BadNum || CRNum > 15) { + Error(S, "Expected cN operand where 0 <= N <= 15"); return MatchOperand_ParseFail; } - Parser.Lex(); + + Parser.Lex(); // Eat identifier token. 
+ Operands.push_back( + AArch64Operand::CreateSysCR(CRNum, S, getLoc(), getContext())); return MatchOperand_Success; } +/// tryParsePrefetch - Try to parse a prefetch operand. AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseImmWithLSLOperand( - SmallVectorImpl &Operands) { +AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { + SMLoc S = getLoc(); + const AsmToken &Tok = Parser.getTok(); + // Either an identifier for named values or a 5-bit immediate. + bool Hash = Tok.is(AsmToken::Hash); + if (Hash || Tok.is(AsmToken::Integer)) { + if (Hash) + Parser.Lex(); // Eat hash token. + const MCExpr *ImmVal; + if (getParser().parseExpression(ImmVal)) + return MatchOperand_ParseFail; - SMLoc S = Parser.getTok().getLoc(); + const MCConstantExpr *MCE = dyn_cast(ImmVal); + if (!MCE) { + TokError("immediate value expected for prefetch operand"); + return MatchOperand_ParseFail; + } + unsigned prfop = MCE->getValue(); + if (prfop > 31) { + TokError("prefetch operand out of range, [0,31] expected"); + return MatchOperand_ParseFail; + } - if (Parser.getTok().is(AsmToken::Hash)) - Parser.Lex(); // Eat '#' - else if (Parser.getTok().isNot(AsmToken::Integer)) - // Operand should start from # or should be integer, emit error otherwise. - return MatchOperand_NoMatch; + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + return MatchOperand_Success; + } - const MCExpr *Imm; - if (ParseImmediate(Imm) != MatchOperand_Success) + if (Tok.isNot(AsmToken::Identifier)) { + TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; - else if (Parser.getTok().isNot(AsmToken::Comma)) { - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E)); - return MatchOperand_Success; } - // Eat ',' - Parser.Lex(); + bool Valid; + unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid); + if (!Valid) { + TokError("pre-fetch hint expected"); + return MatchOperand_ParseFail; + } - // The optional operand must be "lsl #N" where N is non-negative. - if (Parser.getTok().is(AsmToken::Identifier) - && Parser.getTok().getIdentifier().equals_lower("lsl")) { - Parser.Lex(); + Parser.Lex(); // Eat identifier token. + Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext())); + return MatchOperand_Success; +} - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); +/// tryParseAdrpLabel - Parse and validate a source label for the ADRP +/// instruction. +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { + SMLoc S = getLoc(); + const MCExpr *Expr; - if (Parser.getTok().isNot(AsmToken::Integer)) { - Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); - return MatchOperand_ParseFail; - } - } + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); // Eat hash token. } - int64_t ShiftAmount = Parser.getTok().getIntVal(); - - if (ShiftAmount < 0) { - Error(Parser.getTok().getLoc(), "positive shift amount required"); + if (parseSymbolicImmVal(Expr)) return MatchOperand_ParseFail; + + AArch64MCExpr::VariantKind ELFRefKind; + MCSymbolRefExpr::VariantKind DarwinRefKind; + int64_t Addend; + if (classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { + if (DarwinRefKind == MCSymbolRefExpr::VK_None && + ELFRefKind == AArch64MCExpr::VK_INVALID) { + // No modifier was specified at all; this is the syntax for an ELF basic + // ADRP relocation (unfortunately). 
+ Expr = + AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext()); + } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE || + DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) && + Addend != 0) { + Error(S, "gotpage label reference not allowed an addend"); + return MatchOperand_ParseFail; + } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE && + DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE && + DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE && + ELFRefKind != AArch64MCExpr::VK_GOT_PAGE && + ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE && + ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) { + // The operand must be an @page or @gotpage qualified symbolref. + Error(S, "page or gotpage label reference expected"); + return MatchOperand_ParseFail; + } } - Parser.Lex(); // Eat the number - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount, - false, S, E)); + // We have either a label reference possibly with addend or an immediate. The + // addend is a raw value here. The linker will adjust it to only reference the + // page. + SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); + return MatchOperand_Success; } - +/// tryParseAdrLabel - Parse and validate a source label for the ADR +/// instruction. AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseCondCodeOperand( - SmallVectorImpl &Operands) { - if (Parser.getTok().isNot(AsmToken::Identifier)) - return MatchOperand_NoMatch; +AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) { + SMLoc S = getLoc(); + const MCExpr *Expr; - StringRef Tok = Parser.getTok().getIdentifier(); - A64CC::CondCodes CondCode = A64StringToCondCode(Tok); + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); // Eat hash token. + } - if (CondCode == A64CC::Invalid) - return MatchOperand_NoMatch; + if (getParser().parseExpression(Expr)) + return MatchOperand_ParseFail; - SMLoc S = Parser.getTok().getLoc(); - Parser.Lex(); // Eat condition code - SMLoc E = Parser.getTok().getLoc(); + SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); - Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E)); return MatchOperand_Success; } +/// tryParseFPImm - A floating point immediate expression operand. AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseCRxOperand( - SmallVectorImpl &Operands) { - SMLoc S = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::Identifier)) { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return MatchOperand_ParseFail; - } +AArch64AsmParser::tryParseFPImm(OperandVector &Operands) { + SMLoc S = getLoc(); - StringRef Tok = Parser.getTok().getIdentifier(); - if (Tok[0] != 'c' && Tok[0] != 'C') { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return MatchOperand_ParseFail; + bool Hash = false; + if (Parser.getTok().is(AsmToken::Hash)) { + Parser.Lex(); // Eat '#' + Hash = true; } - uint32_t CRNum; - bool BadNum = Tok.drop_front().getAsInteger(10, CRNum); - if (BadNum || CRNum > 15) { - Error(S, "Expected cN operand where 0 <= N <= 15"); - return MatchOperand_ParseFail; + // Handle negation, as that still comes through as a separate token. 
+ bool isNegative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + isNegative = true; + Parser.Lex(); + } + const AsmToken &Tok = Parser.getTok(); + if (Tok.is(AsmToken::Real)) { + APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); + // If we had a '-' in front, toggle the sign bit. + IntVal ^= (uint64_t)isNegative << 63; + int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); + Parser.Lex(); // Eat the token. + // Check for out of range values. As an exception, we let Zero through, + // as we handle that special case in post-processing before matching in + // order to use the zero register for it. + if (Val == -1 && !RealVal.isZero()) { + TokError("expected compatible register or floating-point constant"); + return MatchOperand_ParseFail; + } + Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); + return MatchOperand_Success; + } + if (Tok.is(AsmToken::Integer)) { + int64_t Val; + if (!isNegative && Tok.getString().startswith("0x")) { + Val = Tok.getIntVal(); + if (Val > 255 || Val < 0) { + TokError("encoded floating point value out of range"); + return MatchOperand_ParseFail; + } + } else { + APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); + // If we had a '-' in front, toggle the sign bit. + IntVal ^= (uint64_t)isNegative << 63; + Val = AArch64_AM::getFP64Imm(APInt(64, IntVal)); + } + Parser.Lex(); // Eat the token. + Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext())); + return MatchOperand_Success; } - const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext()); - - Parser.Lex(); - SMLoc E = Parser.getTok().getLoc(); + if (!Hash) + return MatchOperand_NoMatch; - Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E)); - return MatchOperand_Success; + TokError("invalid floating point immediate"); + return MatchOperand_ParseFail; } +/// tryParseAddSubImm - Parse ADD/SUB shifted immediate operand AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseFPImmOperand( - SmallVectorImpl &Operands) { +AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) { + SMLoc S = getLoc(); + + if (Parser.getTok().is(AsmToken::Hash)) + Parser.Lex(); // Eat '#' + else if (Parser.getTok().isNot(AsmToken::Integer)) + // Operand should start from # or should be integer, emit error otherwise. + return MatchOperand_NoMatch; + + const MCExpr *Imm; + if (parseSymbolicImmVal(Imm)) + return MatchOperand_ParseFail; + else if (Parser.getTok().isNot(AsmToken::Comma)) { + uint64_t ShiftAmount = 0; + const MCConstantExpr *MCE = dyn_cast(Imm); + if (MCE) { + int64_t Val = MCE->getValue(); + if (Val > 0xfff && (Val & 0xfff) == 0) { + Imm = MCConstantExpr::Create(Val >> 12, getContext()); + ShiftAmount = 12; + } + } + SMLoc E = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount, S, E, + getContext())); + return MatchOperand_Success; + } + + // Eat ',' + Parser.Lex(); + + // The optional operand must be "lsl #N" where N is non-negative. 
+ if (!Parser.getTok().is(AsmToken::Identifier) || + !Parser.getTok().getIdentifier().equals_lower("lsl")) { + Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); + return MatchOperand_ParseFail; + } - SMLoc S = Parser.getTok().getLoc(); + // Eat 'lsl' + Parser.Lex(); - bool Hash = false; if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat '#' - Hash = true; + Parser.Lex(); } - bool Negative = false; - if (Parser.getTok().is(AsmToken::Minus)) { - Negative = true; - Parser.Lex(); // Eat '-' - } else if (Parser.getTok().is(AsmToken::Plus)) { - Parser.Lex(); // Eat '+' + if (Parser.getTok().isNot(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate"); + return MatchOperand_ParseFail; } - if (Parser.getTok().isNot(AsmToken::Real)) { - if (!Hash) - return MatchOperand_NoMatch; - Error(S, "Expected floating-point immediate"); + int64_t ShiftAmount = Parser.getTok().getIntVal(); + + if (ShiftAmount < 0) { + Error(Parser.getTok().getLoc(), "positive shift amount required"); return MatchOperand_ParseFail; } + Parser.Lex(); // Eat the number - APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString()); - if (Negative) RealVal.changeSign(); - double DblVal = RealVal.convertToDouble(); - - Parser.Lex(); // Eat real number SMLoc E = Parser.getTok().getLoc(); - - Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E)); + Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount, + S, E, getContext())); return MatchOperand_Success; } -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseFPImm0AndImm0Operand( - SmallVectorImpl &Operands) { +/// parseCondCodeString - Parse a Condition Code string. +AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) { + AArch64CC::CondCode CC = StringSwitch(Cond.lower()) + .Case("eq", AArch64CC::EQ) + .Case("ne", AArch64CC::NE) + .Case("cs", AArch64CC::HS) + .Case("hs", AArch64CC::HS) + .Case("cc", AArch64CC::LO) + .Case("lo", AArch64CC::LO) + .Case("mi", AArch64CC::MI) + .Case("pl", AArch64CC::PL) + .Case("vs", AArch64CC::VS) + .Case("vc", AArch64CC::VC) + .Case("hi", AArch64CC::HI) + .Case("ls", AArch64CC::LS) + .Case("ge", AArch64CC::GE) + .Case("lt", AArch64CC::LT) + .Case("gt", AArch64CC::GT) + .Case("le", AArch64CC::LE) + .Case("al", AArch64CC::AL) + .Case("nv", AArch64CC::NV) + .Default(AArch64CC::Invalid); + return CC; +} - SMLoc S = Parser.getTok().getLoc(); +/// parseCondCode - Parse a Condition Code operand. +bool AArch64AsmParser::parseCondCode(OperandVector &Operands, + bool invertCondCode) { + SMLoc S = getLoc(); + const AsmToken &Tok = Parser.getTok(); + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - bool Hash = false; - if (Parser.getTok().is(AsmToken::Hash)) { - Parser.Lex(); // Eat '#' - Hash = true; - } + StringRef Cond = Tok.getString(); + AArch64CC::CondCode CC = parseCondCodeString(Cond); + if (CC == AArch64CC::Invalid) + return TokError("invalid condition code"); + Parser.Lex(); // Eat identifier token. 
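// Illustration (not from the patch): tryParseAddSubImm above re-encodes a
// large plain immediate in the shifted form when the low 12 bits are clear;
// foldAddSubImm is a hypothetical standalone rendering of that check. For
// "add x0, x1, #0x3000" it yields Imm == 0x3 with "lsl #12".
static void foldAddSubImm(int64_t Val, int64_t &Imm, unsigned &ShiftAmount) {
  if (Val > 0xfff && (Val & 0xfff) == 0) {
    Imm = Val >> 12;  // 0x3000 -> 0x3
    ShiftAmount = 12; // matched later as the "lsl #12" form
  } else {
    Imm = Val;
    ShiftAmount = 0;
  }
}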
- APFloat RealVal(0.0); - if (Parser.getTok().is(AsmToken::Real)) { - if(Parser.getTok().getString() != "0.0") { - Error(S, "only #0.0 is acceptable as immediate"); - return MatchOperand_ParseFail; - } - } - else if (Parser.getTok().is(AsmToken::Integer)) { - if(Parser.getTok().getIntVal() != 0) { - Error(S, "only #0.0 is acceptable as immediate"); + if (invertCondCode) + CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC)); + + Operands.push_back( + AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext())); + return false; +} + +/// tryParseOptionalShift - Some operands take an optional shift argument. Parse +/// them if present. +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) { + const AsmToken &Tok = Parser.getTok(); + std::string LowerID = Tok.getString().lower(); + AArch64_AM::ShiftExtendType ShOp = + StringSwitch(LowerID) + .Case("lsl", AArch64_AM::LSL) + .Case("lsr", AArch64_AM::LSR) + .Case("asr", AArch64_AM::ASR) + .Case("ror", AArch64_AM::ROR) + .Case("msl", AArch64_AM::MSL) + .Case("uxtb", AArch64_AM::UXTB) + .Case("uxth", AArch64_AM::UXTH) + .Case("uxtw", AArch64_AM::UXTW) + .Case("uxtx", AArch64_AM::UXTX) + .Case("sxtb", AArch64_AM::SXTB) + .Case("sxth", AArch64_AM::SXTH) + .Case("sxtw", AArch64_AM::SXTW) + .Case("sxtx", AArch64_AM::SXTX) + .Default(AArch64_AM::InvalidShiftExtend); + + if (ShOp == AArch64_AM::InvalidShiftExtend) + return MatchOperand_NoMatch; + + SMLoc S = Tok.getLoc(); + Parser.Lex(); + + bool Hash = getLexer().is(AsmToken::Hash); + if (!Hash && getLexer().isNot(AsmToken::Integer)) { + if (ShOp == AArch64_AM::LSL || ShOp == AArch64_AM::LSR || + ShOp == AArch64_AM::ASR || ShOp == AArch64_AM::ROR || + ShOp == AArch64_AM::MSL) { + // We expect a number here. + TokError("expected #imm after shift specifier"); return MatchOperand_ParseFail; } + + // "extend" type operatoins don't need an immediate, #0 is implicit. + SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); + Operands.push_back( + AArch64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext())); + return MatchOperand_Success; } - else { - if (!Hash) - return MatchOperand_NoMatch; - Error(S, "only #0.0 is acceptable as immediate"); + + if (Hash) + Parser.Lex(); // Eat the '#'. + + // Make sure we do actually have a number + if (!Parser.getTok().is(AsmToken::Integer)) { + Error(Parser.getTok().getLoc(), + "expected integer shift amount"); return MatchOperand_ParseFail; } - Parser.Lex(); // Eat real number - SMLoc E = Parser.getTok().getLoc(); + const MCExpr *ImmVal; + if (getParser().parseExpression(ImmVal)) + return MatchOperand_ParseFail; + + const MCConstantExpr *MCE = dyn_cast(ImmVal); + if (!MCE) { + TokError("expected #imm after shift specifier"); + return MatchOperand_ParseFail; + } - Operands.push_back(AArch64Operand::CreateFPImm(0.0, S, E)); + SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateShiftExtend( + ShOp, MCE->getValue(), true, S, E, getContext())); return MatchOperand_Success; } -// Automatically generated -static unsigned MatchRegisterName(StringRef Name); - -bool -AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, - StringRef &Layout, - SMLoc &LayoutLoc) const { - const AsmToken &Tok = Parser.getTok(); - - if (Tok.isNot(AsmToken::Identifier)) - return false; +/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for +/// the SYS instruction. Parse them specially so that we create a SYS MCInst. 
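// Illustration (not from the patch): for "ic ivau, x0" the function below
// pushes the equivalent of "sys #3, c7, c5, #1, x0" -- the token "sys",
// Imm(3), SysCR(7), SysCR(5), Imm(1), then the register. Whether a register
// is required is inferred from the operand name; the helper name here is
// hypothetical:
static bool sysAliasExpectsRegister(StringRef Op) {
  // "ivau" needs an address register; "iallu" and "alle1is" act on
  // everything and take none.
  return Op.lower().find("all") == StringRef::npos;
}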
+bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { + if (Name.find('.') != StringRef::npos) + return TokError("invalid operand"); - std::string LowerReg = Tok.getString().lower(); - size_t DotPos = LowerReg.find('.'); + Mnemonic = Name; + Operands.push_back( + AArch64Operand::CreateToken("sys", false, NameLoc, getContext())); - bool IsVec128 = false; + const AsmToken &Tok = Parser.getTok(); + StringRef Op = Tok.getString(); SMLoc S = Tok.getLoc(); - RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); - if (DotPos == std::string::npos) { - Layout = StringRef(); - } else { - // Everything afterwards needs to be a literal token, expected to be - // '.2d','.b' etc for vector registers. - - // This StringSwitch validates the input and (perhaps more importantly) - // gives us a permanent string to use in the token (a pointer into LowerReg - // would go out of scope when we return). - LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); - StringRef LayoutText = StringRef(LowerReg).substr(DotPos); - - // See if it's a 128-bit layout first. - Layout = StringSwitch(LayoutText) - .Case(".q", ".q").Case(".1q", ".1q") - .Case(".d", ".d").Case(".2d", ".2d") - .Case(".s", ".s").Case(".4s", ".4s") - .Case(".h", ".h").Case(".8h", ".8h") - .Case(".b", ".b").Case(".16b", ".16b") - .Default(""); - - if (Layout.size() != 0) - IsVec128 = true; - else { - Layout = StringSwitch(LayoutText) - .Case(".1d", ".1d") - .Case(".2s", ".2s") - .Case(".4h", ".4h") - .Case(".8b", ".8b") - .Default(""); + const MCExpr *Expr = nullptr; + +#define SYS_ALIAS(op1, Cn, Cm, op2) \ + do { \ + Expr = MCConstantExpr::Create(op1, getContext()); \ + Operands.push_back( \ + AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ + Operands.push_back( \ + AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \ + Operands.push_back( \ + AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \ + Expr = MCConstantExpr::Create(op2, getContext()); \ + Operands.push_back( \ + AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \ + } while (0) + + if (Mnemonic == "ic") { + if (!Op.compare_lower("ialluis")) { + // SYS #0, C7, C1, #0 + SYS_ALIAS(0, 7, 1, 0); + } else if (!Op.compare_lower("iallu")) { + // SYS #0, C7, C5, #0 + SYS_ALIAS(0, 7, 5, 0); + } else if (!Op.compare_lower("ivau")) { + // SYS #3, C7, C5, #1 + SYS_ALIAS(3, 7, 5, 1); + } else { + return TokError("invalid operand for IC instruction"); } - - if (Layout.size() == 0) { - // If we've still not pinned it down the register is malformed. 
- return false; + } else if (Mnemonic == "dc") { + if (!Op.compare_lower("zva")) { + // SYS #3, C7, C4, #1 + SYS_ALIAS(3, 7, 4, 1); + } else if (!Op.compare_lower("ivac")) { + // SYS #3, C7, C6, #1 + SYS_ALIAS(0, 7, 6, 1); + } else if (!Op.compare_lower("isw")) { + // SYS #0, C7, C6, #2 + SYS_ALIAS(0, 7, 6, 2); + } else if (!Op.compare_lower("cvac")) { + // SYS #3, C7, C10, #1 + SYS_ALIAS(3, 7, 10, 1); + } else if (!Op.compare_lower("csw")) { + // SYS #0, C7, C10, #2 + SYS_ALIAS(0, 7, 10, 2); + } else if (!Op.compare_lower("cvau")) { + // SYS #3, C7, C11, #1 + SYS_ALIAS(3, 7, 11, 1); + } else if (!Op.compare_lower("civac")) { + // SYS #3, C7, C14, #1 + SYS_ALIAS(3, 7, 14, 1); + } else if (!Op.compare_lower("cisw")) { + // SYS #0, C7, C14, #2 + SYS_ALIAS(0, 7, 14, 2); + } else { + return TokError("invalid operand for DC instruction"); + } + } else if (Mnemonic == "at") { + if (!Op.compare_lower("s1e1r")) { + // SYS #0, C7, C8, #0 + SYS_ALIAS(0, 7, 8, 0); + } else if (!Op.compare_lower("s1e2r")) { + // SYS #4, C7, C8, #0 + SYS_ALIAS(4, 7, 8, 0); + } else if (!Op.compare_lower("s1e3r")) { + // SYS #6, C7, C8, #0 + SYS_ALIAS(6, 7, 8, 0); + } else if (!Op.compare_lower("s1e1w")) { + // SYS #0, C7, C8, #1 + SYS_ALIAS(0, 7, 8, 1); + } else if (!Op.compare_lower("s1e2w")) { + // SYS #4, C7, C8, #1 + SYS_ALIAS(4, 7, 8, 1); + } else if (!Op.compare_lower("s1e3w")) { + // SYS #6, C7, C8, #1 + SYS_ALIAS(6, 7, 8, 1); + } else if (!Op.compare_lower("s1e0r")) { + // SYS #0, C7, C8, #3 + SYS_ALIAS(0, 7, 8, 2); + } else if (!Op.compare_lower("s1e0w")) { + // SYS #0, C7, C8, #3 + SYS_ALIAS(0, 7, 8, 3); + } else if (!Op.compare_lower("s12e1r")) { + // SYS #4, C7, C8, #4 + SYS_ALIAS(4, 7, 8, 4); + } else if (!Op.compare_lower("s12e1w")) { + // SYS #4, C7, C8, #5 + SYS_ALIAS(4, 7, 8, 5); + } else if (!Op.compare_lower("s12e0r")) { + // SYS #4, C7, C8, #6 + SYS_ALIAS(4, 7, 8, 6); + } else if (!Op.compare_lower("s12e0w")) { + // SYS #4, C7, C8, #7 + SYS_ALIAS(4, 7, 8, 7); + } else { + return TokError("invalid operand for AT instruction"); + } + } else if (Mnemonic == "tlbi") { + if (!Op.compare_lower("vmalle1is")) { + // SYS #0, C8, C3, #0 + SYS_ALIAS(0, 8, 3, 0); + } else if (!Op.compare_lower("alle2is")) { + // SYS #4, C8, C3, #0 + SYS_ALIAS(4, 8, 3, 0); + } else if (!Op.compare_lower("alle3is")) { + // SYS #6, C8, C3, #0 + SYS_ALIAS(6, 8, 3, 0); + } else if (!Op.compare_lower("vae1is")) { + // SYS #0, C8, C3, #1 + SYS_ALIAS(0, 8, 3, 1); + } else if (!Op.compare_lower("vae2is")) { + // SYS #4, C8, C3, #1 + SYS_ALIAS(4, 8, 3, 1); + } else if (!Op.compare_lower("vae3is")) { + // SYS #6, C8, C3, #1 + SYS_ALIAS(6, 8, 3, 1); + } else if (!Op.compare_lower("aside1is")) { + // SYS #0, C8, C3, #2 + SYS_ALIAS(0, 8, 3, 2); + } else if (!Op.compare_lower("vaae1is")) { + // SYS #0, C8, C3, #3 + SYS_ALIAS(0, 8, 3, 3); + } else if (!Op.compare_lower("alle1is")) { + // SYS #4, C8, C3, #4 + SYS_ALIAS(4, 8, 3, 4); + } else if (!Op.compare_lower("vale1is")) { + // SYS #0, C8, C3, #5 + SYS_ALIAS(0, 8, 3, 5); + } else if (!Op.compare_lower("vaale1is")) { + // SYS #0, C8, C3, #7 + SYS_ALIAS(0, 8, 3, 7); + } else if (!Op.compare_lower("vmalle1")) { + // SYS #0, C8, C7, #0 + SYS_ALIAS(0, 8, 7, 0); + } else if (!Op.compare_lower("alle2")) { + // SYS #4, C8, C7, #0 + SYS_ALIAS(4, 8, 7, 0); + } else if (!Op.compare_lower("vale2is")) { + // SYS #4, C8, C3, #5 + SYS_ALIAS(4, 8, 3, 5); + } else if (!Op.compare_lower("vale3is")) { + // SYS #6, C8, C3, #5 + SYS_ALIAS(6, 8, 3, 5); + } else if (!Op.compare_lower("alle3")) { + // SYS #6, C8, 
C7, #0 + SYS_ALIAS(6, 8, 7, 0); + } else if (!Op.compare_lower("vae1")) { + // SYS #0, C8, C7, #1 + SYS_ALIAS(0, 8, 7, 1); + } else if (!Op.compare_lower("vae2")) { + // SYS #4, C8, C7, #1 + SYS_ALIAS(4, 8, 7, 1); + } else if (!Op.compare_lower("vae3")) { + // SYS #6, C8, C7, #1 + SYS_ALIAS(6, 8, 7, 1); + } else if (!Op.compare_lower("aside1")) { + // SYS #0, C8, C7, #2 + SYS_ALIAS(0, 8, 7, 2); + } else if (!Op.compare_lower("vaae1")) { + // SYS #0, C8, C7, #3 + SYS_ALIAS(0, 8, 7, 3); + } else if (!Op.compare_lower("alle1")) { + // SYS #4, C8, C7, #4 + SYS_ALIAS(4, 8, 7, 4); + } else if (!Op.compare_lower("vale1")) { + // SYS #0, C8, C7, #5 + SYS_ALIAS(0, 8, 7, 5); + } else if (!Op.compare_lower("vale2")) { + // SYS #4, C8, C7, #5 + SYS_ALIAS(4, 8, 7, 5); + } else if (!Op.compare_lower("vale3")) { + // SYS #6, C8, C7, #5 + SYS_ALIAS(6, 8, 7, 5); + } else if (!Op.compare_lower("vaale1")) { + // SYS #0, C8, C7, #7 + SYS_ALIAS(0, 8, 7, 7); + } else if (!Op.compare_lower("ipas2e1")) { + // SYS #4, C8, C4, #1 + SYS_ALIAS(4, 8, 4, 1); + } else if (!Op.compare_lower("ipas2le1")) { + // SYS #4, C8, C4, #5 + SYS_ALIAS(4, 8, 4, 5); + } else if (!Op.compare_lower("ipas2e1is")) { + // SYS #4, C8, C4, #1 + SYS_ALIAS(4, 8, 0, 1); + } else if (!Op.compare_lower("ipas2le1is")) { + // SYS #4, C8, C4, #5 + SYS_ALIAS(4, 8, 0, 5); + } else if (!Op.compare_lower("vmalls12e1")) { + // SYS #4, C8, C7, #6 + SYS_ALIAS(4, 8, 7, 6); + } else if (!Op.compare_lower("vmalls12e1is")) { + // SYS #4, C8, C3, #6 + SYS_ALIAS(4, 8, 3, 6); + } else { + return TokError("invalid operand for TLBI instruction"); } } - RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); - if (RegNum == AArch64::NoRegister) { - RegNum = StringSwitch(LowerReg.substr(0, DotPos)) - .Case("ip0", AArch64::X16) - .Case("ip1", AArch64::X17) - .Case("fp", AArch64::X29) - .Case("lr", AArch64::X30) - .Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0) - .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1) - .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2) - .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3) - .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4) - .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5) - .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6) - .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7) - .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8) - .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9) - .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10) - .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11) - .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12) - .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13) - .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14) - .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15) - .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16) - .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17) - .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18) - .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19) - .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20) - .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21) - .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22) - .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23) - .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24) - .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25) - .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26) - .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27) - .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28) - .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29) - .Case("v30", IsVec128 ? 
AArch64::Q30 : AArch64::D30) - .Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31) - .Default(AArch64::NoRegister); - } - if (RegNum == AArch64::NoRegister) - return false; +#undef SYS_ALIAS - return true; -} + Parser.Lex(); // Eat operand. -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseRegister(SmallVectorImpl &Operands, - uint32_t &NumLanes) { - unsigned RegNum; - StringRef Layout; - SMLoc RegEndLoc, LayoutLoc; - SMLoc S = Parser.getTok().getLoc(); - - if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) - return MatchOperand_NoMatch; + bool ExpectRegister = (Op.lower().find("all") == StringRef::npos); + bool HasRegister = false; - Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc)); + // Check for the optional register operand. + if (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // Eat comma. - if (Layout.size() != 0) { - unsigned long long TmpLanes = 0; - llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes); - if (TmpLanes != 0) { - NumLanes = TmpLanes; - } else { - // If the number of lanes isn't specified explicitly, a valid instruction - // will have an element specifier and be capable of acting on the entire - // vector register. - switch (Layout.back()) { - default: llvm_unreachable("Invalid layout specifier"); - case 'b': NumLanes = 16; break; - case 'h': NumLanes = 8; break; - case 's': NumLanes = 4; break; - case 'd': NumLanes = 2; break; - case 'q': NumLanes = 1; break; - } - } + if (Tok.isNot(AsmToken::Identifier) || parseRegister(Operands)) + return TokError("expected register operand"); - Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc)); + HasRegister = true; } - Parser.Lex(); - return MatchOperand_Success; -} - -bool -AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { - // This callback is used for things like DWARF frame directives in - // assembly. They don't care about things like NEON layouts or lanes, they - // just want to be able to produce the DWARF register number. - StringRef LayoutSpec; - SMLoc RegEndLoc, LayoutLoc; - StartLoc = Parser.getTok().getLoc(); - - if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc)) - return true; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + Parser.eatToEndOfStatement(); + return TokError("unexpected token in argument list"); + } - Parser.Lex(); - EndLoc = Parser.getTok().getLoc(); + if (ExpectRegister && !HasRegister) { + return TokError("specified " + Mnemonic + " op requires a register"); + } + else if (!ExpectRegister && HasRegister) { + return TokError("specified " + Mnemonic + " op does not use a register"); + } + Parser.Lex(); // Consume the EndOfStatement return false; } AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper, - SmallVectorImpl &Operands) { - // Since these operands occur in very limited circumstances, without - // alternatives, we actually signal an error if there is no match. If relaxing - // this, beware of unintended consequences: an immediate will be accepted - // during matching, no matter how it gets into the AArch64Operand. 
+AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); - SMLoc S = Tok.getLoc(); - if (Tok.is(AsmToken::Identifier)) { - bool ValidName; - uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName); - - if (!ValidName) { - Error(S, "operand specifier not recognised"); + // Can be either a #imm style literal or an option name + bool Hash = Tok.is(AsmToken::Hash); + if (Hash || Tok.is(AsmToken::Integer)) { + // Immediate operand. + if (Hash) + Parser.Lex(); // Eat the '#' + const MCExpr *ImmVal; + SMLoc ExprLoc = getLoc(); + if (getParser().parseExpression(ImmVal)) + return MatchOperand_ParseFail; + const MCConstantExpr *MCE = dyn_cast(ImmVal); + if (!MCE) { + Error(ExprLoc, "immediate value expected for barrier operand"); return MatchOperand_ParseFail; } - - Parser.Lex(); // We're done with the identifier. Eat it - - SMLoc E = Parser.getTok().getLoc(); - const MCExpr *Imm = MCConstantExpr::Create(Code, getContext()); - Operands.push_back(AArch64Operand::CreateImm(Imm, S, E)); + if (MCE->getValue() < 0 || MCE->getValue() > 15) { + Error(ExprLoc, "barrier operand out of range"); + return MatchOperand_ParseFail; + } + Operands.push_back( + AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext())); return MatchOperand_Success; - } else if (Tok.is(AsmToken::Hash)) { - Parser.Lex(); + } - const MCExpr *ImmVal; - if (ParseImmediate(ImmVal) != MatchOperand_Success) - return MatchOperand_ParseFail; + if (Tok.isNot(AsmToken::Identifier)) { + TokError("invalid operand for instruction"); + return MatchOperand_ParseFail; + } - const MCConstantExpr *CE = dyn_cast(ImmVal); - if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) { - Error(S, "Invalid immediate for instruction"); - return MatchOperand_ParseFail; - } + bool Valid; + unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid); + if (!Valid) { + TokError("invalid barrier option name"); + return MatchOperand_ParseFail; + } - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E)); - return MatchOperand_Success; + // The only valid named option for ISB is 'sy' + if (Mnemonic == "isb" && Opt != AArch64DB::SY) { + TokError("'sy' or #imm operand expected"); + return MatchOperand_ParseFail; } - Error(S, "unexpected operand for instruction"); - return MatchOperand_ParseFail; + Operands.push_back( + AArch64Operand::CreateBarrier(Opt, getLoc(), getContext())); + Parser.Lex(); // Consume the option + + return MatchOperand_Success; } AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseSysRegOperand( - SmallVectorImpl &Operands) { +AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); - // Any MSR/MRS operand will be an identifier, and we want to store it as some - // kind of string: SPSel is valid for two different forms of MSR with two - // different encodings. There's no collision at the moment, but the potential - // is there. 
- if (!Tok.is(AsmToken::Identifier)) { + if (Tok.isNot(AsmToken::Identifier)) return MatchOperand_NoMatch; - } - SMLoc S = Tok.getLoc(); - Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S)); + Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), getLoc(), + STI.getFeatureBits(), getContext())); Parser.Lex(); // Eat identifier return MatchOperand_Success; } -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseLSXAddressOperand( - SmallVectorImpl &Operands) { - SMLoc S = Parser.getTok().getLoc(); - - unsigned RegNum; - SMLoc RegEndLoc, LayoutLoc; - StringRef Layout; - if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc) - || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum) - || Layout.size() != 0) { - // Check Layout.size because we don't want to let "x3.4s" or similar - // through. - return MatchOperand_NoMatch; - } - Parser.Lex(); // Eat register +/// tryParseVectorRegister - Parse a vector register operand. +bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) { + if (Parser.getTok().isNot(AsmToken::Identifier)) + return true; - if (Parser.getTok().is(AsmToken::RBrac)) { - // We're done - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); - return MatchOperand_Success; - } + SMLoc S = getLoc(); + // Check for a vector register specifier first. + StringRef Kind; + int64_t Reg = tryMatchVectorRegister(Kind, false); + if (Reg == -1) + return true; + Operands.push_back( + AArch64Operand::CreateReg(Reg, true, S, getLoc(), getContext())); + // If there was an explicit qualifier, that goes on as a literal text + // operand. + if (!Kind.empty()) + Operands.push_back( + AArch64Operand::CreateToken(Kind, false, S, getContext())); + + // If there is an index specifier following the register, parse that too. + if (Parser.getTok().is(AsmToken::LBrac)) { + SMLoc SIdx = getLoc(); + Parser.Lex(); // Eat left bracket token. - // Otherwise, only ", #0" is valid + const MCExpr *ImmVal; + if (getParser().parseExpression(ImmVal)) + return false; + const MCConstantExpr *MCE = dyn_cast(ImmVal); + if (!MCE) { + TokError("immediate value expected for vector index"); + return false; + } - if (Parser.getTok().isNot(AsmToken::Comma)) { - Error(Parser.getTok().getLoc(), "expected ',' or ']' after register"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat ',' + SMLoc E = getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(E, "']' expected"); + return false; + } - if (Parser.getTok().isNot(AsmToken::Hash)) { - Error(Parser.getTok().getLoc(), "expected '#0'"); - return MatchOperand_ParseFail; - } - Parser.Lex(); // Eat '#' + Parser.Lex(); // Eat right bracket token. 
- if (Parser.getTok().isNot(AsmToken::Integer) - || Parser.getTok().getIntVal() != 0 ) { - Error(Parser.getTok().getLoc(), "expected '#0'"); - return MatchOperand_ParseFail; + Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx, + E, getContext())); } - Parser.Lex(); // Eat '0' - SMLoc E = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E)); - return MatchOperand_Success; + return false; } -AArch64AsmParser::OperandMatchResultTy -AArch64AsmParser::ParseShiftExtend( - SmallVectorImpl &Operands) { - StringRef IDVal = Parser.getTok().getIdentifier(); - std::string LowerID = IDVal.lower(); - - A64SE::ShiftExtSpecifiers Spec = - StringSwitch(LowerID) - .Case("lsl", A64SE::LSL) - .Case("msl", A64SE::MSL) - .Case("lsr", A64SE::LSR) - .Case("asr", A64SE::ASR) - .Case("ror", A64SE::ROR) - .Case("uxtb", A64SE::UXTB) - .Case("uxth", A64SE::UXTH) - .Case("uxtw", A64SE::UXTW) - .Case("uxtx", A64SE::UXTX) - .Case("sxtb", A64SE::SXTB) - .Case("sxth", A64SE::SXTH) - .Case("sxtw", A64SE::SXTW) - .Case("sxtx", A64SE::SXTX) - .Default(A64SE::Invalid); - - if (Spec == A64SE::Invalid) - return MatchOperand_NoMatch; +/// parseRegister - Parse a non-vector register operand. +bool AArch64AsmParser::parseRegister(OperandVector &Operands) { + SMLoc S = getLoc(); + // Try for a vector register. + if (!tryParseVectorRegister(Operands)) + return false; - // Eat the shift - SMLoc S, E; - S = Parser.getTok().getLoc(); - Parser.Lex(); + // Try for a scalar register. + int64_t Reg = tryParseRegister(); + if (Reg == -1) + return true; + Operands.push_back( + AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext())); - if (Spec != A64SE::LSL && Spec != A64SE::LSR && Spec != A64SE::ASR && - Spec != A64SE::ROR && Spec != A64SE::MSL) { - // The shift amount can be omitted for the extending versions, but not real - // shifts: - // add x0, x0, x0, uxtb - // is valid, and equivalent to - // add x0, x0, x0, uxtb #0 - - if (Parser.getTok().is(AsmToken::Comma) || - Parser.getTok().is(AsmToken::EndOfStatement) || - Parser.getTok().is(AsmToken::RBrac)) { - Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true, - S, E)); - return MatchOperand_Success; + // A small number of instructions (FMOVXDhighr, for example) have "[1]" + // as a string token in the instruction itself. 
+ if (getLexer().getKind() == AsmToken::LBrac) { + SMLoc LBracS = getLoc(); + Parser.Lex(); + const AsmToken &Tok = Parser.getTok(); + if (Tok.is(AsmToken::Integer)) { + SMLoc IntS = getLoc(); + int64_t Val = Tok.getIntVal(); + if (Val == 1) { + Parser.Lex(); + if (getLexer().getKind() == AsmToken::RBrac) { + SMLoc RBracS = getLoc(); + Parser.Lex(); + Operands.push_back( + AArch64Operand::CreateToken("[", false, LBracS, getContext())); + Operands.push_back( + AArch64Operand::CreateToken("1", false, IntS, getContext())); + Operands.push_back( + AArch64Operand::CreateToken("]", false, RBracS, getContext())); + return false; + } + } } } - // Eat # at beginning of immediate - if (!Parser.getTok().is(AsmToken::Hash)) { - Error(Parser.getTok().getLoc(), - "expected #imm after shift specifier"); - return MatchOperand_ParseFail; - } - Parser.Lex(); + return false; +} - // Make sure we do actually have a number - if (!Parser.getTok().is(AsmToken::Integer)) { - Error(Parser.getTok().getLoc(), - "expected integer shift amount"); - return MatchOperand_ParseFail; - } - unsigned Amount = Parser.getTok().getIntVal(); - Parser.Lex(); - E = Parser.getTok().getLoc(); +bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) { + bool HasELFModifier = false; + AArch64MCExpr::VariantKind RefKind; - Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false, - S, E)); + if (Parser.getTok().is(AsmToken::Colon)) { + Parser.Lex(); // Eat ':" + HasELFModifier = true; - return MatchOperand_Success; -} + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), + "expect relocation specifier in operand after ':'"); + return true; + } -/// Try to parse a vector register token, If it is a vector register, -/// the token is eaten and return true. Otherwise return false. -bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, - StringRef &Layout, SMLoc &LayoutLoc) { - bool IsVector = true; - - if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)) - IsVector = false; - else if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID] - .contains(RegNum) && - !AArch64MCRegisterClasses[AArch64::FPR128RegClassID] - .contains(RegNum)) - IsVector = false; - else if (Layout.size() == 0) - IsVector = false; - - if (!IsVector) - Error(Parser.getTok().getLoc(), "expected vector type register"); - - Parser.Lex(); // Eat this token. 
- return IsVector; -} + std::string LowerCase = Parser.getTok().getIdentifier().lower(); + RefKind = StringSwitch(LowerCase) + .Case("lo12", AArch64MCExpr::VK_LO12) + .Case("abs_g3", AArch64MCExpr::VK_ABS_G3) + .Case("abs_g2", AArch64MCExpr::VK_ABS_G2) + .Case("abs_g2_s", AArch64MCExpr::VK_ABS_G2_S) + .Case("abs_g2_nc", AArch64MCExpr::VK_ABS_G2_NC) + .Case("abs_g1", AArch64MCExpr::VK_ABS_G1) + .Case("abs_g1_s", AArch64MCExpr::VK_ABS_G1_S) + .Case("abs_g1_nc", AArch64MCExpr::VK_ABS_G1_NC) + .Case("abs_g0", AArch64MCExpr::VK_ABS_G0) + .Case("abs_g0_s", AArch64MCExpr::VK_ABS_G0_S) + .Case("abs_g0_nc", AArch64MCExpr::VK_ABS_G0_NC) + .Case("dtprel_g2", AArch64MCExpr::VK_DTPREL_G2) + .Case("dtprel_g1", AArch64MCExpr::VK_DTPREL_G1) + .Case("dtprel_g1_nc", AArch64MCExpr::VK_DTPREL_G1_NC) + .Case("dtprel_g0", AArch64MCExpr::VK_DTPREL_G0) + .Case("dtprel_g0_nc", AArch64MCExpr::VK_DTPREL_G0_NC) + .Case("dtprel_hi12", AArch64MCExpr::VK_DTPREL_HI12) + .Case("dtprel_lo12", AArch64MCExpr::VK_DTPREL_LO12) + .Case("dtprel_lo12_nc", AArch64MCExpr::VK_DTPREL_LO12_NC) + .Case("tprel_g2", AArch64MCExpr::VK_TPREL_G2) + .Case("tprel_g1", AArch64MCExpr::VK_TPREL_G1) + .Case("tprel_g1_nc", AArch64MCExpr::VK_TPREL_G1_NC) + .Case("tprel_g0", AArch64MCExpr::VK_TPREL_G0) + .Case("tprel_g0_nc", AArch64MCExpr::VK_TPREL_G0_NC) + .Case("tprel_hi12", AArch64MCExpr::VK_TPREL_HI12) + .Case("tprel_lo12", AArch64MCExpr::VK_TPREL_LO12) + .Case("tprel_lo12_nc", AArch64MCExpr::VK_TPREL_LO12_NC) + .Case("tlsdesc_lo12", AArch64MCExpr::VK_TLSDESC_LO12) + .Case("got", AArch64MCExpr::VK_GOT_PAGE) + .Case("got_lo12", AArch64MCExpr::VK_GOT_LO12) + .Case("gottprel", AArch64MCExpr::VK_GOTTPREL_PAGE) + .Case("gottprel_lo12", AArch64MCExpr::VK_GOTTPREL_LO12_NC) + .Case("gottprel_g1", AArch64MCExpr::VK_GOTTPREL_G1) + .Case("gottprel_g0_nc", AArch64MCExpr::VK_GOTTPREL_G0_NC) + .Case("tlsdesc", AArch64MCExpr::VK_TLSDESC_PAGE) + .Default(AArch64MCExpr::VK_INVALID); + + if (RefKind == AArch64MCExpr::VK_INVALID) { + Error(Parser.getTok().getLoc(), + "expect relocation specifier in operand after ':'"); + return true; + } + Parser.Lex(); // Eat identifier -// A vector list contains 1-4 consecutive registers. -// Now there are two kinds of vector list when number of vector > 1: -// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout} -// (2) {Vn.layout - Vm.layout} -// If the layout is like .b/.h/.s/.d, also parse the lane. -AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList( - SmallVectorImpl &Operands) { - if (Parser.getTok().isNot(AsmToken::LCurly)) { - Error(Parser.getTok().getLoc(), "'{' expected"); - return MatchOperand_ParseFail; + if (Parser.getTok().isNot(AsmToken::Colon)) { + Error(Parser.getTok().getLoc(), "expect ':' after relocation specifier"); + return true; + } + Parser.Lex(); // Eat ':' } - SMLoc SLoc = Parser.getTok().getLoc(); - Parser.Lex(); // Eat '{' token. - unsigned Reg, Count = 1; - StringRef LayoutStr; - SMLoc RegEndLoc, LayoutLoc; - if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc)) - return MatchOperand_ParseFail; + if (getParser().parseExpression(ImmVal)) + return true; + + if (HasELFModifier) + ImmVal = AArch64MCExpr::Create(ImmVal, RefKind, getContext()); + + return false; +} + +/// parseVectorList - Parse a vector list operand for AdvSIMD instructions. +bool AArch64AsmParser::parseVectorList(OperandVector &Operands) { + assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Bracket"); + SMLoc S = getLoc(); + Parser.Lex(); // Eat left bracket token. 
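// ---- Editor's note (sketch, not part of the patch): the lookup idiom used
// by parseSymbolicImmVal above. llvm::StringSwitch folds the whole
// name-to-kind table into a single expression; the enum below is a stand-in
// for illustration, not AArch64MCExpr's real VariantKind.
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class RefKind { LO12, ABS_G3, GOT_LO12, Invalid };

static RefKind classifySpecifier(llvm::StringRef LowerCase) {
  return llvm::StringSwitch<RefKind>(LowerCase)
      .Case("lo12", RefKind::LO12)         // add x0, x0, :lo12:sym
      .Case("abs_g3", RefKind::ABS_G3)     // movz x0, #:abs_g3:sym
      .Case("got_lo12", RefKind::GOT_LO12) // ldr x0, [x0, :got_lo12:sym]
      .Default(RefKind::Invalid); // -> "expect relocation specifier" error
}
// ---- end editor's note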
+  StringRef Kind;
+  int64_t FirstReg = tryMatchVectorRegister(Kind, true);
+  if (FirstReg == -1)
+    return true;
+  int64_t PrevReg = FirstReg;
+  unsigned Count = 1;
   if (Parser.getTok().is(AsmToken::Minus)) {
     Parser.Lex(); // Eat the minus.
-    unsigned Reg2;
-    StringRef LayoutStr2;
-    SMLoc RegEndLoc2, LayoutLoc2;
-    SMLoc RegLoc2 = Parser.getTok().getLoc();
+    SMLoc Loc = getLoc();
+    StringRef NextKind;
+    int64_t Reg = tryMatchVectorRegister(NextKind, true);
+    if (Reg == -1)
+      return true;
+    // Any Kind suffixes must match on all regs in the list.
+    if (Kind != NextKind)
+      return Error(Loc, "mismatched register size suffix");
-    if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
-      return MatchOperand_ParseFail;
-    unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg);
+    unsigned Space = (PrevReg < Reg) ? (Reg - PrevReg) : (Reg + 32 - PrevReg);
-    if (LayoutStr != LayoutStr2) {
-      Error(LayoutLoc2, "expected the same vector layout");
-      return MatchOperand_ParseFail;
-    }
     if (Space == 0 || Space > 3) {
-      Error(RegLoc2, "invalid number of vectors");
-      return MatchOperand_ParseFail;
+      return Error(Loc, "invalid number of vectors");
     }
     Count += Space;
-  } else {
-    unsigned LastReg = Reg;
+  } else {
     while (Parser.getTok().is(AsmToken::Comma)) {
-      Parser.Lex(); // Eat the comma.
-      unsigned Reg2;
-      StringRef LayoutStr2;
-      SMLoc RegEndLoc2, LayoutLoc2;
-      SMLoc RegLoc2 = Parser.getTok().getLoc();
+      Parser.Lex(); // Eat the comma token.
-      if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
-        return MatchOperand_ParseFail;
-      unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg)
-                                        : (Reg2 + 32 - LastReg);
-      Count++;
-
-      // The space between two vectors should be 1. And they should have the same layout.
-      // Total count shouldn't be great than 4
-      if (Space != 1) {
-        Error(RegLoc2, "invalid space between two vectors");
-        return MatchOperand_ParseFail;
-      }
-      if (LayoutStr != LayoutStr2) {
-        Error(LayoutLoc2, "expected the same vector layout");
-        return MatchOperand_ParseFail;
-      }
-      if (Count > 4) {
-        Error(RegLoc2, "invalid number of vectors");
-        return MatchOperand_ParseFail;
-      }
+      SMLoc Loc = getLoc();
+      StringRef NextKind;
+      int64_t Reg = tryMatchVectorRegister(NextKind, true);
+      if (Reg == -1)
+        return true;
+      // Any Kind suffixes must match on all regs in the list.
+      if (Kind != NextKind)
+        return Error(Loc, "mismatched register size suffix");
+
+      // Registers must be incremental (with wraparound at 31)
+      if (getContext().getRegisterInfo()->getEncodingValue(Reg) !=
+          (getContext().getRegisterInfo()->getEncodingValue(PrevReg) + 1) % 32)
+        return Error(Loc, "registers must be sequential");
-      LastReg = Reg2;
+      PrevReg = Reg;
+      ++Count;
     }
   }
-  if (Parser.getTok().isNot(AsmToken::RCurly)) {
-    Error(Parser.getTok().getLoc(), "'}' expected");
-    return MatchOperand_ParseFail;
-  }
-  SMLoc ELoc = Parser.getTok().getLoc();
-  Parser.Lex(); // Eat '}' token.
+  if (Parser.getTok().isNot(AsmToken::RCurly))
+    return Error(getLoc(), "'}' expected");
+  Parser.Lex(); // Eat the '}' token.
-  A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr);
-  if (Count > 1) { // If count > 1, create vector list using super register.
-    bool IsVec64 = (Layout < A64Layout::VL_16B);
-    static unsigned SupRegIDs[3][2] = {
-      { AArch64::QPairRegClassID, AArch64::DPairRegClassID },
-      { AArch64::QTripleRegClassID, AArch64::DTripleRegClassID },
-      { AArch64::QQuadRegClassID, AArch64::DQuadRegClassID }
-    };
-    unsigned SupRegID = SupRegIDs[Count - 2][static_cast<unsigned>(IsVec64)];
-    unsigned Sub0 = IsVec64 ?
AArch64::dsub_0 : AArch64::qsub_0; - const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - Reg = MRI->getMatchingSuperReg(Reg, Sub0, - &AArch64MCRegisterClasses[SupRegID]); - } - Operands.push_back( - AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc)); + if (Count > 4) + return Error(S, "invalid number of vectors"); + + unsigned NumElements = 0; + char ElementKind = 0; + if (!Kind.empty()) + parseValidVectorKind(Kind, NumElements, ElementKind); + Operands.push_back(AArch64Operand::CreateVectorList( + FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext())); + + // If there is an index specifier following the list, parse that too. if (Parser.getTok().is(AsmToken::LBrac)) { - uint32_t NumLanes = 0; - switch(Layout) { - case A64Layout::VL_B : NumLanes = 16; break; - case A64Layout::VL_H : NumLanes = 8; break; - case A64Layout::VL_S : NumLanes = 4; break; - case A64Layout::VL_D : NumLanes = 2; break; - default: - SMLoc Loc = getLexer().getLoc(); - Error(Loc, "expected comma before next operand"); - return MatchOperand_ParseFail; + SMLoc SIdx = getLoc(); + Parser.Lex(); // Eat left bracket token. + + const MCExpr *ImmVal; + if (getParser().parseExpression(ImmVal)) + return false; + const MCConstantExpr *MCE = dyn_cast(ImmVal); + if (!MCE) { + TokError("immediate value expected for vector index"); + return false; } - return ParseNEONLane(Operands, NumLanes); - } else { - return MatchOperand_Success; + + SMLoc E = getLoc(); + if (Parser.getTok().isNot(AsmToken::RBrac)) { + Error(E, "']' expected"); + return false; + } + + Parser.Lex(); // Eat right bracket token. + + Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx, + E, getContext())); } + return false; } -// FIXME: We would really like to be able to tablegen'erate this. 
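// ---- Editor's note (sketch, not part of the patch): the sequencing rule
// parseVectorList enforces on comma-separated lists, with plain integers
// standing in for MCRegisterInfo::getEncodingValue results.
#include <cassert>

static bool isSequential(unsigned PrevEnc, unsigned NextEnc) {
  return NextEnc == (PrevEnc + 1) % 32; // the register file wraps V31 -> V0
}

int main() {
  assert(isSequential(30, 31));  // { v30.4s, v31.4s } is accepted
  assert(isSequential(31, 0));   // { v31.4s, v0.4s } wraps around
  assert(!isSequential(3, 5));   // gap -> "registers must be sequential"
}
// ---- end editor's note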
-bool AArch64AsmParser::
-validateInstruction(MCInst &Inst,
-                    const SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
-  switch (Inst.getOpcode()) {
-  case AArch64::BFIwwii:
-  case AArch64::BFIxxii:
-  case AArch64::SBFIZwwii:
-  case AArch64::SBFIZxxii:
-  case AArch64::UBFIZwwii:
-  case AArch64::UBFIZxxii: {
-    unsigned ImmOps = Inst.getNumOperands() - 2;
-    int64_t ImmR = Inst.getOperand(ImmOps).getImm();
-    int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
-
-    if (ImmR != 0 && ImmS >= ImmR) {
-      return Error(Operands[4]->getStartLoc(),
-                   "requested insert overflows register");
-    }
-    return false;
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
+  const AsmToken &Tok = Parser.getTok();
+  if (!Tok.is(AsmToken::Identifier))
+    return MatchOperand_NoMatch;
+
+  unsigned RegNum = MatchRegisterName(Tok.getString().lower());
+
+  MCContext &Ctx = getContext();
+  const MCRegisterInfo *RI = Ctx.getRegisterInfo();
+  if (!RI->getRegClass(AArch64::GPR64spRegClassID).contains(RegNum))
+    return MatchOperand_NoMatch;
+
+  SMLoc S = getLoc();
+  Parser.Lex(); // Eat register
+
+  if (Parser.getTok().isNot(AsmToken::Comma)) {
+    Operands.push_back(
+        AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
+    return MatchOperand_Success;
   }
-  case AArch64::BFXILwwii:
-  case AArch64::BFXILxxii:
-  case AArch64::SBFXwwii:
-  case AArch64::SBFXxxii:
-  case AArch64::UBFXwwii:
-  case AArch64::UBFXxxii: {
-    unsigned ImmOps = Inst.getNumOperands() - 2;
-    int64_t ImmR = Inst.getOperand(ImmOps).getImm();
-    int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
-    int64_t RegWidth = 0;
-    switch (Inst.getOpcode()) {
-    case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii:
-      RegWidth = 64;
-      break;
-    case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii:
-      RegWidth = 32;
-      break;
-    }
+  Parser.Lex(); // Eat comma.
+
+  if (Parser.getTok().is(AsmToken::Hash))
+    Parser.Lex(); // Eat hash
+
+  if (Parser.getTok().isNot(AsmToken::Integer)) {
+    Error(getLoc(), "index must be absent or #0");
+    return MatchOperand_ParseFail;
+  }
+
+  const MCExpr *ImmVal;
+  if (Parser.parseExpression(ImmVal) || !isa<MCConstantExpr>(ImmVal) ||
+      cast<MCConstantExpr>(ImmVal)->getValue() != 0) {
+    Error(getLoc(), "index must be absent or #0");
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(
+      AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
+  return MatchOperand_Success;
+}
-    if (ImmS >= RegWidth || ImmS < ImmR) {
-      return Error(Operands[4]->getStartLoc(),
-                   "requested extract overflows register");
-    }
+/// parseOperand - Parse an AArch64 instruction operand. For now this parses
+/// the operand regardless of the mnemonic.
+bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
+                                    bool invertCondCode) {
+  // Check if the current operand has a custom associated parser, if so, try to
+  // custom parse the operand, or fallback to the general approach.
+  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+  if (ResTy == MatchOperand_Success)
     return false;
-  }
-  case AArch64::ICix: {
-    int64_t ImmVal = Inst.getOperand(0).getImm();
-    A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
-    if (!A64IC::NeedsRegister(ICOp)) {
-      return Error(Operands[1]->getStartLoc(),
-                   "specified IC op does not use a register");
-    }
+  // If there wasn't a custom match, try the generic matcher below. Otherwise,
+  // there was a match, but an error occurred, in which case, just return that
+  // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail) + return true; + + // Nothing custom, so do general case parsing. + SMLoc S, E; + switch (getLexer().getKind()) { + default: { + SMLoc S = getLoc(); + const MCExpr *Expr; + if (parseSymbolicImmVal(Expr)) + return Error(S, "invalid operand"); + + SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext())); return false; } - case AArch64::ICi: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64IC::ICValues ICOp = static_cast(ImmVal); - if (A64IC::NeedsRegister(ICOp)) { - return Error(Operands[1]->getStartLoc(), - "specified IC op requires a register"); - } - return false; + case AsmToken::LBrac: { + SMLoc Loc = Parser.getTok().getLoc(); + Operands.push_back(AArch64Operand::CreateToken("[", false, Loc, + getContext())); + Parser.Lex(); // Eat '[' + + // There's no comma after a '[', so we can parse the next operand + // immediately. + return parseOperand(Operands, false, false); } - case AArch64::TLBIix: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64TLBI::TLBIValues TLBIOp = static_cast(ImmVal); - if (!A64TLBI::NeedsRegister(TLBIOp)) { - return Error(Operands[1]->getStartLoc(), - "specified TLBI op does not use a register"); - } + case AsmToken::LCurly: + return parseVectorList(Operands); + case AsmToken::Identifier: { + // If we're expecting a Condition Code operand, then just parse that. + if (isCondCode) + return parseCondCode(Operands, invertCondCode); + + // If it's a register name, parse it. + if (!parseRegister(Operands)) + return false; + + // This could be an optional "shift" or "extend" operand. + OperandMatchResultTy GotShift = tryParseOptionalShiftExtend(Operands); + // We can only continue if no tokens were eaten. + if (GotShift != MatchOperand_NoMatch) + return GotShift; + + // This was not a register so parse other operands that start with an + // identifier (like labels) as expressions and create them as immediates. + const MCExpr *IdVal; + S = getLoc(); + if (getParser().parseExpression(IdVal)) + return true; + + E = SMLoc::getFromPointer(getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(IdVal, S, E, getContext())); return false; } - case AArch64::TLBIi: { - int64_t ImmVal = Inst.getOperand(0).getImm(); - A64TLBI::TLBIValues TLBIOp = static_cast(ImmVal); - if (A64TLBI::NeedsRegister(TLBIOp)) { - return Error(Operands[1]->getStartLoc(), - "specified TLBI op requires a register"); + case AsmToken::Integer: + case AsmToken::Real: + case AsmToken::Hash: { + // #42 -> immediate. + S = getLoc(); + if (getLexer().is(AsmToken::Hash)) + Parser.Lex(); + + // Parse a negative sign + bool isNegative = false; + if (Parser.getTok().is(AsmToken::Minus)) { + isNegative = true; + // We need to consume this token only when we have a Real, otherwise + // we let parseSymbolicImmVal take care of it + if (Parser.getLexer().peekTok().is(AsmToken::Real)) + Parser.Lex(); + } + + // The only Real that should come through here is a literal #0.0 for + // the fcmp[e] r, #0.0 instructions. They expect raw token operands, + // so convert the value. 
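// ---- Editor's note (sketch, not part of the patch): what the APFloat
// conversion just below checks. The literal must be exactly +0.0: its
// IEEE-754 bit pattern (bitcastToAPInt there, memcpy here) must be all
// zeros, and a previously consumed minus sign rejects the operand as well.
#include <cassert>
#include <cstdint>
#include <cstring>

static bool isPositiveZeroLiteral(double V, bool IsNegative) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits));
  return Bits == 0 && !IsNegative;
}

int main() {
  assert(isPositiveZeroLiteral(0.0, false));  // fcmp s0, #0.0 is accepted
  assert(!isPositiveZeroLiteral(0.5, false)); // "expected ... constant #0.0"
  assert(!isPositiveZeroLiteral(0.0, true));  // #-0.0 is rejected too
}
// ---- end editor's note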
+ const AsmToken &Tok = Parser.getTok(); + if (Tok.is(AsmToken::Real)) { + APFloat RealVal(APFloat::IEEEdouble, Tok.getString()); + uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue(); + if (Mnemonic != "fcmp" && Mnemonic != "fcmpe" && Mnemonic != "fcmeq" && + Mnemonic != "fcmge" && Mnemonic != "fcmgt" && Mnemonic != "fcmle" && + Mnemonic != "fcmlt") + return TokError("unexpected floating point literal"); + else if (IntVal != 0 || isNegative) + return TokError("expected floating-point constant #0.0"); + Parser.Lex(); // Eat the token. + + Operands.push_back( + AArch64Operand::CreateToken("#0", false, S, getContext())); + Operands.push_back( + AArch64Operand::CreateToken(".0", false, S, getContext())); + return false; } + + const MCExpr *ImmVal; + if (parseSymbolicImmVal(ImmVal)) + return true; + + E = SMLoc::getFromPointer(getLoc().getPointer() - 1); + Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext())); return false; } } - - return false; } - -// Parses the instruction *together with* all operands, appending each parsed -// operand to the "Operands" list +/// ParseInstruction - Parse an AArch64 instruction mnemonic followed by its +/// operands. bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl &Operands) { - StringRef PatchedName = StringSwitch(Name.lower()) - .Case("beq", "b.eq") - .Case("bne", "b.ne") - .Case("bhs", "b.hs") - .Case("bcs", "b.cs") - .Case("blo", "b.lo") - .Case("bcc", "b.cc") - .Case("bmi", "b.mi") - .Case("bpl", "b.pl") - .Case("bvs", "b.vs") - .Case("bvc", "b.vc") - .Case("bhi", "b.hi") - .Case("bls", "b.ls") - .Case("bge", "b.ge") - .Case("blt", "b.lt") - .Case("bgt", "b.gt") - .Case("ble", "b.le") - .Case("bal", "b.al") - .Case("bnv", "b.nv") - .Default(Name); - - size_t CondCodePos = PatchedName.find('.'); - - StringRef Mnemonic = PatchedName.substr(0, CondCodePos); - Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc)); - - if (CondCodePos != StringRef::npos) { - // We have a condition code - SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1); - StringRef CondStr = PatchedName.substr(CondCodePos + 1, StringRef::npos); - A64CC::CondCodes Code; - - Code = A64StringToCondCode(CondStr); - - if (Code == A64CC::Invalid) { - Error(S, "invalid condition code"); + OperandVector &Operands) { + Name = StringSwitch(Name.lower()) + .Case("beq", "b.eq") + .Case("bne", "b.ne") + .Case("bhs", "b.hs") + .Case("bcs", "b.cs") + .Case("blo", "b.lo") + .Case("bcc", "b.cc") + .Case("bmi", "b.mi") + .Case("bpl", "b.pl") + .Case("bvs", "b.vs") + .Case("bvc", "b.vc") + .Case("bhi", "b.hi") + .Case("bls", "b.ls") + .Case("bge", "b.ge") + .Case("blt", "b.lt") + .Case("bgt", "b.gt") + .Case("ble", "b.le") + .Case("bal", "b.al") + .Case("bnv", "b.nv") + .Default(Name); + + // Create the leading tokens for the mnemonic, split by '.' characters. + size_t Start = 0, Next = Name.find('.'); + StringRef Head = Name.slice(Start, Next); + + // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction. 
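// ---- Editor's note (sketch, not part of the patch): what "alias for SYS"
// means here. Each named cache/TLB op is shorthand for a SYS instruction
// with fixed op1/CRn/CRm/op2 fields; the sample values below are taken from
// the ARMv8 ARM and are illustrative, as is the NeedsReg flag (ops whose
// name contains "all" take no register, per parseSysAlias earlier).
struct SysAliasEntry {
  const char *Name;
  unsigned Op1, CRn, CRm, Op2;
  bool NeedsReg;
};
static const SysAliasEntry ICAliases[] = {
    {"ialluis", 0, 7, 1, 0, false}, // ic ialluis  -> sys #0, c7, c1, #0
    {"iallu",   0, 7, 5, 0, false}, // ic iallu    -> sys #0, c7, c5, #0
    {"ivau",    3, 7, 5, 1, true},  // ic ivau, x0 -> sys #3, c7, c5, #1, x0
};
// ---- end editor's note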
+ if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi") { + bool IsError = parseSysAlias(Head, NameLoc, Operands); + if (IsError && getLexer().isNot(AsmToken::EndOfStatement)) Parser.eatToEndOfStatement(); - return true; - } - - SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos); - - Operands.push_back(AArch64Operand::CreateToken(".", DotL)); - SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3); - Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E)); + return IsError; } - // Now we parse the operands of this instruction + Operands.push_back( + AArch64Operand::CreateToken(Head, false, NameLoc, getContext())); + Mnemonic = Head; + + // Handle condition codes for a branch mnemonic + if (Head == "b" && Next != StringRef::npos) { + Start = Next; + Next = Name.find('.', Start + 1); + Head = Name.slice(Start + 1, Next); + + SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + + (Head.data() - Name.data())); + AArch64CC::CondCode CC = parseCondCodeString(Head); + if (CC == AArch64CC::Invalid) + return Error(SuffixLoc, "invalid condition code"); + Operands.push_back( + AArch64Operand::CreateToken(".", true, SuffixLoc, getContext())); + Operands.push_back( + AArch64Operand::CreateCondCode(CC, NameLoc, NameLoc, getContext())); + } + + // Add the remaining tokens in the mnemonic. + while (Next != StringRef::npos) { + Start = Next; + Next = Name.find('.', Start + 1); + Head = Name.slice(Start, Next); + SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() + + (Head.data() - Name.data()) + 1); + Operands.push_back( + AArch64Operand::CreateToken(Head, true, SuffixLoc, getContext())); + } + + // Conditional compare instructions have a Condition Code operand, which needs + // to be parsed and an immediate operand created. + bool condCodeFourthOperand = + (Head == "ccmp" || Head == "ccmn" || Head == "fccmp" || + Head == "fccmpe" || Head == "fcsel" || Head == "csel" || + Head == "csinc" || Head == "csinv" || Head == "csneg"); + + // These instructions are aliases to some of the conditional select + // instructions. However, the condition code is inverted in the aliased + // instruction. + // + // FIXME: Is this the correct way to handle these? Or should the parser + // generate the aliased instructions directly? + bool condCodeSecondOperand = (Head == "cset" || Head == "csetm"); + bool condCodeThirdOperand = + (Head == "cinc" || Head == "cinv" || Head == "cneg"); + + // Read the remaining operands. if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. - if (ParseOperand(Operands, Mnemonic)) { + if (parseOperand(Operands, false, false)) { Parser.eatToEndOfStatement(); return true; } + unsigned N = 2; while (getLexer().is(AsmToken::Comma)) { - Parser.Lex(); // Eat the comma. + Parser.Lex(); // Eat the comma. // Parse and remember the operand. - if (ParseOperand(Operands, Mnemonic)) { + if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) || + (N == 3 && condCodeThirdOperand) || + (N == 2 && condCodeSecondOperand), + condCodeSecondOperand || condCodeThirdOperand)) { Parser.eatToEndOfStatement(); return true; } - // After successfully parsing some operands there are two special cases to // consider (i.e. notional operands not separated by commas). Both are due // to memory specifiers: @@ -2321,52 +3121,716 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, // in the given context! 
if (Parser.getTok().is(AsmToken::RBrac)) { SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("]", Loc)); + Operands.push_back(AArch64Operand::CreateToken("]", false, Loc, + getContext())); Parser.Lex(); } if (Parser.getTok().is(AsmToken::Exclaim)) { SMLoc Loc = Parser.getTok().getLoc(); - Operands.push_back(AArch64Operand::CreateToken("!", Loc)); + Operands.push_back(AArch64Operand::CreateToken("!", false, Loc, + getContext())); Parser.Lex(); } + + ++N; } } if (getLexer().isNot(AsmToken::EndOfStatement)) { - SMLoc Loc = getLexer().getLoc(); + SMLoc Loc = Parser.getTok().getLoc(); Parser.eatToEndOfStatement(); - return Error(Loc, "expected comma before next operand"); + return Error(Loc, "unexpected token in argument list"); } - // Eat the EndOfStatement - Parser.Lex(); - + Parser.Lex(); // Consume the EndOfStatement return false; } +// FIXME: This entire function is a giant hack to provide us with decent +// operand range validation/diagnostics until TableGen/MC can be extended +// to support autogeneration of this kind of validation. +bool AArch64AsmParser::validateInstruction(MCInst &Inst, + SmallVectorImpl &Loc) { + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + // Check for indexed addressing modes w/ the base register being the + // same as a destination/source register or pair load where + // the Rt == Rt2. All of those are undefined behaviour. + switch (Inst.getOpcode()) { + case AArch64::LDPSWpre: + case AArch64::LDPWpost: + case AArch64::LDPWpre: + case AArch64::LDPXpost: + case AArch64::LDPXpre: { + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rt2 = Inst.getOperand(2).getReg(); + unsigned Rn = Inst.getOperand(3).getReg(); + if (RI->isSubRegisterEq(Rn, Rt)) + return Error(Loc[0], "unpredictable LDP instruction, writeback base " + "is also a destination"); + if (RI->isSubRegisterEq(Rn, Rt2)) + return Error(Loc[1], "unpredictable LDP instruction, writeback base " + "is also a destination"); + // FALLTHROUGH + } + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPWi: + case AArch64::LDPXi: { + unsigned Rt = Inst.getOperand(0).getReg(); + unsigned Rt2 = Inst.getOperand(1).getReg(); + if (Rt == Rt2) + return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); + break; + } + case AArch64::LDPDpost: + case AArch64::LDPDpre: + case AArch64::LDPQpost: + case AArch64::LDPQpre: + case AArch64::LDPSpost: + case AArch64::LDPSpre: + case AArch64::LDPSWpost: { + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rt2 = Inst.getOperand(2).getReg(); + if (Rt == Rt2) + return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); + break; + } + case AArch64::STPDpost: + case AArch64::STPDpre: + case AArch64::STPQpost: + case AArch64::STPQpre: + case AArch64::STPSpost: + case AArch64::STPSpre: + case AArch64::STPWpost: + case AArch64::STPWpre: + case AArch64::STPXpost: + case AArch64::STPXpre: { + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rt2 = Inst.getOperand(2).getReg(); + unsigned Rn = Inst.getOperand(3).getReg(); + if (RI->isSubRegisterEq(Rn, Rt)) + return Error(Loc[0], "unpredictable STP instruction, writeback base " + "is also a source"); + if (RI->isSubRegisterEq(Rn, Rt2)) + return Error(Loc[1], "unpredictable STP instruction, writeback base " + "is also a source"); + break; + } + case AArch64::LDRBBpre: + case AArch64::LDRBpre: + case AArch64::LDRHHpre: + case AArch64::LDRHpre: + case AArch64::LDRSBWpre: + case AArch64::LDRSBXpre: + case 
AArch64::LDRSHWpre: + case AArch64::LDRSHXpre: + case AArch64::LDRSWpre: + case AArch64::LDRWpre: + case AArch64::LDRXpre: + case AArch64::LDRBBpost: + case AArch64::LDRBpost: + case AArch64::LDRHHpost: + case AArch64::LDRHpost: + case AArch64::LDRSBWpost: + case AArch64::LDRSBXpost: + case AArch64::LDRSHWpost: + case AArch64::LDRSHXpost: + case AArch64::LDRSWpost: + case AArch64::LDRWpost: + case AArch64::LDRXpost: { + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rn = Inst.getOperand(2).getReg(); + if (RI->isSubRegisterEq(Rn, Rt)) + return Error(Loc[0], "unpredictable LDR instruction, writeback base " + "is also a source"); + break; + } + case AArch64::STRBBpost: + case AArch64::STRBpost: + case AArch64::STRHHpost: + case AArch64::STRHpost: + case AArch64::STRWpost: + case AArch64::STRXpost: + case AArch64::STRBBpre: + case AArch64::STRBpre: + case AArch64::STRHHpre: + case AArch64::STRHpre: + case AArch64::STRWpre: + case AArch64::STRXpre: { + unsigned Rt = Inst.getOperand(1).getReg(); + unsigned Rn = Inst.getOperand(2).getReg(); + if (RI->isSubRegisterEq(Rn, Rt)) + return Error(Loc[0], "unpredictable STR instruction, writeback base " + "is also a source"); + break; + } + } + + // Now check immediate ranges. Separate from the above as there is overlap + // in the instructions being checked and this keeps the nested conditionals + // to a minimum. + switch (Inst.getOpcode()) { + case AArch64::ADDSWri: + case AArch64::ADDSXri: + case AArch64::ADDWri: + case AArch64::ADDXri: + case AArch64::SUBSWri: + case AArch64::SUBSXri: + case AArch64::SUBWri: + case AArch64::SUBXri: { + // Annoyingly we can't do this in the isAddSubImm predicate, so there is + // some slight duplication here. + if (Inst.getOperand(2).isExpr()) { + const MCExpr *Expr = Inst.getOperand(2).getExpr(); + AArch64MCExpr::VariantKind ELFRefKind; + MCSymbolRefExpr::VariantKind DarwinRefKind; + int64_t Addend; + if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { + return Error(Loc[2], "invalid immediate expression"); + } + + // Only allow these with ADDXri. 
+ if ((DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || + DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) && + Inst.getOpcode() == AArch64::ADDXri) + return false; + + // Only allow these with ADDXri/ADDWri + if ((ELFRefKind == AArch64MCExpr::VK_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 || + ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC || + ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) && + (Inst.getOpcode() == AArch64::ADDXri || + Inst.getOpcode() == AArch64::ADDWri)) + return false; + + // Don't allow expressions in the immediate field otherwise + return Error(Loc[2], "invalid immediate expression"); + } + return false; + } + default: + return false; + } +} + +bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { + switch (ErrCode) { + case Match_MissingFeature: + return Error(Loc, + "instruction requires a CPU feature not currently enabled"); + case Match_InvalidOperand: + return Error(Loc, "invalid operand for instruction"); + case Match_InvalidSuffix: + return Error(Loc, "invalid type suffix for instruction"); + case Match_InvalidCondCode: + return Error(Loc, "expected AArch64 condition code"); + case Match_AddSubRegExtendSmall: + return Error(Loc, + "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]"); + case Match_AddSubRegExtendLarge: + return Error(Loc, + "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]"); + case Match_AddSubSecondSource: + return Error(Loc, + "expected compatible register, symbol or integer in range [0, 4095]"); + case Match_LogicalSecondSource: + return Error(Loc, "expected compatible register or logical immediate"); + case Match_InvalidMovImm32Shift: + return Error(Loc, "expected 'lsl' with optional integer 0 or 16"); + case Match_InvalidMovImm64Shift: + return Error(Loc, "expected 'lsl' with optional integer 0, 16, 32 or 48"); + case Match_AddSubRegShift32: + return Error(Loc, + "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]"); + case Match_AddSubRegShift64: + return Error(Loc, + "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]"); + case Match_InvalidFPImm: + return Error(Loc, + "expected compatible register or floating-point constant"); + case Match_InvalidMemoryIndexedSImm9: + return Error(Loc, "index must be an integer in range [-256, 255]."); + case Match_InvalidMemoryIndexed4SImm7: + return Error(Loc, "index must be a multiple of 4 in range [-256, 252]."); + case Match_InvalidMemoryIndexed8SImm7: + return Error(Loc, "index must be a multiple of 8 in range [-512, 504]."); + case Match_InvalidMemoryIndexed16SImm7: + return Error(Loc, "index must be a multiple of 16 in range [-1024, 1008]."); + case Match_InvalidMemoryWExtend8: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0"); + case Match_InvalidMemoryWExtend16: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1"); + case Match_InvalidMemoryWExtend32: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2"); + case Match_InvalidMemoryWExtend64: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3"); + case Match_InvalidMemoryWExtend128: + return Error(Loc, + "expected 'uxtw' or 'sxtw' with optional shift of #0 or #4"); + case Match_InvalidMemoryXExtend8: + return Error(Loc, + "expected 
'lsl' or 'sxtx' with optional shift of #0");
+  case Match_InvalidMemoryXExtend16:
+    return Error(Loc,
+                 "expected 'lsl' or 'sxtx' with optional shift of #0 or #1");
+  case Match_InvalidMemoryXExtend32:
+    return Error(Loc,
+                 "expected 'lsl' or 'sxtx' with optional shift of #0 or #2");
+  case Match_InvalidMemoryXExtend64:
+    return Error(Loc,
+                 "expected 'lsl' or 'sxtx' with optional shift of #0 or #3");
+  case Match_InvalidMemoryXExtend128:
+    return Error(Loc,
+                 "expected 'lsl' or 'sxtx' with optional shift of #0 or #4");
+  case Match_InvalidMemoryIndexed1:
+    return Error(Loc, "index must be an integer in range [0, 4095].");
+  case Match_InvalidMemoryIndexed2:
+    return Error(Loc, "index must be a multiple of 2 in range [0, 8190].");
+  case Match_InvalidMemoryIndexed4:
+    return Error(Loc, "index must be a multiple of 4 in range [0, 16380].");
+  case Match_InvalidMemoryIndexed8:
+    return Error(Loc, "index must be a multiple of 8 in range [0, 32760].");
+  case Match_InvalidMemoryIndexed16:
+    return Error(Loc, "index must be a multiple of 16 in range [0, 65520].");
+  case Match_InvalidImm0_7:
+    return Error(Loc, "immediate must be an integer in range [0, 7].");
+  case Match_InvalidImm0_15:
+    return Error(Loc, "immediate must be an integer in range [0, 15].");
+  case Match_InvalidImm0_31:
+    return Error(Loc, "immediate must be an integer in range [0, 31].");
+  case Match_InvalidImm0_63:
+    return Error(Loc, "immediate must be an integer in range [0, 63].");
+  case Match_InvalidImm0_127:
+    return Error(Loc, "immediate must be an integer in range [0, 127].");
+  case Match_InvalidImm0_65535:
+    return Error(Loc, "immediate must be an integer in range [0, 65535].");
+  case Match_InvalidImm1_8:
+    return Error(Loc, "immediate must be an integer in range [1, 8].");
+  case Match_InvalidImm1_16:
+    return Error(Loc, "immediate must be an integer in range [1, 16].");
+  case Match_InvalidImm1_32:
+    return Error(Loc, "immediate must be an integer in range [1, 32].");
+  case Match_InvalidImm1_64:
+    return Error(Loc, "immediate must be an integer in range [1, 64].");
+  case Match_InvalidIndex1:
+    return Error(Loc, "expected lane specifier '[1]'");
+  case Match_InvalidIndexB:
+    return Error(Loc, "vector lane must be an integer in range [0, 15].");
+  case Match_InvalidIndexH:
+    return Error(Loc, "vector lane must be an integer in range [0, 7].");
+  case Match_InvalidIndexS:
+    return Error(Loc, "vector lane must be an integer in range [0, 3].");
+  case Match_InvalidIndexD:
+    return Error(Loc, "vector lane must be an integer in range [0, 1].");
+  case Match_InvalidLabel:
+    return Error(Loc, "expected label or encodable integer pc offset");
+  case Match_MRS:
+    return Error(Loc, "expected readable system register");
+  case Match_MSR:
+    return Error(Loc, "expected writable system register or pstate");
+  case Match_MnemonicFail:
+    return Error(Loc, "unrecognized instruction mnemonic");
+  default:
+    assert(0 && "unexpected error code!");
+    return Error(Loc, "invalid instruction format");
+  }
+}
+
+static const char *getSubtargetFeatureName(unsigned Val);
+
+bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                               OperandVector &Operands,
+                                               MCStreamer &Out,
+                                               unsigned &ErrorInfo,
+                                               bool MatchingInlineAsm) {
+  assert(!Operands.empty() && "Unexpected empty operand list!");
+  AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[0]);
+  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
+  StringRef Tok = Op->getToken();
+  unsigned NumOperands = Operands.size();
+
+  if (NumOperands == 4 && Tok
== "lsl") { + AArch64Operand *Op2 = static_cast(Operands[2]); + AArch64Operand *Op3 = static_cast(Operands[3]); + if (Op2->isReg() && Op3->isImm()) { + const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); + if (Op3CE) { + uint64_t Op3Val = Op3CE->getValue(); + uint64_t NewOp3Val = 0; + uint64_t NewOp4Val = 0; + if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( + Op2->getReg())) { + NewOp3Val = (32 - Op3Val) & 0x1f; + NewOp4Val = 31 - Op3Val; + } else { + NewOp3Val = (64 - Op3Val) & 0x3f; + NewOp4Val = 63 - Op3Val; + } + + const MCExpr *NewOp3 = MCConstantExpr::Create(NewOp3Val, getContext()); + const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); + + Operands[0] = AArch64Operand::CreateToken( + "ubfm", false, Op->getStartLoc(), getContext()); + Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3->getStartLoc(), + Op3->getEndLoc(), getContext()); + Operands.push_back(AArch64Operand::CreateImm( + NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext())); + delete Op3; + delete Op; + } + } + } else if (NumOperands == 5) { + // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and + // UBFIZ -> UBFM aliases. + if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") { + AArch64Operand *Op1 = static_cast(Operands[1]); + AArch64Operand *Op3 = static_cast(Operands[3]); + AArch64Operand *Op4 = static_cast(Operands[4]); + + if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { + const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); + const MCConstantExpr *Op4CE = dyn_cast(Op4->getImm()); + + if (Op3CE && Op4CE) { + uint64_t Op3Val = Op3CE->getValue(); + uint64_t Op4Val = Op4CE->getValue(); + + uint64_t RegWidth = 0; + if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( + Op1->getReg())) + RegWidth = 64; + else + RegWidth = 32; + + if (Op3Val >= RegWidth) + return Error(Op3->getStartLoc(), + "expected integer in range [0, 31]"); + if (Op4Val < 1 || Op4Val > RegWidth) + return Error(Op4->getStartLoc(), + "expected integer in range [1, 32]"); + + uint64_t NewOp3Val = 0; + if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( + Op1->getReg())) + NewOp3Val = (32 - Op3Val) & 0x1f; + else + NewOp3Val = (64 - Op3Val) & 0x3f; + + uint64_t NewOp4Val = Op4Val - 1; + + if (NewOp3Val != 0 && NewOp4Val >= NewOp3Val) + return Error(Op4->getStartLoc(), + "requested insert overflows register"); + + const MCExpr *NewOp3 = + MCConstantExpr::Create(NewOp3Val, getContext()); + const MCExpr *NewOp4 = + MCConstantExpr::Create(NewOp4Val, getContext()); + Operands[3] = AArch64Operand::CreateImm( + NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext()); + Operands[4] = AArch64Operand::CreateImm( + NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); + if (Tok == "bfi") + Operands[0] = AArch64Operand::CreateToken( + "bfm", false, Op->getStartLoc(), getContext()); + else if (Tok == "sbfiz") + Operands[0] = AArch64Operand::CreateToken( + "sbfm", false, Op->getStartLoc(), getContext()); + else if (Tok == "ubfiz") + Operands[0] = AArch64Operand::CreateToken( + "ubfm", false, Op->getStartLoc(), getContext()); + else + llvm_unreachable("No valid mnemonic for alias?"); + + delete Op; + delete Op3; + delete Op4; + } + } + + // FIXME: Horrible hack to handle the BFXIL->BFM, SBFX->SBFM, and + // UBFX -> UBFM aliases. 
+ } else if (NumOperands == 5 && + (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) { + AArch64Operand *Op1 = static_cast(Operands[1]); + AArch64Operand *Op3 = static_cast(Operands[3]); + AArch64Operand *Op4 = static_cast(Operands[4]); + + if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { + const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); + const MCConstantExpr *Op4CE = dyn_cast(Op4->getImm()); + + if (Op3CE && Op4CE) { + uint64_t Op3Val = Op3CE->getValue(); + uint64_t Op4Val = Op4CE->getValue(); + + uint64_t RegWidth = 0; + if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( + Op1->getReg())) + RegWidth = 64; + else + RegWidth = 32; + + if (Op3Val >= RegWidth) + return Error(Op3->getStartLoc(), + "expected integer in range [0, 31]"); + if (Op4Val < 1 || Op4Val > RegWidth) + return Error(Op4->getStartLoc(), + "expected integer in range [1, 32]"); + + uint64_t NewOp4Val = Op3Val + Op4Val - 1; + + if (NewOp4Val >= RegWidth || NewOp4Val < Op3Val) + return Error(Op4->getStartLoc(), + "requested extract overflows register"); + + const MCExpr *NewOp4 = + MCConstantExpr::Create(NewOp4Val, getContext()); + Operands[4] = AArch64Operand::CreateImm( + NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); + if (Tok == "bfxil") + Operands[0] = AArch64Operand::CreateToken( + "bfm", false, Op->getStartLoc(), getContext()); + else if (Tok == "sbfx") + Operands[0] = AArch64Operand::CreateToken( + "sbfm", false, Op->getStartLoc(), getContext()); + else if (Tok == "ubfx") + Operands[0] = AArch64Operand::CreateToken( + "ubfm", false, Op->getStartLoc(), getContext()); + else + llvm_unreachable("No valid mnemonic for alias?"); + + delete Op; + delete Op4; + } + } + } + } + // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands. + // InstAlias can't quite handle this since the reg classes aren't + // subclasses. + if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) { + // The source register can be Wn here, but the matcher expects a + // GPR64. Twiddle it here if necessary. + AArch64Operand *Op = static_cast(Operands[2]); + if (Op->isReg()) { + unsigned Reg = getXRegFromWReg(Op->getReg()); + Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); + delete Op; + } + } + // FIXME: Likewise for sxt[bh] with a Xd dst operand + else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) { + AArch64Operand *Op = static_cast(Operands[1]); + if (Op->isReg() && + AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( + Op->getReg())) { + // The source register can be Wn here, but the matcher expects a + // GPR64. Twiddle it here if necessary. + AArch64Operand *Op = static_cast(Operands[2]); + if (Op->isReg()) { + unsigned Reg = getXRegFromWReg(Op->getReg()); + Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); + delete Op; + } + } + } + // FIXME: Likewise for uxt[bh] with a Xd dst operand + else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) { + AArch64Operand *Op = static_cast(Operands[1]); + if (Op->isReg() && + AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( + Op->getReg())) { + // The source register can be Wn here, but the matcher expects a + // GPR32. Twiddle it here if necessary. 
+ AArch64Operand *Op = static_cast(Operands[1]); + if (Op->isReg()) { + unsigned Reg = getWRegFromXReg(Op->getReg()); + Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); + delete Op; + } + } + } + + // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. + if (NumOperands == 3 && Tok == "fmov") { + AArch64Operand *RegOp = static_cast(Operands[1]); + AArch64Operand *ImmOp = static_cast(Operands[2]); + if (RegOp->isReg() && ImmOp->isFPImm() && + ImmOp->getFPImm() == (unsigned)-1) { + unsigned zreg = + AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains( + RegOp->getReg()) + ? AArch64::WZR + : AArch64::XZR; + Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(), + Op->getEndLoc(), getContext()); + delete ImmOp; + } + } + + MCInst Inst; + // First try to match against the secondary set of tables containing the + // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2"). + unsigned MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1); + + // If that fails, try against the alternate table containing long-form NEON: + // "fadd v0.2s, v1.2s, v2.2s" + if (MatchResult != Match_Success) + MatchResult = + MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0); + + switch (MatchResult) { + case Match_Success: { + // Perform range checking and other semantic validations + SmallVector OperandLocs; + NumOperands = Operands.size(); + for (unsigned i = 1; i < NumOperands; ++i) + OperandLocs.push_back(Operands[i]->getStartLoc()); + if (validateInstruction(Inst, OperandLocs)) + return true; + + Inst.setLoc(IDLoc); + Out.EmitInstruction(Inst, STI); + return false; + } + case Match_MissingFeature: { + assert(ErrorInfo && "Unknown missing feature!"); + // Special case the error message for the very common case where only + // a single subtarget feature is missing (neon, e.g.). + std::string Msg = "instruction requires:"; + unsigned Mask = 1; + for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { + if (ErrorInfo & Mask) { + Msg += " "; + Msg += getSubtargetFeatureName(ErrorInfo & Mask); + } + Mask <<= 1; + } + return Error(IDLoc, Msg); + } + case Match_MnemonicFail: + return showMatchError(IDLoc, MatchResult); + case Match_InvalidOperand: { + SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0U) { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); + + ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(); + if (ErrorLoc == SMLoc()) + ErrorLoc = IDLoc; + } + // If the match failed on a suffix token operand, tweak the diagnostic + // accordingly. 
+  if (((AArch64Operand *)Operands[ErrorInfo])->isToken() &&
+      ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix())
+    MatchResult = Match_InvalidSuffix;
+
+    return showMatchError(ErrorLoc, MatchResult);
+  }
+  case Match_InvalidMemoryIndexed1:
+  case Match_InvalidMemoryIndexed2:
+  case Match_InvalidMemoryIndexed4:
+  case Match_InvalidMemoryIndexed8:
+  case Match_InvalidMemoryIndexed16:
+  case Match_InvalidCondCode:
+  case Match_AddSubRegExtendSmall:
+  case Match_AddSubRegExtendLarge:
+  case Match_AddSubSecondSource:
+  case Match_LogicalSecondSource:
+  case Match_AddSubRegShift32:
+  case Match_AddSubRegShift64:
+  case Match_InvalidMovImm32Shift:
+  case Match_InvalidMovImm64Shift:
+  case Match_InvalidFPImm:
+  case Match_InvalidMemoryWExtend8:
+  case Match_InvalidMemoryWExtend16:
+  case Match_InvalidMemoryWExtend32:
+  case Match_InvalidMemoryWExtend64:
+  case Match_InvalidMemoryWExtend128:
+  case Match_InvalidMemoryXExtend8:
+  case Match_InvalidMemoryXExtend16:
+  case Match_InvalidMemoryXExtend32:
+  case Match_InvalidMemoryXExtend64:
+  case Match_InvalidMemoryXExtend128:
+  case Match_InvalidMemoryIndexed4SImm7:
+  case Match_InvalidMemoryIndexed8SImm7:
+  case Match_InvalidMemoryIndexed16SImm7:
+  case Match_InvalidMemoryIndexedSImm9:
+  case Match_InvalidImm0_7:
+  case Match_InvalidImm0_15:
+  case Match_InvalidImm0_31:
+  case Match_InvalidImm0_63:
+  case Match_InvalidImm0_127:
+  case Match_InvalidImm0_65535:
+  case Match_InvalidImm1_8:
+  case Match_InvalidImm1_16:
+  case Match_InvalidImm1_32:
+  case Match_InvalidImm1_64:
+  case Match_InvalidIndex1:
+  case Match_InvalidIndexB:
+  case Match_InvalidIndexH:
+  case Match_InvalidIndexS:
+  case Match_InvalidIndexD:
+  case Match_InvalidLabel:
+  case Match_MSR:
+  case Match_MRS: {
+    // Any time we get here, there's nothing fancy to do. Just get the
+    // operand SMLoc and display the diagnostic.
+    SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+    if (ErrorLoc == SMLoc())
+      ErrorLoc = IDLoc;
+    return showMatchError(ErrorLoc, MatchResult);
+  }
+  }
+
+  llvm_unreachable("Implement any new match types added!");
+  return true;
+}
+
+/// ParseDirective parses the AArch64-specific directives
 bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
   StringRef IDVal = DirectiveID.getIdentifier();
+  SMLoc Loc = DirectiveID.getLoc();
   if (IDVal == ".hword")
-    return ParseDirectiveWord(2, DirectiveID.getLoc());
-  else if (IDVal == ".word")
-    return ParseDirectiveWord(4, DirectiveID.getLoc());
-  else if (IDVal == ".xword")
-    return ParseDirectiveWord(8, DirectiveID.getLoc());
-  else if (IDVal == ".tlsdesccall")
-    return ParseDirectiveTLSDescCall(DirectiveID.getLoc());
-
-  return true;
+    return parseDirectiveWord(2, Loc);
+  if (IDVal == ".word")
+    return parseDirectiveWord(4, Loc);
+  if (IDVal == ".xword")
+    return parseDirectiveWord(8, Loc);
+  if (IDVal == ".tlsdesccall")
+    return parseDirectiveTLSDescCall(Loc);
+
+  return parseDirectiveLOH(IDVal, Loc);
 }

 /// parseDirectiveWord
 ///  ::= .word [ expression (, expression)* ]
-bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
   if (getLexer().isNot(AsmToken::EndOfStatement)) {
     for (;;) {
       const MCExpr *Value;
       if (getParser().parseExpression(Value))
-        return false;
+        return true;

       getParser().getStreamer().EmitValue(Value, Size);

@@ -2374,10 +3838,8 @@ bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
         break;

       // FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma)) { - Error(L, "unexpected token in directive"); - return false; - } + if (getLexer().isNot(AsmToken::Comma)) + return Error(L, "unexpected token in directive"); Parser.Lex(); } } @@ -2388,15 +3850,14 @@ bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { // parseDirectiveTLSDescCall: // ::= .tlsdesccall symbol -bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) { +bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { StringRef Name; - if (getParser().parseIdentifier(Name)) { - Error(L, "expected symbol after directive"); - return false; - } + if (getParser().parseIdentifier(Name)) + return Error(L, "expected symbol after directive"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); + Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext()); MCInst Inst; Inst.setOpcode(AArch64::TLSDESCCALL); @@ -2406,271 +3867,181 @@ bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) { return false; } +/// ::= .loh label1, ..., labelN +/// The number of arguments depends on the loh identifier. +bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { + if (IDVal != MCLOHDirectiveName()) + return true; + MCLOHType Kind; + if (getParser().getTok().isNot(AsmToken::Identifier)) { + if (getParser().getTok().isNot(AsmToken::Integer)) + return TokError("expected an identifier or a number in directive"); + // We successfully get a numeric value for the identifier. + // Check if it is valid. + int64_t Id = getParser().getTok().getIntVal(); + Kind = (MCLOHType)Id; + // Check that Id does not overflow MCLOHType. + if (!isValidMCLOHType(Kind) || Id != Kind) + return TokError("invalid numeric identifier in directive"); + } else { + StringRef Name = getTok().getIdentifier(); + // We successfully parse an identifier. + // Check if it is a recognized one. + int Id = MCLOHNameToId(Name); + + if (Id == -1) + return TokError("invalid identifier in directive"); + Kind = (MCLOHType)Id; + } + // Consume the identifier. + Lex(); + // Get the number of arguments of this LOH. 
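// ---- Editor's note (sketch, not part of the patch): the argument count is
// a function of the LOH kind, which is what MCLOHIdToNbArgs reports on the
// next line. Kind names and arities below follow
// llvm/MC/MCLinkerOptimizationHint.h but are shown only as examples.
struct LOHExample { const char *Kind; int NbArgs; };
static const LOHExample Examples[] = {
    {"AdrpAdrp", 2},   // .loh AdrpAdrp   L1, L2
    {"AdrpLdr", 2},    // .loh AdrpLdr    L1, L2
    {"AdrpAddLdr", 3}, // .loh AdrpAddLdr L1, L2, L3
};
// ---- end editor's note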
+ int NbArgs = MCLOHIdToNbArgs(Kind); + + assert(NbArgs != -1 && "Invalid number of arguments"); + + SmallVector<MCSymbol *, 3> Args; + for (int Idx = 0; Idx < NbArgs; ++Idx) { + StringRef Name; + if (getParser().parseIdentifier(Name)) + return TokError("expected identifier in directive"); + Args.push_back(getContext().GetOrCreateSymbol(Name)); + + if (Idx + 1 == NbArgs) + break; + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '" + Twine(IDVal) + "' directive"); + Lex(); + } + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '" + Twine(IDVal) + "' directive"); -bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { - MCInst Inst; - unsigned MatchResult; - MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, - MatchingInlineAsm); + getStreamer().EmitLOHDirective((MCLOHType)Kind, Args); + return false; +} - if (ErrorInfo != ~0U && ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); +bool +AArch64AsmParser::classifySymbolRef(const MCExpr *Expr, + AArch64MCExpr::VariantKind &ELFRefKind, + MCSymbolRefExpr::VariantKind &DarwinRefKind, + int64_t &Addend) { + ELFRefKind = AArch64MCExpr::VK_INVALID; + DarwinRefKind = MCSymbolRefExpr::VK_None; + Addend = 0; + + if (const AArch64MCExpr *AE = dyn_cast<AArch64MCExpr>(Expr)) { + ELFRefKind = AE->getKind(); + Expr = AE->getSubExpr(); + } + + const MCSymbolRefExpr *SE = dyn_cast<MCSymbolRefExpr>(Expr); + if (SE) { + // It's a simple symbol reference with no addend. + DarwinRefKind = SE->getKind(); + return true; + } - switch (MatchResult) { - default: break; - case Match_Success: - if (validateInstruction(Inst, Operands)) - return true; + const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr); + if (!BE) + return false; - Out.EmitInstruction(Inst, STI); + SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS()); + if (!SE) return false; - case Match_MissingFeature: - Error(IDLoc, "instruction requires a CPU feature not currently enabled"); - return true; - case Match_InvalidOperand: { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { - ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - } + DarwinRefKind = SE->getKind(); - return Error(ErrorLoc, "invalid operand for instruction"); - } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction"); + if (BE->getOpcode() != MCBinaryExpr::Add && + BE->getOpcode() != MCBinaryExpr::Sub) + return false; - case Match_AddSubRegExtendSmall: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]"); - case Match_AddSubRegExtendLarge: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]"); - case Match_AddSubRegShift32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]"); - case Match_AddSubRegShift64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]"); - case Match_AddSubSecondSource: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register, symbol or integer in range [0, 4095]"); - case Match_CVTFixedPos32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in
range [1, 32]"); - case Match_CVTFixedPos64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 64]"); - case Match_CondCode: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected AArch64 condition code"); - case Match_FPImm: - // Any situation which allows a nontrivial floating-point constant also - // allows a register. - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register or floating-point constant"); - case Match_FPZero: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected floating-point constant #0.0 or invalid register type"); - case Match_Label: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected label or encodable integer pc offset"); - case Match_Lane1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected lane specifier '[1]'"); - case Match_LoadStoreExtend32_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0"); - case Match_LoadStoreExtend32_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1"); - case Match_LoadStoreExtend32_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2"); - case Match_LoadStoreExtend32_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3"); - case Match_LoadStoreExtend32_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtw' with optional shift of #0 or #4"); - case Match_LoadStoreExtend64_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0"); - case Match_LoadStoreExtend64_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #1"); - case Match_LoadStoreExtend64_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #2"); - case Match_LoadStoreExtend64_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #3"); - case Match_LoadStoreExtend64_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'lsl' or 'sxtx' with optional shift of #0 or #4"); - case Match_LoadStoreSImm7_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 4 in range [-256, 252]"); - case Match_LoadStoreSImm7_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 8 in range [-512, 504]"); - case Match_LoadStoreSImm7_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer multiple of 16 in range [-1024, 1008]"); - case Match_LoadStoreSImm9: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [-256, 255]"); - case Match_LoadStoreUImm12_1: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 4095]"); - case Match_LoadStoreUImm12_2: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or 
integer in range [0, 8190]"); - case Match_LoadStoreUImm12_4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 16380]"); - case Match_LoadStoreUImm12_8: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 32760]"); - case Match_LoadStoreUImm12_16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic reference or integer in range [0, 65520]"); - case Match_LogicalSecondSource: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected compatible register or logical immediate"); - case Match_MOVWUImm16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected relocated symbol or integer in range [0, 65535]"); - case Match_MRS: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected readable system register"); - case Match_MSR: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected writable system register or pstate"); - case Match_NamedImm_at: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]"); - case Match_NamedImm_dbarrier: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15] or symbolic barrier operand"); - case Match_NamedImm_dc: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected symbolic 'dc' operand"); - case Match_NamedImm_ic: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'"); - case Match_NamedImm_isb: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15] or 'sy'"); - case Match_NamedImm_prefetch: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected prefetch hint: p(ld|st|i)l[123](strm|keep)"); - case Match_NamedImm_tlbi: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected translation buffer invalidation operand"); - case Match_UImm16: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 65535]"); - case Match_UImm3: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 7]"); - case Match_UImm4: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15]"); - case Match_UImm5: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 31]"); - case Match_UImm6: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 63]"); - case Match_UImm7: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 127]"); - case Match_Width32: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [<lsb>, 31]"); - case Match_Width64: - return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [<lsb>, 63]"); - case Match_ShrImm8: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 8]"); - case Match_ShrImm16: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 16]"); - case Match_ShrImm32: - return
Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 32]"); - case Match_ShrImm64: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [1, 64]"); - case Match_ShlImm8: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 7]"); - case Match_ShlImm16: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 15]"); - case Match_ShlImm32: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 31]"); - case Match_ShlImm64: - return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), - "expected integer in range [0, 63]"); - } + // See if the addend is a constant, otherwise there's more going + // on here than we can deal with. + auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS()); + if (!AddendExpr) + return false; - llvm_unreachable("Implement any new match types added!"); - return true; + Addend = AddendExpr->getValue(); + if (BE->getOpcode() == MCBinaryExpr::Sub) + Addend = -Addend; + + // It's some symbol reference + a constant addend, but really + // shouldn't use both Darwin and ELF syntax. + return ELFRefKind == AArch64MCExpr::VK_INVALID || + DarwinRefKind == MCSymbolRefExpr::VK_None; } -void AArch64Operand::print(raw_ostream &OS) const { +/// Force static initialization. +extern "C" void LLVMInitializeAArch64AsmParser() { + RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64leTarget); + RegisterMCAsmParser<AArch64AsmParser> Y(TheAArch64beTarget); + + RegisterMCAsmParser<AArch64AsmParser> Z(TheARM64leTarget); + RegisterMCAsmParser<AArch64AsmParser> W(TheARM64beTarget); +} + +#define GET_REGISTER_MATCHER +#define GET_SUBTARGET_FEATURE_NAME +#define GET_MATCHER_IMPLEMENTATION +#include "AArch64GenAsmMatcher.inc" + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, + unsigned Kind) { + AArch64Operand *Op = static_cast<AArch64Operand *>(AsmOp); + // If the kind is a token for a literal immediate, check if our asm + // operand matches. This is for InstAliases which have a fixed-value + // immediate in the syntax.
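// (Aside: the MCK__35_N match classes handled below are TableGen's mangled
// names for the literal tokens "#N" -- '#' is ASCII 35 -- so, for example,
// MCK__35_8 is the match class for the fixed "#8" operand of an InstAlias.)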
+ int64_t ExpectedVal; switch (Kind) { - case k_CondCode: - OS << "<CondCode: " << CondCode.Code << ">"; + default: + return Match_InvalidOperand; + case MCK__35_0: + ExpectedVal = 0; break; - case k_FPImmediate: - OS << "<fpimm: " << FPImm.Val << ">"; + case MCK__35_1: + ExpectedVal = 1; break; - case k_ImmWithLSL: - OS << "<immwithlsl: imm=" << ImmWithLSL.Val << ", shift=" << ImmWithLSL.ShiftAmount << ">"; + case MCK__35_12: + ExpectedVal = 12; break; - case k_Immediate: - getImm()->print(OS); + case MCK__35_16: + ExpectedVal = 16; break; - case k_Register: - OS << "<register " << getReg() << '>'; + case MCK__35_2: + ExpectedVal = 2; break; - case k_Token: - OS << '\'' << getToken() << '\''; + case MCK__35_24: + ExpectedVal = 24; break; - case k_ShiftExtend: - OS << "<shift: type=" << ShiftExtend.ShiftType << ", amount=" << ShiftExtend.Amount << ">"; + case MCK__35_3: + ExpectedVal = 3; break; - case k_SysReg: { - StringRef Name(SysReg.Data, SysReg.Length); - OS << "<sysreg: " << Name << '>'; + case MCK__35_32: + ExpectedVal = 32; break; - } - default: - llvm_unreachable("No idea how to print this kind of operand"); + case MCK__35_4: + ExpectedVal = 4; + break; + case MCK__35_48: + ExpectedVal = 48; + break; + case MCK__35_6: + ExpectedVal = 6; + break; + case MCK__35_64: + ExpectedVal = 64; + break; + case MCK__35_8: + ExpectedVal = 8; break; } + if (!Op->isImm()) + return Match_InvalidOperand; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + if (!CE) + return Match_InvalidOperand; + if (CE->getValue() == ExpectedVal) + return Match_Success; + return Match_InvalidOperand; } - -void AArch64Operand::dump() const { - print(errs()); -} - - -/// Force static initialization. -extern "C" void LLVMInitializeAArch64AsmParser() { - RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64leTarget); - RegisterMCAsmParser<AArch64AsmParser> Y(TheAArch64beTarget); -} - -#define GET_REGISTER_MATCHER -#define GET_MATCHER_IMPLEMENTATION -#include "AArch64GenAsmMatcher.inc" diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt index e81ec70..cc0a9d8 100644 --- a/lib/Target/AArch64/AsmParser/CMakeLists.txt +++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt @@ -1,3 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + add_llvm_library(LLVMAArch64AsmParser AArch64AsmParser.cpp ) + diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt index 2d8f632..11eb9d5 100644 --- a/lib/Target/AArch64/AsmParser/LLVMBuild.txt +++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; +;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile index 56c9ef5..00268c7 100644 --- a/lib/Target/AArch64/AsmParser/Makefile +++ b/lib/Target/AArch64/AsmParser/Makefile @@ -9,7 +9,7 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMAArch64AsmParser -# Hack: we need to include 'main' target directory to grab private headers +# Hack: we need to include 'main' ARM target directory to grab private headers CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt index dfc10af..789d549 100644 --- a/lib/Target/AArch64/CMakeLists.txt +++ b/lib/Target/AArch64/CMakeLists.txt @@ -1,37 +1,51 @@ set(LLVM_TARGET_DEFINITIONS AArch64.td) -tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) -tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info) tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering) -tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) +tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel) +tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv) tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(AArch64CommonTableGen) add_llvm_target(AArch64CodeGen + AArch64AddressTypePromotion.cpp + AArch64AdvSIMDScalarPass.cpp AArch64AsmPrinter.cpp - AArch64BranchFixupPass.cpp + AArch64BranchRelaxation.cpp + AArch64CleanupLocalDynamicTLSPass.cpp + AArch64CollectLOH.cpp + AArch64ConditionalCompares.cpp + AArch64DeadRegisterDefinitionsPass.cpp + AArch64ExpandPseudoInsts.cpp + AArch64FastISel.cpp AArch64FrameLowering.cpp AArch64ISelDAGToDAG.cpp AArch64ISelLowering.cpp AArch64InstrInfo.cpp - AArch64MachineFunctionInfo.cpp + AArch64LoadStoreOptimizer.cpp AArch64MCInstLower.cpp + AArch64PromoteConstant.cpp AArch64RegisterInfo.cpp AArch64SelectionDAGInfo.cpp + AArch64StorePairSuppress.cpp AArch64Subtarget.cpp AArch64TargetMachine.cpp AArch64TargetObjectFile.cpp AArch64TargetTransformInfo.cpp - ) +) +add_dependencies(LLVMAArch64CodeGen intrinsics_gen) + +add_subdirectory(TargetInfo) add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) -add_subdirectory(TargetInfo) add_subdirectory(Utils) diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 9bd363a..6de27d6 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1,4 +1,4 @@ -//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===// +//===- AArch64Disassembler.cpp - Disassembler for AArch64 -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,244 +7,169 @@ // //===----------------------------------------------------------------------===// // -// This file contains the functions necessary to decode AArch64 instruction -// bitpatterns into MCInsts (with the help of TableGenerated information from -// the instruction definitions). 
// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-disassembler" - -#include "AArch64.h" -#include "AArch64RegisterInfo.h" +#include "AArch64Disassembler.h" +#include "AArch64ExternalSymbolizer.h" #include "AArch64Subtarget.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -typedef MCDisassembler::DecodeStatus DecodeStatus; - -namespace { -/// AArch64 disassembler for all AArch64 platforms. -class AArch64Disassembler : public MCDisassembler { - OwningPtr RegInfo; -public: - /// Initializes the disassembler. - /// - AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) - : MCDisassembler(STI), RegInfo(Info) { - } - - ~AArch64Disassembler() {} +#define DEBUG_TYPE "aarch64-disassembler" - /// See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; +// Pull DecodeStatus and its enum values into the global namespace. +typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; - const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); } -}; - -} - -// Forward-declarations used in the auto-generated files. -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus -DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus -DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder); +// Forward declare these because the autogenerated code will reference them. +// Definitions are further down. 
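Every hook the generated decoder tables call back into shares one shape; the sketch below shows that common signature with a dummy register map (illustrative only, not one of the decoders declared next):

// A decoder callback receives the MCInst under construction, the raw
// encoded field value, the instruction's address, and an opaque pointer
// back to the disassembler; it returns Success, SoftFail, or Fail.
static DecodeStatus DecodeDemoRegisterClass(llvm::MCInst &Inst,
                                            unsigned RegNo, uint64_t Addr,
                                            const void *Decoder) {
  if (RegNo > 31)
    return MCDisassembler::Fail; // encoding names a nonexistent register
  static const unsigned DemoTable[32] = { /* target register numbers */ };
  Inst.addOperand(MCOperand::CreateReg(DemoTable[RegNo]));
  return MCDisassembler::Success;
}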
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeFPR128_loRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, +static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, - unsigned OptionHiS, - uint64_t Address, - const void *Decoder); - - -static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, +static DecodeStatus DecodeGPR64spRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, +static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeGPR32spRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, - unsigned RmBits, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, +static DecodeStatus DecodeQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const 
 void *Decoder); - -template<unsigned RegWidth> -static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, - unsigned FullImm, - uint64_t Address, - const void *Decoder); - -template<unsigned RegWidth> -static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, - unsigned Bits, +static DecodeStatus DecodeQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeQQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, +static DecodeStatus DecodeDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -template<A64SE::ShiftExtSpecifiers Ext, bool IsHalf> -static DecodeStatus -DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount, - uint64_t Address, const void *Decoder); - -static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, +static DecodeStatus DecodeDDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, + +static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, + uint32_t insn, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, + uint32_t insn, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, + uint32_t insn, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, +static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Address, const void *Decoder); +static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, + uint32_t insn, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, + uint64_t Address, + const void *Decoder); -static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, - unsigned Insn, +static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -template<typename SomeNamedImmMapper> -static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus -DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper, - llvm::MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); - -static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - - -static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Val, - uint64_t Address, +static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); - -static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder); - -static bool Check(DecodeStatus &Out, DecodeStatus In); - -#include "AArch64GenDisassemblerTables.inc" +static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder); +static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder); +static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); +static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder); static bool Check(DecodeStatus &Out, DecodeStatus In) { switch (In) { @@ -261,486 +186,479 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { llvm_unreachable("Invalid DecodeStatus!"); } +#include "AArch64GenDisassemblerTables.inc" +#include "AArch64GenInstrInfo.inc" + +#define Success llvm::MCDisassembler::Success +#define Fail llvm::MCDisassembler::Fail +#define SoftFail
llvm::MCDisassembler::SoftFail + +static MCDisassembler *createAArch64Disassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new AArch64Disassembler(STI, Ctx); +} + DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { + const MemoryObject &Region, + uint64_t Address, + raw_ostream &os, + raw_ostream &cs) const { CommentStream = &cs; uint8_t bytes[4]; + Size = 0; // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, bytes) == -1) { - Size = 0; - return MCDisassembler::Fail; - } + if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1) + return Fail; + Size = 4; // Encoded as a little-endian 32-bit word in the stream. - uint32_t insn = (bytes[3] << 24) | - (bytes[2] << 16) | - (bytes[1] << 8) | - (bytes[0] << 0); + uint32_t insn = + (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0); // Calling the auto-generated decoder function. - DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { - Size = 4; - return result; - } - - MI.clear(); - Size = 0; - return MCDisassembler::Fail; + return decodeInstruction(DecoderTable32, MI, insn, Address, this, STI); } -static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { - const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D); - return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo); +static MCSymbolizer * +createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo, MCContext *Ctx, + MCRelocationInfo *RelInfo) { + return new llvm::AArch64ExternalSymbolizer( + *Ctx, + std::unique_ptr<MCRelocationInfo>(RelInfo), + GetOpInfo, SymbolLookUp, DisInfo); } -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; +extern "C" void LLVMInitializeAArch64Disassembler() { + TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget, + createAArch64Disassembler); + TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget, + createAArch64Disassembler); + TargetRegistry::RegisterMCSymbolizer(TheAArch64leTarget, + createAArch64ExternalSymbolizer); + TargetRegistry::RegisterMCSymbolizer(TheAArch64beTarget, + createAArch64ExternalSymbolizer); - uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; + TargetRegistry::RegisterMCDisassembler(TheARM64leTarget, + createAArch64Disassembler); + TargetRegistry::RegisterMCDisassembler(TheARM64beTarget, + createAArch64Disassembler); + TargetRegistry::RegisterMCSymbolizer(TheARM64leTarget, + createAArch64ExternalSymbolizer); + TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget, + createAArch64ExternalSymbolizer); } -static DecodeStatus -DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} +static const unsigned FPR128DecoderTable[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, + AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9, + AArch64::Q10, AArch64::Q11, AArch64::Q12,
AArch64::Q13, AArch64::Q14, + AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19, + AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24, + AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29, + AArch64::Q30, AArch64::Q31 +}; -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { +static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { if (RegNo > 31) - return MCDisassembler::Fail; + return Fail; - uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo); + unsigned Register = FPR128DecoderTable[RegNo]; Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; + return Success; } -static DecodeStatus -DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; +static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + if (RegNo > 15) + return Fail; + return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder); } -static DecodeStatus -DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo); - Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; -} +static const unsigned FPR64DecoderTable[] = { + AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, + AArch64::D5, AArch64::D6, AArch64::D7, AArch64::D8, AArch64::D9, + AArch64::D10, AArch64::D11, AArch64::D12, AArch64::D13, AArch64::D14, + AArch64::D15, AArch64::D16, AArch64::D17, AArch64::D18, AArch64::D19, + AArch64::D20, AArch64::D21, AArch64::D22, AArch64::D23, AArch64::D24, + AArch64::D25, AArch64::D26, AArch64::D27, AArch64::D28, AArch64::D29, + AArch64::D30, AArch64::D31 +}; -static DecodeStatus -DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { if (RegNo > 31) - return MCDisassembler::Fail; + return Fail; - uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo); + unsigned Register = FPR64DecoderTable[RegNo]; Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; + return Success; } +static const unsigned FPR32DecoderTable[] = { + AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, + AArch64::S5, AArch64::S6, AArch64::S7, AArch64::S8, AArch64::S9, + AArch64::S10, AArch64::S11, AArch64::S12, AArch64::S13, AArch64::S14, + AArch64::S15, AArch64::S16, AArch64::S17, AArch64::S18, AArch64::S19, + AArch64::S20, AArch64::S21, AArch64::S22, AArch64::S23, AArch64::S24, + AArch64::S25, AArch64::S26, AArch64::S27, AArch64::S28, AArch64::S29, + AArch64::S30, AArch64::S31 +}; -static DecodeStatus -DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { if (RegNo > 31) - return MCDisassembler::Fail; + return Fail; - uint16_t 
Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo); + unsigned Register = FPR32DecoderTable[RegNo]; Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; + return Success; } -static DecodeStatus -DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static const unsigned FPR16DecoderTable[] = { + AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, + AArch64::H5, AArch64::H6, AArch64::H7, AArch64::H8, AArch64::H9, + AArch64::H10, AArch64::H11, AArch64::H12, AArch64::H13, AArch64::H14, + AArch64::H15, AArch64::H16, AArch64::H17, AArch64::H18, AArch64::H19, + AArch64::H20, AArch64::H21, AArch64::H22, AArch64::H23, AArch64::H24, + AArch64::H25, AArch64::H26, AArch64::H27, AArch64::H28, AArch64::H29, + AArch64::H30, AArch64::H31 +}; + +static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { if (RegNo > 31) - return MCDisassembler::Fail; + return Fail; - uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo); + unsigned Register = FPR16DecoderTable[RegNo]; Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; + return Success; } -static DecodeStatus -DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 15) - return MCDisassembler::Fail; - - return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder); -} +static const unsigned FPR8DecoderTable[] = { + AArch64::B0, AArch64::B1, AArch64::B2, AArch64::B3, AArch64::B4, + AArch64::B5, AArch64::B6, AArch64::B7, AArch64::B8, AArch64::B9, + AArch64::B10, AArch64::B11, AArch64::B12, AArch64::B13, AArch64::B14, + AArch64::B15, AArch64::B16, AArch64::B17, AArch64::B18, AArch64::B19, + AArch64::B20, AArch64::B21, AArch64::B22, AArch64::B23, AArch64::B24, + AArch64::B25, AArch64::B26, AArch64::B27, AArch64::B28, AArch64::B29, + AArch64::B30, AArch64::B31 +}; -static DecodeStatus -DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { if (RegNo > 31) - return MCDisassembler::Fail; + return Fail; - uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo); + unsigned Register = FPR8DecoderTable[RegNo]; Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; + return Success; } -static DecodeStatus -DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { - if (RegNo > 15) - return MCDisassembler::Fail; - - return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder); -} +static const unsigned GPR64DecoderTable[] = { + AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, + AArch64::X5, AArch64::X6, AArch64::X7, AArch64::X8, AArch64::X9, + AArch64::X10, AArch64::X11, AArch64::X12, AArch64::X13, AArch64::X14, + AArch64::X15, AArch64::X16, AArch64::X17, AArch64::X18, AArch64::X19, + AArch64::X20, AArch64::X21, AArch64::X22, AArch64::X23, AArch64::X24, + AArch64::X25, AArch64::X26, AArch64::X27, AArch64::X28, AArch64::FP, + AArch64::LR, AArch64::XZR +}; -static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder) { - if (RegNo > 30) - return MCDisassembler::Fail; +static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + 
const void *Decoder) { + if (RegNo > 31) + return Fail; - uint16_t Register = getReg(Decoder, AArch64::GPR64noxzrRegClassID, RegNo); + unsigned Register = GPR64DecoderTable[RegNo]; Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; + return Success; } -static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo, - unsigned RegID, - const void *Decoder) { +static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { if (RegNo > 31) - return MCDisassembler::Fail; - - uint16_t Register = getReg(Decoder, RegID, RegNo); + return Fail; + unsigned Register = GPR64DecoderTable[RegNo]; + if (Register == AArch64::XZR) + Register = AArch64::SP; Inst.addOperand(MCOperand::CreateReg(Register)); - return MCDisassembler::Success; + return Success; } -static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID, - Decoder); -} +static const unsigned GPR32DecoderTable[] = { + AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, + AArch64::W5, AArch64::W6, AArch64::W7, AArch64::W8, AArch64::W9, + AArch64::W10, AArch64::W11, AArch64::W12, AArch64::W13, AArch64::W14, + AArch64::W15, AArch64::W16, AArch64::W17, AArch64::W18, AArch64::W19, + AArch64::W20, AArch64::W21, AArch64::W22, AArch64::W23, AArch64::W24, + AArch64::W25, AArch64::W26, AArch64::W27, AArch64::W28, AArch64::W29, + AArch64::W30, AArch64::WZR +}; -static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, +static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID, - Decoder); -} + if (RegNo > 31) + return Fail; -static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID, - Decoder); + unsigned Register = GPR32DecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, +static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID, - Decoder); -} + if (RegNo > 31) + return Fail; -static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID, - Decoder); + unsigned Register = GPR32DecoderTable[RegNo]; + if (Register == AArch64::WZR) + Register = AArch64::WSP; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder) { - return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID, - Decoder); -} +static const unsigned VectorDecoderTable[] = { + AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, + AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9, + AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14, + AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19, + AArch64::Q20, AArch64::Q21, 
AArch64::Q22, AArch64::Q23, AArch64::Q24, + AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29, + AArch64::Q30, AArch64::Q31 +}; -static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, - unsigned OptionHiS, - uint64_t Address, - const void *Decoder) { - // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1}, - // S}. Hence we want to check bit 1. - if (!(OptionHiS & 2)) - return MCDisassembler::Fail; +static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + if (RegNo > 31) + return Fail; - Inst.addOperand(MCOperand::CreateImm(OptionHiS)); - return MCDisassembler::Success; + unsigned Register = VectorDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder) { - // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be - // between 0 and 31. - if (Imm6Bits > 31) - return MCDisassembler::Fail; +static const unsigned QQDecoderTable[] = { + AArch64::Q0_Q1, AArch64::Q1_Q2, AArch64::Q2_Q3, AArch64::Q3_Q4, + AArch64::Q4_Q5, AArch64::Q5_Q6, AArch64::Q6_Q7, AArch64::Q7_Q8, + AArch64::Q8_Q9, AArch64::Q9_Q10, AArch64::Q10_Q11, AArch64::Q11_Q12, + AArch64::Q12_Q13, AArch64::Q13_Q14, AArch64::Q14_Q15, AArch64::Q15_Q16, + AArch64::Q16_Q17, AArch64::Q17_Q18, AArch64::Q18_Q19, AArch64::Q19_Q20, + AArch64::Q20_Q21, AArch64::Q21_Q22, AArch64::Q22_Q23, AArch64::Q23_Q24, + AArch64::Q24_Q25, AArch64::Q25_Q26, AArch64::Q26_Q27, AArch64::Q27_Q28, + AArch64::Q28_Q29, AArch64::Q29_Q30, AArch64::Q30_Q31, AArch64::Q31_Q0 +}; - Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); - return MCDisassembler::Success; +static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder) { + if (RegNo > 31) + return Fail; + unsigned Register = QQDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst, - unsigned Imm6Bits, - uint64_t Address, - const void *Decoder) { - // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32. - if (Imm6Bits < 32) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Imm6Bits)); - return MCDisassembler::Success; -} +static const unsigned QQQDecoderTable[] = { + AArch64::Q0_Q1_Q2, AArch64::Q1_Q2_Q3, AArch64::Q2_Q3_Q4, + AArch64::Q3_Q4_Q5, AArch64::Q4_Q5_Q6, AArch64::Q5_Q6_Q7, + AArch64::Q6_Q7_Q8, AArch64::Q7_Q8_Q9, AArch64::Q8_Q9_Q10, + AArch64::Q9_Q10_Q11, AArch64::Q10_Q11_Q12, AArch64::Q11_Q12_Q13, + AArch64::Q12_Q13_Q14, AArch64::Q13_Q14_Q15, AArch64::Q14_Q15_Q16, + AArch64::Q15_Q16_Q17, AArch64::Q16_Q17_Q18, AArch64::Q17_Q18_Q19, + AArch64::Q18_Q19_Q20, AArch64::Q19_Q20_Q21, AArch64::Q20_Q21_Q22, + AArch64::Q21_Q22_Q23, AArch64::Q22_Q23_Q24, AArch64::Q23_Q24_Q25, + AArch64::Q24_Q25_Q26, AArch64::Q25_Q26_Q27, AArch64::Q26_Q27_Q28, + AArch64::Q27_Q28_Q29, AArch64::Q28_Q29_Q30, AArch64::Q29_Q30_Q31, + AArch64::Q30_Q31_Q0, AArch64::Q31_Q0_Q1 +}; -static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, - unsigned RmBits, - uint64_t Address, - const void *Decoder) { - // Any bits are valid in the instruction (they're architecturally ignored), - // but a code generator should insert 0. 
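The consecutive-register tuple tables here (QQ, QQQ, QQQQ, and the D-register variants below) are just modular successor lists: the entry for index n groups register n with its successors mod 32, which is why the final entries wrap around (Q31_Q0, Q30_Q31_Q0_Q1, and so on). A two-line sketch of the equivalent arithmetic:

// Equivalent computation for the tuple tables: the i-th member of the
// tuple starting at RegNo is register (RegNo + i) % 32.
static unsigned tupleMember(unsigned RegNo, unsigned i) {
  return (RegNo + i) % 32;
}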
- Inst.addOperand(MCOperand::CreateImm(0)); - return MCDisassembler::Success; +static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder) { + if (RegNo > 31) + return Fail; + unsigned Register = QQQDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(8 - Val)); - return MCDisassembler::Success; -} +static const unsigned QQQQDecoderTable[] = { + AArch64::Q0_Q1_Q2_Q3, AArch64::Q1_Q2_Q3_Q4, AArch64::Q2_Q3_Q4_Q5, + AArch64::Q3_Q4_Q5_Q6, AArch64::Q4_Q5_Q6_Q7, AArch64::Q5_Q6_Q7_Q8, + AArch64::Q6_Q7_Q8_Q9, AArch64::Q7_Q8_Q9_Q10, AArch64::Q8_Q9_Q10_Q11, + AArch64::Q9_Q10_Q11_Q12, AArch64::Q10_Q11_Q12_Q13, AArch64::Q11_Q12_Q13_Q14, + AArch64::Q12_Q13_Q14_Q15, AArch64::Q13_Q14_Q15_Q16, AArch64::Q14_Q15_Q16_Q17, + AArch64::Q15_Q16_Q17_Q18, AArch64::Q16_Q17_Q18_Q19, AArch64::Q17_Q18_Q19_Q20, + AArch64::Q18_Q19_Q20_Q21, AArch64::Q19_Q20_Q21_Q22, AArch64::Q20_Q21_Q22_Q23, + AArch64::Q21_Q22_Q23_Q24, AArch64::Q22_Q23_Q24_Q25, AArch64::Q23_Q24_Q25_Q26, + AArch64::Q24_Q25_Q26_Q27, AArch64::Q25_Q26_Q27_Q28, AArch64::Q26_Q27_Q28_Q29, + AArch64::Q27_Q28_Q29_Q30, AArch64::Q28_Q29_Q30_Q31, AArch64::Q29_Q30_Q31_Q0, + AArch64::Q30_Q31_Q0_Q1, AArch64::Q31_Q0_Q1_Q2 +}; -static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(16 - Val)); - return MCDisassembler::Success; +static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + if (RegNo > 31) + return Fail; + unsigned Register = QQQQDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(32 - Val)); - return MCDisassembler::Success; -} +static const unsigned DDDecoderTable[] = { + AArch64::D0_D1, AArch64::D1_D2, AArch64::D2_D3, AArch64::D3_D4, + AArch64::D4_D5, AArch64::D5_D6, AArch64::D6_D7, AArch64::D7_D8, + AArch64::D8_D9, AArch64::D9_D10, AArch64::D10_D11, AArch64::D11_D12, + AArch64::D12_D13, AArch64::D13_D14, AArch64::D14_D15, AArch64::D15_D16, + AArch64::D16_D17, AArch64::D17_D18, AArch64::D18_D19, AArch64::D19_D20, + AArch64::D20_D21, AArch64::D21_D22, AArch64::D22_D23, AArch64::D23_D24, + AArch64::D24_D25, AArch64::D25_D26, AArch64::D26_D27, AArch64::D27_D28, + AArch64::D28_D29, AArch64::D29_D30, AArch64::D30_D31, AArch64::D31_D0 +}; -static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - Val)); - return MCDisassembler::Success; +static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder) { + if (RegNo > 31) + return Fail; + unsigned Register = DDDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 7) - return MCDisassembler::Fail; +static const unsigned DDDDecoderTable[] = { + AArch64::D0_D1_D2, AArch64::D1_D2_D3, AArch64::D2_D3_D4, + AArch64::D3_D4_D5, AArch64::D4_D5_D6, AArch64::D5_D6_D7, + AArch64::D6_D7_D8, AArch64::D7_D8_D9, AArch64::D8_D9_D10, + 
AArch64::D9_D10_D11, AArch64::D10_D11_D12, AArch64::D11_D12_D13, + AArch64::D12_D13_D14, AArch64::D13_D14_D15, AArch64::D14_D15_D16, + AArch64::D15_D16_D17, AArch64::D16_D17_D18, AArch64::D17_D18_D19, + AArch64::D18_D19_D20, AArch64::D19_D20_D21, AArch64::D20_D21_D22, + AArch64::D21_D22_D23, AArch64::D22_D23_D24, AArch64::D23_D24_D25, + AArch64::D24_D25_D26, AArch64::D25_D26_D27, AArch64::D26_D27_D28, + AArch64::D27_D28_D29, AArch64::D28_D29_D30, AArch64::D29_D30_D31, + AArch64::D30_D31_D0, AArch64::D31_D0_D1 +}; - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; +static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, const void *Decoder) { + if (RegNo > 31) + return Fail; + unsigned Register = DDDDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 15) - return MCDisassembler::Fail; +static const unsigned DDDDDecoderTable[] = { + AArch64::D0_D1_D2_D3, AArch64::D1_D2_D3_D4, AArch64::D2_D3_D4_D5, + AArch64::D3_D4_D5_D6, AArch64::D4_D5_D6_D7, AArch64::D5_D6_D7_D8, + AArch64::D6_D7_D8_D9, AArch64::D7_D8_D9_D10, AArch64::D8_D9_D10_D11, + AArch64::D9_D10_D11_D12, AArch64::D10_D11_D12_D13, AArch64::D11_D12_D13_D14, + AArch64::D12_D13_D14_D15, AArch64::D13_D14_D15_D16, AArch64::D14_D15_D16_D17, + AArch64::D15_D16_D17_D18, AArch64::D16_D17_D18_D19, AArch64::D17_D18_D19_D20, + AArch64::D18_D19_D20_D21, AArch64::D19_D20_D21_D22, AArch64::D20_D21_D22_D23, + AArch64::D21_D22_D23_D24, AArch64::D22_D23_D24_D25, AArch64::D23_D24_D25_D26, + AArch64::D24_D25_D26_D27, AArch64::D25_D26_D27_D28, AArch64::D26_D27_D28_D29, + AArch64::D27_D28_D29_D30, AArch64::D28_D29_D30_D31, AArch64::D29_D30_D31_D0, + AArch64::D30_D31_D0_D1, AArch64::D31_D0_D1_D2 +}; - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; +static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Addr, + const void *Decoder) { + if (RegNo > 31) + return Fail; + unsigned Register = DDDDDecoderTable[RegNo]; + Inst.addOperand(MCOperand::CreateReg(Register)); + return Success; } -static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 31) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; +static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + // scale{5} is asserted as 1 in tblgen. 
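// Worked example for the decode that follows: the 32-bit form encodes only
// scale{4-0} and asserts scale{5} = 1, so OR-ing in 0x20 maps the raw field
// range [0, 31] to [32, 63]; the emitted operand 64 - Imm therefore lands
// exactly in [1, 32] (e.g. a raw field of 31 becomes 63, giving scale 1).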
+ Imm |= 0x20; + Inst.addOperand(MCOperand::CreateImm(64 - Imm)); + return Success; } -static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, - uint64_t Address, - const void *Decoder) { - if (Val > 63) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; +static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(64 - Imm)); + return Success; } -template<unsigned RegWidth> -static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, - unsigned FullImm, - uint64_t Address, - const void *Decoder) { - unsigned Imm16 = FullImm & 0xffff; - unsigned Shift = FullImm >> 16; +static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + int64_t ImmVal = Imm; + const AArch64Disassembler *Dis = + static_cast<const AArch64Disassembler *>(Decoder); + + // Sign-extend 19-bit immediate. + if (ImmVal & (1 << (19 - 1))) + ImmVal |= ~((1LL << 19) - 1); - if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail; + if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr, + Inst.getOpcode() != AArch64::LDRXl, 0, 4)) + Inst.addOperand(MCOperand::CreateImm(ImmVal)); + return Success; +} - Inst.addOperand(MCOperand::CreateImm(Imm16)); - Inst.addOperand(MCOperand::CreateImm(Shift)); - return MCDisassembler::Success; +static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm((Imm >> 1) & 1)); + Inst.addOperand(MCOperand::CreateImm(Imm & 1)); + return Success; } -template<unsigned RegWidth> -static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst, - unsigned Bits, +static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - uint64_t Imm; - if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm)) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(Bits)); - return MCDisassembler::Success; -} + const AArch64Disassembler *Dis = + static_cast<const AArch64Disassembler *>(Decoder); + const MCSubtargetInfo &STI = Dis->getSubtargetInfo(); + Imm |= 0x8000; + Inst.addOperand(MCOperand::CreateImm(Imm)); -static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, - uint64_t Address, - const void *Decoder) { - // Only values 0-4 are valid for this 3-bit field - if (ShiftAmount > 4) - return MCDisassembler::Fail; + bool ValidNamed; + (void)AArch64SysReg::MRSMapper(STI.getFeatureBits()) + .toString(Imm, ValidNamed); - Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); - return MCDisassembler::Success; + return ValidNamed ? Success : Fail; } -static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, - unsigned ShiftAmount, +static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm, uint64_t Address, const void *Decoder) { - // Only values below 32 are valid for a 32-bit register - if (ShiftAmount > 31) - return MCDisassembler::Fail; - - Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned ImmS = fieldFromInstruction(Insn, 10, 6); - unsigned ImmR = fieldFromInstruction(Insn, 16, 6); - unsigned SF = fieldFromInstruction(Insn, 31, 1); - - // Undef for 0b11 just in case it occurs.
-  // out assertions that it thinks should never be hit.
-  enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc;
-  Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2);
-
-  if (!SF) {
-    // ImmR and ImmS must be between 0 and 31 for 32-bit instructions.
-    if (ImmR > 31 || ImmS > 31)
-      return MCDisassembler::Fail;
-  }
+  const AArch64Disassembler *Dis =
+      static_cast<const AArch64Disassembler *>(Decoder);
+  const MCSubtargetInfo &STI = Dis->getSubtargetInfo();
-  if (SF) {
-    DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
-    // BFM MCInsts use Rd as a source too.
-    if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
-    DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
-  } else {
-    DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
-    // BFM MCInsts use Rd as a source too.
-    if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
-    DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder);
-  }
-
-  // ASR and LSR have more specific patterns so they won't get here:
-  assert(!(ImmS == 31 && !SF && Opc != BFM)
-         && "shift should have used auto decode");
-  assert(!(ImmS == 63 && SF && Opc != BFM)
-         && "shift should have used auto decode");
-
-  // Extension instructions similarly:
-  if (Opc == SBFM && ImmR == 0) {
-    assert((ImmS != 7 && ImmS != 15) && "extension got here");
-    assert((ImmS != 31 || SF == 0) && "extension got here");
-  } else if (Opc == UBFM && ImmR == 0) {
-    assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here");
-  }
+  Imm |= 0x8000;
+  Inst.addOperand(MCOperand::CreateImm(Imm));
-  if (Opc == UBFM) {
-    // It might be a LSL instruction, which actually takes the shift amount
-    // itself as an MCInst operand.
-    if (SF && (ImmS + 1) % 64 == ImmR) {
-      Inst.setOpcode(AArch64::LSLxxi);
-      Inst.addOperand(MCOperand::CreateImm(63 - ImmS));
-      return MCDisassembler::Success;
-    } else if (!SF && (ImmS + 1) % 32 == ImmR) {
-      Inst.setOpcode(AArch64::LSLwwi);
-      Inst.addOperand(MCOperand::CreateImm(31 - ImmS));
-      return MCDisassembler::Success;
-    }
-  }
-
-  // Otherwise it's definitely either an extract or an insert depending on which
-  // of ImmR or ImmS is larger.
-  unsigned ExtractOp, InsertOp;
-  switch (Opc) {
-  default: llvm_unreachable("unexpected instruction trying to decode bitfield");
-  case SBFM:
-    ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii;
-    InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii;
-    break;
-  case BFM:
-    ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii;
-    InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii;
-    break;
-  case UBFM:
-    ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii;
-    InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii;
-    break;
-  }
-
-  // Otherwise it's a boring insert or extract
-  Inst.addOperand(MCOperand::CreateImm(ImmR));
-  Inst.addOperand(MCOperand::CreateImm(ImmS));
-
-
-  if (ImmS < ImmR)
-    Inst.setOpcode(InsertOp);
-  else
-    Inst.setOpcode(ExtractOp);
+  bool ValidNamed;
+  (void)AArch64SysReg::MSRMapper(STI.getFeatureBits())
+      .toString(Imm, ValidNamed);
-  return MCDisassembler::Success;
+  return ValidNamed ?
Success : Fail; } static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, @@ -763,811 +681,879 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, // Add the lane Inst.addOperand(MCOperand::CreateImm(1)); - return MCDisassembler::Success; + return Success; } +static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm, + unsigned Add) { + Inst.addOperand(MCOperand::CreateImm(Add - Imm)); + return Success; +} -static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - DecodeStatus Result = MCDisassembler::Success; - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(Insn, 10, 5); - unsigned SImm7 = fieldFromInstruction(Insn, 15, 7); - unsigned L = fieldFromInstruction(Insn, 22, 1); - unsigned V = fieldFromInstruction(Insn, 26, 1); - unsigned Opc = fieldFromInstruction(Insn, 30, 2); - - // Not an official name, but it turns out that bit 23 distinguishes indexed - // from non-indexed operations. - unsigned Indexed = fieldFromInstruction(Insn, 23, 1); - - if (Indexed && L == 0) { - // The MCInst for an indexed store has an out operand and 4 ins: - // Rn_wb, Rt, Rt2, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - // You shouldn't load to the same register twice in an instruction... - if (L && Rt == Rt2) - Result = MCDisassembler::SoftFail; - - // ... or do any operation that writes-back to a transfer register. But note - // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different. - if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn)) - Result = MCDisassembler::SoftFail; - - // Exactly how we decode the MCInst's registers depends on the Opc and V - // fields of the instruction. These also obviously determine the size of the - // operation so we can fill in that information while we're at it. 
- if (V) { - // The instruction operates on the FP/SIMD registers - switch (Opc) { - default: return MCDisassembler::Fail; - case 0: - DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 1: - DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 2: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder); - break; - } - } else { - switch (Opc) { - default: return MCDisassembler::Fail; - case 0: - DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 1: - assert(L && "unexpected \"store signed\" attempt"); - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - case 2: - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder); - break; - } - } - - if (Indexed && L == 1) { - // The MCInst for an indexed load has 3 out operands and an 3 ins: - // Rt, Rt2, Rn_wb, Rt2, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } - - - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - Inst.addOperand(MCOperand::CreateImm(SImm7)); +static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm, + unsigned Add) { + Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1))); + return Success; +} - return Result; +static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + return DecodeVecShiftRImm(Inst, Imm, 64); } -static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst, - uint32_t Val, - uint64_t Address, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(Val, 0, 5); - unsigned Rn = fieldFromInstruction(Val, 5, 5); - unsigned Rt2 = fieldFromInstruction(Val, 10, 5); - unsigned MemSize = fieldFromInstruction(Val, 30, 2); - - DecodeStatus S = MCDisassembler::Success; - if (Rt == Rt2) S = MCDisassembler::SoftFail; - - switch (MemSize) { - case 2: - if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder))) - return MCDisassembler::Fail; - break; - case 3: - if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder))) - return MCDisassembler::Fail; - if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder))) - return MCDisassembler::Fail; - break; - default: - llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction"); - } +static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + return DecodeVecShiftRImm(Inst, Imm | 0x20, 64); +} - if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder))) - return MCDisassembler::Fail; +static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + return DecodeVecShiftRImm(Inst, Imm, 32); +} - return S; +static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + return DecodeVecShiftRImm(Inst, Imm | 0x10, 32); } -template -static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - SomeNamedImmMapper Mapper; - bool ValidNamed; - Mapper.toString(Val, 
ValidNamed); - if (ValidNamed || Mapper.validImm(Val)) { - Inst.addOperand(MCOperand::CreateImm(Val)); - return MCDisassembler::Success; - } +static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + return DecodeVecShiftRImm(Inst, Imm, 16); +} - return MCDisassembler::Fail; +static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + return DecodeVecShiftRImm(Inst, Imm | 0x8, 16); } -static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper, - llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - bool ValidNamed; - Mapper.toString(Val, ValidNamed); +static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + return DecodeVecShiftRImm(Inst, Imm, 8); +} - Inst.addOperand(MCOperand::CreateImm(Val)); +static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + return DecodeVecShiftLImm(Inst, Imm, 64); +} - return ValidNamed ? MCDisassembler::Success : MCDisassembler::Fail; +static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + return DecodeVecShiftLImm(Inst, Imm, 32); } -static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address, - Decoder); +static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + return DecodeVecShiftLImm(Inst, Imm, 16); } -static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst, - unsigned Val, - uint64_t Address, - const void *Decoder) { - return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address, - Decoder); +static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, + uint64_t Addr, const void *Decoder) { + return DecodeVecShiftLImm(Inst, Imm, 8); } -static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, - unsigned Insn, - uint64_t Address, +static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Addr, const void *Decoder) { - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Imm9 = fieldFromInstruction(Insn, 12, 9); - - unsigned Opc = fieldFromInstruction(Insn, 22, 2); - unsigned V = fieldFromInstruction(Insn, 26, 1); - unsigned Size = fieldFromInstruction(Insn, 30, 2); - - if (Opc == 0 || (V == 1 && Opc == 2)) { - // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + unsigned Rd = fieldFromInstruction(insn, 0, 5); + unsigned Rn = fieldFromInstruction(insn, 5, 5); + unsigned Rm = fieldFromInstruction(insn, 16, 5); + unsigned shiftHi = fieldFromInstruction(insn, 22, 2); + unsigned shiftLo = fieldFromInstruction(insn, 10, 6); + unsigned shift = (shiftHi << 6) | shiftLo; + switch (Inst.getOpcode()) { + default: + return Fail; + case AArch64::ADDWrs: + case AArch64::ADDSWrs: + case AArch64::SUBWrs: + case AArch64::SUBSWrs: + // if shift == '11' then ReservedValue() + if (shiftHi == 0x3) + return Fail; + // Deliberate fallthrough + case AArch64::ANDWrs: + case AArch64::ANDSWrs: + case AArch64::BICWrs: + case AArch64::BICSWrs: + case AArch64::ORRWrs: + case AArch64::ORNWrs: + case AArch64::EORWrs: + case AArch64::EONWrs: { + // if sf == '0' and 
imm6<5> == '1' then ReservedValue() + if (shiftLo >> 5 == 1) + return Fail; + DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); + break; } - - if (V == 0 && (Opc == 2 || Size == 3)) { - DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder); - } else if (V == 0) { - DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder); - } else if (V == 1 && (Opc & 2)) { - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - } else { - switch (Size) { - case 0: - DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder); - break; - case 1: - DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - } + case AArch64::ADDXrs: + case AArch64::ADDSXrs: + case AArch64::SUBXrs: + case AArch64::SUBSXrs: + // if shift == '11' then ReservedValue() + if (shiftHi == 0x3) + return Fail; + // Deliberate fallthrough + case AArch64::ANDXrs: + case AArch64::ANDSXrs: + case AArch64::BICXrs: + case AArch64::BICSXrs: + case AArch64::ORRXrs: + case AArch64::ORNXrs: + case AArch64::EORXrs: + case AArch64::EONXrs: + DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); + DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); + break; } - if (Opc != 0 && (V != 1 || Opc != 2)) { - // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - } + Inst.addOperand(MCOperand::CreateImm(shift)); + return Success; +} - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); +static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, + uint64_t Addr, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(insn, 0, 5); + unsigned imm = fieldFromInstruction(insn, 5, 16); + unsigned shift = fieldFromInstruction(insn, 21, 2); + shift <<= 4; + switch (Inst.getOpcode()) { + default: + return Fail; + case AArch64::MOVZWi: + case AArch64::MOVNWi: + case AArch64::MOVKWi: + if (shift & (1U << 5)) + return Fail; + DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); + break; + case AArch64::MOVZXi: + case AArch64::MOVNXi: + case AArch64::MOVKXi: + DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); + break; + } - Inst.addOperand(MCOperand::CreateImm(Imm9)); + if (Inst.getOpcode() == AArch64::MOVKWi || + Inst.getOpcode() == AArch64::MOVKXi) + Inst.addOperand(Inst.getOperand(0)); - // N.b. The official documentation says undpredictable if Rt == Rn, but this - // takes place at the architectural rather than encoding level: - // - // "STR xzr, [sp], #4" is perfectly valid. 
-  if (V == 0 && Rt == Rn && Rn != 31)
-    return MCDisassembler::SoftFail;
-  else
-    return MCDisassembler::Success;
+  Inst.addOperand(MCOperand::CreateImm(imm));
+  Inst.addOperand(MCOperand::CreateImm(shift));
+  return Success;
 }
-static MCDisassembler *createAArch64Disassembler(const Target &T,
-                                                 const MCSubtargetInfo &STI) {
-  return new AArch64Disassembler(STI, T.createMCRegInfo(""));
-}
+static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
+                                                  uint32_t insn, uint64_t Addr,
+                                                  const void *Decoder) {
+  unsigned Rt = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned offset = fieldFromInstruction(insn, 10, 12);
+  const AArch64Disassembler *Dis =
+      static_cast<const AArch64Disassembler *>(Decoder);
-extern "C" void LLVMInitializeAArch64Disassembler() {
-  TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget,
-                                         createAArch64Disassembler);
-  TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget,
-                                         createAArch64Disassembler);
-}
+  switch (Inst.getOpcode()) {
+  default:
+    return Fail;
+  case AArch64::PRFMui:
+    // Rt is an immediate in prefetch.
+    Inst.addOperand(MCOperand::CreateImm(Rt));
+    break;
+  case AArch64::STRBBui:
+  case AArch64::LDRBBui:
+  case AArch64::LDRSBWui:
+  case AArch64::STRHHui:
+  case AArch64::LDRHHui:
+  case AArch64::LDRSHWui:
+  case AArch64::STRWui:
+  case AArch64::LDRWui:
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case AArch64::LDRSBXui:
+  case AArch64::LDRSHXui:
+  case AArch64::LDRSWui:
+  case AArch64::STRXui:
+  case AArch64::LDRXui:
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case AArch64::LDRQui:
+  case AArch64::STRQui:
+    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case AArch64::LDRDui:
+  case AArch64::STRDui:
+    DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case AArch64::LDRSui:
+  case AArch64::STRSui:
+    DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case AArch64::LDRHui:
+  case AArch64::STRHui:
+    DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  case AArch64::LDRBui:
+  case AArch64::STRBui:
+    DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
+    break;
+  }
-template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
-static DecodeStatus
-DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
-                             uint64_t Address, const void *Decoder) {
-  bool IsLSL = false;
-  if (Ext == A64SE::LSL)
-    IsLSL = true;
-  else if (Ext != A64SE::MSL)
-    return MCDisassembler::Fail;
-
-  // MSL and LSLH accepts encoded shift amount 0 or 1.
-  if ((!IsLSL || (IsLSL && IsHalf)) && ShiftAmount != 0 && ShiftAmount != 1)
-    return MCDisassembler::Fail;
-
-  // LSL accepts encoded shift amount 0, 1, 2 or 3.
-  if (IsLSL && ShiftAmount > 3)
-    return MCDisassembler::Fail;
-
-  Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
-  return MCDisassembler::Success;
+  DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+  if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
+    Inst.addOperand(MCOperand::CreateImm(offset));
+  return Success;
 }
-// Decode post-index vector load/store instructions.
-// This is necessary as we need to decode Rm: if Rm == 0b11111, the last
-// operand is an immediate equal the the length of vector list in bytes,
-// or Rm is decoded to a GPR64noxzr register.
-static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rm = fieldFromInstruction(Insn, 16, 5); - unsigned Opcode = fieldFromInstruction(Insn, 12, 4); - unsigned IsLoad = fieldFromInstruction(Insn, 22, 1); - // 0 for 64bit vector list, 1 for 128bit vector list - unsigned Is128BitVec = fieldFromInstruction(Insn, 30, 1); +static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Addr, + const void *Decoder) { + unsigned Rt = fieldFromInstruction(insn, 0, 5); + unsigned Rn = fieldFromInstruction(insn, 5, 5); + int64_t offset = fieldFromInstruction(insn, 12, 9); - unsigned NumVecs; - switch (Opcode) { - case 0: // ld4/st4 - case 2: // ld1/st1 with 4 vectors - NumVecs = 4; break; - case 4: // ld3/st3 - case 6: // ld1/st1 with 3 vectors - NumVecs = 3; break; - case 7: // ld1/st1 with 1 vector - NumVecs = 1; break; - case 8: // ld2/st2 - case 10: // ld1/st1 with 2 vectors - NumVecs = 2; break; + // offset is a 9-bit signed immediate, so sign extend it to + // fill the unsigned. + if (offset & (1 << (9 - 1))) + offset |= ~((1LL << 9) - 1); + + // First operand is always the writeback to the address register, if needed. + switch (Inst.getOpcode()) { default: - llvm_unreachable("Invalid opcode for post-index load/store instructions"); + break; + case AArch64::LDRSBWpre: + case AArch64::LDRSHWpre: + case AArch64::STRBBpre: + case AArch64::LDRBBpre: + case AArch64::STRHHpre: + case AArch64::LDRHHpre: + case AArch64::STRWpre: + case AArch64::LDRWpre: + case AArch64::LDRSBWpost: + case AArch64::LDRSHWpost: + case AArch64::STRBBpost: + case AArch64::LDRBBpost: + case AArch64::STRHHpost: + case AArch64::LDRHHpost: + case AArch64::STRWpost: + case AArch64::LDRWpost: + case AArch64::LDRSBXpre: + case AArch64::LDRSHXpre: + case AArch64::STRXpre: + case AArch64::LDRSWpre: + case AArch64::LDRXpre: + case AArch64::LDRSBXpost: + case AArch64::LDRSHXpost: + case AArch64::STRXpost: + case AArch64::LDRSWpost: + case AArch64::LDRXpost: + case AArch64::LDRQpre: + case AArch64::STRQpre: + case AArch64::LDRQpost: + case AArch64::STRQpost: + case AArch64::LDRDpre: + case AArch64::STRDpre: + case AArch64::LDRDpost: + case AArch64::STRDpost: + case AArch64::LDRSpre: + case AArch64::STRSpre: + case AArch64::LDRSpost: + case AArch64::STRSpost: + case AArch64::LDRHpre: + case AArch64::STRHpre: + case AArch64::LDRHpost: + case AArch64::STRHpost: + case AArch64::LDRBpre: + case AArch64::STRBpre: + case AArch64::LDRBpost: + case AArch64::STRBpost: + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + break; } - // Decode vector list of 1/2/3/4 vectors for load instructions. - if (IsLoad) { - switch (NumVecs) { - case 1: - Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder); - break; - } + switch (Inst.getOpcode()) { + default: + return Fail; + case AArch64::PRFUMi: + // Rt is an immediate in prefetch. 
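+    // (the five Rt bits encode the prfop, e.g. 0b00000 is PLDL1KEEP, so no
+    // register operand is added for PRFUM)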
+ Inst.addOperand(MCOperand::CreateImm(Rt)); + break; + case AArch64::STURBBi: + case AArch64::LDURBBi: + case AArch64::LDURSBWi: + case AArch64::STURHHi: + case AArch64::LDURHHi: + case AArch64::LDURSHWi: + case AArch64::STURWi: + case AArch64::LDURWi: + case AArch64::LDTRSBWi: + case AArch64::LDTRSHWi: + case AArch64::STTRWi: + case AArch64::LDTRWi: + case AArch64::STTRHi: + case AArch64::LDTRHi: + case AArch64::LDTRBi: + case AArch64::STTRBi: + case AArch64::LDRSBWpre: + case AArch64::LDRSHWpre: + case AArch64::STRBBpre: + case AArch64::LDRBBpre: + case AArch64::STRHHpre: + case AArch64::LDRHHpre: + case AArch64::STRWpre: + case AArch64::LDRWpre: + case AArch64::LDRSBWpost: + case AArch64::LDRSHWpost: + case AArch64::STRBBpost: + case AArch64::LDRBBpost: + case AArch64::STRHHpost: + case AArch64::LDRHHpost: + case AArch64::STRWpost: + case AArch64::LDRWpost: + DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); + break; + case AArch64::LDURSBXi: + case AArch64::LDURSHXi: + case AArch64::LDURSWi: + case AArch64::STURXi: + case AArch64::LDURXi: + case AArch64::LDTRSBXi: + case AArch64::LDTRSHXi: + case AArch64::LDTRSWi: + case AArch64::STTRXi: + case AArch64::LDTRXi: + case AArch64::LDRSBXpre: + case AArch64::LDRSHXpre: + case AArch64::STRXpre: + case AArch64::LDRSWpre: + case AArch64::LDRXpre: + case AArch64::LDRSBXpost: + case AArch64::LDRSHXpost: + case AArch64::STRXpost: + case AArch64::LDRSWpost: + case AArch64::LDRXpost: + DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); + break; + case AArch64::LDURQi: + case AArch64::STURQi: + case AArch64::LDRQpre: + case AArch64::STRQpre: + case AArch64::LDRQpost: + case AArch64::STRQpost: + DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); + break; + case AArch64::LDURDi: + case AArch64::STURDi: + case AArch64::LDRDpre: + case AArch64::STRDpre: + case AArch64::LDRDpost: + case AArch64::STRDpost: + DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); + break; + case AArch64::LDURSi: + case AArch64::STURSi: + case AArch64::LDRSpre: + case AArch64::STRSpre: + case AArch64::LDRSpost: + case AArch64::STRSpost: + DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); + break; + case AArch64::LDURHi: + case AArch64::STURHi: + case AArch64::LDRHpre: + case AArch64::STRHpre: + case AArch64::LDRHpost: + case AArch64::STRHpost: + DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); + break; + case AArch64::LDURBi: + case AArch64::STURBi: + case AArch64::LDRBpre: + case AArch64::STRBpre: + case AArch64::LDRBpost: + case AArch64::STRBpost: + DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); + break; } - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - if (Rm == 31) // If Rm is 0x11111, add the vector list length in byte - Inst.addOperand(MCOperand::CreateImm(NumVecs * (Is128BitVec ? 16 : 8))); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); - - // Decode vector list of 1/2/3/4 vectors for load instructions. - if (!IsLoad) { - switch (NumVecs) { - case 1: - Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is128BitVec ? 
DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder); - break; - } - } + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + Inst.addOperand(MCOperand::CreateImm(offset)); - return MCDisassembler::Success; + bool IsLoad = fieldFromInstruction(insn, 22, 1); + bool IsIndexed = fieldFromInstruction(insn, 10, 2) != 0; + bool IsFP = fieldFromInstruction(insn, 26, 1); + + // Cannot write back to a transfer register (but xzr != sp). + if (IsLoad && IsIndexed && !IsFP && Rn != 31 && Rt == Rn) + return SoftFail; + + return Success; } -// Decode post-index vector load/store lane instructions. -// This is necessary as we need to decode Rm: if Rm == 0b11111, the last -// operand is an immediate equal the the length of the changed bytes, -// or Rm is decoded to a GPR64noxzr register. -static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, +static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Addr, const void *Decoder) { - bool Is64bitVec = false; - bool IsLoadDup = false; - bool IsLoad = false; - // The total number of bytes transferred. - // TransferBytes = NumVecs * OneLaneBytes - unsigned TransferBytes = 0; - unsigned NumVecs = 0; - unsigned Opc = Inst.getOpcode(); - switch (Opc) { - case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register: - case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register: - case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register: - case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: { - switch (Opc) { - case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register: - TransferBytes = 1; break; - case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register: - TransferBytes = 2; break; - case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register: - TransferBytes = 4; break; - case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: - TransferBytes = 8; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 1; - break; - } + unsigned Rt = fieldFromInstruction(insn, 0, 5); + unsigned Rn = fieldFromInstruction(insn, 5, 5); + unsigned Rt2 = fieldFromInstruction(insn, 10, 5); + unsigned Rs = fieldFromInstruction(insn, 16, 5); - case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register: - case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register: - case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register: - case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: { - switch (Opc) { - case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register: - TransferBytes = 1; break; - case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register: - TransferBytes = 2; break; - case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register: - TransferBytes = 4; break; - case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: - TransferBytes = 8; break; - } - IsLoadDup = true; - NumVecs = 1; + unsigned Opcode = Inst.getOpcode(); + switch (Opcode) { + default: + return Fail; + case AArch64::STLXRW: + case AArch64::STLXRB: + case AArch64::STLXRH: + case AArch64::STXRW: + case AArch64::STXRB: + case AArch64::STXRH: + DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); + // FALLTHROUGH + case AArch64::LDARW: + case AArch64::LDARB: + case AArch64::LDARH: + case AArch64::LDAXRW: + case AArch64::LDAXRB: + case AArch64::LDAXRH: + case AArch64::LDXRW: + case AArch64::LDXRB: + case AArch64::LDXRH: + case AArch64::STLRW: + 
case AArch64::STLRB: + case AArch64::STLRH: + DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); break; - } - - case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register: - case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register: - case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register: - case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: { - switch (Opc) { - case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register: - TransferBytes = 2; break; - case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register: - TransferBytes = 4; break; - case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register: - TransferBytes = 8; break; - case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: - TransferBytes = 16; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 2; + case AArch64::STLXRX: + case AArch64::STXRX: + DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); + // FALLTHROUGH + case AArch64::LDARX: + case AArch64::LDAXRX: + case AArch64::LDXRX: + case AArch64::STLRX: + DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); break; - } - - case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register: - case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register: - case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register: - case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: { - switch (Opc) { - case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register: - TransferBytes = 2; break; - case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register: - TransferBytes = 4; break; - case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register: - TransferBytes = 8; break; - case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: - TransferBytes = 16; break; - } - IsLoadDup = true; - NumVecs = 2; + case AArch64::STLXPW: + case AArch64::STXPW: + DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); + // FALLTHROUGH + case AArch64::LDAXPW: + case AArch64::LDXPW: + DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; - } - - case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register: - case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register: - case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register: - case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: { - switch (Opc) { - case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register: - TransferBytes = 3; break; - case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register: - TransferBytes = 6; break; - case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register: - TransferBytes = 12; break; - case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: - TransferBytes = 24; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 3; + case AArch64::STLXPX: + case AArch64::STXPX: + DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); + // FALLTHROUGH + case AArch64::LDAXPX: + case AArch64::LDXPX: + DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); break; } - case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register: - case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_8H_register: - case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_4S_register: - case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: { - switch (Opc) { - case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register: - TransferBytes = 3; 
break; - case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_8H_register: - TransferBytes = 6; break; - case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_4S_register: - TransferBytes = 12; break; - case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: - TransferBytes = 24; break; - } - IsLoadDup = true; - NumVecs = 3; - break; - } + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register: - case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register: - case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register: - case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: { - switch (Opc) { - case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register: - TransferBytes = 4; break; - case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register: - TransferBytes = 8; break; - case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register: - TransferBytes = 16; break; - case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: - TransferBytes = 32; break; - } - Is64bitVec = true; - IsLoadDup = true; - NumVecs = 4; - break; - } + // You shouldn't load to the same register twice in an instruction... + if ((Opcode == AArch64::LDAXPW || Opcode == AArch64::LDXPW || + Opcode == AArch64::LDAXPX || Opcode == AArch64::LDXPX) && + Rt == Rt2) + return SoftFail; - case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register: - case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_8H_register: - case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_4S_register: - case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: { - switch (Opc) { - case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register: - TransferBytes = 4; break; - case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_8H_register: - TransferBytes = 8; break; - case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_4S_register: - TransferBytes = 16; break; - case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: - TransferBytes = 32; break; - } - IsLoadDup = true; - NumVecs = 4; - break; - } + return Success; +} - case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register: - case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register: - case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register: - case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: { - switch (Opc) { - case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register: - TransferBytes = 1; break; - case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register: - TransferBytes = 2; break; - case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register: - TransferBytes = 4; break; - case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: - TransferBytes = 8; break; - } - IsLoad = true; - NumVecs = 1; - break; - } +static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, + uint64_t Addr, + const void *Decoder) { + unsigned Rt = fieldFromInstruction(insn, 0, 5); + unsigned Rn = fieldFromInstruction(insn, 5, 5); + unsigned Rt2 = fieldFromInstruction(insn, 10, 5); + int64_t offset = fieldFromInstruction(insn, 15, 7); + bool IsLoad = fieldFromInstruction(insn, 22, 1); - case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register: - case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register: - case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register: - case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: { - switch 
(Opc) { - case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register: - TransferBytes = 2; break; - case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register: - TransferBytes = 4; break; - case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register: - TransferBytes = 8; break; - case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: - TransferBytes = 16; break; - } - IsLoad = true; - NumVecs = 2; - break; - } + // offset is a 7-bit signed immediate, so sign extend it to + // fill the unsigned. + if (offset & (1 << (7 - 1))) + offset |= ~((1LL << 7) - 1); - case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register: - case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register: - case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register: - case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: { - switch (Opc) { - case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register: - TransferBytes = 3; break; - case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register: - TransferBytes = 6; break; - case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register: - TransferBytes = 12; break; - case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: - TransferBytes = 24; break; - } - IsLoad = true; - NumVecs = 3; - break; - } + unsigned Opcode = Inst.getOpcode(); + bool NeedsDisjointWritebackTransfer = false; - case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register: - case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register: - case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register: - case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: { - switch (Opc) { - case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register: - TransferBytes = 4; break; - case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register: - TransferBytes = 8; break; - case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register: - TransferBytes = 16; break; - case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: - TransferBytes = 32; break; - } - IsLoad = true; - NumVecs = 4; + // First operand is always writeback of base register. 
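+  // (pre- and post-indexed pairs return the updated base in Rn_wb, which is
+  // emitted before Rt/Rt2; plain LDP/STP/LDNP/STNP have no writeback operand)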
+ switch (Opcode) { + default: break; - } - - case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register: - case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register: - case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register: - case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: { - switch (Opc) { - case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register: - TransferBytes = 1; break; - case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register: - TransferBytes = 2; break; - case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register: - TransferBytes = 4; break; - case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: - TransferBytes = 8; break; - } - NumVecs = 1; + case AArch64::LDPXpost: + case AArch64::STPXpost: + case AArch64::LDPSWpost: + case AArch64::LDPXpre: + case AArch64::STPXpre: + case AArch64::LDPSWpre: + case AArch64::LDPWpost: + case AArch64::STPWpost: + case AArch64::LDPWpre: + case AArch64::STPWpre: + case AArch64::LDPQpost: + case AArch64::STPQpost: + case AArch64::LDPQpre: + case AArch64::STPQpre: + case AArch64::LDPDpost: + case AArch64::STPDpost: + case AArch64::LDPDpre: + case AArch64::STPDpre: + case AArch64::LDPSpost: + case AArch64::STPSpost: + case AArch64::LDPSpre: + case AArch64::STPSpre: + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); break; } - case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register: - case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register: - case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register: - case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: { - switch (Opc) { - case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register: - TransferBytes = 2; break; - case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register: - TransferBytes = 4; break; - case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register: - TransferBytes = 8; break; - case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: - TransferBytes = 16; break; - } - NumVecs = 2; + switch (Opcode) { + default: + return Fail; + case AArch64::LDPXpost: + case AArch64::STPXpost: + case AArch64::LDPSWpost: + case AArch64::LDPXpre: + case AArch64::STPXpre: + case AArch64::LDPSWpre: + NeedsDisjointWritebackTransfer = true; + // Fallthrough + case AArch64::LDNPXi: + case AArch64::STNPXi: + case AArch64::LDPXi: + case AArch64::STPXi: + case AArch64::LDPSWi: + DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); + DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); break; - } - - case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register: - case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register: - case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register: - case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: { - switch (Opc) { - case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register: - TransferBytes = 3; break; - case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register: - TransferBytes = 6; break; - case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register: - TransferBytes = 12; break; - case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: - TransferBytes = 24; break; - } - NumVecs = 3; + case AArch64::LDPWpost: + case AArch64::STPWpost: + case AArch64::LDPWpre: + case AArch64::STPWpre: + NeedsDisjointWritebackTransfer = true; + // Fallthrough + case AArch64::LDNPWi: + case AArch64::STNPWi: + case AArch64::LDPWi: + case AArch64::STPWi: + 
DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; - } - - case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register: - case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register: - case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register: - case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: { - switch (Opc) { - case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register: - TransferBytes = 4; break; - case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register: - TransferBytes = 8; break; - case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register: - TransferBytes = 16; break; - case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: - TransferBytes = 32; break; - } - NumVecs = 4; + case AArch64::LDNPQi: + case AArch64::STNPQi: + case AArch64::LDPQpost: + case AArch64::STPQpost: + case AArch64::LDPQi: + case AArch64::STPQi: + case AArch64::LDPQpre: + case AArch64::STPQpre: + DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); + DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder); + break; + case AArch64::LDNPDi: + case AArch64::STNPDi: + case AArch64::LDPDpost: + case AArch64::STPDpost: + case AArch64::LDPDi: + case AArch64::STPDi: + case AArch64::LDPDpre: + case AArch64::STPDpre: + DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); + DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder); + break; + case AArch64::LDNPSi: + case AArch64::STNPSi: + case AArch64::LDPSpost: + case AArch64::STPSpost: + case AArch64::LDPSi: + case AArch64::STPSi: + case AArch64::LDPSpre: + case AArch64::STPSpre: + DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); + DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder); break; } - default: - return MCDisassembler::Fail; - } // End of switch (Opc) + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + Inst.addOperand(MCOperand::CreateImm(offset)); - unsigned Rt = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned Rm = fieldFromInstruction(Insn, 16, 5); - - // Decode post-index of load duplicate lane - if (IsLoadDup) { - switch (NumVecs) { - case 1: - Is64bitVec ? DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder) - : DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - Is64bitVec ? DecodeDPairRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - Is64bitVec ? DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - Is64bitVec ? DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder) - : DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - - if (Rm == 31) // If Rm is 0x11111, add the number of transferred bytes - Inst.addOperand(MCOperand::CreateImm(TransferBytes)); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); - - return MCDisassembler::Success; - } + // You shouldn't load to the same register twice in an instruction... + if (IsLoad && Rt == Rt2) + return SoftFail; - // Decode post-index of load/store lane - // Loads have a vector list as output. 
- if (IsLoad) { - switch (NumVecs) { - case 1: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); - break; - case 3: - DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); - break; - case 4: - DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - } + // ... or do any operation that writes-back to a transfer register. But note + // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different. + if (NeedsDisjointWritebackTransfer && Rn != 31 && (Rt == Rn || Rt2 == Rn)) + return SoftFail; + + return Success; +} - // Decode write back register, which is equal to Rn. - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); - DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); +static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Addr, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(insn, 0, 5); + unsigned Rn = fieldFromInstruction(insn, 5, 5); + unsigned Rm = fieldFromInstruction(insn, 16, 5); + unsigned extend = fieldFromInstruction(insn, 10, 6); - if (Rm == 31) // If Rm is 0x11111, add the number of transferred bytes - Inst.addOperand(MCOperand::CreateImm(TransferBytes)); - else // Decode Rm - DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); + unsigned shift = extend & 0x7; + if (shift > 4) + return Fail; - // Decode the source vector list. - switch (NumVecs) { - case 1: - DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); - break; - case 2: - DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); + switch (Inst.getOpcode()) { + default: + return Fail; + case AArch64::ADDWrx: + case AArch64::SUBWrx: + DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case 3: - DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); + case AArch64::ADDSWrx: + case AArch64::SUBSWrx: + DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case 4: - DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); - } - - // Decode lane - unsigned Q = fieldFromInstruction(Insn, 30, 1); - unsigned S = fieldFromInstruction(Insn, 10, 3); - unsigned lane = 0; - // Calculate the number of lanes by number of vectors and transferred bytes. - // NumLanes = 16 bytes / bytes of each lane - unsigned NumLanes = 16 / (TransferBytes / NumVecs); - switch (NumLanes) { - case 16: // A vector has 16 lanes, each lane is 1 bytes. 
- lane = (Q << 3) | S; + case AArch64::ADDXrx: + case AArch64::SUBXrx: + DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case 8: - lane = (Q << 2) | (S >> 1); + case AArch64::ADDSXrx: + case AArch64::SUBSXrx: + DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); break; - case 4: - lane = (Q << 1) | (S >> 2); + case AArch64::ADDXrx64: + case AArch64::SUBXrx64: + DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); break; - case 2: - lane = Q; + case AArch64::SUBSXrx64: + case AArch64::ADDSXrx64: + DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); + DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); break; } - Inst.addOperand(MCOperand::CreateImm(lane)); - return MCDisassembler::Success; + Inst.addOperand(MCOperand::CreateImm(extend)); + return Success; } -static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn, - uint64_t Address, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(Insn, 0, 5); - unsigned Rn = fieldFromInstruction(Insn, 5, 5); - unsigned size = fieldFromInstruction(Insn, 22, 2); - unsigned Q = fieldFromInstruction(Insn, 30, 1); +static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, + uint32_t insn, uint64_t Addr, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(insn, 0, 5); + unsigned Rn = fieldFromInstruction(insn, 5, 5); + unsigned Datasize = fieldFromInstruction(insn, 31, 1); + unsigned imm; + + if (Datasize) { + if (Inst.getOpcode() == AArch64::ANDSXri) + DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); + else + DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); + imm = fieldFromInstruction(insn, 10, 13); + if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 64)) + return Fail; + } else { + if (Inst.getOpcode() == AArch64::ANDSWri) + DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); + else + DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); + DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); + imm = fieldFromInstruction(insn, 10, 12); + if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 32)) + return Fail; + } + Inst.addOperand(MCOperand::CreateImm(imm)); + return Success; +} - DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder); +static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, + uint64_t Addr, + const void *Decoder) { + unsigned Rd = fieldFromInstruction(insn, 0, 5); + unsigned cmode = fieldFromInstruction(insn, 12, 4); + unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; + imm |= fieldFromInstruction(insn, 5, 5); - if(Q) - DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder); + if (Inst.getOpcode() == AArch64::MOVID) + DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); else - DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder); + DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); + + Inst.addOperand(MCOperand::CreateImm(imm)); - switch (size) { - case 0: - Inst.addOperand(MCOperand::CreateImm(8)); + switch (Inst.getOpcode()) { + default: break; - case 1: - Inst.addOperand(MCOperand::CreateImm(16)); + case AArch64::MOVIv4i16: + case AArch64::MOVIv8i16: + case AArch64::MVNIv4i16: + case AArch64::MVNIv8i16: + case AArch64::MOVIv2i32: 
+  case AArch64::MOVIv4i32:
+  case AArch64::MVNIv2i32:
+  case AArch64::MVNIv4i32:
+    Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
     break;
-  case 2:
-    Inst.addOperand(MCOperand::CreateImm(32));
+  case AArch64::MOVIv2s_msl:
+  case AArch64::MOVIv4s_msl:
+  case AArch64::MVNIv2s_msl:
+  case AArch64::MVNIv4s_msl:
+    Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108));
     break;
-  default :
-    return MCDisassembler::Fail;
   }
-  return MCDisassembler::Success;
+
+  return Success;
 }
+static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
+                                                uint32_t insn, uint64_t Addr,
+                                                const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned cmode = fieldFromInstruction(insn, 12, 4);
+  unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
+  imm |= fieldFromInstruction(insn, 5, 5);
+
+  // Tied operands added twice.
+  DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+  DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+
+  Inst.addOperand(MCOperand::CreateImm(imm));
+  Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
+
+  return Success;
+}
+
+static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
+                                         uint64_t Addr, const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  int64_t imm = fieldFromInstruction(insn, 5, 19) << 2;
+  imm |= fieldFromInstruction(insn, 29, 2);
+  const AArch64Disassembler *Dis =
+      static_cast<const AArch64Disassembler *>(Decoder);
+
+  // Sign-extend the 21-bit immediate.
+  if (imm & (1 << (21 - 1)))
+    imm |= ~((1LL << 21) - 1);
+
+  DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+  if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
+    Inst.addOperand(MCOperand::CreateImm(imm));
+
+  return Success;
+}
+
+static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
+                                        uint64_t Addr, const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(insn, 5, 5);
+  unsigned Imm = fieldFromInstruction(insn, 10, 14);
+  unsigned S = fieldFromInstruction(insn, 29, 1);
+  unsigned Datasize = fieldFromInstruction(insn, 31, 1);
+
+  unsigned ShifterVal = (Imm >> 12) & 3;
+  unsigned ImmVal = Imm & 0xFFF;
+  const AArch64Disassembler *Dis =
+      static_cast<const AArch64Disassembler *>(Decoder);
+
+  if (ShifterVal != 0 && ShifterVal != 1)
+    return Fail;
+
+  if (Datasize) {
+    if (Rd == 31 && !S)
+      DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+    else
+      DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+  } else {
+    if (Rd == 31 && !S)
+      DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
+    else
+      DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+    DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
+  }
+
+  if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
+    Inst.addOperand(MCOperand::CreateImm(ImmVal));
+  Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal));
+  return Success;
+}
+
+static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
+                                              uint64_t Addr,
+                                              const void *Decoder) {
+  int64_t imm = fieldFromInstruction(insn, 0, 26);
+  const AArch64Disassembler *Dis =
+      static_cast<const AArch64Disassembler *>(Decoder);
+
+  // Sign-extend the 26-bit immediate.
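+  // (the offset is in words: the symbolic lookup below uses imm << 2, giving
+  // the usual +/-128MiB range of an unconditional branch)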
+  if (imm & (1 << (26 - 1)))
+    imm |= ~((1LL << 26) - 1);
+
+  if (!Dis->tryAddingSymbolicOperand(Inst, imm << 2, Addr, true, 0, 4))
+    Inst.addOperand(MCOperand::CreateImm(imm));
+
+  return Success;
+}
+
+static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
+                                                  uint32_t insn, uint64_t Addr,
+                                                  const void *Decoder) {
+  uint64_t op1 = fieldFromInstruction(insn, 16, 3);
+  uint64_t op2 = fieldFromInstruction(insn, 5, 3);
+  uint64_t crm = fieldFromInstruction(insn, 8, 4);
+
+  uint64_t pstate_field = (op1 << 3) | op2;
+
+  Inst.addOperand(MCOperand::CreateImm(pstate_field));
+  Inst.addOperand(MCOperand::CreateImm(crm));
+
+  bool ValidNamed;
+  (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed);
+
+  return ValidNamed ? Success : Fail;
+}
+
+static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
+                                        uint64_t Addr, const void *Decoder) {
+  uint64_t Rt = fieldFromInstruction(insn, 0, 5);
+  uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5;
+  bit |= fieldFromInstruction(insn, 19, 5);
+  int64_t dst = fieldFromInstruction(insn, 5, 14);
+  const AArch64Disassembler *Dis =
+      static_cast<const AArch64Disassembler *>(Decoder);
+
+  // Sign-extend 14-bit immediate.
+  if (dst & (1 << (14 - 1)))
+    dst |= ~((1LL << 14) - 1);
+
+  if (fieldFromInstruction(insn, 31, 1) == 0)
+    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+  else
+    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+  Inst.addOperand(MCOperand::CreateImm(bit));
+  if (!Dis->tryAddingSymbolicOperand(Inst, dst << 2, Addr, true, 0, 4))
+    Inst.addOperand(MCOperand::CreateImm(dst));
+
+  return Success;
+}
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
new file mode 100644
index 0000000..68d4867
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -0,0 +1,40 @@
+//===- AArch64Disassembler.h - Disassembler for AArch64 ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AArch64DISASSEMBLER_H
+#define AArch64DISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+class AArch64Disassembler : public MCDisassembler {
+public:
+  AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+      : MCDisassembler(STI, Ctx) {}
+
+  ~AArch64Disassembler() {}
+
+  /// getInstruction - See MCDisassembler.
+  MCDisassembler::DecodeStatus
+  getInstruction(MCInst &instr, uint64_t &size, const MemoryObject &region,
+                 uint64_t address, raw_ostream &vStream,
+                 raw_ostream &cStream) const override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
new file mode 100644
index 0000000..2466368
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -0,0 +1,221 @@
+//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64ExternalSymbolizer.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-disassembler"
+
+static MCSymbolRefExpr::VariantKind
+getVariant(uint64_t LLVMDisassembler_VariantKind) {
+  switch (LLVMDisassembler_VariantKind) {
+  case LLVMDisassembler_VariantKind_None:
+    return MCSymbolRefExpr::VK_None;
+  case LLVMDisassembler_VariantKind_ARM64_PAGE:
+    return MCSymbolRefExpr::VK_PAGE;
+  case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
+    return MCSymbolRefExpr::VK_PAGEOFF;
+  case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
+    return MCSymbolRefExpr::VK_GOTPAGE;
+  case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
+    return MCSymbolRefExpr::VK_GOTPAGEOFF;
+  case LLVMDisassembler_VariantKind_ARM64_TLVP:
+  case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
+  default:
+    assert(0 && "bad LLVMDisassembler_VariantKind");
+    return MCSymbolRefExpr::VK_None;
+  }
+}
+
+/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+/// operand in place of the immediate Value in the MCInst. The immediate
+/// Value has not had any PC adjustment made by the caller. If the instruction
+/// is a branch that adds the PC to the immediate Value then IsBranch is
+/// true, else false. If GetOpInfo is non-null, then it is called to get any
+/// symbolic information at the Address for this instruction. If that returns
+/// non-zero then the symbolic information it returns is used to create an
+/// MCExpr and that is added as an operand to the MCInst. If GetOpInfo()
+/// returns zero and IsBranch is true then a symbol lookup for
+/// Address + Value is done and if a symbol is found an MCExpr is created with
+/// that, else an MCExpr with Address + Value is created. If GetOpInfo()
+/// returns zero and IsBranch is false then the Opcode of the MCInst is
+/// tested, and for ADRP and other instructions that help to load pointers
+/// a symbol lookup is done to see if it returns a specific reference type
+/// to add to the comment stream. This function returns true if it adds
+/// an operand to the MCInst and false otherwise.
+bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
+    MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
+    bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+  // FIXME: This method shares a lot of code with
+  // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible to
+  // refactor the MCExternalSymbolizer interface to allow more of this
+  // implementation to be shared.
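+  // (For example, when a branch's target address resolves through the
+  // SymbolLookUp callback below, the operand added to the MCInst is a
+  // symbol expression such as _foo, so the disassembly reads "bl _foo"
+  // instead of a raw immediate like "bl #0x20".)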
+ // + struct LLVMOpInfo1 SymbolicOp; + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + SymbolicOp.Value = Value; + uint64_t ReferenceType; + const char *ReferenceName; + if (!GetOpInfo || + !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + if (IsBranch) { + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, + Address, &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; + SymbolicOp.Value = 0; + } else { + SymbolicOp.Value = Address + Value; + } + if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + CommentStream << "symbol stub for: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Message) + CommentStream << "Objc message: " << ReferenceName; + } else if (MI.getOpcode() == AArch64::ADRP) { + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; + // otool expects the fully encoded ADRP instruction to be passed in as + // the value here, so reconstruct it: + const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); + uint32_t EncodedInst = 0x90000000; + EncodedInst |= (Value & 0x3) << 29; // immlo + EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi + EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg + SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, + &ReferenceName); + CommentStream << format("0x%llx", + 0xfffffffffffff000LL & (Address + Value)); + } else if (MI.getOpcode() == AArch64::ADDXri || + MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRXl || + MI.getOpcode() == AArch64::ADR) { + if (MI.getOpcode() == AArch64::ADDXri) + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; + else if (MI.getOpcode() == AArch64::LDRXui) + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; + if (MI.getOpcode() == AArch64::LDRXl) { + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; + SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, + &ReferenceName); + } else if (MI.getOpcode() == AArch64::ADR) { + ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; + SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, + &ReferenceName); + } else { + const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo(); + // otool expects the fully encoded ADD/LDR instruction to be passed in + // as the value here, so reconstruct it: + unsigned EncodedInst = + MI.getOpcode() == AArch64::ADDXri ? 
0x91000000: 0xF9400000; + EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD] + EncodedInst |= + MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn + EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd + + SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address, + &ReferenceName); + } + if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) + CommentStream << "literal pool symbol address: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) + CommentStream << "literal pool for: \"" << ReferenceName << "\""; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) + CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Message) + CommentStream << "Objc message: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) + CommentStream << "Objc message ref: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) + CommentStream << "Objc selector ref: " << ReferenceName; + else if (ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) + CommentStream << "Objc class ref: " << ReferenceName; + // For these instructions, the SymbolLookUp() above is just to get the + // ReferenceType and ReferenceName. We want to make sure not to + // fall through so we don't build an MCExpr to leave the disassembly + // of the immediate values of these instructions to the InstPrinter. + return false; + } else { + return false; + } + } + + const MCExpr *Add = nullptr; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); + if (Variant != MCSymbolRefExpr::VK_None) + Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx); + else + Add = MCSymbolRefExpr::Create(Sym, Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx); + } + } + + const MCExpr *Sub = nullptr; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx); + } + } + + const MCExpr *Off = nullptr; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, Ctx); + if (Off) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off) + Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx); + else + Expr = Add; + } else { + if (Off) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, Ctx); + } + + MI.addOperand(MCOperand::CreateExpr(Expr)); + + return true; +} diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h new file mode 100644 index 0000000..171d31c --- /dev/null +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h @@ -0,0 +1,38 @@ +//===- AArch64ExternalSymbolizer.h - Symbolizer for AArch64 -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Symbolize AArch64 assembly code during disassembly using callbacks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AArch64EXTERNALSYMBOLIZER_H
+#define AArch64EXTERNALSYMBOLIZER_H
+
+#include "llvm/MC/MCExternalSymbolizer.h"
+
+namespace llvm {
+
+class AArch64ExternalSymbolizer : public MCExternalSymbolizer {
+public:
+  AArch64ExternalSymbolizer(MCContext &Ctx,
+                            std::unique_ptr<MCRelocationInfo> RelInfo,
+                            LLVMOpInfoCallback GetOpInfo,
+                            LLVMSymbolLookupCallback SymbolLookUp,
+                            void *DisInfo)
+      : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp,
+                             DisInfo) {}
+
+  bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
+                                int64_t Value, uint64_t Address, bool IsBranch,
+                                uint64_t Offset, uint64_t InstSize) override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/Disassembler/Android.mk b/lib/Target/AArch64/Disassembler/Android.mk
index fcc53ad..b89538d 100644
--- a/lib/Target/AArch64/Disassembler/Android.mk
+++ b/lib/Target/AArch64/Disassembler/Android.mk
@@ -7,7 +7,8 @@ arm64_disassembler_TBLGEN_TABLES := \
   AArch64GenRegisterInfo.inc
 
 arm64_disassembler_SRC_FILES := \
-  AArch64Disassembler.cpp
+  AArch64Disassembler.cpp \
+  AArch64ExternalSymbolizer.cpp
 
 # For the device
 # =====================================================
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
index 21baf25..be4ccad 100644
--- a/lib/Target/AArch64/Disassembler/CMakeLists.txt
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -1,3 +1,14 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
 add_llvm_library(LLVMAArch64Disassembler
   AArch64Disassembler.cpp
+  AArch64ExternalSymbolizer.cpp
   )
+# workaround for hanging compilation on MSVC8, 9 and 10
+#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+#set_property(
+#  SOURCE ARMDisassembler.cpp
+#  PROPERTY COMPILE_FLAGS "/Od"
+#  )
+#endif()
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
index 05c4ed1..a4224f4 100644
--- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
 ;
 ; The LLVM Compiler Infrastructure
 ;
diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile
index 5c86120..741bb81 100644
--- a/lib/Target/AArch64/Disassembler/Makefile
+++ b/lib/Target/AArch64/Disassembler/Makefile
@@ -10,7 +10,7 @@ LEVEL = ../../../..
 
 LIBRARYNAME = LLVMAArch64Disassembler
 
-# Hack: we need to include 'main' target directory to grab private headers
+# Hack: we need to include 'main' arm target directory to grab private headers
 CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
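 # (e.g. AArch64ExternalSymbolizer.cpp pulls in the private AArch64Subtarget.h
 # header, which lives in that parent directory.)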
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index fd3f009..f484a5b 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -11,529 +11,1306 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "asm-printer"
 #include "AArch64InstPrinter.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-
 using namespace llvm;
 
+#define DEBUG_TYPE "asm-printer"
+
 #define GET_INSTRUCTION_NAME
 #define PRINT_ALIAS_INSTR
 #include "AArch64GenAsmWriter.inc"
-
-static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
-  assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit");
-  if (Value & (1ULL << (BitWidth - 1)))
-    return static_cast<int64_t>(Value) - (1LL << BitWidth);
-  else
-    return Value;
-}
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter1.inc"
 
 AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
                                        const MCInstrInfo &MII,
                                        const MCRegisterInfo &MRI,
-                                       const MCSubtargetInfo &STI) :
-  MCInstPrinter(MAI, MII, MRI) {
+                                       const MCSubtargetInfo &STI)
+    : MCInstPrinter(MAI, MII, MRI) {
   // Initialize the set of available features.
   setAvailableFeatures(STI.getFeatureBits());
 }
 
+AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI,
+                                                 const MCInstrInfo &MII,
+                                                 const MCRegisterInfo &MRI,
+                                                 const MCSubtargetInfo &STI)
+    : AArch64InstPrinter(MAI, MII, MRI, STI) {}
+
 void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+  // This is for .cfi directives.
   OS << getRegisterName(RegNo);
 }
 
-void
-AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI,
-                                            unsigned OpNum, raw_ostream &O) {
-  const MCOperand &MOImm = MI->getOperand(OpNum);
-  int32_t Imm = unpackSignedImm(9, MOImm.getImm());
-
-  O << '#' << Imm;
-}
-
-void
-AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
-                                              raw_ostream &O, unsigned MemSize,
-                                              unsigned RmSize) {
-  unsigned ExtImm = MI->getOperand(OpNum).getImm();
-  unsigned OptionHi = ExtImm >> 1;
-  unsigned S = ExtImm & 1;
-  bool IsLSL = OptionHi == 1 && RmSize == 64;
-
-  const char *Ext;
-  switch (OptionHi) {
-  case 1:
-    Ext = (RmSize == 32) ? "uxtw" : "lsl";
-    break;
-  case 3:
-    Ext = (RmSize == 32) ? "sxtw" : "sxtx";
-    break;
-  default:
-    llvm_unreachable("Incorrect Option on load/store (reg offset)");
-  }
-  O << Ext;
+void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+                                   StringRef Annot) {
+  // Check for special encodings and print the canonical alias instead.
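+  // (For example, "sbfm w0, w1, #0, #7" prints as its canonical alias
+  // "sxtb w0, w1", and "ubfm x0, x1, #60, #59" as "lsl x0, x1, #4".)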
- if (S) { - unsigned ShiftAmt = Log2_32(MemSize); - O << " #" << ShiftAmt; - } else if (IsLSL) { - O << " #0"; - } -} + unsigned Opcode = MI->getOpcode(); -void -AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O) { - const MCOperand &Imm12Op = MI->getOperand(OpNum); + if (Opcode == AArch64::SYSxt) + if (printSysAlias(MI, O)) { + printAnnotation(O, Annot); + return; + } - if (Imm12Op.isImm()) { - int64_t Imm12 = Imm12Op.getImm(); - assert(Imm12 >= 0 && "Invalid immediate for add/sub imm"); - O << "#" << Imm12; - } else { - assert(Imm12Op.isExpr() && "Unexpected shift operand type"); - O << "#" << *Imm12Op.getExpr(); - } -} + // SBFM/UBFM should print to a nicer aliased form if possible. + if (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri || + Opcode == AArch64::UBFMXri || Opcode == AArch64::UBFMWri) { + const MCOperand &Op0 = MI->getOperand(0); + const MCOperand &Op1 = MI->getOperand(1); + const MCOperand &Op2 = MI->getOperand(2); + const MCOperand &Op3 = MI->getOperand(3); + + bool IsSigned = (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri); + bool Is64Bit = (Opcode == AArch64::SBFMXri || Opcode == AArch64::UBFMXri); + if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) { + const char *AsmMnemonic = nullptr; + + switch (Op3.getImm()) { + default: + break; + case 7: + if (IsSigned) + AsmMnemonic = "sxtb"; + else if (!Is64Bit) + AsmMnemonic = "uxtb"; + break; + case 15: + if (IsSigned) + AsmMnemonic = "sxth"; + else if (!Is64Bit) + AsmMnemonic = "uxth"; + break; + case 31: + // *xtw is only valid for signed 64-bit operations. + if (Is64Bit && IsSigned) + AsmMnemonic = "sxtw"; + break; + } + + if (AsmMnemonic) { + O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) + << ", " << getRegisterName(getWRegFromXReg(Op1.getReg())); + printAnnotation(O, Annot); + return; + } + } -void -AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { + // All immediate shifts are aliases, implemented using the Bitfield + // instruction. In all cases the immediate shift amount shift must be in + // the range 0 to (reg.size -1). + if (Op2.isImm() && Op3.isImm()) { + const char *AsmMnemonic = nullptr; + int shift = 0; + int64_t immr = Op2.getImm(); + int64_t imms = Op3.getImm(); + if (Opcode == AArch64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) { + AsmMnemonic = "lsl"; + shift = 31 - imms; + } else if (Opcode == AArch64::UBFMXri && imms != 0x3f && + ((imms + 1 == immr))) { + AsmMnemonic = "lsl"; + shift = 63 - imms; + } else if (Opcode == AArch64::UBFMWri && imms == 0x1f) { + AsmMnemonic = "lsr"; + shift = immr; + } else if (Opcode == AArch64::UBFMXri && imms == 0x3f) { + AsmMnemonic = "lsr"; + shift = immr; + } else if (Opcode == AArch64::SBFMWri && imms == 0x1f) { + AsmMnemonic = "asr"; + shift = immr; + } else if (Opcode == AArch64::SBFMXri && imms == 0x3f) { + AsmMnemonic = "asr"; + shift = immr; + } + if (AsmMnemonic) { + O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) + << ", " << getRegisterName(Op1.getReg()) << ", #" << shift; + printAnnotation(O, Annot); + return; + } + } - printAddSubImmLSL0Operand(MI, OpNum, O); + // SBFIZ/UBFIZ aliases + if (Op2.getImm() > Op3.getImm()) { + O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t' + << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg()) + << ", #" << (Is64Bit ? 
64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1;
+      printAnnotation(O, Annot);
+      return;
+    }
 
-  O << ", lsl #12";
-}
+    // Otherwise SBFX/UBFX is the preferred form
+    O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t'
+      << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
+      << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1;
+    printAnnotation(O, Annot);
+    return;
+  }
 
-void
-AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum,
-                                        raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  O << MO.getImm();
-}
+  if (Opcode == AArch64::BFMXri || Opcode == AArch64::BFMWri) {
+    const MCOperand &Op0 = MI->getOperand(0); // Op1 == Op0
+    const MCOperand &Op2 = MI->getOperand(2);
+    int ImmR = MI->getOperand(3).getImm();
+    int ImmS = MI->getOperand(4).getImm();
+
+    // BFI alias
+    if (ImmS < ImmR) {
+      int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
+      int LSB = (BitWidth - ImmR) % BitWidth;
+      int Width = ImmS + 1;
+      O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", "
+        << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width;
+      printAnnotation(O, Annot);
+      return;
+    }
 
-template<unsigned RegWidth> void
-AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum,
-                                       raw_ostream &O) {
-  const MCOperand &ImmROp = MI->getOperand(OpNum);
-  unsigned LSB = ImmROp.getImm() == 0 ? 0 : RegWidth - ImmROp.getImm();
+    int LSB = ImmR;
+    int Width = ImmS - ImmR + 1;
+    // Otherwise BFXIL is the preferred form
+    O << "\tbfxil\t"
+      << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg())
+      << ", #" << LSB << ", #" << Width;
+    printAnnotation(O, Annot);
+    return;
+  }
 
-  O << '#' << LSB;
-}
+  // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift
+  // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be
+  // printed.
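+  // (For example, "movz x0, #:gottprel_g1:var" already names the chunk at
+  // bits [31:16], so appending ", lsl #16" would add no information.)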
+ if ((Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi || + Opcode == AArch64::MOVNXi || Opcode == AArch64::MOVNWi) && + MI->getOperand(1).isExpr()) { + if (Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi) + O << "\tmovz\t"; + else + O << "\tmovn\t"; -void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ImmSOp = MI->getOperand(OpNum); - unsigned Width = ImmSOp.getImm() + 1; + O << getRegisterName(MI->getOperand(0).getReg()) << ", #" + << *MI->getOperand(1).getExpr(); + return; + } - O << '#' << Width; -} + if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) && + MI->getOperand(2).isExpr()) { + O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" + << *MI->getOperand(2).getExpr(); + return; + } -void -AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ImmSOp = MI->getOperand(OpNum); - const MCOperand &ImmROp = MI->getOperand(OpNum - 1); + if (!printAliasInstr(MI, O)) + printInstruction(MI, O); - unsigned ImmR = ImmROp.getImm(); - unsigned ImmS = ImmSOp.getImm(); + printAnnotation(O, Annot); +} - assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract"); +static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout, + bool &IsTbx) { + switch (Opcode) { + case AArch64::TBXv8i8One: + case AArch64::TBXv8i8Two: + case AArch64::TBXv8i8Three: + case AArch64::TBXv8i8Four: + IsTbx = true; + Layout = ".8b"; + return true; + case AArch64::TBLv8i8One: + case AArch64::TBLv8i8Two: + case AArch64::TBLv8i8Three: + case AArch64::TBLv8i8Four: + IsTbx = false; + Layout = ".8b"; + return true; + case AArch64::TBXv16i8One: + case AArch64::TBXv16i8Two: + case AArch64::TBXv16i8Three: + case AArch64::TBXv16i8Four: + IsTbx = true; + Layout = ".16b"; + return true; + case AArch64::TBLv16i8One: + case AArch64::TBLv16i8Two: + case AArch64::TBLv16i8Three: + case AArch64::TBLv16i8Four: + IsTbx = false; + Layout = ".16b"; + return true; + default: + return false; + } +} - O << '#' << (ImmS - ImmR + 1); +struct LdStNInstrDesc { + unsigned Opcode; + const char *Mnemonic; + const char *Layout; + int ListOperand; + bool HasLane; + int NaturalOffset; +}; + +static LdStNInstrDesc LdStNInstInfo[] = { + { AArch64::LD1i8, "ld1", ".b", 1, true, 0 }, + { AArch64::LD1i16, "ld1", ".h", 1, true, 0 }, + { AArch64::LD1i32, "ld1", ".s", 1, true, 0 }, + { AArch64::LD1i64, "ld1", ".d", 1, true, 0 }, + { AArch64::LD1i8_POST, "ld1", ".b", 2, true, 1 }, + { AArch64::LD1i16_POST, "ld1", ".h", 2, true, 2 }, + { AArch64::LD1i32_POST, "ld1", ".s", 2, true, 4 }, + { AArch64::LD1i64_POST, "ld1", ".d", 2, true, 8 }, + { AArch64::LD1Rv16b, "ld1r", ".16b", 0, false, 0 }, + { AArch64::LD1Rv8h, "ld1r", ".8h", 0, false, 0 }, + { AArch64::LD1Rv4s, "ld1r", ".4s", 0, false, 0 }, + { AArch64::LD1Rv2d, "ld1r", ".2d", 0, false, 0 }, + { AArch64::LD1Rv8b, "ld1r", ".8b", 0, false, 0 }, + { AArch64::LD1Rv4h, "ld1r", ".4h", 0, false, 0 }, + { AArch64::LD1Rv2s, "ld1r", ".2s", 0, false, 0 }, + { AArch64::LD1Rv1d, "ld1r", ".1d", 0, false, 0 }, + { AArch64::LD1Rv16b_POST, "ld1r", ".16b", 1, false, 1 }, + { AArch64::LD1Rv8h_POST, "ld1r", ".8h", 1, false, 2 }, + { AArch64::LD1Rv4s_POST, "ld1r", ".4s", 1, false, 4 }, + { AArch64::LD1Rv2d_POST, "ld1r", ".2d", 1, false, 8 }, + { AArch64::LD1Rv8b_POST, "ld1r", ".8b", 1, false, 1 }, + { AArch64::LD1Rv4h_POST, "ld1r", ".4h", 1, false, 2 }, + { AArch64::LD1Rv2s_POST, "ld1r", ".2s", 1, false, 4 }, + { AArch64::LD1Rv1d_POST, "ld1r", 
".1d", 1, false, 8 }, + { AArch64::LD1Onev16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Onev8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Onev4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Onev2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Onev8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Onev4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Onev2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Onev1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Onev16b_POST, "ld1", ".16b", 1, false, 16 }, + { AArch64::LD1Onev8h_POST, "ld1", ".8h", 1, false, 16 }, + { AArch64::LD1Onev4s_POST, "ld1", ".4s", 1, false, 16 }, + { AArch64::LD1Onev2d_POST, "ld1", ".2d", 1, false, 16 }, + { AArch64::LD1Onev8b_POST, "ld1", ".8b", 1, false, 8 }, + { AArch64::LD1Onev4h_POST, "ld1", ".4h", 1, false, 8 }, + { AArch64::LD1Onev2s_POST, "ld1", ".2s", 1, false, 8 }, + { AArch64::LD1Onev1d_POST, "ld1", ".1d", 1, false, 8 }, + { AArch64::LD1Twov16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Twov8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Twov4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Twov2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Twov8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Twov4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Twov2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Twov1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Twov16b_POST, "ld1", ".16b", 1, false, 32 }, + { AArch64::LD1Twov8h_POST, "ld1", ".8h", 1, false, 32 }, + { AArch64::LD1Twov4s_POST, "ld1", ".4s", 1, false, 32 }, + { AArch64::LD1Twov2d_POST, "ld1", ".2d", 1, false, 32 }, + { AArch64::LD1Twov8b_POST, "ld1", ".8b", 1, false, 16 }, + { AArch64::LD1Twov4h_POST, "ld1", ".4h", 1, false, 16 }, + { AArch64::LD1Twov2s_POST, "ld1", ".2s", 1, false, 16 }, + { AArch64::LD1Twov1d_POST, "ld1", ".1d", 1, false, 16 }, + { AArch64::LD1Threev16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Threev8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Threev4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Threev2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Threev8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Threev4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Threev2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Threev1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Threev16b_POST, "ld1", ".16b", 1, false, 48 }, + { AArch64::LD1Threev8h_POST, "ld1", ".8h", 1, false, 48 }, + { AArch64::LD1Threev4s_POST, "ld1", ".4s", 1, false, 48 }, + { AArch64::LD1Threev2d_POST, "ld1", ".2d", 1, false, 48 }, + { AArch64::LD1Threev8b_POST, "ld1", ".8b", 1, false, 24 }, + { AArch64::LD1Threev4h_POST, "ld1", ".4h", 1, false, 24 }, + { AArch64::LD1Threev2s_POST, "ld1", ".2s", 1, false, 24 }, + { AArch64::LD1Threev1d_POST, "ld1", ".1d", 1, false, 24 }, + { AArch64::LD1Fourv16b, "ld1", ".16b", 0, false, 0 }, + { AArch64::LD1Fourv8h, "ld1", ".8h", 0, false, 0 }, + { AArch64::LD1Fourv4s, "ld1", ".4s", 0, false, 0 }, + { AArch64::LD1Fourv2d, "ld1", ".2d", 0, false, 0 }, + { AArch64::LD1Fourv8b, "ld1", ".8b", 0, false, 0 }, + { AArch64::LD1Fourv4h, "ld1", ".4h", 0, false, 0 }, + { AArch64::LD1Fourv2s, "ld1", ".2s", 0, false, 0 }, + { AArch64::LD1Fourv1d, "ld1", ".1d", 0, false, 0 }, + { AArch64::LD1Fourv16b_POST, "ld1", ".16b", 1, false, 64 }, + { AArch64::LD1Fourv8h_POST, "ld1", ".8h", 1, false, 64 }, + { AArch64::LD1Fourv4s_POST, "ld1", ".4s", 1, false, 64 }, + { AArch64::LD1Fourv2d_POST, "ld1", ".2d", 1, false, 64 }, + { AArch64::LD1Fourv8b_POST, "ld1", ".8b", 1, false, 32 }, + { AArch64::LD1Fourv4h_POST, "ld1", ".4h", 1, false, 32 }, + { 
AArch64::LD1Fourv2s_POST, "ld1", ".2s", 1, false, 32 }, + { AArch64::LD1Fourv1d_POST, "ld1", ".1d", 1, false, 32 }, + { AArch64::LD2i8, "ld2", ".b", 1, true, 0 }, + { AArch64::LD2i16, "ld2", ".h", 1, true, 0 }, + { AArch64::LD2i32, "ld2", ".s", 1, true, 0 }, + { AArch64::LD2i64, "ld2", ".d", 1, true, 0 }, + { AArch64::LD2i8_POST, "ld2", ".b", 2, true, 2 }, + { AArch64::LD2i16_POST, "ld2", ".h", 2, true, 4 }, + { AArch64::LD2i32_POST, "ld2", ".s", 2, true, 8 }, + { AArch64::LD2i64_POST, "ld2", ".d", 2, true, 16 }, + { AArch64::LD2Rv16b, "ld2r", ".16b", 0, false, 0 }, + { AArch64::LD2Rv8h, "ld2r", ".8h", 0, false, 0 }, + { AArch64::LD2Rv4s, "ld2r", ".4s", 0, false, 0 }, + { AArch64::LD2Rv2d, "ld2r", ".2d", 0, false, 0 }, + { AArch64::LD2Rv8b, "ld2r", ".8b", 0, false, 0 }, + { AArch64::LD2Rv4h, "ld2r", ".4h", 0, false, 0 }, + { AArch64::LD2Rv2s, "ld2r", ".2s", 0, false, 0 }, + { AArch64::LD2Rv1d, "ld2r", ".1d", 0, false, 0 }, + { AArch64::LD2Rv16b_POST, "ld2r", ".16b", 1, false, 2 }, + { AArch64::LD2Rv8h_POST, "ld2r", ".8h", 1, false, 4 }, + { AArch64::LD2Rv4s_POST, "ld2r", ".4s", 1, false, 8 }, + { AArch64::LD2Rv2d_POST, "ld2r", ".2d", 1, false, 16 }, + { AArch64::LD2Rv8b_POST, "ld2r", ".8b", 1, false, 2 }, + { AArch64::LD2Rv4h_POST, "ld2r", ".4h", 1, false, 4 }, + { AArch64::LD2Rv2s_POST, "ld2r", ".2s", 1, false, 8 }, + { AArch64::LD2Rv1d_POST, "ld2r", ".1d", 1, false, 16 }, + { AArch64::LD2Twov16b, "ld2", ".16b", 0, false, 0 }, + { AArch64::LD2Twov8h, "ld2", ".8h", 0, false, 0 }, + { AArch64::LD2Twov4s, "ld2", ".4s", 0, false, 0 }, + { AArch64::LD2Twov2d, "ld2", ".2d", 0, false, 0 }, + { AArch64::LD2Twov8b, "ld2", ".8b", 0, false, 0 }, + { AArch64::LD2Twov4h, "ld2", ".4h", 0, false, 0 }, + { AArch64::LD2Twov2s, "ld2", ".2s", 0, false, 0 }, + { AArch64::LD2Twov16b_POST, "ld2", ".16b", 1, false, 32 }, + { AArch64::LD2Twov8h_POST, "ld2", ".8h", 1, false, 32 }, + { AArch64::LD2Twov4s_POST, "ld2", ".4s", 1, false, 32 }, + { AArch64::LD2Twov2d_POST, "ld2", ".2d", 1, false, 32 }, + { AArch64::LD2Twov8b_POST, "ld2", ".8b", 1, false, 16 }, + { AArch64::LD2Twov4h_POST, "ld2", ".4h", 1, false, 16 }, + { AArch64::LD2Twov2s_POST, "ld2", ".2s", 1, false, 16 }, + { AArch64::LD3i8, "ld3", ".b", 1, true, 0 }, + { AArch64::LD3i16, "ld3", ".h", 1, true, 0 }, + { AArch64::LD3i32, "ld3", ".s", 1, true, 0 }, + { AArch64::LD3i64, "ld3", ".d", 1, true, 0 }, + { AArch64::LD3i8_POST, "ld3", ".b", 2, true, 3 }, + { AArch64::LD3i16_POST, "ld3", ".h", 2, true, 6 }, + { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 }, + { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 }, + { AArch64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 }, + { AArch64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 }, + { AArch64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 }, + { AArch64::LD3Rv2d, "ld3r", ".2d", 0, false, 0 }, + { AArch64::LD3Rv8b, "ld3r", ".8b", 0, false, 0 }, + { AArch64::LD3Rv4h, "ld3r", ".4h", 0, false, 0 }, + { AArch64::LD3Rv2s, "ld3r", ".2s", 0, false, 0 }, + { AArch64::LD3Rv1d, "ld3r", ".1d", 0, false, 0 }, + { AArch64::LD3Rv16b_POST, "ld3r", ".16b", 1, false, 3 }, + { AArch64::LD3Rv8h_POST, "ld3r", ".8h", 1, false, 6 }, + { AArch64::LD3Rv4s_POST, "ld3r", ".4s", 1, false, 12 }, + { AArch64::LD3Rv2d_POST, "ld3r", ".2d", 1, false, 24 }, + { AArch64::LD3Rv8b_POST, "ld3r", ".8b", 1, false, 3 }, + { AArch64::LD3Rv4h_POST, "ld3r", ".4h", 1, false, 6 }, + { AArch64::LD3Rv2s_POST, "ld3r", ".2s", 1, false, 12 }, + { AArch64::LD3Rv1d_POST, "ld3r", ".1d", 1, false, 24 }, + { AArch64::LD3Threev16b, "ld3", ".16b", 0, false, 0 }, + { AArch64::LD3Threev8h, 
"ld3", ".8h", 0, false, 0 }, + { AArch64::LD3Threev4s, "ld3", ".4s", 0, false, 0 }, + { AArch64::LD3Threev2d, "ld3", ".2d", 0, false, 0 }, + { AArch64::LD3Threev8b, "ld3", ".8b", 0, false, 0 }, + { AArch64::LD3Threev4h, "ld3", ".4h", 0, false, 0 }, + { AArch64::LD3Threev2s, "ld3", ".2s", 0, false, 0 }, + { AArch64::LD3Threev16b_POST, "ld3", ".16b", 1, false, 48 }, + { AArch64::LD3Threev8h_POST, "ld3", ".8h", 1, false, 48 }, + { AArch64::LD3Threev4s_POST, "ld3", ".4s", 1, false, 48 }, + { AArch64::LD3Threev2d_POST, "ld3", ".2d", 1, false, 48 }, + { AArch64::LD3Threev8b_POST, "ld3", ".8b", 1, false, 24 }, + { AArch64::LD3Threev4h_POST, "ld3", ".4h", 1, false, 24 }, + { AArch64::LD3Threev2s_POST, "ld3", ".2s", 1, false, 24 }, + { AArch64::LD4i8, "ld4", ".b", 1, true, 0 }, + { AArch64::LD4i16, "ld4", ".h", 1, true, 0 }, + { AArch64::LD4i32, "ld4", ".s", 1, true, 0 }, + { AArch64::LD4i64, "ld4", ".d", 1, true, 0 }, + { AArch64::LD4i8_POST, "ld4", ".b", 2, true, 4 }, + { AArch64::LD4i16_POST, "ld4", ".h", 2, true, 8 }, + { AArch64::LD4i32_POST, "ld4", ".s", 2, true, 16 }, + { AArch64::LD4i64_POST, "ld4", ".d", 2, true, 32 }, + { AArch64::LD4Rv16b, "ld4r", ".16b", 0, false, 0 }, + { AArch64::LD4Rv8h, "ld4r", ".8h", 0, false, 0 }, + { AArch64::LD4Rv4s, "ld4r", ".4s", 0, false, 0 }, + { AArch64::LD4Rv2d, "ld4r", ".2d", 0, false, 0 }, + { AArch64::LD4Rv8b, "ld4r", ".8b", 0, false, 0 }, + { AArch64::LD4Rv4h, "ld4r", ".4h", 0, false, 0 }, + { AArch64::LD4Rv2s, "ld4r", ".2s", 0, false, 0 }, + { AArch64::LD4Rv1d, "ld4r", ".1d", 0, false, 0 }, + { AArch64::LD4Rv16b_POST, "ld4r", ".16b", 1, false, 4 }, + { AArch64::LD4Rv8h_POST, "ld4r", ".8h", 1, false, 8 }, + { AArch64::LD4Rv4s_POST, "ld4r", ".4s", 1, false, 16 }, + { AArch64::LD4Rv2d_POST, "ld4r", ".2d", 1, false, 32 }, + { AArch64::LD4Rv8b_POST, "ld4r", ".8b", 1, false, 4 }, + { AArch64::LD4Rv4h_POST, "ld4r", ".4h", 1, false, 8 }, + { AArch64::LD4Rv2s_POST, "ld4r", ".2s", 1, false, 16 }, + { AArch64::LD4Rv1d_POST, "ld4r", ".1d", 1, false, 32 }, + { AArch64::LD4Fourv16b, "ld4", ".16b", 0, false, 0 }, + { AArch64::LD4Fourv8h, "ld4", ".8h", 0, false, 0 }, + { AArch64::LD4Fourv4s, "ld4", ".4s", 0, false, 0 }, + { AArch64::LD4Fourv2d, "ld4", ".2d", 0, false, 0 }, + { AArch64::LD4Fourv8b, "ld4", ".8b", 0, false, 0 }, + { AArch64::LD4Fourv4h, "ld4", ".4h", 0, false, 0 }, + { AArch64::LD4Fourv2s, "ld4", ".2s", 0, false, 0 }, + { AArch64::LD4Fourv16b_POST, "ld4", ".16b", 1, false, 64 }, + { AArch64::LD4Fourv8h_POST, "ld4", ".8h", 1, false, 64 }, + { AArch64::LD4Fourv4s_POST, "ld4", ".4s", 1, false, 64 }, + { AArch64::LD4Fourv2d_POST, "ld4", ".2d", 1, false, 64 }, + { AArch64::LD4Fourv8b_POST, "ld4", ".8b", 1, false, 32 }, + { AArch64::LD4Fourv4h_POST, "ld4", ".4h", 1, false, 32 }, + { AArch64::LD4Fourv2s_POST, "ld4", ".2s", 1, false, 32 }, + { AArch64::ST1i8, "st1", ".b", 0, true, 0 }, + { AArch64::ST1i16, "st1", ".h", 0, true, 0 }, + { AArch64::ST1i32, "st1", ".s", 0, true, 0 }, + { AArch64::ST1i64, "st1", ".d", 0, true, 0 }, + { AArch64::ST1i8_POST, "st1", ".b", 1, true, 1 }, + { AArch64::ST1i16_POST, "st1", ".h", 1, true, 2 }, + { AArch64::ST1i32_POST, "st1", ".s", 1, true, 4 }, + { AArch64::ST1i64_POST, "st1", ".d", 1, true, 8 }, + { AArch64::ST1Onev16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Onev8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Onev4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Onev2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Onev8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Onev4h, "st1", ".4h", 0, false, 0 }, + { 
AArch64::ST1Onev2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Onev1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Onev16b_POST, "st1", ".16b", 1, false, 16 }, + { AArch64::ST1Onev8h_POST, "st1", ".8h", 1, false, 16 }, + { AArch64::ST1Onev4s_POST, "st1", ".4s", 1, false, 16 }, + { AArch64::ST1Onev2d_POST, "st1", ".2d", 1, false, 16 }, + { AArch64::ST1Onev8b_POST, "st1", ".8b", 1, false, 8 }, + { AArch64::ST1Onev4h_POST, "st1", ".4h", 1, false, 8 }, + { AArch64::ST1Onev2s_POST, "st1", ".2s", 1, false, 8 }, + { AArch64::ST1Onev1d_POST, "st1", ".1d", 1, false, 8 }, + { AArch64::ST1Twov16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Twov8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Twov4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Twov2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Twov8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Twov4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Twov2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Twov1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Twov16b_POST, "st1", ".16b", 1, false, 32 }, + { AArch64::ST1Twov8h_POST, "st1", ".8h", 1, false, 32 }, + { AArch64::ST1Twov4s_POST, "st1", ".4s", 1, false, 32 }, + { AArch64::ST1Twov2d_POST, "st1", ".2d", 1, false, 32 }, + { AArch64::ST1Twov8b_POST, "st1", ".8b", 1, false, 16 }, + { AArch64::ST1Twov4h_POST, "st1", ".4h", 1, false, 16 }, + { AArch64::ST1Twov2s_POST, "st1", ".2s", 1, false, 16 }, + { AArch64::ST1Twov1d_POST, "st1", ".1d", 1, false, 16 }, + { AArch64::ST1Threev16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Threev8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Threev4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Threev2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Threev8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Threev4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Threev2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Threev1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Threev16b_POST, "st1", ".16b", 1, false, 48 }, + { AArch64::ST1Threev8h_POST, "st1", ".8h", 1, false, 48 }, + { AArch64::ST1Threev4s_POST, "st1", ".4s", 1, false, 48 }, + { AArch64::ST1Threev2d_POST, "st1", ".2d", 1, false, 48 }, + { AArch64::ST1Threev8b_POST, "st1", ".8b", 1, false, 24 }, + { AArch64::ST1Threev4h_POST, "st1", ".4h", 1, false, 24 }, + { AArch64::ST1Threev2s_POST, "st1", ".2s", 1, false, 24 }, + { AArch64::ST1Threev1d_POST, "st1", ".1d", 1, false, 24 }, + { AArch64::ST1Fourv16b, "st1", ".16b", 0, false, 0 }, + { AArch64::ST1Fourv8h, "st1", ".8h", 0, false, 0 }, + { AArch64::ST1Fourv4s, "st1", ".4s", 0, false, 0 }, + { AArch64::ST1Fourv2d, "st1", ".2d", 0, false, 0 }, + { AArch64::ST1Fourv8b, "st1", ".8b", 0, false, 0 }, + { AArch64::ST1Fourv4h, "st1", ".4h", 0, false, 0 }, + { AArch64::ST1Fourv2s, "st1", ".2s", 0, false, 0 }, + { AArch64::ST1Fourv1d, "st1", ".1d", 0, false, 0 }, + { AArch64::ST1Fourv16b_POST, "st1", ".16b", 1, false, 64 }, + { AArch64::ST1Fourv8h_POST, "st1", ".8h", 1, false, 64 }, + { AArch64::ST1Fourv4s_POST, "st1", ".4s", 1, false, 64 }, + { AArch64::ST1Fourv2d_POST, "st1", ".2d", 1, false, 64 }, + { AArch64::ST1Fourv8b_POST, "st1", ".8b", 1, false, 32 }, + { AArch64::ST1Fourv4h_POST, "st1", ".4h", 1, false, 32 }, + { AArch64::ST1Fourv2s_POST, "st1", ".2s", 1, false, 32 }, + { AArch64::ST1Fourv1d_POST, "st1", ".1d", 1, false, 32 }, + { AArch64::ST2i8, "st2", ".b", 0, true, 0 }, + { AArch64::ST2i16, "st2", ".h", 0, true, 0 }, + { AArch64::ST2i32, "st2", ".s", 0, true, 0 }, + { AArch64::ST2i64, "st2", ".d", 0, true, 0 }, + { AArch64::ST2i8_POST, "st2", ".b", 1, 
true, 2 }, + { AArch64::ST2i16_POST, "st2", ".h", 1, true, 4 }, + { AArch64::ST2i32_POST, "st2", ".s", 1, true, 8 }, + { AArch64::ST2i64_POST, "st2", ".d", 1, true, 16 }, + { AArch64::ST2Twov16b, "st2", ".16b", 0, false, 0 }, + { AArch64::ST2Twov8h, "st2", ".8h", 0, false, 0 }, + { AArch64::ST2Twov4s, "st2", ".4s", 0, false, 0 }, + { AArch64::ST2Twov2d, "st2", ".2d", 0, false, 0 }, + { AArch64::ST2Twov8b, "st2", ".8b", 0, false, 0 }, + { AArch64::ST2Twov4h, "st2", ".4h", 0, false, 0 }, + { AArch64::ST2Twov2s, "st2", ".2s", 0, false, 0 }, + { AArch64::ST2Twov16b_POST, "st2", ".16b", 1, false, 32 }, + { AArch64::ST2Twov8h_POST, "st2", ".8h", 1, false, 32 }, + { AArch64::ST2Twov4s_POST, "st2", ".4s", 1, false, 32 }, + { AArch64::ST2Twov2d_POST, "st2", ".2d", 1, false, 32 }, + { AArch64::ST2Twov8b_POST, "st2", ".8b", 1, false, 16 }, + { AArch64::ST2Twov4h_POST, "st2", ".4h", 1, false, 16 }, + { AArch64::ST2Twov2s_POST, "st2", ".2s", 1, false, 16 }, + { AArch64::ST3i8, "st3", ".b", 0, true, 0 }, + { AArch64::ST3i16, "st3", ".h", 0, true, 0 }, + { AArch64::ST3i32, "st3", ".s", 0, true, 0 }, + { AArch64::ST3i64, "st3", ".d", 0, true, 0 }, + { AArch64::ST3i8_POST, "st3", ".b", 1, true, 3 }, + { AArch64::ST3i16_POST, "st3", ".h", 1, true, 6 }, + { AArch64::ST3i32_POST, "st3", ".s", 1, true, 12 }, + { AArch64::ST3i64_POST, "st3", ".d", 1, true, 24 }, + { AArch64::ST3Threev16b, "st3", ".16b", 0, false, 0 }, + { AArch64::ST3Threev8h, "st3", ".8h", 0, false, 0 }, + { AArch64::ST3Threev4s, "st3", ".4s", 0, false, 0 }, + { AArch64::ST3Threev2d, "st3", ".2d", 0, false, 0 }, + { AArch64::ST3Threev8b, "st3", ".8b", 0, false, 0 }, + { AArch64::ST3Threev4h, "st3", ".4h", 0, false, 0 }, + { AArch64::ST3Threev2s, "st3", ".2s", 0, false, 0 }, + { AArch64::ST3Threev16b_POST, "st3", ".16b", 1, false, 48 }, + { AArch64::ST3Threev8h_POST, "st3", ".8h", 1, false, 48 }, + { AArch64::ST3Threev4s_POST, "st3", ".4s", 1, false, 48 }, + { AArch64::ST3Threev2d_POST, "st3", ".2d", 1, false, 48 }, + { AArch64::ST3Threev8b_POST, "st3", ".8b", 1, false, 24 }, + { AArch64::ST3Threev4h_POST, "st3", ".4h", 1, false, 24 }, + { AArch64::ST3Threev2s_POST, "st3", ".2s", 1, false, 24 }, + { AArch64::ST4i8, "st4", ".b", 0, true, 0 }, + { AArch64::ST4i16, "st4", ".h", 0, true, 0 }, + { AArch64::ST4i32, "st4", ".s", 0, true, 0 }, + { AArch64::ST4i64, "st4", ".d", 0, true, 0 }, + { AArch64::ST4i8_POST, "st4", ".b", 1, true, 4 }, + { AArch64::ST4i16_POST, "st4", ".h", 1, true, 8 }, + { AArch64::ST4i32_POST, "st4", ".s", 1, true, 16 }, + { AArch64::ST4i64_POST, "st4", ".d", 1, true, 32 }, + { AArch64::ST4Fourv16b, "st4", ".16b", 0, false, 0 }, + { AArch64::ST4Fourv8h, "st4", ".8h", 0, false, 0 }, + { AArch64::ST4Fourv4s, "st4", ".4s", 0, false, 0 }, + { AArch64::ST4Fourv2d, "st4", ".2d", 0, false, 0 }, + { AArch64::ST4Fourv8b, "st4", ".8b", 0, false, 0 }, + { AArch64::ST4Fourv4h, "st4", ".4h", 0, false, 0 }, + { AArch64::ST4Fourv2s, "st4", ".2s", 0, false, 0 }, + { AArch64::ST4Fourv16b_POST, "st4", ".16b", 1, false, 64 }, + { AArch64::ST4Fourv8h_POST, "st4", ".8h", 1, false, 64 }, + { AArch64::ST4Fourv4s_POST, "st4", ".4s", 1, false, 64 }, + { AArch64::ST4Fourv2d_POST, "st4", ".2d", 1, false, 64 }, + { AArch64::ST4Fourv8b_POST, "st4", ".8b", 1, false, 32 }, + { AArch64::ST4Fourv4h_POST, "st4", ".4h", 1, false, 32 }, + { AArch64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 }, +}; + +static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { + unsigned Idx; + for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) + if 
(LdStNInstInfo[Idx].Opcode == Opcode) + return &LdStNInstInfo[Idx]; + + return nullptr; } -void -AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &CRx = MI->getOperand(OpNum); +void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, + StringRef Annot) { + unsigned Opcode = MI->getOpcode(); + StringRef Layout, Mnemonic; - O << 'c' << CRx.getImm(); -} + bool IsTbx; + if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { + O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t' + << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", "; + unsigned ListOpNum = IsTbx ? 2 : 1; + printVectorList(MI, ListOpNum, O, ""); -void -AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &ScaleOp = MI->getOperand(OpNum); + O << ", " + << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg); + printAnnotation(O, Annot); + return; + } - O << '#' << (64 - ScaleOp.getImm()); -} + if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { + O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; + + // Now onto the operands: first a vector list with possible lane + // specifier. E.g. { v0 }[2] + int OpNum = LdStDesc->ListOperand; + printVectorList(MI, OpNum++, O, ""); + + if (LdStDesc->HasLane) + O << '[' << MI->getOperand(OpNum++).getImm() << ']'; + + // Next the address: [xN] + unsigned AddrReg = MI->getOperand(OpNum++).getReg(); + O << ", [" << getRegisterName(AddrReg) << ']'; + + // Finally, there might be a post-indexed offset. + if (LdStDesc->NaturalOffset != 0) { + unsigned Reg = MI->getOperand(OpNum++).getReg(); + if (Reg != AArch64::XZR) + O << ", " << getRegisterName(Reg); + else { + assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?"); + O << ", #" << LdStDesc->NaturalOffset; + } + } + printAnnotation(O, Annot); + return; + } -void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - const MCOperand &MOImm8 = MI->getOperand(OpNum); + AArch64InstPrinter::printInst(MI, O, Annot); +} - assert(MOImm8.isImm() - && "Immediate operand required for floating-point immediate inst"); +bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { +#ifndef NDEBUG + unsigned Opcode = MI->getOpcode(); + assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!"); +#endif + + const char *Asm = nullptr; + const MCOperand &Op1 = MI->getOperand(0); + const MCOperand &Cn = MI->getOperand(1); + const MCOperand &Cm = MI->getOperand(2); + const MCOperand &Op2 = MI->getOperand(3); + + unsigned Op1Val = Op1.getImm(); + unsigned CnVal = Cn.getImm(); + unsigned CmVal = Cm.getImm(); + unsigned Op2Val = Op2.getImm(); + + if (CnVal == 7) { + switch (CmVal) { + default: + break; + + // IC aliases + case 1: + if (Op1Val == 0 && Op2Val == 0) + Asm = "ic\tialluis"; + break; + case 5: + if (Op1Val == 0 && Op2Val == 0) + Asm = "ic\tiallu"; + else if (Op1Val == 3 && Op2Val == 1) + Asm = "ic\tivau"; + break; + + // DC aliases + case 4: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tzva"; + break; + case 6: + if (Op1Val == 0 && Op2Val == 1) + Asm = "dc\tivac"; + if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tisw"; + break; + case 10: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tcvac"; + else if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tcsw"; + break; + case 11: + if (Op1Val == 3 && Op2Val == 1) + Asm = "dc\tcvau"; + break; + case 14: + if (Op1Val == 3 && Op2Val == 1) + Asm = 
"dc\tcivac"; + else if (Op1Val == 0 && Op2Val == 2) + Asm = "dc\tcisw"; + break; + + // AT aliases + case 8: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e1r"; break; + case 1: Asm = "at\ts1e1w"; break; + case 2: Asm = "at\ts1e0r"; break; + case 3: Asm = "at\ts1e0w"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e2r"; break; + case 1: Asm = "at\ts1e2w"; break; + case 4: Asm = "at\ts12e1r"; break; + case 5: Asm = "at\ts12e1w"; break; + case 6: Asm = "at\ts12e0r"; break; + case 7: Asm = "at\ts12e0w"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e3r"; break; + case 1: Asm = "at\ts1e3w"; break; + } + break; + } + break; + } + } else if (CnVal == 8) { + // TLBI aliases + switch (CmVal) { + default: + break; + case 3: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\tvmalle1is"; break; + case 1: Asm = "tlbi\tvae1is"; break; + case 2: Asm = "tlbi\taside1is"; break; + case 3: Asm = "tlbi\tvaae1is"; break; + case 5: Asm = "tlbi\tvale1is"; break; + case 7: Asm = "tlbi\tvaale1is"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle2is"; break; + case 1: Asm = "tlbi\tvae2is"; break; + case 4: Asm = "tlbi\talle1is"; break; + case 5: Asm = "tlbi\tvale2is"; break; + case 6: Asm = "tlbi\tvmalls12e1is"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle3is"; break; + case 1: Asm = "tlbi\tvae3is"; break; + case 5: Asm = "tlbi\tvale3is"; break; + } + break; + } + break; + case 0: + switch (Op1Val) { + default: + break; + case 4: + switch (Op2Val) { + default: + break; + case 1: Asm = "tlbi\tipas2e1is"; break; + case 5: Asm = "tlbi\tipas2le1is"; break; + } + break; + } + break; + case 4: + switch (Op1Val) { + default: + break; + case 4: + switch (Op2Val) { + default: + break; + case 1: Asm = "tlbi\tipas2e1"; break; + case 5: Asm = "tlbi\tipas2le1"; break; + } + break; + } + break; + case 7: + switch (Op1Val) { + default: + break; + case 0: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\tvmalle1"; break; + case 1: Asm = "tlbi\tvae1"; break; + case 2: Asm = "tlbi\taside1"; break; + case 3: Asm = "tlbi\tvaae1"; break; + case 5: Asm = "tlbi\tvale1"; break; + case 7: Asm = "tlbi\tvaale1"; break; + } + break; + case 4: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle2"; break; + case 1: Asm = "tlbi\tvae2"; break; + case 4: Asm = "tlbi\talle1"; break; + case 5: Asm = "tlbi\tvale2"; break; + case 6: Asm = "tlbi\tvmalls12e1"; break; + } + break; + case 6: + switch (Op2Val) { + default: + break; + case 0: Asm = "tlbi\talle3"; break; + case 1: Asm = "tlbi\tvae3"; break; + case 5: Asm = "tlbi\tvale3"; break; + } + break; + } + break; + } + } + + if (Asm) { + unsigned Reg = MI->getOperand(4).getReg(); - uint32_t Imm8 = MOImm8.getImm(); - uint32_t Fraction = Imm8 & 0xf; - uint32_t Exponent = (Imm8 >> 4) & 0x7; - uint32_t Negative = (Imm8 >> 7) & 0x1; + O << '\t' << Asm; + if (StringRef(Asm).lower().find("all") == StringRef::npos) + O << ", " << getRegisterName(Reg); + } - float Val = 1.0f + Fraction / 16.0f; + return Asm != nullptr; +} - // That is: - // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4, - // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0 - if (Exponent & 0x4) { - Val /= 1 << (7 - Exponent); +void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned 
OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg); + } else if (Op.isImm()) { + O << '#' << Op.getImm(); } else { - Val *= 1 << (Exponent + 1); + assert(Op.isExpr() && "unknown operand kind in printOperand"); + O << *Op.getExpr(); } +} - Val = Negative ? -Val : Val; - - o << '#' << format("%.8f", Val); +void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + O << format("#%#llx", Op.getImm()); } -void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - o << "#0.0"; +void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, + unsigned Imm, raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Reg == AArch64::XZR) + O << "#" << Imm; + else + O << getRegisterName(Reg); + } else + assert(0 && "unknown operand kind in printPostIncOperand64"); } -void -AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); +void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert(Op.isReg() && "Non-register vreg operand!"); + unsigned Reg = Op.getReg(); + O << getRegisterName(Reg, AArch64::vreg); +} - O << A64CondCodeToString(static_cast(MO.getImm())); +void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); + O << "c" << Op.getImm(); } -template void -AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { +void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { const MCOperand &MO = MI->getOperand(OpNum); - - if (!MO.isImm()) { - printOperand(MI, OpNum, O); - return; + if (MO.isImm()) { + unsigned Val = (MO.getImm() & 0xfff); + assert(Val == MO.getImm() && "Add/sub immediate out of range!"); + unsigned Shift = + AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); + O << '#' << Val; + if (Shift != 0) + printShifter(MI, OpNum + 1, O); + + if (CommentStream) + *CommentStream << '=' << (Val << Shift) << '\n'; + } else { + assert(MO.isExpr() && "Unexpected operand type!"); + O << *MO.getExpr(); + printShifter(MI, OpNum + 1, O); } +} - // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which - // is multiplied by 4 (because all A64 instructions are 32-bits wide). 
-  uint64_t UImm = MO.getImm();
-  uint64_t Sign = UImm & (1LL << (field_width - 1));
-  int64_t SImm = scale * ((UImm & ~Sign) - Sign);
-
-  O << "#" << SImm;
+void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  uint64_t Val = MI->getOperand(OpNum).getImm();
+  O << "#0x";
+  O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 32));
 }
 
-template<unsigned RegWidth> void
-AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum,
+void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
                                            raw_ostream &O) {
-  const MCOperand &MO = MI->getOperand(OpNum);
-  uint64_t Val;
-  A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val);
+  uint64_t Val = MI->getOperand(OpNum).getImm();
   O << "#0x";
-  O.write_hex(Val);
+  O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 64));
 }
 
-void
-AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
-                                             raw_ostream &O, int MemSize) {
-  const MCOperand &MOImm = MI->getOperand(OpNum);
+void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
+                                      raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNum).getImm();
+  // LSL #0 should not be printed.
+  if (AArch64_AM::getShiftType(Val) == AArch64_AM::LSL &&
+      AArch64_AM::getShiftValue(Val) == 0)
+    return;
+  O << ", " << AArch64_AM::getShiftExtendName(AArch64_AM::getShiftType(Val))
+    << " #" << AArch64_AM::getShiftValue(Val);
+}
 
-  if (MOImm.isImm()) {
-    uint32_t Imm = MOImm.getImm() * MemSize;
+void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
+                                              raw_ostream &O) {
+  O << getRegisterName(MI->getOperand(OpNum).getReg());
+  printShifter(MI, OpNum + 1, O);
+}
 
-    O << "#" << Imm;
-  } else {
-    O << "#" << *MOImm.getExpr();
+void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &O) {
+  O << getRegisterName(MI->getOperand(OpNum).getReg());
+  printArithExtend(MI, OpNum + 1, O);
+}
+
+void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
+                                          raw_ostream &O) {
+  unsigned Val = MI->getOperand(OpNum).getImm();
+  AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val);
+  unsigned ShiftVal = AArch64_AM::getArithShiftValue(Val);
+
+  // If the destination or first source register operand is [W]SP, print
+  // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at
+  // all.
+  if (ExtType == AArch64_AM::UXTW || ExtType == AArch64_AM::UXTX) {
+    unsigned Dest = MI->getOperand(0).getReg();
+    unsigned Src1 = MI->getOperand(1).getReg();
+    if ( ((Dest == AArch64::SP || Src1 == AArch64::SP) &&
+          ExtType == AArch64_AM::UXTX) ||
+         ((Dest == AArch64::WSP || Src1 == AArch64::WSP) &&
+          ExtType == AArch64_AM::UXTW) ) {
+      if (ShiftVal != 0)
+        O << ", lsl #" << ShiftVal;
+      return;
+    }
   }
+  O << ", " << AArch64_AM::getShiftExtendName(ExtType);
+  if (ShiftVal != 0)
+    O << " #" << ShiftVal;
 }
 
-void
-AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum,
-                                      raw_ostream &O,
-                                      A64SE::ShiftExtSpecifiers Shift) {
-  const MCOperand &MO = MI->getOperand(OpNum);
+void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
+                                        raw_ostream &O, char SrcRegKind,
+                                        unsigned Width) {
+  unsigned SignExtend = MI->getOperand(OpNum).getImm();
+  unsigned DoShift = MI->getOperand(OpNum + 1).getImm();
 
-  // LSL #0 is not printed
-  if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0)
-    return;
+  // sxtw, sxtx, uxtw or lsl (== uxtx)
+  bool IsLSL = !SignExtend && SrcRegKind == 'x';
+  if (IsLSL)
+    O << "lsl";
+  else
+    O << (SignExtend ? 's' : 'u') << "xt" << SrcRegKind;
 
-  switch (Shift) {
-  case A64SE::LSL: O << "lsl"; break;
-  case A64SE::LSR: O << "lsr"; break;
-  case A64SE::ASR: O << "asr"; break;
-  case A64SE::ROR: O << "ror"; break;
-  default: llvm_unreachable("Invalid shift specifier in logical instruction");
-  }
+  if (DoShift || IsLSL)
+    O << " #" << Log2_32(Width / 8);
+}
 
-  O << " #" << MO.getImm();
+void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O) {
+  AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
+  O << AArch64CC::getCondCodeName(CC);
 }
 
-void
-AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O) {
-  const MCOperand &UImm16MO = MI->getOperand(OpNum);
-  const MCOperand &ShiftMO = MI->getOperand(OpNum + 1);
+void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
+                                              raw_ostream &O) {
+  AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
+  O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC));
+}
 
-  if (UImm16MO.isImm()) {
-    O << '#' << UImm16MO.getImm();
+void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
+                                        raw_ostream &O) {
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
+}
 
-    if (ShiftMO.getImm() != 0)
-      O << ", lsl #" << (ShiftMO.getImm() * 16);
+template <int Scale>
+void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
+                                       raw_ostream &O) {
+  O << '#' << Scale * MI->getOperand(OpNum).getImm();
+}
 
-    return;
+void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum,
+                                           unsigned Scale, raw_ostream &O) {
+  const MCOperand MO = MI->getOperand(OpNum);
+  if (MO.isImm()) {
+    O << "#" << (MO.getImm() * Scale);
+  } else {
+    assert(MO.isExpr() && "Unexpected operand type!");
+    O << *MO.getExpr();
   }
-
-  O << "#" << *UImm16MO.getExpr();
 }
 
-void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper,
-                                              const MCInst *MI, unsigned OpNum,
-                                              raw_ostream &O) {
-  bool ValidName;
-  const MCOperand &MO = MI->getOperand(OpNum);
-  StringRef Name = Mapper.toString(MO.getImm(), ValidName);
+void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
+                                          unsigned Scale, raw_ostream &O) {
+  const MCOperand MO1 = MI->getOperand(OpNum + 1);
+  O << '[' << getRegisterName(MI->getOperand(OpNum).getReg());
+  if (MO1.isImm()) {
+    O << ", #" << (MO1.getImm() * Scale);
+  } else {
+    assert(MO1.isExpr() && "Unexpected operand type!");
+    O << ", " << *MO1.getExpr();
+  }
+  O << ']';
+}
 
-  if (ValidName)
+void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
+                                         raw_ostream &O) {
+  unsigned prfop = MI->getOperand(OpNum).getImm();
+  bool Valid;
+  StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid);
+  if (Valid)
     O << Name;
   else
-    O << '#' << MO.getImm();
+    O << '#' << prfop;
 }
 
-void
-AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
-                                       const MCInst *MI, unsigned OpNum,
-                                       raw_ostream &O) {
+void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
   const MCOperand &MO = MI->getOperand(OpNum);
+  float FPImm =
+      MO.isFPImm() ? MO.getFPImm() : AArch64_AM::getFPImmFloat(MO.getImm());
 
-  bool ValidName;
-  std::string Name = Mapper.toString(MO.getImm(), ValidName);
-  if (ValidName) {
-    O << Name;
-    return;
-  }
+  // 8 decimal places are enough to perfectly represent permitted floats.
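+  // (For instance, imm8 = 0x70 encodes (1 + 0/16) * 2^0 = 1.0 and is
+  // printed as "#1.00000000".)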
+ O << format("#%.8f", FPImm); } +static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { + while (Stride--) { + switch (Reg) { + default: + assert(0 && "Vector register expected!"); + case AArch64::Q0: Reg = AArch64::Q1; break; + case AArch64::Q1: Reg = AArch64::Q2; break; + case AArch64::Q2: Reg = AArch64::Q3; break; + case AArch64::Q3: Reg = AArch64::Q4; break; + case AArch64::Q4: Reg = AArch64::Q5; break; + case AArch64::Q5: Reg = AArch64::Q6; break; + case AArch64::Q6: Reg = AArch64::Q7; break; + case AArch64::Q7: Reg = AArch64::Q8; break; + case AArch64::Q8: Reg = AArch64::Q9; break; + case AArch64::Q9: Reg = AArch64::Q10; break; + case AArch64::Q10: Reg = AArch64::Q11; break; + case AArch64::Q11: Reg = AArch64::Q12; break; + case AArch64::Q12: Reg = AArch64::Q13; break; + case AArch64::Q13: Reg = AArch64::Q14; break; + case AArch64::Q14: Reg = AArch64::Q15; break; + case AArch64::Q15: Reg = AArch64::Q16; break; + case AArch64::Q16: Reg = AArch64::Q17; break; + case AArch64::Q17: Reg = AArch64::Q18; break; + case AArch64::Q18: Reg = AArch64::Q19; break; + case AArch64::Q19: Reg = AArch64::Q20; break; + case AArch64::Q20: Reg = AArch64::Q21; break; + case AArch64::Q21: Reg = AArch64::Q22; break; + case AArch64::Q22: Reg = AArch64::Q23; break; + case AArch64::Q23: Reg = AArch64::Q24; break; + case AArch64::Q24: Reg = AArch64::Q25; break; + case AArch64::Q25: Reg = AArch64::Q26; break; + case AArch64::Q26: Reg = AArch64::Q27; break; + case AArch64::Q27: Reg = AArch64::Q28; break; + case AArch64::Q28: Reg = AArch64::Q29; break; + case AArch64::Q29: Reg = AArch64::Q30; break; + case AArch64::Q30: Reg = AArch64::Q31; break; + // Vector lists can wrap around. + case AArch64::Q31: + Reg = AArch64::Q0; + break; + } + } + return Reg; +} -void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O, - A64SE::ShiftExtSpecifiers Ext) { - // FIXME: In principle TableGen should be able to detect this itself far more - // easily. We will only accumulate more of these hacks. - unsigned Reg0 = MI->getOperand(0).getReg(); - unsigned Reg1 = MI->getOperand(1).getReg(); - - if (isStackReg(Reg0) || isStackReg(Reg1)) { - A64SE::ShiftExtSpecifiers LSLEquiv; - - if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP) - LSLEquiv = A64SE::UXTX; - else - LSLEquiv = A64SE::UXTW; +void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, + raw_ostream &O, + StringRef LayoutSuffix) { + unsigned Reg = MI->getOperand(OpNum).getReg(); - if (Ext == LSLEquiv) { - O << "lsl #" << MI->getOperand(OpNum).getImm(); - return; - } + O << "{ "; + + // Work out how many registers there are in the list (if there is an actual + // list). + unsigned NumRegs = 1; + if (MRI.getRegClass(AArch64::DDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQRegClassID).contains(Reg)) + NumRegs = 2; + else if (MRI.getRegClass(AArch64::DDDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQQRegClassID).contains(Reg)) + NumRegs = 3; + else if (MRI.getRegClass(AArch64::DDDDRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg)) + NumRegs = 4; + + // Now forget about the list and find out what the first register is. + if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::dsub0)) + Reg = FirstReg; + else if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::qsub0)) + Reg = FirstReg; + + // If it's a D-reg, we need to promote it to the equivalent Q-reg before + // printing (otherwise getRegisterName fails). 
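// (e.g. a DD list register containing D0 is rewritten to Q0 here; the vreg
// alternate name used in the print loop below then renders it as "v0".)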
+  if (MRI.getRegClass(AArch64::FPR64RegClassID).contains(Reg)) {
+    const MCRegisterClass &FPR128RC =
+        MRI.getRegClass(AArch64::FPR128RegClassID);
+    Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC);
   }
 
-  switch (Ext) {
-  case A64SE::UXTB: O << "uxtb"; break;
-  case A64SE::UXTH: O << "uxth"; break;
-  case A64SE::UXTW: O << "uxtw"; break;
-  case A64SE::UXTX: O << "uxtx"; break;
-  case A64SE::SXTB: O << "sxtb"; break;
-  case A64SE::SXTH: O << "sxth"; break;
-  case A64SE::SXTW: O << "sxtw"; break;
-  case A64SE::SXTX: O << "sxtx"; break;
-  default: llvm_unreachable("Unexpected shift type for printing");
+  for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
+    O << getRegisterName(Reg, AArch64::vreg) << LayoutSuffix;
+    if (i + 1 != NumRegs)
+      O << ", ";
   }
 
-  const MCOperand &MO = MI->getOperand(OpNum);
-  if (MO.getImm() != 0)
-    O << " #" << MO.getImm();
+  O << " }";
 }
 
-template<int MemScale> void
-AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
-                                            raw_ostream &O) {
-  const MCOperand &MOImm = MI->getOperand(OpNum);
-  int32_t Imm = unpackSignedImm(7, MOImm.getImm());
+void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
+                                                        unsigned OpNum,
+                                                        raw_ostream &O) {
+  printVectorList(MI, OpNum, O, "");
+}
+
+template <unsigned NumLanes, char LaneKind>
+void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
+                                              raw_ostream &O) {
+  std::string Suffix(".");
+  if (NumLanes)
+    Suffix += itostr(NumLanes) + LaneKind;
+  else
+    Suffix += LaneKind;
 
-  O << "#" << (Imm * MemScale);
+  printVectorList(MI, OpNum, O, Suffix);
 }
 
-void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo,
+void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
                                           raw_ostream &O) {
-  unsigned Reg = MI->getOperand(OpNo).getReg();
-  std::string Name = getRegisterName(Reg);
-  Name[0] = 'v';
-  O << Name;
+  O << "[" << MI->getOperand(OpNum).getImm() << "]";
 }
 
-void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                      raw_ostream &O) {
-  const MCOperand &Op = MI->getOperand(OpNo);
-  if (Op.isReg()) {
-    unsigned Reg = Op.getReg();
-    O << getRegisterName(Reg);
-  } else if (Op.isImm()) {
-    O << '#' << Op.getImm();
+void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
+                                           raw_ostream &O) {
+  const MCOperand &Op = MI->getOperand(OpNum);
+
+  // If the label has already been resolved to an immediate offset (say, when
+  // we're running the disassembler), just print the immediate.
+  if (Op.isImm()) {
+    O << "#" << (Op.getImm() << 2);
+    return;
+  }
+
+  // If the branch target is simply an address then print it in hex.
+  const MCConstantExpr *BranchTarget =
+      dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr());
+  int64_t Address;
+  if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+    O << "0x";
+    O.write_hex(Address);
   } else {
-    assert(Op.isExpr() && "unknown operand kind in printOperand");
-    // If a symbolic branch target was added as a constant expression then print
-    // that address in hex.
-    const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
-    int64_t Address;
-    if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
-      O << "0x";
-      O.write_hex(Address);
-    }
-    else {
-      // Otherwise, just print the expression.
-      O << *Op.getExpr();
-    }
+    // Otherwise, just print the expression.
+ O << *MI->getOperand(OpNum).getExpr(); } } +void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNum); -void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - if (MI->getOpcode() == AArch64::TLSDESCCALL) { - // This is a special assembler directive which applies an - // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed - // form outside the normal TableGenerated scheme. - O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr(); - } else if (!printAliasInstr(MI, O)) - printInstruction(MI, O); + // If the label has already been resolved to an immediate offset (say, when + // we're running the disassembler), just print the immediate. + if (Op.isImm()) { + O << "#" << (Op.getImm() << 12); + return; + } - printAnnotation(O, Annot); + // Otherwise, just print the expression. + O << *MI->getOperand(OpNum).getExpr(); } -template -void AArch64InstPrinter::printNeonMovImmShiftOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - - assert(MO.isImm() && - "Immediate operand required for Neon vector immediate inst."); - - bool IsLSL = false; - if (Ext == A64SE::LSL) - IsLSL = true; - else if (Ext != A64SE::MSL) - llvm_unreachable("Invalid shift specifier in movi instruction"); - - int64_t Imm = MO.getImm(); - - // MSL and LSLH accepts encoded shift amount 0 or 1. - if ((!IsLSL || (IsLSL && isHalf)) && Imm != 0 && Imm != 1) - llvm_unreachable("Invalid shift amount in movi instruction"); - - // LSH accepts encoded shift amount 0, 1, 2 or 3. - if (IsLSL && (Imm < 0 || Imm > 3)) - llvm_unreachable("Invalid shift amount in movi instruction"); - - // Print shift amount as multiple of 8 with MSL encoded shift amount - // 0 and 1 printed as 8 and 16. 
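// (i.e. encoded MSL amount 0 prints as "msl #8", and 1 prints as "msl #16".)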
- if (!IsLSL) - Imm++; - Imm *= 8; - - // LSL #0 is not printed - if (IsLSL) { - if (Imm == 0) - return; - O << ", lsl"; - } else - O << ", msl"; - - O << " #" << Imm; -} +void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + unsigned Opcode = MI->getOpcode(); -void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &o) { - o << "#0x0"; + bool Valid; + StringRef Name; + if (Opcode == AArch64::ISB) + Name = AArch64ISB::ISBMapper().toString(Val, Valid); + else + Name = AArch64DB::DBarrierMapper().toString(Val, Valid); + if (Valid) + O << Name; + else + O << "#" << Val; } -void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm = MI->getOperand(OpNum); - - assert(MOUImm.isImm() && - "Immediate operand required for Neon vector immediate inst."); +void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); - unsigned Imm = MOUImm.getImm(); + bool Valid; + auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures()); + std::string Name = Mapper.toString(Val, Valid); - O << "#0x"; - O.write_hex(Imm); + if (Valid) + O << StringRef(Name).upper(); } -void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm = MI->getOperand(OpNum); +void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); - assert(MOUImm.isImm() - && "Immediate operand required for Neon vector immediate inst."); + bool Valid; + auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures()); + std::string Name = Mapper.toString(Val, Valid); - unsigned Imm = MOUImm.getImm(); - O << Imm; + if (Valid) + O << StringRef(Name).upper(); } -void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &MOUImm8 = MI->getOperand(OpNum); - - assert(MOUImm8.isImm() && - "Immediate operand required for Neon vector immediate bytemask inst."); +void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); - uint32_t UImm8 = MOUImm8.getImm(); - uint64_t Mask = 0; - - // Replicates 0x00 or 0xff byte in a 64-bit vector - for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { - if ((UImm8 >> ByteNum) & 1) - Mask |= (uint64_t)0xff << (8 * ByteNum); - } - - O << "#0x"; - O.write_hex(Mask); + bool Valid; + StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid); + if (Valid) + O << StringRef(Name.str()).upper(); + else + O << "#" << Val; } -// If Count > 1, there are two valid kinds of vector list: -// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout} -// (2) {Vn.layout - Vm.layout} -// We choose the first kind as output. -template -void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors"); - - unsigned Reg = MI->getOperand(OpNum).getReg(); - std::string LayoutStr = A64VectorLayoutToString(Layout); - O << "{"; - if (Count > 1) { // Print sub registers separately - bool IsVec64 = (Layout < A64Layout::VL_16B); - unsigned SubRegIdx = IsVec64 ? 
AArch64::dsub_0 : AArch64::qsub_0; - for (unsigned I = 0; I < Count; I++) { - std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++)); - Name[0] = 'v'; - O << Name << LayoutStr; - if (I != Count - 1) - O << ", "; - } - } else { // Print the register directly when NumVecs is 1. - std::string Name = getRegisterName(Reg); - Name[0] = 'v'; - O << Name << LayoutStr; - } - O << "}"; +void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned RawVal = MI->getOperand(OpNo).getImm(); + uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal); + O << format("#%#016llx", Val); } diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 37b7273..fe7666e 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_AARCH64INSTPRINTER_H -#define LLVM_AARCH64INSTPRINTER_H +#ifndef AArch64INSTPRINTER_H +#define AArch64INSTPRINTER_H #include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "Utils/AArch64BaseInfo.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -28,154 +28,112 @@ public: AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - // Autogenerated by tblgen - void printInstruction(const MCInst *MI, raw_ostream &O); - bool printAliasInstr(const MCInst *MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - static const char *getInstructionName(unsigned Opcode); + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; + void printRegName(raw_ostream &OS, unsigned RegNo) const override; - void printRegName(raw_ostream &O, unsigned RegNum) const; - - template - void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize); + // Autogenerated by tblgen. 
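// (Two AsmWriter tables are generated for AArch64 -- AArch64GenAsmWriter.inc
// and AArch64GenAsmWriter1.inc, per the Android.mk change below; the second
// appears to back the Apple-syntax AArch64AppleInstPrinter declared further
// down.)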
+ virtual void printInstruction(const MCInst *MI, raw_ostream &O); + virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); + virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, raw_ostream &O); + virtual StringRef getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); } + static const char *getRegisterName(unsigned RegNo, + unsigned AltIdx = AArch64::NoRegAltName); - - void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, unsigned MemSize, - unsigned RmSize); - - void printAddSubImmLSL0Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O); - void printAddSubImmLSL12Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &O); - - void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template - void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - - void printCondCodeOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printCRxOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); - - void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o); - - template - void printOffsetUImm12Operand(const MCInst *MI, - unsigned OpNum, raw_ostream &o) { - printOffsetUImm12Operand(MI, OpNum, o, MemScale); +protected: + bool printSysAlias(const MCInst *MI, raw_ostream &O); + // Operand printers + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, + raw_ostream &O); + template + void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printPostIncOperand(MI, OpNo, Amount, O); } - void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &o, int MemScale); - - template - void printLabelOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template - void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template - void printNamedImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O); + void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); + + void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O, + char SrcRegKind, unsigned Width); + template + void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) { + printMemExtend(MI, OpNum, O, SrcRegKind, Width); } - void printNamedImmOperand(const NamedImmMapper &Mapper, - const MCInst *MI, unsigned OpNum, - raw_ostream 
&O);
-
-  void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
-                          const MCInst *MI, unsigned OpNum,
-                          raw_ostream &O);
+  void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
+                         raw_ostream &O);
+  void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale,
+                        raw_ostream &O);
 
-  void printMRSOperand(const MCInst *MI, unsigned OpNum,
-                       raw_ostream &O) {
-    printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O);
+  template <int Scale>
+  void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+    printUImm12Offset(MI, OpNum, Scale, O);
   }
 
-  void printMSROperand(const MCInst *MI, unsigned OpNum,
-                       raw_ostream &O) {
-    printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O);
+  template <int BitWidth>
+  void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+    printAMIndexedWB(MI, OpNum, BitWidth / 8, O);
   }
 
-  void printShiftOperand(const char *name, const MCInst *MI,
-                         unsigned OpIdx, raw_ostream &O);
-
-  void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
 
-  void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
-    printShiftOperand("lsr", MI, OpNum, O);
-  }
-  void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
-    printShiftOperand("asr", MI, OpNum, O);
-  }
-  void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
-    printShiftOperand("ror", MI, OpNum, O);
-  }
+  template <int Scale>
+  void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O);
 
-  template<A64SE::ShiftExtSpecifiers Shift>
-  void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
-    printShiftOperand(MI, OpNum, O, Shift);
-  }
+  void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
 
-  void printShiftOperand(const MCInst *MI, unsigned OpNum,
-                         raw_ostream &O, A64SE::ShiftExtSpecifiers Sh);
+  void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
 
+  void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+                       StringRef LayoutSuffix);
 
-  void printMoveWideImmOperand(const MCInst *MI,  unsigned OpNum,
-                               raw_ostream &O);
+  /// Print a list of vector registers where the type suffix is implicit
+  /// (i.e. attached to the instruction rather than the registers).
+ void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum, + raw_ostream &O); - template void - printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + template + void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - template - void printRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printRegExtendOperand(MI, OpNum, O, EXT); - } + void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); +}; - void printRegExtendOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); +class AArch64AppleInstPrinter : public AArch64InstPrinter { +public: + AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; - bool isStackReg(unsigned RegNo) { - return RegNo == AArch64::XSP || RegNo == AArch64::WSP; + void printInstruction(const MCInst *MI, raw_ostream &O) override; + bool printAliasInstr(const MCInst *MI, raw_ostream &O) override; + virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, raw_ostream &O); + StringRef getRegName(unsigned RegNo) const override { + return getRegisterName(RegNo); } - - template - void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo, + unsigned AltIdx = AArch64::NoRegAltName); }; } diff --git a/lib/Target/AArch64/InstPrinter/Android.mk b/lib/Target/AArch64/InstPrinter/Android.mk index ac9b0df..de6aa89 100644 --- a/lib/Target/AArch64/InstPrinter/Android.mk +++ b/lib/Target/AArch64/InstPrinter/Android.mk @@ -2,6 +2,7 @@ LOCAL_PATH := $(call my-dir) arm64_asm_printer_TBLGEN_TABLES := \ AArch64GenAsmWriter.inc \ + AArch64GenAsmWriter1.inc \ AArch64GenRegisterInfo.inc \ AArch64GenSubtargetInfo.inc \ AArch64GenInstrInfo.inc diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt index 3db56e4..363f502 100644 --- a/lib/Target/AArch64/InstPrinter/CMakeLists.txt +++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt @@ -1,3 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. 
${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
 add_llvm_library(LLVMAArch64AsmPrinter
   AArch64InstPrinter.cpp
   )
+
+add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
index 4836c7c..a13e842 100644
--- a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
 ;
 ; The LLVM Compiler Infrastructure
 ;
diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile
index 1c36a8d..b17e8d0 100644
--- a/lib/Target/AArch64/InstPrinter/Makefile
+++ b/lib/Target/AArch64/InstPrinter/Makefile
@@ -9,7 +9,7 @@
 LEVEL = ../../../..
 LIBRARYNAME = LLVMAArch64AsmPrinter
 
-# Hack: we need to include 'main' target directory to grab private headers
+# Hack: we need to include 'main' arm target directory to grab private headers
 CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
index 4c8f101..642c183 100644
--- a/lib/Target/AArch64/LLVMBuild.txt
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===;
+;===- ./lib/Target/AArch64/LLVMBuild.txt -------------------------*- Conf -*--===;
 ;
 ; The LLVM Compiler Infrastructure
 ;
@@ -31,5 +31,5 @@ has_jit = 1
 type = Library
 name = AArch64CodeGen
 parent = AArch64
-required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils AsmPrinter CodeGen Core MC SelectionDAG Support Target
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target
 add_to_library_groups = AArch64
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
new file mode 100644
index 0000000..8b1e44e
--- /dev/null
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -0,0 +1,738 @@
+//===- AArch64AddressingModes.h - AArch64 Addressing Modes ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AArch64 addressing mode implementation stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H
+#define LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+
+namespace llvm {
+
+/// AArch64_AM - AArch64 Addressing Mode Stuff
+namespace AArch64_AM {
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//
+
+enum ShiftExtendType {
+  InvalidShiftExtend = -1,
+  LSL = 0,
+  LSR,
+  ASR,
+  ROR,
+  MSL,
+
+  UXTB,
+  UXTH,
+  UXTW,
+  UXTX,
+
+  SXTB,
+  SXTH,
+  SXTW,
+  SXTX,
+};
+
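// A minimal round-trip sketch of the shifter encoding (illustrative only,
// assuming nothing beyond the helpers defined below):
//
//   unsigned Imm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12); // (0<<6)|12
//   assert(AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
//   assert(AArch64_AM::getShiftValue(Imm) == 12);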
+/// getShiftExtendName - Get the string encoding for the shift type.
+static inline const char *getShiftExtendName(AArch64_AM::ShiftExtendType ST) {
+  switch (ST) {
+  default: assert(false && "unhandled shift type!");
+  case AArch64_AM::LSL: return "lsl";
+  case AArch64_AM::LSR: return "lsr";
+  case AArch64_AM::ASR: return "asr";
+  case AArch64_AM::ROR: return "ror";
+  case AArch64_AM::MSL: return "msl";
+  case AArch64_AM::UXTB: return "uxtb";
+  case AArch64_AM::UXTH: return "uxth";
+  case AArch64_AM::UXTW: return "uxtw";
+  case AArch64_AM::UXTX: return "uxtx";
+  case AArch64_AM::SXTB: return "sxtb";
+  case AArch64_AM::SXTH: return "sxth";
+  case AArch64_AM::SXTW: return "sxtw";
+  case AArch64_AM::SXTX: return "sxtx";
+  }
+  return nullptr;
+}
+
+/// getShiftType - Extract the shift type.
+static inline AArch64_AM::ShiftExtendType getShiftType(unsigned Imm) {
+  switch ((Imm >> 6) & 0x7) {
+  default: return AArch64_AM::InvalidShiftExtend;
+  case 0: return AArch64_AM::LSL;
+  case 1: return AArch64_AM::LSR;
+  case 2: return AArch64_AM::ASR;
+  case 3: return AArch64_AM::ROR;
+  case 4: return AArch64_AM::MSL;
+  }
+}
+
+/// getShiftValue - Extract the shift value.
+static inline unsigned getShiftValue(unsigned Imm) {
+  return Imm & 0x3f;
+}
+
+/// getShifterImm - Encode the shift type and amount:
+///   imm:     6-bit shift amount
+///   shifter: 000 ==> lsl
+///            001 ==> lsr
+///            010 ==> asr
+///            011 ==> ror
+///            100 ==> msl
+///   {8-6}  = shifter
+///   {5-0}  = imm
+static inline unsigned getShifterImm(AArch64_AM::ShiftExtendType ST,
+                                     unsigned Imm) {
+  assert((Imm & 0x3f) == Imm && "Illegal shifted immediate value!");
+  unsigned STEnc = 0;
+  switch (ST) {
+  default: llvm_unreachable("Invalid shift requested");
+  case AArch64_AM::LSL: STEnc = 0; break;
+  case AArch64_AM::LSR: STEnc = 1; break;
+  case AArch64_AM::ASR: STEnc = 2; break;
+  case AArch64_AM::ROR: STEnc = 3; break;
+  case AArch64_AM::MSL: STEnc = 4; break;
+  }
+  return (STEnc << 6) | (Imm & 0x3f);
+}
+
+//===----------------------------------------------------------------------===//
+// Extends
+//
+
+/// getArithShiftValue - get the arithmetic shift value.
+static inline unsigned getArithShiftValue(unsigned Imm) {
+  return Imm & 0x7;
+}
+
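// A minimal sketch of the arithmetic-extend encoding (illustrative only,
// using the helpers just above and below):
//
//   unsigned Imm = AArch64_AM::getArithExtendImm(AArch64_AM::SXTW, 2); // (6<<3)|2
//   assert(AArch64_AM::getArithExtendType(Imm) == AArch64_AM::SXTW);
//   assert(AArch64_AM::getArithShiftValue(Imm) == 2);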
+/// getExtendType - Extract the extend type for operands of arithmetic ops.
+static inline AArch64_AM::ShiftExtendType getExtendType(unsigned Imm) {
+  assert((Imm & 0x7) == Imm && "invalid immediate!");
+  switch (Imm) {
+  default: llvm_unreachable("Compiler bug!");
+  case 0: return AArch64_AM::UXTB;
+  case 1: return AArch64_AM::UXTH;
+  case 2: return AArch64_AM::UXTW;
+  case 3: return AArch64_AM::UXTX;
+  case 4: return AArch64_AM::SXTB;
+  case 5: return AArch64_AM::SXTH;
+  case 6: return AArch64_AM::SXTW;
+  case 7: return AArch64_AM::SXTX;
+  }
+}
+
+static inline AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm) {
+  return getExtendType((Imm >> 3) & 0x7);
+}
+
+/// Mapping from extend bits to required operation:
+///   shifter: 000 ==> uxtb
+///            001 ==> uxth
+///            010 ==> uxtw
+///            011 ==> uxtx
+///            100 ==> sxtb
+///            101 ==> sxth
+///            110 ==> sxtw
+///            111 ==> sxtx
+inline unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET) {
+  switch (ET) {
+  default: llvm_unreachable("Invalid extend type requested");
+  case AArch64_AM::UXTB: return 0; break;
+  case AArch64_AM::UXTH: return 1; break;
+  case AArch64_AM::UXTW: return 2; break;
+  case AArch64_AM::UXTX: return 3; break;
+  case AArch64_AM::SXTB: return 4; break;
+  case AArch64_AM::SXTH: return 5; break;
+  case AArch64_AM::SXTW: return 6; break;
+  case AArch64_AM::SXTX: return 7; break;
+  }
+}
+
+/// getArithExtendImm - Encode the extend type and shift amount for an
+///   arithmetic instruction:
+///   imm:     3-bit extend amount
+///   {5-3}  = shifter
+///   {2-0}  = imm3
+static inline unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET,
+                                         unsigned Imm) {
+  assert((Imm & 0x7) == Imm && "Illegal shifted immediate value!");
+  return (getExtendEncoding(ET) << 3) | (Imm & 0x7);
+}
+
+/// getMemDoShift - Extract the "do shift" flag value for load/store
+/// instructions.
+static inline bool getMemDoShift(unsigned Imm) {
+  return (Imm & 0x1) != 0;
+}
+
+/// getMemExtendType - Extract the extend type for the offset operand of
+/// loads/stores.
+static inline AArch64_AM::ShiftExtendType getMemExtendType(unsigned Imm) {
+  return getExtendType((Imm >> 1) & 0x7);
+}
+
+/// getMemExtendImm - Encode the extend type and amount for a load/store inst:
+///   doshift:     should the offset be scaled by the access size
+///   shifter: 000 ==> uxtb
+///            001 ==> uxth
+///            010 ==> uxtw
+///            011 ==> uxtx
+///            100 ==> sxtb
+///            101 ==> sxth
+///            110 ==> sxtw
+///            111 ==> sxtx
+///   {3-1}  = shifter
+///   {0}  = doshift
+static inline unsigned getMemExtendImm(AArch64_AM::ShiftExtendType ET,
+                                       bool DoShift) {
+  return (getExtendEncoding(ET) << 1) | unsigned(DoShift);
+}
+
+static inline uint64_t ror(uint64_t elt, unsigned size) {
+  return ((elt & 1) << (size-1)) | (elt >> 1);
+}
+
+/// processLogicalImmediate - Determine if an immediate value can be encoded
+/// as the immediate operand of a logical instruction for the given register
+/// size. If so, return true with "encoding" set to the encoded value in
+/// the form N:immr:imms.
+static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize,
+                                           uint64_t &encoding) {
+  if (imm == 0ULL || imm == ~0ULL ||
+      (regSize != 64 && (imm >> regSize != 0 || imm == ~0U)))
+    return false;
+
+  unsigned size = 2;
+  uint64_t eltVal = imm;
+
+  // First, determine the element size.
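// (e.g. imm == 0x00ff00ff00ff00ffULL settles at size == 16 with
// eltVal == 0x00ff, since no smaller chunk repeats across the value.)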
+ while (size < regSize) { + unsigned numElts = regSize / size; + unsigned mask = (1ULL << size) - 1; + uint64_t lowestEltVal = imm & mask; + + bool allMatched = true; + for (unsigned i = 1; i < numElts; ++i) { + uint64_t currEltVal = (imm >> (i*size)) & mask; + if (currEltVal != lowestEltVal) { + allMatched = false; + break; + } + } + + if (allMatched) { + eltVal = lowestEltVal; + break; + } + + size *= 2; + } + + // Second, determine the rotation to make the element be: 0^m 1^n. + for (unsigned i = 0; i < size; ++i) { + eltVal = ror(eltVal, size); + uint32_t clz = countLeadingZeros(eltVal) - (64 - size); + uint32_t cto = CountTrailingOnes_64(eltVal); + + if (clz + cto == size) { + // Encode in immr the number of RORs it would take to get *from* this + // element value to our target value, where i+1 is the number of RORs + // to go the opposite direction. + unsigned immr = size - (i + 1); + + // If size has a 1 in the n'th bit, create a value that has zeroes in + // bits [0, n] and ones above that. + uint64_t nimms = ~(size-1) << 1; + + // Or the CTO value into the low bits, which must be below the Nth bit + // bit mentioned above. + nimms |= (cto-1); + + // Extract the seventh bit and toggle it to create the N field. + unsigned N = ((nimms >> 6) & 1) ^ 1; + + encoding = (N << 12) | (immr << 6) | (nimms & 0x3f); + return true; + } + } + + return false; +} + +/// isLogicalImmediate - Return true if the immediate is valid for a logical +/// immediate instruction of the given register size. Return false otherwise. +static inline bool isLogicalImmediate(uint64_t imm, unsigned regSize) { + uint64_t encoding; + return processLogicalImmediate(imm, regSize, encoding); +} + +/// encodeLogicalImmediate - Return the encoded immediate value for a logical +/// immediate instruction of the given register size. +static inline uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize) { + uint64_t encoding = 0; + bool res = processLogicalImmediate(imm, regSize, encoding); + assert(res && "invalid logical immediate"); + (void)res; + return encoding; +} + +/// decodeLogicalImmediate - Decode a logical immediate value in the form +/// "N:immr:imms" (where the immr and imms fields are each 6 bits) into the +/// integer value it represents with regSize bits. +static inline uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize) { + // Extract the N, imms, and immr fields. + unsigned N = (val >> 12) & 1; + unsigned immr = (val >> 6) & 0x3f; + unsigned imms = val & 0x3f; + + assert((regSize == 64 || N == 0) && "undefined logical immediate encoding"); + int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f)); + assert(len >= 0 && "undefined logical immediate encoding"); + unsigned size = (1 << len); + unsigned R = immr & (size - 1); + unsigned S = imms & (size - 1); + assert(S != size - 1 && "undefined logical immediate encoding"); + uint64_t pattern = (1ULL << (S + 1)) - 1; + for (unsigned i = 0; i < R; ++i) + pattern = ror(pattern, size); + + // Replicate the pattern to fill the regSize. + while (size != regSize) { + pattern |= (pattern << size); + size *= 2; + } + return pattern; +} + +/// isValidDecodeLogicalImmediate - Check to see if the logical immediate value +/// in the form "N:immr:imms" (where the immr and imms fields are each 6 bits) +/// is a valid encoding for an integer value with regSize bits. +static inline bool isValidDecodeLogicalImmediate(uint64_t val, + unsigned regSize) { + // Extract the N and imms fields needed for checking. 
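// (e.g. when regSize == 32, any encoding with N == 1 is rejected, and
// imms == 0x3f -- an all-ones element -- is rejected for any size.)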
+  unsigned N = (val >> 12) & 1;
+  unsigned imms = val & 0x3f;
+
+  if (regSize == 32 && N != 0) // undefined logical immediate encoding
+    return false;
+  int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
+  if (len < 0) // undefined logical immediate encoding
+    return false;
+  unsigned size = (1 << len);
+  unsigned S = imms & (size - 1);
+  if (S == size - 1) // undefined logical immediate encoding
+    return false;
+
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Floating-point Immediates
+//
+static inline float getFPImmFloat(unsigned Imm) {
+  // We expect an 8-bit binary encoding of a floating-point number here.
+  union {
+    uint32_t I;
+    float F;
+  } FPUnion;
+
+  uint8_t Sign = (Imm >> 7) & 0x1;
+  uint8_t Exp = (Imm >> 4) & 0x7;
+  uint8_t Mantissa = Imm & 0xf;
+
+  //   8-bit FP IEEE Float Encoding
+  //   abcd efgh aBbbbbbc defgh000 00000000 00000000
+  //
+  // where B = NOT(b);
+
+  FPUnion.I = 0;
+  FPUnion.I |= Sign << 31;
+  FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
+  FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
+  FPUnion.I |= (Exp & 0x3) << 23;
+  FPUnion.I |= Mantissa << 19;
+  return FPUnion.F;
+}
+
+/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
+/// floating-point value. If the value cannot be represented as an 8-bit
+/// floating-point value, then return -1.
+static inline int getFP32Imm(const APInt &Imm) {
+  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
+  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
+
+  // We can handle 4 bits of mantissa.
+  // mantissa = (16+UInt(e:f:g:h))/16.
+  if (Mantissa & 0x7ffff)
+    return -1;
+  Mantissa >>= 19;
+  if ((Mantissa & 0xf) != Mantissa)
+    return -1;
+
+  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+  if (Exp < -3 || Exp > 4)
+    return -1;
+  Exp = ((Exp+3) & 0x7) ^ 4;
+
+  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+static inline int getFP32Imm(const APFloat &FPImm) {
+  return getFP32Imm(FPImm.bitcastToAPInt());
+}
+
+/// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
+/// floating-point value. If the value cannot be represented as an 8-bit
+/// floating-point value, then return -1.
+static inline int getFP64Imm(const APInt &Imm) {
+  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;   // -1022 to 1023
+  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
+
+  // We can handle 4 bits of mantissa.
+  // mantissa = (16+UInt(e:f:g:h))/16.
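// (e.g. +2.0 -- sign 0, exponent 1, mantissa 0 -- encodes as 0x00, and
// getFPImmFloat(0) above reproduces 2.0f.)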
+ if (Mantissa & 0xffffffffffffULL) + return -1; + Mantissa >>= 48; + if ((Mantissa & 0xf) != Mantissa) + return -1; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; +} + +static inline int getFP64Imm(const APFloat &FPImm) { + return getFP64Imm(FPImm.bitcastToAPInt()); +} + +//===--------------------------------------------------------------------===// +// AdvSIMD Modified Immediates +//===--------------------------------------------------------------------===// + +// 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh +static inline bool isAdvSIMDModImmType1(uint64_t Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + ((Imm & 0xffffff00ffffff00ULL) == 0); +} + +static inline uint8_t encodeAdvSIMDModImmType1(uint64_t Imm) { + return (Imm & 0xffULL); +} + +static inline uint64_t decodeAdvSIMDModImmType1(uint8_t Imm) { + uint64_t EncVal = Imm; + return (EncVal << 32) | EncVal; +} + +// 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 +static inline bool isAdvSIMDModImmType2(uint64_t Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + ((Imm & 0xffff00ffffff00ffULL) == 0); +} + +static inline uint8_t encodeAdvSIMDModImmType2(uint64_t Imm) { + return (Imm & 0xff00ULL) >> 8; +} + +static inline uint64_t decodeAdvSIMDModImmType2(uint8_t Imm) { + uint64_t EncVal = Imm; + return (EncVal << 40) | (EncVal << 8); +} + +// 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 +static inline bool isAdvSIMDModImmType3(uint64_t Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + ((Imm & 0xff00ffffff00ffffULL) == 0); +} + +static inline uint8_t encodeAdvSIMDModImmType3(uint64_t Imm) { + return (Imm & 0xff0000ULL) >> 16; +} + +static inline uint64_t decodeAdvSIMDModImmType3(uint8_t Imm) { + uint64_t EncVal = Imm; + return (EncVal << 48) | (EncVal << 16); +} + +// abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 +static inline bool isAdvSIMDModImmType4(uint64_t Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + ((Imm & 0x00ffffff00ffffffULL) == 0); +} + +static inline uint8_t encodeAdvSIMDModImmType4(uint64_t Imm) { + return (Imm & 0xff000000ULL) >> 24; +} + +static inline uint64_t decodeAdvSIMDModImmType4(uint8_t Imm) { + uint64_t EncVal = Imm; + return (EncVal << 56) | (EncVal << 24); +} + +// 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh +static inline bool isAdvSIMDModImmType5(uint64_t Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + (((Imm & 0x00ff0000ULL) >> 16) == (Imm & 0x000000ffULL)) && + ((Imm & 0xff00ff00ff00ff00ULL) == 0); +} + +static inline uint8_t encodeAdvSIMDModImmType5(uint64_t Imm) { + return (Imm & 0xffULL); +} + +static inline uint64_t decodeAdvSIMDModImmType5(uint8_t Imm) { + uint64_t EncVal = Imm; + return (EncVal << 48) | (EncVal << 32) | (EncVal << 16) | EncVal; +} + +// abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 +static inline bool isAdvSIMDModImmType6(uint64_t Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + (((Imm & 0xff000000ULL) >> 16) == (Imm & 0x0000ff00ULL)) && + ((Imm & 0x00ff00ff00ff00ffULL) == 0); +} + +static inline uint8_t encodeAdvSIMDModImmType6(uint64_t Imm) { + return (Imm & 0xff00ULL) >> 8; +} + +static inline uint64_t decodeAdvSIMDModImmType6(uint8_t Imm) { + uint64_t EncVal = Imm; + return (EncVal << 56) | (EncVal << 40) | (EncVal << 24) | (EncVal << 8); +} + +// 0x00 0x00 abcdefgh 0xFF 0x00 0x00 abcdefgh 0xFF +static inline bool isAdvSIMDModImmType7(uint64_t 
Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + ((Imm & 0xffff00ffffff00ffULL) == 0x000000ff000000ffULL); +} + +static inline uint8_t encodeAdvSIMDModImmType7(uint64_t Imm) { + return (Imm & 0xff00ULL) >> 8; +} + +static inline uint64_t decodeAdvSIMDModImmType7(uint8_t Imm) { + uint64_t EncVal = Imm; + return (EncVal << 40) | (EncVal << 8) | 0x000000ff000000ffULL; +} + +// 0x00 abcdefgh 0xFF 0xFF 0x00 abcdefgh 0xFF 0xFF +static inline bool isAdvSIMDModImmType8(uint64_t Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + ((Imm & 0xff00ffffff00ffffULL) == 0x0000ffff0000ffffULL); +} + +static inline uint64_t decodeAdvSIMDModImmType8(uint8_t Imm) { + uint64_t EncVal = Imm; + return (EncVal << 48) | (EncVal << 16) | 0x0000ffff0000ffffULL; +} + +static inline uint8_t encodeAdvSIMDModImmType8(uint64_t Imm) { + return (Imm & 0x00ff0000ULL) >> 16; +} + +// abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh +static inline bool isAdvSIMDModImmType9(uint64_t Imm) { + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + ((Imm >> 48) == (Imm & 0x0000ffffULL)) && + ((Imm >> 56) == (Imm & 0x000000ffULL)); +} + +static inline uint8_t encodeAdvSIMDModImmType9(uint64_t Imm) { + return (Imm & 0xffULL); +} + +static inline uint64_t decodeAdvSIMDModImmType9(uint8_t Imm) { + uint64_t EncVal = Imm; + EncVal |= (EncVal << 8); + EncVal |= (EncVal << 16); + EncVal |= (EncVal << 32); + return EncVal; +} + +// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh +// cmode: 1110, op: 1 +static inline bool isAdvSIMDModImmType10(uint64_t Imm) { + uint64_t ByteA = Imm & 0xff00000000000000ULL; + uint64_t ByteB = Imm & 0x00ff000000000000ULL; + uint64_t ByteC = Imm & 0x0000ff0000000000ULL; + uint64_t ByteD = Imm & 0x000000ff00000000ULL; + uint64_t ByteE = Imm & 0x00000000ff000000ULL; + uint64_t ByteF = Imm & 0x0000000000ff0000ULL; + uint64_t ByteG = Imm & 0x000000000000ff00ULL; + uint64_t ByteH = Imm & 0x00000000000000ffULL; + + return (ByteA == 0ULL || ByteA == 0xff00000000000000ULL) && + (ByteB == 0ULL || ByteB == 0x00ff000000000000ULL) && + (ByteC == 0ULL || ByteC == 0x0000ff0000000000ULL) && + (ByteD == 0ULL || ByteD == 0x000000ff00000000ULL) && + (ByteE == 0ULL || ByteE == 0x00000000ff000000ULL) && + (ByteF == 0ULL || ByteF == 0x0000000000ff0000ULL) && + (ByteG == 0ULL || ByteG == 0x000000000000ff00ULL) && + (ByteH == 0ULL || ByteH == 0x00000000000000ffULL); +} + +static inline uint8_t encodeAdvSIMDModImmType10(uint64_t Imm) { + uint8_t BitA = (Imm & 0xff00000000000000ULL) != 0; + uint8_t BitB = (Imm & 0x00ff000000000000ULL) != 0; + uint8_t BitC = (Imm & 0x0000ff0000000000ULL) != 0; + uint8_t BitD = (Imm & 0x000000ff00000000ULL) != 0; + uint8_t BitE = (Imm & 0x00000000ff000000ULL) != 0; + uint8_t BitF = (Imm & 0x0000000000ff0000ULL) != 0; + uint8_t BitG = (Imm & 0x000000000000ff00ULL) != 0; + uint8_t BitH = (Imm & 0x00000000000000ffULL) != 0; + + uint8_t EncVal = BitA; + EncVal <<= 1; + EncVal |= BitB; + EncVal <<= 1; + EncVal |= BitC; + EncVal <<= 1; + EncVal |= BitD; + EncVal <<= 1; + EncVal |= BitE; + EncVal <<= 1; + EncVal |= BitF; + EncVal <<= 1; + EncVal |= BitG; + EncVal <<= 1; + EncVal |= BitH; + return EncVal; +} + +static inline uint64_t decodeAdvSIMDModImmType10(uint8_t Imm) { + uint64_t EncVal = 0; + if (Imm & 0x80) EncVal |= 0xff00000000000000ULL; + if (Imm & 0x40) EncVal |= 0x00ff000000000000ULL; + if (Imm & 0x20) EncVal |= 0x0000ff0000000000ULL; + if (Imm & 0x10) EncVal |= 0x000000ff00000000ULL; + if (Imm & 0x08) EncVal |= 0x00000000ff000000ULL; + 
if (Imm & 0x04) EncVal |= 0x0000000000ff0000ULL; + if (Imm & 0x02) EncVal |= 0x000000000000ff00ULL; + if (Imm & 0x01) EncVal |= 0x00000000000000ffULL; + return EncVal; +} + +// aBbbbbbc defgh000 0x00 0x00 aBbbbbbc defgh000 0x00 0x00 +static inline bool isAdvSIMDModImmType11(uint64_t Imm) { + uint64_t BString = (Imm & 0x7E000000ULL) >> 25; + return ((Imm >> 32) == (Imm & 0xffffffffULL)) && + (BString == 0x1f || BString == 0x20) && + ((Imm & 0x0007ffff0007ffffULL) == 0); +} + +static inline uint8_t encodeAdvSIMDModImmType11(uint64_t Imm) { + uint8_t BitA = (Imm & 0x80000000ULL) != 0; + uint8_t BitB = (Imm & 0x20000000ULL) != 0; + uint8_t BitC = (Imm & 0x01000000ULL) != 0; + uint8_t BitD = (Imm & 0x00800000ULL) != 0; + uint8_t BitE = (Imm & 0x00400000ULL) != 0; + uint8_t BitF = (Imm & 0x00200000ULL) != 0; + uint8_t BitG = (Imm & 0x00100000ULL) != 0; + uint8_t BitH = (Imm & 0x00080000ULL) != 0; + + uint8_t EncVal = BitA; + EncVal <<= 1; + EncVal |= BitB; + EncVal <<= 1; + EncVal |= BitC; + EncVal <<= 1; + EncVal |= BitD; + EncVal <<= 1; + EncVal |= BitE; + EncVal <<= 1; + EncVal |= BitF; + EncVal <<= 1; + EncVal |= BitG; + EncVal <<= 1; + EncVal |= BitH; + return EncVal; +} + +static inline uint64_t decodeAdvSIMDModImmType11(uint8_t Imm) { + uint64_t EncVal = 0; + if (Imm & 0x80) EncVal |= 0x80000000ULL; + if (Imm & 0x40) EncVal |= 0x3e000000ULL; + else EncVal |= 0x40000000ULL; + if (Imm & 0x20) EncVal |= 0x01000000ULL; + if (Imm & 0x10) EncVal |= 0x00800000ULL; + if (Imm & 0x08) EncVal |= 0x00400000ULL; + if (Imm & 0x04) EncVal |= 0x00200000ULL; + if (Imm & 0x02) EncVal |= 0x00100000ULL; + if (Imm & 0x01) EncVal |= 0x00080000ULL; + return (EncVal << 32) | EncVal; +} + +// aBbbbbbb bbcdefgh 0x00 0x00 0x00 0x00 0x00 0x00 +static inline bool isAdvSIMDModImmType12(uint64_t Imm) { + uint64_t BString = (Imm & 0x7fc0000000000000ULL) >> 54; + return ((BString == 0xff || BString == 0x100) && + ((Imm & 0x0000ffffffffffffULL) == 0)); +} + +static inline uint8_t encodeAdvSIMDModImmType12(uint64_t Imm) { + uint8_t BitA = (Imm & 0x8000000000000000ULL) != 0; + uint8_t BitB = (Imm & 0x0040000000000000ULL) != 0; + uint8_t BitC = (Imm & 0x0020000000000000ULL) != 0; + uint8_t BitD = (Imm & 0x0010000000000000ULL) != 0; + uint8_t BitE = (Imm & 0x0008000000000000ULL) != 0; + uint8_t BitF = (Imm & 0x0004000000000000ULL) != 0; + uint8_t BitG = (Imm & 0x0002000000000000ULL) != 0; + uint8_t BitH = (Imm & 0x0001000000000000ULL) != 0; + + uint8_t EncVal = BitA; + EncVal <<= 1; + EncVal |= BitB; + EncVal <<= 1; + EncVal |= BitC; + EncVal <<= 1; + EncVal |= BitD; + EncVal <<= 1; + EncVal |= BitE; + EncVal <<= 1; + EncVal |= BitF; + EncVal <<= 1; + EncVal |= BitG; + EncVal <<= 1; + EncVal |= BitH; + return EncVal; +} + +static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) { + uint64_t EncVal = 0; + if (Imm & 0x80) EncVal |= 0x8000000000000000ULL; + if (Imm & 0x40) EncVal |= 0x3fc0000000000000ULL; + else EncVal |= 0x4000000000000000ULL; + if (Imm & 0x20) EncVal |= 0x0020000000000000ULL; + if (Imm & 0x10) EncVal |= 0x0010000000000000ULL; + if (Imm & 0x08) EncVal |= 0x0008000000000000ULL; + if (Imm & 0x04) EncVal |= 0x0004000000000000ULL; + if (Imm & 0x02) EncVal |= 0x0002000000000000ULL; + if (Imm & 0x01) EncVal |= 0x0001000000000000ULL; + return (EncVal << 32) | EncVal; +} + +} // end namespace AArch64_AM + +} // end namespace llvm + +#endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index f1452ab..d8900d4 100644 --- 
a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -6,168 +6,57 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file contains the AArch64 implementation of the MCAsmBackend class, -// which is principally concerned with relaxation of the various fixup kinds. -// -//===----------------------------------------------------------------------===// +#include "AArch64.h" +#include "AArch64RegisterInfo.h" #include "MCTargetDesc/AArch64FixupKinds.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/ADT/Triple.h" #include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ELF.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/MachO.h" using namespace llvm; namespace { -class AArch64AsmBackend : public MCAsmBackend { - const MCSubtargetInfo* STI; -public: - AArch64AsmBackend(const Target &T, const StringRef TT) - : MCAsmBackend(), - STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", "")) - {} - - - ~AArch64AsmBackend() { - delete STI; - } - - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; - - virtual void processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved); -}; -} // end anonymous namespace - -void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - uint64_t &Value, bool &IsResolved) { - // The ADRP instruction adds some multiple of 0x1000 to the current PC & - // ~0xfff. This means that the required offset to reach a symbol can vary by - // up to one step depending on where the ADRP is in memory. For example: - // - // ADRP x0, there - // there: - // - // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and - // we'll need that as an offset. At any other address "there" will be in the - // same page as the ADRP and the instruction should encode 0x0. Assuming the - // section isn't 0x1000-aligned, we therefore need to delegate this decision - // to the linker -- a relocation! 
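// A worked example of the ambiguity described above: the page delta is
//   (target & ~0xfffULL) - (pc & ~0xfffULL);
// at pc 0xffc with target 0x1000 that is 0x1000 (one page), while at
// pc 0x1000 it is 0 -- only the linker can tell which applies.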
- if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page || - (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page) - IsResolved = false; -} - -static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value); - -namespace { +class AArch64AsmBackend : public MCAsmBackend { + static const unsigned PCRelFlagVal = + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel; -class ELFAArch64AsmBackend : public AArch64AsmBackend { - uint8_t OSABI; - bool IsLittle; // Big or little endian public: - ELFAArch64AsmBackend(const Target &T, const StringRef TT, - uint8_t _OSABI, bool isLittle) - : AArch64AsmBackend(T, TT), OSABI(_OSABI), IsLittle(isLittle) { } + AArch64AsmBackend(const Target &T) : MCAsmBackend() {} - bool fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const; - - unsigned int getNumFixupKinds() const { + unsigned getNumFixupKinds() const override { return AArch64::NumTargetFixupKinds; } - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// AArch64FixupKinds.h. -// -// Name Offset (bits) Size (bits) Flags -{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_add_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst128_lo12", 0, 32, 0 }, -{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_movw_uabs_g0", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g1", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g2", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 }, -{ "fixup_a64_movw_uabs_g3", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g0", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g1", 0, 32, 0 }, -{ "fixup_a64_movw_sabs_g2", 0, 32, 0 }, -{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 }, -{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 }, -{ 
"fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_movw_tprel_g2", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g1", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g0", 0, 32, 0 }, -{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 }, -{ "fixup_a64_add_tprel_hi12", 0, 32, 0 }, -{ "fixup_a64_add_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 }, -{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 }, -{ "fixup_a64_tlsdesc_call", 0, 0, 0 } + // This table *must* be in the order that the fixup_* kinds are defined in + // AArch64FixupKinds.h. + // + // Name Offset (bits) Size (bits) Flags + { "fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, + { "fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, + { "fixup_aarch64_add_imm12", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale1", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale2", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale4", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale8", 10, 12, 0 }, + { "fixup_aarch64_ldst_imm12_scale16", 10, 12, 0 }, + { "fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal }, + { "fixup_aarch64_movw", 5, 16, 0 }, + { "fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal }, + { "fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal }, + { "fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal }, + { "fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal }, + { "fixup_aarch64_tlsdesc_call", 0, 0, 0 } }; + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); @@ -177,417 +66,501 @@ public: } void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const { - unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8; - Value = adjustFixupValue(Fixup.getKind(), Value); - if (!Value) return; // Doesn't change encoding. - - unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); - - // For each byte of the fragment that the fixup touches, mask in the bits - // from the fixup value. 
- for (unsigned i = 0; i != NumBytes; ++i) { - Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); - } - } + uint64_t Value, bool IsPCRel) const override; - bool mayNeedRelaxation(const MCInst&) const { - return false; - } + bool mayNeedRelaxation(const MCInst &Inst) const override; + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override; + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - void relaxInstruction(const MCInst&, llvm::MCInst&) const { - llvm_unreachable("Cannot relax instructions"); - } + void HandleAssemblerFlag(MCAssemblerFlag Flag) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createAArch64ELFObjectWriter(OS, OSABI, IsLittle); - } + unsigned getPointerSize() const { return 8; } }; } // end anonymous namespace -bool -ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { - // Correct for now. With all instructions 32-bit only very low-level - // considerations could make you select something which may fail. - return false; -} +/// \brief The number of bytes the fixup may change. +static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + default: + assert(0 && "Unknown fixup kind!"); + case AArch64::fixup_aarch64_tlsdesc_call: + return 0; -bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { - // Can't emit NOP with size not multiple of 32-bits - if (Count % 4 != 0) - return false; + case FK_Data_1: + return 1; - uint64_t NumNops = Count / 4; - for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(0xd503201f); + case FK_Data_2: + case AArch64::fixup_aarch64_movw: + return 2; + + case AArch64::fixup_aarch64_pcrel_branch14: + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale16: + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + case AArch64::fixup_aarch64_pcrel_branch19: + return 3; + + case AArch64::fixup_aarch64_pcrel_adr_imm21: + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: + case FK_Data_4: + return 4; - return true; + case FK_Data_8: + return 8; + } } -static unsigned ADRImmBits(unsigned Value) { +static unsigned AdrImmBits(unsigned Value) { unsigned lo2 = Value & 0x3; - unsigned hi19 = (Value & 0x1fffff) >> 2; - + unsigned hi19 = (Value & 0x1ffffc) >> 2; return (hi19 << 5) | (lo2 << 29); } static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { + int64_t SignedValue = static_cast<int64_t>(Value); switch (Kind) { default: - llvm_unreachable("Unknown fixup kind!"); - case FK_Data_2: - assert((int64_t)Value >= -32768 && - (int64_t)Value <= 65536 && - "Out of range ABS16 fixup"); + assert(false && "Unknown fixup kind!"); + case AArch64::fixup_aarch64_pcrel_adr_imm21: + if (SignedValue > 2097151 || SignedValue < -2097152) + report_fatal_error("fixup value out of range"); + return AdrImmBits(Value & 0x1fffffULL); + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + return AdrImmBits((Value & 0x1fffff000ULL) >> 12); + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + case AArch64::fixup_aarch64_pcrel_branch19: + //
Signed 21-bit immediate + if (SignedValue > 2097151 || SignedValue < -2097152) + report_fatal_error("fixup value out of range"); + // Low two bits are not encoded. + return (Value >> 2) & 0x7ffff; + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: + // Unsigned 12-bit immediate + if (Value >= 0x1000) + report_fatal_error("invalid imm12 fixup value"); return Value; - case FK_Data_4: - assert((int64_t)Value >= -(1LL << 31) && - (int64_t)Value <= (1LL << 32) - 1 && - "Out of range ABS32 fixup"); + case AArch64::fixup_aarch64_ldst_imm12_scale2: + // Unsigned 12-bit immediate which gets multiplied by 2 + if (Value & 1 || Value >= 0x2000) + report_fatal_error("invalid imm12 fixup value"); + return Value >> 1; + case AArch64::fixup_aarch64_ldst_imm12_scale4: + // Unsigned 12-bit immediate which gets multiplied by 4 + if (Value & 3 || Value >= 0x4000) + report_fatal_error("invalid imm12 fixup value"); + return Value >> 2; + case AArch64::fixup_aarch64_ldst_imm12_scale8: + // Unsigned 12-bit immediate which gets multiplied by 8 + if (Value & 7 || Value >= 0x8000) + report_fatal_error("invalid imm12 fixup value"); + return Value >> 3; + case AArch64::fixup_aarch64_ldst_imm12_scale16: + // Unsigned 12-bit immediate which gets multiplied by 16 + if (Value & 15 || Value >= 0x10000) + report_fatal_error("invalid imm12 fixup value"); + return Value >> 4; + case AArch64::fixup_aarch64_movw: + report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet"); return Value; + case AArch64::fixup_aarch64_pcrel_branch14: + // Signed 16-bit immediate + if (SignedValue > 32767 || SignedValue < -32768) + report_fatal_error("fixup value out of range"); + // Low two bits are not encoded (4-byte alignment assumed). + if (Value & 0x3) + report_fatal_error("fixup not sufficiently aligned"); + return (Value >> 2) & 0x3fff; + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: + // Signed 28-bit immediate + if (SignedValue > 134217727 || SignedValue < -134217728) + report_fatal_error("fixup value out of range"); + // Low two bits are not encoded (4-byte alignment assumed). + if (Value & 0x3) + report_fatal_error("fixup not sufficiently aligned"); + return (Value >> 2) & 0x3ffffff; + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: case FK_Data_8: return Value; + } +} - case AArch64::fixup_a64_ld_gottprel_prel19: - // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F - // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20. - case AArch64::fixup_a64_ld_prel: - // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits - // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup"); - return (Value & 0x1ffffc) << 3; - - case AArch64::fixup_a64_adr_prel: - // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of - // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range ADR fixup"); - return ADRImmBits(Value & 0x1fffff); - - case AArch64::fixup_a64_adr_prel_page: - // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF - // F000 of the result of the operation, checking that -2^32 <= result < - // 2^32. 
- assert((int64_t)Value >= -(1LL << 32) && - (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); - return ADRImmBits((Value & 0x1fffff000ULL) >> 12); - - case AArch64::fixup_a64_add_dtprel_hi12: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits - // FF F000 of DTPREL(S+A), check 0 <= X < 2^24. - case AArch64::fixup_a64_add_tprel_hi12: - // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits - // FF F000 of TPREL(S+A), check 0 <= X < 2^24. - assert((int64_t)Value >= 0 && - (int64_t)Value < (1LL << 24) && "Out of range ADD fixup"); - return (Value & 0xfff000) >> 2; - - case AArch64::fixup_a64_add_dtprel_lo12: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits - // FFF of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_add_tprel_lo12: - // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits - // FFF of TPREL(S+A), check 0 <= X < 2^12. - assert((int64_t)Value >= 0 && - (int64_t)Value < (1LL << 12) && "Out of range ADD fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_add_dtprel_lo12_nc: - // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits - // FFF of DTPREL(S+A) with no overflow check. - case AArch64::fixup_a64_add_tprel_lo12_nc: - // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits - // FFF of TPREL(S+A) with no overflow check. - case AArch64::fixup_a64_tlsdesc_add_lo12_nc: - // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits - // FFF of G(TLSDESC(S+A)), with no overflow check. - case AArch64::fixup_a64_add_lo12: - // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of - // S+A, with no overflow check. - return (Value & 0xfff) << 10; - - case AArch64::fixup_a64_ldst8_dtprel_lo12: - // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst8_tprel_lo12: - // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst8_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst8_lo12: - // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF - // of S+A, with no overflow check. - return (Value & 0xfff) << 10; - - case AArch64::fixup_a64_ldst16_dtprel_lo12: - // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst16_tprel_lo12: - // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE - // of DTPREL(S+A), with no overflow check. 
- case AArch64::fixup_a64_ldst16_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst16_lo12: - // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE - // of S+A, with no overflow check. - return (Value & 0xffe) << 9; - - case AArch64::fixup_a64_ldst32_dtprel_lo12: - // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst32_tprel_lo12: - // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst32_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst32_lo12: - // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC - // of S+A, with no overflow check. - return (Value & 0xffc) << 8; - - case AArch64::fixup_a64_ldst64_dtprel_lo12: - // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), check 0 <= X < 2^12. - case AArch64::fixup_a64_ldst64_tprel_lo12: - // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), check 0 <= X < 2^12. - assert((int64_t) Value >= 0 && - (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup"); - // ... fallthrough to no-checking versions ... - case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: - // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8 - // of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst64_tprel_lo12_nc: - // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8 - // of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_ldst64_lo12: - // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8 - // of S+A, with no overflow check. - return (Value & 0xff8) << 7; - - case AArch64::fixup_a64_ldst128_lo12: - // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0 - // of S+A, with no overflow check. - return (Value & 0xff0) << 6; - - case AArch64::fixup_a64_movw_uabs_g0: - // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A - // with a check that S+A < 2^16 - assert(Value <= 0xffff && "Out of range move wide fixup"); - return (Value & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g0_nc: - // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of DTPREL(S+A) with no overflow check. - case AArch64::fixup_a64_movw_gottprel_g0_nc: - // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of G(TPREL(S+A)) - GOT with no overflow check. - case AArch64::fixup_a64_movw_tprel_g0_nc: - // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits - // FFFF of TPREL(S+A) with no overflow check. - case AArch64::fixup_a64_movw_uabs_g0_nc: - // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of - // S+A with no overflow check. 
- return (Value & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g1: - // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of - // S+A with a check that S+A < 2^32 - assert(Value <= 0xffffffffull && "Out of range move wide fixup"); - return ((Value >> 16) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g1_nc: - // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field - // to bits FFFF0000 of DTPREL(S+A), with no overflow check. - case AArch64::fixup_a64_movw_tprel_g1_nc: - // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field - // to bits FFFF0000 of TPREL(S+A), with no overflow check. - case AArch64::fixup_a64_movw_uabs_g1_nc: - // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits - // FFFF0000 of S+A with no overflow check. - return ((Value >> 16) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g2: - // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000 - // 0000 of S+A with a check that S+A < 2^48 - assert(Value <= 0xffffffffffffull && "Out of range move wide fixup"); - return ((Value >> 32) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g2_nc: - // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000 - // 0000 of S+A with no overflow check. - return ((Value >> 32) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_uabs_g3: - // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000 - // 0000 0000 of S+A (no overflow check needed) - return ((Value >> 48) & 0xffff) << 5; - - case AArch64::fixup_a64_movw_dtprel_g0: - // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field - // to bits FFFF of DTPREL(S+A). - case AArch64::fixup_a64_movw_tprel_g0: - // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to - // bits FFFF of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g0: { - // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of - // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we - // should convert between MOVN and MOVZ to achieve our goals). - int64_t Signed = Value; - assert(Signed >= -(1LL << 16) && Signed < (1LL << 16) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = (Value & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - // MCCodeEmitter should have encoded a MOVN, which is fine. - Value = (~Value & 0xffff) << 5; - } - return Value; +void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); + if (!Value) + return; // Doesn't change encoding. + MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); + // Apply any target-specific value adjustments. + Value = adjustFixupValue(Fixup.getKind(), Value); + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +} + +bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const { + return false; +} + +bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, + uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + // FIXME: This isn't correct for AArch64. 
Just moving the "generic" logic + // into the targets for now. + // + // Relax if the value is too big for a (signed) i8. + return int64_t(Value) != int64_t(int8_t(Value)); +} + +void AArch64AsmBackend::relaxInstruction(const MCInst &Inst, + MCInst &Res) const { + assert(false && "AArch64AsmBackend::relaxInstruction() unimplemented"); +} + +bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { + // If the count is not 4-byte aligned, we must be writing data into the text + // section (otherwise we have unaligned instructions, and thus have far + // bigger problems), so just write zeros instead. + if ((Count & 3) != 0) { + for (uint64_t i = 0, e = (Count & 3); i != e; ++i) + OW->Write8(0); } - case AArch64::fixup_a64_movw_dtprel_g1: - // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field - // to bits FFFF0000 of DTPREL(S+A). - case AArch64::fixup_a64_movw_gottprel_g1: - // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field - // to bits FFFF0000 of G(TPREL(S+A)) - GOT. - case AArch64::fixup_a64_movw_tprel_g1: - // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to - // bits FFFF0000 of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g1: { - // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000 - // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we - // should convert between MOVN and MOVZ to achieve our goals). - int64_t Signed = Value; - assert(Signed >= -(1LL << 32) && Signed < (1LL << 32) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = ((Value >> 16) & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - Value = ((~Value >> 16) & 0xffff) << 5; - } - return Value; + // We are properly aligned, so write NOPs as requested. + Count /= 4; + for (uint64_t i = 0; i != Count; ++i) + OW->Write32(0xd503201f); + return true; +} + +namespace { + +namespace CU { + +/// \brief Compact unwind encoding values. +enum CompactUnwindEncodings { + /// \brief A "frameless" leaf function, where no non-volatile registers are + /// saved. The return address remains in LR throughout the function. + UNWIND_AArch64_MODE_FRAMELESS = 0x02000000, + + /// \brief No compact unwind encoding available. Instead the low 23-bits of + /// the compact unwind encoding is the offset of the DWARF FDE in the + /// __eh_frame section. This mode is never used in object files. It is only + /// generated by the linker in final linked images, which have only DWARF info + /// for a function. + UNWIND_AArch64_MODE_DWARF = 0x03000000, + + /// \brief This is a standard arm64 prologue where FP/LR are immediately + /// pushed on the stack, then SP is copied to FP. If there are any + /// non-volatile registers saved, they are copied into the stack frame in pairs + /// in a contiguous range right below the saved FP/LR pair. Any subset of the + /// five X pairs and four D pairs can be saved, but the memory layout must be + /// in register number order. + UNWIND_AArch64_MODE_FRAME = 0x04000000, + + /// \brief Frame register pair encodings.
+ UNWIND_AArch64_FRAME_X19_X20_PAIR = 0x00000001, + UNWIND_AArch64_FRAME_X21_X22_PAIR = 0x00000002, + UNWIND_AArch64_FRAME_X23_X24_PAIR = 0x00000004, + UNWIND_AArch64_FRAME_X25_X26_PAIR = 0x00000008, + UNWIND_AArch64_FRAME_X27_X28_PAIR = 0x00000010, + UNWIND_AArch64_FRAME_D8_D9_PAIR = 0x00000100, + UNWIND_AArch64_FRAME_D10_D11_PAIR = 0x00000200, + UNWIND_AArch64_FRAME_D12_D13_PAIR = 0x00000400, + UNWIND_AArch64_FRAME_D14_D15_PAIR = 0x00000800 +}; + +} // end CU namespace + +// FIXME: This should be in a separate file. +class DarwinAArch64AsmBackend : public AArch64AsmBackend { + const MCRegisterInfo &MRI; + + /// \brief Encode compact unwind stack adjustment for frameless functions. + /// See UNWIND_AArch64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h. + /// The stack size always needs to be 16 byte aligned. + uint32_t encodeStackAdjustment(uint32_t StackSize) const { + return (StackSize / 16) << 12; + } + +public: + DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI) + : AArch64AsmBackend(T), MRI(MRI) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64, + MachO::CPU_SUBTYPE_ARM64_ALL); + } + + bool doesSectionRequireSymbols(const MCSection &Section) const override { + // Any section for which the linker breaks things into atoms needs to + // preserve symbols, including assembler local symbols, to identify + // those atoms. These sections are: + // Sections of type: + // + // S_CSTRING_LITERALS (e.g. __cstring) + // S_LITERAL_POINTERS (e.g. objc selector pointers) + // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS + // + // Sections named: + // + // __TEXT,__eh_frame + // __TEXT,__ustring + // __DATA,__cfstring + // __DATA,__objc_classrefs + // __DATA,__objc_catlist + // + // FIXME: It would be better if the compiler used actual linker local + // symbols for each of these sections rather than preserving what + // are ostensibly assembler local symbols. + const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section); + return (SMO.getType() == MachO::S_CSTRING_LITERALS || + SMO.getType() == MachO::S_4BYTE_LITERALS || + SMO.getType() == MachO::S_8BYTE_LITERALS || + SMO.getType() == MachO::S_16BYTE_LITERALS || + SMO.getType() == MachO::S_LITERAL_POINTERS || + (SMO.getSegmentName() == "__TEXT" && + (SMO.getSectionName() == "__eh_frame" || + SMO.getSectionName() == "__ustring")) || + (SMO.getSegmentName() == "__DATA" && + (SMO.getSectionName() == "__cfstring" || + SMO.getSectionName() == "__objc_classrefs" || + SMO.getSectionName() == "__objc_catlist"))); } - case AArch64::fixup_a64_movw_dtprel_g2: - // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field - // to bits FFFF 0000 0000 of DTPREL(S+A). - case AArch64::fixup_a64_movw_tprel_g2: - // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to - // bits FFFF 0000 0000 of TPREL(S+A). - case AArch64::fixup_a64_movw_sabs_g2: { - // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000 - // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that - // we should convert between MOVN and MOVZ to achieve our goals).
- int64_t Signed = Value; - assert(Signed >= -(1LL << 48) && Signed < (1LL << 48) - && "Out of range move wide fixup"); - if (Signed >= 0) { - Value = ((Value >> 32) & 0xffff) << 5; - // Bit 30 converts the MOVN encoding into a MOVZ - Value |= 1 << 30; - } else { - Value = ((~Value >> 32) & 0xffff) << 5; + /// \brief Generate the compact unwind encoding from the CFI directives. + uint32_t generateCompactUnwindEncoding( + ArrayRef<MCCFIInstruction> Instrs) const override { + if (Instrs.empty()) + return CU::UNWIND_AArch64_MODE_FRAMELESS; + + bool HasFP = false; + unsigned StackSize = 0; + + uint32_t CompactUnwindEncoding = 0; + for (size_t i = 0, e = Instrs.size(); i != e; ++i) { + const MCCFIInstruction &Inst = Instrs[i]; + + switch (Inst.getOperation()) { + default: + // Cannot handle this directive: bail out. + return CU::UNWIND_AArch64_MODE_DWARF; + case MCCFIInstruction::OpDefCfa: { + // Defines a frame pointer. + assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) == + AArch64::FP && + "Invalid frame pointer!"); + assert(i + 2 < e && "Insufficient CFI instructions to define a frame!"); + + const MCCFIInstruction &LRPush = Instrs[++i]; + assert(LRPush.getOperation() == MCCFIInstruction::OpOffset && + "Link register not pushed!"); + const MCCFIInstruction &FPPush = Instrs[++i]; + assert(FPPush.getOperation() == MCCFIInstruction::OpOffset && + "Frame pointer not pushed!"); + + unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true); + unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true); + + LRReg = getXRegFromWReg(LRReg); + FPReg = getXRegFromWReg(FPReg); + + assert(LRReg == AArch64::LR && FPReg == AArch64::FP && + "Pushing invalid registers for frame!"); + + // Indicate that the function has a frame. + CompactUnwindEncoding |= CU::UNWIND_AArch64_MODE_FRAME; + HasFP = true; + break; + } + case MCCFIInstruction::OpDefCfaOffset: { + assert(StackSize == 0 && "We already have the CFA offset!"); + StackSize = std::abs(Inst.getOffset()); + break; + } + case MCCFIInstruction::OpOffset: { + // Registers are saved in pairs. We expect there to be two consecutive + // `.cfi_offset' instructions with the appropriate registers specified. + unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true); + if (i + 1 == e) + return CU::UNWIND_AArch64_MODE_DWARF; + + const MCCFIInstruction &Inst2 = Instrs[++i]; + if (Inst2.getOperation() != MCCFIInstruction::OpOffset) + return CU::UNWIND_AArch64_MODE_DWARF; + unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true); + + // N.B. The encodings must be in register number order, and the X + // registers before the D registers.
+ + // X19/X20 pair = 0x00000001, + // X21/X22 pair = 0x00000002, + // X23/X24 pair = 0x00000004, + // X25/X26 pair = 0x00000008, + // X27/X28 pair = 0x00000010 + Reg1 = getXRegFromWReg(Reg1); + Reg2 = getXRegFromWReg(Reg2); + + if (Reg1 == AArch64::X19 && Reg2 == AArch64::X20 && + (CompactUnwindEncoding & 0xF1E) == 0) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X19_X20_PAIR; + else if (Reg1 == AArch64::X21 && Reg2 == AArch64::X22 && + (CompactUnwindEncoding & 0xF1C) == 0) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X21_X22_PAIR; + else if (Reg1 == AArch64::X23 && Reg2 == AArch64::X24 && + (CompactUnwindEncoding & 0xF18) == 0) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X23_X24_PAIR; + else if (Reg1 == AArch64::X25 && Reg2 == AArch64::X26 && + (CompactUnwindEncoding & 0xF10) == 0) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X25_X26_PAIR; + else if (Reg1 == AArch64::X27 && Reg2 == AArch64::X28 && + (CompactUnwindEncoding & 0xF00) == 0) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X27_X28_PAIR; + else { + Reg1 = getDRegFromBReg(Reg1); + Reg2 = getDRegFromBReg(Reg2); + + // D8/D9 pair = 0x00000100, + // D10/D11 pair = 0x00000200, + // D12/D13 pair = 0x00000400, + // D14/D15 pair = 0x00000800 + if (Reg1 == AArch64::D8 && Reg2 == AArch64::D9 && + (CompactUnwindEncoding & 0xE00) == 0) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D8_D9_PAIR; + else if (Reg1 == AArch64::D10 && Reg2 == AArch64::D11 && + (CompactUnwindEncoding & 0xC00) == 0) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D10_D11_PAIR; + else if (Reg1 == AArch64::D12 && Reg2 == AArch64::D13 && + (CompactUnwindEncoding & 0x800) == 0) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D12_D13_PAIR; + else if (Reg1 == AArch64::D14 && Reg2 == AArch64::D15) + CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D14_D15_PAIR; + else + // A pair was pushed which we cannot handle. + return CU::UNWIND_AArch64_MODE_DWARF; + } + + break; + } + } } - return Value; + + if (!HasFP) { + // With compact unwind info we can only represent stack adjustments of up + // to 65520 bytes. + if (StackSize > 65520) + return CU::UNWIND_AArch64_MODE_DWARF; + + CompactUnwindEncoding |= CU::UNWIND_AArch64_MODE_FRAMELESS; + CompactUnwindEncoding |= encodeStackAdjustment(StackSize); + } + + return CompactUnwindEncoding; } +}; - case AArch64::fixup_a64_tstbr: - // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to - // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15. - assert((int64_t)Value >= -(1LL << 15) && - (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup"); - return (Value & 0xfffc) << (5 - 2); - - case AArch64::fixup_a64_condbr: - // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch - // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20. - assert((int64_t)Value >= -(1LL << 20) && - (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup"); - return (Value & 0x1ffffc) << (5 - 2); - - case AArch64::fixup_a64_uncondbr: - // R_AARCH64_JUMP26 same as below (except to a linker, possibly). - case AArch64::fixup_a64_call: - // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P, - // checking that -2^27 <= S+A-P < 2^27. 
- assert((int64_t)Value >= -(1LL << 27) && - (int64_t)Value < (1LL << 27) && "Out of range branch fixup"); - return (Value & 0xffffffc) >> 2; - - case AArch64::fixup_a64_adr_gottprel_page: - // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits - // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32. - case AArch64::fixup_a64_tlsdesc_adr_page: - // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000 - // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32. - case AArch64::fixup_a64_adr_prel_got_page: - // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits - // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) < - // 2^32. - assert((int64_t)Value >= -(1LL << 32) && - (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup"); - return ADRImmBits((Value & 0x1fffff000ULL) >> 12); - - case AArch64::fixup_a64_ld64_gottprel_lo12_nc: - // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8 - // of X, with no overflow check. Check that X & 7 == 0. - case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: - // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of - // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0. - case AArch64::fixup_a64_ld64_got_lo12_nc: - // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of - // G(S) with no overflow check. Check X & 7 == 0 - assert(((int64_t)Value & 7) == 0 && "Misaligned fixup"); - return (Value & 0xff8) << 7; - - case AArch64::fixup_a64_tlsdesc_call: - // R_AARCH64_TLSDESC_CALL: For relaxation only. - return 0; +} // end anonymous namespace + +namespace { + +class ELFAArch64AsmBackend : public AArch64AsmBackend { +public: + uint8_t OSABI; + bool IsLittleEndian; + + ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian) + : AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian); } + + void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + const MCValue &Target, uint64_t &Value, + bool &IsResolved) override; + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; +}; + +void ELFAArch64AsmBackend::processFixupValue( + const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, + const MCFragment *DF, const MCValue &Target, uint64_t &Value, + bool &IsResolved) { + // The ADRP instruction adds some multiple of 0x1000 to the current PC & + // ~0xfff. This means that the required offset to reach a symbol can vary by + // up to one step depending on where the ADRP is in memory. For example: + // + // ADRP x0, there + // there: + // + // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and + // we'll need that as an offset. At any other address "there" will be in the + // same page as the ADRP and the instruction should encode 0x0. Assuming the + // section isn't 0x1000-aligned, we therefore need to delegate this decision + // to the linker -- a relocation! 
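+ // For example, with Page(x) == (x & ~0xfffULL), the quantity the ADRP has to
+ // encode is Page(there) - Page(P):
+ //   Page(0x1000) - Page(0xffc)  == 0x1000   (ADRP one page below "there")
+ //   Page(0x1000) - Page(0x1000) == 0        (ADRP in the same page)
+ // Only the final layout decides which of the two applies.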
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21) + IsResolved = false; +} + +void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { + // store fixups in .eh_frame section in big endian order + if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) { + const MCSection *Sec = Fixup.getValue()->FindAssociatedSection(); + const MCSectionELF *SecELF = static_cast<const MCSectionELF *>(Sec); + if (SecELF->getSectionName() == ".eh_frame") + Value = ByteSwap_32(unsigned(Value)); + } + AArch64AsmBackend::applyFixup(Fixup, Data, DataSize, Value, IsPCRel); +} } -MCAsmBackend * -llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { +MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { Triple TheTriple(TT); - return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS(), /*isLittle*/ true); + + if (TheTriple.isOSDarwin()) + return new DarwinAArch64AsmBackend(T, MRI); + + assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); + return new ELFAArch64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/true); } -MCAsmBackend * -llvm::createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { +MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { Triple TheTriple(TT); - return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS(), /*isLittle*/ false); + + assert(TheTriple.isOSBinFormatELF() && + "Big endian is only supported for ELF targets!"); + return new ELFAArch64AsmBackend(T, TheTriple.getOS(), + /*IsLittleEndian=*/false); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index a5fe914..e05191e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCExpr.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCValue.h" @@ -35,257 +36,222 @@ private: }; } -AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian) - : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, - /*HasRelocationAddend*/ true) -{} +AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, + bool IsLittleEndian) + : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64, + /*HasRelocationAddend*/ true) {} -AArch64ELFObjectWriter::~AArch64ELFObjectWriter() -{} +AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {} unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target, - const MCFixup &Fixup, - bool IsPCRel) const { - unsigned Type; + const MCFixup &Fixup, + bool IsPCRel) const { + AArch64MCExpr::VariantKind RefKind = + static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind()); + AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind); + bool IsNC = AArch64MCExpr::isNotChecked(RefKind); + + assert((!Target.getSymA() || + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) && + "Should only be expression-level modifiers here"); + + assert((!Target.getSymB() || + Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None) && + "Should only be expression-level
modifiers here"); + if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { - default: - llvm_unreachable("Unimplemented fixup -> relocation"); - case FK_Data_8: - return ELF::R_AARCH64_PREL64; - case FK_Data_4: - return ELF::R_AARCH64_PREL32; case FK_Data_2: return ELF::R_AARCH64_PREL16; - case AArch64::fixup_a64_ld_prel: - Type = ELF::R_AARCH64_LD_PREL_LO19; - break; - case AArch64::fixup_a64_adr_prel: - Type = ELF::R_AARCH64_ADR_PREL_LO21; - break; - case AArch64::fixup_a64_adr_prel_page: - Type = ELF::R_AARCH64_ADR_PREL_PG_HI21; - break; - case AArch64::fixup_a64_adr_prel_got_page: - Type = ELF::R_AARCH64_ADR_GOT_PAGE; - break; - case AArch64::fixup_a64_tstbr: - Type = ELF::R_AARCH64_TSTBR14; - break; - case AArch64::fixup_a64_condbr: - Type = ELF::R_AARCH64_CONDBR19; - break; - case AArch64::fixup_a64_uncondbr: - Type = ELF::R_AARCH64_JUMP26; - break; - case AArch64::fixup_a64_call: - Type = ELF::R_AARCH64_CALL26; - break; - case AArch64::fixup_a64_adr_gottprel_page: - Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; - break; - case AArch64::fixup_a64_ld_gottprel_prel19: - Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; - break; - case AArch64::fixup_a64_tlsdesc_adr_page: - Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; - break; + case FK_Data_4: + return ELF::R_AARCH64_PREL32; + case FK_Data_8: + return ELF::R_AARCH64_PREL64; + case AArch64::fixup_aarch64_pcrel_adr_imm21: + assert(SymLoc == AArch64MCExpr::VK_NONE && "unexpected ADR relocation"); + return ELF::R_AARCH64_ADR_PREL_LO21; + case AArch64::fixup_aarch64_pcrel_adrp_imm21: + if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC) + return ELF::R_AARCH64_ADR_PREL_PG_HI21; + if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC) + return ELF::R_AARCH64_ADR_GOT_PAGE; + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC) + return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21; + if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC) + return ELF::R_AARCH64_TLSDESC_ADR_PAGE; + llvm_unreachable("invalid symbol kind for ADRP relocation"); + case AArch64::fixup_aarch64_pcrel_branch26: + return ELF::R_AARCH64_JUMP26; + case AArch64::fixup_aarch64_pcrel_call26: + return ELF::R_AARCH64_CALL26; + case AArch64::fixup_aarch64_ldr_pcrel_imm19: + if (SymLoc == AArch64MCExpr::VK_GOTTPREL) + return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19; + return ELF::R_AARCH64_LD_PREL_LO19; + case AArch64::fixup_aarch64_pcrel_branch14: + return ELF::R_AARCH64_TSTBR14; + case AArch64::fixup_aarch64_pcrel_branch19: + return ELF::R_AARCH64_CONDBR19; + default: + llvm_unreachable("Unsupported pc-relative fixup kind"); } } else { switch ((unsigned)Fixup.getKind()) { - default: - llvm_unreachable("Unimplemented fixup -> relocation"); - case FK_Data_8: - return ELF::R_AARCH64_ABS64; - case FK_Data_4: - return ELF::R_AARCH64_ABS32; case FK_Data_2: return ELF::R_AARCH64_ABS16; - case AArch64::fixup_a64_add_lo12: - Type = ELF::R_AARCH64_ADD_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ld64_got_lo12_nc: - Type = ELF::R_AARCH64_LD64_GOT_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_lo12: - Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_lo12: - Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_lo12: - Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_lo12: - Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC; - break; - case AArch64::fixup_a64_ldst128_lo12: - Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC; - break; - case AArch64::fixup_a64_movw_uabs_g0: - Type = ELF::R_AARCH64_MOVW_UABS_G0; - break; - case 
AArch64::fixup_a64_movw_uabs_g0_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G0_NC; - break; - case AArch64::fixup_a64_movw_uabs_g1: - Type = ELF::R_AARCH64_MOVW_UABS_G1; - break; - case AArch64::fixup_a64_movw_uabs_g1_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G1_NC; - break; - case AArch64::fixup_a64_movw_uabs_g2: - Type = ELF::R_AARCH64_MOVW_UABS_G2; - break; - case AArch64::fixup_a64_movw_uabs_g2_nc: - Type = ELF::R_AARCH64_MOVW_UABS_G2_NC; - break; - case AArch64::fixup_a64_movw_uabs_g3: - Type = ELF::R_AARCH64_MOVW_UABS_G3; - break; - case AArch64::fixup_a64_movw_sabs_g0: - Type = ELF::R_AARCH64_MOVW_SABS_G0; - break; - case AArch64::fixup_a64_movw_sabs_g1: - Type = ELF::R_AARCH64_MOVW_SABS_G1; - break; - case AArch64::fixup_a64_movw_sabs_g2: - Type = ELF::R_AARCH64_MOVW_SABS_G2; - break; + case FK_Data_4: + return ELF::R_AARCH64_ABS32; + case FK_Data_8: + return ELF::R_AARCH64_ABS64; + case AArch64::fixup_aarch64_add_imm12: + if (RefKind == AArch64MCExpr::VK_DTPREL_HI12) + return ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; + if (RefKind == AArch64MCExpr::VK_TPREL_HI12) + return ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; + if (RefKind == AArch64MCExpr::VK_DTPREL_LO12_NC) + return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; + if (RefKind == AArch64MCExpr::VK_DTPREL_LO12) + return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; + if (RefKind == AArch64MCExpr::VK_TPREL_LO12_NC) + return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; + if (RefKind == AArch64MCExpr::VK_TPREL_LO12) + return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; + if (RefKind == AArch64MCExpr::VK_TLSDESC_LO12) + return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) + return ELF::R_AARCH64_ADD_ABS_LO12_NC; - // TLS Local-dynamic block - case AArch64::fixup_a64_movw_dtprel_g2: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; - break; - case AArch64::fixup_a64_movw_dtprel_g1: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; - break; - case AArch64::fixup_a64_movw_dtprel_g1_nc: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; - break; - case AArch64::fixup_a64_movw_dtprel_g0: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; - break; - case AArch64::fixup_a64_movw_dtprel_g0_nc: - Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; - break; - case AArch64::fixup_a64_add_dtprel_hi12: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12; - break; - case AArch64::fixup_a64_add_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12; - break; - case AArch64::fixup_a64_add_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst8_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst16_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst32_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_dtprel_lo12: - Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; - break; - case AArch64::fixup_a64_ldst64_dtprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; - break; + report_fatal_error("invalid fixup for add (uimm12) instruction"); + return 0; + case AArch64::fixup_aarch64_ldst_imm12_scale1: + if (SymLoc == AArch64MCExpr::VK_ABS && 
IsNC) + return ELF::R_AARCH64_LDST8_ABS_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) + return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) + return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) + return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) + return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - // TLS initial-exec block - case AArch64::fixup_a64_movw_gottprel_g1: - Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; - break; - case AArch64::fixup_a64_movw_gottprel_g0_nc: - Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; - break; - case AArch64::fixup_a64_ld64_gottprel_lo12_nc: - Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - break; + report_fatal_error("invalid fixup for 8-bit load/store instruction"); + return 0; + case AArch64::fixup_aarch64_ldst_imm12_scale2: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) + return ELF::R_AARCH64_LDST16_ABS_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) + return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) + return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) + return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) + return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - // TLS local-exec block - case AArch64::fixup_a64_movw_tprel_g2: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; - break; - case AArch64::fixup_a64_movw_tprel_g1: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; - break; - case AArch64::fixup_a64_movw_tprel_g1_nc: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; - break; - case AArch64::fixup_a64_movw_tprel_g0: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; - break; - case AArch64::fixup_a64_movw_tprel_g0_nc: - Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; - break; - case AArch64::fixup_a64_add_tprel_hi12: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12; - break; - case AArch64::fixup_a64_add_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12; - break; - case AArch64::fixup_a64_add_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst8_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst8_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst16_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst16_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst32_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst32_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - break; - case AArch64::fixup_a64_ldst64_tprel_lo12: - Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; - break; - case AArch64::fixup_a64_ldst64_tprel_lo12_nc: - Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - break; + report_fatal_error("invalid fixup for 16-bit load/store instruction"); + return 0; + case AArch64::fixup_aarch64_ldst_imm12_scale4: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) + return ELF::R_AARCH64_LDST32_ABS_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) + return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) + return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_TPREL && 
!IsNC) + return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) + return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - // TLS general-dynamic block - case AArch64::fixup_a64_tlsdesc_adr_page: - Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE; - break; - case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc: - Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; - break; - case AArch64::fixup_a64_tlsdesc_add_lo12_nc: - Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC; - break; - case AArch64::fixup_a64_tlsdesc_call: - Type = ELF::R_AARCH64_TLSDESC_CALL; - break; + report_fatal_error("invalid fixup for 32-bit load/store instruction"); + return 0; + case AArch64::fixup_aarch64_ldst_imm12_scale8: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) + return ELF::R_AARCH64_LDST64_ABS_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_GOT && IsNC) + return ELF::R_AARCH64_LD64_GOT_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC) + return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; + if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC) + return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC) + return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; + if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC) + return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC) + return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; + if (SymLoc == AArch64MCExpr::VK_TLSDESC && IsNC) + return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; + + report_fatal_error("invalid fixup for 64-bit load/store instruction"); + return 0; + case AArch64::fixup_aarch64_ldst_imm12_scale16: + if (SymLoc == AArch64MCExpr::VK_ABS && IsNC) + return ELF::R_AARCH64_LDST128_ABS_LO12_NC; + + report_fatal_error("invalid fixup for 128-bit load/store instruction"); + return 0; + case AArch64::fixup_aarch64_movw: + if (RefKind == AArch64MCExpr::VK_ABS_G3) + return ELF::R_AARCH64_MOVW_UABS_G3; + if (RefKind == AArch64MCExpr::VK_ABS_G2) + return ELF::R_AARCH64_MOVW_UABS_G2; + if (RefKind == AArch64MCExpr::VK_ABS_G2_S) + return ELF::R_AARCH64_MOVW_SABS_G2; + if (RefKind == AArch64MCExpr::VK_ABS_G2_NC) + return ELF::R_AARCH64_MOVW_UABS_G2_NC; + if (RefKind == AArch64MCExpr::VK_ABS_G1) + return ELF::R_AARCH64_MOVW_UABS_G1; + if (RefKind == AArch64MCExpr::VK_ABS_G1_S) + return ELF::R_AARCH64_MOVW_SABS_G1; + if (RefKind == AArch64MCExpr::VK_ABS_G1_NC) + return ELF::R_AARCH64_MOVW_UABS_G1_NC; + if (RefKind == AArch64MCExpr::VK_ABS_G0) + return ELF::R_AARCH64_MOVW_UABS_G0; + if (RefKind == AArch64MCExpr::VK_ABS_G0_S) + return ELF::R_AARCH64_MOVW_SABS_G0; + if (RefKind == AArch64MCExpr::VK_ABS_G0_NC) + return ELF::R_AARCH64_MOVW_UABS_G0_NC; + if (RefKind == AArch64MCExpr::VK_DTPREL_G2) + return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; + if (RefKind == AArch64MCExpr::VK_DTPREL_G1) + return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; + if (RefKind == AArch64MCExpr::VK_DTPREL_G1_NC) + return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; + if (RefKind == AArch64MCExpr::VK_DTPREL_G0) + return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; + if (RefKind == AArch64MCExpr::VK_DTPREL_G0_NC) + return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; + if (RefKind == AArch64MCExpr::VK_TPREL_G2) + return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; + if (RefKind == AArch64MCExpr::VK_TPREL_G1) + return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; + if (RefKind == AArch64MCExpr::VK_TPREL_G1_NC) + return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; + if (RefKind == AArch64MCExpr::VK_TPREL_G0) + return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; + if (RefKind == 
AArch64MCExpr::VK_TPREL_G0_NC) + return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; + if (RefKind == AArch64MCExpr::VK_GOTTPREL_G1) + return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; + if (RefKind == AArch64MCExpr::VK_GOTTPREL_G0_NC) + return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; + report_fatal_error("invalid fixup for movz/movk instruction"); + return 0; + case AArch64::fixup_aarch64_tlsdesc_call: + return ELF::R_AARCH64_TLSDESC_CALL; + default: + llvm_unreachable("Unknown ELF relocation type"); } } - return Type; + llvm_unreachable("Unimplemented fixup -> relocation"); } MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, - bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI, IsLittleEndian); - return createELFObjectWriter(MOTW, OS, IsLittleEndian); + uint8_t OSABI, + bool IsLittleEndian) { + MCELFObjectTargetWriter *MOTW = + new AArch64ELFObjectWriter(OSABI, IsLittleEndian); + return createELFObjectWriter(MOTW, OS, IsLittleEndian); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 473b7dd..a79406d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -56,14 +56,14 @@ namespace { class AArch64ELFStreamer : public MCELFStreamer { public: AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) + MCCodeEmitter *Emitter) : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), LastEMS(EMS_None) {} ~AArch64ELFStreamer() {} - virtual void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) { + void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) override { // We have to keep track of the mapping symbol state of any sections we // use. Each one should start off as EMS_None, which is provided as the // default constructor by DenseMap::lookup. @@ -76,7 +76,8 @@ public: /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to add the appropriate mapping symbol if /// necessary. - virtual void EmitInstruction(const MCInst& Inst, const MCSubtargetInfo &STI) { + void EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) override { EmitA64MappingSymbol(); MCELFStreamer::EmitInstruction(Inst, STI); } @@ -84,7 +85,7 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. - virtual void EmitBytes(StringRef Data) { + void EmitBytes(StringRef Data) override { EmitDataMappingSymbol(); MCELFStreamer::EmitBytes(Data); } @@ -92,7 +93,8 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. 
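/// A minimal sketch of the scheme, in terms of the LastEMS state used below:
/// a mapping symbol is emitted only when the kind of output changes, e.g.
///   if (LastEMS != EMS_Data) { EmitMappingSymbol("$d"); LastEMS = EMS_Data; }
/// so a run of consecutive data emissions yields a single "$d" symbol rather
/// than one per directive.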
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { + void EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) override { EmitDataMappingSymbol(); MCELFStreamer::EmitValueImpl(Value, Size); } @@ -105,13 +107,15 @@ private: }; void EmitDataMappingSymbol() { - if (LastEMS == EMS_Data) return; + if (LastEMS == EMS_Data) + return; EmitMappingSymbol("$d"); LastEMS = EMS_Data; } void EmitA64MappingSymbol() { - if (LastEMS == EMS_A64) return; + if (LastEMS == EMS_A64) + return; EmitMappingSymbol("$x"); LastEMS = EMS_A64; } @@ -120,15 +124,14 @@ private: MCSymbol *Start = getContext().CreateTempSymbol(); EmitLabel(Start); - MCSymbol *Symbol = - getContext().GetOrCreateSymbol(Name + "." + - Twine(MappingSymbolCounter++)); + MCSymbol *Symbol = getContext().GetOrCreateSymbol( + Name + "." + Twine(MappingSymbolCounter++)); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - AssignSection(Symbol, getCurrentSection().first); + Symbol->setSection(*getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); @@ -144,16 +147,14 @@ private: } namespace llvm { - MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack) { - AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); - return S; - } +MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack) { + AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; +} } - - diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h index 5a89ca5..bc6973b 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h @@ -18,10 +18,9 @@ namespace llvm { - MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack); +MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool NoExecStack); } #endif // AArch64_ELF_STREAMER_H diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h index eeb122d..bf405fb 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h @@ -1,4 +1,4 @@ -//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=// +//===-- AArch64FixupKinds.h - AArch64 Specific Fixup Entries ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,108 +6,71 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file describes the LLVM fixups applied to MCInsts in the AArch64 -// backend. 
-// -//===----------------------------------------------------------------------===// -#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H -#define LLVM_AARCH64_AARCH64FIXUPKINDS_H +#ifndef LLVM_AArch64FIXUPKINDS_H +#define LLVM_AArch64FIXUPKINDS_H #include "llvm/MC/MCFixup.h" namespace llvm { - namespace AArch64 { - enum Fixups { - fixup_a64_ld_prel = FirstTargetFixupKind, - fixup_a64_adr_prel, - fixup_a64_adr_prel_page, - - fixup_a64_add_lo12, - - fixup_a64_ldst8_lo12, - fixup_a64_ldst16_lo12, - fixup_a64_ldst32_lo12, - fixup_a64_ldst64_lo12, - fixup_a64_ldst128_lo12, - - fixup_a64_tstbr, - fixup_a64_condbr, - fixup_a64_uncondbr, - fixup_a64_call, - - fixup_a64_movw_uabs_g0, - fixup_a64_movw_uabs_g0_nc, - fixup_a64_movw_uabs_g1, - fixup_a64_movw_uabs_g1_nc, - fixup_a64_movw_uabs_g2, - fixup_a64_movw_uabs_g2_nc, - fixup_a64_movw_uabs_g3, - - fixup_a64_movw_sabs_g0, - fixup_a64_movw_sabs_g1, - fixup_a64_movw_sabs_g2, - - fixup_a64_adr_prel_got_page, - fixup_a64_ld64_got_lo12_nc, - - // Produce offsets relative to the module's dynamic TLS area. - fixup_a64_movw_dtprel_g2, - fixup_a64_movw_dtprel_g1, - fixup_a64_movw_dtprel_g1_nc, - fixup_a64_movw_dtprel_g0, - fixup_a64_movw_dtprel_g0_nc, - fixup_a64_add_dtprel_hi12, - fixup_a64_add_dtprel_lo12, - fixup_a64_add_dtprel_lo12_nc, - fixup_a64_ldst8_dtprel_lo12, - fixup_a64_ldst8_dtprel_lo12_nc, - fixup_a64_ldst16_dtprel_lo12, - fixup_a64_ldst16_dtprel_lo12_nc, - fixup_a64_ldst32_dtprel_lo12, - fixup_a64_ldst32_dtprel_lo12_nc, - fixup_a64_ldst64_dtprel_lo12, - fixup_a64_ldst64_dtprel_lo12_nc, - - // Produce the GOT entry containing a variable's address in TLS's - // initial-exec mode. - fixup_a64_movw_gottprel_g1, - fixup_a64_movw_gottprel_g0_nc, - fixup_a64_adr_gottprel_page, - fixup_a64_ld64_gottprel_lo12_nc, - fixup_a64_ld_gottprel_prel19, - - // Produce offsets relative to the thread pointer: TPIDR_EL0. - fixup_a64_movw_tprel_g2, - fixup_a64_movw_tprel_g1, - fixup_a64_movw_tprel_g1_nc, - fixup_a64_movw_tprel_g0, - fixup_a64_movw_tprel_g0_nc, - fixup_a64_add_tprel_hi12, - fixup_a64_add_tprel_lo12, - fixup_a64_add_tprel_lo12_nc, - fixup_a64_ldst8_tprel_lo12, - fixup_a64_ldst8_tprel_lo12_nc, - fixup_a64_ldst16_tprel_lo12, - fixup_a64_ldst16_tprel_lo12_nc, - fixup_a64_ldst32_tprel_lo12, - fixup_a64_ldst32_tprel_lo12_nc, - fixup_a64_ldst64_tprel_lo12, - fixup_a64_ldst64_tprel_lo12_nc, - - // Produce the special fixups used by the general-dynamic TLS model. - fixup_a64_tlsdesc_adr_page, - fixup_a64_tlsdesc_ld64_lo12_nc, - fixup_a64_tlsdesc_add_lo12_nc, - fixup_a64_tlsdesc_call, - - - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind - }; - } -} +namespace AArch64 { + +enum Fixups { + // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into + // an ADR instruction. + fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind, + + // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into + // an ADRP instruction. + fixup_aarch64_pcrel_adrp_imm21, + + // fixup_aarch64_imm12 - 12-bit fixup for add/sub instructions. + // No alignment adjustment. All value bits are encoded. + fixup_aarch64_add_imm12, + + // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and + // store instructions. + fixup_aarch64_ldst_imm12_scale1, + fixup_aarch64_ldst_imm12_scale2, + fixup_aarch64_ldst_imm12_scale4, + fixup_aarch64_ldst_imm12_scale8, + fixup_aarch64_ldst_imm12_scale16, + + // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative + // immediate. 
Used by + // pc-relative loads; generates relocations directly when necessary. + fixup_aarch64_ldr_pcrel_imm19, + + // fixup_aarch64_movw - 16-bit fixup for the immediate operand of a + // MOVZ/MOVK instruction. + fixup_aarch64_movw, + + // fixup_aarch64_pcrel_branch14 - The high 14 bits of a 16-bit pc-relative + // immediate (test-and-branch). + fixup_aarch64_pcrel_branch14, + + // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative + // immediate. Same encoding as fixup_aarch64_ldr_pcrel_imm19, except this is + // used by b.cc and generates relocations directly when necessary. + fixup_aarch64_pcrel_branch19, + + // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative + // immediate. + fixup_aarch64_pcrel_branch26, + + // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative + // immediate. Distinguished from branch26 only on ELF. + fixup_aarch64_pcrel_call26, + + // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF + // R_AARCH64_TLSDESC_CALL relocation. + fixup_aarch64_tlsdesc_call, + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; + +} // end namespace AArch64 +} // end namespace llvm #endif diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index b090a55..dc4a8bf 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -13,26 +13,82 @@ #include "AArch64MCAsmInfo.h" #include "llvm/ADT/Triple.h" - +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; -AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo(StringRef TT) { - Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::aarch64_be) +enum AsmWriterVariantTy { + Default = -1, + Generic = 0, + Apple = 1 +}; + +static cl::opt<AsmWriterVariantTy> AsmWriterVariant( + "aarch64-neon-syntax", cl::init(Default), + cl::desc("Choose style of NEON code to emit from AArch64 backend:"), + cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), + clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"), + clEnumValEnd)); + +AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { + // We prefer NEON instructions to be printed in the short form. + AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant; + + PrivateGlobalPrefix = "L"; + SeparatorString = "%%"; + CommentString = ";"; + PointerSize = CalleeSaveStackSlotSize = 8; + + AlignmentIsInBytes = false; + UsesELFSectionDirectiveForBSS = true; + SupportsDebugInformation = true; + UseDataRegionDirectives = true; + + ExceptionsType = ExceptionHandling::DwarfCFI; +} + +const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol( + const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { + // On Darwin, we can reference dwarf symbols with foo@GOT-., which + // is an indirect pc-relative reference. The default implementation + // won't reference using the GOT, so we need this target-specific + // version.
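  // Illustrative sketch, assuming a personality symbol named
  // ___gxx_personality_v0: the code below builds the expression tree
  //   Sub(SymbolRef(Sym, VK_GOT), SymbolRef(Ltmp))
  // where Ltmp is a temporary label pinned to the current PC, so the asm
  // printer renders it roughly as
  //   .long ___gxx_personality_v0@GOT-Ltmp0
  // i.e. the pc-relative address of the symbol's GOT slot.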
+ MCContext &Context = Streamer.getContext(); + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context); + MCSymbol *PCSym = Context.CreateTempSymbol(); + Streamer.EmitLabel(PCSym); + const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); + return MCBinaryExpr::CreateSub(Res, PC, Context); +} + +AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) { + Triple T(TT); + if (T.getArch() == Triple::arm64_be || T.getArch() == Triple::aarch64_be) IsLittleEndian = false; + // We prefer NEON instructions to be printed in the short form. + AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant; + PointerSize = 8; // ".comm align is in bytes but .align is pow-2." AlignmentIsInBytes = false; CommentString = "//"; + PrivateGlobalPrefix = ".L"; Code32Directive = ".code\t32"; Data16bitsDirective = "\t.hword\t"; Data32bitsDirective = "\t.word\t"; Data64bitsDirective = "\t.xword\t"; + UseDataRegionDirectives = false; + + WeakRefDirective = "\t.weak\t"; + HasLEB128 = true; SupportsDebugInformation = true; @@ -41,6 +97,3 @@ AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo(StringRef TT) { UseIntegratedAssembler = true; } - -// Pin the vtable to this file. -void AArch64ELFMCAsmInfo::anchor() {} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 43c0e47..42a031d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -1,4 +1,4 @@ -//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===// +//=====-- AArch64MCAsmInfo.h - AArch64 asm properties ---------*- C++ -*--====// // // The LLVM Compiler Infrastructure // @@ -11,17 +11,24 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_AARCH64TARGETASMINFO_H -#define LLVM_AARCH64TARGETASMINFO_H +#ifndef AArch64TARGETASMINFO_H +#define AArch64TARGETASMINFO_H -#include "llvm/MC/MCAsmInfoELF.h" +#include "llvm/MC/MCAsmInfoDarwin.h" namespace llvm { +class Target; +class StringRef; +class MCStreamer; +struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { + explicit AArch64MCAsmInfoDarwin(); + const MCExpr * + getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; +}; -struct AArch64ELFMCAsmInfo : public MCAsmInfoELF { - explicit AArch64ELFMCAsmInfo(StringRef TT); -private: - virtual void anchor(); +struct AArch64MCAsmInfoELF : public MCAsmInfo { + explicit AArch64MCAsmInfoELF(StringRef TT); }; } // namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index b9a61ef..464a18c 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -1,4 +1,4 @@ -//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =// +//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code-=// // // The LLVM Compiler Infrastructure // @@ -11,10 +11,9 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "MCTargetDesc/AArch64FixupKinds.h" #include "MCTargetDesc/AArch64MCExpr.h" -#include "MCTargetDesc/AArch64MCTargetDesc.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" @@ -22,524 +21,562 @@ #include 
"llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; +#define DEBUG_TYPE "mccodeemitter" + +STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); +STATISTIC(MCNumFixups, "Number of MC fixups created."); + namespace { + class AArch64MCCodeEmitter : public MCCodeEmitter { - AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; - void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION; MCContext &Ctx; + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT public: - AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {} + AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, + MCContext &ctx) + : Ctx(ctx) {} ~AArch64MCCodeEmitter() {} - unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + /// getMachineOpValue - Return binary encoding of operand. If the machine + /// operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, + /// getLdStUImm12OpValue - Return encoding info for 12-bit unsigned immediate + /// attached to a load, store or prfm instruction. If operand requires a + /// relocation, record it and return zero in that part of the encoding. + template + uint32_t getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label + /// target. + uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + /// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and + /// the 2-bit shift field. + uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - template - unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return getOffsetUImm12OpValue(MI, OpIdx, Fixups, STI, MemSize); - } + /// getCondBranchTargetOpValue - Return the encoded value for a conditional + /// branch target. + uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI, - int MemSize) const; + /// getLoadLiteralOpValue - Return the encoded value for a load-literal + /// pc-relative address. 
+ uint32_t getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + /// getMemExtendOpValue - Return the encoded value for a reg-extend load/store + /// instruction: bit 0 is whether a shift is present, bit 1 is whether the + /// operation is a sign extend (as opposed to a zero extend). + uint32_t getMemExtendOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm8(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm16(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm32(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftRightImm64(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- + /// branch target. + uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + /// getBranchTargetOpValue - Return the encoded value for an unconditional + /// branch target. + uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - // Labels are handled mostly the same way: a symbol is needed, and - // just gets some fixup attached. - template - unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + /// getMoveWideImmOpValue - Return the encoded value for the immediate operand + /// of a MOVZ or MOVK instruction. + uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + /// getVecShifterOpValue - Return the encoded value for the vector shifter. + uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + /// getMoveVecShifterOpValue - Return the encoded value for the vector move + /// shifter (MSL). + uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + /// getFixedPointScaleOpValue - Return the encoded value for the + // FP-to-fixed-point scale factor. 
+ uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + /// getSIMDShift64OpValue - Return the encoded value for the + // shift-by-immediate AdvSIMD instructions. + uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - unsigned getAddressWithFixup(const MCOperand &MO, - unsigned FixupKind, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - + uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; - // getBinaryCodeForInstr - TableGen'erated function for getting the - // binary encoding for an instruction. - uint64_t getBinaryCodeForInstr(const MCInst &MI, + uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const; - void EmitByte(unsigned char C, raw_ostream &OS) const { - OS << (char)C; - } + void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; } - void EmitInstruction(uint32_t Val, raw_ostream &OS) const { + void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const { // Output the constant in little endian byte order. 
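    // Worked example, assuming Val = 0xD503201F (the AArch64 NOP encoding)
    // and Size = 4: the emission loop writes the bytes 0x1F, 0x20, 0x03,
    // 0xD5 in that order, least-significant byte first.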
- for (unsigned i = 0; i != 4; ++i) { - EmitByte(Val & 0xff, OS); + for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, OS); Val >>= 8; } } - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - template unsigned - fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const override; unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue, const MCSubtargetInfo &STI) const; + template unsigned + fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const; + unsigned fixOneOperandFPComparison(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const; }; } // end anonymous namespace -unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO, - unsigned FixupKind, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (!MO.isExpr()) { - // This can occur for manually decoded or constructed MCInsts, but neither - // the assembly-parser nor instruction selection will currently produce an - // MCInst that's not a symbol reference. - assert(MO.isImm() && "Unexpected address requested"); - return MO.getImm(); - } +MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new AArch64MCCodeEmitter(MCII, STI, Ctx); +} - const MCExpr *Expr = MO.getExpr(); - MCFixupKind Kind = MCFixupKind(FixupKind); - Fixups.push_back(MCFixup::Create(0, Expr, Kind)); +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. 
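// A minimal standalone sketch of the dispatch implemented below (the helper
// name encodeOperandSketch is hypothetical, for illustration only):
//
//   static unsigned encodeOperandSketch(const MCOperand &MO,
//                                       const MCRegisterInfo &MRI) {
//     if (MO.isReg())
//       return MRI.getEncodingValue(MO.getReg()); // HW encoding from TableGen
//     assert(MO.isImm() && "expressions go through specialized getters");
//     return static_cast<unsigned>(MO.getImm()); // immediates pass through
//   }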
+unsigned +AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) + return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); + else { + assert(MO.isImm() && "did not expect relocated expression"); + return static_cast(MO.getImm()); + } + assert(0 && "Unable to encode MCOperand!"); return 0; } -unsigned AArch64MCCodeEmitter:: -getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI, - int MemSize) const { - const MCOperand &ImmOp = MI.getOperand(OpIdx); - if (ImmOp.isImm()) - return ImmOp.getImm(); - - assert(ImmOp.isExpr() && "Unexpected operand type"); - const AArch64MCExpr *Expr = cast(ImmOp.getExpr()); - unsigned FixupKind; - - - switch (Expr->getKind()) { - default: llvm_unreachable("Unexpected operand modifier"); - case AArch64MCExpr::VK_AARCH64_LO12: { - static const unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, - AArch64::fixup_a64_ldst16_lo12, - AArch64::fixup_a64_ldst32_lo12, - AArch64::fixup_a64_ldst64_lo12, - AArch64::fixup_a64_ldst128_lo12 }; - assert(MemSize <= 16 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_GOT_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; - break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_dtprel_lo12, - AArch64::fixup_a64_ldst16_dtprel_lo12, - AArch64::fixup_a64_ldst32_dtprel_lo12, - AArch64::fixup_a64_ldst64_dtprel_lo12 - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_dtprel_lo12_nc, - AArch64::fixup_a64_ldst16_dtprel_lo12_nc, - AArch64::fixup_a64_ldst32_dtprel_lo12_nc, - AArch64::fixup_a64_ldst64_dtprel_lo12_nc - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; - break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_tprel_lo12, - AArch64::fixup_a64_ldst16_tprel_lo12, - AArch64::fixup_a64_ldst32_tprel_lo12, - AArch64::fixup_a64_ldst64_tprel_lo12 - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { - static const unsigned FixupsBySize[] = { - AArch64::fixup_a64_ldst8_tprel_lo12_nc, - AArch64::fixup_a64_ldst16_tprel_lo12_nc, - AArch64::fixup_a64_ldst32_tprel_lo12_nc, - AArch64::fixup_a64_ldst64_tprel_lo12_nc - }; - assert(MemSize <= 8 && "Invalid fixup for operation"); - FixupKind = FixupsBySize[Log2_32(MemSize)]; - break; - } - case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: - assert(MemSize == 8 && "Invalid fixup for operation"); - FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc; - break; +template uint32_t +AArch64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + uint32_t ImmVal = 0; + + if (MO.isImm()) + ImmVal = 
static_cast(MO.getImm()); + else { + assert(MO.isExpr() && "unable to encode load/store imm operand"); + MCFixupKind Kind = MCFixupKind(FixupKind); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); + ++MCNumFixups; } - return getAddressWithFixup(ImmOp, FixupKind, Fixups, STI); + return ImmVal; } -unsigned -AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label +/// target. +uint32_t +AArch64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); + + // If the destination is an immediate, we have nothing to do. if (MO.isImm()) - return static_cast(MO.getImm()); + return MO.getImm(); + assert(MO.isExpr() && "Unexpected target type!"); + const MCExpr *Expr = MO.getExpr(); - assert(MO.isExpr()); - - unsigned FixupKind = 0; - switch(cast(MO.getExpr())->getKind()) { - default: llvm_unreachable("Invalid expression modifier"); - case AArch64MCExpr::VK_AARCH64_LO12: - FixupKind = AArch64::fixup_a64_add_lo12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_HI12: - FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: - FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: - FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_HI12: - FixupKind = AArch64::fixup_a64_add_tprel_hi12; break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12: - FixupKind = AArch64::fixup_a64_add_tprel_lo12; break; - case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: - FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break; - case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12: - FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break; - } + MCFixupKind Kind = MI.getOpcode() == AArch64::ADR + ? MCFixupKind(AArch64::fixup_aarch64_pcrel_adr_imm21) + : MCFixupKind(AArch64::fixup_aarch64_pcrel_adrp_imm21); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); - return getAddressWithFixup(MO, FixupKind, Fixups, STI); -} + MCNumFixups += 1; -unsigned -AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { + // All of the information is in the fixup. + return 0; +} +/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and +/// the 2-bit shift field. The shift field is stored in bits 13-14 of the +/// return value. +uint32_t +AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // Suboperands are [imm, shifter]. const MCOperand &MO = MI.getOperand(OpIdx); + const MCOperand &MO1 = MI.getOperand(OpIdx + 1); + assert(AArch64_AM::getShiftType(MO1.getImm()) == AArch64_AM::LSL && + "unexpected shift type for add/sub immediate"); + unsigned ShiftVal = AArch64_AM::getShiftValue(MO1.getImm()); + assert((ShiftVal == 0 || ShiftVal == 12) && + "unexpected shift value for add/sub immediate"); if (MO.isImm()) - return static_cast(MO.getImm()); - - assert(MO.isExpr()); + return MO.getImm() | (ShiftVal == 0 ? 
0 : (1 << 12)); + assert(MO.isExpr() && "Unable to encode MCOperand!"); + const MCExpr *Expr = MO.getExpr(); - unsigned Modifier = AArch64MCExpr::VK_AARCH64_None; - if (const AArch64MCExpr *Expr = dyn_cast(MO.getExpr())) - Modifier = Expr->getKind(); + // Encode the 12 bits of the fixup. + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_add_imm12); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); - unsigned FixupKind = 0; - switch(Modifier) { - case AArch64MCExpr::VK_AARCH64_None: - FixupKind = AArch64::fixup_a64_adr_prel_page; - break; - case AArch64MCExpr::VK_AARCH64_GOT: - FixupKind = AArch64::fixup_a64_adr_prel_got_page; - break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL: - FixupKind = AArch64::fixup_a64_adr_gottprel_page; - break; - case AArch64MCExpr::VK_AARCH64_TLSDESC: - FixupKind = AArch64::fixup_a64_tlsdesc_adr_page; - break; - default: - llvm_unreachable("Unknown symbol reference kind for ADRP instruction"); - } + ++MCNumFixups; - return getAddressWithFixup(MO, FixupKind, Fixups, STI); + return 0; } -unsigned -AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - +/// getCondBranchTargetOpValue - Return the encoded value for a conditional +/// branch target. +uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Only immediate expected for shift"); - return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6; -} + // If the destination is an immediate, we have nothing to do. + if (MO.isImm()) + return MO.getImm(); + assert(MO.isExpr() && "Unexpected target type!"); -unsigned -AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch19); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Only immediate expected for shift"); + ++MCNumFixups; - return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; + // All of the information is in the fixup. + return 0; } -unsigned AArch64MCCodeEmitter::getShiftRightImm8( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return 8 - MI.getOperand(Op).getImm(); -} +/// getLoadLiteralOpValue - Return the encoded value for a load-literal +/// pc-relative address. +uint32_t +AArch64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); -unsigned AArch64MCCodeEmitter::getShiftRightImm16( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return 16 - MI.getOperand(Op).getImm(); -} + // If the destination is an immediate, we have nothing to do. 
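  // (Sketch: for "ldr x0, lbl" with an already-resolved label, the scaled
  // 19-bit word offset comes back directly as an immediate; an unresolved
  // symbol instead records fixup_aarch64_ldr_pcrel_imm19 below and returns 0,
  // leaving the bits for the fixup or relocation to fill in later.)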
+ if (MO.isImm()) + return MO.getImm(); + assert(MO.isExpr() && "Unexpected target type!"); -unsigned AArch64MCCodeEmitter::getShiftRightImm32( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return 32 - MI.getOperand(Op).getImm(); -} + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_ldr_pcrel_imm19); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); -unsigned AArch64MCCodeEmitter::getShiftRightImm64( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return 64 - MI.getOperand(Op).getImm(); -} + ++MCNumFixups; -unsigned AArch64MCCodeEmitter::getShiftLeftImm8( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 8; + // All of the information is in the fixup. + return 0; } -unsigned AArch64MCCodeEmitter::getShiftLeftImm16( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 16; +uint32_t +AArch64MCCodeEmitter::getMemExtendOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + unsigned SignExtend = MI.getOperand(OpIdx).getImm(); + unsigned DoShift = MI.getOperand(OpIdx + 1).getImm(); + return (SignExtend << 1) | DoShift; } -unsigned AArch64MCCodeEmitter::getShiftLeftImm32( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 32; -} +uint32_t +AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); -unsigned AArch64MCCodeEmitter::getShiftLeftImm64( - const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - return MI.getOperand(Op).getImm() - 64; + if (MO.isImm()) + return MO.getImm(); + assert(MO.isExpr() && "Unexpected movz/movk immediate"); + + Fixups.push_back(MCFixup::Create( + 0, MO.getExpr(), MCFixupKind(AArch64::fixup_aarch64_movw), MI.getLoc())); + + ++MCNumFixups; + + return 0; } -template unsigned -AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI, - unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- +/// branch target. +uint32_t AArch64MCCodeEmitter::getTestBranchTargetOpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); - if (MO.isExpr()) - return getAddressWithFixup(MO, fixupDesired, Fixups, STI); + // If the destination is an immediate, we have nothing to do. + if (MO.isImm()) + return MO.getImm(); + assert(MO.isExpr() && "Unexpected ADR target type!"); - assert(MO.isImm()); - return MO.getImm(); + MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch14); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); + + ++MCNumFixups; + + // All of the information is in the fixup. + return 0; } -unsigned -AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI, - unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +/// getBranchTargetOpValue - Return the encoded value for an unconditional +/// branch target. 
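// For illustration: "bl callee" records fixup_aarch64_pcrel_call26, while
// "b target" records fixup_aarch64_pcrel_branch26. Both hold a 26-bit word
// offset (a 28-bit byte range, since the low two bits are implicitly zero);
// on ELF they eventually become distinct relocations (R_AARCH64_CALL26
// vs. R_AARCH64_JUMP26).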
+uint32_t +AArch64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { const MCOperand &MO = MI.getOperand(OpIdx); + // If the destination is an immediate, we have nothing to do. if (MO.isImm()) return MO.getImm(); + assert(MO.isExpr() && "Unexpected ADR target type!"); - assert(MO.isExpr()); + MCFixupKind Kind = MI.getOpcode() == AArch64::BL + ? MCFixupKind(AArch64::fixup_aarch64_pcrel_call26) + : MCFixupKind(AArch64::fixup_aarch64_pcrel_branch26); + Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - unsigned FixupKind; - if (isa(MO.getExpr())) { - assert(dyn_cast(MO.getExpr())->getKind() - == AArch64MCExpr::VK_AARCH64_GOTTPREL - && "Invalid symbol modifier for literal load"); - FixupKind = AArch64::fixup_a64_ld_gottprel_prel19; - } else { - FixupKind = AArch64::fixup_a64_ld_prel; - } + ++MCNumFixups; - return getAddressWithFixup(MO, FixupKind, Fixups, STI); + // All of the information is in the fixup. + return 0; } +/// getVecShifterOpValue - Return the encoded value for the vector shifter: +/// +/// 00 -> 0 +/// 01 -> 8 +/// 10 -> 16 +/// 11 -> 24 +uint32_t +AArch64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the shift amount!"); -unsigned -AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, - const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (MO.isReg()) { - return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - } else if (MO.isImm()) { - return static_cast(MO.getImm()); + switch (MO.getImm()) { + default: + break; + case 0: + return 0; + case 8: + return 1; + case 16: + return 2; + case 24: + return 3; } - llvm_unreachable("Unable to encode MCOperand!"); + assert(false && "Invalid value for vector shift amount!"); return 0; } -unsigned -AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &UImm16MO = MI.getOperand(OpIdx); - const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1); +uint32_t +AArch64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the shift amount!"); + return 64 - (MO.getImm()); +} - unsigned Result = static_cast(ShiftMO.getImm()) << 16; +uint32_t AArch64MCCodeEmitter::getSIMDShift64_32OpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the shift amount!"); + return 64 - (MO.getImm() | 32); +} - if (UImm16MO.isImm()) { - Result |= UImm16MO.getImm(); - return Result; - } +uint32_t +AArch64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the shift amount!"); + return 32 - (MO.getImm() | 16); +} - const AArch64MCExpr *A64E = cast(UImm16MO.getExpr()); - AArch64::Fixups requestedFixup; - switch (A64E->getKind()) { - default: llvm_unreachable("unexpected expression modifier"); - case 
AArch64MCExpr::VK_AARCH64_ABS_G0: - requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break; - case AArch64MCExpr::VK_AARCH64_ABS_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_ABS_G1: - requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break; - case AArch64MCExpr::VK_AARCH64_ABS_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_ABS_G2: - requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break; - case AArch64MCExpr::VK_AARCH64_ABS_G2_NC: - requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break; - case AArch64MCExpr::VK_AARCH64_ABS_G3: - requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break; - case AArch64MCExpr::VK_AARCH64_SABS_G0: - requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break; - case AArch64MCExpr::VK_AARCH64_SABS_G1: - requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break; - case AArch64MCExpr::VK_AARCH64_SABS_G2: - requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break; - case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break; - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC: - requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G0: - requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break; - case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC: - requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break; - } +uint32_t +AArch64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the shift amount!"); + return 16 - (MO.getImm() | 8); +} - return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups, STI); +/// getFixedPointScaleOpValue - Return the encoded value for the +// FP-to-fixed-point scale factor. 
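// Worked example, assuming "fcvtzs x0, s0, #16" (16 fractional bits): the
// scale field computed below stores 64 - 16 = 48, and a decoder recovers
// the original amount as 64 - 48 = 16.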
+uint32_t AArch64MCCodeEmitter::getFixedPointScaleOpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return 64 - MO.getImm(); } -template unsigned -AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, - unsigned EncodedValue, +uint32_t +AArch64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { - if (!hasRs) EncodedValue |= 0x001F0000; - if (!hasRt2) EncodedValue |= 0x00007C00; + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return 64 - MO.getImm(); +} - return EncodedValue; +uint32_t +AArch64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return 32 - MO.getImm(); } -unsigned -AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const { +uint32_t +AArch64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return 16 - MO.getImm(); +} + +uint32_t +AArch64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return 8 - MO.getImm(); +} + +uint32_t +AArch64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return MO.getImm() - 64; +} + +uint32_t +AArch64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return MO.getImm() - 32; +} + +uint32_t +AArch64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return MO.getImm() - 16; +} + +uint32_t +AArch64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && "Expected an immediate value for the scale amount!"); + return MO.getImm() - 8; +} + +/// getMoveVecShifterOpValue - Return the encoded value for the vector move +/// shifter (MSL). 
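// For illustration: in "movi v0.2s, #255, msl #8" the MSL shift amount 8
// encodes as 0 below, and "msl #16" encodes as 1; these are the only two
// shifting-ones amounts AdvSIMD accepts.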
+uint32_t AArch64MCCodeEmitter::getMoveVecShifterOpValue( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpIdx); + assert(MO.isImm() && + "Expected an immediate value for the move shift amount!"); + unsigned ShiftVal = AArch64_AM::getShiftValue(MO.getImm()); + assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!"); + return ShiftVal == 8 ? 0 : 1; +} + +unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const { // If one of the signed fixup kinds is applied to a MOVZ instruction, the // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's // job to ensure that any bits possibly affected by this are 0. This means we @@ -552,23 +589,38 @@ AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, const AArch64MCExpr *A64E = cast(UImm16MO.getExpr()); switch (A64E->getKind()) { - case AArch64MCExpr::VK_AARCH64_SABS_G0: - case AArch64MCExpr::VK_AARCH64_SABS_G1: - case AArch64MCExpr::VK_AARCH64_SABS_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G2: - case AArch64MCExpr::VK_AARCH64_DTPREL_G1: - case AArch64MCExpr::VK_AARCH64_DTPREL_G0: - case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G2: - case AArch64MCExpr::VK_AARCH64_TPREL_G1: - case AArch64MCExpr::VK_AARCH64_TPREL_G0: + case AArch64MCExpr::VK_DTPREL_G2: + case AArch64MCExpr::VK_DTPREL_G1: + case AArch64MCExpr::VK_DTPREL_G0: + case AArch64MCExpr::VK_GOTTPREL_G1: + case AArch64MCExpr::VK_TPREL_G2: + case AArch64MCExpr::VK_TPREL_G1: + case AArch64MCExpr::VK_TPREL_G0: return EncodedValue & ~(1u << 30); default: // Nothing to do for an unsigned fixup. return EncodedValue; } - llvm_unreachable("Should have returned by now"); + + return EncodedValue & ~(1u << 30); +} + +void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + if (MI.getOpcode() == AArch64::TLSDESCCALL) { + // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the + // following (BLR) instruction. It doesn't emit any code itself so it + // doesn't go through the normal TableGenerated channels. + MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); + Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup)); + return; + } + + uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); + EmitConstant(Binary, 4, OS); + ++MCNumEmitted; // Keep track of the # of mi's emitted. } unsigned @@ -581,32 +633,22 @@ AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI, return EncodedValue; } -MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new AArch64MCCodeEmitter(Ctx); -} - -void AArch64MCCodeEmitter:: -EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (MI.getOpcode() == AArch64::TLSDESCCALL) { - // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the - // following (BLR) instruction. It doesn't emit any code itself so it - // doesn't go through the normal TableGenerated channels. 
- MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call); - const MCExpr *Expr; - Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx); - Fixups.push_back(MCFixup::Create(0, Expr, Fixup)); - return; - } - - uint32_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); +template unsigned +AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI, + unsigned EncodedValue, + const MCSubtargetInfo &STI) const { + if (!hasRs) EncodedValue |= 0x001F0000; + if (!hasRt2) EncodedValue |= 0x00007C00; - EmitInstruction(Binary, OS); + return EncodedValue; } +unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison( + const MCInst &MI, unsigned EncodedValue, const MCSubtargetInfo &STI) const { + // The Rm field of FCMP and friends is unused - it should be assembled + // as 0, but is ignored by the processor. + EncodedValue &= ~(0x1f << 16); + return EncodedValue; +} #include "AArch64GenMCCodeEmitter.inc" diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index c7ccaee..85c3ec7 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -12,74 +12,121 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "aarch64mcexpr" #include "AArch64MCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -const AArch64MCExpr* -AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr, - MCContext &Ctx) { - return new (Ctx) AArch64MCExpr(Kind, Expr); +#define DEBUG_TYPE "aarch64symbolrefexpr" + +const AArch64MCExpr *AArch64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, + MCContext &Ctx) { + return new (Ctx) AArch64MCExpr(Expr, Kind); +} + +StringRef AArch64MCExpr::getVariantKindName() const { + switch (static_cast(getKind())) { + case VK_CALL: return ""; + case VK_LO12: return ":lo12:"; + case VK_ABS_G3: return ":abs_g3:"; + case VK_ABS_G2: return ":abs_g2:"; + case VK_ABS_G2_S: return ":abs_g2_s:"; + case VK_ABS_G2_NC: return ":abs_g2_nc:"; + case VK_ABS_G1: return ":abs_g1:"; + case VK_ABS_G1_S: return ":abs_g1_s:"; + case VK_ABS_G1_NC: return ":abs_g1_nc:"; + case VK_ABS_G0: return ":abs_g0:"; + case VK_ABS_G0_S: return ":abs_g0_s:"; + case VK_ABS_G0_NC: return ":abs_g0_nc:"; + case VK_DTPREL_G2: return ":dtprel_g2:"; + case VK_DTPREL_G1: return ":dtprel_g1:"; + case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:"; + case VK_DTPREL_G0: return ":dtprel_g0:"; + case VK_DTPREL_G0_NC: return ":dtprel_g0_nc:"; + case VK_DTPREL_HI12: return ":dtprel_hi12:"; + case VK_DTPREL_LO12: return ":dtprel_lo12:"; + case VK_DTPREL_LO12_NC: return ":dtprel_lo12_nc:"; + case VK_TPREL_G2: return ":tprel_g2:"; + case VK_TPREL_G1: return ":tprel_g1:"; + case VK_TPREL_G1_NC: return ":tprel_g1_nc:"; + case VK_TPREL_G0: return ":tprel_g0:"; + case VK_TPREL_G0_NC: return ":tprel_g0_nc:"; + case VK_TPREL_HI12: return ":tprel_hi12:"; + case VK_TPREL_LO12: return ":tprel_lo12:"; + case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:"; + case VK_TLSDESC_LO12: return ":tlsdesc_lo12:"; + case VK_ABS_PAGE: return ""; + case VK_GOT_PAGE: return ":got:"; + case VK_GOT_LO12: return ":got_lo12:"; + case VK_GOTTPREL_PAGE: return ":gottprel:"; + case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:"; + case VK_GOTTPREL_G1: return ":gottprel_g1:"; + case 
VK_GOTTPREL_G0_NC: return ":gottprel_g0_nc:"; + case VK_TLSDESC: return ""; + case VK_TLSDESC_PAGE: return ":tlsdesc:"; + default: + llvm_unreachable("Invalid ELF symbol kind"); + } } void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { - switch (Kind) { - default: llvm_unreachable("Invalid kind!"); - case VK_AARCH64_GOT: OS << ":got:"; break; - case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break; - case VK_AARCH64_LO12: OS << ":lo12:"; break; - case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break; - case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break; - case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break; - case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break; - case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break; - case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break; - case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break; - case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break; - case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break; - case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break; - case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break; - case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break; - case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break; - case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break; - case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break; - case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break; - case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break; - case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break; - case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break; - case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break; - case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break; - case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break; - case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break; - case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break; - case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break; - case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break; - case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break; - case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break; - case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break; - case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break; - case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break; - case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break; + if (getKind() != VK_NONE) + OS << getVariantKindName(); + OS << *Expr; +} + +// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps +// that method should be made public? +// FIXME: really do above: now that two backends are using it. 
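// Sketch of the recursive walk defined below, on a hypothetical expression
// (a + b) - 4: the outer Binary node recurses into both operands, the inner
// Binary node into its two SymbolRef leaves (registering symbol data for "a"
// and "b" via getOrCreateSymbolData), and the Constant leaf is skipped.
// Nested target expressions are impossible, hence the unreachable.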
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { + switch (Value->getKind()) { + case MCExpr::Target: + llvm_unreachable("Can't handle nested target expr!"); + break; + + case MCExpr::Constant: + break; + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Value); + AddValueSymbolsImpl(BE->getLHS(), Asm); + AddValueSymbolsImpl(BE->getRHS(), Asm); + break; + } + + case MCExpr::SymbolRef: + Asm->getOrCreateSymbolData(cast(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbolsImpl(cast(Value)->getSubExpr(), Asm); + break; } +} + +void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { + AddValueSymbolsImpl(getSubExpr(), Asm); +} - const MCExpr *Expr = getSubExpr(); - if (Expr->getKind() != MCExpr::SymbolRef) - OS << '('; - Expr->print(OS); - if (Expr->getKind() != MCExpr::SymbolRef) - OS << ')'; +const MCSection *AArch64MCExpr::FindAssociatedSection() const { + llvm_unreachable("FIXME: what goes here?"); } -bool -AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - return getSubExpr()->EvaluateAsRelocatable(Res, Layout); +bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAsmLayout *Layout) const { + if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout)) + return false; + + Res = + MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); + + return true; } static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { @@ -113,66 +160,15 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { } void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { - switch (getKind()) { + switch (getSymbolLoc(Kind)) { default: return; - case VK_AARCH64_DTPREL_G2: - case VK_AARCH64_DTPREL_G1: - case VK_AARCH64_DTPREL_G1_NC: - case VK_AARCH64_DTPREL_G0: - case VK_AARCH64_DTPREL_G0_NC: - case VK_AARCH64_DTPREL_HI12: - case VK_AARCH64_DTPREL_LO12: - case VK_AARCH64_DTPREL_LO12_NC: - case VK_AARCH64_GOTTPREL_G1: - case VK_AARCH64_GOTTPREL_G0_NC: - case VK_AARCH64_GOTTPREL: - case VK_AARCH64_GOTTPREL_LO12: - case VK_AARCH64_TPREL_G2: - case VK_AARCH64_TPREL_G1: - case VK_AARCH64_TPREL_G1_NC: - case VK_AARCH64_TPREL_G0: - case VK_AARCH64_TPREL_G0_NC: - case VK_AARCH64_TPREL_HI12: - case VK_AARCH64_TPREL_LO12: - case VK_AARCH64_TPREL_LO12_NC: - case VK_AARCH64_TLSDESC: - case VK_AARCH64_TLSDESC_LO12: + case VK_DTPREL: + case VK_GOTTPREL: + case VK_TPREL: + case VK_TLSDESC: break; } fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); } - -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -// FIXME: really do above: now that two backends are using it. 
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast(Value)->getSubExpr(), Asm); - break; - } -} - -void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); -} diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index d9798ae..e869ed0 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -1,4 +1,4 @@ -//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===// +//=--- AArch64MCExpr.h - AArch64 specific MC expression classes ---*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -12,168 +12,149 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_AARCH64MCEXPR_H -#define LLVM_AARCH64MCEXPR_H +#ifndef LLVM_AArch64MCEXPR_H +#define LLVM_AArch64MCEXPR_H #include "llvm/MC/MCExpr.h" +#include "llvm/Support/ErrorHandling.h" namespace llvm { class AArch64MCExpr : public MCTargetExpr { public: enum VariantKind { - VK_AARCH64_None, - VK_AARCH64_GOT, // :got: modifier in assembly - VK_AARCH64_GOT_LO12, // :got_lo12: - VK_AARCH64_LO12, // :lo12: - - VK_AARCH64_ABS_G0, // :abs_g0: - VK_AARCH64_ABS_G0_NC, // :abs_g0_nc: - VK_AARCH64_ABS_G1, - VK_AARCH64_ABS_G1_NC, - VK_AARCH64_ABS_G2, - VK_AARCH64_ABS_G2_NC, - VK_AARCH64_ABS_G3, - - VK_AARCH64_SABS_G0, // :abs_g0_s: - VK_AARCH64_SABS_G1, - VK_AARCH64_SABS_G2, - - VK_AARCH64_DTPREL_G2, // :dtprel_g2: - VK_AARCH64_DTPREL_G1, - VK_AARCH64_DTPREL_G1_NC, - VK_AARCH64_DTPREL_G0, - VK_AARCH64_DTPREL_G0_NC, - VK_AARCH64_DTPREL_HI12, - VK_AARCH64_DTPREL_LO12, - VK_AARCH64_DTPREL_LO12_NC, - - VK_AARCH64_GOTTPREL_G1, // :gottprel: - VK_AARCH64_GOTTPREL_G0_NC, - VK_AARCH64_GOTTPREL, - VK_AARCH64_GOTTPREL_LO12, - - VK_AARCH64_TPREL_G2, // :tprel: - VK_AARCH64_TPREL_G1, - VK_AARCH64_TPREL_G1_NC, - VK_AARCH64_TPREL_G0, - VK_AARCH64_TPREL_G0_NC, - VK_AARCH64_TPREL_HI12, - VK_AARCH64_TPREL_LO12, - VK_AARCH64_TPREL_LO12_NC, - - VK_AARCH64_TLSDESC, // :tlsdesc: - VK_AARCH64_TLSDESC_LO12 + VK_NONE = 0x000, + + // Symbol locations specifying (roughly speaking) what calculation should be + // performed to construct the final address for the relocated + // symbol. E.g. direct, via the GOT, ... + VK_ABS = 0x001, + VK_SABS = 0x002, + VK_GOT = 0x003, + VK_DTPREL = 0x004, + VK_GOTTPREL = 0x005, + VK_TPREL = 0x006, + VK_TLSDESC = 0x007, + VK_SymLocBits = 0x00f, + + // Variants specifying which part of the final address calculation is + // used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits for a + // MOVZ/MOVK. + VK_PAGE = 0x010, + VK_PAGEOFF = 0x020, + VK_HI12 = 0x030, + VK_G0 = 0x040, + VK_G1 = 0x050, + VK_G2 = 0x060, + VK_G3 = 0x070, + VK_AddressFragBits = 0x0f0, + + // Whether the final relocation is a checked one (where a linker should + // perform a range-check on the final address) or not. Note that this field + // is unfortunately sometimes omitted from the assembly syntax. E.g. :lo12: + // on its own is a non-checked relocation. 
We side with ELF on being + // explicit about this! + VK_NC = 0x100, + + // Convenience definitions for referring to specific textual representations + // of relocation specifiers. Note that this means the "_NC" is sometimes + // omitted in line with assembly syntax here (VK_LO12 rather than VK_LO12_NC + // since a user would write ":lo12:"). + VK_CALL = VK_ABS, + VK_ABS_PAGE = VK_ABS | VK_PAGE, + VK_ABS_G3 = VK_ABS | VK_G3, + VK_ABS_G2 = VK_ABS | VK_G2, + VK_ABS_G2_S = VK_SABS | VK_G2, + VK_ABS_G2_NC = VK_ABS | VK_G2 | VK_NC, + VK_ABS_G1 = VK_ABS | VK_G1, + VK_ABS_G1_S = VK_SABS | VK_G1, + VK_ABS_G1_NC = VK_ABS | VK_G1 | VK_NC, + VK_ABS_G0 = VK_ABS | VK_G0, + VK_ABS_G0_S = VK_SABS | VK_G0, + VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC, + VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC, + VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC, + VK_GOT_PAGE = VK_GOT | VK_PAGE, + VK_DTPREL_G2 = VK_DTPREL | VK_G2, + VK_DTPREL_G1 = VK_DTPREL | VK_G1, + VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC, + VK_DTPREL_G0 = VK_DTPREL | VK_G0, + VK_DTPREL_G0_NC = VK_DTPREL | VK_G0 | VK_NC, + VK_DTPREL_HI12 = VK_DTPREL | VK_HI12, + VK_DTPREL_LO12 = VK_DTPREL | VK_PAGEOFF, + VK_DTPREL_LO12_NC = VK_DTPREL | VK_PAGEOFF | VK_NC, + VK_GOTTPREL_PAGE = VK_GOTTPREL | VK_PAGE, + VK_GOTTPREL_LO12_NC = VK_GOTTPREL | VK_PAGEOFF | VK_NC, + VK_GOTTPREL_G1 = VK_GOTTPREL | VK_G1, + VK_GOTTPREL_G0_NC = VK_GOTTPREL | VK_G0 | VK_NC, + VK_TPREL_G2 = VK_TPREL | VK_G2, + VK_TPREL_G1 = VK_TPREL | VK_G1, + VK_TPREL_G1_NC = VK_TPREL | VK_G1 | VK_NC, + VK_TPREL_G0 = VK_TPREL | VK_G0, + VK_TPREL_G0_NC = VK_TPREL | VK_G0 | VK_NC, + VK_TPREL_HI12 = VK_TPREL | VK_HI12, + VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF, + VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC, + VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC, + VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE, + + VK_INVALID = 0xfff }; private: - const VariantKind Kind; const MCExpr *Expr; + const VariantKind Kind; - explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr) - : Kind(_Kind), Expr(_Expr) {} + explicit AArch64MCExpr(const MCExpr *Expr, VariantKind Kind) + : Expr(Expr), Kind(Kind) {} public: /// @name Construction /// @{ - static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr, - MCContext &Ctx); - - static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_AARCH64_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) { - return Create(VK_AARCH64_GOT, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOT_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx); - } - - static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOTTPREL, Expr, Ctx); - } - - static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx); - } - - static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TLSDESC, Expr, Ctx); - } + static const AArch64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, + MCContext &Ctx); - static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr, - MCContext &Ctx) { - return 
Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx); - } + /// @} + /// @name Accessors + /// @{ - static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TPREL_G1, Expr, Ctx); - } + /// Get the kind of this expression. + VariantKind getKind() const { return static_cast<VariantKind>(Kind); } - static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx); - } + /// Get the expression this modifier applies to. + const MCExpr *getSubExpr() const { return Expr; } - static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G3, Expr, Ctx); - } + /// @} + /// @name VariantKind information extractors. + /// @{ - static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx); + static VariantKind getSymbolLoc(VariantKind Kind) { + return static_cast<VariantKind>(Kind & VK_SymLocBits); } - static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G1_NC, Expr, Ctx); + static VariantKind getAddressFrag(VariantKind Kind) { + return static_cast<VariantKind>(Kind & VK_AddressFragBits); } - static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr, - MCContext &Ctx) { - return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx); - } + static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; } /// @} - /// @name Accessors - /// @{ - /// getOpcode - Get the kind of this expression. - VariantKind getKind() const { return Kind; } + /// Convert the variant kind into an ELF-appropriate modifier + /// (e.g. ":got:", ":lo12:"). + StringRef getVariantKindName() const; - /// getSubExpr - Get the child of this expression.
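To make the bit-packing behind these extractors concrete, here is a minimal, self-contained sketch (illustrative only, not part of the patch): the enumerator values are copied from the VariantKind definitions above, and the three masks recover the symbol location, address fragment, and checked/unchecked flag from a combined kind.

    #include <cassert>

    // Values mirror the VariantKind enum introduced above.
    enum Kind : unsigned {
      K_DTPREL = 0x004, K_SymLocBits = 0x00f,
      K_PAGEOFF = 0x020, K_AddressFragBits = 0x0f0,
      K_NC = 0x100,
      K_DTPREL_LO12_NC = K_DTPREL | K_PAGEOFF | K_NC, // 0x124
    };

    int main() {
      unsigned K = K_DTPREL_LO12_NC;
      assert((K & K_SymLocBits) == K_DTPREL);       // symbol location: DTPREL
      assert((K & K_AddressFragBits) == K_PAGEOFF); // fragment: low 12 bits
      assert((K & K_NC) != 0);                      // unchecked relocation
      return 0;
    }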
- const MCExpr *getSubExpr() const { return Expr; } + void PrintImpl(raw_ostream &OS) const override; - /// @} + void AddValueSymbols(MCAssembler *) const override; + + const MCSection *FindAssociatedSection() const override; - void PrintImpl(raw_ostream &OS) const; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - void AddValueSymbols(MCAssembler *) const; - const MCSection *FindAssociatedSection() const { - return getSubExpr()->FindAssociatedSection(); - } + const MCAsmLayout *Layout) const override; - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index 3d19e42..ae698c5 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -1,4 +1,4 @@ -//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===// +//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,9 +15,7 @@ #include "AArch64ELFStreamer.h" #include "AArch64MCAsmInfo.h" #include "InstPrinter/AArch64InstPrinter.h" -#include "llvm/ADT/APInt.h" #include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" @@ -25,8 +23,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" -#define GET_REGINFO_MC_DESC -#include "AArch64GenRegisterInfo.inc" +using namespace llvm; #define GET_INSTRINFO_MC_DESC #include "AArch64GenInstrInfo.inc" @@ -34,26 +31,29 @@ #define GET_SUBTARGETINFO_MC_DESC #include "AArch64GenSubtargetInfo.inc" -using namespace llvm; +#define GET_REGINFO_MC_DESC +#include "AArch64GenRegisterInfo.inc" -MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT, - StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitAArch64MCSubtargetInfo(X, TT, CPU, FS); +static MCInstrInfo *createAArch64MCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitAArch64MCInstrInfo(X); return X; } +static MCSubtargetInfo * +createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); -static MCInstrInfo *createAArch64MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitAArch64MCInstrInfo(X); + if (CPU.empty()) + CPU = "generic"; + + InitAArch64MCSubtargetInfo(X, TT, CPU, FS); return X; } static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) { MCRegisterInfo *X = new MCRegisterInfo(); - InitAArch64MCRegisterInfo(X, AArch64::X30); + InitAArch64MCRegisterInfo(X, AArch64::LR); return X; } @@ -61,9 +61,17 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); - MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(TT); - unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0); + MCAsmInfo *MAI; + if (TheTriple.isOSDarwin()) + MAI = new AArch64MCAsmInfoDarwin(); + else { + assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); + MAI = new AArch64MCAsmInfoELF(TT); + } + + // Initial state of the frame pointer is SP. 
+ unsigned Reg = MRI.getDwarfRegNum(AArch64::SP, true); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); MAI->addInitialFrameState(Inst); return MAI; @@ -72,40 +80,35 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) { - // On ELF platforms the default static relocation model has a smart enough - // linker to cope with referencing external symbols defined in a shared - // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. - RM = Reloc::Static; - } + Triple TheTriple(TT); + assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) && + "Only expect Darwin and ELF targets"); if (CM == CodeModel::Default) CM = CodeModel::Small; - else if (CM == CodeModel::JITDefault) { - // The default MCJIT memory managers make no guarantees about where they can - // find an executable page; JITed code needs to be able to refer to globals - // no matter how far away they are. + // The default MCJIT memory managers make no guarantees about where they can + // find an executable page; JITed code needs to be able to refer to globals + // no matter how far away they are. + else if (CM == CodeModel::JITDefault) CM = CodeModel::Large; - } + else if (CM != CodeModel::Small && CM != CodeModel::Large) + report_fatal_error( + "Only small and large code models are allowed on AArch64"); + + // AArch64 Darwin is always PIC. + if (TheTriple.isOSDarwin()) + RM = Reloc::PIC_; + // On ELF platforms the default static relocation model has a smart enough + // linker to cope with referencing external symbols defined in a shared + // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. + else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) + RM = Reloc::Static; + MCCodeGenInfo *X = new MCCodeGenInfo(); X->InitMCCodeGenInfo(RM, CM, OL); return X; } -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, - bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack); -} - - static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, unsigned SyntaxVariant, const MCAsmInfo &MAI, @@ -114,108 +117,109 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) return new AArch64InstPrinter(MAI, MII, MRI, STI); - return 0; -} - -namespace { - -class AArch64MCInstrAnalysis : public MCInstrAnalysis { -public: - AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {} - - virtual bool isUnconditionalBranch(const MCInst &Inst) const { - if (Inst.getOpcode() == AArch64::Bcc - && Inst.getOperand(0).getImm() == A64CC::AL) - return true; - return MCInstrAnalysis::isUnconditionalBranch(Inst); - } - - virtual bool isConditionalBranch(const MCInst &Inst) const { - if (Inst.getOpcode() == AArch64::Bcc - && Inst.getOperand(0).getImm() == A64CC::AL) - return false; - return MCInstrAnalysis::isConditionalBranch(Inst); - } - - bool evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size, uint64_t &Target) const { - unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0; - // FIXME: We only handle PCRel branches for now. 
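The FIXME above concerns exactly the computation the removed evaluateBranch performed once the operand was known to be PC-relative: the branch target is simply the instruction address plus the sign-extended immediate. A stand-alone restatement (hypothetical helper name, not patch code):

    #include <cstdint>

    // PC-relative branch evaluation as done by the removed
    // AArch64MCInstrAnalysis::evaluateBranch: Target = Addr + Imm.
    static uint64_t evalPCRelBranch(uint64_t InstAddr, int64_t Imm) {
      return InstAddr + Imm;
    }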
- if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType - != MCOI::OPERAND_PCREL) - return false; - - int64_t Imm = Inst.getOperand(LblOperand).getImm(); - Target = Addr + Imm; - return true; - } -}; + if (SyntaxVariant == 1) + return new AArch64AppleInstPrinter(MAI, MII, MRI, STI); + return nullptr; } -static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) { - return new AArch64MCInstrAnalysis(Info); -} +static MCStreamer *createMCStreamer(const Target &T, StringRef TT, + MCContext &Ctx, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll, + bool NoExecStack) { + Triple TheTriple(TT); + if (TheTriple.isOSDarwin()) + return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, + /*LabelSections*/ true); + return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); +} +// Force static initialization. extern "C" void LLVMInitializeAArch64TargetMC() { // Register the MC asm info. - RegisterMCAsmInfoFn A(TheAArch64leTarget, createAArch64MCAsmInfo); - RegisterMCAsmInfoFn B(TheAArch64beTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn Z(TheARM64leTarget, createAArch64MCAsmInfo); + RegisterMCAsmInfoFn W(TheARM64beTarget, createAArch64MCAsmInfo); // Register the MC codegen info. TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget, createAArch64MCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget, createAArch64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget, + createAArch64MCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget, + createAArch64MCCodeGenInfo); // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget, createAArch64MCInstrInfo); TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget, createAArch64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget, + createAArch64MCInstrInfo); + TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget, + createAArch64MCInstrInfo); // Register the MC register info. TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget, createAArch64MCRegisterInfo); TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget, createAArch64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheARM64leTarget, + createAArch64MCRegisterInfo); + TargetRegistry::RegisterMCRegInfo(TheARM64beTarget, + createAArch64MCRegisterInfo); // Register the MC subtarget info. - using AArch64_MC::createAArch64MCSubtargetInfo; TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget, createAArch64MCSubtargetInfo); TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget, createAArch64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget, + createAArch64MCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget, + createAArch64MCSubtargetInfo); - // Register the MC instruction analyzer. - TargetRegistry::RegisterMCInstrAnalysis(TheAArch64leTarget, - createAArch64MCInstrAnalysis); - TargetRegistry::RegisterMCInstrAnalysis(TheAArch64beTarget, - createAArch64MCInstrAnalysis); + // Register the asm backend. 
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, + createAArch64leAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, + createAArch64beAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget, + createAArch64leAsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget, + createAArch64beAsmBackend); // Register the MC Code Emitter TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget, createAArch64MCCodeEmitter); TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget, createAArch64MCCodeEmitter); - - // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget, - createAArch64leAsmBackend); - TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget, - createAArch64beAsmBackend); + TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget, + createAArch64MCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget, + createAArch64MCCodeEmitter); // Register the object streamer. TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget, createMCStreamer); TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer); + TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer); // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget, createAArch64MCInstPrinter); TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget, createAArch64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget, + createAArch64MCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget, + createAArch64MCInstPrinter); } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index bd8beaf..d886ea2 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -11,18 +11,19 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_AARCH64MCTARGETDESC_H -#define LLVM_AARCH64MCTARGETDESC_H +#ifndef AArch64MCTARGETDESC_H +#define AArch64MCTARGETDESC_H #include "llvm/Support/DataTypes.h" +#include <string> namespace llvm { class MCAsmBackend; class MCCodeEmitter; class MCContext; class MCInstrInfo; -class MCObjectWriter; class MCRegisterInfo; +class MCObjectWriter; class MCSubtargetInfo; class StringRef; class Target; @@ -30,28 +31,25 @@ class raw_ostream; extern Target TheAArch64leTarget; extern Target TheAArch64beTarget; - -namespace AArch64_MC { - MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS); -} +extern Target TheARM64leTarget; +extern Target TheARM64beTarget; MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); + const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, + MCContext &Ctx); +MCAsmBackend *createAArch64leAsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU); +MCAsmBackend *createAArch64beAsmBackend(const Target &T, + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU); -MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI, +MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI, bool IsLittleEndian); -MCAsmBackend *createAArch64leAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); - -MCAsmBackend
*createAArch64beAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); +MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, + uint32_t CPUSubtype); } // End llvm namespace diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp new file mode 100644 index 0000000..5c86189 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -0,0 +1,396 @@ +//===-- AArch64MachObjectWriter.cpp - ARM Mach Object Writer --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/AArch64FixupKinds.h" +#include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCValue.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" +using namespace llvm; + +namespace { +class AArch64MachObjectWriter : public MCMachObjectTargetWriter { + bool getAArch64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType, + const MCSymbolRefExpr *Sym, + unsigned &Log2Size, const MCAssembler &Asm); + +public: + AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) + : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, + /*UseAggressiveSymbolFolding=*/true) {} + + void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, + const MCAsmLayout &Layout, const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) override; +}; +} + +bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( + const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, + unsigned &Log2Size, const MCAssembler &Asm) { + RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED); + Log2Size = ~0U; + + switch ((unsigned)Fixup.getKind()) { + default: + return false; + + case FK_Data_1: + Log2Size = llvm::Log2_32(1); + return true; + case FK_Data_2: + Log2Size = llvm::Log2_32(2); + return true; + case FK_Data_4: + Log2Size = llvm::Log2_32(4); + if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) + RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); + return true; + case FK_Data_8: + Log2Size = llvm::Log2_32(8); + if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) + RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); + return true; + case AArch64::fixup_aarch64_add_imm12: + case AArch64::fixup_aarch64_ldst_imm12_scale1: + case AArch64::fixup_aarch64_ldst_imm12_scale2: + case AArch64::fixup_aarch64_ldst_imm12_scale4: + case AArch64::fixup_aarch64_ldst_imm12_scale8: + case AArch64::fixup_aarch64_ldst_imm12_scale16: + Log2Size = llvm::Log2_32(4); + switch (Sym->getKind()) { + default: + assert(0 && "Unexpected symbol reference variant kind!"); + case MCSymbolRefExpr::VK_PAGEOFF: + RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12); + return true; + case MCSymbolRefExpr::VK_GOTPAGEOFF: + RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); + return true; + case MCSymbolRefExpr::VK_TLVPPAGEOFF: + RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); + return true; + } + case 
AArch64::fixup_aarch64_pcrel_adrp_imm21: + Log2Size = llvm::Log2_32(4); + // This encompasses the relocation for the whole 21-bit value. + switch (Sym->getKind()) { + default: + Asm.getContext().FatalError(Fixup.getLoc(), + "ADR/ADRP relocations must be GOT relative"); + case MCSymbolRefExpr::VK_PAGE: + RelocType = unsigned(MachO::ARM64_RELOC_PAGE21); + return true; + case MCSymbolRefExpr::VK_GOTPAGE: + RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGE21); + return true; + case MCSymbolRefExpr::VK_TLVPPAGE: + RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); + return true; + } + return true; + case AArch64::fixup_aarch64_pcrel_branch26: + case AArch64::fixup_aarch64_pcrel_call26: + Log2Size = llvm::Log2_32(4); + RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26); + return true; + } +} + +void AArch64MachObjectWriter::RecordRelocation( + MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); + + // See <reloc.h>. + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment); + unsigned Log2Size = 0; + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + unsigned Kind = Fixup.getKind(); + + FixupOffset += Fixup.getOffset(); + + // AArch64 pcrel relocation addends do not include the section offset. + if (IsPCRel) + FixedValue += FixupOffset; + + // ADRP fixups use relocations for the whole symbol value and only + // put the addend in the instruction itself. Clear out any value the + // generic code figured out from the symbol definition. + if (Kind == AArch64::fixup_aarch64_pcrel_adrp_imm21) + FixedValue = 0; + + // imm19 relocations are for conditional branches, which require + // assembler local symbols. If we got here, that's not what we have, + // so complain loudly. + if (Kind == AArch64::fixup_aarch64_pcrel_branch19) { + Asm.getContext().FatalError(Fixup.getLoc(), + "conditional branch requires assembler-local" + " label. '" + + Target.getSymA()->getSymbol().getName() + + "' is external."); + return; + } + + // 14-bit branch relocations should only target internal labels, and so + // should never get here. + if (Kind == AArch64::fixup_aarch64_pcrel_branch14) { + Asm.getContext().FatalError(Fixup.getLoc(), + "Invalid relocation on conditional branch!"); + return; + } + + if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, + Asm)) { + Asm.getContext().FatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); + return; + } + + Value = Target.getConstant(); + + if (Target.isAbsolute()) { // constant + // FIXME: Should this always be extern? + // SymbolNum of 0 indicates the absolute section. + Type = MachO::ARM64_RELOC_UNSIGNED; + Index = 0; + + if (IsPCRel) { + IsExtern = 1; + Asm.getContext().FatalError(Fixup.getLoc(), + "PC relative absolute relocation!"); + + // FIXME: x86_64 sets the type to a branch reloc here. Should we do + // something similar? + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + const MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + const MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + + // Check for "_foo@got - .", which comes through here as: + // Ltmp0: + // ...
_foo@got - Ltmp0 + if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT && + Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None && + Layout.getSymbolOffset(&B_SD) == + Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) { + // SymB is the PC, so use a PC-rel pointer-to-GOT relocation. + Index = A_Base->getIndex(); + IsExtern = 1; + Type = MachO::ARM64_RELOC_POINTER_TO_GOT; + IsPCRel = 1; + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | + (IsExtern << 27) | (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); + return; + } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + // Otherwise, neither symbol can be modified. + Asm.getContext().FatalError(Fixup.getLoc(), + "unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. + if (IsPCRel) + Asm.getContext().FatalError(Fixup.getLoc(), + "unsupported pc-relative relocation of " + "difference"); + + // AArch64 always uses external relocations. If there is no symbol to use as + // a base address (a local symbol with no preceding non-local symbol), + // error out. + // + // FIXME: We should probably just synthesize an external symbol and use + // that. + if (!A_Base) + Asm.getContext().FatalError( + Fixup.getLoc(), + "unsupported relocation of local symbol '" + A->getName() + + "'. Must have non-local symbol earlier in section."); + if (!B_Base) + Asm.getContext().FatalError( + Fixup.getLoc(), + "unsupported relocation of local symbol '" + B->getName() + + "'. Must have non-local symbol earlier in section."); + + if (A_Base == B_Base && A_Base) + Asm.getContext().FatalError(Fixup.getLoc(), + "unsupported relocation with identical base"); + + Value += (!A_SD.getFragment() ? 0 + : Writer->getSymbolAddress(&A_SD, Layout)) - + (!A_Base || !A_Base->getFragment() + ? 0 + : Writer->getSymbolAddress(A_Base, Layout)); + Value -= (!B_SD.getFragment() ? 0 + : Writer->getSymbolAddress(&B_SD, Layout)) - + (!B_Base || !B_Base->getFragment() + ? 0 + : Writer->getSymbolAddress(B_Base, Layout)); + + Index = A_Base->getIndex(); + IsExtern = 1; + Type = MachO::ARM64_RELOC_UNSIGNED; + + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | + (IsExtern << 27) | (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); + + Index = B_Base->getIndex(); + IsExtern = 1; + Type = MachO::ARM64_RELOC_SUBTRACTOR; + } else { // A + constant + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + const MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(&SD); + const MCSectionMachO &Section = static_cast<const MCSectionMachO &>( + Fragment->getParent()->getSection()); + + // If the symbol is a variable and we weren't able to get a Base for it + // (i.e., it's not in the symbol table associated with a section) resolve + // the relocation based on its expansion instead. + if (Symbol->isVariable() && !Base) { + // If the evaluation is an absolute value, just use that directly + // to keep things easy. + int64_t Res; + if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + + // FIXME: Will the Target we already have ever have any data in it + // we need to preserve and merge with the new Target? How about + // the FixedValue?
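The r_word1 packing that recurs throughout this function follows the Mach-O relocation_info layout: a 24-bit symbol/section index, then single-bit pc-rel, 2-bit length, single-bit extern, and 4-bit type fields. A stand-alone sketch of that packing and two inverse accessors (illustrative only, mirroring the shifts used above):

    #include <cstdint>

    // Mach-O relocation_info second word, as packed above:
    //   r_word1 = index(24) | pcrel(1) | length(2) | extern(1) | type(4)
    static uint32_t packRelocInfo(uint32_t Index, bool IsPCRel,
                                  unsigned Log2Size, bool IsExtern,
                                  unsigned Type) {
      return (Index << 0) | ((uint32_t)IsPCRel << 24) | (Log2Size << 25) |
             ((uint32_t)IsExtern << 27) | (Type << 28);
    }

    static uint32_t relocIndex(uint32_t Word1) { return Word1 & 0xffffff; }
    static unsigned relocType(uint32_t Word1) { return Word1 >> 28; }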
+ if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout)) + Asm.getContext().FatalError(Fixup.getLoc(), + "unable to resolve variable '" + + Symbol->getName() + "'"); + return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, + FixedValue); + } + + // Relocations inside debug sections always use local relocations when + // possible. This seems to be done because the debugger doesn't fully + // understand relocation entries and expects to find values that + // have already been fixed up. + if (Symbol->isInSection()) { + if (Section.hasAttribute(MachO::S_ATTR_DEBUG)) + Base = nullptr; + } + + // AArch64 uses external relocations as much as possible. For debug + // sections, and for pointer-sized relocations (.quad), we allow section + // relocations. It's code sections that run into trouble. + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); + } else if (Symbol->isInSection()) { + // Pointer-sized relocations can use a local relocation. Otherwise, + // we have to be in a debug info section. + if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3) + Asm.getContext().FatalError( + Fixup.getLoc(), + "unsupported relocation of local symbol '" + Symbol->getName() + + "'. Must have non-local symbol earlier in section."); + // Adjust the relocation to be section-relative. + // The index is the section ordinal (1-based). + const MCSectionData &SymSD = + Asm.getSectionData(SD.getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + IsExtern = 0; + Value += Writer->getSymbolAddress(&SD, Layout); + + if (IsPCRel) + Value -= Writer->getFragmentAddress(Fragment, Layout) + + Fixup.getOffset() + (1ULL << Log2Size); + } else { + // Resolve constant variables. + if (SD.getSymbol().isVariable()) { + int64_t Res; + if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, Writer->getSectionAddressMap())) { + FixedValue = Res; + return; + } + } + Asm.getContext().FatalError(Fixup.getLoc(), + "unsupported relocation of variable '" + + Symbol->getName() + "'"); + } + } + + // If the relocation kind is Branch26, Page21, or Pageoff12, any addend + // is represented via an Addend relocation, not encoded directly into + // the instruction. + if ((Type == MachO::ARM64_RELOC_BRANCH26 || + Type == MachO::ARM64_RELOC_PAGE21 || + Type == MachO::ARM64_RELOC_PAGEOFF12) && + Value) { + assert((Value & 0xff000000) == 0 && "Added relocation out of range!"); + + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | + (IsExtern << 27) | (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); + + // Now set up the Addend relocation. + Type = MachO::ARM64_RELOC_ADDEND; + Index = Value; + IsPCRel = 0; + Log2Size = 2; + IsExtern = 0; + + // Put zero into the instruction itself. The addend is in the relocation. + Value = 0; + } + + // If there's any addend left to handle, encode it in the instruction. 
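At this point any leftover addend is written into the instruction itself (FixedValue, below), except for the three checked types handled just above, where the addend instead travels as a separate ARM64_RELOC_ADDEND entry emitted immediately before the real relocation. A hedged sketch of that two-entry sequence for a hypothetical "bl _foo+16" (illustrative, not patch code; type values are the MachO ARM64_RELOC_* constants):

    #include <cstdint>

    struct RelocEntry { uint32_t word0, word1; };

    static uint32_t pack(uint32_t Index, bool PCRel, unsigned Len, bool Ext,
                         unsigned Type) {
      return Index | ((uint32_t)PCRel << 24) | (Len << 25) |
             ((uint32_t)Ext << 27) | (Type << 28);
    }

    static void emitBranchWithAddend(uint32_t Offset, uint32_t SymIndex,
                                     RelocEntry Out[2]) {
      // 1) ARM64_RELOC_ADDEND (type 10): the index field carries the
      //    addend (16); the BL's imm26 field is left as zero.
      Out[0] = {Offset, pack(/*Index=*/16, false, 2, false, 10)};
      // 2) ARM64_RELOC_BRANCH26 (type 2) against _foo; the linker applies
      //    the addend from entry (1) when resolving this entry.
      Out[1] = {Offset, pack(SymIndex, true, 2, true, 2)};
    }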
+ FixedValue = Value; + + // struct relocation_info (8 bytes) + MachO::any_relocation_info MRE; + MRE.r_word0 = FixupOffset; + MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | + (IsExtern << 27) | (Type << 28)); + Writer->addRelocation(Fragment->getParent(), MRE); +} + +MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS, + uint32_t CPUType, + uint32_t CPUSubtype) { + return createMachObjectWriter( + new AArch64MachObjectWriter(CPUType, CPUSubtype), OS, + /*IsLittleEndian=*/true); +} diff --git a/lib/Target/AArch64/MCTargetDesc/Android.mk b/lib/Target/AArch64/MCTargetDesc/Android.mk index edcf1f2..c0cdb2b 100644 --- a/lib/Target/AArch64/MCTargetDesc/Android.mk +++ b/lib/Target/AArch64/MCTargetDesc/Android.mk @@ -10,6 +10,7 @@ arm64_mc_desc_SRC_FILES := \ AArch64AsmBackend.cpp \ AArch64ELFObjectWriter.cpp \ AArch64ELFStreamer.cpp \ + AArch64MachObjectWriter.cpp \ AArch64MCAsmInfo.cpp \ AArch64MCCodeEmitter.cpp \ AArch64MCExpr.cpp \ diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt index 54c4465..7d5bced 100644 --- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -6,4 +6,9 @@ add_llvm_library(LLVMAArch64Desc AArch64MCCodeEmitter.cpp AArch64MCExpr.cpp AArch64MCTargetDesc.cpp - ) + AArch64MachObjectWriter.cpp +) +add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) + +# Hack: we need to include 'main' target directory to grab private headers +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt index 37c8035..70cff0b 100644 --- a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile index 641bb83..f356c58 100644 --- a/lib/Target/AArch64/Makefile +++ b/lib/Target/AArch64/Makefile @@ -12,19 +12,14 @@ LIBRARYNAME = LLVMAArch64CodeGen TARGET = AArch64 # Make sure that tblgen is run, first thing. -BUILT_SOURCES = AArch64GenAsmMatcher.inc \ - AArch64GenAsmWriter.inc \ - AArch64GenCallingConv.inc \ - AArch64GenDAGISel.inc \ - AArch64GenDisassemblerTables.inc \ - AArch64GenInstrInfo.inc \ - AArch64GenMCCodeEmitter.inc \ - AArch64GenMCPseudoLowering.inc \ - AArch64GenRegisterInfo.inc \ - AArch64GenSubtargetInfo.inc +BUILT_SOURCES = AArch64GenRegisterInfo.inc AArch64GenInstrInfo.inc \ + AArch64GenAsmWriter.inc AArch64GenAsmWriter1.inc \ + AArch64GenDAGISel.inc \ + AArch64GenCallingConv.inc AArch64GenAsmMatcher.inc \ + AArch64GenSubtargetInfo.inc AArch64GenMCCodeEmitter.inc \ + AArch64GenFastISel.inc AArch64GenDisassemblerTables.inc \ + AArch64GenMCPseudoLowering.inc -DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils +DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc Utils include $(LEVEL)/Makefile.common - - diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt deleted file mode 100644 index 601990f..0000000 --- a/lib/Target/AArch64/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -This file will contain changes that need to be made before AArch64 can become an -officially supported target. Currently a placeholder. 
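With four target entries now registered (aarch64, aarch64_be, arm64, arm64_be), a client never names the create* hooks directly; it discovers a Target through TargetRegistry and builds MC components from it. A minimal consumer sketch under assumed LLVM 3.5 APIs (error handling elided; assumes LLVMInitializeAArch64TargetInfo() and LLVMInitializeAArch64TargetMC() have already run):

    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    #include <memory>
    #include <string>

    using namespace llvm;

    // Look up whichever registered name matches the triple, then build the
    // register info through the registry rather than calling
    // createAArch64MCRegisterInfo directly.
    static std::unique_ptr<MCRegisterInfo> makeRegInfo(const std::string &TT) {
      std::string Err;
      const Target *T = TargetRegistry::lookupTarget(TT, Err);
      if (!T)
        return nullptr; // triple not registered
      return std::unique_ptr<MCRegisterInfo>(T->createMCRegInfo(TT));
    }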
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index 9281e4e..3a382c1 100644 --- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===// +//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -----------------===// // // The LLVM Compiler Infrastructure // @@ -6,22 +6,26 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file contains the key registration step for the architecture. -// -//===----------------------------------------------------------------------===// -#include "AArch64.h" -#include "llvm/IR/Module.h" +#include "llvm/ADT/Triple.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; -Target llvm::TheAArch64leTarget; -Target llvm::TheAArch64beTarget; +namespace llvm { +Target TheAArch64leTarget; +Target TheAArch64beTarget; +Target TheARM64leTarget; +Target TheARM64beTarget; +} // end namespace llvm extern "C" void LLVMInitializeAArch64TargetInfo() { - RegisterTarget<Triple::aarch64, /*HasJIT=*/true> - X(TheAArch64leTarget, "aarch64", "AArch64 (ARM 64-bit little endian target)"); - RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> - Y(TheAArch64beTarget, "aarch64_be", "AArch64 (ARM 64-bit big endian target)"); + RegisterTarget<Triple::arm64, /*HasJIT=*/true> X(TheARM64leTarget, "arm64", + "AArch64 (little endian)"); + RegisterTarget<Triple::arm64_be, /*HasJIT=*/true> Y(TheARM64beTarget, "arm64_be", + "AArch64 (big endian)"); + + RegisterTarget<Triple::aarch64, /*HasJIT=*/true> Z( + TheAArch64leTarget, "aarch64", "AArch64 (little endian)"); + RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> W( + TheAArch64beTarget, "aarch64_be", "AArch64 (big endian)"); } diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt index ee734c6..e236eed 100644 --- a/lib/Target/AArch64/TargetInfo/CMakeLists.txt +++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt @@ -1,3 +1,7 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/..
) + add_llvm_library(LLVMAArch64Info AArch64TargetInfo.cpp ) + +add_dependencies(LLVMAArch64Info AArch64CommonTableGen) diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt index 6429172..93c5407 100644 --- a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt +++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; +;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 2a97cd6..3c24bb3 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -18,7 +18,7 @@ using namespace llvm; -StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { +StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const { for (unsigned i = 0; i < NumPairs; ++i) { if (Pairs[i].Value == Value) { Valid = true; @@ -30,7 +30,7 @@ StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const { return StringRef(); } -uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { +uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const { std::string LowerCaseName = Name.lower(); for (unsigned i = 0; i < NumPairs; ++i) { if (Pairs[i].Name == LowerCaseName) { @@ -43,11 +43,11 @@ uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const { return -1; } -bool NamedImmMapper::validImm(uint32_t Value) const { +bool AArch64NamedImmMapper::validImm(uint32_t Value) const { return Value < TooBigImm; } -const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = { {"s1e1r", S1E1R}, {"s1e2r", S1E2R}, {"s1e3r", S1E3R}, @@ -62,10 +62,10 @@ const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = { {"s12e0w", S12E0W}, }; -A64AT::ATMapper::ATMapper() - : NamedImmMapper(ATPairs, 0) {} +AArch64AT::ATMapper::ATMapper() + : AArch64NamedImmMapper(ATPairs, 0) {} -const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = { {"oshld", OSHLD}, {"oshst", OSHST}, {"osh", OSH}, @@ -80,10 +80,10 @@ const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = { {"sy", SY} }; -A64DB::DBarrierMapper::DBarrierMapper() - : NamedImmMapper(DBarrierPairs, 16u) {} +AArch64DB::DBarrierMapper::DBarrierMapper() + : AArch64NamedImmMapper(DBarrierPairs, 16u) {} -const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = { {"zva", ZVA}, {"ivac", IVAC}, {"isw", ISW}, @@ -94,26 +94,26 @@ const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = { {"cisw", CISW} }; -A64DC::DCMapper::DCMapper() - : NamedImmMapper(DCPairs, 0) {} +AArch64DC::DCMapper::DCMapper() + : AArch64NamedImmMapper(DCPairs, 0) {} -const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = { {"ialluis", IALLUIS}, {"iallu", IALLU}, {"ivau", IVAU} }; -A64IC::ICMapper::ICMapper() - : NamedImmMapper(ICPairs, 0) {} +AArch64IC::ICMapper::ICMapper() + : AArch64NamedImmMapper(ICPairs, 0) {} -const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = { {"sy", SY}, }; 
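All of the mapper classes renamed in this file follow one pattern: a static table of {name, value} pairs, linear lookup in both directions, and validImm() bounding the raw immediates accepted alongside named values. A condensed stand-alone sketch of that pattern (illustrative only; the real AArch64NamedImmMapper also lower-cases names before comparing):

    #include <cstdint>
    #include <cstring>

    // Condensed form of the lookup pattern shared by the AT/DB/DC/IC/ISB/
    // PRFM/PState/TLBI mappers above.
    struct Mapping { const char *Name; uint32_t Value; };

    struct NamedImmMapper {
      const Mapping *Pairs;
      unsigned NumPairs;
      uint32_t TooBigImm; // 0 means no raw immediates are allowed

      const char *toString(uint32_t V) const {   // value -> name
        for (unsigned i = 0; i != NumPairs; ++i)
          if (Pairs[i].Value == V)
            return Pairs[i].Name;
        return nullptr;
      }
      uint32_t fromString(const char *N) const { // name -> value, -1 if absent
        for (unsigned i = 0; i != NumPairs; ++i)
          if (std::strcmp(Pairs[i].Name, N) == 0)
            return Pairs[i].Value;
        return uint32_t(-1);
      }
      bool validImm(uint32_t V) const { return V < TooBigImm; }
    };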
-A64ISB::ISBMapper::ISBMapper() - : NamedImmMapper(ISBPairs, 16) {} +AArch64ISB::ISBMapper::ISBMapper() + : AArch64NamedImmMapper(ISBPairs, 16) {} -const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = { {"pldl1keep", PLDL1KEEP}, {"pldl1strm", PLDL1STRM}, {"pldl2keep", PLDL2KEEP}, @@ -134,19 +134,19 @@ const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = { {"pstl3strm", PSTL3STRM} }; -A64PRFM::PRFMMapper::PRFMMapper() - : NamedImmMapper(PRFMPairs, 32) {} +AArch64PRFM::PRFMMapper::PRFMMapper() + : AArch64NamedImmMapper(PRFMPairs, 32) {} -const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = { +const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = { {"spsel", SPSel}, {"daifset", DAIFSet}, {"daifclr", DAIFClr} }; -A64PState::PStateMapper::PStateMapper() - : NamedImmMapper(PStatePairs, 0) {} +AArch64PState::PStateMapper::PStateMapper() + : AArch64NamedImmMapper(PStatePairs, 0) {} -const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = { {"mdccsr_el0", MDCCSR_EL0}, {"dbgdtrrx_el0", DBGDTRRX_EL0}, {"mdrar_el1", MDRAR_EL1}, @@ -176,16 +176,16 @@ const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { {"id_isar3_el1", ID_ISAR3_EL1}, {"id_isar4_el1", ID_ISAR4_EL1}, {"id_isar5_el1", ID_ISAR5_EL1}, - {"id_aa64pfr0_el1", ID_AA64PFR0_EL1}, - {"id_aa64pfr1_el1", ID_AA64PFR1_EL1}, - {"id_aa64dfr0_el1", ID_AA64DFR0_EL1}, - {"id_aa64dfr1_el1", ID_AA64DFR1_EL1}, - {"id_aa64afr0_el1", ID_AA64AFR0_EL1}, - {"id_aa64afr1_el1", ID_AA64AFR1_EL1}, - {"id_aa64isar0_el1", ID_AA64ISAR0_EL1}, - {"id_aa64isar1_el1", ID_AA64ISAR1_EL1}, - {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1}, - {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1}, + {"id_aa64pfr0_el1", ID_A64PFR0_EL1}, + {"id_aa64pfr1_el1", ID_A64PFR1_EL1}, + {"id_aa64dfr0_el1", ID_A64DFR0_EL1}, + {"id_aa64dfr1_el1", ID_A64DFR1_EL1}, + {"id_aa64afr0_el1", ID_A64AFR0_EL1}, + {"id_aa64afr1_el1", ID_A64AFR1_EL1}, + {"id_aa64isar0_el1", ID_A64ISAR0_EL1}, + {"id_aa64isar1_el1", ID_A64ISAR1_EL1}, + {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1}, + {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1}, {"mvfr0_el1", MVFR0_EL1}, {"mvfr1_el1", MVFR1_EL1}, {"mvfr2_el1", MVFR2_EL1}, @@ -245,12 +245,13 @@ const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = { {"ich_elsr_el2", ICH_ELSR_EL2} }; -A64SysReg::MRSMapper::MRSMapper() { +AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits) + : SysRegMapper(FeatureBits) { InstPairs = &MRSPairs[0]; NumInstPairs = llvm::array_lengthof(MRSPairs); } -const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = { {"dbgdtrtx_el0", DBGDTRTX_EL0}, {"oslar_el1", OSLAR_EL1}, {"pmswinc_el0", PMSWINC_EL0}, @@ -268,13 +269,14 @@ const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = { {"icc_sgi0r_el1", ICC_SGI0R_EL1} }; -A64SysReg::MSRMapper::MSRMapper() { +AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits) + : SysRegMapper(FeatureBits) { InstPairs = &MSRPairs[0]; NumInstPairs = llvm::array_lengthof(MSRPairs); } -const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = { {"osdtrrx_el1", OSDTRRX_EL1}, {"osdtrtx_el1", OSDTRTX_EL1}, {"teecr32_el1", TEECR32_EL1}, @@ -753,10 +755,16 @@ const NamedImmMapper::Mapping 
A64SysReg::SysRegMapper::SysRegPairs[] = { {"ich_lr15_el2", ICH_LR15_EL2} }; +const AArch64NamedImmMapper::Mapping +AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = { + {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3} +}; + uint32_t -A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { - // First search the registers shared by all +AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { std::string NameLower = Name.lower(); + + // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { if (SysRegPairs[i].Name == NameLower) { Valid = true; @@ -764,6 +772,16 @@ A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } } + // Next search for target specific registers + if (FeatureBits & AArch64::ProcCyclone) { + for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { + if (CycloneSysRegPairs[i].Name == NameLower) { + Valid = true; + return CycloneSysRegPairs[i].Value; + } + } + } + // Now try the instruction-specific registers (either read-only or // write-only). for (unsigned i = 0; i < NumInstPairs; ++i) { @@ -796,7 +814,8 @@ A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const { } std::string -A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { +AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { + // First search the registers shared by all for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) { if (SysRegPairs[i].Value == Bits) { Valid = true; @@ -804,6 +823,18 @@ A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { } } + // Next search for target specific registers + if (FeatureBits & AArch64::ProcCyclone) { + for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) { + if (CycloneSysRegPairs[i].Value == Bits) { + Valid = true; + return CycloneSysRegPairs[i].Name; + } + } + } + + // Now try the instruction-specific registers (either read-only or + // write-only). for (unsigned i = 0; i < NumInstPairs; ++i) { if (InstPairs[i].Value == Bits) { Valid = true; @@ -831,7 +862,7 @@ A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const { + "_c" + utostr(CRm) + "_" + utostr(Op2); } -const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = { +const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = { {"ipas2e1is", IPAS2E1IS}, {"ipas2le1is", IPAS2LE1IS}, {"vmalle1is", VMALLE1IS}, @@ -866,308 +897,5 @@ const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = { {"vaale1", VAALE1} }; -A64TLBI::TLBIMapper::TLBIMapper() - : NamedImmMapper(TLBIPairs, 0) {} - -bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) { - const fltSemantics &Sem = Val.getSemantics(); - unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1; - - uint32_t ExpMask; - switch (FracBits) { - case 10: // IEEE half-precision - ExpMask = 0x1f; - break; - case 23: // IEEE single-precision - ExpMask = 0xff; - break; - case 52: // IEEE double-precision - ExpMask = 0x7ff; - break; - case 112: // IEEE quad-precision - // No immediates are valid for double precision. 
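For reference, the imm8 format that the isFPImm helper being removed here validates packs a sign bit, three biased exponent bits (so -3 <= exp <= 4), and four fraction bits. A worked check of that packing (a stand-alone restatement of the Imm8Bits computation below, not patch code):

    #include <cassert>
    #include <cstdint>

    // imm8 = Sign(1) : PackedExp(3) : PackedFraction(4),
    // with PackedExp = (Exponent + 7) & 0x7, as in A64Imms::isFPImm.
    static uint8_t packFP8(bool Sign, int Exp, uint8_t Frac4) {
      return (uint8_t)((Sign << 7) | (((Exp + 7) & 0x7) << 4) | (Frac4 & 0xf));
    }

    int main() {
      // 1.0 = 2^0 * 1.0000 -> exponent 0, fraction bits 0:
      assert(packFP8(false, 0, 0) == 0x70); // FMOV #1.0 encodes imm8 = 0x70
      // 2.0 -> exponent 1, fraction 0:
      assert(packFP8(false, 1, 0) == 0x00);
      return 0;
    }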
- return false; - default: - llvm_unreachable("Only half, single and double precision supported"); - } - - uint32_t ExpStart = FracBits; - uint64_t FracMask = (1ULL << FracBits) - 1; - - uint32_t Sign = Val.isNegative(); - - uint64_t Bits= Val.bitcastToAPInt().getLimitedValue(); - uint64_t Fraction = Bits & FracMask; - int32_t Exponent = ((Bits >> ExpStart) & ExpMask); - Exponent -= ExpMask >> 1; - - // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19) - // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48) - // This translates to: only 4 bits of fraction; -3 <= exp <= 4. - uint64_t A64FracStart = FracBits - 4; - uint64_t A64FracMask = 0xf; - - // Are there too many fraction bits? - if (Fraction & ~(A64FracMask << A64FracStart)) - return false; - - if (Exponent < -3 || Exponent > 4) - return false; - - uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask; - uint32_t PackedExp = (Exponent + 7) & 0x7; - - Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction; - return true; -} - -// Encoding of the immediate for logical (immediate) instructions: -// -// | N | imms | immr | size | R | S | -// |---+--------+--------+------+--------------+--------------| -// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) | -// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) | -// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) | -// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) | -// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) | -// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) | -// | 0 | 11111x | - | | UNALLOCATED | | -// -// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in -// which the lower S+1 bits are ones and the remaining bits are zero, then -// rotated right by R bits, which is then replicated across the datapath. -// -// + Values of 'N', 'imms' and 'immr' which do not match the above table are -// RESERVED. -// + If all 's' bits in the imms field are set then the instruction is -// RESERVED. -// + The 'x' bits in the 'immr' field are IGNORED. - -bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { - int RepeatWidth; - int Rotation = 0; - int Num1s = 0; - - // Because there are S+1 ones in the replicated mask, an immediate of all - // zeros is not allowed. Filtering it here is probably more efficient. - if (Imm == 0) return false; - - for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) { - uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1; - uint64_t ReplicatedMask = Imm & RepeatMask; - - if (ReplicatedMask == 0) continue; - - // First we have to make sure the mask is actually repeated in each slot for - // this width-specifier. - bool IsReplicatedMask = true; - for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) { - if (((Imm >> i) & RepeatMask) != ReplicatedMask) { - IsReplicatedMask = false; - break; - } - } - if (!IsReplicatedMask) continue; - - // Now we have to work out the amount of rotation needed. The first part of - // this calculation is actually independent of RepeatWidth, but the complex - // case will depend on it. - Rotation = countTrailingZeros(Imm); - if (Rotation == 0) { - // There were no leading zeros, which means it's either in place or there - // are 1s at each end (e.g. 0x8003 needs rotating). - Rotation = RegWidth == 64 ? 
CountLeadingOnes_64(Imm) - : CountLeadingOnes_32(Imm); - Rotation = RepeatWidth - Rotation; - } - - uint64_t ReplicatedOnes = ReplicatedMask; - if (Rotation != 0 && Rotation != 64) - ReplicatedOnes = (ReplicatedMask >> Rotation) - | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask); - - // Of course, they may not actually be ones, so we have to check that: - if (!isMask_64(ReplicatedOnes)) - continue; - - Num1s = CountTrailingOnes_64(ReplicatedOnes); - - // We know we've got an almost valid encoding (certainly, if this is invalid - // no other parameters would work). - break; - } - - // The encodings which would produce all 1s are RESERVED. - if (RepeatWidth == 1 || Num1s == RepeatWidth) return false; - - uint32_t N = RepeatWidth == 64; - uint32_t ImmR = RepeatWidth - Rotation; - uint32_t ImmS = Num1s - 1; - - switch (RepeatWidth) { - default: break; // No action required for other valid rotations. - case 16: ImmS |= 0x20; break; // 10ssss - case 8: ImmS |= 0x30; break; // 110sss - case 4: ImmS |= 0x38; break; // 1110ss - case 2: ImmS |= 0x3c; break; // 11110s - } - - Bits = ImmS | (ImmR << 6) | (N << 12); - - return true; -} - - -bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits, - uint64_t &Imm) { - uint32_t N = Bits >> 12; - uint32_t ImmR = (Bits >> 6) & 0x3f; - uint32_t ImmS = Bits & 0x3f; - - // N=1 encodes a 64-bit replication and is invalid for the 32-bit - // instructions. - if (RegWidth == 32 && N != 0) return false; - - int Width = 0; - if (N == 1) - Width = 64; - else if ((ImmS & 0x20) == 0) - Width = 32; - else if ((ImmS & 0x10) == 0) - Width = 16; - else if ((ImmS & 0x08) == 0) - Width = 8; - else if ((ImmS & 0x04) == 0) - Width = 4; - else if ((ImmS & 0x02) == 0) - Width = 2; - else { - // ImmS is 0b11111x: UNALLOCATED - return false; - } - - int Num1s = (ImmS & (Width - 1)) + 1; - - // All encodings which would map to -1 (signed) are RESERVED. - if (Num1s == Width) return false; - - int Rotation = (ImmR & (Width - 1)); - uint64_t Mask = (1ULL << Num1s) - 1; - uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1; - if (Rotation != 0 && Rotation != 64) - Mask = (Mask >> Rotation) - | ((Mask << (Width - Rotation)) & WidthMask); - - Imm = Mask; - for (unsigned i = 1; i < RegWidth / Width; ++i) { - Mask <<= Width; - Imm |= Mask; - } - - return true; -} - -bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { - // If high bits are set then a 32-bit MOVZ can't possibly work. - if (RegWidth == 32 && (Value & ~0xffffffffULL)) - return false; - - for (int i = 0; i < RegWidth; i += 16) { - // If the value is 0 when we mask out all the bits that could be set with - // the current LSL value then it's representable. - if ((Value & ~(0xffffULL << i)) == 0) { - Shift = i / 16; - UImm16 = (Value >> i) & 0xffff; - return true; - } - } - return false; -} - -bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) { - // MOVN is defined to set its register to NOT(LSL(imm16, shift)). - - // We have to be a little careful about a 32-bit register: 0xffff_1234 *is* - // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not - // a valid input for isMOVZImm. - if (RegWidth == 32 && (Value & ~0xffffffffULL)) - return false; - - uint64_t MOVZEquivalent = RegWidth == 32 ? 
~Value & 0xffffffff : ~Value; - - return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift); -} - -bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, - int &UImm16, int &Shift) { - if (isMOVZImm(RegWidth, Value, UImm16, Shift)) - return false; - - return isMOVNImm(RegWidth, Value, UImm16, Shift); -} - -// decodeNeonModShiftImm - Decode a Neon OpCmode value into the -// the shift amount and the shift type (shift zeros or ones in) and -// returns whether the OpCmode value implies a shift operation. -bool A64Imms::decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm, - unsigned &ShiftOnesIn) { - ShiftImm = 0; - ShiftOnesIn = false; - bool HasShift = true; - - if (OpCmode == 0xe) { - // movi byte - HasShift = false; - } else if (OpCmode == 0x1e) { - // movi 64-bit bytemask - HasShift = false; - } else if ((OpCmode & 0xc) == 0x8) { - // shift zeros, per halfword - ShiftImm = ((OpCmode & 0x2) >> 1); - } else if ((OpCmode & 0x8) == 0) { - // shift zeros, per word - ShiftImm = ((OpCmode & 0x6) >> 1); - } else if ((OpCmode & 0xe) == 0xc) { - // shift ones, per word - ShiftOnesIn = true; - ShiftImm = (OpCmode & 0x1); - } else { - // per byte, per bytemask - llvm_unreachable("Unsupported Neon modified immediate"); - } - - return HasShift; -} - -// decodeNeonModImm - Decode a NEON modified immediate and OpCmode values -// into the element value and the element size in bits. -uint64_t A64Imms::decodeNeonModImm(unsigned Val, unsigned OpCmode, - unsigned &EltBits) { - uint64_t DecodedVal = Val; - EltBits = 0; - - if (OpCmode == 0xe) { - // movi byte - EltBits = 8; - } else if (OpCmode == 0x1e) { - // movi 64-bit bytemask - DecodedVal = 0; - for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { - if ((Val >> ByteNum) & 1) - DecodedVal |= (uint64_t)0xff << (8 * ByteNum); - } - EltBits = 64; - } else if ((OpCmode & 0xc) == 0x8) { - // shift zeros, per halfword - EltBits = 16; - } else if ((OpCmode & 0x8) == 0) { - // shift zeros, per word - EltBits = 32; - } else if ((OpCmode & 0xe) == 0xc) { - // shift ones, per word - EltBits = 32; - } else { - llvm_unreachable("Unsupported Neon modified immediate"); - } - return DecodedVal; -} +AArch64TLBI::TLBIMapper::TLBIMapper() + : AArch64NamedImmMapper(TLBIPairs, 0) {} diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 39b042b..9e4c389 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -1,4 +1,4 @@ -//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===// +//===-- AArch64BaseInfo.h - Top level definitions for AArch64 ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,96 +14,271 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_AARCH64_BASEINFO_H -#define LLVM_AARCH64_BASEINFO_H +#ifndef AArch64BASEINFO_H +#define AArch64BASEINFO_H +// FIXME: Is it easiest to fix this layering violation by moving the .inc +// #includes from AArch64MCTargetDesc.h to here? +#include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends. #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" namespace llvm { -// // Enums corresponding to AArch64 condition codes -namespace A64CC { - // The CondCodes constants map directly to the 4-bit encoding of the - // condition field for predicated instructions. 
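The MOVZ/MOVN helpers deleted above reduce to one test: a value is a MOVZ immediate iff it is a single 16-bit chunk at one of the LSL #0/16/32/48 positions, and MOVN handles the bitwise complement of such values. A stand-alone restatement with a worked check (logic copied from the removed A64Imms::isMOVZImm):

    #include <cassert>
    #include <cstdint>

    static bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16,
                          int &Shift) {
      // High bits set rule out a 32-bit MOVZ immediately.
      if (RegWidth == 32 && (Value & ~0xffffffffULL))
        return false;
      for (int i = 0; i < RegWidth; i += 16) {
        // Representable if nothing survives outside one 16-bit chunk.
        if ((Value & ~(0xffffULL << i)) == 0) {
          Shift = i / 16;
          UImm16 = (Value >> i) & 0xffff;
          return true;
        }
      }
      return false;
    }

    int main() {
      int Imm, Shift;
      assert(isMOVZImm(64, 0x00000000ABCD0000ULL, Imm, Shift) &&
             Imm == 0xABCD && Shift == 1);  // movz x0, #0xabcd, lsl #16
      assert(!isMOVZImm(64, 0x0000ABCD0000ABCDULL, Imm, Shift)); // two chunks
      return 0;
    }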
- enum CondCodes { // Meaning (integer) Meaning (floating-point) - EQ = 0, // Equal Equal - NE, // Not equal Not equal, or unordered - HS, // Unsigned higher or same >, ==, or unordered - LO, // Unsigned lower or same Less than - MI, // Minus, negative Less than - PL, // Plus, positive or zero >, ==, or unordered - VS, // Overflow Unordered - VC, // No overflow Ordered - HI, // Unsigned higher Greater than, or unordered - LS, // Unsigned lower or same Less than or equal - GE, // Greater than or equal Greater than or equal - LT, // Less than Less than, or unordered - GT, // Signed greater than Greater than - LE, // Signed less than or equal <, ==, or unordered - AL, // Always (unconditional) Always (unconditional) - NV, // Always (unconditional) Always (unconditional) - // Note the NV exists purely to disassemble 0b1111. Execution - // is "always". - Invalid - }; +inline static unsigned getWRegFromXReg(unsigned Reg) { + switch (Reg) { + case AArch64::X0: return AArch64::W0; + case AArch64::X1: return AArch64::W1; + case AArch64::X2: return AArch64::W2; + case AArch64::X3: return AArch64::W3; + case AArch64::X4: return AArch64::W4; + case AArch64::X5: return AArch64::W5; + case AArch64::X6: return AArch64::W6; + case AArch64::X7: return AArch64::W7; + case AArch64::X8: return AArch64::W8; + case AArch64::X9: return AArch64::W9; + case AArch64::X10: return AArch64::W10; + case AArch64::X11: return AArch64::W11; + case AArch64::X12: return AArch64::W12; + case AArch64::X13: return AArch64::W13; + case AArch64::X14: return AArch64::W14; + case AArch64::X15: return AArch64::W15; + case AArch64::X16: return AArch64::W16; + case AArch64::X17: return AArch64::W17; + case AArch64::X18: return AArch64::W18; + case AArch64::X19: return AArch64::W19; + case AArch64::X20: return AArch64::W20; + case AArch64::X21: return AArch64::W21; + case AArch64::X22: return AArch64::W22; + case AArch64::X23: return AArch64::W23; + case AArch64::X24: return AArch64::W24; + case AArch64::X25: return AArch64::W25; + case AArch64::X26: return AArch64::W26; + case AArch64::X27: return AArch64::W27; + case AArch64::X28: return AArch64::W28; + case AArch64::FP: return AArch64::W29; + case AArch64::LR: return AArch64::W30; + case AArch64::SP: return AArch64::WSP; + case AArch64::XZR: return AArch64::WZR; + } + // For anything else, return it unchanged. 
+ return Reg; +} -} // namespace A64CC +inline static unsigned getXRegFromWReg(unsigned Reg) { + switch (Reg) { + case AArch64::W0: return AArch64::X0; + case AArch64::W1: return AArch64::X1; + case AArch64::W2: return AArch64::X2; + case AArch64::W3: return AArch64::X3; + case AArch64::W4: return AArch64::X4; + case AArch64::W5: return AArch64::X5; + case AArch64::W6: return AArch64::X6; + case AArch64::W7: return AArch64::X7; + case AArch64::W8: return AArch64::X8; + case AArch64::W9: return AArch64::X9; + case AArch64::W10: return AArch64::X10; + case AArch64::W11: return AArch64::X11; + case AArch64::W12: return AArch64::X12; + case AArch64::W13: return AArch64::X13; + case AArch64::W14: return AArch64::X14; + case AArch64::W15: return AArch64::X15; + case AArch64::W16: return AArch64::X16; + case AArch64::W17: return AArch64::X17; + case AArch64::W18: return AArch64::X18; + case AArch64::W19: return AArch64::X19; + case AArch64::W20: return AArch64::X20; + case AArch64::W21: return AArch64::X21; + case AArch64::W22: return AArch64::X22; + case AArch64::W23: return AArch64::X23; + case AArch64::W24: return AArch64::X24; + case AArch64::W25: return AArch64::X25; + case AArch64::W26: return AArch64::X26; + case AArch64::W27: return AArch64::X27; + case AArch64::W28: return AArch64::X28; + case AArch64::W29: return AArch64::FP; + case AArch64::W30: return AArch64::LR; + case AArch64::WSP: return AArch64::SP; + case AArch64::WZR: return AArch64::XZR; + } + // For anything else, return it unchanged. + return Reg; +} -inline static const char *A64CondCodeToString(A64CC::CondCodes CC) { - switch (CC) { - default: llvm_unreachable("Unknown condition code"); - case A64CC::EQ: return "eq"; - case A64CC::NE: return "ne"; - case A64CC::HS: return "hs"; - case A64CC::LO: return "lo"; - case A64CC::MI: return "mi"; - case A64CC::PL: return "pl"; - case A64CC::VS: return "vs"; - case A64CC::VC: return "vc"; - case A64CC::HI: return "hi"; - case A64CC::LS: return "ls"; - case A64CC::GE: return "ge"; - case A64CC::LT: return "lt"; - case A64CC::GT: return "gt"; - case A64CC::LE: return "le"; - case A64CC::AL: return "al"; - case A64CC::NV: return "nv"; +static inline unsigned getBRegFromDReg(unsigned Reg) { + switch (Reg) { + case AArch64::D0: return AArch64::B0; + case AArch64::D1: return AArch64::B1; + case AArch64::D2: return AArch64::B2; + case AArch64::D3: return AArch64::B3; + case AArch64::D4: return AArch64::B4; + case AArch64::D5: return AArch64::B5; + case AArch64::D6: return AArch64::B6; + case AArch64::D7: return AArch64::B7; + case AArch64::D8: return AArch64::B8; + case AArch64::D9: return AArch64::B9; + case AArch64::D10: return AArch64::B10; + case AArch64::D11: return AArch64::B11; + case AArch64::D12: return AArch64::B12; + case AArch64::D13: return AArch64::B13; + case AArch64::D14: return AArch64::B14; + case AArch64::D15: return AArch64::B15; + case AArch64::D16: return AArch64::B16; + case AArch64::D17: return AArch64::B17; + case AArch64::D18: return AArch64::B18; + case AArch64::D19: return AArch64::B19; + case AArch64::D20: return AArch64::B20; + case AArch64::D21: return AArch64::B21; + case AArch64::D22: return AArch64::B22; + case AArch64::D23: return AArch64::B23; + case AArch64::D24: return AArch64::B24; + case AArch64::D25: return AArch64::B25; + case AArch64::D26: return AArch64::B26; + case AArch64::D27: return AArch64::B27; + case AArch64::D28: return AArch64::B28; + case AArch64::D29: return AArch64::B29; + case AArch64::D30: return AArch64::B30; + case AArch64::D31: 
return AArch64::B31; } + // For anything else, return it unchanged. + return Reg; } -inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) { - return StringSwitch(CondStr.lower()) - .Case("eq", A64CC::EQ) - .Case("ne", A64CC::NE) - .Case("ne", A64CC::NE) - .Case("hs", A64CC::HS) - .Case("cs", A64CC::HS) - .Case("lo", A64CC::LO) - .Case("cc", A64CC::LO) - .Case("mi", A64CC::MI) - .Case("pl", A64CC::PL) - .Case("vs", A64CC::VS) - .Case("vc", A64CC::VC) - .Case("hi", A64CC::HI) - .Case("ls", A64CC::LS) - .Case("ge", A64CC::GE) - .Case("lt", A64CC::LT) - .Case("gt", A64CC::GT) - .Case("le", A64CC::LE) - .Case("al", A64CC::AL) - .Case("nv", A64CC::NV) - .Default(A64CC::Invalid); + +static inline unsigned getDRegFromBReg(unsigned Reg) { + switch (Reg) { + case AArch64::B0: return AArch64::D0; + case AArch64::B1: return AArch64::D1; + case AArch64::B2: return AArch64::D2; + case AArch64::B3: return AArch64::D3; + case AArch64::B4: return AArch64::D4; + case AArch64::B5: return AArch64::D5; + case AArch64::B6: return AArch64::D6; + case AArch64::B7: return AArch64::D7; + case AArch64::B8: return AArch64::D8; + case AArch64::B9: return AArch64::D9; + case AArch64::B10: return AArch64::D10; + case AArch64::B11: return AArch64::D11; + case AArch64::B12: return AArch64::D12; + case AArch64::B13: return AArch64::D13; + case AArch64::B14: return AArch64::D14; + case AArch64::B15: return AArch64::D15; + case AArch64::B16: return AArch64::D16; + case AArch64::B17: return AArch64::D17; + case AArch64::B18: return AArch64::D18; + case AArch64::B19: return AArch64::D19; + case AArch64::B20: return AArch64::D20; + case AArch64::B21: return AArch64::D21; + case AArch64::B22: return AArch64::D22; + case AArch64::B23: return AArch64::D23; + case AArch64::B24: return AArch64::D24; + case AArch64::B25: return AArch64::D25; + case AArch64::B26: return AArch64::D26; + case AArch64::B27: return AArch64::D27; + case AArch64::B28: return AArch64::D28; + case AArch64::B29: return AArch64::D29; + case AArch64::B30: return AArch64::D30; + case AArch64::B31: return AArch64::D31; + } + // For anything else, return it unchanged. + return Reg; } -inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) { - // It turns out that the condition codes have been designed so that in order - // to reverse the intent of the condition you only have to invert the low bit: +namespace AArch64CC { + +// The CondCodes constants map directly to the 4-bit encoding of the condition +// field for predicated instructions. +enum CondCode { // Meaning (integer) Meaning (floating-point) + EQ = 0x0, // Equal Equal + NE = 0x1, // Not equal Not equal, or unordered + HS = 0x2, // Unsigned higher or same >, ==, or unordered + LO = 0x3, // Unsigned lower Less than + MI = 0x4, // Minus, negative Less than + PL = 0x5, // Plus, positive or zero >, ==, or unordered + VS = 0x6, // Overflow Unordered + VC = 0x7, // No overflow Not unordered + HI = 0x8, // Unsigned higher Greater than, or unordered + LS = 0x9, // Unsigned lower or same Less than or equal + GE = 0xa, // Greater than or equal Greater than or equal + LT = 0xb, // Less than Less than, or unordered + GT = 0xc, // Greater than Greater than + LE = 0xd, // Less than or equal <, ==, or unordered + AL = 0xe, // Always (unconditional) Always (unconditional) + NV = 0xf, // Always (unconditional) Always (unconditional) + // Note the NV exists purely to disassemble 0b1111. Execution is "always". 
+ Invalid +}; - return static_cast(static_cast(CC) ^ 0x1); +inline static const char *getCondCodeName(CondCode Code) { + switch (Code) { + default: llvm_unreachable("Unknown condition code"); + case EQ: return "eq"; + case NE: return "ne"; + case HS: return "hs"; + case LO: return "lo"; + case MI: return "mi"; + case PL: return "pl"; + case VS: return "vs"; + case VC: return "vc"; + case HI: return "hi"; + case LS: return "ls"; + case GE: return "ge"; + case LT: return "lt"; + case GT: return "gt"; + case LE: return "le"; + case AL: return "al"; + case NV: return "nv"; + } +} + +inline static CondCode getInvertedCondCode(CondCode Code) { + switch (Code) { + default: llvm_unreachable("Unknown condition code"); + case EQ: return NE; + case NE: return EQ; + case HS: return LO; + case LO: return HS; + case MI: return PL; + case PL: return MI; + case VS: return VC; + case VC: return VS; + case HI: return LS; + case LS: return HI; + case GE: return LT; + case LT: return GE; + case GT: return LE; + case LE: return GT; + } } +/// Given a condition code, return NZCV flags that would satisfy that condition. +/// The flag bits are in the format expected by the ccmp instructions. +/// Note that many different flag settings can satisfy a given condition code, +/// this function just returns one of them. +inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) { + // NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7. + enum { N = 8, Z = 4, C = 2, V = 1 }; + switch (Code) { + default: llvm_unreachable("Unknown condition code"); + case EQ: return Z; // Z == 1 + case NE: return 0; // Z == 0 + case HS: return C; // C == 1 + case LO: return 0; // C == 0 + case MI: return N; // N == 1 + case PL: return 0; // N == 0 + case VS: return V; // V == 1 + case VC: return 0; // V == 0 + case HI: return C; // C == 1 && Z == 0 + case LS: return 0; // C == 0 || Z == 1 + case GE: return 0; // N == V + case LT: return N; // N != V + case GT: return 0; // Z == 0 && N == V + case LE: return Z; // Z == 1 || N != V + } +} +} // end namespace AArch64CC + /// Instances of this class can perform bidirectional mapping from random /// identifier strings to operand encodings. For example "MSR" takes a named /// system-register which must be encoded somehow and decoded for printing. This @@ -115,14 +290,14 @@ inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) { /// out just how often these instructions are emitted before working on it. It /// might even be optimal to just reorder the tables for the common instructions /// rather than changing the algorithm. 
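
The comment above describes AArch64NamedImmMapper (defined next) as a bidirectional name/encoding map backed by a simple linear scan. As a rough standalone sketch of that lookup scheme, with illustrative names and signatures rather than the actual LLVM API:

    #include <cstdint>
    #include <cstring>

    struct Mapping { const char *Name; uint32_t Value; };

    // Name -> encoding: linear scan over the pair table. Valid reports
    // whether any pair matched.
    static uint32_t lookupValue(const Mapping *Pairs, unsigned NumPairs,
                                const char *Name, bool &Valid) {
      for (unsigned I = 0; I != NumPairs; ++I) {
        if (std::strcmp(Pairs[I].Name, Name) == 0) {
          Valid = true;
          return Pairs[I].Value;
        }
      }
      Valid = false;
      return 0;
    }

    // Encoding -> name: the same table, scanned on the other field.
    static const char *lookupName(const Mapping *Pairs, unsigned NumPairs,
                                  uint32_t Value, bool &Valid) {
      for (unsigned I = 0; I != NumPairs; ++I) {
        if (Pairs[I].Value == Value) {
          Valid = true;
          return Pairs[I].Name;
        }
      }
      Valid = false;
      return nullptr;
    }

As the comment notes, the scan is O(n) in the table size, which is acceptable for tables this small and instructions this rarely emitted.
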
-struct NamedImmMapper { +struct AArch64NamedImmMapper { struct Mapping { const char *Name; uint32_t Value; }; template - NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) + AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm) : Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {} StringRef toString(uint32_t Value, bool &Valid) const; @@ -138,7 +313,7 @@ protected: uint32_t TooBigImm; }; -namespace A64AT { +namespace AArch64AT { enum ATValues { Invalid = -1, // Op0 Op1 CRn CRm Op2 S1E1R = 0x43c0, // 01 000 0111 1000 000 @@ -155,14 +330,14 @@ namespace A64AT { S12E0W = 0x63c7 // 01 100 0111 1000 111 }; - struct ATMapper : NamedImmMapper { + struct ATMapper : AArch64NamedImmMapper { const static Mapping ATPairs[]; ATMapper(); }; } -namespace A64DB { +namespace AArch64DB { enum DBValues { Invalid = -1, OSHLD = 0x1, @@ -179,14 +354,14 @@ namespace A64DB { SY = 0xf }; - struct DBarrierMapper : NamedImmMapper { + struct DBarrierMapper : AArch64NamedImmMapper { const static Mapping DBarrierPairs[]; DBarrierMapper(); }; } -namespace A64DC { +namespace AArch64DC { enum DCValues { Invalid = -1, // Op1 CRn CRm Op2 ZVA = 0x5ba1, // 01 011 0111 0100 001 @@ -199,7 +374,7 @@ namespace A64DC { CISW = 0x43f2 // 01 000 0111 1110 010 }; - struct DCMapper : NamedImmMapper { + struct DCMapper : AArch64NamedImmMapper { const static Mapping DCPairs[]; DCMapper(); @@ -207,7 +382,7 @@ namespace A64DC { } -namespace A64IC { +namespace AArch64IC { enum ICValues { Invalid = -1, // Op1 CRn CRm Op2 IALLUIS = 0x0388, // 000 0111 0001 000 @@ -216,7 +391,7 @@ namespace A64IC { }; - struct ICMapper : NamedImmMapper { + struct ICMapper : AArch64NamedImmMapper { const static Mapping ICPairs[]; ICMapper(); @@ -227,19 +402,19 @@ namespace A64IC { } } -namespace A64ISB { +namespace AArch64ISB { enum ISBValues { Invalid = -1, SY = 0xf }; - struct ISBMapper : NamedImmMapper { + struct ISBMapper : AArch64NamedImmMapper { const static Mapping ISBPairs[]; ISBMapper(); }; } -namespace A64PRFM { +namespace AArch64PRFM { enum PRFMValues { Invalid = -1, PLDL1KEEP = 0x00, @@ -262,14 +437,14 @@ namespace A64PRFM { PSTL3STRM = 0x15 }; - struct PRFMMapper : NamedImmMapper { + struct PRFMMapper : AArch64NamedImmMapper { const static Mapping PRFMPairs[]; PRFMMapper(); }; } -namespace A64PState { +namespace AArch64PState { enum PStateValues { Invalid = -1, SPSel = 0x05, @@ -277,7 +452,7 @@ namespace A64PState { DAIFClr = 0x1f }; - struct PStateMapper : NamedImmMapper { + struct PStateMapper : AArch64NamedImmMapper { const static Mapping PStatePairs[]; PStateMapper(); @@ -285,7 +460,7 @@ namespace A64PState { } -namespace A64SE { +namespace AArch64SE { enum ShiftExtSpecifiers { Invalid = -1, LSL, @@ -306,7 +481,7 @@ namespace A64SE { }; } -namespace A64Layout { +namespace AArch64Layout { enum VectorLayout { Invalid = -1, VL_8B, @@ -329,43 +504,43 @@ namespace A64Layout { } inline static const char * -A64VectorLayoutToString(A64Layout::VectorLayout Layout) { +AArch64VectorLayoutToString(AArch64Layout::VectorLayout Layout) { switch (Layout) { - case A64Layout::VL_8B: return ".8b"; - case A64Layout::VL_4H: return ".4h"; - case A64Layout::VL_2S: return ".2s"; - case A64Layout::VL_1D: return ".1d"; - case A64Layout::VL_16B: return ".16b"; - case A64Layout::VL_8H: return ".8h"; - case A64Layout::VL_4S: return ".4s"; - case A64Layout::VL_2D: return ".2d"; - case A64Layout::VL_B: return ".b"; - case A64Layout::VL_H: return ".h"; - case A64Layout::VL_S: return ".s"; - case A64Layout::VL_D: return ".d"; + case 
AArch64Layout::VL_8B: return ".8b"; + case AArch64Layout::VL_4H: return ".4h"; + case AArch64Layout::VL_2S: return ".2s"; + case AArch64Layout::VL_1D: return ".1d"; + case AArch64Layout::VL_16B: return ".16b"; + case AArch64Layout::VL_8H: return ".8h"; + case AArch64Layout::VL_4S: return ".4s"; + case AArch64Layout::VL_2D: return ".2d"; + case AArch64Layout::VL_B: return ".b"; + case AArch64Layout::VL_H: return ".h"; + case AArch64Layout::VL_S: return ".s"; + case AArch64Layout::VL_D: return ".d"; default: llvm_unreachable("Unknown Vector Layout"); } } -inline static A64Layout::VectorLayout -A64StringToVectorLayout(StringRef LayoutStr) { - return StringSwitch(LayoutStr) - .Case(".8b", A64Layout::VL_8B) - .Case(".4h", A64Layout::VL_4H) - .Case(".2s", A64Layout::VL_2S) - .Case(".1d", A64Layout::VL_1D) - .Case(".16b", A64Layout::VL_16B) - .Case(".8h", A64Layout::VL_8H) - .Case(".4s", A64Layout::VL_4S) - .Case(".2d", A64Layout::VL_2D) - .Case(".b", A64Layout::VL_B) - .Case(".h", A64Layout::VL_H) - .Case(".s", A64Layout::VL_S) - .Case(".d", A64Layout::VL_D) - .Default(A64Layout::Invalid); +inline static AArch64Layout::VectorLayout +AArch64StringToVectorLayout(StringRef LayoutStr) { + return StringSwitch(LayoutStr) + .Case(".8b", AArch64Layout::VL_8B) + .Case(".4h", AArch64Layout::VL_4H) + .Case(".2s", AArch64Layout::VL_2S) + .Case(".1d", AArch64Layout::VL_1D) + .Case(".16b", AArch64Layout::VL_16B) + .Case(".8h", AArch64Layout::VL_8H) + .Case(".4s", AArch64Layout::VL_4S) + .Case(".2d", AArch64Layout::VL_2D) + .Case(".b", AArch64Layout::VL_B) + .Case(".h", AArch64Layout::VL_H) + .Case(".s", AArch64Layout::VL_S) + .Case(".d", AArch64Layout::VL_D) + .Default(AArch64Layout::Invalid); } -namespace A64SysReg { +namespace AArch64SysReg { enum SysRegROValues { MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000 @@ -396,16 +571,16 @@ namespace A64SysReg { ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011 ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100 ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101 - ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 - ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 - ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 - ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 - ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 - ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 - ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 - ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 - ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 - ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + ID_A64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000 + ID_A64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001 + ID_A64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000 + ID_A64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001 + ID_A64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100 + ID_A64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101 + ID_A64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000 + ID_A64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 + ID_A64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 + ID_A64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 @@ -960,38 +1135,45 @@ namespace A64SysReg { ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100 ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101 ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110 - ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111 + ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111 + }; + + // Cyclone specific system registers + 
enum CycloneSysRegValues { + CPM_IOACC_CTL_EL3 = 0xff90 }; - // Note that these do not inherit from NamedImmMapper. This class is + // Note that these do not inherit from AArch64NamedImmMapper. This class is // sufficiently different in its behaviour that I don't believe it's worth - // burdening the common NamedImmMapper with abstractions only needed in + // burdening the common AArch64NamedImmMapper with abstractions only needed in // this one case. struct SysRegMapper { - static const NamedImmMapper::Mapping SysRegPairs[]; + static const AArch64NamedImmMapper::Mapping SysRegPairs[]; + static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[]; - const NamedImmMapper::Mapping *InstPairs; + const AArch64NamedImmMapper::Mapping *InstPairs; size_t NumInstPairs; + uint64_t FeatureBits; - SysRegMapper() {} + SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { } uint32_t fromString(StringRef Name, bool &Valid) const; std::string toString(uint32_t Bits, bool &Valid) const; }; struct MSRMapper : SysRegMapper { - static const NamedImmMapper::Mapping MSRPairs[]; - MSRMapper(); + static const AArch64NamedImmMapper::Mapping MSRPairs[]; + MSRMapper(uint64_t FeatureBits); }; struct MRSMapper : SysRegMapper { - static const NamedImmMapper::Mapping MRSPairs[]; - MRSMapper(); + static const AArch64NamedImmMapper::Mapping MRSPairs[]; + MRSMapper(uint64_t FeatureBits); }; uint32_t ParseGenericRegister(StringRef Name, bool &Valid); } -namespace A64TLBI { +namespace AArch64TLBI { enum TLBIValues { Invalid = -1, // Op0 Op1 CRn CRm Op2 IPAS2E1IS = 0x6401, // 01 100 1000 0000 001 @@ -1028,7 +1210,7 @@ namespace A64TLBI { VAALE1 = 0x443f // 01 000 1000 0111 111 }; - struct TLBIMapper : NamedImmMapper { + struct TLBIMapper : AArch64NamedImmMapper { const static Mapping TLBIPairs[]; TLBIMapper(); @@ -1051,88 +1233,62 @@ namespace A64TLBI { return true; } } -} +} namespace AArch64II { - + /// Target Operand Flag enum. enum TOF { - //===--------------------------------------------------------------===// + //===------------------------------------------------------------------===// // AArch64 Specific MachineOperand flags. MO_NO_FLAG, - // MO_GOT - Represents a relocation referring to the GOT entry of a given - // symbol. Used in adrp. - MO_GOT, - - // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the - // GOT entry of a given symbol. Used in ldr only. - MO_GOT_LO12, - - // MO_DTPREL_* - Represents a relocation referring to the offset from a - // module's dynamic thread pointer. Used in the local-dynamic TLS access - // model. - MO_DTPREL_G1, - MO_DTPREL_G0_NC, - - // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry - // providing the offset of a variable from the thread-pointer. Used in - // initial-exec TLS model where this offset is assigned in the static thread - // block and thus known by the dynamic linker. - MO_GOTTPREL, - MO_GOTTPREL_LO12, - - // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing - // a TLS descriptor chosen by the dynamic linker. Used for the - // general-dynamic and local-dynamic TLS access models where very littls is - // known at link-time. - MO_TLSDESC, - MO_TLSDESC_LO12, - - // MO_TPREL_* - Represents a relocation referring to the offset of a - // variable from the thread pointer itself. Used in the local-exec TLS - // access model. - MO_TPREL_G1, - MO_TPREL_G0_NC, - - // MO_LO12 - On a symbol operand, this represents a relocation containing - // lower 12 bits of the address. Used in add/sub/ldr/str. 
- MO_LO12, - - // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using - // movz/movk instructions. - MO_ABS_G3, - MO_ABS_G2_NC, - MO_ABS_G1_NC, - MO_ABS_G0_NC + MO_FRAGMENT = 0x7, + + /// MO_PAGE - A symbol operand with this flag represents the pc-relative + /// offset of the 4K page containing the symbol. This is used with the + /// ADRP instruction. + MO_PAGE = 1, + + /// MO_PAGEOFF - A symbol operand with this flag represents the offset of + /// that symbol within a 4K page. This offset is added to the page address + /// to produce the complete address. + MO_PAGEOFF = 2, + + /// MO_G3 - A symbol operand with this flag (granule 3) represents the high + /// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction + MO_G3 = 3, + + /// MO_G2 - A symbol operand with this flag (granule 2) represents the bits + /// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction + MO_G2 = 4, + + /// MO_G1 - A symbol operand with this flag (granule 1) represents the bits + /// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction + MO_G1 = 5, + + /// MO_G0 - A symbol operand with this flag (granule 0) represents the bits + /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction + MO_G0 = 6, + + /// MO_GOT - This flag indicates that a symbol operand represents the + /// address of the GOT entry for the symbol, rather than the address of + /// the symbol itself. + MO_GOT = 8, + + /// MO_NC - Indicates whether the linker is expected to check the symbol + /// reference for overflow. For example in an ADRP/ADD pair of relocations + /// the ADRP usually does check, but not the ADD. + MO_NC = 0x10, + + /// MO_TLS - Indicates that the operand being accessed is some kind of + /// thread-local symbol. On Darwin, only one type of thread-local access + /// exists (pre linker-relaxation), but on ELF the TLSModel used for the + /// referee will affect interpretation. + MO_TLS = 0x20 }; -} - -class APFloat; - -namespace A64Imms { - bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits); - - inline bool isFPImm(const APFloat &Val) { - uint32_t Imm8; - return isFPImm(Val, Imm8); - } - - bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits); - bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm); - - bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - - // We sometimes want to know whether the immediate is representable with a - // MOVN but *not* with a MOVZ (because that would take priority). - bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); - - uint64_t decodeNeonModImm(unsigned Val, unsigned OpCmode, unsigned &EltBits); - bool decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm, - unsigned &ShiftOnesIn); - } +} // end namespace AArch64II -} // end namespace llvm; +} // end namespace llvm #endif diff --git a/lib/Target/AArch64/Utils/Android.mk b/lib/Target/AArch64/Utils/Android.mk index b8bf795..3c1d194 100644 --- a/lib/Target/AArch64/Utils/Android.mk +++ b/lib/Target/AArch64/Utils/Android.mk @@ -1,5 +1,10 @@ LOCAL_PATH := $(call my-dir) +arm64_utils_TBLGEN_TABLES := \ + AArch64GenRegisterInfo.inc \ + AArch64GenInstrInfo.inc \ + AArch64GenSubtargetInfo.inc + arm64_utils_SRC_FILES := \ AArch64BaseInfo.cpp @@ -16,7 +21,12 @@ LOCAL_MODULE:= libLLVMARM64Utils LOCAL_MODULE_TAGS := optional +TBLGEN_TD_DIR := $(LOCAL_PATH)/.. 
+TBLGEN_TABLES := $(arm64_utils_TBLGEN_TABLES) + include $(LLVM_DEVICE_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(LLVM_GEN_INTRINSICS_MK) include $(BUILD_STATIC_LIBRARY) endif @@ -32,5 +42,10 @@ LOCAL_MODULE:= libLLVMARM64Utils LOCAL_MODULE_TAGS := optional +TBLGEN_TD_DIR := $(LOCAL_PATH)/.. +TBLGEN_TABLES := $(arm64_utils_TBLGEN_TABLES) + include $(LLVM_HOST_BUILD_MK) +include $(LLVM_TBLGEN_RULES_MK) +include $(LLVM_GEN_INTRINSICS_MK) include $(BUILD_HOST_STATIC_LIBRARY) diff --git a/lib/Target/AArch64/Utils/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt index 4acecc9..bcefeb6 100644 --- a/lib/Target/AArch64/Utils/LLVMBuild.txt +++ b/lib/Target/AArch64/Utils/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/AArch646/Utils/LLVMBuild.txt ----------------*- Conf -*--===; +;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt ----------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; diff --git a/lib/Target/AArch64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile index 0f4a645..0b80f82 100644 --- a/lib/Target/AArch64/Utils/Makefile +++ b/lib/Target/AArch64/Utils/Makefile @@ -9,7 +9,8 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMAArch64Utils -# Hack: we need to include 'main' AArch64 target directory to grab private headers -#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. +# Hack: we need to include 'main' AArch64 target directory to grab private +# headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 28ea879..94faf6f 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -24,7 +24,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "a15-sd-optimizer" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" @@ -39,6 +38,8 @@ using namespace llvm; +#define DEBUG_TYPE "a15-sd-optimizer" + namespace { struct A15SDOptimizer : public MachineFunctionPass { static char ID; @@ -90,7 +91,7 @@ namespace { unsigned createImplicitDef(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, DebugLoc DL); - + // // Various property checkers // @@ -259,7 +260,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { if (DPRMI && SPRMI) { // See if the first operand of this insert_subreg is IMPLICIT_DEF MachineInstr *ECDef = elideCopies(DPRMI); - if (ECDef != 0 && ECDef->isImplicitDef()) { + if (ECDef && ECDef->isImplicitDef()) { // Another corner case - if we're inserting something that is purely // a subreg copy of a DPR, just use that DPR. @@ -348,10 +349,10 @@ MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) { if (!MI->isFullCopy()) return MI; if (!TRI->isVirtualRegister(MI->getOperand(1).getReg())) - return NULL; + return nullptr; MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg()); if (!Def) - return NULL; + return nullptr; return elideCopies(Def); } @@ -435,7 +436,7 @@ A15SDOptimizer::createDupLane(MachineBasicBlock &MBB, Out) .addReg(Reg) .addImm(Lane)); - + return Out; } @@ -601,7 +602,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { // * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR // lane, and the other lane(s) of the DPR/QPR register // that we are inserting in are undefined, use the - // original DPR/QPR value. + // original DPR/QPR value. // * Otherwise, fall back on the same stategy as COPY. 
// // * REG_SEQUENCE: * If all except one of the input operands are @@ -693,7 +694,7 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { MI != ME;) { Modified |= runOnInstruction(MI++); } - + } for (std::set::iterator I = DeadInstr.begin(), diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 4412b45..55df29c 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -49,8 +49,6 @@ FunctionPass *createThumb2SizeReductionPass(); /// \brief Creates an ARM-specific Target Transformation Info pass. ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM); -FunctionPass *createARMAtomicExpandPass(const TargetMachine *TM); - void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 0fa865f..55e9fe5 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "ARMAsmPrinter.h" #include "ARM.h" #include "ARMConstantPoolValue.h" @@ -45,6 +44,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" @@ -55,6 +55,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "asm-printer" + void ARMAsmPrinter::EmitFunctionBodyEnd() { // Make sure to terminate any constant pools that were at the end // of the function. @@ -85,7 +87,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { ? MCSymbolRefExpr::VK_ARM_TARGET1 : MCSymbolRefExpr::VK_None), OutContext); - + OutStreamer.EmitValue(E, Size); } @@ -96,7 +98,28 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AFI = MF.getInfo(); MCP = MF.getConstantPool(); - return AsmPrinter::runOnMachineFunction(MF); + SetupMachineFunction(MF); + + if (Subtarget->isTargetCOFF()) { + bool Internal = MF.getFunction()->hasInternalLinkage(); + COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC + : COFF::IMAGE_SYM_CLASS_EXTERNAL; + int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; + + OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); + OutStreamer.EmitCOFFSymbolStorageClass(Scl); + OutStreamer.EmitCOFFSymbolType(Type); + OutStreamer.EndCOFFSymbolDef(); + } + + // Have common code print out the function header with linkage info etc. + EmitFunctionHeader(); + + // Emit the rest of the function body. + EmitFunctionBody(); + + // We didn't modify anything. + return false; } void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, @@ -239,7 +262,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (ARM::GPRPairRegClass.contains(RegBegin)) { const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); - O << ARMInstPrinter::getRegisterName(Reg0) << ", ";; + O << ARMInstPrinter::getRegisterName(Reg0) << ", "; RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); } O << ARMInstPrinter::getRegisterName(RegBegin); @@ -383,7 +406,7 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, // If either end mode is unknown (EndInfo == NULL) or different than // the start mode, then restore the start mode. 
const bool WasThumb = isThumb(StartInfo); - if (EndInfo == NULL || WasThumb != isThumb(*EndInfo)) { + if (!EndInfo || WasThumb != isThumb(*EndInfo)) { OutStreamer.EmitAssemblerFlag(WasThumb ? MCAF_Code16 : MCAF_Code32); } } @@ -456,6 +479,29 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { emitAttributes(); } +static void +emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, + MachineModuleInfoImpl::StubValueTy &MCSym) { + // L_foo$stub: + OutStreamer.EmitLabel(StubLabel); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol); + + if (MCSym.getInt()) + // External to current translation unit. + OutStreamer.EmitIntValue(0, 4/*size*/); + else + // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info + // pointers need to be indirect and pc-rel. We accomplish this by + // using NLPs; however, sometimes the types are local to the file. + // We need to fill in the value for the NLP in those cases. + OutStreamer.EmitValue( + MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()), + 4 /*size*/); +} + void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetMachO()) { @@ -472,27 +518,9 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); EmitAlignment(2); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .indirect_symbol _foo - MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; - OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol); - - if (MCSym.getInt()) - // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/); - else - // Internal to current translation unit. - // - // When we place the LSDA into the TEXT section, the type info - // pointers need to be indirect and pc-rel. We accomplish this by - // using NLPs; however, sometimes the types are local to the file. - // We need to fill in the value for the NLP in those cases. - OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), - 4/*size*/); - } + + for (auto &Stub : Stubs) + emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); Stubs.clear(); OutStreamer.AddBlankLine(); @@ -500,17 +528,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { Stubs = MMIMacho.GetHiddenGVStubList(); if (!Stubs.empty()) { - OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); EmitAlignment(2); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr:: - Create(Stubs[i].second.getPointer(), - OutContext), - 4/*size*/); - } + + for (auto &Stub : Stubs) + emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); Stubs.clear(); OutStreamer.AddBlankLine(); @@ -523,6 +545,28 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + + // Emit a .data.rel section containing any stubs that were created. 
+ if (Subtarget->isTargetELF()) { + const TargetLoweringObjectFileELF &TLOFELF = + static_cast(getObjFileLowering()); + + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); + const DataLayout *TD = TM.getDataLayout(); + + for (auto &stub: Stubs) { + OutStreamer.EmitLabel(stub.first); + OutStreamer.EmitSymbolValue(stub.second.getPointer(), + TD->getPointerSize(0)); + } + Stubs.clear(); + } + } } //===----------------------------------------------------------------------===// @@ -575,7 +619,7 @@ void ARMAsmPrinter::emitAttributes() { getArchForCPU(CPUString, Subtarget)); // Tag_CPU_arch_profile must have the default value of 0 when "Architecture - // profile is not applicable (e.g. pre v7, or cross-profile code)". + // profile is not applicable (e.g. pre v7, or cross-profile code)". if (Subtarget->hasV7Ops()) { if (Subtarget->isAClass()) { ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, @@ -627,6 +671,20 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitFPU(ARM::VFPV2); } + if (TM.getRelocationModel() == Reloc::PIC_) { + // PIC specific attributes. + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data, + ARMBuildAttrs::AddressRWPCRel); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data, + ARMBuildAttrs::AddressROPCRel); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use, + ARMBuildAttrs::AddressGOT); + } else { + // Allow direct addressing of imported data for all other relocation models. + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use, + ARMBuildAttrs::AddressDirect); + } + // Signal various FP modes. if (!TM.Options.UnsafeFPMath) { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed); @@ -723,7 +781,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, MachineModuleInfoImpl::StubValueTy &StubSym = GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) : MMIMachO.getGVStubEntry(MCSym); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); return MCSym; @@ -971,7 +1029,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { RegList.push_back(SrcReg); break; } - ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); } else { // Changes of stack / frame pointer. if (SrcReg == ARM::SP) { @@ -1016,18 +1075,20 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { } } - if (DstReg == FramePtr && FramePtr != ARM::SP) - // Set-up of the frame pointer. Positive values correspond to "add" - // instruction. - ATS.emitSetFP(FramePtr, ARM::SP, -Offset); - else if (DstReg == ARM::SP) { - // Change of SP by an offset. Positive values correspond to "sub" - // instruction. - ATS.emitPad(Offset); - } else { - // Move of SP to a register. Positive values correspond to an "add" - // instruction. - ATS.emitMovSP(DstReg, -Offset); + if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) { + if (DstReg == FramePtr && FramePtr != ARM::SP) + // Set-up of the frame pointer. Positive values correspond to "add" + // instruction. + ATS.emitSetFP(FramePtr, ARM::SP, -Offset); + else if (DstReg == ARM::SP) { + // Change of SP by an offset. Positive values correspond to "sub" + // instruction. 
+ ATS.emitPad(Offset); + } else { + // Move of SP to a register. Positive values correspond to an "add" + // instruction. + ATS.emitMovSP(DstReg, -Offset); + } } } else if (DstReg == ARM::SP) { MI->dump(); diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 46c2626..7c103c6 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -47,16 +47,17 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { bool InConstantPool; public: explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) { - Subtarget = &TM.getSubtarget(); - } + : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr), + InConstantPool(false) { + Subtarget = &TM.getSubtarget(); + } const char *getPassName() const override { return "ARM Assembly / Object Emitter"; } void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, diff --git a/lib/Target/ARM/ARMAtomicExpandPass.cpp b/lib/Target/ARM/ARMAtomicExpandPass.cpp deleted file mode 100644 index 18e0783..0000000 --- a/lib/Target/ARM/ARMAtomicExpandPass.cpp +++ /dev/null @@ -1,406 +0,0 @@ -//===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass (at IR level) to replace atomic instructions with -// appropriate (intrinsic-based) ldrex/strex loops. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm-atomic-expand" -#include "ARM.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -namespace { - class ARMAtomicExpandPass : public FunctionPass { - const TargetLowering *TLI; - public: - static char ID; // Pass identification, replacement for typeid - explicit ARMAtomicExpandPass(const TargetMachine *TM = 0) - : FunctionPass(ID), TLI(TM->getTargetLowering()) {} - - bool runOnFunction(Function &F) override; - bool expandAtomicInsts(Function &F); - - bool expandAtomicLoad(LoadInst *LI); - bool expandAtomicStore(StoreInst *LI); - bool expandAtomicRMW(AtomicRMWInst *AI); - bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); - - AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord); - - /// Perform a load-linked operation on Addr, returning a "Value *" with the - /// corresponding pointee type. This may entail some non-trivial operations - /// to truncate or reconstruct illegal types since intrinsics must be legal - Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord); - - /// Perform a store-conditional operation to Addr. Return the status of the - /// store: 0 if the it succeeded, non-zero otherwise. 
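
The loadLinked/storeConditional hooks declared above follow the usual load-linked/store-conditional contract: the conditional store yields 0 on success and non-zero on failure, so the expansions below loop until it reports success. In outline, this is a sketch of the contract only, not code from the pass:

    #include <cstdint>

    // Generic LL/SC read-modify-write loop. loadLinked() opens the
    // exclusive monitor; storeConditional() returns 0 only if nothing
    // intervened, otherwise the loop retries.
    template <typename LL, typename SC, typename Op>
    uint32_t llscRMW(LL loadLinked, SC storeConditional, Op op) {
      uint32_t Loaded, Status;
      do {
        Loaded = loadLinked();
        Status = storeConditional(op(Loaded));
      } while (Status != 0);
      return Loaded; // the old value, as atomicrmw requires
    }
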
- Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, - AtomicOrdering Ord); - - /// Return true if the given (atomic) instruction should be expanded by this - /// pass. - bool shouldExpandAtomic(Instruction *Inst); - }; -} - -char ARMAtomicExpandPass::ID = 0; - -FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) { - return new ARMAtomicExpandPass(TM); -} - -bool ARMAtomicExpandPass::runOnFunction(Function &F) { - SmallVector AtomicInsts; - - // Changing control-flow while iterating through it is a bad idea, so gather a - // list of all atomic instructions before we start. - for (BasicBlock &BB : F) - for (Instruction &Inst : BB) { - if (isa(&Inst) || isa(&Inst) || - (isa(&Inst) && cast(&Inst)->isAtomic()) || - (isa(&Inst) && cast(&Inst)->isAtomic())) - AtomicInsts.push_back(&Inst); - } - - bool MadeChange = false; - for (Instruction *Inst : AtomicInsts) { - if (!shouldExpandAtomic(Inst)) - continue; - - if (AtomicRMWInst *AI = dyn_cast(Inst)) - MadeChange |= expandAtomicRMW(AI); - else if (AtomicCmpXchgInst *CI = dyn_cast(Inst)) - MadeChange |= expandAtomicCmpXchg(CI); - else if (LoadInst *LI = dyn_cast(Inst)) - MadeChange |= expandAtomicLoad(LI); - else if (StoreInst *SI = dyn_cast(Inst)) - MadeChange |= expandAtomicStore(SI); - else - llvm_unreachable("Unknown atomic instruction"); - } - - return MadeChange; -} - -bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) { - // Load instructions don't actually need a leading fence, even in the - // SequentiallyConsistent case. - AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); - - // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is - // an ldrexd (A3.5.3). - IRBuilder<> Builder(LI); - Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder); - - insertTrailingFence(Builder, LI->getOrdering()); - - LI->replaceAllUsesWith(Val); - LI->eraseFromParent(); - - return true; -} - -bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) { - // The only atomic 64-bit store on ARM is an strexd that succeeds, which means - // we need a loop and the entire instruction is essentially an "atomicrmw - // xchg" that ignores the value loaded. - IRBuilder<> Builder(SI); - AtomicRMWInst *AI = - Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), - SI->getValueOperand(), SI->getOrdering()); - SI->eraseFromParent(); - - // Now we have an appropriate swap instruction, lower it as usual. - return expandAtomicRMW(AI); -} - -bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) { - AtomicOrdering Order = AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // fence? - // atomicrmw.start: - // %loaded = @load.linked(%addr) - // %new = some_op iN %loaded, %incr - // %stored = @store_conditional(%new, %addr) - // %try_again = icmp i32 ne %stored, 0 - // br i1 %try_again, label %loop, label %atomicrmw.end - // atomicrmw.end: - // fence? - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. 
It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, Order); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = loadLinked(Builder, Addr, MemOpOrder); - - Value *NewVal; - switch (AI->getOperation()) { - case AtomicRMWInst::Xchg: - NewVal = AI->getValOperand(); - break; - case AtomicRMWInst::Add: - NewVal = Builder.CreateAdd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Sub: - NewVal = Builder.CreateSub(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::And: - NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Nand: - NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()), - "new"); - break; - case AtomicRMWInst::Or: - NewVal = Builder.CreateOr(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Xor: - NewVal = Builder.CreateXor(Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Max: - NewVal = Builder.CreateICmpSGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::Min: - NewVal = Builder.CreateICmpSLE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMax: - NewVal = Builder.CreateICmpUGT(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - case AtomicRMWInst::UMin: - NewVal = Builder.CreateICmpULE(Loaded, AI->getValOperand()); - NewVal = Builder.CreateSelect(NewVal, Loaded, AI->getValOperand(), "new"); - break; - default: - llvm_unreachable("Unknown atomic op"); - } - - Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( - StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); - Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); - - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - insertTrailingFence(Builder, Order); - - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); - - return true; -} - -bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { - AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); - AtomicOrdering FailureOrder = CI->getFailureOrdering(); - Value *Addr = CI->getPointerOperand(); - BasicBlock *BB = CI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord - // - // The full expansion we produce is: - // [...] - // fence? - // cmpxchg.start: - // %loaded = @load.linked(%addr) - // %should_store = icmp eq %loaded, %desired - // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.end/%cmpxchg.barrier - // cmpxchg.trystore: - // %stored = @store_conditional(%new, %addr) - // %try_again = icmp i32 ne %stored, 0 - // br i1 %try_again, label %loop, label %cmpxchg.end - // cmpxchg.barrier: - // fence? - // br label %cmpxchg.end - // cmpxchg.end: - // [...] 
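
One detail worth noting before the code: both expansions call BasicBlock::splitBasicBlock, which unconditionally appends a branch to the split-off block, and then immediately delete that branch so the leading fence can precede a hand-built branch to the loop header. Reduced to its essentials (the block name here is a placeholder):

    #include "llvm/IR/IRBuilder.h"
    #include <iterator>

    // Split BB in front of Inst, then delete the branch splitBasicBlock
    // appends so extra instructions (e.g. a leading fence) can be
    // emitted before a hand-built branch to LoopBB.
    static void splitAndRebranch(llvm::Instruction *Inst,
                                 llvm::BasicBlock *LoopBB) {
      llvm::BasicBlock *BB = Inst->getParent();
      BB->splitBasicBlock(Inst, "expansion.end");
      // splitBasicBlock "helpfully" added "br %expansion.end"; drop it.
      std::prev(BB->end())->eraseFromParent();
      llvm::IRBuilder<> Builder(BB);
      // ... CreateFence(...) would go here when the ordering needs it ...
      Builder.CreateBr(LoopBB);
    }
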
- BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); - auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB); - auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, BarrierBB); - auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); - - // This grabs the DebugLoc from CI - IRBuilder<> Builder(CI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we might want a fence too. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - AtomicOrdering MemOpOrder = insertLeadingFence(Builder, SuccessOrder); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries. - Builder.SetInsertPoint(LoopBB); - Value *Loaded = loadLinked(Builder, Addr, MemOpOrder); - Value *ShouldStore = - Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); - - // If the the cmpxchg doesn't actually need any ordering when it fails, we can - // jump straight past that fence instruction (if it exists). - BasicBlock *FailureBB = FailureOrder == Monotonic ? ExitBB : BarrierBB; - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); - - Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = - storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( - StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); - Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB); - - // Finally, make sure later instructions don't get reordered with a fence if - // necessary. - Builder.SetInsertPoint(BarrierBB); - insertTrailingFence(Builder, SuccessOrder); - Builder.CreateBr(ExitBB); - - CI->replaceAllUsesWith(Loaded); - CI->eraseFromParent(); - - return true; -} - -Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr, - AtomicOrdering Ord) { - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Type *ValTy = cast(Addr->getType())->getElementType(); - bool IsAcquire = - Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; - - // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd - // intrinsic must return {i32, i32} and we have to recombine them into a - // single i64 here. - if (ValTy->getPrimitiveSizeInBits() == 64) { - Intrinsic::ID Int = - IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; - Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int); - - Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); - Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); - - Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); - Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); - Lo = Builder.CreateZExt(Lo, ValTy, "lo64"); - Hi = Builder.CreateZExt(Hi, ValTy, "hi64"); - return Builder.CreateOr( - Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64"); - } - - Type *Tys[] = { Addr->getType() }; - Intrinsic::ID Int = IsAcquire ? 
Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; - Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys); - - return Builder.CreateTruncOrBitCast( - Builder.CreateCall(Ldrex, Addr), - cast(Addr->getType())->getElementType()); -} - -Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val, - Value *Addr, AtomicOrdering Ord) { - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - bool IsRelease = - Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; - - // Since the intrinsics must have legal type, the i64 intrinsics take two - // parameters: "i32, i32". We must marshal Val into the appropriate form - // before the call. - if (Val->getType()->getPrimitiveSizeInBits() == 64) { - Intrinsic::ID Int = - IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; - Function *Strex = Intrinsic::getDeclaration(M, Int); - Type *Int32Ty = Type::getInt32Ty(M->getContext()); - - Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); - Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); - Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); - return Builder.CreateCall3(Strex, Lo, Hi, Addr); - } - - Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex; - Type *Tys[] = { Addr->getType() }; - Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); - - return Builder.CreateCall2( - Strex, Builder.CreateZExtOrBitCast( - Val, Strex->getFunctionType()->getParamType(0)), - Addr); -} - -AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) - return Ord; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - Builder.CreateFence(Release); - - // The exclusive operations don't need any barrier if we're adding separate - // fences. - return Monotonic; -} - -void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder, - AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) - return; - - if (Ord == Acquire || Ord == AcquireRelease) - Builder.CreateFence(Acquire); - else if (Ord == SequentiallyConsistent) - Builder.CreateFence(SequentiallyConsistent); -} - -bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) { - // Loads and stores less than 64-bits are already atomic; ones above that - // are doomed anyway, so defer to the default libcall and blame the OS when - // things go wrong: - if (StoreInst *SI = dyn_cast(Inst)) - return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64; - else if (LoadInst *LI = dyn_cast(Inst)) - return LI->getType()->getPrimitiveSizeInBits() == 64; - - // For the real atomic operations, we have ldrex/strex up to 64 bits. - return Inst->getType()->getPrimitiveSizeInBits() <= 64; -} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 47f5bf9..bc266e8 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -37,11 +37,13 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +#define DEBUG_TYPE "arm-instrinfo" + #define GET_INSTRINFO_CTOR_DTOR #include "ARMGenInstrInfo.inc" -using namespace llvm; - static cl::opt EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); @@ -125,14 +127,14 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // FIXME: Thumb2 support. 
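
As in A15SDOptimizer.cpp and ARMAsmPrinter.cpp earlier in this patch, ARMBaseInstrInfo.cpp moves its DEBUG_TYPE definition below the #includes. The point of the pattern, shown schematically with a made-up file and pass name, is that headers are no longer preprocessed with the macro in scope, which avoids clashes now that headers may use DEBUG_TYPE themselves:

    #include "SomePass.h"            // headers see no DEBUG_TYPE ...
    #include "llvm/Support/Debug.h"

    using namespace llvm;

    #define DEBUG_TYPE "some-pass"   // ... only this file's code does

    static void noteSomething() {
      DEBUG(dbgs() << "printed only under -debug-only=some-pass\n");
    }
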
if (!EnableARM3Addr) - return NULL; + return nullptr; MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); uint64_t TSFlags = MI->getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { - default: return NULL; + default: return nullptr; case ARMII::IndexModePre: isPre = true; break; @@ -144,10 +146,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // operation. unsigned MemOpc = getUnindexedOpcode(MI->getOpcode()); if (MemOpc == 0) - return NULL; + return nullptr; - MachineInstr *UpdateMI = NULL; - MachineInstr *MemMI = NULL; + MachineInstr *UpdateMI = nullptr; + MachineInstr *MemMI = nullptr; unsigned AddrMode = (TSFlags & ARMII::AddrModeMask); const MCInstrDesc &MCID = MI->getDesc(); unsigned NumOps = MCID.getNumOperands(); @@ -169,7 +171,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (ARM_AM::getSOImmVal(Amt) == -1) // Can't encode it in a so_imm operand. This transformation will // add more than 1 instruction. Abandon! - return NULL; + return nullptr; UpdateMI = BuildMI(MF, MI->getDebugLoc(), get(isSub ? ARM::SUBri : ARM::ADDri), WBReg) .addReg(BaseReg).addImm(Amt) @@ -273,8 +275,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { - TBB = 0; - FBB = 0; + TBB = nullptr; + FBB = nullptr; MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) @@ -331,7 +333,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, I->isReturn())) { // Forget any previous condition branch information - it no longer applies. Cond.clear(); - FBB = 0; + FBB = nullptr; // If we can modify the function, delete everything below this // unconditional branch. @@ -405,7 +407,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "ARM branch conditions have two components!"); - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) { // Unconditional branch? if (isThumb) BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0); @@ -535,7 +537,8 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { return true; } -template<> bool IsCPSRDead(MachineInstr* MI) { +namespace llvm { +template <> bool IsCPSRDead(MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isUndef() || MO.isUse()) @@ -548,6 +551,7 @@ template<> bool IsCPSRDead(MachineInstr* MI) { // all definitions of CPSR are dead return true; } +} /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. LLVM_ATTRIBUTE_NOINLINE @@ -620,7 +624,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { MI->getOperand(NumOps - (MI->isPredicable() ? 
3 : 2)); unsigned JTI = JTOP.getIndex(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - assert(MJTI != 0); + assert(MJTI != nullptr); const std::vector &JT = MJTI->getJumpTables(); assert(JTI < JT.size()); // Thumb instructions are 2 byte aligned, but JT entries are 4 byte @@ -1248,7 +1252,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { static_cast(MCPE.Val.MachineCPVal); unsigned PCLabelId = AFI->createPICLabelUId(); - ARMConstantPoolValue *NewCPV = 0; + ARMConstantPoolValue *NewCPV = nullptr; // FIXME: The below assumes PIC relocation model and that the function // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and @@ -1659,10 +1663,10 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg); // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) - return NULL; + return nullptr; MI = TargetInstrInfo::commuteInstruction(MI, NewMI); if (!MI) - return NULL; + return nullptr; // After swapping the MOVCC operands, also invert the condition. MI->getOperand(MI->findFirstPredOperandIdx()) .setImm(ARMCC::getOppositeCondition(CC)); @@ -1678,35 +1682,36 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg, const MachineRegisterInfo &MRI, const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) - return 0; + return nullptr; if (!MRI.hasOneNonDBGUse(Reg)) - return 0; + return nullptr; MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) - return 0; + return nullptr; // MI is folded into the MOVCC by predicating it. if (!MI->isPredicable()) - return 0; + return nullptr; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); // Reject frame index operands, PEI can't handle the predicated pseudos. if (MO.isFI() || MO.isCPI() || MO.isJTI()) - return 0; + return nullptr; if (!MO.isReg()) continue; // MI can't have any tied operands, that would conflict with predication. if (MO.isTied()) - return 0; + return nullptr; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - return 0; + return nullptr; if (MO.isDef() && !MO.isDead()) - return 0; + return nullptr; } bool DontMoveAcrossStores = true; - if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) - return 0; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr, + DontMoveAcrossStores)) + return nullptr; return MI; } @@ -1741,14 +1746,14 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (!DefMI) DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); if (!DefMI) - return 0; + return nullptr; // Find new register class to use. MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); unsigned DestReg = MI->getOperand(0).getReg(); const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); if (!MRI.constrainRegClass(DestReg, PreviousClass)) - return 0; + return nullptr; // Create a new predicated version of DefMI. // Rfalse is the first use. @@ -2254,7 +2259,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // Masked compares sometimes use the same register as the corresponding 'and'. 
if (CmpMask != ~0) { if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) { - MI = 0; + MI = nullptr; for (MachineRegisterInfo::use_instr_iterator UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end(); UI != UE; ++UI) { @@ -2281,17 +2286,17 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue). - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. - MI = NULL; + MI = nullptr; else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) { // Conservatively refuse to convert an instruction which isn't in the same // BB as the comparison. // For CMPri, we need to check Sub, thus we can't return here. if (CmpInstr->getOpcode() == ARM::CMPri || CmpInstr->getOpcode() == ARM::t2CMPri) - MI = NULL; + MI = nullptr; else return false; } @@ -3295,7 +3300,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, if (Idx == -1) { Dist = 0; - return 0; + return nullptr; } UseIdx = Idx; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 3ddddcb..4b3e740 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -261,7 +261,7 @@ private: unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, - unsigned *PredCost = 0) const override; + unsigned *PredCost = nullptr) const override; int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const override; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 8130a2d..a2eee9f 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -44,14 +44,18 @@ using namespace llvm; ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), - FramePtr((STI.isTargetMachO() || STI.isThumb()) ? ARM::R7 : ARM::R11), - BasePtr(ARM::R6) { + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) { + if (STI.isTargetMachO()) + FramePtr = ARM::R7; + else if (STI.isTargetWindows()) + FramePtr = ARM::R11; + else // ARM EABI + FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11; } -const uint16_t* +const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; @@ -107,7 +111,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { // should return NULL if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls - return NULL; + return nullptr; return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; } @@ -173,7 +177,7 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return 0; // Can't copy CCR registers. + return nullptr; // Can't copy CCR registers. 
return RC; } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 66b3c82..91df565 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -100,8 +100,8 @@ protected: public: /// Code Generation virtual methods... - const uint16_t * - getCalleeSavedRegs(const MachineFunction *MF = 0) const override; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; const uint32_t *getCallPreservedMask(CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const; @@ -186,7 +186,7 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; + RegScavenger *RS = nullptr) const override; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 4f94ad2..dc41c1c 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -28,7 +28,7 @@ namespace llvm { static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; // Try to get the first register. if (unsigned Reg = State.AllocateReg(RegList, 4)) @@ -71,10 +71,10 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State, bool CanFail) { - static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; - static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; - static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; - static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; + static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; + static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { @@ -123,8 +123,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, CCState &State) { - static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; - static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; + static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 }; + static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); if (Reg == 0) @@ -160,6 +160,105 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } +static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; +static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; +static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; + +// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA +// has InConsecutiveRegs set, and that the last member also has +// InConsecutiveRegsLast set. 
We must process all members of the HA before +// we can allocate it, as we need to know the total number of registers that +// will be needed in order to (attempt to) allocate a contiguous block. +static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + SmallVectorImpl &PendingHAMembers = State.getPendingLocs(); + // AAPCS HFAs must have 1-4 elements, all of the same type + assert(PendingHAMembers.size() < 8); + if (PendingHAMembers.size() > 0) + assert(PendingHAMembers[0].getLocVT() == LocVT); + + // Add the argument to the list to be allocated once we know the size of the + // HA + PendingHAMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + + if (ArgFlags.isInConsecutiveRegsLast()) { + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && + "Homogeneous aggregates must have between 1 and 4 members"); + + // Try to allocate a contiguous block of registers, each of the correct + // size to hold one member. + const uint16_t *RegList; + unsigned NumRegs; + switch (LocVT.SimpleTy) { + case MVT::i32: + case MVT::f32: + RegList = SRegList; + NumRegs = 16; + break; + case MVT::f64: + RegList = DRegList; + NumRegs = 8; + break; + case MVT::v2f64: + RegList = QRegList; + NumRegs = 4; + break; + default: + llvm_unreachable("Unexpected member type for HA"); + break; + } + + unsigned RegResult = + State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size()); + + if (RegResult) { + for (SmallVectorImpl::iterator It = PendingHAMembers.begin(); + It != PendingHAMembers.end(); ++It) { + It->convertToReg(RegResult); + State.addLoc(*It); + ++RegResult; + } + PendingHAMembers.clear(); + return true; + } + + // Register allocation failed, fall back to the stack + + // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp) + for (unsigned regNo = 0; regNo < 16; ++regNo) + State.AllocateReg(SRegList[regNo]); + + unsigned Size = LocVT.getSizeInBits() / 8; + unsigned Align = Size; + + if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { + // Vectors are always aligned to 8 bytes. If we've seen an i32 here + // it's because it's been split from a larger type, also with align 8. + Align = 8; + } + + for (auto It : PendingHAMembers) { + It.convertToMem(State.AllocateStack(Size, Align)); + State.addLoc(It); + + // Only the first member needs to be aligned. 
+ Align = 1; + } + + // All pending members have now been allocated + PendingHAMembers.clear(); + } + + // This will be allocated by the last member of the HA + return true; +} + } // End llvm namespace #endif diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 7cffd82..526089b 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -174,6 +174,9 @@ def CC_ARM_AAPCS_VFP : CallingConv<[ CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + // HFAs are passed in a contiguous block of registers, or on the stack + CCIfConsecutiveRegs>, + CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 7359a11..2fd7edd 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMConstantPoolValue.h" @@ -40,6 +39,8 @@ #endif using namespace llvm; +#define DEBUG_TYPE "jit" + STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { @@ -65,10 +66,10 @@ namespace { static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), JTI(0), + : MachineFunctionPass(ID), JTI(nullptr), II((const ARMBaseInstrInfo *)tm.getInstrInfo()), TD(tm.getDataLayout()), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), + MCE(mce), MCPEs(nullptr), MJTEs(nullptr), IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} /// getBinaryCodeForInstr - This function, generated by the @@ -373,7 +374,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &TM.getSubtarget(); MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = 0; + MJTEs = nullptr; if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; IsThumb = MF.getInfo()->isThumbFunction(); diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index ba05171..ce264ee 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-cp-islands" #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" @@ -36,6 +35,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "arm-cp-islands" + STATISTIC(NumCPEs, "Number of constpool entries"); STATISTIC(NumSplit, "Number of uncond branches inserted"); STATISTIC(NumCBrFixed, "Number of cond branches fixed"); @@ -593,7 +594,7 @@ ARMConstantIslands::CPEntry if (CPEs[i].CPEMI == CPEMI) return &CPEs[i]; } - return NULL; + return nullptr; } /// getCPELogAlign - Returns the required alignment of the constant pool entry @@ -1102,7 +1103,7 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, assert(CPE && "Unexpected!"); if (--CPE->RefCount == 0) { removeDeadCPEMI(CPEMI); - CPE->CPEMI = NULL; + CPE->CPEMI = nullptr; --NumCPEs; return true; } @@ -1135,7 +1136,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) if (CPEs[i].CPEMI == CPEMI) 
continue; // Removing CPEs can leave empty entries, skip - if (CPEs[i].CPEMI == NULL) + if (CPEs[i].CPEMI == nullptr) continue; if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), U.NegOk)) { @@ -1317,7 +1318,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, ++MI; unsigned CPUIndex = CPUserIndex+1; unsigned NumCPUsers = CPUsers.size(); - MachineInstr *LastIT = 0; + MachineInstr *LastIT = nullptr; for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI); Offset < BaseInsertOffset; Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) { @@ -1491,7 +1492,7 @@ bool ARMConstantIslands::removeUnusedCPEntries() { for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { removeDeadCPEMI(CPEs[j].CPEMI); - CPEs[j].CPEMI = NULL; + CPEs[j].CPEMI = nullptr; MadeChange = true; } } @@ -1844,7 +1845,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (MJTI == 0) return false; + if (!MJTI) return false; const std::vector &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { @@ -1970,7 +1971,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() { bool MadeChange = false; MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); - if (MJTI == 0) return false; + if (!MJTI) return false; const std::vector &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { @@ -2012,7 +2013,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple // heuristic. FIXME: We can definitely improve it. - MachineBasicBlock *TBB = 0, *FBB = 0; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; SmallVector CondPrior; MachineFunction::iterator BBi = BB; @@ -2032,7 +2033,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { // Update numbering to account for the block being moved. MF->RenumberBlocks(); ++NumJTMoved; - return NULL; + return nullptr; } // Create a new MBB for the code after the jump BB. 
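The CC_ARM_AAPCS_Custom_HA hook added to ARMCallingConv.h above queues each member of a homogeneous aggregate as a pending location and only allocates once the last member arrives, asking CCState::AllocateRegBlock for a contiguous run of S, D, or Q registers. A rough sketch of that contiguous search, assuming register usage is tracked as a plain boolean mask (RegInUse is a stand-in for illustration, not the real CCState interface):

#include <cstddef>

// Returns the 1-based position of the first register in a free block of
// BlockSize consecutive registers, or 0 when no block fits and the caller
// must fall back to the stack (AAPCS rule C.2.vfp).
static unsigned allocateRegBlock(const bool RegInUse[], size_t NumRegs,
                                 size_t BlockSize) {
  for (size_t First = 0; First + BlockSize <= NumRegs; ++First) {
    bool AllFree = true;
    for (size_t I = 0; I < BlockSize && AllFree; ++I)
      AllFree = !RegInUse[First + I];
    if (AllFree)
      return static_cast<unsigned>(First) + 1;
  }
  return 0;
}

For a three-member float HFA this claims a run such as S0-S2, after which each pending CCValAssign is converted to a register location in order; on failure the code above marks all sixteen S registers as used so no later VFP argument can jump the queue.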
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index bd4ee44..6045738 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-pseudo" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" @@ -23,6 +22,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" @@ -31,6 +31,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "arm-pseudo" + static cl::opt<bool> VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, cl::desc("Verify machine code after expanding ARM pseudos")); @@ -345,7 +347,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode); if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode) return I; - return NULL; + return nullptr; } /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, @@ -614,6 +616,39 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MI.eraseFromParent(); } +static bool IsAnAddressOperand(const MachineOperand &MO) { + // This check is overly conservative. Unless we are certain that the machine + // operand is not a symbol reference, we return that it is a symbol reference. + // This is important as the load pair may not be split up on Windows. + switch (MO.getType()) { + case MachineOperand::MO_Register: + case MachineOperand::MO_Immediate: + case MachineOperand::MO_CImmediate: + case MachineOperand::MO_FPImmediate: + return false; + case MachineOperand::MO_MachineBasicBlock: + return true; + case MachineOperand::MO_FrameIndex: + return false; + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + case MachineOperand::MO_JumpTableIndex: + case MachineOperand::MO_ExternalSymbol: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_BlockAddress: + return true; + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return false; + case MachineOperand::MO_Metadata: + case MachineOperand::MO_MCSymbol: + return true; + case MachineOperand::MO_CFIIndex: + return false; + } + llvm_unreachable("unhandled machine operand type"); +} + void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -624,10 +659,14 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, bool DstIsDead = MI.getOperand(0).isDead(); bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); + bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO); MachineInstrBuilder LO16, HI16; if (!STI->hasV6T2Ops() && (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { + // FIXME Windows CE supports older ARM CPUs + assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+"); + // Expand into a movi + orr.
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) @@ -664,17 +703,29 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg); - if (MO.isImm()) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: { unsigned Imm = MO.getImm(); unsigned Lo16 = Imm & 0xffff; unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); - } else { + break; + } + case MachineOperand::MO_ExternalSymbol: { + const char *ES = MO.getSymbolName(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16); + HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16); + break; + } + default: { const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + break; + } } LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); @@ -682,6 +733,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); + if (RequiresBundling) + finalizeBundle(MBB, &*LO16, &*MBBI); + TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); } diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index c442444..6f8fb1a 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -166,8 +166,6 @@ class ARMFastISel final : public FastISel { // Utility routines. private: - unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned OpNum, - unsigned Op); bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -191,6 +189,8 @@ class ARMFastISel final : public FastISel { unsigned ARMSelectCallOp(bool UseReg); unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); + const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); } + // Call handling routines. private: CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, @@ -283,23 +283,6 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } -unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II, - unsigned Op, unsigned OpNum) { - if (TargetRegisterInfo::isVirtualRegister(Op)) { - const TargetRegisterClass *RegClass = - TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); - if (!MRI.constrainRegClass(Op, RegClass)) { - // If it's not legal to COPY between the register classes, something - // has gone very wrong before we got here. - unsigned NewOp = createResultReg(RegClass); - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), NewOp).addReg(Op)); - return NewOp; - } - } - return Op; -} - unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { @@ -769,7 +752,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { // Computes the address to get to an object. bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { // Some boilerplate from the X86 FastISel. 
- const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from @@ -1400,7 +1383,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, const APInt &CIVal = ConstInt->getValue(); Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather - // then a cmn, because there is no way to represent 2147483648 as a + // then a cmn, because there is no way to represent 2147483648 as a // signed 32-bit int. if (Imm < 0 && Imm != (int)0x80000000) { isNegativeImm = true; @@ -2182,7 +2165,8 @@ unsigned ARMFastISel::getLibcallReg(const Twine &Name) { if (!LCREVT.isSimple()) return 0; GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false, - GlobalValue::ExternalLinkage, 0, Name); + GlobalValue::ExternalLinkage, nullptr, + Name); assert(GV->getType() == GVTy && "We miscomputed the type for the global!"); return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } @@ -2286,7 +2270,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { } bool ARMFastISel::SelectCall(const Instruction *I, - const char *IntrMemName = 0) { + const char *IntrMemName = nullptr) { const CallInst *CI = cast(I); const Value *Callee = CI->getCalledValue(); @@ -3092,6 +3076,6 @@ namespace llvm { TM.Options.NoFramePointerElim = true; return new ARMFastISel(funcInfo, libInfo); } - return 0; + return nullptr; } } diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h index a30f4cd..e191a3c 100644 --- a/lib/Target/ARM/ARMFeatures.h +++ b/lib/Target/ARM/ARMFeatures.h @@ -1,4 +1,4 @@ -//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===// +//===-- ARMFeatures.h - Checks for ARM instruction features -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -16,11 +16,11 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" +namespace llvm { + template // could be MachineInstr or MCInst bool IsCPSRDead(InstrType *Instr); -namespace llvm { - template // could be MachineInstr or MCInst inline bool isV8EligibleForIT(InstrType *Instr) { switch (Instr->getOpcode()) { diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 36ecfca..0caf4bf 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -87,7 +87,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, - const uint16_t *CSRegs) { + const MCPhysReg *CSRegs) { // Integer spill area is handled with "pop". if (isPopOpcode(MI->getOpcode())) { // The first two operands are predicates. 
The last two are @@ -142,6 +142,14 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) { return count; } +static bool WindowsRequiresStackProbe(const MachineFunction &MF, + size_t StackSizeInBytes) { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + if (MFI->getStackProtectorIndex() > 0) + return StackSizeInBytes >= 4080; + return StackSizeInBytes >= 4096; +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -149,15 +157,16 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { ARMFunctionInfo *AFI = MF.getInfo(); MachineModuleInfo &MMI = MF.getMMI(); MCContext &Context = MMI.getContext(); + const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARMBaseRegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); + static_cast(TM.getRegisterInfo()); const ARMBaseInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); + *static_cast(TM.getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = TM.getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector &CSI = MFI->getCalleeSavedInfo(); @@ -187,7 +196,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); } - if (!AFI->hasStackFrame()) { + if (!AFI->hasStackFrame() && + (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); @@ -284,6 +294,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } else NumBytes = DPRCSOffset; + if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { + uint32_t NumWords = NumBytes >> 2; + + if (NumWords < 65536) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup)); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4) + .addImm(NumWords) + .setMIFlags(MachineInstr::FrameSetup); + + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); + break; + case CodeModel::Large: + case CodeModel::JITDefault: + BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) + .addExternalSymbol("__chkstk") + .setMIFlags(MachineInstr::FrameSetup); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit) + .setMIFlags(MachineInstr::FrameSetup); + break; + } + + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::R4, RegState::Kill) + .setMIFlags(MachineInstr::FrameSetup))); + NumBytes = 0; + } + unsigned adjustedGPRCS1Size = GPRCS1Size; if (NumBytes) { // Adjust SP after all the callee-save spills. 
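Two decisions drive the Windows prologue path above: whether the frame is large enough to step over the 4 KiB guard page at all, and whether the word count passed to __chkstk fits a single t2MOVi16 or needs a movw/movt pair. A compact restatement in plain C++ follows; that __chkstk takes its argument as a count of 32-bit words in R4 is inferred from the NumBytes >> 2 above, and the 4080-byte threshold presumably leaves headroom for the stack-protector slot:

#include <cstddef>
#include <cstdint>

static bool windowsRequiresStackProbe(bool HasStackProtector,
                                      size_t StackSizeInBytes) {
  // A guard page is 4096 bytes; keep 16 bytes of margin when a stack
  // protector is present.
  return StackSizeInBytes >= (HasStackProtector ? 4080u : 4096u);
}

static bool probeCountFitsMovw(uint32_t NumBytes) {
  uint32_t NumWords = NumBytes >> 2; // __chkstk consumes a word count in R4
  return NumWords < 65536;           // t2MOVi16 range; otherwise t2MOVi32imm
}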
@@ -316,10 +371,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator Pos = ++GPRCS1Push; BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - for (std::vector::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - int FI = I->getFrameIdx(); + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: @@ -382,10 +436,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } - for (std::vector::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - int FI = I->getFrameIdx(); + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); switch (Reg) { case ARM::R8: case ARM::R9: @@ -411,7 +464,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { do { MachineBasicBlock::iterator Push = DPRCSPush++; if (!HasFP) { - CFAOffset -= sizeOfSPAdjustment(Push);; + CFAOffset -= sizeOfSPAdjustment(Push); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) @@ -419,10 +472,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD); - for (std::vector::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - int FI = I->getFrameIdx(); + for (const auto &Entry : CSI) { + unsigned Reg = Entry.getReg(); + int FI = Entry.getFrameIdx(); if ((Reg >= ARM::D0 && Reg <= ARM::D31) && (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); @@ -540,7 +592,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize); } else { // Unwind MBBI to point to first LDR / VLDRD. 
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (MBBI != MBB.begin()) { do { --MBBI; @@ -1205,12 +1257,9 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII) { unsigned FnSize = 0; - for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - const MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); - I != E; ++I) - FnSize += TII.GetInstSizeInBytes(I); + for (auto &MBB : MF) { + for (auto &MI : MBB) + FnSize += TII.GetInstSizeInBytes(&MI); } return FnSize; } @@ -1223,21 +1272,21 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, const TargetFrameLowering *TFI) { const ARMFunctionInfo *AFI = MF.getInfo(); unsigned Limit = (1 << 12) - 1; - for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!I->getOperand(i).isFI()) continue; + for (auto &MBB : MF) { + for (auto &MI : MBB) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + if (!MI.getOperand(i).isFI()) + continue; // When using ADDri to get the address of a stack object, 255 is the // largest offset guaranteed to fit in the immediate offset. - if (I->getOpcode() == ARM::ADDri) { + if (MI.getOpcode() == ARM::ADDri) { Limit = std::min(Limit, (1U << 8) - 1); break; } // Otherwise check the addressing mode. - switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { + switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) { case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: Limit = std::min(Limit, (1U << 8) - 1); @@ -1374,7 +1423,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -1486,6 +1535,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (hasFP(MF)) { MRI.setPhysRegUsed(FramePtr); + auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), + FramePtr); + if (FPPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(FPPos); NumGPRSpills++; } @@ -1681,7 +1734,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { if (MF.getFunction()->isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); if (!ST->isTargetAndroid() && !ST->isTargetLinux()) - report_fatal_error("Segmented stacks not supported on this platfrom."); + report_fatal_error("Segmented stacks not supported on this platform."); MachineBasicBlock &prologueMBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1693,6 +1746,12 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { ARMFunctionInfo *ARMFI = MF.getInfo(); DebugLoc DL; + uint64_t StackSize = MFI->getStackSize(); + + // Do not generate a prologue for functions with a stack of size zero + if (StackSize == 0) + return; + // Use R4 and R5 as scratch registers. // We save R4 and R5 before use and restore them before leaving the function. 
unsigned ScratchReg0 = ARM::R4; @@ -1722,8 +1781,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.push_front(PrevStackMBB); // The required stack size that is aligned to ARM constant criterion. - uint64_t StackSize = MFI->getStackSize(); - AlignedStackSize = alignToARMConstant(StackSize); // When the frame size is less than 256 we just compare the stack diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 524ee36..981d320 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -57,7 +57,7 @@ public: void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS) const override; - void adjustForSegmentedStacks(MachineFunction &MF) const; + void adjustForSegmentedStacks(MachineFunction &MF) const override; private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 61d4e12..0885c4e 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -77,7 +77,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { } void ARMHazardRecognizer::Reset() { - LastMI = 0; + LastMI = nullptr; FpMLxStalls = 0; ScoreboardHazardRecognizer::Reset(); } @@ -95,7 +95,7 @@ void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { void ARMHazardRecognizer::AdvanceCycle() { if (FpMLxStalls && --FpMLxStalls == 0) // Stalled for 4 cycles but still can't schedule any other instructions. - LastMI = 0; + LastMI = nullptr; ScoreboardHazardRecognizer::AdvanceCycle(); } diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index e88cd0d..a8198e2 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -35,7 +35,7 @@ public: ARMHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG) : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), - LastMI(0) {} + LastMI(nullptr) {} HazardType getHazardType(SUnit *SU, int Stalls) override; void Reset() override; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 70e11c5..08d598d 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" @@ -37,6 +36,8 @@ using namespace llvm; +#define DEBUG_TYPE "arm-isel" + static cl::opt DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), @@ -72,6 +73,13 @@ public: Subtarget(&TM.getSubtarget()) { } + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. 
+ Subtarget = &TM.getSubtarget(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } + const char *getPassName() const override { return "ARM Instruction Selection"; } @@ -397,7 +405,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() { N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, N1, CurDAG->getConstant(TZ, MVT::i32)); CurDAG->UpdateNodeOperands(N, N0, N1); - } + } } /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS @@ -1440,7 +1448,7 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { LoadSDNode *LD = cast(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) - return NULL; + return nullptr; EVT LoadedVT = LD->getMemoryVT(); SDValue Offset, AMOpc; @@ -1506,14 +1514,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { } } - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { LoadSDNode *LD = cast(N); ISD::MemIndexedMode AM = LD->getAddressingMode(); if (AM == ISD::UNINDEXED) - return NULL; + return nullptr; EVT LoadedVT = LD->getMemoryVT(); bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; @@ -1540,7 +1548,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; break; default: - return NULL; + return nullptr; } Match = true; } @@ -1554,7 +1562,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { MVT::Other, Ops); } - return NULL; + return nullptr; } /// \brief Form a GPRPair pseudo register from a pair of GPR regs. @@ -1699,10 +1707,10 @@ static bool isVSTfixed(unsigned Opc) case ARM::VST1d16wb_fixed : return true; case ARM::VST1d32wb_fixed : return true; case ARM::VST1d64wb_fixed : return true; - case ARM::VST1q8wb_fixed : return true; - case ARM::VST1q16wb_fixed : return true; - case ARM::VST1q32wb_fixed : return true; - case ARM::VST1q64wb_fixed : return true; + case ARM::VST1q8wb_fixed : return true; + case ARM::VST1q16wb_fixed : return true; + case ARM::VST1q32wb_fixed : return true; + case ARM::VST1q64wb_fixed : return true; case ARM::VST1d64TPseudoWB_fixed : return true; case ARM::VST1d64QPseudoWB_fixed : return true; case ARM::VST2d8wb_fixed : return true; @@ -1776,7 +1784,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 1 : 2; if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); @@ -1895,7 +1903,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, @@ -1909,7 +1917,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned AddrOpIdx = isUpdating ? 1 : 2; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(N)->getMemOperand(); @@ -2055,7 +2063,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned AddrOpIdx = isUpdating ? 1 : 2; unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 
2 : 1) if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(N)->getMemOperand(); @@ -2160,7 +2168,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, @@ -2171,7 +2179,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, SDValue MemAddr, Align; if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(N)->getMemOperand(); @@ -2243,7 +2251,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, @@ -2282,7 +2290,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) - return NULL; + return nullptr; unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) @@ -2295,7 +2303,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, // The immediate is a mask of the low bits iff imm & (imm+1) == 0 if (And_imm & (And_imm + 1)) - return NULL; + return nullptr; unsigned Srl_imm = 0; if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, @@ -2315,7 +2323,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } // ARM models shift instructions as MOVsi with shifter operand. @@ -2325,17 +2333,17 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); } SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), - getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } - return NULL; + return nullptr; } // Otherwise, we're looking for a shift of a shift @@ -2349,16 +2357,16 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, unsigned Width = 32 - Srl_imm - 1; int LSB = Srl_imm - Shl_imm; if (LSB < 0) - return NULL; + return nullptr; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { N->getOperand(0).getOperand(0), CurDAG->getTargetConstant(LSB, MVT::i32), CurDAG->getTargetConstant(Width, MVT::i32), getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } - return NULL; + return nullptr; } /// Target-specific DAG combining for ISD::XOR. 
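The /// comment above introduces SelectABSOp, which pattern-matches the branch-free absolute value that lowering emits: with the sign mask s = x >> 31 (arithmetic shift, the ISD::SRA operand), abs(x) == (x + s) ^ s, and the selector collapses the whole tree into one ABS/t2ABS pseudo. In plain C++, using unsigned arithmetic so the addition stays well defined even for INT32_MIN:

#include <cstdint>

static int32_t absViaAddXor(int32_t X) {
  // Assumes arithmetic right shift of negative values (true on ARM and
  // every mainstream C++ implementation): S is 0 or 0xFFFFFFFF.
  uint32_t S = static_cast<uint32_t>(X >> 31);
  uint32_t R = (static_cast<uint32_t>(X) + S) ^ S;
  return static_cast<int32_t>(R); // for X < 0: ~(X - 1) == -X
}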
@@ -2377,10 +2385,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ EVT VT = N->getValueType(0); if (Subtarget->isThumb1Only()) - return NULL; + return nullptr; if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) - return NULL; + return nullptr; SDValue ADDSrc0 = XORSrc0.getOperand(0); SDValue ADDSrc1 = XORSrc0.getOperand(1); @@ -2391,13 +2399,13 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ unsigned Size = XType.getSizeInBits() - 1; if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && - XType.isInteger() && SRAConstant != NULL && + XType.isInteger() && SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) { unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); } - return NULL; + return nullptr; } SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { @@ -2414,7 +2422,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. } switch (N->getOpcode()) { @@ -2478,7 +2486,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { Ops); } ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); - return NULL; + return nullptr; } // Other cases are autogenerated. @@ -2492,14 +2500,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb1Only()) { SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? ARM::t2ADDri : ARM::ADDri); SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } case ISD::SRL: @@ -2526,10 +2534,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6); + return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); } } if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 
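The hunks just above touch the multiply-by-immediate patterns: a multiply by 2^n+1 is selected as an add of the value to its own left shift (t2ADDrs/ADDrsi), and a multiply by 2^n-1 as a reverse-subtract of the value from its shift (t2RSBrs/RSBrsi, per the // 2^n-1? check). In plain C++:

#include <cstdint>

static uint32_t mulByPow2Plus1(uint32_t X, unsigned N) {
  return X + (X << N); // x * (2^n + 1): the shifted-add (ADDrsi) form
}

static uint32_t mulByPow2Minus1(uint32_t X, unsigned N) {
  return (X << N) - X; // x * (2^n - 1): RSB computes (x << n) - x
}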
@@ -2542,10 +2550,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); + return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7); + return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); } } } @@ -2660,7 +2668,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } } case ISD::LOAD: { - SDNode *ResNode = 0; + SDNode *ResNode = nullptr; if (Subtarget->isThumb() && Subtarget->hasThumb2()) ResNode = SelectT2IndexedLoad(N); else @@ -2707,13 +2715,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } ReplaceUses(SDValue(N, 0), SDValue(Chain.getNode(), Chain.getResNo())); - return NULL; + return nullptr; } case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VZIPd8; break; case MVT::v4i16: Opc = ARM::VZIPd16; break; case MVT::v2f32: @@ -2733,7 +2741,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VUZPd8; break; case MVT::v4i16: Opc = ARM::VUZPd16; break; case MVT::v2f32: @@ -2753,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { unsigned Opc = 0; EVT VT = N->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: return NULL; + default: return nullptr; case MVT::v8i8: Opc = ARM::VTRNd8; break; case MVT::v4i16: Opc = ARM::VTRNd16; break; case MVT::v2f32: @@ -2834,7 +2842,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed, ARM::VLD1q64wb_fixed }; - return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); + return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); } case ARMISD::VLD2_UPD: { @@ -2845,7 +2853,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q32PseudoWB_fixed }; - return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); } case ARMISD::VLD3_UPD: { @@ -2912,7 +2920,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1q16wb_fixed, ARM::VST1q32wb_fixed, ARM::VST1q64wb_fixed }; - return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); + return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); } case ARMISD::VST2_UPD: { @@ -2923,7 +2931,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed, ARM::VST2q16PseudoWB_fixed, ARM::VST2q32PseudoWB_fixed }; - return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); } case ARMISD::VST3_UPD: { @@ -3047,7 +3055,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ReplaceUses(SDValue(N, 1), Result); } ReplaceUses(SDValue(N, 2), OutChain); - return NULL; + return nullptr; } case Intrinsic::arm_stlexd: case Intrinsic::arm_strexd: { @@ -3093,7 +3101,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD1d32, ARM::VLD1d64 }; static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, ARM::VLD1q32, ARM::VLD1q64}; - return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); + return SelectVLD(N, 
false, 1, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vld2: { @@ -3101,7 +3109,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VLD2d32, ARM::VLD1q64 }; static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo }; - return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); + return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vld3: { @@ -3164,7 +3172,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST1d32, ARM::VST1d64 }; static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, ARM::VST1q32, ARM::VST1q64 }; - return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); + return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vst2: { @@ -3172,7 +3180,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { ARM::VST2d32, ARM::VST1q64 }; static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo }; - return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); + return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); } case Intrinsic::arm_neon_vst3: { @@ -3306,7 +3314,8 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ // them into a GPRPair. SDLoc dl(N); - SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) + : SDValue(nullptr,0); SmallVector OpChanged; // Glue node will be appended late. @@ -3388,7 +3397,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ // Update the original glue user. std::vector Ops(GU->op_begin(), GU->op_end()-1); Ops.push_back(T1.getValue(1)); - CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(GU, Ops); GU = T1.getNode(); } else { @@ -3435,11 +3444,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ if (Glue.getNode()) AsmNodeOperands.push_back(Glue); if (!Changed) - return NULL; + return nullptr; SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), - CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], - AsmNodeOperands.size()); + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); New->setNodeId(-1); return New.getNode(); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2ebad8e..00d07e8 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-isel" #include "ARMISelLowering.h" #include "ARMCallingConv.h" #include "ARMConstantPoolValue.h" @@ -37,18 +36,22 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Type.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include using namespace llvm; +#define DEBUG_TYPE "arm-isel" + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments"); @@ -79,7 +82,7 @@ namespace { } // The APCS parameter registers. 
-static const uint16_t GPRArgRegs[] = { +static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; @@ -155,7 +158,8 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { if (TM.getSubtarget().isTargetMachO()) return new TargetLoweringObjectFileMachO(); - + if (TM.getSubtarget().isTargetWindows()) + return new TargetLoweringObjectFileCOFF(); return new ARMElfTargetObjectFile(); } @@ -170,7 +174,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->isTargetMachO()) { // Uses VFP for Thumb libfuncs if available. if (Subtarget->isThumb() && Subtarget->hasVFP2() && - Subtarget->hasARMOps()) { + Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) { // Single-precision floating-point arithmetic. setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -246,173 +250,134 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // These libcalls are not available in 32-bit. - setLibcallName(RTLIB::SHL_I128, 0); - setLibcallName(RTLIB::SRL_I128, 0); - setLibcallName(RTLIB::SRA_I128, 0); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() && !Subtarget->isTargetWindows()) { - // Double-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 2 - setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); - setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); - setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); - setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); - setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); - - // Double-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 3 - setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); - setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); - setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); - setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); - setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); - setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); - setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); - setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); - setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); - setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); - - // Single-precision floating-point arithmetic helper functions - // RTABI chapter 4.1.2, Table 4 - setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); - setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); - 
setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); - setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); - setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); - - // Single-precision floating-point comparison helper functions - // RTABI chapter 4.1.2, Table 5 - setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); - setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); - setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); - setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); - setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); - setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); - setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); - setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); - setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); - setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); - setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); - setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); - setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); - setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); - - // Floating-point to integer conversions. - // RTABI chapter 4.1.2, Table 6 - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); - - // Conversions between floating types. - // RTABI chapter 4.1.2, Table 7 - setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); - setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); - - // Integer to floating-point conversions. 
- // RTABI chapter 4.1.2, Table 8 - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); - - // Long long helper functions - // RTABI chapter 4.2, Table 9 - setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); - setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); - setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); - setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); - setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); - - // Integer division functions - // RTABI chapter 4.3.1 - setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); - setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); - setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); - setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); - setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); - - // Memory operations - // RTABI chapter 4.3.4 - setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy"); - setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove"); - setLibcallName(RTLIB::MEMSET, "__aeabi_memset"); - setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS); - setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS); + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + const ISD::CondCode Cond; + } LibraryCalls[] = { + // Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, 
ISD::SETCC_INVALID }, + { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ }, + { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE }, + { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ }, + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer to floating-point conversions. 
+ // RTABI chapter 4.1.2, Table 8 + { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Integer division functions + // RTABI chapter 4.3.1 + { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + + // Memory operations + // RTABI chapter 4.3.4 + { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + if (LC.Cond != ISD::SETCC_INVALID) + setCmpLibcallCC(LC.Op, LC.Cond); + } + } + + if (Subtarget->isTargetWindows()) { + static const struct { + const RTLIB::Libcall Op; + const char * const Name; + const CallingConv::ID CC; + } LibraryCalls[] = { + { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP }, + { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP }, + }; + + for (const auto &LC : LibraryCalls) { + setLibcallName(LC.Op, LC.Name); + setLibcallCallingConv(LC.Op, LC.CC); + } } // Use divmod compiler-rt calls for iOS 5.0 and later. 
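The hunks above collapse well over a hundred paired setLibcallName/setLibcallCallingConv/setCmpLibcallCC calls into static tables walked by a range-for, so each AEABI (and Windows) helper is described by a single row. Below is a minimal, self-contained sketch of the same table-driven registration idiom; the enums and set* functions are simplified stand-ins, not LLVM's actual TargetLowering API.

    // Sketch of the table-driven libcall registration used above. The enums
    // and set* functions are illustrative stand-ins for TargetLowering's API.
    #include <cstdio>

    enum Libcall { ADD_F64, DIV_F64, OEQ_F64, NUM_LIBCALLS };
    enum CondCode { SETCC_INVALID, SETNE, SETEQ };
    enum CallConv { ARM_AAPCS };

    static const char *Names[NUM_LIBCALLS];
    static CallConv Convs[NUM_LIBCALLS];
    static CondCode CmpCCs[NUM_LIBCALLS];

    void setLibcallName(Libcall Op, const char *Name) { Names[Op] = Name; }
    void setLibcallCallingConv(Libcall Op, CallConv CC) { Convs[Op] = CC; }
    void setCmpLibcallCC(Libcall Op, CondCode CC) { CmpCCs[Op] = CC; }

    int main() {
      // One row per libcall: opcode, symbol, calling convention and, for the
      // comparison helpers, the condition meaning "true" on the call's result.
      static const struct {
        Libcall Op;
        const char *Name;
        CallConv CC;
        CondCode Cond;
      } LibraryCalls[] = {
        { ADD_F64, "__aeabi_dadd",   ARM_AAPCS, SETCC_INVALID },
        { DIV_F64, "__aeabi_ddiv",   ARM_AAPCS, SETCC_INVALID },
        { OEQ_F64, "__aeabi_dcmpeq", ARM_AAPCS, SETNE },
      };

      // One loop replaces hundreds of hand-written registration calls.
      for (const auto &LC : LibraryCalls) {
        setLibcallName(LC.Op, LC.Name);
        setLibcallCallingConv(LC.Op, LC.CC);
        if (LC.Cond != SETCC_INVALID)
          setCmpLibcallCC(LC.Op, LC.Cond);
      }

      std::printf("%s\n", Names[OEQ_F64]); // prints __aeabi_dcmpeq
      return 0;
    }

The payoff is that adding, removing, or auditing a libcall becomes a one-row table edit instead of three scattered calls, and a comparison's condition code can no longer drift out of sync with its libcall name.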
@@ -444,6 +409,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + + setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -631,6 +603,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + setOperationAction(ISD::SADDO, MVT::i32, Custom); + setOperationAction(ISD::UADDO, MVT::i32, Custom); + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + setOperationAction(ISD::USUBO, MVT::i32, Custom); + // i64 operation support. setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -850,7 +827,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); } } - + // Combine sin / cos into one node or libcall if possible. if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); @@ -913,7 +890,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // and extractions. std::pair ARMTargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; + const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: @@ -950,7 +927,7 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case ARMISD::Wrapper: return "ARMISD::Wrapper"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; @@ -1204,40 +1181,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, #include "ARMGenCallingConv.inc" -/// CCAssignFnForNode - Selects the correct CCAssignFn for a the -/// given CallingConvention value. -CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, - bool Return, - bool isVarArg) const { +/// getEffectiveCallingConv - Get the effective calling convention, taking into +/// account presence of floating point hardware and calling convention +/// limitations, such as support for variadic functions. +CallingConv::ID +ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const { switch (CC) { default: llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - if (Subtarget->hasVFP2() && !isVarArg) { - if (!Subtarget->isAAPCS_ABI()) - return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); - // For AAPCS ABI targets, just use VFP variant of the calling convention. - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - } - // Fallthrough - case CallingConv::C: { - // Use target triple & subtarget features to do actual dispatch. + case CallingConv::ARM_AAPCS: + case CallingConv::ARM_APCS: + case CallingConv::GHC: + return CC; + case CallingConv::ARM_AAPCS_VFP: + return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP; + case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) - return (Return ? 
RetCC_ARM_APCS : CC_ARM_APCS); + return CallingConv::ARM_APCS; else if (Subtarget->hasVFP2() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; + case CallingConv::Fast: + if (!Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && !isVarArg) + return CallingConv::Fast; + return CallingConv::ARM_APCS; + } else if (Subtarget->hasVFP2() && !isVarArg) + return CallingConv::ARM_AAPCS_VFP; + else + return CallingConv::ARM_AAPCS; } - case CallingConv::ARM_AAPCS_VFP: - if (!isVarArg) - return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); - // Fallthrough - case CallingConv::ARM_AAPCS: - return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); +} + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the given +/// CallingConvention. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (getEffectiveCallingConv(CC, isVarArg)) { + default: + llvm_unreachable("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::Fast: + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); case CallingConv::GHC: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); } @@ -1286,6 +1281,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { @@ -1301,6 +1298,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); + if (!Subtarget->isLittle()) + std::swap (Lo, Hi); Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); @@ -1351,16 +1350,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); - RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + unsigned id = Subtarget->isLittle() ? 
0 : 1; + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id))); if (NextVA.isRegLoc()) - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id))); else { assert(NextVA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); - MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id), dl, DAG, NextVA, Flags)); } @@ -1398,6 +1398,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically // detected sibcalls. if (isTailCall) { @@ -1542,7 +1545,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode}; MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, - Ops, array_lengthof(Ops))); + Ops)); } } else if (!isSibCall) { assert(VA.isMemLoc()); @@ -1553,8 +1556,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. @@ -1741,10 +1743,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) - return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -2049,8 +2051,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); - return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps); } SDValue @@ -2074,6 +2075,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue Flag; SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + bool isLittleEndian = Subtarget->isLittle(); // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; @@ -2100,12 +2102,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(isLittleEndian ? 
0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - HalfGPRs.getValue(1), Flag); + HalfGPRs.getValue(isLittleEndian ? 1 : 0), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc @@ -2117,12 +2122,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + DAG.getVTList(MVT::i32, MVT::i32), Arg); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 0 : 1), + Flag); Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); VA = RVLocs[++i]; // skip ahead to next loc - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + fmrrd.getValue(isLittleEndian ? 1 : 0), Flag); } else Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); @@ -2151,8 +2159,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, return LowerInterruptReturn(RetOps, dl, DAG); } - return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps); } bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2314,13 +2321,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Entry.Node = Argument; Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); Args.push_back(Entry); + // FIXME: is there useful debug info available here? - TargetLowering::CallLoweringInfo CLI(Chain, - (Type *) Type::getInt32Ty(*DAG.getContext()), - false, false, false, false, - 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), + DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0); + std::pair CallResult = LowerCallTo(CLI); return CallResult.first; } @@ -2466,6 +2473,23 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, return Result; } +SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); + assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt"); + + const GlobalValue *GV = cast(Op)->getGlobal(); + EVT PtrVT = getPointerTy(); + SDLoc DL(Op); + + ++NumMovwMovt; + + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes. 
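+  // Until then, the single ARMISD::Wrapper around the TargetGlobalAddress is
+  // selected to a mov-immediate pseudo, whose expansion emits the movw/movt
+  // pair that the useMovt() assertion above guarantees is available.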
+ return DAG.getNode(ARMISD::Wrapper, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT)); +} + SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && @@ -2654,7 +2678,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - + if (!Subtarget->isLittle()) + std::swap (ArgValue, ArgValue2); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } @@ -2803,8 +2828,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return FrameIndex; } else { if (ArgSize == 0) { @@ -2834,8 +2858,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // If there is no regs to be stored, just point address after last // argument passed via stack. int FrameIndex = - StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize); + StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr, + CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable, + 0, TotalArgRegsSaveSize); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -3166,11 +3191,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +std::pair +ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, + SDValue &ARMcc) const { + assert(Op.getValueType() == MVT::i32 && "Unsupported value type"); + + SDValue Value, OverflowCmp; + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + + + // FIXME: We are currently always generating CMPs because we don't support + // generating CMN through the backend. This is not as good as the natural + // CMP case because it causes a register dependency and cannot be folded + // later. + + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::SADDO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::UADDO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS); + break; + case ISD::SSUBO: + ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + case ISD::USUBO: + ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32); + Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS); + break; + } // switch (...) + + return std::make_pair(Value, OverflowCmp); +} + + +SDValue +ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. 
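+  // Once the type is legal, getARMXALUOOp (above) returns the plain ADD/SUB
+  // result together with a CMP whose CPSR flags encode the overflow (or, for
+  // the unsigned forms, carry) condition; the CMOV below then materializes
+  // that condition as an explicit 0/1 value, and MERGE_VALUES pairs it with
+  // the arithmetic result to form the two results an ISD::*O node produces.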
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + // We use 0 and 1 as false and true values. + SDValue TVal = DAG.getConstant(1, MVT::i32); + SDValue FVal = DAG.getConstant(0, MVT::i32); + EVT VT = Op.getValueType(); + + SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal, + ARMcc, CCR, OverflowCmp); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow); +} + + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); SDLoc dl(Op); + unsigned Opc = Cond.getOpcode(); + + if (Cond.getResNo() == 1 && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO)) { + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + EVT VT = Op.getValueType(); + + return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, + ARMcc, CCR, OverflowCmp); + + } // Convert: // @@ -3472,7 +3582,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { ARMcc = DAG.getConstant(CondCode, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); + return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); } return SDValue(); @@ -3512,11 +3622,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, MVT::i32); SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); + Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); } return Res; } @@ -3713,7 +3823,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp1, 1).getValue(1); + Tmp1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); // Or in the signbit with integer operations. @@ -3729,7 +3839,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // f64: Or the high part with signbit and then combine two parts. 
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); + Tmp0); SDValue Lo = Tmp0.getValue(0); SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); @@ -3761,14 +3871,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + const ARMBaseRegisterInfo &ARI = + *static_cast(RegInfo); + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO()) - ? ARM::R7 : ARM::R11; + unsigned FrameReg = ARI.getFrameRegister(MF); SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -3777,6 +3889,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned ARMTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch(RegName) + .Case("sp", ARM::SP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3806,8 +3930,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn f64->i64 into VMOVRRD. if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { - SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, - DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + SDValue Cvt; + if (TLI.isBigEndian() && SrcVT.isVector() && + SrcVT.getVectorNumElements() > 1) + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), + DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op)); + else + Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Op); // Merge the pieces into a single i64 value. return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); } @@ -3863,7 +3994,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two @@ -3897,7 +4028,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, CCR, Cmp); SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, @@ -4102,7 +4233,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and // captures the result into a carry flag. unsigned Opc = N->getOpcode() == ISD::SRL ? 
ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -4859,7 +4990,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(N); Ops.push_back(Op.getOperand(I)); Ops.push_back(DAG.getConstant(I, MVT::i32)); - N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops); } } return N; @@ -4870,7 +5001,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(i))); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); + SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); Val = LowerBUILD_VECTOR(Val, DAG, ST); if (Val.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, Val); @@ -4906,7 +5037,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SmallVector Ops; for (unsigned i = 0; i < NumElts; ++i) Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5213,12 +5344,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, if (V2.getNode()->getOpcode() == ISD::UNDEF) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, - DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, - &VTBLMask[0], 8)); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask)); } static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, @@ -5371,7 +5500,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(ShuffleMask[i] & (NumElts-1), MVT::i32))); } - SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, Val); } @@ -5608,7 +5737,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), - MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); + MVT::getVectorVT(TruncVT, NumElts), Ops); } static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { @@ -5946,12 +6075,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { ? 
"__sincos_stret" : "__sincosf_stret"; SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/false, - Callee, Args, DAG, dl); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, + &Args, 0) + .setDiscardResult(); + std::pair CallResult = LowerCallTo(CLI); SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, @@ -5998,8 +6127,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, }; Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], - array_lengthof(Ops)); + DAG.getVTList(MVT::i32, MVT::Other), Ops); OutChain = Cycles32.getValue(1); } else { // Intrinsic is defined to return 0 on unsupported platforms. Technically @@ -6022,8 +6150,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalAddress: - return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) : - LowerGlobalAddressELF(Op, DAG); + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); @@ -6068,6 +6203,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + return LowerXALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -6558,7 +6698,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { } // N.B. the order the invoke BBs are processed in doesn't matter here. - const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); + const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector MBBLPads; for (SmallPtrSet::iterator I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { @@ -6755,8 +6895,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UnitSize = 0; - const TargetRegisterClass *TRC = 0; - const TargetRegisterClass *VecTRC = 0; + const TargetRegisterClass *TRC = nullptr; + const TargetRegisterClass *VecTRC = nullptr; bool IsThumb1 = Subtarget->isThumb1Only(); bool IsThumb2 = Subtarget->isThumb2(); @@ -6790,7 +6930,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, ? (const TargetRegisterClass *)&ARM::DPairRegClass : UnitSize == 8 ? 
(const TargetRegisterClass *)&ARM::DPRRegClass - : 0; + : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -7520,8 +7660,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), - widenType, &Ops[0], Ops.size()); + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops); unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE; return DAG.getNode(ExtOp, SDLoc(N), VT, tmp); } @@ -7581,7 +7720,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Look for the glued ADDE. SDNode* AddeNode = AddcNode->getGluedUser(); - if (AddeNode == NULL) + if (!AddeNode) return SDValue(); // Make sure it is really an ADDE. @@ -7616,9 +7755,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, // Figure out the high and low input values to the MLAL node. SDValue* HiMul = &MULOp; - SDValue* HiAdd = NULL; - SDValue* LoMul = NULL; - SDValue* LowAdd = NULL; + SDValue* HiAdd = nullptr; + SDValue* LoMul = nullptr; + SDValue* LowAdd = nullptr; if (IsLeftOperandMUL) HiAdd = &AddeOp1; @@ -7635,7 +7774,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, LowAdd = &AddcOp0; } - if (LoMul == NULL) + if (!LoMul) return SDValue(); if (LoMul->getNode() != HiMul->getNode()) @@ -7652,8 +7791,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*HiAdd); SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), - DAG.getVTList(MVT::i32, MVT::i32), - &Ops[0], Ops.size()); + DAG.getVTList(MVT::i32, MVT::i32), Ops); // Replace the ADDs' nodes uses by the MLA node's values. SDValue HiMLALResult(MLALNode.getNode(), 1); @@ -8290,8 +8428,7 @@ static SDValue PerformSTORECombine(SDNode *N, Increment); Chains.push_back(Ch); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } if (!ISD::isNormalStore(St)) @@ -8302,16 +8439,18 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(0), BasePtr, - St->getPointerInfo(), St->isVolatile(), + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), St->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 0 : 1), OffsetPtr, St->getPointerInfo(), St->isVolatile(), St->isNonTemporal(), std::min(4U, St->getAlignment() / 2)); @@ -8387,7 +8526,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, DCI.AddToWorklist(V.getNode()); } EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops); return DAG.getNode(ISD::BITCAST, dl, VT, BV); } @@ -8470,7 +8609,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // Fold obvious case. 
V = V.getOperand(0); else { - V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(V.getNode()); } @@ -8666,7 +8805,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Tys[n] = VecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); + SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs+2)); SmallVector Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); @@ -8676,8 +8815,7 @@ static SDValue CombineBaseUpdate(SDNode *N, } MemIntrinsicSDNode *MemInt = cast(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops.data(), Ops.size(), - MemInt->getMemoryVT(), + Ops, MemInt->getMemoryVT(), MemInt->getMemOperand()); // Update the uses. @@ -8746,11 +8884,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); + SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, - Ops, 2, VLDMemInt->getMemoryVT(), + Ops, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); // Update the uses. @@ -9348,7 +9486,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { if (Res.getNode()) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); + DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne); // Capture demanded bits information that would be otherwise lost. if (KnownZero == 0xfffffffe) Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, @@ -9935,11 +10073,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, return true; } -void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { unsigned BitWidth = KnownOne.getBitWidth(); KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { @@ -9955,11 +10093,11 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. - DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); if (KnownZero == 0 && KnownOne == 0) return; APInt KnownZeroRHS, KnownOneRHS; - DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); + DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); KnownZero &= KnownZeroRHS; KnownOne &= KnownOneRHS; return; @@ -10053,7 +10191,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. 
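A note on the ARMISD::CMOV case of computeKnownBitsForTargetNode above: because a conditional move may yield either operand, its known bits are the intersection of the operands' known-zero and known-one sets. A small self-contained illustration of that merge rule, with plain uint32_t masks standing in for LLVM's APInt-based KnownZero/KnownOne pair:

    #include <cassert>
    #include <cstdint>

    // Known-bits facts about a 32-bit value: bit i of Zero set means "bit i
    // is provably 0", bit i of One set means "bit i is provably 1". The two
    // masks never overlap.
    struct KnownBits32 {
      uint32_t Zero, One;
    };

    // A conditional move may produce either input, so a bit is only known
    // when it is known, with the same value, in both -- hence intersection.
    KnownBits32 mergeCMov(KnownBits32 LHS, KnownBits32 RHS) {
      return { LHS.Zero & RHS.Zero, LHS.One & RHS.One };
    }

    int main() {
      KnownBits32 A{0xFFFF0000u, 0x000000FFu}; // top half 0, low byte ones
      KnownBits32 B{0xFF000000u, 0x0000000Fu}; // top byte 0, low nibble ones
      KnownBits32 M = mergeCMov(A, B);
      assert(M.Zero == 0xFF000000u); // only commonly-known zeros survive
      assert(M.One == 0x0000000Fu);  // only commonly-known ones survive
      assert((M.Zero & M.One) == 0); // the result is still consistent
      return 0;
    }

The early return when nothing is known about the first operand is just a shortcut: intersecting with empty sets can only yield empty sets, so computing the second operand's bits would be wasted work.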
@@ -10132,7 +10270,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                      std::string &Constraint,
                                                      std::vector<SDValue>&Ops,
                                                      SelectionDAG &DAG) const {
-  SDValue Result(0, 0);
+  SDValue Result;
 
   // Currently only support length 1 constraints.
   if (Constraint.length() != 1) return;
 
@@ -10331,13 +10469,12 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
   Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
 
   SDLoc dl(Op);
-  TargetLowering::
-  CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
-                       0, getLibcallCallingConv(LC), /*isTailCall=*/false,
-                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
-                       Callee, Args, DAG, dl);
-  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(InChain)
+    .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+    .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
+
+  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
 
   return CallInfo.first;
 }
 
@@ -10494,3 +10631,160 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
     return false;
   return true;
 }
+
+bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
+  // Loads and stores less than 64-bits are already atomic; ones above that
+  // are doomed anyway, so defer to the default libcall and blame the OS when
+  // things go wrong:
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
+  else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+    return LI->getType()->getPrimitiveSizeInBits() == 64;
+
+  // For the real atomic operations, we have ldrex/strex up to 64 bits.
+  return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+}
+
+Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+                                         AtomicOrdering Ord) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+  bool IsAcquire =
+      Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+  // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
+  // intrinsic must return {i32, i32} and we have to recombine them into a
+  // single i64 here.
+  if (ValTy->getPrimitiveSizeInBits() == 64) {
+    Intrinsic::ID Int =
+        IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
+    Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+
+    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+    Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+
+    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+    if (!Subtarget->isLittle())
+      std::swap (Lo, Hi);
+    Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+    Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+    return Builder.CreateOr(
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+  }
+
+  Type *Tys[] = { Addr->getType() };
+  Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
+  Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+  return Builder.CreateTruncOrBitCast(
+      Builder.CreateCall(Ldrex, Addr),
+      cast<PointerType>(Addr->getType())->getElementType());
+}
+
+Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+                                               Value *Addr,
+                                               AtomicOrdering Ord) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  bool IsRelease =
+      Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+  // Since the intrinsics must have legal type, the i64 intrinsics take two
+  // parameters: "i32, i32". We must marshal Val into the appropriate form
+  // before the call.
+  if (Val->getType()->getPrimitiveSizeInBits() == 64) {
+    Intrinsic::ID Int =
+        IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
+    Function *Strex = Intrinsic::getDeclaration(M, Int);
+    Type *Int32Ty = Type::getInt32Ty(M->getContext());
+
+    Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
+    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
+    if (!Subtarget->isLittle())
+      std::swap (Lo, Hi);
+    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+    return Builder.CreateCall3(Strex, Lo, Hi, Addr);
+  }
+
+  Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
+  Type *Tys[] = { Addr->getType() };
+  Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+
+  return Builder.CreateCall2(
+      Strex, Builder.CreateZExtOrBitCast(
+                 Val, Strex->getFunctionType()->getParamType(0)),
+      Addr);
+}
+
+enum HABaseType {
+  HA_UNKNOWN = 0,
+  HA_FLOAT,
+  HA_DOUBLE,
+  HA_VECT64,
+  HA_VECT128
+};
+
+static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
+                                   uint64_t &Members) {
+  if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+    for (unsigned i = 0; i < ST->getNumElements(); ++i) {
+      uint64_t SubMembers = 0;
+      if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
+        return false;
+      Members += SubMembers;
+    }
+  } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+    uint64_t SubMembers = 0;
+    if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
+      return false;
+    Members += SubMembers * AT->getNumElements();
+  } else if (Ty->isFloatTy()) {
+    if (Base != HA_UNKNOWN && Base != HA_FLOAT)
+      return false;
+    Members = 1;
+    Base = HA_FLOAT;
+  } else if (Ty->isDoubleTy()) {
+    if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
+      return false;
+    Members = 1;
+    Base = HA_DOUBLE;
+  } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
+    Members = 1;
+    switch (Base) {
+    case HA_FLOAT:
+    case HA_DOUBLE:
+      return false;
+    case HA_VECT64:
+      return VT->getBitWidth() == 64;
+    case HA_VECT128:
+      return VT->getBitWidth() == 128;
+    case HA_UNKNOWN:
+      switch (VT->getBitWidth()) {
+      case 64:
+        Base = HA_VECT64;
+        return true;
+      case 128:
+        Base = HA_VECT128;
+        return true;
+      default:
+        return false;
+      }
+    }
+  }
+
+  return (Members > 0 && Members <= 4);
+}
+
+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
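+/// In the AAPCS VFP variant, that means a composite that flattens to between
+/// one and four members which all share the same base type: all float, all
+/// double, or all 64-bit or all 128-bit vectors, exactly the conditions that
+/// isHomogeneousAggregate above checks member by member.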
+bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const { + if (getEffectiveCallingConv(CallConv, isVarArg) != + CallingConv::ARM_AAPCS_VFP) + return false; + + HABaseType Base = HA_UNKNOWN; + uint64_t Members = 0; + bool result = isHomogeneousAggregate(Ty, Base, Members); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + return result; +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index f33e6db..c15305c 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -313,10 +313,10 @@ namespace llvm { SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; - void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const override; + void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const override; bool ExpandInlineAsm(CallInst *CI) const override; @@ -384,6 +384,18 @@ namespace llvm { bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; + /// \brief Returns true if an argument of type Ty needs to be passed in a + /// contiguous block of registers in calling convention CallConv. + bool functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + + Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, + AtomicOrdering Ord) const override; + Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, + Value *Addr, AtomicOrdering Ord) const override; + + bool shouldExpandAtomicInIR(Instruction *Inst) const override; + protected: std::pair findRepresentativeClass(MVT VT) const override; @@ -404,6 +416,7 @@ namespace llvm { void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT); void addDRTypeForNEON(MVT VT); void addQRTypeForNEON(MVT VT); + std::pair getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const; typedef SmallVector, 8> RegsToPassVector; void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, @@ -417,6 +430,8 @@ namespace llvm { SDValue &Root, SelectionDAG &DAG, SDLoc dl) const; + CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC, + bool isVarArg) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, @@ -430,6 +445,7 @@ namespace llvm { SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const; @@ -438,6 +454,7 @@ namespace llvm { TLSModel::Model model) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; @@ -454,6 +471,8 @@ namespace llvm { SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG 
&DAG) const; + unsigned getRegisterByName(const char* RegName, EVT VT) const override; + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be /// expanded to FMAs when this method returns true, otherwise fmuladd is @@ -567,7 +586,6 @@ namespace llvm { OtherModImm }; - namespace ARM { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index aafff98..59e9260 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -2029,7 +2029,7 @@ class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, // Same as N2V but not predicated. class N2Vnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, dag oops, dag iops, InstrItinClass itin, string OpcodeStr, - string Dt, ValueType ResTy, ValueType OpTy, list pattern> + string Dt, list pattern> : NeonInp { bits<5> Vd; @@ -2138,8 +2138,7 @@ class N3V op21_20, bits<4> op11_8, bit op6, bit op4, class N3Vnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops,Format f, InstrItinClass itin, - string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, - SDPatternOperator IntOp, bit Commutable, list pattern> + string OpcodeStr, string Dt, list pattern> : NeonInp { bits<5> Vd; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 75a109e..718d5da 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -991,6 +991,81 @@ def addrmode6oneL32 : Operand, let EncoderMethod = "getAddrMode6OneLane32AddressOpValue"; } +// Base class for addrmode6 with specific alignment restrictions. +class AddrMode6Align : Operand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm:$align); + let EncoderMethod = "getAddrMode6AddressOpValue"; + let DecoderMethod = "DecodeAddrMode6Operand"; +} + +// Special version of addrmode6 to handle no allowed alignment encoding for +// VLD/VST instructions and checking the alignment is not specified. +def AddrMode6AlignNoneAsmOperand : AsmOperandClass { + let Name = "AlignedMemoryNone"; + let DiagnosticType = "AlignedMemoryRequiresNone"; +} +def addrmode6alignNone : AddrMode6Align { + // The alignment specifier can only be omitted. + let ParserMatchClass = AddrMode6AlignNoneAsmOperand; +} + +// Special version of addrmode6 to handle 16-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align16AsmOperand : AsmOperandClass { + let Name = "AlignedMemory16"; + let DiagnosticType = "AlignedMemoryRequires16"; +} +def addrmode6align16 : AddrMode6Align { + // The alignment specifier can only be 16 or omitted. + let ParserMatchClass = AddrMode6Align16AsmOperand; +} + +// Special version of addrmode6 to handle 32-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. +def AddrMode6Align32AsmOperand : AsmOperandClass { + let Name = "AlignedMemory32"; + let DiagnosticType = "AlignedMemoryRequires32"; +} +def addrmode6align32 : AddrMode6Align { + // The alignment specifier can only be 32 or omitted. + let ParserMatchClass = AddrMode6Align32AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit alignment encoding for +// VLD/VST instructions and checking the alignment value. 
+def AddrMode6Align64AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64"; + let DiagnosticType = "AlignedMemoryRequires64"; +} +def addrmode6align64 : AddrMode6Align { + // The alignment specifier can only be 64 or omitted. + let ParserMatchClass = AddrMode6Align64AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding +// for VLD/VST instructions and checking the alignment value. +def AddrMode6Align64or128AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64or128"; + let DiagnosticType = "AlignedMemoryRequires64or128"; +} +def addrmode6align64or128 : AddrMode6Align { + // The alignment specifier can only be 64, 128 or omitted. + let ParserMatchClass = AddrMode6Align64or128AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit, 128-bit or 256-bit alignment +// encoding for VLD/VST instructions and checking the alignment value. +def AddrMode6Align64or128or256AsmOperand : AsmOperandClass { + let Name = "AlignedMemory64or128or256"; + let DiagnosticType = "AlignedMemoryRequires64or128or256"; +} +def addrmode6align64or128or256 : AddrMode6Align { + // The alignment specifier can only be 64, 128, 256 or omitted. + let ParserMatchClass = AddrMode6Align64or128or256AsmOperand; +} + // Special version of addrmode6 to handle alignment encoding for VLD-dup // instructions, specifically VLD4-dup. def addrmode6dup : Operand, @@ -1003,6 +1078,69 @@ def addrmode6dup : Operand, let ParserMatchClass = AddrMode6AsmOperand; } +// Base class for addrmode6dup with specific alignment restrictions. +class AddrMode6DupAlign : Operand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6DupAddressOpValue"; +} + +// Special version of addrmode6 to handle no allowed alignment encoding for +// VLD-dup instruction and checking the alignment is not specified. +def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass { + let Name = "DupAlignedMemoryNone"; + let DiagnosticType = "DupAlignedMemoryRequiresNone"; +} +def addrmode6dupalignNone : AddrMode6DupAlign { + // The alignment specifier can only be omitted. + let ParserMatchClass = AddrMode6dupAlignNoneAsmOperand; +} + +// Special version of addrmode6 to handle 16-bit alignment encoding for VLD-dup +// instruction and checking the alignment value. +def AddrMode6dupAlign16AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory16"; + let DiagnosticType = "DupAlignedMemoryRequires16"; +} +def addrmode6dupalign16 : AddrMode6DupAlign { + // The alignment specifier can only be 16 or omitted. + let ParserMatchClass = AddrMode6dupAlign16AsmOperand; +} + +// Special version of addrmode6 to handle 32-bit alignment encoding for VLD-dup +// instruction and checking the alignment value. +def AddrMode6dupAlign32AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory32"; + let DiagnosticType = "DupAlignedMemoryRequires32"; +} +def addrmode6dupalign32 : AddrMode6DupAlign { + // The alignment specifier can only be 32 or omitted. + let ParserMatchClass = AddrMode6dupAlign32AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit alignment encoding for VLD +// instructions and checking the alignment value. +def AddrMode6dupAlign64AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory64"; + let DiagnosticType = "DupAlignedMemoryRequires64"; +} +def addrmode6dupalign64 : AddrMode6DupAlign { + // The alignment specifier can only be 64 or omitted. 
+ let ParserMatchClass = AddrMode6dupAlign64AsmOperand; +} + +// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding +// for VLD instructions and checking the alignment value. +def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass { + let Name = "DupAlignedMemory64or128"; + let DiagnosticType = "DupAlignedMemoryRequires64or128"; +} +def addrmode6dupalign64or128 : AddrMode6DupAlign { + // The alignment specifier can only be 64, 128 or omitted. + let ParserMatchClass = AddrMode6dupAlign64or128AsmOperand; +} + // addrmodepc := pc + reg // def addrmodepc : Operand, @@ -1689,7 +1827,8 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, } def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, - "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { + "hint", "\t$imm", [(int_arm_hint imm0_239:$imm)]>, + Requires<[IsARM, HasV6]> { bits<8> imm; let Inst{27-8} = 0b00110010000011110000; let Inst{7-0} = imm; @@ -1702,8 +1841,6 @@ def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; -def : Pat<(int_arm_sevl), (HINT 5)>; - def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { bits<4> Rd; @@ -1830,6 +1967,18 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", let Inst{3-0} = opt; } +// A8.8.247 UDF - Undefined (Encoding A1) +def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary, + "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> { + bits<16> imm16; + let Inst{31-28} = 0b1110; // AL + let Inst{27-25} = 0b011; + let Inst{24-20} = 0b11111; + let Inst{19-8} = imm16{15-4}; + let Inst{7-4} = 0b1111; + let Inst{3-0} = imm16{3-0}; +} + /* * A5.4 Permanently UNDEFINED instructions. 
* @@ -2282,12 +2431,6 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>, Requires<[IsARM, HasV5TE]>; - - // GNU Assembler extension (compatibility) - let isAsmParserOnly = 1 in - def LDRD_PAIR : AI3ld<0b1101, 0, (outs GPRPairOp:$Rt), (ins addrmode3:$addr), - LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $addr", []>, - Requires<[IsARM, HasV5TE]>; } def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), @@ -2557,14 +2700,6 @@ let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { Requires<[IsARM, HasV5TE]> { let Inst{21} = 0; } - - // GNU Assembler extension (compatibility) - let isAsmParserOnly = 1 in - def STRD_PAIR : AI3str<0b1111, (outs), (ins GPRPairOp:$Rt, addrmode3:$addr), - StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $addr", []>, - Requires<[IsARM, HasV5TE]> { - let Inst{21} = 0; - } } // Indexed stores @@ -3999,6 +4134,11 @@ def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), Requires<[IsARM, HasV6]>, Sched<[WriteALU]>; +def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)), + (REV16 (LDRH addrmode3:$addr))>; +def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr), + (STRH (REV16 GPR:$Rn), addrmode3:$addr)>; + let AddedComplexity = 5 in def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", @@ -4816,7 +4956,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, imm:$CRm, imm:$opc2)]>, Requires<[PreV8]>; -def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", +def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, @@ -4824,7 +4964,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>, Requires<[PreV8]>; -def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", +def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm", (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 0d46c49..b32b5d2 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -39,6 +39,49 @@ def nImmVMOVI32 : Operand { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmVMOVI32AsmOperand; } + +def nImmVMOVI16AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi16vmovByteReplicate"; + let PredicateMethod = "isNEONi16ByteReplicate"; + let RenderMethod = "addNEONvmovByteReplicateOperands"; +} +def nImmVMOVI32AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi32vmovByteReplicate"; + let PredicateMethod = "isNEONi32ByteReplicate"; + let RenderMethod = "addNEONvmovByteReplicateOperands"; +} +def nImmVMVNI16AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi16invByteReplicate"; + let PredicateMethod = "isNEONi16ByteReplicate"; + let RenderMethod = "addNEONinvByteReplicateOperands"; +} +def nImmVMVNI32AsmOperandByteReplicate : + AsmOperandClass { + let Name = "NEONi32invByteReplicate"; + let PredicateMethod = "isNEONi32ByteReplicate"; + let RenderMethod = "addNEONinvByteReplicateOperands"; +} + +def 
nImmVMOVI16ByteReplicate : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate; +} +def nImmVMOVI32ByteReplicate : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate; +} +def nImmVMVNI16ByteReplicate : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate; +} +def nImmVMVNI32ByteReplicate : Operand { + let PrintMethod = "printNEONModImmOperand"; + let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate; +} + def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; } def nImmVMOVI32Neg : Operand { let PrintMethod = "printNEONModImmOperand"; @@ -617,37 +660,37 @@ class VLDQQQQWBPseudo let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) -class VLD1D op7_4, string Dt> +class VLD1D op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1, + (ins AddrMode:$Rn), IIC_VLD1, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -class VLD1Q op7_4, string Dt> +class VLD1Q op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x2, + (ins AddrMode:$Rn), IIC_VLD1x2, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -def VLD1d8 : VLD1D<{0,0,0,?}, "8">; -def VLD1d16 : VLD1D<{0,1,0,?}, "16">; -def VLD1d32 : VLD1D<{1,0,0,?}, "32">; -def VLD1d64 : VLD1D<{1,1,0,?}, "64">; +def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>; +def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>; +def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>; +def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>; -def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; -def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; -def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; -def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; +def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>; +def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>; +def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>; +def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>; // ...with address register writeback: -multiclass VLD1DWB op7_4, string Dt> { +multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1u, + (ins AddrMode:$Rn), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -655,16 +698,16 @@ multiclass VLD1DWB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } } -multiclass VLD1QWB op7_4, string Dt> { +multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
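Aside on the alignment-restricted operands threaded through the VLD1 defs above: they exist so the assembly parser can reject an alignment suffix the encoding cannot represent, with the DiagnosticType strings supplying the error text. A minimal C++ sketch of the acceptance rule these operand classes imply (the helper name is hypothetical, not LLVM API):

    #include <cstdint>
    #include <initializer_list>

    // Hypothetical illustration: an AlignedMemory* operand class accepts the
    // ":<align>" suffix only when it is omitted or matches an alignment the
    // instruction form can encode.
    static bool isValidNEONAlignment(uint32_t AlignInBits,
                                     std::initializer_list<uint32_t> Allowed) {
      if (AlignInBits == 0)
        return true; // omitting the suffix is always accepted
      for (uint32_t A : Allowed)
        if (A == AlignInBits)
          return true;
      return false;  // e.g. "vld1.8 {d0}, [r0:128]" is diagnosed, not mis-encoded
    }

Under this reading, addrmode6align64 corresponds to Allowed = {64} and addrmode6align64or128 to Allowed = {64, 128}.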
@@ -672,7 +715,7 @@ multiclass VLD1QWB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -680,27 +723,27 @@ multiclass VLD1QWB op7_4, string Dt> { } } -defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; -defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; -defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; -defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; -defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; -defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; -defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; -defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; +defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>; +defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>; +defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>; +defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>; +defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>; +defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>; +defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>; +defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; // ...with 3 registers -class VLD1D3 op7_4, string Dt> +class VLD1D3 op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, + (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VLD1D3WB op7_4, string Dt> { +multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
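The _fixed/_register pairs these multiclasses generate mirror how the underlying encoding overloads the Rm field, which the `let Rm = ...` assignments pin down. A compact C++ restatement:

    // How the writeback forms select on Rm, per the defs above
    // (0b1111 encodes PC, 0b1101 encodes SP; both are reserved as markers):
    //   vld1.8 {d0}, [r1]        -> Rm == 0b1111, no writeback
    //   vld1.8 {d0}, [r1]!       -> Rm == 0b1101, post-increment by access size
    //   vld1.8 {d0}, [r1], r2    -> Rm == r2, post-increment by register
    enum class NEONWriteback { None, Fixed, Register };

    static NEONWriteback classifyRm(unsigned Rm) {
      if (Rm == 0xF) return NEONWriteback::None;
      if (Rm == 0xD) return NEONWriteback::Fixed;
      return NEONWriteback::Register;
    }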
@@ -708,7 +751,7 @@ multiclass VLD1D3WB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -716,32 +759,32 @@ multiclass VLD1D3WB op7_4, string Dt> { } } -def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; -def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; -def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; -def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; +def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>; +def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>; +def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>; +def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>; -defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; -defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; -defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; -defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; +defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>; +defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; +defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; +defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; def VLD1d64TPseudo : VLDQQPseudo; def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo; def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo; // ...with 4 registers -class VLD1D4 op7_4, string Dt> +class VLD1D4 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), - (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, + (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VLD1D4WB op7_4, string Dt> { +multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins addrmode6:$Rn), IIC_VLD1x2u, + (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -749,7 +792,7 @@ multiclass VLD1D4WB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -757,15 +800,15 @@ multiclass VLD1D4WB op7_4, string Dt> { } } -def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; -def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; -def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; -def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; +def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; +def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; +def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; +def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; -defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; -defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; -defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; -defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; +defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; +defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; def VLD1d64QPseudo : VLDQQPseudo; def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo; @@ -773,22 +816,28 @@ def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo; // VLD2 : Vector Load (multiple 2-element structures) class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, - InstrItinClass itin> + InstrItinClass itin, Operand AddrMode> : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), - (ins addrmode6:$Rn), itin, + (ins AddrMode:$Rn), itin, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } -def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; -def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>; -def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>; +def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, + addrmode6align64or128>; +def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, + addrmode6align64or128>; +def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, + addrmode6align64or128>; -def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>; -def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>; -def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>; +def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; +def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; +def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, + addrmode6align64or128or256>; def VLD2q8Pseudo : VLDQQPseudo; def VLD2q16Pseudo : VLDQQPseudo; @@ -796,9 +845,9 @@ def VLD2q32Pseudo : VLDQQPseudo; // ...with address register writeback: multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, - RegisterOperand VdTy, InstrItinClass itin> { + RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> { def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn), itin, + (ins AddrMode:$Rn), itin, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
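The alignment operand chosen for each VLD1/VST1 register-list size tracks the total transfer width. Summarized as a hypothetical lookup (illustrative only, not LLVM API):

    // Largest alignment suffix each VLD1/VST1 list size can encode, as read
    // off the addrmode6align* operands used above (three D registers get no
    // 192-bit encoding, so they stay at 64).
    static unsigned maxVLD1Alignment(unsigned NumDRegs) {
      switch (NumDRegs) {
      case 1: return 64;   // addrmode6align64
      case 2: return 128;  // addrmode6align64or128
      case 3: return 64;   // addrmode6align64
      case 4: return 256;  // addrmode6align64or128or256
      default: return 0;   // not a valid VLD1/VST1 register list
      }
    }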
@@ -806,7 +855,7 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm), itin, + (ins AddrMode:$Rn, rGPR:$Rm), itin, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; @@ -814,13 +863,19 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, } } -defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>; -defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>; -defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>; +defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, + addrmode6align64or128>; -defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>; -defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>; -defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>; +defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; +defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; +defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, + addrmode6align64or128or256>; def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo; def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo; @@ -830,12 +885,18 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo; def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo; // ...with double-spaced registers -def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>; -def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>; -def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>; -defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>; -defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>; -defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>; +def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, + addrmode6align64or128>; +defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; +defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, + addrmode6align64or128>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D op11_8, bits<4> op7_4, string Dt> @@ -1293,47 +1354,55 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; } // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 // VLD1DUP : Vector Load (single element to all lanes) -class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> +class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, + Operand AddrMode> : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd), - (ins addrmode6dup:$Rn), + (ins AddrMode:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListOneDAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + (Ty (NEONvdup 
(i32 (LoadOp AddrMode:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>; -def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>; -def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>; +def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8, + addrmode6dupalignNone>; +def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16, + addrmode6dupalign16>; +def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load, + addrmode6dupalign32>; def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPd32 addrmode6:$addr)>; -class VLD1QDUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp> +class VLD1QDUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, + Operand AddrMode> : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd), - (ins addrmode6dup:$Rn), IIC_VLD1dup, + (ins AddrMode:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListDPairAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> { + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } -def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>; -def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>; -def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>; +def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8, + addrmode6dupalignNone>; +def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16, + addrmode6dupalign16>; +def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, + addrmode6dupalign32>; def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPq32 addrmode6:$addr)>; let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: -multiclass VLD1DUPWB op7_4, string Dt> { +multiclass VLD1DUPWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD1dupu, + (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1342,17 +1411,17 @@ multiclass VLD1DUPWB op7_4, string Dt> { } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; } } -multiclass VLD1QDUPWB op7_4, string Dt> { +multiclass VLD1QDUPWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD1dupu, + (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
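At the source level, the VLD1DUP forms correspond to the vld1_dup family of NEON intrinsics: load one element and replicate it across every lane. A small example, assuming a NEON-enabled ARM toolchain:

    #include <arm_neon.h>

    uint32x2_t dup_d(const uint32_t *p) {
      return vld1_dup_u32(p);   // vld1.32 {d0[]}, [r0]
    }
    uint32x4_t dup_q(const uint32_t *p) {
      return vld1q_dup_u32(p);  // vld1.32 {d0[], d1[]}, [r0]
    }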
@@ -1361,7 +1430,7 @@ multiclass VLD1QDUPWB op7_4, string Dt> { } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -1369,38 +1438,47 @@ multiclass VLD1QDUPWB op7_4, string Dt> { } } -defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">; -defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">; -defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">; +defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>; +defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>; +defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>; -defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">; -defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">; -defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">; +defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>; +defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>; +defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>; // VLD2DUP : Vector Load (single 2-element structure to all lanes) -class VLD2DUP op7_4, string Dt, RegisterOperand VdTy> +class VLD2DUP op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode> : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd), - (ins addrmode6dup:$Rn), IIC_VLD2dup, + (ins AddrMode:$Rn), IIC_VLD2dup, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } -def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>; -def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>; -def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>; +def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes, + addrmode6dupalign16>; +def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes, + addrmode6dupalign32>; +def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes, + addrmode6dupalign64>; +// HACK this one, VLD2DUPd8x2 must be changed at the same time with VLD2b8 or +// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]". // ...with double-spaced registers -def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>; -def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; -def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; +def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes, + addrmode6dupalign16>; +def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, + addrmode6dupalign32>; +def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, + addrmode6dupalign64>; // ...with address register writeback: -multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { +multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, + Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn), IIC_VLD2dupu, + (ins AddrMode:$Rn), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
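Note how the dup-alignment operands step with the element size: a VLD2DUP loads two elements, so the encodable alignment is twice the element width. A one-line restatement (illustrative only):

    // addrmode6dupalign16/32/64 as used by VLD2DUP above: .8 -> :16,
    // .16 -> :32, .32 -> :64, i.e. two elements' worth of natural alignment.
    static unsigned vld2DupAlignment(unsigned ElemBits) { return 2 * ElemBits; }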
@@ -1409,7 +1487,7 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), - (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu, + (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; @@ -1417,13 +1495,19 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy> { } } -defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>; -defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>; -defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>; +defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes, + addrmode6dupalign16>; +defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes, + addrmode6dupalign32>; +defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes, + addrmode6dupalign64>; -defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>; -defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>; -defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>; +defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes, + addrmode6dupalign16>; +defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes, + addrmode6dupalign32>; +defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, + addrmode6dupalign64>; // VLD3DUP : Vector Load (single 3-element structure to all lanes) class VLD3DUP op7_4, string Dt> @@ -1449,22 +1533,22 @@ def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">; def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">; // ...with address register writeback: -class VLD3DUPWB op7_4, string Dt> +class VLD3DUPWB op7_4, string Dt, Operand AddrMode> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu, + (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; } -def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">; -def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">; -def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">; +def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>; +def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>; +def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>; -def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">; -def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">; -def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">; +def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; +def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; +def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo; def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo; @@ -1560,35 +1644,35 @@ class VSTQQQQWBPseudo "$addr.addr = $wb">; // VST1 : Vector Store (multiple single elements) -class VST1D op7_4, string Dt> - : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), +class VST1D op7_4, string Dt, Operand AddrMode> + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -class VST1Q op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), +class VST1Q op7_4, string Dt, Operand AddrMode> 
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -def VST1d8 : VST1D<{0,0,0,?}, "8">; -def VST1d16 : VST1D<{0,1,0,?}, "16">; -def VST1d32 : VST1D<{1,0,0,?}, "32">; -def VST1d64 : VST1D<{1,1,0,?}, "64">; +def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>; +def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>; +def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>; +def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>; -def VST1q8 : VST1Q<{0,0,?,?}, "8">; -def VST1q16 : VST1Q<{0,1,?,?}, "16">; -def VST1q32 : VST1Q<{1,0,?,?}, "32">; -def VST1q64 : VST1Q<{1,1,?,?}, "64">; +def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>; +def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>; +def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>; +def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>; // ...with address register writeback: -multiclass VST1DWB op7_4, string Dt> { +multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1596,7 +1680,7 @@ multiclass VST1DWB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1604,9 +1688,9 @@ multiclass VST1DWB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } } -multiclass VST1QWB op7_4, string Dt> { +multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, + (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
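The VST1 classes are the store mirror of VLD1, now taking the same alignment operands. With intrinsics (again assuming a NEON-enabled toolchain):

    #include <arm_neon.h>

    void store_d(uint32_t *p, uint32x2_t v) {
      vst1_u32(p, v);   // vst1.32 {d0}, [r0]
    }
    void store_q(uint32_t *p, uint32x4_t v) {
      vst1q_u32(p, v);  // vst1.32 {d0, d1}, [r0]
    }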
@@ -1614,7 +1698,7 @@ multiclass VST1QWB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1623,28 +1707,28 @@ multiclass VST1QWB op7_4, string Dt> { } } -defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">; -defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">; -defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">; -defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">; +defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>; +defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>; +defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>; +defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>; -defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">; -defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; -defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; -defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; +defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>; +defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>; +defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>; +defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; // ...with 3 registers -class VST1D3 op7_4, string Dt> +class VST1D3 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$Rn, VecListThreeD:$Vd), + (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VST1D3WB op7_4, string Dt> { +multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, + (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1652,7 +1736,7 @@ multiclass VST1D3WB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1661,33 +1745,33 @@ multiclass VST1D3WB op7_4, string Dt> { } } -def VST1d8T : VST1D3<{0,0,0,?}, "8">; -def VST1d16T : VST1D3<{0,1,0,?}, "16">; -def VST1d32T : VST1D3<{1,0,0,?}, "32">; -def VST1d64T : VST1D3<{1,1,0,?}, "64">; +def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>; +def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>; +def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>; +def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>; -defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">; -defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">; -defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">; -defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">; +defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>; +defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; +defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; +defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; def VST1d64TPseudo : VSTQQPseudo; def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo; def VST1d64TPseudoWB_register : VSTQQWBPseudo; // ...with 4 registers -class VST1D4 op7_4, string Dt> +class VST1D4 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), - (ins addrmode6:$Rn, VecListFourD:$Vd), + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } -multiclass VST1D4WB op7_4, string Dt> { +multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
@@ -1695,7 +1779,7 @@ multiclass VST1D4WB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1704,15 +1788,15 @@ multiclass VST1D4WB op7_4, string Dt> { } } -def VST1d8Q : VST1D4<{0,0,?,?}, "8">; -def VST1d16Q : VST1D4<{0,1,?,?}, "16">; -def VST1d32Q : VST1D4<{1,0,?,?}, "32">; -def VST1d64Q : VST1D4<{1,1,?,?}, "64">; +def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>; +def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>; +def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>; +def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>; -defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">; -defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">; -defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">; -defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">; +defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; +defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; def VST1d64QPseudo : VSTQQPseudo; def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo; @@ -1720,21 +1804,27 @@ def VST1d64QPseudoWB_register : VSTQQWBPseudo; // VST2 : Vector Store (multiple 2-element structures) class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, - InstrItinClass itin> - : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd), + InstrItinClass itin, Operand AddrMode> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd), itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } -def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; -def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>; -def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>; +def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, + addrmode6align64or128>; +def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, + addrmode6align64or128>; +def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, + addrmode6align64or128>; -def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>; -def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>; -def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>; +def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; +def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; +def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, + addrmode6align64or128or256>; def VST2q8Pseudo : VSTQQPseudo; def VST2q16Pseudo : VSTQQPseudo; @@ -1742,9 +1832,9 @@ def VST2q32Pseudo : VSTQQPseudo; // ...with address register writeback: multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, - RegisterOperand VdTy> { + RegisterOperand VdTy, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
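What the "$Rn.addr = $wb" constraint buys in practice: the fixed-writeback form advances the base register by the transfer size, so a strided store loop can fold its pointer bump into the instruction. A sketch of the source pattern this serves (whether the fold actually happens is up to the backend):

    #include <arm_neon.h>

    void store_interleaved(uint8_t *p, uint8x8x2_t v, int n) {
      for (int i = 0; i < n; ++i) {
        vst2_u8(p, v); // 16 bytes: two D registers, elements interleaved
        p += 16;       // candidate for folding into "vst2.8 {d0, d1}, [r0:64]!"
      }
    }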
@@ -1752,16 +1842,16 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } } -multiclass VST2QWB op7_4, string Dt> { +multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u, + (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. @@ -1769,7 +1859,7 @@ multiclass VST2QWB op7_4, string Dt> { let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), + (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { @@ -1778,13 +1868,16 @@ multiclass VST2QWB op7_4, string Dt> { } } -defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>; -defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>; -defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>; +defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair, + addrmode6align64or128>; +defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair, + addrmode6align64or128>; +defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair, + addrmode6align64or128>; -defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">; -defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">; -defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">; +defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; +defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; +defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo; def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo; @@ -1794,12 +1887,18 @@ def VST2q16PseudoWB_register : VSTQQWBregisterPseudo; def VST2q32PseudoWB_register : VSTQQWBregisterPseudo; // ...with double-spaced registers -def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>; -def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>; -def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>; -defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>; -defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>; -defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>; +def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2, + addrmode6align64or128>; +defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, + addrmode6align64or128>; +defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, + addrmode6align64or128>; +defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, + addrmode6align64or128>; // VST3 : Vector Store (multiple 3-element structures) class VST3D op11_8, bits<4> op7_4, string Dt> @@ -2267,9 +2366,9 @@ def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)), def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), 
(VST1q64 addrmode6:$addr, QPR:$value)>; def : Pat<(v2f64 (word_alignedload addrmode6:$addr)), - (VLD1q32 addrmode6:$addr)>; + (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>; def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr), - (VST1q32 addrmode6:$addr, QPR:$value)>; + (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>; def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)), (VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>; def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr), @@ -2357,14 +2456,14 @@ class N2VDIntnp op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; class N2VQIntnp op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), - itin, OpcodeStr, Dt, ResTy, OpTy, + itin, OpcodeStr, Dt, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; // Similar to NV2VQIntnp with some more encoding bits exposed (crypto). @@ -2372,7 +2471,7 @@ class N2VQIntXnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp; // Same as N2VQIntXnp but with Vd as a src register. @@ -2381,7 +2480,7 @@ class N2VQIntX2np op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> : N2Vnp { let Constraints = "$src = $Vd"; } @@ -2555,7 +2654,6 @@ class N3VDIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp; class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, @@ -2609,7 +2707,6 @@ class N3VQIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp; // Same as N3VQIntnp but with Vd as a src register. @@ -2618,8 +2715,8 @@ class N3VQInt3np op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3Vnp { let Constraints = "$src = $Vd"; @@ -2939,7 +3036,6 @@ class N3VLIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, SDPatternOperator IntOp, bit Commutable> : N3Vnp; class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, @@ -5245,6 +5341,35 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd), [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>; } // isReMaterializable +// Add support for bytes replication feature, so it could be GAS compatible. +// E.g. instructions below: +// "vmov.i32 d0, 0xffffffff" +// "vmov.i32 d0, 0xabababab" +// "vmov.i16 d0, 0xabab" +// are incorrect, but we could deal with such cases. +// For last two instructions, for example, it should emit: +// "vmov.i8 d0, 0xab" +def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>; + +// Also add same support for VMVN instructions. 
So instruction: +// "vmvn.i32 d0, 0xabababab" +// actually means: +// "vmov.i8 d0, 0x54" +def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm", + (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>; +def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm", + (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>; // On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0" // require zero cycles to execute so they should be used wherever possible for @@ -5617,22 +5742,22 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; } -def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; -def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; @@ -6051,67 +6176,145 @@ def : Pat<(f32 (bitconvert GPR:$a)), //===----------------------------------------------------------------------===// // bit_convert -def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>; +} def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>; +} def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 
DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>; +} def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>; +} def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>; +} -def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>; +} def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>; + def : Pat<(v4i32 (bitconvert 
(v2i64 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>; +} def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>; +} def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>; +} def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +let Predicates = [IsLE] in { + def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; +} + +let Predicates = [IsBE] in { + // 64 bit conversions + def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; 
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>; + def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>; + def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>; + def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>; + + // 128 bit conversions + def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>; + def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>; + def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>; +} // Fold extracting an element out of a v2i32 into a vfp register. 
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), @@ -6120,7 +6323,7 @@ def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))), // Vector lengthening move with load, matching extending loads. // extload, zextload and sextload for a standard lengthening load. Example: -// Lengthen_Single<"8", "i16", "8"> = +// Lengthen_Single<"8", "i16", "8"> = // Pat<(v8i16 (extloadvi8 addrmode6:$addr)) // (VMOVLuv8i16 (VLD1d8 addrmode6:$addr, // (f64 (IMPLICIT_DEF)), (i32 0)))>; @@ -6147,7 +6350,7 @@ multiclass Lengthen_Single { // half the lanes available. Example: // Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> = // Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)), -// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, +// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr, // (f64 (IMPLICIT_DEF)), (i32 0))), // dsub_0)>; multiclass Lengthen_HalfSingle; // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 @@ -6311,379 +6514,442 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD1LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD1LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD1LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VLD1LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + (ins 
VecListOneDWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; // VST1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST1LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST1LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm", - (ins VecListOneDByteIndexed:$list, addrmode6:$addr, + (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST1LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm", - (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VST1LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm", - (ins VecListOneDWordIndexed:$list, addrmode6:$addr, + (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; // VLD2 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
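// Editor's note (illustrative sketch, not part of the original patch): the
// move from the generic addrmode6 operand to the alignment-specific classes
// above lets the assembler check the ":<align>" hint while parsing. Assuming
// standard UAL syntax, a VLD2 single-lane form such as
//   vld2.16 {d0[1], d1[1]}, [r0:32]
// is accepted (two 16-bit elements allow a 32-bit hint, matching
// addrmode6align32), whereas an over-aligned variant such as [r0:128]
// can now be rejected at parse time instead of being silently encoded.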
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>; def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VST2 single-lane pseudo-instructions. 
These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST2LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST2LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm", - (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, + (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr, rGPR:$Rm, pred:$p)>; def VST2LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", - (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST2LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST2LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm", - (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST2LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm", - (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, + (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VLD3 all-lanes 
pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, + pred:$p)>; def VLD3DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDAllLanes:$list, addrmode6:$addr, + (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr, 
rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQAllLanes:$list, addrmode6:$addr, + (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr, rGPR:$Rm, pred:$p)>; // VLD3 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VLD3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins 
VecListThreeDHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VLD3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; // VLD3 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VLD3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, 
pred:$p)>; def VLD3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VST3 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNqWB_fixed_Asm_32 : 
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, + pred:$p)>; def VST3LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, + (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeDHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListThreeQHWordIndexed:$list, + addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; def VST3LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, + (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>; @@ -6691,168 +6957,190 @@ def VST3LNqWB_register_Asm_32 : // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, 
$addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!", - (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>; def VST3dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeD:$list, addrmode6:$addr, + (ins VecListThreeD:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST3qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm", - (ins VecListThreeQ:$list, addrmode6:$addr, + (ins VecListThreeQ:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; // VLD4 all-lanes pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. 
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, + pred:$p)>; def VLD4DUPqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr, + pred:$p)>; def VLD4DUPdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, + (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, + (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDAllLanes:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDAllLanes:$list, + addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins 
VecListFourQAllLanes:$list, addrmode6:$addr, + (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, + (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr, rGPR:$Rm, pred:$p)>; def VLD4DUPqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQAllLanes:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQAllLanes:$list, + addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>; // VLD4 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VLD4LNqWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VLD4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def 
VLD4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VLD4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; @@ -6860,168 +7148,202 @@ def VLD4LNqWB_register_Asm_32 : // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VLD4dWB_register_Asm_8 : 
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VLD4qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; // VST4 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead. def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNdWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNqWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, + pred:$p)>; def VST4LNqWB_fixed_Asm_32 : 
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr, + pred:$p)>; def VST4LNdWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourDByteIndexed:$list, addrmode6:$addr, + (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr, rGPR:$Rm, pred:$p)>; def VST4LNdWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST4LNdWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourDWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourDWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; def VST4LNqWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, + (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr, rGPR:$Rm, pred:$p)>; def VST4LNqWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQWordIndexed:$list, addrmode6:$addr, - rGPR:$Rm, pred:$p)>; + (ins VecListFourQWordIndexed:$list, + addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>; // VST4 multiple structure pseudo-instructions. These need special handling for // the vector operands that the normal instructions don't yet model. // FIXME: Remove these when the register classes and instructions are updated. def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourD:$list, 
addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4qWB_fixed_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!", - (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>; + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, + pred:$p)>; def VST4dWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4dWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4dWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourD:$list, addrmode6:$addr, + (ins VecListFourD:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; def VST4qWB_register_Asm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm", - (ins VecListFourQ:$list, addrmode6:$addr, + (ins VecListFourQ:$list, addrmode6align64or128or256:$addr, rGPR:$Rm, pred:$p)>; // VMOV/VMVN takes an optional datatype suffix diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 754295f..e17f73a 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -269,7 +269,8 @@ class T1SystemEncoding opc> let Inst{7-0} = opc; } -def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>, +def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", + [(int_arm_hint imm0_15:$imm)]>, T1SystemEncoding<0x00>, Requires<[IsThumb, HasV6M]> { bits<4> imm; @@ -288,7 +289,6 @@ def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157 def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> { let Predicates = [IsThumb2, HasV8]; } -def : T2Pat<(int_arm_sevl), (tHINT 5)>; // The imm operand $val can be used by a debugger to store more information // about the breakpoint. 
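The VLD4/VST4 hunks above replace the catch-all addrmode6 operand with alignment-qualified classes (addrmode6align32, addrmode6align64, addrmode6align64or128, addrmode6align64or128or256), so the assembler can reject an alignment specifier the instruction cannot encode. A minimal standalone sketch of the acceptance rule those classes encode; alignOK and the two predicate names below are invented stand-ins for the isAlignedMemory* predicates added to ARMAsmParser.cpp further down in this patch, and the code is plain C++, not LLVM's:

    #include <initializer_list>

    // Acceptance rule encoded by the addrmode6align* operand classes:
    // an explicit [Rn:align] is legal only if the byte alignment is in
    // the class's allowed set; an omitted alignment (0) always passes.
    static bool alignOK(unsigned alignBytes,
                        std::initializer_list<unsigned> allowed) {
      if (alignBytes == 0)
        return true;
      for (unsigned a : allowed)
        if (alignBytes == a)
          return true;
      return false;
    }

    // addrmode6align64or128or256: vld4/vst4 with a full four-register list.
    static bool alignedMemory64or128or256(unsigned alignBytes) {
      return alignOK(alignBytes, {8, 16, 32}); // 64-, 128- or 256-bit
    }

    // addrmode6align64or128: vld4/vst4 on a 32-bit lane.
    static bool alignedMemory64or128(unsigned alignBytes) {
      return alignOK(alignBytes, {8, 16});     // 64- or 128-bit
    }

    int main() {
      // "vld4.32 {d0,d1,d2,d3}, [r0:128]" is accepted, while
      // "vst4.32 {d0[1],d1[1],d2[1],d3[1]}, [r0:256]" is rejected.
      return (alignedMemory64or128or256(16) &&
              !alignedMemory64or128(32)) ? 0 : 1;
    }

Omitting the alignment is always accepted; only an explicit ":align" outside the class's set is an error, which matches the isMemNoOffset(false, N) fallback chain in the parser hunks below.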
@@ -1193,6 +1193,15 @@ def tTST : // A8.6.230 [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>, Sched<[WriteALU]>; +// A8.8.247 UDF - Undefined (Encoding T1) +def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8", + [(int_arm_undefined imm0_255:$imm8)]>, Encoding16 { + bits<8> imm8; + let Inst{15-12} = 0b1101; + let Inst{11-8} = 0b1110; + let Inst{7-0} = imm8; +} + // Zero-extend byte def tUXTB : // A8.6.262 T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm), @@ -1308,6 +1317,18 @@ def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs), def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs), (tSUBrr tGPR:$lhs, tGPR:$rhs)>; +// Bswap 16 with load/store +def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)), + (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>; +def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)), + (tREV16 (tLDRHi t_addrmode_is2:$addr))>; +def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), + t_addrmode_rrs2:$addr), + (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>; +def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)), + t_addrmode_is2:$addr), + (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>; + // ConstantPool def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 387bd60..c30d6ab 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1445,7 +1445,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, // Store doubleword let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; // Indexed stores @@ -1676,7 +1676,7 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>; // pci variant is very similar to i12, but supports negative offsets // from the PC. 
Only PLD and PLI have pci variants (not PLDW) class T2Iplpci inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr), - IIC_Preload, opc, "\t$addr", + IIC_Preload, opc, "\t$addr", [(ARMPreload (ARMWrapper tconstpool:$addr), (i32 0), (i32 inst))]>, Sched<[WritePreLd]> { let Inst{31-25} = 0b1111100; @@ -1918,7 +1918,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi, let DecoderMethod = "DecodeT2MOVTWInstruction"; } -def : t2InstAlias<"mov${p} $Rd, $imm", +def : t2InstAlias<"mov${p} $Rd, $imm", (t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>; def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd), @@ -2407,6 +2407,19 @@ def t2UBFX: T2TwoRegBitFI< let Inst{15} = 0; } +// A8.8.247 UDF - Undefined (Encoding T2) +def t2UDF : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16", + [(int_arm_undefined imm0_65535:$imm16)]> { + bits<16> imm16; + let Inst{31-29} = 0b111; + let Inst{28-27} = 0b10; + let Inst{26-20} = 0b1111111; + let Inst{19-16} = imm16{15-12}; + let Inst{15} = 0b1; + let Inst{14-12} = 0b010; + let Inst{11-0} = imm16{11-0}; +} + // A8.6.18 BFI - Bitfield insert (Encoding T1) let Constraints = "$src = $Rd" in { def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd), @@ -3495,8 +3508,8 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br, let Inst{25-16} = target{20-11}; let Inst{10-0} = target{10-0}; let DecoderMethod = "DecodeT2BInstruction"; - let AsmMatchConverter = "cvtThumbBranches"; -} + let AsmMatchConverter = "cvtThumbBranches"; +} let isNotDuplicable = 1, isIndirectBranch = 1 in { def t2BR_JT : t2PseudoInst<(outs), @@ -3671,7 +3684,8 @@ def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>; // A6.3.4 Branches and miscellaneous control // Table A6-14 Change Processor State, and hint instructions -def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",[]> { +def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm", + [(int_arm_hint imm0_239:$imm)]> { bits<8> imm; let Inst{31-3} = 0b11110011101011111000000000000; let Inst{7-0} = imm; @@ -3698,7 +3712,7 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { // Secure Monitor Call is a system instruction. // Option = Inst{19-16} -def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", +def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", []>, Requires<[IsThumb2, HasTrustZone]> { let Inst{31-27} = 0b11110; let Inst{26-20} = 0b1111111; @@ -4278,7 +4292,7 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm", // Aliases for ADD without the ".w" optional width specifier. 
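The tUDF and t2UDF definitions above pin down the permanently-undefined encodings behind int_arm_undefined. As a sanity check, the fixed fields from their "let Inst{...}" lines pack as follows; encodeT1UDF/encodeT2UDF are illustrative helper names, and the expected words for "udf #0" follow directly from the bit layouts shown in the defs:

    #include <cassert>
    #include <cstdint>

    // Encoding T1: Inst{15-12}=0b1101, Inst{11-8}=0b1110, Inst{7-0}=imm8.
    static uint16_t encodeT1UDF(uint8_t imm8) {
      return 0xDE00u | imm8;                     // 1101 1110 iiiiiiii
    }

    // Encoding T2: 111 10 1111111 imm4 1 010 imm12, imm16 split 4/12.
    static uint32_t encodeT2UDF(uint16_t imm16) {
      return 0xF7F0A000u
             | (uint32_t(imm16 & 0xF000u) << 4)  // imm16{15-12} -> Inst{19-16}
             | (imm16 & 0x0FFFu);                // imm16{11-0}  -> Inst{11-0}
    }

    int main() {
      assert(encodeT1UDF(0x00) == 0xDE00);       // "udf   #0"
      assert(encodeT1UDF(0xFE) == 0xDEFE);
      assert(encodeT2UDF(0x0000) == 0xF7F0A000); // "udf.w #0"
      return 0;
    }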
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm", - (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, + (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"add${p} $Rd, $Rn, $imm", (t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 73c6eb7..8821c2d 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "ARMJITInfo.h" #include "ARMConstantPoolValue.h" #include "ARMRelocations.h" @@ -25,6 +24,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "jit" + void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); } @@ -319,13 +320,13 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, break; } case ARM::reloc_arm_movw: { - ResultPtr = ResultPtr & 0xFFFF; + ResultPtr = ResultPtr & 0xFFFF; *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; break; } case ARM::reloc_arm_movt: { - ResultPtr = (ResultPtr >> 16) & 0xFFFF; + ResultPtr = (ResultPtr >> 16) & 0xFFFF; *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; break; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 48e0bd7..ee7df54 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-ldst-opt" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Thumb1RegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -42,6 +43,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "arm-ldst-opt" + STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); @@ -65,9 +68,10 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const ARMSubtarget *STI; + const TargetLowering *TL; ARMFunctionInfo *AFI; RegScavenger *RS; - bool isThumb2; + bool isThumb1, isThumb2; bool runOnMachineFunction(MachineFunction &Fn) override; @@ -93,7 +97,10 @@ namespace { void findUsesOfImpDef(SmallVectorImpl &UsesOfImpDefs, const MemOpQueue &MemOps, unsigned DefReg, unsigned RangeBegin, unsigned RangeEnd); - + void UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg); bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int Offset, unsigned Base, bool BaseKill, int Opcode, ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, @@ -119,7 +126,6 @@ namespace { ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, MemOpQueue &MemOps, SmallVectorImpl &Merges); - void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); @@ 
-159,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { case ARM_AM::db: return ARM::STMDB; case ARM_AM::ib: return ARM::STMIB; } + case ARM::tLDRi: + // tLDMIA is writeback-only - unless the base register is in the input + // reglist. + ++NumLDMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tLDMIA; + } + case ARM::tSTRi: + // There is no non-writeback tSTMIA either. + ++NumSTMGened; + switch (Mode) { + default: llvm_unreachable("Unhandled submode!"); + case ARM_AM::ia: return ARM::tSTMIA_UPD; + } case ARM::t2LDRi8: case ARM::t2LDRi12: ++NumLDMGened; @@ -217,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::LDMIA_UPD: case ARM::STMIA: case ARM::STMIA_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA_RET: case ARM::t2LDMIA: case ARM::t2LDMIA_UPD: @@ -263,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { } // end namespace ARM_AM } // end namespace llvm +static bool isT1i32Load(unsigned Opc) { + return Opc == ARM::tLDRi; +} + static bool isT2i32Load(unsigned Opc) { return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8; } static bool isi32Load(unsigned Opc) { - return Opc == ARM::LDRi12 || isT2i32Load(Opc); + return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ; +} + +static bool isT1i32Store(unsigned Opc) { + return Opc == ARM::tSTRi; } static bool isT2i32Store(unsigned Opc) { @@ -276,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) { } static bool isi32Store(unsigned Opc) { - return Opc == ARM::STRi12 || isT2i32Store(Opc); + return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc); +} + +static unsigned getImmScale(unsigned Opc) { + switch (Opc) { + default: llvm_unreachable("Unhandled opcode!"); + case ARM::tLDRi: + case ARM::tSTRi: + return 1; + case ARM::tLDRHi: + case ARM::tSTRHi: + return 2; + case ARM::tLDRBi: + case ARM::tSTRBi: + return 4; + } +} + +/// Update future uses of the base register with the offset introduced +/// due to writeback. This function only works on Thumb1. +void +ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc dl, unsigned Base, + unsigned WordOffset, + ARMCC::CondCodes Pred, unsigned PredReg) { + assert(isThumb1 && "Can only update base register uses for Thumb1!"); + + // Start updating any instructions with immediate offsets. Insert a sub before + // the first non-updateable instruction (if any). + for (; MBBI != MBB.end(); ++MBBI) { + if (MBBI->readsRegister(Base)) { + unsigned Opc = MBBI->getOpcode(); + int Offset; + bool InsertSub = false; + + if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || + Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || + Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { + // Loads and stores with immediate offsets can be updated, but only if + // the new offset isn't negative. + // The MachineOperand containing the offset immediate is the last one + // before predicates. + MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + // The offsets are scaled by 1, 2 or 4 depending on the Opcode + Offset = MO.getImm() - WordOffset * getImmScale(Opc); + if (Offset >= 0) + MO.setImm(Offset); + else + InsertSub = true; + + } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { + // SUB/ADD using this register. Merge it with the update. + // If the merged offset is too large, insert a new sub instead. 
+ MachineOperand &MO = + MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); + Offset = (Opc == ARM::tSUBi8) ? + MO.getImm() + WordOffset * 4 : + MO.getImm() - WordOffset * 4 ; + if (TL->isLegalAddImmediate(Offset)) { + MO.setImm(Offset); + // The base register has now been reset, so exit early. + return; + } else { + InsertSub = true; + } + + } else { + // Can't update the instruction. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) + .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); + return; + } + } + + if (MBBI->killsRegister(Base)) + // Register got killed. Stop updating. + return; + } + + // The end of the block was reached. This means register liveness escapes the + // block, and it's necessary to insert a sub before the last instruction. + if (MBB.succ_size() > 0) + // But only insert the SUB if there is actually a successor block. + // FIXME: Check more carefully if register is live at this point, e.g. by + // also examining the successor block's register liveness information. + AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) + .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -296,18 +423,19 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; ARM_AM::AMSubMode Mode = ARM_AM::ia; - // VFP and Thumb2 do not support IB or DA modes. + // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); - bool haveIBAndDA = isNotVFP && !isThumb2; - if (Offset == 4 && haveIBAndDA) + bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1; + + if (Offset == 4 && haveIBAndDA) { Mode = ARM_AM::ib; - else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) + } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) { Mode = ARM_AM::da; - else if (Offset == -4 * (int)NumRegs && isNotVFP) + } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) { // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; - else if (Offset != 0) { - // Check if this is a supported opcode before we insert instructions to + } else if (Offset != 0) { + // Check if this is a supported opcode before inserting instructions to // calculate a new base register. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; @@ -318,41 +446,98 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; unsigned NewBase; - if (isi32Load(Opcode)) + if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to // use as the new base. NewBase = Regs[NumRegs-1].first; - else { + } else { // Use the scratch register to use as a new base. NewBase = Scratch; if (NewBase == 0) return false; } - int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri; + + int BaseOpc = + isThumb2 ? ARM::t2ADDri : + isThumb1 ? ARM::tADDi8 : ARM::ADDri; + if (Offset < 0) { - BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri; + BaseOpc = + isThumb2 ? ARM::t2SUBri : + isThumb1 ? ARM::tSUBi8 : ARM::SUBri; Offset = - Offset; } - int ImmedOffset = isThumb2 - ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset); - if (ImmedOffset == -1) - // FIXME: Try t2ADDri12 or t2SUBri12? - return false; // Probably not worth it then. 
- - BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) - .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) - .addImm(Pred).addReg(PredReg).addReg(0); + + if (!TL->isLegalAddImmediate(Offset)) + // FIXME: Try add with register operand? + return false; // Probably not worth it then. + + if (isThumb1) { + if (Base != NewBase) { + // Need to insert a MOV to the new base first. + // FIXME: If the immediate fits in 3 bits, use ADD instead. + BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) + .addReg(Base, getKillRegState(BaseKill)) + .addImm(Pred).addReg(PredReg); + } + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) + .addReg(NewBase, getKillRegState(true)).addImm(Offset) + .addImm(Pred).addReg(PredReg); + } else { + BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) + .addImm(Pred).addReg(PredReg).addReg(0); + } + Base = NewBase; - BaseKill = true; // New base is always killed right its use. + BaseKill = true; // New base is always killed straight away. } bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); + + // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with + // base register writeback. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) - .addReg(Base, getKillRegState(BaseKill)) - .addImm(Pred).addReg(PredReg); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. Check for this. + if (Opcode == ARM::tLDRi && isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + Writeback = false; + break; + } + + MachineInstrBuilder MIB; + + if (Writeback) { + if (Opcode == ARM::tLDMIA) + // Update tLDMIA with writeback if necessary. + Opcode = ARM::tLDMIA_UPD; + + // The base isn't dead after a merged instruction with writeback. Update + // future uses of the base with the added offset (if possible), or reset + // the base register as necessary. + if (!BaseKill) + UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + + // Thumb1: we might need to set base writeback when building the MI. + MIB.addReg(Base, getDefRegState(true)) + .addReg(Base, getKillRegState(BaseKill)); + } else { + // No writeback, simply build the MachineInstr. + MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); + MIB.addReg(Base, getKillRegState(BaseKill)); + } + + MIB.addImm(Pred).addReg(PredReg); + for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) | getKillRegState(Regs[i].second)); @@ -492,7 +677,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, // affected uses. 
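Most of the new Thumb1 handling above hinges on one piece of arithmetic: tLDMIA/tSTMIA_UPD write the base register back (the only exception being a tLDMIA whose base is also in the register list), so when the base is not killed, UpdateBaseRegUses must shrink the immediate of every later access through that base by WordOffset words, expressed in each opcode's own units via getImmScale, or fall back to inserting a compensating SUBS. A standalone model of that decision, with invented names (Kind, immScale, absorbWriteback) and no LLVM types; it assumes tLDRi immediates count words, tLDRHi halfwords and tLDRBi bytes, which is what the getImmScale table implies:

    #include <cassert>

    enum Kind { Word, Half, Byte };  // tLDR/tSTR, tLDRH/tSTRH, tLDRB/tSTRB

    // Mirrors getImmScale(): how many immediate units one word covers.
    static unsigned immScale(Kind k) {
      switch (k) {
      case Word: return 1;  // tLDRi immediates count words
      case Half: return 2;  // tLDRHi immediates count halfwords
      case Byte: return 4;  // tLDRBi immediates count bytes
      }
      return 0;
    }

    // Returns true and updates `imm` if the later access can absorb the
    // writeback; false means the pass must insert "subs rBase, #off"
    // instead, because the adjusted offset would be negative.
    static bool absorbWriteback(Kind k, int &imm, unsigned wordOffset) {
      int adjusted = imm - int(wordOffset * immScale(k));
      if (adjusted < 0)
        return false;
      imm = adjusted;
      return true;
    }

    int main() {
      int imm = 3;  // ldr r0, [rBase, #12] after a 2-word writeback...
      assert(absorbWriteback(Word, imm, 2) && imm == 1);  // ...[rBase, #4]
      imm = 4;      // ldrb r0, [rBase, #4] cannot go negative:
      assert(!absorbWriteback(Byte, imm, 2));  // needs "subs rBase, #8"
      return 0;
    }

The tSUBi8/tADDi8 branch above is the same idea in reverse: the writeback distance (WordOffset * 4 bytes) is folded into an existing add/sub of the base when the merged immediate is still legal.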
for (SmallVectorImpl::iterator I = UsesOfImpDefs.begin(), E = UsesOfImpDefs.end(); - I != E; ++I) + I != E; ++I) (*I)->setIsUndef(); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { @@ -589,7 +774,6 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); - return; } static bool definesCPSR(MachineInstr *MI) { @@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tSUBi8: case ARM::t2SUBri: case ARM::SUBri: CheckCPSRDef = true; @@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || + MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, bool CheckCPSRDef = false; switch (MI->getOpcode()) { default: return false; + case ARM::tADDi8: case ARM::t2ADDri: case ARM::ADDri: CheckCPSRDef = true; @@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, // Make sure the offset fits in 8 bits. return false; - unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME + unsigned Scale = (MI->getOpcode() == ARM::tADDspi || + MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME if (!(MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm()*Scale) == Bytes && + (MI->getOperand(2).getImm() * Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg)) return false; @@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { default: return 0; case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::STMDA: case ARM::STMDB: case ARM::STMIB: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA_UPD: case ARM::t2LDMIA: case ARM::t2LDMDB: case ARM::t2STMIA: @@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 is already using updating loads/stores. + if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(0).getReg(); bool BaseKill = MI->getOperand(0).isKill(); @@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, const TargetInstrInfo *TII, bool &Advance, MachineBasicBlock::iterator &I) { + // Thumb1 doesn't have updating LDR/STR. + // FIXME: Use LDM/STM with single register instead. 
+ if (isThumb1) return false; + MachineInstr *MI = MBBI; unsigned Base = MI->getOperand(1).getReg(); bool BaseKill = MI->getOperand(1).isKill(); @@ -1002,7 +1202,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, return false; if (isAM5) { - // VLDM[SD}_UPD, VSTM[SD]_UPD + // VLDM[SD]_UPD, VSTM[SD]_UPD // (There are no base-updating versions of VLDR/VSTR instructions, but the // updating load/store-multiple instructions can be used with only one // register.) @@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) { return MI->getOperand(1).isReg(); case ARM::LDRi12: case ARM::STRi12: + case ARM::tLDRi: + case ARM::tSTRi: case ARM::t2LDRi8: case ARM::t2LDRi12: case ARM::t2STRi8: @@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) { Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) return OffField; + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; if (isAM3) { @@ -1408,16 +1614,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (MBBI == E) // Reach the end of the block, try merging the memory instructions. TryMerge = true; - } else + } else { TryMerge = true; + } if (TryMerge) { if (NumMemOps > 1) { // Try to find a free register to use as a new base in case it's needed. // First advance to the instruction just before the start of the chain. AdvanceRS(MBB, MemOps); + // Find a scratch register. - unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass); + unsigned Scratch = + RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass); + // Process the load / store instructions. RS->forward(std::prev(MBBI)); @@ -1483,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// => /// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // Thumb1 LDM doesn't allow high registers. + if (isThumb1) return false; if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -1513,12 +1725,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); + TL = TM.getTargetLowering(); AFI = Fn.getInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); STI = &TM.getSubtarget(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); + isThumb1 = AFI->isThumbFunction() && !isThumb2; bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; @@ -1666,11 +1880,11 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); - if (Opcode == ARM::LDRi12) + if (Opcode == ARM::LDRi12) { NewOpc = ARM::LDRD; - else if (Opcode == ARM::STRi12) + } else if (Opcode == ARM::STRi12) { NewOpc = ARM::STRD; - else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { + } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) { NewOpc = ARM::t2LDRDi8; Scale = 4; isT2 = true; @@ -1678,8 +1892,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, NewOpc = ARM::t2STRDi8; Scale = 4; isT2 = true; - } else + } else { return false; + } // Make sure the base address satisfies i64 ld / st alignment requirement. 
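The getMemoryOpOffset change above returns OffField * 4 for tLDRi/tSTRi because Thumb1 stores the immediate as a word count. A tiny sketch of the decoding; thumb1ByteOffset is an invented name, and the 5-bit field width (hence the 0-124 byte reach) comes from the Thumb1 LDR/STR immediate encoding rather than from this hunk:

    #include <cassert>

    // Thumb1 tLDRi/tSTRi keep a 5-bit word count; byte offset = imm5 * 4.
    static int thumb1ByteOffset(unsigned imm5) {
      assert(imm5 < 32 && "tLDRi/tSTRi immediates are 5 bits");
      return int(imm5) * 4;
    }

    int main() {
      assert(thumb1ByteOffset(0) == 0);
      assert(thumb1ByteOffset(31) == 124);  // largest encodable offset
      return 0;
    }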
// At the moment, we ignore the memoryoperand's value. @@ -1746,8 +1961,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, while (Ops.size() > 1) { unsigned FirstLoc = ~0U; unsigned LastLoc = 0; - MachineInstr *FirstOp = 0; - MachineInstr *LastOp = 0; + MachineInstr *FirstOp = nullptr; + MachineInstr *LastOp = nullptr; int LastOffset = 0; unsigned LastOpcode = 0; unsigned LastBytes = 0; diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 20619fa..2a49255 100644 --- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -8,8 +8,6 @@ // //===------------------------------------------------------------------------------------------===// -#define DEBUG_TYPE "double barriers" - #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" @@ -17,6 +15,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; +#define DEBUG_TYPE "double barriers" + STATISTIC(NumDMBsRemoved, "Number of DMBs removed"); namespace { @@ -25,9 +25,9 @@ public: static char ID; ARMOptimizeBarriersPass() : MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "optimise barriers pass"; } diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 7f0fe05..b290e7f 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -116,13 +116,13 @@ def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>; } // VFP3 defines 16 additional double registers -def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; +def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>; def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>; def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>; def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>; def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>; -def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; +def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>; def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>; def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>; @@ -158,11 +158,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Current Program Status Register. // We model fpscr with two registers: FPSCR models the control bits and will be // reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV -// models the APSR when it's accessed by some special instructions. In such cases +// models the APSR when it's accessed by some special instructions. In such cases // it has the same encoding as PC. 
def CPSR : ARMReg<0, "cpsr">; def APSR : ARMReg<1, "apsr">; -def APSR_NZCV : ARMReg<15, "apsr_nzcv">; +def APSR_NZCV : ARMReg<15, "apsr_nzcv">; def SPSR : ARMReg<2, "spsr">; def FPSCR : ARMReg<3, "fpscr">; def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> { diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 0ace9bc..57d0bfb 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -93,7 +93,7 @@ def ARMV6Itineraries : ProcessorItineraries< InstrItinData], [5, 1, 1, 2]>, InstrItinData], [6, 1, 1]>, InstrItinData], [6, 1, 1, 2]>, - + // Integer load pipeline // // Immediate offset @@ -181,7 +181,7 @@ def ARMV6Itineraries : ProcessorItineraries< // // Store multiple + update InstrItinData], [2]>, - + // Branch // // no delay slots, so the latency of a branch is unimportant diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index ba3cf4d..008ad64 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-selectiondag-info" #include "ARMTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" using namespace llvm; +#define DEBUG_TYPE "arm-selectiondag-info" + ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) : TargetSelectionDAGInfo(TM), Subtarget(&TM.getSubtarget()) { @@ -52,9 +53,10 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, EVT VT = MVT::i32; unsigned VTSize = 4; unsigned i = 0; - const unsigned MAX_LOADS_IN_LDM = 6; - SDValue TFOps[MAX_LOADS_IN_LDM]; - SDValue Loads[MAX_LOADS_IN_LDM]; + // Emit a maximum of 4 loads in Thumb1 since we have fewer registers + const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 
4 : 6; + SDValue TFOps[6]; + SDValue Loads[6]; uint64_t SrcOff = 0, DstOff = 0; // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the @@ -71,7 +73,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); for (i = 0; i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { @@ -82,7 +85,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, isVolatile, false, 0); DstOff += VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); EmittedNumMemOps += i; } @@ -112,7 +116,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SrcOff += VTSize; BytesLeft -= VTSize; } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); i = 0; BytesLeft = BytesLeftSave; @@ -133,7 +138,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, DstOff += VTSize; BytesLeft -= VTSize; } - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(TFOps, i)); } // Adjust parameters for memset, EABI uses format (ptr, size, value), @@ -146,7 +152,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { // Use default for non-AAPCS (or MachO) subtargets - if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO()) + if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO() || + Subtarget->isTargetWindows()) return SDValue(); const ARMTargetLowering &TLI = @@ -179,22 +186,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, Args.push_back(Entry); // Emit __eabi_memset call - TargetLowering::CallLoweringInfo CLI(Chain, - Type::getVoidTy(*DAG.getContext()), // return type - false, // return sign ext - false, // return zero ext - false, // is var arg - false, // is in regs - 0, // number of fixed arguments - TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv - false, // is tail call - false, // does not return - false, // is return val used - DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), // callee - Args, DAG, dl); - std::pair CallResult = - TLI.LowerCallTo(CLI); - + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), + TLI.getPointerTy()), &Args, 0) + .setDiscardResult(); + + std::pair CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 73e2018..5b204f6 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -21,12 +21,14 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "arm-subtarget" + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "ARMGenSubtargetInfo.inc" -using namespace llvm; - static cl::opt ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); diff 
--git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3855419..38536b2 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -31,7 +31,7 @@ class TargetOptions; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { - Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, + Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, CortexR5, Swift, CortexA53, CortexA57, Krait }; enum ARMProcClassEnum { @@ -242,9 +242,7 @@ protected: /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. unsigned getMaxInlineSizeThreshold() const { - // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1. - // Change this once Thumb1 ldmia / stmia support is added. - return isThumb1Only() ? 0 : 64; + return 64; } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. @@ -396,7 +394,7 @@ public: bool isLittle() const { return IsLittle; } unsigned getMispredictionPenalty() const; - + /// This function returns true if the target has sincos() routine in its /// compiler runtime or math libraries. bool hasSinCos() const; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4ae539a..8876227 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -228,7 +228,7 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { bool ARMPassConfig::addPreISel() { const ARMSubtarget *Subtarget = &getARMSubtarget(); if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) - addPass(createARMAtomicExpandPass(TM)); + addPass(createAtomicExpandLoadLinkedPass(TM)); if (TM->getOptLevel() != CodeGenOpt::None) addPass(createGlobalMergePass(TM)); @@ -247,8 +247,7 @@ bool ARMPassConfig::addInstSelector() { } bool ARMPassConfig::addPreRegAlloc() { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. - if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only()) + if (getOptLevel() != CodeGenOpt::None) addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) addPass(createMLxExpansionPass()); @@ -262,12 +261,10 @@ bool ARMPassConfig::addPreRegAlloc() { } bool ARMPassConfig::addPreSched2() { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (getOptLevel() != CodeGenOpt::None) { - if (!getARMSubtarget().isThumb1Only()) { - addPass(createARMLoadStoreOptimizationPass()); - printAndVerify("After ARM load / store optimizer"); - } + addPass(createARMLoadStoreOptimizationPass()); + printAndVerify("After ARM load / store optimizer"); + if (getARMSubtarget().hasNEON()) addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 0c80a95..664c992 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -23,7 +23,6 @@ #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" @@ -102,7 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine { /// ARMLETargetMachine - ARM little endian target machine. 
/// class ARMLETargetMachine : public ARMTargetMachine { - virtual void anchor(); + void anchor() override; public: ARMLETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -113,7 +112,7 @@ public: /// ARMBETargetMachine - ARM big endian target machine. /// class ARMBETargetMachine : public ARMTargetMachine { - virtual void anchor(); + void anchor() override; public: ARMBETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -128,12 +127,12 @@ public: class ThumbTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); // Either Thumb1InstrInfo or Thumb2InstrInfo. - OwningPtr InstrInfo; + std::unique_ptr InstrInfo; const DataLayout DL; // Calculates type size & alignment ARMTargetLowering TLInfo; ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. - OwningPtr FrameLowering; + std::unique_ptr FrameLowering; public: ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -169,7 +168,7 @@ public: /// ThumbLETargetMachine - Thumb little endian target machine. /// class ThumbLETargetMachine : public ThumbTargetMachine { - virtual void anchor(); + void anchor() override; public: ThumbLETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -180,10 +179,10 @@ public: /// ThumbBETargetMachine - Thumb big endian target machine. /// class ThumbBETargetMachine : public ThumbTargetMachine { - virtual void anchor(); + void anchor() override; public: - ThumbBETargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, + ThumbBETargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); }; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 3379f85..48238bf 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -11,6 +11,7 @@ #include "ARMSubtarget.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" @@ -31,7 +32,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(isAAPCS_ABI); if (isAAPCS_ABI) { - LSDASection = NULL; + LSDASection = nullptr; } AttributesSection = @@ -45,6 +46,10 @@ const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference( const GlobalValue *GV, unsigned Encoding, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI, MCStreamer &Streamer) const { + if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM) + return TargetLoweringObjectFileELF::getTTypeGlobalReference( + GV, Encoding, Mang, TM, MMI, Streamer); + assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"); return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang), diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index 5f8d612..c926421 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -23,7 +23,7 @@ protected: public: ARMElfTargetObjectFile() : TargetLoweringObjectFileELF(), - AttributesSection(NULL) + AttributesSection(nullptr) {} void Initialize(MCContext &Ctx, const TargetMachine &TM) override; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp 
b/lib/Target/ARM/ARMTargetTransformInfo.cpp index d3b43cd..57df7da 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -14,7 +14,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "armtti" #include "ARM.h" #include "ARMTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -23,8 +22,10 @@ #include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "armtti" + // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { void initializeARMTTIPass(PassRegistry &); @@ -42,7 +43,7 @@ class ARMTTI final : public ImmutablePass, public TargetTransformInfo { unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk index 4be95aa..095955b 100644 --- a/lib/Target/ARM/Android.mk +++ b/lib/Target/ARM/Android.mk @@ -17,7 +17,6 @@ arm_codegen_TBLGEN_TABLES := \ arm_codegen_SRC_FILES := \ A15SDOptimizer.cpp \ ARMAsmPrinter.cpp \ - ARMAtomicExpandPass.cpp \ ARMBaseInstrInfo.cpp \ ARMBaseRegisterInfo.cpp \ ARMCodeEmitter.cpp \ diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 9c57a24..5cdf394 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -13,7 +13,6 @@ #include "MCTargetDesc/ARMArchName.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" @@ -23,9 +22,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCELF.h" #include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" @@ -345,7 +342,8 @@ public: }; ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII) + const MCInstrInfo &MII, + const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) { MCAsmParserExtension::Initialize(_Parser); @@ -416,7 +414,7 @@ class ARMOperand : public MCParsedAsmOperand { k_Token } Kind; - SMLoc StartLoc, EndLoc; + SMLoc StartLoc, EndLoc, AlignmentLoc; SmallVector Registers; struct CCOp { @@ -633,6 +631,12 @@ public: /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } + /// getAlignmentLoc - Get the location of the Alignment token of this operand. 
+ SMLoc getAlignmentLoc() const { + assert(Kind == k_Memory && "Invalid access!"); + return AlignmentLoc; + } + ARMCC::CondCodes getCondCode() const { assert(Kind == k_CondCode && "Invalid access!"); return CC.Val; @@ -1089,12 +1093,12 @@ public: bool isPostIdxReg() const { return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; } - bool isMemNoOffset(bool alignOK = false) const { + bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const { if (!isMem()) return false; // No offset of any kind. - return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 && - (alignOK || Memory.Alignment == 0); + return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr && + (alignOK || Memory.Alignment == Alignment); } bool isMemPCRelImm12() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) @@ -1110,6 +1114,65 @@ public: bool isAlignedMemory() const { return isMemNoOffset(true); } + bool isAlignedMemoryNone() const { + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemoryNone() const { + return isMemNoOffset(false, 0); + } + bool isAlignedMemory16() const { + if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory16() const { + if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory32() const { + if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory32() const { + if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory64() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64or128() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + return isMemNoOffset(false, 0); + } + bool isDupAlignedMemory64or128() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + return isMemNoOffset(false, 0); + } + bool isAlignedMemory64or128or256() const { + if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. + return true; + if (isMemNoOffset(false, 16)) // alignment in bytes for 128-bits is 16. + return true; + if (isMemNoOffset(false, 32)) // alignment in bytes for 256-bits is 32. + return true; + return isMemNoOffset(false, 0); + } bool isAddrMode2() const { if (!isMem() || Memory.Alignment != 0) return false; // Check for register offset. @@ -1545,7 +1608,10 @@ public: } bool isNEONi16splat() const { - if (!isImm()) return false; + if (isNEONByteReplicate(2)) + return false; // Leave that for bytes replication and forbid by default. + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast(getImm()); // Must be a constant. if (!CE) return false; @@ -1555,7 +1621,10 @@ public: } bool isNEONi32splat() const { - if (!isImm()) return false; + if (isNEONByteReplicate(4)) + return false; // Leave that for bytes replication and forbid by default. 
+ if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast(getImm()); // Must be a constant. if (!CE) return false; @@ -1567,11 +1636,36 @@ public: (Value >= 0x01000000 && Value <= 0xff000000); } + bool isNEONByteReplicate(unsigned NumBytes) const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + // Must be a constant. + if (!CE) + return false; + int64_t Value = CE->getValue(); + if (!Value) + return false; // Don't bother with zero. + + unsigned char B = Value & 0xff; + for (unsigned i = 1; i < NumBytes; ++i) { + Value >>= 8; + if ((Value & 0xff) != B) + return false; + } + return true; + } + bool isNEONi16ByteReplicate() const { return isNEONByteReplicate(2); } + bool isNEONi32ByteReplicate() const { return isNEONByteReplicate(4); } bool isNEONi32vmov() const { - if (!isImm()) return false; + if (isNEONByteReplicate(4)) + return false; // Let it to be classified as byte-replicate case. + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast(getImm()); // Must be a constant. - if (!CE) return false; + if (!CE) + return false; int64_t Value = CE->getValue(); // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, // for VMOV/VMVN only, 00Xf or 0Xff are also accepted. @@ -1612,7 +1706,7 @@ public: void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. - if (Expr == 0) + if (!Expr) Inst.addOperand(MCOperand::CreateImm(0)); else if (const MCConstantExpr *CE = dyn_cast(Expr)) Inst.addOperand(MCOperand::CreateImm(CE->getValue())); @@ -1926,6 +2020,50 @@ public: Inst.addOperand(MCOperand::CreateImm(Memory.Alignment)); } + void addDupAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory16Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory16Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory32Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory32Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory64Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addDupAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + + void addAlignedMemory64or128or256Operands(MCInst &Inst, unsigned N) const { + addAlignedMemoryOperands(Inst, N); + } + void addAddrMode2Operands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0; @@ -2275,6 +2413,19 @@ public: Inst.addOperand(MCOperand::CreateImm(Value)); } + void addNEONinvByteReplicateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. 
+ const MCConstantExpr *CE = dyn_cast(getImm()); + unsigned Value = CE->getValue(); + assert((Inst.getOpcode() == ARM::VMOVv8i8 || + Inst.getOpcode() == ARM::VMOVv16i8) && + "All vmvn instructions that wants to replicate non-zero byte " + "always must be replaced with VMOVv8i8 or VMOVv16i8."); + unsigned B = ((~Value) & 0xff); + B |= 0xe00; // cmode = 0b1110 + Inst.addOperand(MCOperand::CreateImm(B)); + } void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2289,6 +2440,19 @@ public: Inst.addOperand(MCOperand::CreateImm(Value)); } + void addNEONvmovByteReplicateOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast(getImm()); + unsigned Value = CE->getValue(); + assert((Inst.getOpcode() == ARM::VMOVv8i8 || + Inst.getOpcode() == ARM::VMOVv16i8) && + "All instructions that wants to replicate non-zero byte " + "always must be replaced with VMOVv8i8 or VMOVv16i8."); + unsigned B = Value & 0xff; + B |= 0xe00; // cmode = 0b1110 + Inst.addOperand(MCOperand::CreateImm(B)); + } void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2523,7 +2687,8 @@ public: unsigned ShiftImm, unsigned Alignment, bool isNegative, - SMLoc S, SMLoc E) { + SMLoc S, SMLoc E, + SMLoc AlignmentLoc = SMLoc()) { ARMOperand *Op = new ARMOperand(k_Memory); Op->Memory.BaseRegNum = BaseRegNum; Op->Memory.OffsetImm = OffsetImm; @@ -2534,6 +2699,7 @@ public: Op->Memory.isNegative = isNegative; Op->StartLoc = S; Op->EndLoc = E; + Op->AlignmentLoc = AlignmentLoc; return Op; } @@ -2806,7 +2972,7 @@ int ARMAsmParser::tryParseShiftRegister( // The source register for the shift has already been added to the // operand list, so we need to pop it off and combine it into the shifted // register operand instead. - OwningPtr PrevOp((ARMOperand*)Operands.pop_back_val()); + std::unique_ptr PrevOp((ARMOperand*)Operands.pop_back_val()); if (!PrevOp->isReg()) return Error(PrevOp->getStartLoc(), "shift must be of a register"); int SrcReg = PrevOp->getReg(); @@ -2825,7 +2991,7 @@ int ARMAsmParser::tryParseShiftRegister( Parser.getTok().is(AsmToken::Dollar)) { Parser.Lex(); // Eat hash. SMLoc ImmLoc = Parser.getTok().getLoc(); - const MCExpr *ShiftExpr = 0; + const MCExpr *ShiftExpr = nullptr; if (getParser().parseExpression(ShiftExpr, EndLoc)) { Error(ImmLoc, "invalid immediate shift value"); return -1; @@ -2855,12 +3021,12 @@ int ARMAsmParser::tryParseShiftRegister( EndLoc = Parser.getTok().getEndLoc(); ShiftReg = tryParseRegister(); if (ShiftReg == -1) { - Error (L, "expected immediate or register in shift operand"); + Error(L, "expected immediate or register in shift operand"); return -1; } } else { - Error (Parser.getTok().getLoc(), - "expected immediate or register in shift operand"); + Error(Parser.getTok().getLoc(), + "expected immediate or register in shift operand"); return -1; } } @@ -4323,8 +4489,9 @@ parseMemory(SmallVectorImpl &Operands) { E = Tok.getEndLoc(); Parser.Lex(); // Eat right bracket token. 
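The byte-replicate machinery above lets the assembler turn, e.g., vmov.i32 with #0xabababab (or vmvn.i32 with #0x54545454) into VMOVv8i8/VMOVv16i8 carrying just the replicated byte plus cmode 0b1110. A standalone rendering of the two pieces, the replicate test from isNEONByteReplicate and the immediate packing from addNEONvmovByteReplicateOperands / addNEONinvByteReplicateOperands; the helper names are mine, not LLVM's:

    #include <cassert>
    #include <cstdint>

    // True iff the low `numBytes` bytes of `value` are all the same
    // non-zero byte (zero is left to the ordinary splat patterns).
    static bool isByteReplicate(uint64_t value, unsigned numBytes) {
      if (value == 0)
        return false;
      uint8_t b = value & 0xff;
      for (unsigned i = 1; i < numBytes; ++i) {
        value >>= 8;
        if ((value & 0xff) != b)
          return false;
      }
      return true;
    }

    // Immediate operand for the rewritten VMOV: low 8 bits carry the
    // byte, 0xe00 sets cmode = 0b1110 (replicate byte to all lanes).
    // For the VMVN alias the byte is complemented first.
    static unsigned vmovByteImm(uint64_t value) { return (value & 0xff) | 0xe00; }
    static unsigned vmvnByteImm(uint64_t value) { return (~value & 0xff) | 0xe00; }

    int main() {
      assert(isByteReplicate(0xabababab, 4));   // vmov.i32 q0, #0xabababab
      assert(!isByteReplicate(0xab00abab, 4));
      assert(!isByteReplicate(0, 2));
      assert(vmovByteImm(0xabababab) == 0xeab);
      assert(vmvnByteImm(0x54545454) == 0xeab); // ~0x54 == 0xab
      return 0;
    }

This is also why isNEONi16splat/isNEONi32splat and isNEONi32vmov now bail out first when isNEONByteReplicate matches: the byte-replicate form takes precedence over the per-lane splat classification.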
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift, - 0, 0, false, S, E)); + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0, + ARM_AM::no_shift, 0, 0, false, + S, E)); // If there's a pre-indexing writeback marker, '!', just add it as a token // operand. It's rather odd, but syntactically valid. @@ -4346,6 +4513,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { if (Parser.getTok().is(AsmToken::Colon)) { Parser.Lex(); // Eat the ':'. E = Parser.getTok().getLoc(); + SMLoc AlignmentLoc = Tok.getLoc(); const MCExpr *Expr; if (getParser().parseExpression(Expr)) @@ -4378,9 +4546,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // Don't worry about range checking the value here. That's handled by // the is*() predicates. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0, ARM_AM::no_shift, 0, Align, - false, S, E)); + false, S, E, AlignmentLoc)); // If there's a pre-indexing writeback marker, '!', just add it as a token // operand. @@ -4471,7 +4639,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat right bracket token. - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum, + Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, OffsetRegNum, ShiftType, ShiftImm, 0, isNegative, S, E)); @@ -4926,8 +5094,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" || Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" || - Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") || - Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") || + Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" || + Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") || + Mnemonic.startswith("vsel") || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || @@ -5404,21 +5573,24 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } // GNU Assembler extension (compatibility) - if ((Mnemonic == "ldrd" || Mnemonic == "strd") && !isThumb() && - Operands.size() == 4) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[2]); - assert(Op->isReg() && "expected register argument"); + if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { + ARMOperand *Op2 = static_cast<ARMOperand*>(Operands[2]); + ARMOperand *Op3 = static_cast<ARMOperand*>(Operands[3]); + if (Op3->isMem()) { + assert(Op2->isReg() && "expected register argument"); - unsigned SuperReg = MRI->getMatchingSuperReg( - Op->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); + unsigned SuperReg = MRI->getMatchingSuperReg( + Op2->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); - assert(SuperReg && "expected register pair"); + assert(SuperReg && "expected register pair"); - unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1); + unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1); - Operands.insert(Operands.begin() + 3, - ARMOperand::CreateReg(PairedReg, Op->getStartLoc(), - Op->getEndLoc())); + Operands.insert(Operands.begin() + 3, + ARMOperand::CreateReg(PairedReg, + Op2->getStartLoc(), + Op2->getEndLoc())); + } } // FIXME: As said above, this is all a pretty gross hack.
This instruction @@ -5748,6 +5920,30 @@ validateInstruction(MCInst &Inst, return Error(Operands[Op]->getStartLoc(), "branch target out of range"); break; } + case ARM::MOVi16: + case ARM::t2MOVi16: + case ARM::t2MOVTi16: + { + // We want to avoid misleadingly allowing something like "mov r0, <symbol>" + // especially when we turn it into a movw and the expression <symbol> does + // not have a :lower16: or :upper16: as part of the expression. We don't + // want the behavior of silently truncating, which can be unexpected and + // lead to bugs that are difficult to find since this is an easy mistake + // to make. + int i = (Operands[3]->isImm()) ? 3 : 4; + ARMOperand *Op = static_cast<ARMOperand*>(Operands[i]); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + if (CE) break; + const MCExpr *E = dyn_cast<MCExpr>(Op->getImm()); + if (!E) break; + const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E); + if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) { + return Error(Op->getStartLoc(), + "immediate expression for mov requires :lower16: or :upper16"); + break; + } + } } return false; @@ -5898,7 +6094,7 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD; case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD; - case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD; + case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD; case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD; case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD; case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD; @@ -7860,9 +8056,11 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } -template<> inline bool IsCPSRDead<MCInst>(MCInst* Instr) { +namespace llvm { +template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { return true; // In an assembly source, no need to second-guess } +} static const char *getSubtargetFeatureName(unsigned Val); bool ARMAsmParser:: @@ -7965,6 +8163,42 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); } + case Match_AlignedMemoryRequiresNone: + case Match_DupAlignedMemoryRequiresNone: + case Match_AlignedMemoryRequires16: + case Match_DupAlignedMemoryRequires16: + case Match_AlignedMemoryRequires32: + case Match_DupAlignedMemoryRequires32: + case Match_AlignedMemoryRequires64: + case Match_DupAlignedMemoryRequires64: + case Match_AlignedMemoryRequires64or128: + case Match_DupAlignedMemoryRequires64or128: + case Match_AlignedMemoryRequires64or128or256: + { + SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getAlignmentLoc(); + if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; + switch (MatchResult) { + default: + llvm_unreachable("Missing Match_Aligned type"); + case Match_AlignedMemoryRequiresNone: + case Match_DupAlignedMemoryRequiresNone: + return Error(ErrorLoc, "alignment must be omitted"); + case Match_AlignedMemoryRequires16: + case Match_DupAlignedMemoryRequires16: + return Error(ErrorLoc, "alignment must be 16 or omitted"); + case Match_AlignedMemoryRequires32: + case Match_DupAlignedMemoryRequires32: + return Error(ErrorLoc, "alignment must be 32 or omitted"); + case Match_AlignedMemoryRequires64: + case
Match_DupAlignedMemoryRequires64: + return Error(ErrorLoc, "alignment must be 64 or omitted"); + case Match_AlignedMemoryRequires64or128: + case Match_DupAlignedMemoryRequires64or128: + return Error(ErrorLoc, "alignment must be 64, 128 or omitted"); + case Match_AlignedMemoryRequires64or128or256: + return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted"); + } + } } llvm_unreachable("Implement any new match types added!"); @@ -7972,6 +8206,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /// parseDirective parses the arm specific directives bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { + const MCObjectFileInfo::Environment Format = + getContext().getObjectFileInfo()->getObjectFileType(); + bool IsMachO = Format == MCObjectFileInfo::IsMachO; + StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") return parseLiteralValues(4, DirectiveID.getLoc()); @@ -7989,16 +8227,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveSyntax(DirectiveID.getLoc()); else if (IDVal == ".unreq") return parseDirectiveUnreq(DirectiveID.getLoc()); - else if (IDVal == ".arch") - return parseDirectiveArch(DirectiveID.getLoc()); - else if (IDVal == ".eabi_attribute") - return parseDirectiveEabiAttr(DirectiveID.getLoc()); - else if (IDVal == ".cpu") - return parseDirectiveCPU(DirectiveID.getLoc()); - else if (IDVal == ".fpu") - return parseDirectiveFPU(DirectiveID.getLoc()); - else if (IDVal == ".fnstart") - return parseDirectiveFnStart(DirectiveID.getLoc()); else if (IDVal == ".fnend") return parseDirectiveFnEnd(DirectiveID.getLoc()); else if (IDVal == ".cantunwind") @@ -8015,12 +8243,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveRegSave(DirectiveID.getLoc(), false); else if (IDVal == ".vsave") return parseDirectiveRegSave(DirectiveID.getLoc(), true); - else if (IDVal == ".inst") - return parseDirectiveInst(DirectiveID.getLoc()); - else if (IDVal == ".inst.n") - return parseDirectiveInst(DirectiveID.getLoc(), 'n'); - else if (IDVal == ".inst.w") - return parseDirectiveInst(DirectiveID.getLoc(), 'w'); else if (IDVal == ".ltorg" || IDVal == ".pool") return parseDirectiveLtorg(DirectiveID.getLoc()); else if (IDVal == ".even") @@ -8029,18 +8251,38 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectivePersonalityIndex(DirectiveID.getLoc()); else if (IDVal == ".unwind_raw") return parseDirectiveUnwindRaw(DirectiveID.getLoc()); - else if (IDVal == ".tlsdescseq") - return parseDirectiveTLSDescSeq(DirectiveID.getLoc()); else if (IDVal == ".movsp") return parseDirectiveMovSP(DirectiveID.getLoc()); - else if (IDVal == ".object_arch") - return parseDirectiveObjectArch(DirectiveID.getLoc()); else if (IDVal == ".arch_extension") return parseDirectiveArchExtension(DirectiveID.getLoc()); else if (IDVal == ".align") return parseDirectiveAlign(DirectiveID.getLoc()); else if (IDVal == ".thumb_set") return parseDirectiveThumbSet(DirectiveID.getLoc()); + + if (!IsMachO) { + if (IDVal == ".arch") + return parseDirectiveArch(DirectiveID.getLoc()); + else if (IDVal == ".cpu") + return parseDirectiveCPU(DirectiveID.getLoc()); + else if (IDVal == ".eabi_attribute") + return parseDirectiveEabiAttr(DirectiveID.getLoc()); + else if (IDVal == ".fpu") + return parseDirectiveFPU(DirectiveID.getLoc()); + else if (IDVal == ".fnstart") + return parseDirectiveFnStart(DirectiveID.getLoc()); + else if (IDVal == ".inst") + return parseDirectiveInst(DirectiveID.getLoc()); + else if (IDVal == ".inst.n") + return 
parseDirectiveInst(DirectiveID.getLoc(), 'n'); + else if (IDVal == ".inst.w") + return parseDirectiveInst(DirectiveID.getLoc(), 'w'); + else if (IDVal == ".object_arch") + return parseDirectiveObjectArch(DirectiveID.getLoc()); + else if (IDVal == ".tlsdescseq") + return parseDirectiveTLSDescSeq(DirectiveID.getLoc()); + } + return true; } @@ -8121,32 +8363,6 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) { if (NextSymbolIsThumb) { getParser().getStreamer().EmitThumbFunc(Symbol); NextSymbolIsThumb = false; - return; - } - - if (!isThumb()) - return; - - const MCObjectFileInfo::Environment Format = - getContext().getObjectFileInfo()->getObjectFileType(); - switch (Format) { - case MCObjectFileInfo::IsCOFF: { - const MCSymbolData &SD = - getParser().getStreamer().getOrCreateSymbolData(Symbol); - char Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; - if (SD.getFlags() & (Type << COFF::SF_TypeShift)) - getParser().getStreamer().EmitThumbFunc(Symbol); - break; - } - case MCObjectFileInfo::IsELF: { - const MCSymbolData &SD = - getParser().getStreamer().getOrCreateSymbolData(Symbol); - if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift)) - getParser().getStreamer().EmitThumbFunc(Symbol); - break; - } - case MCObjectFileInfo::IsMachO: - break; } } @@ -8303,14 +8519,6 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { /// parseDirectiveArch /// ::= .arch token bool ARMAsmParser::parseDirectiveArch(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".arch directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - StringRef Arch = getParser().parseStringToEndOfStatement().trim(); unsigned ID = StringSwitch<unsigned>(Arch) @@ -8334,14 +8542,6 @@ /// ::= .eabi_attribute int, int [, "str"] /// ::= .eabi_attribute Tag_name, int [, "str"] bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".eabi_attribute directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - int64_t Tag; SMLoc TagLoc; TagLoc = Parser.getTok().getLoc(); @@ -8447,14 +8647,6 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { /// parseDirectiveCPU /// ::= .cpu str bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".cpu directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - StringRef CPU = getParser().parseStringToEndOfStatement().trim(); getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU); return false; @@ -8463,14 +8655,6 @@ /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".fpu directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - StringRef FPU = getParser().parseStringToEndOfStatement().trim(); unsigned ID = StringSwitch<unsigned>(FPU) @@ -8490,14 +8674,6 @@ /// parseDirectiveFnStart /// ::= .fnstart bool
ARMAsmParser::parseDirectiveFnStart(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".fnstart directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - if (UC.hasFnStart()) { Error(L, ".fnstart starts before the end of previous one"); UC.emitFnStartLocNotes(); @@ -8777,14 +8953,6 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { /// ::= .inst.n opcode [, ...] /// ::= .inst.w opcode [, ...] bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(Loc, ".inst directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - int Width; if (isThumb()) { @@ -9033,14 +9201,6 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { /// parseDirectiveTLSDescSeq /// ::= .tlsdescseq tls-variable bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".tlsdescseq directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - if (getLexer().isNot(AsmToken::Identifier)) { TokError("expected variable after '.tlsdescseq' directive"); Parser.eatToEndOfStatement(); @@ -9128,14 +9288,6 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { /// parseDirectiveObjectArch /// ::= .object_arch name bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); - if (isMachO) { - Error(L, ".object_arch directive not valid for Mach-O"); - Parser.eatToEndOfStatement(); - return false; - } - if (getLexer().isNot(AsmToken::Identifier)) { Error(getLexer().getLoc(), "unexpected token"); Parser.eatToEndOfStatement(); @@ -9221,36 +9373,7 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { Lex(); MCSymbol *Alias = getContext().GetOrCreateSymbol(Name); - if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) { - MCSymbol *Sym = getContext().LookupSymbol(SRE->getSymbol().getName()); - if (!Sym->isDefined()) { - getStreamer().EmitSymbolAttribute(Sym, MCSA_Global); - getStreamer().EmitAssignment(Alias, Value); - return false; - } - - const MCObjectFileInfo::Environment Format = - getContext().getObjectFileInfo()->getObjectFileType(); - switch (Format) { - case MCObjectFileInfo::IsCOFF: { - char Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; - getStreamer().EmitCOFFSymbolType(Type); - // .set values are always local in COFF - getStreamer().EmitSymbolAttribute(Alias, MCSA_Local); - break; - } - case MCObjectFileInfo::IsELF: - getStreamer().EmitSymbolAttribute(Alias, MCSA_ELF_TypeFunction); - break; - case MCObjectFileInfo::IsMachO: - break; - } - } - - // FIXME: set the function as being a thumb function via the assembler - getStreamer().EmitThumbFunc(Alias); - getStreamer().EmitAssignment(Alias, Value); - + getTargetStreamer().emitThumbSet(Alias, Value); return false; } @@ -9365,8 +9488,8 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, int64_t Value; if (!SOExpr->EvaluateAsAbsolute(Value)) return Match_Success; - assert((Value >= INT32_MIN && Value <= INT32_MAX) && - "expression value must be representiable in 32 bits"); +
assert((Value >= INT32_MIN && Value <= UINT32_MAX) && + "expression value must be representable in 32 bits"); } break; case MCK_GPRPair: diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 8e14883..9b5fa75 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -17,7 +17,6 @@ add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen A15SDOptimizer.cpp ARMAsmPrinter.cpp - ARMAtomicExpandPass.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp ARMCodeEmitter.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 9e40381..4d4038d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "arm-disassembler" - #include "llvm/MC/MCDisassembler.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -29,6 +27,8 @@ using namespace llvm; +#define DEBUG_TYPE "arm-disassembler" + typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { @@ -90,8 +90,8 @@ class ARMDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - ARMDisassembler(const MCSubtargetInfo &STI) : - MCDisassembler(STI) { + ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : + MCDisassembler(STI, Ctx) { } ~ARMDisassembler() { @@ -109,8 +109,8 @@ class ThumbDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - ThumbDisassembler(const MCSubtargetInfo &STI) : - MCDisassembler(STI) { + ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : + MCDisassembler(STI, Ctx) { } ~ThumbDisassembler() { @@ -400,12 +400,16 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" -static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) { - return new ARMDisassembler(STI); +static MCDisassembler *createARMDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new ARMDisassembler(STI, Ctx); } -static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) { - return new ThumbDisassembler(STI); +static MCDisassembler *createThumbDisassembler(const Target &T, + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new ThumbDisassembler(STI, Ctx); } DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index da3fe01..e4b785d 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "ARMInstPrinter.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -23,6 +22,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #include "ARMGenAsmWriter.inc" /// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing. 
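For reference, the byte-replication predicate added to ARMAsmParser.cpp above is easy to exercise in isolation. The sketch below is an editor's illustration, not part of the patch: a plain int64_t stands in for the MCConstantExpr operand, and main() exists only to drive the checks. It mirrors isNEONByteReplicate() and the cmode = 0b1110 encoding built by the addNEON*ByteReplicateOperands methods.

#include <cassert>
#include <cstdint>

// Does the immediate consist of one non-zero byte repeated NumBytes times?
static bool isByteReplicate(int64_t Value, unsigned NumBytes) {
  if (!Value)
    return false; // Zero is left to the ordinary VMOV immediate patterns.
  unsigned char B = Value & 0xff;
  for (unsigned i = 1; i < NumBytes; ++i) {
    Value >>= 8;
    if ((Value & 0xff) != B)
      return false;
  }
  return true;
}

int main() {
  assert(isByteReplicate(0xabab, 2));      // candidate for vmov.i16
  assert(isByteReplicate(0x7f7f7f7f, 4));  // candidate for vmov.i32
  assert(!isByteReplicate(0x00ff00ff, 4)); // bytes differ: not a replicate
  // The operand encoding keeps the byte in bits 7:0 and sets cmode = 0b1110,
  // exactly as addNEONvmovByteReplicateOperands does with B |= 0xe00.
  unsigned B = (0xabU & 0xff) | 0xe00;
  assert(B == 0xeab);
  return 0;
}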
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 1db517f..7acd9cc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -306,8 +306,36 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } +static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) { + if (IsLittleEndian) { + // Note that the halfwords are stored high first and low second in thumb; + // so we need to swap the fixup value here to map properly. + uint32_t Swapped = (Value & 0xFFFF0000) >> 16; + Swapped |= (Value & 0x0000FFFF) << 16; + return Swapped; + } + else + return Value; +} + +static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, + bool IsLittleEndian) { + uint32_t Value; + + if (IsLittleEndian) { + Value = (SecondHalf & 0xFFFF) << 16; + Value |= (FirstHalf & 0xFFFF); + } else { + Value = (SecondHalf & 0xFFFF); + Value |= (FirstHalf & 0xFFFF) << 16; + } + + return Value; +} + static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - bool IsPCRel, MCContext *Ctx) { + bool IsPCRel, MCContext *Ctx, + bool IsLittleEndian) { unsigned Kind = Fixup.getKind(); switch (Kind) { default: @@ -316,6 +344,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case FK_Data_2: case FK_Data_4: return Value; + case FK_SecRel_2: + return Value; + case FK_SecRel_4: + return Value; case ARM::fixup_arm_movt_hi16: if (!IsPCRel) Value >>= 16; @@ -342,9 +374,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // inst{14-12} = Mid3; // inst{7-0} = Lo8; Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); - uint64_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(Value, IsLittleEndian); } case ARM::fixup_arm_ldst_pcrel_12: // ARM PC-relative values are offset by 8. @@ -364,11 +394,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Same addressing mode as fixup_arm_pcrel_10, // but with 16-bit halfwords swapped. 
- if (Kind == ARM::fixup_t2_ldst_pcrel_12) { - uint64_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; - } + if (Kind == ARM::fixup_t2_ldst_pcrel_12) + return swapHalfWords(Value, IsLittleEndian); return Value; } @@ -401,9 +428,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x700) << 4; out |= (Value & 0x0FF); - uint64_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_condbranch: @@ -434,9 +459,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x1FF800) << 5; // imm6 field out |= (Value & 0x0007FF); // imm11 field - uint64_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_t2_condbranch: { Value = Value - 4; @@ -449,9 +472,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, out |= (Value & 0x1F800) << 5; // imm6 field out |= (Value & 0x007FF); // imm11 field - uint32_t swapped = (out & 0xFFFF0000) >> 16; - swapped |= (out & 0x0000FFFF) << 16; - return swapped; + return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_thumb_bl: { // The value doesn't encode the low bit (always zero) and is offset by @@ -475,13 +496,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t imm10Bits = (offset & 0x1FF800) >> 11; uint32_t imm11Bits = (offset & 0x000007FF); - uint32_t Binary = 0; - uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); - uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); + uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | (uint16_t)imm11Bits); - Binary |= secondHalf << 16; - Binary |= firstHalf; - return Binary; + return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_blx: { // The value doesn't encode the low two bits (always zero) and is offset by @@ -508,13 +526,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t imm10HBits = (offset & 0xFFC00) >> 10; uint32_t imm10LBits = (offset & 0x3FF); - uint32_t Binary = 0; - uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); - uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | + uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); + uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | ((uint16_t)imm10LBits) << 1); - Binary |= secondHalf << 16; - Binary |= firstHalf; - return Binary; + return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_cp: // Offset by 4, and don't encode the low two bits. Two bytes of that @@ -566,11 +581,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords // swapped. - if (Kind == ARM::fixup_t2_pcrel_10) { - uint32_t swapped = (Value & 0xFFFF0000) >> 16; - swapped |= (Value & 0x0000FFFF) << 16; - return swapped; - } + if (Kind == ARM::fixup_t2_pcrel_10) + return swapHalfWords(Value, IsLittleEndian); return Value; } @@ -603,7 +615,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the offset when the destination has the same MCFragment. 
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); - MCSymbolData &SymData = Asm.getSymbolData(Sym); + const MCSymbolData &SymData = Asm.getSymbolData(Sym); IsResolved = (SymData.getFragment() == DF); } // We must always generate a relocation for BL/BLX instructions if we have @@ -618,7 +630,8 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // Try to get the encoded value for the fixup as-if we're mapping it into // the instruction. This allows adjustFixupValue() to issue a diagnostic // if the value is invalid. - (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext()); + (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(), + IsLittleEndian); } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -662,6 +675,11 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case ARM::fixup_t2_movt_hi16: case ARM::fixup_t2_movw_lo16: return 4; + + case FK_SecRel_2: + return 2; + case FK_SecRel_4: + return 4; } } @@ -716,7 +734,7 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr); + Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian); if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); @@ -724,8 +742,11 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, // Used to point to big endian bytes. unsigned FullSizeBytes; - if (!IsLittleEndian) + if (!IsLittleEndian) { FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind()); + assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!"); + assert(NumBytes <= FullSizeBytes && "Invalid fixup size!"); + } // For each byte of the fragment that the fixup touches, mask in the bits from // the fixup value. The Value has been "split up" into the appropriate @@ -737,6 +758,15 @@ } namespace { +// FIXME: This should be in a separate file. +class ARMWinCOFFAsmBackend : public ARMAsmBackend { +public: + ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple) + : ARMAsmBackend(T, Triple, true) { } + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); + } +}; // FIXME: This should be in a separate file. // ELF is an ELF of course... @@ -777,7 +807,9 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, bool isLittle) { Triple TheTriple(TT); - if (TheTriple.isOSBinFormatMachO()) { + switch (TheTriple.getObjectFormat()) { + default: llvm_unreachable("unsupported object format"); + case Triple::MachO: { MachO::CPUSubTypeARM CS = StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) @@ -792,15 +824,14 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, return new DarwinARMAsmBackend(T, TT, CS); } - -#if 0 - // FIXME: Introduce yet another checker but assert(0).
- if (TheTriple.isOSBinFormatCOFF()) - assert(0 && "Windows not supported on ARM"); -#endif - - uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFARMAsmBackend(T, TT, OSABI, isLittle); + case Triple::COFF: + assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); + return new ARMWinCOFFAsmBackend(T, TT); + case Triple::ELF: + assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target"); + uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + return new ELFARMAsmBackend(T, TT, OSABI, isLittle); + } } MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index a4661b1..1c84263 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -74,7 +74,7 @@ unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target, unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - MCSymbolRefExpr::VariantKind Modifier = Fixup.getAccessVariant(); + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); unsigned Type = 0; if (IsPCRel) { @@ -91,6 +91,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_GOTTPOFF: Type = ELF::R_ARM_TLS_IE32; break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_ARM_GOT_PREL; + break; } break; case ARM::fixup_arm_blx: @@ -167,6 +170,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_GOTOFF: Type = ELF::R_ARM_GOTOFF32; break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_ARM_GOT_PREL; + break; case MCSymbolRefExpr::VK_ARM_TARGET1: Type = ELF::R_ARM_TARGET1; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 5a01d26..a4d13ed 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSection.h" @@ -62,7 +63,7 @@ static const char *GetFPUName(unsigned ID) { #define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME; #include "ARMFPUName.def" } - return NULL; + return nullptr; } static const char *GetArchName(unsigned ID) { @@ -75,7 +76,7 @@ static const char *GetArchName(unsigned ID) { #define ARM_ARCH_ALIAS(NAME, ID) /* empty */ #include "ARMArchName.def" } - return NULL; + return nullptr; } static const char *GetArchDefaultCPUName(unsigned ID) { @@ -88,7 +89,7 @@ static const char *GetArchDefaultCPUName(unsigned ID) { #define ARM_ARCH_ALIAS(NAME, ID) /* empty */ #include "ARMArchName.def" } - return NULL; + return nullptr; } static unsigned GetArchDefaultCPUArch(unsigned ID) { @@ -139,6 +140,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void finishAttributeSection() override; void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; public: ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS, @@ -260,6 +262,10 @@ ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { OS << "\t.tlsdescseq\t" << S->getSymbol().getName(); } +void 
ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { + OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n'; + } + void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) { OS << "\t.inst"; if (Suffix) @@ -310,7 +316,7 @@ private: for (size_t i = 0; i < Contents.size(); ++i) if (Contents[i].Tag == Attribute) return &Contents[i]; - return 0; + return nullptr; } void setAttributeItem(unsigned Attribute, unsigned Value, @@ -406,8 +412,10 @@ private: void emitFPU(unsigned FPU) override; void emitInst(uint32_t Inst, char Suffix = '\0') override; void finishAttributeSection() override; + void emitLabel(MCSymbol *Symbol) override; void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override; + void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override; size_t calculateContentSize() const; @@ -415,7 +423,7 @@ public: ARMTargetELFStreamer(MCStreamer &S) : ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU), Arch(ARM::INVALID_ARCH), EmittedArch(ARM::INVALID_ARCH), - AttributeSection(0) {} + AttributeSection(nullptr) {} }; /// Extend the generic ELFStreamer class so that it can emit mapping symbols at @@ -531,7 +539,8 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - void EmitValueImpl(const MCExpr *Value, unsigned Size) override { + void EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) override { EmitDataMappingSymbol(); MCELFStreamer::EmitValueImpl(Value, Size); } @@ -600,12 +609,8 @@ private: } void EmitThumbFunc(MCSymbol *Func) override { - // FIXME: Anything needed here to flag the function as thumb? - getAssembler().setIsThumbFunc(Func); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func); - SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc); + EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction); } // Helper functions for ARM exception handling directives @@ -980,10 +985,35 @@ void ARMTargetELFStreamer::finishAttributeSection() { Contents.clear(); FPU = ARM::INVALID_FPU; } + +void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) { + ARMELFStreamer &Streamer = getStreamer(); + if (!Streamer.IsThumb) + return; + + const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol); + if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift)) + Streamer.EmitThumbFunc(Symbol); +} + void ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) { getStreamer().EmitFixup(S, FK_Data_4); } + +void ARMTargetELFStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { + if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) { + const MCSymbol &Sym = SRE->getSymbol(); + if (!Sym.isDefined()) { + getStreamer().EmitAssignment(Symbol, Value); + return; + } + } + + getStreamer().EmitThumbFunc(Symbol); + getStreamer().EmitAssignment(Symbol, Value); +} + void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) { getStreamer().emitInst(Inst, Suffix); } @@ -1012,7 +1042,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix, } // Get .ARM.extab or .ARM.exidx section - const MCSectionELF *EHSection = NULL; + const MCSectionELF *EHSection = nullptr; if (const MCSymbol *Group = FnSection.getGroup()) { EHSection = getContext().getELFSection( EHSecName, Type, Flags | ELF::SHF_GROUP, Kind, @@ -1049,9 +1079,9 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { } void ARMELFStreamer::Reset() { - ExTab = NULL; - FnStart = NULL; -
Personality = NULL; + ExTab = nullptr; + FnStart = nullptr; + Personality = nullptr; PersonalityIndex = ARM::EHABI::NUM_PERSONALITY_INDEX; FPReg = ARM::SP; FPOffset = 0; @@ -1065,7 +1095,7 @@ } void ARMELFStreamer::emitFnStart() { - assert(FnStart == 0); + assert(FnStart == nullptr); FnStart = getContext().CreateTempSymbol(); EmitLabel(FnStart); } @@ -1104,11 +1134,14 @@ void ARMELFStreamer::emitFnEnd() { // the second word of exception index table entry. The size of the unwind // opcodes should always be 4 bytes. assert(PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0 && - "Compact model must use __aeabi_cpp_unwind_pr0 as personality"); + "Compact model must use __aeabi_unwind_cpp_pr0 as personality"); assert(Opcodes.size() == 4u && - "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4"); - EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()), - Opcodes.size())); + "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4"); + uint64_t Intval = Opcodes[0] | + Opcodes[1] << 8 | + Opcodes[2] << 16 | + Opcodes[3] << 24; + EmitIntValue(Intval, Opcodes.size()); } // Switch to the section containing FnStart @@ -1180,8 +1213,15 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) { } // Emit unwind opcodes - EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()), - Opcodes.size())); + assert((Opcodes.size() % 4) == 0 && + "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be multiple of 4"); + for (unsigned I = 0; I != Opcodes.size(); I += 4) { + uint64_t Intval = Opcodes[I] | + Opcodes[I + 1] << 8 | + Opcodes[I + 2] << 16 | + Opcodes[I + 3] << 24; + EmitIntValue(Intval, 4); + } // According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or // __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted @@ -1283,13 +1323,11 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset, namespace llvm { MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useCFI, - bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = - llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory, - InstPrint, CE, TAB, ShowInst); + MCStreamer *S = llvm::createAsmStreamer( Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm); return S; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index b7f96e0..7a19208 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -25,7 +25,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) { (TheTriple.getArch() == Triple::thumbeb)) IsLittleEndian = false; - Data64bitsDirective = 0; + Data64bitsDirective = nullptr; CommentString = "@"; Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; @@ -50,7 +50,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { // ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false; - Data64bitsDirective = 0; + Data64bitsDirective = nullptr; CommentString = "@"; Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; @@ -59,7 +59,14 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = ExceptionHandling::ARM; + switch (TheTriple.getOS()) { + case Triple::NetBSD: + ExceptionsType = ExceptionHandling::DwarfCFI; + break; + default: + ExceptionsType = ExceptionHandling::ARM; + break; + } // foo(plt) instead of foo@plt UseParensForSymbolVariant = true; @@ -89,6 +96,7 @@ void ARMCOFFMCAsmInfoGNU::anchor() { } ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { AlignmentIsInBytes = false; + HasSingleParameterDotFile = true; CommentString = "@"; Code16Directive = ".code\t16"; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index beaf6a4..51cfa0a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -35,13 +35,13 @@ namespace llvm { }; class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { - void anchor(); + void anchor() override; public: explicit ARMCOFFMCAsmInfoMicrosoft(); }; class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF { - void anchor(); + void anchor() override; public: explicit ARMCOFFMCAsmInfoGNU(); }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index 5564e0a..5b51a52 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" @@ -31,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "mccodeemitter" + STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created."); @@ -1036,16 +1037,17 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx, : ARM::fixup_arm_movw_lo16); break; } + Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); return 0; } // If the expression doesn't have :upper16: or :lower16: on it, - // it's just a plain immediate expression, and those evaluate to + // it's just a plain immediate expression, previously those evaluated to // the lower 16 bits of the expression regardless of whether - // we have a movt or a movw. - Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movw_lo16 - : ARM::fixup_arm_movw_lo16); - Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc())); + // we have a movt or a movw, but that led to misleading results. + // This is now disallowed in the AsmParser in validateInstruction() + // so this should never happen.
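+ // (Illustrative note, not from the original patch: after this change a
+ // bare "movw r0, sym" is rejected up front by the parser's
+ // validateInstruction(), while "movw r0, :lower16:sym" and
+ // "movt r0, :upper16:sym" still assemble and take the fixup path above.)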
+ assert(0 && "expression without :upper16: or :lower16:"); return 0; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index fc8505b..87ea875 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -7,12 +7,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "armmcexpr" #include "ARMMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" using namespace llvm; +#define DEBUG_TYPE "armmcexpr" + const ARMMCExpr* ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 949a3d5..04d63a7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" @@ -106,9 +107,11 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { unsigned SubVer = TT[Idx]; if (SubVer == '8') { if (NoCPU) - // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP, - // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC - ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc"; + // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, + // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, + // FeatureT2XtPk, FeatureCrypto, FeatureCRC + ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," + "+trustzone,+t2xtpk,+crypto,+crc"; else // Use CPU to figure out the exact features ARMArchFeature = "+v8"; @@ -245,7 +248,7 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { } unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true); - MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(0, Reg, 0)); + MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0)); return MAI; } @@ -273,18 +276,20 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, bool NoExecStack) { Triple TheTriple(TT); - if (TheTriple.isOSBinFormatMachO()) { + switch (TheTriple.getObjectFormat()) { + default: llvm_unreachable("unsupported object format"); + case Triple::MachO: { MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false); new ARMTargetStreamer(*S); return S; } - - if (TheTriple.isOSWindows()) { - llvm_unreachable("ARM does not support Windows COFF format"); + case Triple::COFF: + assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); + return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS); + case Triple::ELF: + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, + TheTriple.getArch() == Triple::thumb); } - - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, - TheTriple.getArch() == Triple::thumb); } static MCInstPrinter *createARMMCInstPrinter(const Target &T, @@ -295,7 +300,7 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) return new ARMInstPrinter(MAI, MII, MRI, STI); - return 0; + return nullptr; } static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT, diff 
--git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index e81876f..8853a8c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -47,8 +47,7 @@ namespace ARM_MC { } MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useCFI, - bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst); @@ -78,6 +77,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); +/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which +/// will generate a PE/COFF object file. +MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + MCCodeEmitter &Emitter, raw_ostream &OS); + /// createARMELFObjectWriter - Construct an ELF Mach-O object writer. MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS, uint8_t OSABI, @@ -89,6 +93,8 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS, uint32_t CPUType, uint32_t CPUSubtype); +/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer. +MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit); /// createARMMachORelocationInfo - Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 3bf5cf1..ecfa4e5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -156,7 +156,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); + const MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -170,7 +170,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, FixedValue += SecAddr; if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -206,11 +206,11 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // The thumb bit shouldn't be set in the 'other-half' bit of the // relocation, but it will be set in FixedValue if the base symbol // is a thumb function. Clear it out here. - if (A_SD->getFlags() & SF_ThumbFunc) + if (Asm.isThumbFunc(A)) FixedValue &= 0xfffffffe; break; case ARM::fixup_t2_movt_hi16: - if (A_SD->getFlags() & SF_ThumbFunc) + if (Asm.isThumbFunc(A)) FixedValue &= 0xfffffffe; MovtBit = 1; // Fallthrough @@ -259,7 +259,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, // See . 
const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); + const MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -272,7 +272,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) Asm.getContext().FatalError(Fixup.getLoc(), @@ -378,7 +378,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, } // Get the symbol data, if any. - MCSymbolData *SD = 0; + const MCSymbolData *SD = nullptr; if (Target.getSymA()) SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index fdc0ed7..e3cfb05 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -109,7 +109,7 @@ ConstantPool * AssemblerConstantPools::getConstantPool(const MCSection *Section) { ConstantPoolMapTy::iterator CP = ConstantPools.find(Section); if (CP == ConstantPools.end()) - return 0; + return nullptr; return &CP->second; } @@ -246,3 +246,7 @@ void ARMTargetStreamer::AnnotateTLSDescriptorSequence( const MCSymbolRefExpr *SRE) { llvm_unreachable("unimplemented"); } + +void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) { + llvm_unreachable("unimplemented"); +} diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp new file mode 100644 index 0000000..d31f1f4 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -0,0 +1,82 @@ +//===-- ARMWinCOFFObjectWriter.cpp - ARM Windows COFF Object Writer -- C++ -==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCWinCOFFObjectWriter.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +namespace { +class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { +public: + ARMWinCOFFObjectWriter(bool Is64Bit) + : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) { + assert(!Is64Bit && "AArch64 support not yet implemented"); + } + virtual ~ARMWinCOFFObjectWriter() { } + + unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsCrossSection) const override; + + bool recordRelocation(const MCFixup &) const override; +}; + +unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsCrossSection) const { + assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT && + "AArch64 support not yet implemented"); + + MCSymbolRefExpr::VariantKind Modifier = + Target.isAbsolute() ? 
MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + + switch (static_cast<unsigned>(Fixup.getKind())) { + default: llvm_unreachable("unsupported relocation type"); + case FK_Data_4: + switch (Modifier) { + case MCSymbolRefExpr::VK_COFF_IMGREL32: + return COFF::IMAGE_REL_ARM_ADDR32NB; + case MCSymbolRefExpr::VK_SECREL: + return COFF::IMAGE_REL_ARM_SECREL; + default: + return COFF::IMAGE_REL_ARM_ADDR32; + } + case FK_SecRel_2: + return COFF::IMAGE_REL_ARM_SECTION; + case FK_SecRel_4: + return COFF::IMAGE_REL_ARM_SECREL; + case ARM::fixup_t2_condbranch: + return COFF::IMAGE_REL_ARM_BRANCH20T; + case ARM::fixup_t2_uncondbranch: + return COFF::IMAGE_REL_ARM_BRANCH24T; + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + return COFF::IMAGE_REL_ARM_BLX23T; + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movt_hi16: + return COFF::IMAGE_REL_ARM_MOV32T; + } +} + +bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { + return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16; +} +} + +namespace llvm { +MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) { + MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); + return createWinCOFFObjectWriter(MOTW, OS); +} +} + diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp new file mode 100644 index 0000000..b344ced --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -0,0 +1,46 @@ +//===-- ARMWinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARMMCTargetDesc.h" +#include "llvm/MC/MCWinCOFFStreamer.h" + +using namespace llvm; + +namespace { +class ARMWinCOFFStreamer : public MCWinCOFFStreamer { +public: + ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, + raw_ostream &OS) + : MCWinCOFFStreamer(C, AB, CE, OS) { } + + void EmitAssemblerFlag(MCAssemblerFlag Flag) override; + void EmitThumbFunc(MCSymbol *Symbol) override; +}; + +void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: llvm_unreachable("not implemented"); + case MCAF_SyntaxUnified: + case MCAF_Code16: + break; + } +} + +void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { + getAssembler().setIsThumbFunc(Symbol); +} +} + +namespace llvm { +MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, + MCCodeEmitter &Emitter, raw_ostream &OS) { + return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS); +} +} + diff --git a/lib/Target/ARM/MCTargetDesc/Android.mk b/lib/Target/ARM/MCTargetDesc/Android.mk index 074d29e..a5827f7 100644 --- a/lib/Target/ARM/MCTargetDesc/Android.mk +++ b/lib/Target/ARM/MCTargetDesc/Android.mk @@ -17,7 +17,9 @@ arm_mc_desc_SRC_FILES := \ ARMMachObjectWriter.cpp \ ARMMachORelocationInfo.cpp \ ARMTargetStreamer.cpp \ - ARMUnwindOpAsm.cpp + ARMUnwindOpAsm.cpp \ + ARMWinCOFFObjectWriter.cpp \ + ARMWinCOFFStreamer.cpp \ # For the host # ===================================================== diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index 06812d4..9582e8c 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -1,14 +1,16 @@
add_llvm_library(LLVMARMDesc ARMAsmBackend.cpp ARMELFObjectWriter.cpp + ARMELFObjectWriter.cpp ARMELFStreamer.cpp + ARMMachObjectWriter.cpp + ARMMachORelocationInfo.cpp ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp ARMMCTargetDesc.cpp - ARMMachObjectWriter.cpp - ARMELFObjectWriter.cpp ARMTargetStreamer.cpp ARMUnwindOpAsm.cpp - ARMMachORelocationInfo.cpp + ARMWinCOFFObjectWriter.cpp + ARMWinCOFFStreamer.cpp ) diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 80af859..f6d24e9 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mlx-expansion" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "mlx-expansion" + static cl::opt<bool> ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden); static cl::opt<unsigned> @@ -73,7 +74,7 @@ namespace { } void MLxExpansion::clearStack() { - std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0); + std::fill(LastMIs, LastMIs + 4, nullptr); MIIdx = 0; } @@ -88,7 +89,7 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const { // real definition MI. This is important for _sfp instructions. unsigned Reg = MI->getOperand(1).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return 0; + return nullptr; MachineBasicBlock *MBB = MI->getParent(); MachineInstr *DefMI = MRI->getVRegDef(Reg); @@ -352,7 +353,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { if (Domain == ARMII::DomainGeneral) { if (++Skip == 2) // Assume dual issues of non-VFP / NEON instructions. - pushStack(0); + pushStack(nullptr); } else { Skip = 0; diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index a64707e..f4d9be3 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -215,10 +215,6 @@ etc. Almost all Thumb instructions clobber condition code. //===---------------------------------------------------------------------===// -Add ldmia, stmia support. - -//===---------------------------------------------------------------------===// - Thumb load / store address mode offsets are scaled. The values kept in the instruction operands are pre-scale values. This probably ought to be changed to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
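The two halfword helpers factored out in the ARMAsmBackend.cpp hunks earlier in this patch can also be checked on their own. Below is a minimal sketch under stated assumptions (plain uint32_t values, no real MCFixup plumbing) added as an editor's illustration: Thumb2 stores the high halfword of a 32-bit instruction first in memory, so a little-endian target must exchange the two halves before OR-ing a fixup into the instruction bytes.

#include <cassert>
#include <cstdint>

// Exchange the two 16-bit halves on little-endian targets only.
static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) {
  if (!IsLittleEndian)
    return Value;
  return (Value >> 16) | (Value << 16);
}

// Combine the leading and trailing Thumb2 halfwords into one 32-bit word
// laid out for the requested endianness.
static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
                              bool IsLittleEndian) {
  if (IsLittleEndian)
    return ((SecondHalf & 0xFFFF) << 16) | (FirstHalf & 0xFFFF);
  return ((FirstHalf & 0xFFFF) << 16) | (SecondHalf & 0xFFFF);
}

int main() {
  assert(swapHalfWords(0xAAAABBBBu, true) == 0xBBBBAAAAu);
  assert(swapHalfWords(0xAAAABBBBu, false) == 0xAAAABBBBu);
  // On little-endian, the first (leading) halfword lands in the low half of
  // the word, matching the fixup_arm_thumb_bl/blx handling in the patch.
  assert(joinHalfWords(0xF123u, 0xD456u, true) == 0xD456F123u);
  assert(joinHalfWords(0xF123u, 0xD456u, false) == 0xF123D456u);
  return 0;
}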
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 2224652..be29dc5 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -293,7 +293,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } -static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { +static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { if (MI->getOpcode() == ARM::tLDRspi && MI->getOperand(1).isFI() && isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) @@ -328,7 +328,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(); unsigned FramePtr = RegInfo->getFrameRegister(MF); if (!AFI->hasStackFrame()) { diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 93e2b5a..0c0abbe 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -56,7 +56,7 @@ public: unsigned Reg) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; + RegScavenger *RS = nullptr) const override; }; } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 406dbe0..edb9ff3 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "thumb2-it" #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "Thumb2InstrInfo.h" @@ -19,6 +18,8 @@ #include "llvm/CodeGen/MachineInstrBundle.h" using namespace llvm; +#define DEBUG_TYPE "thumb2-it" + STATISTIC(NumITs, "Number of IT blocks inserted"); STATISTIC(NumMovedInsts, "Number of predicated instructions moved"); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 04b83fb..6267ecf 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "t2-reduce-size" #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" @@ -25,6 +24,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "t2-reduce-size" + STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones"); @@ -915,15 +916,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Yes, CPSR could be livein. bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); - MachineInstr *BundleMI = 0; + MachineInstr *BundleMI = nullptr; - CPSRDef = 0; + CPSRDef = nullptr; HighLatencyCPSR = false; // Check predecessors for the latest CPSRDef. - for (MachineBasicBlock::pred_iterator - I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) { - const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()]; + for (auto *Pred : MBB.predecessors()) { + const MBBInfo &PInfo = BlockInfo[Pred->getNumber()]; if (!PInfo.Visited) { // Since blocks are visited in RPO, this must be a back-edge. 
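      // (A reverse post-order walk numbers and visits every forward
      // predecessor before MBB itself, so an unvisited predecessor here can
      // only be reached around a loop; its CPSR state is not computed yet
      // and is skipped rather than merged.)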
continue; @@ -984,7 +984,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); if (MI->isCall()) { // Calls don't really set CPSR. - CPSRDef = 0; + CPSRDef = nullptr; HighLatencyCPSR = false; IsSelfLoop = false; } else if (DefCPSR) { diff --git a/lib/Target/ARM64/ARM64.h b/lib/Target/ARM64/ARM64.h deleted file mode 100644 index f2c5e60..0000000 --- a/lib/Target/ARM64/ARM64.h +++ /dev/null @@ -1,48 +0,0 @@ -//===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// ARM64 back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef TARGET_ARM64_H -#define TARGET_ARM64_H - -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/DataTypes.h" - -namespace llvm { - -class ARM64TargetMachine; -class FunctionPass; -class MachineFunctionPass; - -FunctionPass *createARM64DeadRegisterDefinitions(); -FunctionPass *createARM64ConditionalCompares(); -FunctionPass *createARM64AdvSIMDScalar(); -FunctionPass *createARM64BranchRelaxation(); -FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel); -FunctionPass *createARM64StorePairSuppressPass(); -FunctionPass *createARM64ExpandPseudoPass(); -FunctionPass *createARM64LoadStoreOptimizationPass(); -ModulePass *createARM64PromoteConstantPass(); -FunctionPass *createARM64AddressTypePromotionPass(); -/// \brief Creates an ARM-specific Target Transformation Info pass. -ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM); - -FunctionPass *createARM64CleanupLocalDynamicTLSPass(); - -FunctionPass *createARM64CollectLOHPass(); -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64.td b/lib/Target/ARM64/ARM64.td deleted file mode 100644 index 3eef8b2..0000000 --- a/lib/Target/ARM64/ARM64.td +++ /dev/null @@ -1,95 +0,0 @@ -//===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// ARM64 Subtarget features. -// - -/// Cyclone has register move instructions which are "free". -def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", - "Has zereo-cycle register moves">; - -/// Cyclone has instructions which zero registers for "free". 
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", - "Has zero-cycle zeroing instructions">; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "ARM64RegisterInfo.td" -include "ARM64CallingConvention.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "ARM64Schedule.td" -include "ARM64InstrInfo.td" - -def ARM64InstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// ARM64 Processors supported. -// -include "ARM64SchedCyclone.td" - -def : ProcessorModel<"arm64-generic", NoSchedModel, []>; - -def : ProcessorModel<"cyclone", CycloneModel, [FeatureZCRegMove, FeatureZCZeroing]>; - -//===----------------------------------------------------------------------===// -// Assembly parser -//===----------------------------------------------------------------------===// - -def GenericAsmParserVariant : AsmParserVariant { - int Variant = 0; - string Name = "generic"; -} - -def AppleAsmParserVariant : AsmParserVariant { - int Variant = 1; - string Name = "apple-neon"; -} - -//===----------------------------------------------------------------------===// -// Assembly printer -//===----------------------------------------------------------------------===// -// ARM64 Uses the MC printer for asm output, so make sure the TableGen -// AsmWriter bits get associated with the correct class. -def GenericAsmWriter : AsmWriter { - string AsmWriterClassName = "InstPrinter"; - int Variant = 0; - bit isMCAsmWriter = 1; -} - -def AppleAsmWriter : AsmWriter { - let AsmWriterClassName = "AppleInstPrinter"; - int Variant = 1; - int isMCAsmWriter = 1; -} - -//===----------------------------------------------------------------------===// -// Target Declaration -//===----------------------------------------------------------------------===// - -def ARM64 : Target { - let InstructionSet = ARM64InstrInfo; - let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant]; - let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter]; -} diff --git a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp b/lib/Target/ARM64/ARM64AddressTypePromotion.cpp deleted file mode 100644 index 72fa6af..0000000 --- a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp +++ /dev/null @@ -1,496 +0,0 @@ - -//===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass tries to promote the computations use to obtained a sign extended -// value used into memory accesses. -// E.g. -// a = add nsw i32 b, 3 -// d = sext i32 a to i64 -// e = getelementptr ..., i64 d -// -// => -// f = sext i32 b to i64 -// a = add nsw i64 f, 3 -// e = getelementptr ..., i64 a -// -// This is legal to do so if the computations are markers with either nsw or nuw -// markers. -// Moreover, the current heuristic is simple: it does not create new sext -// operations, i.e., it gives up when a sext would have forked (e.g., if -// a = add i32 b, c, two sexts are required to promote the computation). 
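// Why the nsw/nuw marker requirement above makes the rewrite sound: given
// a = add nsw i32 b, 3, the nsw flag promises the 32-bit add never wraps, so
// sext(a) equals add nsw i64 (sext b), 3. Without the flag the two could
// disagree: for b = 0x7FFFFFFF the wrapped 32-bit sum sign-extends to a
// negative i64, while the promoted 64-bit add yields 0x80000002.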
-// -// FIXME: This pass may be useful for other targets too. -// ===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-type-promotion" -#include "ARM64.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" - -using namespace llvm; - -static cl::opt -EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden, - cl::desc("Enable the type promotion pass"), - cl::init(true)); -static cl::opt -EnableMerge("arm64-type-promotion-merge", cl::Hidden, - cl::desc("Enable merging of redundant sexts when one is dominating" - " the other."), - cl::init(true)); - -//===----------------------------------------------------------------------===// -// ARM64AddressTypePromotion -//===----------------------------------------------------------------------===// - -namespace llvm { -void initializeARM64AddressTypePromotionPass(PassRegistry &); -} - -namespace { -class ARM64AddressTypePromotion : public FunctionPass { - -public: - static char ID; - ARM64AddressTypePromotion() - : FunctionPass(ID), Func(NULL), ConsideredSExtType(NULL) { - initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry()); - } - - virtual const char *getPassName() const { - return "ARM64 Address Type Promotion"; - } - - /// Iterate over the functions and promote the computation of interesting - // sext instructions. - bool runOnFunction(Function &F); - -private: - /// The current function. - Function *Func; - /// Filter out all sexts that does not have this type. - /// Currently initialized with Int64Ty. - Type *ConsideredSExtType; - - // This transformation requires dominator info. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - FunctionPass::getAnalysisUsage(AU); - } - - typedef SmallPtrSet SetOfInstructions; - typedef SmallVector Instructions; - typedef DenseMap ValueToInsts; - - /// Check if it is profitable to move a sext through this instruction. - /// Currently, we consider it is profitable if: - /// - Inst is used only once (no need to insert truncate). - /// - Inst has only one operand that will require a sext operation (we do - /// do not create new sext operation). - bool shouldGetThrough(const Instruction *Inst); - - /// Check if it is possible and legal to move a sext through this - /// instruction. - /// Current heuristic considers that we can get through: - /// - Arithmetic operation marked with the nsw or nuw flag. - /// - Other sext operation. - /// - Truncate operation if it was just dropping sign extended bits. - bool canGetThrough(const Instruction *Inst); - - /// Move sext operations through safe to sext instructions. - bool propagateSignExtension(Instructions &SExtInsts); - - /// Is this sext should be considered for code motion. - /// We look for sext with ConsideredSExtType and uses in at least one - // GetElementPtrInst. - bool shouldConsiderSExt(const Instruction *SExt) const; - - /// Collect all interesting sext operations, i.e., the ones with the right - /// type and used in memory accesses. 
- /// More precisely, a sext instruction is considered as interesting if it - /// is used in a "complex" getelementptr or it exits at least another - /// sext instruction that sign extended the same initial value. - /// A getelementptr is considered as "complex" if it has more than 2 - // operands. - void analyzeSExtension(Instructions &SExtInsts); - - /// Merge redundant sign extension operations in common dominator. - void mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove); -}; -} // end anonymous namespace. - -char ARM64AddressTypePromotion::ID = 0; - -INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion", - "ARM64 Type Promotion Pass", false, false) - -FunctionPass *llvm::createARM64AddressTypePromotionPass() { - return new ARM64AddressTypePromotion(); -} - -bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) { - if (isa(Inst)) - return true; - - const BinaryOperator *BinOp = dyn_cast(Inst); - if (BinOp && isa(BinOp) && - (BinOp->hasNoUnsignedWrap() || BinOp->hasNoSignedWrap())) - return true; - - // sext(trunc(sext)) --> sext - if (isa(Inst) && isa(Inst->getOperand(0))) { - const Instruction *Opnd = cast(Inst->getOperand(0)); - // Check that the truncate just drop sign extended bits. - if (Inst->getType()->getIntegerBitWidth() >= - Opnd->getOperand(0)->getType()->getIntegerBitWidth() && - Inst->getOperand(0)->getType()->getIntegerBitWidth() <= - ConsideredSExtType->getIntegerBitWidth()) - return true; - } - - return false; -} - -bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) { - // If the type of the sext is the same as the considered one, this sext - // will become useless. - // Otherwise, we will have to do something to preserve the original value, - // unless it is used once. - if (isa(Inst) && - (Inst->getType() == ConsideredSExtType || Inst->hasOneUse())) - return true; - - // If the Inst is used more that once, we may need to insert truncate - // operations and we don't do that at the moment. - if (!Inst->hasOneUse()) - return false; - - // This truncate is used only once, thus if we can get thourgh, it will become - // useless. - if (isa(Inst)) - return true; - - // If both operands are not constant, a new sext will be created here. - // Current heuristic is: each step should be profitable. - // Therefore we don't allow to increase the number of sext even if it may - // be profitable later on. - if (isa(Inst) && isa(Inst->getOperand(1))) - return true; - - return false; -} - -static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) { - if (isa(Inst) && OpIdx == 0) - return false; - return true; -} - -bool -ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { - if (SExt->getType() != ConsideredSExtType) - return false; - - for (Value::const_use_iterator UseIt = SExt->use_begin(), - EndUseIt = SExt->use_end(); - UseIt != EndUseIt; ++UseIt) { - if (isa(*UseIt)) - return true; - } - - return false; -} - -// Input: -// - SExtInsts contains all the sext instructions that are use direclty in -// GetElementPtrInst, i.e., access to memory. -// Algorithm: -// - For each sext operation in SExtInsts: -// Let var be the operand of sext. 
-// while it is profitable (see shouldGetThrough), legal, and safe -// (see canGetThrough) to move sext through var's definition: -// * promote the type of var's definition. -// * fold var into sext uses. -// * move sext above var's definition. -// * update sext operand to use the operand of var that should be sign -// extended (by construction there is only one). -// -// E.g., -// a = ... i32 c, 3 -// b = sext i32 a to i64 <- is it legal/safe/profitable to get through 'a' -// ... -// = b -// => Yes, update the code -// b = sext i32 c to i64 -// a = ... i64 b, 3 -// ... -// = a -// Iterate on 'c'. -bool -ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { - DEBUG(dbgs() << "*** Propagate Sign Extension ***\n"); - - bool LocalChange = false; - SetOfInstructions ToRemove; - ValueToInsts ValToSExtendedUses; - while (!SExtInsts.empty()) { - // Get through simple chain. - Instruction *SExt = SExtInsts.pop_back_val(); - - DEBUG(dbgs() << "Consider:\n" << *SExt << '\n'); - - // If this SExt has already been merged continue. - if (SExt->use_empty() && ToRemove.count(SExt)) { - DEBUG(dbgs() << "No uses => marked as delete\n"); - continue; - } - - // Now try to get through the chain of definitions. - while (isa(SExt->getOperand(0))) { - Instruction *Inst = dyn_cast(SExt->getOperand(0)); - DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n'); - if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) { - // We cannot get through something that is not an Instruction - // or not safe to SExt. - DEBUG(dbgs() << "Cannot get through\n"); - break; - } - - LocalChange = true; - // If this is a sign extend, it becomes useless. - if (isa(Inst) || isa(Inst)) { - DEBUG(dbgs() << "SExt or trunc, mark it as to remove\n"); - // We cannot use replaceAllUsesWith here because we may trigger some - // assertion on the type as all involved sext operation may have not - // been moved yet. - while (!Inst->use_empty()) { - Value::use_iterator UseIt = Inst->use_begin(); - Instruction *UseInst = dyn_cast(*UseIt); - assert(UseInst && "Use of sext is not an Instruction!"); - UseInst->setOperand(UseIt->getOperandNo(), SExt); - } - ToRemove.insert(Inst); - SExt->setOperand(0, Inst->getOperand(0)); - SExt->moveBefore(Inst); - continue; - } - - // Get through the Instruction: - // 1. Update its type. - // 2. Replace the uses of SExt by Inst. - // 3. Sign extend each operand that needs to be sign extended. - - // Step #1. - Inst->mutateType(SExt->getType()); - // Step #2. - SExt->replaceAllUsesWith(Inst); - // Step #3. - Instruction *SExtForOpnd = SExt; - - DEBUG(dbgs() << "Propagate SExt to operands\n"); - for (int OpIdx = 0, EndOpIdx = Inst->getNumOperands(); OpIdx != EndOpIdx; - ++OpIdx) { - DEBUG(dbgs() << "Operand:\n" << *(Inst->getOperand(OpIdx)) << '\n'); - if (Inst->getOperand(OpIdx)->getType() == SExt->getType() || - !shouldSExtOperand(Inst, OpIdx)) { - DEBUG(dbgs() << "No need to propagate\n"); - continue; - } - // Check if we can statically sign extend the operand. - Value *Opnd = Inst->getOperand(OpIdx); - if (const ConstantInt *Cst = dyn_cast(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - Inst->setOperand(OpIdx, ConstantInt::getSigned(SExt->getType(), - Cst->getSExtValue())); - continue; - } - // UndefValue are typed, so we have to statically sign extend them. - if (isa(Opnd)) { - DEBUG(dbgs() << "Statically sign extend\n"); - Inst->setOperand(OpIdx, UndefValue::get(SExt->getType())); - continue; - } - - // Otherwise we have to explicity sign extend it. 
- assert(SExtForOpnd && - "Only one operand should have been sign extended"); - - SExtForOpnd->setOperand(0, Opnd); - - DEBUG(dbgs() << "Move before:\n" << *Inst << "\nSign extend\n"); - // Move the sign extension before the insertion point. - SExtForOpnd->moveBefore(Inst); - Inst->setOperand(OpIdx, SExtForOpnd); - // If more sext are required, new instructions will have to be created. - SExtForOpnd = NULL; - } - if (SExtForOpnd == SExt) { - DEBUG(dbgs() << "Sign extension is useless now\n"); - ToRemove.insert(SExt); - break; - } - } - - // If the use is already of the right type, connect its uses to its argument - // and delete it. - // This can happen for an Instruction which all uses are sign extended. - if (!ToRemove.count(SExt) && - SExt->getType() == SExt->getOperand(0)->getType()) { - DEBUG(dbgs() << "Sign extension is useless, attach its use to " - "its argument\n"); - SExt->replaceAllUsesWith(SExt->getOperand(0)); - ToRemove.insert(SExt); - } else - ValToSExtendedUses[SExt->getOperand(0)].push_back(SExt); - } - - if (EnableMerge) - mergeSExts(ValToSExtendedUses, ToRemove); - - // Remove all instructions marked as ToRemove. - for (Instruction *I: ToRemove) - I->eraseFromParent(); - return LocalChange; -} - -void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, - SetOfInstructions &ToRemove) { - DominatorTree &DT = getAnalysis().getDomTree(); - - for (auto &Entry: ValToSExtendedUses) { - Instructions &Insts = Entry.second; - Instructions CurPts; - for (Instruction *Inst : Insts) { - if (ToRemove.count(Inst)) - continue; - bool inserted = false; - for (auto Pt : CurPts) { - if (DT.dominates(Inst, Pt)) { - DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n" - << *Inst << '\n'); - (Pt)->replaceAllUsesWith(Inst); - ToRemove.insert(Pt); - Pt = Inst; - inserted = true; - break; - } - if (!DT.dominates(Pt, Inst)) - // Give up if we need to merge in a common dominator as the - // expermients show it is not profitable. - continue; - - DEBUG(dbgs() << "Replace all uses of:\n" << *Inst << "\nwith:\n" - << *Pt << '\n'); - Inst->replaceAllUsesWith(Pt); - ToRemove.insert(Inst); - inserted = true; - break; - } - if (!inserted) - CurPts.push_back(Inst); - } - } -} - -void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { - DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n"); - - DenseMap SeenChains; - - for (auto &BB : *Func) { - for (auto &II: BB) { - Instruction *SExt = &II; - - // Collect all sext operation per type. - if (!isa(SExt) || !shouldConsiderSExt(SExt)) - continue; - - DEBUG(dbgs() << "Found:\n" << (*SExt) << '\n'); - - // Cases where we actually perform the optimization: - // 1. SExt is used in a getelementptr with more than 2 operand => - // likely we can merge some computation if they are done on 64 bits. - // 2. The beginning of the SExt chain is SExt several time. => - // code sharing is possible. - - bool insert = false; - // #1. - for (Value::use_iterator UseIt = SExt->use_begin(), - EndUseIt = SExt->use_end(); - UseIt != EndUseIt; ++UseIt) { - const Instruction *Inst = dyn_cast(*UseIt); - if (Inst && Inst->getNumOperands() > 2) { - DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst - << '\n'); - insert = true; - break; - } - } - - // #2. - // Check the head of the chain. 
- Instruction *Inst = SExt; - Value *Last; - do { - int OpdIdx = 0; - const BinaryOperator *BinOp = dyn_cast(Inst); - if (BinOp && isa(BinOp->getOperand(0))) - OpdIdx = 1; - Last = Inst->getOperand(OpdIdx); - Inst = dyn_cast(Last); - } while (Inst && canGetThrough(Inst) && shouldGetThrough(Inst)); - - DEBUG(dbgs() << "Head of the chain:\n" << *Last << '\n'); - DenseMap::iterator AlreadySeen = - SeenChains.find(Last); - if (insert || AlreadySeen != SeenChains.end()) { - DEBUG(dbgs() << "Insert\n"); - SExtInsts.push_back(SExt); - if (AlreadySeen != SeenChains.end() && AlreadySeen->second != NULL) { - DEBUG(dbgs() << "Insert chain member\n"); - SExtInsts.push_back(AlreadySeen->second); - SeenChains[Last] = NULL; - } - } else { - DEBUG(dbgs() << "Record its chain membership\n"); - SeenChains[Last] = SExt; - } - } - } -} - -bool ARM64AddressTypePromotion::runOnFunction(Function &F) { - if (!EnableAddressTypePromotion || F.isDeclaration()) - return false; - Func = &F; - ConsideredSExtType = Type::getInt64Ty(Func->getContext()); - - DEBUG(dbgs() << "*** " << getPassName() << ": " << Func->getName() << '\n'); - - Instructions SExtInsts; - analyzeSExtension(SExtInsts); - return propagateSignExtension(SExtInsts); -} diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp deleted file mode 100644 index 83f8cda..0000000 --- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp +++ /dev/null @@ -1,392 +0,0 @@ -//===-- ARM64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// When profitable, replace GPR targeting i64 instructions with their -// AdvSIMD scalar equivalents. Generally speaking, "profitable" is defined -// as minimizing the number of cross-class register copies. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// TODO: Graph based predicate heuristics. -// Walking the instruction list linearly will get many, perhaps most, of -// the cases, but to do a truly throrough job of this, we need a more -// wholistic approach. -// -// This optimization is very similar in spirit to the register allocator's -// spill placement, only here we're determining where to place cross-class -// register copies rather than spills. As such, a similar approach is -// called for. -// -// We want to build up a set of graphs of all instructions which are candidates -// for transformation along with instructions which generate their inputs and -// consume their outputs. For each edge in the graph, we assign a weight -// based on whether there is a copy required there (weight zero if not) and -// the block frequency of the block containing the defining or using -// instruction, whichever is less. Our optimization is then a graph problem -// to minimize the total weight of all the graphs, then transform instructions -// and add or remove copy instructions as called for to implement the -// solution. 
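One reading of the weighting scheme the TODO above proposes, reduced to a standalone C++ sketch (names invented here; the pass does not implement this):

#include <algorithm>
#include <cstdint>

// Weight of one def->use edge in the candidate graph: zero when no
// cross-class copy is needed on the edge, otherwise the smaller of the two
// endpoint block frequencies, since the copy can be placed in whichever
// block is colder.
static uint64_t edgeWeight(bool NeedsCopy, uint64_t DefBlockFreq,
                           uint64_t UseBlockFreq) {
  return NeedsCopy ? std::min(DefBlockFreq, UseBlockFreq) : 0;
}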
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-simd-scalar" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64RegisterInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt -AdvSIMDScalar("arm64-simd-scalar", - cl::desc("enable use of AdvSIMD scalar integer instructions"), - cl::init(false), cl::Hidden); -// Allow forcing all i64 operations with equivalent SIMD instructions to use -// them. For stress-testing the transformation function. -static cl::opt -TransformAll("arm64-simd-scalar-force-all", - cl::desc("Force use of AdvSIMD scalar instructions everywhere"), - cl::init(false), cl::Hidden); - -STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used"); -STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted"); -STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted"); - -namespace { -class ARM64AdvSIMDScalar : public MachineFunctionPass { - MachineRegisterInfo *MRI; - const ARM64InstrInfo *TII; - -private: - // isProfitableToTransform - Predicate function to determine whether an - // instruction should be transformed to its equivalent AdvSIMD scalar - // instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. - bool isProfitableToTransform(const MachineInstr *MI) const; - - // tranformInstruction - Perform the transformation of an instruction - // to its equivalant AdvSIMD scalar instruction. Update inputs and outputs - // to be the correct register class, minimizing cross-class copies. - void transformInstruction(MachineInstr *MI); - - // processMachineBasicBlock - Main optimzation loop. - bool processMachineBasicBlock(MachineBasicBlock *MBB); - -public: - static char ID; // Pass identification, replacement for typeid. - explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &F); - - const char *getPassName() const { - return "AdvSIMD scalar operation optimization"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64AdvSIMDScalar::ID = 0; -} // end anonymous namespace - -static bool isGPR64(unsigned Reg, unsigned SubReg, - const MachineRegisterInfo *MRI) { - if (SubReg) - return false; - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass); - return ARM64::GPR64RegClass.contains(Reg); -} - -static bool isFPR64(unsigned Reg, unsigned SubReg, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) && - SubReg == 0) || - (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) && - SubReg == ARM64::dsub); - // Physical register references just check the regist class directly. - return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) || - (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub); -} - -// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64 -// copy instruction. Return zero_reg if the instruction is not a copy. 
-static unsigned getSrcFromCopy(const MachineInstr *MI, - const MachineRegisterInfo *MRI, - unsigned &SubReg) { - SubReg = 0; - // The "FMOV Xd, Dn" instruction is the typical form. - if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr) - return MI->getOperand(1).getReg(); - // A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see - // these at this stage, but it's easy to check for. - if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) { - SubReg = ARM64::dsub; - return MI->getOperand(1).getReg(); - } - // Or just a plain COPY instruction. This can be directly to/from FPR64, - // or it can be a dsub subreg reference to an FPR128. - if (MI->getOpcode() == ARM64::COPY) { - if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), - MRI) && - isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI)) - return MI->getOperand(1).getReg(); - if (isGPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(), - MRI) && - isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), - MRI)) { - SubReg = ARM64::dsub; - return MI->getOperand(1).getReg(); - } - } - - // Otherwise, this is some other kind of instruction. - return 0; -} - -// getTransformOpcode - For any opcode for which there is an AdvSIMD equivalent -// that we're considering transforming to, return that AdvSIMD opcode. For all -// others, return the original opcode. -static int getTransformOpcode(unsigned Opc) { - switch (Opc) { - default: - break; - // FIXME: Lots more possibilities. - case ARM64::ADDXrr: - return ARM64::ADDv1i64; - case ARM64::SUBXrr: - return ARM64::SUBv1i64; - } - // No AdvSIMD equivalent, so just return the original opcode. - return Opc; -} - -static bool isTransformable(const MachineInstr *MI) { - int Opc = MI->getOpcode(); - return Opc != getTransformOpcode(Opc); -} - -// isProfitableToTransform - Predicate function to determine whether an -// instruction should be transformed to its equivalent AdvSIMD scalar -// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example. -bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const { - // If this instruction isn't eligible to be transformed (no SIMD equivalent), - // early exit since that's the common case. - if (!isTransformable(MI)) - return false; - - // Count the number of copies we'll need to add and approximate the number - // of copies that a transform will enable us to remove. - unsigned NumNewCopies = 3; - unsigned NumRemovableCopies = 0; - - unsigned OrigSrc0 = MI->getOperand(1).getReg(); - unsigned OrigSrc1 = MI->getOperand(2).getReg(); - unsigned Src0 = 0, SubReg0; - unsigned Src1 = 0, SubReg1; - if (!MRI->def_empty(OrigSrc0)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc0); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src0 = getSrcFromCopy(&*Def, MRI, SubReg0); - // If the source was from a copy, we don't need to insert a new copy. - if (Src0) - --NumNewCopies; - // If there are no other users of the original source, we can delete - // that instruction. 
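  // (The bookkeeping above and below, in short: NumNewCopies starts at 3,
  //  two operand copies in plus one result copy out. Each source that
  //  already arrives through a GPR<->FPR copy cancels one of those, each
  //  feeding or using copy that would become dead counts as removable, and
  //  the transform fires when NumRemovableCopies >= NumNewCopies, or
  //  unconditionally under -arm64-simd-scalar-force-all.)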
- if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) - ++NumRemovableCopies; - } - if (!MRI->def_empty(OrigSrc1)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc1); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src1 = getSrcFromCopy(&*Def, MRI, SubReg1); - if (Src1) - --NumNewCopies; - // If there are no other users of the original source, we can delete - // that instruction. - if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) - ++NumRemovableCopies; - } - - // If any of the uses of the original instructions is a cross class copy, - // that's a copy that will be removable if we transform. Likewise, if - // any of the uses is a transformable instruction, it's likely the tranforms - // will chain, enabling us to save a copy there, too. This is an aggressive - // heuristic that approximates the graph based cost analysis described above. - unsigned Dst = MI->getOperand(0).getReg(); - bool AllUsesAreCopies = true; - for (MachineRegisterInfo::use_instr_nodbg_iterator - Use = MRI->use_instr_nodbg_begin(Dst), - E = MRI->use_instr_nodbg_end(); - Use != E; ++Use) { - unsigned SubReg; - if (getSrcFromCopy(&*Use, MRI, SubReg) || isTransformable(&*Use)) - ++NumRemovableCopies; - // If the use is an INSERT_SUBREG, that's still something that can - // directly use the FPR64, so we don't invalidate AllUsesAreCopies. It's - // preferable to have it use the FPR64 in most cases, as if the source - // vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely. - // Ditto for a lane insert. - else if (Use->getOpcode() == ARM64::INSERT_SUBREG || - Use->getOpcode() == ARM64::INSvi64gpr) - ; - else - AllUsesAreCopies = false; - } - // If all of the uses of the original destination register are copies to - // FPR64, then we won't end up having a new copy back to GPR64 either. - if (AllUsesAreCopies) - --NumNewCopies; - - // If a tranform will not increase the number of cross-class copies required, - // return true. - if (NumNewCopies <= NumRemovableCopies) - return true; - - // Finally, even if we otherwise wouldn't transform, check if we're forcing - // transformation of everything. - return TransformAll; -} - -static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI, - unsigned Dst, unsigned Src, bool IsKill) { - MachineInstrBuilder MIB = - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY), - Dst) - .addReg(Src, getKillRegState(IsKill)); - DEBUG(dbgs() << " adding copy: " << *MIB); - ++NumCopiesInserted; - return MIB; -} - -// tranformInstruction - Perform the transformation of an instruction -// to its equivalant AdvSIMD scalar instruction. Update inputs and outputs -// to be the correct register class, minimizing cross-class copies. -void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) { - DEBUG(dbgs() << "Scalar transform: " << *MI); - - MachineBasicBlock *MBB = MI->getParent(); - int OldOpc = MI->getOpcode(); - int NewOpc = getTransformOpcode(OldOpc); - assert(OldOpc != NewOpc && "transform an instruction to itself?!"); - - // Check if we need a copy for the source registers. 
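  // In sketch form, the worst case for "add Xd, Xn, Xm" (register names
  // illustrative) is:
  //   fmov d0, xN      ; source copy, inserted only when xN did not
  //   fmov d1, xM      ;   already come from an FPR64 copy
  //   add  d2, d0, d1  ; the AdvSIMD scalar form, ADDv1i64
  //   fmov xD, d2      ; result copied back to a GPR
  // which is exactly the three potential copies isProfitableToTransform()
  // budgets for.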
- unsigned OrigSrc0 = MI->getOperand(1).getReg(); - unsigned OrigSrc1 = MI->getOperand(2).getReg(); - unsigned Src0 = 0, SubReg0; - unsigned Src1 = 0, SubReg1; - if (!MRI->def_empty(OrigSrc0)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc0); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src0 = getSrcFromCopy(&*Def, MRI, SubReg0); - // If there are no other users of the original source, we can delete - // that instruction. - if (Src0 && MRI->hasOneNonDBGUse(OrigSrc0)) { - assert(Src0 && "Can't delete copy w/o a valid original source!"); - Def->eraseFromParent(); - ++NumCopiesDeleted; - } - } - if (!MRI->def_empty(OrigSrc1)) { - MachineRegisterInfo::def_instr_iterator Def = - MRI->def_instr_begin(OrigSrc1); - assert(std::next(Def) == MRI->def_instr_end() && "Multiple def in SSA!"); - Src1 = getSrcFromCopy(&*Def, MRI, SubReg1); - // If there are no other users of the original source, we can delete - // that instruction. - if (Src1 && MRI->hasOneNonDBGUse(OrigSrc1)) { - assert(Src1 && "Can't delete copy w/o a valid original source!"); - Def->eraseFromParent(); - ++NumCopiesDeleted; - } - } - // If we weren't able to reference the original source directly, create a - // copy. - if (!Src0) { - SubReg0 = 0; - Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - insertCopy(TII, MI, Src0, OrigSrc0, true); - } - if (!Src1) { - SubReg1 = 0; - Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - insertCopy(TII, MI, Src1, OrigSrc1, true); - } - - // Create a vreg for the destination. - // FIXME: No need to do this if the ultimate user expects an FPR64. - // Check for that and avoid the copy if possible. - unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass); - - // For now, all of the new instructions have the same simple three-register - // form, so no need to special case based on what instruction we're - // building. - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(NewOpc), Dst) - .addReg(Src0, getKillRegState(true), SubReg0) - .addReg(Src1, getKillRegState(true), SubReg1); - - // Now copy the result back out to a GPR. - // FIXME: Try to avoid this if all uses could actually just use the FPR64 - // directly. - insertCopy(TII, MI, MI->getOperand(0).getReg(), Dst, true); - - // Erase the old instruction. - MI->eraseFromParent(); - - ++NumScalarInsnsUsed; -} - -// processMachineBasicBlock - Main optimzation loop. -bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) { - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { - MachineInstr *MI = I; - ++I; - if (isProfitableToTransform(MI)) { - transformInstruction(MI); - Changed = true; - } - } - return Changed; -} - -// runOnMachineFunction - Pass entry point from PassManager. -bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { - // Early exit if pass disabled. - if (!AdvSIMDScalar) - return false; - - bool Changed = false; - DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n"); - - const TargetMachine &TM = mf.getTarget(); - MRI = &mf.getRegInfo(); - TII = static_cast(TM.getInstrInfo()); - - // Just check things on a one-block-at-a-time basis. - for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) - if (processMachineBasicBlock(I)) - Changed = true; - return Changed; -} - -// createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine -// to add the pass to the PassManager. 
-FunctionPass *llvm::createARM64AdvSIMDScalar() { - return new ARM64AdvSIMDScalar(); -} diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/ARM64/ARM64AsmPrinter.cpp deleted file mode 100644 index d0aa6af..0000000 --- a/lib/Target/ARM64/ARM64AsmPrinter.cpp +++ /dev/null @@ -1,563 +0,0 @@ -//===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the ARM64 assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "ARM64.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64MCInstLower.h" -#include "ARM64RegisterInfo.h" -#include "InstPrinter/ARM64InstPrinter.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/StackMaps.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstBuilder.h" -#include "llvm/MC/MCLinkerOptimizationHint.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -namespace { - -class ARM64AsmPrinter : public AsmPrinter { - ARM64MCInstLower MCInstLowering; - StackMaps SM; - -public: - ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), MCInstLowering(OutContext, *Mang, *this), - SM(*this), ARM64FI(NULL), LOHLabelCounter(0) {} - - virtual const char *getPassName() const { return "ARM64 Assembly Printer"; } - - /// \brief Wrapper for MCInstLowering.lowerOperand() for the - /// tblgen'erated pseudo lowering. - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { - return MCInstLowering.lowerOperand(MO, MCOp); - } - - void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - /// \brief tblgen'erated driver function for lowering simple MI->MC - /// pseudo instructions. 
- bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, - const MachineInstr *MI); - - void EmitInstruction(const MachineInstr *MI); - - void getAnalysisUsage(AnalysisUsage &AU) const { - AsmPrinter::getAnalysisUsage(AU); - AU.setPreservesAll(); - } - - bool runOnMachineFunction(MachineFunction &F) { - ARM64FI = F.getInfo(); - return AsmPrinter::runOnMachineFunction(F); - } - -private: - MachineLocation getDebugValueLocation(const MachineInstr *MI) const; - void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O); - bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O); - bool printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, bool isVector, - raw_ostream &O); - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - - void EmitFunctionBodyEnd(); - - MCSymbol *GetCPISymbol(unsigned CPID) const; - void EmitEndOfAsmFile(Module &M); - ARM64FunctionInfo *ARM64FI; - - /// \brief Emit the LOHs contained in ARM64FI. - void EmitLOHs(); - - typedef std::map MInstToMCSymbol; - MInstToMCSymbol LOHInstToLabel; - unsigned LOHLabelCounter; -}; - -} // end of anonymous namespace - -//===----------------------------------------------------------------------===// - -void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) { - // Funny Darwin hack: This flag tells the linker that no global symbols - // contain code that falls through to other global symbols (e.g. the obvious - // implementation of multiple entry points). If this doesn't occur, the - // linker can safely perform dead code stripping. Since LLVM never - // generates code that does this, it is always safe to set. - OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); - SM.serializeToStackMapSection(); -} - -MachineLocation -ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - MachineLocation Location; - assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; -} - -void ARM64AsmPrinter::EmitLOHs() { - SmallVector MCArgs; - - for (const auto &D : ARM64FI->getLOHContainer()) { - for (const MachineInstr *MI : D.getArgs()) { - MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI); - assert(LabelIt != LOHInstToLabel.end() && - "Label hasn't been inserted for LOH related instruction"); - MCArgs.push_back(LabelIt->second); - } - OutStreamer.EmitLOHDirective(D.getKind(), MCArgs); - MCArgs.clear(); - } -} - -void ARM64AsmPrinter::EmitFunctionBodyEnd() { - if (!ARM64FI->getLOHRelated().empty()) - EmitLOHs(); -} - -/// GetCPISymbol - Return the symbol for the specified constant pool entry. -MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const { - // Darwin uses a linker-private symbol name for constant-pools (to - // avoid addends on the relocation?), ELF has no such concept and - // uses a normal private symbol. 
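// (Concretely: constant-pool entry 2 of function 1 comes out as "lCPI1_2" on
// Darwin, whose linker-private prefix is "l", and as ".LCPI1_2" on ELF
// targets, which use the ordinary private prefix ".L".)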
- if (getDataLayout().getLinkerPrivateGlobalPrefix()[0]) - return OutContext.GetOrCreateSymbol( - Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" + - Twine(getFunctionNumber()) + "_" + Twine(CPID)); - - return OutContext.GetOrCreateSymbol( - Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" + - Twine(getFunctionNumber()) + "_" + Twine(CPID)); -} - -void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - default: - assert(0 && ""); - case MachineOperand::MO_Register: { - unsigned Reg = MO.getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(Reg)); - assert(!MO.getSubReg() && "Subregs should be eliminated!"); - O << ARM64InstPrinter::getRegisterName(Reg); - break; - } - case MachineOperand::MO_Immediate: { - int64_t Imm = MO.getImm(); - O << '#' << Imm; - break; - } - } -} - -bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode, - raw_ostream &O) { - unsigned Reg = MO.getReg(); - switch (Mode) { - default: - return true; // Unknown mode. - case 'w': - Reg = getWRegFromXReg(Reg); - break; - case 'x': - Reg = getXRegFromWReg(Reg); - break; - } - - O << ARM64InstPrinter::getRegisterName(Reg); - return false; -} - -// Prints the register in MO using class RC using the offset in the -// new register class. This should not be used for cross class -// printing. -bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, - const TargetRegisterClass *RC, - bool isVector, raw_ostream &O) { - assert(MO.isReg() && "Should only get here with a register!"); - const ARM64RegisterInfo *RI = - static_cast(TM.getRegisterInfo()); - unsigned Reg = MO.getReg(); - unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); - assert(RI->regsOverlap(RegToPrint, Reg)); - O << ARM64InstPrinter::getRegisterName( - RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName); - return false; -} - -bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) - return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: - return true; // Unknown modifier. - case 'w': // Print W register - case 'x': // Print X register - if (MO.isReg()) - return printAsmMRegister(MO, ExtraCode[0], O); - if (MO.isImm() && MO.getImm() == 0) { - unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR; - O << ARM64InstPrinter::getRegisterName(Reg); - return false; - } - printOperand(MI, OpNum, O); - return false; - case 'b': // Print B register. - case 'h': // Print H register. - case 's': // Print S register. - case 'd': // Print D register. - case 'q': // Print Q register. - if (MO.isReg()) { - const TargetRegisterClass *RC; - switch (ExtraCode[0]) { - case 'b': - RC = &ARM64::FPR8RegClass; - break; - case 'h': - RC = &ARM64::FPR16RegClass; - break; - case 's': - RC = &ARM64::FPR32RegClass; - break; - case 'd': - RC = &ARM64::FPR64RegClass; - break; - case 'q': - RC = &ARM64::FPR128RegClass; - break; - default: - return true; - } - return printAsmRegInClass(MO, RC, false /* vector */, O); - } - printOperand(MI, OpNum, O); - return false; - } - } - - // According to ARM, we should emit x and v registers unless we have a - // modifier. 
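  // (For example, assuming GCC-style inline assembly such as
  //    asm("add %x0, %x1, %w2" : "=r"(r) : "r"(a), "r"(b));
  //  the 'x' and 'w' paths above print the 64- and 32-bit names of the
  //  chosen register, while an unadorned %0 on a general-purpose operand
  //  falls through to the x-register default handled below.)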
- if (MO.isReg()) { - unsigned Reg = MO.getReg(); - - // If this is a w or x register, print an x register. - if (ARM64::GPR32allRegClass.contains(Reg) || - ARM64::GPR64allRegClass.contains(Reg)) - return printAsmMRegister(MO, 'x', O); - - // If this is a b, h, s, d, or q register, print it as a v register. - return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O); - } - - printOperand(MI, OpNum, O); - return false; -} - -bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNum, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - - const MachineOperand &MO = MI->getOperand(OpNum); - assert(MO.isReg() && "unexpected inline asm memory operand"); - O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]"; - return false; -} - -void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps == 4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast(MI->getOperand(NOps - 1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '['; - printOperand(MI, 0, OS); - OS << '+'; - printOperand(MI, 1, OS); - OS << ']'; - OS << "+"; - printOperand(MI, NOps - 2, OS); -} - -void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { - unsigned NumNOPBytes = MI.getOperand(1).getImm(); - - SM.recordStackMap(MI); - // Emit padding. - assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); - for (unsigned i = 0; i < NumNOPBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); -} - -// Lower a patchpoint of the form: -// [], , , , -void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { - SM.recordPatchPoint(MI); - - PatchPointOpers Opers(&MI); - - int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm(); - unsigned EncodedBytes = 0; - if (CallTarget) { - assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget && - "High 16 bits of call target should be zero."); - unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); - EncodedBytes = 16; - // Materialize the jump address: - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi) - .addReg(ScratchReg) - .addImm((CallTarget >> 32) & 0xFFFF) - .addImm(32)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm((CallTarget >> 16) & 0xFFFF) - .addImm(16)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi) - .addReg(ScratchReg) - .addReg(ScratchReg) - .addImm(CallTarget & 0xFFFF) - .addImm(0)); - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg)); - } - // Emit padding. 
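  // (EncodedBytes is 16 above because materializing the 48-bit call target
  //  takes exactly four 4-byte instructions: one movz, two movk steps, then
  //  blr. The rest of the patchpoint's reserved window is filled with
  //  NOP-encoded hints below.)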
- unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); - assert(NumBytes >= EncodedBytes && - "Patchpoint can't request size less than the length of a call."); - assert((NumBytes - EncodedBytes) % 4 == 0 && - "Invalid number of NOP bytes requested!"); - for (unsigned i = EncodedBytes; i < NumBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0)); -} - -// Simple pseudo-instructions have their lowering (with expansion to real -// instructions) auto-generated. -#include "ARM64GenMCPseudoLowering.inc" - -static unsigned getRealIndexedOpcode(unsigned Opc) { - switch (Opc) { - case ARM64::LDRXpre_isel: return ARM64::LDRXpre; - case ARM64::LDRWpre_isel: return ARM64::LDRWpre; - case ARM64::LDRDpre_isel: return ARM64::LDRDpre; - case ARM64::LDRSpre_isel: return ARM64::LDRSpre; - case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre; - case ARM64::LDRHHpre_isel: return ARM64::LDRHHpre; - case ARM64::LDRSBWpre_isel: return ARM64::LDRSBWpre; - case ARM64::LDRSBXpre_isel: return ARM64::LDRSBXpre; - case ARM64::LDRSHWpre_isel: return ARM64::LDRSHWpre; - case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre; - case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre; - - case ARM64::LDRDpost_isel: return ARM64::LDRDpost; - case ARM64::LDRSpost_isel: return ARM64::LDRSpost; - case ARM64::LDRXpost_isel: return ARM64::LDRXpost; - case ARM64::LDRWpost_isel: return ARM64::LDRWpost; - case ARM64::LDRHHpost_isel: return ARM64::LDRHHpost; - case ARM64::LDRBBpost_isel: return ARM64::LDRBBpost; - case ARM64::LDRSWpost_isel: return ARM64::LDRSWpost; - case ARM64::LDRSHWpost_isel: return ARM64::LDRSHWpost; - case ARM64::LDRSHXpost_isel: return ARM64::LDRSHXpost; - case ARM64::LDRSBWpost_isel: return ARM64::LDRSBWpost; - case ARM64::LDRSBXpost_isel: return ARM64::LDRSBXpost; - - case ARM64::STRXpre_isel: return ARM64::STRXpre; - case ARM64::STRWpre_isel: return ARM64::STRWpre; - case ARM64::STRHHpre_isel: return ARM64::STRHHpre; - case ARM64::STRBBpre_isel: return ARM64::STRBBpre; - case ARM64::STRDpre_isel: return ARM64::STRDpre; - case ARM64::STRSpre_isel: return ARM64::STRSpre; - } - llvm_unreachable("Unexpected pre-indexed opcode!"); -} - -void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) { - // Do any auto-generated pseudo lowerings. - if (emitPseudoExpansionLowering(OutStreamer, MI)) - return; - - if (ARM64FI->getLOHRelated().count(MI)) { - // Generate a label for LOH related instruction - MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++); - // Associate the instruction with the label - LOHInstToLabel[MI] = LOHLabel; - OutStreamer.EmitLabel(LOHLabel); - } - - // Do any manual lowerings. - switch (MI->getOpcode()) { - default: - break; - case ARM64::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } - // Indexed loads and stores use a pseudo to handle complex operand - // tricks and writeback to the base register. We strip off the writeback - // operand and switch the opcode here. Post-indexed stores were handled by the - // tablegen'erated pseudos above. (The complex operand <--> simple - // operand isel is beyond tablegen's ability, so we do these manually). 
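  // (For instance, a pre-indexed load "ldr x0, [x1, #8]!" arrives here as
  //  LDRXpre_isel; assuming its operands are (dst, base-writeback, base,
  //  offset), the lowering below keeps operands 0, 2 and 3 and switches to
  //  the real LDRXpre, exactly as the comments in the loads case describe.)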
- case ARM64::LDRHHpre_isel: - case ARM64::LDRBBpre_isel: - case ARM64::LDRXpre_isel: - case ARM64::LDRWpre_isel: - case ARM64::LDRDpre_isel: - case ARM64::LDRSpre_isel: - case ARM64::LDRSBWpre_isel: - case ARM64::LDRSBXpre_isel: - case ARM64::LDRSHWpre_isel: - case ARM64::LDRSHXpre_isel: - case ARM64::LDRSWpre_isel: - case ARM64::LDRDpost_isel: - case ARM64::LDRSpost_isel: - case ARM64::LDRXpost_isel: - case ARM64::LDRWpost_isel: - case ARM64::LDRHHpost_isel: - case ARM64::LDRBBpost_isel: - case ARM64::LDRSWpost_isel: - case ARM64::LDRSHWpost_isel: - case ARM64::LDRSHXpost_isel: - case ARM64::LDRSBWpost_isel: - case ARM64::LDRSBXpost_isel: { - MCInst TmpInst; - // For loads, the writeback operand to be skipped is the second. - TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg())); - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::STRXpre_isel: - case ARM64::STRWpre_isel: - case ARM64::STRHHpre_isel: - case ARM64::STRBBpre_isel: - case ARM64::STRDpre_isel: - case ARM64::STRSpre_isel: { - MCInst TmpInst; - // For loads, the writeback operand to be skipped is the first. - TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg())); - TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - - // Tail calls use pseudo instructions so they have the proper code-gen - // attributes (isCall, isReturn, etc.). We lower them to the real - // instruction here. - case ARM64::TCRETURNri: { - MCInst TmpInst; - TmpInst.setOpcode(ARM64::BR); - TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::TCRETURNdi: { - MCOperand Dest; - MCInstLowering.lowerOperand(MI->getOperand(0), Dest); - MCInst TmpInst; - TmpInst.setOpcode(ARM64::B); - TmpInst.addOperand(Dest); - EmitToStreamer(OutStreamer, TmpInst); - return; - } - case ARM64::TLSDESC_BLR: { - MCOperand Callee, Sym; - MCInstLowering.lowerOperand(MI->getOperand(0), Callee); - MCInstLowering.lowerOperand(MI->getOperand(1), Sym); - - // First emit a relocation-annotation. This expands to no code, but requests - // the following instruction gets an R_AARCH64_TLSDESC_CALL. - MCInst TLSDescCall; - TLSDescCall.setOpcode(ARM64::TLSDESCCALL); - TLSDescCall.addOperand(Sym); - EmitToStreamer(OutStreamer, TLSDescCall); - - // Other than that it's just a normal indirect call to the function loaded - // from the descriptor. - MCInst BLR; - BLR.setOpcode(ARM64::BLR); - BLR.addOperand(Callee); - EmitToStreamer(OutStreamer, BLR); - - return; - } - - case TargetOpcode::STACKMAP: - return LowerSTACKMAP(OutStreamer, SM, *MI); - - case TargetOpcode::PATCHPOINT: - return LowerPATCHPOINT(OutStreamer, SM, *MI); - } - - // Finally, do the automated lowerings for everything else. - MCInst TmpInst; - MCInstLowering.Lower(MI, TmpInst); - EmitToStreamer(OutStreamer, TmpInst); -} - -// Force static initialization. 
-extern "C" void LLVMInitializeARM64AsmPrinter() { - RegisterAsmPrinter X(TheARM64Target); -} diff --git a/lib/Target/ARM64/ARM64BranchRelaxation.cpp b/lib/Target/ARM64/ARM64BranchRelaxation.cpp deleted file mode 100644 index a9bbef5..0000000 --- a/lib/Target/ARM64/ARM64BranchRelaxation.cpp +++ /dev/null @@ -1,505 +0,0 @@ -//===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-branch-relax" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -static cl::opt -BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true), - cl::desc("Relax out of range conditional branches")); - -static cl::opt -TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14), - cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); - -static cl::opt -CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of CB[N]Z instructions (DEBUG)")); - -static cl::opt -BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19), - cl::desc("Restrict range of Bcc instructions (DEBUG)")); - -STATISTIC(NumSplit, "Number of basic blocks split"); -STATISTIC(NumRelaxed, "Number of conditional branches relaxed"); - -namespace { -class ARM64BranchRelaxation : public MachineFunctionPass { - /// BasicBlockInfo - Information about the offset and size of a single - /// basic block. - struct BasicBlockInfo { - /// Offset - Distance from the beginning of the function to the beginning - /// of this basic block. - /// - /// The offset is always aligned as required by the basic block. - unsigned Offset; - - /// Size - Size of the basic block in bytes. If the block contains - /// inline assembly, this is a worst case estimate. - /// - /// The size does not include any alignment padding whether from the - /// beginning of the block, or from an aligned jump table at the end. - unsigned Size; - - BasicBlockInfo() : Offset(0), Size(0) {} - - /// Compute the offset immediately following this block. If LogAlign is - /// specified, return the offset the successor block will get if it has - /// this alignment. 
-    unsigned postOffset(unsigned LogAlign = 0) const {
-      unsigned PO = Offset + Size;
-      unsigned Align = 1 << LogAlign;
-      return (PO + Align - 1) / Align * Align;
-    }
-  };
-
-  SmallVector<BasicBlockInfo, 16> BlockInfo;
-
-  MachineFunction *MF;
-  const ARM64InstrInfo *TII;
-
-  bool relaxBranchInstructions();
-  void scanFunction();
-  MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
-  void adjustBlockOffsets(MachineBasicBlock *BB);
-  bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
-  bool fixupConditionalBranch(MachineInstr *MI);
-  void computeBlockSize(MachineBasicBlock *MBB);
-  unsigned getInstrOffset(MachineInstr *MI) const;
-  void dumpBBs();
-  void verify();
-
-public:
-  static char ID;
-  ARM64BranchRelaxation() : MachineFunctionPass(ID) {}
-
-  virtual bool runOnMachineFunction(MachineFunction &MF);
-
-  virtual const char *getPassName() const {
-    return "ARM64 branch relaxation pass";
-  }
-};
-char ARM64BranchRelaxation::ID = 0;
-}
-
-/// verify - check that BBOffsets and BBSizes are consistent and that each
-/// block starts at its required alignment.
-void ARM64BranchRelaxation::verify() {
-#ifndef NDEBUG
-  unsigned PrevNum = MF->begin()->getNumber();
-  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E;
-       ++MBBI) {
-    MachineBasicBlock *MBB = MBBI;
-    unsigned Align = MBB->getAlignment();
-    unsigned Num = MBB->getNumber();
-    assert(BlockInfo[Num].Offset % (1u << Align) == 0);
-    assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
-    PrevNum = Num;
-  }
-#endif
-}
-
-/// print block size and offset information - debugging
-void ARM64BranchRelaxation::dumpBBs() {
-  for (auto &MBB : *MF) {
-    const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
-    dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
-           << format("size=%#x\n", BBI.Size);
-  }
-}
-
-/// BBHasFallthrough - Return true if the specified basic block can fall
-/// through into the block immediately after it.
-static bool BBHasFallthrough(MachineBasicBlock *MBB) {
-  // Get the next machine basic block in the function.
-  MachineFunction::iterator MBBI = MBB;
-  // Can't fall off end of function.
-  if (std::next(MBBI) == MBB->getParent()->end())
-    return false;
-
-  MachineBasicBlock *NextBB = std::next(MBBI);
-  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
-                                        E = MBB->succ_end();
-       I != E; ++I)
-    if (*I == NextBB)
-      return true;
-
-  return false;
-}
-
-/// scanFunction - Do the initial scan of the function, building up
-/// information about each block.
-void ARM64BranchRelaxation::scanFunction() {
-  BlockInfo.clear();
-  BlockInfo.resize(MF->getNumBlockIDs());
-
-  // First thing, compute the size of all basic blocks, and see if the function
-  // has any inline assembly in it. If so, we have to be conservative about
-  // alignment assumptions, as we don't know for sure the size of any
-  // instructions in the inline assembly.
-  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
-    computeBlockSize(I);
-
-  // Compute block offsets and known bits.
-  adjustBlockOffsets(MF->begin());
-}
-
-/// computeBlockSize - Compute the size for MBB.
-/// This function updates BlockInfo directly.
-void ARM64BranchRelaxation::computeBlockSize(MachineBasicBlock *MBB) {
-  unsigned Size = 0;
-  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
-       ++I)
-    Size += TII->GetInstSizeInBytes(I);
-  BlockInfo[MBB->getNumber()].Size = Size;
-}
-
-/// getInstrOffset - Return the current offset of the specified machine
-/// instruction from the start of the function.
This offset changes as stuff is -/// moved around inside the function. -unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const { - MachineBasicBlock *MBB = MI->getParent(); - - // The offset is composed of two things: the sum of the sizes of all MBB's - // before this instruction's block, and the offset from the start of the block - // it is in. - unsigned Offset = BlockInfo[MBB->getNumber()].Offset; - - // Sum instructions before MI in MBB. - for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) { - assert(I != MBB->end() && "Didn't find MI in its own basic block?"); - Offset += TII->GetInstSizeInBytes(I); - } - return Offset; -} - -void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock *Start) { - unsigned PrevNum = Start->getNumber(); - MachineFunction::iterator MBBI = Start, E = MF->end(); - for (++MBBI; MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - unsigned Num = MBB->getNumber(); - if (!Num) // block zero is never changed from offset zero. - continue; - // Get the offset and known bits at the end of the layout predecessor. - // Include the alignment of the current block. - unsigned LogAlign = MBBI->getAlignment(); - BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign); - PrevNum = Num; - } -} - -/// Split the basic block containing MI into two blocks, which are joined by -/// an unconditional branch. Update data structures and renumber blocks to -/// account for this change and returns the newly created block. -/// NOTE: Successor list of the original BB is out of date after this function, -/// and must be updated by the caller! Other transforms follow using this -/// utility function, so no point updating now rather than waiting. -MachineBasicBlock * -ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { - MachineBasicBlock *OrigBB = MI->getParent(); - - // Create a new MBB for the code after the OrigBB. - MachineBasicBlock *NewBB = - MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; - ++MBBI; - MF->insert(MBBI, NewBB); - - // Splice the instructions starting with MI over to NewBB. - NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); - - // Add an unconditional branch from OrigBB to NewBB. - // Note the new unconditional branch is not being recorded. - // There doesn't seem to be meaningful DebugInfo available; this doesn't - // correspond to anything in the source. - BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB); - - // Insert an entry into BlockInfo to align it properly with the block numbers. - BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); - - // Figure out how large the OrigBB is. As the first half of the original - // block, it cannot contain a tablejump. The size includes - // the new jump we added. (It should be possible to do this without - // recounting everything, but it's very confusing, and this is rarely - // executed.) - computeBlockSize(OrigBB); - - // Figure out how large the NewMBB is. As the second half of the original - // block, it may contain a tablejump. - computeBlockSize(NewBB); - - // All BBOffsets following these blocks must be modified. - adjustBlockOffsets(OrigBB); - - ++NumSplit; - - return NewBB; -} - -/// isBlockInRange - Returns true if the distance between specific MI and -/// specific BB can fit in MI's displacement field. 
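-/// As a worked example (illustrative only): Bcc encodes a 19-bit signed
-/// word offset, so Bits == 19 yields
-/// MaxOffs = ((1 << 18) - 1) << 2 = 0xffffc bytes, i.e. roughly +/-1MB of
-/// code is reachable from the branch.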
-bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI, - MachineBasicBlock *DestBB, - unsigned Bits) { - unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2; - unsigned BrOffset = getInstrOffset(MI); - unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset; - - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " max delta=" << MaxOffs << " from " << getInstrOffset(MI) - << " to " << DestOffset << " offset " - << int(DestOffset - BrOffset) << "\t" << *MI); - - // Branch before the Dest. - if (BrOffset <= DestOffset) - return (DestOffset - BrOffset <= MaxOffs); - return (BrOffset - DestOffset <= MaxOffs); -} - -static bool isConditionalBranch(unsigned Opc) { - switch (Opc) { - default: - return false; - case ARM64::TBZ: - case ARM64::TBNZ: - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: - return true; - } -} - -static MachineBasicBlock *getDestBlock(MachineInstr *MI) { - switch (MI->getOpcode()) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBZ: - case ARM64::TBNZ: - return MI->getOperand(2).getMBB(); - case ARM64::CBZW: - case ARM64::CBNZW: - case ARM64::CBZX: - case ARM64::CBNZX: - case ARM64::Bcc: - return MI->getOperand(1).getMBB(); - } -} - -static unsigned getOppositeConditionOpcode(unsigned Opc) { - switch (Opc) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBNZ: return ARM64::TBZ; - case ARM64::TBZ: return ARM64::TBNZ; - case ARM64::CBNZW: return ARM64::CBZW; - case ARM64::CBNZX: return ARM64::CBZX; - case ARM64::CBZW: return ARM64::CBNZW; - case ARM64::CBZX: return ARM64::CBNZX; - case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc. - } -} - -static unsigned getBranchDisplacementBits(unsigned Opc) { - switch (Opc) { - default: - assert(0 && "unexpected opcode!"); - case ARM64::TBNZ: - case ARM64::TBZ: - return TBZDisplacementBits; - case ARM64::CBNZW: - case ARM64::CBZW: - case ARM64::CBNZX: - case ARM64::CBZX: - return CBZDisplacementBits; - case ARM64::Bcc: - return BCCDisplacementBits; - } -} - -static inline void invertBccCondition(MachineInstr *MI) { - assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!"); - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm(); - CC = ARM64CC::getInvertedCondCode(CC); - MI->getOperand(0).setImm((int64_t)CC); -} - -/// fixupConditionalBranch - Fix up a conditional branch whose destination is -/// too far away to fit in its displacement field. It is converted to an inverse -/// conditional branch + an unconditional branch to the destination. -bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { - MachineBasicBlock *DestBB = getDestBlock(MI); - - // Add an unconditional branch to the destination and invert the branch - // condition to jump over it: - // tbz L1 - // => - // tbnz L2 - // b L1 - // L2: - - // If the branch is at the end of its MBB and that has a fall-through block, - // direct the updated conditional branch to the fall-through block. Otherwise, - // split the MBB before the next instruction. - MachineBasicBlock *MBB = MI->getParent(); - MachineInstr *BMI = &MBB->back(); - bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - - if (BMI != MI) { - if (std::next(MachineBasicBlock::iterator(MI)) == - std::prev(MBB->getLastNonDebugInstr()) && - BMI->getOpcode() == ARM64::B) { - // Last MI in the BB is an unconditional branch. 
Can we simply invert the - // condition and swap destinations: - // beq L1 - // b L2 - // => - // bne L2 - // b L1 - MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB(); - if (isBlockInRange(MI, NewDest, - getBranchDisplacementBits(MI->getOpcode()))) { - DEBUG(dbgs() << " Invert condition and swap its destination with " - << *BMI); - BMI->getOperand(0).setMBB(DestBB); - unsigned OpNum = - (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ) - ? 2 - : 1; - MI->getOperand(OpNum).setMBB(NewDest); - MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode()))); - if (MI->getOpcode() == ARM64::Bcc) - invertBccCondition(MI); - return true; - } - } - } - - if (NeedSplit) { - // Analyze the branch so we know how to update the successor lists. - MachineBasicBlock *TBB, *FBB; - SmallVector Cond; - TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, false); - - MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI); - // No need for the branch to the next block. We're adding an unconditional - // branch to the destination. - int delta = TII->GetInstSizeInBytes(&MBB->back()); - BlockInfo[MBB->getNumber()].Size -= delta; - MBB->back().eraseFromParent(); - // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below - - // Update the successor lists according to the transformation to follow. - // Do it here since if there's no split, no update is needed. - MBB->replaceSuccessor(FBB, NewBB); - NewBB->addSuccessor(FBB); - } - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); - - DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() - << ", invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"); - - // Insert a new conditional branch and a new unconditional branch. - MachineInstrBuilder MIB = BuildMI( - MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode()))) - .addOperand(MI->getOperand(0)); - if (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ) - MIB.addOperand(MI->getOperand(1)); - if (MI->getOpcode() == ARM64::Bcc) - invertBccCondition(MIB); - MIB.addMBB(NextBB); - BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); - BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB); - BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back()); - - // Remove the old conditional branch. It may or may not still be in MBB. - BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI); - MI->eraseFromParent(); - - // Finally, keep the block offsets up to date. - adjustBlockOffsets(MBB); - return true; -} - -bool ARM64BranchRelaxation::relaxBranchInstructions() { - bool Changed = false; - // Relaxing branches involves creating new basic blocks, so re-eval - // end() for termination. - for (auto &MBB : *MF) { - MachineInstr *MI = MBB.getFirstTerminator(); - if (isConditionalBranch(MI->getOpcode()) && - !isBlockInRange(MI, getDestBlock(MI), - getBranchDisplacementBits(MI->getOpcode()))) { - fixupConditionalBranch(MI); - ++NumRelaxed; - Changed = true; - } - } - return Changed; -} - -bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - - // If the pass is disabled, just bail early. - if (!BranchRelaxation) - return false; - - DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n"); - - TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo(); - - // Renumber all of the machine basic blocks in the function, guaranteeing that - // the numbers agree with the position of the block in the function. 
-  MF->RenumberBlocks();
-
-  // Do the initial scan of the function, building up information about the
-  // sizes of each block.
-  scanFunction();
-
-  DEBUG(dbgs() << "  Basic blocks before relaxation\n");
-  DEBUG(dumpBBs());
-
-  bool MadeChange = false;
-  while (relaxBranchInstructions())
-    MadeChange = true;
-
-  // After a while, this might be made debug-only, but it is not expensive.
-  verify();
-
-  DEBUG(dbgs() << "  Basic blocks after relaxation\n");
-  DEBUG(dbgs() << '\n'; dumpBBs());
-
-  BlockInfo.clear();
-
-  return MadeChange;
-}
-
-/// createARM64BranchRelaxation - returns an instance of the branch
-/// relaxation pass.
-FunctionPass *llvm::createARM64BranchRelaxation() {
-  return new ARM64BranchRelaxation();
-}
diff --git a/lib/Target/ARM64/ARM64CallingConv.h b/lib/Target/ARM64/ARM64CallingConv.h
deleted file mode 100644
index 0128236..0000000
--- a/lib/Target/ARM64/ARM64CallingConv.h
+++ /dev/null
@@ -1,94 +0,0 @@
-//=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the custom routines for the ARM64 Calling Convention that
-// aren't done by tablegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64CALLINGCONV_H
-#define ARM64CALLINGCONV_H
-
-#include "ARM64InstrInfo.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-namespace llvm {
-
-/// CC_ARM64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via
-/// register. Here, ValVT can be i1/i8/i16 or i32 depending on whether the
-/// argument is already promoted and LocVT is i1/i8/i16. We only promote the
-/// argument to i32 if we are sure this argument will be passed in a register.
-static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT,
-                                        CCValAssign::LocInfo LocInfo,
-                                        ISD::ArgFlagsTy ArgFlags,
-                                        CCState &State,
-                                        bool IsWebKitJS = false) {
-  static const uint16_t RegList1[] = { ARM64::W0, ARM64::W1, ARM64::W2,
-                                       ARM64::W3, ARM64::W4, ARM64::W5,
-                                       ARM64::W6, ARM64::W7 };
-  static const uint16_t RegList2[] = { ARM64::X0, ARM64::X1, ARM64::X2,
-                                       ARM64::X3, ARM64::X4, ARM64::X5,
-                                       ARM64::X6, ARM64::X7 };
-  static const uint16_t WebKitRegList1[] = { ARM64::W0 };
-  static const uint16_t WebKitRegList2[] = { ARM64::X0 };
-
-  const uint16_t *List1 = IsWebKitJS ? WebKitRegList1 : RegList1;
-  const uint16_t *List2 = IsWebKitJS ? WebKitRegList2 : RegList2;
-
-  if (unsigned Reg = State.AllocateReg(List1, List2, 8)) {
-    // Customized extra section for handling i1/i8/i16:
-    // We need to promote the argument to i32 if it is not done already.
-    if (ValVT != MVT::i32) {
-      if (ArgFlags.isSExt())
-        LocInfo = CCValAssign::SExt;
-      else if (ArgFlags.isZExt())
-        LocInfo = CCValAssign::ZExt;
-      else
-        LocInfo = CCValAssign::AExt;
-      ValVT = MVT::i32;
-    }
-    // Set LocVT to i32 as well if passing via register.
-    LocVT = MVT::i32;
-    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
-    return true;
-  }
-  return false;
-}
-
-/// CC_ARM64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16
-/// via register. This behaves the same as CC_ARM64_Custom_i1i8i16_Reg, but
-/// only uses the first register.
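-/// As an illustration (hypothetical C signature): for `void f(short s)`,
-/// the i16 argument is widened to i32 with the extension implied by its
-/// attributes (SExt/ZExt/AExt), W0 is allocated from List1, and the
-/// resulting CCValAssign records LocVT == i32, so the callee receives a
-/// properly extended value in w0.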
-static bool CC_ARM64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State) { - return CC_ARM64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags, - State, true); -} - -/// CC_ARM64_Custom_i1i8i16_Stack: customized handling of passing i1/i8/i16 on -/// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument -/// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted, -/// it will be truncated back to i1/i8/i16. -static bool CC_ARM64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT, - CCValAssign::LocInfo LocInfo, - ISD::ArgFlagsTy ArgFlags, - CCState &State) { - unsigned Space = ((LocVT == MVT::i1 || LocVT == MVT::i8) ? 1 : 2); - unsigned Offset12 = State.AllocateStack(Space, Space); - ValVT = LocVT; - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset12, LocVT, LocInfo)); - return true; -} - -} // End llvm namespace - -#endif diff --git a/lib/Target/ARM64/ARM64CallingConvention.td b/lib/Target/ARM64/ARM64CallingConvention.td deleted file mode 100644 index 9ac888f..0000000 --- a/lib/Target/ARM64/ARM64CallingConvention.td +++ /dev/null @@ -1,210 +0,0 @@ -//===- ARM64CallingConv.td - Calling Conventions for ARM64 -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for ARM64 architecture. -// -//===----------------------------------------------------------------------===// - -/// CCIfAlign - Match of the original alignment of the arg -class CCIfAlign : - CCIf; - -//===----------------------------------------------------------------------===// -// ARM AAPCS64 Calling Convention -//===----------------------------------------------------------------------===// - -def CC_ARM64_AAPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, - - // An SRet is passed in X8, not X0 like a normal pointer parameter. - CCIfSRet>>, - - // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, - // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - // i128 is split to two i64s, we can't fit half to register X7. - CCIfType<[i64], CCIfSplit>>, - - // i128 is split to two i64s, and its stack alignment is 16 bytes. - CCIfType<[i64], CCIfSplit>>, - - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // If more than will fit in registers, pass them on the stack instead. 
- CCIfType<[i1, i8, i16], CCAssignToStack<8, 8>>, - CCIfType<[i32, f32], CCAssignToStack<8, 8>>, - CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -def RetCC_ARM64_AAPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, - - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> -]>; - - -// Darwin uses a calling convention which differs in only two ways -// from the standard one at this level: -// + i128s (i.e. split i64s) don't need even registers. -// + Stack slots are sized as needed rather than being at least 64-bit. -def CC_ARM64_DarwinPCS : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, - - // An SRet is passed in X8, not X0 like a normal pointer parameter. - CCIfSRet>>, - - // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, - // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - // i128 is split to two i64s, we can't fit half to register X7. - CCIfType<[i64], - CCIfSplit>>, - // i128 is split to two i64s, and its stack alignment is 16 bytes. - CCIfType<[i64], CCIfSplit>>, - - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], - CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], - CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - - // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Stack">>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], - CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -def CC_ARM64_DarwinPCS_VarArg : CallingConv<[ - CCIfType<[v2f32], CCBitConvertToType>, - CCIfType<[v2f64, v4f32, f128], CCBitConvertToType>, - - // Handle all scalar types as either i64 or f64. - CCIfType<[i8, i16, i32], CCPromoteToType>, - CCIfType<[f32], CCPromoteToType>, - - // Everything is on the stack. - // i128 is split to two i64s, and its stack alignment is 16 bytes. 
- CCIfType<[i64], CCIfSplit>>, - CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32], CCAssignToStack<8, 8>>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>> -]>; - -// The WebKit_JS calling convention only passes the first argument (the callee) -// in register and the remaining arguments on stack. We allow 32bit stack slots, -// so that WebKit can write partial values in the stack and define the other -// 32bit quantity as undef. -def CC_ARM64_WebKit_JS : CallingConv<[ - // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_WebKit_JS_i1i8i16_Reg">>, - CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, - - // Pass the remaining arguments on the stack instead. - CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>, - CCIfType<[i32, f32], CCAssignToStack<4, 4>>, - CCIfType<[i64, f64], CCAssignToStack<8, 8>> -]>; - -def RetCC_ARM64_WebKit_JS : CallingConv<[ - CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], - [X0, X1, X2, X3, X4, X5, X6, X7]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7], - [W0, W1, W2, W3, W4, W5, W6, W7]>>, - CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, - CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7], - [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>> -]>; - -// FIXME: LR is only callee-saved in the sense that *we* preserve it and are -// presumably a callee to someone. External functions may not do so, but this -// is currently safe since BL has LR as an implicit-def and what happens after a -// tail call doesn't matter. -// -// It would be better to model its preservation semantics properly (create a -// vreg on entry, use it in RET & tail call generation; make that vreg def if we -// end up saving LR as part of a call frame). Watch this space... -def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22, - X23, X24, X25, X26, X27, X28, - D8, D9, D10, D11, - D12, D13, D14, D15)>; - -// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since -// 'this' and the pointer return value are both passed in X0 in these cases, -// this can be partially modelled by treating X0 as a callee-saved register; -// only the resulting RegMask is used; the SaveList is ignored -// -// (For generic ARM 64-bit ABI code, clang will not generate constructors or -// destructors with 'this' returns, so this RegMask will not be used in that -// case) -def CSR_ARM64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_ARM64_AAPCS, X0)>; - -// The function used by Darwin to obtain the address of a thread-local variable -// guarantees more than a normal AAPCS function. x16 and x17 are used on the -// fast path for calculation, but other registers except X0 (argument/return) -// and LR (it is a call, after all) are preserved. -def CSR_ARM64_TLS_Darwin - : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17), - FP, - (sequence "Q%u", 0, 31))>; - -// The ELF stub used for TLS-descriptor access saves every feasible -// register. Only X0 and LR are clobbered. 
-def CSR_ARM64_TLS_ELF
-    : CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP,
-                           (sequence "Q%u", 0, 31))>;
-
-def CSR_ARM64_AllRegs
-    : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP,
-                           (sequence "X%u", 0, 28), FP, LR, SP,
-                           (sequence "B%u", 0, 31), (sequence "H%u", 0, 31),
-                           (sequence "S%u", 0, 31), (sequence "D%u", 0, 31),
-                           (sequence "Q%u", 0, 31))>;
-
diff --git a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp b/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp
deleted file mode 100644
index e3f8248..0000000
--- a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-//===-- ARM64CleanupLocalDynamicTLSPass.cpp -----------------------*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Local-dynamic access to thread-local variables proceeds in three stages.
-//
-// 1. The offset of this Module's thread-local area from TPIDR_EL0 is
-//    calculated in much the same way as a general-dynamic TLS-descriptor
-//    access against the special symbol _TLS_MODULE_BASE_.
-// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated using
-//    instructions with "dtprel" modifiers.
-// 3. These two are added, together with TPIDR_EL0, to obtain the variable's
-//    true address.
-//
-// This is only better than general-dynamic access to the variable if two or
-// more of the first stage TLS-descriptor calculations can be combined. This
-// pass looks through a function and performs such combinations.
-//
-//===----------------------------------------------------------------------===//
-#include "ARM64.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64MachineFunctionInfo.h"
-#include "ARM64TargetMachine.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-using namespace llvm;
-
-namespace {
-struct LDTLSCleanup : public MachineFunctionPass {
-  static char ID;
-  LDTLSCleanup() : MachineFunctionPass(ID) {}
-
-  virtual bool runOnMachineFunction(MachineFunction &MF) {
-    ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
-    if (AFI->getNumLocalDynamicTLSAccesses() < 2) {
-      // No point folding accesses if there aren't at least two.
-      return false;
-    }
-
-    MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
-    return VisitNode(DT->getRootNode(), 0);
-  }
-
-  // Visit the dominator subtree rooted at Node in pre-order.
-  // If TLSBaseAddrReg is non-null, then use that to replace any
-  // TLS_base_addr instructions. Otherwise, create the register
-  // when the first such instruction is seen, and then use it
-  // as we encounter more instructions.
-  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
-    MachineBasicBlock *BB = Node->getBlock();
-    bool Changed = false;
-
-    // Traverse the current block.
-    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
-         ++I) {
-      switch (I->getOpcode()) {
-      case ARM64::TLSDESC_BLR:
-        // Make sure it's a local dynamic access.
-        if (!I->getOperand(1).isSymbol() ||
-            strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
-          break;
-
-        if (TLSBaseAddrReg)
-          I = replaceTLSBaseAddrCall(I, TLSBaseAddrReg);
-        else
-          I = setRegister(I, &TLSBaseAddrReg);
-        Changed = true;
-        break;
-      default:
-        break;
-      }
-    }
-
-    // Visit the children of this block in the dominator tree.
-    for (MachineDomTreeNode *N : *Node) {
-      Changed |= VisitNode(N, TLSBaseAddrReg);
-    }
-
-    return Changed;
-  }
-
-  // Replace the TLS_base_addr instruction I with a copy from
-  // TLSBaseAddrReg, returning the new instruction.
-  MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
-                                       unsigned TLSBaseAddrReg) {
-    MachineFunction *MF = I->getParent()->getParent();
-    const ARM64TargetMachine *TM =
-        static_cast<const ARM64TargetMachine *>(&MF->getTarget());
-    const ARM64InstrInfo *TII = TM->getInstrInfo();
-
-    // Insert a copy from TLSBaseAddrReg to x0, which is where the rest of the
-    // code sequence assumes the address will be.
-    MachineInstr *Copy =
-        BuildMI(*I->getParent(), I, I->getDebugLoc(),
-                TII->get(TargetOpcode::COPY), ARM64::X0).addReg(TLSBaseAddrReg);
-
-    // Erase the TLS_base_addr instruction.
-    I->eraseFromParent();
-
-    return Copy;
-  }
-
-  // Create a virtual register in *TLSBaseAddrReg, and populate it by
-  // inserting a copy instruction after I. Returns the new instruction.
-  MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
-    MachineFunction *MF = I->getParent()->getParent();
-    const ARM64TargetMachine *TM =
-        static_cast<const ARM64TargetMachine *>(&MF->getTarget());
-    const ARM64InstrInfo *TII = TM->getInstrInfo();
-
-    // Create a virtual register for the TLS base address.
-    MachineRegisterInfo &RegInfo = MF->getRegInfo();
-    *TLSBaseAddrReg = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
-
-    // Insert a copy from X0 to TLSBaseAddrReg for later.
-    MachineInstr *Next = I->getNextNode();
-    MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
-                                 TII->get(TargetOpcode::COPY),
-                                 *TLSBaseAddrReg).addReg(ARM64::X0);
-
-    return Copy;
-  }
-
-  virtual const char *getPassName() const {
-    return "Local Dynamic TLS Access Clean-up";
-  }
-
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.setPreservesCFG();
-    AU.addRequired<MachineDominatorTree>();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-};
-}
-
-char LDTLSCleanup::ID = 0;
-FunctionPass *llvm::createARM64CleanupLocalDynamicTLSPass() {
-  return new LDTLSCleanup();
-}
diff --git a/lib/Target/ARM64/ARM64CollectLOH.cpp b/lib/Target/ARM64/ARM64CollectLOH.cpp
deleted file mode 100644
index f52778f..0000000
--- a/lib/Target/ARM64/ARM64CollectLOH.cpp
+++ /dev/null
@@ -1,1157 +0,0 @@
-//===-------------- ARM64CollectLOH.cpp - ARM64 collect LOH pass --*- C++ -*-=//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that collects the Linker Optimization Hints (LOHs).
-// This pass should be run at the very end of the compilation flow, just before
-// assembly printer.
-// To be useful for the linker, the LOHs must be printed into the assembly file.
-//
-// A LOH describes a sequence of instructions that may be optimized by the
-// linker.
-// This same sequence cannot be optimized by the compiler because some of
-// the information will only be known at link time.
-// For instance, consider the following sequence: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: ldr xC, [xB, #imm] -// This sequence can be turned into: -// A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB: -// L3: ldr xC, sym+#imm -// It may also be turned into either the following more efficient -// code sequences: -// - If sym@PAGEOFF + #imm fits the encoding space of L3. -// L1: adrp xA, sym@PAGE -// L3: ldr xC, [xB, sym@PAGEOFF + #imm] -// - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB: -// L1: adr xA, sym -// L3: ldr xC, [xB, #imm] -// -// To be valid a LOH must meet all the requirements needed by all the related -// possible linker transformations. -// For instance, using the running example, the constraints to emit -// ".loh AdrpAddLdr" are: -// - L1, L2, and L3 instructions are of the expected type, i.e., -// respectively ADRP, ADD (immediate), and LD. -// - The result of L1 is used only by L2. -// - The register argument (xA) used in the ADD instruction is defined -// only by L1. -// - The result of L2 is used only by L3. -// - The base address (xB) in L3 is defined only L2. -// - The ADRP in L1 and the ADD in L2 must reference the same symbol using -// @PAGE/@PAGEOFF with no additional constants -// -// Currently supported LOHs are: -// * So called non-ADRP-related: -// - .loh AdrpAddLdr L1, L2, L3: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: ldr xC, [xB, #imm] -// - .loh AdrpLdrGotLdr L1, L2, L3: -// L1: adrp xA, sym@GOTPAGE -// L2: ldr xB, [xA, sym@GOTPAGEOFF] -// L3: ldr xC, [xB, #imm] -// - .loh AdrpLdr L1, L3: -// L1: adrp xA, sym@PAGE -// L3: ldr xC, [xA, sym@PAGEOFF] -// - .loh AdrpAddStr L1, L2, L3: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// L3: str xC, [xB, #imm] -// - .loh AdrpLdrGotStr L1, L2, L3: -// L1: adrp xA, sym@GOTPAGE -// L2: ldr xB, [xA, sym@GOTPAGEOFF] -// L3: str xC, [xB, #imm] -// - .loh AdrpAdd L1, L2: -// L1: adrp xA, sym@PAGE -// L2: add xB, xA, sym@PAGEOFF -// For all these LOHs, L1, L2, L3 form a simple chain: -// L1 result is used only by L2 and L2 result by L3. -// L3 LOH-related argument is defined only by L2 and L2 LOH-related argument -// by L1. -// All these LOHs aim at using more efficient load/store patterns by folding -// some instructions used to compute the address directly into the load/store. -// -// * So called ADRP-related: -// - .loh AdrpAdrp L2, L1: -// L2: ADRP xA, sym1@PAGE -// L1: ADRP xA, sym2@PAGE -// L2 dominates L1 and xA is not redifined between L2 and L1 -// This LOH aims at getting rid of redundant ADRP instructions. -// -// The overall design for emitting the LOHs is: -// 1. ARM64CollectLOH (this pass) records the LOHs in the ARM64FunctionInfo. -// 2. ARM64AsmPrinter reads the LOHs from ARM64FunctionInfo and it: -// 1. Associates them a label. -// 2. Emits them in a MCStreamer (EmitLOHDirective). -// - The MCMachOStreamer records them into the MCAssembler. -// - The MCAsmStreamer prints them. -// - Other MCStreamers ignore them. -// 3. Closes the MCStreamer: -// - The MachObjectWriter gets them from the MCAssembler and writes -// them in the object file. -// - Other ObjectWriters ignore them. 
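-// In the .s file the hint is a directive tying together the labelled
-// instructions, e.g. (illustrative labels and symbol):
-//   Lloh0: adrp x8, _sym@PAGE
-//   Lloh1: add  x8, x8, _sym@PAGEOFF
-//   Lloh2: ldr  x0, [x8]
-//          .loh AdrpAddLdr Lloh0, Lloh1, Lloh2
-// which tells the linker it may rewrite the three instructions into one of
-// the shorter sequences above when the constraints hold.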
-//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-collect-loh" -#include "ARM64.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -static cl::opt -PreCollectRegister("arm64-collect-loh-pre-collect-register", cl::Hidden, - cl::desc("Restrict analysis to registers invovled" - " in LOHs"), - cl::init(true)); - -static cl::opt -BasicBlockScopeOnly("arm64-collect-loh-bb-only", cl::Hidden, - cl::desc("Restrict analysis at basic block scope"), - cl::init(true)); - -STATISTIC(NumADRPSimpleCandidate, - "Number of simplifiable ADRP dominate by another"); -STATISTIC(NumADRPComplexCandidate2, - "Number of simplifiable ADRP reachable by 2 defs"); -STATISTIC(NumADRPComplexCandidate3, - "Number of simplifiable ADRP reachable by 3 defs"); -STATISTIC(NumADRPComplexCandidateOther, - "Number of simplifiable ADRP reachable by 4 or more defs"); -STATISTIC(NumADDToSTRWithImm, - "Number of simplifiable STR with imm reachable by ADD"); -STATISTIC(NumLDRToSTRWithImm, - "Number of simplifiable STR with imm reachable by LDR"); -STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD"); -STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR"); -STATISTIC(NumADDToLDRWithImm, - "Number of simplifiable LDR with imm reachable by ADD"); -STATISTIC(NumLDRToLDRWithImm, - "Number of simplifiable LDR with imm reachable by LDR"); -STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD"); -STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR"); -STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP"); -STATISTIC(NumCplxLvl1, "Number of complex case of level 1"); -STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1"); -STATISTIC(NumCplxLvl2, "Number of complex case of level 2"); -STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2"); -STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD"); -STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD"); - -namespace llvm { -void initializeARM64CollectLOHPass(PassRegistry &); -} - -namespace { -struct ARM64CollectLOH : public MachineFunctionPass { - static char ID; - ARM64CollectLOH() : MachineFunctionPass(ID) { - initializeARM64CollectLOHPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "ARM64 Collect Linker Optimization Hint (LOH)"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); - } - -private: -}; - -/// A set of MachineInstruction. 
-typedef SetVector<const MachineInstr *> SetOfMachineInstr;
-/// Map a basic block to a set of instructions per register.
-/// This is used to represent the exposed uses of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *, SetOfMachineInstr *>
-BlockToSetOfInstrsPerColor;
-/// Map a basic block to an instruction per register.
-/// This is used to represent the live-out definitions of a basic block
-/// per register.
-typedef MapVector<const MachineBasicBlock *, const MachineInstr **>
-BlockToInstrPerColor;
-/// Map an instruction to a set of instructions. Used to represent the
-/// mapping def to reachable uses or use to definitions.
-typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
-/// Map a basic block to a BitVector.
-/// This is used to record the kill registers per basic block.
-typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
-
-/// Map a register to a dense id.
-typedef DenseMap<unsigned, unsigned> MapRegToId;
-/// Map a dense id to a register. Used for debug purposes.
-typedef SmallVector<unsigned, 32> MapIdToReg;
-} // end anonymous namespace.
-
-char ARM64CollectLOH::ID = 0;
-
-INITIALIZE_PASS_BEGIN(ARM64CollectLOH, "arm64-collect-loh",
-                      "ARM64 Collect Linker Optimization Hint (LOH)", false,
-                      false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh",
-                    "ARM64 Collect Linker Optimization Hint (LOH)", false,
-                    false)
-
-/// Given a couple (MBB, reg) get the corresponding set of instructions from
-/// the given "sets".
-/// If this couple does not reference any set, an empty set is added to "sets"
-/// for this couple and returned.
-/// \param nbRegs is used internally to allocate some memory. It must be
-/// consistent with the way sets is used.
-static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
-                                 const MachineBasicBlock *MBB, unsigned reg,
-                                 unsigned nbRegs) {
-  SetOfMachineInstr *result;
-  BlockToSetOfInstrsPerColor::iterator it = sets.find(MBB);
-  if (it != sets.end()) {
-    result = it->second;
-  } else {
-    result = sets[MBB] = new SetOfMachineInstr[nbRegs];
-  }
-
-  return result[reg];
-}
-
-/// Given a couple (reg, MI) get the corresponding set of instructions from
-/// the given "sets".
-/// This is used to get the uses recorded in sets of a definition identified
-/// by MI and reg, i.e., MI defines reg.
-/// If the couple does not reference anything, an empty set is added to
-/// "sets[reg]".
-/// \pre set[reg] is valid.
-static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
-                                  const MachineInstr *MI) {
-  return sets[reg][MI];
-}
-
-/// Same as getUses but does not modify the input map: sets.
-/// \return NULL if the couple (reg, MI) is not in sets.
-static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
-                                        const MachineInstr *MI) {
-  InstrToInstrs::const_iterator Res = sets[reg].find(MI);
-  if (Res != sets[reg].end())
-    return &(Res->second);
-  return NULL;
-}
-
-/// Initialize the reaching definition algorithm:
-/// For each basic block BB in MF, record:
-/// - its kill set.
-/// - its reachable uses (uses that are exposed to BB's predecessors).
-/// - the definitions it generates.
-/// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
-/// the list of uses of exposed definitions.
-/// \param ADRPMode specifies to only consider ADRP instructions for generated
-/// definition. It also considers definitions of ADRP instructions as uses and
-/// ignores other uses. The ADRPMode is used to collect the information for
-/// LOHs that involve only ADRP operations.
-static void initReachingDef(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - const MapRegToId &RegToId, - const MachineInstr *DummyOp, bool ADRPMode) { - const TargetMachine &TM = MF->getTarget(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - unsigned NbReg = RegToId.size(); - - for (MachineFunction::const_iterator IMBB = MF->begin(), IMBBEnd = MF->end(); - IMBB != IMBBEnd; ++IMBB) { - const MachineBasicBlock *MBB = &(*IMBB); - const MachineInstr **&BBGen = Gen[MBB]; - BBGen = new const MachineInstr *[NbReg]; - memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg); - - BitVector &BBKillSet = Kill[MBB]; - BBKillSet.resize(NbReg); - for (MachineBasicBlock::const_iterator II = MBB->begin(), IEnd = MBB->end(); - II != IEnd; ++II) { - bool IsADRP = II->getOpcode() == ARM64::ADRP; - - // Process uses first. - if (IsADRP || !ADRPMode) - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - // Treat ADRP def as use, as the goal of the analysis is to find - // ADRP defs reached by other ADRP defs. - if (!IO->isReg() || (!ADRPMode && !IO->isUse()) || - (ADRPMode && (!IsADRP || !IO->isDef()))) - continue; - unsigned CurReg = IO->getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - CurReg = ItCurRegId->second; - - // if CurReg has not been defined, this use is reachable. - if (!BBGen[CurReg] && !BBKillSet.test(CurReg)) - getSet(ReachableUses, MBB, CurReg, NbReg).insert(&(*II)); - // current basic block definition for this color, if any, is in Gen. - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(&(*II)); - } - - // Process clobbers. - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isRegMask()) - continue; - // Clobbers kill the related colors. - const uint32_t *PreservedRegs = IO->getRegMask(); - - // Set generated regs. - for (const auto Entry : RegToId) { - unsigned Reg = Entry.second; - // Use the global register ID when querying APIs external to this - // pass. - if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) { - // Do not register clobbered definition for no ADRP. - // This definition is not used anyway (otherwise register - // allocation is wrong). - BBGen[Reg] = ADRPMode ? II : NULL; - BBKillSet.set(Reg); - } - } - } - - // Process defs - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg); - if (ItCurRegId == RegToId.end()) - continue; - - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { - MapRegToId::const_iterator ItRegId = RegToId.find(*AI); - assert(ItRegId != RegToId.end() && - "Sub-register of an " - "involved register, not recorded as involved!"); - BBKillSet.set(ItRegId->second); - BBGen[ItRegId->second] = &(*II); - } - BBGen[ItCurRegId->second] = &(*II); - } - } - - // If we restrict our analysis to basic block scope, conservatively add a - // dummy - // use for each generated value. 
- if (!ADRPMode && DummyOp && !MBB->succ_empty()) - for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) - if (BBGen[CurReg]) - getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(DummyOp); - } -} - -/// Reaching def core algorithm: -/// while an Out has changed -/// for each bb -/// for each color -/// In[bb][color] = U Out[bb.predecessors][color] -/// insert reachableUses[bb][color] in each in[bb][color] -/// op.reachedUses -/// -/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) -static void reachingDefAlgorithm(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, BlockToRegSet &Kill, - BlockToSetOfInstrsPerColor &ReachableUses, - unsigned NbReg) { - bool HasChanged; - do { - HasChanged = false; - for (MachineFunction::const_iterator IMBB = MF->begin(), - IMBBEnd = MF->end(); - IMBB != IMBBEnd; ++IMBB) { - const MachineBasicBlock *MBB = &(*IMBB); - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg); - SetOfMachineInstr &BBReachableUses = - getSet(ReachableUses, MBB, CurReg, NbReg); - SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg); - unsigned Size = BBOutSet.size(); - // In[bb][color] = U Out[bb.predecessors][color] - for (MachineBasicBlock::const_pred_iterator - PredMBB = MBB->pred_begin(), - EndPredMBB = MBB->pred_end(); - PredMBB != EndPredMBB; ++PredMBB) { - SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg); - BBInSet.insert(PredOutSet.begin(), PredOutSet.end()); - } - // insert reachableUses[bb][color] in each in[bb][color] op.reachedses - for (const MachineInstr *MI: BBInSet) { - SetOfMachineInstr &OpReachedUses = - getUses(ColorOpToReachedUses, CurReg, MI); - OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end()); - } - // Out[bb] = Gen[bb] U (In[bb] - Kill[bb]) - if (!Kill[MBB].test(CurReg)) - BBOutSet.insert(BBInSet.begin(), BBInSet.end()); - if (Gen[MBB][CurReg]) - BBOutSet.insert(Gen[MBB][CurReg]); - HasChanged |= BBOutSet.size() != Size; - } - } - } while (HasChanged); -} - -/// Release all memory dynamically allocated during the reaching -/// definition algorithm. -static void finitReachingDef(BlockToSetOfInstrsPerColor &In, - BlockToSetOfInstrsPerColor &Out, - BlockToInstrPerColor &Gen, - BlockToSetOfInstrsPerColor &ReachableUses) { - for (BlockToSetOfInstrsPerColor::const_iterator IT = Out.begin(), - End = Out.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToSetOfInstrsPerColor::const_iterator IT = In.begin(), - End = In.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToSetOfInstrsPerColor::const_iterator IT = ReachableUses.begin(), - End = ReachableUses.end(); - IT != End; ++IT) - delete[] IT->second; - for (BlockToInstrPerColor::const_iterator IT = Gen.begin(), End = Gen.end(); - IT != End; ++IT) - delete[] IT->second; -} - -/// Reaching definiton algorithm. -/// \param MF function on which the algorithm will operate. -/// \param[out] ColorOpToReachedUses will contain the result of the reaching -/// def algorithm. -/// \param ADRPMode specify whether the reaching def algorithm should be tuned -/// for ADRP optimization. \see initReachingDef for more details. -/// \param DummyOp if not NULL, the algorithm will work at -/// basic block scope and will set for every exposed defintion a use to -/// @p DummyOp. -/// \pre ColorOpToReachedUses is an array of at least number of registers of -/// InstrToInstrs. 
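-// A small worked example of the core algorithm above (hypothetical CFG):
-// if BB0 defines color xA at instruction I0 and falls through to BB1,
-// which uses xA and does not kill it, the iteration reaches the fixed
-// point In[BB1][xA] = Out[BB0][xA] = {I0}, so BB1's exposed use of xA
-// lands in I0's reached-uses set. The loop terminates because every
-// In/Out set only ever grows and is bounded by the instruction count.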
-static void reachingDef(MachineFunction *MF, - InstrToInstrs *ColorOpToReachedUses, - const MapRegToId &RegToId, bool ADRPMode = false, - const MachineInstr *DummyOp = NULL) { - // structures: - // For each basic block. - // Out: a set per color of definitions that reach the - // out boundary of this block. - // In: Same as Out but for in boundary. - // Gen: generated color in this block (one operation per color). - // Kill: register set of killed color in this block. - // ReachableUses: a set per color of uses (operation) reachable - // for "In" definitions. - BlockToSetOfInstrsPerColor Out, In, ReachableUses; - BlockToInstrPerColor Gen; - BlockToRegSet Kill; - - // Initialize Gen, kill and reachableUses. - initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId, - DummyOp, ADRPMode); - - // Algo. - if (!DummyOp) - reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill, - ReachableUses, RegToId.size()); - - // finit. - finitReachingDef(In, Out, Gen, ReachableUses); -} - -#ifndef NDEBUG -/// print the result of the reaching definition algorithm. -static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses, - unsigned NbReg, const TargetRegisterInfo *TRI, - const MapIdToReg &IdToReg) { - unsigned CurReg; - for (CurReg = 0; CurReg < NbReg; ++CurReg) { - if (ColorOpToReachedUses[CurReg].empty()) - continue; - DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n"); - - InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin(); - InstrToInstrs::const_iterator DefsItEnd = - ColorOpToReachedUses[CurReg].end(); - for (; DefsIt != DefsItEnd; ++DefsIt) { - DEBUG(dbgs() << "Def:\n"); - DEBUG(DefsIt->first->print(dbgs())); - DEBUG(dbgs() << "Reachable uses:\n"); - for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(), - UsesItEnd = DefsIt->second.end(); - UsesIt != UsesItEnd; ++UsesIt) { - DEBUG((*UsesIt)->print(dbgs())); - } - } - } -} -#endif // NDEBUG - -/// Answer the following question: Can Def be one of the definition -/// involved in a part of a LOH? -static bool canDefBePartOfLOH(const MachineInstr *Def) { - unsigned Opc = Def->getOpcode(); - // Accept ADRP, ADDLow and LOADGot. - switch (Opc) { - default: - return false; - case ARM64::ADRP: - return true; - case ARM64::ADDXri: - // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_JumpTableIndex: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_BlockAddress: - return true; - } - case ARM64::LDRXui: - // Check immediate to see if the immediate is an address. - switch (Def->getOperand(2).getType()) { - default: - return false; - case MachineOperand::MO_GlobalAddress: - return true; - } - } - // Unreachable. - return false; -} - -/// Check whether the given instruction can the end of a LOH chain involving a -/// store. -static bool isCandidateStore(const MachineInstr *Instr) { - switch (Instr->getOpcode()) { - default: - return false; - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - // In case we have str xA, [xA, #imm], this is two different uses - // of xA and we cannot fold, otherwise the xA stored may be wrong, - // even if #imm == 0. 
-    if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
-      return true;
-  }
-  return false;
-}
-
-/// Given the result of a reaching definition algorithm in
-/// ColorOpToReachedUses, build the use-to-defs information and filter out
-/// obvious non-LOH candidates.
-/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
-/// In non-ADRPMode, non-LOH candidates are "uses" with several definitions,
-/// i.e., no simple chain.
-/// \param ADRPMode -- \see initReachingDef.
-static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
-                              const InstrToInstrs *ColorOpToReachedUses,
-                              const MapRegToId &RegToId,
-                              bool ADRPMode = false) {
-
-  SetOfMachineInstr NotCandidate;
-  unsigned NbReg = RegToId.size();
-  MapRegToId::const_iterator EndIt = RegToId.end();
-  for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
-    // If this color is never defined, continue.
-    if (ColorOpToReachedUses[CurReg].empty())
-      continue;
-
-    InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin();
-    InstrToInstrs::const_iterator DefsItEnd =
-        ColorOpToReachedUses[CurReg].end();
-    for (; DefsIt != DefsItEnd; ++DefsIt) {
-      for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(),
-                                             UsesItEnd = DefsIt->second.end();
-           UsesIt != UsesItEnd; ++UsesIt) {
-        const MachineInstr *Def = DefsIt->first;
-        MapRegToId::const_iterator It;
-        // If the reaching def is not ADRP (in ADRPMode), this use will not
-        // be simplifiable.
-        if ((ADRPMode && Def->getOpcode() != ARM64::ADRP) ||
-            (!ADRPMode && !canDefBePartOfLOH(Def)) ||
-            (!ADRPMode && isCandidateStore(*UsesIt) &&
-             // Stores are LOH candidates iff the end of the chain is used
-             // as the base.
-             ((It = RegToId.find((*UsesIt)->getOperand(1).getReg())) == EndIt ||
-              It->second != CurReg))) {
-          NotCandidate.insert(*UsesIt);
-          continue;
-        }
-        // Do not consider self reaching as a simplifiable case for ADRP.
-        if (!ADRPMode || *UsesIt != DefsIt->first) {
-          UseToReachingDefs[*UsesIt].insert(DefsIt->first);
-          // If UsesIt has several reaching definitions, it is not a
-          // candidate for simplification in non-ADRPMode.
-          if (!ADRPMode && UseToReachingDefs[*UsesIt].size() > 1)
-            NotCandidate.insert(*UsesIt);
-        }
-      }
-    }
-  }
-  for (const MachineInstr *Elem : NotCandidate) {
-    DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
-    // It would have been better if we could just remove the entry
-    // from the map. Because we cannot, we have to filter out the garbage
-    // (second.empty) in the subsequent analyses.
-    UseToReachingDefs[Elem].clear();
-  }
-}
-
-/// Based on the use-to-defs information (in ADRPMode), compute the
-/// ADRP-related LOH opportunities.
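A brief aside before computeADRP below: the inversion reachedUsesToDefs performs is, at its core, flipping a multimap. A self-contained sketch with plain std::map/std::set standing in for the pass's InstrToInstrs type (names illustrative, not from this patch):

    #include <map>
    #include <set>

    using Multimap = std::map<int, std::set<int>>; // instr id -> instr ids

    // def -> {reached uses}  becomes  use -> {reaching defs}.
    Multimap invert(const Multimap &DefToUses) {
      Multimap UseToDefs;
      for (const auto &Entry : DefToUses)
        for (int Use : Entry.second)
          UseToDefs[Use].insert(Entry.first);
      return UseToDefs;
    }

The real function additionally drops uses whose defs cannot start a LOH chain, which is what keeps the later chain walks simple.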
-static void computeADRP(const InstrToInstrs &UseToDefs,
-                        ARM64FunctionInfo &ARM64FI,
-                        const MachineDominatorTree *MDT) {
-  DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
-  for (const auto &Entry : UseToDefs) {
-    unsigned Size = Entry.second.size();
-    if (Size == 0)
-      continue;
-    if (Size == 1) {
-      const MachineInstr *L2 = *Entry.second.begin();
-      const MachineInstr *L1 = Entry.first;
-      if (!MDT->dominates(L2, L1)) {
-        DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
-                     << '\n');
-        continue;
-      }
-      DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
-      SmallVector<const MachineInstr *, 2> Args;
-      Args.push_back(L2);
-      Args.push_back(L1);
-      ARM64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
-      ++NumADRPSimpleCandidate;
-    }
-#ifdef DEBUG
-    else if (Size == 2)
-      ++NumADRPComplexCandidate2;
-    else if (Size == 3)
-      ++NumADRPComplexCandidate3;
-    else
-      ++NumADRPComplexCandidateOther;
-#endif
-    // If Size < 1, the use should have been removed from the candidates.
-    assert(Size >= 1 && "No reaching defs for that use!");
-  }
-}
-
-/// Check whether the given instruction can be the end of a LOH chain
-/// involving a load.
-static bool isCandidateLoad(const MachineInstr *Instr) {
-  switch (Instr->getOpcode()) {
-  default:
-    return false;
-  case ARM64::LDRSBWui:
-  case ARM64::LDRSBXui:
-  case ARM64::LDRSHWui:
-  case ARM64::LDRSHXui:
-  case ARM64::LDRSWui:
-  case ARM64::LDRBui:
-  case ARM64::LDRHui:
-  case ARM64::LDRWui:
-  case ARM64::LDRXui:
-  case ARM64::LDRSui:
-  case ARM64::LDRDui:
-  case ARM64::LDRQui:
-    if (Instr->getOperand(2).getTargetFlags() & ARM64II::MO_GOT)
-      return false;
-    return true;
-  }
-  // Unreachable.
-  return false;
-}
-
-/// Check whether the given instruction can load a literal.
-static bool supportLoadFromLiteral(const MachineInstr *Instr) {
-  switch (Instr->getOpcode()) {
-  default:
-    return false;
-  case ARM64::LDRSWui:
-  case ARM64::LDRWui:
-  case ARM64::LDRXui:
-  case ARM64::LDRSui:
-  case ARM64::LDRDui:
-  case ARM64::LDRQui:
-    return true;
-  }
-  // Unreachable.
-  return false;
-}
-
-/// Check whether the given instruction is a LOH candidate.
-/// \param UseToDefs is used to check that Instr is at the end of a
-/// LOH-supported chain.
-/// \pre UseToDefs contains only one def per use, i.e., obvious non-candidates
-/// have already been filtered out.
-static bool isCandidate(const MachineInstr *Instr,
-                        const InstrToInstrs &UseToDefs,
-                        const MachineDominatorTree *MDT) {
-  if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
-    return false;
-
-  const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
-  if (Def->getOpcode() != ARM64::ADRP) {
-    // At this point, Def is ADDXri or LDRXui of the right type of
-    // symbol, because we filtered out the uses that were not defined
-    // by these kinds of instructions (+ ADRP).
-
-    // Check if this forms a simple chain: each intermediate node must
-    // dominate the next one.
-    if (!MDT->dominates(Def, Instr))
-      return false;
-    // Move one node up in the simple chain.
-    if (UseToDefs.find(Def) == UseToDefs.end()
-        // The map may contain garbage we have to ignore.
-        ||
-        UseToDefs.find(Def)->second.empty())
-      return false;
-    Instr = Def;
-    Def = *UseToDefs.find(Def)->second.begin();
-  }
-  // Check if we reached the top of the simple chain:
-  // - top is ADRP.
-  // - check the simple chain property: each intermediate node must
-  //   dominate the next one.
- if (Def->getOpcode() == ARM64::ADRP) - return MDT->dominates(Def, Instr); - return false; -} - -static bool registerADRCandidate(const MachineInstr *Use, - const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, - SetOfMachineInstr *InvolvedInLOHs, - const MapRegToId &RegToId) { - // Look for opportunities to turn ADRP -> ADD or - // ADRP -> LDR GOTPAGEOFF into ADR. - // If ADRP has more than one use. Give up. - if (Use->getOpcode() != ARM64::ADDXri && - (Use->getOpcode() != ARM64::LDRXui || - !(Use->getOperand(2).getTargetFlags() & ARM64II::MO_GOT))) - return false; - InstrToInstrs::const_iterator It = UseToDefs.find(Use); - // The map may contain garbage that we need to ignore. - if (It == UseToDefs.end() || It->second.empty()) - return false; - const MachineInstr *Def = *It->second.begin(); - if (Def->getOpcode() != ARM64::ADRP) - return false; - // Check the number of users of ADRP. - const SetOfMachineInstr *Users = - getUses(DefsPerColorToUses, - RegToId.find(Def->getOperand(0).getReg())->second, Def); - if (Users->size() > 1) { - ++NumADRComplexCandidate; - return false; - } - ++NumADRSimpleCandidate; - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Def)) && - "ADRP already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Use)) && - "ADD already involved in LOH."); - DEBUG(dbgs() << "Record AdrpAdd\n" << *Def << '\n' << *Use << '\n'); - - SmallVector Args; - Args.push_back(Def); - Args.push_back(Use); - - ARM64FI.addLOHDirective(Use->getOpcode() == ARM64::ADDXri ? MCLOH_AdrpAdd - : MCLOH_AdrpLdrGot, - Args); - return true; -} - -/// Based on the use to defs information (in non-ADRPMode), compute the -/// opportunities of LOH non-ADRP-related -static void computeOthers(const InstrToInstrs &UseToDefs, - const InstrToInstrs *DefsPerColorToUses, - ARM64FunctionInfo &ARM64FI, const MapRegToId &RegToId, - const MachineDominatorTree *MDT) { - SetOfMachineInstr *InvolvedInLOHs = NULL; -#ifdef DEBUG - SetOfMachineInstr InvolvedInLOHsStorage; - InvolvedInLOHs = &InvolvedInLOHsStorage; -#endif // DEBUG - DEBUG(dbgs() << "*** Compute LOH for Others\n"); - // ADRP -> ADD/LDR -> LDR/STR pattern. - // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern. - - // FIXME: When the statistics are not important, - // This initial filtering loop can be merged into the next loop. - // Currently, we didn't do it to have the same code for both DEBUG and - // NDEBUG builds. Indeed, the iterator of the second loop would need - // to be changed. - SetOfMachineInstr PotentialCandidates; - SetOfMachineInstr PotentialADROpportunities; - for (InstrToInstrs::const_iterator UseIt = UseToDefs.begin(), - EndUseIt = UseToDefs.end(); - UseIt != EndUseIt; ++UseIt) { - // If no definition is available, this is a non candidate. - if (UseIt->second.empty()) - continue; - // Keep only instructions that are load or store and at the end of - // a ADRP -> ADD/LDR/Nothing chain. - // We already filtered out the no-chain cases. - if (!isCandidate(UseIt->first, UseToDefs, MDT)) { - PotentialADROpportunities.insert(UseIt->first); - continue; - } - PotentialCandidates.insert(UseIt->first); - } - - // Make the following distinctions for statistics as the linker does - // know how to decode instructions: - // - ADD/LDR/Nothing make there different patterns. - // - LDR/STR make two different patterns. - // Hence, 6 - 1 base patterns. 
-  // (because ADRP -> Nothing -> STR is not simplifiable).
-
-  // The linker can only apply a simple semantic: if it sees pattern A,
-  // it does B.
-  // However, we want to see the opportunity we may miss if we were able to
-  // catch more complex cases.
-
-  // PotentialCandidates are the results of a chain ADRP -> ADD/LDR ->
-  // load/store.
-  // A potential candidate becomes a candidate if its current immediate
-  // operand is zero and all nodes of the chain have respectively only one
-  // user.
-  SetOfMachineInstr::const_iterator CandidateIt, EndCandidateIt;
-#ifdef DEBUG
-  SetOfMachineInstr DefsOfPotentialCandidates;
-#endif
-  for (CandidateIt = PotentialCandidates.begin(),
-      EndCandidateIt = PotentialCandidates.end();
-       CandidateIt != EndCandidateIt; ++CandidateIt) {
-    const MachineInstr *Candidate = *CandidateIt;
-    // Get the definition of the candidate, i.e., ADD or LDR.
-    const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
-    // Record the elements of the chain.
-    const MachineInstr *L1 = Def;
-    const MachineInstr *L2 = NULL;
-    unsigned ImmediateDefOpc = Def->getOpcode();
-    if (Def->getOpcode() != ARM64::ADRP) {
-      // Check the number of users of this node.
-      const SetOfMachineInstr *Users =
-          getUses(DefsPerColorToUses,
-                  RegToId.find(Def->getOperand(0).getReg())->second, Def);
-      if (Users->size() > 1) {
-#ifdef DEBUG
-        // If all the uses of this def are in the potential candidates, this
-        // is a complex candidate of level 2.
-        SetOfMachineInstr::const_iterator UseIt = Users->begin();
-        SetOfMachineInstr::const_iterator EndUseIt = Users->end();
-        for (; UseIt != EndUseIt; ++UseIt) {
-          if (!PotentialCandidates.count(*UseIt)) {
-            ++NumTooCplxLvl2;
-            break;
-          }
-        }
-        if (UseIt == EndUseIt)
-          ++NumCplxLvl2;
-#endif // DEBUG
-        PotentialADROpportunities.insert(Def);
-        continue;
-      }
-      L2 = Def;
-      Def = *UseToDefs.find(Def)->second.begin();
-      L1 = Def;
-    } // else the element in the middle of the chain is nothing, thus
-      // Def already contains the first element of the chain.
-
-    // Check the number of users of the first node in the chain, i.e., ADRP.
-    const SetOfMachineInstr *Users =
-        getUses(DefsPerColorToUses,
-                RegToId.find(Def->getOperand(0).getReg())->second, Def);
-    if (Users->size() > 1) {
-#ifdef DEBUG
-      // If all the uses of this def are in the defs of the potential
-      // candidates, this is a complex candidate of level 1.
-      if (DefsOfPotentialCandidates.empty()) {
-        // Lazy init.
-        DefsOfPotentialCandidates = PotentialCandidates;
-        for (const MachineInstr *Candidate : PotentialCandidates) {
-          if (!UseToDefs.find(Candidate)->second.empty())
-            DefsOfPotentialCandidates.insert(
-                *UseToDefs.find(Candidate)->second.begin());
-        }
-      }
-      bool Found = false;
-      for (auto &Use : *Users) {
-        if (!DefsOfPotentialCandidates.count(Use)) {
-          ++NumTooCplxLvl1;
-          Found = true;
-          break;
-        }
-      }
-      if (!Found)
-        ++NumCplxLvl1;
-#endif // DEBUG
-      continue;
-    }
-
-    bool IsL2Add = (ImmediateDefOpc == ARM64::ADDXri);
-    // If the chain is three instructions long and ldr is the second element,
-    // then this ldr must load from the GOT, otherwise this is not a correct
-    // chain.
-    if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != ARM64II::MO_GOT)
-      continue;
-    SmallVector<const MachineInstr *, 3> Args;
-    MCLOHType Kind;
-    if (isCandidateLoad(Candidate)) {
-      if (L2 == NULL) {
-        // At this point, the candidate LOH indicates that the ldr instruction
-        // may use a direct access to the symbol. There is no such encoding
-        // for loads of byte and half.
- if (!supportLoadFromLiteral(Candidate)) - continue; - - DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate - << '\n'); - Kind = MCLOH_AdrpLdr; - Args.push_back(L1); - Args.push_back(Candidate); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); - ++NumADRPToLDR; - } else { - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifdef DEBUG - // get the immediate of the load - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToLDR; - else - ++NumLDRToLDR; - else if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToLDRWithImm; - else - ++NumLDRToLDRWithImm; -#endif // DEBUG - } - } else { - if (ImmediateDefOpc == ARM64::ADRP) - continue; - else { - - DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot") - << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate - << '\n'); - - Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr; - Args.push_back(L1); - Args.push_back(L2); - Args.push_back(Candidate); - - PotentialADROpportunities.remove(L2); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) && - "L1 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) && - "L2 already involved in LOH."); - assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) && - "Candidate already involved in LOH."); -#ifdef DEBUG - // get the immediate of the store - if (Candidate->getOperand(2).getImm() == 0) - if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToSTR; - else - ++NumLDRToSTR; - else if (ImmediateDefOpc == ARM64::ADDXri) - ++NumADDToSTRWithImm; - else - ++NumLDRToSTRWithImm; -#endif // DEBUG - } - } - ARM64FI.addLOHDirective(Kind, Args); - } - - // Now, we grabbed all the big patterns, check ADR opportunities. - for (const MachineInstr *Candidate: PotentialADROpportunities) - registerADRCandidate(Candidate, UseToDefs, DefsPerColorToUses, ARM64FI, - InvolvedInLOHs, RegToId); -} - -/// Look for every register defined by potential LOHs candidates. -/// Map these registers with dense id in @p RegToId and vice-versa in -/// @p IdToReg. @p IdToReg is populated only in DEBUG mode. 
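Before the implementation below, the renumbering idea in isolation: each register involved in a potential LOH (together with its aliases) gets a small consecutive id, so that per-color data can live in flat arrays indexed by id instead of maps keyed by register number. A minimal sketch with standard containers (illustrative, not the pass's code):

    #include <map>
    #include <vector>

    std::map<unsigned, unsigned> RegToId; // register -> dense id
    std::vector<unsigned> IdToReg;        // dense id -> register

    unsigned getOrCreateId(unsigned Reg) {
      auto It = RegToId.find(Reg);
      if (It != RegToId.end())
        return It->second;
      unsigned Id = IdToReg.size(); // next free id
      IdToReg.push_back(Reg);
      RegToId.emplace(Reg, Id);
      return Id;
    }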
-static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId, - MapIdToReg &IdToReg, - const TargetRegisterInfo *TRI) { - unsigned CurRegId = 0; - if (!PreCollectRegister) { - unsigned NbReg = TRI->getNumRegs(); - for (; CurRegId < NbReg; ++CurRegId) { - RegToId[CurRegId] = CurRegId; - DEBUG(IdToReg.push_back(CurRegId)); - DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches")); - } - return; - } - - DEBUG(dbgs() << "** Collect Involved Register\n"); - for (MachineFunction::const_iterator IMBB = MF.begin(), IMBBEnd = MF.end(); - IMBB != IMBBEnd; ++IMBB) - for (MachineBasicBlock::const_iterator II = IMBB->begin(), - IEnd = IMBB->end(); - II != IEnd; ++II) { - - if (!canDefBePartOfLOH(II)) - continue; - - // Process defs - for (MachineInstr::const_mop_iterator IO = II->operands_begin(), - IOEnd = II->operands_end(); - IO != IOEnd; ++IO) { - if (!IO->isReg() || !IO->isDef()) - continue; - unsigned CurReg = IO->getReg(); - for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) - if (RegToId.find(*AI) == RegToId.end()) { - DEBUG(IdToReg.push_back(*AI); - assert(IdToReg[CurRegId] == *AI && - "Reg index mismatches insertion index.")); - RegToId[*AI] = CurRegId++; - DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n'); - } - } - } -} - -bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) { - const TargetMachine &TM = Fn.getTarget(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const MachineDominatorTree *MDT = &getAnalysis(); - - MapRegToId RegToId; - MapIdToReg IdToReg; - ARM64FunctionInfo *ARM64FI = Fn.getInfo(); - assert(ARM64FI && "No MachineFunctionInfo for this function!"); - - DEBUG(dbgs() << "Looking for LOH in " << Fn.getName() << '\n'); - - collectInvolvedReg(Fn, RegToId, IdToReg, TRI); - if (RegToId.empty()) - return false; - - MachineInstr *DummyOp = NULL; - if (BasicBlockScopeOnly) { - const ARM64InstrInfo *TII = - static_cast(TM.getInstrInfo()); - // For local analysis, create a dummy operation to record uses that are not - // local. - DummyOp = Fn.CreateMachineInstr(TII->get(ARM64::COPY), DebugLoc()); - } - - unsigned NbReg = RegToId.size(); - bool Modified = false; - - // Start with ADRP. - InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // Compute the reaching def in ADRP mode, meaning ADRP definitions - // are first considered as uses. - reachingDef(&Fn, ColorOpToReachedUses, RegToId, true, DummyOp); - DEBUG(dbgs() << "ADRP reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Translate the definition to uses map into a use to definitions map to ease - // statistic computation. - InstrToInstrs ADRPToReachingDefs; - reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true); - - // Compute LOH for ADRP. - computeADRP(ADRPToReachingDefs, *ARM64FI, MDT); - delete[] ColorOpToReachedUses; - - // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern. - ColorOpToReachedUses = new InstrToInstrs[NbReg]; - - // first perform a regular reaching def analysis. - reachingDef(&Fn, ColorOpToReachedUses, RegToId, false, DummyOp); - DEBUG(dbgs() << "All reaching defs\n"); - DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg)); - - // Turn that into a use to defs to ease statistic computation. - InstrToInstrs UsesToReachingDefs; - reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false); - - // Compute other than AdrpAdrp LOH. 
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *ARM64FI, RegToId, - MDT); - delete[] ColorOpToReachedUses; - - if (BasicBlockScopeOnly) - Fn.DeleteMachineInstr(DummyOp); - - return Modified; -} - -/// createARM64CollectLOHPass - returns an instance of the Statistic for -/// linker optimization pass. -FunctionPass *llvm::createARM64CollectLOHPass() { - return new ARM64CollectLOH(); -} diff --git a/lib/Target/ARM64/ARM64ConditionalCompares.cpp b/lib/Target/ARM64/ARM64ConditionalCompares.cpp deleted file mode 100644 index b495afa..0000000 --- a/lib/Target/ARM64/ARM64ConditionalCompares.cpp +++ /dev/null @@ -1,918 +0,0 @@ -//===-- ARM64ConditionalCompares.cpp --- CCMP formation for ARM64 ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64ConditionalCompares pass which reduces -// branching and code size by using the conditional compare instructions CCMP, -// CCMN, and FCMP. -// -// The CFG transformations for forming conditional compares are very similar to -// if-conversion, and this pass should run immediately before the early -// if-conversion pass. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-ccmp" -#include "ARM64.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SparseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" - -using namespace llvm; - -// Absolute maximum number of instructions allowed per speculated block. -// This bypasses all other heuristics, so it should be set fairly high. -static cl::opt BlockInstrLimit( - "arm64-ccmp-limit", cl::init(30), cl::Hidden, - cl::desc("Maximum number of instructions per speculated block.")); - -// Stress testing mode - disable heuristics. 
-static cl::opt Stress("arm64-stress-ccmp", cl::Hidden, - cl::desc("Turn all knobs to 11")); - -STATISTIC(NumConsidered, "Number of ccmps considered"); -STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)"); -STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)"); -STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)"); -STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)"); -STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)"); -STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)"); -STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)"); -STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)"); -STATISTIC(NumMultCPSRUses, "Number of ccmps rejected (CPSR used)"); -STATISTIC(NumUnknCPSRDefs, "Number of ccmps rejected (CPSR def unknown)"); - -STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)"); - -STATISTIC(NumConverted, "Number of ccmp instructions created"); -STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted"); - -//===----------------------------------------------------------------------===// -// SSACCmpConv -//===----------------------------------------------------------------------===// -// -// The SSACCmpConv class performs ccmp-conversion on SSA form machine code -// after determining if it is possible. The class contains no heuristics; -// external code should be used to determine when ccmp-conversion is a good -// idea. -// -// CCmp-formation works on a CFG representing chained conditions, typically -// from C's short-circuit || and && operators: -// -// From: Head To: Head -// / | CmpBB -// / | / | -// | CmpBB / | -// | / | Tail | -// | / | | | -// Tail | | | -// | | | | -// ... ... ... ... -// -// The Head block is terminated by a br.cond instruction, and the CmpBB block -// contains compare + br.cond. Tail must be a successor of both. -// -// The cmp-conversion turns the compare instruction in CmpBB into a conditional -// compare, and merges CmpBB into Head, speculatively executing its -// instructions. The ARM64 conditional compare instructions have an immediate -// operand that specifies the NZCV flag values when the condition is false and -// the compare isn't executed. This makes it possible to chain compares with -// different condition codes. -// -// Example: -// -// if (a == 5 || b == 17) -// foo(); -// -// Head: -// cmp w0, #5 -// b.eq Tail -// CmpBB: -// cmp w1, #17 -// b.eq Tail -// ... -// Tail: -// bl _foo -// -// Becomes: -// -// Head: -// cmp w0, #5 -// ccmp w1, #17, 4, ne ; 4 = nZcv -// b.eq Tail -// ... -// Tail: -// bl _foo -// -// The ccmp condition code is the one that would cause the Head terminator to -// branch to CmpBB. -// -// FIXME: It should also be possible to speculate a block on the critical edge -// between Head and Tail, just like if-converting a diamond. -// -// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion). - -namespace { -class SSACCmpConv { - MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - -public: - /// The first block containing a conditional branch, dominating everything - /// else. - MachineBasicBlock *Head; - - /// The block containing cmp+br.cond with a sucessor shared with Head. - MachineBasicBlock *CmpBB; - - /// The common successor for Head and CmpBB. - MachineBasicBlock *Tail; - - /// The compare instruction in CmpBB that can be converted to a ccmp. 
-  MachineInstr *CmpMI;
-
-private:
-  /// The branch condition in Head as determined by AnalyzeBranch.
-  SmallVector<MachineOperand, 4> HeadCond;
-
-  /// The condition code that makes Head branch to CmpBB.
-  ARM64CC::CondCode HeadCmpBBCC;
-
-  /// The branch condition in CmpBB.
-  SmallVector<MachineOperand, 4> CmpBBCond;
-
-  /// The condition code that makes CmpBB branch to Tail.
-  ARM64CC::CondCode CmpBBTailCC;
-
-  /// Check if the Tail PHIs are trivially convertible.
-  bool trivialTailPHIs();
-
-  /// Remove CmpBB from the Tail PHIs.
-  void updateTailPHIs();
-
-  /// Check if an operand defining DstReg is dead.
-  bool isDeadDef(unsigned DstReg);
-
-  /// Find the compare instruction in MBB that controls the conditional branch.
-  /// Return NULL if a convertible instruction can't be found.
-  MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
-
-  /// Return true if all non-terminator instructions in MBB can be safely
-  /// speculated.
-  bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
-
-public:
-  /// runOnMachineFunction - Initialize per-function data structures.
-  void runOnMachineFunction(MachineFunction &MF) {
-    this->MF = &MF;
-    TII = MF.getTarget().getInstrInfo();
-    TRI = MF.getTarget().getRegisterInfo();
-    MRI = &MF.getRegInfo();
-  }
-
-  /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
-  /// internal state, and return true.
-  bool canConvert(MachineBasicBlock *MBB);
-
-  /// Cmp-convert the last block passed to canConvert(), assuming
-  /// it is possible. Add any erased blocks to RemovedBlocks.
-  void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
-
-  /// Return the expected code size delta if the conversion into a
-  /// conditional compare is performed.
-  int expectedCodeSizeDelta() const;
-};
-} // end anonymous namespace
-
-// Check that all PHIs in Tail are selecting the same value from Head and
-// CmpBB. This means that no if-conversion is required when merging CmpBB
-// into Head.
-bool SSACCmpConv::trivialTailPHIs() {
-  for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
-       I != E && I->isPHI(); ++I) {
-    unsigned HeadReg = 0, CmpBBReg = 0;
-    // PHI operands come in (VReg, MBB) pairs.
-    for (unsigned oi = 1, oe = I->getNumOperands(); oi != oe; oi += 2) {
-      MachineBasicBlock *MBB = I->getOperand(oi + 1).getMBB();
-      unsigned Reg = I->getOperand(oi).getReg();
-      if (MBB == Head) {
-        assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
-        HeadReg = Reg;
-      }
-      if (MBB == CmpBB) {
-        assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
-        CmpBBReg = Reg;
-      }
-    }
-    if (HeadReg != CmpBBReg)
-      return false;
-  }
-  return true;
-}
-
-// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
-// removing the CmpBB operands. The Head operands will be identical.
-void SSACCmpConv::updateTailPHIs() {
-  for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
-       I != E && I->isPHI(); ++I) {
-    // I is a PHI. It can have multiple entries for CmpBB.
-    for (unsigned oi = I->getNumOperands(); oi > 2; oi -= 2) {
-      // PHI operands are (Reg, MBB) at (oi-2, oi-1).
-      if (I->getOperand(oi - 1).getMBB() == CmpBB) {
-        I->RemoveOperand(oi - 1);
-        I->RemoveOperand(oi - 2);
-      }
-    }
-  }
-}
-
-// This pass runs before the ARM64DeadRegisterDefinitions pass, so compares are
-// still writing virtual registers without any uses.
-bool SSACCmpConv::isDeadDef(unsigned DstReg) {
-  // Writes to the zero register are dead.
- if (DstReg == ARM64::WZR || DstReg == ARM64::XZR) - return true; - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) - return false; - // A virtual register def without any uses will be marked dead later, and - // eventually replaced by the zero register. - return MRI->use_nodbg_empty(DstReg); -} - -// Parse a condition code returned by AnalyzeBranch, and compute the CondCode -// corresponding to TBB. -// Return -static bool parseCond(ArrayRef Cond, ARM64CC::CondCode &CC) { - // A normal br.cond simply has the condition code. - if (Cond[0].getImm() != -1) { - assert(Cond.size() == 1 && "Unknown Cond array format"); - CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); - return true; - } - // For tbz and cbz instruction, the opcode is next. - switch (Cond[1].getImm()) { - default: - // This includes tbz / tbnz branches which can't be converted to - // ccmp + br.cond. - return false; - case ARM64::CBZW: - case ARM64::CBZX: - assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::EQ; - return true; - case ARM64::CBNZW: - case ARM64::CBNZX: - assert(Cond.size() == 3 && "Unknown Cond array format"); - CC = ARM64CC::NE; - return true; - } -} - -MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { - MachineBasicBlock::iterator I = MBB->getFirstTerminator(); - if (I == MBB->end()) - return 0; - // The terminator must be controlled by the flags. - if (!I->readsRegister(ARM64::CPSR)) { - switch (I->getOpcode()) { - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - // These can be converted into a ccmp against #0. - return I; - } - ++NumCmpTermRejs; - DEBUG(dbgs() << "Flags not used by terminator: " << *I); - return 0; - } - - // Now find the instruction controlling the terminator. - for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) { - --I; - assert(!I->isTerminator() && "Spurious terminator"); - switch (I->getOpcode()) { - // cmp is an alias for subs with a dead destination register. - case ARM64::SUBSWri: - case ARM64::SUBSXri: - // cmn is an alias for adds with a dead destination register. - case ARM64::ADDSWri: - case ARM64::ADDSXri: - // Check that the immediate operand is within range, ccmp wants a uimm5. - // Rd = SUBSri Rn, imm, shift - if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) { - DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I); - ++NumImmRangeRejs; - return 0; - } - // Fall through. - case ARM64::SUBSWrr: - case ARM64::SUBSXrr: - case ARM64::ADDSWrr: - case ARM64::ADDSXrr: - if (isDeadDef(I->getOperand(0).getReg())) - return I; - DEBUG(dbgs() << "Can't convert compare with live destination: " << *I); - ++NumLiveDstRejs; - return 0; - case ARM64::FCMPSrr: - case ARM64::FCMPDrr: - case ARM64::FCMPESrr: - case ARM64::FCMPEDrr: - return I; - } - - // Check for flag reads and clobbers. - MIOperands::PhysRegInfo PRI = - MIOperands(I).analyzePhysReg(ARM64::CPSR, TRI); - - if (PRI.Reads) { - // The ccmp doesn't produce exactly the same flags as the original - // compare, so reject the transform if there are uses of the flags - // besides the terminators. - DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I); - ++NumMultCPSRUses; - return 0; - } - - if (PRI.Clobbers) { - DEBUG(dbgs() << "Not convertible compare: " << *I); - ++NumUnknCPSRDefs; - return 0; - } - } - DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n'); - return 0; -} - -/// Determine if all the instructions in MBB can safely -/// be speculated. The terminators are not considered. 
-/// -/// Only CmpMI is allowed to clobber the flags. -/// -bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB, - const MachineInstr *CmpMI) { - // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to - // get right. - if (!MBB->livein_empty()) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); - return false; - } - - unsigned InstrCount = 0; - - // Check all instructions, except the terminators. It is assumed that - // terminators never have side effects or define any used register values. - for (MachineBasicBlock::iterator I = MBB->begin(), - E = MBB->getFirstTerminator(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - if (++InstrCount > BlockInstrLimit && !Stress) { - DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " - << BlockInstrLimit << " instructions.\n"); - return false; - } - - // There shouldn't normally be any phis in a single-predecessor block. - if (I->isPHI()) { - DEBUG(dbgs() << "Can't hoist: " << *I); - return false; - } - - // Don't speculate loads. Note that it may be possible and desirable to - // speculate GOT or constant pool loads that are guaranteed not to trap, - // but we don't support that for now. - if (I->mayLoad()) { - DEBUG(dbgs() << "Won't speculate load: " << *I); - return false; - } - - // We never speculate stores, so an AA pointer isn't necessary. - bool DontMoveAcrossStore = true; - if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) { - DEBUG(dbgs() << "Can't speculate: " << *I); - return false; - } - - // Only CmpMI is alowed to clobber the flags. - if (&*I != CmpMI && I->modifiesRegister(ARM64::CPSR, TRI)) { - DEBUG(dbgs() << "Clobbers flags: " << *I); - return false; - } - } - return true; -} - -/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential -/// candidate for cmp-conversion. Fill out the internal state. -/// -bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) { - Head = MBB; - Tail = CmpBB = 0; - - if (Head->succ_size() != 2) - return false; - MachineBasicBlock *Succ0 = Head->succ_begin()[0]; - MachineBasicBlock *Succ1 = Head->succ_begin()[1]; - - // CmpBB can only have a single predecessor. Tail is allowed many. - if (Succ0->pred_size() != 1) - std::swap(Succ0, Succ1); - - // Succ0 is our candidate for CmpBB. - if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2) - return false; - - CmpBB = Succ0; - Tail = Succ1; - - if (!CmpBB->isSuccessor(Tail)) - return false; - - // The CFG topology checks out. - DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() << " -> BB#" - << CmpBB->getNumber() << " -> BB#" << Tail->getNumber() << '\n'); - ++NumConsidered; - - // Tail is allowed to have many predecessors, but we can't handle PHIs yet. - // - // FIXME: Real PHIs could be if-converted as long as the CmpBB values are - // defined before The CmpBB cmp clobbers the flags. Alternatively, it should - // always be safe to sink the ccmp down to immediately before the CmpBB - // terminators. - if (!trivialTailPHIs()) { - DEBUG(dbgs() << "Can't handle phis in Tail.\n"); - ++NumPhiRejs; - return false; - } - - if (!Tail->livein_empty()) { - DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n"); - ++NumPhysRejs; - return false; - } - - // CmpBB should never have PHIs since Head is its only predecessor. - // FIXME: Clean them up if it happens. 
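The CFG shape canConvert has just verified is a triangle: Head branches to CmpBB and Tail, CmpBB has Head as its sole predecessor and also branches to Tail. A standalone sketch of that shape test, with a toy BB type standing in for MachineBasicBlock (illustrative only):

    #include <algorithm>
    #include <utility>
    #include <vector>

    struct BB { std::vector<BB *> Preds, Succs; };

    bool isTriangle(BB *Head, BB *&CmpBB, BB *&Tail) {
      if (Head->Succs.size() != 2)
        return false;
      BB *S0 = Head->Succs[0], *S1 = Head->Succs[1];
      // CmpBB may only have Head as a predecessor; Tail may have many.
      if (S0->Preds.size() != 1)
        std::swap(S0, S1);
      if (S0->Preds.size() != 1 || S0->Succs.size() != 2)
        return false;
      CmpBB = S0;
      Tail = S1;
      return std::find(CmpBB->Succs.begin(), CmpBB->Succs.end(), Tail) !=
             CmpBB->Succs.end();
    }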
- if (!CmpBB->empty() && CmpBB->front().isPHI()) { - DEBUG(dbgs() << "Can't handle phis in CmpBB.\n"); - ++NumPhi2Rejs; - return false; - } - - if (!CmpBB->livein_empty()) { - DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n"); - ++NumPhysRejs; - return false; - } - - // The branch we're looking to eliminate must be analyzable. - HeadCond.clear(); - MachineBasicBlock *TBB = 0, *FBB = 0; - if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) { - DEBUG(dbgs() << "Head branch not analyzable.\n"); - ++NumHeadBranchRejs; - return false; - } - - // This is weird, probably some sort of degenerate CFG, or an edge to a - // landing pad. - if (!TBB || HeadCond.empty()) { - DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in Head.\n"); - ++NumHeadBranchRejs; - return false; - } - - if (!parseCond(HeadCond, HeadCmpBBCC)) { - DEBUG(dbgs() << "Unsupported branch type on Head\n"); - ++NumHeadBranchRejs; - return false; - } - - // Make sure the branch direction is right. - if (TBB != CmpBB) { - assert(TBB == Tail && "Unexpected TBB"); - HeadCmpBBCC = ARM64CC::getInvertedCondCode(HeadCmpBBCC); - } - - CmpBBCond.clear(); - TBB = FBB = 0; - if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) { - DEBUG(dbgs() << "CmpBB branch not analyzable.\n"); - ++NumCmpBranchRejs; - return false; - } - - if (!TBB || CmpBBCond.empty()) { - DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch in CmpBB.\n"); - ++NumCmpBranchRejs; - return false; - } - - if (!parseCond(CmpBBCond, CmpBBTailCC)) { - DEBUG(dbgs() << "Unsupported branch type on CmpBB\n"); - ++NumCmpBranchRejs; - return false; - } - - if (TBB != Tail) - CmpBBTailCC = ARM64CC::getInvertedCondCode(CmpBBTailCC); - - DEBUG(dbgs() << "Head->CmpBB on " << ARM64CC::getCondCodeName(HeadCmpBBCC) - << ", CmpBB->Tail on " << ARM64CC::getCondCodeName(CmpBBTailCC) - << '\n'); - - CmpMI = findConvertibleCompare(CmpBB); - if (!CmpMI) - return false; - - if (!canSpeculateInstrs(CmpBB, CmpMI)) { - ++NumSpeculateRejs; - return false; - } - return true; -} - -void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { - DEBUG(dbgs() << "Merging BB#" << CmpBB->getNumber() << " into BB#" - << Head->getNumber() << ":\n" << *CmpBB); - - // All CmpBB instructions are moved into Head, and CmpBB is deleted. - // Update the CFG first. - updateTailPHIs(); - Head->removeSuccessor(CmpBB); - CmpBB->removeSuccessor(Tail); - Head->transferSuccessorsAndUpdatePHIs(CmpBB); - DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); - TII->RemoveBranch(*Head); - - // If the Head terminator was one of the cbz / tbz branches with built-in - // compare, we need to insert an explicit compare instruction in its place. - if (HeadCond[0].getImm() == -1) { - ++NumCompBranches; - unsigned Opc = 0; - switch (HeadCond[1].getImm()) { - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::SUBSWri; - break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::SUBSXri; - break; - default: - llvm_unreachable("Cannot convert Head branch"); - } - const MCInstrDesc &MCID = TII->get(Opc); - // Create a dummy virtual register for the SUBS def. - unsigned DestReg = - MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF)); - // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz. - BuildMI(*Head, Head->end(), TermDL, MCID) - .addReg(DestReg, RegState::Define | RegState::Dead) - .addOperand(HeadCond[2]) - .addImm(0) - .addImm(0); - // SUBS uses the GPR*sp register classes. 
- MRI->constrainRegClass(HeadCond[2].getReg(), - TII->getRegClass(MCID, 1, TRI, *MF)); - } - - Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end()); - - // Now replace CmpMI with a ccmp instruction that also considers the incoming - // flags. - unsigned Opc = 0; - unsigned FirstOp = 1; // First CmpMI operand to copy. - bool isZBranch = false; // CmpMI is a cbz/cbnz instruction. - switch (CmpMI->getOpcode()) { - default: - llvm_unreachable("Unknown compare opcode"); - case ARM64::SUBSWri: Opc = ARM64::CCMPWi; break; - case ARM64::SUBSWrr: Opc = ARM64::CCMPWr; break; - case ARM64::SUBSXri: Opc = ARM64::CCMPXi; break; - case ARM64::SUBSXrr: Opc = ARM64::CCMPXr; break; - case ARM64::ADDSWri: Opc = ARM64::CCMNWi; break; - case ARM64::ADDSWrr: Opc = ARM64::CCMNWr; break; - case ARM64::ADDSXri: Opc = ARM64::CCMNXi; break; - case ARM64::ADDSXrr: Opc = ARM64::CCMNXr; break; - case ARM64::FCMPSrr: Opc = ARM64::FCCMPSrr; FirstOp = 0; break; - case ARM64::FCMPDrr: Opc = ARM64::FCCMPDrr; FirstOp = 0; break; - case ARM64::FCMPESrr: Opc = ARM64::FCCMPESrr; FirstOp = 0; break; - case ARM64::FCMPEDrr: Opc = ARM64::FCCMPEDrr; FirstOp = 0; break; - case ARM64::CBZW: - case ARM64::CBNZW: - Opc = ARM64::CCMPWi; - FirstOp = 0; - isZBranch = true; - break; - case ARM64::CBZX: - case ARM64::CBNZX: - Opc = ARM64::CCMPXi; - FirstOp = 0; - isZBranch = true; - break; - } - - // The ccmp instruction should set the flags according to the comparison when - // Head would have branched to CmpBB. - // The NZCV immediate operand should provide flags for the case where Head - // would have branched to Tail. These flags should cause the new Head - // terminator to branch to tail. - unsigned NZCV = ARM64CC::getNZCVToSatisfyCondCode(CmpBBTailCC); - const MCInstrDesc &MCID = TII->get(Opc); - MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(), - TII->getRegClass(MCID, 0, TRI, *MF)); - if (CmpMI->getOperand(FirstOp + 1).isReg()) - MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(), - TII->getRegClass(MCID, 1, TRI, *MF)); - MachineInstrBuilder MIB = - BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID) - .addOperand(CmpMI->getOperand(FirstOp)); // Register Rn - if (isZBranch) - MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0 - else - MIB.addOperand(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate - MIB.addImm(NZCV).addImm(HeadCmpBBCC); - - // If CmpMI was a terminator, we need a new conditional branch to replace it. - // This now becomes a Head terminator. - if (isZBranch) { - bool isNZ = CmpMI->getOpcode() == ARM64::CBNZW || - CmpMI->getOpcode() == ARM64::CBNZX; - BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(ARM64::Bcc)) - .addImm(isNZ ? ARM64CC::NE : ARM64CC::EQ) - .addOperand(CmpMI->getOperand(1)); // Branch target. - } - CmpMI->eraseFromParent(); - Head->updateTerminator(); - - RemovedBlocks.push_back(CmpBB); - CmpBB->eraseFromParent(); - DEBUG(dbgs() << "Result:\n" << *Head); - ++NumConverted; -} - -int SSACCmpConv::expectedCodeSizeDelta() const { - int delta = 0; - // If the Head terminator was one of the cbz / tbz branches with built-in - // compare, we need to insert an explicit compare instruction in its place - // plus a branch instruction. 
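For intuition on the NZCV immediate chosen above: getNZCVToSatisfyCondCode returns the flag settings under which CmpBBTailCC reads as true, so that when Head's condition fails, the ccmp forces exactly the flags that send the merged terminator to Tail. A sketch covering the two codes used in the example from the file header, assuming the usual N|Z|C|V bit layout (this mirrors what the helper plausibly returns, not a copy of it):

    enum CondCode { EQ, NE };

    // Flags value (N<<3 | Z<<2 | C<<1 | V) that makes CC evaluate to true.
    unsigned getNZCVToSatisfyCondCode(CondCode CC) {
      switch (CC) {
      case EQ:
        return 0x4; // EQ holds iff Z == 1
      case NE:
        return 0x0; // NE holds iff Z == 0
      }
      return 0;
    }

With "cmp w0, #5; ccmp w1, #17, #4, ne; b.eq Tail": if the first compare made NE false (a == 5), the flags are forced to Z=1 and the trailing b.eq is taken; otherwise w1 really is compared with 17.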
-  if (HeadCond[0].getImm() == -1) {
-    switch (HeadCond[1].getImm()) {
-    case ARM64::CBZW:
-    case ARM64::CBNZW:
-    case ARM64::CBZX:
-    case ARM64::CBNZX:
-      // Therefore delta += 1.
-      delta = 1;
-      break;
-    default:
-      llvm_unreachable("Cannot convert Head branch");
-    }
-  }
-  // If the Cmp terminator was one of the cbz / tbz branches with
-  // built-in compare, it will be turned into a compare instruction
-  // in Head, and we do not save any instruction.
-  // Otherwise, we save the branch instruction.
-  switch (CmpMI->getOpcode()) {
-  default:
-    --delta;
-    break;
-  case ARM64::CBZW:
-  case ARM64::CBNZW:
-  case ARM64::CBZX:
-  case ARM64::CBNZX:
-    break;
-  }
-  return delta;
-}
-
-//===----------------------------------------------------------------------===//
-//                 ARM64ConditionalCompares Pass
-//===----------------------------------------------------------------------===//
-
-namespace {
-class ARM64ConditionalCompares : public MachineFunctionPass {
-  const TargetInstrInfo *TII;
-  const TargetRegisterInfo *TRI;
-  const MCSchedModel *SchedModel;
-  // Whether the function being processed has the MinSize (Oz) attribute.
-  bool MinSize;
-  MachineRegisterInfo *MRI;
-  MachineDominatorTree *DomTree;
-  MachineLoopInfo *Loops;
-  MachineTraceMetrics *Traces;
-  MachineTraceMetrics::Ensemble *MinInstr;
-  SSACCmpConv CmpConv;
-
-public:
-  static char ID;
-  ARM64ConditionalCompares() : MachineFunctionPass(ID) {}
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-  bool runOnMachineFunction(MachineFunction &MF);
-  const char *getPassName() const { return "ARM64 Conditional Compares"; }
-
-private:
-  bool tryConvert(MachineBasicBlock *);
-  void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
-  void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
-  void invalidateTraces();
-  bool shouldConvert();
-};
-} // end anonymous namespace
-
-char ARM64ConditionalCompares::ID = 0;
-
-namespace llvm {
-void initializeARM64ConditionalComparesPass(PassRegistry &);
-}
-
-INITIALIZE_PASS_BEGIN(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass",
-                      false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
-INITIALIZE_PASS_END(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass",
-                    false, false)
-
-FunctionPass *llvm::createARM64ConditionalCompares() {
-  return new ARM64ConditionalCompares();
-}
-
-void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<MachineBranchProbabilityInfo>();
-  AU.addRequired<MachineDominatorTree>();
-  AU.addPreserved<MachineDominatorTree>();
-  AU.addRequired<MachineLoopInfo>();
-  AU.addPreserved<MachineLoopInfo>();
-  AU.addRequired<MachineTraceMetrics>();
-  AU.addPreserved<MachineTraceMetrics>();
-  MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-/// Update the dominator tree after if-conversion erased some blocks.
-void
-ARM64ConditionalCompares::updateDomTree(ArrayRef<MachineBasicBlock *> Removed) {
-  // convert() removes CmpBB which was previously dominated by Head.
-  // CmpBB children should be transferred to Head.
-  MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
-  for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
-    MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
-    assert(Node != HeadNode && "Cannot erase the head node");
-    assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
-    while (Node->getNumChildren())
-      DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
-    DomTree->eraseNode(Removed[i]);
-  }
-}
-
-/// Update LoopInfo after if-conversion.
-void -ARM64ConditionalCompares::updateLoops(ArrayRef Removed) { - if (!Loops) - return; - for (unsigned i = 0, e = Removed.size(); i != e; ++i) - Loops->removeBlock(Removed[i]); -} - -/// Invalidate MachineTraceMetrics before if-conversion. -void ARM64ConditionalCompares::invalidateTraces() { - Traces->invalidate(CmpConv.Head); - Traces->invalidate(CmpConv.CmpBB); -} - -/// Apply cost model and heuristics to the if-conversion in IfConv. -/// Return true if the conversion is a good idea. -/// -bool ARM64ConditionalCompares::shouldConvert() { - // Stress testing mode disables all cost considerations. - if (Stress) - return true; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - - // Head dominates CmpBB, so it is always included in its trace. - MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB); - - // If code size is the main concern - if (MinSize) { - int CodeSizeDelta = CmpConv.expectedCodeSizeDelta(); - DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n'); - // If we are minimizing the code size, do the conversion whatever - // the cost is. - if (CodeSizeDelta < 0) - return true; - if (CodeSizeDelta > 0) { - DEBUG(dbgs() << "Code size is increasing, give up on this one.\n"); - return false; - } - // CodeSizeDelta == 0, continue with the regular heuristics - } - - // Heuristic: The compare conversion delays the execution of the branch - // instruction because we must wait for the inputs to the second compare as - // well. The branch has no dependent instructions, but delaying it increases - // the cost of a misprediction. - // - // Set a limit on the delay we will accept. - unsigned DelayLimit = SchedModel->MispredictPenalty * 3 / 4; - - // Instruction depths can be computed for all trace instructions above CmpBB. - unsigned HeadDepth = - Trace.getInstrCycles(CmpConv.Head->getFirstTerminator()).Depth; - unsigned CmpBBDepth = - Trace.getInstrCycles(CmpConv.CmpBB->getFirstTerminator()).Depth; - DEBUG(dbgs() << "Head depth: " << HeadDepth - << "\nCmpBB depth: " << CmpBBDepth << '\n'); - if (CmpBBDepth > HeadDepth + DelayLimit) { - DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit - << " cycles.\n"); - return false; - } - - // Check the resource depth at the bottom of CmpBB - these instructions will - // be speculated. - unsigned ResDepth = Trace.getResourceDepth(true); - DEBUG(dbgs() << "Resources: " << ResDepth << '\n'); - - // Heuristic: The speculatively executed instructions must all be able to - // merge into the Head block. The Head critical path should dominate the - // resource cost of the speculated instructions. 
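The two trace-based checks described above condense to a small predicate. A sketch over the quantities the pass reads from MachineTraceMetrics and the scheduling model (the function name and packaging are illustrative, not the pass's code):

    // Accept the conversion only if the extra branch delay stays under 3/4
    // of a misprediction penalty and the speculated instructions fit under
    // Head's critical path.
    bool worthConverting(unsigned MispredictPenalty, unsigned HeadDepth,
                         unsigned CmpBBDepth, unsigned ResDepth) {
      unsigned DelayLimit = MispredictPenalty * 3 / 4;
      if (CmpBBDepth > HeadDepth + DelayLimit)
        return false; // the branch would be delayed too much
      return ResDepth <= HeadDepth;
    }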
- if (ResDepth > HeadDepth) { - DEBUG(dbgs() << "Too many instructions to speculate.\n"); - return false; - } - return true; -} - -bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) { - bool Changed = false; - while (CmpConv.canConvert(MBB) && shouldConvert()) { - invalidateTraces(); - SmallVector RemovedBlocks; - CmpConv.convert(RemovedBlocks); - Changed = true; - updateDomTree(RemovedBlocks); - updateLoops(RemovedBlocks); - } - return Changed; -} - -bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "********** ARM64 Conditional Compares **********\n" - << "********** Function: " << MF.getName() << '\n'); - TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); - SchedModel = - MF.getTarget().getSubtarget().getSchedModel(); - MRI = &MF.getRegInfo(); - DomTree = &getAnalysis(); - Loops = getAnalysisIfAvailable(); - Traces = &getAnalysis(); - MinInstr = 0; - MinSize = MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::MinSize); - - bool Changed = false; - CmpConv.runOnMachineFunction(MF); - - // Visit blocks in dominator tree pre-order. The pre-order enables multiple - // cmp-conversions from the same head block. - // Note that updateDomTree() modifies the children of the DomTree node - // currently being visited. The df_iterator supports that, it doesn't look at - // child_begin() / child_end() until after a node has been visited. - for (df_iterator I = df_begin(DomTree), - E = df_end(DomTree); - I != E; ++I) - if (tryConvert(I->getBlock())) - Changed = true; - - return Changed; -} diff --git a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp b/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp deleted file mode 100644 index 3e410e5..0000000 --- a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp +++ /dev/null @@ -1,104 +0,0 @@ -//===-- ARM64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// When allowed by the instruction, replace a dead definition of a GPR with -// the zero register. This makes the code a bit friendlier towards the -// hardware's register renamer. -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-dead-defs" -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); - -namespace { -class ARM64DeadRegisterDefinitions : public MachineFunctionPass { -private: - bool processMachineBasicBlock(MachineBasicBlock *MBB); - -public: - static char ID; // Pass identification, replacement for typeid. 
- explicit ARM64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &F); - - const char *getPassName() const { return "Dead register definitions"; } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64DeadRegisterDefinitions::ID = 0; -} // end anonymous namespace - -bool -ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock *MBB) { - bool Changed = false; - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - MachineInstr *MI = I; - for (int i = 0, e = MI->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDead() && MO.isDef()) { - assert(!MO.isImplicit() && "Unexpected implicit def!"); - DEBUG(dbgs() << " Dead def operand #" << i << " in:\n "; - MI->print(dbgs())); - // Be careful not to change the register if it's a tied operand. - if (MI->isRegTiedToUseOperand(i)) { - DEBUG(dbgs() << " Ignoring, def is tied operand.\n"); - continue; - } - // Make sure the instruction take a register class that contains - // the zero register and replace it if so. - unsigned NewReg; - switch (MI->getDesc().OpInfo[i].RegClass) { - default: - DEBUG(dbgs() << " Ignoring, register is not a GPR.\n"); - continue; - case ARM64::GPR32RegClassID: - NewReg = ARM64::WZR; - break; - case ARM64::GPR64RegClassID: - NewReg = ARM64::XZR; - break; - } - DEBUG(dbgs() << " Replacing with zero register. New:\n "); - MO.setReg(NewReg); - DEBUG(MI->print(dbgs())); - ++NumDeadDefsReplaced; - } - } - } - return Changed; -} - -// Scan the function for instructions that have a dead definition of a -// register. Replace that register with the zero register when possible. -bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &mf) { - MachineFunction *MF = &mf; - bool Changed = false; - DEBUG(dbgs() << "***** ARM64DeadRegisterDefinitions *****\n"); - - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - if (processMachineBasicBlock(I)) - Changed = true; - return Changed; -} - -FunctionPass *llvm::createARM64DeadRegisterDefinitions() { - return new ARM64DeadRegisterDefinitions(); -} diff --git a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp deleted file mode 100644 index e082baf..0000000 --- a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp +++ /dev/null @@ -1,737 +0,0 @@ -//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions ---*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that expands pseudo instructions into target -// instructions to allow proper scheduling and other late optimizations. This -// pass should be run after register allocation but before the post-regalloc -// scheduling pass. 
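As background for the immediate expansions that follow: any 64-bit immediate can always be materialized with one MOVZ plus up to three MOVKs, one per remaining non-zero 16-bit chunk; the helpers in this file exist to beat that baseline with shorter sequences. A sketch of the baseline, which just prints the instruction sequence it would emit (illustrative, not the pass's code):

    #include <cstdint>
    #include <cstdio>

    void naiveMaterialize(uint64_t Imm) {
      if (Imm == 0) {
        std::printf("movz x0, #0\n");
        return;
      }
      bool First = true;
      for (unsigned Idx = 0; Idx != 4; ++Idx) {
        uint64_t Chunk = (Imm >> (Idx * 16)) & 0xFFFF;
        if (Chunk == 0)
          continue; // MOVZ already cleared this chunk
        std::printf("%s x0, #0x%llx, lsl #%u\n", First ? "movz" : "movk",
                    (unsigned long long)Chunk, Idx * 16);
        First = false;
      }
    }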
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "ARM64InstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Support/MathExtras.h" -using namespace llvm; - -namespace { -class ARM64ExpandPseudo : public MachineFunctionPass { -public: - static char ID; - ARM64ExpandPseudo() : MachineFunctionPass(ID) {} - - const ARM64InstrInfo *TII; - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "ARM64 pseudo instruction expansion pass"; - } - -private: - bool expandMBB(MachineBasicBlock &MBB); - bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); - bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - unsigned BitSize); -}; -char ARM64ExpandPseudo::ID = 0; -} - -/// \brief Transfer implicit operands on the pseudo instruction to the -/// instructions created from the expansion. -static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, - MachineInstrBuilder &DefMI) { - const MCInstrDesc &Desc = OldMI.getDesc(); - for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e; - ++i) { - const MachineOperand &MO = OldMI.getOperand(i); - assert(MO.isReg() && MO.getReg()); - if (MO.isUse()) - UseMI.addOperand(MO); - else - DefMI.addOperand(MO); - } -} - -/// \brief Helper function which extracts the specified 16-bit chunk from a -/// 64-bit value. -static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) { - assert(ChunkIdx < 4 && "Out of range chunk index specified!"); - - return (Imm >> (ChunkIdx * 16)) & 0xFFFF; -} - -/// \brief Helper function which replicates a 16-bit chunk within a 64-bit -/// value. Indices correspond to element numbers in a v4i16. -static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) { - assert((FromIdx < 4) && (ToIdx < 4) && "Out of range chunk index specified!"); - const unsigned ShiftAmt = ToIdx * 16; - - // Replicate the source chunk to the destination position. - const uint64_t Chunk = getChunk(Imm, FromIdx) << ShiftAmt; - // Clear the destination chunk. - Imm &= ~(0xFFFFLL << ShiftAmt); - // Insert the replicated chunk. - return Imm | Chunk; -} - -/// \brief Helper function which tries to materialize a 64-bit value with an -/// ORR + MOVK instruction sequence. -static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII, unsigned ChunkIdx) { - assert(ChunkIdx < 4 && "Out of range chunk index specified!"); - const unsigned ShiftAmt = ChunkIdx * 16; - - uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) { - // Create the ORR-immediate instruction. - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) - .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) - .addImm(Encoding); - - // Create the MOVK instruction. 
-    const unsigned Imm16 = getChunk(UImm, ChunkIdx);
-    const unsigned DstReg = MI.getOperand(0).getReg();
-    const bool DstIsDead = MI.getOperand(0).isDead();
-    MachineInstrBuilder MIB1 =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
-            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
-            .addReg(DstReg)
-            .addImm(Imm16)
-            .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
-
-    transferImpOps(MI, MIB, MIB1);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  return false;
-}
-
-/// \brief Check whether the given 16-bit chunk replicated to full 64-bit width
-/// can be materialized with an ORR instruction.
-static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
-  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
-
-  return ARM64_AM::processLogicalImmediate(Chunk, 64, Encoding);
-}
-
-/// \brief Check for identical 16-bit chunks within the constant and if so
-/// materialize them with a single ORR instruction. The remaining one or two
-/// 16-bit chunks will be materialized with MOVK instructions.
-///
-/// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
-/// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
-/// an ORR instruction.
-///
-static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
-                                 MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator &MBBI,
-                                 const ARM64InstrInfo *TII) {
-  typedef DenseMap<uint64_t, unsigned> CountMap;
-  CountMap Counts;
-
-  // Scan the constant and count how often every chunk occurs.
-  for (unsigned Idx = 0; Idx < 4; ++Idx)
-    ++Counts[getChunk(UImm, Idx)];
-
-  // Traverse the chunks to find one which occurs more than once.
-  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
-       Chunk != End; ++Chunk) {
-    const uint64_t ChunkVal = Chunk->first;
-    const unsigned Count = Chunk->second;
-
-    uint64_t Encoding = 0;
-
-    // We are looking for chunks which have two or three instances and can be
-    // materialized with an ORR instruction.
-    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
-      continue;
-
-    const bool CountThree = Count == 3;
-    // Create the ORR-immediate instruction.
-    MachineInstrBuilder MIB =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
-            .addOperand(MI.getOperand(0))
-            .addReg(ARM64::XZR)
-            .addImm(Encoding);
-
-    const unsigned DstReg = MI.getOperand(0).getReg();
-    const bool DstIsDead = MI.getOperand(0).isDead();
-
-    unsigned ShiftAmt = 0;
-    uint64_t Imm16 = 0;
-    // Find the first chunk not materialized with the ORR instruction.
-    for (; ShiftAmt < 64; ShiftAmt += 16) {
-      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
-
-      if (Imm16 != ChunkVal)
-        break;
-    }
-
-    // Create the first MOVK instruction.
-    MachineInstrBuilder MIB1 =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
-            .addReg(DstReg,
-                    RegState::Define | getDeadRegState(DstIsDead && CountThree))
-            .addReg(DstReg)
-            .addImm(Imm16)
-            .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
-
-    // In case we have three instances the whole constant is now materialized
-    // and we can exit.
-    if (CountThree) {
-      transferImpOps(MI, MIB, MIB1);
-      MI.eraseFromParent();
-      return true;
-    }
-
-    // Find the remaining chunk which needs to be materialized.
-    for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
-      Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
-
-      if (Imm16 != ChunkVal)
-        break;
-    }
-
-    // Create the second MOVK instruction.
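// For instance (illustrative): 0x5678AAAA1234AAAA has two 0xAAAA chunks, and
// 0xAAAA replicated to all four positions is ORR-encodable, so the expansion is
//   orr  x0, xzr, #0xaaaaaaaaaaaaaaaa
//   movk x0, #0x1234, lsl #16
//   movk x0, #0x5678, lsl #48
// with the second MOVK below filling in the last mismatching chunk.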
- MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt)); - - transferImpOps(MI, MIB, MIB2); - MI.eraseFromParent(); - return true; - } - - return false; -} - -/// \brief Check whether this chunk matches the pattern '1...0...'. This pattern -/// starts a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isStartChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == UINT64_MAX) - return false; - - return (CountLeadingOnes_64(Chunk) + countTrailingZeros(Chunk)) == 64; -} - -/// \brief Check whether this chunk matches the pattern '0...1...' This pattern -/// ends a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isEndChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == UINT64_MAX) - return false; - - return (countLeadingZeros(Chunk) + CountTrailingOnes_64(Chunk)) == 64; -} - -/// \brief Clear or set all bits in the chunk at the given index. -static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) { - const uint64_t Mask = 0xFFFF; - - if (Clear) - // Clear chunk in the immediate. - Imm &= ~(Mask << (Idx * 16)); - else - // Set all bits in the immediate for the particular chunk. - Imm |= Mask << (Idx * 16); - - return Imm; -} - -/// \brief Check whether the constant contains a sequence of contiguous ones, -/// which might be interrupted by one or two chunks. If so, materialize the -/// sequence of contiguous ones with an ORR instruction. -/// Materialize the chunks which are either interrupting the sequence or outside -/// of the sequence with a MOVK instruction. -/// -/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk -/// which ends the sequence (0...1...). Then we are looking for constants which -/// contain at least one S and E chunk. -/// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|. -/// -/// We are also looking for constants like |S|A|B|E| where the contiguous -/// sequence of ones wraps around the MSB into the LSB. -/// -static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const ARM64InstrInfo *TII) { - const int NotSet = -1; - const uint64_t Mask = 0xFFFF; - - int StartIdx = NotSet; - int EndIdx = NotSet; - // Try to find the chunks which start/end a contiguous sequence of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - int64_t Chunk = getChunk(UImm, Idx); - // Sign extend the 16-bit chunk to 64-bit. - Chunk = (Chunk << 48) >> 48; - - if (isStartChunk(Chunk)) - StartIdx = Idx; - else if (isEndChunk(Chunk)) - EndIdx = Idx; - } - - // Early exit in case we can't find a start/end chunk. - if (StartIdx == NotSet || EndIdx == NotSet) - return false; - - // Outside of the contiguous sequence of ones everything needs to be zero. - uint64_t Outside = 0; - // Chunks between the start and end chunk need to have all their bits set. - uint64_t Inside = Mask; - - // If our contiguous sequence of ones wraps around from the MSB into the LSB, - // just swap indices and pretend we are materializing a contiguous sequence - // of zeros surrounded by a contiguous sequence of ones. 
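// Worked example (illustrative): UImm = 0xF00012340000001F has the start
// chunk 0xF000 at index 3 and the end chunk 0x001F at index 0, i.e. the run
// of ones wraps around from the MSB into the LSB. After the swap below, chunk
// 2 (0x1234) is the only one breaking the pattern, so the expansion is
//   orr  x0, xzr, #0xf00000000000001f
//   movk x0, #0x1234, lsl #32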
- if (StartIdx > EndIdx) { - std::swap(StartIdx, EndIdx); - std::swap(Outside, Inside); - } - - uint64_t OrrImm = UImm; - int FirstMovkIdx = NotSet; - int SecondMovkIdx = NotSet; - - // Find out which chunks we need to patch up to obtain a contiguous sequence - // of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - const uint64_t Chunk = getChunk(UImm, Idx); - - // Check whether we are looking at a chunk which is not part of the - // contiguous sequence of ones. - if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) { - OrrImm = updateImm(OrrImm, Idx, Outside == 0); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - - // Check whether we are looking a chunk which is part of the contiguous - // sequence of ones. - } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) { - OrrImm = updateImm(OrrImm, Idx, Inside != Mask); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - } - } - assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!"); - - // Create the ORR-immediate instruction. - uint64_t Encoding = 0; - ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding); - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri)) - .addOperand(MI.getOperand(0)) - .addReg(ARM64::XZR) - .addImm(Encoding); - - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - - const bool SingleMovk = SecondMovkIdx == NotSet; - // Create the first MOVK instruction. - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, - RegState::Define | getDeadRegState(DstIsDead && SingleMovk)) - .addReg(DstReg) - .addImm(getChunk(UImm, FirstMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16)); - - // Early exit in case we only need to emit a single MOVK instruction. - if (SingleMovk) { - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } - - // Create the second MOVK instruction. - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(getChunk(UImm, SecondMovkIdx)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16)); - - transferImpOps(MI, MIB, MIB2); - MI.eraseFromParent(); - return true; -} - -/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more -/// real move-immediate instructions to synthesize the immediate. -bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned BitSize) { - MachineInstr &MI = *MBBI; - uint64_t Imm = MI.getOperand(1).getImm(); - const unsigned Mask = 0xFFFF; - - // Try a MOVI instruction (aka ORR-immediate with the zero register). - uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); - uint64_t Encoding; - if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) { - unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri); - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) - .addOperand(MI.getOperand(0)) - .addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR) - .addImm(Encoding); - transferImpOps(MI, MIB, MIB); - MI.eraseFromParent(); - return true; - } - - // Scan the immediate and count the number of 16-bit chunks which are either - // all ones or all zeros. 
- unsigned OneChunks = 0; - unsigned ZeroChunks = 0; - for (unsigned Shift = 0; Shift < BitSize; Shift += 16) { - const unsigned Chunk = (Imm >> Shift) & Mask; - if (Chunk == Mask) - OneChunks++; - else if (Chunk == 0) - ZeroChunks++; - } - - // Since we can't materialize the constant with a single ORR instruction, - // let's see whether we can materialize 3/4 of the constant with an ORR - // instruction and use an additional MOVK instruction to materialize the - // remaining 1/4. - // - // We are looking for constants with a pattern like: |A|X|B|X| or |X|A|X|B|. - // - // E.g. assuming |A|X|A|X| is a pattern which can be materialized with ORR, - // we would create the following instruction sequence: - // - // ORR x0, xzr, |A|X|A|X| - // MOVK x0, |B|, LSL #16 - // - // Only look at 64-bit constants which can't be materialized with a single - // instruction e.g. which have less than either three all zero or all one - // chunks. - // - // Ignore 32-bit constants here, they always can be materialized with a - // MOVZ/MOVN + MOVK pair. Since the 32-bit constant can't be materialized - // with a single ORR, the best sequence we can achieve is a ORR + MOVK pair. - // Thus we fall back to the default code below which in the best case creates - // a single MOVZ/MOVN instruction (in case one chunk is all zero or all one). - // - if (BitSize == 64 && OneChunks < 3 && ZeroChunks < 3) { - // If we interpret the 64-bit constant as a v4i16, are elements 0 and 2 - // identical? - if (getChunk(UImm, 0) == getChunk(UImm, 2)) { - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 3 into element 1. - uint64_t OrrImm = replicateChunk(UImm, 3, 1); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 1)) - return true; - - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 1 into element 3. - OrrImm = replicateChunk(UImm, 1, 3); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 3)) - return true; - - // If we interpret the 64-bit constant as a v4i16, are elements 1 and 3 - // identical? - } else if (getChunk(UImm, 1) == getChunk(UImm, 3)) { - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 2 into element 0. - uint64_t OrrImm = replicateChunk(UImm, 2, 0); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 0)) - return true; - - // See if we can come up with a constant which can be materialized with - // ORR-immediate by replicating element 1 into element 3. - OrrImm = replicateChunk(UImm, 0, 2); - if (tryOrrMovk(UImm, OrrImm, MI, MBB, MBBI, TII, 2)) - return true; - } - } - - // Check for identical 16-bit chunks within the constant and if so materialize - // them with a single ORR instruction. The remaining one or two 16-bit chunks - // will be materialized with MOVK instructions. - if (BitSize == 64 && tryToreplicateChunks(UImm, MI, MBB, MBBI, TII)) - return true; - - // Check whether the constant contains a sequence of contiguous ones, which - // might be interrupted by one or two chunks. If so, materialize the sequence - // of contiguous ones with an ORR instruction. Materialize the chunks which - // are either interrupting the sequence or outside of the sequence with a - // MOVK instruction. 
- if (BitSize == 64 && trySequenceOfOnes(UImm, MI, MBB, MBBI, TII)) - return true; - - // Use a MOVZ or MOVN instruction to set the high bits, followed by one or - // more MOVK instructions to insert additional 16-bit portions into the - // lower bits. - bool isNeg = false; - - // Use MOVN to materialize the high bits if we have more all one chunks - // than all zero chunks. - if (OneChunks > ZeroChunks) { - isNeg = true; - Imm = ~Imm; - } - - unsigned FirstOpc; - if (BitSize == 32) { - Imm &= (1LL << 32) - 1; - FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi); - } else { - FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi); - } - unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN - unsigned LastShift = 0; // LSL amount for last MOVK - if (Imm != 0) { - unsigned LZ = countLeadingZeros(Imm); - unsigned TZ = countTrailingZeros(Imm); - Shift = ((63 - LZ) / 16) * 16; - LastShift = (TZ / 16) * 16; - } - unsigned Imm16 = (Imm >> Shift) & Mask; - unsigned DstReg = MI.getOperand(0).getReg(); - bool DstIsDead = MI.getOperand(0).isDead(); - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(FirstOpc)) - .addReg(DstReg, RegState::Define | - getDeadRegState(DstIsDead && Shift == LastShift)) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); - - // If a MOVN was used for the high bits of a negative value, flip the rest - // of the bits back for use with MOVK. - if (isNeg) - Imm = ~Imm; - - if (Shift == LastShift) { - transferImpOps(MI, MIB1, MIB1); - MI.eraseFromParent(); - return true; - } - - MachineInstrBuilder MIB2; - unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi); - while (Shift != LastShift) { - Shift -= 16; - Imm16 = (Imm >> Shift) & Mask; - if (Imm16 == (isNeg ? Mask : 0)) - continue; // This 16-bit portion is already set correctly. - MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(DstIsDead && Shift == LastShift)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift)); - } - - transferImpOps(MI, MIB1, MIB2); - MI.eraseFromParent(); - return true; -} - -/// \brief If MBBI references a pseudo instruction that should be expanded here, -/// do the expansion and return true. Otherwise return false. 
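// Two traces of the final MOVZ/MOVN fallback in expandMOVImm above
// (illustrative):
//   0x12345678          ->  movz w0, #0x1234, lsl #16
//                           movk w0, #0x5678
//   0xFFFFFFFFFFFF1234  ->  movn x0, #0xedcb
// The second case flips the immediate (Imm = ~Imm) so that a single MOVN
// plants all three all-one chunks at once.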
-bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI) { - MachineInstr &MI = *MBBI; - unsigned Opcode = MI.getOpcode(); - switch (Opcode) { - default: - break; - - case ARM64::ADDWrr: - case ARM64::SUBWrr: - case ARM64::ADDXrr: - case ARM64::SUBXrr: - case ARM64::ADDSWrr: - case ARM64::SUBSWrr: - case ARM64::ADDSXrr: - case ARM64::SUBSXrr: - case ARM64::ANDWrr: - case ARM64::ANDXrr: - case ARM64::BICWrr: - case ARM64::BICXrr: - case ARM64::EONWrr: - case ARM64::EONXrr: - case ARM64::EORWrr: - case ARM64::EORXrr: - case ARM64::ORNWrr: - case ARM64::ORNXrr: - case ARM64::ORRWrr: - case ARM64::ORRXrr: { - unsigned Opcode; - switch (MI.getOpcode()) { - default: - return false; - case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break; - case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break; - case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break; - case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break; - case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break; - case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break; - case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break; - case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break; - case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break; - case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break; - case ARM64::BICWrr: Opcode = ARM64::BICWrs; break; - case ARM64::BICXrr: Opcode = ARM64::BICXrs; break; - case ARM64::EONWrr: Opcode = ARM64::EONWrs; break; - case ARM64::EONXrr: Opcode = ARM64::EONXrs; break; - case ARM64::EORWrr: Opcode = ARM64::EORWrs; break; - case ARM64::EORXrr: Opcode = ARM64::EORXrs; break; - case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break; - case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break; - case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break; - case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break; - } - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode), - MI.getOperand(0).getReg()) - .addOperand(MI.getOperand(1)) - .addOperand(MI.getOperand(2)) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - transferImpOps(MI, MIB1, MIB1); - MI.eraseFromParent(); - return true; - } - - case ARM64::FCVTSHpseudo: { - MachineOperand Src = MI.getOperand(1); - Src.setImplicit(); - unsigned SrcH = TII->getRegisterInfo().getSubReg(Src.getReg(), ARM64::hsub); - auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::FCVTSHr)) - .addOperand(MI.getOperand(0)) - .addReg(SrcH, RegState::Undef) - .addOperand(Src); - transferImpOps(MI, MIB, MIB); - MI.eraseFromParent(); - return true; - } - case ARM64::LOADgot: { - // Expand into ADRP + LDR. 
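// For a global @g accessed through the GOT, the pair built below is roughly
// (ELF syntax; Darwin uses the @GOTPAGE/@GOTPAGEOFF flavor):
//   adrp x0, :got:g
//   ldr  x0, [x0, :got_lo12:g]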
-    unsigned DstReg = MI.getOperand(0).getReg();
-    const MachineOperand &MO1 = MI.getOperand(1);
-    unsigned Flags = MO1.getTargetFlags();
-    MachineInstrBuilder MIB1 =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg);
-    MachineInstrBuilder MIB2 =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui))
-            .addOperand(MI.getOperand(0))
-            .addReg(DstReg);
-
-    if (MO1.isGlobal()) {
-      MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE);
-      MIB2.addGlobalAddress(MO1.getGlobal(), 0,
-                            Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
-    } else if (MO1.isSymbol()) {
-      MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE);
-      MIB2.addExternalSymbol(MO1.getSymbolName(),
-                             Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
-    } else {
-      assert(MO1.isCPI() &&
-             "Only expect globals, externalsymbols, or constant pools");
-      MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
-                                Flags | ARM64II::MO_PAGE);
-      MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
-                                Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
-    }
-
-    transferImpOps(MI, MIB1, MIB2);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  case ARM64::MOVaddr:
-  case ARM64::MOVaddrJT:
-  case ARM64::MOVaddrCP:
-  case ARM64::MOVaddrBA:
-  case ARM64::MOVaddrTLS:
-  case ARM64::MOVaddrEXT: {
-    // Expand into ADRP + ADD.
-    unsigned DstReg = MI.getOperand(0).getReg();
-    MachineInstrBuilder MIB1 =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg)
-            .addOperand(MI.getOperand(1));
-
-    MachineInstrBuilder MIB2 =
-        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri))
-            .addOperand(MI.getOperand(0))
-            .addReg(DstReg)
-            .addOperand(MI.getOperand(2))
-            .addImm(0);
-
-    transferImpOps(MI, MIB1, MIB2);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  case ARM64::MOVi32imm:
-    return expandMOVImm(MBB, MBBI, 32);
-  case ARM64::MOVi64imm:
-    return expandMOVImm(MBB, MBBI, 64);
-  case ARM64::RET_ReallyLR:
-    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET))
-        .addReg(ARM64::LR);
-    MI.eraseFromParent();
-    return true;
-  }
-  return false;
-}
-
-/// \brief Iterate over the instructions in basic block MBB and expand any
-/// pseudo instructions. Return true if anything was modified.
-bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
-  bool Modified = false;
-
-  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-  while (MBBI != E) {
-    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-    Modified |= expandMI(MBB, MBBI);
-    MBBI = NMBBI;
-  }
-
-  return Modified;
-}
-
-bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
-  TII = static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
-
-  bool Modified = false;
-  for (auto &MBB : MF)
-    Modified |= expandMBB(MBB);
-  return Modified;
-}
-
-/// \brief Returns an instance of the pseudo instruction expansion pass.
-FunctionPass *llvm::createARM64ExpandPseudoPass() {
-  return new ARM64ExpandPseudo();
-}
diff --git a/lib/Target/ARM64/ARM64FastISel.cpp b/lib/Target/ARM64/ARM64FastISel.cpp
deleted file mode 100644
index 51b0f76..0000000
--- a/lib/Target/ARM64/ARM64FastISel.cpp
+++ /dev/null
@@ -1,1929 +0,0 @@
-//===-- ARM64FastISel.cpp - ARM64 FastISel implementation ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the ARM64-specific support for the FastISel class.
Some -// of the target-specific code is generated by tablegen in the file -// ARM64GenFastISel.inc, which is #included here. -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/FastISel.h" -#include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalAlias.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Operator.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -namespace { - -class ARM64FastISel : public FastISel { - - class Address { - public: - typedef enum { - RegBase, - FrameIndexBase - } BaseKind; - - private: - BaseKind Kind; - union { - unsigned Reg; - int FI; - } Base; - int64_t Offset; - - public: - Address() : Kind(RegBase), Offset(0) { Base.Reg = 0; } - void setKind(BaseKind K) { Kind = K; } - BaseKind getKind() const { return Kind; } - bool isRegBase() const { return Kind == RegBase; } - bool isFIBase() const { return Kind == FrameIndexBase; } - void setReg(unsigned Reg) { - assert(isRegBase() && "Invalid base register access!"); - Base.Reg = Reg; - } - unsigned getReg() const { - assert(isRegBase() && "Invalid base register access!"); - return Base.Reg; - } - void setFI(unsigned FI) { - assert(isFIBase() && "Invalid base frame index access!"); - Base.FI = FI; - } - unsigned getFI() const { - assert(isFIBase() && "Invalid base frame index access!"); - return Base.FI; - } - void setOffset(int64_t O) { Offset = O; } - int64_t getOffset() { return Offset; } - - bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); } - }; - - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - LLVMContext *Context; - -private: - // Selection routines. - bool SelectLoad(const Instruction *I); - bool SelectStore(const Instruction *I); - bool SelectBranch(const Instruction *I); - bool SelectIndirectBr(const Instruction *I); - bool SelectCmp(const Instruction *I); - bool SelectSelect(const Instruction *I); - bool SelectFPExt(const Instruction *I); - bool SelectFPTrunc(const Instruction *I); - bool SelectFPToInt(const Instruction *I, bool Signed); - bool SelectIntToFP(const Instruction *I, bool Signed); - bool SelectRem(const Instruction *I, unsigned ISDOpcode); - bool SelectCall(const Instruction *I, const char *IntrMemName); - bool SelectIntrinsicCall(const IntrinsicInst &I); - bool SelectRet(const Instruction *I); - bool SelectTrunc(const Instruction *I); - bool SelectIntExt(const Instruction *I); - bool SelectMul(const Instruction *I); - - // Utility helper routines. 
-  bool isTypeLegal(Type *Ty, MVT &VT);
-  bool isLoadStoreTypeLegal(Type *Ty, MVT &VT);
-  bool ComputeAddress(const Value *Obj, Address &Addr);
-  bool SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
-                       bool UseUnscaled);
-  void AddLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
-                            unsigned Flags, bool UseUnscaled);
-  bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
-  bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
-                          unsigned Alignment);
-  // Emit functions.
-  bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
-  bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
-                bool UseUnscaled = false);
-  bool EmitStore(MVT VT, unsigned SrcReg, Address Addr,
-                 bool UseUnscaled = false);
-  unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
-  unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
-
-  unsigned ARM64MaterializeFP(const ConstantFP *CFP, MVT VT);
-  unsigned ARM64MaterializeGV(const GlobalValue *GV);
-
-  // Call handling routines.
-private:
-  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
-  bool ProcessCallArgs(SmallVectorImpl<Value *> &Args,
-                       SmallVectorImpl<unsigned> &ArgRegs,
-                       SmallVectorImpl<MVT> &ArgVTs,
-                       SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
-                       SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
-                       unsigned &NumBytes);
-  bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
-                  const Instruction *I, CallingConv::ID CC, unsigned &NumBytes);
-
-public:
-  // Backend specific FastISel code.
-  virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
-  virtual unsigned TargetMaterializeConstant(const Constant *C);
-
-  explicit ARM64FastISel(FunctionLoweringInfo &funcInfo,
-                         const TargetLibraryInfo *libInfo)
-      : FastISel(funcInfo, libInfo) {
-    Subtarget = &TM.getSubtarget<ARM64Subtarget>();
-    Context = &funcInfo.Fn->getContext();
-  }
-
-  virtual bool TargetSelectInstruction(const Instruction *I);
-
-#include "ARM64GenFastISel.inc"
-};
-
-} // end anonymous namespace
-
-#include "ARM64GenCallingConv.inc"
-
-CCAssignFn *ARM64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
-  if (CC == CallingConv::WebKit_JS)
-    return CC_ARM64_WebKit_JS;
-  return Subtarget->isTargetDarwin() ? CC_ARM64_DarwinPCS : CC_ARM64_AAPCS;
-}
-
-unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
-  assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
-         "Alloca should always return a pointer.");
-
-  // Don't handle dynamic allocas.
-  if (!FuncInfo.StaticAllocaMap.count(AI))
-    return 0;
-
-  DenseMap<const AllocaInst *, int>::iterator SI =
-      FuncInfo.StaticAllocaMap.find(AI);
-
-  if (SI != FuncInfo.StaticAllocaMap.end()) {
-    unsigned ResultReg = createResultReg(&ARM64::GPR64RegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri),
-            ResultReg)
-        .addFrameIndex(SI->second)
-        .addImm(0)
-        .addImm(0);
-    return ResultReg;
-  }
-
-  return 0;
-}
-
-unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) {
-  const APFloat Val = CFP->getValueAPF();
-  bool is64bit = (VT == MVT::f64);
-
-  // This checks to see if we can use FMOV instructions to materialize
-  // a constant, otherwise we have to materialize via the constant pool.
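// FMOV's 8-bit immediate covers exactly the values +/-(1 + m/16) * 2^e with
// m in [0,15] and e in [-3,4]; e.g. 1.0, 0.5 and 31.0 pass the check below,
// while 100.0 = (1 + 9/16) * 2^6 does not and is loaded from the constant
// pool instead (illustrative examples, not from the patch).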
-  if (TLI.isFPImmLegal(Val, VT)) {
-    int Imm;
-    unsigned Opc;
-    if (is64bit) {
-      Imm = ARM64_AM::getFP64Imm(Val);
-      Opc = ARM64::FMOVDi;
-    } else {
-      Imm = ARM64_AM::getFP32Imm(Val);
-      Opc = ARM64::FMOVSi;
-    }
-    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
-        .addImm(Imm);
-    return ResultReg;
-  }
-
-  // Materialize via constant pool. MachineConstantPool wants an explicit
-  // alignment.
-  unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
-  if (Align == 0)
-    Align = DL.getTypeAllocSize(CFP->getType());
-
-  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
-  unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
-          ADRPReg).addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGE);
-
-  unsigned Opc = is64bit ? ARM64::LDRDui : ARM64::LDRSui;
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
-      .addReg(ADRPReg)
-      .addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
-  return ResultReg;
-}
-
-unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) {
-  // We can't handle thread-local variables quickly yet. Unfortunately we have
-  // to peer through any aliases to find out if that rule applies.
-  const GlobalValue *TLSGV = GV;
-  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-    TLSGV = GA->getAliasedGlobal();
-
-  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV))
-    if (GVar->isThreadLocal())
-      return 0;
-
-  unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-
-  EVT DestEVT = TLI.getValueType(GV->getType(), true);
-  if (!DestEVT.isSimple())
-    return 0;
-  MVT DestVT = DestEVT.getSimpleVT();
-
-  unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass);
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
-
-  if (OpFlags & ARM64II::MO_GOT) {
-    // ADRP + LDRX
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
-            ADRPReg)
-        .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGE);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::LDRXui),
-            ResultReg)
-        .addReg(ADRPReg)
-        .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGEOFF |
-                                     ARM64II::MO_NC);
-  } else {
-    // ADRP + ADDX
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
-            ADRPReg).addGlobalAddress(GV, 0, ARM64II::MO_PAGE);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri),
-            ResultReg)
-        .addReg(ADRPReg)
-        .addGlobalAddress(GV, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC)
-        .addImm(0);
-  }
-  return ResultReg;
-}
-
-unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) {
-  EVT CEVT = TLI.getValueType(C->getType(), true);
-
-  // Only handle simple types.
-  if (!CEVT.isSimple())
-    return 0;
-  MVT VT = CEVT.getSimpleVT();
-
-  // FIXME: Handle ConstantInt.
-  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
-    return ARM64MaterializeFP(CFP, VT);
-  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    return ARM64MaterializeGV(GV);
-
-  return 0;
-}
-
-// Computes the address to get to an object.
-bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
-  const User *U = NULL;
-  unsigned Opcode = Instruction::UserOp1;
-  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
-    // Don't walk into other basic blocks unless the object is an alloca from
-    // another block, otherwise it may not have a virtual register assigned.
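// The check below permits walking into an instruction from another block only
// when it is a static alloca (those always have a fixed frame slot). The
// GetElementPtr case further down then folds constant indices into a running
// byte offset, e.g. for %S = type { i32, i32 }, a GEP with indices (2, 1)
// contributes 2 * 8 + 4 = 20 bytes (illustrative).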
-    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
-        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
-      Opcode = I->getOpcode();
-      U = I;
-    }
-  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
-    Opcode = C->getOpcode();
-    U = C;
-  }
-
-  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
-    if (Ty->getAddressSpace() > 255)
-      // Fast instruction selection doesn't support the special
-      // address spaces.
-      return false;
-
-  switch (Opcode) {
-  default:
-    break;
-  case Instruction::BitCast: {
-    // Look through bitcasts.
-    return ComputeAddress(U->getOperand(0), Addr);
-  }
-  case Instruction::IntToPtr: {
-    // Look past no-op inttoptrs.
-    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
-      return ComputeAddress(U->getOperand(0), Addr);
-    break;
-  }
-  case Instruction::PtrToInt: {
-    // Look past no-op ptrtoints.
-    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
-      return ComputeAddress(U->getOperand(0), Addr);
-    break;
-  }
-  case Instruction::GetElementPtr: {
-    Address SavedAddr = Addr;
-    uint64_t TmpOffset = Addr.getOffset();
-
-    // Iterate through the GEP folding the constants into offsets where
-    // we can.
-    gep_type_iterator GTI = gep_type_begin(U);
-    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
-         ++i, ++GTI) {
-      const Value *Op = *i;
-      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
-        const StructLayout *SL = DL.getStructLayout(STy);
-        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
-        TmpOffset += SL->getElementOffset(Idx);
-      } else {
-        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
-        for (;;) {
-          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
-            // Constant-offset addressing.
-            TmpOffset += CI->getSExtValue() * S;
-            break;
-          }
-          if (canFoldAddIntoGEP(U, Op)) {
-            // A compatible add with a constant operand. Fold the constant.
-            ConstantInt *CI =
-                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
-            TmpOffset += CI->getSExtValue() * S;
-            // Iterate on the other operand.
-            Op = cast<AddOperator>(Op)->getOperand(0);
-            continue;
-          }
-          // Unsupported
-          goto unsupported_gep;
-        }
-      }
-    }
-
-    // Try to grab the base operand now.
-    Addr.setOffset(TmpOffset);
-    if (ComputeAddress(U->getOperand(0), Addr))
-      return true;
-
-    // We failed, restore everything and try the other options.
-    Addr = SavedAddr;
-
-  unsupported_gep:
-    break;
-  }
-  case Instruction::Alloca: {
-    const AllocaInst *AI = cast<AllocaInst>(Obj);
-    DenseMap<const AllocaInst *, int>::iterator SI =
-        FuncInfo.StaticAllocaMap.find(AI);
-    if (SI != FuncInfo.StaticAllocaMap.end()) {
-      Addr.setKind(Address::FrameIndexBase);
-      Addr.setFI(SI->second);
-      return true;
-    }
-    break;
-  }
-  }
-
-  // Try to get this in a register if nothing else has worked.
-  if (!Addr.isValid())
-    Addr.setReg(getRegForValue(Obj));
-  return Addr.isValid();
-}
-
-bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
-  EVT evt = TLI.getValueType(Ty, true);
-
-  // Only handle simple types.
-  if (evt == MVT::Other || !evt.isSimple())
-    return false;
-  VT = evt.getSimpleVT();
-
-  // Handle all legal types, i.e. a register that will directly hold this
-  // value.
-  return TLI.isTypeLegal(VT);
-}
-
-bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
-  if (isTypeLegal(Ty, VT))
-    return true;
-
-  // If this is a type that can be sign or zero-extended to a basic operation,
-  // go ahead and accept it now. For stores, this reflects truncation.
- if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) - return true; - - return false; -} - -bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor, - bool UseUnscaled) { - bool needsLowering = false; - int64_t Offset = Addr.getOffset(); - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - case MVT::i8: - case MVT::i16: - case MVT::i32: - case MVT::i64: - case MVT::f32: - case MVT::f64: - if (!UseUnscaled) - // Using scaled, 12-bit, unsigned immediate offsets. - needsLowering = ((Offset & 0xfff) != Offset); - else - // Using unscaled, 9-bit, signed immediate offsets. - needsLowering = (Offset > 256 || Offset < -256); - break; - } - - // FIXME: If this is a stack pointer and the offset needs to be simplified - // then put the alloca address into a register, set the base type back to - // register and continue. This should almost never happen. - if (needsLowering && Addr.getKind() == Address::FrameIndexBase) { - return false; - } - - // Since the offset is too large for the load/store instruction get the - // reg+offset into a register. - if (needsLowering) { - uint64_t UnscaledOffset = Addr.getOffset() * ScaleFactor; - unsigned ResultReg = FastEmit_ri_(MVT::i64, ISD::ADD, Addr.getReg(), false, - UnscaledOffset, MVT::i64); - if (ResultReg == 0) - return false; - Addr.setReg(ResultReg); - Addr.setOffset(0); - } - return true; -} - -void ARM64FastISel::AddLoadStoreOperands(Address &Addr, - const MachineInstrBuilder &MIB, - unsigned Flags, bool UseUnscaled) { - int64_t Offset = Addr.getOffset(); - // Frame base works a bit differently. Handle it separately. - if (Addr.getKind() == Address::FrameIndexBase) { - int FI = Addr.getFI(); - // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size - // and alignment should be based on the VT. - MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI, Offset), Flags, - MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); - // Now add the rest of the operands. - MIB.addFrameIndex(FI).addImm(Offset).addMemOperand(MMO); - } else { - // Now add the rest of the operands. - MIB.addReg(Addr.getReg()); - MIB.addImm(Offset); - } -} - -bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr, - bool UseUnscaled) { - // Negative offsets require unscaled, 9-bit, signed immediate offsets. - // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. - if (!UseUnscaled && Addr.getOffset() < 0) - UseUnscaled = true; - - unsigned Opc; - const TargetRegisterClass *RC; - bool VTIsi1 = false; - int64_t ScaleFactor = 0; - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - VTIsi1 = true; - // Intentional fall-through. - case MVT::i8: - Opc = UseUnscaled ? ARM64::LDURBBi : ARM64::LDRBBui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 1; - break; - case MVT::i16: - Opc = UseUnscaled ? ARM64::LDURHHi : ARM64::LDRHHui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 2; - break; - case MVT::i32: - Opc = UseUnscaled ? ARM64::LDURWi : ARM64::LDRWui; - RC = &ARM64::GPR32RegClass; - ScaleFactor = 4; - break; - case MVT::i64: - Opc = UseUnscaled ? ARM64::LDURXi : ARM64::LDRXui; - RC = &ARM64::GPR64RegClass; - ScaleFactor = 8; - break; - case MVT::f32: - Opc = UseUnscaled ? ARM64::LDURSi : ARM64::LDRSui; - RC = TLI.getRegClassFor(VT); - ScaleFactor = 4; - break; - case MVT::f64: - Opc = UseUnscaled ? ARM64::LDURDi : ARM64::LDRDui; - RC = TLI.getRegClassFor(VT); - ScaleFactor = 8; - break; - } - // Scale the offset. 
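// Example of the two addressing forms handled below (illustrative): an i32
// load at byte offset 40 can use the scaled form, ldr w0, [x1, #40], whose
// 12-bit immediate encodes 40 / 4 = 10; a negative offset such as -8 is not
// representable there and falls back to the unscaled ldur w0, [x1, #-8].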
- if (!UseUnscaled) { - int64_t Offset = Addr.getOffset(); - if (Offset & (ScaleFactor - 1)) - // Retry using an unscaled, 9-bit, signed immediate offset. - return EmitLoad(VT, ResultReg, Addr, /*UseUnscaled*/ true); - - Addr.setOffset(Offset / ScaleFactor); - } - - // Simplify this down to something we can handle. - if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled)) - return false; - - // Create the base instruction, then add the operands. - ResultReg = createResultReg(RC); - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(Opc), ResultReg); - AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, UseUnscaled); - - // Loading an i1 requires special handling. - if (VTIsi1) { - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(ResultReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - ResultReg = ANDReg; - } - return true; -} - -bool ARM64FastISel::SelectLoad(const Instruction *I) { - MVT VT; - // Verify we have a legal type before going any further. Currently, we handle - // simple types that will directly fit in a register (i32/f32/i64/f64) or - // those that can be sign or zero-extended to a basic operation (i1/i8/i16). - if (!isLoadStoreTypeLegal(I->getType(), VT) || cast(I)->isAtomic()) - return false; - - // See if we can handle this address. - Address Addr; - if (!ComputeAddress(I->getOperand(0), Addr)) - return false; - - unsigned ResultReg; - if (!EmitLoad(VT, ResultReg, Addr)) - return false; - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr, - bool UseUnscaled) { - // Negative offsets require unscaled, 9-bit, signed immediate offsets. - // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. - if (!UseUnscaled && Addr.getOffset() < 0) - UseUnscaled = true; - - unsigned StrOpc; - bool VTIsi1 = false; - int64_t ScaleFactor = 0; - // Using scaled, 12-bit, unsigned immediate offsets. - switch (VT.SimpleTy) { - default: - return false; - case MVT::i1: - VTIsi1 = true; - case MVT::i8: - StrOpc = UseUnscaled ? ARM64::STURBBi : ARM64::STRBBui; - ScaleFactor = 1; - break; - case MVT::i16: - StrOpc = UseUnscaled ? ARM64::STURHHi : ARM64::STRHHui; - ScaleFactor = 2; - break; - case MVT::i32: - StrOpc = UseUnscaled ? ARM64::STURWi : ARM64::STRWui; - ScaleFactor = 4; - break; - case MVT::i64: - StrOpc = UseUnscaled ? ARM64::STURXi : ARM64::STRXui; - ScaleFactor = 8; - break; - case MVT::f32: - StrOpc = UseUnscaled ? ARM64::STURSi : ARM64::STRSui; - ScaleFactor = 4; - break; - case MVT::f64: - StrOpc = UseUnscaled ? ARM64::STURDi : ARM64::STRDui; - ScaleFactor = 8; - break; - } - // Scale the offset. - if (!UseUnscaled) { - int64_t Offset = Addr.getOffset(); - if (Offset & (ScaleFactor - 1)) - // Retry using an unscaled, 9-bit, signed immediate offset. - return EmitStore(VT, SrcReg, Addr, /*UseUnscaled*/ true); - - Addr.setOffset(Offset / ScaleFactor); - } - - // Simplify this down to something we can handle. - if (!SimplifyAddress(Addr, VT, UseUnscaled ? 1 : ScaleFactor, UseUnscaled)) - return false; - - // Storing an i1 requires special handling. 
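// i1 values live in a full W register, so the store path below first masks
// the source with AND Wd, Ws, #0x1 (encodeLogicalImmediate(1, 32)) to clear
// the unspecified upper 31 bits before the byte is written out.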
-  if (VTIsi1) {
-    unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
-            ANDReg)
-        .addReg(SrcReg)
-        .addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
-    SrcReg = ANDReg;
-  }
-  // Create the base instruction, then add the operands.
-  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                                    TII.get(StrOpc)).addReg(SrcReg);
-  AddLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, UseUnscaled);
-  return true;
-}
-
-bool ARM64FastISel::SelectStore(const Instruction *I) {
-  MVT VT;
-  Value *Op0 = I->getOperand(0);
-  // Verify we have a legal type before going any further. Currently, we handle
-  // simple types that will directly fit in a register (i32/f32/i64/f64) or
-  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
-  if (!isLoadStoreTypeLegal(Op0->getType(), VT) ||
-      cast<StoreInst>(I)->isAtomic())
-    return false;
-
-  // Get the value to be stored into a register.
-  unsigned SrcReg = getRegForValue(Op0);
-  if (SrcReg == 0)
-    return false;
-
-  // See if we can handle this address.
-  Address Addr;
-  if (!ComputeAddress(I->getOperand(1), Addr))
-    return false;
-
-  if (!EmitStore(VT, SrcReg, Addr))
-    return false;
-  return true;
-}
-
-static ARM64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
-  switch (Pred) {
-  case CmpInst::FCMP_ONE:
-  case CmpInst::FCMP_UEQ:
-  default:
-    // AL is our "false" for now. The other two need more compares.
-    return ARM64CC::AL;
-  case CmpInst::ICMP_EQ:
-  case CmpInst::FCMP_OEQ:
-    return ARM64CC::EQ;
-  case CmpInst::ICMP_SGT:
-  case CmpInst::FCMP_OGT:
-    return ARM64CC::GT;
-  case CmpInst::ICMP_SGE:
-  case CmpInst::FCMP_OGE:
-    return ARM64CC::GE;
-  case CmpInst::ICMP_UGT:
-  case CmpInst::FCMP_UGT:
-    return ARM64CC::HI;
-  case CmpInst::FCMP_OLT:
-    return ARM64CC::MI;
-  case CmpInst::ICMP_ULE:
-  case CmpInst::FCMP_OLE:
-    return ARM64CC::LS;
-  case CmpInst::FCMP_ORD:
-    return ARM64CC::VC;
-  case CmpInst::FCMP_UNO:
-    return ARM64CC::VS;
-  case CmpInst::FCMP_UGE:
-    return ARM64CC::PL;
-  case CmpInst::ICMP_SLT:
-  case CmpInst::FCMP_ULT:
-    return ARM64CC::LT;
-  case CmpInst::ICMP_SLE:
-  case CmpInst::FCMP_ULE:
-    return ARM64CC::LE;
-  case CmpInst::FCMP_UNE:
-  case CmpInst::ICMP_NE:
-    return ARM64CC::NE;
-  case CmpInst::ICMP_UGE:
-    return ARM64CC::CS;
-  case CmpInst::ICMP_ULT:
-    return ARM64CC::CC;
-  }
-}
-
-bool ARM64FastISel::SelectBranch(const Instruction *I) {
-  const BranchInst *BI = cast<BranchInst>(I);
-  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
-  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
-
-  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
-    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
-      // We may not handle every CC for now.
-      ARM64CC::CondCode CC = getCompareCC(CI->getPredicate());
-      if (CC == ARM64CC::AL)
-        return false;
-
-      // Emit the cmp.
-      if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
-        return false;
-
-      // Emit the branch.
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
-          .addImm(CC)
-          .addMBB(TBB);
-      FuncInfo.MBB->addSuccessor(TBB);
-
-      FastEmitBranch(FBB, DbgLoc);
-      return true;
-    }
-  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
-    MVT SrcVT;
-    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
-        (isLoadStoreTypeLegal(TI->getOperand(0)->getType(), SrcVT))) {
-      unsigned CondReg = getRegForValue(TI->getOperand(0));
-      if (CondReg == 0)
-        return false;
-
-      // Issue an extract_subreg to get the lower 32-bits.
- if (SrcVT == MVT::i64) - CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true, - ARM64::sub_32); - - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) - .addReg(ANDReg) - .addReg(ANDReg) - .addImm(0) - .addImm(0); - - unsigned CC = ARM64CC::NE; - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = ARM64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) - .addImm(CC) - .addMBB(TBB); - FuncInfo.MBB->addSuccessor(TBB); - FastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (const ConstantInt *CI = - dyn_cast(BI->getCondition())) { - uint64_t Imm = CI->getZExtValue(); - MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::B)) - .addMBB(Target); - FuncInfo.MBB->addSuccessor(Target); - return true; - } - - unsigned CondReg = getRegForValue(BI->getCondition()); - if (CondReg == 0) - return false; - - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri), - ARM64::WZR) - .addReg(CondReg) - .addImm(0) - .addImm(0); - - unsigned CC = ARM64CC::NE; - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = ARM64CC::EQ; - } - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc)) - .addImm(CC) - .addMBB(TBB); - FuncInfo.MBB->addSuccessor(TBB); - FastEmitBranch(FBB, DbgLoc); - return true; -} - -bool ARM64FastISel::SelectIndirectBr(const Instruction *I) { - const IndirectBrInst *BI = cast(I); - unsigned AddrReg = getRegForValue(BI->getOperand(0)); - if (AddrReg == 0) - return false; - - // Emit the indirect branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BR)) - .addReg(AddrReg); - - // Make sure the CFG is up-to-date. - for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); - - return true; -} - -bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) { - Type *Ty = Src1Value->getType(); - EVT SrcEVT = TLI.getValueType(Ty, true); - if (!SrcEVT.isSimple()) - return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - - // Check to see if the 2nd operand is a constant that we can encode directly - // in the compare. - uint64_t Imm; - bool UseImm = false; - bool isNegativeImm = false; - if (const ConstantInt *ConstInt = dyn_cast(Src2Value)) { - if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 || - SrcVT == MVT::i8 || SrcVT == MVT::i1) { - const APInt &CIVal = ConstInt->getValue(); - - Imm = (isZExt) ? CIVal.getZExtValue() : CIVal.getSExtValue(); - if (CIVal.isNegative()) { - isNegativeImm = true; - Imm = -Imm; - } - // FIXME: We can handle more immediates using shifts. 
- UseImm = ((Imm & 0xfff) == Imm); - } - } else if (const ConstantFP *ConstFP = dyn_cast(Src2Value)) { - if (SrcVT == MVT::f32 || SrcVT == MVT::f64) - if (ConstFP->isZero() && !ConstFP->isNegative()) - UseImm = true; - } - - unsigned ZReg; - unsigned CmpOpc; - bool isICmp = true; - bool needsExt = false; - switch (SrcVT.SimpleTy) { - default: - return false; - case MVT::i1: - case MVT::i8: - case MVT::i16: - needsExt = true; - // Intentional fall-through. - case MVT::i32: - ZReg = ARM64::WZR; - if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSWri : ARM64::SUBSWri; - else - CmpOpc = ARM64::SUBSWrr; - break; - case MVT::i64: - ZReg = ARM64::XZR; - if (UseImm) - CmpOpc = isNegativeImm ? ARM64::ADDSXri : ARM64::SUBSXri; - else - CmpOpc = ARM64::SUBSXrr; - break; - case MVT::f32: - isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPSri : ARM64::FCMPSrr; - break; - case MVT::f64: - isICmp = false; - CmpOpc = UseImm ? ARM64::FCMPDri : ARM64::FCMPDrr; - break; - } - - unsigned SrcReg1 = getRegForValue(Src1Value); - if (SrcReg1 == 0) - return false; - - unsigned SrcReg2; - if (!UseImm) { - SrcReg2 = getRegForValue(Src2Value); - if (SrcReg2 == 0) - return false; - } - - // We have i1, i8, or i16, we need to either zero extend or sign extend. - if (needsExt) { - SrcReg1 = EmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); - if (SrcReg1 == 0) - return false; - if (!UseImm) { - SrcReg2 = EmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); - if (SrcReg2 == 0) - return false; - } - } - - if (isICmp) { - if (UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(ZReg) - .addReg(SrcReg1) - .addImm(Imm) - .addImm(0); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(ZReg) - .addReg(SrcReg1) - .addReg(SrcReg2); - } else { - if (UseImm) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(SrcReg1); - else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) - .addReg(SrcReg1) - .addReg(SrcReg2); - } - return true; -} - -bool ARM64FastISel::SelectCmp(const Instruction *I) { - const CmpInst *CI = cast(I); - - // We may not handle every CC for now. - ARM64CC::CondCode CC = getCompareCC(CI->getPredicate()); - if (CC == ARM64CC::AL) - return false; - - // Emit the cmp. - if (!EmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) - return false; - - // Now set a register based on the comparison. 
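// The materialization below is the standard "cset" idiom: CSINC Wd, WZR,
// WZR, invert(CC) produces WZR + 1 = 1 when CC holds and 0 otherwise, e.g.
// cset w0, eq for an equality compare.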
- ARM64CC::CondCode invertedCC = getInvertedCondCode(CC); - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::CSINCWr), - ResultReg) - .addReg(ARM64::WZR) - .addReg(ARM64::WZR) - .addImm(invertedCC); - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectSelect(const Instruction *I) { - const SelectInst *SI = cast(I); - - EVT DestEVT = TLI.getValueType(SI->getType(), true); - if (!DestEVT.isSimple()) - return false; - - MVT DestVT = DestEVT.getSimpleVT(); - if (DestVT != MVT::i32 && DestVT != MVT::i64 && DestVT != MVT::f32 && - DestVT != MVT::f64) - return false; - - unsigned CondReg = getRegForValue(SI->getCondition()); - if (CondReg == 0) - return false; - unsigned TrueReg = getRegForValue(SI->getTrueValue()); - if (TrueReg == 0) - return false; - unsigned FalseReg = getRegForValue(SI->getFalseValue()); - if (FalseReg == 0) - return false; - - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(CondReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri)) - .addReg(ANDReg) - .addReg(ANDReg) - .addImm(0) - .addImm(0); - - unsigned SelectOpc; - switch (DestVT.SimpleTy) { - default: - return false; - case MVT::i32: - SelectOpc = ARM64::CSELWr; - break; - case MVT::i64: - SelectOpc = ARM64::CSELXr; - break; - case MVT::f32: - SelectOpc = ARM64::FCSELSrrr; - break; - case MVT::f64: - SelectOpc = ARM64::FCSELDrrr; - break; - } - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SelectOpc), - ResultReg) - .addReg(TrueReg) - .addReg(FalseReg) - .addImm(ARM64CC::NE); - - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectFPExt(const Instruction *I) { - Value *V = I->getOperand(0); - if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) - return false; - - unsigned Op = getRegForValue(V); - if (Op == 0) - return false; - - unsigned ResultReg = createResultReg(&ARM64::FPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTDSr), - ResultReg).addReg(Op); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectFPTrunc(const Instruction *I) { - Value *V = I->getOperand(0); - if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) - return false; - - unsigned Op = getRegForValue(V); - if (Op == 0) - return false; - - unsigned ResultReg = createResultReg(&ARM64::FPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTSDr), - ResultReg).addReg(Op); - UpdateValueMap(I, ResultReg); - return true; -} - -// FPToUI and FPToSI -bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) { - MVT DestVT; - if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) - return false; - - unsigned SrcReg = getRegForValue(I->getOperand(0)); - if (SrcReg == 0) - return false; - - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); - - unsigned Opc; - if (SrcVT == MVT::f64) { - if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWDr : ARM64::FCVTZSUXDr; - else - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWDr : ARM64::FCVTZUUXDr; - } else { - if (Signed) - Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWSr : ARM64::FCVTZSUXSr; - else - Opc = (DestVT == MVT::i32) ? 
ARM64::FCVTZUUWSr : ARM64::FCVTZUUXSr; - } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(SrcReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { - MVT DestVT; - if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) - return false; - - unsigned SrcReg = getRegForValue(I->getOperand(0)); - if (SrcReg == 0) - return false; - - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true); - - // Handle sign-extension. - if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { - SrcReg = - EmitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); - if (SrcReg == 0) - return false; - } - - unsigned Opc; - if (SrcVT == MVT::i64) { - if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUXSri : ARM64::SCVTFUXDri; - else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUXSri : ARM64::UCVTFUXDri; - } else { - if (Signed) - Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUWSri : ARM64::SCVTFUWDri; - else - Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUWSri : ARM64::UCVTFUWDri; - } - - unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(SrcReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl &Args, - SmallVectorImpl &ArgRegs, - SmallVectorImpl &ArgVTs, - SmallVectorImpl &ArgFlags, - SmallVectorImpl &RegArgs, - CallingConv::ID CC, unsigned &NumBytes) { - SmallVector ArgLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context); - CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); - - // Get a count of how many bytes are to be pushed on the stack. - NumBytes = CCInfo.getNextStackOffset(); - - // Issue CALLSEQ_START - unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(NumBytes); - - // Process the args. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - unsigned Arg = ArgRegs[VA.getValNo()]; - MVT ArgVT = ArgVTs[VA.getValNo()]; - - // Handle arg promotion: SExt, ZExt, AExt. - switch (VA.getLocInfo()) { - case CCValAssign::Full: - break; - case CCValAssign::SExt: { - MVT DestVT = VA.getLocVT(); - MVT SrcVT = ArgVT; - Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false); - if (Arg == 0) - return false; - ArgVT = DestVT; - break; - } - case CCValAssign::AExt: - // Intentional fall-through. - case CCValAssign::ZExt: { - MVT DestVT = VA.getLocVT(); - MVT SrcVT = ArgVT; - Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true); - if (Arg == 0) - return false; - ArgVT = DestVT; - break; - } - default: - llvm_unreachable("Unknown arg promotion!"); - } - - // Now copy/store arg to correct locations. - if (VA.isRegLoc() && !VA.needsCustom()) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg); - RegArgs.push_back(VA.getLocReg()); - } else if (VA.needsCustom()) { - // FIXME: Handle custom args. - return false; - } else { - assert(VA.isMemLoc() && "Assuming store on stack."); - - // Need to store on the stack. 
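// Under AAPCS64 the first eight integer arguments travel in x0-x7; anything
// beyond that is assigned a LocMemOffset relative to the outgoing-argument
// area reserved by CALLSEQ_START above, so the ninth argument is stored at
// [sp, #0], the tenth at [sp, #8], and so on (illustrative; the Darwin
// variant packs small arguments more tightly).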
-      Address Addr;
-      Addr.setKind(Address::RegBase);
-      Addr.setReg(ARM64::SP);
-      Addr.setOffset(VA.getLocMemOffset());
-
-      if (!EmitStore(ArgVT, Arg, Addr))
-        return false;
-    }
-  }
-  return true;
-}
-
-bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
-                               const Instruction *I, CallingConv::ID CC,
-                               unsigned &NumBytes) {
-  // Issue CALLSEQ_END
-  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
-      .addImm(NumBytes)
-      .addImm(0);
-
-  // Now the return value.
-  if (RetVT != MVT::isVoid) {
-    SmallVector<CCValAssign, 16> RVLocs;
-    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
-    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
-
-    // Only handle a single return value.
-    if (RVLocs.size() != 1)
-      return false;
-
-    // Copy all of the result registers out of their specified physreg.
-    MVT CopyVT = RVLocs[0].getValVT();
-    unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-            TII.get(TargetOpcode::COPY),
-            ResultReg).addReg(RVLocs[0].getLocReg());
-    UsedRegs.push_back(RVLocs[0].getLocReg());
-
-    // Finally update the result.
-    UpdateValueMap(I, ResultReg);
-  }
-
-  return true;
-}
-
-bool ARM64FastISel::SelectCall(const Instruction *I,
-                               const char *IntrMemName = 0) {
-  const CallInst *CI = cast<CallInst>(I);
-  const Value *Callee = CI->getCalledValue();
-
-  // Don't handle inline asm or intrinsics.
-  if (isa<InlineAsm>(Callee))
-    return false;
-
-  // Only handle global variable Callees.
-  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
-  if (!GV)
-    return false;
-
-  // Check the calling convention.
-  ImmutableCallSite CS(CI);
-  CallingConv::ID CC = CS.getCallingConv();
-
-  // Let SDISel handle vararg functions.
-  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
-  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
-  if (FTy->isVarArg())
-    return false;
-
-  // Handle *simple* calls for now.
-  MVT RetVT;
-  Type *RetTy = I->getType();
-  if (RetTy->isVoidTy())
-    RetVT = MVT::isVoid;
-  else if (!isTypeLegal(RetTy, RetVT))
-    return false;
-
-  // Set up the argument vectors.
-  SmallVector<Value *, 8> Args;
-  SmallVector<unsigned, 8> ArgRegs;
-  SmallVector<MVT, 8> ArgVTs;
-  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
-  Args.reserve(CS.arg_size());
-  ArgRegs.reserve(CS.arg_size());
-  ArgVTs.reserve(CS.arg_size());
-  ArgFlags.reserve(CS.arg_size());
-
-  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
-       i != e; ++i) {
-    // If we're lowering a memory intrinsic instead of a regular call, skip the
-    // last two arguments, which shouldn't be passed to the underlying function.
-    if (IntrMemName && e - i <= 2)
-      break;
-
-    unsigned Arg = getRegForValue(*i);
-    if (Arg == 0)
-      return false;
-
-    ISD::ArgFlagsTy Flags;
-    unsigned AttrInd = i - CS.arg_begin() + 1;
-    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
-      Flags.setSExt();
-    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
-      Flags.setZExt();
-
-    // FIXME: Only handle *easy* calls for now.
-    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
-        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
-        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
-        CS.paramHasAttr(AttrInd, Attribute::ByVal))
-      return false;
-
-    MVT ArgVT;
-    Type *ArgTy = (*i)->getType();
-    if (!isTypeLegal(ArgTy, ArgVT) &&
-        !(ArgVT == MVT::i1 || ArgVT == MVT::i8 || ArgVT == MVT::i16))
-      return false;
-
-    // We don't handle vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) - return false; - - unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); - Flags.setOrigAlign(OriginalAlignment); - - Args.push_back(*i); - ArgRegs.push_back(Arg); - ArgVTs.push_back(ArgVT); - ArgFlags.push_back(Flags); - } - - // Handle the arguments now that we've gotten them. - SmallVector RegArgs; - unsigned NumBytes; - if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) - return false; - - // Issue the call. - MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BL)); - if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); - - // Add implicit physical register uses to the call. - for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) - MIB.addReg(RegArgs[i], RegState::Implicit); - - // Add a register mask with the call-preserved registers. - // Proper defs for return values will be added by setPhysRegsDeadExcept(). - MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv())); - - // Finish off the call including any return values. - SmallVector UsedRegs; - if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) - return false; - - // Set all unused physreg defs as dead. - static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); - - return true; -} - -bool ARM64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) { - if (Alignment) - return Len / Alignment <= 4; - else - return Len < 32; -} - -bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, - unsigned Alignment) { - // Make sure we don't bloat code by inlining very large memcpy's. - if (!IsMemCpySmall(Len, Alignment)) - return false; - - int64_t UnscaledOffset = 0; - Address OrigDest = Dest; - Address OrigSrc = Src; - - while (Len) { - MVT VT; - if (!Alignment || Alignment >= 8) { - if (Len >= 8) - VT = MVT::i64; - else if (Len >= 4) - VT = MVT::i32; - else if (Len >= 2) - VT = MVT::i16; - else { - VT = MVT::i8; - } - } else { - // Bound based on alignment. - if (Len >= 4 && Alignment == 4) - VT = MVT::i32; - else if (Len >= 2 && Alignment == 2) - VT = MVT::i16; - else { - VT = MVT::i8; - } - } - - bool RV; - unsigned ResultReg; - RV = EmitLoad(VT, ResultReg, Src); - assert(RV == true && "Should be able to handle this load."); - RV = EmitStore(VT, ResultReg, Dest); - assert(RV == true && "Should be able to handle this store."); - (void)RV; - - int64_t Size = VT.getSizeInBits() / 8; - Len -= Size; - UnscaledOffset += Size; - - // We need to recompute the unscaled offset for each iteration. - Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); - Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); - } - - return true; -} - -bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) { - // FIXME: Handle more intrinsics. - switch (I.getIntrinsicID()) { - default: - return false; - case Intrinsic::memcpy: - case Intrinsic::memmove: { - const MemTransferInst &MTI = cast(I); - // Don't handle volatile. - if (MTI.isVolatile()) - return false; - - // Disable inlining for memmove before calls to ComputeAddress. Otherwise, - // we would emit dead code because we don't currently handle memmoves. - bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); - if (isa(MTI.getLength()) && isMemCpy) { - // Small memcpy's are common enough that we want to do them without a call - // if possible. 
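// Illustrative sketch (not from this patch): TryEmitSmallMemCpy above
// consumes the length with the widest access the (known) alignment
// permits -- 8, 4, 2, then 1 bytes. The width selection in isolation:

#include <cstdint>
#include <vector>

std::vector<unsigned> memcpyChunks(uint64_t Len, unsigned Align) {
  std::vector<unsigned> Sizes;
  while (Len) {
    unsigned Size;
    if (!Align || Align >= 8)
      Size = Len >= 8 ? 8 : Len >= 4 ? 4 : Len >= 2 ? 2 : 1;
    else if (Len >= 4 && Align == 4)
      Size = 4;
    else if (Len >= 2 && Align == 2)
      Size = 2;
    else
      Size = 1;
    Sizes.push_back(Size); // one load/store pair per chunk
    Len -= Size;
  }
  return Sizes; // e.g. Len = 15, Align = 8 -> {8, 4, 2, 1}
}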
- uint64_t Len = cast(MTI.getLength())->getZExtValue(); - unsigned Alignment = MTI.getAlignment(); - if (IsMemCpySmall(Len, Alignment)) { - Address Dest, Src; - if (!ComputeAddress(MTI.getRawDest(), Dest) || - !ComputeAddress(MTI.getRawSource(), Src)) - return false; - if (TryEmitSmallMemCpy(Dest, Src, Len, Alignment)) - return true; - } - } - - if (!MTI.getLength()->getType()->isIntegerTy(64)) - return false; - - if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - const char *IntrMemName = isa(I) ? "memcpy" : "memmove"; - return SelectCall(&I, IntrMemName); - } - case Intrinsic::memset: { - const MemSetInst &MSI = cast(I); - // Don't handle volatile. - if (MSI.isVolatile()) - return false; - - if (!MSI.getLength()->getType()->isIntegerTy(64)) - return false; - - if (MSI.getDestAddressSpace() > 255) - // Fast instruction selection doesn't support the special - // address spaces. - return false; - - return SelectCall(&I, "memset"); - } - case Intrinsic::trap: { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BRK)) - .addImm(1); - return true; - } - } - return false; -} - -bool ARM64FastISel::SelectRet(const Instruction *I) { - const ReturnInst *Ret = cast(I); - const Function &F = *I->getParent()->getParent(); - - if (!FuncInfo.CanLowerReturn) - return false; - - if (F.isVarArg()) - return false; - - // Build a list of return value registers. - SmallVector RetRegs; - - if (Ret->getNumOperands() > 0) { - CallingConv::ID CC = F.getCallingConv(); - SmallVector Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI); - - // Analyze operands of the call, assigning locations to each operand. - SmallVector ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, - I->getContext()); - CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS - : RetCC_ARM64_AAPCS; - CCInfo.AnalyzeReturn(Outs, RetCC); - - // Only handle a single return value for now. - if (ValLocs.size() != 1) - return false; - - CCValAssign &VA = ValLocs[0]; - const Value *RV = Ret->getOperand(0); - - // Don't bother handling odd stuff for now. - if (VA.getLocInfo() != CCValAssign::Full) - return false; - // Only handle register returns for now. - if (!VA.isRegLoc()) - return false; - unsigned Reg = getRegForValue(RV); - if (Reg == 0) - return false; - - unsigned SrcReg = Reg + VA.getValNo(); - unsigned DestReg = VA.getLocReg(); - // Avoid a cross-class copy. This is very unlikely. - if (!MRI.getRegClass(SrcReg)->contains(DestReg)) - return false; - - EVT RVEVT = TLI.getValueType(RV->getType()); - if (!RVEVT.isSimple()) - return false; - MVT RVVT = RVEVT.getSimpleVT(); - MVT DestVT = VA.getValVT(); - // Special handling for extended integers. - if (RVVT != DestVT) { - if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) - return false; - - if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) - return false; - - bool isZExt = Outs[0].Flags.isZExt(); - SrcReg = EmitIntExt(RVVT, SrcReg, DestVT, isZExt); - if (SrcReg == 0) - return false; - } - - // Make the copy. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); - - // Add register to return instruction. 
- RetRegs.push_back(VA.getLocReg()); - } - - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::RET_ReallyLR)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); - return true; -} - -bool ARM64FastISel::SelectTrunc(const Instruction *I) { - Type *DestTy = I->getType(); - Value *Op = I->getOperand(0); - Type *SrcTy = Op->getType(); - - EVT SrcEVT = TLI.getValueType(SrcTy, true); - EVT DestEVT = TLI.getValueType(DestTy, true); - if (!SrcEVT.isSimple()) - return false; - if (!DestEVT.isSimple()) - return false; - - MVT SrcVT = SrcEVT.getSimpleVT(); - MVT DestVT = DestEVT.getSimpleVT(); - - if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && - SrcVT != MVT::i8) - return false; - if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && - DestVT != MVT::i1) - return false; - - unsigned SrcReg = getRegForValue(Op); - if (!SrcReg) - return false; - - // If we're truncating from i64 to a smaller non-legal type then generate an - // AND. Otherwise, we know the high bits are undefined and a truncate doesn't - // generate any code. - if (SrcVT == MVT::i64) { - uint64_t Mask = 0; - switch (DestVT.SimpleTy) { - default: - // Trunc i64 to i32 is handled by the target-independent fast-isel. - return false; - case MVT::i1: - Mask = 0x1; - break; - case MVT::i8: - Mask = 0xff; - break; - case MVT::i16: - Mask = 0xffff; - break; - } - // Issue an extract_subreg to get the lower 32-bits. - unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true, - ARM64::sub_32); - // Create the AND instruction which performs the actual truncation. - unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ANDReg) - .addReg(Reg32) - .addImm(ARM64_AM::encodeLogicalImmediate(Mask, 32)); - SrcReg = ANDReg; - } - - UpdateValueMap(I, SrcReg); - return true; -} - -unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { - assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || - DestVT == MVT::i64) && - "Unexpected value type."); - // Handle i8 and i16 as i32. - if (DestVT == MVT::i8 || DestVT == MVT::i16) - DestVT = MVT::i32; - - if (isZExt) { - unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri), - ResultReg) - .addReg(SrcReg) - .addImm(ARM64_AM::encodeLogicalImmediate(1, 32)); - - if (DestVT == MVT::i64) { - // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the - // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. - unsigned Reg64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM64::SUBREG_TO_REG), Reg64) - .addImm(0) - .addReg(ResultReg) - .addImm(ARM64::sub_32); - ResultReg = Reg64; - } - return ResultReg; - } else { - if (DestVT == MVT::i64) { - // FIXME: We're SExt i1 to i64. 
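// Illustrative sketch (not from this patch): the zero-extension path
// above works because the 32-bit ANDWri also clears bits 63:32 of the
// underlying X register, so widening i1 to i64 needs no extra mask,
// only a SUBREG_TO_REG. The value-level effect:

#include <cstdint>

uint64_t zextI1ToI64(uint32_t Src) {
  uint32_t W = Src & 1u; // ANDWri Wd, Ws, #1
  return W;              // SUBREG_TO_REG: high 32 bits are already zero
}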
-      return 0;
-    }
-    unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SBFMWri),
-            ResultReg)
-        .addReg(SrcReg)
-        .addImm(0)
-        .addImm(0);
-    return ResultReg;
-  }
-}
-
-unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
-                                   bool isZExt) {
-  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
-  unsigned Opc;
-  unsigned Imm = 0;
-
-  switch (SrcVT.SimpleTy) {
-  default:
-    return 0;
-  case MVT::i1:
-    return Emiti1Ext(SrcReg, DestVT, isZExt);
-  case MVT::i8:
-    if (DestVT == MVT::i64)
-      Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
-    else
-      Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri;
-    Imm = 7;
-    break;
-  case MVT::i16:
-    if (DestVT == MVT::i64)
-      Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
-    else
-      Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri;
-    Imm = 15;
-    break;
-  case MVT::i32:
-    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
-    Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
-    Imm = 31;
-    break;
-  }
-
-  // Handle i8 and i16 as i32.
-  if (DestVT == MVT::i8 || DestVT == MVT::i16)
-    DestVT = MVT::i32;
-
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
-      .addReg(SrcReg)
-      .addImm(0)
-      .addImm(Imm);
-
-  return ResultReg;
-}
-
-bool ARM64FastISel::SelectIntExt(const Instruction *I) {
-  // On ARM, in general, integer casts don't involve legal types; this code
-  // handles promotable integers. The high bits for a type smaller than
-  // the register size are assumed to be undefined.
-  Type *DestTy = I->getType();
-  Value *Src = I->getOperand(0);
-  Type *SrcTy = Src->getType();
-
-  bool isZExt = isa<ZExtInst>(I);
-  unsigned SrcReg = getRegForValue(Src);
-  if (!SrcReg)
-    return false;
-
-  EVT SrcEVT = TLI.getValueType(SrcTy, true);
-  EVT DestEVT = TLI.getValueType(DestTy, true);
-  if (!SrcEVT.isSimple())
-    return false;
-  if (!DestEVT.isSimple())
-    return false;
-
-  MVT SrcVT = SrcEVT.getSimpleVT();
-  MVT DestVT = DestEVT.getSimpleVT();
-  unsigned ResultReg = EmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
-  if (ResultReg == 0)
-    return false;
-  UpdateValueMap(I, ResultReg);
-  return true;
-}
-
-bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
-  EVT DestEVT = TLI.getValueType(I->getType(), true);
-  if (!DestEVT.isSimple())
-    return false;
-
-  MVT DestVT = DestEVT.getSimpleVT();
-  if (DestVT != MVT::i64 && DestVT != MVT::i32)
-    return false;
-
-  unsigned DivOpc;
-  bool is64bit = (DestVT == MVT::i64);
-  switch (ISDOpcode) {
-  default:
-    return false;
-  case ISD::SREM:
-    DivOpc = is64bit ? ARM64::SDIVXr : ARM64::SDIVWr;
-    break;
-  case ISD::UREM:
-    DivOpc = is64bit ? ARM64::UDIVXr : ARM64::UDIVWr;
-    break;
-  }
-  unsigned MSubOpc = is64bit ? ARM64::MSUBXrrr : ARM64::MSUBWrrr;
-  unsigned Src0Reg = getRegForValue(I->getOperand(0));
-  if (!Src0Reg)
-    return false;
-
-  unsigned Src1Reg = getRegForValue(I->getOperand(1));
-  if (!Src1Reg)
-    return false;
-
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), ResultReg)
-      .addReg(Src0Reg)
-      .addReg(Src1Reg);
-  // The remainder is computed as numerator - (quotient * denominator) using the
-  // MSUB instruction.
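// Illustrative sketch (not from this patch): AArch64 has no remainder
// instruction, so the expansion divides and then folds the multiply and
// subtract into MSUB, i.e. rem = n - (n / d) * d. Value-level model
// (d == 0 left aside; C++ division by zero is undefined, while the
// hardware SDIV/UDIV yield 0):

#include <cstdint>

int64_t sremExpand(int64_t N, int64_t D) {
  int64_t Q = N / D; // SDIVXr
  return N - Q * D;  // MSUBXrrr: Rd = Ra - Rn * Rm
}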
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg) - .addReg(ResultReg) - .addReg(Src1Reg) - .addReg(Src0Reg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::SelectMul(const Instruction *I) { - EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true); - if (!SrcEVT.isSimple()) - return false; - MVT SrcVT = SrcEVT.getSimpleVT(); - - // Must be simple value type. Don't handle vectors. - if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && - SrcVT != MVT::i8) - return false; - - unsigned Opc; - unsigned ZReg; - switch (SrcVT.SimpleTy) { - default: - return false; - case MVT::i8: - case MVT::i16: - case MVT::i32: - ZReg = ARM64::WZR; - Opc = ARM64::MADDWrrr; - break; - case MVT::i64: - ZReg = ARM64::XZR; - Opc = ARM64::MADDXrrr; - break; - } - - unsigned Src0Reg = getRegForValue(I->getOperand(0)); - if (!Src0Reg) - return false; - - unsigned Src1Reg = getRegForValue(I->getOperand(1)); - if (!Src1Reg) - return false; - - // Create the base instruction, then add the operands. - unsigned ResultReg = createResultReg(TLI.getRegClassFor(SrcVT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(Src0Reg) - .addReg(Src1Reg) - .addReg(ZReg); - UpdateValueMap(I, ResultReg); - return true; -} - -bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) { - switch (I->getOpcode()) { - default: - break; - case Instruction::Load: - return SelectLoad(I); - case Instruction::Store: - return SelectStore(I); - case Instruction::Br: - return SelectBranch(I); - case Instruction::IndirectBr: - return SelectIndirectBr(I); - case Instruction::FCmp: - case Instruction::ICmp: - return SelectCmp(I); - case Instruction::Select: - return SelectSelect(I); - case Instruction::FPExt: - return SelectFPExt(I); - case Instruction::FPTrunc: - return SelectFPTrunc(I); - case Instruction::FPToSI: - return SelectFPToInt(I, /*Signed=*/true); - case Instruction::FPToUI: - return SelectFPToInt(I, /*Signed=*/false); - case Instruction::SIToFP: - return SelectIntToFP(I, /*Signed=*/true); - case Instruction::UIToFP: - return SelectIntToFP(I, /*Signed=*/false); - case Instruction::SRem: - return SelectRem(I, ISD::SREM); - case Instruction::URem: - return SelectRem(I, ISD::UREM); - case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast(I)) - return SelectIntrinsicCall(*II); - return SelectCall(I); - case Instruction::Ret: - return SelectRet(I); - case Instruction::Trunc: - return SelectTrunc(I); - case Instruction::ZExt: - case Instruction::SExt: - return SelectIntExt(I); - case Instruction::Mul: - // FIXME: This really should be handled by the target-independent selector. - return SelectMul(I); - } - return false; - // Silence warnings. - (void)&CC_ARM64_DarwinPCS_VarArg; -} - -namespace llvm { -llvm::FastISel *ARM64::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) { - return new ARM64FastISel(funcInfo, libInfo); -} -} diff --git a/lib/Target/ARM64/ARM64FrameLowering.cpp b/lib/Target/ARM64/ARM64FrameLowering.cpp deleted file mode 100644 index 798986c..0000000 --- a/lib/Target/ARM64/ARM64FrameLowering.cpp +++ /dev/null @@ -1,816 +0,0 @@ -//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "frame-info" -#include "ARM64FrameLowering.h" -#include "ARM64InstrInfo.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64Subtarget.h" -#include "ARM64TargetMachine.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static cl::opt EnableRedZone("arm64-redzone", - cl::desc("enable use of redzone on ARM64"), - cl::init(false), cl::Hidden); - -STATISTIC(NumRedZoneFunctions, "Number of functions using red zone"); - -static unsigned estimateStackSize(MachineFunction &MF) { - const MachineFrameInfo *FFI = MF.getFrameInfo(); - int Offset = 0; - for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { - int FixedOff = -FFI->getObjectOffset(i); - if (FixedOff > Offset) - Offset = FixedOff; - } - for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { - if (FFI->isDeadObjectIndex(i)) - continue; - Offset += FFI->getObjectSize(i); - unsigned Align = FFI->getObjectAlignment(i); - // Adjust to alignment boundary - Offset = (Offset + Align - 1) / Align * Align; - } - // This does not include the 16 bytes used for fp and lr. - return (unsigned)Offset; -} - -bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const { - if (!EnableRedZone) - return false; - // Don't use the red zone if the function explicitly asks us not to. - // This is typically used for kernel code. - if (MF.getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::NoRedZone)) - return false; - - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64FunctionInfo *AFI = MF.getInfo(); - unsigned NumBytes = AFI->getLocalStackSize(); - - // Note: currently hasFP() is always true for hasCalls(), but that's an - // implementation detail of the current code, not a strict requirement, - // so stay safe here and check both. - if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128) - return false; - return true; -} - -/// hasFP - Return true if the specified function should have a dedicated frame -/// pointer register. -bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - -#ifndef NDEBUG - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); - assert(!RegInfo->needsStackRealignment(MF) && - "No stack realignment on ARM64!"); -#endif - - return (MFI->hasCalls() || MFI->hasVarSizedObjects() || - MFI->isFrameAddressTaken()); -} - -/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is -/// not required, we reserve argument space for call sites in the function -/// immediately on entry to the current function. This eliminates the need for -/// add/sub sp brackets around call sites. Returns true if the call frame is -/// included as part of the stack frame. 
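// Illustrative sketch (not from this patch): estimateStackSize above
// pads each object to its alignment with the usual round-up idiom, and
// canUseRedZone accepts only leaf, FP-less frames of at most 128 bytes.
// Both rules in standalone form (hypothetical object list):

#include <cstdint>
#include <vector>

struct Obj { uint64_t Size; uint64_t Align; };

uint64_t estimateSize(const std::vector<Obj> &Objs) {
  uint64_t Off = 0;
  for (const Obj &O : Objs) {
    Off += O.Size;
    Off = (Off + O.Align - 1) / O.Align * O.Align; // round up to Align
  }
  return Off; // excludes the 16 bytes for fp/lr, as noted above
}

bool mayUseRedZone(bool HasCalls, bool HasFP, uint64_t LocalSize) {
  return !HasCalls && !HasFP && LocalSize <= 128;
}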
-bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { - return !MF.getFrameInfo()->hasVarSizedObjects(); -} - -void ARM64FrameLowering::eliminateCallFramePseudoInstr( - MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - const ARM64InstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); - if (!TFI->hasReservedCallFrame(MF)) { - // If we have alloca, convert as follows: - // ADJCALLSTACKDOWN -> sub, sp, sp, amount - // ADJCALLSTACKUP -> add, sp, sp, amount - MachineInstr *Old = I; - DebugLoc DL = Old->getDebugLoc(); - unsigned Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount + Align - 1) / Align * Align; - - // Replace the pseudo instruction with a new instruction... - unsigned Opc = Old->getOpcode(); - if (Opc == ARM64::ADJCALLSTACKDOWN) { - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII); - } else { - assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP"); - emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII); - } - } - } - MBB.erase(I); -} - -void -ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FramePtr) const { - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); - DebugLoc DL = MBB.findDebugLoc(MBBI); - - // Add callee saved registers to move list. - const std::vector &CSI = MFI->getCalleeSavedInfo(); - if (CSI.empty()) - return; - - const DataLayout *TD = MF.getTarget().getDataLayout(); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = -TD->getPointerSize(0); - - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth; - unsigned TotalSkipped = 0; - for (const auto &Info : CSI) { - unsigned Reg = Info.getReg(); - int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) - - getOffsetOfLocalArea() + saveAreaOffset; - - // Don't output a new CFI directive if we're re-saving the frame pointer or - // link register. This happens when the PrologEpilogInserter has inserted an - // extra "STP" of the frame pointer and link register -- the "emitPrologue" - // method automatically generates the directives when frame pointers are - // used. If we generate CFI directives for the extra "STP"s, the linker will - // lose track of the correct values for the frame pointer and link register. - if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) { - TotalSkipped += stackGrowth; - continue; - } - - unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - nullptr, DwarfReg, Offset - TotalSkipped)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } -} - -void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. 
- MachineBasicBlock::iterator MBBI = MBB.begin(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function *Fn = MF.getFunction(); - const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo(); - const ARM64InstrInfo *TII = TM.getInstrInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - ARM64FunctionInfo *AFI = MF.getInfo(); - bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); - bool HasFP = hasFP(MF); - DebugLoc DL = MBB.findDebugLoc(MBBI); - - int NumBytes = (int)MFI->getStackSize(); - if (!AFI->hasStackFrame()) { - assert(!HasFP && "unexpected function without stack frame but with FP"); - - // All of the stack allocation is for locals. - AFI->setLocalStackSize(NumBytes); - - // Label used to tie together the PROLOG_LABEL and the MachineMoves. - MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol(); - - // REDZONE: If the stack size is less than 128 bytes, we don't need - // to actually allocate. - if (NumBytes && !canUseRedZone(MF)) { - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); - - // Encode the stack size of the leaf function. - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else if (NumBytes) { - ++NumRedZoneFunctions; - } - - return; - } - - // Only set up FP if we actually need to. - int FPOffset = 0; - if (HasFP) { - // First instruction must a) allocate the stack and b) have an immediate - // that is a multiple of -2. - assert((MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) && - MBBI->getOperand(2).getReg() == ARM64::SP && - MBBI->getOperand(3).getImm() < 0 && - (MBBI->getOperand(3).getImm() & 1) == 0); - - // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space - // required for the callee saved register area we get the frame pointer - // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8. - FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8; - assert(FPOffset >= 0 && "Bad Framepointer Offset"); - } - - // Move past the saves of the callee-saved registers. - while (MBBI->getOpcode() == ARM64::STPXi || - MBBI->getOpcode() == ARM64::STPDi || - MBBI->getOpcode() == ARM64::STPXpre || - MBBI->getOpcode() == ARM64::STPDpre) { - ++MBBI; - NumBytes -= 16; - } - assert(NumBytes >= 0 && "Negative stack allocation size!?"); - if (HasFP) { - // Issue sub fp, sp, FPOffset or - // mov fp,sp when FPOffset is zero. - // Note: All stores of callee-saved registers are marked as "FrameSetup". - // This code marks the instruction(s) that set the FP also. - emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII, - MachineInstr::FrameSetup); - } - - // All of the remaining stack allocations are for locals. - AFI->setLocalStackSize(NumBytes); - - // Allocate space for the rest of the frame. - if (NumBytes) { - // If we're a leaf function, try using the red zone. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); - } - - // If we need a base pointer, set it up here. It's whatever the value of the - // stack pointer is at this point. Any variable size objects will be allocated - // after this, so we can still use the base pointer to reference locals. - // - // FIXME: Clarify FrameSetup flags here. - // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is - // needed. 
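// Illustrative sketch (not from this patch): the FPOffset computation
// above decodes the pre-increment STP's scaled immediate. That first
// store allocates -getImm() * 8 bytes of callee-save area, and the
// frame pointer is then placed 16 bytes below the old SP:

#include <cassert>

int fpOffsetFromStpImm(int StpImm) {
  assert(StpImm < 0 && (StpImm & 1) == 0 && "pre-increment STP expected");
  return -(StpImm + 2) * 8; // getImm() == -4, i.e. [sp, #-32]! -> 16
}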
- // - if (RegInfo->hasBasePointer(MF)) - TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false); - - if (needsFrameMoves) { - const DataLayout *TD = MF.getTarget().getDataLayout(); - const int StackGrowth = -TD->getPointerSize(0); - unsigned FramePtr = RegInfo->getFrameRegister(MF); - - // An example of the prologue: - // - // .globl __foo - // .align 2 - // __foo: - // Ltmp0: - // .cfi_startproc - // .cfi_personality 155, ___gxx_personality_v0 - // Leh_func_begin: - // .cfi_lsda 16, Lexception33 - // - // stp xa,bx, [sp, -#offset]! - // ... - // stp x28, x27, [sp, #offset-32] - // stp fp, lr, [sp, #offset-16] - // add fp, sp, #offset - 16 - // sub sp, sp, #1360 - // - // The Stack: - // +-------------------------------------------+ - // 10000 | ........ | ........ | ........ | ........ | - // 10004 | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // 10008 | ........ | ........ | ........ | ........ | - // 1000c | ........ | ........ | ........ | ........ | - // +===========================================+ - // 10010 | X28 Register | - // 10014 | X28 Register | - // +-------------------------------------------+ - // 10018 | X27 Register | - // 1001c | X27 Register | - // +===========================================+ - // 10020 | Frame Pointer | - // 10024 | Frame Pointer | - // +-------------------------------------------+ - // 10028 | Link Register | - // 1002c | Link Register | - // +===========================================+ - // 10030 | ........ | ........ | ........ | ........ | - // 10034 | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // 10038 | ........ | ........ | ........ | ........ | - // 1003c | ........ | ........ | ........ | ........ | - // +-------------------------------------------+ - // - // [sp] = 10030 :: >>initial value<< - // sp = 10020 :: stp fp, lr, [sp, #-16]! - // fp = sp == 10020 :: mov fp, sp - // [sp] == 10020 :: stp x28, x27, [sp, #-16]! - // sp == 10010 :: >>final value<< - // - // The frame pointer (w29) points to address 10020. If we use an offset of - // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24 - // for w27, and -32 for w28: - // - // Ltmp1: - // .cfi_def_cfa w29, 16 - // Ltmp2: - // .cfi_offset w30, -8 - // Ltmp3: - // .cfi_offset w29, -16 - // Ltmp4: - // .cfi_offset w27, -24 - // Ltmp5: - // .cfi_offset w28, -32 - - if (HasFP) { - // Define the current CFA rule to use the provided FP. - unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Record the location of the stored LR - unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true); - CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, LR, StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // Record the location of the stored FP - CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth)); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else { - // Encode the stack size of the leaf function. 
- unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize())); - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - - // Now emit the moves for whatever callee saved regs we have. - emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr); - } -} - -static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - -static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) { - if (MI->getOpcode() == ARM64::LDPXpost || - MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi || - MI->getOpcode() == ARM64::LDPDi) { - if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) || - !isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) || - MI->getOperand(2).getReg() != ARM64::SP) - return false; - return true; - } - - return false; -} - -void ARM64FrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->isReturn() && "Can only insert epilog into returning blocks"); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64InstrInfo *TII = - static_cast(MF.getTarget().getInstrInfo()); - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - DebugLoc DL = MBBI->getDebugLoc(); - - int NumBytes = MFI->getStackSize(); - unsigned NumRestores = 0; - // Move past the restores of the callee-saved registers. - MachineBasicBlock::iterator LastPopI = MBBI; - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - if (LastPopI != MBB.begin()) { - do { - ++NumRestores; - --LastPopI; - } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs)); - if (!isCSRestore(LastPopI, CSRegs)) { - ++LastPopI; - --NumRestores; - } - } - NumBytes -= NumRestores * 16; - assert(NumBytes >= 0 && "Negative stack allocation size!?"); - - if (!hasFP(MF)) { - // If this was a redzone leaf function, we don't need to restore the - // stack pointer. - if (!canUseRedZone(MF)) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII); - return; - } - - // Restore the original stack pointer. - // FIXME: Rather than doing the math here, we should instead just use - // non-post-indexed loads for the restores if we aren't actually going to - // be able to save any instructions. - if (NumBytes || MFI->hasVarSizedObjects()) - emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP, - -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); -} - -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. -int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - unsigned FrameReg; - return getFrameIndexReference(MF, FI, FrameReg); -} - -/// getFrameIndexReference - Provide a base+offset reference to an FI slot for -/// debug info. It's the same as what we use for resolving the code-gen -/// references for now. FIXME: This can go wrong when references are -/// SP-relative and simple call frames aren't used. 
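// Illustrative sketch (not from this patch): resolveFrameIndexReference,
// defined below, prefers the FP for fixed (argument) objects and for
// offsets within the short negative-immediate range, otherwise the SP
// or base pointer. The preference test in standalone form, assuming a
// frame exists and omitting the base-pointer case:

bool preferFP(bool IsFixed, bool HasFP, bool HasVarSizedObjects,
              int FPOffset, int SPOffset, bool PreferFPFlag) {
  if (IsFixed)
    return HasFP; // argument access goes through the FP when one exists
  return PreferFPFlag || HasVarSizedObjects || FPOffset >= 0 ||
         (FPOffset >= -256 && SPOffset > -FPOffset);
}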
-int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg) const { - return resolveFrameIndexReference(MF, FI, FrameReg); -} - -int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF, - int FI, unsigned &FrameReg, - bool PreferFP) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - const ARM64FunctionInfo *AFI = MF.getInfo(); - int FPOffset = MFI->getObjectOffset(FI) + 16; - int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); - bool isFixed = MFI->isFixedObjectIndex(FI); - - // Use frame pointer to reference fixed objects. Use it for locals if - // there are VLAs (and thus the SP isn't reliable as a base). - // Make sure useFPForScavengingIndex() does the right thing for the emergency - // spill slot. - bool UseFP = false; - if (AFI->hasStackFrame()) { - // Note: Keeping the following as multiple 'if' statements rather than - // merging to a single expression for readability. - // - // Argument access should always use the FP. - if (isFixed) { - UseFP = hasFP(MF); - } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) { - // Use SP or FP, whichever gives us the best chance of the offset - // being in range for direct access. If the FPOffset is positive, - // that'll always be best, as the SP will be even further away. - // If the FPOffset is negative, we have to keep in mind that the - // available offset range for negative offsets is smaller than for - // positive ones. If we have variable sized objects, we're stuck with - // using the FP regardless, though, as the SP offset is unknown - // and we don't have a base pointer available. If an offset is - // available via the FP and the SP, use whichever is closest. - if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 || - (FPOffset >= -256 && Offset > -FPOffset)) - UseFP = true; - } - } - - if (UseFP) { - FrameReg = RegInfo->getFrameRegister(MF); - return FPOffset; - } - - // Use the base pointer if we have one. - if (RegInfo->hasBasePointer(MF)) - FrameReg = RegInfo->getBaseRegister(); - else { - FrameReg = ARM64::SP; - // If we're using the red zone for this function, the SP won't actually - // be adjusted, so the offsets will be negative. They're also all - // within range of the signed 9-bit immediate instructions. - if (canUseRedZone(MF)) - Offset -= AFI->getLocalStackSize(); - } - - return Offset; -} - -static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) { - if (Reg != ARM64::LR) - return getKillRegState(true); - - // LR maybe referred to later by an @llvm.returnaddress intrinsic. - bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR); - bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken()); - return getKillRegState(LRKill); -} - -bool ARM64FrameLowering::spillCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - unsigned Count = CSI.size(); - DebugLoc DL; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - - if (MI != MBB.end()) - DL = MI->getDebugLoc(); - - for (unsigned i = 0; i < Count; i += 2) { - unsigned idx = Count - i - 2; - unsigned Reg1 = CSI[idx].getReg(); - unsigned Reg2 = CSI[idx + 1].getReg(); - // GPRs and FPRs are saved in pairs of 64-bit regs. 
We expect the CSI - // list to come in sorted by frame index so that we can issue the store - // pair instructions directly. Assert if we see anything otherwise. - // - // The order of the registers in the list is controlled by - // getCalleeSavedRegs(), so they will always be in-order, as well. - assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() && - "Out of order callee saved regs!"); - unsigned StrOpc; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - // Issue sequence of non-sp increment and pi sp spills for cs regs. The - // first spill is a pre-increment that allocates the stack. - // For example: - // stp x22, x21, [sp, #-48]! // addImm(-6) - // stp x20, x19, [sp, #16] // addImm(+2) - // stp fp, lr, [sp, #32] // addImm(+4) - // Rationale: This sequence saves uop updates compared to a sequence of - // pre-increment spills like stp xi,xj,[sp,#-16]! - // Note: Similar rational and sequence for restores in epilog. - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && - "Expected GPR64 callee-saved register pair!"); - // For first spill use pre-increment store. - if (i == 0) - StrOpc = ARM64::STPXpre; - else - StrOpc = ARM64::STPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && - "Expected FPR64 callee-saved register pair!"); - // For first spill use pre-increment store. - if (i == 0) - StrOpc = ARM64::STPDpre; - else - StrOpc = ARM64::STPDi; - } else - llvm_unreachable("Unexpected callee saved register!"); - DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx() - << ", " << CSI[idx + 1].getFrameIdx() << ")\n"); - // Compute offset: i = 0 => offset = -Count; - // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc. - const int Offset = (i == 0) ? -Count : i; - assert((Offset >= -64 && Offset <= 63) && - "Offset out of bounds for STP immediate"); - BuildMI(MBB, MI, DL, TII.get(StrOpc)) - .addReg(Reg2, getPrologueDeath(MF, Reg2)) - .addReg(Reg1, getPrologueDeath(MF, Reg1)) - .addReg(ARM64::SP) - .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit - .setMIFlag(MachineInstr::FrameSetup); - } - return true; -} - -bool ARM64FrameLowering::restoreCalleeSavedRegisters( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - unsigned Count = CSI.size(); - DebugLoc DL; - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - - if (MI != MBB.end()) - DL = MI->getDebugLoc(); - - for (unsigned i = 0; i < Count; i += 2) { - unsigned Reg1 = CSI[i].getReg(); - unsigned Reg2 = CSI[i + 1].getReg(); - // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI - // list to come in sorted by frame index so that we can issue the store - // pair instructions directly. Assert if we see anything otherwise. - assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() && - "Out of order callee saved regs!"); - // Issue sequence of non-sp increment and sp-pi restores for cs regs. 
Only - // the last load is sp-pi post-increment and de-allocates the stack: - // For example: - // ldp fp, lr, [sp, #32] // addImm(+4) - // ldp x20, x19, [sp, #16] // addImm(+2) - // ldp x22, x21, [sp], #48 // addImm(+6) - // Note: see comment in spillCalleeSavedRegisters() - unsigned LdrOpc; - - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - if (ARM64::GPR64RegClass.contains(Reg1)) { - assert(ARM64::GPR64RegClass.contains(Reg2) && - "Expected GPR64 callee-saved register pair!"); - if (i == Count - 2) - LdrOpc = ARM64::LDPXpost; - else - LdrOpc = ARM64::LDPXi; - } else if (ARM64::FPR64RegClass.contains(Reg1)) { - assert(ARM64::FPR64RegClass.contains(Reg2) && - "Expected FPR64 callee-saved register pair!"); - if (i == Count - 2) - LdrOpc = ARM64::LDPDpost; - else - LdrOpc = ARM64::LDPDi; - } else - llvm_unreachable("Unexpected callee saved register!"); - DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx() - << ", " << CSI[i + 1].getFrameIdx() << ")\n"); - - // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4; - // etc. - const int Offset = (i == Count - 2) ? Count : Count - i - 2; - assert((Offset >= -64 && Offset <= 63) && - "Offset out of bounds for LDP immediate"); - BuildMI(MBB, MI, DL, TII.get(LdrOpc)) - .addReg(Reg2, getDefRegState(true)) - .addReg(Reg1, getDefRegState(true)) - .addReg(ARM64::SP) - .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8] - // where the factor * 8 is implicit - } - return true; -} - -void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan( - MachineFunction &MF, RegScavenger *RS) const { - const ARM64RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - ARM64FunctionInfo *AFI = MF.getInfo(); - MachineRegisterInfo *MRI = &MF.getRegInfo(); - SmallVector UnspilledCSGPRs; - SmallVector UnspilledCSFPRs; - - // The frame record needs to be created by saving the appropriate registers - if (hasFP(MF)) { - MRI->setPhysRegUsed(ARM64::FP); - MRI->setPhysRegUsed(ARM64::LR); - } - - // Spill the BasePtr if it's used. Do this first thing so that the - // getCalleeSavedRegs() below will get the right answer. - if (RegInfo->hasBasePointer(MF)) - MRI->setPhysRegUsed(RegInfo->getBaseRegister()); - - // If any callee-saved registers are used, the frame cannot be eliminated. - unsigned NumGPRSpilled = 0; - unsigned NumFPRSpilled = 0; - bool ExtraCSSpill = false; - bool CanEliminateFrame = true; - DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:"); - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - - // Check pairs of consecutive callee-saved registers. - for (unsigned i = 0; CSRegs[i]; i += 2) { - assert(CSRegs[i + 1] && "Odd number of callee-saved registers!"); - - const unsigned OddReg = CSRegs[i]; - const unsigned EvenReg = CSRegs[i + 1]; - assert((ARM64::GPR64RegClass.contains(OddReg) && - ARM64::GPR64RegClass.contains(EvenReg)) ^ - (ARM64::FPR64RegClass.contains(OddReg) && - ARM64::FPR64RegClass.contains(EvenReg)) && - "Register class mismatch!"); - - const bool OddRegUsed = MRI->isPhysRegUsed(OddReg); - const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg); - - // Early exit if none of the registers in the register pair is actually - // used. 
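// Illustrative sketch (not from this patch): callee-saves are spilled
// with STP/LDP, two registers at a time, so when only one register of
// a pair is live its partner is force-marked used to keep the pairing
// intact. The per-pair decision:

bool pairNeedsSpill(bool OddUsed, bool EvenUsed, bool &MarkPartnerUsed) {
  MarkPartnerUsed = OddUsed != EvenUsed; // one live -> co-opt the other
  return OddUsed || EvenUsed;            // neither live -> skip the pair
}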
- if (!OddRegUsed && !EvenRegUsed) { - if (ARM64::GPR64RegClass.contains(OddReg)) { - UnspilledCSGPRs.push_back(OddReg); - UnspilledCSGPRs.push_back(EvenReg); - } else { - UnspilledCSFPRs.push_back(OddReg); - UnspilledCSFPRs.push_back(EvenReg); - } - continue; - } - - unsigned Reg = ARM64::NoRegister; - // If only one of the registers of the register pair is used, make sure to - // mark the other one as used as well. - if (OddRegUsed ^ EvenRegUsed) { - // Find out which register is the additional spill. - Reg = OddRegUsed ? EvenReg : OddReg; - MRI->setPhysRegUsed(Reg); - } - - DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); - DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); - - assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) || - (RegInfo->getEncodingValue(OddReg) + 1 == - RegInfo->getEncodingValue(EvenReg))) && - "Register pair of non-adjacent registers!"); - if (ARM64::GPR64RegClass.contains(OddReg)) { - NumGPRSpilled += 2; - // If it's not a reserved register, we can use it in lieu of an - // emergency spill slot for the register scavenger. - // FIXME: It would be better to instead keep looking and choose another - // unspilled register that isn't reserved, if there is one. - if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) - ExtraCSSpill = true; - } else - NumFPRSpilled += 2; - - CanEliminateFrame = false; - } - - // FIXME: Set BigStack if any stack slot references may be out of range. - // For now, just conservatively guestimate based on unscaled indexing - // range. We'll end up allocating an unnecessary spill slot a lot, but - // realistically that's not a big deal at this stage of the game. - // The CSR spill slots have not been allocated yet, so estimateStackSize - // won't include them. - MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); - DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); - bool BigStack = (CFSize >= 256); - if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) - AFI->setHasStackFrame(true); - - // Estimate if we might need to scavenge a register at some point in order - // to materialize a stack offset. If so, either spill one additional - // callee-saved register or reserve a special spill slot to facilitate - // register scavenging. If we already spilled an extra callee-saved register - // above to keep the number of spills even, we don't need to do anything else - // here. - if (BigStack && !ExtraCSSpill) { - - // If we're adding a register to spill here, we have to add two of them - // to keep the number of regs to spill even. - assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!"); - unsigned Count = 0; - while (!UnspilledCSGPRs.empty() && Count < 2) { - unsigned Reg = UnspilledCSGPRs.back(); - UnspilledCSGPRs.pop_back(); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) - << " to get a scratch register.\n"); - MRI->setPhysRegUsed(Reg); - ExtraCSSpill = true; - ++Count; - } - - // If we didn't find an extra callee-saved register to spill, create - // an emergency spill slot. 
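// Illustrative sketch (not from this patch): the BigStack guestimate
// above flags frames whose estimated size reaches the 256-byte unscaled
// offset bound; such frames need a scratch register for offset
// materialization unless a suitable callee-save was already co-spilled:

bool needsScavengingScratch(unsigned EstimatedFrameSize,
                            unsigned CSRSpillBytes,
                            bool HaveFreeUnreservedCSR) {
  bool BigStack = EstimatedFrameSize + CSRSpillBytes >= 256;
  return BigStack && !HaveFreeUnreservedCSR;
}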
- if (!ExtraCSSpill) { - const TargetRegisterClass *RC = &ARM64::GPR64RegClass; - int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); - RS->addScavengingFrameIndex(FI); - DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI - << " as the emergency spill slot.\n"); - } - } -} diff --git a/lib/Target/ARM64/ARM64FrameLowering.h b/lib/Target/ARM64/ARM64FrameLowering.h deleted file mode 100644 index 02edcdb..0000000 --- a/lib/Target/ARM64/ARM64FrameLowering.h +++ /dev/null @@ -1,75 +0,0 @@ -//===-- ARM64FrameLowering.h - TargetFrameLowering for ARM64 ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64_FRAMELOWERING_H -#define ARM64_FRAMELOWERING_H - -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - -class ARM64Subtarget; -class ARM64TargetMachine; - -class ARM64FrameLowering : public TargetFrameLowering { - const ARM64TargetMachine &TM; - -public: - explicit ARM64FrameLowering(const ARM64TargetMachine &TM, - const ARM64Subtarget &STI) - : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/), - TM(TM) {} - - void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned FramePtr) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - int getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const; - int resolveFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg, - bool PreferFP = false) const; - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const; - - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const; - - /// \brief Can this function use the red zone for local allocations. - bool canUseRedZone(const MachineFunction &MF) const; - - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp deleted file mode 100644 index 2e234c9..0000000 --- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp +++ /dev/null @@ -1,2381 +0,0 @@ -//===-- ARM64ISelDAGToDAG.cpp - A dag to dag inst selector for ARM64 ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the ARM64 target. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-isel" -#include "ARM64TargetMachine.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/Function.h" // To access function attributes. -#include "llvm/IR/GlobalValue.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -//===--------------------------------------------------------------------===// -/// ARM64DAGToDAGISel - ARM64 specific code to select ARM64 machine -/// instructions for SelectionDAG operations. -/// -namespace { - -class ARM64DAGToDAGISel : public SelectionDAGISel { - ARM64TargetMachine &TM; - - /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - - bool ForCodeSize; - -public: - explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget()), ForCodeSize(false) {} - - virtual const char *getPassName() const { - return "ARM64 Instruction Selection"; - } - - virtual bool runOnMachineFunction(MachineFunction &MF) { - AttributeSet FnAttrs = MF.getFunction()->getAttributes(); - ForCodeSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize) || - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); - return SelectionDAGISel::runOnMachineFunction(MF); - } - - SDNode *Select(SDNode *Node); - - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. 
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps); - - SDNode *SelectMLAV64LaneV128(SDNode *N); - SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N); - bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift); - bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift); - bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift); - bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { - return SelectShiftedRegister(N, false, Reg, Shift); - } - bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { - return SelectShiftedRegister(N, true, Reg, Shift); - } - bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 1, Base, OffImm); - } - bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 2, Base, OffImm); - } - bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 4, Base, OffImm); - } - bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 8, Base, OffImm); - } - bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeIndexed(N, 16, Base, OffImm); - } - bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 1, Base, OffImm); - } - bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 2, Base, OffImm); - } - bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 4, Base, OffImm); - } - bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 8, Base, OffImm); - } - bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) { - return SelectAddrModeUnscaled(N, 16, Base, OffImm); - } - - bool SelectAddrModeRO8(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 1, Base, Offset, Imm); - } - bool SelectAddrModeRO16(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 2, Base, Offset, Imm); - } - bool SelectAddrModeRO32(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 4, Base, Offset, Imm); - } - bool SelectAddrModeRO64(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 8, Base, Offset, Imm); - } - bool SelectAddrModeRO128(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Imm) { - return SelectAddrModeRO(N, 16, Base, Offset, Imm); - } - bool SelectAddrModeNoIndex(SDValue N, SDValue &Val); - - /// Form sequences of consecutive 64/128-bit registers for use in NEON - /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have - /// between 1 and 4 elements. If it contains a single element that is returned - /// unchanged; otherwise a REG_SEQUENCE value is returned. - SDValue createDTuple(ArrayRef Vecs); - SDValue createQTuple(ArrayRef Vecs); - - /// Generic helper for the createDTuple/createQTuple - /// functions. Those should almost always be called instead. 
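// Illustrative sketch (not from this patch): createDTuple/createQTuple
// feed createTuple the 2-, 3- or 4-register tuple class plus the first
// consecutive sub-register index, so N vectors collapse into one
// REG_SEQUENCE value. A model of the D-side table (class names as used
// by this backend; the layout of createTuple's arrays is assumed):

struct TupleInfo { const char *RegClass; const char *FirstSubReg; };

TupleInfo dTupleInfo(unsigned NumVecs) {
  static const TupleInfo Table[] = {
      {"ARM64::DDRegClass", "ARM64::dsub0"},
      {"ARM64::DDDRegClass", "ARM64::dsub0"},
      {"ARM64::DDDDRegClass", "ARM64::dsub0"},
  };
  return Table[NumVecs - 2]; // valid for 2..4; a single vector is
                             // returned unchanged, per the comment above
}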
-  SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
-                      unsigned SubRegs[]);
-
-  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
-
-  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
-
-  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
-                     unsigned SubRegIdx);
-  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
-
-  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
-  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
-
-  SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
-  SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
-
-  SDNode *SelectAtomic(SDNode *Node, unsigned Op8, unsigned Op16, unsigned Op32,
-                       unsigned Op64);
-
-  SDNode *SelectBitfieldExtractOp(SDNode *N);
-  SDNode *SelectBitfieldInsertOp(SDNode *N);
-
-  SDNode *SelectLIBM(SDNode *N);
-
-// Include the pieces autogenerated from the target description.
-#include "ARM64GenDAGISel.inc"
-
-private:
-  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
-                             SDValue &Shift);
-  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
-                             SDValue &OffImm);
-  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
-                              SDValue &OffImm);
-  bool SelectAddrModeRO(SDValue N, unsigned Size, SDValue &Base,
-                        SDValue &Offset, SDValue &Imm);
-  bool isWorthFolding(SDValue V) const;
-  bool SelectExtendedSHL(SDValue N, unsigned Size, SDValue &Offset,
-                         SDValue &Imm);
-};
-} // end anonymous namespace
-
-/// isIntImmediate - This method tests to see if the node is a constant
-/// operand. If so Imm will receive the 32-bit value.
-static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
-  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
-    Imm = C->getZExtValue();
-    return true;
-  }
-  return false;
-}
-
-// isIntImmediate - This method tests to see if the value is a constant
-// operand. If so Imm will receive the value.
-static bool isIntImmediate(SDValue N, uint64_t &Imm) {
-  return isIntImmediate(N.getNode(), Imm);
-}
-
-// isOpcWithIntImmediate - This method tests to see if the node is a specific
-// opcode and that it has an immediate integer right operand.
-// If so Imm will receive the 32 bit value.
-static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
-                                  uint64_t &Imm) {
-  return N->getOpcode() == Opc &&
-         isIntImmediate(N->getOperand(1).getNode(), Imm);
-}
-
-bool ARM64DAGToDAGISel::SelectAddrModeNoIndex(SDValue N, SDValue &Val) {
-  EVT ValTy = N.getValueType();
-  if (ValTy != MVT::i64)
-    return false;
-  Val = N;
-  return true;
-}
-
-bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand(
-    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
-  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
-  // Require the address to be in a register. That is safe for all ARM64
-  // variants and it is hard to do anything much smarter without knowing
-  // how the operand is used.
-  OutOps.push_back(Op);
-  return false;
-}
-
-/// SelectArithImmed - Select an immediate value that can be represented as
-/// a 12-bit value shifted left by either 0 or 12. If so, return true with
-/// Val set to the 12-bit value and Shift set to the shifter operand.
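
// (Illustrative sketch, not from the original file: the same encodability
// test on a plain integer, with a hypothetical helper name. It would
// compile standalone with <cstdint>.)
//
//   static bool encodeArithImmed(uint64_t Imm, unsigned &Imm12, unsigned &Sh) {
//     if ((Imm >> 12) == 0) {                       // low 12 bits only
//       Imm12 = unsigned(Imm); Sh = 0; return true;
//     }
//     if ((Imm & 0xfff) == 0 && (Imm >> 24) == 0) { // 12-bit value << 12
//       Imm12 = unsigned(Imm >> 12); Sh = 12; return true;
//     }
//     return false;  // not encodable as an ADD/SUB immediate
//   }
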
-bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
-                                         SDValue &Shift) {
-  // This function is called from the addsub_shifted_imm ComplexPattern,
-  // which lists [imm] as the list of opcodes it's interested in, however
-  // we still need to check whether the operand is actually an immediate
-  // here because the ComplexPattern opcode list is only used in
-  // root-level opcode matching.
-  if (!isa<ConstantSDNode>(N.getNode()))
-    return false;
-
-  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
-  unsigned ShiftAmt;
-
-  if (Immed >> 12 == 0) {
-    ShiftAmt = 0;
-  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
-    ShiftAmt = 12;
-    Immed = Immed >> 12;
-  } else
-    return false;
-
-  unsigned ShVal = ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt);
-  Val = CurDAG->getTargetConstant(Immed, MVT::i32);
-  Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
-  return true;
-}
-
-/// SelectNegArithImmed - As above, but negates the value before trying to
-/// select it.
-bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
-                                            SDValue &Shift) {
-  // This function is called from the addsub_shifted_imm ComplexPattern,
-  // which lists [imm] as the list of opcodes it's interested in, however
-  // we still need to check whether the operand is actually an immediate
-  // here because the ComplexPattern opcode list is only used in
-  // root-level opcode matching.
-  if (!isa<ConstantSDNode>(N.getNode()))
-    return false;
-
-  // The immediate operand must be a 24-bit zero-extended immediate.
-  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
-
-  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
-  // have the opposite effect on the C flag, so this pattern mustn't match
-  // under those circumstances.
-  if (Immed == 0)
-    return false;
-
-  if (N.getValueType() == MVT::i32)
-    Immed = ~((uint32_t)Immed) + 1;
-  else
-    Immed = ~Immed + 1ULL;
-  if (Immed & 0xFFFFFFFFFF000000ULL)
-    return false;
-
-  Immed &= 0xFFFFFFULL;
-  return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
-}
-
-/// getShiftTypeForNode - Translate a shift node to the corresponding
-/// ShiftType value.
-static ARM64_AM::ShiftType getShiftTypeForNode(SDValue N) {
-  switch (N.getOpcode()) {
-  default:
-    return ARM64_AM::InvalidShift;
-  case ISD::SHL:
-    return ARM64_AM::LSL;
-  case ISD::SRL:
-    return ARM64_AM::LSR;
-  case ISD::SRA:
-    return ARM64_AM::ASR;
-  case ISD::ROTR:
-    return ARM64_AM::ROR;
-  }
-}
-
-/// \brief Determine whether it is worth folding V into an extended register.
-bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const {
-  // It hurts if a value is used at least twice, unless we are optimizing
-  // for code size.
-  if (ForCodeSize || V.hasOneUse())
-    return true;
-  return false;
-}
-
-/// SelectShiftedRegister - Select a "shifted register" operand. If the value
-/// is not shifted, set the Shift operand to default of "LSL 0". The logical
-/// instructions allow the shifted register to be rotated, but the arithmetic
-/// instructions do not. The AllowROR parameter specifies whether ROR is
-/// supported.
-bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
-                                              SDValue &Reg, SDValue &Shift) {
-  ARM64_AM::ShiftType ShType = getShiftTypeForNode(N);
-  if (ShType == ARM64_AM::InvalidShift)
-    return false;
-  if (!AllowROR && ShType == ARM64_AM::ROR)
-    return false;
-
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    unsigned BitSize = N.getValueType().getSizeInBits();
-    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
-    unsigned ShVal = ARM64_AM::getShifterImm(ShType, Val);
-
-    Reg = N.getOperand(0);
-    Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
-    return isWorthFolding(N);
-  }
-
-  return false;
-}
-
-/// getExtendTypeForNode - Translate an extend node to the corresponding
-/// ExtendType value.
-static ARM64_AM::ExtendType getExtendTypeForNode(SDValue N,
-                                                 bool IsLoadStore = false) {
-  if (N.getOpcode() == ISD::SIGN_EXTEND ||
-      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
-    EVT SrcVT;
-    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
-      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
-    else
-      SrcVT = N.getOperand(0).getValueType();
-
-    if (!IsLoadStore && SrcVT == MVT::i8)
-      return ARM64_AM::SXTB;
-    else if (!IsLoadStore && SrcVT == MVT::i16)
-      return ARM64_AM::SXTH;
-    else if (SrcVT == MVT::i32)
-      return ARM64_AM::SXTW;
-    else if (SrcVT == MVT::i64)
-      return ARM64_AM::SXTX;
-
-    return ARM64_AM::InvalidExtend;
-  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
-             N.getOpcode() == ISD::ANY_EXTEND) {
-    EVT SrcVT = N.getOperand(0).getValueType();
-    if (!IsLoadStore && SrcVT == MVT::i8)
-      return ARM64_AM::UXTB;
-    else if (!IsLoadStore && SrcVT == MVT::i16)
-      return ARM64_AM::UXTH;
-    else if (SrcVT == MVT::i32)
-      return ARM64_AM::UXTW;
-    else if (SrcVT == MVT::i64)
-      return ARM64_AM::UXTX;
-
-    return ARM64_AM::InvalidExtend;
-  } else if (N.getOpcode() == ISD::AND) {
-    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
-    if (!CSD)
-      return ARM64_AM::InvalidExtend;
-    uint64_t AndMask = CSD->getZExtValue();
-
-    switch (AndMask) {
-    default:
-      return ARM64_AM::InvalidExtend;
-    case 0xFF:
-      return !IsLoadStore ? ARM64_AM::UXTB : ARM64_AM::InvalidExtend;
-    case 0xFFFF:
-      return !IsLoadStore ? ARM64_AM::UXTH : ARM64_AM::InvalidExtend;
-    case 0xFFFFFFFF:
-      return ARM64_AM::UXTW;
-    }
-  }
-
-  return ARM64_AM::InvalidExtend;
-}
-
-// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
-static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
-  if (DL->getOpcode() != ARM64ISD::DUPLANE16 &&
-      DL->getOpcode() != ARM64ISD::DUPLANE32)
-    return false;
-
-  SDValue SV = DL->getOperand(0);
-  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
-    return false;
-
-  SDValue EV = SV.getOperand(1);
-  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
-    return false;
-
-  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
-  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
-  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
-  LaneOp = EV.getOperand(0);
-
-  return true;
-}
-
-// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is
-// a high lane extract.
-static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
-                             SDValue &LaneOp, int &LaneIdx) {
-
-  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
-    std::swap(Op0, Op1);
-    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
-      return false;
-  }
-  StdOp = Op1;
-  return true;
-}
-
-/// SelectMLAV64LaneV128 - ARM64 supports 64-bit vector MLAs (v4i16 and v2i32)
-/// where one multiplicand is a lane in the upper half of a 128-bit vector.
-/// Recognize and select this so that we don't emit unnecessary lane extracts.
-SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
-  SDValue Op0 = N->getOperand(0);
-  SDValue Op1 = N->getOperand(1);
-  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
-  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
-  int LaneIdx = -1; // Will hold the lane index.
-
-  if (Op1.getOpcode() != ISD::MUL ||
-      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
-                        LaneIdx)) {
-    std::swap(Op0, Op1);
-    if (Op1.getOpcode() != ISD::MUL ||
-        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
-                          LaneIdx))
-      return 0;
-  }
-
-  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
-
-  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
-
-  unsigned MLAOpc = ~0U;
-
-  switch (N->getSimpleValueType(0).SimpleTy) {
-  default:
-    llvm_unreachable("Unrecognized MLA.");
-  case MVT::v4i16:
-    MLAOpc = ARM64::MLAv4i16_indexed;
-    break;
-  case MVT::v2i32:
-    MLAOpc = ARM64::MLAv2i32_indexed;
-    break;
-  }
-
-  return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
-}
-
-SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
-  SDValue SMULLOp0;
-  SDValue SMULLOp1;
-  int LaneIdx;
-
-  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
-                        LaneIdx))
-    return 0;
-
-  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
-
-  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
-
-  unsigned SMULLOpc = ~0U;
-
-  if (IntNo == Intrinsic::arm64_neon_smull) {
-    switch (N->getSimpleValueType(0).SimpleTy) {
-    default:
-      llvm_unreachable("Unrecognized SMULL.");
-    case MVT::v4i32:
-      SMULLOpc = ARM64::SMULLv4i16_indexed;
-      break;
-    case MVT::v2i64:
-      SMULLOpc = ARM64::SMULLv2i32_indexed;
-      break;
-    }
-  } else if (IntNo == Intrinsic::arm64_neon_umull) {
-    switch (N->getSimpleValueType(0).SimpleTy) {
-    default:
-      llvm_unreachable("Unrecognized UMULL.");
-    case MVT::v4i32:
-      SMULLOpc = ARM64::UMULLv4i16_indexed;
-      break;
-    case MVT::v2i64:
-      SMULLOpc = ARM64::UMULLv2i32_indexed;
-      break;
-    }
-  } else
-    llvm_unreachable("Unrecognized intrinsic.");
-
-  return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
-}
-
-/// SelectArithExtendedRegister - Select an "extended register" operand. This
-/// operand folds in an extend followed by an optional left shift.
-bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
-                                                    SDValue &Shift) {
-  unsigned ShiftVal = 0;
-  ARM64_AM::ExtendType Ext;
-
-  if (N.getOpcode() == ISD::SHL) {
-    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
-    if (!CSD)
-      return false;
-    ShiftVal = CSD->getZExtValue();
-    if ((ShiftVal & 0x3) != ShiftVal)
-      return false;
-
-    Ext = getExtendTypeForNode(N.getOperand(0));
-    if (Ext == ARM64_AM::InvalidExtend)
-      return false;
-
-    Reg = N.getOperand(0).getOperand(0);
-  } else {
-    Ext = getExtendTypeForNode(N);
-    if (Ext == ARM64_AM::InvalidExtend)
-      return false;
-
-    Reg = N.getOperand(0);
-  }
-
-  // ARM64 mandates that the RHS of the operation must use the smallest
-  // register class that could contain the size being extended from. Thus,
-  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
-  // there might not be an actual 32-bit value in the program. We can
-  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
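
// (Illustrative sketch, not from the original file: on ARM64 the W registers
// alias the low halves of the X registers, so the EXTRACT_SUBREG below is
// only a register re-labeling, e.g.
//     add x0, x1, w2, sxtb   ; reads x2's low byte, sign-extended
// and no copy instruction is needed to produce the 32-bit value.)
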
- if (Reg.getValueType() == MVT::i64 && Ext != ARM64_AM::UXTX && - Ext != ARM64_AM::SXTX) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - MachineSDNode *Node = CurDAG->getMachineNode( - TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg); - Reg = SDValue(Node, 0); - } - - Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32); - return isWorthFolding(N); -} - -/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit -/// immediate" address. The "Size" argument is the size in bytes of the memory -/// reference, which determines the scale. -bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, - SDValue &Base, SDValue &OffImm) { - const TargetLowering *TLI = getTargetLowering(); - if (N.getOpcode() == ISD::FrameIndex) { - int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - OffImm = CurDAG->getTargetConstant(0, MVT::i64); - return true; - } - - if (N.getOpcode() == ARM64ISD::ADDlow) { - GlobalAddressSDNode *GAN = - dyn_cast(N.getOperand(1).getNode()); - Base = N.getOperand(0); - OffImm = N.getOperand(1); - if (!GAN) - return true; - - const GlobalValue *GV = GAN->getGlobal(); - unsigned Alignment = GV->getAlignment(); - const DataLayout *DL = TLI->getDataLayout(); - if (Alignment == 0 && !Subtarget->isTargetDarwin()) - Alignment = DL->getABITypeAlignment(GV->getType()->getElementType()); - - if (Alignment >= Size) - return true; - } - - if (CurDAG->isBaseWithConstantOffset(N)) { - if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { - int64_t RHSC = (int64_t)RHS->getZExtValue(); - unsigned Scale = Log2_32(Size); - if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - } - OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64); - return true; - } - } - } - - // Before falling back to our general case, check if the unscaled - // instructions can handle this. If so, that's preferable. - if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) - return false; - - // Base only. The address will be materialized into a register before - // the memory is accessed. - // add x0, Xbase, #offset - // ldr x0, [x0] - Base = N; - OffImm = CurDAG->getTargetConstant(0, MVT::i64); - return true; -} - -/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit -/// immediate" address. This should only match when there is an offset that -/// is not valid for a scaled immediate addressing mode. The "Size" argument -/// is the size in bytes of the memory reference, which is needed here to know -/// what is valid for a scaled immediate. -bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, - SDValue &Base, SDValue &OffImm) { - if (!CurDAG->isBaseWithConstantOffset(N)) - return false; - if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { - int64_t RHSC = RHS->getSExtValue(); - // If the offset is valid as a scaled immediate, don't match here. 
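
// (Illustrative sketch, not from the original file: the two immediate offset
// forms being chosen between here, written as plain predicates.)
//
//   static bool fitsScaledUImm12(int64_t Off, unsigned Size) {
//     return Off >= 0 && (Off & (Size - 1)) == 0 && (Off / Size) < 0x1000;
//   }
//   static bool fitsUnscaledSImm9(int64_t Off) {
//     return Off >= -256 && Off < 256;  // LDUR/STUR range
//   }
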
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && - RHSC < (0x1000 << Log2_32(Size))) - return false; - if (RHSC >= -256 && RHSC < 256) { - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast(Base)->getIndex(); - const TargetLowering *TLI = getTargetLowering(); - Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); - } - OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64); - return true; - } - } - return false; -} - -static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - SDValue ImpDef = SDValue( - CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64), - 0); - MachineSDNode *Node = CurDAG->getMachineNode( - TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg); - return SDValue(Node, 0); -} - -static SDValue WidenIfNeeded(SelectionDAG *CurDAG, SDValue N) { - if (N.getValueType() == MVT::i32) { - return Widen(CurDAG, N); - } - - return N; -} - -/// \brief Check if the given SHL node (\p N), can be used to form an -/// extended register for an addressing mode. -bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, - SDValue &Offset, SDValue &Imm) { - assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); - ConstantSDNode *CSD = dyn_cast(N.getOperand(1)); - if (CSD && (CSD->getZExtValue() & 0x7) == CSD->getZExtValue()) { - - ARM64_AM::ExtendType Ext = getExtendTypeForNode(N.getOperand(0), true); - if (Ext == ARM64_AM::InvalidExtend) { - Ext = ARM64_AM::UXTX; - Offset = WidenIfNeeded(CurDAG, N.getOperand(0)); - } else { - Offset = WidenIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); - } - - unsigned LegalShiftVal = Log2_32(Size); - unsigned ShiftVal = CSD->getZExtValue(); - - if (ShiftVal != 0 && ShiftVal != LegalShiftVal) - return false; - - Imm = CurDAG->getTargetConstant( - ARM64_AM::getMemExtendImm(Ext, ShiftVal != 0), MVT::i32); - if (isWorthFolding(N)) - return true; - } - return false; -} - -bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size, - SDValue &Base, SDValue &Offset, - SDValue &Imm) { - if (N.getOpcode() != ISD::ADD) - return false; - SDValue LHS = N.getOperand(0); - SDValue RHS = N.getOperand(1); - - // We don't want to match immediate adds here, because they are better lowered - // to the register-immediate addressing modes. - if (isa(LHS) || isa(RHS)) - return false; - - // Check if this particular node is reused in any non-memory related - // operation. If yes, do not try to fold this node into the address - // computation, since the computation will be kept. - const SDNode *Node = N.getNode(); - for (SDNode::use_iterator UI = Node->use_begin(), UE = Node->use_end(); - UI != UE; ++UI) { - if (!isa(*UI)) - return false; - } - - // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFolding(N); - - // Try to match a shifted extend on the RHS. - if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && - SelectExtendedSHL(RHS, Size, Offset, Imm)) { - Base = LHS; - return true; - } - - // Try to match a shifted extend on the LHS. - if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && - SelectExtendedSHL(LHS, Size, Offset, Imm)) { - Base = RHS; - return true; - } - - ARM64_AM::ExtendType Ext = ARM64_AM::UXTX; - // Try to match an unshifted extend on the LHS. 
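
// (Illustrative sketch, not from the original file: SelectExtendedSHL above
// accepts a shifted register offset only when the shift is 0 or log2 of the
// access size, which is what the hardware addressing mode can absorb.)
//
//   static bool isLegalOffsetShift(unsigned Shift, unsigned Size) {
//     return Shift == 0 || (1u << Shift) == Size;
//   }
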
- if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidExtend) { - Base = RHS; - Offset = WidenIfNeeded(CurDAG, LHS.getOperand(0)); - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - if (isWorthFolding(LHS)) - return true; - } - - // Try to match an unshifted extend on the RHS. - if (IsExtendedRegisterWorthFolding && - (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidExtend) { - Base = LHS; - Offset = WidenIfNeeded(CurDAG, RHS.getOperand(0)); - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - if (isWorthFolding(RHS)) - return true; - } - - // Match any non-shifted, non-extend, non-immediate add expression. - Base = LHS; - Offset = WidenIfNeeded(CurDAG, RHS); - Ext = ARM64_AM::UXTX; - Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false), - MVT::i32); - // Reg1 + Reg2 is free: no check needed. - return true; -} - -SDValue ARM64DAGToDAGISel::createDTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { ARM64::DDRegClassID, ARM64::DDDRegClassID, - ARM64::DDDDRegClassID }; - static unsigned SubRegs[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue ARM64DAGToDAGISel::createQTuple(ArrayRef Regs) { - static unsigned RegClassIDs[] = { ARM64::QQRegClassID, ARM64::QQQRegClassID, - ARM64::QQQQRegClassID }; - static unsigned SubRegs[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; - - return createTuple(Regs, RegClassIDs, SubRegs); -} - -SDValue ARM64DAGToDAGISel::createTuple(ArrayRef Regs, - unsigned RegClassIDs[], - unsigned SubRegs[]) { - // There's no special register-class for a vector-list of 1 element: it's just - // a vector. - if (Regs.size() == 1) - return Regs[0]; - - assert(Regs.size() >= 2 && Regs.size() <= 4); - - SDLoc DL(Regs[0].getNode()); - - SmallVector Ops; - - // First operand of REG_SEQUENCE is the desired RegClass. - Ops.push_back( - CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); - - // Then we get pairs of source & subregister-position for the components. - for (unsigned i = 0; i < Regs.size(); ++i) { - Ops.push_back(Regs[i]); - Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); - } - - SDNode *N = - CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); - return SDValue(N, 0); -} - -SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, - unsigned Opc, bool isExt) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - - unsigned ExtOff = isExt; - - // Form a REG_SEQUENCE to force register allocation. - unsigned Vec0Off = ExtOff + 1; - SmallVector Regs(N->op_begin() + Vec0Off, - N->op_begin() + Vec0Off + NumVecs); - SDValue RegSeq = createQTuple(Regs); - - SmallVector Ops; - if (isExt) - Ops.push_back(N->getOperand(1)); - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); - return CurDAG->getMachineNode(Opc, dl, VT, Ops); -} - -SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { - LoadSDNode *LD = cast(N); - if (LD->isUnindexed()) - return NULL; - EVT VT = LD->getMemoryVT(); - EVT DstVT = N->getValueType(0); - ISD::MemIndexedMode AM = LD->getAddressingMode(); - bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; - - // We're not doing validity checking here. That was done when checking - // if we should mark the load as indexed or not. We're just selecting - // the right instruction. 
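
// (Illustrative sketch, not from the original file: the two indexed-load
// flavors SelectIndexedLoad distinguishes via IsPre, in C-like terms.)
//
//   // pre-index:  the base is updated first, then the new address is loaded
//   //   ldr x0, [x1, #8]!   =>   x1 += 8; x0 = *x1;
//   // post-index: the old address is loaded, then the base is updated
//   //   ldr x0, [x1], #8    =>   x0 = *x1; x1 += 8;
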
- unsigned Opcode = 0; - - ISD::LoadExtType ExtType = LD->getExtensionType(); - bool InsertTo64 = false; - if (VT == MVT::i64) - Opcode = IsPre ? ARM64::LDRXpre_isel : ARM64::LDRXpost_isel; - else if (VT == MVT::i32) { - if (ExtType == ISD::NON_EXTLOAD) - Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; - else if (ExtType == ISD::SEXTLOAD) - Opcode = IsPre ? ARM64::LDRSWpre_isel : ARM64::LDRSWpost_isel; - else { - Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel; - InsertTo64 = true; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::i16) { - if (ExtType == ISD::SEXTLOAD) { - if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSHXpre_isel : ARM64::LDRSHXpost_isel; - else - Opcode = IsPre ? ARM64::LDRSHWpre_isel : ARM64::LDRSHWpost_isel; - } else { - Opcode = IsPre ? ARM64::LDRHHpre_isel : ARM64::LDRHHpost_isel; - InsertTo64 = DstVT == MVT::i64; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::i8) { - if (ExtType == ISD::SEXTLOAD) { - if (DstVT == MVT::i64) - Opcode = IsPre ? ARM64::LDRSBXpre_isel : ARM64::LDRSBXpost_isel; - else - Opcode = IsPre ? ARM64::LDRSBWpre_isel : ARM64::LDRSBWpost_isel; - } else { - Opcode = IsPre ? ARM64::LDRBBpre_isel : ARM64::LDRBBpost_isel; - InsertTo64 = DstVT == MVT::i64; - // The result of the load is only i32. It's the subreg_to_reg that makes - // it into an i64. - DstVT = MVT::i32; - } - } else if (VT == MVT::f32) { - Opcode = IsPre ? ARM64::LDRSpre_isel : ARM64::LDRSpost_isel; - } else if (VT == MVT::f64) { - Opcode = IsPre ? ARM64::LDRDpre_isel : ARM64::LDRDpost_isel; - } else - return NULL; - SDValue Chain = LD->getChain(); - SDValue Base = LD->getBasePtr(); - ConstantSDNode *OffsetOp = cast(LD->getOffset()); - int OffsetVal = (int)OffsetOp->getZExtValue(); - SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64); - SDValue Ops[] = { Base, Offset, Chain }; - SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), DstVT, MVT::i64, - MVT::Other, Ops); - // Either way, we're replacing the node, so tell the caller that. 
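
// (Illustrative sketch, not from the original file: the InsertTo64 path
// below relies on 32-bit loads implicitly zeroing the upper 32 bits of the
// X register, so the i64 zero-extension costs no extra instruction.)
//
//   static uint64_t zextLoad32(const uint32_t *P) {
//     return *P;  // one "ldr w0, [x1]"; the zext to x0 is implicit
//   }
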
- Done = true; - if (InsertTo64) { - SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32); - SDNode *Sub = CurDAG->getMachineNode( - ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64, - CurDAG->getTargetConstant(0, MVT::i64), SDValue(Res, 0), SubReg); - ReplaceUses(SDValue(N, 0), SDValue(Sub, 0)); - ReplaceUses(SDValue(N, 1), SDValue(Res, 1)); - ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); - return 0; - } - return Res; -} - -SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, - unsigned SubRegIdx) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - SDValue Chain = N->getOperand(0); - - SmallVector Ops; - Ops.push_back(N->getOperand(2)); // Mem operand; - Ops.push_back(Chain); - - std::vector ResTys; - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); - - SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - SDValue SuperReg = SDValue(Ld, 0); - - // MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - // MemOp[0] = cast(N)->getMemOperand(); - // cast(Ld)->setMemRefs(MemOp, MemOp + 1); - - switch (NumVecs) { - case 4: - ReplaceUses(SDValue(N, 3), CurDAG->getTargetExtractSubreg(SubRegIdx + 3, dl, - VT, SuperReg)); - // FALLTHROUGH - case 3: - ReplaceUses(SDValue(N, 2), CurDAG->getTargetExtractSubreg(SubRegIdx + 2, dl, - VT, SuperReg)); - // FALLTHROUGH - case 2: - ReplaceUses(SDValue(N, 1), CurDAG->getTargetExtractSubreg(SubRegIdx + 1, dl, - VT, SuperReg)); - ReplaceUses(SDValue(N, 0), - CurDAG->getTargetExtractSubreg(SubRegIdx, dl, VT, SuperReg)); - break; - case 1: - ReplaceUses(SDValue(N, 0), SuperReg); - break; - } - - ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); - - return 0; -} - -SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getOperand(2)->getValueType(0); - - // Form a REG_SEQUENCE to force register allocation. - bool Is128Bit = VT.getSizeInBits() == 128; - SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); - - SmallVector Ops; - Ops.push_back(RegSeq); - Ops.push_back(N->getOperand(NumVecs + 2)); - Ops.push_back(N->getOperand(0)); - SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); - - return St; -} - -/// WidenVector - Given a value in the V64 register class, produce the -/// equivalent value in the V128 register class. -class WidenVector { - SelectionDAG &DAG; - -public: - WidenVector(SelectionDAG &DAG) : DAG(DAG) {} - - SDValue operator()(SDValue V64Reg) { - EVT VT = V64Reg.getValueType(); - unsigned NarrowSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); - SDLoc DL(V64Reg); - - SDValue Undef = - SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); - return DAG.getTargetInsertSubreg(ARM64::dsub, DL, WideTy, Undef, V64Reg); - } -}; - -/// NarrowVector - Given a value in the V128 register class, produce the -/// equivalent value in the V64 register class. 
-static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { - EVT VT = V128Reg.getValueType(); - unsigned WideSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); - - return DAG.getTargetExtractSubreg(ARM64::dsub, SDLoc(V128Reg), NarrowTy, - V128Reg); -} - -SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - bool Narrow = VT.getSizeInBits() == 64; - - // Form a REG_SEQUENCE to force register allocation. - SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - - if (Narrow) - std::transform(Regs.begin(), Regs.end(), Regs.begin(), - WidenVector(*CurDAG)); - - SDValue RegSeq = createQTuple(Regs); - - std::vector ResTys; - ResTys.push_back(MVT::Untyped); - ResTys.push_back(MVT::Other); - - unsigned LaneNo = - cast(N->getOperand(NumVecs + 2))->getZExtValue(); - - SmallVector Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 3)); - Ops.push_back(N->getOperand(0)); - SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); - SDValue SuperReg = SDValue(Ld, 0); - - EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - switch (NumVecs) { - case 4: { - SDValue NV3 = - CurDAG->getTargetExtractSubreg(ARM64::qsub3, dl, WideVT, SuperReg); - if (Narrow) - ReplaceUses(SDValue(N, 3), NarrowVector(NV3, *CurDAG)); - else - ReplaceUses(SDValue(N, 3), NV3); - } - // FALLTHROUGH - case 3: { - SDValue NV2 = - CurDAG->getTargetExtractSubreg(ARM64::qsub2, dl, WideVT, SuperReg); - if (Narrow) - ReplaceUses(SDValue(N, 2), NarrowVector(NV2, *CurDAG)); - else - ReplaceUses(SDValue(N, 2), NV2); - } - // FALLTHROUGH - case 2: { - SDValue NV1 = - CurDAG->getTargetExtractSubreg(ARM64::qsub1, dl, WideVT, SuperReg); - SDValue NV0 = - CurDAG->getTargetExtractSubreg(ARM64::qsub0, dl, WideVT, SuperReg); - if (Narrow) { - ReplaceUses(SDValue(N, 1), NarrowVector(NV1, *CurDAG)); - ReplaceUses(SDValue(N, 0), NarrowVector(NV0, *CurDAG)); - } else { - ReplaceUses(SDValue(N, 1), NV1); - ReplaceUses(SDValue(N, 0), NV0); - } - break; - } - } - - ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); - - return Ld; -} - -SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, - unsigned Opc) { - SDLoc dl(N); - EVT VT = N->getOperand(2)->getValueType(0); - bool Narrow = VT.getSizeInBits() == 64; - - // Form a REG_SEQUENCE to force register allocation. - SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); - - if (Narrow) - std::transform(Regs.begin(), Regs.end(), Regs.begin(), - WidenVector(*CurDAG)); - - SDValue RegSeq = createQTuple(Regs); - - unsigned LaneNo = - cast(N->getOperand(NumVecs + 2))->getZExtValue(); - - SmallVector Ops; - Ops.push_back(RegSeq); - Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); - Ops.push_back(N->getOperand(NumVecs + 3)); - Ops.push_back(N->getOperand(0)); - SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); - - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(N)->getMemOperand(); - cast(St)->setMemRefs(MemOp, MemOp + 1); - - return St; -} - -SDNode *ARM64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, - unsigned Op16, unsigned Op32, - unsigned Op64) { - // Mostly direct translation to the given operations, except that we preserve - // the AtomicOrdering for use later on. 
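
// (Illustrative sketch, not from the original file: the ordering travels as
// an operand of the pseudo, much like std::atomic's explicit memory_order
// parameter in standard C++.)
//
//   #include <atomic>
//   static int fetchAdd(std::atomic<int> &A, int V, std::memory_order MO) {
//     return A.fetch_add(V, MO);  // MO is data passed along, not control flow
//   }
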
-  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
-  EVT VT = AN->getMemoryVT();
-
-  unsigned Op;
-  if (VT == MVT::i8)
-    Op = Op8;
-  else if (VT == MVT::i16)
-    Op = Op16;
-  else if (VT == MVT::i32)
-    Op = Op32;
-  else if (VT == MVT::i64)
-    Op = Op64;
-  else
-    llvm_unreachable("Unexpected atomic operation");
-
-  SmallVector Ops;
-  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
-    Ops.push_back(AN->getOperand(i));
-
-  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
-  Ops.push_back(AN->getOperand(0)); // Chain moves to the end
-
-  return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other,
-                              &Ops[0], Ops.size());
-}
-
-static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
-                                       unsigned &Opc, SDValue &Opd0,
-                                       unsigned &LSB, unsigned &MSB,
-                                       unsigned NumberOfIgnoredLowBits,
-                                       bool BiggerPattern) {
-  assert(N->getOpcode() == ISD::AND &&
-         "N must be an AND operation to call this function");
-
-  EVT VT = N->getValueType(0);
-
-  // Here we can test the type of VT and return false when the type does not
-  // match, but since it is done prior to that call in the current context
-  // we turned that into an assert to avoid redundant code.
-  assert((VT == MVT::i32 || VT == MVT::i64) &&
-         "Type checking must have been done before calling this function");
-
-  // FIXME: simplify-demanded-bits in DAGCombine will probably have
-  // changed the AND node to a 32-bit mask operation. We'll have to
-  // undo that as part of the transform here if we want to catch all
-  // the opportunities.
-  // Currently the NumberOfIgnoredLowBits argument helps to recover
-  // from these situations when matching a bigger pattern (bitfield insert).
-
-  // For unsigned extracts, check for a shift right and mask
-  uint64_t And_imm = 0;
-  if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
-    return false;
-
-  const SDNode *Op0 = N->getOperand(0).getNode();
-
-  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
-  // simplified. Try to undo that
-  And_imm |= (1 << NumberOfIgnoredLowBits) - 1;
-
-  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
-  if (And_imm & (And_imm + 1))
-    return false;
-
-  bool ClampMSB = false;
-  uint64_t Srl_imm = 0;
-  // Handle the SRL + ANY_EXTEND case.
-  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
-      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
-    // Extend the incoming operand of the SRL to 64-bit.
-    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
-    // Make sure to clamp the MSB so that we preserve the semantics of the
-    // original operations.
-    ClampMSB = true;
-  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
-    Opd0 = Op0->getOperand(0);
-  } else if (BiggerPattern) {
-    // Let's pretend a 0 shift right has been performed.
-    // The resulting code will be at least as good as the original one
-    // plus it may expose more opportunities for bitfield insert pattern.
-    // FIXME: Currently we limit this to the bigger pattern, because
-    // some optimizations expect AND and not UBFM
-    Opd0 = N->getOperand(0);
-  } else
-    return false;
-
-  assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
-         "bad amount in shift node!");
-
-  LSB = Srl_imm;
-  MSB = Srl_imm + (VT == MVT::i32 ?
CountTrailingOnes_32(And_imm)
-                                  : CountTrailingOnes_64(And_imm)) -
-        1;
-  if (ClampMSB)
-    // Since we're moving the extend before the right shift operation, we need
-    // to clamp the MSB to make sure we don't shift in undefined bits instead
-    // of the zeros which would get shifted in with the original right shift
-    // operation.
-    MSB = MSB > 31 ? 31 : MSB;
-
-  Opc = VT == MVT::i32 ? ARM64::UBFMWri : ARM64::UBFMXri;
-  return true;
-}
-
-static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
-                                     unsigned &LSB, unsigned &MSB) {
-  // We are looking for the following pattern which basically extracts a single
-  // bit from the source value and places it in the LSB of the destination
-  // value; all other bits of the destination value are set to zero:
-  //
-  // Value2 = AND Value, MaskImm
-  // SRL Value2, ShiftImm
-  //
-  // with MaskImm >> ShiftImm == 1.
-  //
-  // This gets selected into a single UBFM:
-  //
-  // UBFM Value, ShiftImm, ShiftImm
-  //
-
-  if (N->getOpcode() != ISD::SRL)
-    return false;
-
-  uint64_t And_mask = 0;
-  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
-    return false;
-
-  Opd0 = N->getOperand(0).getOperand(0);
-
-  uint64_t Srl_imm = 0;
-  if (!isIntImmediate(N->getOperand(1), Srl_imm))
-    return false;
-
-  // Check whether we really have a one-bit extract here.
-  if (And_mask >> Srl_imm == 0x1) {
-    if (N->getValueType(0) == MVT::i32)
-      Opc = ARM64::UBFMWri;
-    else
-      Opc = ARM64::UBFMXri;
-
-    LSB = MSB = Srl_imm;
-
-    return true;
-  }
-
-  return false;
-}
-
-static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
-                                       unsigned &LSB, unsigned &MSB,
-                                       bool BiggerPattern) {
-  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
-         "N must be a SHR/SRA operation to call this function");
-
-  EVT VT = N->getValueType(0);
-
-  // Here we can test the type of VT and return false when the type does not
-  // match, but since it is done prior to that call in the current context
-  // we turned that into an assert to avoid redundant code.
-  assert((VT == MVT::i32 || VT == MVT::i64) &&
-         "Type checking must have been done before calling this function");
-
-  // Check for AND + SRL doing a one-bit extract.
-  if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
-    return true;
-
-  // We're looking for a shift of a shift.
-  uint64_t Shl_imm = 0;
-  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
-    Opd0 = N->getOperand(0).getOperand(0);
-  } else if (BiggerPattern) {
-    // Let's pretend a 0 shift left has been performed.
-    // FIXME: Currently we limit this to the bigger pattern case,
-    // because some optimizations expect AND and not UBFM
-    Opd0 = N->getOperand(0);
-  } else
-    return false;
-
-  assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
-  uint64_t Srl_imm = 0;
-  if (!isIntImmediate(N->getOperand(1), Srl_imm))
-    return false;
-
-  assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
-         "bad amount in shift node!");
-  // Note: The width operand is encoded as width-1.
-  unsigned Width = VT.getSizeInBits() - Srl_imm - 1;
-  int sLSB = Srl_imm - Shl_imm;
-  if (sLSB < 0)
-    return false;
-  LSB = sLSB;
-  MSB = LSB + Width;
-  // SRA requires a signed extraction
-  if (VT == MVT::i32)
-    Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMWri : ARM64::UBFMWri;
-  else
-    Opc = N->getOpcode() == ISD::SRA ?
ARM64::SBFMXri : ARM64::UBFMXri;
-  return true;
-}
-
-static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
-                                SDValue &Opd0, unsigned &LSB, unsigned &MSB,
-                                unsigned NumberOfIgnoredLowBits = 0,
-                                bool BiggerPattern = false) {
-  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
-    return false;
-
-  switch (N->getOpcode()) {
-  default:
-    if (!N->isMachineOpcode())
-      return false;
-    break;
-  case ISD::AND:
-    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
-                                      NumberOfIgnoredLowBits, BiggerPattern);
-  case ISD::SRL:
-  case ISD::SRA:
-    return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
-  }
-
-  unsigned NOpc = N->getMachineOpcode();
-  switch (NOpc) {
-  default:
-    return false;
-  case ARM64::SBFMWri:
-  case ARM64::UBFMWri:
-  case ARM64::SBFMXri:
-  case ARM64::UBFMXri:
-    Opc = NOpc;
-    Opd0 = N->getOperand(0);
-    LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
-    MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
-    return true;
-  }
-  // Unreachable
-  return false;
-}
-
-SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
-  unsigned Opc, LSB, MSB;
-  SDValue Opd0;
-  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
-    return NULL;
-
-  EVT VT = N->getValueType(0);
-  SDValue Ops[] = { Opd0, CurDAG->getTargetConstant(LSB, VT),
-                    CurDAG->getTargetConstant(MSB, VT) };
-  return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3);
-}
-
-// Is mask an i32 or i64 binary sequence 1..10..0 and
-// CountTrailingZeros(mask) == ExpectedTrailingZeros?
-static bool isHighMask(uint64_t Mask, unsigned ExpectedTrailingZeros,
-                       unsigned NumberOfIgnoredHighBits, EVT VT) {
-  assert((VT == MVT::i32 || VT == MVT::i64) &&
-         "i32 or i64 mask type expected!");
-
-  uint64_t ExpectedMask;
-  if (VT == MVT::i32) {
-    uint32_t ExpectedMaski32 = ~0 << ExpectedTrailingZeros;
-    ExpectedMask = ExpectedMaski32;
-    if (NumberOfIgnoredHighBits) {
-      uint32_t highMask = ~0 << (32 - NumberOfIgnoredHighBits);
-      Mask |= highMask;
-    }
-  } else {
-    ExpectedMask = ((uint64_t) ~0) << ExpectedTrailingZeros;
-    if (NumberOfIgnoredHighBits)
-      Mask |= ((uint64_t) ~0) << (64 - NumberOfIgnoredHighBits);
-  }
-
-  return Mask == ExpectedMask;
-}
-
-// Look for bits that will be useful for later uses.
-// A bit is considered useless as soon as it is dropped and never used
-// before it has been dropped.
-// E.g., looking for the useful bits of x
-// 1. y = x & 0x7
-// 2. z = y >> 2
-// After #1, the useful bits of x are 0x7; they live through y.
-// After #2, the useful bits of x are 0x4.
-// However, if x is used in an unpredictable instruction, then all its bits
-// are useful.
-// E.g.
-// 1. y = x & 0x7
-// 2. z = y >> 2
-// 3.
str x, [@x] -static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); - -static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast(Op.getOperand(1).getNode())->getZExtValue(); - Imm = ARM64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); - UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); - getUsefulBits(Op, UsefulBits, Depth + 1); -} - -static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, - uint64_t Imm, uint64_t MSB, - unsigned Depth) { - // inherit the bitwidth value - APInt OpUsefulBits(UsefulBits); - OpUsefulBits = 1; - - if (MSB >= Imm) { - OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); - --OpUsefulBits; - // The interesting part will be in the lower part of the result - getUsefulBits(Op, OpUsefulBits, Depth + 1); - // The interesting part was starting at Imm in the argument - OpUsefulBits = OpUsefulBits.shl(Imm); - } else { - OpUsefulBits = OpUsefulBits.shl(MSB + 1); - --OpUsefulBits; - // The interesting part will be shifted in the result - OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm); - getUsefulBits(Op, OpUsefulBits, Depth + 1); - // The interesting part was at zero in the argument - OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm); - } - - UsefulBits &= OpUsefulBits; -} - -static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast(Op.getOperand(1).getNode())->getZExtValue(); - uint64_t MSB = - cast(Op.getOperand(2).getNode())->getZExtValue(); - - getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); -} - -static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, - unsigned Depth) { - uint64_t ShiftTypeAndValue = - cast(Op.getOperand(2).getNode())->getZExtValue(); - APInt Mask(UsefulBits); - Mask.clearAllBits(); - Mask.flipAllBits(); - - if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSL) { - // Shift Left - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.shl(ShiftAmt); - getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.lshr(ShiftAmt); - } else if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSR) { - // Shift Right - // We do not handle ARM64_AM::ASR, because the sign will change the - // number of useful bits - uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue); - Mask = Mask.lshr(ShiftAmt); - getUsefulBits(Op, Mask, Depth + 1); - Mask = Mask.shl(ShiftAmt); - } else - return; - - UsefulBits &= Mask; -} - -static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, - unsigned Depth) { - uint64_t Imm = - cast(Op.getOperand(2).getNode())->getZExtValue(); - uint64_t MSB = - cast(Op.getOperand(3).getNode())->getZExtValue(); - - if (Op.getOperand(1) == Orig) - return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); - - APInt OpUsefulBits(UsefulBits); - OpUsefulBits = 1; - - if (MSB >= Imm) { - OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1); - --OpUsefulBits; - UsefulBits &= ~OpUsefulBits; - getUsefulBits(Op, UsefulBits, Depth + 1); - } else { - OpUsefulBits = OpUsefulBits.shl(MSB + 1); - --OpUsefulBits; - UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm)); - getUsefulBits(Op, UsefulBits, Depth + 1); - } -} - -static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, - SDValue Orig, unsigned Depth) { - - // Users of this node should have already been instruction selected - // FIXME: Can we turn that into an assert? 
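
// (Illustrative sketch, not from the original file: how useful bits
// propagate through two simple users, on plain integers.)
//
//   // y = x & 0x7;  =>  usefulBits(x) &= 0x7          (masked bits are dead)
//   // z = y >> 2;   =>  usefulBits(y) = usefulBits(z) << 2  (bit 2 only)
//   // so only bit 2 of x can influence z, matching the example above.
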
-  if (!UserNode->isMachineOpcode())
-    return;
-
-  switch (UserNode->getMachineOpcode()) {
-  default:
-    return;
-  case ARM64::ANDSWri:
-  case ARM64::ANDSXri:
-  case ARM64::ANDWri:
-  case ARM64::ANDXri:
-    // We increment Depth only when we call getUsefulBits
-    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
-                                             Depth);
-  case ARM64::UBFMWri:
-  case ARM64::UBFMXri:
-    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
-
-  case ARM64::ORRWrs:
-  case ARM64::ORRXrs:
-    if (UserNode->getOperand(1) != Orig)
-      return;
-    return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
-                                             Depth);
-  case ARM64::BFMWri:
-  case ARM64::BFMXri:
-    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
-  }
-}
-
-static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
-  if (Depth >= 6)
-    return;
-  // Initialize UsefulBits
-  if (!Depth) {
-    unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
-    // At the beginning, assume every produced bit is useful
-    UsefulBits = APInt(Bitwidth, 0);
-    UsefulBits.flipAllBits();
-  }
-  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
-
-  for (SDNode::use_iterator UseIt = Op.getNode()->use_begin(),
-                            UseEnd = Op.getNode()->use_end();
-       UseIt != UseEnd; ++UseIt) {
-    // A use cannot produce useful bits
-    APInt UsefulBitsForUse = APInt(UsefulBits);
-    getUsefulBitsForUse(*UseIt, UsefulBitsForUse, Op, Depth);
-    UsersUsefulBits |= UsefulBitsForUse;
-  }
-  // UsefulBits contains the produced bits that are meaningful for the
-  // current definition, thus a user cannot make a bit meaningful at
-  // this point
-  UsefulBits &= UsersUsefulBits;
-}
-
-// Given an OR operation, check if we have the following pattern
-// ubfm c, b, imm, imm2 (or something that does the same job, see
-// isBitfieldExtractOp)
-// d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
-// countTrailingZeros(mask2) == imm2 - imm + 1
-// f = d | c
-// If yes, the given reference arguments will be updated so that one can
-// replace the OR instruction with:
-// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
-static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Opd0,
-                                     SDValue &Opd1, unsigned &LSB,
-                                     unsigned &MSB, SelectionDAG *CurDAG) {
-  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
-
-  // Set Opc
-  EVT VT = N->getValueType(0);
-  if (VT == MVT::i32)
-    Opc = ARM64::BFMWri;
-  else if (VT == MVT::i64)
-    Opc = ARM64::BFMXri;
-  else
-    return false;
-
-  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
-  // have the expected shape. Try to undo that.
-  APInt UsefulBits;
-  getUsefulBits(SDValue(N, 0), UsefulBits);
-
-  unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
-  unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
-
-  // OR is commutative, check both possibilities (does llvm provide a
-  // way to do that directly, e.g., via code matcher?)
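
// (Illustrative sketch, not from the original file: the IR shape being
// matched corresponds to this C-level pattern, which a single BFM/BFI
// instruction performs in one step.)
//
//   static uint32_t bfi(uint32_t D, uint32_t Src, unsigned LSB, unsigned W) {
//     uint32_t Mask = ((1u << W) - 1) << LSB;     // assumes 0 < W < 32
//     return (D & ~Mask) | ((Src << LSB) & Mask); // insert Src's low W bits
//   }
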
-  SDValue OrOpd1Val = N->getOperand(1);
-  SDNode *OrOpd0 = N->getOperand(0).getNode();
-  SDNode *OrOpd1 = N->getOperand(1).getNode();
-  for (int i = 0; i < 2;
-       ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
-    unsigned BFXOpc;
-    // Set Opd1, LSB and MSB arguments by looking for
-    // c = ubfm b, imm, imm2
-    if (!isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Opd1, LSB, MSB,
-                             NumberOfIgnoredLowBits, true))
-      continue;
-
-    // Check that the returned opcode is compatible with the pattern,
-    // i.e., same type and zero extended (U and not S)
-    if ((BFXOpc != ARM64::UBFMXri && VT == MVT::i64) ||
-        (BFXOpc != ARM64::UBFMWri && VT == MVT::i32))
-      continue;
-
-    // Compute the width of the bitfield insertion
-    int sMSB = MSB - LSB + 1;
-    // FIXME: This constraint is to catch bitfield insertion; we may want to
-    // widen the pattern if we want to grab the general bitfield move case
-    if (sMSB <= 0)
-      continue;
-
-    // Check the second part of the pattern
-    EVT VT = OrOpd1->getValueType(0);
-    if (VT != MVT::i32 && VT != MVT::i64)
-      continue;
-
-    // Compute the Known Zero for the candidate of the first operand.
-    // This allows us to catch more general cases than just looking for
-    // AND with imm. Indeed, simplify-demanded-bits may have removed
-    // the AND instruction because it proves it was useless.
-    APInt KnownZero, KnownOne;
-    CurDAG->ComputeMaskedBits(OrOpd1Val, KnownZero, KnownOne);
-
-    // Check if there is enough room for the second operand to appear
-    // in the first one
-    if (KnownZero.countTrailingOnes() < (unsigned)sMSB)
-      continue;
-
-    // Set the first operand
-    uint64_t Imm;
-    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
-        isHighMask(Imm, sMSB, NumberOfIgnoredHighBits, VT))
-      // In that case, we can eliminate the AND
-      Opd0 = OrOpd1->getOperand(0);
-    else
-      // Maybe the AND has been removed by simplify-demanded-bits
-      // or is useful because it discards more bits
-      Opd0 = OrOpd1Val;
-
-    // Both parts match
-    return true;
-  }
-
-  return false;
-}
-
-SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
-  if (N->getOpcode() != ISD::OR)
-    return NULL;
-
-  unsigned Opc;
-  unsigned LSB, MSB;
-  SDValue Opd0, Opd1;
-
-  if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
-    return NULL;
-
-  EVT VT = N->getValueType(0);
-  SDValue Ops[] = { Opd0,
-                    Opd1,
-                    CurDAG->getTargetConstant(LSB, VT),
-                    CurDAG->getTargetConstant(MSB, VT) };
-  return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 4);
-}
-
-SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) {
-  EVT VT = N->getValueType(0);
-  unsigned Variant;
-  unsigned Opc;
-  unsigned FRINTXOpcs[] = { ARM64::FRINTXSr, ARM64::FRINTXDr };
-
-  if (VT == MVT::f32) {
-    Variant = 0;
-  } else if (VT == MVT::f64) {
-    Variant = 1;
-  } else
-    return 0; // Unrecognized argument type. Fall back on default codegen.
-
-  // Pick the FRINTX variant needed to set the flags.
-  unsigned FRINTXOpc = FRINTXOpcs[Variant];
-
-  switch (N->getOpcode()) {
-  default:
-    return 0; // Unrecognized libm ISD node. Fall back on default codegen.
- case ISD::FCEIL: { - unsigned FRINTPOpcs[] = { ARM64::FRINTPSr, ARM64::FRINTPDr }; - Opc = FRINTPOpcs[Variant]; - break; - } - case ISD::FFLOOR: { - unsigned FRINTMOpcs[] = { ARM64::FRINTMSr, ARM64::FRINTMDr }; - Opc = FRINTMOpcs[Variant]; - break; - } - case ISD::FTRUNC: { - unsigned FRINTZOpcs[] = { ARM64::FRINTZSr, ARM64::FRINTZDr }; - Opc = FRINTZOpcs[Variant]; - break; - } - case ISD::FROUND: { - unsigned FRINTAOpcs[] = { ARM64::FRINTASr, ARM64::FRINTADr }; - Opc = FRINTAOpcs[Variant]; - break; - } - } - - SDLoc dl(N); - SDValue In = N->getOperand(0); - SmallVector Ops; - Ops.push_back(In); - - if (!TM.Options.UnsafeFPMath) { - SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); - Ops.push_back(SDValue(FRINTX, 1)); - } - - return CurDAG->getMachineNode(Opc, dl, VT, Ops); -} - -SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) { - // Dump information about the Node being selected - DEBUG(errs() << "Selecting: "); - DEBUG(Node->dump(CurDAG)); - DEBUG(errs() << "\n"); - - // If we have a custom node, we already have selected! - if (Node->isMachineOpcode()) { - DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); - Node->setNodeId(-1); - return NULL; - } - - // Few custom selection stuff. - SDNode *ResNode = 0; - EVT VT = Node->getValueType(0); - - switch (Node->getOpcode()) { - default: - break; - - case ISD::ADD: - if (SDNode *I = SelectMLAV64LaneV128(Node)) - return I; - break; - - case ISD::ATOMIC_LOAD_ADD: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_ADD_I8, - ARM64::ATOMIC_LOAD_ADD_I16, ARM64::ATOMIC_LOAD_ADD_I32, - ARM64::ATOMIC_LOAD_ADD_I64); - case ISD::ATOMIC_LOAD_SUB: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_SUB_I8, - ARM64::ATOMIC_LOAD_SUB_I16, ARM64::ATOMIC_LOAD_SUB_I32, - ARM64::ATOMIC_LOAD_SUB_I64); - case ISD::ATOMIC_LOAD_AND: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_AND_I8, - ARM64::ATOMIC_LOAD_AND_I16, ARM64::ATOMIC_LOAD_AND_I32, - ARM64::ATOMIC_LOAD_AND_I64); - case ISD::ATOMIC_LOAD_OR: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_OR_I8, - ARM64::ATOMIC_LOAD_OR_I16, ARM64::ATOMIC_LOAD_OR_I32, - ARM64::ATOMIC_LOAD_OR_I64); - case ISD::ATOMIC_LOAD_XOR: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_XOR_I8, - ARM64::ATOMIC_LOAD_XOR_I16, ARM64::ATOMIC_LOAD_XOR_I32, - ARM64::ATOMIC_LOAD_XOR_I64); - case ISD::ATOMIC_LOAD_NAND: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_NAND_I8, ARM64::ATOMIC_LOAD_NAND_I16, - ARM64::ATOMIC_LOAD_NAND_I32, ARM64::ATOMIC_LOAD_NAND_I64); - case ISD::ATOMIC_LOAD_MIN: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MIN_I8, - ARM64::ATOMIC_LOAD_MIN_I16, ARM64::ATOMIC_LOAD_MIN_I32, - ARM64::ATOMIC_LOAD_MIN_I64); - case ISD::ATOMIC_LOAD_MAX: - return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MAX_I8, - ARM64::ATOMIC_LOAD_MAX_I16, ARM64::ATOMIC_LOAD_MAX_I32, - ARM64::ATOMIC_LOAD_MAX_I64); - case ISD::ATOMIC_LOAD_UMIN: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_UMIN_I8, ARM64::ATOMIC_LOAD_UMIN_I16, - ARM64::ATOMIC_LOAD_UMIN_I32, ARM64::ATOMIC_LOAD_UMIN_I64); - case ISD::ATOMIC_LOAD_UMAX: - return SelectAtomic( - Node, ARM64::ATOMIC_LOAD_UMAX_I8, ARM64::ATOMIC_LOAD_UMAX_I16, - ARM64::ATOMIC_LOAD_UMAX_I32, ARM64::ATOMIC_LOAD_UMAX_I64); - case ISD::ATOMIC_SWAP: - return SelectAtomic(Node, ARM64::ATOMIC_SWAP_I8, ARM64::ATOMIC_SWAP_I16, - ARM64::ATOMIC_SWAP_I32, ARM64::ATOMIC_SWAP_I64); - case ISD::ATOMIC_CMP_SWAP: - return SelectAtomic(Node, ARM64::ATOMIC_CMP_SWAP_I8, - ARM64::ATOMIC_CMP_SWAP_I16, ARM64::ATOMIC_CMP_SWAP_I32, - ARM64::ATOMIC_CMP_SWAP_I64); - - case ISD::LOAD: { - // Try to select as an 
indexed load. Fall through to normal processing
-    // if we can't.
-    bool Done = false;
-    SDNode *I = SelectIndexedLoad(Node, Done);
-    if (Done)
-      return I;
-    break;
-  }
-
-  case ISD::SRL:
-  case ISD::AND:
-  case ISD::SRA:
-    if (SDNode *I = SelectBitfieldExtractOp(Node))
-      return I;
-    break;
-
-  case ISD::OR:
-    if (SDNode *I = SelectBitfieldInsertOp(Node))
-      return I;
-    break;
-
-  case ISD::EXTRACT_VECTOR_ELT: {
-    // Extracting lane zero is a special case where we can just use a plain
-    // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
-    // the rest of the compiler, especially the register allocator and copy
-    // propagation, to reason about, so is preferred when it's possible to
-    // use it.
-    ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
-    // Bail and use the default Select() for non-zero lanes.
-    if (LaneNode->getZExtValue() != 0)
-      break;
-    // If the element type is not the same as the result type, likewise
-    // bail and use the default Select(), as there's more to do than just
-    // a cross-class COPY. This catches extracts of i8 and i16 elements
-    // since they will need an explicit zext.
-    if (VT != Node->getOperand(0).getValueType().getVectorElementType())
-      break;
-    unsigned SubReg;
-    switch (Node->getOperand(0)
-                .getValueType()
-                .getVectorElementType()
-                .getSizeInBits()) {
-    default:
-      assert(0 && "Unexpected vector element type!");
-    case 64:
-      SubReg = ARM64::dsub;
-      break;
-    case 32:
-      SubReg = ARM64::ssub;
-      break;
-    case 16: // FALLTHROUGH
-    case 8:
-      llvm_unreachable("unexpected zext-requiring extract element!");
-    }
-    SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
-                                                     Node->getOperand(0));
-    DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
-    DEBUG(Extract->dumpr(CurDAG));
-    DEBUG(dbgs() << "\n");
-    return Extract.getNode();
-  }
-  case ISD::Constant: {
-    // Materialize zero constants as copies from WZR/XZR. This allows
-    // the coalescer to propagate these into other instructions.
-    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
-    if (ConstNode->isNullValue()) {
-      if (VT == MVT::i32)
-        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
-                                      ARM64::WZR, MVT::i32).getNode();
-      else if (VT == MVT::i64)
-        return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
-                                      ARM64::XZR, MVT::i64).getNode();
-    }
-    break;
-  }
-
-  case ISD::FrameIndex: {
-    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
-    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
-    unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0);
-    const TargetLowering *TLI = getTargetLowering();
-    SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
-    SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
-                      CurDAG->getTargetConstant(Shifter, MVT::i32) };
-    return CurDAG->SelectNodeTo(Node, ARM64::ADDXri, MVT::i64, Ops, 3);
-  }
-  case ISD::INTRINSIC_W_CHAIN: {
-    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
-    switch (IntNo) {
-    default:
-      break;
-    case Intrinsic::arm64_ldxp: {
-      SDValue MemAddr = Node->getOperand(2);
-      SDLoc DL(Node);
-      SDValue Chain = Node->getOperand(0);
-
-      SDNode *Ld = CurDAG->getMachineNode(ARM64::LDXPX, DL, MVT::i64, MVT::i64,
-                                          MVT::Other, MemAddr, Chain);
-
-      // Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); - cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1); - return Ld; - } - case Intrinsic::arm64_stxp: { - SDLoc DL(Node); - SDValue Chain = Node->getOperand(0); - SDValue ValLo = Node->getOperand(2); - SDValue ValHi = Node->getOperand(3); - SDValue MemAddr = Node->getOperand(4); - - // Place arguments in the right order. - SmallVector<SDValue, 4> Ops; - Ops.push_back(ValLo); - Ops.push_back(ValHi); - Ops.push_back(MemAddr); - Ops.push_back(Chain); - - SDNode *St = - CurDAG->getMachineNode(ARM64::STXPX, DL, MVT::i32, MVT::Other, Ops); - // Transfer memoperands. - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand(); - cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1); - - return St; - } - case Intrinsic::arm64_neon_ld1x2: - if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD1Twov8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD1Twov16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD1Twov4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD1Twov8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD1Twov2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD1Twov4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD1Twov2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld1x3: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD1Threev8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD1Threev16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD1Threev4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD1Threev8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD1Threev2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD1Threev4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD1Threev2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld1x4: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD1Fourv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD1Fourv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD1Fourv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2: - if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD2Twov8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return
SelectLoad(Node, 2, ARM64::LD2Twov16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Twov4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Twov8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Twov2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Twov4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Twov2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld3: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD3Threev8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Threev16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD3Threev4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Threev8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD3Threev2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Threev4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Threev2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld4: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Fourv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Fourv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Fourv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Fourv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 2, ARM64::LD2Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 2, ARM64::LD2Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 2, ARM64::LD2Rv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 2, ARM64::LD2Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 2, ARM64::LD2Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 2, ARM64::LD2Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 2, ARM64::LD2Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 2, ARM64::LD2Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld3r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 3, ARM64::LD3Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 3, ARM64::LD3Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 3, ARM64::LD3Rv4h, ARM64::dsub0); - else if (VT == 
MVT::v8i16) - return SelectLoad(Node, 3, ARM64::LD3Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 3, ARM64::LD3Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 3, ARM64::LD3Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 3, ARM64::LD3Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 3, ARM64::LD3Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld4r: - if (VT == MVT::v8i8) - return SelectLoad(Node, 4, ARM64::LD4Rv8b, ARM64::dsub0); - else if (VT == MVT::v16i8) - return SelectLoad(Node, 4, ARM64::LD4Rv16b, ARM64::qsub0); - else if (VT == MVT::v4i16) - return SelectLoad(Node, 4, ARM64::LD4Rv4h, ARM64::dsub0); - else if (VT == MVT::v8i16) - return SelectLoad(Node, 4, ARM64::LD4Rv8h, ARM64::qsub0); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectLoad(Node, 4, ARM64::LD4Rv2s, ARM64::dsub0); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectLoad(Node, 4, ARM64::LD4Rv4s, ARM64::qsub0); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectLoad(Node, 4, ARM64::LD4Rv1d, ARM64::dsub0); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectLoad(Node, 4, ARM64::LD4Rv2d, ARM64::qsub0); - break; - case Intrinsic::arm64_neon_ld2lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 2, ARM64::LD2i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 2, ARM64::LD2i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 2, ARM64::LD2i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 2, ARM64::LD2i64); - break; - case Intrinsic::arm64_neon_ld3lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 3, ARM64::LD3i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 3, ARM64::LD3i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 3, ARM64::LD3i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 3, ARM64::LD3i64); - break; - case Intrinsic::arm64_neon_ld4lane: - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectLoadLane(Node, 4, ARM64::LD4i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectLoadLane(Node, 4, ARM64::LD4i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectLoadLane(Node, 4, ARM64::LD4i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectLoadLane(Node, 4, ARM64::LD4i64); - break; - } - } break; - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_tbl2: - return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBLv8i8Two - : ARM64::TBLv16i8Two, - false); - case Intrinsic::arm64_neon_tbl3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBLv8i8Three - : ARM64::TBLv16i8Three, - false); - case Intrinsic::arm64_neon_tbl4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBLv8i8Four - : ARM64::TBLv16i8Four, - false); - case Intrinsic::arm64_neon_tbx2: - return SelectTable(Node, 2, VT == MVT::v8i8 ?
ARM64::TBXv8i8Two - : ARM64::TBXv16i8Two, - true); - case Intrinsic::arm64_neon_tbx3: - return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBXv8i8Three - : ARM64::TBXv16i8Three, - true); - case Intrinsic::arm64_neon_tbx4: - return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBXv8i8Four - : ARM64::TBXv16i8Four, - true); - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: - if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node)) - return N; - break; - } - break; - } - case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); - if (Node->getNumOperands() >= 3) - VT = Node->getOperand(2)->getValueType(0); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_st1x2: { - if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST1Twov8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST1Twov16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST1Twov4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST1Twov8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, ARM64::ST1Twov2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST1Twov4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST1Twov2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); - break; - } - case Intrinsic::arm64_neon_st1x3: { - if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST1Threev8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST1Threev16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST1Threev4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST1Threev8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST1Threev2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST1Threev4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST1Threev2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); - break; - } - case Intrinsic::arm64_neon_st1x4: { - if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST1Fourv8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST1Fourv16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST1Fourv4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST1Fourv8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST1Fourv2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST1Fourv4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST1Fourv2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); - break; - } - case Intrinsic::arm64_neon_st2: { - if (VT == MVT::v8i8) - return SelectStore(Node, 2, ARM64::ST2Twov8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 2, ARM64::ST2Twov16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 2, ARM64::ST2Twov4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 2, ARM64::ST2Twov8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 2, ARM64::ST2Twov2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 2, ARM64::ST2Twov4s); - else if (VT == MVT::v2i64 || VT ==
MVT::v2f64) - return SelectStore(Node, 2, ARM64::ST2Twov2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 2, ARM64::ST1Twov1d); - break; - } - case Intrinsic::arm64_neon_st3: { - if (VT == MVT::v8i8) - return SelectStore(Node, 3, ARM64::ST3Threev8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 3, ARM64::ST3Threev16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 3, ARM64::ST3Threev4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 3, ARM64::ST3Threev8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 3, ARM64::ST3Threev2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 3, ARM64::ST3Threev4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 3, ARM64::ST3Threev2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 3, ARM64::ST1Threev1d); - break; - } - case Intrinsic::arm64_neon_st4: { - if (VT == MVT::v8i8) - return SelectStore(Node, 4, ARM64::ST4Fourv8b); - else if (VT == MVT::v16i8) - return SelectStore(Node, 4, ARM64::ST4Fourv16b); - else if (VT == MVT::v4i16) - return SelectStore(Node, 4, ARM64::ST4Fourv4h); - else if (VT == MVT::v8i16) - return SelectStore(Node, 4, ARM64::ST4Fourv8h); - else if (VT == MVT::v2i32 || VT == MVT::v2f32) - return SelectStore(Node, 4, ARM64::ST4Fourv2s); - else if (VT == MVT::v4i32 || VT == MVT::v4f32) - return SelectStore(Node, 4, ARM64::ST4Fourv4s); - else if (VT == MVT::v2i64 || VT == MVT::v2f64) - return SelectStore(Node, 4, ARM64::ST4Fourv2d); - else if (VT == MVT::v1i64 || VT == MVT::v1f64) - return SelectStore(Node, 4, ARM64::ST1Fourv1d); - break; - } - case Intrinsic::arm64_neon_st2lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 2, ARM64::ST2i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 2, ARM64::ST2i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 2, ARM64::ST2i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 2, ARM64::ST2i64); - break; - } - case Intrinsic::arm64_neon_st3lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 3, ARM64::ST3i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 3, ARM64::ST3i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 3, ARM64::ST3i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 3, ARM64::ST3i64); - break; - } - case Intrinsic::arm64_neon_st4lane: { - if (VT == MVT::v16i8 || VT == MVT::v8i8) - return SelectStoreLane(Node, 4, ARM64::ST4i8); - else if (VT == MVT::v8i16 || VT == MVT::v4i16) - return SelectStoreLane(Node, 4, ARM64::ST4i16); - else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || - VT == MVT::v2f32) - return SelectStoreLane(Node, 4, ARM64::ST4i32); - else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || - VT == MVT::v1f64) - return SelectStoreLane(Node, 4, ARM64::ST4i64); - break; - } - } - } - - case ISD::FCEIL: - case ISD::FFLOOR: - case ISD::FTRUNC: - case ISD::FROUND: - if (SDNode *I = SelectLIBM(Node)) - return I; - break; - } - - // Select the default instruction - ResNode = SelectCode(Node); - - DEBUG(errs() << "=> "); - if (ResNode == NULL || ResNode == 
Node) - DEBUG(Node->dump(CurDAG)); - else - DEBUG(ResNode->dump(CurDAG)); - DEBUG(errs() << "\n"); - - return ResNode; -} - -/// createARM64ISelDag - This pass converts a legalized DAG into an -/// ARM64-specific DAG, ready for instruction scheduling. -FunctionPass *llvm::createARM64ISelDag(ARM64TargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new ARM64DAGToDAGISel(TM, OptLevel); -} diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp deleted file mode 100644 index 641f591..0000000 --- a/lib/Target/ARM64/ARM64ISelLowering.cpp +++ /dev/null @@ -1,7551 +0,0 @@ -//===-- ARM64ISelLowering.cpp - ARM64 DAG Lowering Implementation --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64TargetLowering class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-lower" - -#include "ARM64ISelLowering.h" -#include "ARM64PerfectShuffle.h" -#include "ARM64Subtarget.h" -#include "ARM64CallingConv.h" -#include "ARM64MachineFunctionInfo.h" -#include "ARM64TargetMachine.h" -#include "ARM64TargetObjectFile.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetOptions.h" -using namespace llvm; - -STATISTIC(NumTailCalls, "Number of tail calls"); -STATISTIC(NumShiftInserts, "Number of vector shift inserts"); - -// This option should go away when tail calls fully work. -static cl::opt<bool> -EnableARM64TailCalls("arm64-tail-calls", cl::Hidden, - cl::desc("Generate ARM64 tail calls (TEMPORARY OPTION)."), - cl::init(true)); - -static cl::opt<bool> -StrictAlign("arm64-strict-align", cl::Hidden, - cl::desc("Disallow all unaligned memory accesses")); - -// Placeholder until extr generation is tested fully. -static cl::opt<bool> -EnableARM64ExtrGeneration("arm64-extr-generation", cl::Hidden, - cl::desc("Allow ARM64 (or (shift)(shift))->extract"), - cl::init(true)); - -static cl::opt<bool> -EnableARM64SlrGeneration("arm64-shift-insert-generation", cl::Hidden, - cl::desc("Allow ARM64 SLI/SRI formation"), - cl::init(false)); - -//===----------------------------------------------------------------------===// -// ARM64 Lowering public interface. -//===----------------------------------------------------------------------===// -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget<ARM64Subtarget>().isTargetDarwin()) - return new ARM64_MachoTargetObjectFile(); - - return new ARM64_ELFTargetObjectFile(); -} - -ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { - Subtarget = &TM.getSubtarget<ARM64Subtarget>(); - - // ARM64 doesn't have comparisons which set GPRs or setcc instructions, so - // we have to make something up. Arbitrarily, choose ZeroOrOne.
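- // (Editorial sketch, not in the original file: with ZeroOrOneBooleanContent - // a scalar setcc is materialised so the result is exactly 0 or 1, e.g. - // cmp w0, w1 - // cset w0, lt ; w0 = (w0 < w1) ? 1 : 0 - // where cset is an alias of csinc against wzr, which is also the form the - // CSEL/CSINC custom lowerings below produce.)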
- setBooleanContents(ZeroOrOneBooleanContent); - // When comparing vectors the result sets the different elements in the - // vector to all-one or all-zero. - setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - // Set up the register classes. - addRegisterClass(MVT::i32, &ARM64::GPR32allRegClass); - addRegisterClass(MVT::i64, &ARM64::GPR64allRegClass); - addRegisterClass(MVT::f32, &ARM64::FPR32RegClass); - addRegisterClass(MVT::f64, &ARM64::FPR64RegClass); - addRegisterClass(MVT::f128, &ARM64::FPR128RegClass); - addRegisterClass(MVT::v16i8, &ARM64::FPR8RegClass); - addRegisterClass(MVT::v8i16, &ARM64::FPR16RegClass); - - // Someone set us up the NEON. - addDRTypeForNEON(MVT::v2f32); - addDRTypeForNEON(MVT::v8i8); - addDRTypeForNEON(MVT::v4i16); - addDRTypeForNEON(MVT::v2i32); - addDRTypeForNEON(MVT::v1i64); - addDRTypeForNEON(MVT::v1f64); - - addQRTypeForNEON(MVT::v4f32); - addQRTypeForNEON(MVT::v2f64); - addQRTypeForNEON(MVT::v16i8); - addQRTypeForNEON(MVT::v8i16); - addQRTypeForNEON(MVT::v4i32); - addQRTypeForNEON(MVT::v2i64); - - // Compute derived properties from the register classes - computeRegisterProperties(); - - // Provide all sorts of operation actions - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::i64, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); - setOperationAction(ISD::BRCOND, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::i64, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); - setOperationAction(ISD::BR_CC, MVT::f64, Custom); - setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SELECT, MVT::i64, Custom); - setOperationAction(ISD::SELECT, MVT::f32, Custom); - setOperationAction(ISD::SELECT, MVT::f64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); - - setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); - - setOperationAction(ISD::FREM, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f80, Expand); - - // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to - // silliness like this: - setOperationAction(ISD::FABS, MVT::v1f64, Expand); - setOperationAction(ISD::FADD, MVT::v1f64, Expand); - setOperationAction(ISD::FCEIL, MVT::v1f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand); - setOperationAction(ISD::FCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FDIV, MVT::v1f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand); - setOperationAction(ISD::FMA, MVT::v1f64, Expand); - setOperationAction(ISD::FMUL, MVT::v1f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand); - setOperationAction(ISD::FNEG, MVT::v1f64, Expand); - setOperationAction(ISD::FPOW, MVT::v1f64, Expand); - setOperationAction(ISD::FREM, MVT::v1f64, Expand); - setOperationAction(ISD::FROUND, MVT::v1f64, Expand); - setOperationAction(ISD::FRINT, 
MVT::v1f64, Expand); - setOperationAction(ISD::FSIN, MVT::v1f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand); - setOperationAction(ISD::FSQRT, MVT::v1f64, Expand); - setOperationAction(ISD::FSUB, MVT::v1f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand); - setOperationAction(ISD::SETCC, MVT::v1f64, Expand); - setOperationAction(ISD::BR_CC, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT, MVT::v1f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand); - - setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand); - setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand); - - // Custom lowering hooks are needed for XOR - // to fold it into CSINC/CSINV. - setOperationAction(ISD::XOR, MVT::i32, Custom); - setOperationAction(ISD::XOR, MVT::i64, Custom); - - // Virtually no operation on f128 is legal, but LLVM can't expand them when - // there's a valid register class, so we need custom operations in most cases. - setOperationAction(ISD::FABS, MVT::f128, Expand); - setOperationAction(ISD::FADD, MVT::f128, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); - setOperationAction(ISD::FCOS, MVT::f128, Expand); - setOperationAction(ISD::FDIV, MVT::f128, Custom); - setOperationAction(ISD::FMA, MVT::f128, Expand); - setOperationAction(ISD::FMUL, MVT::f128, Custom); - setOperationAction(ISD::FNEG, MVT::f128, Expand); - setOperationAction(ISD::FPOW, MVT::f128, Expand); - setOperationAction(ISD::FREM, MVT::f128, Expand); - setOperationAction(ISD::FRINT, MVT::f128, Expand); - setOperationAction(ISD::FSIN, MVT::f128, Expand); - setOperationAction(ISD::FSINCOS, MVT::f128, Expand); - setOperationAction(ISD::FSQRT, MVT::f128, Expand); - setOperationAction(ISD::FSUB, MVT::f128, Custom); - setOperationAction(ISD::FTRUNC, MVT::f128, Expand); - setOperationAction(ISD::SETCC, MVT::f128, Custom); - setOperationAction(ISD::BR_CC, MVT::f128, Custom); - setOperationAction(ISD::SELECT, MVT::f128, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f128, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); - - // Lowering for many of the conversions is actually specified by the non-f128 - // type. The LowerXXX function will be trivial when f128 isn't involved. 
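- // (Editorial example, assuming the usual fp128 soft-float libcalls: an IR - // instruction such as - // %r = fadd fp128 %a, %b - // becomes a call to __addtf3, and an fp128 -> f64 fptrunc goes through - // __trunctfdf2, whereas the same operations on f64 stay single hardware - // instructions.)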
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); - setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); - - // 128-bit atomics - setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i128, Custom); - // These are surprisingly difficult. The only single-copy atomic 128-bit - // instruction on AArch64 is stxp (when it succeeds). So a store can safely - // become a simple swap, but a load can only be determined to have been atomic - // if storing the same value back succeeds. - setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); - setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Expand); - - // Variable arguments. - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VACOPY, MVT::Other, Custom); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - - // Variable-sized objects. - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - - // Exception handling. - // FIXME: These are guesses. Has this been defined yet? - setExceptionPointerRegister(ARM64::X0); - setExceptionSelectorRegister(ARM64::X1); - - // Constant pool entries - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - - // BlockAddress - setOperationAction(ISD::BlockAddress, MVT::i64, Custom); - - // Add/Sub overflow ops with MVT::Glues are lowered to CPSR dependences. - setOperationAction(ISD::ADDC, MVT::i32, Custom); - setOperationAction(ISD::ADDE, MVT::i32, Custom); - setOperationAction(ISD::SUBC, MVT::i32, Custom); - setOperationAction(ISD::SUBE, MVT::i32, Custom); - setOperationAction(ISD::ADDC, MVT::i64, Custom); - setOperationAction(ISD::ADDE, MVT::i64, Custom); - setOperationAction(ISD::SUBC, MVT::i64, Custom); - setOperationAction(ISD::SUBE, MVT::i64, Custom); - - // ARM64 lacks both left-rotate and popcount instructions. 
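- // (Editorial sketch: after expansion rotl(x, n) becomes the pair - // (x << n) | (x >> (32 - n)), or equivalently a single ror on the negated - // amount, and the custom CTPOP lowering below counts bits with the NEON - // CNT instruction plus an across-vector add rather than a scalar bit loop.)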
- setOperationAction(ISD::ROTL, MVT::i32, Expand); - setOperationAction(ISD::ROTL, MVT::i64, Expand); - - // ARM64 doesn't have direct vector ->f32 conversion instructions for - // elements smaller than i32, so promote the input to i32 first. - setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote); - // Similarly, there is no direct i32 -> f64 vector conversion instruction. - setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom); - - // ARM64 doesn't have {U|S}MUL_LOHI. - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - - // ARM64 doesn't have MUL.2d: - setOperationAction(ISD::MUL, MVT::v2i64, Expand); - - // Expand the undefined-at-zero variants of cttz/ctlz to their defined-at-zero - // counterparts, which ARM64 supports directly. - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Custom); - setOperationAction(ISD::CTPOP, MVT::i64, Custom); - - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - - // Custom lower Add/Sub/Mul with overflow. - setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction(ISD::SADDO, MVT::i64, Custom); - setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::UADDO, MVT::i64, Custom); - setOperationAction(ISD::SSUBO, MVT::i32, Custom); - setOperationAction(ISD::SSUBO, MVT::i64, Custom); - setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i64, Custom); - setOperationAction(ISD::SMULO, MVT::i32, Custom); - setOperationAction(ISD::SMULO, MVT::i64, Custom); - setOperationAction(ISD::UMULO, MVT::i32, Custom); - setOperationAction(ISD::UMULO, MVT::i64, Custom); - - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - setOperationAction(ISD::FCOS, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOW, MVT::f32, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); - - // ARM64 has implementations of a lot of rounding-like FP operations.
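- // (Mapping sketch, cf. SelectLIBM in ARM64ISelDAGToDAG.cpp above: - // FCEIL -> frintp, FFLOOR -> frintm, FTRUNC -> frintz, FROUND -> frinta, - // FRINT -> frintx, FNEARBYINT -> frinti.)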
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64, MVT::v2f32, - MVT::v4f32, MVT::v2f64 }; - for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) { - MVT Ty = RoundingTypes[I]; - setOperationAction(ISD::FFLOOR, Ty, Legal); - setOperationAction(ISD::FNEARBYINT, Ty, Legal); - setOperationAction(ISD::FCEIL, Ty, Legal); - setOperationAction(ISD::FRINT, Ty, Legal); - setOperationAction(ISD::FTRUNC, Ty, Legal); - setOperationAction(ISD::FROUND, Ty, Legal); - } - - setOperationAction(ISD::PREFETCH, MVT::Other, Custom); - - if (Subtarget->isTargetMachO()) { - // For iOS, we don't want the normal expansion of a libcall to - // sincos. We want to issue a libcall to __sincos_stret to avoid memory - // traffic. - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); - } else { - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - } - - // ARM64 does not have floating-point extending loads, i1 sign-extending load, - // floating-point truncating stores, or v2i32->v2i16 truncating store. - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - setTruncStoreAction(MVT::f128, MVT::f80, Expand); - setTruncStoreAction(MVT::f128, MVT::f64, Expand); - setTruncStoreAction(MVT::f128, MVT::f32, Expand); - setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand); - // Indexed loads and stores are supported. - for (unsigned im = (unsigned)ISD::PRE_INC; - im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { - setIndexedLoadAction(im, MVT::i8, Legal); - setIndexedLoadAction(im, MVT::i16, Legal); - setIndexedLoadAction(im, MVT::i32, Legal); - setIndexedLoadAction(im, MVT::i64, Legal); - setIndexedLoadAction(im, MVT::f64, Legal); - setIndexedLoadAction(im, MVT::f32, Legal); - setIndexedStoreAction(im, MVT::i8, Legal); - setIndexedStoreAction(im, MVT::i16, Legal); - setIndexedStoreAction(im, MVT::i32, Legal); - setIndexedStoreAction(im, MVT::i64, Legal); - setIndexedStoreAction(im, MVT::f64, Legal); - setIndexedStoreAction(im, MVT::f32, Legal); - } - - // Likewise, narrowing and extending vector loads/stores aren't handled - // directly. - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - - setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, - Expand); - - for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) - setTruncStoreAction((MVT::SimpleValueType)VT, - (MVT::SimpleValueType)InnerVT, Expand); - setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); - } - - // Trap. - setOperationAction(ISD::TRAP, MVT::Other, Legal); - setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal); - - // We combine OR nodes for bitfield operations. - setTargetDAGCombine(ISD::OR); - - // Vector add and sub nodes may conceal a high-half opportunity. - // Also, try to fold ADD into CSINC/CSINV.
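- // (Editorial sketch: for example (add x, (setcc cc)) can be selected as a - // single csinc on the inverted condition instead of first materialising - // the boolean into a register.)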
- setTargetDAGCombine(ISD::ADD); - setTargetDAGCombine(ISD::SUB); - - setTargetDAGCombine(ISD::XOR); - setTargetDAGCombine(ISD::SINT_TO_FP); - setTargetDAGCombine(ISD::UINT_TO_FP); - - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); - - setTargetDAGCombine(ISD::ANY_EXTEND); - setTargetDAGCombine(ISD::ZERO_EXTEND); - setTargetDAGCombine(ISD::SIGN_EXTEND); - setTargetDAGCombine(ISD::BITCAST); - setTargetDAGCombine(ISD::CONCAT_VECTORS); - setTargetDAGCombine(ISD::STORE); - - setTargetDAGCombine(ISD::MUL); - - MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8; - MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; - MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4; - - setStackPointerRegisterToSaveRestore(ARM64::SP); - - setSchedulingPreference(Sched::Hybrid); - - // Enable TBZ/TBNZ - MaskAndBranchFoldingIsLegal = true; - - setMinFunctionAlignment(2); - - RequireStrictAlign = StrictAlign; -} - -void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { - if (VT == MVT::v2f32) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32); - - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32); - } else if (VT == MVT::v2f64 || VT == MVT::v4f32) { - setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64); - - setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); - AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i64); - } - - // Mark vector float intrinsics as expand. - if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) { - setOperationAction(ISD::FSIN, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand); - setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand); - } - - setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); - setOperationAction(ISD::AND, VT.getSimpleVT(), Custom); - setOperationAction(ISD::OR, VT.getSimpleVT(), Custom); - setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - - setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); - setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand); - setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); - - setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); - setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); - setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); - 
setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); - - setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); - setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); -} - -void ARM64TargetLowering::addDRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR64RegClass); - addTypeForNEON(VT, MVT::v2i32); -} - -void ARM64TargetLowering::addQRTypeForNEON(MVT VT) { - addRegisterClass(VT, &ARM64::FPR128RegClass); - addTypeForNEON(VT, MVT::v4i32); -} - -EVT ARM64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) - return MVT::i32; - return VT.changeVectorElementTypeToInteger(); -} - -/// computeMaskedBitsForTargetNode - Determine which of the bits specified in -/// Mask are known to be either zero or one and return them in the -/// KnownZero/KnownOne bitsets. -void ARM64TargetLowering::computeMaskedBitsForTargetNode( - const SDValue Op, APInt &KnownZero, APInt &KnownOne, - const SelectionDAG &DAG, unsigned Depth) const { - switch (Op.getOpcode()) { - default: - break; - case ARM64ISD::CSEL: { - APInt KnownZero2, KnownOne2; - DAG.ComputeMaskedBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1); - DAG.ComputeMaskedBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1); - KnownZero &= KnownZero2; - KnownOne &= KnownOne2; - break; - } - case ISD::INTRINSIC_W_CHAIN: - break; - case ISD::INTRINSIC_WO_CHAIN: - case ISD::INTRINSIC_VOID: { - unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - switch (IntNo) { - default: - break; - case Intrinsic::arm64_neon_umaxv: - case Intrinsic::arm64_neon_uminv: { - // Figure out the datatype of the vector operand. The UMINV instruction - // will zero extend the result, so we can mark as known zero all the - // bits larger than the element datatype. 32-bit or larger doesn't need - // this as those are legal types and will be handled by isel directly. - MVT VT = Op.getOperand(1).getValueType().getSimpleVT(); - unsigned BitWidth = KnownZero.getBitWidth(); - if (VT == MVT::v8i8 || VT == MVT::v16i8) { - assert(BitWidth >= 8 && "Unexpected width!"); - APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8); - KnownZero |= Mask; - } else if (VT == MVT::v4i16 || VT == MVT::v8i16) { - assert(BitWidth >= 16 && "Unexpected width!"); - APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16); - KnownZero |= Mask; - } - break; - } break; - } - } - } -} - -MVT ARM64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { - return MVT::i64; -} - -unsigned ARM64TargetLowering::getMaximalGlobalOffset() const { - // FIXME: On ARM64, this depends on the type. - // Basically, the addressable offsets are 0 to 4095 * Ty.getSizeInBytes(). - // and the offset has to be a multiple of the related size in bytes.
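- // (Worked example, editorial: for an i32 access the scaled immediate form - // reaches ldr w0, [x1, #16380], i.e. 4095 * 4, and the offset must be a - // multiple of 4.)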
- return 4095; -} - -FastISel * -ARM64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) const { - return ARM64::createFastISel(funcInfo, libInfo); -} - -const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: - return 0; - case ARM64ISD::CALL: return "ARM64ISD::CALL"; - case ARM64ISD::ADRP: return "ARM64ISD::ADRP"; - case ARM64ISD::ADDlow: return "ARM64ISD::ADDlow"; - case ARM64ISD::LOADgot: return "ARM64ISD::LOADgot"; - case ARM64ISD::RET_FLAG: return "ARM64ISD::RET_FLAG"; - case ARM64ISD::BRCOND: return "ARM64ISD::BRCOND"; - case ARM64ISD::CSEL: return "ARM64ISD::CSEL"; - case ARM64ISD::FCSEL: return "ARM64ISD::FCSEL"; - case ARM64ISD::CSINV: return "ARM64ISD::CSINV"; - case ARM64ISD::CSNEG: return "ARM64ISD::CSNEG"; - case ARM64ISD::CSINC: return "ARM64ISD::CSINC"; - case ARM64ISD::THREAD_POINTER: return "ARM64ISD::THREAD_POINTER"; - case ARM64ISD::TLSDESC_CALL: return "ARM64ISD::TLSDESC_CALL"; - case ARM64ISD::ADC: return "ARM64ISD::ADC"; - case ARM64ISD::SBC: return "ARM64ISD::SBC"; - case ARM64ISD::ADDS: return "ARM64ISD::ADDS"; - case ARM64ISD::SUBS: return "ARM64ISD::SUBS"; - case ARM64ISD::ADCS: return "ARM64ISD::ADCS"; - case ARM64ISD::SBCS: return "ARM64ISD::SBCS"; - case ARM64ISD::ANDS: return "ARM64ISD::ANDS"; - case ARM64ISD::FCMP: return "ARM64ISD::FCMP"; - case ARM64ISD::FMIN: return "ARM64ISD::FMIN"; - case ARM64ISD::FMAX: return "ARM64ISD::FMAX"; - case ARM64ISD::DUP: return "ARM64ISD::DUP"; - case ARM64ISD::DUPLANE8: return "ARM64ISD::DUPLANE8"; - case ARM64ISD::DUPLANE16: return "ARM64ISD::DUPLANE16"; - case ARM64ISD::DUPLANE32: return "ARM64ISD::DUPLANE32"; - case ARM64ISD::DUPLANE64: return "ARM64ISD::DUPLANE64"; - case ARM64ISD::MOVI: return "ARM64ISD::MOVI"; - case ARM64ISD::MOVIshift: return "ARM64ISD::MOVIshift"; - case ARM64ISD::MOVIedit: return "ARM64ISD::MOVIedit"; - case ARM64ISD::MOVImsl: return "ARM64ISD::MOVImsl"; - case ARM64ISD::FMOV: return "ARM64ISD::FMOV"; - case ARM64ISD::MVNIshift: return "ARM64ISD::MVNIshift"; - case ARM64ISD::MVNImsl: return "ARM64ISD::MVNImsl"; - case ARM64ISD::BICi: return "ARM64ISD::BICi"; - case ARM64ISD::ORRi: return "ARM64ISD::ORRi"; - case ARM64ISD::NEG: return "ARM64ISD::NEG"; - case ARM64ISD::EXTR: return "ARM64ISD::EXTR"; - case ARM64ISD::ZIP1: return "ARM64ISD::ZIP1"; - case ARM64ISD::ZIP2: return "ARM64ISD::ZIP2"; - case ARM64ISD::UZP1: return "ARM64ISD::UZP1"; - case ARM64ISD::UZP2: return "ARM64ISD::UZP2"; - case ARM64ISD::TRN1: return "ARM64ISD::TRN1"; - case ARM64ISD::TRN2: return "ARM64ISD::TRN2"; - case ARM64ISD::REV16: return "ARM64ISD::REV16"; - case ARM64ISD::REV32: return "ARM64ISD::REV32"; - case ARM64ISD::REV64: return "ARM64ISD::REV64"; - case ARM64ISD::EXT: return "ARM64ISD::EXT"; - case ARM64ISD::VSHL: return "ARM64ISD::VSHL"; - case ARM64ISD::VLSHR: return "ARM64ISD::VLSHR"; - case ARM64ISD::VASHR: return "ARM64ISD::VASHR"; - case ARM64ISD::CMEQ: return "ARM64ISD::CMEQ"; - case ARM64ISD::CMGE: return "ARM64ISD::CMGE"; - case ARM64ISD::CMGT: return "ARM64ISD::CMGT"; - case ARM64ISD::CMHI: return "ARM64ISD::CMHI"; - case ARM64ISD::CMHS: return "ARM64ISD::CMHS"; - case ARM64ISD::FCMEQ: return "ARM64ISD::FCMEQ"; - case ARM64ISD::FCMGE: return "ARM64ISD::FCMGE"; - case ARM64ISD::FCMGT: return "ARM64ISD::FCMGT"; - case ARM64ISD::CMEQz: return "ARM64ISD::CMEQz"; - case ARM64ISD::CMGEz: return "ARM64ISD::CMGEz"; - case ARM64ISD::CMGTz: return "ARM64ISD::CMGTz"; - case ARM64ISD::CMLEz: return "ARM64ISD::CMLEz"; - 
case ARM64ISD::CMLTz: return "ARM64ISD::CMLTz"; - case ARM64ISD::FCMEQz: return "ARM64ISD::FCMEQz"; - case ARM64ISD::FCMGEz: return "ARM64ISD::FCMGEz"; - case ARM64ISD::FCMGTz: return "ARM64ISD::FCMGTz"; - case ARM64ISD::FCMLEz: return "ARM64ISD::FCMLEz"; - case ARM64ISD::FCMLTz: return "ARM64ISD::FCMLTz"; - case ARM64ISD::NOT: return "ARM64ISD::NOT"; - case ARM64ISD::BIT: return "ARM64ISD::BIT"; - case ARM64ISD::CBZ: return "ARM64ISD::CBZ"; - case ARM64ISD::CBNZ: return "ARM64ISD::CBNZ"; - case ARM64ISD::TBZ: return "ARM64ISD::TBZ"; - case ARM64ISD::TBNZ: return "ARM64ISD::TBNZ"; - case ARM64ISD::TC_RETURN: return "ARM64ISD::TC_RETURN"; - case ARM64ISD::SITOF: return "ARM64ISD::SITOF"; - case ARM64ISD::UITOF: return "ARM64ISD::UITOF"; - case ARM64ISD::SQSHL_I: return "ARM64ISD::SQSHL_I"; - case ARM64ISD::UQSHL_I: return "ARM64ISD::UQSHL_I"; - case ARM64ISD::SRSHR_I: return "ARM64ISD::SRSHR_I"; - case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I"; - case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I"; - case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge"; - } -} - -static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, - unsigned &LdrOpc, unsigned &StrOpc) { - static unsigned LoadBares[] = { ARM64::LDXRB, ARM64::LDXRH, ARM64::LDXRW, - ARM64::LDXRX, ARM64::LDXPX }; - static unsigned LoadAcqs[] = { ARM64::LDAXRB, ARM64::LDAXRH, ARM64::LDAXRW, - ARM64::LDAXRX, ARM64::LDAXPX }; - static unsigned StoreBares[] = { ARM64::STXRB, ARM64::STXRH, ARM64::STXRW, - ARM64::STXRX, ARM64::STXPX }; - static unsigned StoreRels[] = { ARM64::STLXRB, ARM64::STLXRH, ARM64::STLXRW, - ARM64::STLXRX, ARM64::STLXPX }; - - unsigned *LoadOps, *StoreOps; - if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) - LoadOps = LoadAcqs; - else - LoadOps = LoadBares; - - if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) - StoreOps = StoreRels; - else - StoreOps = StoreBares; - - assert(isPowerOf2_32(Size) && Size <= 16 && - "unsupported size for atomic binary op!"); - - LdrOpc = LoadOps[Log2_32(Size)]; - StrOpc = StoreOps[Log2_32(Size)]; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size) const { - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned oldval = MI->getOperand(2).getReg(); - unsigned newval = MI->getOperand(3).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm()); - unsigned scratch = BB->getParent()->getRegInfo().createVirtualRegister( - &ARM64::GPR32RegClass); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - - // FIXME: We currently always generate a seq_cst operation; we should - // be able to relax this in some cases. - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loop1MBB); - MF->insert(It, loop2MBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // thisMBB: - // ... - // fallthrough --> loop1MBB - BB->addSuccessor(loop1MBB); - - // loop1MBB: - // ldrex dest, [ptr] - // cmp dest, oldval - // bne exitMBB - BB = loop1MBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - BuildMI(BB, dl, TII->get(Size == 8 ? ARM64::SUBSXrr : ARM64::SUBSWrr)) - .addReg(Size == 8 ? ARM64::XZR : ARM64::WZR, RegState::Define) - .addReg(dest) - .addReg(oldval); - BuildMI(BB, dl, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(exitMBB); - BB->addSuccessor(loop2MBB); - BB->addSuccessor(exitMBB); - - // loop2MBB: - // strex scratch, newval, [ptr] - // cmp scratch, #0 - // bne loop1MBB - BB = loop2MBB; - BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); - BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loop1MBB); - BB->addSuccessor(loop1MBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned dest = MI->getOperand(0).getReg(); - unsigned ptr = MI->getOperand(1).getReg(); - unsigned incr = MI->getOperand(2).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm()); - DebugLoc dl = MI->getDebugLoc(); - - unsigned ldrOpc, strOpc; - getExclusiveOperation(Size, Ord, ldrOpc, strOpc); - - MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, loopMBB); - MF->insert(It, exitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - exitMBB->splice(exitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - exitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned scratch = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned scratch2 = - (!BinOpcode) - ? incr - : RegInfo.createVirtualRegister(Size == 8 ? &ARM64::GPR64RegClass - : &ARM64::GPR32RegClass); - - // thisMBB: - // ... - // fallthrough --> loopMBB - BB->addSuccessor(loopMBB); - - // loopMBB: - // ldxr dest, ptr - // <binop> scratch2, dest, incr - // stxr scratch, scratch2, ptr - // cbnz scratch, loopMBB - // fallthrough --> exitMBB - BB = loopMBB; - BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); - if (BinOpcode) { - // operand order needs to go the other way for NAND - if (BinOpcode == ARM64::BICWrr || BinOpcode == ARM64::BICXrr) - BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(incr).addReg(dest); - else - BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(dest).addReg(incr); - } - - BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); - BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loopMBB); - - BB->addSuccessor(loopMBB); - BB->addSuccessor(exitMBB); - - // exitMBB: - // ... - BB = exitMBB; - - MI->eraseFromParent(); // The instruction is gone now.
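- // (Editorial sketch of the loop this emits for a seq_cst atomicrmw add i32, - // using the acquire/release exclusives picked by getExclusiveOperation: - // loop: ldaxr w8, [x0] - // add w9, w8, w1 - // stlxr w10, w9, [x0] - // cbnz w10, loop - // A failing store-exclusive simply retries the whole read-modify-write.)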
- - return BB; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicBinary128( - MachineInstr *MI, MachineBasicBlock *BB, unsigned BinOpcodeLo, - unsigned BinOpcodeHi) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction *MF = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; - - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned IncrLo = MI->getOperand(3).getReg(); - unsigned IncrHi = MI->getOperand(4).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm()); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, LoopMBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass); - unsigned ScratchLo = IncrLo, ScratchHi = IncrHi; - if (BinOpcodeLo) { - assert(BinOpcodeHi && "Expect neither or both opcodes to be defined"); - ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass); - } - - // ThisMBB: - // ... - // fallthrough --> LoopMBB - BB->addSuccessor(LoopMBB); - - // LoopMBB: - // ldxp DestLo, DestHi, Ptr - // <binop> ScratchLo, DestLo, IncrLo - // <binop> ScratchHi, DestHi, IncrHi - // stxp ScratchRes, ScratchLo, ScratchHi, ptr - // cbnz ScratchRes, LoopMBB - // fallthrough --> ExitMBB - BB = LoopMBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - if (BinOpcodeLo) { - // operand order needs to go the other way for NAND - if (BinOpcodeLo == ARM64::BICXrr) { - std::swap(IncrLo, DestLo); - std::swap(IncrHi, DestHi); - } - - BuildMI(BB, DL, TII->get(BinOpcodeLo), ScratchLo).addReg(DestLo).addReg( - IncrLo); - BuildMI(BB, DL, TII->get(BinOpcodeHi), ScratchHi).addReg(DestHi).addReg( - IncrHi); - } - - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(ScratchLo) - .addReg(ScratchHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB); - - BB->addSuccessor(LoopMBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now.
- - return BB; -} - -MachineBasicBlock * -ARM64TargetLowering::EmitAtomicCmpSwap128(MachineInstr *MI, - MachineBasicBlock *BB) const { - unsigned DestLo = MI->getOperand(0).getReg(); - unsigned DestHi = MI->getOperand(1).getReg(); - unsigned Ptr = MI->getOperand(2).getReg(); - unsigned OldValLo = MI->getOperand(3).getReg(); - unsigned OldValHi = MI->getOperand(4).getReg(); - unsigned NewValLo = MI->getOperand(5).getReg(); - unsigned NewValHi = MI->getOperand(6).getReg(); - AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(7).getImm()); - unsigned ScratchRes = BB->getParent()->getRegInfo().createVirtualRegister( - &ARM64::GPR32RegClass); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - - unsigned LdrOpc, StrOpc; - getExclusiveOperation(16, Ord, LdrOpc, StrOpc); - - MachineFunction *MF = BB->getParent(); - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; // insert the new blocks after the current block - - MachineBasicBlock *Loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *Loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MF->insert(It, Loop1MBB); - MF->insert(It, Loop2MBB); - MF->insert(It, ExitMBB); - - // Transfer the remainder of BB and its successor edges to exitMBB. - ExitMBB->splice(ExitMBB->begin(), BB, - std::next(MachineBasicBlock::iterator(MI)), BB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(BB); - - // ThisMBB: - // ... - // fallthrough --> Loop1MBB - BB->addSuccessor(Loop1MBB); - - // Loop1MBB: - // ldxp DestLo, DestHi, [Ptr] - // cmp DestLo, OldValLo - // sbc xzr, DestHi, OldValHi - // bne ExitMBB - BB = Loop1MBB; - BuildMI(BB, DL, TII->get(LdrOpc), DestLo) - .addReg(DestHi, RegState::Define) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg( - OldValLo); - BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg( - OldValHi); - - BuildMI(BB, DL, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(ExitMBB); - BB->addSuccessor(Loop2MBB); - BB->addSuccessor(ExitMBB); - - // Loop2MBB: - // stxp ScratchRes, NewValLo, NewValHi, [Ptr] - // cbnz ScratchRes, Loop1MBB - BB = Loop2MBB; - BuildMI(BB, DL, TII->get(StrOpc), ScratchRes) - .addReg(NewValLo) - .addReg(NewValHi) - .addReg(Ptr); - BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(Loop1MBB); - BB->addSuccessor(Loop1MBB); - BB->addSuccessor(ExitMBB); - - // ExitMBB: - // ... - BB = ExitMBB; - - MI->eraseFromParent(); // The instruction is gone now. - - return BB; -} - -MachineBasicBlock *ARM64TargetLowering::EmitAtomicMinMax128( - MachineInstr *MI, MachineBasicBlock *BB, unsigned CondCode) const { - // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
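- // (Editorial sketch: the 128-bit comparison below is synthesised as a - // borrow chain, - // subs xzr, DestLo, IncrLo - // sbc xzr, DestHi, IncrHi - // and the resulting flags feed the two CSELXr selects that choose between - // the loaded value and the operand before the store-exclusive.)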
-
-MachineBasicBlock *ARM64TargetLowering::EmitAtomicMinMax128(
-    MachineInstr *MI, MachineBasicBlock *BB, unsigned CondCode) const {
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction *MF = BB->getParent();
-  MachineFunction::iterator It = BB;
-  ++It;
-
-  unsigned DestLo = MI->getOperand(0).getReg();
-  unsigned DestHi = MI->getOperand(1).getReg();
-  unsigned Ptr = MI->getOperand(2).getReg();
-  unsigned IncrLo = MI->getOperand(3).getReg();
-  unsigned IncrHi = MI->getOperand(4).getReg();
-  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm());
-  DebugLoc DL = MI->getDebugLoc();
-
-  unsigned LdrOpc, StrOpc;
-  getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
-
-  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MF->insert(It, LoopMBB);
-  MF->insert(It, ExitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  ExitMBB->splice(ExitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
-  unsigned ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
-  unsigned ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
-
-  // ThisMBB:
-  // ...
-  // fallthrough --> LoopMBB
-  BB->addSuccessor(LoopMBB);
-
-  // LoopMBB:
-  //   ldxp DestLo, DestHi, Ptr
-  //   cmp DestLo, IncrLo
-  //   sbc xzr, DestHi, IncrHi
-  //   csel ScratchLo, DestLo, IncrLo, <cc>
-  //   csel ScratchHi, DestHi, IncrHi, <cc>
-  //   stxp ScratchRes, ScratchLo, ScratchHi, Ptr
-  //   cbnz ScratchRes, LoopMBB
-  //   fallthrough --> ExitMBB
-  BB = LoopMBB;
-  BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
-      .addReg(DestHi, RegState::Define)
-      .addReg(Ptr);
-
-  BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg(
-      IncrLo);
-  BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg(
-      IncrHi);
-
-  BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchLo)
-      .addReg(DestLo)
-      .addReg(IncrLo)
-      .addImm(CondCode);
-  BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchHi)
-      .addReg(DestHi)
-      .addReg(IncrHi)
-      .addImm(CondCode);
-
-  BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
-      .addReg(ScratchLo)
-      .addReg(ScratchHi)
-      .addReg(Ptr);
-  BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB);
-
-  BB->addSuccessor(LoopMBB);
-  BB->addSuccessor(ExitMBB);
-
-  // ExitMBB:
-  // ...
-  BB = ExitMBB;
-
-  MI->eraseFromParent(); // The instruction is gone now.
-
-  return BB;
-}
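The cmp/sbc pair in the loop above compares two 128-bit values purely for flags: the low-word compare produces a borrow that the high-word subtract-with-carry consumes, and the two csel instructions then pick either the Dest or Incr half accordingly. The same borrow propagation in plain C++ (illustrative sketch, not part of the patch):

#include <cstdint>

// 128-bit unsigned "lower than" via subtract-with-borrow.
bool ult128(uint64_t aLo, uint64_t aHi, uint64_t bLo, uint64_t bHi) {
  bool Borrow = aLo < bLo;                    // cmp aLo, bLo
  return aHi < bHi || (aHi == bHi && Borrow); // sbc consumes the borrow
}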
-
-MachineBasicBlock *
-ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI,
-                                  MachineBasicBlock *MBB) const {
-  // We materialise the F128CSEL pseudo-instruction as some control flow and a
-  // phi node:
-
-  // OrigBB:
-  //     [... previous instrs leading to comparison ...]
-  //     b.ne TrueBB
-  //     b EndBB
-  // TrueBB:
-  //     ; Fallthrough
-  // EndBB:
-  //     Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
-
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  MachineFunction *MF = MBB->getParent();
-  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
-  DebugLoc DL = MI->getDebugLoc();
-  MachineFunction::iterator It = MBB;
-  ++It;
-
-  unsigned DestReg = MI->getOperand(0).getReg();
-  unsigned IfTrueReg = MI->getOperand(1).getReg();
-  unsigned IfFalseReg = MI->getOperand(2).getReg();
-  unsigned CondCode = MI->getOperand(3).getImm();
-  bool CPSRKilled = MI->getOperand(4).isKill();
-
-  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MF->insert(It, TrueBB);
-  MF->insert(It, EndBB);
-
-  // Transfer rest of current basic-block to EndBB
-  EndBB->splice(EndBB->begin(), MBB,
-                std::next(MachineBasicBlock::iterator(MI)), MBB->end());
-  EndBB->transferSuccessorsAndUpdatePHIs(MBB);
-
-  BuildMI(MBB, DL, TII->get(ARM64::Bcc)).addImm(CondCode).addMBB(TrueBB);
-  BuildMI(MBB, DL, TII->get(ARM64::B)).addMBB(EndBB);
-  MBB->addSuccessor(TrueBB);
-  MBB->addSuccessor(EndBB);
-
-  // TrueBB falls through to the end.
-  TrueBB->addSuccessor(EndBB);
-
-  if (!CPSRKilled) {
-    TrueBB->addLiveIn(ARM64::CPSR);
-    EndBB->addLiveIn(ARM64::CPSR);
-  }
-
-  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(ARM64::PHI), DestReg)
-      .addReg(IfTrueReg)
-      .addMBB(TrueBB)
-      .addReg(IfFalseReg)
-      .addMBB(MBB);
-
-  MI->eraseFromParent();
-  return EndBB;
-}
-
-MachineBasicBlock *
-ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
-                                                 MachineBasicBlock *BB) const {
-  switch (MI->getOpcode()) {
-  default:
-#ifndef NDEBUG
-    MI->dump();
-#endif
-    assert(0 && "Unexpected instruction for custom inserter!");
-    break;
-
-  case ARM64::ATOMIC_LOAD_ADD_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::ADDWrr);
-  case ARM64::ATOMIC_LOAD_ADD_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::ADDWrr);
-  case ARM64::ATOMIC_LOAD_ADD_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::ADDWrr);
-  case ARM64::ATOMIC_LOAD_ADD_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::ADDXrr);
-  case ARM64::ATOMIC_LOAD_ADD_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::ADDSXrr, ARM64::ADCXr);
-
-  case ARM64::ATOMIC_LOAD_AND_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::ANDWrr);
-  case ARM64::ATOMIC_LOAD_AND_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::ANDWrr);
-  case ARM64::ATOMIC_LOAD_AND_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::ANDWrr);
-  case ARM64::ATOMIC_LOAD_AND_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::ANDXrr);
-  case ARM64::ATOMIC_LOAD_AND_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::ANDXrr, ARM64::ANDXrr);
-
-  case ARM64::ATOMIC_LOAD_OR_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::ORRWrr);
-  case ARM64::ATOMIC_LOAD_OR_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::ORRWrr);
-  case ARM64::ATOMIC_LOAD_OR_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::ORRWrr);
-  case ARM64::ATOMIC_LOAD_OR_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::ORRXrr);
-  case ARM64::ATOMIC_LOAD_OR_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::ORRXrr, ARM64::ORRXrr);
-
-  case ARM64::ATOMIC_LOAD_XOR_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::EORWrr);
-  case ARM64::ATOMIC_LOAD_XOR_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::EORWrr);
-  case ARM64::ATOMIC_LOAD_XOR_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::EORWrr);
-  case ARM64::ATOMIC_LOAD_XOR_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::EORXrr);
-  case ARM64::ATOMIC_LOAD_XOR_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::EORXrr, ARM64::EORXrr);
-
-  case ARM64::ATOMIC_LOAD_NAND_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::BICWrr);
-  case ARM64::ATOMIC_LOAD_NAND_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::BICWrr);
-  case ARM64::ATOMIC_LOAD_NAND_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::BICWrr);
-  case ARM64::ATOMIC_LOAD_NAND_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::BICXrr);
-  case ARM64::ATOMIC_LOAD_NAND_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::BICXrr, ARM64::BICXrr);
-
-  case ARM64::ATOMIC_LOAD_SUB_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::SUBWrr);
-  case ARM64::ATOMIC_LOAD_SUB_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::SUBWrr);
-  case ARM64::ATOMIC_LOAD_SUB_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::SUBWrr);
-  case ARM64::ATOMIC_LOAD_SUB_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::SUBXrr);
-  case ARM64::ATOMIC_LOAD_SUB_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::SUBSXrr, ARM64::SBCXr);
-
-  case ARM64::ATOMIC_LOAD_MIN_I128:
-    return EmitAtomicMinMax128(MI, BB, ARM64CC::LT);
-
-  case ARM64::ATOMIC_LOAD_MAX_I128:
-    return EmitAtomicMinMax128(MI, BB, ARM64CC::GT);
-
-  case ARM64::ATOMIC_LOAD_UMIN_I128:
-    return EmitAtomicMinMax128(MI, BB, ARM64CC::CC);
-
-  case ARM64::ATOMIC_LOAD_UMAX_I128:
-    return EmitAtomicMinMax128(MI, BB, ARM64CC::HI);
-
-  case ARM64::ATOMIC_SWAP_I8:
-    return EmitAtomicBinary(MI, BB, 1, 0);
-  case ARM64::ATOMIC_SWAP_I16:
-    return EmitAtomicBinary(MI, BB, 2, 0);
-  case ARM64::ATOMIC_SWAP_I32:
-    return EmitAtomicBinary(MI, BB, 4, 0);
-  case ARM64::ATOMIC_SWAP_I64:
-    return EmitAtomicBinary(MI, BB, 8, 0);
-  case ARM64::ATOMIC_SWAP_I128:
-    return EmitAtomicBinary128(MI, BB, 0, 0);
-
-  case ARM64::ATOMIC_CMP_SWAP_I8:
-    return EmitAtomicCmpSwap(MI, BB, 1);
-  case ARM64::ATOMIC_CMP_SWAP_I16:
-    return EmitAtomicCmpSwap(MI, BB, 2);
-  case ARM64::ATOMIC_CMP_SWAP_I32:
-    return EmitAtomicCmpSwap(MI, BB, 4);
-  case ARM64::ATOMIC_CMP_SWAP_I64:
-    return EmitAtomicCmpSwap(MI, BB, 8);
-  case ARM64::ATOMIC_CMP_SWAP_I128:
-    return EmitAtomicCmpSwap128(MI, BB);
-
-  case ARM64::F128CSEL:
-    return EmitF128CSEL(MI, BB);
-
-  case TargetOpcode::STACKMAP:
-  case TargetOpcode::PATCHPOINT:
-    return emitPatchPoint(MI, BB);
-  }
-  llvm_unreachable("Unexpected instruction for custom inserter!");
-}
-
-//===----------------------------------------------------------------------===//
-// ARM64 Lowering private implementation.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Lowering Code
-//===----------------------------------------------------------------------===//
-
-/// changeIntCCToARM64CC - Convert a DAG integer condition code to an ARM64 CC
-static ARM64CC::CondCode changeIntCCToARM64CC(ISD::CondCode CC) {
-  switch (CC) {
-  default:
-    llvm_unreachable("Unknown condition code!");
-  case ISD::SETNE:
-    return ARM64CC::NE;
-  case ISD::SETEQ:
-    return ARM64CC::EQ;
-  case ISD::SETGT:
-    return ARM64CC::GT;
-  case ISD::SETGE:
-    return ARM64CC::GE;
-  case ISD::SETLT:
-    return ARM64CC::LT;
-  case ISD::SETLE:
-    return ARM64CC::LE;
-  case ISD::SETUGT:
-    return ARM64CC::HI;
-  case ISD::SETUGE:
-    return ARM64CC::CS;
-  case ISD::SETULT:
-    return ARM64CC::CC;
-  case ISD::SETULE:
-    return ARM64CC::LS;
-  }
-}
-
-/// changeFPCCToARM64CC - Convert a DAG fp condition code to an ARM64 CC.
-static void changeFPCCToARM64CC(ISD::CondCode CC, ARM64CC::CondCode &CondCode,
-                                ARM64CC::CondCode &CondCode2) {
-  CondCode2 = ARM64CC::AL;
-  switch (CC) {
-  default:
-    llvm_unreachable("Unknown FP condition!");
-  case ISD::SETEQ:
-  case ISD::SETOEQ:
-    CondCode = ARM64CC::EQ;
-    break;
-  case ISD::SETGT:
-  case ISD::SETOGT:
-    CondCode = ARM64CC::GT;
-    break;
-  case ISD::SETGE:
-  case ISD::SETOGE:
-    CondCode = ARM64CC::GE;
-    break;
-  case ISD::SETOLT:
-    CondCode = ARM64CC::MI;
-    break;
-  case ISD::SETOLE:
-    CondCode = ARM64CC::LS;
-    break;
-  case ISD::SETONE:
-    CondCode = ARM64CC::MI;
-    CondCode2 = ARM64CC::GT;
-    break;
-  case ISD::SETO:
-    CondCode = ARM64CC::VC;
-    break;
-  case ISD::SETUO:
-    CondCode = ARM64CC::VS;
-    break;
-  case ISD::SETUEQ:
-    CondCode = ARM64CC::EQ;
-    CondCode2 = ARM64CC::VS;
-    break;
-  case ISD::SETUGT:
-    CondCode = ARM64CC::HI;
-    break;
-  case ISD::SETUGE:
-    CondCode = ARM64CC::PL;
-    break;
-  case ISD::SETLT:
-  case ISD::SETULT:
-    CondCode = ARM64CC::LT;
-    break;
-  case ISD::SETLE:
-  case ISD::SETULE:
-    CondCode = ARM64CC::LE;
-    break;
-  case ISD::SETNE:
-  case ISD::SETUNE:
-    CondCode = ARM64CC::NE;
-    break;
-  }
-}
-
-static bool isLegalArithImmed(uint64_t C) {
-  // Matches ARM64DAGToDAGISel::SelectArithImmed().
-  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
-}
-
-static SDValue emitComparison(SDValue LHS, SDValue RHS, SDLoc dl,
-                              SelectionDAG &DAG) {
-  EVT VT = LHS.getValueType();
-
-  if (VT.isFloatingPoint())
-    return DAG.getNode(ARM64ISD::FCMP, dl, VT, LHS, RHS);
-
-  // The CMP instruction is just an alias for SUBS, and representing it as
-  // SUBS means that it's possible to get CSE with subtract operations.
-  // A later phase can perform the optimization of setting the destination
-  // register to WZR/XZR if it ends up being unused.
-  return DAG.getNode(ARM64ISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
-      .getValue(1);
-}
-
-static SDValue getARM64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                           SDValue &ARM64cc, SelectionDAG &DAG, SDLoc dl) {
-  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
-    EVT VT = RHS.getValueType();
-    uint64_t C = RHSC->getZExtValue();
-    if (!isLegalArithImmed(C)) {
-      // Constant does not fit, try adjusting it by one?
-      switch (CC) {
-      default:
-        break;
-      case ISD::SETLT:
-      case ISD::SETGE:
-        if ((VT == MVT::i32 && C != 0x80000000 &&
-             isLegalArithImmed((uint32_t)(C - 1))) ||
-            (VT == MVT::i64 && C != 0x8000000000000000ULL &&
-             isLegalArithImmed(C - 1ULL))) {
-          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
-          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
-          RHS = DAG.getConstant(C, VT);
-        }
-        break;
-      case ISD::SETULT:
-      case ISD::SETUGE:
-        if ((VT == MVT::i32 && C != 0 &&
-             isLegalArithImmed((uint32_t)(C - 1))) ||
-            (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
-          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
-          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
-          RHS = DAG.getConstant(C, VT);
-        }
-        break;
-      case ISD::SETLE:
-      case ISD::SETGT:
-        if ((VT == MVT::i32 && C != 0x7fffffff &&
-             isLegalArithImmed((uint32_t)(C + 1))) ||
-            (VT == MVT::i64 && C != 0x7fffffffffffffffULL &&
-             isLegalArithImmed(C + 1ULL))) {
-          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
-          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
-          RHS = DAG.getConstant(C, VT);
-        }
-        break;
-      case ISD::SETULE:
-      case ISD::SETUGT:
-        if ((VT == MVT::i32 && C != 0xffffffff &&
-             isLegalArithImmed((uint32_t)(C + 1))) ||
-            (VT == MVT::i64 && C != 0xffffffffffffffffULL &&
-             isLegalArithImmed(C + 1ULL))) {
-          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
-          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
-          RHS = DAG.getConstant(C, VT);
-        }
-        break;
-      }
-    }
-  }
-
-  SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
-  ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC);
-  ARM64cc = DAG.getConstant(ARM64CC, MVT::i32);
-  return Cmp;
-}
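A concrete instance of the constant adjustment in getARM64Cmp above: arithmetic immediates are a 12-bit value, optionally shifted left by 12, so 0x1001 cannot be encoded while 0x1000 can; "x < 0x1001" is therefore rewritten as "x <= 0x1000". A small compile-time check mirroring isLegalArithImmed (illustrative only, not LLVM code):

#include <cstdint>

constexpr bool legalImm(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}
static_assert(!legalImm(0x1001) && legalImm(0x1000),
              "SETLT 0x1001 can become SETLE 0x1000");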
-
-static std::pair<SDValue, SDValue>
-getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
-  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
-         "Unsupported value type");
-  SDValue Value, Overflow;
-  SDLoc DL(Op);
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  unsigned Opc = 0;
-  switch (Op.getOpcode()) {
-  default:
-    llvm_unreachable("Unknown overflow instruction!");
-  case ISD::SADDO:
-    Opc = ARM64ISD::ADDS;
-    CC = ARM64CC::VS;
-    break;
-  case ISD::UADDO:
-    Opc = ARM64ISD::ADDS;
-    CC = ARM64CC::CS;
-    break;
-  case ISD::SSUBO:
-    Opc = ARM64ISD::SUBS;
-    CC = ARM64CC::VS;
-    break;
-  case ISD::USUBO:
-    Opc = ARM64ISD::SUBS;
-    CC = ARM64CC::CC;
-    break;
-  // Multiply needs a little bit extra work.
-  case ISD::SMULO:
-  case ISD::UMULO: {
-    CC = ARM64CC::NE;
-    bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
-    if (Op.getValueType() == MVT::i32) {
-      unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
-      // For a 32 bit multiply with overflow check we want the instruction
-      // selector to generate a widening multiply (SMADDL/UMADDL). For that we
-      // need to generate the following pattern:
-      // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
-      LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
-      RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
-      SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
-      SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
-                                DAG.getConstant(0, MVT::i64));
-      // On ARM64 the upper 32 bits are always zero extended for a 32 bit
-      // operation. We need to clear out the upper 32 bits, because we used a
-      // widening multiply that wrote all 64 bits. In the end this should be a
-      // noop.
-      Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
-      if (IsSigned) {
-        // The signed overflow check requires more than just a simple check for
-        // any bit set in the upper 32 bits of the result. These bits could be
-        // just the sign bits of a negative number. To perform the overflow
-        // check we have to arithmetic shift right the 32nd bit of the result
-        // by 31 bits. Then we compare the result to the upper 32 bits.
-        SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
-                                        DAG.getConstant(32, MVT::i64));
-        UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
-        SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
-                                        DAG.getConstant(31, MVT::i64));
-        // It is important that LowerBits is last, otherwise the arithmetic
-        // shift will not be folded into the compare (SUBS).
-        SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
-        Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
-                       .getValue(1);
-      } else {
-        // The overflow check for unsigned multiply is easy. We only need to
-        // check if any of the upper 32 bits are set. This can be done with a
-        // CMP (shifted register). For that we need to generate the following
-        // pattern:
-        // (i64 ARM64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
-        SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
-                                        DAG.getConstant(32, MVT::i64));
-        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
-        Overflow =
-            DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
-                        UpperBits).getValue(1);
-      }
-      break;
-    }
-    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
-    // For the 64 bit multiply
-    Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
-    if (IsSigned) {
-      SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
-      SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
-                                      DAG.getConstant(63, MVT::i64));
-      // It is important that LowerBits is last, otherwise the arithmetic
-      // shift will not be folded into the compare (SUBS).
-      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
-      Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
-                     .getValue(1);
-    } else {
-      SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
-      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
-      Overflow =
-          DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
-                      UpperBits).getValue(1);
-    }
-    break;
-  }
-  } // switch (...)
-
-  if (Opc) {
-    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
-
-    // Emit the ARM64 operation with overflow check.
-    Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
-    Overflow = Value.getValue(1);
-  }
-  return std::make_pair(Value, Overflow);
-}
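The 32-bit SMULO strategy above can be sanity-checked in plain C++: after the widening multiply, signed overflow occurred exactly when the upper 32 bits are not a sign-extension of bit 31, which is what the SUBS of UpperBits against LowerBits tests (illustrative sketch, not LLVM code):

#include <cstdint>

bool smulo32(int32_t A, int32_t B, int32_t &Res) {
  int64_t Wide = (int64_t)A * (int64_t)B; // the widening SMADDL multiply
  Res = (int32_t)Wide;                    // the truncated Value
  // UpperBits (Wide >> 32) must equal LowerBits (Res >> 31, pure sign bits).
  return (int32_t)(Wide >> 32) != (Res >> 31); // true => overflow
}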
-
-SDValue ARM64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
-                                           RTLIB::Libcall Call) const {
-  SmallVector<SDValue, 2> Ops;
-  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
-    Ops.push_back(Op.getOperand(i));
-
-  return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
-                     SDLoc(Op)).first;
-}
-
-static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
-  SDValue Sel = Op.getOperand(0);
-  SDValue Other = Op.getOperand(1);
-
-  // If neither operand is a SELECT_CC, give up.
-  if (Sel.getOpcode() != ISD::SELECT_CC)
-    std::swap(Sel, Other);
-  if (Sel.getOpcode() != ISD::SELECT_CC)
-    return Op;
-
-  // The folding we want to perform is:
-  // (xor x, (select_cc a, b, cc, 0, -1) )
-  // -->
-  // (csel x, (xor x, -1), cc ...)
-  //
-  // The latter will get matched to a CSINV instruction.
-
-  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
-  SDValue LHS = Sel.getOperand(0);
-  SDValue RHS = Sel.getOperand(1);
-  SDValue TVal = Sel.getOperand(2);
-  SDValue FVal = Sel.getOperand(3);
-  SDLoc dl(Sel);
-
-  // FIXME: This could be generalized to non-integer comparisons.
-  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
-    return Op;
-
-  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
-  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
-
-  // If the values aren't constants, this isn't the pattern we're looking for.
-  if (!CFVal || !CTVal)
-    return Op;
-
-  // We can commute the SELECT_CC by inverting the condition. This
-  // might be needed to make this fit into a CSINV pattern.
-  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
-    std::swap(TVal, FVal);
-    std::swap(CTVal, CFVal);
-    CC = ISD::getSetCCInverse(CC, true);
-  }
-
-  // If the constants line up, perform the transform!
-  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
-    SDValue CCVal;
-    SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
-
-    FVal = Other;
-    TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
-                       DAG.getConstant(-1ULL, Other.getValueType()));
-
-    return DAG.getNode(ARM64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
-                       CCVal, Cmp);
-  }
-
-  return Op;
-}
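In source terms, the LowerXOR fold above targets patterns like the one below; after the transform the select and the xor collapse into one CSINV on the compare's flags (illustrative C++, not part of the patch):

// Before the fold: t = (a == b) ? 0 : -1; r = x ^ t;
// After the fold, equivalently:
long fold(long a, long b, long x) { return a == b ? x : ~x; } // one csinv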
-
-static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
-  EVT VT = Op.getValueType();
-
-  // Let legalize expand this if it isn't a legal type yet.
-  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
-    return SDValue();
-
-  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
-
-  unsigned Opc;
-  bool ExtraOp = false;
-  switch (Op.getOpcode()) {
-  default:
-    assert(0 && "Invalid code");
-  case ISD::ADDC:
-    Opc = ARM64ISD::ADDS;
-    break;
-  case ISD::SUBC:
-    Opc = ARM64ISD::SUBS;
-    break;
-  case ISD::ADDE:
-    Opc = ARM64ISD::ADCS;
-    ExtraOp = true;
-    break;
-  case ISD::SUBE:
-    Opc = ARM64ISD::SBCS;
-    ExtraOp = true;
-    break;
-  }
-
-  if (!ExtraOp)
-    return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
-  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
-                     Op.getOperand(2));
-}
-
-static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
-  // Let legalize expand this if it isn't a legal type yet.
-  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
-    return SDValue();
-
-  ARM64CC::CondCode CC;
-  // The actual operation that sets the overflow or carry flag.
-  SDValue Value, Overflow;
-  std::tie(Value, Overflow) = getARM64XALUOOp(CC, Op, DAG);
-
-  // We use 0 and 1 as false and true values.
-  SDValue TVal = DAG.getConstant(1, MVT::i32);
-  SDValue FVal = DAG.getConstant(0, MVT::i32);
-
-  // We use an inverted condition, because the conditional select is inverted
-  // too. This will allow it to be selected to a single instruction:
-  // CSINC Wd, WZR, WZR, invert(cond).
-  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32);
-  Overflow = DAG.getNode(ARM64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, CCVal,
-                         Overflow);
-
-  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
-  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
-}
-
-// Prefetch operands are:
-// 1: Address to prefetch
-// 2: bool isWrite
-// 3: int locality (0 = no locality ... 3 = extreme locality)
-// 4: bool isDataCache
-static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
-  SDLoc DL(Op);
-  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
-  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-  // The data operand is not used.
-  // unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
-  bool IsStream = !Locality;
-  // When the locality number is set
-  if (Locality) {
-    // The front-end should have filtered out the out-of-range values
-    assert(Locality <= 3 && "Prefetch locality out-of-range");
-    // The locality degree is the opposite of the cache speed.
-    // Put the number the other way around.
-    // The encoding starts at 0 for level 1
-    Locality = 3 - Locality;
-  }
-
-  // Build the mask value encoding the expected behavior.
-  unsigned PrfOp = (IsWrite << 4) |    // Load/Store bit
-                   (Locality << 1) |   // Cache level bits
-                   (unsigned)IsStream; // Stream bit
-  return DAG.getNode(ARM64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
-                     DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1));
-}
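Worked example of the PrfOp value assembled above: a data prefetch for a write with locality 3 has IsStream = 0 and Locality remapped to 3 - 3 = 0, giving (1 << 4) | (0 << 1) | 0 = 0x10, i.e. the PSTL1KEEP hint; a read with locality 0 becomes 0x01 (streaming). A compile-time sketch of the packing (illustrative only):

constexpr unsigned prfOp(bool IsWrite, unsigned Locality) {
  // Bit 4: load(0)/store(1); bits 2:1: cache level (inverted); bit 0: stream.
  return ((unsigned)IsWrite << 4) | ((Locality ? 3 - Locality : 0) << 1) |
         (unsigned)(Locality == 0);
}
static_assert(prfOp(true, 3) == 0x10, "write, keep in L1");
static_assert(prfOp(false, 0) == 0x01, "read, streaming");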
-
-SDValue ARM64TargetLowering::LowerFP_EXTEND(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
-
-  RTLIB::Libcall LC;
-  LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
-
-  return LowerF128Call(Op, DAG, LC);
-}
-
-SDValue ARM64TargetLowering::LowerFP_ROUND(SDValue Op,
-                                           SelectionDAG &DAG) const {
-  if (Op.getOperand(0).getValueType() != MVT::f128) {
-    // It's legal except when f128 is involved
-    return Op;
-  }
-
-  RTLIB::Libcall LC;
-  LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
-
-  // FP_ROUND node has a second operand indicating whether it is known to be
-  // precise. That doesn't take part in the LibCall so we can't directly use
-  // LowerF128Call.
-  SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, SDLoc(Op)).first;
-}
-
-static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
-  // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp.
-  // Any additional optimization in this function should be recorded
-  // in the cost tables.
-  EVT InVT = Op.getOperand(0).getValueType();
-  EVT VT = Op.getValueType();
-
-  // FP_TO_XINT conversions from the same type are legal.
-  if (VT.getSizeInBits() == InVT.getSizeInBits())
-    return Op;
-
-  if (InVT == MVT::v2f64) {
-    SDLoc dl(Op);
-    SDValue Cv = DAG.getNode(Op.getOpcode(), dl, MVT::v2i64, Op.getOperand(0));
-    return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
-  }
-
-  // Type changing conversions are illegal.
-  return SDValue();
-}
-
-SDValue ARM64TargetLowering::LowerFP_TO_INT(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  if (Op.getOperand(0).getValueType().isVector())
-    return LowerVectorFP_TO_INT(Op, DAG);
-
-  if (Op.getOperand(0).getValueType() != MVT::f128) {
-    // It's legal except when f128 is involved
-    return Op;
-  }
-
-  RTLIB::Libcall LC;
-  if (Op.getOpcode() == ISD::FP_TO_SINT)
-    LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
-  else
-    LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
-
-  SmallVector<SDValue, 2> Ops;
-  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
-    Ops.push_back(Op.getOperand(i));
-
-  return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
-                     SDLoc(Op)).first;
-}
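Note the shape of the legal cases above: conversions that keep the overall bit width (e.g. v2f64 -> v2i64) map to a single fcvtz instruction, while v2f64 -> v2i32 converts at the wider type and then truncates lane-wise; conceptually (illustrative only):

#include <cstdint>

// v2f64 -> v2i32: convert each lane to i64 (fcvtzs), then narrow (xtn).
void fpToI32x2(const double In[2], int32_t Out[2]) {
  for (int i = 0; i != 2; ++i)
    Out[i] = (int32_t)(int64_t)In[i];
}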
-
-static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
-  // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp.
-  // Any additional optimization in this function should be recorded
-  // in the cost tables.
-  EVT VT = Op.getValueType();
-  SDLoc dl(Op);
-  SDValue In = Op.getOperand(0);
-  EVT InVT = In.getValueType();
-
-  // v2i32 to v2f32 is legal.
-  if (VT == MVT::v2f32 && InVT == MVT::v2i32)
-    return Op;
-
-  // This function only handles v2f64 outputs.
-  if (VT == MVT::v2f64) {
-    // Extend the input argument to a v2i64 that we can feed into the
-    // floating point conversion. Zero or sign extend based on whether
-    // we're doing a signed or unsigned float conversion.
-    unsigned Opc =
-        Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
-    assert(Op.getNumOperands() == 1 && "FP conversions take one argument");
-    SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0));
-    return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted);
-  }
-
-  // Scalarize v2i64 to v2f32 conversions.
-  std::vector<SDValue> BuildVectorOps;
-  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
-    SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In,
-                               DAG.getConstant(i, MVT::i64));
-    Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr);
-    BuildVectorOps.push_back(Sclr);
-  }
-
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &BuildVectorOps[0],
-                     BuildVectorOps.size());
-}
-
-SDValue ARM64TargetLowering::LowerINT_TO_FP(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  if (Op.getValueType().isVector())
-    return LowerVectorINT_TO_FP(Op, DAG);
-
-  // i128 conversions are libcalls.
-  if (Op.getOperand(0).getValueType() == MVT::i128)
-    return SDValue();
-
-  // Other conversions are legal, unless it's to the completely software-based
-  // fp128.
-  if (Op.getValueType() != MVT::f128)
-    return Op;
-
-  RTLIB::Libcall LC;
-  if (Op.getOpcode() == ISD::SINT_TO_FP)
-    LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
-  else
-    LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
-
-  return LowerF128Call(Op, DAG, LC);
-}
-
-SDValue ARM64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
-  // For iOS, we want to call an alternative entry point: __sincos_stret,
-  // which returns the values in two S / D registers.
-  SDLoc dl(Op);
-  SDValue Arg = Op.getOperand(0);
-  EVT ArgVT = Arg.getValueType();
-  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-
-  ArgListTy Args;
-  ArgListEntry Entry;
-
-  Entry.Node = Arg;
-  Entry.Ty = ArgTy;
-  Entry.isSExt = false;
-  Entry.isZExt = false;
-  Args.push_back(Entry);
-
-  const char *LibcallName =
-      (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
-  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
-
-  StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
-  TargetLowering::CallLoweringInfo CLI(
-      DAG.getEntryNode(), RetTy, false, false, false, false, 0,
-      CallingConv::Fast, /*isTailCall=*/false,
-      /*doesNotRet=*/false, /*isReturnValueUsed*/ true, Callee, Args, DAG, dl);
-  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
-  return CallResult.first;
-}
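The Darwin entry points named above return sine and cosine together in two FP registers; their C-level shape is roughly the following (hypothetical prototypes for illustration; the exact ABI is defined by the system library):

extern "C" {
struct SinCos { double Sin, Cos; };  // returned in d0/d1
struct SinCosF { float Sin, Cos; };  // returned in s0/s1
SinCos __sincos_stret(double);       // assumed signature
SinCosF __sincosf_stret(float);      // assumed signature
}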
"__sincos_stret" : "__sincosf_stret"; - SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); - - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); - TargetLowering::CallLoweringInfo CLI( - DAG.getEntryNode(), RetTy, false, false, false, false, 0, - CallingConv::Fast, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/ true, Callee, Args, DAG, dl); - std::pair CallResult = LowerCallTo(CLI); - return CallResult.first; -} - -SDValue ARM64TargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: - llvm_unreachable("unimplemented operand"); - return SDValue(); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: - return LowerGlobalTLSAddress(Op, DAG); - case ISD::SETCC: - return LowerSETCC(Op, DAG); - case ISD::BR_CC: - return LowerBR_CC(Op, DAG); - case ISD::SELECT: - return LowerSELECT(Op, DAG); - case ISD::SELECT_CC: - return LowerSELECT_CC(Op, DAG); - case ISD::JumpTable: - return LowerJumpTable(Op, DAG); - case ISD::ConstantPool: - return LowerConstantPool(Op, DAG); - case ISD::BlockAddress: - return LowerBlockAddress(Op, DAG); - case ISD::VASTART: - return LowerVASTART(Op, DAG); - case ISD::VACOPY: - return LowerVACOPY(Op, DAG); - case ISD::VAARG: - return LowerVAARG(Op, DAG); - case ISD::ADDC: - case ISD::ADDE: - case ISD::SUBC: - case ISD::SUBE: - return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); - case ISD::SADDO: - case ISD::UADDO: - case ISD::SSUBO: - case ISD::USUBO: - case ISD::SMULO: - case ISD::UMULO: - return LowerXALUO(Op, DAG); - case ISD::FADD: - return LowerF128Call(Op, DAG, RTLIB::ADD_F128); - case ISD::FSUB: - return LowerF128Call(Op, DAG, RTLIB::SUB_F128); - case ISD::FMUL: - return LowerF128Call(Op, DAG, RTLIB::MUL_F128); - case ISD::FDIV: - return LowerF128Call(Op, DAG, RTLIB::DIV_F128); - case ISD::FP_ROUND: - return LowerFP_ROUND(Op, DAG); - case ISD::FP_EXTEND: - return LowerFP_EXTEND(Op, DAG); - case ISD::FRAMEADDR: - return LowerFRAMEADDR(Op, DAG); - case ISD::RETURNADDR: - return LowerRETURNADDR(Op, DAG); - case ISD::INSERT_VECTOR_ELT: - return LowerINSERT_VECTOR_ELT(Op, DAG); - case ISD::EXTRACT_VECTOR_ELT: - return LowerEXTRACT_VECTOR_ELT(Op, DAG); - case ISD::SCALAR_TO_VECTOR: - return LowerSCALAR_TO_VECTOR(Op, DAG); - case ISD::BUILD_VECTOR: - return LowerBUILD_VECTOR(Op, DAG); - case ISD::VECTOR_SHUFFLE: - return LowerVECTOR_SHUFFLE(Op, DAG); - case ISD::EXTRACT_SUBVECTOR: - return LowerEXTRACT_SUBVECTOR(Op, DAG); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: - return LowerVectorSRA_SRL_SHL(Op, DAG); - case ISD::SHL_PARTS: - return LowerShiftLeftParts(Op, DAG); - case ISD::SRL_PARTS: - case ISD::SRA_PARTS: - return LowerShiftRightParts(Op, DAG); - case ISD::CTPOP: - return LowerCTPOP(Op, DAG); - case ISD::FCOPYSIGN: - return LowerFCOPYSIGN(Op, DAG); - case ISD::AND: - return LowerVectorAND(Op, DAG); - case ISD::OR: - return LowerVectorOR(Op, DAG); - case ISD::XOR: - return LowerXOR(Op, DAG); - case ISD::PREFETCH: - return LowerPREFETCH(Op, DAG); - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - return LowerINT_TO_FP(Op, DAG); - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - return LowerFP_TO_INT(Op, DAG); - case ISD::FSINCOS: - return LowerFSINCOS(Op, DAG); - } -} - -/// getFunctionAlignment - Return the Log2 alignment of this function. 
-
-//===----------------------------------------------------------------------===//
-//                      Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "ARM64GenCallingConv.inc"
-
-/// Selects the correct CCAssignFn for the given CallingConvention
-/// value.
-CCAssignFn *ARM64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
-                                                   bool IsVarArg) const {
-  switch (CC) {
-  default:
-    llvm_unreachable("Unsupported calling convention.");
-  case CallingConv::WebKit_JS:
-    return CC_ARM64_WebKit_JS;
-  case CallingConv::C:
-  case CallingConv::Fast:
-    if (!Subtarget->isTargetDarwin())
-      return CC_ARM64_AAPCS;
-    return IsVarArg ? CC_ARM64_DarwinPCS_VarArg : CC_ARM64_DarwinPCS;
-  }
-}
-
-SDValue ARM64TargetLowering::LowerFormalArguments(
-    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
-    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
-    SmallVectorImpl<SDValue> &InVals) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-
-  // Assign locations to all of the incoming arguments.
-  SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), ArgLocs, *DAG.getContext());
-
-  // At this point, Ins[].VT may already be promoted to i32. To correctly
-  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
-  // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT.
-  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
-  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
-  // LocVT.
-  unsigned NumArgs = Ins.size();
-  Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
-  unsigned CurArgIdx = 0;
-  for (unsigned i = 0; i != NumArgs; ++i) {
-    MVT ValVT = Ins[i].VT;
-    std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
-    CurArgIdx = Ins[i].OrigArgIndex;
-
-    // Get type of the original argument.
-    EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
-    MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
-    // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
-    MVT LocVT = ValVT;
-    if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
-      LocVT = MVT::i8;
-    else if (ActualMVT == MVT::i16)
-      LocVT = MVT::i16;
-
-    CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
-    bool Res =
-        AssignFn(i, ValVT, LocVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
-    assert(!Res && "Call operand has unhandled type");
-    (void)Res;
-  }
-
-  SmallVector<SDValue, 16> ArgValues;
-  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    CCValAssign &VA = ArgLocs[i];
-
-    // Arguments stored in registers.
-    if (VA.isRegLoc()) {
-      EVT RegVT = VA.getLocVT();
-
-      SDValue ArgValue;
-      const TargetRegisterClass *RC;
-
-      if (RegVT == MVT::i32)
-        RC = &ARM64::GPR32RegClass;
-      else if (RegVT == MVT::i64)
-        RC = &ARM64::GPR64RegClass;
-      else if (RegVT == MVT::f32)
-        RC = &ARM64::FPR32RegClass;
-      else if (RegVT == MVT::f64 || RegVT == MVT::v1i64 ||
-               RegVT == MVT::v1f64 || RegVT == MVT::v2i32 ||
-               RegVT == MVT::v4i16 || RegVT == MVT::v8i8)
-        RC = &ARM64::FPR64RegClass;
-      else if (RegVT == MVT::v2i64 || RegVT == MVT::v4i32 ||
-               RegVT == MVT::v8i16 || RegVT == MVT::v16i8)
-        RC = &ARM64::FPR128RegClass;
-      else
-        llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
-
-      // Transform the arguments in physical registers into virtual ones.
-      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
-      ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
-
-      // If this is an 8, 16 or 32-bit value, it is really passed promoted
-      // to 64 bits. Insert an assert[sz]ext to capture this, then
-      // truncate to the right size.
-      switch (VA.getLocInfo()) {
-      default:
-        llvm_unreachable("Unknown loc info!");
-      case CCValAssign::Full:
-        break;
-      case CCValAssign::BCvt:
-        ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
-        break;
-      case CCValAssign::SExt:
-        ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
-                               DAG.getValueType(VA.getValVT()));
-        ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
-        break;
-      case CCValAssign::ZExt:
-        ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
-                               DAG.getValueType(VA.getValVT()));
-        ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
-        break;
-      }
-
-      InVals.push_back(ArgValue);
-
-    } else { // VA.isRegLoc()
-      assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
-      unsigned ArgOffset = VA.getLocMemOffset();
-      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
-      int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
-
-      // Create load nodes to retrieve arguments from the stack.
-      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
-      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
-                                   MachinePointerInfo::getFixedStack(FI), false,
-                                   false, false, 0));
-    }
-  }
-
-  // varargs
-  if (isVarArg) {
-    if (!Subtarget->isTargetDarwin()) {
-      // The AAPCS variadic function ABI is identical to the non-variadic
-      // one. As a result there may be more arguments in registers and we
-      // should save them for future reference.
-      saveVarArgRegisters(CCInfo, DAG, DL, Chain);
-    }
-
-    ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
-    // This will point to the next argument passed via stack.
-    unsigned StackOffset = CCInfo.getNextStackOffset();
-    // We currently pass all varargs at 8-byte alignment.
-    StackOffset = ((StackOffset + 7) & ~7);
-    AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true));
-  }
-
-  return Chain;
-}
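To make the sizing in saveVarArgRegisters below concrete: for an AAPCS variadic callee such as int f(int a, int b, ...), X0 and X1 are consumed by fixed arguments, so FirstVariadicGPR = 2 and 8 * (8 - 2) = 48 bytes of GPRs are saved, plus 16 * 8 = 128 bytes of Q registers when no fixed FP arguments were passed (illustrative arithmetic only):

constexpr unsigned gprSaveSize(unsigned FirstVariadicGPR) {
  return 8 * (8 - FirstVariadicGPR);  // x0-x7, 8 bytes each
}
constexpr unsigned fprSaveSize(unsigned FirstVariadicFPR) {
  return 16 * (8 - FirstVariadicFPR); // q0-q7, 16 bytes each
}
static_assert(gprSaveSize(2) == 48 && fprSaveSize(0) == 128,
              "f(int, int, ...) with no fixed FP arguments");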
-
-void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
-                                              SelectionDAG &DAG, SDLoc DL,
-                                              SDValue &Chain) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>();
-
-  SmallVector<SDValue, 8> MemOps;
-
-  static const uint16_t GPRArgRegs[] = { ARM64::X0, ARM64::X1, ARM64::X2,
-                                         ARM64::X3, ARM64::X4, ARM64::X5,
-                                         ARM64::X6, ARM64::X7 };
-  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
-  unsigned FirstVariadicGPR =
-      CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
-
-  static const uint16_t FPRArgRegs[] = { ARM64::Q0, ARM64::Q1, ARM64::Q2,
-                                         ARM64::Q3, ARM64::Q4, ARM64::Q5,
-                                         ARM64::Q6, ARM64::Q7 };
-  static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
-  unsigned FirstVariadicFPR =
-      CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
-
-  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
-  int GPRIdx = 0;
-  if (GPRSaveSize != 0) {
-    GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
-
-    SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
-
-    for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
-      unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &ARM64::GPR64RegClass);
-      SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
-      SDValue Store =
-          DAG.getStore(Val.getValue(1), DL, Val, FIN,
-                       MachinePointerInfo::getStack(i * 8), false, false, 0);
-      MemOps.push_back(Store);
-      FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
-                        DAG.getConstant(8, getPointerTy()));
-    }
-  }
-
-  unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
-  int FPRIdx = 0;
-  if (FPRSaveSize != 0) {
-    FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
-
-    SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
-
-    for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
-      unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &ARM64::FPR128RegClass);
-      SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::v2i64);
-      SDValue Store =
-          DAG.getStore(Val.getValue(1), DL, Val, FIN,
-                       MachinePointerInfo::getStack(i * 16), false, false, 0);
-      MemOps.push_back(Store);
-      FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
-                        DAG.getConstant(16, getPointerTy()));
-    }
-  }
-
-  FuncInfo->setVarArgsGPRIndex(GPRIdx);
-  FuncInfo->setVarArgsGPRSize(GPRSaveSize);
-  FuncInfo->setVarArgsFPRIndex(FPRIdx);
-  FuncInfo->setVarArgsFPRSize(FPRSaveSize);
-
-  if (!MemOps.empty()) {
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
-                        MemOps.size());
-  }
-}
-
-/// LowerCallResult - Lower the result values of a call into the
-/// appropriate copies out of appropriate physical registers.
-SDValue ARM64TargetLowering::LowerCallResult(
-    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
-    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
-    SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
-    SDValue ThisVal) const {
-  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
-                                                         : RetCC_ARM64_AAPCS;
-  // Assign locations to each value returned by this call.
-  SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), RVLocs, *DAG.getContext());
-  CCInfo.AnalyzeCallResult(Ins, RetCC);
-
-  // Copy all of the result registers out of their specified physreg.
-  for (unsigned i = 0; i != RVLocs.size(); ++i) {
-    CCValAssign VA = RVLocs[i];
-
-    // Pass 'this' value directly from the argument to return value, to avoid
-    // reg unit interference
-    if (i == 0 && isThisReturn) {
-      assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
-             "unexpected return calling convention register assignment");
-      InVals.push_back(ThisVal);
-      continue;
-    }
-
-    SDValue Val =
-        DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
-    Chain = Val.getValue(1);
-    InFlag = Val.getValue(2);
-
-    switch (VA.getLocInfo()) {
-    default:
-      llvm_unreachable("Unknown loc info!");
-    case CCValAssign::Full:
-      break;
-    case CCValAssign::BCvt:
-      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
-      break;
-    }
-
-    InVals.push_back(Val);
-  }
-
-  return Chain;
-}
-
-bool ARM64TargetLowering::isEligibleForTailCallOptimization(
-    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
-    bool isCalleeStructRet, bool isCallerStructRet,
-    const SmallVectorImpl<ISD::OutputArg> &Outs,
-    const SmallVectorImpl<SDValue> &OutVals,
-    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
-  // Look for obvious safe cases to perform tail call optimization that do not
-  // require ABI changes. This is what gcc calls sibcall.
-
-  // Do not sibcall optimize vararg calls unless the call site is not passing
-  // any arguments.
-  if (isVarArg && !Outs.empty())
-    return false;
-
-  // Also avoid sibcall optimization if either caller or callee uses struct
-  // return semantics.
-  if (isCalleeStructRet || isCallerStructRet)
-    return false;
-
-  // Note that currently ARM64 "C" calling convention and "Fast" calling
-  // convention are compatible. If/when that ever changes, we'll need to
-  // add checks here to make sure any interactions are OK.
-
-  // If the callee takes no arguments then go on to check the results of the
-  // call.
-  if (!Outs.empty()) {
-    // Check if stack adjustment is needed. For now, do not do this if any
-    // argument is passed on the stack.
-    SmallVector<CCValAssign, 16> ArgLocs;
-    CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
-                   getTargetMachine(), ArgLocs, *DAG.getContext());
-    CCAssignFn *AssignFn = CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
-    CCInfo.AnalyzeCallOperands(Outs, AssignFn);
-    if (CCInfo.getNextStackOffset()) {
-      // Check if the arguments are already laid out in the right way as
-      // the caller's fixed stack objects.
-      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
-           ++i, ++realArgIdx) {
-        CCValAssign &VA = ArgLocs[i];
-        if (VA.getLocInfo() == CCValAssign::Indirect)
-          return false;
-        if (VA.needsCustom()) {
-          // Just don't handle anything that needs custom adjustments for now.
-          // If need be, we can revisit later, but we shouldn't ever end up
-          // here.
-          return false;
-        } else if (!VA.isRegLoc()) {
-          // Likewise, don't try to handle stack based arguments for the
-          // time being.
-          return false;
-        }
-      }
-    }
-  }
-
-  return true;
-}
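Concretely, the checks above accept a sibcall only when every outgoing argument stays in registers (illustrative C++, not part of the patch):

long callee2(long, long);
// Both arguments travel in x0/x1 and there is no sret: eligible sibcall.
long caller2(long a, long b) { return callee2(b, a); }

long callee9(long, long, long, long, long, long, long, long, long);
// The ninth argument needs the stack, so getNextStackOffset() is nonzero
// and the tail-call optimization is rejected.
long caller9(long a) { return callee9(a, a, a, a, a, a, a, a, a); }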
-/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
-/// and add input and output parameter nodes.
-SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI,
-                                       SmallVectorImpl<SDValue> &InVals) const {
-  SelectionDAG &DAG = CLI.DAG;
-  SDLoc &DL = CLI.DL;
-  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
-  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
-  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
-  SDValue Chain = CLI.Chain;
-  SDValue Callee = CLI.Callee;
-  bool &IsTailCall = CLI.IsTailCall;
-  CallingConv::ID CallConv = CLI.CallConv;
-  bool IsVarArg = CLI.IsVarArg;
-
-  MachineFunction &MF = DAG.getMachineFunction();
-  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
-  bool IsThisReturn = false;
-
-  // If tail calls are explicitly disabled, make sure not to use them.
-  if (!EnableARM64TailCalls)
-    IsTailCall = false;
-
-  if (IsTailCall) {
-    // Check if it's really possible to do a tail call.
-    IsTailCall = isEligibleForTailCallOptimization(
-        Callee, CallConv, IsVarArg, IsStructRet,
-        MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG);
-    // We don't support GuaranteedTailCallOpt, only automatically
-    // detected sibcalls.
-    // FIXME: Re-evaluate. Is this true? Should it be true?
-    if (IsTailCall)
-      ++NumTailCalls;
-  }
-
-  // Analyze operands of the call, assigning locations to each operand.
-  SmallVector<CCValAssign, 16> ArgLocs;
-  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), ArgLocs, *DAG.getContext());
-
-  if (IsVarArg) {
-    // Handle fixed and variable vector arguments differently.
-    // Variable vector arguments always go into memory.
-    unsigned NumArgs = Outs.size();
-
-    for (unsigned i = 0; i != NumArgs; ++i) {
-      MVT ArgVT = Outs[i].VT;
-      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
-      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
-                                               /*IsVarArg=*/ !Outs[i].IsFixed);
-      bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
-      assert(!Res && "Call operand has unhandled type");
-      (void)Res;
-    }
-  } else {
-    // At this point, Outs[].VT may already be promoted to i32. To correctly
-    // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
-    // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT.
-    // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
-    // we use a special version of AnalyzeCallOperands to pass in ValVT and
-    // LocVT.
-    unsigned NumArgs = Outs.size();
-    for (unsigned i = 0; i != NumArgs; ++i) {
-      MVT ValVT = Outs[i].VT;
-      // Get type of the original argument.
-      EVT ActualVT = getValueType(CLI.Args[Outs[i].OrigArgIndex].Ty,
-                                  /*AllowUnknown*/ true);
-      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
-      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
-      // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
-      MVT LocVT = ValVT;
-      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
-        LocVT = MVT::i8;
-      else if (ActualMVT == MVT::i16)
-        LocVT = MVT::i16;
-
-      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
-      bool Res = AssignFn(i, ValVT, LocVT, CCValAssign::Full, ArgFlags, CCInfo);
-      assert(!Res && "Call operand has unhandled type");
-      (void)Res;
-    }
-  }
-
-  // Get a count of how many bytes are to be pushed on the stack.
-  unsigned NumBytes = CCInfo.getNextStackOffset();
-
-  // Adjust the stack pointer for the new arguments...
-  // These operations are automatically eliminated by the prolog/epilog pass
-  if (!IsTailCall)
-    Chain =
-        DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL);
-
-  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ARM64::SP, getPointerTy());
-
-  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
-  SmallVector<SDValue, 8> MemOpChains;
-
-  // Walk the register/memloc assignments, inserting copies/loads.
-  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
-       ++i, ++realArgIdx) {
-    CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = OutVals[realArgIdx];
-    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
-
-    // Promote the value if needed.
-    switch (VA.getLocInfo()) {
-    default:
-      llvm_unreachable("Unknown loc info!");
-    case CCValAssign::Full:
-      break;
-    case CCValAssign::SExt:
-      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
-      break;
-    case CCValAssign::ZExt:
-      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
-      break;
-    case CCValAssign::AExt:
-      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
-      break;
-    case CCValAssign::BCvt:
-      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
-      break;
-    case CCValAssign::FPExt:
-      Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
-      break;
-    }
-
-    if (VA.isRegLoc()) {
-      if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) {
-        assert(VA.getLocVT() == MVT::i64 &&
-               "unexpected calling convention register assignment");
-        assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
-               "unexpected use of 'returned'");
-        IsThisReturn = true;
-      }
-      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-    } else {
-      assert(VA.isMemLoc());
-      // There's no reason we can't support stack args w/ tailcall, but
-      // we currently don't, so assert if we see one.
-      assert(!IsTailCall && "stack argument with tail call!?");
-      unsigned LocMemOffset = VA.getLocMemOffset();
-      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
-      PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
-
-      // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
-      // promoted to a legal register type i32, we should truncate Arg back to
-      // i1/i8/i16.
-      if (Arg.getValueType().isSimple() &&
-          Arg.getValueType().getSimpleVT() == MVT::i32 &&
-          (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 ||
-           VA.getLocVT() == MVT::i16))
-        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg);
-
-      SDValue Store = DAG.getStore(Chain, DL, Arg, PtrOff,
-                                   MachinePointerInfo::getStack(LocMemOffset),
-                                   false, false, 0);
-      MemOpChains.push_back(Store);
-    }
-  }
-
-  if (!MemOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOpChains[0],
-                        MemOpChains.size());
-
-  // Build a sequence of copy-to-reg nodes chained together with token chain
-  // and flag operands which copy the outgoing args into the appropriate regs.
-  SDValue InFlag;
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
-    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
-                             RegsToPass[i].second, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
-  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
-  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
-  // node so that legalize doesn't hack it.
-  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
-      Subtarget->isTargetMachO()) {
-    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-      const GlobalValue *GV = G->getGlobal();
-      bool InternalLinkage = GV->hasInternalLinkage();
-      if (InternalLinkage)
-        Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
-      else {
-        Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0,
-                                            ARM64II::MO_GOT);
-        Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee);
-      }
-    } else if (ExternalSymbolSDNode *S =
-                   dyn_cast<ExternalSymbolSDNode>(Callee)) {
-      const char *Sym = S->getSymbol();
-      Callee =
-          DAG.getTargetExternalSymbol(Sym, getPointerTy(), ARM64II::MO_GOT);
-      Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee);
-    }
-  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
-    const GlobalValue *GV = G->getGlobal();
-    Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
-  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    const char *Sym = S->getSymbol();
-    Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0);
-  }
-
-  std::vector<SDValue> Ops;
-  Ops.push_back(Chain);
-  Ops.push_back(Callee);
-
-  // Add argument registers to the end of the list so that they are known live
-  // into the call.
-  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
-    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
-                                  RegsToPass[i].second.getValueType()));
-
-  // Add a register mask operand representing the call-preserved registers.
-  const uint32_t *Mask;
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
-  const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI);
-  if (IsThisReturn) {
-    // For 'this' returns, use the X0-preserving mask if applicable
-    Mask = ARI->getThisReturnPreservedMask(CallConv);
-    if (!Mask) {
-      IsThisReturn = false;
-      Mask = ARI->getCallPreservedMask(CallConv);
-    }
-  } else
-    Mask = ARI->getCallPreservedMask(CallConv);
-
-  assert(Mask && "Missing call preserved mask for calling convention");
-  Ops.push_back(DAG.getRegisterMask(Mask));
-
-  if (InFlag.getNode())
-    Ops.push_back(InFlag);
-
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-
-  // If we're doing a tail call, use a TC_RETURN here rather than an
-  // actual call instruction.
-  if (IsTailCall)
-    return DAG.getNode(ARM64ISD::TC_RETURN, DL, NodeTys, &Ops[0], Ops.size());
-
-  // Returns a chain and a flag for retval copy to use.
-  Chain = DAG.getNode(ARM64ISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
-  InFlag = Chain.getValue(1);
-
-  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(0, true), InFlag, DL);
-  if (!Ins.empty())
-    InFlag = Chain.getValue(1);
-
-  // Handle result values, copying them out of physregs into vregs that we
-  // return.
-  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
-                         InVals, IsThisReturn,
-                         IsThisReturn ? OutVals[0] : SDValue());
-}
-
-bool ARM64TargetLowering::CanLowerReturn(
-    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
-    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
-  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
-                                                         : RetCC_ARM64_AAPCS;
-  SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
-  return CCInfo.CheckReturn(Outs, RetCC);
-}
-
-SDValue
-ARM64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
-                                 bool isVarArg,
-                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                 const SmallVectorImpl<SDValue> &OutVals,
-                                 SDLoc DL, SelectionDAG &DAG) const {
-  CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
-                                                         : RetCC_ARM64_AAPCS;
-  SmallVector<CCValAssign, 16> RVLocs;
-  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
-                 getTargetMachine(), RVLocs, *DAG.getContext());
-  CCInfo.AnalyzeReturn(Outs, RetCC);
-
-  // Copy the result values into the output registers.
-  SDValue Flag;
-  SmallVector<SDValue, 4> RetOps(1, Chain);
-  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
-       ++i, ++realRVLocIdx) {
-    CCValAssign &VA = RVLocs[i];
-    assert(VA.isRegLoc() && "Can only return in registers!");
-    SDValue Arg = OutVals[realRVLocIdx];
-
-    switch (VA.getLocInfo()) {
-    default:
-      llvm_unreachable("Unknown loc info!");
-    case CCValAssign::Full:
-      break;
-    case CCValAssign::BCvt:
-      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
-      break;
-    }
-
-    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
-    Flag = Chain.getValue(1);
-    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
-  }
-
-  RetOps[0] = Chain; // Update chain.
-
-  // Add the flag if we have it.
-  if (Flag.getNode())
-    RetOps.push_back(Flag);
-
-  return DAG.getNode(ARM64ISD::RET_FLAG, DL, MVT::Other, &RetOps[0],
-                     RetOps.size());
-}
-
-//===----------------------------------------------------------------------===//
-//  Other Lowering Code
-//===----------------------------------------------------------------------===//
-
-SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op,
-                                                SelectionDAG &DAG) const {
-  EVT PtrVT = getPointerTy();
-  SDLoc DL(Op);
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  unsigned char OpFlags =
-      Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
-
-  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
-         "unexpected offset in global node");
-
-  // This also catches the large code model case for Darwin.
-  if ((OpFlags & ARM64II::MO_GOT) != 0) {
-    SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
-    // FIXME: Once remat is capable of dealing with instructions with register
-    // operands, expand this into two nodes instead of using a wrapper node.
-    return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr);
-  }
-
-  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
-    const unsigned char MO_NC = ARM64II::MO_NC;
-    return DAG.getNode(
-        ARM64ISD::WrapperLarge, DL, PtrVT,
-        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G3),
-        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G2 | MO_NC),
-        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G1 | MO_NC),
-        DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G0 | MO_NC));
-  } else {
-    // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and
-    // the only correct model on Darwin.
-    SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
-                                            OpFlags | ARM64II::MO_PAGE);
-    unsigned char LoFlags = OpFlags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC;
-    SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags);
-
-    SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi);
-    return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
-  }
-}
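The small-code-model path above splits an address into a 4 KiB page (materialized by ADRP) and a 12-bit page offset (folded into an ADD); numerically (illustrative only):

#include <cstdint>

constexpr uint64_t page(uint64_t A) { return A & ~0xFFFULL; }   // MO_PAGE
constexpr uint64_t pageOff(uint64_t A) { return A & 0xFFFULL; } // MO_PAGEOFF
static_assert(page(0x12345678) + pageOff(0x12345678) == 0x12345678,
              "ADRP + ADDlow reassemble the full address");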
-
-/// \brief Convert a TLS address reference into the correct sequence of loads
-/// and calls to compute the variable's address (for Darwin, currently) and
-/// return an SDValue containing the final node.
-
-/// Darwin only has one TLS scheme which must be capable of dealing with the
-/// fully general situation, in the worst case. This means:
-///     + "extern __thread" declaration.
-///     + Defined in a possibly unknown dynamic library.
-///
-/// The general system is that each __thread variable has a [3 x i64]
-/// descriptor which contains information used by the runtime to calculate the
-/// address. The only part of this the compiler needs to know about is the
-/// first xword, which contains a function pointer that must be called with
-/// the address of the entire descriptor in "x0".
-///
-/// Since this descriptor may be in a different unit, in general even the
-/// descriptor must be accessed via an indirect load. The "ideal" code sequence
-/// is:
-///     adrp x0, _var@TLVPPAGE
-///     ldr x0, [x0, _var@TLVPPAGEOFF]   ; x0 now contains address of descriptor
-///     ldr x1, [x0]                     ; x1 contains 1st entry of descriptor,
-///                                      ; the function pointer
-///     blr x1                           ; Uses descriptor address in x0
-///                                      ; Address of _var is now in x0.
-///
-/// If the address of _var's descriptor *is* known to the linker, then it can
-/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
-/// a slight efficiency gain.
-SDValue
-ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
-
-  SDLoc DL(Op);
-  MVT PtrVT = getPointerTy();
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-
-  SDValue TLVPAddr =
-      DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS);
-  SDValue DescAddr = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TLVPAddr);
-
-  // The first entry in the descriptor is a function pointer that we must call
-  // to obtain the address of the variable.
-  SDValue Chain = DAG.getEntryNode();
-  SDValue FuncTLVGet =
-      DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(),
-                  false, true, true, 8);
-  Chain = FuncTLVGet.getValue(1);
-
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
-  MFI->setAdjustsStack(true);
-
-  // TLS calls preserve all registers except those that absolutely must be
-  // trashed: X0 (it takes an argument), LR (it's a call) and CPSR (let's not
-  // be silly).
-  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
-  const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI);
-  const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
-  // Finally, we can make the call. This is just a degenerate version of a
-  // normal ARM64 call node: x0 takes the address of the descriptor, and
-  // returns the address of the variable in this thread.
-  Chain = DAG.getCopyToReg(Chain, DL, ARM64::X0, DescAddr, SDValue());
-  Chain = DAG.getNode(ARM64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
-                      Chain, FuncTLVGet, DAG.getRegister(ARM64::X0, MVT::i64),
-                      DAG.getRegisterMask(Mask), Chain.getValue(1));
-  return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Chain.getValue(1));
-}
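The [3 x i64] descriptor walked through above corresponds to dyld's thread-local-variable descriptor; its layout is roughly the following (an assumption for illustration, mirroring the usual libdyld definition):

struct TLVDescriptor {
  void *(*Thunk)(TLVDescriptor *); // first xword: called with the
                                   // descriptor's own address in x0
  unsigned long Key;               // thread-specific key for the TLS block
  unsigned long Offset;            // offset of the variable in that block
};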
- Chain = DAG.getCopyToReg(Chain, DL, ARM64::X0, DescAddr, SDValue()); - Chain = DAG.getNode(ARM64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), - Chain, FuncTLVGet, DAG.getRegister(ARM64::X0, MVT::i64), - DAG.getRegisterMask(Mask), Chain.getValue(1)); - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Chain.getValue(1)); -} - -/// When accessing thread-local variables under either the general-dynamic or -/// local-dynamic system, we make a "TLS-descriptor" call. The variable will -/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry -/// is a function pointer to carry out the resolution. This function takes the -/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All -/// other registers (except LR, CPSR) are preserved. -/// -/// Thus, the ideal call sequence on AArch64 is: -/// -/// adrp x0, :tlsdesc:thread_var -/// ldr x8, [x0, :tlsdesc_lo12:thread_var] -/// add x0, x0, :tlsdesc_lo12:thread_var -/// .tlsdesccall thread_var -/// blr x8 -/// (TPIDR_EL0 offset now in x0). -/// -/// The ".tlsdesccall" directive instructs the assembler to insert a particular -/// relocation to help the linker relax this sequence if it turns out to be too -/// conservative. -/// -/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this -/// is harmless. -SDValue ARM64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr, - SDValue DescAddr, SDLoc DL, - SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(); - - // The function we need to call is simply the first entry in the GOT for this - // descriptor, load it in preparation. - SDValue Func = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, SymAddr); - - // TLS calls preserve all registers except those that absolutely must be - // trashed: X0 (it takes an argument), LR (it's a call) and CPSR (let's not be - // silly). - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARM64RegisterInfo *ARI = static_cast(TRI); - const uint32_t *Mask = ARI->getTLSCallPreservedMask(); - - // The function takes only one argument: the address of the descriptor itself - // in X0. 
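-  // Net effect, as a pseudo-C sketch (names assumed for exposition):
-  //   tp_off = ((uintptr_t (*)(void *))got_entry)(descriptor); // via BLR
-  //   var_addr = TPIDR_EL0 + tp_off;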
- SDValue Glue, Chain; - Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM64::X0, DescAddr, Glue); - Glue = Chain.getValue(1); - - // We're now ready to populate the argument list, as with a normal call: - SmallVector Ops; - Ops.push_back(Chain); - Ops.push_back(Func); - Ops.push_back(SymAddr); - Ops.push_back(DAG.getRegister(ARM64::X0, PtrVT)); - Ops.push_back(DAG.getRegisterMask(Mask)); - Ops.push_back(Glue); - - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(ARM64ISD::TLSDESC_CALL, DL, NodeTys, &Ops[0], Ops.size()); - Glue = Chain.getValue(1); - - return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Glue); -} - -SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - assert(Subtarget->isTargetELF() && "This function expects an ELF target"); - assert(getTargetMachine().getCodeModel() == CodeModel::Small && - "ELF TLS only supported in small memory model"); - const GlobalAddressSDNode *GA = cast(Op); - - TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); - - SDValue TPOff; - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - const GlobalValue *GV = GA->getGlobal(); - - SDValue ThreadBase = DAG.getNode(ARM64ISD::THREAD_POINTER, DL, PtrVT); - - if (Model == TLSModel::LocalExec) { - SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G1); - SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); - - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - TPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, TPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - } else if (Model == TLSModel::InitialExec) { - TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - TPOff = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TPOff); - } else if (Model == TLSModel::LocalDynamic) { - // Local-dynamic accesses proceed in two phases. A general-dynamic TLS - // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate - // the beginning of the module's TLS region, followed by a DTPREL offset - // calculation. - - // These accesses will need deduplicating if there's more than one. - ARM64FunctionInfo *MFI = - DAG.getMachineFunction().getInfo(); - MFI->incNumLocalDynamicTLSAccesses(); - - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, ARM64II::MO_TLS | ARM64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetExternalSymbol( - "_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - - // The call needs a relocation too for linker relaxation. It doesn't make - // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of - // the address. - SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT, - ARM64II::MO_TLS); - - // Now we can calculate the offset from TPIDR_EL0 to this module's - // thread-local area. 
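-    // That is (sketch): addr = TPIDR_EL0 + base_off + dtprel(var), where
-    // base_off comes from the _TLS_MODULE_BASE_ descriptor call below and
-    // dtprel(var) is materialized by the MOVZ/MOVK pair that follows it.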
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); - - // Now use :dtprel_whatever: operations to calculate this variable's offset - // in its thread-storage area. - SDValue HiVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G1); - SDValue LoVar = DAG.getTargetGlobalAddress( - GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC); - - SDValue DTPOff = - SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar, - DAG.getTargetConstant(16, MVT::i32)), - 0); - DTPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, DTPOff, LoVar, - DAG.getTargetConstant(0, MVT::i32)), - 0); - - TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff); - } else if (Model == TLSModel::GeneralDynamic) { - // Accesses used in this sequence go via the TLS descriptor which lives in - // the GOT. Prepare an address we can use to handle this. - SDValue HiDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_PAGE); - SDValue LoDesc = DAG.getTargetGlobalAddress( - GV, DL, PtrVT, 0, - ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - // First argument to the descriptor call is the address of the descriptor - // itself. - SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc); - DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc); - - // The call needs a relocation too for linker relaxation. It doesn't make - // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of - // the address. - SDValue SymAddr = - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS); - - // Finally we can make a call to calculate the offset from tpidr_el0. - TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG); - } else - llvm_unreachable("Unsupported ELF TLS access model"); - - return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff); -} - -SDValue ARM64TargetLowering::LowerGlobalTLSAddress(SDValue Op, - SelectionDAG &DAG) const { - if (Subtarget->isTargetDarwin()) - return LowerDarwinGlobalTLSAddress(Op, DAG); - else if (Subtarget->isTargetELF()) - return LowerELFGlobalTLSAddress(Op, DAG); - - llvm_unreachable("Unexpected platform trying to use TLS"); -} -SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - ISD::CondCode CC = cast(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue Dest = Op.getOperand(4); - SDLoc dl(Op); - - // Handle f128 first, since lowering it will result in comparing the return - // value of a libcall against zero, which is just what the rest of LowerBR_CC - // is expecting to deal with. - if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, we need to compare the result - // against zero to select between true and false values. - if (RHS.getNode() == 0) { - RHS = DAG.getConstant(0, LHS.getValueType()); - CC = ISD::SETNE; - } - } - - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch - // instruction. - unsigned Opc = LHS.getOpcode(); - if (LHS.getResNo() == 1 && isa(RHS) && - cast(RHS)->isOne() && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - assert((CC == ISD::SETEQ || CC == ISD::SETNE) && - "Unexpected condition code."); - // Only lower legal XALUO ops. 
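-    // For example (a sketch; exact selection depends on the surrounding
-    // code), IR along the lines of
-    //   %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
-    //   %o = extractvalue { i32, i1 } %s, 1
-    //   br i1 %o, label %ovf, label %cont
-    // becomes an ADDS that sets the flags plus a single b.vs branch.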
- if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) - return SDValue(); - - // The actual operation with overflow check. - ARM64CC::CondCode OFCC; - SDValue Value, Overflow; - std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, LHS.getValue(0), DAG); - - if (CC == ISD::SETNE) - OFCC = getInvertedCondCode(OFCC); - SDValue CCVal = DAG.getConstant(OFCC, MVT::i32); - - return DAG.getNode(ARM64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest, - CCVal, Overflow); - } - - if (LHS.getValueType().isInteger()) { - assert((LHS.getValueType() == RHS.getValueType()) && - (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); - - // If the RHS of the comparison is zero, we can potentially fold this - // to a specialized branch. - const ConstantSDNode *RHSC = dyn_cast(RHS); - if (RHSC && RHSC->getZExtValue() == 0) { - if (CC == ISD::SETEQ) { - // See if we can use a TBZ to fold in an AND as well. - // TBZ has a smaller branch displacement than CBZ. If the offset is - // out of bounds, a late MI-layer pass rewrites branches. - // 403.gcc is an example that hits this case. - if (LHS.getOpcode() == ISD::AND && - isa(LHS.getOperand(1)) && - isPowerOf2_64(LHS.getConstantOperandVal(1))) { - SDValue Test = LHS.getOperand(0); - uint64_t Mask = LHS.getConstantOperandVal(1); - - // TBZ only operates on i64's, but the ext should be free. - if (Test.getValueType() == MVT::i32) - Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - - return DAG.getNode(ARM64ISD::TBZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); - } - - return DAG.getNode(ARM64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest); - } else if (CC == ISD::SETNE) { - // See if we can use a TBZ to fold in an AND as well. - // TBZ has a smaller branch displacement than CBZ. If the offset is - // out of bounds, a late MI-layer pass rewrites branches. - // 403.gcc is an example that hits this case. - if (LHS.getOpcode() == ISD::AND && - isa(LHS.getOperand(1)) && - isPowerOf2_64(LHS.getConstantOperandVal(1))) { - SDValue Test = LHS.getOperand(0); - uint64_t Mask = LHS.getConstantOperandVal(1); - - // TBNZ only operates on i64's, but the ext should be free. - if (Test.getValueType() == MVT::i32) - Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64); - - return DAG.getNode(ARM64ISD::TBNZ, dl, MVT::Other, Chain, Test, - DAG.getConstant(Log2_64(Mask), MVT::i64), Dest); - } - - return DAG.getNode(ARM64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest); - } - } - - SDValue CCVal; - SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl); - return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, - Cmp); - } - - assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two branches to implement. 
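-  // For example (sketch): SETONE has no single ARM64 condition, so it is
-  // emitted as two branches to the same destination:
-  //   fcmp s0, s1
-  //   b.mi .Ldest
-  //   b.gt .Ldest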
-  SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
-  ARM64CC::CondCode CC1, CC2;
-  changeFPCCToARM64CC(CC, CC1, CC2);
-  SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
-  SDValue BR1 =
-      DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
-  if (CC2 != ARM64CC::AL) {
-    SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
-    return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
-                       Cmp);
-  }
-
-  return BR1;
-}
-
-SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  EVT VT = Op.getValueType();
-  SDLoc DL(Op);
-
-  SDValue In1 = Op.getOperand(0);
-  SDValue In2 = Op.getOperand(1);
-  EVT SrcVT = In2.getValueType();
-  if (SrcVT != VT) {
-    if (SrcVT == MVT::f32 && VT == MVT::f64)
-      In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
-    else if (SrcVT == MVT::f64 && VT == MVT::f32)
-      In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
-    else
-      // FIXME: Src type is different, bail out for now. Can VT really be a
-      // vector type?
-      return SDValue();
-  }
-
-  EVT VecVT;
-  EVT EltVT;
-  SDValue EltMask, VecVal1, VecVal2;
-  if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
-    EltVT = MVT::i32;
-    VecVT = MVT::v4i32;
-    EltMask = DAG.getConstant(0x80000000ULL, EltVT);
-
-    if (!VT.isVector()) {
-      VecVal1 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT,
-                                          DAG.getUNDEF(VecVT), In1);
-      VecVal2 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT,
-                                          DAG.getUNDEF(VecVT), In2);
-    } else {
-      VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
-      VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
-    }
-  } else if (VT == MVT::f64 || VT == MVT::v2f64) {
-    EltVT = MVT::i64;
-    VecVT = MVT::v2i64;
-
-    // We want to materialize a mask with the high bit set, but the AdvSIMD
-    // immediate moves cannot materialize that in a single instruction for
-    // 64-bit elements. Instead, materialize zero and then negate it.
-    EltMask = DAG.getConstant(0, EltVT);
-
-    if (!VT.isVector()) {
-      VecVal1 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT,
-                                          DAG.getUNDEF(VecVT), In1);
-      VecVal2 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT,
-                                          DAG.getUNDEF(VecVT), In2);
-    } else {
-      VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
-      VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
-    }
-  } else {
-    llvm_unreachable("Invalid type for copysign!");
-  }
-
-  std::vector<SDValue> BuildVectorOps;
-  for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
-    BuildVectorOps.push_back(EltMask);
-
-  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT,
-                                 &BuildVectorOps[0], BuildVectorOps.size());
-
-  // If we couldn't materialize the mask above, then the mask vector will be
-  // the zero vector, and we need to negate it here.
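-  // The overall selection, sketched for the f32 case (assumed assembly):
-  //   movi v2.4s, #0x80, lsl #24  // 0x80000000 in each lane: the sign bits
-  //   bit  v0.16b, v1.16b, v2.16b // insert In2's sign bit into In1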
- if (VT == MVT::f64 || VT == MVT::v2f64) { - BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec); - BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec); - BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec); - } - - SDValue Sel = - DAG.getNode(ARM64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); - - if (VT == MVT::f32) - return DAG.getTargetExtractSubreg(ARM64::ssub, DL, VT, Sel); - else if (VT == MVT::f64) - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, VT, Sel); - else - return DAG.getNode(ISD::BITCAST, DL, VT, Sel); -} - -SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const { - if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute( - AttributeSet::FunctionIndex, Attribute::NoImplicitFloat)) - return SDValue(); - - // While there is no integer popcount instruction, it can - // be more efficiently lowered to the following sequence that uses - // AdvSIMD registers/instructions as long as the copies to/from - // the AdvSIMD registers are cheap. - // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd - // CNT V0.8B, V0.8B // 8xbyte pop-counts - // ADDV B0, V0.8B // sum 8xbyte pop-counts - // UMOV X0, V0.B[0] // copy byte result back to integer reg - SDValue Val = Op.getOperand(0); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8); - - SDValue VecVal; - if (VT == MVT::i32) { - VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val); - VecVal = - DAG.getTargetInsertSubreg(ARM64::ssub, DL, MVT::v8i8, ZeroVec, VecVal); - } else { - VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val); - } - - SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal); - SDValue UaddLV = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32, - DAG.getConstant(Intrinsic::arm64_neon_uaddlv, MVT::i32), CtPop); - - if (VT == MVT::i64) - UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV); - return UaddLV; -} - -SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - - if (Op.getValueType().isVector()) - return LowerVSETCC(Op, DAG); - - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - SDLoc dl(Op); - - // We chose ZeroOrOneBooleanContents, so use zero and one. - EVT VT = Op.getValueType(); - SDValue TVal = DAG.getConstant(1, VT); - SDValue FVal = DAG.getConstant(0, VT); - - // Handle f128 first, since one possible outcome is a normal integer - // comparison which gets picked up by the next if statement. - if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); - - // If softenSetCCOperands returned a scalar, use it. - if (RHS.getNode() == 0) { - assert(LHS.getValueType() == Op.getValueType() && - "Unexpected setcc expansion!"); - return LHS; - } - } - - if (LHS.getValueType().isInteger()) { - SDValue CCVal; - SDValue Cmp = - getARM64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl); - - // Note that we inverted the condition above, so we reverse the order of - // the true and false operands here. This will allow the setcc to be - // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp); - } - - // Now we know we're dealing with FP values. - assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); - - // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead - // and do the comparison. 
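-  // For example (sketch), an f32 "setolt" becomes:
-  //   fcmp s0, s1
-  //   cset w0, mi                 // cset is the CSINC-based alias used here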
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG); - - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - if (CC2 == ARM64CC::AL) { - changeFPCCToARM64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - - // Note that we inverted the condition above, so we reverse the order of - // the true and false operands here. This will allow the setcc to be - // matched to a single CSINC instruction. - return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp); - } else { - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two CSELs to implement. As is in this case, - // we emit the first CSEL and then emit a second using the output of the - // first as the RHS. We're effectively OR'ing the two CC's together. - - // FIXME: It would be nice if we could match the two CSELs to two CSINCs. - SDValue CC1Val = DAG.getConstant(CC1, MVT::i32); - SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); - - SDValue CC2Val = DAG.getConstant(CC2, MVT::i32); - return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); - } -} - -/// A SELECT_CC operation is really some kind of max or min if both values being -/// compared are, in some sense, equal to the results in either case. However, -/// it is permissible to compare f32 values and produce directly extended f64 -/// values. -/// -/// Extending the comparison operands would also be allowed, but is less likely -/// to happen in practice since their use is right here. Note that truncate -/// operations would *not* be semantically equivalent. -static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { - if (Cmp == Result) - return true; - - ConstantFPSDNode *CCmp = dyn_cast(Cmp); - ConstantFPSDNode *CResult = dyn_cast(Result); - if (CCmp && CResult && Cmp.getValueType() == MVT::f32 && - Result.getValueType() == MVT::f64) { - bool Lossy; - APFloat CmpVal = CCmp->getValueAPF(); - CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy); - return CResult->getValueAPF().bitwiseIsEqual(CmpVal); - } - - return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; -} - -SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDValue CC = Op->getOperand(0); - SDValue TVal = Op->getOperand(1); - SDValue FVal = Op->getOperand(2); - SDLoc DL(Op); - - unsigned Opc = CC.getOpcode(); - // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select - // instruction. - if (CC.getResNo() == 1 && - (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || - Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { - // Only lower legal XALUO ops. 
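-    // For example (a sketch; register choice illustrative), a select on the
-    // overflow bit of llvm.uadd.with.overflow.i32 can become:
-    //   adds w8, w0, w1
-    //   csel w0, w2, w3, hs       // hs: carry set, i.e. unsigned overflow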
-    if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
-      return SDValue();
-
-    ARM64CC::CondCode OFCC;
-    SDValue Value, Overflow;
-    std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, CC.getValue(0), DAG);
-    SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
-    return DAG.getNode(ARM64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal,
-                       Overflow);
-  }
-
-  if (CC.getOpcode() == ISD::SETCC)
-    return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
-                           cast<CondCodeSDNode>(CC.getOperand(2))->get());
-  else
-    return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
-                           FVal, ISD::SETNE);
-}
-
-SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  SDValue TVal = Op.getOperand(2);
-  SDValue FVal = Op.getOperand(3);
-  SDLoc dl(Op);
-
-  // Handle f128 first, because it will result in a comparison of some RTLIB
-  // call result against zero.
-  if (LHS.getValueType() == MVT::f128) {
-    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
-
-    // If softenSetCCOperands returned a scalar, we need to compare the result
-    // against zero to select between true and false values.
-    if (RHS.getNode() == 0) {
-      RHS = DAG.getConstant(0, LHS.getValueType());
-      CC = ISD::SETNE;
-    }
-  }
-
-  // Handle integers first.
-  if (LHS.getValueType().isInteger()) {
-    assert((LHS.getValueType() == RHS.getValueType()) &&
-           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
-
-    unsigned Opcode = ARM64ISD::CSEL;
-
-    // If both the TVal and the FVal are constants, see if we can swap them in
-    // order to form a CSINV or CSINC out of them.
-    ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
-    ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
-
-    if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
-      std::swap(TVal, FVal);
-      std::swap(CTVal, CFVal);
-      CC = ISD::getSetCCInverse(CC, true);
-    } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
-      std::swap(TVal, FVal);
-      std::swap(CTVal, CFVal);
-      CC = ISD::getSetCCInverse(CC, true);
-    } else if (TVal.getOpcode() == ISD::XOR) {
-      // If TVal is a NOT we want to swap TVal and FVal so that we can match
-      // with a CSINV rather than a CSEL.
-      ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
-
-      if (CVal && CVal->isAllOnesValue()) {
-        std::swap(TVal, FVal);
-        std::swap(CTVal, CFVal);
-        CC = ISD::getSetCCInverse(CC, true);
-      }
-    } else if (TVal.getOpcode() == ISD::SUB) {
-      // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
-      // that we can match with a CSNEG rather than a CSEL.
-      ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
-
-      if (CVal && CVal->isNullValue()) {
-        std::swap(TVal, FVal);
-        std::swap(CTVal, CFVal);
-        CC = ISD::getSetCCInverse(CC, true);
-      }
-    } else if (CTVal && CFVal) {
-      const int64_t TrueVal = CTVal->getSExtValue();
-      const int64_t FalseVal = CFVal->getSExtValue();
-      bool Swap = false;
-
-      // If both TVal and FVal are constants, see if FVal is the
-      // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
-      // instead of a CSEL in that case.
-      if (TrueVal == ~FalseVal) {
-        Opcode = ARM64ISD::CSINV;
-      } else if (TrueVal == -FalseVal) {
-        Opcode = ARM64ISD::CSNEG;
-      } else if (TVal.getValueType() == MVT::i32) {
-        // If our operands are only 32-bit wide, make sure we use 32-bit
-        // arithmetic when checking whether we can use CSINC. This ensures that
-        // the addition in the check will wrap around properly in case there is
-        // an overflow (which would not be the case if we do the check with
-        // 64-bit arithmetic).
-        const uint32_t TrueVal32 = CTVal->getZExtValue();
-        const uint32_t FalseVal32 = CFVal->getZExtValue();
-
-        if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
-          Opcode = ARM64ISD::CSINC;
-
-          if (TrueVal32 > FalseVal32) {
-            Swap = true;
-          }
-        }
-        // 64-bit check whether we can use CSINC.
-      } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
-        Opcode = ARM64ISD::CSINC;
-
-        if (TrueVal > FalseVal) {
-          Swap = true;
-        }
-      }
-
-      // Swap TVal and FVal if necessary.
-      if (Swap) {
-        std::swap(TVal, FVal);
-        std::swap(CTVal, CFVal);
-        CC = ISD::getSetCCInverse(CC, true);
-      }
-
-      if (Opcode != ARM64ISD::CSEL) {
-        // Drop FVal since we can get its value by simply inverting/negating
-        // TVal.
-        FVal = TVal;
-      }
-    }
-
-    SDValue CCVal;
-    SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
-
-    EVT VT = Op.getValueType();
-    return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
-  }
-
-  // Now we know we're dealing with FP values.
-  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
-  assert(LHS.getValueType() == RHS.getValueType());
-  EVT VT = Op.getValueType();
-
-  // Try to match this select into a max/min operation, which have dedicated
-  // opcodes in the instruction set.
-  // NOTE: This is not correct in the presence of NaNs, so we only enable this
-  // in no-NaNs mode.
-  if (getTargetMachine().Options.NoNaNsFPMath) {
-    if (selectCCOpsAreFMaxCompatible(LHS, FVal) &&
-        selectCCOpsAreFMaxCompatible(RHS, TVal)) {
-      CC = ISD::getSetCCSwappedOperands(CC);
-      std::swap(TVal, FVal);
-    }
-
-    if (selectCCOpsAreFMaxCompatible(LHS, TVal) &&
-        selectCCOpsAreFMaxCompatible(RHS, FVal)) {
-      switch (CC) {
-      default:
-        break;
-      case ISD::SETGT:
-      case ISD::SETGE:
-      case ISD::SETUGT:
-      case ISD::SETUGE:
-      case ISD::SETOGT:
-      case ISD::SETOGE:
-        return DAG.getNode(ARM64ISD::FMAX, dl, VT, TVal, FVal);
-        break;
-      case ISD::SETLT:
-      case ISD::SETLE:
-      case ISD::SETULT:
-      case ISD::SETULE:
-      case ISD::SETOLT:
-      case ISD::SETOLE:
-        return DAG.getNode(ARM64ISD::FMIN, dl, VT, TVal, FVal);
-        break;
-      }
-    }
-  }
-
-  // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
-  // and do the comparison.
-  SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
-
-  // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally
-  // clean. Some of them require two CSELs to implement. As is in this case,
-  // we emit the first CSEL and then emit a second using the output of the
-  // first as the RHS. We're effectively OR'ing the two CC's together.
-  ARM64CC::CondCode CC1, CC2;
-  changeFPCCToARM64CC(CC, CC1, CC2);
-  SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
-  SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
-
-  // If we need a second CSEL, emit it, using the output of the first as the
-  // RHS. We're effectively OR'ing the two CC's together.
-  if (CC2 != ARM64CC::AL) {
-    SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
-    return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
-  }
-
-  // Otherwise, return the output of the first CSEL.
-  return CS1;
-}
-
-SDValue ARM64TargetLowering::LowerJumpTable(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  // Jump table entries as PC relative offsets. No additional tweaking
-  // is necessary here. Just get the address of the jump table.
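-  // That is (sketch, with an assumed local label name):
-  //   adrp x8, .LJTI0_0
-  //   add  x8, x8, :lo12:.LJTI0_0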
- JumpTableSDNode *JT = cast(Op); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - - SDValue Hi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); -} - -SDValue ARM64TargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - ConstantPoolSDNode *CP = cast(Op); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - - if (getTargetMachine().getCodeModel() == CodeModel::Large) { - // Use the GOT for the large code model on iOS. - if (Subtarget->isTargetMachO()) { - SDValue GotAddr = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_GOT); - return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr); - } - - const unsigned char MO_NC = ARM64II::MO_NC; - return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G3), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G2 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G1 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_G0 | MO_NC)); - } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on - // ELF, the only valid one on Darwin. - SDValue Hi = - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - ARM64II::MO_PAGEOFF | ARM64II::MO_NC); - - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - } -} - -SDValue ARM64TargetLowering::LowerBlockAddress(SDValue Op, - SelectionDAG &DAG) const { - const BlockAddress *BA = cast(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(); - SDLoc DL(Op); - if (getTargetMachine().getCodeModel() == CodeModel::Large && - !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = ARM64II::MO_NC; - return DAG.getNode( - ARM64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G0 | MO_NC)); - } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGEOFF | - ARM64II::MO_NC); - SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - } -} - -SDValue ARM64TargetLowering::LowerDarwin_VASTART(SDValue Op, - SelectionDAG &DAG) const { - ARM64FunctionInfo *FuncInfo = - DAG.getMachineFunction().getInfo(); - - SDLoc DL(Op); - SDValue FR = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); - const Value *SV = cast(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), - MachinePointerInfo(SV), false, false, 0); -} - -SDValue ARM64TargetLowering::LowerAAPCS_VASTART(SDValue Op, - SelectionDAG &DAG) const { - // The layout of the va_list struct is 
specified in the AArch64 Procedure Call - // Standard, section B.3. - MachineFunction &MF = DAG.getMachineFunction(); - ARM64FunctionInfo *FuncInfo = MF.getInfo(); - SDLoc DL(Op); - - SDValue Chain = Op.getOperand(0); - SDValue VAList = Op.getOperand(1); - const Value *SV = cast(Op.getOperand(2))->getValue(); - SmallVector MemOps; - - // void *__stack at offset 0 - SDValue Stack = - DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy()); - MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList, - MachinePointerInfo(SV), false, false, 8)); - - // void *__gr_top at offset 8 - int GPRSize = FuncInfo->getVarArgsGPRSize(); - if (GPRSize > 0) { - SDValue GRTop, GRTopAddr; - - GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(8, getPointerTy())); - - GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy()); - GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop, - DAG.getConstant(GPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr, - MachinePointerInfo(SV, 8), false, false, 8)); - } - - // void *__vr_top at offset 16 - int FPRSize = FuncInfo->getVarArgsFPRSize(); - if (FPRSize > 0) { - SDValue VRTop, VRTopAddr; - VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(16, getPointerTy())); - - VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy()); - VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop, - DAG.getConstant(FPRSize, getPointerTy())); - - MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr, - MachinePointerInfo(SV, 16), false, false, 8)); - } - - // int __gr_offs at offset 24 - SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(24, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32), - GROffsAddr, MachinePointerInfo(SV, 24), false, - false, 4)); - - // int __vr_offs at offset 28 - SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(28, getPointerTy())); - MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32), - VROffsAddr, MachinePointerInfo(SV, 28), false, - false, 4)); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], - MemOps.size()); -} - -SDValue ARM64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { - return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG) - : LowerAAPCS_VASTART(Op, DAG); -} - -SDValue ARM64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { - // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single - // pointer. - unsigned VaListSize = Subtarget->isTargetDarwin() ? 
8 : 32; - const Value *DestSV = cast(Op.getOperand(3))->getValue(); - const Value *SrcSV = cast(Op.getOperand(4))->getValue(); - - return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), - Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), - 8, false, false, MachinePointerInfo(DestSV), - MachinePointerInfo(SrcSV)); -} - -SDValue ARM64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin() && - "automatic va_arg instruction only works on Darwin"); - - const Value *V = cast(Op.getOperand(2))->getValue(); - EVT VT = Op.getValueType(); - SDLoc DL(Op); - SDValue Chain = Op.getOperand(0); - SDValue Addr = Op.getOperand(1); - unsigned Align = Op.getConstantOperandVal(3); - - SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr, - MachinePointerInfo(V), false, false, false, 0); - Chain = VAList.getValue(1); - - if (Align > 8) { - assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); - VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(Align - 1, getPointerTy())); - VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList, - DAG.getConstant(-(int64_t)Align, getPointerTy())); - } - - Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy); - - // Scalar integer and FP values smaller than 64 bits are implicitly extended - // up to 64 bits. At the very least, we have to increase the striding of the - // vaargs list to match this, and for FP values we need to introduce - // FP_ROUND nodes as well. - if (VT.isInteger() && !VT.isVector()) - ArgSize = 8; - bool NeedFPTrunc = false; - if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { - ArgSize = 8; - NeedFPTrunc = true; - } - - // Increment the pointer, VAList, to the next vaarg - SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList, - DAG.getConstant(ArgSize, getPointerTy())); - // Store the incremented VAList to the legalized pointer - SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V), - false, false, 0); - - // Load the actual argument out of the pointer VAList - if (NeedFPTrunc) { - // Load the value as an f64. - SDValue WideFP = DAG.getLoad(MVT::f64, DL, APStore, VAList, - MachinePointerInfo(), false, false, false, 0); - // Round the value down to an f32. - SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0), - DAG.getIntPtrConstant(1)); - SDValue Ops[] = { NarrowFP, WideFP.getValue(1) }; - // Merge the rounded value with the chain output of the load. 
-    return DAG.getMergeValues(Ops, 2, DL);
-  }
-
-  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo(), false,
-                     false, false, 0);
-}
-
-SDValue ARM64TargetLowering::LowerFRAMEADDR(SDValue Op,
-                                            SelectionDAG &DAG) const {
-  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
-  MFI->setFrameAddressIsTaken(true);
-
-  EVT VT = Op.getValueType();
-  SDLoc DL(Op);
-  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, ARM64::FP, VT);
-  while (Depth--)
-    FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
-                            MachinePointerInfo(), false, false, false, 0);
-  return FrameAddr;
-}
-
-SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op,
-                                             SelectionDAG &DAG) const {
-  MachineFunction &MF = DAG.getMachineFunction();
-  MachineFrameInfo *MFI = MF.getFrameInfo();
-  MFI->setReturnAddressIsTaken(true);
-
-  EVT VT = Op.getValueType();
-  SDLoc DL(Op);
-  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-  if (Depth) {
-    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
-    SDValue Offset = DAG.getConstant(8, getPointerTy());
-    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
-                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
-                       MachinePointerInfo(), false, false, false, 0);
-  }
-
-  // Return LR, which contains the return address. Mark it an implicit live-in.
-  unsigned Reg = MF.addLiveIn(ARM64::LR, &ARM64::GPR64RegClass);
-  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
-}
-
-/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
-/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
-SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op,
-                                                  SelectionDAG &DAG) const {
-  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
-  EVT VT = Op.getValueType();
-  unsigned VTBits = VT.getSizeInBits();
-  SDLoc dl(Op);
-  SDValue ShOpLo = Op.getOperand(0);
-  SDValue ShOpHi = Op.getOperand(1);
-  SDValue ShAmt = Op.getOperand(2);
-  SDValue ARMcc;
-  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
-
-  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
-
-  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
-                                 DAG.getConstant(VTBits, MVT::i64), ShAmt);
-  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
-  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
-                                   DAG.getConstant(VTBits, MVT::i64));
-  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
-
-  SDValue Cmp =
-      emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), dl, DAG);
-  SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32);
-
-  SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
-  SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
-  SDValue Lo =
-      DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
-
-  // ARM64 shifts larger than the register width are wrapped rather than
-  // clamped, so we can't just emit "hi >> x".
-  SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
-  SDValue TrueValHi = Opc == ISD::SRA
-                          ? DAG.getNode(Opc, dl, VT, ShOpHi,
-                                        DAG.getConstant(VTBits - 1, MVT::i64))
-                          : DAG.getConstant(0, VT);
-  SDValue Hi =
-      DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
-
-  SDValue Ops[2] = { Lo, Hi };
-  return DAG.getMergeValues(Ops, 2, dl);
-}
-
-/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
-/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
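-/// For example (a sketch ignoring the shift-by-zero boundary case), for a
-/// 128-bit value {Lo, Hi} shifted left by s:
-///   Hi = s < 64 ? (Hi << s) | (Lo >> (64 - s)) : Lo << (s - 64)
-///   Lo = s < 64 ? (Lo << s) : 0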
-SDValue ARM64TargetLowering::LowerShiftLeftParts(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
-  EVT VT = Op.getValueType();
-  unsigned VTBits = VT.getSizeInBits();
-  SDLoc dl(Op);
-  SDValue ShOpLo = Op.getOperand(0);
-  SDValue ShOpHi = Op.getOperand(1);
-  SDValue ShAmt = Op.getOperand(2);
-  SDValue ARMcc;
-
-  assert(Op.getOpcode() == ISD::SHL_PARTS);
-  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
-                                 DAG.getConstant(VTBits, MVT::i64), ShAmt);
-  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
-  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
-                                   DAG.getConstant(VTBits, MVT::i64));
-  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
-  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
-
-  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
-
-  SDValue Cmp =
-      emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), dl, DAG);
-  SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32);
-  SDValue Hi = DAG.getNode(ARM64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
-
-  // ARM64 shifts larger than the register width are wrapped rather than
-  // clamped, so we can't just emit "lo << a" if a is too big.
-  SDValue TrueValLo = DAG.getConstant(0, VT);
-  SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
-  SDValue Lo =
-      DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
-
-  SDValue Ops[2] = { Lo, Hi };
-  return DAG.getMergeValues(Ops, 2, dl);
-}
-
-bool
-ARM64TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
-  // The ARM64 target doesn't support folding offsets into global addresses.
-  return false;
-}
-
-bool ARM64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
-  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
-  // FIXME: We should be able to handle f128 as well with a clever lowering.
-  if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32))
-    return true;
-
-  if (VT == MVT::f64)
-    return ARM64_AM::getFP64Imm(Imm) != -1;
-  else if (VT == MVT::f32)
-    return ARM64_AM::getFP32Imm(Imm) != -1;
-  return false;
-}
-
-//===----------------------------------------------------------------------===//
-// ARM64 Optimization Hooks
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ARM64 Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-// Table of Constraints
-// TODO: This is the current set of constraints supported by ARM for the
-// compiler; not all of them may make sense, e.g. S may be difficult to support.
-// -// r - A general register -// w - An FP/SIMD register of some size in the range v0-v31 -// x - An FP/SIMD register of some size in the range v0-v15 -// I - Constant that can be used with an ADD instruction -// J - Constant that can be used with a SUB instruction -// K - Constant that can be used with a 32-bit logical instruction -// L - Constant that can be used with a 64-bit logical instruction -// M - Constant that can be used as a 32-bit MOV immediate -// N - Constant that can be used as a 64-bit MOV immediate -// Q - A memory reference with base register and no offset -// S - A symbolic address -// Y - Floating point constant zero -// Z - Integer constant zero -// -// Note that general register operands will be output using their 64-bit x -// register name, whatever the size of the variable, unless the asm operand -// is prefixed by the %w modifier. Floating-point and SIMD register operands -// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or -// %q modifier. - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. -ARM64TargetLowering::ConstraintType -ARM64TargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - default: - break; - case 'z': - return C_Other; - case 'x': - case 'w': - return C_RegisterClass; - // An address with a single base register. Due to the way we - // currently handle addresses it is the same as 'r'. - case 'Q': - return C_Memory; - } - } - return TargetLowering::getConstraintType(Constraint); -} - -/// Examine constraint type and operand type and determine a weight value. -/// This object must already have been set up with the operand type -/// and the current alternative constraint selected. -TargetLowering::ConstraintWeight -ARM64TargetLowering::getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const { - ConstraintWeight weight = CW_Invalid; - Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. - if (CallOperandVal == NULL) - return CW_Default; - Type *type = CallOperandVal->getType(); - // Look at the constraint type. - switch (*constraint) { - default: - weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; - case 'x': - case 'w': - if (type->isFloatingPointTy() || type->isVectorTy()) - weight = CW_Register; - break; - case 'z': - weight = CW_Constant; - break; - } - return weight; -} - -std::pair -ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - case 'r': - if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::GPR64commonRegClass); - return std::make_pair(0U, &ARM64::GPR32commonRegClass); - case 'w': - if (VT == MVT::f32) - return std::make_pair(0U, &ARM64::FPR32RegClass); - if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &ARM64::FPR64RegClass); - if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128RegClass); - break; - // The instructions that this constraint is designed for can - // only take 128-bit registers so just use that regclass. 
- case 'x': - if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &ARM64::FPR128_loRegClass); - break; - } - } - if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(unsigned(ARM64::CPSR), &ARM64::CCRRegClass); - - // Use the default implementation in TargetLowering to convert the register - // constraint into a member of a register class. - std::pair Res; - Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); - - // Not found as a standard register? - if (Res.second == 0) { - unsigned Size = Constraint.size(); - if ((Size == 4 || Size == 5) && Constraint[0] == '{' && - tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') { - const std::string Reg = - std::string(&Constraint[2], &Constraint[Size - 1]); - int RegNo = atoi(Reg.c_str()); - if (RegNo >= 0 && RegNo <= 31) { - // v0 - v31 are aliases of q0 - q31. - // By default we'll emit v0-v31 for this unless there's a modifier where - // we'll emit the correct register as well. - Res.first = ARM64::FPR128RegClass.getRegister(RegNo); - Res.second = &ARM64::FPR128RegClass; - } - } - } - - return Res; -} - -/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. -void ARM64TargetLowering::LowerAsmOperandForConstraint( - SDValue Op, std::string &Constraint, std::vector &Ops, - SelectionDAG &DAG) const { - SDValue Result(0, 0); - - // Currently only support length 1 constraints. - if (Constraint.length() != 1) - return; - - char ConstraintLetter = Constraint[0]; - switch (ConstraintLetter) { - default: - break; - - // This set of constraints deal with valid constants for various instructions. - // Validate and return a target constant for them if we can. - case 'z': { - // 'z' maps to xzr or wzr so it needs an input of 0. - ConstantSDNode *C = dyn_cast(Op); - if (!C || C->getZExtValue() != 0) - return; - - if (Op.getValueType() == MVT::i64) - Result = DAG.getRegister(ARM64::XZR, MVT::i64); - else - Result = DAG.getRegister(ARM64::WZR, MVT::i32); - break; - } - - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - ConstantSDNode *C = dyn_cast(Op); - if (!C) - return; - - // Grab the value and do some validation. - uint64_t CVal = C->getZExtValue(); - switch (ConstraintLetter) { - // The I constraint applies only to simple ADD or SUB immediate operands: - // i.e. 0 to 4095 with optional shift by 12 - // The J constraint applies only to ADD or SUB immediates that would be - // valid when negated, i.e. if [an add pattern] were to be output as a SUB - // instruction [or vice versa], in other words -1 to -4095 with optional - // left shift by 12. - case 'I': - if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal)) - break; - return; - case 'J': { - uint64_t NVal = -C->getSExtValue(); - if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) - break; - return; - } - // The K and L constraints apply *only* to logical immediates, including - // what used to be the MOVI alias for ORR (though the MOVI alias has now - // been removed and MOV should be used). So these constraints have to - // distinguish between bit patterns that are valid 32-bit or 64-bit - // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but - // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice - // versa. 
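-  // For example (a sketch of intended usage, not from this file):
-  //   asm("orr %w0, %w1, %2" : "=r"(r) : "r"(a), "K"(0xff00ff00));
-  // 0xff00ff00 is a valid 32-bit bitmask immediate but not a valid 64-bit
-  // one.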
- case 'K': - if (ARM64_AM::isLogicalImmediate(CVal, 32)) - break; - return; - case 'L': - if (ARM64_AM::isLogicalImmediate(CVal, 64)) - break; - return; - // The M and N constraints are a superset of K and L respectively, for use - // with the MOV (immediate) alias. As well as the logical immediates they - // also match 32 or 64-bit immediates that can be loaded either using a - // *single* MOVZ or MOVN , such as 32-bit 0x12340000, 0x00001234, 0xffffedca - // (M) or 64-bit 0x1234000000000000 (N) etc. - // As a note some of this code is liberally stolen from the asm parser. - case 'M': { - if (!isUInt<32>(CVal)) - return; - if (ARM64_AM::isLogicalImmediate(CVal, 32)) - break; - if ((CVal & 0xFFFF) == CVal) - break; - if ((CVal & 0xFFFF0000ULL) == CVal) - break; - uint64_t NCVal = ~(uint32_t)CVal; - if ((NCVal & 0xFFFFULL) == NCVal) - break; - if ((NCVal & 0xFFFF0000ULL) == NCVal) - break; - return; - } - case 'N': { - if (ARM64_AM::isLogicalImmediate(CVal, 64)) - break; - if ((CVal & 0xFFFFULL) == CVal) - break; - if ((CVal & 0xFFFF0000ULL) == CVal) - break; - if ((CVal & 0xFFFF00000000ULL) == CVal) - break; - if ((CVal & 0xFFFF000000000000ULL) == CVal) - break; - uint64_t NCVal = ~CVal; - if ((NCVal & 0xFFFFULL) == NCVal) - break; - if ((NCVal & 0xFFFF0000ULL) == NCVal) - break; - if ((NCVal & 0xFFFF00000000ULL) == NCVal) - break; - if ((NCVal & 0xFFFF000000000000ULL) == NCVal) - break; - return; - } - default: - return; - } - - // All assembler immediates are 64-bit integers. - Result = DAG.getTargetConstant(CVal, MVT::i64); - break; - } - - if (Result.getNode()) { - Ops.push_back(Result); - return; - } - - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); -} - -//===----------------------------------------------------------------------===// -// ARM64 Advanced SIMD Support -//===----------------------------------------------------------------------===// - -/// WidenVector - Given a value in the V64 register class, produce the -/// equivalent value in the V128 register class. -static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) { - EVT VT = V64Reg.getValueType(); - unsigned NarrowSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); - SDLoc DL(V64Reg); - - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy), - V64Reg, DAG.getConstant(0, MVT::i32)); -} - -/// getExtFactor - Determine the adjustment factor for the position when -/// generating an "extract from vector registers" instruction. -static unsigned getExtFactor(SDValue &V) { - EVT EltType = V.getValueType().getVectorElementType(); - return EltType.getSizeInBits() / 8; -} - -/// NarrowVector - Given a value in the V128 register class, produce the -/// equivalent value in the V64 register class. -static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { - EVT VT = V128Reg.getValueType(); - unsigned WideSize = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType().getSimpleVT(); - MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); - SDLoc DL(V128Reg); - - return DAG.getTargetExtractSubreg(ARM64::dsub, DL, NarrowTy, V128Reg); -} - -// Gather data to see if the operation can be modelled as a -// shuffle in combination with VEXTs. 
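-// For example (sketch): a v8i8 built from elements 5..7 of one source and
-// 0..4 of another is exactly a single EXT of the two sources starting at
-// element 5.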
-SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op,
-                                                SelectionDAG &DAG) const {
-  SDLoc dl(Op);
-  EVT VT = Op.getValueType();
-  unsigned NumElts = VT.getVectorNumElements();
-
-  SmallVector<SDValue, 2> SourceVecs;
-  SmallVector<unsigned, 2> MinElts;
-  SmallVector<unsigned, 2> MaxElts;
-
-  for (unsigned i = 0; i < NumElts; ++i) {
-    SDValue V = Op.getOperand(i);
-    if (V.getOpcode() == ISD::UNDEF)
-      continue;
-    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
-      // A shuffle can only come from building a vector from various
-      // elements of other vectors.
-      return SDValue();
-    }
-
-    // Record this extraction against the appropriate vector if possible...
-    SDValue SourceVec = V.getOperand(0);
-    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
-    bool FoundSource = false;
-    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
-      if (SourceVecs[j] == SourceVec) {
-        if (MinElts[j] > EltNo)
-          MinElts[j] = EltNo;
-        if (MaxElts[j] < EltNo)
-          MaxElts[j] = EltNo;
-        FoundSource = true;
-        break;
-      }
-    }
-
-    // Or record a new source if not...
-    if (!FoundSource) {
-      SourceVecs.push_back(SourceVec);
-      MinElts.push_back(EltNo);
-      MaxElts.push_back(EltNo);
-    }
-  }
-
-  // Currently only do something sane when at most two source vectors are
-  // involved.
-  if (SourceVecs.size() > 2)
-    return SDValue();
-
-  SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
-  int VEXTOffsets[2] = { 0, 0 };
-
-  // This loop extracts the usage patterns of the source vectors
-  // and prepares appropriate SDValues for a shuffle if possible.
-  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
-    if (SourceVecs[i].getValueType() == VT) {
-      // No VEXT necessary
-      ShuffleSrcs[i] = SourceVecs[i];
-      VEXTOffsets[i] = 0;
-      continue;
-    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
-      // It probably isn't worth padding out a smaller vector just to
-      // break it down again in a shuffle.
-      return SDValue();
-    }
-
-    // Don't attempt to extract subvectors from BUILD_VECTOR sources
-    // that expand or trunc the original value.
-    // TODO: We can try to bitcast and ANY_EXTEND the result but
-    // we need to consider the cost of vector ANY_EXTEND, and the
-    // legality of all the types.
-    if (SourceVecs[i].getValueType().getVectorElementType() !=
-        VT.getVectorElementType())
-      return SDValue();
-
-    // Since only 64-bit and 128-bit vectors are legal on ARM and
-    // we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts && - "unexpected vector sizes in ReconstructShuffle"); - - if (MaxElts[i] - MinElts[i] >= NumElts) { - // Span too large for a VEXT to cope - return SDValue(); - } - - if (MinElts[i] >= NumElts) { - // The extraction can just take the second half - VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); - } else if (MaxElts[i] < NumElts) { - // The extraction can just take the first half - VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - } else { - // An actual VEXT is needed - VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - SDValue VEXTSrc2 = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); - unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); - ShuffleSrcs[i] = DAG.getNode(ARM64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, - DAG.getConstant(Imm, MVT::i32)); - } - } - - SmallVector Mask; - - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Entry = Op.getOperand(i); - if (Entry.getOpcode() == ISD::UNDEF) { - Mask.push_back(-1); - continue; - } - - SDValue ExtractVec = Entry.getOperand(0); - int ExtractElt = - cast(Op.getOperand(i).getOperand(1))->getSExtValue(); - if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); - } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); - } - } - - // Final check before we try to produce nonsense... - if (isShuffleMaskLegal(Mask, VT)) - return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], - &Mask[0]); - - return SDValue(); -} - -// check if an EXT instruction can handle the shuffle mask when the -// vector sources of the shuffle are the same. -static bool isSingletonEXTMask(ArrayRef M, EVT VT, unsigned &Imm) { - unsigned NumElts = VT.getVectorNumElements(); - - // Assume that the first shuffle index is not UNDEF. Fail if it is. - if (M[0] < 0) - return false; - - Imm = M[0]; - - // If this is a VEXT shuffle, the immediate value is the index of the first - // element. The other shuffle indices must be the successive elements after - // the first one. - unsigned ExpectedElt = Imm; - for (unsigned i = 1; i < NumElts; ++i) { - // Increment the expected index. If it wraps around, just follow it - // back to index zero and keep going. - ++ExpectedElt; - if (ExpectedElt == NumElts) - ExpectedElt = 0; - - if (M[i] < 0) - continue; // ignore UNDEF indices - if (ExpectedElt != static_cast(M[i])) - return false; - } - - return true; -} - -// check if an EXT instruction can handle the shuffle mask when the -// vector sources of the shuffle are different. -static bool isEXTMask(ArrayRef M, EVT VT, bool &ReverseEXT, - unsigned &Imm) { - unsigned NumElts = VT.getVectorNumElements(); - ReverseEXT = false; - - // Assume that the first shuffle index is not UNDEF. Fail if it is. - if (M[0] < 0) - return false; - - Imm = M[0]; - - // If this is a VEXT shuffle, the immediate value is the index of the first - // element. The other shuffle indices must be the successive elements after - // the first one. - unsigned ExpectedElt = Imm; - for (unsigned i = 1; i < NumElts; ++i) { - // Increment the expected index. If it wraps around, it may still be - // a VEXT but the source vectors must be swapped. 
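-    // For example (sketch): for v4i16, the mask <6, 7, 0, 1> walks off the
-    // end of the concatenation, so it is an EXT of the swapped sources with
-    // Imm adjusted down to 2 below.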
-    ExpectedElt += 1;
-    if (ExpectedElt == NumElts * 2) {
-      ExpectedElt = 0;
-      ReverseEXT = true;
-    }
-
-    if (M[i] < 0)
-      continue; // ignore UNDEF indices
-    if (ExpectedElt != static_cast<unsigned>(M[i]))
-      return false;
-  }
-
-  // Adjust the index value if the source operands will be swapped.
-  if (ReverseEXT)
-    Imm -= NumElts;
-
-  return true;
-}
-
-/// isREVMask - Check if a vector shuffle corresponds to a REV
-/// instruction with the specified blocksize. (The order of the elements
-/// within each block of the vector is reversed.)
-static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
-  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
-         "Only possible block sizes for REV are: 16, 32, 64");
-
-  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
-  if (EltSz == 64)
-    return false;
-
-  unsigned NumElts = VT.getVectorNumElements();
-  unsigned BlockElts = M[0] + 1;
-  // If the first shuffle index is UNDEF, be optimistic.
-  if (M[0] < 0)
-    BlockElts = BlockSize / EltSz;
-
-  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
-    return false;
-
-  for (unsigned i = 0; i < NumElts; ++i) {
-    if (M[i] < 0)
-      continue; // ignore UNDEF indices
-    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
-      return false;
-  }
-
-  return true;
-}
-
-static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
-  unsigned NumElts = VT.getVectorNumElements();
-  WhichResult = (M[0] == 0 ? 0 : 1);
-  unsigned Idx = WhichResult * NumElts / 2;
-  for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
-        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
-      return false;
-    Idx += 1;
-  }
-
-  return true;
-}
-
-static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
-  unsigned NumElts = VT.getVectorNumElements();
-  WhichResult = (M[0] == 0 ? 0 : 1);
-  for (unsigned i = 0; i != NumElts; ++i) {
-    if (M[i] < 0)
-      continue; // ignore UNDEF indices
-    if ((unsigned)M[i] != 2 * i + WhichResult)
-      return false;
-  }
-
-  return true;
-}
-
-static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
-  unsigned NumElts = VT.getVectorNumElements();
-  WhichResult = (M[0] == 0 ? 0 : 1);
-  for (unsigned i = 0; i < NumElts; i += 2) {
-    if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
-        (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
-      return false;
-  }
-  return true;
-}
-
-/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
-/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
-/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
-static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT,
-                               unsigned &WhichResult) {
-  unsigned NumElts = VT.getVectorNumElements();
-  WhichResult = (M[0] == 0 ? 0 : 1);
-  unsigned Idx = WhichResult * NumElts / 2;
-  for (unsigned i = 0; i != NumElts; i += 2) {
-    if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
-        (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
-      return false;
-    Idx += 1;
-  }
-
-  return true;
-}
-
-/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
-/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
-/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
-static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT,
-                               unsigned &WhichResult) {
-  unsigned Half = VT.getVectorNumElements() / 2;
-  WhichResult = (M[0] == 0 ? 0 : 1);
-  for (unsigned j = 0; j != 2; ++j) {
-    unsigned Idx = WhichResult;
-    for (unsigned i = 0; i != Half; ++i) {
-      int MIdx = M[i + j * Half];
-      if (MIdx >= 0 && (unsigned)MIdx != Idx)
-        return false;
-      Idx += 2;
-    }
-  }
-
-  return true;
-}
-
-/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
-/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
-/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
-static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT,
-                               unsigned &WhichResult) {
-  unsigned NumElts = VT.getVectorNumElements();
-  WhichResult = (M[0] == 0 ? 0 : 1);
-  for (unsigned i = 0; i < NumElts; i += 2) {
-    if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
-        (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
-      return false;
-  }
-  return true;
-}
-
-/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
-/// the specified operations to build the shuffle.
-static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
-                                      SDValue RHS, SelectionDAG &DAG,
-                                      SDLoc dl) {
-  unsigned OpNum = (PFEntry >> 26) & 0x0F;
-  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
-  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
-
-  enum {
-    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
-    OP_VREV,
-    OP_VDUP0,
-    OP_VDUP1,
-    OP_VDUP2,
-    OP_VDUP3,
-    OP_VEXT1,
-    OP_VEXT2,
-    OP_VEXT3,
-    OP_VUZPL, // VUZP, left result
-    OP_VUZPR, // VUZP, right result
-    OP_VZIPL, // VZIP, left result
-    OP_VZIPR, // VZIP, right result
-    OP_VTRNL, // VTRN, left result
-    OP_VTRNR  // VTRN, right result
-  };
-
-  if (OpNum == OP_COPY) {
-    if (LHSID == (1 * 9 + 2) * 9 + 3)
-      return LHS;
-    assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
-    return RHS;
-  }
-
-  SDValue OpLHS, OpRHS;
-  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
-  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
-  EVT VT = OpLHS.getValueType();
-
-  switch (OpNum) {
-  default:
-    llvm_unreachable("Unknown shuffle opcode!");
-  case OP_VREV:
-    // VREV divides the vector in half and swaps within the half.
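-    // For example, the v4i32 mask <1, 0, 3, 2> swaps adjacent 32-bit
-    // elements, i.e. it reverses within each 64-bit block: REV64.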
- if (VT.getVectorElementType() == MVT::i32 || - VT.getVectorElementType() == MVT::f32) - return DAG.getNode(ARM64ISD::REV64, dl, VT, OpLHS); - // vrev <4 x i16> -> REV32 - if (VT.getVectorElementType() == MVT::i16) - return DAG.getNode(ARM64ISD::REV32, dl, VT, OpLHS); - // vrev <4 x i8> -> REV16 - assert(VT.getVectorElementType() == MVT::i8); - return DAG.getNode(ARM64ISD::REV16, dl, VT, OpLHS); - case OP_VDUP0: - case OP_VDUP1: - case OP_VDUP2: - case OP_VDUP3: { - EVT EltTy = VT.getVectorElementType(); - unsigned Opcode; - if (EltTy == MVT::i8) - Opcode = ARM64ISD::DUPLANE8; - else if (EltTy == MVT::i16) - Opcode = ARM64ISD::DUPLANE16; - else if (EltTy == MVT::i32 || EltTy == MVT::f32) - Opcode = ARM64ISD::DUPLANE32; - else if (EltTy == MVT::i64 || EltTy == MVT::f64) - Opcode = ARM64ISD::DUPLANE64; - else - llvm_unreachable("Invalid vector element type?"); - - if (VT.getSizeInBits() == 64) - OpLHS = WidenVector(OpLHS, DAG); - SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64); - return DAG.getNode(Opcode, dl, VT, OpLHS, Lane); - } - case OP_VEXT1: - case OP_VEXT2: - case OP_VEXT3: { - unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS); - return DAG.getNode(ARM64ISD::EXT, dl, VT, OpLHS, OpRHS, - DAG.getConstant(Imm, MVT::i32)); - } - case OP_VUZPL: - return DAG.getNode(ARM64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VUZPR: - return DAG.getNode(ARM64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VZIPL: - return DAG.getNode(ARM64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VZIPR: - return DAG.getNode(ARM64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VTRNL: - return DAG.getNode(ARM64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - case OP_VTRNR: - return DAG.getNode(ARM64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS); - } -} - -static SDValue GenerateTBL(SDValue Op, ArrayRef ShuffleMask, - SelectionDAG &DAG) { - // Check to see if we can use the TBL instruction. - SDValue V1 = Op.getOperand(0); - SDValue V2 = Op.getOperand(1); - SDLoc DL(Op); - - EVT EltVT = Op.getValueType().getVectorElementType(); - unsigned BytesPerElt = EltVT.getSizeInBits() / 8; - - SmallVector TBLMask; - for (int Val : ShuffleMask) { - for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { - unsigned Offset = Byte + Val * BytesPerElt; - TBLMask.push_back(DAG.getConstant(Offset, MVT::i32)); - } - } - - MVT IndexVT = MVT::v8i8; - unsigned IndexLen = 8; - if (Op.getValueType().getSizeInBits() == 128) { - IndexVT = MVT::v16i8; - IndexLen = 16; - } - - SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1); - SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2); - - SDValue Shuffle; - if (V2.getNode()->getOpcode() == ISD::UNDEF) { - if (IndexLen == 8) - V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst); - Shuffle = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, - DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen)); - } else { - if (IndexLen == 8) { - V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst); - Shuffle = DAG.getNode( - ISD::INTRINSIC_WO_CHAIN, DL, IndexVT, - DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst, - DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen)); - } else { - // FIXME: We cannot, for the moment, emit a TBL2 instruction because we - // cannot currently represent the register constraints on the input - // table registers. 
-      // Shuffle = DAG.getNode(ARM64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
-      //                       DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
-      //                                   &TBLMask[0], IndexLen));
-      Shuffle = DAG.getNode(
-          ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
-          DAG.getConstant(Intrinsic::arm64_neon_tbl2, MVT::i32), V1Cst, V2Cst,
-          DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen));
-    }
-  }
-  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
-}
-
-static unsigned getDUPLANEOp(EVT EltType) {
-  if (EltType == MVT::i8)
-    return ARM64ISD::DUPLANE8;
-  if (EltType == MVT::i16)
-    return ARM64ISD::DUPLANE16;
-  if (EltType == MVT::i32 || EltType == MVT::f32)
-    return ARM64ISD::DUPLANE32;
-  if (EltType == MVT::i64 || EltType == MVT::f64)
-    return ARM64ISD::DUPLANE64;
-
-  llvm_unreachable("Invalid vector element type?");
-}
-
-SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-  SDLoc dl(Op);
-  EVT VT = Op.getValueType();
-
-  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
-
-  // Convert shuffles that are directly supported on NEON to target-specific
-  // DAG nodes, instead of keeping them as shuffles and matching them again
-  // during code selection. This is more efficient and avoids the possibility
-  // of inconsistencies between legalization and selection.
-  ArrayRef<int> ShuffleMask = SVN->getMask();
-
-  SDValue V1 = Op.getOperand(0);
-  SDValue V2 = Op.getOperand(1);
-
-  if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0],
-                                       V1.getValueType().getSimpleVT())) {
-    int Lane = SVN->getSplatIndex();
-    // If this is undef splat, generate it via "just" vdup, if possible.
-    if (Lane == -1)
-      Lane = 0;
-
-    if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
-      return DAG.getNode(ARM64ISD::DUP, dl, V1.getValueType(),
-                         V1.getOperand(0));
-    // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
-    // constant. If so, we can just reference the lane's definition directly.
-    if (V1.getOpcode() == ISD::BUILD_VECTOR &&
-        !isa<ConstantSDNode>(V1.getOperand(Lane)))
-      return DAG.getNode(ARM64ISD::DUP, dl, VT, V1.getOperand(Lane));
-
-    // Otherwise, duplicate from the lane of the input vector.
-    unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
-
-    // SelectionDAGBuilder may have "helpfully" already extracted or concatenated
-    // to make a vector of the same size as this SHUFFLE. We can ignore the
-    // extract entirely, and canonicalise the concat using WidenVector.
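-    // For example, a splat of lane 1 of (extract_subvector V, 4) taken from
-    // a v8i16 V becomes a DUPLANE16 of lane 5 of V itself.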
- if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) { - Lane += cast(V1.getOperand(1))->getZExtValue(); - V1 = V1.getOperand(0); - } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) { - unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2; - Lane -= Idx * VT.getVectorNumElements() / 2; - V1 = WidenVector(V1.getOperand(Idx), DAG); - } else if (VT.getSizeInBits() == 64) - V1 = WidenVector(V1, DAG); - - return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64)); - } - - if (isREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARM64ISD::REV64, dl, V1.getValueType(), V1, V2); - if (isREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARM64ISD::REV32, dl, V1.getValueType(), V1, V2); - if (isREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARM64ISD::REV16, dl, V1.getValueType(), V1, V2); - - bool ReverseEXT = false; - unsigned Imm; - if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) { - if (ReverseEXT) - std::swap(V1, V2); - Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V2, - DAG.getConstant(Imm, MVT::i32)); - } else if (V2->getOpcode() == ISD::UNDEF && - isSingletonEXTMask(ShuffleMask, VT, Imm)) { - Imm *= getExtFactor(V1); - return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V1, - DAG.getConstant(Imm, MVT::i32)); - } - - unsigned WhichResult; - if (isZIPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - if (isUZPMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - if (isTRNMask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2); - } - - if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) { - unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2; - return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1); - } - - // If the shuffle is not directly supported and it has 4 elements, use - // the PerfectShuffle-generated table to synthesize it from other shuffles. - unsigned NumElts = VT.getVectorNumElements(); - if (NumElts == 4) { - unsigned PFIndexes[4]; - for (unsigned i = 0; i != 4; ++i) { - if (ShuffleMask[i] < 0) - PFIndexes[i] = 8; - else - PFIndexes[i] = ShuffleMask[i]; - } - - // Compute the index in the perfect shuffle table. 
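-    // Each index is a base-9 digit (0-7 for a lane of either source, 8 for
-    // undef), so e.g. the mask <0, 0, 1, 1> encodes as
-    // 0*729 + 0*81 + 1*9 + 1 == 10.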
- unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + - PFIndexes[2] * 9 + PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - if (Cost <= 4) - return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); - } - - return GenerateTBL(Op, ShuffleMask, DAG); -} - -static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, - APInt &UndefBits) { - EVT VT = BVN->getValueType(0); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - unsigned NumSplats = VT.getSizeInBits() / SplatBitSize; - - for (unsigned i = 0; i < NumSplats; ++i) { - CnstBits <<= SplatBitSize; - UndefBits <<= SplatBitSize; - CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits()); - UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits()); - } - - return true; - } - - return false; -} - -SDValue ARM64TargetLowering::LowerVectorAND(SDValue Op, - SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = - dyn_cast(Op.getOperand(1).getNode()); - SDValue LHS = Op.getOperand(0); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - if (!BVN) - return Op; - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We only have BIC vector immediate instruction, which is and-not. - CnstBits = ~CnstBits; - - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
-        return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
-      }
-
-      if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) {
-        CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
-        return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
-      }
-    }
-
-    if (SecondTry)
-      goto FailedModImm;
-    SecondTry = true;
-    CnstBits = ~UndefBits;
-    goto AttemptModImm;
-  }
-
-// We can always fall back to a non-immediate AND.
-FailedModImm:
-  return Op;
-}
-
-// Specialized code to quickly find if PotentialBVec is a BuildVector that
-// consists of only the same constant int value, returned in reference arg
-// ConstVal
-static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
-                                     uint64_t &ConstVal) {
-  BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
-  if (!Bvec)
-    return false;
-  ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
-  if (!FirstElt)
-    return false;
-  EVT VT = Bvec->getValueType(0);
-  unsigned NumElts = VT.getVectorNumElements();
-  for (unsigned i = 1; i < NumElts; ++i)
-    if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
-      return false;
-  ConstVal = FirstElt->getZExtValue();
-  return true;
-}
-
-static unsigned getIntrinsicID(const SDNode *N) {
-  unsigned Opcode = N->getOpcode();
-  switch (Opcode) {
-  default:
-    return Intrinsic::not_intrinsic;
-  case ISD::INTRINSIC_WO_CHAIN: {
-    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
-    if (IID < Intrinsic::num_intrinsics)
-      return IID;
-    return Intrinsic::not_intrinsic;
-  }
-  }
-}
-
-// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
-// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
-// BUILD_VECTOR with constant element C1, C2 is a constant, and C1 == ~C2.
-// Also, logical shift right -> sri, with the same structure.
-static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
-  EVT VT = N->getValueType(0);
-
-  if (!VT.isVector())
-    return SDValue();
-
-  SDLoc DL(N);
-
-  // Is the first op an AND?
-  const SDValue And = N->getOperand(0);
-  if (And.getOpcode() != ISD::AND)
-    return SDValue();
-
-  // Is the second op a shl or lshr?
-  SDValue Shift = N->getOperand(1);
-  // This will have been turned into: ARM64ISD::VSHL vector, #shift
-  // or ARM64ISD::VLSHR vector, #shift
-  unsigned ShiftOpc = Shift.getOpcode();
-  if ((ShiftOpc != ARM64ISD::VSHL && ShiftOpc != ARM64ISD::VLSHR))
-    return SDValue();
-  bool IsShiftRight = ShiftOpc == ARM64ISD::VLSHR;
-
-  // Is the shift amount constant?
-  ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
-  if (!C2node)
-    return SDValue();
-
-  // Is the and mask vector all constant?
-  uint64_t C1;
-  if (!isAllConstantBuildVector(And.getOperand(1), C1))
-    return SDValue();
-
-  // Is C1 == ~C2, taking into account how much one can shift elements of a
-  // particular size?
-  uint64_t C2 = C2node->getZExtValue();
-  unsigned ElemSizeInBits = VT.getVectorElementType().getSizeInBits();
-  if (C2 > ElemSizeInBits)
-    return SDValue();
-  unsigned ElemMask = (1 << ElemSizeInBits) - 1;
-  if ((C1 & ElemMask) != (~C2 & ElemMask))
-    return SDValue();
-
-  SDValue X = And.getOperand(0);
-  SDValue Y = Shift.getOperand(0);
-
-  unsigned Intrin =
-      IsShiftRight ?
Intrinsic::arm64_neon_vsri : Intrinsic::arm64_neon_vsli; - SDValue ResultSLI = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1)); - - DEBUG(dbgs() << "arm64-lower: transformed: \n"); - DEBUG(N->dump(&DAG)); - DEBUG(dbgs() << "into: \n"); - DEBUG(ResultSLI->dump(&DAG)); - - ++NumShiftInserts; - return ResultSLI; -} - -SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op, - SelectionDAG &DAG) const { - // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) - if (EnableARM64SlrGeneration) { - SDValue Res = tryLowerToSLI(Op.getNode(), DAG); - if (Res.getNode()) - return Res; - } - - BuildVectorSDNode *BVN = - dyn_cast(Op.getOperand(0).getNode()); - SDValue LHS = Op.getOperand(1); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - // OR commutes, so try swapping the operands. - if (!BVN) { - LHS = Op.getOperand(0); - BVN = dyn_cast(Op.getOperand(1).getNode()); - } - if (!BVN) - return Op; - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - } - - if (SecondTry) - goto FailedModImm; - SecondTry = true; - CnstBits = UndefBits; - goto AttemptModImm; - } - -// We can always fall back to a non-immediate OR. -FailedModImm: - return Op; -} - -SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = cast(Op.getNode()); - SDLoc dl(Op); - EVT VT = Op.getValueType(); - - APInt CnstBits(VT.getSizeInBits(), 0); - APInt UndefBits(VT.getSizeInBits(), 0); - if (resolveBuildVector(BVN, CnstBits, UndefBits)) { - // We make use of a little bit of goto ickiness in order to avoid having to - // duplicate the immediate matching logic for the undef toggled case. - bool SecondTry = false; - AttemptModImm: - - if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) { - CnstBits = CnstBits.zextOrTrunc(64); - uint64_t CnstVal = CnstBits.getZExtValue(); - - // Certain magic vector constants (used to express things like NOT - // and NEG) are passed through unmodified. This allows codegen patterns - // for these operations to match. Special-purpose patterns will lower - // these immediates to MOVIs if it proves necessary. - if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL)) - return Op; - - // The many faces of MOVI... - if (ARM64_AM::isAdvSIMDModImmType10(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType10(CnstVal); - if (VT.getSizeInBits() == 128) { - SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::v2i64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // Support the V64 version via subregister insertion. - SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::f64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(24, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16; - SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(264, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(272, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType9(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType9(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8; - SDValue Mov = DAG.getNode(ARM64ISD::MOVI, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // The few faces of FMOV... - if (ARM64_AM::isAdvSIMDModImmType11(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType11(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32; - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType12(CnstVal) && - VT.getSizeInBits() == 128) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType12(CnstVal); - SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MVT::v2f64, - DAG.getConstant(CnstVal, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - // The many faces of MVNI... - CnstVal = ~CnstVal; - if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(0, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(8, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32; - SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy, - DAG.getConstant(CnstVal, MVT::i32), - DAG.getConstant(16, MVT::i32)); - return DAG.getNode(ISD::BITCAST, dl, VT, Mov); - } - - if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) { - CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal); - MVT MovTy = (VT.getSizeInBits() == 128) ? 
MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(24, MVT::i32));
-        return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
-      }
-
-      if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) {
-        CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
-        return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
-      }
-
-      if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) {
-        CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
-        SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
-        return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
-      }
-
-      if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) {
-        CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(264, MVT::i32));
-        return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
-      }
-
-      if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) {
-        CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal);
-        MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
-        SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(272, MVT::i32));
-        return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
-      }
-    }
-
-    if (SecondTry)
-      goto FailedModImm;
-    SecondTry = true;
-    CnstBits = UndefBits;
-    goto AttemptModImm;
-  }
-FailedModImm:
-
-  // Scan through the operands to find some interesting properties we can
-  // exploit:
-  //   1) If only one value is used, we can use a DUP, or
-  //   2) if only the low element is not undef, we can just insert that, or
-  //   3) if only one constant value is used (w/ some non-constant lanes),
-  //      we can splat the constant value into the whole vector then fill
-  //      in the non-constant lanes.
-  //   4) FIXME: If different constant values are used, but we can intelligently
-  //      select the values we'll be overwriting for the non-constant
-  //      lanes such that we can directly materialize the vector
-  //      some other way (MOVI, e.g.), we can be sneaky.
-  unsigned NumElts = VT.getVectorNumElements();
-  bool isOnlyLowElement = true;
-  bool usesOnlyOneValue = true;
-  bool usesOnlyOneConstantValue = true;
-  bool isConstant = true;
-  unsigned NumConstantLanes = 0;
-  SDValue Value;
-  SDValue ConstantValue;
-  for (unsigned i = 0; i < NumElts; ++i) {
-    SDValue V = Op.getOperand(i);
-    if (V.getOpcode() == ISD::UNDEF)
-      continue;
-    if (i > 0)
-      isOnlyLowElement = false;
-    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
-      isConstant = false;
-
-    if (isa<ConstantSDNode>(V)) {
-      ++NumConstantLanes;
-      if (!ConstantValue.getNode())
-        ConstantValue = V;
-      else if (ConstantValue != V)
-        usesOnlyOneConstantValue = false;
-    }
-
-    if (!Value.getNode())
-      Value = V;
-    else if (V != Value)
-      usesOnlyOneValue = false;
-  }
-
-  if (!Value.getNode())
-    return DAG.getUNDEF(VT);
-
-  if (isOnlyLowElement)
-    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
-
-  // Use DUP for non-constant splats. For f32 constant splats, reduce to
-  // i32 and try again.
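-  // For example, (build_vector x, x, x, x) with non-constant x becomes a
-  // single DUP below, while a splat of the constant 1.0f is retried as a
-  // v4i32 splat of its bit pattern 0x3f800000.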
- if (usesOnlyOneValue) { - if (!isConstant) { - if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - Value.getValueType() != VT) - return DAG.getNode(ARM64ISD::DUP, dl, VT, Value); - - // This is actually a DUPLANExx operation, which keeps everything vectory. - - // DUPLANE works on 128-bit vectors, widen it if necessary. - SDValue Lane = Value.getOperand(1); - Value = Value.getOperand(0); - if (Value.getValueType().getSizeInBits() == 64) - Value = WidenVector(Value, DAG); - - unsigned Opcode = getDUPLANEOp(VT.getVectorElementType()); - return DAG.getNode(Opcode, dl, VT, Value, Lane); - } - - if (VT.getVectorElementType().isFloatingPoint()) { - SmallVector Ops; - MVT NewType = - (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; - for (unsigned i = 0; i < NumElts; ++i) - Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i))); - EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts); - SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); - Val = LowerBUILD_VECTOR(Val, DAG); - if (Val.getNode()) - return DAG.getNode(ISD::BITCAST, dl, VT, Val); - } - } - - // If there was only one constant value used and for more than one lane, - // start by splatting that value, then replace the non-constant lanes. This - // is better than the default, which will perform a separate initialization - // for each lane. - if (NumConstantLanes > 0 && usesOnlyOneConstantValue) { - SDValue Val = DAG.getNode(ARM64ISD::DUP, dl, VT, ConstantValue); - // Now insert the non-constant lanes. - for (unsigned i = 0; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - if (!isa(V)) { - // Note that type legalization likely mucked about with the VT of the - // source operand, so we may have to convert it here before inserting. - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx); - } - } - return Val; - } - - // If all elements are constants and the case above didn't get hit, fall back - // to the default expansion, which will generate a load from the constant - // pool. - if (isConstant) - return SDValue(); - - // Empirical tests suggest this is rarely worth it for vectors of length <= 2. - if (NumElts >= 4) { - SDValue shuffle = ReconstructShuffle(Op, DAG); - if (shuffle != SDValue()) - return shuffle; - } - - // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we - // know the default expansion would otherwise fall back on something even - // worse. For a vector with one or two non-undef values, that's - // scalar_to_vector for the elements followed by a shuffle (provided the - // shuffle is valid for the target) and materialization element by element - // on the stack followed by a load for everything else. - if (!isConstant && !usesOnlyOneValue) { - SDValue Vec = DAG.getUNDEF(VT); - SDValue Op0 = Op.getOperand(0); - unsigned ElemSize = VT.getVectorElementType().getSizeInBits(); - unsigned i = 0; - // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to - // a) Avoid a RMW dependency on the full vector register, and - // b) Allow the register coalescer to fold away the copy if the - // value is already in an S or D register. - if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) { - unsigned SubIdx = ElemSize == 32 ? 
ARM64::ssub : ARM64::dsub; - MachineSDNode *N = - DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, - DAG.getTargetConstant(SubIdx, MVT::i32)); - Vec = SDValue(N, 0); - ++i; - } - for (; i < NumElts; ++i) { - SDValue V = Op.getOperand(i); - if (V.getOpcode() == ISD::UNDEF) - continue; - SDValue LaneIdx = DAG.getConstant(i, MVT::i64); - Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); - } - return Vec; - } - - // Just use the default expansion. We failed to find a better alternative. - return SDValue(); -} - -SDValue ARM64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!"); - - // Check for non-constant lane. - if (!isa(Op.getOperand(2))) - return SDValue(); - - EVT VT = Op.getOperand(0).getValueType(); - - // Insertion/extraction are legal for V128 types. - if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || - VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) - return Op; - - if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && - VT != MVT::v1i64 && VT != MVT::v2f32) - return SDValue(); - - // For V64 types, we perform insertion by expanding the value - // to a V128 type and perform the insertion on that. - SDLoc DL(Op); - SDValue WideVec = WidenVector(Op.getOperand(0), DAG); - EVT WideTy = WideVec.getValueType(); - - SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec, - Op.getOperand(1), Op.getOperand(2)); - // Re-narrow the resultant vector. - return NarrowVector(Node, DAG); -} - -SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!"); - - // Check for non-constant lane. - if (!isa(Op.getOperand(1))) - return SDValue(); - - EVT VT = Op.getOperand(0).getValueType(); - - // Insertion/extraction are legal for V128 types. - if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 || - VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64) - return Op; - - if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 && - VT != MVT::v1i64 && VT != MVT::v2f32) - return SDValue(); - - // For V64 types, we perform extraction by expanding the value - // to a V128 type and perform the extraction on that. - SDLoc DL(Op); - SDValue WideVec = WidenVector(Op.getOperand(0), DAG); - EVT WideTy = WideVec.getValueType(); - - EVT ExtrTy = WideTy.getVectorElementType(); - if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8) - ExtrTy = MVT::i32; - - // For extractions, we just return the result directly. - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec, - Op.getOperand(1)); -} - -SDValue ARM64TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) const { - assert(Op.getOpcode() == ISD::SCALAR_TO_VECTOR && "Unknown opcode!"); - // Some AdvSIMD intrinsics leave their results in the scalar B/H/S/D - // registers. The default lowering will copy those to a GPR then back - // to a vector register. Instead, just recognize those cases and reference - // the vector register they're already a subreg of. - SDValue Op0 = Op->getOperand(0); - if (Op0->getOpcode() != ISD::INTRINSIC_WO_CHAIN) - return Op; - unsigned IID = getIntrinsicID(Op0.getNode()); - // The below list of intrinsics isn't exhaustive. Add cases as-needed. - // FIXME: Even better would be if there were an attribute on the node - // that we could query and set in the intrinsics definition or something. 
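-  // For example, an across-vector add whose i32 result is produced in an S
-  // register can presumably be referenced via ssub rather than being copied
-  // out to a GPR and back.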
- unsigned SubIdx; - switch (IID) { - default: - // Early exit if this isn't one of the intrinsics we handle. - return Op; - case Intrinsic::arm64_neon_uaddv: - case Intrinsic::arm64_neon_saddv: - case Intrinsic::arm64_neon_uaddlv: - case Intrinsic::arm64_neon_saddlv: - switch (Op0.getValueType().getSizeInBits()) { - default: - llvm_unreachable("Illegal result size from ARM64 vector intrinsic!"); - case 8: - SubIdx = ARM64::bsub; - break; - case 16: - SubIdx = ARM64::hsub; - break; - case 32: - SubIdx = ARM64::ssub; - break; - case 64: - SubIdx = ARM64::dsub; - break; - } - } - MachineSDNode *N = - DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(Op), - Op.getValueType(), DAG.getUNDEF(Op.getValueType()), - Op0, DAG.getTargetConstant(SubIdx, MVT::i32)); - return SDValue(N, 0); -} - -SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getOperand(0).getValueType(); - SDLoc dl(Op); - // Just in case... - if (!VT.isVector()) - return SDValue(); - - ConstantSDNode *Cst = dyn_cast(Op.getOperand(1)); - if (!Cst) - return SDValue(); - unsigned Val = Cst->getZExtValue(); - - unsigned Size = Op.getValueType().getSizeInBits(); - if (Val == 0) { - switch (Size) { - case 8: - return DAG.getTargetExtractSubreg(ARM64::bsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 16: - return DAG.getTargetExtractSubreg(ARM64::hsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 32: - return DAG.getTargetExtractSubreg(ARM64::ssub, dl, Op.getValueType(), - Op.getOperand(0)); - case 64: - return DAG.getTargetExtractSubreg(ARM64::dsub, dl, Op.getValueType(), - Op.getOperand(0)); - default: - llvm_unreachable("Unexpected vector type in extract_subvector!"); - } - } - // If this is extracting the upper 64-bits of a 128-bit vector, we match - // that directly. - if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64) - return Op; - - return SDValue(); -} - -bool ARM64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, - EVT VT) const { - if (VT.getVectorNumElements() == 4 && - (VT.is128BitVector() || VT.is64BitVector())) { - unsigned PFIndexes[4]; - for (unsigned i = 0; i != 4; ++i) { - if (M[i] < 0) - PFIndexes[i] = 8; - else - PFIndexes[i] = M[i]; - } - - // Compute the index in the perfect shuffle table. - unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 + - PFIndexes[2] * 9 + PFIndexes[3]; - unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; - unsigned Cost = (PFEntry >> 30); - - if (Cost <= 4) - return true; - } - - bool ReverseVEXT; - unsigned Imm, WhichResult; - - return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) || - isREVMask(M, VT, 32) || isREVMask(M, VT, 16) || - isEXTMask(M, VT, ReverseVEXT, Imm) || - // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM. - isTRNMask(M, VT, WhichResult) || isUZPMask(M, VT, WhichResult) || - isZIPMask(M, VT, WhichResult) || - isTRN_v_undef_Mask(M, VT, WhichResult) || - isUZP_v_undef_Mask(M, VT, WhichResult) || - isZIP_v_undef_Mask(M, VT, WhichResult)); -} - -/// getVShiftImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift operation, where all the elements of the -/// build_vector must have the same constant integer value. -static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { - // Ignore bit_converts. 
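-  // For example, the v4i16 splat (build_vector 3, 3, 3, 3) resolves to
-  // Cnt == 3, which isVShiftLImm accepts for 16-bit elements (0 <= 3 < 16).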
- while (Op.getOpcode() == ISD::BITCAST) - Op = Op.getOperand(0); - BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, - HasAnyUndefs, ElementBits) || - SplatBitSize > ElementBits) - return false; - Cnt = SplatBits.getSExtValue(); - return true; -} - -/// isVShiftLImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift left operation. That value must be in the range: -/// 0 <= Value < ElementBits for a left shift; or -/// 0 <= Value <= ElementBits for a long left shift. -static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits); -} - -/// isVShiftRImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift right operation. For a shift opcode, the value -/// is positive, but for an intrinsic the value count must be negative. The -/// absolute value must be in the range: -/// 1 <= |Value| <= ElementBits for a right shift; or -/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. -static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, - int64_t &Cnt) { - assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); - if (!getVShiftImm(Op, ElementBits, Cnt)) - return false; - if (isIntrinsic) - Cnt = -Cnt; - return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits)); -} - -SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); - int64_t Cnt; - - if (!Op.getOperand(1).getValueType().isVector()) - return Op; - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); - - switch (Op.getOpcode()) { - default: - llvm_unreachable("unexpected shift opcode"); - - case ISD::SHL: - if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) - return DAG.getNode(ARM64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0), - DAG.getConstant(Cnt, MVT::i32)); - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrinsic::arm64_neon_ushl, MVT::i32), - Op.getOperand(0), Op.getOperand(1)); - case ISD::SRA: - case ISD::SRL: - // Right shift immediate - if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) && - Cnt < EltSize) { - unsigned Opc = - (Op.getOpcode() == ISD::SRA) ? ARM64ISD::VASHR : ARM64ISD::VLSHR; - return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0), - DAG.getConstant(Cnt, MVT::i32)); - } - - // Right shift register. Note, there is not a shift right register - // instruction, but the shift left register instruction takes a signed - // value, where negative numbers specify a right shift. - unsigned Opc = (Op.getOpcode() == ISD::SRA) ? 
Intrinsic::arm64_neon_sshl
-                                                 : Intrinsic::arm64_neon_ushl;
-  // negate the shift amount
-  SDValue NegShift = DAG.getNode(ARM64ISD::NEG, DL, VT, Op.getOperand(1));
-  SDValue NegShiftLeft =
-      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
-                  DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift);
-  return NegShiftLeft;
-  }
-
-  return SDValue();
-}
-
-static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
-                                    ARM64CC::CondCode CC, bool NoNans, EVT VT,
-                                    SDLoc dl, SelectionDAG &DAG) {
-  EVT SrcVT = LHS.getValueType();
-
-  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
-  APInt CnstBits(VT.getSizeInBits(), 0);
-  APInt UndefBits(VT.getSizeInBits(), 0);
-  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
-  bool IsZero = IsCnst && (CnstBits == 0);
-
-  if (SrcVT.getVectorElementType().isFloatingPoint()) {
-    switch (CC) {
-    default:
-      return SDValue();
-    case ARM64CC::NE: {
-      SDValue Fcmeq;
-      if (IsZero)
-        Fcmeq = DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS);
-      else
-        Fcmeq = DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS);
-      return DAG.getNode(ARM64ISD::NOT, dl, VT, Fcmeq);
-    }
-    case ARM64CC::EQ:
-      if (IsZero)
-        return DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS);
-      return DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS);
-    case ARM64CC::GE:
-      if (IsZero)
-        return DAG.getNode(ARM64ISD::FCMGEz, dl, VT, LHS);
-      return DAG.getNode(ARM64ISD::FCMGE, dl, VT, LHS, RHS);
-    case ARM64CC::GT:
-      if (IsZero)
-        return DAG.getNode(ARM64ISD::FCMGTz, dl, VT, LHS);
-      return DAG.getNode(ARM64ISD::FCMGT, dl, VT, LHS, RHS);
-    case ARM64CC::LS:
-      if (IsZero)
-        return DAG.getNode(ARM64ISD::FCMLEz, dl, VT, LHS);
-      return DAG.getNode(ARM64ISD::FCMGE, dl, VT, RHS, LHS);
-    case ARM64CC::LT:
-      if (!NoNans)
-        return SDValue();
-      // If we ignore NaNs then we can use the MI implementation.
-      // Fallthrough.
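-      // (An unordered compare satisfies LT but not MI, so the swapped-operand
-      // FCMGT below would give the wrong answer in the presence of NaNs.)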
- case ARM64CC::MI: - if (IsZero) - return DAG.getNode(ARM64ISD::FCMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::FCMGT, dl, VT, RHS, LHS); - } - } - - switch (CC) { - default: - return SDValue(); - case ARM64CC::NE: { - SDValue Cmeq; - if (IsZero) - Cmeq = DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); - else - Cmeq = DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - return DAG.getNode(ARM64ISD::NOT, dl, VT, Cmeq); - } - case ARM64CC::EQ: - if (IsZero) - return DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS); - case ARM64CC::GE: - if (IsZero) - return DAG.getNode(ARM64ISD::CMGEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, LHS, RHS); - case ARM64CC::GT: - if (IsZero) - return DAG.getNode(ARM64ISD::CMGTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, LHS, RHS); - case ARM64CC::LE: - if (IsZero) - return DAG.getNode(ARM64ISD::CMLEz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGE, dl, VT, RHS, LHS); - case ARM64CC::LS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, RHS, LHS); - case ARM64CC::CC: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, RHS, LHS); - case ARM64CC::LT: - if (IsZero) - return DAG.getNode(ARM64ISD::CMLTz, dl, VT, LHS); - return DAG.getNode(ARM64ISD::CMGT, dl, VT, RHS, LHS); - case ARM64CC::HI: - return DAG.getNode(ARM64ISD::CMHI, dl, VT, LHS, RHS); - case ARM64CC::CS: - return DAG.getNode(ARM64ISD::CMHS, dl, VT, LHS, RHS); - } -} - -SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDLoc dl(Op); - - if (LHS.getValueType().getVectorElementType().isInteger()) { - assert(LHS.getValueType() == RHS.getValueType()); - ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC); - return EmitVectorComparison(LHS, RHS, ARM64CC, false, Op.getValueType(), dl, - DAG); - } - - assert(LHS.getValueType().getVectorElementType() == MVT::f32 || - LHS.getValueType().getVectorElementType() == MVT::f64); - - // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally - // clean. Some of them require two branches to implement. - ARM64CC::CondCode CC1, CC2; - changeFPCCToARM64CC(CC, CC1, CC2); - - bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath; - SDValue Cmp1 = - EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG); - if (!Cmp1.getNode()) - return SDValue(); - - if (CC2 != ARM64CC::AL) { - SDValue Cmp2 = - EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG); - if (!Cmp2.getNode()) - return SDValue(); - - return DAG.getNode(ISD::OR, dl, Cmp1.getValueType(), Cmp1, Cmp2); - } - - return Cmp1; -} - -/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as -/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment -/// specified in the intrinsic calls. -bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - unsigned Intrinsic) const { - switch (Intrinsic) { - case Intrinsic::arm64_neon_ld2: - case Intrinsic::arm64_neon_ld3: - case Intrinsic::arm64_neon_ld4: - case Intrinsic::arm64_neon_ld2lane: - case Intrinsic::arm64_neon_ld3lane: - case Intrinsic::arm64_neon_ld4lane: - case Intrinsic::arm64_neon_ld2r: - case Intrinsic::arm64_neon_ld3r: - case Intrinsic::arm64_neon_ld4r: { - Info.opc = ISD::INTRINSIC_W_CHAIN; - // Conservatively set memVT to the entire set of vectors loaded. 
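-    // For example, an ld2 returning two v4i32 values loads 32 bytes, which
-    // is modelled here as v4i64 (NumElts == 32 / 8 == 4).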
-    uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
-    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
-    Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
-    Info.offset = 0;
-    Info.align = 0;
-    Info.vol = false; // volatile loads with NEON intrinsics not supported
-    Info.readMem = true;
-    Info.writeMem = false;
-    return true;
-  }
-  case Intrinsic::arm64_neon_st2:
-  case Intrinsic::arm64_neon_st3:
-  case Intrinsic::arm64_neon_st4:
-  case Intrinsic::arm64_neon_st2lane:
-  case Intrinsic::arm64_neon_st3lane:
-  case Intrinsic::arm64_neon_st4lane: {
-    Info.opc = ISD::INTRINSIC_VOID;
-    // Conservatively set memVT to the entire set of vectors stored.
-    unsigned NumElts = 0;
-    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
-      Type *ArgTy = I.getArgOperand(ArgI)->getType();
-      if (!ArgTy->isVectorTy())
-        break;
-      NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
-    }
-    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
-    Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
-    Info.offset = 0;
-    Info.align = 0;
-    Info.vol = false; // volatile stores with NEON intrinsics not supported
-    Info.readMem = false;
-    Info.writeMem = true;
-    return true;
-  }
-  case Intrinsic::arm64_ldxr: {
-    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
-    Info.ptrVal = I.getArgOperand(0);
-    Info.offset = 0;
-    Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
-    Info.vol = true;
-    Info.readMem = true;
-    Info.writeMem = false;
-    return true;
-  }
-  case Intrinsic::arm64_stxr: {
-    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
-    Info.ptrVal = I.getArgOperand(1);
-    Info.offset = 0;
-    Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
-    Info.vol = true;
-    Info.readMem = false;
-    Info.writeMem = true;
-    return true;
-  }
-  case Intrinsic::arm64_ldxp: {
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::i128;
-    Info.ptrVal = I.getArgOperand(0);
-    Info.offset = 0;
-    Info.align = 16;
-    Info.vol = true;
-    Info.readMem = true;
-    Info.writeMem = false;
-    return true;
-  }
-  case Intrinsic::arm64_stxp: {
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::i128;
-    Info.ptrVal = I.getArgOperand(2);
-    Info.offset = 0;
-    Info.align = 16;
-    Info.vol = true;
-    Info.readMem = false;
-    Info.writeMem = true;
-    return true;
-  }
-  default:
-    break;
-  }
-
-  return false;
-}
-
-// Truncations from 64-bit GPR to 32-bit GPR are free.
-bool ARM64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
-  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
-    return false;
-  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
-  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
-  if (NumBits1 <= NumBits2)
-    return false;
-  return true;
-}
-bool ARM64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
-  if (!VT1.isInteger() || !VT2.isInteger())
-    return false;
-  unsigned NumBits1 = VT1.getSizeInBits();
-  unsigned NumBits2 = VT2.getSizeInBits();
-  if (NumBits1 <= NumBits2)
-    return false;
-  return true;
-}
-
-// All 32-bit GPR operations implicitly zero the high-half of the corresponding
-// 64-bit GPR.
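-// For example, a 32-bit ADD that writes w0 also clears the top 32 bits of
-// x0, so zero-extending its i32 result to i64 costs no extra instruction.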
-bool ARM64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
-  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
-    return false;
-  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
-  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
-  if (NumBits1 == 32 && NumBits2 == 64)
-    return true;
-  return false;
-}
-bool ARM64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
-  if (!VT1.isInteger() || !VT2.isInteger())
-    return false;
-  unsigned NumBits1 = VT1.getSizeInBits();
-  unsigned NumBits2 = VT2.getSizeInBits();
-  if (NumBits1 == 32 && NumBits2 == 64)
-    return true;
-  return false;
-}
-
-bool ARM64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
-  EVT VT1 = Val.getValueType();
-  if (isZExtFree(VT1, VT2)) {
-    return true;
-  }
-
-  if (Val.getOpcode() != ISD::LOAD)
-    return false;
-
-  // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
-  return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() &&
-          VT2.isInteger() && VT1.getSizeInBits() <= 32);
-}
-
-bool ARM64TargetLowering::hasPairedLoad(Type *LoadedType,
-                                        unsigned &RequiredAligment) const {
-  if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
-    return false;
-  // Cyclone supports unaligned accesses.
-  RequiredAligment = 0;
-  unsigned NumBits = LoadedType->getPrimitiveSizeInBits();
-  return NumBits == 32 || NumBits == 64;
-}
-
-bool ARM64TargetLowering::hasPairedLoad(EVT LoadedType,
-                                        unsigned &RequiredAligment) const {
-  if (!LoadedType.isSimple() ||
-      (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
-    return false;
-  // Cyclone supports unaligned accesses.
-  RequiredAligment = 0;
-  unsigned NumBits = LoadedType.getSizeInBits();
-  return NumBits == 32 || NumBits == 64;
-}
-
-static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
-                       unsigned AlignCheck) {
-  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
-          (DstAlign == 0 || DstAlign % AlignCheck == 0));
-}
-
-EVT ARM64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
-                                             unsigned SrcAlign, bool IsMemset,
-                                             bool ZeroMemset, bool MemcpyStrSrc,
-                                             MachineFunction &MF) const {
-  // Don't use AdvSIMD to implement 16-byte memset. It would have taken one
-  // instruction to materialize the v2i64 zero and one store (with restrictive
-  // addressing mode). Just do two i64 stores of zero-registers.
-  bool Fast;
-  const Function *F = MF.getFunction();
-  if (!IsMemset && Size >= 16 &&
-      !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                       Attribute::NoImplicitFloat) &&
-      (memOpAlign(SrcAlign, DstAlign, 16) ||
-       (allowsUnalignedMemoryAccesses(MVT::v2i64, 0, &Fast) && Fast)))
-    return MVT::v2i64;
-
-  return Size >= 8 ? MVT::i64 : MVT::i32;
-}
-
-// 12-bit optionally shifted immediates are legal for adds.
-bool ARM64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
-  if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0))
-    return true;
-  return false;
-}
-
-// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
-// immediates is the same as for an add or a sub.
-bool ARM64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
-  if (Immed < 0)
-    Immed *= -1;
-  return isLegalAddImmediate(Immed);
-}
-
-/// isLegalAddressingMode - Return true if the addressing mode represented
-/// by AM is legal for this target, for a load/store of the specified type.
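/// For instance (illustrative numbers, not in the original comment), an i64
/// access can use "reg + imm9" for byte offsets in [-256, 255], or
/// "reg + 8 * uimm12" for scaled offsets in [0, 32760] in steps of 8.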
-bool ARM64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                                Type *Ty) const {
-  // ARM64 has five basic addressing modes:
-  //  reg
-  //  reg + 9-bit signed offset
-  //  reg + SIZE_IN_BYTES * 12-bit unsigned offset
-  //  reg1 + reg2
-  //  reg + SIZE_IN_BYTES * reg
-
-  // No global is ever allowed as a base.
-  if (AM.BaseGV)
-    return false;
-
-  // No reg+reg+imm addressing.
-  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
-    return false;
-
-  // check reg + imm case:
-  // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
-  uint64_t NumBytes = 0;
-  if (Ty->isSized()) {
-    uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty);
-    NumBytes = NumBits / 8;
-    if (!isPowerOf2_64(NumBits))
-      NumBytes = 0;
-  }
-
-  if (!AM.Scale) {
-    int64_t Offset = AM.BaseOffs;
-
-    // 9-bit signed offset
-    if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
-      return true;
-
-    // 12-bit unsigned offset
-    unsigned shift = Log2_64(NumBytes);
-    if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
-        // Must be a multiple of NumBytes (NumBytes is a power of 2)
-        (Offset >> shift) << shift == Offset)
-      return true;
-    return false;
-  }
-
-  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
-
-  if (!AM.Scale || AM.Scale == 1 ||
-      (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes))
-    return true;
-  return false;
-}
-
-int ARM64TargetLowering::getScalingFactorCost(const AddrMode &AM,
-                                              Type *Ty) const {
-  // Scaling factors are not free at all.
-  // Operands                     | Rt Latency
-  // -------------------------------------------
-  // Rt, [Xn, Xm]                 | 4
-  // -------------------------------------------
-  // Rt, [Xn, Xm, lsl #imm]       | Rn: 4 Rm: 5
-  // Rt, [Xn, Wm, #imm]           |
-  if (isLegalAddressingMode(AM, Ty))
-    // Scale represents reg2 * scale, thus account for 1 if
-    // it is not equal to 0 or 1.
-    return AM.Scale != 0 && AM.Scale != 1;
-  return -1;
-}
-
-bool ARM64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
-  VT = VT.getScalarType();
-
-  if (!VT.isSimple())
-    return false;
-
-  switch (VT.getSimpleVT().SimpleTy) {
-  case MVT::f32:
-  case MVT::f64:
-    return true;
-  default:
-    break;
-  }
-
-  return false;
-}
-
-const uint16_t *
-ARM64TargetLowering::getScratchRegisters(CallingConv::ID) const {
-  // LR is a callee-save register, but we must treat it as clobbered by any call
-  // site. Hence we include LR in the scratch registers, which are in turn added
-  // as implicit-defs for stackmaps and patchpoints.
-  static const uint16_t ScratchRegs[] = {
-    ARM64::X16, ARM64::X17, ARM64::LR, 0
-  };
-  return ScratchRegs;
-}
-
-bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
-                                                            Type *Ty) const {
-  assert(Ty->isIntegerTy());
-
-  unsigned BitSize = Ty->getPrimitiveSizeInBits();
-  if (BitSize == 0)
-    return false;
-
-  int64_t Val = Imm.getSExtValue();
-  if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize))
-    return true;
-
-  if ((int64_t)Val < 0)
-    Val = ~Val;
-  if (BitSize == 32)
-    Val &= (1LL << 32) - 1;
-
-  unsigned LZ = countLeadingZeros((uint64_t)Val);
-  unsigned Shift = (63 - LZ) / 16;
-  // MOVZ is free so return true for one or fewer MOVK.
-  return Shift < 3;
-}
-
-// Generate SUBS and CSEL for integer abs.
-static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
-  EVT VT = N->getValueType(0);
-
-  SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
-  SDLoc DL(N);
-
-  // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
-  // and change it to SUB and CSEL.
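  // Worked example of the matched idiom (added for illustration; 32-bit x):
  //   int32_t y = x >> 31;        // arithmetic shift: 0 or -1
  //   int32_t abs = (x + y) ^ y;  // == x when x >= 0, == -x when x < 0
  // The code below instead emits SUBS (compare x with zero) plus a CSEL that
  // selects x or 0 - x on the PL condition.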
-  if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
-      N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
-      N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
-    if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
-      if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
-        SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-                                  N0.getOperand(0));
-        // Generate SUBS & CSEL.
-        SDValue Cmp =
-            DAG.getNode(ARM64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
-                        N0.getOperand(0), DAG.getConstant(0, VT));
-        return DAG.getNode(ARM64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
-                           DAG.getConstant(ARM64CC::PL, MVT::i32),
-                           SDValue(Cmp.getNode(), 1));
-      }
-  return SDValue();
-}
-
-// performXorCombine - Attempts to handle integer ABS.
-static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
-                                 TargetLowering::DAGCombinerInfo &DCI,
-                                 const ARM64Subtarget *Subtarget) {
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  return performIntegerAbsCombine(N, DAG);
-}
-
-static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
-                                 TargetLowering::DAGCombinerInfo &DCI,
-                                 const ARM64Subtarget *Subtarget) {
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  // Multiplication by a power of two plus/minus one can be done more
-  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
-  // future CPUs have a cheaper MADD instruction, this may need to be
-  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
-  // 64-bit is 5 cycles, so this is always a win.
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
-    APInt Value = C->getAPIntValue();
-    EVT VT = N->getValueType(0);
-    APInt VP1 = Value + 1;
-    if (VP1.isPowerOf2()) {
-      // Multiplying by one less than a power of two, replace with a shift
-      // and a subtract.
-      SDValue ShiftedVal =
-          DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
-                      DAG.getConstant(VP1.logBase2(), MVT::i64));
-      return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
-    }
-    APInt VM1 = Value - 1;
-    if (VM1.isPowerOf2()) {
-      // Multiplying by one more than a power of two, replace with a shift
-      // and an add.
-      SDValue ShiftedVal =
-          DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
-                      DAG.getConstant(VM1.logBase2(), MVT::i64));
-      return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
-    }
-  }
-  return SDValue();
-}
-
-static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
-  EVT VT = N->getValueType(0);
-  if (VT != MVT::f32 && VT != MVT::f64)
-    return SDValue();
-  // Only optimize when the source and destination types have the same width.
-  if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
-    return SDValue();
-
-  // If the result of an integer load is only used by an integer-to-float
-  // conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
-  // This eliminates an "integer-to-vector-move" UOP and improves throughput.
-  SDValue N0 = N->getOperand(0);
-  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
-      // Do not change the width of a volatile load.
-      !cast<LoadSDNode>(N0)->isVolatile()) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
-                               LN0->getPointerInfo(), LN0->isVolatile(),
-                               LN0->isNonTemporal(), LN0->isInvariant(),
-                               LN0->getAlignment());
-
-    // Make sure successors of the original load stay after it by updating them
-    // to use the new Chain.
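    // (Added explanatory note: the RAUW below redirects users of the integer
    // load's chain to the new FP load's chain, so no consumer of the old
    // chain can be reordered ahead of the replacement load.)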
-    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
-
-    unsigned Opcode =
-        (N->getOpcode() == ISD::SINT_TO_FP) ? ARM64ISD::SITOF : ARM64ISD::UITOF;
-    return DAG.getNode(Opcode, SDLoc(N), VT, Load);
-  }
-
-  return SDValue();
-}
-
-/// An EXTR instruction is made up of two shifts, ORed together. This helper
-/// searches for and classifies those shifts.
-static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
-                         bool &FromHi) {
-  if (N.getOpcode() == ISD::SHL)
-    FromHi = false;
-  else if (N.getOpcode() == ISD::SRL)
-    FromHi = true;
-  else
-    return false;
-
-  if (!isa<ConstantSDNode>(N.getOperand(1)))
-    return false;
-
-  ShiftAmount = N->getConstantOperandVal(1);
-  Src = N->getOperand(0);
-  return true;
-}
-
-/// An EXTR instruction extracts a contiguous chunk of bits from two existing
-/// registers viewed as a high/low pair. This function looks for the pattern:
-/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
-/// EXTR. Can't quite be done in TableGen because the two immediates aren't
-/// independent.
-static SDValue tryCombineToEXTR(SDNode *N,
-                                TargetLowering::DAGCombinerInfo &DCI) {
-  SelectionDAG &DAG = DCI.DAG;
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
-
-  assert(N->getOpcode() == ISD::OR && "Unexpected root");
-
-  if (VT != MVT::i32 && VT != MVT::i64)
-    return SDValue();
-
-  SDValue LHS;
-  uint32_t ShiftLHS = 0;
-  bool LHSFromHi = false;
-  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
-    return SDValue();
-
-  SDValue RHS;
-  uint32_t ShiftRHS = 0;
-  bool RHSFromHi = false;
-  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
-    return SDValue();
-
-  // If they're both trying to come from the high part of the register, they're
-  // not really an EXTR.
-  if (LHSFromHi == RHSFromHi)
-    return SDValue();
-
-  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
-    return SDValue();
-
-  if (LHSFromHi) {
-    std::swap(LHS, RHS);
-    std::swap(ShiftLHS, ShiftRHS);
-  }
-
-  return DAG.getNode(ARM64ISD::EXTR, DL, VT, LHS, RHS,
-                     DAG.getConstant(ShiftRHS, MVT::i64));
-}
-
-static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
-                                const ARM64Subtarget *Subtarget) {
-  // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
-  if (!EnableARM64ExtrGeneration)
-    return SDValue();
-  SelectionDAG &DAG = DCI.DAG;
-  EVT VT = N->getValueType(0);
-
-  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
-    return SDValue();
-
-  SDValue Res = tryCombineToEXTR(N, DCI);
-  if (Res.getNode())
-    return Res;
-
-  return SDValue();
-}
-
-static SDValue performBitcastCombine(SDNode *N,
-                                     TargetLowering::DAGCombinerInfo &DCI,
-                                     SelectionDAG &DAG) {
-  // Wait 'til after everything is legalized to try this. That way we have
-  // legal vector types and such.
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  // Remove extraneous bitcasts around an extract_subvector.
-  // For example,
-  //    (v4i16 (bitconvert
-  //             (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1)))))
-  // becomes
-  //    (extract_subvector ((v8i16 ...), (i64 4)))
-
-  // Only interested in 64-bit vectors as the ultimate result.
-  EVT VT = N->getValueType(0);
-  if (!VT.isVector())
-    return SDValue();
-  if (VT.getSimpleVT().getSizeInBits() != 64)
-    return SDValue();
-  // Is the operand an extract_subvector starting at the beginning or halfway
-  // point of the vector? A low half may also come through as an
-  // EXTRACT_SUBREG, so look for that, too.
-  SDValue Op0 = N->getOperand(0);
-  if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR &&
-      !(Op0->isMachineOpcode() &&
-        Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG))
-    return SDValue();
-  uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue();
-  if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
-    if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0)
-      return SDValue();
-  } else if (Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG) {
-    if (idx != ARM64::dsub)
-      return SDValue();
-    // The dsub reference is equivalent to a lane zero subvector reference.
-    idx = 0;
-  }
-  // Look through the bitcast of the input to the extract.
-  if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST)
-    return SDValue();
-  SDValue Source = Op0->getOperand(0)->getOperand(0);
-  // If the source type has twice the number of elements as our destination
-  // type, we know this is an extract of the high or low half of the vector.
-  EVT SVT = Source->getValueType(0);
-  if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2)
-    return SDValue();
-
-  DEBUG(dbgs() << "arm64-lower: bitcast extract_subvector simplification\n");
-
-  // Create the simplified form to just extract the low or high half of the
-  // vector directly rather than bothering with the bitcasts.
-  SDLoc dl(N);
-  unsigned NumElements = VT.getVectorNumElements();
-  if (idx) {
-    SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64);
-    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
-  } else {
-    SDValue SubReg = DAG.getTargetConstant(ARM64::dsub, MVT::i32);
-    return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
-                                      Source, SubReg),
-                   0);
-  }
-}
-
-static SDValue performConcatVectorsCombine(SDNode *N,
-                                           TargetLowering::DAGCombinerInfo &DCI,
-                                           SelectionDAG &DAG) {
-  // Wait 'til after everything is legalized to try this. That way we have
-  // legal vector types and such.
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  SDLoc dl(N);
-  EVT VT = N->getValueType(0);
-
-  // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
-  // splat. The indexed instructions are going to be expecting a DUPLANE64, so
-  // canonicalise to that.
-  if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
-    assert(VT.getVectorElementType().getSizeInBits() == 64);
-    return DAG.getNode(ARM64ISD::DUPLANE64, dl, VT,
-                       WidenVector(N->getOperand(0), DAG),
-                       DAG.getConstant(0, MVT::i64));
-  }
-
-  // Canonicalise concat_vectors so that the right-hand vector has as few
-  // bit-casts as possible before its real operation. The primary matching
-  // destination for these operations will be the narrowing "2" instructions,
-  // which depend on the operation being performed on this right-hand vector.
-  // For example,
-  //    (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
-  // becomes
-  //    (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
-
-  SDValue Op1 = N->getOperand(1);
-  if (Op1->getOpcode() != ISD::BITCAST)
-    return SDValue();
-  SDValue RHS = Op1->getOperand(0);
-  MVT RHSTy = RHS.getValueType().getSimpleVT();
-  // If the RHS is not a vector, this is not the pattern we're looking for.
-  if (!RHSTy.isVector())
-    return SDValue();
-
-  DEBUG(dbgs() << "arm64-lower: concat_vectors bitcast simplification\n");
-
-  MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
-                                  RHSTy.getVectorNumElements() * 2);
-  return DAG.getNode(
-      ISD::BITCAST, dl, VT,
-      DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
-                  DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
-}
-
-static SDValue tryCombineFixedPointConvert(SDNode *N,
-                                           TargetLowering::DAGCombinerInfo &DCI,
-                                           SelectionDAG &DAG) {
-  // Wait 'til after everything is legalized to try this. That way we have
-  // legal vector types and such.
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-  // Transform a scalar conversion of a value from a lane extract into a
-  // lane extract of a vector conversion. E.g., from foo1 to foo2:
-  // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
-  // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
-  //
-  // The second form interacts better with instruction selection and the
-  // register allocator to avoid cross-class register copies that aren't
-  // coalescable due to a lane reference.
-
-  // Check the operand and see if it originates from a lane extract.
-  SDValue Op1 = N->getOperand(1);
-  if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
-    // Yep, no additional predication needed. Perform the transform.
-    SDValue IID = N->getOperand(0);
-    SDValue Shift = N->getOperand(2);
-    SDValue Vec = Op1.getOperand(0);
-    SDValue Lane = Op1.getOperand(1);
-    EVT ResTy = N->getValueType(0);
-    EVT VecResTy;
-    SDLoc DL(N);
-
-    // The vector width should be 128 bits by the time we get here, even
-    // if it started as 64 bits (the extract_vector handling will have
-    // done so).
-    assert(Vec.getValueType().getSizeInBits() == 128 &&
-           "unexpected vector size on extract_vector_elt!");
-    if (Vec.getValueType() == MVT::v4i32)
-      VecResTy = MVT::v4f32;
-    else if (Vec.getValueType() == MVT::v2i64)
-      VecResTy = MVT::v2f64;
-    else
-      llvm_unreachable("unexpected vector type!");
-
-    SDValue Convert =
-        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
-    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
-  }
-  return SDValue();
-}
-
-// AArch64 high-vector "long" operations are formed by performing the non-high
-// version on an extract_subvector of each operand which gets the high half:
-//
-//   (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
-//
-// However, there are cases which don't have an extract_high explicitly, but
-// have another operation that can be made compatible with one for free. For
-// example:
-//
-//   (dupv64 scalar) --> (extract_high (dup128 scalar))
-//
-// This routine does the actual conversion of such DUPs, once outer routines
-// have determined that everything else is in order.
-static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
-  // We can handle most types of duplicate, but the lane ones have an extra
-  // operand saying *which* lane, so we need to know.
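  // Sketch of the rewrite this performs (added illustration): a 64-bit
  //   (v2i32 (DUP %scalar))
  // is re-emitted as the 128-bit (v4i32 (DUP %scalar)), and the original
  // value is then taken as its high half via (extract_subvector ..., 2) --
  // exactly the extract_high shape the "long" instruction patterns expect.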
-  bool IsDUPLANE;
-  switch (N.getOpcode()) {
-  case ARM64ISD::DUP:
-    IsDUPLANE = false;
-    break;
-  case ARM64ISD::DUPLANE8:
-  case ARM64ISD::DUPLANE16:
-  case ARM64ISD::DUPLANE32:
-  case ARM64ISD::DUPLANE64:
-    IsDUPLANE = true;
-    break;
-  default:
-    return SDValue();
-  }
-
-  MVT NarrowTy = N.getSimpleValueType();
-  if (!NarrowTy.is64BitVector())
-    return SDValue();
-
-  MVT ElementTy = NarrowTy.getVectorElementType();
-  unsigned NumElems = NarrowTy.getVectorNumElements();
-  MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
-
-  SDValue NewDUP;
-  if (IsDUPLANE)
-    NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0),
-                         N.getOperand(1));
-  else
-    NewDUP = DAG.getNode(ARM64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0));
-
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy,
-                     NewDUP, DAG.getConstant(NumElems, MVT::i64));
-}
-
-static bool isEssentiallyExtractSubvector(SDValue N) {
-  if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
-    return true;
-
-  return N.getOpcode() == ISD::BITCAST &&
-         N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
-}
-
-/// \brief Helper structure to keep track of ISD::SET_CC operands.
-struct GenericSetCCInfo {
-  const SDValue *Opnd0;
-  const SDValue *Opnd1;
-  ISD::CondCode CC;
-};
-
-/// \brief Helper structure to keep track of a SET_CC lowered into ARM64 code.
-struct ARM64SetCCInfo {
-  const SDValue *Cmp;
-  ARM64CC::CondCode CC;
-};
-
-/// \brief Helper structure to keep track of SetCC information.
-union SetCCInfo {
-  GenericSetCCInfo Generic;
-  ARM64SetCCInfo ARM64;
-};
-
-/// \brief Helper structure to be able to read SetCC information.
-/// If the IsARM64 field is set to true, Info is an ARM64SetCCInfo; otherwise
-/// Info is a GenericSetCCInfo.
-struct SetCCInfoAndKind {
-  SetCCInfo Info;
-  bool IsARM64;
-};
-
-/// \brief Check whether or not \p Op is a SET_CC operation, either a generic
-/// or an ARM64 lowered one.
-/// \p SetCCInfo is filled accordingly.
-/// \post SetCCInfo is meaningful only when this function returns true.
-/// \return True when Op is a kind of SET_CC operation.
-static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
-  // If this is a setcc, this is straightforward.
-  if (Op.getOpcode() == ISD::SETCC) {
-    SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
-    SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
-    SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-    SetCCInfo.IsARM64 = false;
-    return true;
-  }
-  // Otherwise, check if this is a matching csel instruction.
-  // In other words:
-  //   - csel 1, 0, cc
-  //   - csel 0, 1, !cc
-  if (Op.getOpcode() != ARM64ISD::CSEL)
-    return false;
-  // Set the information about the operands.
-  // TODO: we want the operands of the Cmp not the csel
-  SetCCInfo.Info.ARM64.Cmp = &Op.getOperand(3);
-  SetCCInfo.IsARM64 = true;
-  SetCCInfo.Info.ARM64.CC = static_cast<ARM64CC::CondCode>(
-      cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
-
-  // Check that the operands match the constraints:
-  // (1) Both operands must be constants.
-  // (2) One must be 1 and the other must be 0.
-  ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
-  ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-
-  // Check (1).
-  if (!TValue || !FValue)
-    return false;
-
-  // Check (2).
-  if (!TValue->isOne()) {
-    // Update the comparison when we are interested in !cc.
-    std::swap(TValue, FValue);
-    SetCCInfo.Info.ARM64.CC =
-        ARM64CC::getInvertedCondCode(SetCCInfo.Info.ARM64.CC);
-  }
-  return TValue->isOne() && FValue->isNullValue();
-}
-
-// The folding we want to perform is:
-// (add x, (setcc cc ...) )
-//   -->
-// (csel x, (add x, 1), !cc ...)
-//
-// The latter will get matched to a CSINC instruction.
-static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
-  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
-  SDValue LHS = Op->getOperand(0);
-  SDValue RHS = Op->getOperand(1);
-  SetCCInfoAndKind InfoAndKind;
-
-  // If neither operand is a SET_CC, give up.
-  if (!isSetCC(LHS, InfoAndKind)) {
-    std::swap(LHS, RHS);
-    if (!isSetCC(LHS, InfoAndKind))
-      return SDValue();
-  }
-
-  // FIXME: This could be generalized to work for FP comparisons.
-  EVT CmpVT = InfoAndKind.IsARM64
-                  ? InfoAndKind.Info.ARM64.Cmp->getOperand(0).getValueType()
-                  : InfoAndKind.Info.Generic.Opnd0->getValueType();
-  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
-    return SDValue();
-
-  SDValue CCVal;
-  SDValue Cmp;
-  SDLoc dl(Op);
-  if (InfoAndKind.IsARM64) {
-    CCVal = DAG.getConstant(
-        ARM64CC::getInvertedCondCode(InfoAndKind.Info.ARM64.CC), MVT::i32);
-    Cmp = *InfoAndKind.Info.ARM64.Cmp;
-  } else
-    Cmp = getARM64Cmp(*InfoAndKind.Info.Generic.Opnd0,
-                      *InfoAndKind.Info.Generic.Opnd1,
-                      ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
-                      CCVal, DAG, dl);
-
-  EVT VT = Op->getValueType(0);
-  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT));
-  return DAG.getNode(ARM64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
-}
-
-// The basic add/sub long vector instructions have variants with "2" on the end
-// which act on the high-half of their inputs. They are normally matched by
-// patterns like:
-//
-// (add (zeroext (extract_high LHS)),
-//      (zeroext (extract_high RHS)))
-//   -> uaddl2 vD, vN, vM
-//
-// However, if one of the extracts is something like a duplicate, this
-// instruction can still be used profitably. This function puts the DAG into a
-// more appropriate form for those patterns to trigger.
-static SDValue performAddSubLongCombine(SDNode *N,
-                                        TargetLowering::DAGCombinerInfo &DCI,
-                                        SelectionDAG &DAG) {
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  MVT VT = N->getSimpleValueType(0);
-  if (!VT.is128BitVector()) {
-    if (N->getOpcode() == ISD::ADD)
-      return performSetccAddFolding(N, DAG);
-    return SDValue();
-  }
-
-  // Make sure both branches are extended in the same way.
-  SDValue LHS = N->getOperand(0);
-  SDValue RHS = N->getOperand(1);
-  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
-       LHS.getOpcode() != ISD::SIGN_EXTEND) ||
-      LHS.getOpcode() != RHS.getOpcode())
-    return SDValue();
-
-  unsigned ExtType = LHS.getOpcode();
-
-  // It's not worth doing if at least one of the inputs isn't already an
-  // extract, but we don't know which it'll be so we have to try both.
-  if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
-    RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
-    if (!RHS.getNode())
-      return SDValue();
-
-    RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
-  } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
-    LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
-    if (!LHS.getNode())
-      return SDValue();
-
-    LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
-  }
-
-  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
-}
-
-// Massage DAGs which we can use the high-half "long" operations on into
-// something isel will recognize better. E.g.
-//
-// (arm64_neon_umull (extract_high vec) (dupv64 scalar)) -->
-//   (arm64_neon_umull (extract_high (v2i64 vec)))
-//                     (extract_high (v2i64 (dup128 scalar)))))
-//
-static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
-                                       TargetLowering::DAGCombinerInfo &DCI,
-                                       SelectionDAG &DAG) {
-  if (DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  SDValue LHS = N->getOperand(1);
-  SDValue RHS = N->getOperand(2);
-  assert(LHS.getValueType().is64BitVector() &&
-         RHS.getValueType().is64BitVector() &&
-         "unexpected shape for long operation");
-
-  // Either node could be a DUP, but it's not worth doing both of them (you'd
-  // just as well use the non-high version) so look for a corresponding extract
-  // operation on the other "wing".
-  if (isEssentiallyExtractSubvector(LHS)) {
-    RHS = tryExtendDUPToExtractHigh(RHS, DAG);
-    if (!RHS.getNode())
-      return SDValue();
-  } else if (isEssentiallyExtractSubvector(RHS)) {
-    LHS = tryExtendDUPToExtractHigh(LHS, DAG);
-    if (!LHS.getNode())
-      return SDValue();
-  }
-
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
-                     N->getOperand(0), LHS, RHS);
-}
-
-static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
-  MVT ElemTy = N->getSimpleValueType(0).getScalarType();
-  unsigned ElemBits = ElemTy.getSizeInBits();
-
-  int64_t ShiftAmount;
-  if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
-    APInt SplatValue, SplatUndef;
-    unsigned SplatBitSize;
-    bool HasAnyUndefs;
-    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
-                              HasAnyUndefs, ElemBits) ||
-        SplatBitSize != ElemBits)
-      return SDValue();
-
-    ShiftAmount = SplatValue.getSExtValue();
-  } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
-    ShiftAmount = CVN->getSExtValue();
-  } else
-    return SDValue();
-
-  unsigned Opcode;
-  bool IsRightShift;
-  switch (IID) {
-  default:
-    llvm_unreachable("Unknown shift intrinsic");
-  case Intrinsic::arm64_neon_sqshl:
-    Opcode = ARM64ISD::SQSHL_I;
-    IsRightShift = false;
-    break;
-  case Intrinsic::arm64_neon_uqshl:
-    Opcode = ARM64ISD::UQSHL_I;
-    IsRightShift = false;
-    break;
-  case Intrinsic::arm64_neon_srshl:
-    Opcode = ARM64ISD::SRSHR_I;
-    IsRightShift = true;
-    break;
-  case Intrinsic::arm64_neon_urshl:
-    Opcode = ARM64ISD::URSHR_I;
-    IsRightShift = true;
-    break;
-  case Intrinsic::arm64_neon_sqshlu:
-    Opcode = ARM64ISD::SQSHLU_I;
-    IsRightShift = false;
-    break;
-  }
-
-  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
-    return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
-                       DAG.getConstant(-ShiftAmount, MVT::i32));
-  else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits)
-    return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
-                       DAG.getConstant(ShiftAmount, MVT::i32));
-
-  return SDValue();
-}
-
-// The CRC32[BH] instructions ignore the high bits of their data operand. Since
-// the intrinsics must be legal and take an i32, this means there's almost
-// certainly going to be a zext in the DAG which we can eliminate.
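// For example (hypothetical IR, not taken from the original patch):
//   %masked = and i32 %data, 255
//   %crc = call i32 @llvm.arm64.crc32b(i32 %acc, i32 %masked)
// CRC32B only reads the low 8 bits of its data operand, so the 'and' is
// redundant; the combine below feeds %data to the intrinsic directly.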
-static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
-  SDValue AndN = N->getOperand(2);
-  if (AndN.getOpcode() != ISD::AND)
-    return SDValue();
-
-  ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
-  if (!CMask || CMask->getZExtValue() != Mask)
-    return SDValue();
-
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
-                     N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
-}
-
-static SDValue performIntrinsicCombine(SDNode *N,
-                                       TargetLowering::DAGCombinerInfo &DCI,
-                                       const ARM64Subtarget *Subtarget) {
-  SelectionDAG &DAG = DCI.DAG;
-  unsigned IID = getIntrinsicID(N);
-  switch (IID) {
-  default:
-    break;
-  case Intrinsic::arm64_neon_vcvtfxs2fp:
-  case Intrinsic::arm64_neon_vcvtfxu2fp:
-    return tryCombineFixedPointConvert(N, DCI, DAG);
-  case Intrinsic::arm64_neon_fmax:
-    return DAG.getNode(ARM64ISD::FMAX, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2));
-  case Intrinsic::arm64_neon_fmin:
-    return DAG.getNode(ARM64ISD::FMIN, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2));
-  case Intrinsic::arm64_neon_smull:
-  case Intrinsic::arm64_neon_umull:
-  case Intrinsic::arm64_neon_pmull:
-  case Intrinsic::arm64_neon_sqdmull:
-    return tryCombineLongOpWithDup(IID, N, DCI, DAG);
-  case Intrinsic::arm64_neon_sqshl:
-  case Intrinsic::arm64_neon_uqshl:
-  case Intrinsic::arm64_neon_sqshlu:
-  case Intrinsic::arm64_neon_srshl:
-  case Intrinsic::arm64_neon_urshl:
-    return tryCombineShiftImm(IID, N, DAG);
-  case Intrinsic::arm64_crc32b:
-  case Intrinsic::arm64_crc32cb:
-    return tryCombineCRC32(0xff, N, DAG);
-  case Intrinsic::arm64_crc32h:
-  case Intrinsic::arm64_crc32ch:
-    return tryCombineCRC32(0xffff, N, DAG);
-  }
-  return SDValue();
-}
-
-static SDValue performExtendCombine(SDNode *N,
-                                    TargetLowering::DAGCombinerInfo &DCI,
-                                    SelectionDAG &DAG) {
-  // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
-  // we can convert that DUP into another extract_high (of a bigger DUP), which
-  // helps the backend to decide that an sabdl2 would be useful, saving a real
-  // extract_high operation.
-  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
-      N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
-    SDNode *ABDNode = N->getOperand(0).getNode();
-    unsigned IID = getIntrinsicID(ABDNode);
-    if (IID == Intrinsic::arm64_neon_sabd ||
-        IID == Intrinsic::arm64_neon_uabd) {
-      SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
-      if (!NewABD.getNode())
-        return SDValue();
-
-      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
-                         NewABD);
-    }
-  }
-
-  // This is effectively a custom type legalization for ARM64.
-  //
-  // Type legalization will split an extend of a small, legal, type to a larger
-  // illegal type by first splitting the destination type, often creating
-  // illegal source types, which then get legalized in isel-confusing ways,
-  // leading to really terrible codegen. E.g.,
-  //   %result = v8i32 sext v8i8 %value
-  // becomes
-  //   %losrc = extract_subreg %value, ...
-  //   %hisrc = extract_subreg %value, ...
-  //   %lo = v4i32 sext v4i8 %losrc
-  //   %hi = v4i32 sext v4i8 %hisrc
-  // Things go rapidly downhill from there.
-  //
-  // For ARM64, the [sz]ext vector instructions can only go up one element
-  // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
-  // takes two instructions.
-  //
-  // This implies that the most efficient way to do the extend from v8i8
-  // to two v4i32 values is to first extend the v8i8 to v8i16, then let
-  // the normal splitting happen for the v8i16->v8i32.
-
-  // This is pre-legalization to catch some cases where the default
-  // type legalization will create ill-tempered code.
-  if (!DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  // We're only interested in cleaning things up for non-legal vector types
-  // here. If both the source and destination are legal, things will just
-  // work naturally without any fiddling.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT ResVT = N->getValueType(0);
-  if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
-    return SDValue();
-  // If the vector type isn't a simple VT, it's beyond the scope of what
-  // we're worried about here. Let legalization do its thing and hope for
-  // the best.
-  if (!ResVT.isSimple())
-    return SDValue();
-
-  SDValue Src = N->getOperand(0);
-  MVT SrcVT = Src->getValueType(0).getSimpleVT();
-  // If the source VT is a 64-bit vector, we can play games and get the
-  // better results we want.
-  if (SrcVT.getSizeInBits() != 64)
-    return SDValue();
-
-  unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
-  unsigned ElementCount = SrcVT.getVectorNumElements();
-  SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
-  SDLoc DL(N);
-  Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
-
-  // Now split the rest of the operation into two halves, each with a 64
-  // bit source.
-  EVT LoVT, HiVT;
-  SDValue Lo, Hi;
-  unsigned NumElements = ResVT.getVectorNumElements();
-  assert(!(NumElements & 1) && "Splitting vector, but not in half!");
-  LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
-                                 ResVT.getVectorElementType(), NumElements / 2);
-
-  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
-                               LoVT.getVectorNumElements());
-  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getIntPtrConstant(0));
-  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
-  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
-  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
-
-  // Now combine the parts back together so we still have a single result
-  // like the combiner expects.
-  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
-}
-
-/// Replace a vector store of a splatted scalar by scalar stores of that scalar
-/// value. The load/store optimizer pass will merge them into store pair stores.
-/// This has better performance than a splat of the scalar followed by a split
-/// vector store. Even if the stores are not merged it is four stores vs a dup,
-/// followed by an ext.b and two stores.
-static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
-  SDValue StVal = St->getValue();
-  EVT VT = StVal.getValueType();
-
-  // Don't replace floating point stores, they possibly won't be transformed to
-  // stp because of the store pair suppress pass.
-  if (VT.isFloatingPoint())
-    return SDValue();
-
-  // Check for insert vector elements.
-  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
-    return SDValue();
-
-  // We can express a splat as store pair(s) for 2 or 4 elements.
-  unsigned NumVecElts = VT.getVectorNumElements();
-  if (NumVecElts != 4 && NumVecElts != 2)
-    return SDValue();
-  SDValue SplatVal = StVal.getOperand(1);
-  unsigned RemainInsertElts = NumVecElts - 1;
-
-  // Check that this is a splat.
-  while (--RemainInsertElts) {
-    SDValue NextInsertElt = StVal.getOperand(0);
-    if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
-      return SDValue();
-    if (NextInsertElt.getOperand(1) != SplatVal)
-      return SDValue();
-    StVal = NextInsertElt;
-  }
-  unsigned OrigAlignment = St->getAlignment();
-  unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
-  unsigned Alignment = std::min(OrigAlignment, EltOffset);
-
-  // Create scalar stores. This is at least as good as the code sequence for a
-  // split unaligned store which is a dup.s, ext.b, and two stores.
-  // Most of the time the three stores should be replaced by store pair
-  // instructions (stp).
-  SDLoc DL(St);
-  SDValue BasePtr = St->getBasePtr();
-  SDValue NewST1 =
-      DAG.getStore(St->getChain(), DL, SplatVal, BasePtr, St->getPointerInfo(),
-                   St->isVolatile(), St->isNonTemporal(), St->getAlignment());
-
-  unsigned Offset = EltOffset;
-  while (--NumVecElts) {
-    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
-                                    DAG.getConstant(Offset, MVT::i64));
-    NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
-                          St->getPointerInfo(), St->isVolatile(),
-                          St->isNonTemporal(), Alignment);
-    Offset += EltOffset;
-  }
-  return NewST1;
-}
-
-static SDValue performSTORECombine(SDNode *N,
-                                   TargetLowering::DAGCombinerInfo &DCI,
-                                   SelectionDAG &DAG,
-                                   const ARM64Subtarget *Subtarget) {
-  if (!DCI.isBeforeLegalize())
-    return SDValue();
-
-  StoreSDNode *S = cast<StoreSDNode>(N);
-  if (S->isVolatile())
-    return SDValue();
-
-  // Cyclone has bad performance on unaligned 16B stores when crossing line and
-  // page boundaries. We want to split such stores.
-  if (!Subtarget->isCyclone())
-    return SDValue();
-
-  // Don't split at -Oz.
-  MachineFunction &MF = DAG.getMachineFunction();
-  bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
-      AttributeSet::FunctionIndex, Attribute::MinSize);
-  if (IsMinSize)
-    return SDValue();
-
-  SDValue StVal = S->getValue();
-  EVT VT = StVal.getValueType();
-
-  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
-  // those up regresses performance on micro-benchmarks and olden/bh.
-  if (!VT.isVector() || VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
-    return SDValue();
-
-  // Split unaligned 16B stores. They are terrible for performance.
-  // Don't split stores with alignment of 1 or 2. Code that uses clang vector
-  // extensions can use this to mark that it does not want splitting to happen
-  // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
-  // eliminating alignment hazards is only 1 in 8 for alignment of 2.
-  if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
-      S->getAlignment() <= 2)
-    return SDValue();
-
-  // If we get a splat of a scalar convert this vector store to a store of
-  // scalars. They will be merged into store pairs thereby removing two
-  // instructions.
-  SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
-  if (ReplacedSplat != SDValue())
-    return ReplacedSplat;
-
-  SDLoc DL(S);
-  unsigned NumElts = VT.getVectorNumElements() / 2;
-  // Split VT into two.
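  // E.g. (added illustration): an unaligned 16B store of a v4i32 becomes two
  // 8B stores of v2i32 at [base] and [base + 8], each far less likely to
  // straddle a cache line or page boundary.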
-  EVT HalfVT =
-      EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
-  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
-                                   DAG.getIntPtrConstant(0));
-  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
-                                   DAG.getIntPtrConstant(NumElts));
-  SDValue BasePtr = S->getBasePtr();
-  SDValue NewST1 =
-      DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
-                   S->isVolatile(), S->isNonTemporal(), S->getAlignment());
-  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
-                                  DAG.getConstant(8, MVT::i64));
-  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
-                      S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
-                      S->getAlignment());
-}
-
-// Optimize compare with zero and branch.
-static SDValue performBRCONDCombine(SDNode *N,
-                                    TargetLowering::DAGCombinerInfo &DCI,
-                                    SelectionDAG &DAG) {
-  SDValue Chain = N->getOperand(0);
-  SDValue Dest = N->getOperand(1);
-  SDValue CCVal = N->getOperand(2);
-  SDValue Cmp = N->getOperand(3);
-
-  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
-  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
-  if (CC != ARM64CC::EQ && CC != ARM64CC::NE)
-    return SDValue();
-
-  unsigned CmpOpc = Cmp.getOpcode();
-  if (CmpOpc != ARM64ISD::ADDS && CmpOpc != ARM64ISD::SUBS)
-    return SDValue();
-
-  // Only attempt folding if there is only one use of the flag and no use of
-  // the value.
-  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
-    return SDValue();
-
-  SDValue LHS = Cmp.getOperand(0);
-  SDValue RHS = Cmp.getOperand(1);
-
-  assert(LHS.getValueType() == RHS.getValueType() &&
-         "Expected the value type to be the same for both operands!");
-  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
-    return SDValue();
-
-  if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
-    std::swap(LHS, RHS);
-
-  if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
-    return SDValue();
-
-  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
-      LHS.getOpcode() == ISD::SRL)
-    return SDValue();
-
-  // Fold the compare into the branch instruction.
-  SDValue BR;
-  if (CC == ARM64CC::EQ)
-    BR = DAG.getNode(ARM64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
-  else
-    BR = DAG.getNode(ARM64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
-
-  // Do not add new nodes to DAG combiner worklist.
-  DCI.CombineTo(N, BR, false);
-
-  return SDValue();
-}
-
-SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
-                                               DAGCombinerInfo &DCI) const {
-  SelectionDAG &DAG = DCI.DAG;
-  switch (N->getOpcode()) {
-  default:
-    break;
-  case ISD::ADD:
-  case ISD::SUB:
-    return performAddSubLongCombine(N, DCI, DAG);
-  case ISD::XOR:
-    return performXorCombine(N, DAG, DCI, Subtarget);
-  case ISD::MUL:
-    return performMulCombine(N, DAG, DCI, Subtarget);
-  case ISD::SINT_TO_FP:
-  case ISD::UINT_TO_FP:
-    return performIntToFpCombine(N, DAG);
-  case ISD::OR:
-    return performORCombine(N, DCI, Subtarget);
-  case ISD::INTRINSIC_WO_CHAIN:
-    return performIntrinsicCombine(N, DCI, Subtarget);
-  case ISD::ANY_EXTEND:
-  case ISD::ZERO_EXTEND:
-  case ISD::SIGN_EXTEND:
-    return performExtendCombine(N, DCI, DAG);
-  case ISD::BITCAST:
-    return performBitcastCombine(N, DCI, DAG);
-  case ISD::CONCAT_VECTORS:
-    return performConcatVectorsCombine(N, DCI, DAG);
-  case ISD::STORE:
-    return performSTORECombine(N, DCI, DAG, Subtarget);
-  case ARM64ISD::BRCOND:
-    return performBRCONDCombine(N, DCI, DAG);
-  }
-  return SDValue();
-}
-
-// Check if the return value is used only as a return value, as otherwise
-// we can't perform a tail-call. In particular, we need to check for
-// target ISD nodes that are returns and any other "odd" constructs
-// that the generic analysis code won't necessarily catch.
-bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
-  if (N->getNumValues() != 1)
-    return false;
-  if (!N->hasNUsesOfValue(1, 0))
-    return false;
-
-  SDValue TCChain = Chain;
-  SDNode *Copy = *N->use_begin();
-  if (Copy->getOpcode() == ISD::CopyToReg) {
-    // If the copy has a glue operand, we conservatively assume it isn't safe to
-    // perform a tail call.
-    if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
-        MVT::Glue)
-      return false;
-    TCChain = Copy->getOperand(0);
-  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
-    return false;
-
-  bool HasRet = false;
-  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
-       UI != UE; ++UI) {
-    if (UI->getOpcode() != ARM64ISD::RET_FLAG)
-      return false;
-    HasRet = true;
-  }
-
-  if (!HasRet)
-    return false;
-
-  Chain = TCChain;
-  return true;
-}
-
-// Return whether an instruction can potentially be optimized to a tail
-// call. This will cause the optimizers to attempt to move, or duplicate,
-// return instructions to help enable tail call optimizations for this
-// instruction.
-bool ARM64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
-  if (!EnableARM64TailCalls)
-    return false;
-
-  if (!CI->isTailCall())
-    return false;
-
-  return true;
-}
-
-bool ARM64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
-                                                 SDValue &Offset,
-                                                 ISD::MemIndexedMode &AM,
-                                                 bool &IsInc,
-                                                 SelectionDAG &DAG) const {
-  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
-    return false;
-
-  Base = Op->getOperand(0);
-  // All of the indexed addressing mode instructions take a signed
-  // 9 bit immediate offset.
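  // That is, only small literal offsets can be folded; the check below
  // accepts offsets strictly between -256 and 256 (illustrative encodings):
  //   ldr x0, [x1, #255]!   // representable as a pre-indexed load
  //   ldr x0, [x1, #256]!   // not representable; the ADD stays separate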
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
-    int64_t RHSC = (int64_t)RHS->getZExtValue();
-    if (RHSC >= 256 || RHSC <= -256)
-      return false;
-    IsInc = (Op->getOpcode() == ISD::ADD);
-    Offset = Op->getOperand(1);
-    return true;
-  }
-  return false;
-}
-
-bool ARM64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
-                                                    SDValue &Offset,
-                                                    ISD::MemIndexedMode &AM,
-                                                    SelectionDAG &DAG) const {
-  EVT VT;
-  SDValue Ptr;
-  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
-    VT = LD->getMemoryVT();
-    Ptr = LD->getBasePtr();
-  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
-    VT = ST->getMemoryVT();
-    Ptr = ST->getBasePtr();
-  } else
-    return false;
-
-  bool IsInc;
-  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
-    return false;
-  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
-  return true;
-}
-
-bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
-                                                     SDValue &Base,
-                                                     SDValue &Offset,
-                                                     ISD::MemIndexedMode &AM,
-                                                     SelectionDAG &DAG) const {
-  EVT VT;
-  SDValue Ptr;
-  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
-    VT = LD->getMemoryVT();
-    Ptr = LD->getBasePtr();
-  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
-    VT = ST->getMemoryVT();
-    Ptr = ST->getBasePtr();
-  } else
-    return false;
-
-  bool IsInc;
-  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
-    return false;
-  // Post-indexing updates the base, so it's not a valid transform
-  // if that's not the same as the load's pointer.
-  if (Ptr != Base)
-    return false;
-  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
-  return true;
-}
-
-/// The only 128-bit atomic operation is an stxp that succeeds. In particular
-/// neither ldp nor ldxp are atomic. So the canonical sequence for an atomic
-/// load is:
-///   loop:
-///     ldxp x0, x1, [x8]
-///     stxp w2, x0, x1, [x8]
-///     cbnz w2, loop
-/// If the stxp succeeds then the ldxp managed to get both halves without an
-/// intervening stxp from a different thread and the read was atomic.
-static void ReplaceATOMIC_LOAD_128(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                   SelectionDAG &DAG) {
-  SDLoc DL(N);
-  AtomicSDNode *AN = cast<AtomicSDNode>(N);
-  EVT VT = AN->getMemoryVT();
-  SDValue Zero = DAG.getConstant(0, VT);
-
-  // FIXME: Really want ATOMIC_LOAD_NOP but that doesn't fit into the existing
-  // scheme very well. Given the complexity of what we're already generating, an
-  // extra couple of ORRs probably won't make much difference.
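  // The trick, spelled out as an added note: a fetch_or with zero returns the
  // loaded value unchanged (x | 0 == x), so expanding ATOMIC_LOAD as
  // ATOMIC_LOAD_OR(ptr, 0) below reuses the ldxp/stxp loop and inherits its
  // 128-bit atomicity.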
-  SDValue Result = DAG.getAtomic(ISD::ATOMIC_LOAD_OR, DL, AN->getMemoryVT(),
-                                 N->getOperand(0), N->getOperand(1), Zero,
-                                 AN->getMemOperand(), AN->getOrdering(),
-                                 AN->getSynchScope());
-
-  Results.push_back(Result.getValue(0)); // Value
-  Results.push_back(Result.getValue(1)); // Chain
-}
-
-static void ReplaceATOMIC_OP_128(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                 SelectionDAG &DAG, unsigned NewOp) {
-  SDLoc DL(N);
-  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
-  assert(N->getValueType(0) == MVT::i128 &&
-         "Only know how to expand i128 atomics");
-
-  SmallVector<SDValue, 8> Ops;
-  Ops.push_back(N->getOperand(1)); // Ptr
-  // Low part of Val1
-  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                            N->getOperand(2), DAG.getIntPtrConstant(0)));
-  // High part of Val1
-  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                            N->getOperand(2), DAG.getIntPtrConstant(1)));
-  if (NewOp == ARM64::ATOMIC_CMP_SWAP_I128) {
-    // Low part of Val2
-    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                              N->getOperand(3), DAG.getIntPtrConstant(0)));
-    // High part of Val2
-    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                              N->getOperand(3), DAG.getIntPtrConstant(1)));
-  }
-
-  Ops.push_back(DAG.getTargetConstant(Ordering, MVT::i32));
-  Ops.push_back(N->getOperand(0)); // Chain
-
-  SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
-  SDNode *Result = DAG.getMachineNode(NewOp, DL, Tys, Ops);
-  SDValue OpsF[] = { SDValue(Result, 0), SDValue(Result, 1) };
-  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, OpsF, 2));
-  Results.push_back(SDValue(Result, 2));
-}
-
-void ARM64TargetLowering::ReplaceNodeResults(SDNode *N,
-                                             SmallVectorImpl<SDValue> &Results,
-                                             SelectionDAG &DAG) const {
-  switch (N->getOpcode()) {
-  default:
-    llvm_unreachable("Don't know how to custom expand this");
-  case ISD::ATOMIC_LOAD:
-    ReplaceATOMIC_LOAD_128(N, Results, DAG);
-    return;
-  case ISD::ATOMIC_LOAD_ADD:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_ADD_I128);
-    return;
-  case ISD::ATOMIC_LOAD_SUB:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_SUB_I128);
-    return;
-  case ISD::ATOMIC_LOAD_AND:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_AND_I128);
-    return;
-  case ISD::ATOMIC_LOAD_OR:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_OR_I128);
-    return;
-  case ISD::ATOMIC_LOAD_XOR:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_XOR_I128);
-    return;
-  case ISD::ATOMIC_LOAD_NAND:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_NAND_I128);
-    return;
-  case ISD::ATOMIC_SWAP:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_SWAP_I128);
-    return;
-  case ISD::ATOMIC_LOAD_MIN:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MIN_I128);
-    return;
-  case ISD::ATOMIC_LOAD_MAX:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MAX_I128);
-    return;
-  case ISD::ATOMIC_LOAD_UMIN:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMIN_I128);
-    return;
-  case ISD::ATOMIC_LOAD_UMAX:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMAX_I128);
-    return;
-  case ISD::ATOMIC_CMP_SWAP:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_CMP_SWAP_I128);
-    return;
-  case ISD::FP_TO_UINT:
-  case ISD::FP_TO_SINT:
-    assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
-    // Let normal code take care of it by not adding anything to Results.
-    return;
-  }
-}
diff --git a/lib/Target/ARM64/ARM64ISelLowering.h b/lib/Target/ARM64/ARM64ISelLowering.h
deleted file mode 100644
index a4664ac..0000000
--- a/lib/Target/ARM64/ARM64ISelLowering.h
+++ /dev/null
@@ -1,422 +0,0 @@
-//==-- ARM64ISelLowering.h - ARM64 DAG Lowering Interface --------*- C++ -*-==//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that ARM64 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_ARM64_ISELLOWERING_H
-#define LLVM_TARGET_ARM64_ISELLOWERING_H
-
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
-
-namespace ARM64ISD {
-
-enum {
-  FIRST_NUMBER = ISD::BUILTIN_OP_END,
-  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
-  CALL,         // Function call.
-
-  // Almost the same as a normal call node, except that a TLSDesc relocation is
-  // needed so the linker can relax it correctly if possible.
-  TLSDESC_CALL,
-  ADRP,     // Page address of a TargetGlobalAddress operand.
-  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
-  LOADgot,  // Load from automatically generated descriptor (e.g. Global
-            // Offset Table, TLS record).
-  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
-  BRCOND,   // Conditional branch instruction; "b.cond".
-  CSEL,
-  FCSEL, // Conditional move instruction.
-  CSINV, // Conditional select invert.
-  CSNEG, // Conditional select negate.
-  CSINC, // Conditional select increment.
-
-  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
-  // ELF.
-  THREAD_POINTER,
-  ADC,
-  SBC, // adc, sbc instructions
-
-  // Arithmetic instructions which write flags.
-  ADDS,
-  SUBS,
-  ADCS,
-  SBCS,
-  ANDS,
-
-  // Floating point comparison
-  FCMP,
-
-  // Floating point max and min instructions.
-  FMAX,
-  FMIN,
-
-  // Scalar extract
-  EXTR,
-
-  // Scalar-to-vector duplication
-  DUP,
-  DUPLANE8,
-  DUPLANE16,
-  DUPLANE32,
-  DUPLANE64,
-
-  // Vector immediate moves
-  MOVI,
-  MOVIshift,
-  MOVIedit,
-  MOVImsl,
-  FMOV,
-  MVNIshift,
-  MVNImsl,
-
-  // Vector immediate ops
-  BICi,
-  ORRi,
-
-  // Vector arithmetic negation
-  NEG,
-
-  // Vector shuffles
-  ZIP1,
-  ZIP2,
-  UZP1,
-  UZP2,
-  TRN1,
-  TRN2,
-  REV16,
-  REV32,
-  REV64,
-  EXT,
-
-  // Vector shift by scalar
-  VSHL,
-  VLSHR,
-  VASHR,
-
-  // Vector shift by scalar (again)
-  SQSHL_I,
-  UQSHL_I,
-  SQSHLU_I,
-  SRSHR_I,
-  URSHR_I,
-
-  // Vector comparisons
-  CMEQ,
-  CMGE,
-  CMGT,
-  CMHI,
-  CMHS,
-  FCMEQ,
-  FCMGE,
-  FCMGT,
-
-  // Vector zero comparisons
-  CMEQz,
-  CMGEz,
-  CMGTz,
-  CMLEz,
-  CMLTz,
-  FCMEQz,
-  FCMGEz,
-  FCMGTz,
-  FCMLEz,
-  FCMLTz,
-
-  // Vector bitwise negation
-  NOT,
-
-  // Vector bitwise selection
-  BIT,
-
-  // Compare-and-branch
-  CBZ,
-  CBNZ,
-  TBZ,
-  TBNZ,
-
-  // Tail calls
-  TC_RETURN,
-
-  // Custom prefetch handling
-  PREFETCH,
-
-  // {s|u}int to FP within a FP register.
-  SITOF,
-  UITOF
-};
-
-} // end namespace ARM64ISD
-
-class ARM64Subtarget;
-class ARM64TargetMachine;
-
-class ARM64TargetLowering : public TargetLowering {
-  bool RequireStrictAlign;
-
-public:
-  explicit ARM64TargetLowering(ARM64TargetMachine &TM);
-
-  /// Selects the correct CCAssignFn for the given CallingConvention
-  /// value.
-  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
-
-  /// computeMaskedBitsForTargetNode - Determine which of the bits specified in
-  /// Mask are known to be either zero or one and return them in the
-  /// KnownZero/KnownOne bitsets.
-  void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero,
-                                      APInt &KnownOne, const SelectionDAG &DAG,
-                                      unsigned Depth = 0) const;
-
-  MVT getScalarShiftAmountTy(EVT LHSTy) const override;
-
-  /// allowsUnalignedMemoryAccesses - Returns true if the target allows
-  /// unaligned memory accesses of the specified type.
-  bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
-                                     bool *Fast = 0) const override {
-    if (RequireStrictAlign)
-      return false;
-    // FIXME: True for Cyclone, but not necessarily for others.
-    if (Fast)
-      *Fast = true;
-    return true;
-  }
-
-  /// LowerOperation - Provide custom lowering hooks for some operations.
-  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
-
-  const char *getTargetNodeName(unsigned Opcode) const override;
-
-  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
-
-  /// getFunctionAlignment - Return the Log2 alignment of this function.
-  unsigned getFunctionAlignment(const Function *F) const;
-
-  /// getMaximalGlobalOffset - Returns the maximal possible offset which can
-  /// be used for loads / stores from the global.
-  unsigned getMaximalGlobalOffset() const override;
-
-  /// Returns true if a cast between SrcAS and DestAS is a noop.
-  bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
-    // Addrspacecasts are always noops.
-    return true;
-  }
-
-  /// createFastISel - This method returns a target specific FastISel object,
-  /// or null if the target does not support "fast" ISel.
-  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
-                           const TargetLibraryInfo *libInfo) const override;
-
-  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
-
-  bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
-
-  /// isShuffleMaskLegal - Return true if the given shuffle mask can be
-  /// codegen'd directly, or if it should be stack expanded.
-  bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
-
-  /// getSetCCResultType - Return the ISD::SETCC ValueType
-  EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
-
-  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
-
-  MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
-                                      unsigned Size, unsigned BinOpcode) const;
-  MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
-                                       unsigned Size) const;
-  MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI,
-                                         MachineBasicBlock *BB,
-                                         unsigned BinOpcodeLo,
-                                         unsigned BinOpcodeHi) const;
-  MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI,
-                                          MachineBasicBlock *BB) const;
-  MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI,
-                                         MachineBasicBlock *BB,
-                                         unsigned CondCode) const;
-  MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
-                                  MachineBasicBlock *BB) const;
-
-  MachineBasicBlock *
-  EmitInstrWithCustomInserter(MachineInstr *MI,
-                              MachineBasicBlock *MBB) const override;
-
-  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
-                          unsigned Intrinsic) const override;
-
-  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
-  bool isTruncateFree(EVT VT1, EVT VT2) const override;
-
-  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
-  bool isZExtFree(EVT VT1, EVT VT2) const override;
-  bool isZExtFree(SDValue Val, EVT VT2) const override;
-
-  bool hasPairedLoad(Type *LoadedType,
-                     unsigned &RequiredAligment) const override;
-  bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
-
-  bool isLegalAddImmediate(int64_t) const override;
-  bool isLegalICmpImmediate(int64_t) const override;
-
-  EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
-                          bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
-                          MachineFunction &MF) const override;
-
-  /// isLegalAddressingMode - Return true if the addressing mode represented
-  /// by AM is legal for this target, for a load/store of the specified type.
-  bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
-
-  /// \brief Return the cost of the scaling factor used in the addressing
-  /// mode represented by AM for this target, for a load/store
-  /// of the specified type.
-  /// If the AM is supported, the return value must be >= 0.
-  /// If the AM is not supported, it returns a negative value.
-  int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
-
-  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
-  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
-  /// expanded to FMAs when this method returns true, otherwise fmuladd is
-  /// expanded to fmul + fadd.
-  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
-
-  const uint16_t *getScratchRegisters(CallingConv::ID CC) const override;
-
-  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
-                                         Type *Ty) const override;
-
-private:
-  /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
-  /// make the right decision when generating code for different targets.
-private:
-  /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
-  /// make the right decision when generating code for different targets.
-  const ARM64Subtarget *Subtarget;
-
-  void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT);
-  void addDRTypeForNEON(MVT VT);
-  void addQRTypeForNEON(MVT VT);
-
-  SDValue
-  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
-                       const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
-                       SelectionDAG &DAG,
-                       SmallVectorImpl<SDValue> &InVals) const override;
-
-  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
-                    SmallVectorImpl<SDValue> &InVals) const override;
-
-  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
-                          CallingConv::ID CallConv, bool isVarArg,
-                          const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
-                          SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
-                          bool isThisReturn, SDValue ThisVal) const;
-
-  bool isEligibleForTailCallOptimization(
-      SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
-      bool isCalleeStructRet, bool isCallerStructRet,
-      const SmallVectorImpl<ISD::OutputArg> &Outs,
-      const SmallVectorImpl<SDValue> &OutVals,
-      const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
-
-  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
-                           SDValue &Chain) const;
-
-  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
-                      bool isVarArg,
-                      const SmallVectorImpl<ISD::OutputArg> &Outs,
-                      LLVMContext &Context) const override;
-
-  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
-                      const SmallVectorImpl<ISD::OutputArg> &Outs,
-                      const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
-                      SelectionDAG &DAG) const override;
-
-  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
-                              SelectionDAG &DAG) const;
-  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
-                        RTLIB::Libcall Call) const;
-  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
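// A minimal sketch of how LowerOperation (declared earlier in this class)
// typically dispatches to the per-node helpers above; the case list is an
// illustrative assumption, not the complete ARM64 switch:
//
//   SDValue ARM64TargetLowering::LowerOperation(SDValue Op,
//                                               SelectionDAG &DAG) const {
//     switch (Op.getOpcode()) {
//     default:
//       llvm_unreachable("unimplemented operation lowering");
//     case ISD::SETCC:     return LowerSETCC(Op, DAG);
//     case ISD::VASTART:   return LowerVASTART(Op, DAG);
//     case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
//     case ISD::FSINCOS:   return LowerFSINCOS(Op, DAG);
//     }
//   }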
-  ConstraintType getConstraintType(const std::string &Constraint) const;
-
-  /// Examine constraint string and operand type and determine a weight value.
-  /// The operand object must already have been set up with the operand type.
-  ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info,
-                                                  const char *constraint) const;
-
-  std::pair<unsigned, const TargetRegisterClass *>
-  getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
-  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
-                                    std::vector<SDValue> &Ops,
-                                    SelectionDAG &DAG) const;
-
-  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
-  bool mayBeEmittedAsTailCall(CallInst *CI) const;
-  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
-                              ISD::MemIndexedMode &AM, bool &IsInc,
-                              SelectionDAG &DAG) const;
-  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
-                                 ISD::MemIndexedMode &AM,
-                                 SelectionDAG &DAG) const;
-  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
-                                  SDValue &Offset, ISD::MemIndexedMode &AM,
-                                  SelectionDAG &DAG) const;
-
-  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                          SelectionDAG &DAG) const;
-};
-
-namespace ARM64 {
-FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
-                         const TargetLibraryInfo *libInfo);
-} // end namespace ARM64
-
-} // end namespace llvm
-
-#endif // LLVM_TARGET_ARM64_ISELLOWERING_H
diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/ARM64/ARM64InstrAtomics.td
deleted file mode 100644
index 0d36e06..0000000
--- a/lib/Target/ARM64/ARM64InstrAtomics.td
+++ /dev/null
@@ -1,293 +0,0 @@
-//===- ARM64InstrAtomics.td - ARM64 Atomic codegen support -*- tablegen -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// ARM64 Atomic operand code-gen constructs.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------
-// Atomic fences
-//===----------------------------------
-def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
-def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
-
-//===----------------------------------
-// Atomic loads
-//===----------------------------------
-
-// When they're actually atomic, only one addressing mode (GPR64sp) is
-// supported, but when they're relaxed and anything can be used, all the
-// standard modes would be valid and may give efficiency gains.
-
-// An atomic load operation that actually needs acquire semantics.
-class acquiring_load<PatFrag base>
-  : PatFrag<(ops node:$ptr), (base node:$ptr), [{
-  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
-  assert(Ordering != AcquireRelease && "unexpected load ordering");
-  return Ordering == Acquire || Ordering == SequentiallyConsistent;
-}]>;
-
-// An atomic load operation that does not need either acquire or release
-// semantics.
-class relaxed_load<PatFrag base>
-  : PatFrag<(ops node:$ptr), (base node:$ptr), [{
-  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
-  return Ordering == Monotonic || Ordering == Unordered;
-}]>;
-
-// 8-bit loads
-def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_8> ro_indexed8:$addr),
-          (LDRBBro ro_indexed8:$addr)>;
-def : Pat<(relaxed_load<atomic_load_8> am_indexed8:$addr),
-          (LDRBBui am_indexed8:$addr)>;
-def : Pat<(relaxed_load<atomic_load_8> am_unscaled8:$addr),
-          (LDURBBi am_unscaled8:$addr)>;
-
-// 16-bit loads
-def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_16> ro_indexed16:$addr),
-          (LDRHHro ro_indexed16:$addr)>;
-def : Pat<(relaxed_load<atomic_load_16> am_indexed16:$addr),
-          (LDRHHui am_indexed16:$addr)>;
-def : Pat<(relaxed_load<atomic_load_16> am_unscaled16:$addr),
-          (LDURHHi am_unscaled16:$addr)>;
-
-// 32-bit loads
-def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_32> ro_indexed32:$addr),
-          (LDRWro ro_indexed32:$addr)>;
-def : Pat<(relaxed_load<atomic_load_32> am_indexed32:$addr),
-          (LDRWui am_indexed32:$addr)>;
-def : Pat<(relaxed_load<atomic_load_32> am_unscaled32:$addr),
-          (LDURWi am_unscaled32:$addr)>;
-
-// 64-bit loads
-def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_64> ro_indexed64:$addr),
-          (LDRXro ro_indexed64:$addr)>;
-def : Pat<(relaxed_load<atomic_load_64> am_indexed64:$addr),
-          (LDRXui am_indexed64:$addr)>;
-def : Pat<(relaxed_load<atomic_load_64> am_unscaled64:$addr),
-          (LDURXi am_unscaled64:$addr)>;
-
-//===----------------------------------
-// Atomic stores
-//===----------------------------------
-
-// When they're actually atomic, only one addressing mode (GPR64sp) is
-// supported, but when they're relaxed and anything can be used, all the
-// standard modes would be valid and may give efficiency gains.
-
-// A store operation that actually needs release semantics.
-class releasing_store<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
-  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
-  assert(Ordering != AcquireRelease && "unexpected store ordering");
-  return Ordering == Release || Ordering == SequentiallyConsistent;
-}]>;
-
-// An atomic store operation that doesn't actually need to be atomic on ARM64.
-class relaxed_store<PatFrag base>
-  : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
-  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
-  return Ordering == Monotonic || Ordering == Unordered;
-}]>;
-
-// 8-bit stores
-def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
-          (STLRB GPR32:$val, GPR64sp:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_8> ro_indexed8:$ptr, GPR32:$val),
-          (STRBBro GPR32:$val, ro_indexed8:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_8> am_indexed8:$ptr, GPR32:$val),
-          (STRBBui GPR32:$val, am_indexed8:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_8> am_unscaled8:$ptr, GPR32:$val),
-          (STURBBi GPR32:$val, am_unscaled8:$ptr)>;
-
-// 16-bit stores
-def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val),
-          (STLRH GPR32:$val, GPR64sp:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_16> ro_indexed16:$ptr, GPR32:$val),
-          (STRHHro GPR32:$val, ro_indexed16:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_16> am_indexed16:$ptr, GPR32:$val),
-          (STRHHui GPR32:$val, am_indexed16:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_16> am_unscaled16:$ptr, GPR32:$val),
-          (STURHHi GPR32:$val, am_unscaled16:$ptr)>;
-
-// 32-bit stores
-def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val),
-          (STLRW GPR32:$val, GPR64sp:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_32> ro_indexed32:$ptr, GPR32:$val),
-          (STRWro GPR32:$val, ro_indexed32:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_32> am_indexed32:$ptr, GPR32:$val),
-          (STRWui GPR32:$val, am_indexed32:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_32> am_unscaled32:$ptr, GPR32:$val),
-          (STURWi GPR32:$val, am_unscaled32:$ptr)>;
-
-// 64-bit stores
-def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
-          (STLRX GPR64:$val, GPR64sp:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_64> ro_indexed64:$ptr, GPR64:$val),
-          (STRXro GPR64:$val, ro_indexed64:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_64> am_indexed64:$ptr, GPR64:$val),
-          (STRXui GPR64:$val, am_indexed64:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_64> am_unscaled64:$ptr, GPR64:$val),
-          (STURXi GPR64:$val, am_unscaled64:$ptr)>;
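// The PatFrag predicates above partition the C++ AtomicOrdering values
// between the acquire/release instructions (LDAR*/STLR*) and the plain
// loads/stores (LDR*/LDUR*, STR*/STUR*). A minimal C++ restatement of the
// same predicates, for reference (helper names are illustrative):
//
//   static bool isAcquiring(AtomicOrdering O) {  // acquiring_load
//     return O == Acquire || O == SequentiallyConsistent;
//   }
//   static bool isReleasing(AtomicOrdering O) {  // releasing_store
//     return O == Release || O == SequentiallyConsistent;
//   }
//   static bool isRelaxed(AtomicOrdering O) {    // relaxed_load/relaxed_store
//     return O == Monotonic || O == Unordered;
//   }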
-
-//===----------------------------------
-// Atomic read-modify-write operations
-//===----------------------------------
-
-// More complicated operations need lots of C++ support, so we just create
-// skeletons here for the C++ code to refer to.
-
-let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
-multiclass AtomicSizes {
-  def _I8 : Pseudo<(outs GPR32:$dst),
-                   (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I16 : Pseudo<(outs GPR32:$dst),
-                    (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I32 : Pseudo<(outs GPR32:$dst),
-                    (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I64 : Pseudo<(outs GPR64:$dst),
-                    (ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
-  def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
-                     (ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi,
-                          i32imm:$ordering), []>;
-}
-}
-
-defm ATOMIC_LOAD_ADD : AtomicSizes;
-defm ATOMIC_LOAD_SUB : AtomicSizes;
-defm ATOMIC_LOAD_AND : AtomicSizes;
-defm ATOMIC_LOAD_OR : AtomicSizes;
-defm ATOMIC_LOAD_XOR : AtomicSizes;
-defm ATOMIC_LOAD_NAND : AtomicSizes;
-defm ATOMIC_SWAP : AtomicSizes;
-let Defs = [CPSR] in {
-  // These operations need a CMP to calculate the correct value
-  defm ATOMIC_LOAD_MIN : AtomicSizes;
-  defm ATOMIC_LOAD_MAX : AtomicSizes;
-  defm ATOMIC_LOAD_UMIN : AtomicSizes;
-  defm ATOMIC_LOAD_UMAX : AtomicSizes;
-}
-
-class AtomicCmpSwap<RegisterClass GPRData>
-  : Pseudo<(outs GPRData:$dst),
-           (ins GPR64sp:$ptr, GPRData:$old, GPRData:$new,
-                i32imm:$ordering), []> {
-  let usesCustomInserter = 1;
-  let hasCtrlDep = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
-  let Defs = [CPSR];
-}
-
-def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
-
-def ATOMIC_CMP_SWAP_I128
-  : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
-           (ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi,
-                GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> {
-  let usesCustomInserter = 1;
-  let hasCtrlDep = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
-  let Defs = [CPSR];
-}
-
-//===----------------------------------
-// Low-level exclusive operations
-//===----------------------------------
-
-// Load-exclusives.
-
-def ldxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
-
-def ldxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-
-def ldxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-
-def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
-
-def : Pat<(ldxr_1 am_noindex:$addr),
-          (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>;
-def : Pat<(ldxr_2 am_noindex:$addr),
-          (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>;
-def : Pat<(ldxr_4 am_noindex:$addr),
-          (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>;
-def : Pat<(ldxr_8 am_noindex:$addr), (LDXRX am_noindex:$addr)>;
-
-def : Pat<(and (ldxr_1 am_noindex:$addr), 0xff),
-          (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>;
-def : Pat<(and (ldxr_2 am_noindex:$addr), 0xffff),
-          (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>;
-def : Pat<(and (ldxr_4 am_noindex:$addr), 0xffffffff),
-          (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>;
-
-// Store-exclusives.
-
-def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
-                     (int_arm64_stxr node:$val, node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
-
-def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
-                     (int_arm64_stxr node:$val, node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-
-def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
-                     (int_arm64_stxr node:$val, node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-
-def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
-                     (int_arm64_stxr node:$val, node:$ptr), [{
-  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
-
-def : Pat<(stxr_1 GPR64:$val, am_noindex:$addr),
-          (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_2 GPR64:$val, am_noindex:$addr),
-          (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_4 GPR64:$val, am_noindex:$addr),
-          (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_8 GPR64:$val, am_noindex:$addr),
-          (STXRX GPR64:$val, am_noindex:$addr)>;
-
-def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr),
-          (STXRB GPR32:$val, am_noindex:$addr)>;
-def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr),
-          (STXRH GPR32:$val, am_noindex:$addr)>;
-def : Pat<(stxr_4 (zext GPR32:$val), am_noindex:$addr),
-          (STXRW GPR32:$val, am_noindex:$addr)>;
-
-def : Pat<(stxr_1 (and GPR64:$val, 0xff), am_noindex:$addr),
-          (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr),
-          (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr),
-          (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-
-
-// And clear exclusive.
-
-def : Pat<(int_arm64_clrex), (CLREX 0xf)>;
diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td
deleted file mode 100644
index 38406f8..0000000
--- a/lib/Target/ARM64/ARM64InstrFormats.td
+++ /dev/null
@@ -1,8193 +0,0 @@
-//===- ARM64InstrFormats.td - ARM64 Instruction Formats ------*- tblgen -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Describe ARM64 instructions format here
-//
-
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<2> val> {
-  bits<2> Value = val;
-}
-
-def PseudoFrm : Format<0>;
-def NormalFrm : Format<1>; // Do we need any others?
-
-// ARM64 Instruction Format
-class ARM64Inst<Format f, string cstr> : Instruction {
-  field bits<32> Inst; // Instruction encoding.
-  // Mask of bits that cause an encoding to be UNPREDICTABLE.
-  // If a bit is set, then if the corresponding bit in the
-  // target encoding differs from its value in the "Inst" field,
-  // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
-  field bits<32> Unpredictable = 0;
-  // SoftFail is the generic name for this field, but we alias it so
-  // as to make it more obvious what it means in ARM-land.
-  field bits<32> SoftFail = Unpredictable;
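// A minimal C++ restatement of the rule the comment above describes (the
// helper name and its use in a decoder are assumptions, not code from this
// patch): for each set bit in Unpredictable, a decoded word that disagrees
// with the "Inst" field at that position is flagged as SoftFail.
//
//   static bool isUnpredictable(uint32_t Word, uint32_t Inst,
//                               uint32_t Unpredictable) {
//     return ((Word ^ Inst) & Unpredictable) != 0;
//   }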
-  let Namespace = "ARM64";
-  Format F = f;
-  bits<2> Form = F.Value;
-  let Pattern = [];
-  let Constraints = cstr;
-}
-
-// Pseudo instructions (don't have encoding information)
-class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
-    : ARM64Inst<PseudoFrm, cstr> {
-  dag OutOperandList = oops;
-  dag InOperandList = iops;
-  let Pattern = pattern;
-  let isCodeGenOnly = 1;
-}
-
-// Real instructions (have encoding information)
-class EncodedI<string cstr, list<dag> pattern> : ARM64Inst<NormalFrm, cstr> {
-  let Pattern = pattern;
-  let Size = 4;
-}
-
-// Normal instructions
-class I<dag oops, dag iops, string asm, string operands, string cstr,
-        list<dag> pattern>
-    : EncodedI<cstr, pattern> {
-  dag OutOperandList = oops;
-  dag InOperandList = iops;
-  let AsmString = !strconcat(asm, operands);
-}
-
-class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
-class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
-class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>;
-
-// Helper fragment for an extract of the high portion of a 128-bit vector.
-def extract_high_v16i8 :
-   UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>;
-def extract_high_v8i16 :
-   UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>;
-def extract_high_v4i32 :
-   UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>;
-def extract_high_v2i64 :
-   UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>;
-
-//===----------------------------------------------------------------------===//
-// Asm Operand Classes.
-//
-
-// Shifter operand for arithmetic shifted encodings.
-def ShifterOperand : AsmOperandClass {
-  let Name = "Shifter";
-}
-
-// Shifter operand for mov immediate encodings.
-def MovImm32ShifterOperand : AsmOperandClass {
-  let SuperClasses = [ShifterOperand];
-  let Name = "MovImm32Shifter";
-}
-def MovImm64ShifterOperand : AsmOperandClass {
-  let SuperClasses = [ShifterOperand];
-  let Name = "MovImm64Shifter";
-}
-
-// Shifter operand for arithmetic register shifted encodings.
-def ArithmeticShifterOperand : AsmOperandClass {
-  let SuperClasses = [ShifterOperand];
-  let Name = "ArithmeticShifter";
-}
-
-// Shifter operand for arithmetic shifted encodings for ADD/SUB instructions.
-def AddSubShifterOperand : AsmOperandClass {
-  let SuperClasses = [ArithmeticShifterOperand];
-  let Name = "AddSubShifter";
-}
-
-// Shifter operand for logical vector 128/64-bit shifted encodings.
-def LogicalVecShifterOperand : AsmOperandClass {
-  let SuperClasses = [ShifterOperand];
-  let Name = "LogicalVecShifter";
-}
-def LogicalVecHalfWordShifterOperand : AsmOperandClass {
-  let SuperClasses = [LogicalVecShifterOperand];
-  let Name = "LogicalVecHalfWordShifter";
-}
-
-// The "MSL" shifter on the vector MOVI instruction.
-def MoveVecShifterOperand : AsmOperandClass {
-  let SuperClasses = [ShifterOperand];
-  let Name = "MoveVecShifter";
-}
-
-// Extend operand for arithmetic encodings.
-def ExtendOperand : AsmOperandClass { let Name = "Extend"; }
-def ExtendOperand64 : AsmOperandClass {
-  let SuperClasses = [ExtendOperand];
-  let Name = "Extend64";
-}
-// 'extend' that's a lsl of a 64-bit register.
-def ExtendOperandLSL64 : AsmOperandClass {
-  let SuperClasses = [ExtendOperand];
-  let Name = "ExtendLSL64";
-}
-
-// 8-bit floating-point immediate encodings.
-def FPImmOperand : AsmOperandClass {
-  let Name = "FPImm";
-  let ParserMethod = "tryParseFPImm";
-}
-
-// 8-bit immediate for AdvSIMD where 64-bit values of the form:
-// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
-// are encoded as the eight bit value 'abcdefgh'.
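// A minimal C++ sketch of the expansion just described (the helper is
// illustrative, not code from this patch): each bit of the 8-bit value
// selects an all-ones or all-zeros byte of the 64-bit immediate.
//
//   static uint64_t expandImmType10(uint8_t abcdefgh) {
//     uint64_t Out = 0;
//     for (int Bit = 7; Bit >= 0; --Bit) {
//       Out <<= 8;
//       if (abcdefgh & (1u << Bit))
//         Out |= 0xffULL;
//     }
//     return Out;
//   }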
-def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; }
-
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-//
-
-// ADR[P] instruction labels.
-def AdrpOperand : AsmOperandClass {
-  let Name = "AdrpLabel";
-  let ParserMethod = "tryParseAdrpLabel";
-}
-def adrplabel : Operand<i64> {
-  let EncoderMethod = "getAdrLabelOpValue";
-  let PrintMethod = "printAdrpLabel";
-  let ParserMatchClass = AdrpOperand;
-}
-
-def AdrOperand : AsmOperandClass {
-  let Name = "AdrLabel";
-  let ParserMethod = "tryParseAdrLabel";
-}
-def adrlabel : Operand<i64> {
-  let EncoderMethod = "getAdrLabelOpValue";
-  let ParserMatchClass = AdrOperand;
-}
-
-// simm9 predicate - True if the immediate is in the range [-256, 255].
-def SImm9Operand : AsmOperandClass {
-  let Name = "SImm9";
-  let DiagnosticType = "InvalidMemoryIndexedSImm9";
-}
-def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
-  let ParserMatchClass = SImm9Operand;
-}
-
-// simm7s4 predicate - True if the immediate is a multiple of 4 in the range
-// [-256, 252].
-def SImm7s4Operand : AsmOperandClass {
-  let Name = "SImm7s4";
-  let DiagnosticType = "InvalidMemoryIndexed32SImm7";
-}
-def simm7s4 : Operand<i32> {
-  let ParserMatchClass = SImm7s4Operand;
-  let PrintMethod = "printImmScale4";
-}
-
-// simm7s8 predicate - True if the immediate is a multiple of 8 in the range
-// [-512, 504].
-def SImm7s8Operand : AsmOperandClass {
-  let Name = "SImm7s8";
-  let DiagnosticType = "InvalidMemoryIndexed64SImm7";
-}
-def simm7s8 : Operand<i32> {
-  let ParserMatchClass = SImm7s8Operand;
-  let PrintMethod = "printImmScale8";
-}
-
-// simm7s16 predicate - True if the immediate is a multiple of 16 in the range
-// [-1024, 1008].
-def SImm7s16Operand : AsmOperandClass {
-  let Name = "SImm7s16";
-  let DiagnosticType = "InvalidMemoryIndexed64SImm7";
-}
-def simm7s16 : Operand<i32> {
-  let ParserMatchClass = SImm7s16Operand;
-  let PrintMethod = "printImmScale16";
-}
-
-// imm0_65535 predicate - True if the immediate is in the range [0,65535].
-def Imm0_65535Operand : AsmOperandClass { let Name = "Imm0_65535"; } -def imm0_65535 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_65535Operand; -} - -def Imm1_8Operand : AsmOperandClass { - let Name = "Imm1_8"; - let DiagnosticType = "InvalidImm1_8"; -} -def Imm1_16Operand : AsmOperandClass { - let Name = "Imm1_16"; - let DiagnosticType = "InvalidImm1_16"; -} -def Imm1_32Operand : AsmOperandClass { - let Name = "Imm1_32"; - let DiagnosticType = "InvalidImm1_32"; -} -def Imm1_64Operand : AsmOperandClass { - let Name = "Imm1_64"; - let DiagnosticType = "InvalidImm1_64"; -} - -def MovZSymbolG3AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG3"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g3 : Operand { - let ParserMatchClass = MovZSymbolG3AsmOperand; -} - -def MovZSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g2 : Operand { - let ParserMatchClass = MovZSymbolG2AsmOperand; -} - -def MovZSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g1 : Operand { - let ParserMatchClass = MovZSymbolG1AsmOperand; -} - -def MovZSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovZSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movz_symbol_g0 : Operand { - let ParserMatchClass = MovZSymbolG0AsmOperand; -} - -def MovKSymbolG2AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG2"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g2 : Operand { - let ParserMatchClass = MovKSymbolG2AsmOperand; -} - -def MovKSymbolG1AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG1"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g1 : Operand { - let ParserMatchClass = MovKSymbolG1AsmOperand; -} - -def MovKSymbolG0AsmOperand : AsmOperandClass { - let Name = "MovKSymbolG0"; - let RenderMethod = "addImmOperands"; -} - -def movk_symbol_g0 : Operand { - let ParserMatchClass = MovKSymbolG0AsmOperand; -} - -def fixedpoint32 : Operand { - let EncoderMethod = "getFixedPointScaleOpValue"; - let DecoderMethod = "DecodeFixedPointScaleImm"; - let ParserMatchClass = Imm1_32Operand; -} -def fixedpoint64 : Operand { - let EncoderMethod = "getFixedPointScaleOpValue"; - let DecoderMethod = "DecodeFixedPointScaleImm"; - let ParserMatchClass = Imm1_64Operand; -} - -def vecshiftR8 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR8OpValue"; - let DecoderMethod = "DecodeVecShiftR8Imm"; - let ParserMatchClass = Imm1_8Operand; -} -def vecshiftR16 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16Imm"; - let ParserMatchClass = Imm1_16Operand; -} -def vecshiftR16Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 9); -}]> { - let EncoderMethod = "getVecShiftR16OpValue"; - let DecoderMethod = "DecodeVecShiftR16ImmNarrow"; - let ParserMatchClass = Imm1_8Operand; -} -def vecshiftR32 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32Imm"; - let ParserMatchClass = Imm1_32Operand; -} -def vecshiftR32Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 17); -}]> { - let EncoderMethod = "getVecShiftR32OpValue"; - let DecoderMethod = "DecodeVecShiftR32ImmNarrow"; - let ParserMatchClass = Imm1_16Operand; -} -def vecshiftR64 : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 65); -}]> { - let EncoderMethod = 
"getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64Imm"; - let ParserMatchClass = Imm1_64Operand; -} -def vecshiftR64Narrow : Operand, ImmLeaf 0) && (((uint32_t)Imm) < 33); -}]> { - let EncoderMethod = "getVecShiftR64OpValue"; - let DecoderMethod = "DecodeVecShiftR64ImmNarrow"; - let ParserMatchClass = Imm1_32Operand; -} - -def Imm0_7Operand : AsmOperandClass { let Name = "Imm0_7"; } -def Imm0_15Operand : AsmOperandClass { let Name = "Imm0_15"; } -def Imm0_31Operand : AsmOperandClass { let Name = "Imm0_31"; } -def Imm0_63Operand : AsmOperandClass { let Name = "Imm0_63"; } - -def vecshiftL8 : Operand, ImmLeaf { - let EncoderMethod = "getVecShiftL8OpValue"; - let DecoderMethod = "DecodeVecShiftL8Imm"; - let ParserMatchClass = Imm0_7Operand; -} -def vecshiftL16 : Operand, ImmLeaf { - let EncoderMethod = "getVecShiftL16OpValue"; - let DecoderMethod = "DecodeVecShiftL16Imm"; - let ParserMatchClass = Imm0_15Operand; -} -def vecshiftL32 : Operand, ImmLeaf { - let EncoderMethod = "getVecShiftL32OpValue"; - let DecoderMethod = "DecodeVecShiftL32Imm"; - let ParserMatchClass = Imm0_31Operand; -} -def vecshiftL64 : Operand, ImmLeaf { - let EncoderMethod = "getVecShiftL64OpValue"; - let DecoderMethod = "DecodeVecShiftL64Imm"; - let ParserMatchClass = Imm0_63Operand; -} - - -// Crazy immediate formats used by 32-bit and 64-bit logical immediate -// instructions for splatting repeating bit patterns across the immediate. -def logical_imm32_XFORM : SDNodeXFormgetZExtValue(), 32); - return CurDAG->getTargetConstant(enc, MVT::i32); -}]>; -def logical_imm64_XFORM : SDNodeXFormgetZExtValue(), 64); - return CurDAG->getTargetConstant(enc, MVT::i32); -}]>; - -def LogicalImm32Operand : AsmOperandClass { let Name = "LogicalImm32"; } -def LogicalImm64Operand : AsmOperandClass { let Name = "LogicalImm64"; } -def logical_imm32 : Operand, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 32); -}], logical_imm32_XFORM> { - let PrintMethod = "printLogicalImm32"; - let ParserMatchClass = LogicalImm32Operand; -} -def logical_imm64 : Operand, PatLeaf<(imm), [{ - return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 64); -}], logical_imm64_XFORM> { - let PrintMethod = "printLogicalImm64"; - let ParserMatchClass = LogicalImm64Operand; -} - -// imm0_255 predicate - True if the immediate is in the range [0,255]. -def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; } -def imm0_255 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_255Operand; -} - -// imm0_127 predicate - True if the immediate is in the range [0,127] -def Imm0_127Operand : AsmOperandClass { let Name = "Imm0_127"; } -def imm0_127 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_127Operand; -} - -// NOTE: These imm0_N operands have to be of type i64 because i64 is the size -// for all shift-amounts. 
- -// imm0_63 predicate - True if the immediate is in the range [0,63] -def imm0_63 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_63Operand; -} - -// imm0_31 predicate - True if the immediate is in the range [0,31] -def imm0_31 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_31Operand; -} - -// imm0_15 predicate - True if the immediate is in the range [0,15] -def imm0_15 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_15Operand; -} - -// imm0_7 predicate - True if the immediate is in the range [0,7] -def imm0_7 : Operand, ImmLeaf { - let ParserMatchClass = Imm0_7Operand; -} - -// An arithmetic shifter operand: -// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr -// {5-0} - imm6 -def arith_shift : Operand { - let PrintMethod = "printShifter"; - let ParserMatchClass = ArithmeticShifterOperand; -} - -class arith_shifted_reg - : Operand, - ComplexPattern { - let PrintMethod = "printShiftedRegister"; - let MIOperandInfo = (ops regclass, arith_shift); -} - -def arith_shifted_reg32 : arith_shifted_reg; -def arith_shifted_reg64 : arith_shifted_reg; - -// An arithmetic shifter operand: -// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror -// {5-0} - imm6 -def logical_shift : Operand { - let PrintMethod = "printShifter"; - let ParserMatchClass = ShifterOperand; -} - -class logical_shifted_reg - : Operand, - ComplexPattern { - let PrintMethod = "printShiftedRegister"; - let MIOperandInfo = (ops regclass, logical_shift); -} - -def logical_shifted_reg32 : logical_shifted_reg; -def logical_shifted_reg64 : logical_shifted_reg; - -// A logical vector shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0, #8, #16, or #24 -def logical_vec_shift : Operand { - let PrintMethod = "printShifter"; - let EncoderMethod = "getVecShifterOpValue"; - let ParserMatchClass = LogicalVecShifterOperand; -} - -// A logical vector half-word shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0 or #8 -def logical_vec_hw_shift : Operand { - let PrintMethod = "printShifter"; - let EncoderMethod = "getVecShifterOpValue"; - let ParserMatchClass = LogicalVecHalfWordShifterOperand; -} - -// A vector move shifter operand: -// {0} - imm1: #8 or #16 -def move_vec_shift : Operand { - let PrintMethod = "printShifter"; - let EncoderMethod = "getMoveVecShifterOpValue"; - let ParserMatchClass = MoveVecShifterOperand; -} - -// An ADD/SUB immediate shifter operand: -// {7-6} - shift type: 00 = lsl -// {5-0} - imm6: #0 or #12 -def addsub_shift : Operand { - let ParserMatchClass = AddSubShifterOperand; -} - -class addsub_shifted_imm - : Operand, ComplexPattern { - let PrintMethod = "printAddSubImm"; - let EncoderMethod = "getAddSubImmOpValue"; - let MIOperandInfo = (ops i32imm, addsub_shift); -} - -def addsub_shifted_imm32 : addsub_shifted_imm; -def addsub_shifted_imm64 : addsub_shifted_imm; - -class neg_addsub_shifted_imm - : Operand, ComplexPattern { - let PrintMethod = "printAddSubImm"; - let EncoderMethod = "getAddSubImmOpValue"; - let MIOperandInfo = (ops i32imm, addsub_shift); -} - -def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm; -def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm; - -// An extend operand: -// {5-3} - extend type -// {2-0} - imm3 -def arith_extend : Operand { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperand; -} -def arith_extend64 : Operand { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperand64; -} - -// 'extend' that's a lsl of a 64-bit register. 
-def arith_extendlsl64 : Operand { - let PrintMethod = "printExtend"; - let ParserMatchClass = ExtendOperandLSL64; -} - -class arith_extended_reg32 : Operand, - ComplexPattern { - let PrintMethod = "printExtendedRegister"; - let MIOperandInfo = (ops GPR32, arith_extend); -} - -class arith_extended_reg32to64 : Operand, - ComplexPattern { - let PrintMethod = "printExtendedRegister"; - let MIOperandInfo = (ops GPR32, arith_extend64); -} - -// Floating-point immediate. -def fpimm32 : Operand, - PatLeaf<(f32 fpimm), [{ - return ARM64_AM::getFP32Imm(N->getValueAPF()) != -1; - }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::getFP32Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} -def fpimm64 : Operand, - PatLeaf<(f64 fpimm), [{ - return ARM64_AM::getFP64Imm(N->getValueAPF()) != -1; - }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::getFP64Imm(InVal); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm8 : Operand { - let ParserMatchClass = FPImmOperand; - let PrintMethod = "printFPImmOperand"; -} - -def fpimm0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(+0.0); -}]>; - -// 8-bit immediate for AdvSIMD where 64-bit values of the form: -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// are encoded as the eight bit value 'abcdefgh'. -def simdimmtype10 : Operand, - PatLeaf<(f64 fpimm), [{ - return ARM64_AM::isAdvSIMDModImmType10(N->getValueAPF() - .bitcastToAPInt() - .getZExtValue()); - }], SDNodeXFormgetValueAPF(); - uint32_t enc = ARM64_AM::encodeAdvSIMDModImmType10(N->getValueAPF() - .bitcastToAPInt() - .getZExtValue()); - return CurDAG->getTargetConstant(enc, MVT::i32); - }]>> { - let ParserMatchClass = SIMDImmType10Operand; - let PrintMethod = "printSIMDType10Operand"; -} - - -//--- -// Sytem management -//--- - -// Base encoding for system instruction operands. -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class BaseSystemI - : I { - let Inst{31-22} = 0b1101010100; - let Inst{21} = L; -} - -// System instructions which do not have an Rt register. -class SimpleSystemI - : BaseSystemI { - let Inst{4-0} = 0b11111; -} - -// System instructions which have an Rt register. -class RtSystemI - : BaseSystemI, - Sched<[WriteSys]> { - bits<5> Rt; - let Inst{4-0} = Rt; -} - -// Hint instructions that take both a CRm and a 3-bit immediate. -class HintI - : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "">, - Sched<[WriteHint]> { - bits <7> imm; - let Inst{20-12} = 0b000110010; - let Inst{11-5} = imm; -} - -// System instructions taking a single literal operand which encodes into -// CRm. op2 differentiates the opcodes. -def BarrierAsmOperand : AsmOperandClass { - let Name = "Barrier"; - let ParserMethod = "tryParseBarrierOperand"; -} -def barrier_op : Operand { - let PrintMethod = "printBarrierOption"; - let ParserMatchClass = BarrierAsmOperand; -} -class CRmSystemI opc, string asm> - : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm">, - Sched<[WriteBarrier]> { - bits<4> CRm; - let Inst{20-12} = 0b000110011; - let Inst{11-8} = CRm; - let Inst{7-5} = opc; -} - -// MRS/MSR system instructions. -def SystemRegisterOperand : AsmOperandClass { - let Name = "SystemRegister"; - let ParserMethod = "tryParseSystemRegister"; -} -// concatenation of 1, op0, op1, CRn, CRm, op2. 16-bit immediate. 
-def sysreg_op : Operand { - let ParserMatchClass = SystemRegisterOperand; - let DecoderMethod = "DecodeSystemRegister"; - let PrintMethod = "printSystemRegister"; -} - -class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins sysreg_op:$systemreg), - "mrs", "\t$Rt, $systemreg"> { - bits<15> systemreg; - let Inst{20} = 1; - let Inst{19-5} = systemreg; -} - -// FIXME: Some of these def CPSR, others don't. Best way to model that? -// Explicitly modeling each of the system register as a register class -// would do it, but feels like overkill at this point. -class MSRI : RtSystemI<0, (outs), (ins sysreg_op:$systemreg, GPR64:$Rt), - "msr", "\t$systemreg, $Rt"> { - bits<15> systemreg; - let Inst{20} = 1; - let Inst{19-5} = systemreg; -} - -def SystemCPSRFieldOperand : AsmOperandClass { - let Name = "SystemCPSRField"; - let ParserMethod = "tryParseCPSRField"; -} -def cpsrfield_op : Operand { - let ParserMatchClass = SystemCPSRFieldOperand; - let PrintMethod = "printSystemCPSRField"; -} - -let Defs = [CPSR] in -class MSRcpsrI : SimpleSystemI<0, (ins cpsrfield_op:$cpsr_field, imm0_15:$imm), - "msr", "\t$cpsr_field, $imm">, - Sched<[WriteSys]> { - bits<6> cpsrfield; - bits<4> imm; - let Inst{20-19} = 0b00; - let Inst{18-16} = cpsrfield{5-3}; - let Inst{15-12} = 0b0100; - let Inst{11-8} = imm; - let Inst{7-5} = cpsrfield{2-0}; - - let DecoderMethod = "DecodeSystemCPSRInstruction"; -} - -// SYS and SYSL generic system instructions. -def SysCRAsmOperand : AsmOperandClass { - let Name = "SysCR"; - let ParserMethod = "tryParseSysCROperand"; -} - -def sys_cr_op : Operand { - let PrintMethod = "printSysCROperand"; - let ParserMatchClass = SysCRAsmOperand; -} - -class SystemI - : SimpleSystemI, - Sched<[WriteSys]> { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - -class SystemXtI - : RtSystemI { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - -class SystemLXtI - : RtSystemI { - bits<3> op1; - bits<4> Cn; - bits<4> Cm; - bits<3> op2; - let Inst{20-19} = 0b01; - let Inst{18-16} = op1; - let Inst{15-12} = Cn; - let Inst{11-8} = Cm; - let Inst{7-5} = op2; -} - - -// Branch (register) instructions: -// -// case opc of -// 0001 blr -// 0000 br -// 0101 dret -// 0100 eret -// 0010 ret -// otherwise UNDEFINED -class BaseBranchReg opc, dag oops, dag iops, string asm, - string operands, list pattern> - : I, Sched<[WriteBrReg]> { - let Inst{31-25} = 0b1101011; - let Inst{24-21} = opc; - let Inst{20-16} = 0b11111; - let Inst{15-10} = 0b000000; - let Inst{4-0} = 0b00000; -} - -class BranchReg opc, string asm, list pattern> - : BaseBranchReg { - bits<5> Rn; - let Inst{9-5} = Rn; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in -class SpecialReturn opc, string asm> - : BaseBranchReg { - let Inst{9-5} = 0b11111; -} - -//--- -// Conditional branch instruction. -//--- -// Branch condition code. -// 4-bit immediate. Pretty-printed as . -def dotCcode : Operand { - let PrintMethod = "printDotCondCode"; -} - -// Conditional branch target. 19-bit immediate. The low two bits of the target -// offset are implied zero and so are not part of the immediate. 
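// A minimal C++ sketch of the target decoding the comment above implies (the
// helper name is illustrative): the 19 stored bits are shifted left by two
// and sign-extended, giving a byte offset of roughly +/-1 MiB.
//
//   #include "llvm/Support/MathExtras.h"
//   static int64_t decodeBranchTarget19(uint32_t Imm19) {
//     return llvm::SignExtend64<21>((uint64_t)Imm19 << 2);
//   }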
-def BranchTarget19Operand : AsmOperandClass { - let Name = "BranchTarget19"; -} -def am_brcond : Operand { - let EncoderMethod = "getCondBranchTargetOpValue"; - let DecoderMethod = "DecodeCondBranchTarget"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget19Operand; -} - -class BranchCond : I<(outs), (ins dotCcode:$cond, am_brcond:$target), - "b", "$cond\t$target", "", - [(ARM64brcond bb:$target, imm:$cond, CPSR)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - let Uses = [CPSR]; - - bits<4> cond; - bits<19> target; - let Inst{31-24} = 0b01010100; - let Inst{23-5} = target; - let Inst{4} = 0; - let Inst{3-0} = cond; -} - -//--- -// Compare-and-branch instructions. -//--- -class BaseCmpBranch - : I<(outs), (ins regtype:$Rt, am_brcond:$target), - asm, "\t$Rt, $target", "", - [(node regtype:$Rt, bb:$target)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - - bits<5> Rt; - bits<19> target; - let Inst{30-25} = 0b011010; - let Inst{24} = op; - let Inst{23-5} = target; - let Inst{4-0} = Rt; -} - -multiclass CmpBranch { - def W : BaseCmpBranch { - let Inst{31} = 0; - } - def X : BaseCmpBranch { - let Inst{31} = 1; - } -} - -//--- -// Test-bit-and-branch instructions. -//--- -// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of -// the target offset are implied zero and so are not part of the immediate. -def BranchTarget14Operand : AsmOperandClass { - let Name = "BranchTarget14"; -} -def am_tbrcond : Operand { - let EncoderMethod = "getTestBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget14Operand; -} - -class TestBranch - : I<(outs), (ins GPR64:$Rt, imm0_63:$bit_off, am_tbrcond:$target), - asm, "\t$Rt, $bit_off, $target", "", - [(node GPR64:$Rt, imm0_63:$bit_off, bb:$target)]>, - Sched<[WriteBr]> { - let isBranch = 1; - let isTerminator = 1; - - bits<5> Rt; - bits<6> bit_off; - bits<14> target; - - let Inst{31} = bit_off{5}; - let Inst{30-25} = 0b011011; - let Inst{24} = op; - let Inst{23-19} = bit_off{4-0}; - let Inst{18-5} = target; - let Inst{4-0} = Rt; - - let DecoderMethod = "DecodeTestAndBranch"; -} - -//--- -// Unconditional branch (immediate) instructions. -//--- -def BranchTarget26Operand : AsmOperandClass { - let Name = "BranchTarget26"; -} -def am_b_target : Operand { - let EncoderMethod = "getBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget26Operand; -} -def am_bl_target : Operand { - let EncoderMethod = "getBranchTargetOpValue"; - let PrintMethod = "printAlignedBranchTarget"; - let ParserMatchClass = BranchTarget26Operand; -} - -class BImm pattern> - : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> { - bits<26> addr; - let Inst{31} = op; - let Inst{30-26} = 0b00101; - let Inst{25-0} = addr; - - let DecoderMethod = "DecodeUnconditionalBranch"; -} - -class BranchImm pattern> - : BImm; -class CallImm pattern> - : BImm; - -//--- -// Basic one-operand data processing instructions. 
-//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseOneOperandData opc, RegisterClass regtype, string asm, - SDPatternOperator node> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", - [(set regtype:$Rd, (node regtype:$Rn))]>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - - let Inst{30-13} = 0b101101011000000000; - let Inst{12-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass OneOperandData opc, string asm, - SDPatternOperator node = null_frag> { - def Wr : BaseOneOperandData { - let Inst{31} = 0; - } - - def Xr : BaseOneOperandData { - let Inst{31} = 1; - } -} - -class OneWRegData opc, string asm, SDPatternOperator node> - : BaseOneOperandData { - let Inst{31} = 0; -} - -class OneXRegData opc, string asm, SDPatternOperator node> - : BaseOneOperandData { - let Inst{31} = 1; -} - -//--- -// Basic two-operand data processing instructions. -//--- -class BaseBaseAddSubCarry pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteI]> { - let Uses = [CPSR]; - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{30} = isSub; - let Inst{28-21} = 0b11010000; - let Inst{20-16} = Rm; - let Inst{15-10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseAddSubCarry - : BaseBaseAddSubCarry; - -class BaseAddSubCarrySetFlags - : BaseBaseAddSubCarry { - let Defs = [CPSR]; -} - -multiclass AddSubCarry { - def Wr : BaseAddSubCarry { - let Inst{31} = 0; - let Inst{29} = 0; - } - def Xr : BaseAddSubCarry { - let Inst{31} = 1; - let Inst{29} = 0; - } - - // Sets flags. - def SWr : BaseAddSubCarrySetFlags { - let Inst{31} = 0; - let Inst{29} = 1; - } - def SXr : BaseAddSubCarrySetFlags { - let Inst{31} = 1; - let Inst{29} = 1; - } -} - -class BaseTwoOperand opc, RegisterClass regtype, string asm, - SDPatternOperator OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{30-21} = 0b0011010110; - let Inst{20-16} = Rm; - let Inst{15-14} = 0b00; - let Inst{13-10} = opc; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseDiv - : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> { - let Inst{10} = isSigned; -} - -multiclass Div { - def Wr : BaseDiv, - Sched<[WriteID32]> { - let Inst{31} = 0; - } - def Xr : BaseDiv, - Sched<[WriteID64]> { - let Inst{31} = 1; - } -} - -class BaseShift shift_type, RegisterClass regtype, string asm, - SDPatternOperator OpNode = null_frag> - : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>, - Sched<[WriteIS]> { - let Inst{11-10} = shift_type; -} - -multiclass Shift shift_type, string asm, SDNode OpNode> { - def Wr : BaseShift { - let Inst{31} = 0; - } - - def Xr : BaseShift { - let Inst{31} = 1; - } - - def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)), - (!cast(NAME # "Wr") GPR32:$Rn, - (EXTRACT_SUBREG i64:$Rm, sub_32))>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))), - (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))), - (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; - - def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))), - (!cast(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>; -} - -class ShiftAlias - : InstAlias; - -class BaseMulAccum opc, RegisterClass multype, - RegisterClass addtype, string asm, - list pattern> - : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, 
addtype:$Ra), - asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<5> Ra; - let Inst{30-24} = 0b0011011; - let Inst{23-21} = opc; - let Inst{20-16} = Rm; - let Inst{15} = isSub; - let Inst{14-10} = Ra; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass MulAccum { - def Wrrr : BaseMulAccum, - Sched<[WriteIM32]> { - let Inst{31} = 0; - } - - def Xrrr : BaseMulAccum, - Sched<[WriteIM64]> { - let Inst{31} = 1; - } -} - -class WideMulAccum opc, string asm, - SDNode AccNode, SDNode ExtNode> - : BaseMulAccum, - Sched<[WriteIM32]> { - let Inst{31} = 1; -} - -class MulHi opc, string asm, SDNode OpNode> - : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>, - Sched<[WriteIM64]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-24} = 0b10011011; - let Inst{23-21} = opc; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b011111; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class MulAccumWAlias - : InstAlias; -class MulAccumXAlias - : InstAlias; -class WideMulAccumAlias - : InstAlias; - -class BaseCRC32 sz, bit C, RegisterClass StreamReg, - SDPatternOperator OpNode, string asm> - : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>, - Sched<[WriteISReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = sf; - let Inst{30-21} = 0b0011010110; - let Inst{20-16} = Rm; - let Inst{15-13} = 0b010; - let Inst{12} = C; - let Inst{11-10} = sz; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -//--- -// Address generation. -//--- - -class ADRI pattern> - : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "", - pattern>, - Sched<[WriteI]> { - bits<5> Xd; - bits<21> label; - let Inst{31} = page; - let Inst{30-29} = label{1-0}; - let Inst{28-24} = 0b10000; - let Inst{23-5} = label{20-2}; - let Inst{4-0} = Xd; - - let DecoderMethod = "DecodeAdrInstruction"; -} - -//--- -// Move immediate. 
-//--- - -def movimm32_imm : Operand { - let ParserMatchClass = Imm0_65535Operand; - let EncoderMethod = "getMoveWideImmOpValue"; -} -def movimm32_shift : Operand { - let PrintMethod = "printShifter"; - let ParserMatchClass = MovImm32ShifterOperand; -} -def movimm64_shift : Operand { - let PrintMethod = "printShifter"; - let ParserMatchClass = MovImm64ShifterOperand; -} -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseMoveImmediate opc, RegisterClass regtype, Operand shifter, - string asm> - : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift), - asm, "\t$Rd, $imm$shift", "", []>, - Sched<[WriteImm]> { - bits<5> Rd; - bits<16> imm; - bits<6> shift; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = shift{5-4}; - let Inst{20-5} = imm; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeMoveImmInstruction"; -} - -multiclass MoveImmediate opc, string asm> { - def Wi : BaseMoveImmediate { - let Inst{31} = 0; - } - - def Xi : BaseMoveImmediate { - let Inst{31} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseInsertImmediate opc, RegisterClass regtype, Operand shifter, - string asm> - : I<(outs regtype:$Rd), - (ins regtype:$src, movimm32_imm:$imm, shifter:$shift), - asm, "\t$Rd, $imm$shift", "$src = $Rd", []>, - Sched<[WriteI]> { - bits<5> Rd; - bits<16> imm; - bits<6> shift; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100101; - let Inst{22-21} = shift{5-4}; - let Inst{20-5} = imm; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeMoveImmInstruction"; -} - -multiclass InsertImmediate opc, string asm> { - def Wi : BaseInsertImmediate { - let Inst{31} = 0; - } - - def Xi : BaseInsertImmediate { - let Inst{31} = 1; - } -} - -//--- -// Add/Subtract -//--- - -class BaseAddSubImm - : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "", - [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - bits<14> imm; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b10001; - let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12 - let Inst{21-10} = imm{11-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - let DecoderMethod = "DecodeBaseAddSubImm"; -} - -class BaseAddSubRegPseudo - : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteI]>; - -class BaseAddSubSReg - : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", - [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>, - Sched<[WriteISReg]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> src1; - bits<5> src2; - bits<8> shift; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-22} = shift{7-6}; - let Inst{21} = 0; - let Inst{20-16} = src2; - let Inst{15-10} = shift{5-0}; - let Inst{9-5} = src1; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeThreeAddrSRegInstruction"; -} - -class BaseAddSubEReg - : I<(outs dstRegtype:$R1), - (ins src1Regtype:$R2, src2Regtype:$R3), - asm, "\t$R1, $R2, $R3", "", - [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>, - Sched<[WriteIEReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> ext; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-21} = 0b001; - let Inst{20-16} = Rm; - let Inst{15-13} = ext{5-3}; - let Inst{12-10} = ext{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeAddSubERegInstruction"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseAddSubEReg64 - : I<(outs dstRegtype:$Rd), - (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext), - asm, "\t$Rd, $Rn, $Rm$ext", "", []>, - Sched<[WriteIEReg]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> ext; - let Inst{30} = isSub; - let Inst{29} = setFlags; - let Inst{28-24} = 0b01011; - let Inst{23-21} = 0b001; - let Inst{20-16} = Rm; - let Inst{15} = ext{5}; - let Inst{12-10} = ext{2-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeAddSubERegInstruction"; -} - -// Aliases for register+register add/subtract. -class AddSubRegAlias - : InstAlias; - -multiclass AddSub { - let hasSideEffects = 0 in { - // Add/Subtract immediate - def Wri : BaseAddSubImm { - let Inst{31} = 0; - } - def Xri : BaseAddSubImm { - let Inst{31} = 1; - } - - // Add/Subtract register - Only used for CodeGen - def Wrr : BaseAddSubRegPseudo; - def Xrr : BaseAddSubRegPseudo; - - // Add/Subtract shifted register - def Wrs : BaseAddSubSReg { - let Inst{31} = 0; - } - def Xrs : BaseAddSubSReg { - let Inst{31} = 1; - } - } - - // Add/Subtract extended register - let AddedComplexity = 1, hasSideEffects = 0 in { - def Wrx : BaseAddSubEReg, mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xrx : BaseAddSubEReg, mnemonic, OpNode> { - let Inst{31} = 1; - } - } - - def Xrx64 : BaseAddSubEReg64 { - // UXTX and SXTX only. - let Inst{14-13} = 0b11; - let Inst{31} = 1; - } - - // Register/register aliases with no shift when SP is not used. - def : AddSubRegAlias(NAME#"Wrs"), - GPR32, GPR32, GPR32, 0>; - def : AddSubRegAlias(NAME#"Xrs"), - GPR64, GPR64, GPR64, 0>; - - // Register/register aliases with no shift when either the destination or - // first source register is SP. This relies on the shifted register aliases - // above matching first in the case when SP is not used. 
- def : AddSubRegAlias(NAME#"Wrx"), - GPR32sp, GPR32sp, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias(NAME#"Xrx64"), - GPR64sp, GPR64sp, GPR64, 24>; // UXTX #0 -} - -multiclass AddSubS { - let isCompare = 1, Defs = [CPSR] in { - // Add/Subtract immediate - def Wri : BaseAddSubImm { - let Inst{31} = 0; - } - def Xri : BaseAddSubImm { - let Inst{31} = 1; - } - - // Add/Subtract register - def Wrr : BaseAddSubRegPseudo; - def Xrr : BaseAddSubRegPseudo; - - // Add/Subtract shifted register - def Wrs : BaseAddSubSReg { - let Inst{31} = 0; - } - def Xrs : BaseAddSubSReg { - let Inst{31} = 1; - } - - // Add/Subtract extended register - let AddedComplexity = 1 in { - def Wrx : BaseAddSubEReg, mnemonic, OpNode> { - let Inst{31} = 0; - } - def Xrx : BaseAddSubEReg, mnemonic, OpNode> { - let Inst{31} = 1; - } - } - - def Xrx64 : BaseAddSubEReg64 { - // UXTX and SXTX only. - let Inst{14-13} = 0b11; - let Inst{31} = 1; - } - } // Defs = [CPSR] - - // Register/register aliases with no shift when SP is not used. - def : AddSubRegAlias(NAME#"Wrs"), - GPR32, GPR32, GPR32, 0>; - def : AddSubRegAlias(NAME#"Xrs"), - GPR64, GPR64, GPR64, 0>; - - // Register/register aliases with no shift when the first source register - // is SP. This relies on the shifted register aliases above matching first - // in the case when SP is not used. - def : AddSubRegAlias(NAME#"Wrx"), - GPR32, GPR32sp, GPR32, 16>; // UXTW #0 - def : AddSubRegAlias(NAME#"Xrx64"), - GPR64, GPR64sp, GPR64, 24>; // UXTX #0 -} - -//--- -// Extract -//--- -def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisPtrTy<3>]>; -def ARM64Extr : SDNode<"ARM64ISD::EXTR", SDTA64EXTR>; - -class BaseExtractImm patterns> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm), - asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>, - Sched<[WriteExtr, ReadExtrHi]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<6> imm; - - let Inst{30-23} = 0b00100111; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15-10} = imm; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass ExtractImm { - def Wrri : BaseExtractImm { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xrri : BaseExtractImm { - - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -//--- -// Bitfield -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseBitfieldImm opc, - RegisterClass regtype, Operand imm_type, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms), - asm, "\t$Rd, $Rn, $immr, $imms", "", []>, - Sched<[WriteIS]> { - bits<5> Rd; - bits<5> Rn; - bits<6> immr; - bits<6> imms; - - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{21-16} = immr; - let Inst{15-10} = imms; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass BitfieldImm opc, string asm> { - def Wri : BaseBitfieldImm { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xri : BaseBitfieldImm { - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseBitfieldImmWith2RegArgs opc, - RegisterClass regtype, Operand imm_type, string asm> - : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr, - imm_type:$imms), - asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>, - Sched<[WriteIS]> { - bits<5> Rd; - bits<5> Rn; - bits<6> immr; - bits<6> imms; - - let Inst{30-29} = opc; - let Inst{28-23} = 0b100110; - let Inst{21-16} = immr; - let Inst{15-10} = imms; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass BitfieldImmWith2RegArgs opc, 
string asm> { - def Wri : BaseBitfieldImmWith2RegArgs { - let Inst{31} = 0; - let Inst{22} = 0; - } - def Xri : BaseBitfieldImmWith2RegArgs { - let Inst{31} = 1; - let Inst{22} = 1; - } -} - -//--- -// Logical -//--- - -// Logical (immediate) -class BaseLogicalImm opc, RegisterClass dregtype, - RegisterClass sregtype, Operand imm_type, string asm, - list pattern> - : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm), - asm, "\t$Rd, $Rn, $imm", "", pattern>, - Sched<[WriteI]> { - bits<5> Rd; - bits<5> Rn; - bits<13> imm; - let Inst{30-29} = opc; - let Inst{28-23} = 0b100100; - let Inst{22} = imm{12}; - let Inst{21-16} = imm{11-6}; - let Inst{15-10} = imm{5-0}; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; - - let DecoderMethod = "DecodeLogicalImmInstruction"; -} - -// Logical (shifted register) -class BaseLogicalSReg opc, bit N, RegisterClass regtype, - logical_shifted_reg shifted_regtype, string asm, - list pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteISReg]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> src1; - bits<5> src2; - bits<8> shift; - let Inst{30-29} = opc; - let Inst{28-24} = 0b01010; - let Inst{23-22} = shift{7-6}; - let Inst{21} = N; - let Inst{20-16} = src2; - let Inst{15-10} = shift{5-0}; - let Inst{9-5} = src1; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeThreeAddrSRegInstruction"; -} - -// Aliases for register+register logical instructions. -class LogicalRegAlias - : InstAlias; - -let AddedComplexity = 6 in -multiclass LogicalImm opc, string mnemonic, SDNode OpNode> { - def Wri : BaseLogicalImm { - let Inst{31} = 0; - let Inst{22} = 0; // 64-bit version has an additional bit of immediate. - } - def Xri : BaseLogicalImm { - let Inst{31} = 1; - } -} - -multiclass LogicalImmS opc, string mnemonic, SDNode OpNode> { - let isCompare = 1, Defs = [CPSR] in { - def Wri : BaseLogicalImm { - let Inst{31} = 0; - let Inst{22} = 0; // 64-bit version has an additional bit of immediate. - } - def Xri : BaseLogicalImm { - let Inst{31} = 1; - } - } // end Defs = [CPSR] -} - -class BaseLogicalRegPseudo - : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteI]>; - -// Split from LogicalImm as not all instructions have both. -multiclass LogicalReg opc, bit N, string mnemonic, - SDPatternOperator OpNode> { - def Wrr : BaseLogicalRegPseudo; - def Xrr : BaseLogicalRegPseudo; - - def Wrs : BaseLogicalSReg { - let Inst{31} = 0; - } - def Xrs : BaseLogicalSReg { - let Inst{31} = 1; - } - - def : LogicalRegAlias(NAME#"Wrs"), GPR32>; - def : LogicalRegAlias(NAME#"Xrs"), GPR64>; -} - -// Split from LogicalReg to allow setting CPSR Defs -multiclass LogicalRegS opc, bit N, string mnemonic> { - let Defs = [CPSR], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def Wrs : BaseLogicalSReg{ - let Inst{31} = 0; - } - def Xrs : BaseLogicalSReg{ - let Inst{31} = 1; - } - } // Defs = [CPSR] - - def : LogicalRegAlias(NAME#"Wrs"), GPR32>; - def : LogicalRegAlias(NAME#"Xrs"), GPR64>; -} - -//--- -// Conditionally set flags -//--- - -// Condition code. -// 4-bit immediate. 
Pretty-printed as -def ccode : Operand { - let PrintMethod = "printCondCode"; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, - Sched<[WriteI]> { - let Uses = [CPSR]; - let Defs = [CPSR]; - - bits<5> Rn; - bits<5> imm; - bits<4> nzcv; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b111010010; - let Inst{20-16} = imm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = nzcv; -} - -multiclass CondSetFlagsImm { - def Wi : BaseCondSetFlagsImm { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm { - let Inst{31} = 1; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, - Sched<[WriteI]> { - let Uses = [CPSR]; - let Defs = [CPSR]; - - bits<5> Rn; - bits<5> Rm; - bits<4> nzcv; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b111010010; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4} = 0b0; - let Inst{3-0} = nzcv; -} - -multiclass CondSetFlagsReg { - def Wr : BaseCondSetFlagsReg { - let Inst{31} = 0; - } - def Xr : BaseCondSetFlagsReg { - let Inst{31} = 1; - } -} - -//--- -// Conditional select -//--- - -class BaseCondSelect op2, RegisterClass regtype, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), CPSR))]>, - Sched<[WriteI]> { - let Uses = [CPSR]; - - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b011010100; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = op2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass CondSelect op2, string asm> { - def Wr : BaseCondSelect { - let Inst{31} = 0; - } - def Xr : BaseCondSelect { - let Inst{31} = 1; - } -} - -class BaseCondSelectOp op2, RegisterClass regtype, string asm, - PatFrag frag> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel regtype:$Rn, (frag regtype:$Rm), - (i32 imm:$cond), CPSR))]>, - Sched<[WriteI]> { - let Uses = [CPSR]; - - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{30} = op; - let Inst{29-21} = 0b011010100; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = op2; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass CondSelectOp op2, string asm, PatFrag frag> { - def Wr : BaseCondSelectOp { - let Inst{31} = 0; - } - def Xr : BaseCondSelectOp { - let Inst{31} = 1; - } -} - -//--- -// Special Mask Value -//--- -def maski8_or_more : Operand, - ImmLeaf { -} -def maski16_or_more : Operand, - ImmLeaf { -} - - -//--- -// Load/store -//--- - -// (unsigned immediate) -// Indexed for 8-bit registers. offset is in range [0,4095]. 
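
The scaling rule behind these addressing modes generalizes across the am_indexed8/16/32/64/128 operands defined next: the 12-bit field always stores the byte offset divided by the access size, so the reachable byte range grows with the width, from [0,4095] at one byte up to [0,65520] at sixteen. A small C++ sketch of that check, not part of the patch; the function name is illustrative:

    #include <cstdint>
    #include <optional>

    // Scale a byte offset into the 12-bit unsigned-immediate field used by
    // the am_indexed{8,16,32,64,128} operands: it must be a multiple of the
    // access size, and the quotient (the immval/N the comments below
    // describe) must fit in 12 bits.
    std::optional<uint32_t> scaleUImm12(uint64_t byteOffset,
                                        unsigned accessBytes) {
      if (byteOffset % accessBytes != 0)
        return std::nullopt;              // not a multiple of the access size
      uint64_t scaled = byteOffset / accessBytes;
      if (scaled > 4095)
        return std::nullopt;              // outside the 12-bit range
      return uint32_t(scaled);
    }

    // e.g. scaleUImm12(8190, 2) == 4095 (the top of the 16-bit range quoted
    // below), while scaleUImm12(3, 2) and scaleUImm12(65536, 16) both fail.
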
-def MemoryIndexed8Operand : AsmOperandClass { - let Name = "MemoryIndexed8"; - let DiagnosticType = "InvalidMemoryIndexed8"; -} -def am_indexed8 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed8"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed8Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 16-bit registers. offset is multiple of 2 in range [0,8190], -// stored as immval/2 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed16Operand : AsmOperandClass { - let Name = "MemoryIndexed16"; - let DiagnosticType = "InvalidMemoryIndexed16"; -} -def am_indexed16 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed16"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed16Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 32-bit registers. offset is multiple of 4 in range [0,16380], -// stored as immval/4 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed32Operand : AsmOperandClass { - let Name = "MemoryIndexed32"; - let DiagnosticType = "InvalidMemoryIndexed32"; -} -def am_indexed32 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed32"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed32Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 64-bit registers. offset is multiple of 8 in range [0,32760], -// stored as immval/8 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed64Operand : AsmOperandClass { - let Name = "MemoryIndexed64"; - let DiagnosticType = "InvalidMemoryIndexed64"; -} -def am_indexed64 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed64"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed64Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// Indexed for 128-bit registers. offset is multiple of 16 in range [0,65520], -// stored as immval/16 (the 12-bit literal that encodes directly into the insn). -def MemoryIndexed128Operand : AsmOperandClass { - let Name = "MemoryIndexed128"; - let DiagnosticType = "InvalidMemoryIndexed128"; -} -def am_indexed128 : Operand, - ComplexPattern { - let PrintMethod = "printAMIndexed128"; - let EncoderMethod - = "getAMIndexed8OpValue"; - let ParserMatchClass = MemoryIndexed128Operand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} - -// No offset. 
-def MemoryNoIndexOperand : AsmOperandClass { let Name = "MemoryNoIndex"; } -def am_noindex : Operand, - ComplexPattern { - let PrintMethod = "printAMNoIndex"; - let ParserMatchClass = MemoryNoIndexOperand; - let MIOperandInfo = (ops GPR64sp:$base); -} - -class BaseLoadStoreUI sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, list pattern> - : I { - bits<5> dst; - - bits<17> addr; - bits<5> base = addr{4-0}; - bits<12> offset = addr{16-5}; - - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b01; - let Inst{23-22} = opc; - let Inst{21-10} = offset; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeUnsignedLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class LoadUI sz, bit V, bits<2> opc, RegisterClass regtype, - Operand indextype, string asm, list pattern> - : BaseLoadStoreUI, - Sched<[WriteLD]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class StoreUI sz, bit V, bits<2> opc, RegisterClass regtype, - Operand indextype, string asm, list pattern> - : BaseLoadStoreUI, - Sched<[WriteST]>; - -def PrefetchOperand : AsmOperandClass { - let Name = "Prefetch"; - let ParserMethod = "tryParsePrefetch"; -} -def prfop : Operand { - let PrintMethod = "printPrefetchOp"; - let ParserMatchClass = PrefetchOperand; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchUI sz, bit V, bits<2> opc, string asm, list pat> - : BaseLoadStoreUI, - Sched<[WriteLD]>; - -//--- -// Load literal -//--- - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class LoadLiteral opc, bit V, RegisterClass regtype, string asm> - : I<(outs regtype:$Rt), (ins am_brcond:$label), - asm, "\t$Rt, $label", "", []>, - Sched<[WriteLD]> { - bits<5> Rt; - bits<19> label; - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-5} = label; - let Inst{4-0} = Rt; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchLiteral opc, bit V, string asm, list pat> - : I<(outs), (ins prfop:$Rt, am_brcond:$label), - asm, "\t$Rt, $label", "", pat>, - Sched<[WriteLD]> { - bits<5> Rt; - bits<19> label; - let Inst{31-30} = opc; - let Inst{29-27} = 0b011; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-5} = label; - let Inst{4-0} = Rt; -} - -//--- -// Load/store register offset -//--- - -class MemROAsmOperand : AsmOperandClass { - let Name = "MemoryRegisterOffset"#sz; -} - -def MemROAsmOperand8 : MemROAsmOperand<8>; -def MemROAsmOperand16 : MemROAsmOperand<16>; -def MemROAsmOperand32 : MemROAsmOperand<32>; -def MemROAsmOperand64 : MemROAsmOperand<64>; -def MemROAsmOperand128 : MemROAsmOperand<128>; - -class ro_indexed : Operand { // ComplexPattern<...> - let PrintMethod = "printMemoryRegOffset"#sz; - let MIOperandInfo = (ops GPR64sp:$base, GPR64:$offset, i32imm:$extend); -} - -def ro_indexed8 : ro_indexed<8>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand8; -} - -def ro_indexed16 : ro_indexed<16>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand16; -} - -def ro_indexed32 : ro_indexed<32>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand32; -} - -def ro_indexed64 : ro_indexed<64>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand64; -} - -def ro_indexed128 : ro_indexed<128>, ComplexPattern { - let ParserMatchClass = MemROAsmOperand128; -} - -class LoadStore8RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to 
match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load8RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore8RO, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store8RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore8RO, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore16RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load16RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore16RO, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store16RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore16RO, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore32RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -class Load32RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore32RO, - Sched<[WriteLDIdx, ReadAdrBase]>; - -class Store32RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore32RO, - Sched<[WriteSTIdx, ReadAdrBase]>; - -class LoadStore64RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class Load64RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore64RO, - Sched<[WriteLDIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class Store64RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore64RO, - Sched<[WriteSTIdx, ReadAdrBase]>; - - -class LoadStore128RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, dag ins, dag outs, list pat> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class Load128RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore128RO, - Sched<[WriteLDIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -class Store128RO sz, bit V, bits<2> opc, RegisterClass regtype, - string asm, list pat> - : LoadStore128RO, - Sched<[WriteSTIdx, ReadAdrBase]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchRO sz, bit V, bits<2> opc, string asm, list pat> - : I<(outs), (ins prfop:$Rt, ro_indexed64:$addr), asm, - "\t$Rt, $addr", "", pat>, - Sched<[WriteLD]> { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<5> offset; - bits<4> extend; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 1; - let Inst{20-16} = offset; - let Inst{15-13} = extend{3-1}; - - let Inst{12} = extend{0}; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeRegOffsetLdStInstruction"; -} - -//--- -// Load/store unscaled immediate -//--- - -def MemoryUnscaledOperand : AsmOperandClass { - let Name = "MemoryUnscaled"; - let DiagnosticType = "InvalidMemoryIndexedSImm9"; -} -class am_unscaled_operand : Operand { - let PrintMethod = "printAMUnscaled"; - let ParserMatchClass = MemoryUnscaledOperand; - let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset); -} -def am_unscaled : am_unscaled_operand; -def am_unscaled8 : am_unscaled_operand, - ComplexPattern; -def am_unscaled16 : am_unscaled_operand, - ComplexPattern; -def am_unscaled32 : am_unscaled_operand, - ComplexPattern; -def am_unscaled64 : am_unscaled_operand, - ComplexPattern; -def am_unscaled128 : am_unscaled_operand, - ComplexPattern; - -class BaseLoadStoreUnscale sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, list pattern> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b00; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let AddedComplexity = 1 in // try this before LoadUI -class LoadUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, - Operand amtype, string asm, list pattern> - : BaseLoadStoreUnscale, - Sched<[WriteLD]>; - -let AddedComplexity = 1 in // try this before StoreUI -class StoreUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, - Operand amtype, string asm, list pattern> - : BaseLoadStoreUnscale, - Sched<[WriteST]>; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class PrefetchUnscaled sz, bit V, bits<2> opc, string asm, list pat> - : BaseLoadStoreUnscale, - Sched<[WriteLD]>; - -//--- -// Load/store unscaled immediate, unprivileged -//--- - -class BaseLoadStoreUnprivileged sz, bit V, bits<2> opc, - dag oops, dag iops, string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b10; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { -class LoadUnprivileged sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStoreUnprivileged, - Sched<[WriteLD]>; -} - -let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { -class StoreUnprivileged sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStoreUnprivileged, - Sched<[WriteST]>; -} - -//--- -// Load/store pre-indexed -//--- - -class BaseLoadStorePreIdx sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0; - let Inst{23-22} = opc; - let Inst{21} = 0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b11; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel is tricky. -// we need the complex addressing mode for the memory reference, but -// we also need the write-back specified as a tied operand to the -// base register. That combination does not play nicely with -// the asm matcher and friends. -class LoadPreIdx sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePreIdx, - Sched<[WriteLD, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePreIdx sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePreIdx, - Sched<[WriteAdr, WriteST]>; -} // hasSideEffects = 0 - -// ISel pseudo-instructions which have the tied operands. When the MC lowering -// logic finally gets smart enough to strip off tied operands that are just -// for isel convenience, we can get rid of these pseudos and just reference -// the real instructions directly. -// -// Ironically, also because of the writeback operands, we can't put the -// matcher pattern directly on the instruction, but need to define it -// separately. -// -// Loads aren't matched with patterns here at all, but rather in C++ -// custom lowering. 
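
Before the pseudos, it is worth noting how the four 9-bit-signed-offset forms in this stretch relate: unscaled (Inst{11-10} = 0b00), unprivileged (0b10), pre-indexed (0b11) and post-indexed (0b01, defined just below) share one field layout and differ only in those two bits. A C++ sketch of the shared packing, not part of the patch; the enum, the function name, and the pre-packed szVopc argument (standing in for the sz/V/opc bits each subclass fixes) are illustrative:

    #include <cassert>
    #include <cstdint>

    enum class IndexMode : uint32_t {
      Unscaled = 0b00, PostIndexed = 0b01, Unprivileged = 0b10,
      PreIndexed = 0b11
    };

    // Shared layout of the simm9 load/store forms: Inst{21} = 0, the 9-bit
    // signed offset in Inst{20-12}, the discriminator in Inst{11-10}, the
    // base in Inst{9-5} and the data register in Inst{4-0}.
    uint32_t encodeSImm9Form(uint32_t szVopc, IndexMode mode, int32_t offset,
                             unsigned base, unsigned reg) {
      assert(offset >= -256 && offset <= 255 && "simm9 range");
      uint32_t insn = szVopc;                     // caller-packed sz/V/opc bits
      insn |= (uint32_t(offset) & 0x1FFu) << 12;  // Inst{20-12} = offset
      insn |= uint32_t(mode) << 10;               // Inst{11-10} picks the form
      insn |= (base & 0x1Fu) << 5;                // Inst{9-5} = base
      insn |=  reg & 0x1Fu;                       // Inst{4-0} = dst
      return insn;
    }
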
-let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in { -class LoadPreIdxPseudo - : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteLD, WriteAdr]>; -class LoadPostIdxPseudo - : Pseudo<(outs regtype:$Rt, GPR64sp:$wback), - (ins am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteLD, WriteI]>; -} -multiclass StorePreIdxPseudo { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, am_noindex:$addr, simm9:$offset), [], - "$addr.base = $wback,@earlyclobber $wback">, - Sched<[WriteAdr, WriteST]>; - - def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$offset), - (!cast(NAME#_isel) regtype:$Rt, am_noindex:$addr, - simm9:$offset)>; -} - -//--- -// Load/store post-indexed -//--- - -// (pre-index) load/stores. -class BaseLoadStorePostIdx sz, bit V, bits<2> opc, dag oops, dag iops, - string asm, string cstr> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. - bits<5> dst; - bits<5> base; - bits<9> offset; - let Inst{31-30} = sz; - let Inst{29-27} = 0b111; - let Inst{26} = V; - let Inst{25-24} = 0b00; - let Inst{23-22} = opc; - let Inst{21} = 0b0; - let Inst{20-12} = offset; - let Inst{11-10} = 0b01; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodeSignedLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -// FIXME: Modeling the write-back of these instructions for isel is tricky. -// we need the complex addressing mode for the memory reference, but -// we also need the write-back specified as a tied operand to the -// base register. That combination does not play nicely with -// the asm matcher and friends. -class LoadPostIdx sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePostIdx, - Sched<[WriteLD, WriteI]>; - -let mayStore = 1, mayLoad = 0 in -class StorePostIdx sz, bit V, bits<2> opc, RegisterClass regtype, - string asm> - : BaseLoadStorePostIdx, - Sched<[WriteAdr, WriteST, ReadAdrBase]>; -} // hasSideEffects = 0 - -// ISel pseudo-instructions which have the tied operands. When the MC lowering -// logic finally gets smart enough to strip off tied operands that are just -// for isel convenience, we can get rid of these pseudos and just reference -// the real instructions directly. -// -// Ironically, also because of the writeback operands, we can't put the -// matcher pattern directly on the instruction, but need to define it -// separately. -multiclass StorePostIdxPseudo { - let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in - def _isel: Pseudo<(outs GPR64sp:$wback), - (ins regtype:$Rt, am_noindex:$addr, simm9:$idx), [], - "$addr.base = $wback,@earlyclobber $wback">, - PseudoInstExpansion<(Insn regtype:$Rt, am_noindex:$addr, simm9:$idx)>, - Sched<[WriteAdr, WriteST, ReadAdrBase]>; - - def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$idx), - (!cast(NAME#_isel) regtype:$Rt, am_noindex:$addr, - simm9:$idx)>; -} - -//--- -// Load/store pair -//--- - -// (indexed, offset) - -class BaseLoadStorePairOffset opc, bit V, bit L, dag oops, dag iops, - string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. 
Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b010; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairOffset opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairOffset, - Sched<[WriteLD, WriteLDHi]>; - -let mayLoad = 0, mayStore = 1 in -class StorePairOffset opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairOffset, - Sched<[WriteSTP]>; -} // hasSideEffects = 0 - -// (pre-indexed) - -def MemoryIndexed32SImm7 : AsmOperandClass { - let Name = "MemoryIndexed32SImm7"; - let DiagnosticType = "InvalidMemoryIndexed32SImm7"; -} -def am_indexed32simm7 : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexed32"; - let ParserMatchClass = MemoryIndexed32SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -def MemoryIndexed64SImm7 : AsmOperandClass { - let Name = "MemoryIndexed64SImm7"; - let DiagnosticType = "InvalidMemoryIndexed64SImm7"; -} -def am_indexed64simm7 : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexed64"; - let ParserMatchClass = MemoryIndexed64SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -def MemoryIndexed128SImm7 : AsmOperandClass { - let Name = "MemoryIndexed128SImm7"; - let DiagnosticType = "InvalidMemoryIndexed128SImm7"; -} -def am_indexed128simm7 : Operand { // ComplexPattern<...> - let PrintMethod = "printAMIndexed128"; - let ParserMatchClass = MemoryIndexed128SImm7; - let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset); -} - -class BaseLoadStorePairPreIdx opc, bit V, bit L, dag oops, dag iops, - string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b011; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairPreIdx opc, bit V, RegisterClass regtype, - Operand addrmode, string asm> - : BaseLoadStorePairPreIdx, - Sched<[WriteLD, WriteLDHi, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairPreIdx opc, bit V, RegisterClass regtype, - Operand addrmode, string asm> - : BaseLoadStorePairPreIdx, - Sched<[WriteAdr, WriteSTP]>; -} // hasSideEffects = 0 - -// (post-indexed) - -class BaseLoadStorePairPostIdx opc, bit V, bit L, dag oops, dag iops, - string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. 
- bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b001; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairPostIdx opc, bit V, RegisterClass regtype, - Operand idxtype, string asm> - : BaseLoadStorePairPostIdx, - Sched<[WriteLD, WriteLDHi, WriteAdr]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairPostIdx opc, bit V, RegisterClass regtype, - Operand idxtype, string asm> - : BaseLoadStorePairPostIdx, - Sched<[WriteAdr, WriteSTP]>; -} // hasSideEffects = 0 - -// (no-allocate) - -class BaseLoadStorePairNoAlloc opc, bit V, bit L, dag oops, dag iops, - string asm> - : I { - // The operands are in order to match the 'addr' MI operands, so we - // don't need an encoder method and by-name matching. Just use the default - // in-order handling. Since we're using by-order, make sure the names - // do not match. - bits<5> dst; - bits<5> dst2; - bits<5> base; - bits<7> offset; - let Inst{31-30} = opc; - let Inst{29-27} = 0b101; - let Inst{26} = V; - let Inst{25-23} = 0b000; - let Inst{22} = L; - let Inst{21-15} = offset; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst; - - let DecoderMethod = "DecodePairLdStInstruction"; -} - -let hasSideEffects = 0 in { -let mayStore = 0, mayLoad = 1 in -class LoadPairNoAlloc opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairNoAlloc, - Sched<[WriteLD, WriteLDHi]>; - -let mayStore = 1, mayLoad = 0 in -class StorePairNoAlloc opc, bit V, RegisterClass regtype, - Operand indextype, string asm> - : BaseLoadStorePairNoAlloc, - Sched<[WriteSTP]>; -} // hasSideEffects = 0 - -//--- -// Load/store exclusive -//--- - -// True exclusive operations write to and/or read from the system's exclusive -// monitors, which as far as a compiler is concerned can be modelled as a -// random shared memory address. Hence LoadExclusive mayStore. -let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in -class BaseLoadStoreExclusive sz, bit o2, bit L, bit o1, bit o0, - dag oops, dag iops, string asm, string operands> - : I { - let Inst{31-30} = sz; - let Inst{29-24} = 0b001000; - let Inst{23} = o2; - let Inst{22} = L; - let Inst{21} = o1; - let Inst{15} = o0; - - let DecoderMethod = "DecodeExclusiveLdStInstruction"; -} - -// Neither Rs nor Rt2 operands. 
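
The four flag bits of BaseLoadStoreExclusive are enough to name every member of the family. As a hedged illustration (the concrete combinations below are recalled from the ARMv8 reference manual; the defs that pin them down live elsewhere in the backend, not in this hunk): L (Inst{22}) distinguishes loads from stores, o1 (Inst{21}) selects the pair forms, o0 (Inst{15}) adds acquire/release ordering, and o2 (Inst{23}) selects the plain load-acquire/store-release instructions that, per the comment above, never touch the exclusive monitor.

    #include <cstdio>

    struct ExclFlags { bool o2, L, o1, o0; };

    // Map (o2, L, o1, o0) to a mnemonic; combinations recalled from the
    // ARMv8 manual, shown only to make the bit roles concrete. (o0 is 1
    // for the base-ISA LDAR/STLR forms.)
    const char *exclMnemonic(ExclFlags f) {
      if (f.o2) return f.L ? "ldar" : "stlr";            // monitor not involved
      if (f.L)  return f.o1 ? (f.o0 ? "ldaxp" : "ldxp")
                            : (f.o0 ? "ldaxr" : "ldxr");
      return f.o1 ? (f.o0 ? "stlxp" : "stxp")
                  : (f.o0 ? "stlxr" : "stxr");
    }

    int main() {
      std::printf("%s\n", exclMnemonic({false, true, false, true})); // ldaxr
    }
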
-class LoadStoreExclusiveSimple sz, bit o2, bit L, bit o1, bit o0, - dag oops, dag iops, string asm, string operands> - : BaseLoadStoreExclusive { - bits<5> reg; - bits<5> base; - let Inst{20-16} = 0b11111; - let Inst{14-10} = 0b11111; - let Inst{9-5} = base; - let Inst{4-0} = reg; -} - -// Simple load acquires don't set the exclusive monitor -let mayLoad = 1, mayStore = 0 in -class LoadAcquire sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple, - Sched<[WriteLD]>; - -class LoadExclusive sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple, - Sched<[WriteLD]>; - -class LoadExclusivePair sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive, - Sched<[WriteLD, WriteLDHi]> { - bits<5> dst1; - bits<5> dst2; - bits<5> base; - let Inst{20-16} = 0b11111; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst1; -} - -// Simple store release operations do not check the exclusive monitor. -let mayLoad = 0, mayStore = 1 in -class StoreRelease sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : LoadStoreExclusiveSimple, - Sched<[WriteST]>; - -let mayLoad = 1, mayStore = 1 in -class StoreExclusive sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive, - Sched<[WriteSTX]> { - bits<5> status; - bits<5> reg; - bits<5> base; - let Inst{20-16} = status; - let Inst{14-10} = 0b11111; - let Inst{9-5} = base; - let Inst{4-0} = reg; - - let Constraints = "@earlyclobber $Ws"; -} - -class StoreExclusivePair sz, bit o2, bit L, bit o1, bit o0, - RegisterClass regtype, string asm> - : BaseLoadStoreExclusive, - Sched<[WriteSTX]> { - bits<5> status; - bits<5> dst1; - bits<5> dst2; - bits<5> base; - let Inst{20-16} = status; - let Inst{14-10} = dst2; - let Inst{9-5} = base; - let Inst{4-0} = dst1; - - let Constraints = "@earlyclobber $Ws"; -} - -//--- -// Exception generation -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in -class ExceptionGeneration op1, bits<2> ll, string asm> - : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>, - Sched<[WriteSys]> { - bits<16> imm; - let Inst{31-24} = 0b11010100; - let Inst{23-21} = op1; - let Inst{20-5} = imm; - let Inst{4-2} = 0b000; - let Inst{1-0} = ll; -} - -//--- -// Floating point to integer conversion -//--- - -class BaseFPToIntegerUnscaled type, bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - string asm, list pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn), - asm, "\t$Rd, $Rn", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30} = 0; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - Operand immType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), - asm, "\t$Rd, $Rn, $scale", "", []>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30} = 0; - let Inst{28-24} = 0b11110; - let Inst{23-22} = type; - let Inst{21} = 0; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = scale; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPToInteger rmode, bits<3> opcode, string asm, 
SDPatternOperator OpN> { - // Unscaled single-precision to 32-bit - def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, - [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Unscaled single-precision to 64-bit - def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm, - [(set GPR64:$Rd, (OpN FPR32:$Rn))]> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Unscaled double-precision to 32-bit - def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm, - [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Unscaled double-precision to 64-bit - def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm, - [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Scaled single-precision to 32-bit - def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, - fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Scaled single-precision to 64-bit - def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64, - fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - } - - // Scaled double-precision to 32-bit - def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32, - fixedpoint32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - } - - // Scaled double-precision to 64-bit - def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64, - fixedpoint64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - } -} - -//--- -// Integer to floating point conversion -//--- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseIntegerToFP - : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale), - asm, "\t$Rd, $Rn, $scale", "", []>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-23} = 0b00111100; - let Inst{21-17} = 0b00001; - let Inst{16} = isUnsigned; - let Inst{15-10} = scale; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseIntegerToFPUnscaled - : I<(outs dstType:$Rd), (ins srcType:$Rn), - asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - bits<6> scale; - let Inst{30-23} = 0b00111100; - let Inst{21-17} = 0b10001; - let Inst{16} = isUnsigned; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass IntegerToFP { - // Unscaled - def UWSri: BaseIntegerToFPUnscaled { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def UWDri: BaseIntegerToFPUnscaled { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def UXSri: BaseIntegerToFPUnscaled { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def UXDri: BaseIntegerToFPUnscaled { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - // Scaled - def SWSri: BaseIntegerToFP { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def SWDri: BaseIntegerToFP { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def SXSri: BaseIntegerToFP { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def SXDri: BaseIntegerToFP { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } -} - -//--- -// Unscaled integer <-> floating point conversion (i.e. 
FMOV) -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversion rmode, bits<3> opcode, - RegisterClass srcType, RegisterClass dstType, - string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", - // We use COPY_TO_REGCLASS for these bitconvert operations. - // copyPhysReg() expands the resultant COPY instructions after - // regalloc is done. This gives greater freedom for the allocator - // and related passes (coalescing, copy propagation, et. al.) to - // be more effective. - [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111100; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversionToHigh rmode, bits<3> opcode, - RegisterClass srcType, RegisterOperand dstType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd[1], $Rn", "", []>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111101; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseUnscaledConversionFromHigh rmode, bits<3> opcode, - RegisterOperand srcType, RegisterClass dstType, string asm> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn[1]", "", []>, - Sched<[WriteFCopy]> { - bits<5> Rd; - bits<5> Rn; - let Inst{30-23} = 0b00111101; - let Inst{21} = 1; - let Inst{20-19} = rmode; - let Inst{18-16} = opcode; - let Inst{15-10} = 0b000000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - - -multiclass UnscaledConversion { - def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { - let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag - } - - def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { - let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag - } - - def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, - asm#".d"> { - let Inst{31} = 1; - let Inst{22} = 0; - } - - def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64, - asm#".d"> { - let Inst{31} = 1; - let Inst{22} = 0; - } - - def : InstAlias(NAME#XDHighr) V128:$Vd, GPR64:$Rn), 0>; - def : InstAlias(NAME#DXHighr) GPR64:$Rd, V128:$Vn), 0>; -} - -//--- -// Floating point conversion -//--- - -class BaseFPConversion type, bits<2> opcode, RegisterClass dstType, - RegisterClass srcType, string asm, list pattern> - : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>, - Sched<[WriteFCvt]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-24} = 0b00011110; - let Inst{23-22} = type; - let Inst{21-17} = 0b10001; - let Inst{16-15} = opcode; - let Inst{14-10} = 0b10000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPConversion { - // Double-precision to Half-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, []>; - - // Double-precision 
to Single-precision - def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm, - [(set FPR32:$Rd, (fround FPR64:$Rn))]>; - - // Half-precision to Double-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, []>; - - // Half-precision to Single-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, []>; - - // Single-precision to Double-precision - def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm, - [(set FPR64:$Rd, (fextend FPR32:$Rn))]>; - - // Single-precision to Half-precision - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in - def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, []>; -} - -//--- -// Single operand floating point data processing -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSingleOperandFPData opcode, RegisterClass regtype, - ValueType vt, string asm, SDPatternOperator node> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "", - [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-23} = 0b000111100; - let Inst{21-19} = 0b100; - let Inst{18-15} = opcode; - let Inst{14-10} = 0b10000; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SingleOperandFPData opcode, string asm, - SDPatternOperator node = null_frag> { - def Sr : BaseSingleOperandFPData { - let Inst{22} = 0; // 32-bit size flag - } - - def Dr : BaseSingleOperandFPData { - let Inst{22} = 1; // 64-bit size flag - } -} - -//--- -// Two operand floating point data processing -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseTwoOperandFPData opcode, RegisterClass regtype, - string asm, list pat> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), - asm, "\t$Rd, $Rn, $Rm", "", pat>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass TwoOperandFPData opcode, string asm, - SDPatternOperator node = null_frag> { - def Srr : BaseTwoOperandFPData { - let Inst{22} = 0; // 32-bit size flag - } - - def Drr : BaseTwoOperandFPData { - let Inst{22} = 1; // 64-bit size flag - } -} - -multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { - def Srr : BaseTwoOperandFPData { - let Inst{22} = 0; // 32-bit size flag - } - - def Drr : BaseTwoOperandFPData { - let Inst{22} = 1; // 64-bit size flag - } -} - - -//--- -// Three operand floating point data processing -//--- - -class BaseThreeOperandFPData pat> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype: $Ra), - asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>, - Sched<[WriteFMul]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<5> Ra; - let Inst{31-23} = 0b000111110; - let Inst{21} = isNegated; - let Inst{20-16} = Rm; - let Inst{15} = isSub; - let Inst{14-10} = Ra; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass ThreeOperandFPData { - def Srrr : BaseThreeOperandFPData { - let Inst{22} = 0; // 32-bit size flag - } - - def Drrr : BaseThreeOperandFPData { - let Inst{22} = 1; // 64-bit size flag - } -} - -//--- -// Floating point data comparisons -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseOneOperandFPComparison pat> - : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>, - Sched<[WriteFCmp]> { - bits<5> Rn; - let 
Inst{31-23} = 0b000111100; - let Inst{21} = 1; - - let Inst{20-16} = 0b00000; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = 0b1000; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseTwoOperandFPComparison pat> - : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>, - Sched<[WriteFCmp]> { - bits<5> Rm; - bits<5> Rn; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-10} = 0b001000; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = 0b0000; -} - -multiclass FPComparison { - let Defs = [CPSR] in { - def Srr : BaseTwoOperandFPComparison { - let Inst{22} = 0; - } - - def Sri : BaseOneOperandFPComparison { - let Inst{22} = 0; - } - - def Drr : BaseTwoOperandFPComparison { - let Inst{22} = 1; - } - - def Dri : BaseOneOperandFPComparison { - let Inst{22} = 1; - } - } // Defs = [CPSR] -} - -//--- -// Floating point conditional comparisons -//--- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseFPCondComparison - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, - Sched<[WriteFCmp]> { - bits<5> Rn; - bits<5> Rm; - bits<4> nzcv; - bits<4> cond; - - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b01; - let Inst{9-5} = Rn; - let Inst{4} = signalAllNans; - let Inst{3-0} = nzcv; -} - -multiclass FPCondComparison { - let Defs = [CPSR], Uses = [CPSR] in { - def Srr : BaseFPCondComparison { - let Inst{22} = 0; - } - - def Drr : BaseFPCondComparison { - let Inst{22} = 1; - } - } // Defs = [CPSR], Uses = [CPSR] -} - -//--- -// Floating point conditional select -//--- - -class BaseFPCondSelect - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond), - asm, "\t$Rd, $Rn, $Rm, $cond", "", - [(set regtype:$Rd, - (ARM64csel (vt regtype:$Rn), regtype:$Rm, - (i32 imm:$cond), CPSR))]>, - Sched<[WriteF]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> cond; - - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = cond; - let Inst{11-10} = 0b11; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass FPCondSelect { - let Uses = [CPSR] in { - def Srrr : BaseFPCondSelect { - let Inst{22} = 0; - } - - def Drrr : BaseFPCondSelect { - let Inst{22} = 1; - } - } // Uses = [CPSR] -} - -//--- -// Floating move immediate -//--- - -class BaseFPMoveImmediate - : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "", - [(set regtype:$Rd, fpimmtype:$imm)]>, - Sched<[WriteFImm]> { - bits<5> Rd; - bits<8> imm; - let Inst{31-23} = 0b000111100; - let Inst{21} = 1; - let Inst{20-13} = imm; - let Inst{12-5} = 0b10000000; - let Inst{4-0} = Rd; -} - -multiclass FPMoveImmediate { - def Si : BaseFPMoveImmediate { - let Inst{22} = 0; - } - - def Di : BaseFPMoveImmediate { - let Inst{22} = 1; - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD -//---------------------------------------------------------------------------- - -def VectorIndexBOperand : AsmOperandClass { let Name = "VectorIndexB"; } -def VectorIndexHOperand : AsmOperandClass { let Name = "VectorIndexH"; } -def VectorIndexSOperand : AsmOperandClass { let Name = "VectorIndexS"; } -def VectorIndexDOperand : AsmOperandClass { let Name = "VectorIndexD"; } -def VectorIndexB : Operand, ImmLeaf { - let ParserMatchClass = VectorIndexBOperand; - let 
PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexH : Operand, ImmLeaf { - let ParserMatchClass = VectorIndexHOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexS : Operand, ImmLeaf { - let ParserMatchClass = VectorIndexSOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} -def VectorIndexD : Operand, ImmLeaf { - let ParserMatchClass = VectorIndexDOperand; - let PrintMethod = "printVectorIndex"; - let MIOperandInfo = (ops i64imm); -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register vector instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVector size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - list pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorTied size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - list pattern> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// All operand sizes distinguished in the encoding. -multiclass SIMDThreeSameVector opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128, - asm, ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; -} - -// As above, but D sized elements unsupported. 
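
Everything needed to assemble a complete three-same instruction is now on the page: Inst{31} = 0, Q at bit 30, U at 29, the fixed 0b01110 group, size at 23-22, bits 21 and 10 set, and the 5-bit opcode at 15-11. A C++ sketch, not part of the patch; the opcode value 0b10000 for vector ADD in the driver is recalled from the ARMv8 manual rather than from this hunk, since the defm that supplies it lives elsewhere:

    #include <cstdint>
    #include <cstdio>

    // Pack one AdvSIMD three-same word per BaseSIMDThreeSameVector.
    uint32_t encodeThreeSame(bool Q, bool U, uint32_t size, uint32_t opcode,
                             unsigned rd, unsigned rn, unsigned rm) {
      uint32_t insn = 0;                    // Inst{31} = 0
      insn |= uint32_t(Q) << 30;            // Inst{30} = Q
      insn |= uint32_t(U) << 29;            // Inst{29} = U
      insn |= 0b01110u << 24;               // Inst{28-24}
      insn |= (size & 0x3u) << 22;          // Inst{23-22}
      insn |= 1u << 21;                     // Inst{21} = 1
      insn |= (rm & 0x1Fu) << 16;           // Inst{20-16} = Rm
      insn |= (opcode & 0x1Fu) << 11;       // Inst{15-11}
      insn |= 1u << 10;                     // Inst{10} = 1
      insn |= (rn & 0x1Fu) << 5;            // Inst{9-5} = Rn
      insn |=  rd & 0x1Fu;                  // Inst{4-0} = Rd
      return insn;
    }

    int main() {
      // add v0.4s, v1.4s, v2.4s (Q=1, U=0, size=0b10) -> 0x4ea28420
      std::printf("%08x\n",
                  (unsigned)encodeThreeSame(1, 0, 0b10, 0b10000, 0, 1, 2));
    }
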
-multiclass SIMDThreeSameVectorBHS opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, - asm, ".4h", - [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, - asm, ".8h", - [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, - asm, ".2s", - [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, - asm, ".4s", - [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; -} - -multiclass SIMDThreeSameVectorBHSTied opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64, - asm, ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128, - asm, ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64, - asm, ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128, - asm, ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; -} - -// As above, but only B sized elements supported. -multiclass SIMDThreeSameVectorB opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, - asm, ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, - asm, ".16b", - [(set (v16i8 V128:$Rd), - (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; -} - -// As above, but only S and D sized floating point elements supported. 
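
The floating-point variants that follow reuse the same base class but build the size field as {S,0} for the .2s/.4s forms and {S,1} for .2d: the low size bit picks single versus double precision, while S is an extra opcode-selection bit threaded through from the multiclass parameter. A one-liner making that construction explicit, illustrative only:

    #include <cstdint>

    // bits<2> size = {S, isDouble}, as in the FP multiclasses just below.
    uint32_t fpSizeField(bool S, bool isDouble) {
      return (uint32_t(S) << 1) | uint32_t(isDouble);
    }
    // fpSizeField(0,0) -> 0b00 (.2s/.4s), fpSizeField(0,1) -> 0b01 (.2d),
    // and S = 1 yields 0b10/0b11 for the opcodes that need the high bit.
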
-multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc,
-                                 string asm, SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
-                                      asm, ".2s",
-        [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
-  def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
-                                      asm, ".4s",
-        [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
-  def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
-                                      asm, ".2d",
-        [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
-}
-
-multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc,
-                                    string asm,
-                                    SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
-                                      asm, ".2s",
-        [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
-  def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
-                                      asm, ".4s",
-        [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
-  def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
-                                      asm, ".2d",
-        [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
-}
-
-multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
-                                 string asm, SDPatternOperator OpNode> {
-  def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64,
-                                      asm, ".2s",
-     [(set (v2f32 V64:$dst),
-           (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
-  def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128,
-                                      asm, ".4s",
-     [(set (v4f32 V128:$dst),
-           (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
-  def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128,
-                                      asm, ".2d",
-     [(set (v2f64 V128:$dst),
-           (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
-}
-
-// As above, but D and B sized elements unsupported.
-multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
-                                 SDPatternOperator OpNode> {
-  def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
-                                      asm, ".4h",
-        [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
-  def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
-                                      asm, ".8h",
-        [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
-  def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
-                                      asm, ".2s",
-        [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
-  def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
-                                      asm, ".4s",
-        [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
-}
-
-// Logical three vector ops share opcode bits, and only use B sized elements.
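
// Illustrative sketch, not part of the original patch: the FP multiclasses
// above take an extra size bit S selecting single vs. double precision, e.g.
//
//   defm FADD : SIMDThreeSameVectorFP<0, 0, 0b11010, "fadd", fadd>;
//   defm FSUB : SIMDThreeSameVectorFP<0, 1, 0b11010, "fsub", fsub>;
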
-multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
-                                  SDPatternOperator OpNode = null_frag> {
-  def v8i8  : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64,
-                                     asm, ".8b",
-         [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>;
-  def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128,
-                                     asm, ".16b",
-         [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>;
-
-  def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
-          (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
-  def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
-          (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
-  def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)),
-          (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
-
-  def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
-      (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
-  def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
-      (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
-  def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
-      (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
-}
-
-multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
-                                      string asm, SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64,
-                                     asm, ".8b",
-             [(set (v8i8 V64:$dst),
-                   (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
-  def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128,
-                                     asm, ".16b",
-             [(set (v16i8 V128:$dst),
-                   (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
-                           (v16i8 V128:$Rm)))]>;
-
-  def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS),
-                           (v4i16 V64:$RHS))),
-          (!cast<Instruction>(NAME#"v8i8")
-            V64:$LHS, V64:$MHS, V64:$RHS)>;
-  def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS),
-                           (v2i32 V64:$RHS))),
-          (!cast<Instruction>(NAME#"v8i8")
-            V64:$LHS, V64:$MHS, V64:$RHS)>;
-  def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS),
-                           (v1i64 V64:$RHS))),
-          (!cast<Instruction>(NAME#"v8i8")
-            V64:$LHS, V64:$MHS, V64:$RHS)>;
-
-  def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS),
-                           (v8i16 V128:$RHS))),
-      (!cast<Instruction>(NAME#"v16i8")
-        V128:$LHS, V128:$MHS, V128:$RHS)>;
-  def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS),
-                           (v4i32 V128:$RHS))),
-      (!cast<Instruction>(NAME#"v16i8")
-        V128:$LHS, V128:$MHS, V128:$RHS)>;
-  def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS),
-                           (v2i64 V128:$RHS))),
-      (!cast<Instruction>(NAME#"v16i8")
-        V128:$LHS, V128:$MHS, V128:$RHS)>;
-}
-
-
-//----------------------------------------------------------------------------
-// AdvSIMD two register vector instructions.
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
-                        RegisterOperand regtype, string asm, string dstkind,
-                        string srckind, list<dag> pattern>
-  : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
-      "{\t$Rd" # dstkind # ", $Rn" # srckind #
-      "|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
-    Sched<[WriteV]> {
-  bits<5> Rd;
-  bits<5> Rn;
-  let Inst{31} = 0;
-  let Inst{30} = Q;
-  let Inst{29} = U;
-  let Inst{28-24} = 0b01110;
-  let Inst{23-22} = size;
-  let Inst{21-17} = 0b10000;
-  let Inst{16-12} = opcode;
-  let Inst{11-10} = 0b10;
-  let Inst{9-5} = Rn;
-  let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
-                        RegisterOperand regtype, string asm, string dstkind,
-                        string srckind, list<dag> pattern>
-  : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
-      "{\t$Rd" # dstkind # ", $Rn" # srckind #
-      "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
-    Sched<[WriteV]> {
-  bits<5> Rd;
-  bits<5> Rn;
-  let Inst{31} = 0;
-  let Inst{30} = Q;
-  let Inst{29} = U;
-  let Inst{28-24} = 0b01110;
-  let Inst{23-22} = size;
-  let Inst{21-17} = 0b10000;
-  let Inst{16-12} = opcode;
-  let Inst{11-10} = 0b10;
-  let Inst{9-5} = Rn;
-  let Inst{4-0} = Rd;
-}
-
-// Supports B, H, and S element sizes.
-multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm,
-                            SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
-                                    asm, ".8b", ".8b",
-                     [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
-  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
-                                    asm, ".16b", ".16b",
-                     [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
-                                    asm, ".4h", ".4h",
-                     [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
-  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
-                                    asm, ".8h", ".8h",
-                     [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
-  def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
-                                    asm, ".2s", ".2s",
-                     [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
-  def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
-                                    asm, ".4s", ".4s",
-                     [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
-}
-
-class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
-                        RegisterOperand regtype, string asm, string dstkind,
-                        string srckind, string amount>
-  : I<(outs V128:$Rd), (ins regtype:$Rn), asm,
-      "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
-      "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
-    Sched<[WriteV]> {
-  bits<5> Rd;
-  bits<5> Rn;
-  let Inst{31} = 0;
-  let Inst{30} = Q;
-  let Inst{29-24} = 0b101110;
-  let Inst{23-22} = size;
-  let Inst{21-10} = 0b100001001110;
-  let Inst{9-5} = Rn;
-  let Inst{4-0} = Rd;
-}
-
-multiclass SIMDVectorLShiftLongBySizeBHS {
-  let neverHasSideEffects = 1 in {
-  def v8i8  : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64,
-                                             "shll", ".8h", ".8b", "8">;
-  def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128,
-                                             "shll2", ".8h", ".16b", "8">;
-  def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64,
-                                             "shll", ".4s", ".4h", "16">;
-  def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128,
-                                             "shll2", ".4s", ".8h", "16">;
-  def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64,
-                                             "shll", ".2d", ".2s", "32">;
-  def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 0b10, V128,
-                                             "shll2", ".2d", ".4s", "32">;
-  }
-}
-
-// Supports all element sizes.
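
// Illustrative sketch, not part of the original patch: typical users of the
// two-register forms above, including the parameterless shll/shll2 family:
//
//   defm CLZ  : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
//   defm SHLL : SIMDVectorLShiftLongBySizeBHS;
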
-multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm,
-                             SDPatternOperator OpNode> {
-  def v8i8_v4i16   : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
-                                           asm, ".4h", ".8b",
-        [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
-  def v16i8_v8i16  : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
-                                           asm, ".8h", ".16b",
-        [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-  def v4i16_v2i32  : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
-                                           asm, ".2s", ".4h",
-        [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
-  def v8i16_v4i32  : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
-                                           asm, ".4s", ".8h",
-        [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
-  def v2i32_v1i64  : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
-                                           asm, ".1d", ".2s",
-        [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
-  def v4i32_v2i64  : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
-                                           asm, ".2d", ".4s",
-        [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
-}
-
-multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
-                                 SDPatternOperator OpNode> {
-  def v8i8_v4i16   : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
-                                               asm, ".4h", ".8b",
-      [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),
-                                      (v8i8 V64:$Rn)))]>;
-  def v16i8_v8i16  : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
-                                               asm, ".8h", ".16b",
-      [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd),
-                                       (v16i8 V128:$Rn)))]>;
-  def v4i16_v2i32  : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
-                                               asm, ".2s", ".4h",
-      [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd),
-                                      (v4i16 V64:$Rn)))]>;
-  def v8i16_v4i32  : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
-                                               asm, ".4s", ".8h",
-      [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
-                                       (v8i16 V128:$Rn)))]>;
-  def v2i32_v1i64  : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
-                                               asm, ".1d", ".2s",
-      [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd),
-                                      (v2i32 V64:$Rn)))]>;
-  def v4i32_v2i64  : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
-                                               asm, ".2d", ".4s",
-      [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd),
-                                       (v4i32 V128:$Rn)))]>;
-}
-
-// Supports all element sizes, except 1xD.
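
// Illustrative sketch, not part of the original patch (opcode values
// indicative only): the widening forms above model the pairwise add-long
// and accumulate-long ops, where the tied variant also reads $Rd:
//
//   defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp",
//                                   int_arm64_neon_saddlp>;
//   defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
//                                       int_arm64_neon_sadalp>;
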
-multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm,
-                                 SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
-                                        asm, ".8b", ".8b",
-    [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>;
-  def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
-                                        asm, ".16b", ".16b",
-    [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
-  def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
-                                        asm, ".4h", ".4h",
-    [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>;
-  def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
-                                        asm, ".8h", ".8h",
-    [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>;
-  def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
-                                        asm, ".2s", ".2s",
-    [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>;
-  def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
-                                        asm, ".4s", ".4s",
-    [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
-  def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128,
-                                        asm, ".2d", ".2d",
-    [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>;
-}
-
-multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
-                             SDPatternOperator OpNode = null_frag> {
-  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
-                                    asm, ".8b", ".8b",
-    [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
-  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
-                                    asm, ".16b", ".16b",
-    [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
-                                    asm, ".4h", ".4h",
-    [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
-  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
-                                    asm, ".8h", ".8h",
-    [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
-  def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
-                                    asm, ".2s", ".2s",
-    [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
-  def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
-                                    asm, ".4s", ".4s",
-    [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
-  def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128,
-                                    asm, ".2d", ".2d",
-    [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
-}
-
-
-// Supports only B element sizes.
-multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
-                          SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDTwoSameVector<0, U, size, opc, V64,
-                                    asm, ".8b", ".8b",
-    [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
-  def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128,
-                                    asm, ".16b", ".16b",
-    [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-
-}
-
-// Supports only B and H element sizes.
-multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
-                           SDPatternOperator OpNode> {
-  def v8i8  : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
-                                    asm, ".8b", ".8b",
-    [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>;
-  def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
-                                    asm, ".16b", ".16b",
-    [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>;
-  def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
-                                    asm, ".4h", ".4h",
-    [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>;
-  def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
-                                    asm, ".8h", ".8h",
-    [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
-}
-
-// Supports only S and D element sizes, uses high bit of the size field
-// as an extra opcode bit.
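
// Illustrative sketch, not part of the original patch (opcode value
// indicative only): the full B/H/S/D unary form above covers ops that do
// support a .2d arrangement, such as abs:
//
//   defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_arm64_neon_abs>;
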
-multiclass SIMDTwoVectorFP opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -// Supports only S element size. -multiclass SIMDTwoVectorS opc, string asm, - SDPatternOperator OpNode> { - def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; -} - - -multiclass SIMDTwoVectorFPToInt opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; -} - -multiclass SIMDTwoVectorIntToFP opc, string asm, - SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, - asm, ".2s", ".2s", - [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, - asm, ".4s", ".4s", - [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, - asm, ".2d", ".2d", - [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; -} - - -class BaseSIMDMixedTwoVector size, bits<5> opcode, - RegisterOperand inreg, RegisterOperand outreg, - string asm, string outkind, string inkind, - list pattern> - : I<(outs outreg:$Rd), (ins inreg:$Rn), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind # - "|" # outkind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseSIMDMixedTwoVectorTied size, bits<5> opcode, - RegisterOperand inreg, RegisterOperand outreg, - string asm, string outkind, string inkind, - list pattern> - : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind # - "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDMixedTwoVector opc, string asm, - SDPatternOperator OpNode> { - def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64, - asm, ".8b", ".8h", - [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128, - asm#"2", ".16b", ".8h", []>; - def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64, - asm, ".4h", ".4s", - [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v8i16 : 
BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128, - asm#"2", ".8h", ".4s", []>; - def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64, - asm, ".2s", ".2d", - [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>; - def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; - - def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))), - (!cast(NAME # "v16i8") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))), - (!cast(NAME # "v8i16") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))), - (!cast(NAME # "v4i32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -} - -class BaseSIMDCmpTwoVector size, bits<5> opcode, - RegisterOperand regtype, string asm, string kind, - ValueType dty, ValueType sty, SDNode OpNode> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", #0" # - "|" # kind # "\t$Rd, $Rn, #0}", "", - [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// Comparisons support all element sizes, except 1xD. -multiclass SIMDCmpTwoVector opc, string asm, - SDNode OpNode> { - def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64, - asm, ".8b", - v8i8, v8i8, OpNode>; - def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128, - asm, ".16b", - v16i8, v16i8, OpNode>; - def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64, - asm, ".4h", - v4i16, v4i16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128, - asm, ".8h", - v8i16, v8i16, OpNode>; - def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64, - asm, ".2s", - v2i32, v2i32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128, - asm, ".4s", - v4i32, v4i32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128, - asm, ".2d", - v2i64, v2i64, OpNode>; -} - -// FP Comparisons support only S and D element sizes. 
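
// Illustrative sketch, not part of the original patch: the integer
// compare-against-zero multiclass above pairs one instantiation with a
// target DAG node (node name as used elsewhere in this backend), e.g.
//
//   defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", ARM64cmeqz>;
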
-multiclass SIMDFPCmpTwoVector opc, - string asm, SDNode OpNode> { - def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64, - asm, ".2s", - v2i32, v2f32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128, - asm, ".4s", - v4i32, v4f32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128, - asm, ".2d", - v2i64, v2f64, OpNode>; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDFPCvtTwoVector size, bits<5> opcode, - RegisterOperand outtype, RegisterOperand intype, - string asm, string VdTy, string VnTy, - list pattern> - : I<(outs outtype:$Rd), (ins intype:$Rn), asm, - !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class BaseSIMDFPCvtTwoVectorTied size, bits<5> opcode, - RegisterOperand outtype, RegisterOperand intype, - string asm, string VdTy, string VnTy, - list pattern> - : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm, - !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDFPWidenTwoVector opc, string asm> { - def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64, - asm, ".4s", ".4h", []>; - def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128, - asm#"2", ".4s", ".8h", []>; - def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64, - asm, ".2d", ".2s", []>; - def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128, - asm#"2", ".2d", ".4s", []>; -} - -multiclass SIMDFPNarrowTwoVector opc, string asm> { - def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128, - asm, ".4h", ".4s", []>; - def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128, - asm#"2", ".8h", ".4s", []>; - def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, - asm, ".2s", ".2d", []>; - def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; -} - -multiclass SIMDFPInexactCvtTwoVector opc, string asm, - Intrinsic OpNode> { - def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128, - asm, ".2s", ".2d", - [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>; - def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128, - asm#"2", ".4s", ".2d", []>; - - def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))), - (!cast(NAME # "v4f32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register different-size vector instructions. 
-//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDDifferentThreeVector size, bits<4> opcode, - RegisterOperand outtype, RegisterOperand intype1, - RegisterOperand intype2, string asm, - string outkind, string inkind1, string inkind2, - list pattern> - : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # - "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDDifferentThreeVectorTied size, bits<4> opcode, - RegisterOperand outtype, RegisterOperand intype1, - RegisterOperand intype2, string asm, - string outkind, string inkind1, string inkind2, - list pattern> - : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm, - "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 # - "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-12} = opcode; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -// FIXME: TableGen doesn't know how to deal with expanded types that also -// change the element count (in this case, placing the results in -// the high elements of the result register rather than the low -// elements). Until that's fixed, we can't code-gen those. -multiclass SIMDNarrowThreeVectorBHS opc, string asm, - Intrinsic IntOp> { - def v8i16_v8i8 : BaseSIMDDifferentThreeVector; - def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied; - def v4i32_v4i16 : BaseSIMDDifferentThreeVector; - def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v2i64_v2i32 : BaseSIMDDifferentThreeVector; - def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied; - - - // Patterns for the '2' variants involve INSERT_SUBREG, which you can't put in - // a version attached to an instruction. 
- def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), - (v8i16 V128:$Rm))), - (!cast(NAME # "v8i16_v16i8") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), - (v4i32 V128:$Rm))), - (!cast(NAME # "v4i32_v8i16") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), - (v2i64 V128:$Rm))), - (!cast(NAME # "v2i64_v4i32") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -} - -multiclass SIMDDifferentThreeVectorBD opc, string asm, - Intrinsic IntOp> { - def v8i8 : BaseSIMDDifferentThreeVector; - def v16i8 : BaseSIMDDifferentThreeVector; - def v1i64 : BaseSIMDDifferentThreeVector; - def v2i64 : BaseSIMDDifferentThreeVector; - - def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)), - (v8i8 (extract_high_v16i8 V128:$Rm)))), - (!cast(NAME#"v16i8") V128:$Rn, V128:$Rm)>; -} - -multiclass SIMDLongThreeVectorHS opc, string asm, - SDPatternOperator OpNode> { - def v4i16_v4i32 : BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - -multiclass SIMDLongThreeVectorBHSabdl opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - -multiclass SIMDLongThreeVectorTiedBHSabal opc, - string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; -} - -multiclass SIMDLongThreeVectorBHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - -multiclass SIMDLongThreeVectorTiedBHS opc, - string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied; - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; -} - -multiclass SIMDLongThreeVectorSQDMLXTiedHS opc, string asm, - SDPatternOperator Accum> { - def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied; - def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied; - def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied; -} - -multiclass SIMDWideThreeVectorBHS opc, string asm, - SDPatternOperator OpNode> { - def v8i8_v8i16 : BaseSIMDDifferentThreeVector; - def v16i8_v8i16 : BaseSIMDDifferentThreeVector; - def v4i16_v4i32 : BaseSIMDDifferentThreeVector; - def v8i16_v4i32 : BaseSIMDDifferentThreeVector; - def v2i32_v2i64 : 
BaseSIMDDifferentThreeVector; - def v4i32_v2i64 : BaseSIMDDifferentThreeVector; -} - -//---------------------------------------------------------------------------- -// AdvSIMD bitwise extract from vector -//---------------------------------------------------------------------------- - -class BaseSIMDBitwiseExtract - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" # - "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "", - [(set (vty regtype:$Rd), - (ARM64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - bits<4> imm; - let Inst{31} = 0; - let Inst{30} = size; - let Inst{29-21} = 0b101110000; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-11} = imm; - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -multiclass SIMDBitwiseExtract { - def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b">; - def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">; -} - -//---------------------------------------------------------------------------- -// AdvSIMD zip vector -//---------------------------------------------------------------------------- - -class BaseSIMDZipVector size, bits<3> opc, RegisterOperand regtype, - string asm, string kind, SDNode OpNode, ValueType valty> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # - "|" # kind # "\t$Rd, $Rn, $Rm}", "", - [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31} = 0; - let Inst{30} = size{0}; - let Inst{29-24} = 0b001110; - let Inst{23-22} = size{2-1}; - let Inst{21} = 0; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDZipVectoropc, string asm, - SDNode OpNode> { - def v8i8 : BaseSIMDZipVector<0b000, opc, V64, - asm, ".8b", OpNode, v8i8>; - def v16i8 : BaseSIMDZipVector<0b001, opc, V128, - asm, ".16b", OpNode, v16i8>; - def v4i16 : BaseSIMDZipVector<0b010, opc, V64, - asm, ".4h", OpNode, v4i16>; - def v8i16 : BaseSIMDZipVector<0b011, opc, V128, - asm, ".8h", OpNode, v8i16>; - def v2i32 : BaseSIMDZipVector<0b100, opc, V64, - asm, ".2s", OpNode, v2i32>; - def v4i32 : BaseSIMDZipVector<0b101, opc, V128, - asm, ".4s", OpNode, v4i32>; - def v2i64 : BaseSIMDZipVector<0b111, opc, V128, - asm, ".2d", OpNode, v2i64>; - - def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)), - (!cast(NAME#"v2i32") V64:$Rn, V64:$Rm)>; - def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)), - (!cast(NAME#"v4i32") V128:$Rn, V128:$Rm)>; - def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)), - (!cast(NAME#"v2i64") V128:$Rn, V128:$Rm)>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD three register scalar instructions -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDThreeScalar size, bits<5> opcode, - RegisterClass regtype, string asm, - list pattern> - : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, - "\t$Rd, $Rn, $Rm", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 1; - let Inst{9-5} 
= Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDThreeScalarD opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; -} - -multiclass SIMDThreeScalarBHSD opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; - def v1i8 : BaseSIMDThreeScalar; - - def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (!cast(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; - def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))), - (!cast(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>; -} - -multiclass SIMDThreeScalarHS opc, string asm, - SDPatternOperator OpNode> { - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; -} - -multiclass SIMDThreeScalarSD opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; - } - - def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; -} - -multiclass SIMDThreeScalarFPCmp opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; - } - - def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; -} - -class BaseSIMDThreeScalarMixed size, bits<5> opcode, - dag oops, dag iops, string asm, string cstr, list pat> - : I, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; - let Inst{20-16} = Rm; - let Inst{15-11} = opcode; - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDThreeScalarMixedHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def i16 : BaseSIMDThreeScalarMixed; - def i32 : BaseSIMDThreeScalarMixed; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDThreeScalarMixedTiedHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def i16 : BaseSIMDThreeScalarMixed; - def i32 : BaseSIMDThreeScalarMixed; -} - -//---------------------------------------------------------------------------- -// AdvSIMD two register scalar instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalar size, bits<5> opcode, - RegisterClass regtype, RegisterClass regtype2, - string asm, list pat> - : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, - "\t$Rd, $Rn", "", pat>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalarTied size, bits<5> opcode, - RegisterClass regtype, RegisterClass regtype2, - string asm, list pat> - : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm, - "\t$Rd, $Rn", "$Rd = $dst", pat>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let 
Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDCmpTwoScalar size, bits<5> opcode, - RegisterClass regtype, string asm> - : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, - "\t$Rd, $Rn, #0", "", []>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21-17} = 0b10000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDInexactCvtTwoScalar opcode, string asm> - : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "", - [(set (f32 FPR32:$Rd), (int_arm64_sisd_fcvtxn (f64 FPR64:$Rn)))]>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-17} = 0b011111100110000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDCmpTwoScalarD opc, string asm, - SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; - - def : Pat<(v1i64 (OpNode FPR64:$Rn)), - (!cast(NAME # v1i64rz) FPR64:$Rn)>; -} - -multiclass SIMDCmpTwoScalarSD opc, string asm, - SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; - def v1i32rz : BaseSIMDCmpTwoScalar; - - def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), - (!cast(NAME # v1i64rz) FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarD opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v1i64 : BaseSIMDTwoScalar; - - def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), - (!cast(NAME # "v1i64") FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarSD opc, string asm> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; -} - -multiclass SIMDTwoScalarCVTSD opc, string asm, - SDPatternOperator OpNode> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; -} - -multiclass SIMDTwoScalarBHSD opc, string asm, - SDPatternOperator OpNode = null_frag> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), - (!cast(NAME # v1i64) FPR64:$Rn)>; -} - -multiclass SIMDTwoScalarBHSDTied opc, string asm, - Intrinsic OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalarTied; - def v1i32 : BaseSIMDTwoScalarTied; - def v1i16 : BaseSIMDTwoScalarTied; - def v1i8 : BaseSIMDTwoScalarTied; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))), - (!cast(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>; -} - - - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDTwoScalarMixedBHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; -} - -//---------------------------------------------------------------------------- -// AdvSIMD scalar pairwise instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDPairwiseScalar size, bits<5> opcode, - RegisterOperand regtype, RegisterOperand vectype, - string asm, string kind> - : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, - "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let 
Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDPairwiseScalarD opc, string asm> { - def v2i64p : BaseSIMDPairwiseScalar; -} - -multiclass SIMDPairwiseScalarSD opc, string asm> { - def v2i32p : BaseSIMDPairwiseScalar; - def v2i64p : BaseSIMDPairwiseScalar; -} - -//---------------------------------------------------------------------------- -// AdvSIMD across lanes instructions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDAcrossLanes size, bits<5> opcode, - RegisterClass regtype, RegisterOperand vectype, - string asm, string kind, list pattern> - : I<(outs regtype:$Rd), (ins vectype:$Rn), asm, - "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21-17} = 0b11000; - let Inst{16-12} = opcode; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDAcrossLanesBHS opcode, - string asm> { - def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64, - asm, ".8b", []>; - def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128, - asm, ".16b", []>; - def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64, - asm, ".4h", []>; - def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128, - asm, ".8h", []>; - def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128, - asm, ".4s", []>; -} - -multiclass SIMDAcrossLanesHSD opcode, string asm> { - def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64, - asm, ".8b", []>; - def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128, - asm, ".16b", []>; - def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64, - asm, ".4h", []>; - def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128, - asm, ".8h", []>; - def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128, - asm, ".4s", []>; -} - -multiclass SIMDAcrossLanesS opcode, bit sz1, string asm, - Intrinsic intOp> { - def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, - asm, ".4s", - [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD INS/DUP instructions -//---------------------------------------------------------------------------- - -// FIXME: There has got to be a better way to factor these. ugh. 
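
// Illustrative sketch, not part of the original patch: the across-lanes
// multiclasses defined just above reduce a whole vector into a single
// scalar register, e.g.
//
//   defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
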
- -class BaseSIMDInsDup pattern> - : I, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = op; - let Inst{28-21} = 0b01110000; - let Inst{15} = 0; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SIMDDupFromMain imm5, string size, ValueType vectype, - RegisterOperand vecreg, RegisterClass regtype> - : BaseSIMDInsDup { - let Inst{20-16} = imm5; - let Inst{14-11} = 0b0001; -} - -class SIMDDupFromElement - : BaseSIMDInsDup { - let Inst{14-11} = 0b0000; -} - -class SIMDDup64FromElement - : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128, - VectorIndexD, i64, ARM64duplane64> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; -} - -class SIMDDup32FromElement - : SIMDDupFromElement { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; -} - -class SIMDDup16FromElement - : SIMDDupFromElement { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; -} - -class SIMDDup8FromElement - : SIMDDupFromElement { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; -} - -class BaseSIMDMov imm4, RegisterClass regtype, - Operand idxtype, string asm, list pattern> - : BaseSIMDInsDup { - let Inst{14-11} = imm4; -} - -class SIMDSMov - : BaseSIMDMov; -class SIMDUMov - : BaseSIMDMov; - -class SIMDMovAlias - : InstAlias; - -multiclass SMov { - def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } -} - -multiclass UMov { - def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - def : SIMDMovAlias<"mov", ".s", - !cast(NAME#"vi32"), - GPR32, VectorIndexS>; - def : SIMDMovAlias<"mov", ".d", - !cast(NAME#"vi64"), - GPR64, VectorIndexD>; -} - -class SIMDInsFromMain - : BaseSIMDInsDup<1, 0, (outs V128:$dst), - (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins", - "{\t$Rd" # size # "$idx, $Rn" # - "|" # size # "\t$Rd$idx, $Rn}", - "$Rd = $dst", - [(set V128:$dst, - (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> { - let Inst{14-11} = 0b0011; -} - -class SIMDInsFromElement - : BaseSIMDInsDup<1, 1, (outs V128:$dst), - (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins", - "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" # - "|" # size # "\t$Rd$idx, $Rn$idx2}", - "$Rd = $dst", - [(set V128:$dst, - (vector_insert - (vectype V128:$Rd), - (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)), - idxtype:$idx))]>; - -class SIMDInsMainMovAlias - : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # - "|" # size #"\t$dst$idx, $src}", - (inst V128:$dst, 
idxtype:$idx, regtype:$src)>; -class SIMDInsElementMovAlias - : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # - # "|" # size #" $dst$idx, $src$idx2}", - (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; - - -multiclass SIMDIns { - def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - - def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> { - bits<4> idx; - bits<4> idx2; - let Inst{20-17} = idx; - let Inst{16} = 1; - let Inst{14-11} = idx2; - } - def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> { - bits<3> idx; - bits<3> idx2; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - let Inst{14-12} = idx2; - let Inst{11} = 0; - } - def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> { - bits<2> idx; - bits<2> idx2; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - let Inst{14-13} = idx2; - let Inst{12-11} = 0; - } - def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> { - bits<1> idx; - bits<1> idx2; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - let Inst{14} = idx2; - let Inst{13-11} = 0; - } - - // For all forms of the INS instruction, the "mov" mnemonic is the - // preferred alias. Why they didn't just call the instruction "mov" in - // the first place is a very good question indeed... 
- def : SIMDInsMainMovAlias<".b", !cast(NAME#"vi8gpr"), - GPR32, VectorIndexB>; - def : SIMDInsMainMovAlias<".h", !cast(NAME#"vi16gpr"), - GPR32, VectorIndexH>; - def : SIMDInsMainMovAlias<".s", !cast(NAME#"vi32gpr"), - GPR32, VectorIndexS>; - def : SIMDInsMainMovAlias<".d", !cast(NAME#"vi64gpr"), - GPR64, VectorIndexD>; - - def : SIMDInsElementMovAlias<".b", !cast(NAME#"vi8lane"), - VectorIndexB>; - def : SIMDInsElementMovAlias<".h", !cast(NAME#"vi16lane"), - VectorIndexH>; - def : SIMDInsElementMovAlias<".s", !cast(NAME#"vi32lane"), - VectorIndexS>; - def : SIMDInsElementMovAlias<".d", !cast(NAME#"vi64lane"), - VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD TBL/TBX -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDTableLookup len, bit op, RegisterOperand vectype, - RegisterOperand listtype, string asm, string kind> - : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm, - "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>, - Sched<[WriteV]> { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-21} = 0b001110000; - let Inst{20-16} = Vm; - let Inst{15} = 0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDTableLookupTied len, bit op, RegisterOperand vectype, - RegisterOperand listtype, string asm, string kind> - : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm, - "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>, - Sched<[WriteV]> { - bits<5> Vd; - bits<5> Vn; - bits<5> Vm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-21} = 0b001110000; - let Inst{20-16} = Vm; - let Inst{15} = 0; - let Inst{14-13} = len; - let Inst{12} = op; - let Inst{11-10} = 0b00; - let Inst{9-5} = Vn; - let Inst{4-0} = Vd; -} - -class SIMDTableLookupAlias - : InstAlias; - -multiclass SIMDTableLookup { - def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b, - asm, ".8b">; - def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b, - asm, ".8b">; - def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b, - asm, ".8b">; - def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b, - asm, ".8b">; - def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b, - asm, ".16b">; - def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b, - asm, ".16b">; - def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b, - asm, ".16b">; - def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b, - asm, ".16b">; - - def : SIMDTableLookupAlias(NAME#"v8i8One"), - V64, VecListOne128>; - def : SIMDTableLookupAlias(NAME#"v8i8Two"), - V64, VecListTwo128>; - def : SIMDTableLookupAlias(NAME#"v8i8Three"), - V64, VecListThree128>; - def : SIMDTableLookupAlias(NAME#"v8i8Four"), - V64, VecListFour128>; - def : SIMDTableLookupAlias(NAME#"v16i8One"), - V128, VecListOne128>; - def : SIMDTableLookupAlias(NAME#"v16i8Two"), - V128, VecListTwo128>; - def : SIMDTableLookupAlias(NAME#"v16i8Three"), - V128, VecListThree128>; - def : SIMDTableLookupAlias(NAME#"v16i8Four"), - V128, VecListFour128>; -} - -multiclass SIMDTableLookupTied { - def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b, - asm, ".8b">; - def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, 
VecListTwo16b, - asm, ".8b">; - def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b, - asm, ".8b">; - def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b, - asm, ".8b">; - def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b, - asm, ".16b">; - def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b, - asm, ".16b">; - def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b, - asm, ".16b">; - def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b, - asm, ".16b">; - - def : SIMDTableLookupAlias(NAME#"v8i8One"), - V64, VecListOne128>; - def : SIMDTableLookupAlias(NAME#"v8i8Two"), - V64, VecListTwo128>; - def : SIMDTableLookupAlias(NAME#"v8i8Three"), - V64, VecListThree128>; - def : SIMDTableLookupAlias(NAME#"v8i8Four"), - V64, VecListFour128>; - def : SIMDTableLookupAlias(NAME#"v16i8One"), - V128, VecListOne128>; - def : SIMDTableLookupAlias(NAME#"v16i8Two"), - V128, VecListTwo128>; - def : SIMDTableLookupAlias(NAME#"v16i8Three"), - V128, VecListThree128>; - def : SIMDTableLookupAlias(NAME#"v16i8Four"), - V128, VecListFour128>; -} - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar CPY -//---------------------------------------------------------------------------- -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDScalarCPY - : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov", - "{\t$dst, $src" # kind # "$idx" # - "|\t$dst, $src$idx}", "", []>, - Sched<[WriteV]> { - bits<5> dst; - bits<5> src; - let Inst{31-21} = 0b01011110000; - let Inst{15-10} = 0b000001; - let Inst{9-5} = src; - let Inst{4-0} = dst; -} - -class SIMDScalarCPYAlias - : InstAlias; - - -multiclass SIMDScalarCPY { - def i8 : BaseSIMDScalarCPY { - bits<4> idx; - let Inst{20-17} = idx; - let Inst{16} = 1; - } - def i16 : BaseSIMDScalarCPY { - bits<3> idx; - let Inst{20-18} = idx; - let Inst{17-16} = 0b10; - } - def i32 : BaseSIMDScalarCPY { - bits<2> idx; - let Inst{20-19} = idx; - let Inst{18-16} = 0b100; - } - def i64 : BaseSIMDScalarCPY { - bits<1> idx; - let Inst{20} = idx; - let Inst{19-16} = 0b1000; - } - - // 'DUP' mnemonic aliases. 
- def : SIMDScalarCPYAlias<"dup", ".b", - !cast(NAME#"i8"), - FPR8, V128, VectorIndexB>; - def : SIMDScalarCPYAlias<"dup", ".h", - !cast(NAME#"i16"), - FPR16, V128, VectorIndexH>; - def : SIMDScalarCPYAlias<"dup", ".s", - !cast(NAME#"i32"), - FPR32, V128, VectorIndexS>; - def : SIMDScalarCPYAlias<"dup", ".d", - !cast(NAME#"i64"), - FPR64, V128, VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// AdvSIMD modified immediate instructions -//---------------------------------------------------------------------------- - -class BaseSIMDModifiedImm pattern> - : I, - Sched<[WriteV]> { - bits<5> Rd; - bits<8> imm8; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = op; - let Inst{28-19} = 0b0111100000; - let Inst{18-16} = imm8{7-5}; - let Inst{11-10} = 0b01; - let Inst{9-5} = imm8{4-0}; - let Inst{4-0} = Rd; -} - -class BaseSIMDModifiedImmVector pattern> - : BaseSIMDModifiedImm { - let DecoderMethod = "DecodeModImmInstruction"; -} - -class BaseSIMDModifiedImmVectorTied pattern> - : BaseSIMDModifiedImm { - let DecoderMethod = "DecodeModImmTiedInstruction"; -} - -class BaseSIMDModifiedImmVectorShift b15_b12, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVector { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14-13} = shift; - let Inst{12} = b15_b12{0}; -} - -class BaseSIMDModifiedImmVectorShiftTied b15_b12, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVectorTied { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14-13} = shift; - let Inst{12} = b15_b12{0}; -} - - -class BaseSIMDModifiedImmVectorShiftHalf b15_b12, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVector { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14} = 0; - let Inst{13} = shift{0}; - let Inst{12} = b15_b12{0}; -} - -class BaseSIMDModifiedImmVectorShiftHalfTied b15_b12, - RegisterOperand vectype, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVectorTied { - bits<2> shift; - let Inst{15} = b15_b12{1}; - let Inst{14} = 0; - let Inst{13} = shift{0}; - let Inst{12} = b15_b12{0}; -} - -multiclass SIMDModifiedImmVectorShift hw_cmode, bits<2> w_cmode, - string asm> { - def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64, - asm, ".4h", []>; - def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128, - asm, ".8h", []>; - - def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64, - asm, ".2s", []>; - def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128, - asm, ".4s", []>; -} - -multiclass SIMDModifiedImmVectorShiftTied hw_cmode, - bits<2> w_cmode, string asm, - SDNode OpNode> { - def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64, - asm, ".4h", - [(set (v4i16 V64:$dst), (OpNode V64:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128, - asm, ".8h", - [(set (v8i16 V128:$dst), (OpNode V128:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - - def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64, - asm, ".2s", - [(set (v2i32 V64:$dst), (OpNode V64:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; - def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128, - asm, ".4s", - [(set (v4i32 V128:$dst), (OpNode V128:$Rd, - imm0_255:$imm8, - (i32 imm:$shift)))]>; -} - -class SIMDModifiedImmMoveMSL cmode, - RegisterOperand vectype, string asm, - string kind, list 
pattern> - : BaseSIMDModifiedImmVector { - bits<1> shift; - let Inst{15-13} = cmode{3-1}; - let Inst{12} = shift; -} - -class SIMDModifiedImmVectorNoShift cmode, - RegisterOperand vectype, - Operand imm_type, string asm, - string kind, list pattern> - : BaseSIMDModifiedImmVector { - let Inst{15-12} = cmode; -} - -class SIMDModifiedImmScalarNoShift cmode, string asm, - list pattern> - : BaseSIMDModifiedImm { - let Inst{15-12} = cmode; - let DecoderMethod = "DecodeModImmInstruction"; -} - -//---------------------------------------------------------------------------- -// AdvSIMD indexed element -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexed size, bits<4> opc, - RegisterOperand dst_reg, RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, string asm, - string apple_kind, string dst_kind, string lhs_kind, - string rhs_kind, list pattern> - : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), - asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # - "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the derived class. - let Inst{20-16} = Rm; - let Inst{15-12} = opc; - // Bit 11 must be set by the derived class. - let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDIndexedTied size, bits<4> opc, - RegisterOperand dst_reg, RegisterOperand lhs_reg, - RegisterOperand rhs_reg, Operand vec_idx, string asm, - string apple_kind, string dst_kind, string lhs_kind, - string rhs_kind, list pattern> - : I<(outs dst_reg:$dst), - (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm, - "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" # - "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28} = Scalar; - let Inst{27-24} = 0b1111; - let Inst{23-22} = size; - // Bit 21 must be set by the derived class. - let Inst{20-16} = Rm; - let Inst{15-12} = opc; - // Bit 11 must be set by the derived class. 
- let Inst{10} = 0; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDFPIndexedSD opc, string asm, - SDPatternOperator OpNode> { - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2f32 V64:$Rd), - (OpNode (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4f32 V128:$Rd), - (OpNode (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc, - V128, V128, - V128, VectorIndexD, - asm, ".2d", ".2d", ".2d", ".d", - [(set (v2f64 V128:$Rd), - (OpNode (v2f64 V128:$Rn), - (v2f64 (ARM64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (f32 FPR32Op:$Rd), - (OpNode (f32 FPR32Op:$Rn), - (f32 (vector_extract (v4f32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc, - FPR64Op, FPR64Op, V128, VectorIndexD, - asm, ".d", "", "", ".d", - [(set (f64 FPR64Op:$Rd), - (OpNode (f64 FPR64Op:$Rn), - (f64 (vector_extract (v2f64 V128:$Rm), - VectorIndexD:$idx))))]> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } -} - -multiclass SIMDFPIndexedSDTiedPatterns { - // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx))), - (!cast(INST # v2i32_indexed) - V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), - (!cast(INST # "v2i32_indexed") V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - - // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar. - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 V128:$Rm), - VectorIndexS:$idx))), - (!cast(INST # "v4i32_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 FPR32Op:$Rm)))), - (!cast(INST # "v4i32_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar. 
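  // A note on the SUBREG_TO_REG operands used by the DUP-scalar variants in
  // this multiclass: rather than emitting a real DUP, the scalar FP register
  // is declared to occupy the low subregister of an otherwise-undefined
  // 128-bit register, and the by-element instruction reads lane 0. A minimal
  // sketch of the output-pattern shape (names as in the surrounding patterns):
  //
  //   (SUBREG_TO_REG (i32 0),    // fresh 128-bit value, rest undefined
  //                  FPR32Op:$Rm, // the scalar operand
  //                  ssub)        // placed in the low 32 bits
  //
  // paired with a literal lane index of 0, as the .2s/.4s cases above and
  // the .2d case below show.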
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 V128:$Rm), - VectorIndexD:$idx))), - (!cast(INST # "v2i64_indexed") - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 FPR64Op:$Rm)))), - (!cast(INST # "v2i64_indexed") V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; - - // 2 variants for 32-bit scalar version: extract from .2s or from .4s - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))), - (!cast(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, - V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))), - (!cast(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; - - // 1 variant for 64-bit scalar version: extract from .1d or from .2d - def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), - (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))), - (!cast(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn, - V128:$Rm, VectorIndexD:$idx)>; -} - -multiclass SIMDFPIndexedSDTied opc, string asm> { - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc, - V128, V128, - V128, VectorIndexD, - asm, ".2d", ".2d", ".2d", ".d", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } - - - def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc, - FPR64Op, FPR64Op, V128, VectorIndexD, - asm, ".d", "", "", ".d", []> { - bits<1> idx; - let Inst{11} = idx{0}; - let Inst{21} = 0; - } -} - -multiclass SIMDIndexedHS opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$Rd), - (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$Rd), - (OpNode (v4i32 
V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, - FPR16Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR32Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i32 FPR32Op:$Rd), - (OpNode FPR32Op:$Rn, - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedHS opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$Rd), - (OpNode (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$Rd), - (OpNode (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedHSTied opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64, - V128_lo, VectorIndexH, - asm, ".4h", ".4h", ".4h", ".h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm, ".8h", ".8h", ".8h", ".h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V64, V64, - V128, VectorIndexS, - asm, ".2s", ".2s", ".2s", ".s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm, ".4s", ".4s", ".4s", ".s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), 
VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDIndexedLongSD opc, string asm, - SDPatternOperator OpNode> { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc, - FPR32Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, - FPR64Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", []> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDIndexedLongSQDMLXSDTied opc, string asm, - SDPatternOperator Accum> { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull - (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an - // intermediate EXTRACT_SUBREG would be untyped. 
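  // A sketch of the workaround the following pattern implements (operand
  // names as in the pattern): the scalar i32 accumulate is routed through the
  // v4i16 vector instruction, with the accumulator moved into and out of
  // lane 0 explicitly:
  //
  //   (EXTRACT_SUBREG                            // read the i32 result back
  //     (...v4i16_indexed
  //        (SUBREG_TO_REG (i32 0), $Rd, ssub),   // accumulator into lane 0
  //        $Rn, $Rm, $idx),
  //     ssub)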
- def : Pat<(i32 (Accum (i32 FPR32Op:$Rd), - (i32 (vector_extract (v4i32 - (int_arm64_neon_sqdmull (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx)))), - (i64 0))))), - (EXTRACT_SUBREG - (!cast(NAME # v4i16_indexed) - (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn, - V128_lo:$Rm, VectorIndexH:$idx), - ssub)>; - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$dst), - (Accum (v4i32 V128:$Rd), - (v4i32 (int_arm64_neon_sqdmull - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 - (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull - (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$dst), - (Accum (v2i64 V128:$Rd), - (v2i64 (int_arm64_neon_sqdmull - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 - (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc, - FPR32Op, FPR16Op, V128_lo, VectorIndexH, - asm, ".h", "", "", ".h", []> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - - def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, - FPR64Op, FPR32Op, V128, VectorIndexS, - asm, ".s", "", "", ".s", - [(set (i64 FPR64Op:$dst), - (Accum (i64 FPR64Op:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar - (i32 FPR32Op:$Rn), - (i32 (vector_extract (v4i32 V128:$Rm), - VectorIndexS:$idx))))))]> { - - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } -} - -multiclass SIMDVectorIndexedLongSD opc, string asm, - SDPatternOperator OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$Rd), - (OpNode (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$Rd), - (OpNode (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - 
[(set (v2i64 V128:$Rd), - (OpNode (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } -} - -multiclass SIMDVectorIndexedLongSDTied opc, string asm, - SDPatternOperator OpNode> { - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, - V128, V64, - V128_lo, VectorIndexH, - asm, ".4s", ".4s", ".4h", ".h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), - (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc, - V128, V128, - V128_lo, VectorIndexH, - asm#"2", ".4s", ".4s", ".8h", ".h", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), - (extract_high_v8i16 V128:$Rn), - (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), - VectorIndexH:$idx))))]> { - bits<3> idx; - let Inst{11} = idx{2}; - let Inst{21} = idx{1}; - let Inst{20} = idx{0}; - } - - def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, - V128, V64, - V128, VectorIndexS, - asm, ".2d", ".2d", ".2s", ".s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), - (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - - def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc, - V128, V128, - V128, VectorIndexS, - asm#"2", ".2d", ".2d", ".4s", ".s", - [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), - (extract_high_v4i32 V128:$Rn), - (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm), - VectorIndexS:$idx))))]> { - bits<2> idx; - let Inst{11} = idx{1}; - let Inst{21} = idx{0}; - } - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD scalar shift by immediate -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDScalarShift opc, bits<7> fixed_imm, - RegisterClass regtype1, RegisterClass regtype2, - Operand immtype, string asm, list pattern> - : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<7> imm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-23} = 0b111110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDScalarShiftTied opc, bits<7> fixed_imm, - RegisterClass regtype1, RegisterClass regtype2, - Operand immtype, string asm, list pattern> - : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm), - asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - bits<7> imm; - let Inst{31-30} = 0b01; - let Inst{29} = U; - let Inst{28-23} = 0b111110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - - -multiclass SIMDScalarRShiftSD opc, string asm> { - def s : BaseSIMDScalarShift { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } -} - -multiclass SIMDScalarRShiftD opc, string asm, - SDPatternOperator OpNode> { - def d : BaseSIMDScalarShift { - let 
Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), - (!cast(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>; -} - -multiclass SIMDScalarRShiftDTied opc, string asm, - SDPatternOperator OpNode = null_frag> { - def d : BaseSIMDScalarShiftTied { - let Inst{21-16} = imm{5-0}; - } - - def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftR64:$imm))), - (!cast(NAME # "d") FPR64:$Rd, FPR64:$Rn, - vecshiftR64:$imm)>; -} - -multiclass SIMDScalarLShiftD opc, string asm, - SDPatternOperator OpNode> { - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -multiclass SIMDScalarLShiftDTied opc, string asm> { - def d : BaseSIMDScalarShiftTied { - let Inst{21-16} = imm{5-0}; - } -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -multiclass SIMDScalarRShiftBHS opc, string asm, - SDPatternOperator OpNode = null_frag> { - def b : BaseSIMDScalarShift { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift { - let Inst{20-16} = imm{4-0}; - } -} - -multiclass SIMDScalarLShiftBHSD opc, string asm, - SDPatternOperator OpNode> { - def b : BaseSIMDScalarShift { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } -} - -multiclass SIMDScalarRShiftBHSD opc, string asm> { - def b : BaseSIMDScalarShift { - let Inst{18-16} = imm{2-0}; - } - - def h : BaseSIMDScalarShift { - let Inst{19-16} = imm{3-0}; - } - - def s : BaseSIMDScalarShift { - let Inst{20-16} = imm{4-0}; - } - - def d : BaseSIMDScalarShift { - let Inst{21-16} = imm{5-0}; - } -} - -//---------------------------------------------------------------------------- -// AdvSIMD vector x indexed element -//---------------------------------------------------------------------------- - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDVectorShift opc, bits<7> fixed_imm, - RegisterOperand dst_reg, RegisterOperand src_reg, - Operand immtype, - string asm, string dst_kind, string src_kind, - list pattern> - : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm), - asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # - "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-23} = 0b011110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDVectorShiftTied opc, bits<7> fixed_imm, - RegisterOperand vectype1, RegisterOperand vectype2, - Operand immtype, - string asm, string dst_kind, string src_kind, - list pattern> - : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm), - asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" # - "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>, - Sched<[WriteV]> { - bits<5> Rd; - bits<5> Rn; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29} = U; - let Inst{28-23} = 0b011110; - let Inst{22-16} = fixed_imm; - let Inst{15-11} = opc; - let Inst{10} = 1; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -multiclass SIMDVectorRShiftSD opc, string asm, - Intrinsic OpNode> { - def v2i32_shift : BaseSIMDVectorShift<0, U, 
opc, {0,1,?,?,?,?,?},
-                                        V64, V64, vecshiftR32,
-                                        asm, ".2s", ".2s",
-      [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-
-  def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
-                                        V128, V128, vecshiftR32,
-                                        asm, ".4s", ".4s",
-      [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-
-  def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
-                                        V128, V128, vecshiftR64,
-                                        asm, ".2d", ".2d",
-      [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> {
-    bits<6> imm;
-    let Inst{21-16} = imm;
-  }
-}
-
-multiclass SIMDVectorRShiftSDToFP<bit U, bits<5> opc, string asm,
-                                  Intrinsic OpNode> {
-  def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
-                                        V64, V64, vecshiftR32,
-                                        asm, ".2s", ".2s",
-      [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-
-  def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
-                                        V128, V128, vecshiftR32,
-                                        asm, ".4s", ".4s",
-      [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-
-  def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
-                                        V128, V128, vecshiftR64,
-                                        asm, ".2d", ".2d",
-      [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
-    bits<6> imm;
-    let Inst{21-16} = imm;
-  }
-}
-
-multiclass SIMDVectorRShiftNarrowBHS<bit U, bits<5> opc, string asm,
-                                     SDPatternOperator OpNode> {
-  def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
-                                       V64, V128, vecshiftR16Narrow,
-                                       asm, ".8b", ".8h",
-      [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))]> {
-    bits<3> imm;
-    let Inst{18-16} = imm;
-  }
-
-  def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
-                                            V128, V128, vecshiftR16Narrow,
-                                            asm#"2", ".16b", ".8h", []> {
-    bits<3> imm;
-    let Inst{18-16} = imm;
-    let hasSideEffects = 0;
-  }
-
-  def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
-                                        V64, V128, vecshiftR32Narrow,
-                                        asm, ".4h", ".4s",
-      [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> {
-    bits<4> imm;
-    let Inst{19-16} = imm;
-  }
-
-  def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
-                                            V128, V128, vecshiftR32Narrow,
-                                            asm#"2", ".8h", ".4s", []> {
-    bits<4> imm;
-    let Inst{19-16} = imm;
-    let hasSideEffects = 0;
-  }
-
-  def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
-                                        V64, V128, vecshiftR64Narrow,
-                                        asm, ".2s", ".2d",
-      [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-
-  def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
-                                            V128, V128, vecshiftR64Narrow,
-                                            asm#"2", ".4s", ".2d", []> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-    let hasSideEffects = 0;
-  }
-
-  // TableGen doesn't like patterns w/ INSERT_SUBREG on the instructions
-  // themselves, so put them here instead.
-
-  // Patterns involving what's effectively an insert high and a normal
-  // intrinsic, represented by CONCAT_VECTORS.
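  // In other words: a narrow-to-high-half operation is written in the DAG as
  // (concat_vectors $Rd, (OpNode ...)) and is selected to the mnemonic#"2"
  // instruction, after parking the 64-bit low half in dsub of a fresh
  // register. A sketch of the output side (matching the patterns below):
  //
  //   (INSERT_SUBREG (IMPLICIT_DEF),  // undefined 128-bit register
  //                  V64:$Rd,         // the low half being preserved
  //                  dsub)            // placed in the low 64 bits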
- def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn), - vecshiftR16Narrow:$imm)), - (!cast(NAME # "v16i8_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR16Narrow:$imm)>; - def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), - vecshiftR32Narrow:$imm)), - (!cast(NAME # "v8i16_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR32Narrow:$imm)>; - def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), - vecshiftR64Narrow:$imm)), - (!cast(NAME # "v4i32_shift") - (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), - V128:$Rn, vecshiftR64Narrow:$imm)>; -} - -multiclass SIMDVectorLShiftBHSD opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftL8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftL16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftL16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftL32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftL32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (i32 vecshiftL32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftL64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (i32 vecshiftL64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorRShiftBHSD opc, string asm, - SDPatternOperator OpNode> { - def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftR8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, - asm, ".8h", ".8h", - [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, - 
V64, V64, vecshiftR32, - asm, ".2s", ".2s", - [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, - asm, ".4s", ".4s", - [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", - [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), - (i32 vecshiftR64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDVectorRShiftBHSDTied opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftR8, asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftR8, asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (i32 vecshiftR8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftR16, asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?}, - V128, V128, vecshiftR16, asm, ".8h", ".8h", - [(set (v8i16 V128:$dst), - (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), - (i32 vecshiftR16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = imm; - } - - def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?}, - V64, V64, vecshiftR32, asm, ".2s", ".2s", - [(set (v2i32 V64:$dst), - (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?}, - V128, V128, vecshiftR32, asm, ".4s", ".4s", - [(set (v4i32 V128:$dst), - (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), - (i32 vecshiftR32:$imm)))]> { - bits<5> imm; - let Inst{20-16} = imm; - } - - def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?}, - V128, V128, vecshiftR64, - asm, ".2d", ".2d", [(set (v2i64 V128:$dst), - (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn), - (i32 vecshiftR64:$imm)))]> { - bits<6> imm; - let Inst{21-16} = imm; - } -} - -multiclass SIMDVectorLShiftBHSDTied opc, string asm, - SDPatternOperator OpNode = null_frag> { - def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?}, - V64, V64, vecshiftL8, - asm, ".8b", ".8b", - [(set (v8i8 V64:$dst), - (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?}, - V128, V128, vecshiftL8, - asm, ".16b", ".16b", - [(set (v16i8 V128:$dst), - (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), - (i32 vecshiftL8:$imm)))]> { - bits<3> imm; - let Inst{18-16} = imm; - } - - def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?}, - V64, V64, vecshiftL16, - asm, ".4h", ".4h", - [(set (v4i16 V64:$dst), - (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), - (i32 vecshiftL16:$imm)))]> { - bits<4> imm; - let Inst{19-16} = 
imm;
-  }
-
-  def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
-                                            V128, V128, vecshiftL16,
-                                            asm, ".8h", ".8h",
-      [(set (v8i16 V128:$dst),
-            (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
-                    (i32 vecshiftL16:$imm)))]> {
-    bits<4> imm;
-    let Inst{19-16} = imm;
-  }
-
-  def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?},
-                                            V64, V64, vecshiftL32,
-                                            asm, ".2s", ".2s",
-      [(set (v2i32 V64:$dst),
-            (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
-                    (i32 vecshiftL32:$imm)))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-
-  def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
-                                            V128, V128, vecshiftL32,
-                                            asm, ".4s", ".4s",
-      [(set (v4i32 V128:$dst),
-            (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
-                    (i32 vecshiftL32:$imm)))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-
-  def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?},
-                                            V128, V128, vecshiftL64,
-                                            asm, ".2d", ".2d",
-      [(set (v2i64 V128:$dst),
-            (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn),
-                    (i32 vecshiftL64:$imm)))]> {
-    bits<6> imm;
-    let Inst{21-16} = imm;
-  }
-}
-
-multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
-                                    SDPatternOperator OpNode> {
-  def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
-                                       V128, V64, vecshiftL8, asm, ".8h", ".8b",
-      [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> {
-    bits<3> imm;
-    let Inst{18-16} = imm;
-  }
-
-  def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
-                                        V128, V128, vecshiftL8,
-                                        asm#"2", ".8h", ".16b",
-      [(set (v8i16 V128:$Rd),
-            (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> {
-    bits<3> imm;
-    let Inst{18-16} = imm;
-  }
-
-  def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
-                                        V128, V64, vecshiftL16, asm, ".4s", ".4h",
-      [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> {
-    bits<4> imm;
-    let Inst{19-16} = imm;
-  }
-
-  def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
-                                        V128, V128, vecshiftL16,
-                                        asm#"2", ".4s", ".8h",
-      [(set (v4i32 V128:$Rd),
-            (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> {
-
-    bits<4> imm;
-    let Inst{19-16} = imm;
-  }
-
-  def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
-                                        V128, V64, vecshiftL32, asm, ".2d", ".2s",
-      [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-
-  def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
-                                        V128, V128, vecshiftL32,
-                                        asm#"2", ".2d", ".4s",
-      [(set (v2i64 V128:$Rd),
-            (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> {
-    bits<5> imm;
-    let Inst{20-16} = imm;
-  }
-}
-
-
-//---
-// Vector load/store
-//---
-// SIMD ldX/stX no-index memory references don't allow the optional
-// ", #0" constant and handle post-indexing explicitly, so we use
-// a more specialized parse method for them. Otherwise, it's the same as
-// the general am_noindex handling.
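// A rough sketch of how the two records below divide the work (hedged; the
// actual parser routine lives in the ARM64 assembly parser, not in this
// file): the AsmOperandClass only names the matcher-side class and its parse
// hook,
//
//   tryParseNoIndexMemory   // parses "[", a GPR64sp base, "]" and nothing
//                           // else, rejecting the ", #0" the generic
//                           // am_noindex parsing would accept
//
// while the Operand supplies printing (printAMNoIndex), decoding, and the MI
// operand layout (a single GPR64sp base register).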
-def MemorySIMDNoIndexOperand : AsmOperandClass { - let Name = "MemorySIMDNoIndex"; - let ParserMethod = "tryParseNoIndexMemory"; -} -def am_simdnoindex : Operand, - ComplexPattern { - let PrintMethod = "printAMNoIndex"; - let ParserMatchClass = MemorySIMDNoIndexOperand; - let MIOperandInfo = (ops GPR64sp:$base); - let DecoderMethod = "DecodeGPR64spRegisterClass"; -} - -class BaseSIMDLdSt opcode, bits<2> size, - string asm, dag oops, dag iops, list pattern> - : I { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-23} = 0b0011000; - let Inst{22} = L; - let Inst{21-16} = 0b000000; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; -} - -class BaseSIMDLdStPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : I { - bits<5> Vt; - bits<5> vaddr; - bits<5> Xm; - let Inst{31} = 0; - let Inst{30} = Q; - let Inst{29-23} = 0b0011001; - let Inst{22} = L; - let Inst{21} = 0; - let Inst{20-16} = Xm; - let Inst{15-12} = opcode; - let Inst{11-10} = size; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStPost"; -} - -// The immediate form of AdvSIMD post-indexed addressing is encoded with -// register post-index addressing from the zero register. -multiclass SIMDLdStAliases { - // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16" - // "ld1\t$Vt, $vaddr, #16" - // may get mapped to - // (LD1Twov8b_POST VecListTwo8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # Count # "v" # layout # "_POST") - !cast("VecList" # Count # layout):$Vt, - am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. "ld1.8b { v0, v1 }, [x1], #16" - // "ld1.8b\t$Vt, $vaddr, #16" - // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # Count # "v" # layout # "_POST") - !cast("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1.8b { v0, v1 }, [x1]" - // "ld1\t$Vt, $vaddr" - // may get mapped to - // (LD1Twov8b VecListTwo64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias(NAME # Count # "v" # layout) - !cast("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr), 0>; - - // E.g. 
"ld1.8b { v0, v1 }, [x1], x2" - // "ld1\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, GPR64pi8:$Xm) - def : InstAlias(NAME # Count # "v" # layout # "_POST") - !cast("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass BaseSIMDLdN opcode> { - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm, - (outs !cast(veclist # "16b"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm, - (outs !cast(veclist # "8h"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm, - (outs !cast(veclist # "4s"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm, - (outs !cast(veclist # "2d"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm, - (outs !cast(veclist # "8b"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm, - (outs !cast(veclist # "4h"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm, - (outs !cast(veclist # "2s"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - - - def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm, - (outs !cast(veclist # "16b"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset128):$Xm)>; - def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm, - (outs !cast(veclist # "8h"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset128):$Xm)>; - def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm, - (outs !cast(veclist # "4s"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset128):$Xm)>; - def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm, - (outs !cast(veclist # "2d"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset128):$Xm)>; - def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm, - (outs !cast(veclist # "8b"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset64):$Xm)>; - def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm, - (outs !cast(veclist # "4h"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset64):$Xm)>; - def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm, - (outs !cast(veclist # "2s"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; -} - -// Only ld1/st1 has a v1d version. 
-multiclass BaseSIMDStN opcode> { - let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in { - def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs), - (ins !cast(veclist # "16b"):$Vt, - am_simdnoindex:$vaddr), []>; - def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs), - (ins !cast(veclist # "8h"):$Vt, - am_simdnoindex:$vaddr), []>; - def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs), - (ins !cast(veclist # "4s"):$Vt, - am_simdnoindex:$vaddr), []>; - def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs), - (ins !cast(veclist # "2d"):$Vt, - am_simdnoindex:$vaddr), []>; - def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs), - (ins !cast(veclist # "8b"):$Vt, - am_simdnoindex:$vaddr), []>; - def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs), - (ins !cast(veclist # "4h"):$Vt, - am_simdnoindex:$vaddr), []>; - def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs), - (ins !cast(veclist # "2s"):$Vt, - am_simdnoindex:$vaddr), []>; - - def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, (outs), - (ins !cast(veclist # "16b"):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset128):$Xm)>; - def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, (outs), - (ins !cast(veclist # "8h"):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset128):$Xm)>; - def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, (outs), - (ins !cast(veclist # "4s"):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset128):$Xm)>; - def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, (outs), - (ins !cast(veclist # "2d"):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset128):$Xm)>; - def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, (outs), - (ins !cast(veclist # "8b"):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset64):$Xm)>; - def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, (outs), - (ins !cast(veclist # "4h"):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset64):$Xm)>; - def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, (outs), - (ins !cast(veclist # "2s"):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; - defm : SIMDLdStAliases; -} - -multiclass BaseSIMDLd1 opcode> - : BaseSIMDLdN { - - // LD1 instructions have extra "1d" variants. - let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in { - def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm, - (outs !cast(veclist # "1d"):$Vt), - (ins am_simdnoindex:$vaddr), []>; - - def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm, - (outs !cast(veclist # "1d"):$Vt), - (ins am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases; -} - -multiclass BaseSIMDSt1 opcode> - : BaseSIMDStN { - - // ST1 instructions have extra "1d" variants. 
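  // That is, a one-element vector of 64-bit lanes is just a D register, and
  // the architecture only defines the layout for the single-register-style
  // ld1/st1 forms:
  //
  //   st1 { v0.1d }, [x0]              // valid, handled by the defs below
  //   st2 { v0.1d, v1.1d }, [x0]       // invalid: BaseSIMDLdN/StN has no v1d
  //
  // which is why v1d and v1d_POST live here rather than in the shared
  // multiclass.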
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in { - def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs), - (ins !cast(veclist # "1d"):$Vt, - am_simdnoindex:$vaddr), []>; - - def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, (outs), - (ins !cast(veclist # "1d"):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset64):$Xm)>; - } - - defm : SIMDLdStAliases; -} - -multiclass SIMDLd1Multiple { - defm One : BaseSIMDLd1<"One", asm, "VecListOne", 16, 8, 0b0111>; - defm Two : BaseSIMDLd1<"Two", asm, "VecListTwo", 32, 16, 0b1010>; - defm Three : BaseSIMDLd1<"Three", asm, "VecListThree", 48, 24, 0b0110>; - defm Four : BaseSIMDLd1<"Four", asm, "VecListFour", 64, 32, 0b0010>; -} - -multiclass SIMDSt1Multiple { - defm One : BaseSIMDSt1<"One", asm, "VecListOne", 16, 8, 0b0111>; - defm Two : BaseSIMDSt1<"Two", asm, "VecListTwo", 32, 16, 0b1010>; - defm Three : BaseSIMDSt1<"Three", asm, "VecListThree", 48, 24, 0b0110>; - defm Four : BaseSIMDSt1<"Four", asm, "VecListFour", 64, 32, 0b0010>; -} - -multiclass SIMDLd2Multiple { - defm Two : BaseSIMDLdN<"Two", asm, "VecListTwo", 32, 16, 0b1000>; -} - -multiclass SIMDSt2Multiple { - defm Two : BaseSIMDStN<"Two", asm, "VecListTwo", 32, 16, 0b1000>; -} - -multiclass SIMDLd3Multiple { - defm Three : BaseSIMDLdN<"Three", asm, "VecListThree", 48, 24, 0b0100>; -} - -multiclass SIMDSt3Multiple { - defm Three : BaseSIMDStN<"Three", asm, "VecListThree", 48, 24, 0b0100>; -} - -multiclass SIMDLd4Multiple { - defm Four : BaseSIMDLdN<"Four", asm, "VecListFour", 64, 32, 0b0000>; -} - -multiclass SIMDSt4Multiple { - defm Four : BaseSIMDStN<"Four", asm, "VecListFour", 64, 32, 0b0000>; -} - -//--- -// AdvSIMD Load/store single-element -//--- - -class BaseSIMDLdStSingle opcode, - string asm, string operands, dag oops, dag iops, - list pattern> - : I { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{29-24} = 0b001101; - let Inst{22} = L; - let Inst{21} = R; - let Inst{15-13} = opcode; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStSingle"; -} - -class BaseSIMDLdStSingleTied opcode, - string asm, string operands, dag oops, dag iops, - list pattern> - : I { - bits<5> Vt; - bits<5> vaddr; - let Inst{31} = 0; - let Inst{29-24} = 0b001101; - let Inst{22} = L; - let Inst{21} = R; - let Inst{15-13} = opcode; - let Inst{9-5} = vaddr; - let Inst{4-0} = Vt; - let DecoderMethod = "DecodeSIMDLdStSingleTied"; -} - - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDLdR opcode, bit S, bits<2> size, string asm, - Operand listtype> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr", - (outs listtype:$Vt), (ins am_simdnoindex:$vaddr), []> { - let Inst{30} = Q; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = S; - let Inst{11-10} = size; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDLdRPost opcode, bit S, bits<2> size, - string asm, Operand listtype, Operand GPR64pi> - : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr, $Xm", - (outs listtype:$Vt), - (ins am_simdnoindex:$vaddr, GPR64pi:$Xm), []> { - bits<5> Xm; - let Inst{30} = Q; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = S; - let Inst{11-10} = size; -} - -multiclass SIMDLdrAliases { - // E.g. "ld1r { v0.8b }, [x1], #1" - // "ld1r.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # "v" # layout # "_POST") - !cast("VecList" # Count # layout):$Vt, - am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. 
"ld1r.8b { v0 }, [x1], #1" - // "ld1r.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias(NAME # "v" # layout # "_POST") - !cast("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1r.8b { v0 }, [x1]" - // "ld1r.8b\t$Vt, $vaddr" - // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias(NAME # "v" # layout) - !cast("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr), 0>; - - // E.g. "ld1r.8b { v0 }, [x1], x2" - // "ld1r.8b\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm) - def : InstAlias(NAME # "v" # layout # "_POST") - !cast("VecList" # Count # Size):$Vt, - am_simdnoindex:$vaddr, - !cast("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass SIMDLdR opcode, bit S, string asm, string Count, - int Offset1, int Offset2, int Offset4, int Offset8> { - def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm, - !cast("VecList" # Count # "8b")>; - def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm, - !cast("VecList" # Count #"16b")>; - def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm, - !cast("VecList" # Count #"4h")>; - def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm, - !cast("VecList" # Count #"8h")>; - def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm, - !cast("VecList" # Count #"2s")>; - def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm, - !cast("VecList" # Count #"4s")>; - def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm, - !cast("VecList" # Count #"1d")>; - def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm, - !cast("VecList" # Count #"2d")>; - - def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm, - !cast("VecList" # Count # "8b"), - !cast("GPR64pi" # Offset1)>; - def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm, - !cast("VecList" # Count # "16b"), - !cast("GPR64pi" # Offset1)>; - def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm, - !cast("VecList" # Count # "4h"), - !cast("GPR64pi" # Offset2)>; - def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm, - !cast("VecList" # Count # "8h"), - !cast("GPR64pi" # Offset2)>; - def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm, - !cast("VecList" # Count # "2s"), - !cast("GPR64pi" # Offset4)>; - def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm, - !cast("VecList" # Count # "4s"), - !cast("GPR64pi" # Offset4)>; - def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm, - !cast("VecList" # Count # "1d"), - !cast("GPR64pi" # Offset8)>; - def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm, - !cast("VecList" # Count # "2d"), - !cast("GPR64pi" # Offset8)>; - - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; - defm : SIMDLdrAliases; -} - -class SIMDLdStSingleB opcode, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { - // idx encoded in Q:S:size fields. - bits<4> idx; - let Inst{30} = idx{3}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBTied opcode, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S:size fields. 
- bits<4> idx; - let Inst{30} = idx{3}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBPost opcode, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingle { - // idx encoded in Q:S:size fields. - bits<4> idx; - bits<5> Xm; - let Inst{30} = idx{3}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} -class SIMDLdStSingleBTiedPost opcode, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S:size fields. - bits<4> idx; - bits<5> Xm; - let Inst{30} = idx{3}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{2}; - let Inst{11-10} = idx{1-0}; -} - -class SIMDLdStSingleH opcode, bit size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - let Inst{30} = idx{2}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleHTied opcode, bit size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - let Inst{30} = idx{2}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} - -class SIMDLdStSingleHPost opcode, bit size, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingle { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - bits<5> Xm; - let Inst{30} = idx{2}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleHTiedPost opcode, bit size, string asm, - dag oops, dag iops> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S:size<1> fields. - bits<3> idx; - bits<5> Xm; - let Inst{30} = idx{2}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{1}; - let Inst{11} = idx{0}; - let Inst{10} = size; -} -class SIMDLdStSingleS opcode, bits<2> size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { - // idx encoded in Q:S fields. - bits<2> idx; - let Inst{30} = idx{1}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSTied opcode, bits<2> size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S fields. - bits<2> idx; - let Inst{30} = idx{1}; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingle { - // idx encoded in Q:S fields. - bits<2> idx; - bits<5> Xm; - let Inst{30} = idx{1}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleSTiedPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied { - // idx encoded in Q:S fields. - bits<2> idx; - bits<5> Xm; - let Inst{30} = idx{1}; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = idx{0}; - let Inst{11-10} = size; -} -class SIMDLdStSingleD opcode, bits<2> size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingle { - // idx encoded in Q field. 
- bits<1> idx; - let Inst{30} = idx; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDTied opcode, bits<2> size, string asm, - dag oops, dag iops, list pattern> - : BaseSIMDLdStSingleTied { - // idx encoded in Q field. - bits<1> idx; - let Inst{30} = idx; - let Inst{23} = 0; - let Inst{20-16} = 0b00000; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingle { - // idx encoded in Q field. - bits<1> idx; - bits<5> Xm; - let Inst{30} = idx; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = 0; - let Inst{11-10} = size; -} -class SIMDLdStSingleDTiedPost opcode, bits<2> size, - string asm, dag oops, dag iops> - : BaseSIMDLdStSingleTied { - // idx encoded in Q field. - bits<1> idx; - bits<5> Xm; - let Inst{30} = idx; - let Inst{23} = 1; - let Inst{20-16} = Xm; - let Inst{12} = 0; - let Inst{11-10} = size; -} - -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleBTied opcode, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i8 : SIMDLdStSingleBTied<1, R, opcode, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), []>; - - def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleHTied opcode, bit size, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), []>; - - def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleSTied opcode, bits<2> size,string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { - def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), []>; - - def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in -multiclass SIMDLdSingleDTied opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), []>; - - def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm, - (outs listtype:$dst), - (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleB opcode, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i8 : SIMDLdStSingleB<0, R, opcode, asm, - (outs), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), []>; - - def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, - (outs), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleH opcode, bit size, string asm, - RegisterOperand listtype, RegisterOperand 
GPR64pi> { - def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), []>; - - def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), []>; - - def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} -let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in -multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm, - RegisterOperand listtype, RegisterOperand GPR64pi> { - def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), []>; - - def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, - (outs), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr, GPR64pi:$Xm)>; -} - -multiclass SIMDLdStSingleAliases<string asm, string layout, string Type, - string Count, int Offset, Operand idxtype> { - // E.g. "ld1 { v0.8b }[0], [x1], #1" - // "ld1\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "\t$Vt$idx, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # layout):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, XZR), 1>; - - // E.g. "ld1.8b { v0 }[0], [x1], #1" - // "ld1.8b\t$Vt, $vaddr, #1" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR) - def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr, #" # Offset, - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, XZR), 0>; - - // E.g. "ld1.8b { v0 }[0], [x1]" - // "ld1.8b\t$Vt, $vaddr" - // may get mapped to - // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr) - def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr", - (!cast<Instruction>(NAME # Type) - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr), 0>; - - // E.g. "ld1.8b { v0 }[0], [x1], x2" - // "ld1.8b\t$Vt, $vaddr, $Xm" - // may get mapped to - // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm) - def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr, $Xm", - (!cast<Instruction>(NAME # Type # "_POST") - !cast<RegisterOperand>("VecList" # Count # "128"):$Vt, - idxtype:$idx, am_simdnoindex:$vaddr, - !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>; -} - -multiclass SIMDLdSt1SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "One", 1, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "One", 2, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "One", 4, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "One", 8, VectorIndexD>; -} - -multiclass SIMDLdSt2SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Two", 2, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Two", 4, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Two", 8, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Two", 16, VectorIndexD>; -} - -multiclass SIMDLdSt3SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Three", 3, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Three", 6, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Three", 12, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Three", 24, VectorIndexD>; -} - -multiclass SIMDLdSt4SingleAliases<string asm> { - defm : SIMDLdStSingleAliases<asm, "b", "i8", "Four", 4, VectorIndexB>; - defm : SIMDLdStSingleAliases<asm, "h", "i16", "Four", 8, VectorIndexH>; - defm : SIMDLdStSingleAliases<asm, "s", "i32", "Four", 16, VectorIndexS>; - defm : SIMDLdStSingleAliases<asm, "d", "i64", "Four", 32, VectorIndexD>; -} - -//---------------------------------------------------------------------------- -// Crypto extensions -//---------------------------------------------------------------------------- - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr, - list<dag> pat> - : I, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - let Inst{31-16} = 0b0100111000101000; - let Inst{15-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class AESInst<bits<4> opc, string asm, Intrinsic OpNode> - : AESBase; - -class AESTiedInst<bits<4> opc, string asm, Intrinsic OpNode> - : AESBase; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind, - dag oops, dag iops, list<dag> pat> - : I, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - bits<5> Rm; - let Inst{31-21} = 0b01011110000; - let Inst{20-16} = Rm; - let Inst{15} = 0; - let Inst{14-12} = opc; - let Inst{11-10} = 0b00; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SHATiedInstQSV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst; - -class SHATiedInstVVV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst; - -class SHATiedInstQQV<bits<3> opc, string asm, Intrinsic OpNode> - : SHA3OpTiedInst; - -let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class SHA2OpInst<bits<4> opc, string asm, string kind, - string cstr, dag oops, dag iops, - list<dag> pat> - : I, - Sched<[WriteV]>{ - bits<5> Rd; - bits<5> Rn; - let Inst{31-16} = 0b0101111000101000; - let Inst{15-12} = opc; - let Inst{11-10} = 0b10; - let Inst{9-5} = Rn; - let Inst{4-0} = Rd; -} - -class SHATiedInstVV<bits<4> opc, string asm, Intrinsic OpNode> - : SHA2OpInst; - -class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode> - : SHA2OpInst;
-// Allow the size specifier tokens to be upper case, not just lower. -def : TokenAlias<".8B", ".8b">; -def : TokenAlias<".4H", ".4h">; -def : TokenAlias<".2S", ".2s">; -def : TokenAlias<".1D", ".1d">; -def : TokenAlias<".16B", ".16b">; -def : TokenAlias<".8H", ".8h">; -def : TokenAlias<".4S", ".4s">; -def : TokenAlias<".2D", ".2d">; -def : TokenAlias<".B", ".b">; -def : TokenAlias<".H", ".h">; -def : TokenAlias<".S", ".s">; -def : TokenAlias<".D", ".d">; diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/ARM64/ARM64InstrInfo.cpp deleted file mode 100644 index 8f11757..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.cpp +++ /dev/null @@ -1,1864 +0,0 @@ -//===- ARM64InstrInfo.cpp - ARM64 Instruction Information -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "ARM64InstrInfo.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_CTOR_DTOR -#include "ARM64GenInstrInfo.inc" - -using namespace llvm; - -ARM64InstrInfo::ARM64InstrInfo(const ARM64Subtarget &STI) - : ARM64GenInstrInfo(ARM64::ADJCALLSTACKDOWN, ARM64::ADJCALLSTACKUP), - RI(this, &STI), Subtarget(STI) {} - -/// GetInstSize - Return the number of bytes of code the specified -/// instruction may be. This returns the maximum number of bytes. -unsigned ARM64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MCInstrDesc &Desc = MI->getDesc(); - - switch (Desc.getOpcode()) { - default: - // Anything not explicitly designated otherwise is a normal 4-byte insn. - return 4; - case TargetOpcode::DBG_VALUE: - case TargetOpcode::EH_LABEL: - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - return 0; - } - - llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size"); -} - -static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, - SmallVectorImpl<MachineOperand> &Cond) { - // Block ends with fall-through condbranch. - switch (LastInst->getOpcode()) { - default: - llvm_unreachable("Unknown branch instruction?"); - case ARM64::Bcc: - Target = LastInst->getOperand(1).getMBB(); - Cond.push_back(LastInst->getOperand(0)); - break; - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - Target = LastInst->getOperand(1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(-1)); - Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); - Cond.push_back(LastInst->getOperand(0)); - break; - case ARM64::TBZ: - case ARM64::TBNZ: - Target = LastInst->getOperand(2).getMBB(); - Cond.push_back(MachineOperand::CreateImm(-1)); - Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); - Cond.push_back(LastInst->getOperand(0)); - Cond.push_back(LastInst->getOperand(1)); - } -}
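parseCondBranch flattens all three branch flavours into one Cond vector: a plain Bcc pushes just the condition code, while the compare-and-branch and test-and-branch forms push a -1 sentinel, the opcode, the register and (for tbz/tbnz) the bit number. A standalone mirror of that convention (our own C++ sketch, not LLVM's types; it matches the Cond.size() values 1, 3 and 4 used by insertSelect further down):

    #include <cstdio>
    #include <vector>

    enum Kind { PlainBcc, CompareAndBranch, TestAndBranch };

    // Classify a parsed condition by the layout parseCondBranch produces.
    Kind classifyCond(const std::vector<long> &Cond) {
      if (Cond.size() == 1)   // [cc]                  <- b.cc
        return PlainBcc;
      if (Cond.size() == 3)   // [-1, opcode, reg]     <- cbz/cbnz
        return CompareAndBranch;
      return TestAndBranch;   // [-1, opcode, reg, bit] <- tbz/tbnz, size 4
    }

    int main() {
      std::vector<long> bcc = {/*cc=*/0};
      std::vector<long> tbz = {-1, /*opcode*/ 900, /*reg*/ 5, /*bit*/ 3};
      printf("%d %d\n", classifyCond(bcc), classifyCond(tbz));
      return 0;
    }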
-// Branch analysis. -bool ARM64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - parseCondBranch(LastInst, TBB, Cond); - return false; - } - return true; // Can't handle indirect branch. - } - - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now; the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); - } - } - } - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - parseCondBranch(SecondLastInst, TBB, Cond); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // ...likewise if it ends with an indirect branch followed by an unconditional - // branch. - if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; - } - - // Otherwise, can't handle this.
- return true; -} - -bool ARM64InstrInfo::ReverseBranchCondition( - SmallVectorImpl<MachineOperand> &Cond) const { - if (Cond[0].getImm() != -1) { - // Regular Bcc - ARM64CC::CondCode CC = (ARM64CC::CondCode)(int)Cond[0].getImm(); - Cond[0].setImm(ARM64CC::getInvertedCondCode(CC)); - } else { - // Folded compare-and-branch - switch (Cond[1].getImm()) { - default: - llvm_unreachable("Unknown conditional branch!"); - case ARM64::CBZW: - Cond[1].setImm(ARM64::CBNZW); - break; - case ARM64::CBNZW: - Cond[1].setImm(ARM64::CBZW); - break; - case ARM64::CBZX: - Cond[1].setImm(ARM64::CBNZX); - break; - case ARM64::CBNZX: - Cond[1].setImm(ARM64::CBZX); - break; - case ARM64::TBZ: - Cond[1].setImm(ARM64::TBNZ); - break; - case ARM64::TBNZ: - Cond[1].setImm(ARM64::TBZ); - break; - } - } - - return false; -} - -unsigned ARM64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (!isUncondBranchOpcode(I->getOpcode()) && - !isCondBranchOpcode(I->getOpcode())) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) - return 1; - --I; - if (!isCondBranchOpcode(I->getOpcode())) - return 1; - - // Remove the branch. - I->eraseFromParent(); - return 2; -} - -void ARM64InstrInfo::instantiateCondBranch( - MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - if (Cond[0].getImm() != -1) { - // Regular Bcc - BuildMI(&MBB, DL, get(ARM64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB); - } else { - // Folded compare-and-branch - const MachineInstrBuilder MIB = - BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg()); - if (Cond.size() > 3) - MIB.addImm(Cond[3].getImm()); - MIB.addMBB(TBB); - } -} - -unsigned ARM64InstrInfo::InsertBranch( - MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const { - // Shouldn't be a fall through. - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - - if (FBB == 0) { - if (Cond.empty()) // Unconditional branch? - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(TBB); - else - instantiateCondBranch(MBB, DL, TBB, Cond); - return 1; - } - - // Two-way conditional branch. - instantiateCondBranch(MBB, DL, TBB, Cond); - BuildMI(&MBB, DL, get(ARM64::B)).addMBB(FBB); - return 2; -} - -// Find the original register that VReg is copied from. -static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { - while (TargetRegisterInfo::isVirtualRegister(VReg)) { - const MachineInstr *DefMI = MRI.getVRegDef(VReg); - if (!DefMI->isFullCopy()) - return VReg; - VReg = DefMI->getOperand(1).getReg(); - } - return VReg; -} - -// Determine if VReg is defined by an instruction that can be folded into a -// csel instruction. If so, return the folded opcode, and the replacement -// register. -static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, - unsigned *NewVReg = 0) { - VReg = removeCopies(MRI, VReg); - if (!TargetRegisterInfo::isVirtualRegister(VReg)) - return 0; - - bool Is64Bit = ARM64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg)); - const MachineInstr *DefMI = MRI.getVRegDef(VReg); - unsigned Opc = 0; - unsigned SrcOpNum = 0; - switch (DefMI->getOpcode()) { - case ARM64::ADDSXri: - case ARM64::ADDSWri: - // if CPSR is used, do not fold.
- if (DefMI->findRegisterDefOperandIdx(ARM64::CPSR, true) == -1) - return 0; - // fall-through to ADDXri and ADDWri. - case ARM64::ADDXri: - case ARM64::ADDWri: - // add x, 1 -> csinc. - if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 || - DefMI->getOperand(3).getImm() != 0) - return 0; - SrcOpNum = 1; - Opc = Is64Bit ? ARM64::CSINCXr : ARM64::CSINCWr; - break; - - case ARM64::ORNXrr: - case ARM64::ORNWrr: { - // not x -> csinv, represented as orn dst, xzr, src. - unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) - return 0; - SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSINVXr : ARM64::CSINVWr; - break; - } - - case ARM64::SUBSXrr: - case ARM64::SUBSWrr: - // if CPSR is used, do not fold. - if (DefMI->findRegisterDefOperandIdx(ARM64::CPSR, true) == -1) - return 0; - // fall-through to SUBXrr and SUBWrr. - case ARM64::SUBXrr: - case ARM64::SUBWrr: { - // neg x -> csneg, represented as sub dst, xzr, src. - unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg()); - if (ZReg != ARM64::XZR && ZReg != ARM64::WZR) - return 0; - SrcOpNum = 2; - Opc = Is64Bit ? ARM64::CSNEGXr : ARM64::CSNEGWr; - break; - } - default: - return 0; - } - assert(Opc && SrcOpNum && "Missing parameters"); - - if (NewVReg) - *NewVReg = DefMI->getOperand(SrcOpNum).getReg(); - return Opc; -} - -bool ARM64InstrInfo::canInsertSelect( - const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond, - unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, - int &FalseCycles) const { - // Check register classes. - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - const TargetRegisterClass *RC = - RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg)); - if (!RC) - return false; - - // Expanding cbz/tbz requires an extra cycle of latency on the condition. - unsigned ExtraCondLat = Cond.size() != 1; - - // GPRs are handled by csel. - // FIXME: Fold in x+1, -x, and ~x when applicable. - if (ARM64::GPR64allRegClass.hasSubClassEq(RC) || - ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - // Single-cycle csel, csinc, csinv, and csneg. - CondCycles = 1 + ExtraCondLat; - TrueCycles = FalseCycles = 1; - if (canFoldIntoCSel(MRI, TrueReg)) - TrueCycles = 0; - else if (canFoldIntoCSel(MRI, FalseReg)) - FalseCycles = 0; - return true; - } - - // Scalar floating point is handled by fcsel. - // FIXME: Form fabs, fmin, and fmax when applicable. - if (ARM64::FPR64RegClass.hasSubClassEq(RC) || - ARM64::FPR32RegClass.hasSubClassEq(RC)) { - CondCycles = 5 + ExtraCondLat; - TrueCycles = FalseCycles = 2; - return true; - } - - // Can't do vectors. - return false; -}
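canFoldIntoCSel works because csinc, csinv and csneg compute "cond ? a : f(b)" where f is +1, bitwise-not and negate respectively, so a select input defined by add x,1, an orn from the zero register, or a sub from the zero register can simply be dropped. A standalone C++ check of those identities (our own sketch, not LLVM code):

    #include <cassert>
    #include <cstdint>

    uint64_t csel (bool c, uint64_t a, uint64_t b) { return c ? a : b; }
    uint64_t csinc(bool c, uint64_t a, uint64_t b) { return c ? a : b + 1; }
    uint64_t csinv(bool c, uint64_t a, uint64_t b) { return c ? a : ~b; }
    uint64_t csneg(bool c, uint64_t a, uint64_t b) { return c ? a : 0 - b; }

    int main() {
      uint64_t x = 41, y = 7;
      for (bool c : {false, true}) {
        // select(c, y, x + 1) == csinc(c, y, x): the ADD is folded away.
        assert(csel(c, y, x + 1) == csinc(c, y, x));
        // select(c, y, ~x) == csinv(c, y, x): the ORN from XZR is folded away.
        assert(csel(c, y, ~x) == csinv(c, y, x));
        // select(c, y, -x) == csneg(c, y, x): the SUB from XZR is folded away.
        assert(csel(c, y, 0 - x) == csneg(c, y, x));
      }
      return 0;
    }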
-void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, - const SmallVectorImpl<MachineOperand> &Cond, - unsigned TrueReg, unsigned FalseReg) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - - // Parse the condition code, see parseCondBranch() above. - ARM64CC::CondCode CC; - switch (Cond.size()) { - default: - llvm_unreachable("Unknown condition opcode in Cond"); - case 1: // b.cc - CC = ARM64CC::CondCode(Cond[0].getImm()); - break; - case 3: { // cbz/cbnz - // We must insert a compare against 0. - bool Is64Bit; - switch (Cond[1].getImm()) { - default: - llvm_unreachable("Unknown branch opcode in Cond"); - case ARM64::CBZW: - Is64Bit = 0; - CC = ARM64CC::EQ; - break; - case ARM64::CBZX: - Is64Bit = 1; - CC = ARM64CC::EQ; - break; - case ARM64::CBNZW: - Is64Bit = 0; - CC = ARM64CC::NE; - break; - case ARM64::CBNZX: - Is64Bit = 1; - CC = ARM64CC::NE; - break; - } - unsigned SrcReg = Cond[2].getReg(); - if (Is64Bit) { - // cmp reg, #0 is actually subs xzr, reg, #0. - MRI.constrainRegClass(SrcReg, &ARM64::GPR64spRegClass); - BuildMI(MBB, I, DL, get(ARM64::SUBSXri), ARM64::XZR) - .addReg(SrcReg) - .addImm(0) - .addImm(0); - } else { - MRI.constrainRegClass(SrcReg, &ARM64::GPR32spRegClass); - BuildMI(MBB, I, DL, get(ARM64::SUBSWri), ARM64::WZR) - .addReg(SrcReg) - .addImm(0) - .addImm(0); - } - break; - } - case 4: { // tbz/tbnz - // We must insert a tst instruction. - switch (Cond[1].getImm()) { - default: - llvm_unreachable("Unknown branch opcode in Cond"); - case ARM64::TBZ: - CC = ARM64CC::EQ; - break; - case ARM64::TBNZ: - CC = ARM64CC::NE; - break; - } - // cmp reg, #foo is actually ands xzr, reg, #1<<foo. -bool ARM64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SubIdx) const { - switch (MI.getOpcode()) { - default: - return false; - case ARM64::SBFMXri: // sxtw - case ARM64::UBFMXri: // uxtw - // Check for the 32 -> 64 bit extension case, these instructions can do - // much more. - if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31) - return false; - // This is a signed or unsigned 32 -> 64 bit extension. - SrcReg = MI.getOperand(1).getReg(); - DstReg = MI.getOperand(0).getReg(); - SubIdx = ARM64::sub_32; - return true; - } -} - -/// analyzeCompare - For a comparison instruction, return the source registers -/// in SrcReg and SrcReg2, and the value it compares against in CmpValue. -/// Return true if the comparison instruction can be analyzed. -bool ARM64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, - unsigned &SrcReg2, int &CmpMask, - int &CmpValue) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::SUBSWrr: - case ARM64::SUBSWrs: - case ARM64::SUBSWrx: - case ARM64::SUBSXrr: - case ARM64::SUBSXrs: - case ARM64::SUBSXrx: - case ARM64::ADDSWrr: - case ARM64::ADDSWrs: - case ARM64::ADDSWrx: - case ARM64::ADDSXrr: - case ARM64::ADDSXrs: - case ARM64::ADDSXrx: - // Replace SUBSWrr with SUBWrr if CPSR is not used. - SrcReg = MI->getOperand(1).getReg(); - SrcReg2 = MI->getOperand(2).getReg(); - CmpMask = ~0; - CmpValue = 0; - return true; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::ANDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: - case ARM64::ANDSXri: - SrcReg = MI->getOperand(1).getReg(); - SrcReg2 = 0; - CmpMask = ~0; - CmpValue = MI->getOperand(2).getImm(); - return true; - } - - return false; -} - -static bool UpdateOperandRegClass(MachineInstr *Instr) { - MachineBasicBlock *MBB = Instr->getParent(); - assert(MBB && "Can't get MachineBasicBlock here"); - MachineFunction *MF = MBB->getParent(); - assert(MF && "Can't get MachineFunction here"); - const TargetMachine *TM = &MF->getTarget(); - const TargetInstrInfo *TII = TM->getInstrInfo(); - const TargetRegisterInfo *TRI = TM->getRegisterInfo(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - - for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx; - ++OpIdx) { - MachineOperand &MO = Instr->getOperand(OpIdx); - const TargetRegisterClass *OpRegCstraints = - Instr->getRegClassConstraint(OpIdx, TII, TRI); - - // If there's no constraint, there's nothing to do. - if (!OpRegCstraints) - continue; - // If the operand is a frame index, there's nothing to do here. - // A frame index operand will resolve correctly during PEI.
- if (MO.isFI()) - continue; - - assert(MO.isReg() && - "Operand has register constraints without being a register!"); - - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (!OpRegCstraints->contains(Reg)) - return false; - } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) && - !MRI->constrainRegClass(Reg, OpRegCstraints)) - return false; - } - - return true; -} - -/// optimizeCompareInstr - Convert the instruction supplying the argument to the -/// comparison into one that sets the zero bit in the flags register. -bool ARM64InstrInfo::optimizeCompareInstr( - MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask, - int CmpValue, const MachineRegisterInfo *MRI) const { - - // Replace SUBSWrr with SUBWrr if CPSR is not used. - int Cmp_CPSR = CmpInstr->findRegisterDefOperandIdx(ARM64::CPSR, true); - if (Cmp_CPSR != -1) { - unsigned NewOpc; - switch (CmpInstr->getOpcode()) { - default: - return false; - case ARM64::ADDSWrr: NewOpc = ARM64::ADDWrr; break; - case ARM64::ADDSWri: NewOpc = ARM64::ADDWri; break; - case ARM64::ADDSWrs: NewOpc = ARM64::ADDWrs; break; - case ARM64::ADDSWrx: NewOpc = ARM64::ADDWrx; break; - case ARM64::ADDSXrr: NewOpc = ARM64::ADDXrr; break; - case ARM64::ADDSXri: NewOpc = ARM64::ADDXri; break; - case ARM64::ADDSXrs: NewOpc = ARM64::ADDXrs; break; - case ARM64::ADDSXrx: NewOpc = ARM64::ADDXrx; break; - case ARM64::SUBSWrr: NewOpc = ARM64::SUBWrr; break; - case ARM64::SUBSWri: NewOpc = ARM64::SUBWri; break; - case ARM64::SUBSWrs: NewOpc = ARM64::SUBWrs; break; - case ARM64::SUBSWrx: NewOpc = ARM64::SUBWrx; break; - case ARM64::SUBSXrr: NewOpc = ARM64::SUBXrr; break; - case ARM64::SUBSXri: NewOpc = ARM64::SUBXri; break; - case ARM64::SUBSXrs: NewOpc = ARM64::SUBXrs; break; - case ARM64::SUBSXrx: NewOpc = ARM64::SUBXrx; break; - } - - const MCInstrDesc &MCID = get(NewOpc); - CmpInstr->setDesc(MCID); - CmpInstr->RemoveOperand(Cmp_CPSR); - bool succeeded = UpdateOperandRegClass(CmpInstr); - (void)succeeded; - assert(succeeded && "Some operands reg class are incompatible!"); - return true; - } - - // Continue only if we have a "ri" where immediate is zero. - if (CmpValue != 0 || SrcReg2 != 0) - return false; - - // CmpInstr is a Compare instruction if destination register is not used. - if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg())) - return false; - - // Get the unique definition of SrcReg. - MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg); - if (!MI) - return false; - - // We iterate backward, starting from the instruction before CmpInstr and - // stop when reaching the definition of the source register or done with the - // basic block, to check whether CPSR is used or modified in between. - MachineBasicBlock::iterator I = CmpInstr, E = MI, - B = CmpInstr->getParent()->begin(); - - // Early exit if CmpInstr is at the beginning of the BB. - if (I == B) - return false; - - // Check whether the definition of SrcReg is in the same basic block as - // Compare. If not, we can't optimize away the Compare. - if (MI->getParent() != CmpInstr->getParent()) - return false; - - // Check that CPSR isn't set between the comparison instruction and the one we - // want to change. - const TargetRegisterInfo *TRI = &getRegisterInfo(); - for (--I; I != E; --I) { - const MachineInstr &Instr = *I; - - if (Instr.modifiesRegister(ARM64::CPSR, TRI) || - Instr.readsRegister(ARM64::CPSR, TRI)) - // This instruction modifies or uses CPSR after the one we want to - // change. We can't do this transformation. 
- return false; - if (I == B) - // The 'and' is below the comparison instruction. - return false; - } - - unsigned NewOpc = MI->getOpcode(); - switch (MI->getOpcode()) { - default: - return false; - case ARM64::ADDSWrr: - case ARM64::ADDSWri: - case ARM64::ADDSXrr: - case ARM64::ADDSXri: - case ARM64::SUBSWrr: - case ARM64::SUBSWri: - case ARM64::SUBSXrr: - case ARM64::SUBSXri: - break; - case ARM64::ADDWrr: NewOpc = ARM64::ADDSWrr; break; - case ARM64::ADDWri: NewOpc = ARM64::ADDSWri; break; - case ARM64::ADDXrr: NewOpc = ARM64::ADDSXrr; break; - case ARM64::ADDXri: NewOpc = ARM64::ADDSXri; break; - case ARM64::ADCWr: NewOpc = ARM64::ADCSWr; break; - case ARM64::ADCXr: NewOpc = ARM64::ADCSXr; break; - case ARM64::SUBWrr: NewOpc = ARM64::SUBSWrr; break; - case ARM64::SUBWri: NewOpc = ARM64::SUBSWri; break; - case ARM64::SUBXrr: NewOpc = ARM64::SUBSXrr; break; - case ARM64::SUBXri: NewOpc = ARM64::SUBSXri; break; - case ARM64::SBCWr: NewOpc = ARM64::SBCSWr; break; - case ARM64::SBCXr: NewOpc = ARM64::SBCSXr; break; - case ARM64::ANDWri: NewOpc = ARM64::ANDSWri; break; - case ARM64::ANDXri: NewOpc = ARM64::ANDSXri; break; - } - - // Scan forward for the use of CPSR. - // When checking against MI: if the condition code requires checking of the - // V bit, then this is not safe to do. - // It is safe to remove CmpInstr if CPSR is redefined or killed. - // If we are done with the basic block, we need to check whether CPSR is - // live-out. - bool IsSafe = false; - for (MachineBasicBlock::iterator I = CmpInstr, - E = CmpInstr->getParent()->end(); - !IsSafe && ++I != E;) { - const MachineInstr &Instr = *I; - for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO; - ++IO) { - const MachineOperand &MO = Instr.getOperand(IO); - if (MO.isRegMask() && MO.clobbersPhysReg(ARM64::CPSR)) { - IsSafe = true; - break; - } - if (!MO.isReg() || MO.getReg() != ARM64::CPSR) - continue; - if (MO.isDef()) { - IsSafe = true; - break; - } - - // Decode the condition code. - unsigned Opc = Instr.getOpcode(); - ARM64CC::CondCode CC; - switch (Opc) { - default: - return false; - case ARM64::Bcc: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 2).getImm(); - break; - case ARM64::CSINVWr: - case ARM64::CSINVXr: - case ARM64::CSINCWr: - case ARM64::CSINCXr: - case ARM64::CSELWr: - case ARM64::CSELXr: - case ARM64::CSNEGWr: - case ARM64::CSNEGXr: - CC = (ARM64CC::CondCode)Instr.getOperand(IO - 1).getImm(); - break; - } - - // It is not safe to remove the Compare instruction if Overflow (V) is used. - switch (CC) { - default: - // CPSR can be used multiple times; we should continue. - break; - case ARM64CC::VS: - case ARM64CC::VC: - case ARM64CC::GE: - case ARM64CC::LT: - case ARM64CC::GT: - case ARM64CC::LE: - return false; - } - } - } - - // If CPSR is not killed nor re-defined, we should check whether it is - // live-out. If it is live-out, do not optimize. - if (!IsSafe) { - MachineBasicBlock *MBB = CmpInstr->getParent(); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) - if ((*SI)->isLiveIn(ARM64::CPSR)) - return false; - } - - // Update the instruction to set CPSR. - MI->setDesc(get(NewOpc)); - CmpInstr->eraseFromParent(); - bool succeeded = UpdateOperandRegClass(MI); - (void)succeeded; - assert(succeeded && "Some operands reg class are incompatible!"); - MI->addRegisterDefined(ARM64::CPSR, TRI); - return true; -}
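The condition codes rejected above are exactly the ones that read the overflow flag: a compare rewritten into a different flag-setting instruction may compute V differently, so any such reader blocks the transformation. A standalone sketch of the predicate (our own illustration, mirroring the switch above):

    #include <cstdio>

    enum CondCode { EQ, NE, HS, LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL };

    // True for the AArch64 condition codes whose evaluation reads the V flag.
    bool readsOverflowFlag(CondCode cc) {
      switch (cc) {
      case VS: case VC: // direct V tests
      case GE: case LT: // N == V / N != V
      case GT: case LE: // same, plus a Z test
        return true;
      default:
        return false;
      }
    }

    int main() {
      printf("%d %d\n", readsOverflowFlag(GE), readsOverflowFlag(EQ)); // 1 0
      return 0;
    }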
-// Return true if this instruction simply sets its single destination register -// to zero. This is equivalent to a register rename of the zero-register. -bool ARM64InstrInfo::isGPRZero(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::MOVZWi: - case ARM64::MOVZXi: // movz Rd, #0 (LSL #0) - if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) { - assert(MI->getDesc().getNumOperands() == 3 && - MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands"); - return true; - } - break; - case ARM64::ANDWri: // and Rd, Rzr, #imm - return MI->getOperand(1).getReg() == ARM64::WZR; - case ARM64::ANDXri: - return MI->getOperand(1).getReg() == ARM64::XZR; - case TargetOpcode::COPY: - return MI->getOperand(1).getReg() == ARM64::WZR; - } - return false; -} - -// Return true if this instruction simply renames a general register without -// modifying bits. -bool ARM64InstrInfo::isGPRCopy(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case TargetOpcode::COPY: { - // GPR32 copies will be lowered to ORRXrs - unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::GPR32RegClass.contains(DstReg) || - ARM64::GPR64RegClass.contains(DstReg)); - } - case ARM64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0) - if (MI->getOperand(1).getReg() == ARM64::XZR) { - assert(MI->getDesc().getNumOperands() == 4 && - MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands"); - return true; - } - break; - case ARM64::ADDXri: // add Xd, Xn, #0 (LSL #0) - if (MI->getOperand(2).getImm() == 0) { - assert(MI->getDesc().getNumOperands() == 4 && - MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands"); - return true; - } - } - return false; -} - -// Return true if this instruction simply renames a general register without -// modifying bits. -bool ARM64InstrInfo::isFPRCopy(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case TargetOpcode::COPY: { - // FPR64 copies will be lowered to ORR.16b - unsigned DstReg = MI->getOperand(0).getReg(); - return (ARM64::FPR64RegClass.contains(DstReg) || - ARM64::FPR128RegClass.contains(DstReg)); - } - case ARM64::ORRv16i8: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() && - "invalid ORRv16i8 operands"); - return true; - } - } - return false; -} - -unsigned ARM64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::LDRWui: - case ARM64::LDRXui: - case ARM64::LDRBui: - case ARM64::LDRHui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - - return 0; -} - -unsigned ARM64InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::STRWui: - case ARM64::STRXui: - case ARM64::STRBui: - case ARM64::STRHui: - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -}
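The pair-suppression hint used by isLdStPairSuppressed and suppressLdStPair below rides in a small target-specific bitfield above the generic MachineMemOperand flags, so testing a hint means shifting it up into the target range. A standalone sketch of that packing (our own C++ illustration; the field position and width here are assumptions, not LLVM's actual values):

    #include <cassert>
    #include <cstdint>

    constexpr unsigned MOTargetStartBit = 12; // illustrative position
    constexpr unsigned MOTargetNumBits = 4;   // illustrative width
    constexpr uint64_t MOSuppressPair = 1;    // target hints start at 1

    // Mirrors isLdStPairSuppressed's flag test.
    bool hasSuppressHint(uint64_t mmoFlags) {
      static_assert(MOSuppressPair < (1u << MOTargetNumBits),
                    "Too many target MO flags");
      return mmoFlags & (MOSuppressPair << MOTargetStartBit);
    }

    int main() {
      uint64_t flags = 0;
      flags |= MOSuppressPair << MOTargetStartBit; // what suppressLdStPair sets
      assert(hasSuppressHint(flags));
      return 0;
    }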
-/// Return true if this load/store scales or extends its register offset. -/// This refers to scaling a dynamic index as opposed to scaled immediates. -/// MI should be a memory op that allows scaled addressing. -bool ARM64InstrInfo::isScaledAddr(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: - break; - case ARM64::LDRBBro: - case ARM64::LDRBro: - case ARM64::LDRDro: - case ARM64::LDRHHro: - case ARM64::LDRHro: - case ARM64::LDRQro: - case ARM64::LDRSBWro: - case ARM64::LDRSBXro: - case ARM64::LDRSHWro: - case ARM64::LDRSHXro: - case ARM64::LDRSWro: - case ARM64::LDRSro: - case ARM64::LDRWro: - case ARM64::LDRXro: - case ARM64::STRBBro: - case ARM64::STRBro: - case ARM64::STRDro: - case ARM64::STRHHro: - case ARM64::STRHro: - case ARM64::STRQro: - case ARM64::STRSro: - case ARM64::STRWro: - case ARM64::STRXro: - unsigned Val = MI->getOperand(3).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val); - return (ExtType != ARM64_AM::UXTX) || ARM64_AM::getMemDoShift(Val); - } - return false; -} - -/// Check all MachineMemOperands for a hint to suppress pairing. -bool ARM64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const { - assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && - "Too many target MO flags"); - for (MachineInstr::mmo_iterator MM = MI->memoperands_begin(), - E = MI->memoperands_end(); - MM != E; ++MM) { - - if ((*MM)->getFlags() & - (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) { - return true; - } - } - return false; -} - -/// Set a flag on the first MachineMemOperand to suppress pairing. -void ARM64InstrInfo::suppressLdStPair(MachineInstr *MI) const { - if (MI->memoperands_empty()) - return; - - assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) && - "Too many target MO flags"); - (*MI->memoperands_begin()) - ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit); -} - -bool ARM64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, - unsigned &Offset, - const TargetRegisterInfo *TRI) const { - switch (LdSt->getOpcode()) { - default: - return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm()) - return false; - BaseReg = LdSt->getOperand(1).getReg(); - MachineFunction &MF = *LdSt->getParent()->getParent(); - unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize(); - Offset = LdSt->getOperand(2).getImm() * Width; - return true; - } -} - -/// Detect opportunities for ldp/stp formation. -/// -/// Only called for LdSt for which getLdStBaseRegImmOfs returns true. -bool ARM64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, - MachineInstr *SecondLdSt, - unsigned NumLoads) const { - // Only cluster up to a single pair. - if (NumLoads > 1) - return false; - if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode()) - return false; - // getLdStBaseRegImmOfs guarantees that operand 2 is an immediate. - unsigned Ofs1 = FirstLdSt->getOperand(2).getImm(); - // Allow 6 bits of positive range. - if (Ofs1 > 64) - return false; - // The caller should already have ordered First/SecondLdSt by offset. - unsigned Ofs2 = SecondLdSt->getOperand(2).getImm(); - return Ofs1 + 1 == Ofs2; -}
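A standalone restatement of that clustering test (our own sketch; the offsets are in scaled units, as produced by getLdStBaseRegImmOfs above):

    #include <cstdio>

    // Two scaled loads of the same opcode may cluster only if the first
    // offset stays within ldp's positive immediate range and the second
    // immediately follows it.
    bool shouldClusterScaledLoads(unsigned opc1, unsigned opc2, unsigned ofs1,
                                  unsigned ofs2, unsigned numLoads) {
      if (numLoads > 1)        // only cluster up to a single pair
        return false;
      if (opc1 != opc2)
        return false;
      if (ofs1 > 64)           // keep within the encodable positive range
        return false;
      return ofs1 + 1 == ofs2; // caller pre-sorts by offset
    }

    int main() {
      printf("%d\n", shouldClusterScaledLoads(1, 1, 8, 9, 1));  // 1: adjacent
      printf("%d\n", shouldClusterScaledLoads(1, 1, 8, 12, 1)); // 0: a gap
      return 0;
    }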
-bool ARM64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, - MachineInstr *Second) const { - // Cyclone can fuse CMN, CMP followed by Bcc. - - // FIXME: B0 can also fuse: - // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. - if (Second->getOpcode() != ARM64::Bcc) - return false; - switch (First->getOpcode()) { - default: - return false; - case ARM64::SUBSWri: - case ARM64::ADDSWri: - case ARM64::ANDSWri: - case ARM64::SUBSXri: - case ARM64::ADDSXri: - case ARM64::ANDSXri: - return true; - } -} - -MachineInstr *ARM64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM64::DBG_VALUE)) - .addFrameIndex(FrameIx) - .addImm(0) - .addImm(Offset) - .addMetadata(MDPtr); - return &*MIB; -} - -static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB, - unsigned Reg, unsigned SubIdx, - unsigned State, - const TargetRegisterInfo *TRI) { - if (!SubIdx) - return MIB.addReg(Reg, State); - - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State); - return MIB.addReg(Reg, State, SubIdx); -} - -static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, - unsigned NumRegs) { - // We really want the positive remainder mod 32 here, which happens to be - // easily obtainable with a mask. - return ((DestReg - SrcReg) & 0x1f) < NumRegs; -} - -void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, - unsigned SrcReg, bool KillSrc, - unsigned Opcode, - llvm::ArrayRef<unsigned> Indices) const { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - uint16_t DestEncoding = TRI->getEncodingValue(DestReg); - uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg); - unsigned NumRegs = Indices.size(); - - int SubReg = 0, End = NumRegs, Incr = 1; - if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) { - SubReg = NumRegs - 1; - End = -1; - Incr = -1; - } - - for (; SubReg != End; SubReg += Incr) { - const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); - AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); - AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); - AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); - } -}
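The direction choice in copyPhysRegTuple rests entirely on forwardCopyWillClobberTuple's modulo-32 overlap test: register tuples wrap around the 32-entry vector file, so a forward sub-register-by-sub-register copy clobbers its own source exactly when the destination starts within NumRegs registers above the source. A standalone check (our own C++ sketch):

    #include <cassert>

    // Positive remainder mod 32, obtained with a mask as in the code above.
    bool forwardCopyWillClobberTuple(unsigned dest, unsigned src, unsigned n) {
      return ((dest - src) & 0x1f) < n;
    }

    int main() {
      // Copying {q1,q2,q3} up to {q2,q3,q4}: a forward walk would overwrite
      // q2 and q3 before reading them, so the copy must run backwards.
      assert(forwardCopyWillClobberTuple(2, 1, 3));
      // Copying {q2,q3,q4} down to {q1,q2,q3} is safe in forward order.
      assert(!forwardCopyWillClobberTuple(1, 2, 3));
      return 0;
    }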
-void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - if (ARM64::GPR32spRegClass.contains(DestReg) && - (ARM64::GPR32spRegClass.contains(SrcReg) || SrcReg == ARM64::WZR)) { - const TargetRegisterInfo *TRI = &getRegisterInfo(); - - if (DestReg == ARM64::WSP || SrcReg == ARM64::WSP) { - // If either operand is WSP, expand to ADD #0. - if (Subtarget.hasZeroCycleRegMove()) { - // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - // This instruction is reading and writing X registers. This may upset - // the register scavenger and machine verifier, so we need to indicate - // that we are reading an undefined value from SrcRegX, but a proper - // value from SrcReg. - BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestRegX) - .addReg(SrcRegX, RegState::Undef) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) - .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); - } else { - BuildMI(MBB, I, DL, get(ARM64::ADDWri), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } - } else if (SrcReg == ARM64::WZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZWi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else { - if (Subtarget.hasZeroCycleRegMove()) { - // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move. - unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32, - &ARM64::GPR64spRegClass); - // This instruction is reading and writing X registers. This may upset - // the register scavenger and machine verifier, so we need to indicate - // that we are reading an undefined value from SrcRegX, but a proper - // value from SrcReg. - BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestRegX) - .addReg(ARM64::XZR) - .addReg(SrcRegX, RegState::Undef) - .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc)); - } else { - // Otherwise, expand to ORR WZR. - BuildMI(MBB, I, DL, get(ARM64::ORRWrr), DestReg) - .addReg(ARM64::WZR) - .addReg(SrcReg, getKillRegState(KillSrc)); - } - } - return; - } - - if (ARM64::GPR64spRegClass.contains(DestReg) && - (ARM64::GPR64spRegClass.contains(SrcReg) || SrcReg == ARM64::XZR)) { - if (DestReg == ARM64::SP || SrcReg == ARM64::SP) { - // If either operand is SP, expand to ADD #0. - BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else if (SrcReg == ARM64::XZR && Subtarget.hasZeroCycleZeroing()) { - BuildMI(MBB, I, DL, get(ARM64::MOVZXi), DestReg).addImm(0).addImm( - ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)); - } else { - // Otherwise, expand to ORR XZR. - BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestReg) - .addReg(ARM64::XZR) - .addReg(SrcReg, getKillRegState(KillSrc)); - } - return; - } - - // Copy a DDDD register quad by copying the individual sub-registers. - if (ARM64::DDDDRegClass.contains(DestReg) && - ARM64::DDDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2, ARM64::dsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a DDD register triple by copying the individual sub-registers. - if (ARM64::DDDRegClass.contains(DestReg) && - ARM64::DDDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1, - ARM64::dsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a DD register pair by copying the individual sub-registers. - if (ARM64::DDRegClass.contains(DestReg) && - ARM64::DDRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8, - Indices); - return; - } - - // Copy a QQQQ register quad by copying the individual sub-registers.
- if (ARM64::QQQQRegClass.contains(DestReg) && - ARM64::QQQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2, ARM64::qsub3 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - // Copy a QQQ register triple by copying the individual sub-registers. - if (ARM64::QQQRegClass.contains(DestReg) && - ARM64::QQQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1, - ARM64::qsub2 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - // Copy a QQ register pair by copying the individual sub-registers. - if (ARM64::QQRegClass.contains(DestReg) && - ARM64::QQRegClass.contains(SrcReg)) { - static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1 }; - copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8, - Indices); - return; - } - - if (ARM64::FPR128RegClass.contains(DestReg) && - ARM64::FPR128RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::dsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::dsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::ssub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::ssub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR16RegClass.contains(DestReg) && - ARM64::FPR16RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - if (ARM64::FPR8RegClass.contains(DestReg) && - ARM64::FPR8RegClass.contains(SrcReg)) { - DestReg = - RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR128RegClass); - SrcReg = - RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR128RegClass); - BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg( - SrcReg, getKillRegState(KillSrc)); - return; - } - - // Copies between GPR64 and FPR64. - if (ARM64::FPR64RegClass.contains(DestReg) && - ARM64::GPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVXDr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - if (ARM64::GPR64RegClass.contains(DestReg) && - ARM64::FPR64RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVDXr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - // Copies between GPR32 and FPR32. 
- if (ARM64::FPR32RegClass.contains(DestReg) && - ARM64::GPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVWSr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - if (ARM64::GPR32RegClass.contains(DestReg) && - ARM64::FPR32RegClass.contains(SrcReg)) { - BuildMI(MBB, I, DL, get(ARM64::FMOVSWr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - - assert(0 && "unimplemented reg-to-reg copy"); -} - -void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FI); - - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); - unsigned Opc = 0; - bool Offset = true; - switch (RC->getSize()) { - case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRBui; - break; - case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRHui; - break; - case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRWui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR32RegClass); - else - assert(SrcReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRSui; - break; - case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::STRXui; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); - else - assert(SrcReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRDui; - break; - case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::STRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Twov1d, Offset = false; - break; - case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Threev1d, Offset = false; - break; - case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Fourv1d, Offset = false; - else if (ARM64::QQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Twov2d, Offset = false; - break; - case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Threev2d, Offset = false; - break; - case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::ST1Fourv2d, Offset = false; - break; - } - assert(Opc && "Unknown register class"); - - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI); - - if (Offset) - MI.addImm(0); - MI.addMemOperand(MMO); -} - -void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - MachineFunction &MF = *MBB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); - - unsigned Opc = 0; - bool Offset = 
true; - switch (RC->getSize()) { - case 1: - if (ARM64::FPR8RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRBui; - break; - case 2: - if (ARM64::FPR16RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRHui; - break; - case 4: - if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRWui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR32RegClass); - else - assert(DestReg != ARM64::WSP); - } else if (ARM64::FPR32RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRSui; - break; - case 8: - if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) { - Opc = ARM64::LDRXui; - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR64RegClass); - else - assert(DestReg != ARM64::SP); - } else if (ARM64::FPR64RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRDui; - break; - case 16: - if (ARM64::FPR128RegClass.hasSubClassEq(RC)) - Opc = ARM64::LDRQui; - else if (ARM64::DDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Twov1d, Offset = false; - break; - case 24: - if (ARM64::DDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Threev1d, Offset = false; - break; - case 32: - if (ARM64::DDDDRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Fourv1d, Offset = false; - else if (ARM64::QQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Twov2d, Offset = false; - break; - case 48: - if (ARM64::QQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Threev2d, Offset = false; - break; - case 64: - if (ARM64::QQQQRegClass.hasSubClassEq(RC)) - Opc = ARM64::LD1Fourv2d, Offset = false; - break; - } - assert(Opc && "Unknown register class"); - - const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(DestReg, getDefRegState(true)) - .addFrameIndex(FI); - if (Offset) - MI.addImm(0); - MI.addMemOperand(MMO); -} - -void llvm::emitFrameOffset(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, int Offset, - const ARM64InstrInfo *TII, MachineInstr::MIFlag Flag, - bool SetCPSR) { - if (DestReg == SrcReg && Offset == 0) - return; - - bool isSub = Offset < 0; - if (isSub) - Offset = -Offset; - - // FIXME: If the offset won't fit in 24-bits, compute the offset into a - // scratch register. If DestReg is a virtual register, use it as the - // scratch register; otherwise, create a new virtual register (to be - // replaced by the scavenger at the end of PEI). That case can be optimized - // slightly if DestReg is SP which is always 16-byte aligned, so the scratch - // register can be loaded with offset%8 and the add/sub can use an extending - // instruction with LSL#3. - // Currently the function handles any offsets but generates a poor sequence - // of code. - // assert(Offset < (1 << 24) && "unimplemented reg plus immediate"); - - unsigned Opc; - if (SetCPSR) - Opc = isSub ? ARM64::SUBSXri : ARM64::ADDSXri; - else - Opc = isSub ? 
ARM64::SUBXri : ARM64::ADDXri; - const unsigned MaxEncoding = 0xfff; - const unsigned ShiftSize = 12; - const unsigned MaxEncodableValue = MaxEncoding << ShiftSize; - while (((unsigned)Offset) >= (1 << ShiftSize)) { - unsigned ThisVal; - if (((unsigned)Offset) > MaxEncodableValue) { - ThisVal = MaxEncodableValue; - } else { - ThisVal = Offset & MaxEncodableValue; - } - assert((ThisVal >> ShiftSize) <= MaxEncoding && - "Encoding cannot handle value that big"); - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(ThisVal >> ShiftSize) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftSize)) - .setMIFlag(Flag); - - SrcReg = DestReg; - Offset -= ThisVal; - if (Offset == 0) - return; - } - BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg) - .addReg(SrcReg) - .addImm(Offset) - .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0)) - .setMIFlag(Flag); -} - -MachineInstr * -ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { - // This is a bit of a hack. Consider this instruction: - // - // %vreg0 = COPY %SP; GPR64all:%vreg0 - // - // We explicitly chose GPR64all for the virtual register so such a copy might - // be eliminated by RegisterCoalescer. However, that may not be possible, and - // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all - // register class, TargetInstrInfo::foldMemoryOperand() is going to try. - // - // To prevent that, we are going to constrain the %vreg0 register class here. - // - // - // - if (MI->isCopy()) { - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); - if (SrcReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(DstReg)) { - MF.getRegInfo().constrainRegClass(DstReg, &ARM64::GPR64RegClass); - return 0; - } - if (DstReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(SrcReg)) { - MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass); - return 0; - } - } - - // Cannot fold. - return 0; -} - -int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, - bool *OutUseUnscaledOp, - unsigned *OutUnscaledOp, - int *EmittableOffset) { - int Scale = 1; - bool IsSigned = false; - // The ImmIdx should be changed case by case if it is not 2. - unsigned ImmIdx = 2; - unsigned UnscaledOp = 0; - // Set output values in case of early exit. - if (EmittableOffset) - *EmittableOffset = 0; - if (OutUseUnscaledOp) - *OutUseUnscaledOp = false; - if (OutUnscaledOp) - *OutUnscaledOp = 0; - switch (MI.getOpcode()) { - default: - assert(0 && "unhandled opcode in rewriteARM64FrameIndex"); - // Vector spills/fills can't take an immediate offset.
- case ARM64::LD1Twov2d: - case ARM64::LD1Threev2d: - case ARM64::LD1Fourv2d: - case ARM64::LD1Twov1d: - case ARM64::LD1Threev1d: - case ARM64::LD1Fourv1d: - case ARM64::ST1Twov2d: - case ARM64::ST1Threev2d: - case ARM64::ST1Fourv2d: - case ARM64::ST1Twov1d: - case ARM64::ST1Threev1d: - case ARM64::ST1Fourv1d: - return ARM64FrameOffsetCannotUpdate; - case ARM64::PRFMui: - Scale = 8; - UnscaledOp = ARM64::PRFUMi; - break; - case ARM64::LDRXui: - Scale = 8; - UnscaledOp = ARM64::LDURXi; - break; - case ARM64::LDRWui: - Scale = 4; - UnscaledOp = ARM64::LDURWi; - break; - case ARM64::LDRBui: - Scale = 1; - UnscaledOp = ARM64::LDURBi; - break; - case ARM64::LDRHui: - Scale = 2; - UnscaledOp = ARM64::LDURHi; - break; - case ARM64::LDRSui: - Scale = 4; - UnscaledOp = ARM64::LDURSi; - break; - case ARM64::LDRDui: - Scale = 8; - UnscaledOp = ARM64::LDURDi; - break; - case ARM64::LDRQui: - Scale = 16; - UnscaledOp = ARM64::LDURQi; - break; - case ARM64::LDRBBui: - Scale = 1; - UnscaledOp = ARM64::LDURBBi; - break; - case ARM64::LDRHHui: - Scale = 2; - UnscaledOp = ARM64::LDURHHi; - break; - case ARM64::LDRSBXui: - Scale = 1; - UnscaledOp = ARM64::LDURSBXi; - break; - case ARM64::LDRSBWui: - Scale = 1; - UnscaledOp = ARM64::LDURSBWi; - break; - case ARM64::LDRSHXui: - Scale = 2; - UnscaledOp = ARM64::LDURSHXi; - break; - case ARM64::LDRSHWui: - Scale = 2; - UnscaledOp = ARM64::LDURSHWi; - break; - case ARM64::LDRSWui: - Scale = 4; - UnscaledOp = ARM64::LDURSWi; - break; - - case ARM64::STRXui: - Scale = 8; - UnscaledOp = ARM64::STURXi; - break; - case ARM64::STRWui: - Scale = 4; - UnscaledOp = ARM64::STURWi; - break; - case ARM64::STRBui: - Scale = 1; - UnscaledOp = ARM64::STURBi; - break; - case ARM64::STRHui: - Scale = 2; - UnscaledOp = ARM64::STURHi; - break; - case ARM64::STRSui: - Scale = 4; - UnscaledOp = ARM64::STURSi; - break; - case ARM64::STRDui: - Scale = 8; - UnscaledOp = ARM64::STURDi; - break; - case ARM64::STRQui: - Scale = 16; - UnscaledOp = ARM64::STURQi; - break; - case ARM64::STRBBui: - Scale = 1; - UnscaledOp = ARM64::STURBBi; - break; - case ARM64::STRHHui: - Scale = 2; - UnscaledOp = ARM64::STURHHi; - break; - - case ARM64::LDPXi: - case ARM64::LDPDi: - case ARM64::STPXi: - case ARM64::STPDi: - IsSigned = true; - Scale = 8; - break; - case ARM64::LDPQi: - case ARM64::STPQi: - IsSigned = true; - Scale = 16; - break; - case ARM64::LDPWi: - case ARM64::LDPSi: - case ARM64::STPWi: - case ARM64::STPSi: - IsSigned = true; - Scale = 4; - break; - - case ARM64::LDURXi: - case ARM64::LDURWi: - case ARM64::LDURBi: - case ARM64::LDURHi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURHHi: - case ARM64::LDURBBi: - case ARM64::LDURSBXi: - case ARM64::LDURSBWi: - case ARM64::LDURSHXi: - case ARM64::LDURSHWi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::STURWi: - case ARM64::STURBi: - case ARM64::STURHi: - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURBBi: - case ARM64::STURHHi: - Scale = 1; - break; - } - - Offset += MI.getOperand(ImmIdx).getImm() * Scale; - - bool useUnscaledOp = false; - // If the offset doesn't match the scale, we rewrite the instruction to - // use the unscaled instruction instead. Likewise, if we have a negative - // offset (and have an unscaled op to use). 
- if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0)) - useUnscaledOp = true; - - // Use an unscaled addressing mode if the instruction has a negative offset - // (or if the instruction is already using an unscaled addressing mode). - unsigned MaskBits; - if (IsSigned) { - // ldp/stp instructions. - MaskBits = 7; - Offset /= Scale; - } else if (UnscaledOp == 0 || useUnscaledOp) { - MaskBits = 9; - IsSigned = true; - Scale = 1; - } else { - MaskBits = 12; - IsSigned = false; - Offset /= Scale; - } - - // Attempt to fold address computation. - int MaxOff = (1 << (MaskBits - IsSigned)) - 1; - int MinOff = (IsSigned ? (-MaxOff - 1) : 0); - if (Offset >= MinOff && Offset <= MaxOff) { - if (EmittableOffset) - *EmittableOffset = Offset; - Offset = 0; - } else { - int NewOff = Offset < 0 ? MinOff : MaxOff; - if (EmittableOffset) - *EmittableOffset = NewOff; - Offset = (Offset - NewOff) * Scale; - } - if (OutUseUnscaledOp) - *OutUseUnscaledOp = useUnscaledOp; - if (OutUnscaledOp) - *OutUnscaledOp = UnscaledOp; - return ARM64FrameOffsetCanUpdate | - (Offset == 0 ? ARM64FrameOffsetIsLegal : 0); -} - -bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, - unsigned FrameReg, int &Offset, - const ARM64InstrInfo *TII) { - unsigned Opcode = MI.getOpcode(); - unsigned ImmIdx = FrameRegIdx + 1; - - if (Opcode == ARM64::ADDSXri || Opcode == ARM64::ADDXri) { - Offset += MI.getOperand(ImmIdx).getImm(); - emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), - MI.getOperand(0).getReg(), FrameReg, Offset, TII, - MachineInstr::NoFlags, (Opcode == ARM64::ADDSXri)); - MI.eraseFromParent(); - Offset = 0; - return true; - } - - int NewOffset; - unsigned UnscaledOp; - bool UseUnscaledOp; - int Status = isARM64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, &UnscaledOp, - &NewOffset); - if (Status & ARM64FrameOffsetCanUpdate) { - if (Status & ARM64FrameOffsetIsLegal) - // Replace the FrameIndex with FrameReg. - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - if (UseUnscaledOp) - MI.setDesc(TII->get(UnscaledOp)); - - MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); - return Offset == 0; - } - - return false; -} - -void ARM64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { - NopInst.setOpcode(ARM64::HINT); - NopInst.addOperand(MCOperand::CreateImm(0)); -} diff --git a/lib/Target/ARM64/ARM64InstrInfo.h b/lib/Target/ARM64/ARM64InstrInfo.h deleted file mode 100644 index 2591ca0..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.h +++ /dev/null @@ -1,219 +0,0 @@ -//===- ARM64InstrInfo.h - ARM64 Instruction Information ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64INSTRINFO_H -#define LLVM_TARGET_ARM64INSTRINFO_H - -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "ARM64GenInstrInfo.inc" - -namespace llvm { - -class ARM64Subtarget; -class ARM64TargetMachine; - -class ARM64InstrInfo : public ARM64GenInstrInfo { - // Reserve bits in the MachineMemOperand target hint flags, starting at 1. - // They will be shifted into MOTargetHintStart when accessed. 
-  enum TargetMemOperandFlags {
-    MOSuppressPair = 1
-  };
-
-  const ARM64RegisterInfo RI;
-  const ARM64Subtarget &Subtarget;
-
-public:
-  explicit ARM64InstrInfo(const ARM64Subtarget &STI);
-
-  /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
-  /// such, whenever a client has an instance of instruction info, it should
-  /// always be able to get register info as well (through this method).
-  const ARM64RegisterInfo &getRegisterInfo() const { return RI; }
-
-  unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
-
-  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
-                             unsigned &DstReg, unsigned &SubIdx) const override;
-
-  unsigned isLoadFromStackSlot(const MachineInstr *MI,
-                               int &FrameIndex) const override;
-  unsigned isStoreToStackSlot(const MachineInstr *MI,
-                              int &FrameIndex) const override;
-
-  /// \brief Does this instruction set its full destination register to zero?
-  bool isGPRZero(const MachineInstr *MI) const;
-
-  /// \brief Does this instruction rename a GPR without modifying bits?
-  bool isGPRCopy(const MachineInstr *MI) const;
-
-  /// \brief Does this instruction rename an FPR without modifying bits?
-  bool isFPRCopy(const MachineInstr *MI) const;
-
-  /// Return true if this load/store scales or extends its register offset.
-  /// This refers to scaling a dynamic index as opposed to scaled immediates.
-  /// MI should be a memory op that allows scaled addressing.
-  bool isScaledAddr(const MachineInstr *MI) const;
-
-  /// Return true if pairing the given load or store is hinted to be
-  /// unprofitable.
-  bool isLdStPairSuppressed(const MachineInstr *MI) const;
-
-  /// Hint that pairing the given load or store is unprofitable.
-  void suppressLdStPair(MachineInstr *MI) const;
-
-  bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
-                            unsigned &Offset,
-                            const TargetRegisterInfo *TRI) const override;
-
-  bool enableClusterLoads() const override { return true; }
-
-  bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
-                          unsigned NumLoads) const override;
-
-  bool shouldScheduleAdjacent(MachineInstr *First,
-                              MachineInstr *Second) const override;
-
-  MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
-                                         uint64_t Offset, const MDNode *MDPtr,
-                                         DebugLoc DL) const;
-  void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                        DebugLoc DL, unsigned DestReg, unsigned SrcReg,
-                        bool KillSrc, unsigned Opcode,
-                        llvm::ArrayRef<unsigned> Indices) const;
-  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                   DebugLoc DL, unsigned DestReg, unsigned SrcReg,
-                   bool KillSrc) const override;
-
-  void storeRegToStackSlot(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator MBBI, unsigned SrcReg,
-                           bool isKill, int FrameIndex,
-                           const TargetRegisterClass *RC,
-                           const TargetRegisterInfo *TRI) const override;
-
-  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MBBI, unsigned DestReg,
-                            int FrameIndex, const TargetRegisterClass *RC,
-                            const TargetRegisterInfo *TRI) const override;
-
-  MachineInstr *
-  foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
-                        const SmallVectorImpl<unsigned> &Ops,
-                        int FrameIndex) const override;
-
-  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
-                     MachineBasicBlock *&FBB,
-                     SmallVectorImpl<MachineOperand> &Cond,
-                     bool AllowModify = false) const override;
-  unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
-  unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                        MachineBasicBlock *FBB,
-                        const SmallVectorImpl<MachineOperand> &Cond,
-                        DebugLoc DL) const override;
-  bool
-  ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
-  bool canInsertSelect(const MachineBasicBlock &,
-                       const SmallVectorImpl<MachineOperand> &Cond, unsigned,
-                       unsigned, int &, int &, int &) const override;
-  void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
-                    DebugLoc DL, unsigned DstReg,
-                    const SmallVectorImpl<MachineOperand> &Cond,
-                    unsigned TrueReg, unsigned FalseReg) const override;
-  void getNoopForMachoTarget(MCInst &NopInst) const override;
-
-  /// analyzeCompare - For a comparison instruction, return the source
-  /// registers in SrcReg and SrcReg2, and the value it compares against in
-  /// CmpValue. Return true if the comparison instruction can be analyzed.
-  bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
-                      unsigned &SrcReg2, int &CmpMask,
-                      int &CmpValue) const override;
-  /// optimizeCompareInstr - Convert the instruction supplying the argument to
-  /// the comparison into one that sets the zero bit in the flags register.
-  bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
-                            unsigned SrcReg2, int CmpMask, int CmpValue,
-                            const MachineRegisterInfo *MRI) const override;
-
-private:
-  void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
-                             MachineBasicBlock *TBB,
-                             const SmallVectorImpl<MachineOperand> &Cond) const;
-};
-
-/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
-/// plus Offset. This is intended to be used from within the prolog/epilog
-/// insertion (PEI) pass, where a virtual scratch register may be allocated
-/// if necessary, to be replaced by the scavenger at the end of PEI.
-void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                     DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
-                     const ARM64InstrInfo *TII,
-                     MachineInstr::MIFlag = MachineInstr::NoFlags,
-                     bool SetCPSR = false);
-
-/// rewriteARM64FrameIndex - Rewrite MI to access 'Offset' bytes from the
-/// FP. Return false if the offset could not be handled directly in MI, and
-/// return the left-over portion by reference.
-bool rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
-                            unsigned FrameReg, int &Offset,
-                            const ARM64InstrInfo *TII);
-
-/// \brief Used to report the frame offset status in isARM64FrameOffsetLegal.
-enum ARM64FrameOffsetStatus {
-  ARM64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
-  ARM64FrameOffsetIsLegal = 0x1,      ///< Offset is legal.
-  ARM64FrameOffsetCanUpdate = 0x2     ///< Offset can apply, at least partly.
-};
-
-/// \brief Check if the @p Offset is a valid frame offset for @p MI.
-/// The returned value reports the validity of the frame offset for @p MI.
-/// It uses the values defined by ARM64FrameOffsetStatus for that.
-/// If result == ARM64FrameOffsetCannotUpdate, @p MI cannot be updated to
-/// use an offset.
-/// If result & ARM64FrameOffsetIsLegal, @p Offset can completely be
-/// rewritten in @p MI.
-/// If result & ARM64FrameOffsetCanUpdate, @p Offset contains the
-/// amount that is off the limit of the legal offset.
-/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
-/// turned into an unscaled operator, whose opcode is in @p OutUnscaledOp.
-/// If set, @p EmittableOffset contains the amount that can be set in @p MI
-/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
-/// is a legal offset.
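A small self-test of the status protocol this comment describes (plain C++ with the enum copied from above; not LLVM code):

  #include <cassert>
  enum ARM64FrameOffsetStatus {
    ARM64FrameOffsetCannotUpdate = 0x0,
    ARM64FrameOffsetIsLegal = 0x1,
    ARM64FrameOffsetCanUpdate = 0x2
  };
  int main() {
    // Partially foldable offset: CanUpdate without IsLegal.
    int Status = ARM64FrameOffsetCanUpdate;
    assert((Status & ARM64FrameOffsetCanUpdate) &&
           !(Status & ARM64FrameOffsetIsLegal));
    // Fully folded offset: both bits set.
    Status = ARM64FrameOffsetCanUpdate | ARM64FrameOffsetIsLegal;
    assert(Status & ARM64FrameOffsetIsLegal);
    return 0;
  }

isARM64FrameOffsetLegal's return statement earlier in the patch composes exactly these bits: ARM64FrameOffsetCanUpdate, plus ARM64FrameOffsetIsLegal when the leftover offset is zero.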
-int isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset, - bool *OutUseUnscaledOp = NULL, - unsigned *OutUnscaledOp = NULL, - int *EmittableOffset = NULL); - -static inline bool isUncondBranchOpcode(int Opc) { return Opc == ARM64::B; } - -static inline bool isCondBranchOpcode(int Opc) { - switch (Opc) { - case ARM64::Bcc: - case ARM64::CBZW: - case ARM64::CBZX: - case ARM64::CBNZW: - case ARM64::CBNZX: - case ARM64::TBZ: - case ARM64::TBNZ: - return true; - default: - return false; - } -} - -static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM64::BR; } - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td deleted file mode 100644 index 2fe1720..0000000 --- a/lib/Target/ARM64/ARM64InstrInfo.td +++ /dev/null @@ -1,4458 +0,0 @@ -//===- ARM64InstrInfo.td - Describe the ARM64 Instructions -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// ARM64 Instruction definitions. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// ARM64-specific DAG Nodes. -// - -// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS -def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, SDTCisVT<1, i32>]>; - -// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS -def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<0>, - SDTCisVT<3, i32>]>; - -// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS -def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, - [SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>, - SDTCisInt<0>, - SDTCisVT<1, i32>, - SDTCisVT<4, i32>]>; - -def SDT_ARM64Brcond : SDTypeProfile<0, 3, - [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, - SDTCisVT<2, i32>]>; -def SDT_ARM64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; -def SDT_ARM64tbz : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisVT<1, i64>, - SDTCisVT<2, OtherVT>]>; - - -def SDT_ARM64CSel : SDTypeProfile<1, 4, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<3>, - SDTCisVT<4, i32>]>; -def SDT_ARM64FCmp : SDTypeProfile<0, 2, - [SDTCisFP<0>, - SDTCisSameAs<0, 1>]>; -def SDT_ARM64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDT_ARM64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>; -def SDT_ARM64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>; -def SDT_ARM64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>; -def SDT_ARM64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>; -def SDT_ARM64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisInt<2>, SDTCisInt<3>]>; -def SDT_ARM64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisInt<3>]>; -def SDT_ARM64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; - -def SDT_ARM64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; -def SDT_ARM64fcmpz : SDTypeProfile<1, 1, []>; -def SDT_ARM64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; -def SDT_ARM64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - 
SDTCisSameAs<0,2>]>; -def SDT_ARM64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, - SDTCisSameAs<0,3>]>; -def SDT_ARM64TCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; -def SDT_ARM64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; - -def SDT_ARM64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>; - -def SDT_ARM64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, - SDTCisPtrTy<1>]>; -def SDT_ARM64WrapperLarge : SDTypeProfile<1, 4, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, - SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, - SDTCisSameAs<1, 4>]>; - - -// Node definitions. -def ARM64adrp : SDNode<"ARM64ISD::ADRP", SDTIntUnaryOp, []>; -def ARM64addlow : SDNode<"ARM64ISD::ADDlow", SDTIntBinOp, []>; -def ARM64LOADgot : SDNode<"ARM64ISD::LOADgot", SDTIntUnaryOp>; -def ARM64callseq_start : SDNode<"ISD::CALLSEQ_START", - SDCallSeqStart<[ SDTCisVT<0, i32> ]>, - [SDNPHasChain, SDNPOutGlue]>; -def ARM64callseq_end : SDNode<"ISD::CALLSEQ_END", - SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def ARM64call : SDNode<"ARM64ISD::CALL", - SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; -def ARM64brcond : SDNode<"ARM64ISD::BRCOND", SDT_ARM64Brcond, - [SDNPHasChain]>; -def ARM64cbz : SDNode<"ARM64ISD::CBZ", SDT_ARM64cbz, - [SDNPHasChain]>; -def ARM64cbnz : SDNode<"ARM64ISD::CBNZ", SDT_ARM64cbz, - [SDNPHasChain]>; -def ARM64tbz : SDNode<"ARM64ISD::TBZ", SDT_ARM64tbz, - [SDNPHasChain]>; -def ARM64tbnz : SDNode<"ARM64ISD::TBNZ", SDT_ARM64tbz, - [SDNPHasChain]>; - - -def ARM64csel : SDNode<"ARM64ISD::CSEL", SDT_ARM64CSel>; -def ARM64csinv : SDNode<"ARM64ISD::CSINV", SDT_ARM64CSel>; -def ARM64csneg : SDNode<"ARM64ISD::CSNEG", SDT_ARM64CSel>; -def ARM64csinc : SDNode<"ARM64ISD::CSINC", SDT_ARM64CSel>; -def ARM64retflag : SDNode<"ARM64ISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARM64adc : SDNode<"ARM64ISD::ADC", SDTBinaryArithWithFlagsIn >; -def ARM64sbc : SDNode<"ARM64ISD::SBC", SDTBinaryArithWithFlagsIn>; -def ARM64add_flag : SDNode<"ARM64ISD::ADDS", SDTBinaryArithWithFlagsOut, - [SDNPCommutative]>; -def ARM64sub_flag : SDNode<"ARM64ISD::SUBS", SDTBinaryArithWithFlagsOut>; -def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut>; -def ARM64adc_flag : SDNode<"ARM64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; -def ARM64sbc_flag : SDNode<"ARM64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; - -def ARM64threadpointer : SDNode<"ARM64ISD::THREAD_POINTER", SDTPtrLeaf>; - -def ARM64fcmp : SDNode<"ARM64ISD::FCMP", SDT_ARM64FCmp>; - -def ARM64fmax : SDNode<"ARM64ISD::FMAX", SDTFPBinOp>; -def ARM64fmin : SDNode<"ARM64ISD::FMIN", SDTFPBinOp>; - -def ARM64dup : SDNode<"ARM64ISD::DUP", SDT_ARM64Dup>; -def ARM64duplane8 : SDNode<"ARM64ISD::DUPLANE8", SDT_ARM64DupLane>; -def ARM64duplane16 : SDNode<"ARM64ISD::DUPLANE16", SDT_ARM64DupLane>; -def ARM64duplane32 : SDNode<"ARM64ISD::DUPLANE32", SDT_ARM64DupLane>; -def ARM64duplane64 : SDNode<"ARM64ISD::DUPLANE64", SDT_ARM64DupLane>; - -def ARM64zip1 : SDNode<"ARM64ISD::ZIP1", SDT_ARM64Zip>; -def ARM64zip2 : SDNode<"ARM64ISD::ZIP2", SDT_ARM64Zip>; -def ARM64uzp1 : SDNode<"ARM64ISD::UZP1", SDT_ARM64Zip>; -def ARM64uzp2 : SDNode<"ARM64ISD::UZP2", SDT_ARM64Zip>; -def ARM64trn1 : SDNode<"ARM64ISD::TRN1", SDT_ARM64Zip>; -def ARM64trn2 : SDNode<"ARM64ISD::TRN2", SDT_ARM64Zip>; - -def ARM64movi_edit : SDNode<"ARM64ISD::MOVIedit", SDT_ARM64MOVIedit>; -def ARM64movi_shift : 
SDNode<"ARM64ISD::MOVIshift", SDT_ARM64MOVIshift>; -def ARM64movi_msl : SDNode<"ARM64ISD::MOVImsl", SDT_ARM64MOVIshift>; -def ARM64mvni_shift : SDNode<"ARM64ISD::MVNIshift", SDT_ARM64MOVIshift>; -def ARM64mvni_msl : SDNode<"ARM64ISD::MVNImsl", SDT_ARM64MOVIshift>; -def ARM64movi : SDNode<"ARM64ISD::MOVI", SDT_ARM64MOVIedit>; -def ARM64fmov : SDNode<"ARM64ISD::FMOV", SDT_ARM64MOVIedit>; - -def ARM64rev16 : SDNode<"ARM64ISD::REV16", SDT_ARM64UnaryVec>; -def ARM64rev32 : SDNode<"ARM64ISD::REV32", SDT_ARM64UnaryVec>; -def ARM64rev64 : SDNode<"ARM64ISD::REV64", SDT_ARM64UnaryVec>; -def ARM64ext : SDNode<"ARM64ISD::EXT", SDT_ARM64ExtVec>; - -def ARM64vashr : SDNode<"ARM64ISD::VASHR", SDT_ARM64vshift>; -def ARM64vlshr : SDNode<"ARM64ISD::VLSHR", SDT_ARM64vshift>; -def ARM64vshl : SDNode<"ARM64ISD::VSHL", SDT_ARM64vshift>; -def ARM64sqshli : SDNode<"ARM64ISD::SQSHL_I", SDT_ARM64vshift>; -def ARM64uqshli : SDNode<"ARM64ISD::UQSHL_I", SDT_ARM64vshift>; -def ARM64sqshlui : SDNode<"ARM64ISD::SQSHLU_I", SDT_ARM64vshift>; -def ARM64srshri : SDNode<"ARM64ISD::SRSHR_I", SDT_ARM64vshift>; -def ARM64urshri : SDNode<"ARM64ISD::URSHR_I", SDT_ARM64vshift>; - -def ARM64not: SDNode<"ARM64ISD::NOT", SDT_ARM64unvec>; -def ARM64bit: SDNode<"ARM64ISD::BIT", SDT_ARM64trivec>; - -def ARM64cmeq: SDNode<"ARM64ISD::CMEQ", SDT_ARM64binvec>; -def ARM64cmge: SDNode<"ARM64ISD::CMGE", SDT_ARM64binvec>; -def ARM64cmgt: SDNode<"ARM64ISD::CMGT", SDT_ARM64binvec>; -def ARM64cmhi: SDNode<"ARM64ISD::CMHI", SDT_ARM64binvec>; -def ARM64cmhs: SDNode<"ARM64ISD::CMHS", SDT_ARM64binvec>; - -def ARM64fcmeq: SDNode<"ARM64ISD::FCMEQ", SDT_ARM64fcmp>; -def ARM64fcmge: SDNode<"ARM64ISD::FCMGE", SDT_ARM64fcmp>; -def ARM64fcmgt: SDNode<"ARM64ISD::FCMGT", SDT_ARM64fcmp>; - -def ARM64cmeqz: SDNode<"ARM64ISD::CMEQz", SDT_ARM64unvec>; -def ARM64cmgez: SDNode<"ARM64ISD::CMGEz", SDT_ARM64unvec>; -def ARM64cmgtz: SDNode<"ARM64ISD::CMGTz", SDT_ARM64unvec>; -def ARM64cmlez: SDNode<"ARM64ISD::CMLEz", SDT_ARM64unvec>; -def ARM64cmltz: SDNode<"ARM64ISD::CMLTz", SDT_ARM64unvec>; -def ARM64cmtst : PatFrag<(ops node:$LHS, node:$RHS), - (ARM64not (ARM64cmeqz (and node:$LHS, node:$RHS)))>; - -def ARM64fcmeqz: SDNode<"ARM64ISD::FCMEQz", SDT_ARM64fcmpz>; -def ARM64fcmgez: SDNode<"ARM64ISD::FCMGEz", SDT_ARM64fcmpz>; -def ARM64fcmgtz: SDNode<"ARM64ISD::FCMGTz", SDT_ARM64fcmpz>; -def ARM64fcmlez: SDNode<"ARM64ISD::FCMLEz", SDT_ARM64fcmpz>; -def ARM64fcmltz: SDNode<"ARM64ISD::FCMLTz", SDT_ARM64fcmpz>; - -def ARM64bici: SDNode<"ARM64ISD::BICi", SDT_ARM64vecimm>; -def ARM64orri: SDNode<"ARM64ISD::ORRi", SDT_ARM64vecimm>; - -def ARM64neg : SDNode<"ARM64ISD::NEG", SDT_ARM64unvec>; - -def ARM64tcret: SDNode<"ARM64ISD::TC_RETURN", SDT_ARM64TCRET, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - -def ARM64Prefetch : SDNode<"ARM64ISD::PREFETCH", SDT_ARM64PREFETCH, - [SDNPHasChain, SDNPSideEffect]>; - -def ARM64sitof: SDNode<"ARM64ISD::SITOF", SDT_ARM64ITOF>; -def ARM64uitof: SDNode<"ARM64ISD::UITOF", SDT_ARM64ITOF>; - -def ARM64tlsdesc_call : SDNode<"ARM64ISD::TLSDESC_CALL", SDT_ARM64TLSDescCall, - [SDNPInGlue, SDNPOutGlue, SDNPHasChain, - SDNPVariadic]>; - -def ARM64WrapperLarge : SDNode<"ARM64ISD::WrapperLarge", SDT_ARM64WrapperLarge>; - - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// - -// ARM64 Instruction Predicate Definitions. 
-// -def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">; -def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">; -def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; -def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; -def ForCodeSize : Predicate<"ForCodeSize">; -def NotForCodeSize : Predicate<"!ForCodeSize">; - -include "ARM64InstrFormats.td" - -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Miscellaneous instructions. -//===----------------------------------------------------------------------===// - -let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - [(ARM64callseq_start timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - [(ARM64callseq_end timm:$amt1, timm:$amt2)]>; -} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 - -let isReMaterializable = 1, isCodeGenOnly = 1 in { -// FIXME: The following pseudo instructions are only needed because remat -// cannot handle multiple instructions. When that changes, they can be -// removed, along with the ARM64Wrapper node. - -let AddedComplexity = 10 in -def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), - [(set GPR64:$dst, (ARM64LOADgot tglobaladdr:$addr))]>, - Sched<[WriteLDAdr]>; - -// The MOVaddr instruction should match only when the add is not folded -// into a load or store address. -def MOVaddr - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaladdr:$hi), - tglobaladdr:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrJT - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tjumptable:$hi), - tjumptable:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrCP - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tconstpool:$hi), - tconstpool:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrBA - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tblockaddress:$hi), - tblockaddress:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrTLS - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaltlsaddr:$hi), - tglobaltlsaddr:$low))]>, - Sched<[WriteAdrAdr]>; -def MOVaddrEXT - : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), - [(set GPR64:$dst, (ARM64addlow (ARM64adrp texternalsym:$hi), - texternalsym:$low))]>, - Sched<[WriteAdrAdr]>; - -} // isReMaterializable, isCodeGenOnly - -def : Pat<(ARM64LOADgot tglobaltlsaddr:$addr), - (LOADgot tglobaltlsaddr:$addr)>; - -def : Pat<(ARM64LOADgot texternalsym:$addr), - (LOADgot texternalsym:$addr)>; - -def : Pat<(ARM64LOADgot tconstpool:$addr), - (LOADgot tconstpool:$addr)>; - -//===----------------------------------------------------------------------===// -// System instructions. -//===----------------------------------------------------------------------===// - -def HINT : HintI<"hint">; -def : InstAlias<"nop", (HINT 0b000)>; -def : InstAlias<"yield",(HINT 0b001)>; -def : InstAlias<"wfe", (HINT 0b010)>; -def : InstAlias<"wfi", (HINT 0b011)>; -def : InstAlias<"sev", (HINT 0b100)>; -def : InstAlias<"sevl", (HINT 0b101)>; - - // As far as LLVM is concerned this writes to the system's exclusive monitors. 
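The MOVaddr pseudos above pair an ADRP of the symbol's 4 KiB page with an add of the low 12 bits (ARM64addlow). A sketch of that address split (plain C++; the address is hypothetical):

  #include <cstdint>
  int main() {
    uint64_t Addr = 0x0000000412345678ULL;   // hypothetical symbol address
    uint64_t Page = Addr & ~uint64_t(0xfff); // what ADRP materializes
    uint64_t Lo12 = Addr & 0xfff;            // the low-part add immediate
    return (Page + Lo12) == Addr ? 0 : 1;    // the pair reconstructs the address
  }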
-let mayLoad = 1, mayStore = 1 in
-def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
-
-def DMB   : CRmSystemI<barrier_op, 0b101, "dmb">;
-def DSB   : CRmSystemI<barrier_op, 0b100, "dsb">;
-def ISB   : CRmSystemI<barrier_op, 0b110, "isb">;
-def : InstAlias<"clrex", (CLREX 0xf)>;
-def : InstAlias<"isb", (ISB 0xf)>;
-
-def MRS    : MRSI;
-def MSR    : MSRI;
-def MSRcpsr: MSRcpsrI;
-
-// The thread pointer (on Linux, at least, where this has been implemented) is
-// TPIDR_EL0.
-def : Pat<(ARM64threadpointer), (MRS 0xde82)>;
-
-// Generic system instructions
-def SYS    : SystemI<0, "sys">;
-def SYSxt  : SystemXtI<0, "sys">;
-def SYSLxt : SystemLXtI<1, "sysl">;
-
-//===----------------------------------------------------------------------===//
-// Move immediate instructions.
-//===----------------------------------------------------------------------===//
-
-defm MOVK : InsertImmediate<0b11, "movk">;
-defm MOVN : MoveImmediate<0b00, "movn">;
-
-let PostEncoderMethod = "fixMOVZ" in
-defm MOVZ : MoveImmediate<0b10, "movz">;
-
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
-
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
-
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
-
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g3:$sym, 48)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g2:$sym, 32)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
-
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>;
-
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g2:$sym, 32)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>;
-
-let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
-    isAsCheapAsAMove = 1 in {
-// FIXME: The following pseudo instructions are only needed because remat
-// cannot handle multiple instructions.  When that changes, we can select
-// directly to the real instructions and get rid of these pseudos.
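The magic constant in the thread-pointer pattern above decodes as the system-register encoding of TPIDR_EL0. A compile-time cross-check of that arithmetic (C++; field packing per the ARMv8 MRS/MSR system-register operand, op0:op1:CRn:CRm:op2):

  constexpr unsigned SysReg(unsigned Op0, unsigned Op1, unsigned CRn,
                            unsigned CRm, unsigned Op2) {
    return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
  }
  // TPIDR_EL0 is op0=3, op1=3, CRn=13, CRm=0, op2=2.
  static_assert(SysReg(3, 3, 13, 0, 2) == 0xde82, "TPIDR_EL0");
  int main() { return 0; }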
- -def MOVi32imm - : Pseudo<(outs GPR32:$dst), (ins i32imm:$src), - [(set GPR32:$dst, imm:$src)]>, - Sched<[WriteImm]>; -def MOVi64imm - : Pseudo<(outs GPR64:$dst), (ins i64imm:$src), - [(set GPR64:$dst, imm:$src)]>, - Sched<[WriteImm]>; -} // isReMaterializable, isCodeGenOnly - -def : Pat<(ARM64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2, - tglobaladdr:$g1, tglobaladdr:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48), - tglobaladdr:$g2, 32), - tglobaladdr:$g1, 16), - tglobaladdr:$g0, 0)>; - -def : Pat<(ARM64WrapperLarge tblockaddress:$g3, tblockaddress:$g2, - tblockaddress:$g1, tblockaddress:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48), - tblockaddress:$g2, 32), - tblockaddress:$g1, 16), - tblockaddress:$g0, 0)>; - -def : Pat<(ARM64WrapperLarge tconstpool:$g3, tconstpool:$g2, - tconstpool:$g1, tconstpool:$g0), - (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48), - tconstpool:$g2, 32), - tconstpool:$g1, 16), - tconstpool:$g0, 0)>; - - -//===----------------------------------------------------------------------===// -// Arithmetic instructions. -//===----------------------------------------------------------------------===// - -// Add/subtract with carry. -defm ADC : AddSubCarry<0, "adc", "adcs", ARM64adc, ARM64adc_flag>; -defm SBC : AddSubCarry<1, "sbc", "sbcs", ARM64sbc, ARM64sbc_flag>; - -def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>; -def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>; -def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>; -def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>; - -// Add/subtract -defm ADD : AddSub<0, "add", add>; -defm SUB : AddSub<1, "sub">; - -defm ADDS : AddSubS<0, "adds", ARM64add_flag>; -defm SUBS : AddSubS<1, "subs", ARM64sub_flag>; - -// Use SUBS instead of SUB to enable CSE between SUBS and SUB. -def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm), - (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>; -def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm), - (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>; -def : Pat<(sub GPR32:$Rn, GPR32:$Rm), - (SUBSWrr GPR32:$Rn, GPR32:$Rm)>; -def : Pat<(sub GPR64:$Rn, GPR64:$Rm), - (SUBSXrr GPR64:$Rn, GPR64:$Rm)>; -def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), - (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; -def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), - (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; -def : Pat<(sub GPR32sp:$R2, arith_extended_reg32:$R3), - (SUBSWrx GPR32sp:$R2, arith_extended_reg32:$R3)>; -def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3), - (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>; - -// Because of the immediate format for add/sub-imm instructions, the -// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). -// These patterns capture that transformation. 
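The ARM64WrapperLarge patterns above build a 64-bit address from four 16-bit pieces: MOVZ seeds bits 63:48 and three MOVKs patch in the lower halfwords. The same decomposition in plain C++ (the constant is hypothetical, mnemonics abbreviated):

  #include <cstdint>
  #include <cstdio>
  int main() {
    uint64_t Imm = 0x123456789abcdef0ULL;
    std::printf("movz x0, #0x%04llx, lsl #48\n",
                (unsigned long long)((Imm >> 48) & 0xffff));
    std::printf("movk x0, #0x%04llx, lsl #32\n",
                (unsigned long long)((Imm >> 32) & 0xffff));
    std::printf("movk x0, #0x%04llx, lsl #16\n",
                (unsigned long long)((Imm >> 16) & 0xffff));
    std::printf("movk x0, #0x%04llx\n",
                (unsigned long long)(Imm & 0xffff));
    return 0;
  }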
-let AddedComplexity = 1 in { -def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -} - -def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"neg $dst, $src, $shift", - (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift:$shift)>; -def : InstAlias<"neg $dst, $src, $shift", - (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift:$shift)>; - -// Because of the immediate format for add/sub-imm instructions, the -// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). -// These patterns capture that transformation. -let AddedComplexity = 1 in { -def : Pat<(ARM64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -def : Pat<(ARM64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm), - (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>; -def : Pat<(ARM64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm), - (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>; -} - -def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"negs $dst, $src, $shift", - (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift:$shift)>; -def : InstAlias<"negs $dst, $src, $shift", - (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift:$shift)>; - -// Unsigned/Signed divide -defm UDIV : Div<0, "udiv", udiv>; -defm SDIV : Div<1, "sdiv", sdiv>; -let isCodeGenOnly = 1 in { -defm UDIV_Int : Div<0, "udiv", int_arm64_udiv>; -defm SDIV_Int : Div<1, "sdiv", int_arm64_sdiv>; -} - -// Variable shift -defm ASRV : Shift<0b10, "asrv", sra>; -defm LSLV : Shift<0b00, "lslv", shl>; -defm LSRV : Shift<0b01, "lsrv", srl>; -defm RORV : Shift<0b11, "rorv", rotr>; - -def : ShiftAlias<"asr", ASRVWr, GPR32>; -def : ShiftAlias<"asr", ASRVXr, GPR64>; -def : ShiftAlias<"lsl", LSLVWr, GPR32>; -def : ShiftAlias<"lsl", LSLVXr, GPR64>; -def : ShiftAlias<"lsr", LSRVWr, GPR32>; -def : ShiftAlias<"lsr", LSRVXr, GPR64>; -def : ShiftAlias<"ror", RORVWr, GPR32>; -def : ShiftAlias<"ror", RORVXr, GPR64>; - -// Multiply-add -let AddedComplexity = 7 in { -defm MADD : MulAccum<0, "madd", add>; -defm MSUB : MulAccum<1, "msub", sub>; - -def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)), - (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)), - (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; - -def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))), - (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>; -def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))), - (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>; -} // AddedComplexity = 7 - -let AddedComplexity = 5 in { -def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>; -def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>; -def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>; -def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>; - -def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))), - (SMADDLrrr 
GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))), - (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; - -def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))), - (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))), - (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>; -} // AddedComplexity = 5 - -def : MulAccumWAlias<"mul", MADDWrrr>; -def : MulAccumXAlias<"mul", MADDXrrr>; -def : MulAccumWAlias<"mneg", MSUBWrrr>; -def : MulAccumXAlias<"mneg", MSUBXrrr>; -def : WideMulAccumAlias<"smull", SMADDLrrr>; -def : WideMulAccumAlias<"smnegl", SMSUBLrrr>; -def : WideMulAccumAlias<"umull", UMADDLrrr>; -def : WideMulAccumAlias<"umnegl", UMSUBLrrr>; - -// Multiply-high -def SMULHrr : MulHi<0b010, "smulh", mulhs>; -def UMULHrr : MulHi<0b110, "umulh", mulhu>; - -// CRC32 -def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_arm64_crc32b, "crc32b">; -def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_arm64_crc32h, "crc32h">; -def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_arm64_crc32w, "crc32w">; -def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_arm64_crc32x, "crc32x">; - -def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_arm64_crc32cb, "crc32cb">; -def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_arm64_crc32ch, "crc32ch">; -def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_arm64_crc32cw, "crc32cw">; -def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_arm64_crc32cx, "crc32cx">; - - -//===----------------------------------------------------------------------===// -// Logical instructions. -//===----------------------------------------------------------------------===// - -// (immediate) -defm ANDS : LogicalImmS<0b11, "ands", ARM64and_flag>; -defm AND : LogicalImm<0b00, "and", and>; -defm EOR : LogicalImm<0b10, "eor", xor>; -defm ORR : LogicalImm<0b01, "orr", or>; - -def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR, - logical_imm32:$imm)>; -def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR, - logical_imm64:$imm)>; - - -// (register) -defm ANDS : LogicalRegS<0b11, 0, "ands">; -defm BICS : LogicalRegS<0b11, 1, "bics">; -defm AND : LogicalReg<0b00, 0, "and", and>; -defm BIC : LogicalReg<0b00, 1, "bic", - BinOpFrag<(and node:$LHS, (not node:$RHS))>>; -defm EON : LogicalReg<0b10, 1, "eon", - BinOpFrag<(xor node:$LHS, (not node:$RHS))>>; -defm EOR : LogicalReg<0b10, 0, "eor", xor>; -defm ORN : LogicalReg<0b01, 1, "orn", - BinOpFrag<(or node:$LHS, (not node:$RHS))>>; -defm ORR : LogicalReg<0b01, 0, "orr", or>; - -def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDWri GPR32sp:$dst, GPR32sp:$src, 0, 0)>; -def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0)>; -def : InstAlias<"mov $dst, $src", - (ADDXri GPR64sp:$dst, GPR64sp:$src, 0, 0)>; - -def : InstAlias<"tst $src1, $src2", - (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2)>; -def : InstAlias<"tst $src1, $src2", - (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2)>; - -def : InstAlias<"tst $src1, $src2", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0)>; -def : InstAlias<"tst $src1, $src2", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0)>; - -def : InstAlias<"tst $src1, $src2, $sh", - (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift:$sh)>; -def : InstAlias<"tst $src1, $src2, $sh", - (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift:$sh)>; - -def : InstAlias<"mvn $Wd, $Wm", - (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0)>; -def : InstAlias<"mvn $Xd, $Xm", - (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0)>; - 
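A pattern worth noting in the defs above: A64 has no dedicated mul, neg, tst, or mvn encodings; each is a three-operand instruction with a zero register wired in. mul Wd, Wn, Wm is madd Wd, Wn, Wm, wzr (the MADDWrrr patterns), neg is a sub from wzr (SUBWrs), tst is an ands that discards into wzr (ANDSWrs), and mvn is an orn from wzr (ORNWrs), which is exactly what the aliases and patterns in this hunk spell out.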
-def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
-def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
-
-
-//===----------------------------------------------------------------------===//
-// One operand data processing instructions.
-//===----------------------------------------------------------------------===//
-
-defm CLS    : OneOperandData<0b101, "cls">;
-defm CLZ    : OneOperandData<0b100, "clz", ctlz>;
-defm RBIT   : OneOperandData<0b000, "rbit">;
-def  REV16Wr : OneWRegData<0b001, "rev16",
-                           UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
-def  REV16Xr : OneXRegData<0b001, "rev16",
-                           UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
-
-def : Pat<(cttz GPR32:$Rn),
-          (CLZWr (RBITWr GPR32:$Rn))>;
-def : Pat<(cttz GPR64:$Rn),
-          (CLZXr (RBITXr GPR64:$Rn))>;
-
-// Unlike the other one operand instructions, the instructions with the "rev"
-// mnemonic do *not* just differ in the size bit, but actually use different
-// opcode bits for the different sizes.
-def REVWr   : OneWRegData<0b010, "rev", bswap>;
-def REVXr   : OneXRegData<0b011, "rev", bswap>;
-def REV32Xr : OneXRegData<0b010, "rev32",
-                          UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
-
-//===----------------------------------------------------------------------===//
-// Bitfield immediate extraction instruction.
-//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in
-defm EXTR : ExtractImm<"extr">;
-def : InstAlias<"ror $dst, $src, $shift",
-                (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
-def : InstAlias<"ror $dst, $src, $shift",
-                (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
-
-def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
-          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
-def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
-          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
-
-//===----------------------------------------------------------------------===//
-// Other bitfield immediate instructions.
-//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
-defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
-defm SBFM : BitfieldImm<0b00, "sbfm">;
-defm UBFM : BitfieldImm<0b10, "ubfm">;
-}
-
-def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
-  return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = 31 - N->getZExtValue();
-  return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(7, 31 - shift_amt)
-def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = 31 - N->getZExtValue();
-  enc = enc > 7 ? 7 : enc;
-  return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(15, 31 - shift_amt)
-def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = 31 - N->getZExtValue();
-  enc = enc > 15 ? 15 : enc;
-  return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
-  return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = 63 - N->getZExtValue();
-  return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(7, 63 - shift_amt)
-def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = 63 - N->getZExtValue();
-  enc = enc > 7 ? 7 : enc;
-  return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(15, 63 - shift_amt)
-def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = 63 - N->getZExtValue();
-  enc = enc > 15 ? 15 : enc;
-  return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(31, 63 - shift_amt)
-def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
-  uint64_t enc = 63 - N->getZExtValue();
-  enc = enc > 31 ?
31 : enc; - return CurDAG->getTargetConstant(enc, MVT::i64); -}]>; - -def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_b imm0_31:$imm)))>; -def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)), - (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_b imm0_63:$imm)))>; - -let AddedComplexity = 10 in { -def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; -def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; -} - -def : InstAlias<"asr $dst, $src, $shift", - (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; -def : InstAlias<"asr $dst, $src, $shift", - (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; -def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; -def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; -def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; -def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; -def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; - -def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)), - (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>; -def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)), - (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>; - -def : InstAlias<"lsr $dst, $src, $shift", - (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>; -def : InstAlias<"lsr $dst, $src, $shift", - (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>; -def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>; -def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>; -def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>; -def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>; -def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>; - -//===----------------------------------------------------------------------===// -// Conditionally set flags instructions. -//===----------------------------------------------------------------------===// -defm CCMN : CondSetFlagsImm<0, "ccmn">; -defm CCMP : CondSetFlagsImm<1, "ccmp">; - -defm CCMN : CondSetFlagsReg<0, "ccmn">; -defm CCMP : CondSetFlagsReg<1, "ccmp">; - -//===----------------------------------------------------------------------===// -// Conditional select instructions. 
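Backing up to the shift patterns at the top of this hunk: a constant shl becomes UBFM with the rotate/width pair computed by i32shift_a and i32shift_b, while sra/srl keep the shift amount directly (SBFM/UBFM Rn, #shift, #31). A worked check of that encoding arithmetic (standalone C++):

  #include <cassert>
  int main() {
    unsigned Shift = 3;                  // lsl w0, w1, #3
    unsigned ImmR = (32 - Shift) & 0x1f; // i32shift_a -> 29
    unsigned ImmS = 31 - Shift;          // i32shift_b -> 28
    assert(ImmR == 29 && ImmS == 28);    // i.e. ubfm w0, w1, #29, #28
    return 0;
  }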
-//===----------------------------------------------------------------------===//
-defm CSEL   : CondSelect<0, 0b00, "csel">;
-
-def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
-defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
-defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
-defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
-
-def : Pat<(ARM64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR),
-          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR),
-          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR),
-          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR),
-          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR),
-          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR),
-          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
-
-def : Pat<(ARM64csel (i32 0), (i32 1), (i32 imm:$cc), CPSR),
-          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
-def : Pat<(ARM64csel (i64 0), (i64 1), (i32 imm:$cc), CPSR),
-          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
-def : Pat<(ARM64csel (i32 0), (i32 -1), (i32 imm:$cc), CPSR),
-          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
-def : Pat<(ARM64csel (i64 0), (i64 -1), (i32 imm:$cc), CPSR),
-          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
-
-// These aliases encode the inverse of the condition code written in the
-// assembly source; the parser already inverts the condition code before
-// building the aliased instruction.
-// FIXME: Is this the correct way to handle these aliases?
-def : InstAlias<"cset $dst, $cc", (CSINCWr GPR32:$dst, WZR, WZR, ccode:$cc)>;
-def : InstAlias<"cset $dst, $cc", (CSINCXr GPR64:$dst, XZR, XZR, ccode:$cc)>;
-
-def : InstAlias<"csetm $dst, $cc", (CSINVWr GPR32:$dst, WZR, WZR, ccode:$cc)>;
-def : InstAlias<"csetm $dst, $cc", (CSINVXr GPR64:$dst, XZR, XZR, ccode:$cc)>;
-
-def : InstAlias<"cinc $dst, $src, $cc",
-                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>;
-def : InstAlias<"cinc $dst, $src, $cc",
-                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>;
-
-def : InstAlias<"cinv $dst, $src, $cc",
-                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>;
-def : InstAlias<"cinv $dst, $src, $cc",
-                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>;
-
-def : InstAlias<"cneg $dst, $src, $cc",
-                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>;
-def : InstAlias<"cneg $dst, $src, $cc",
-                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>;
-
-//===----------------------------------------------------------------------===//
-// PC-relative instructions.
-//===----------------------------------------------------------------------===//
-let isReMaterializable = 1 in {
-let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in {
-def ADR  : ADRI<0, "adr", adrlabel, []>;
-} // neverHasSideEffects = 1
-
-def ADRP : ADRI<1, "adrp", adrplabel,
-                [(set GPR64:$Xd, (ARM64adrp tglobaladdr:$label))]>;
-} // isReMaterializable = 1
-
-// Page address of a constant pool entry or block address.
-def : Pat<(ARM64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
-def : Pat<(ARM64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
-
-//===----------------------------------------------------------------------===//
-// Unconditional branch (register) instructions.
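On the cset alias above: with both sources tied to the zero register, CSINC degenerates to "cond ? 0 : 0 + 1", so materializing a boolean needs no branch. A small model of that semantics (C++; the helper is illustrative, not LLVM API):

  #include <cassert>
  // CSINC Rd, Rn, Rm, cc:  Rd = cc ? Rn : Rm + 1
  static unsigned CSInc(bool CC, unsigned Rn, unsigned Rm) {
    return CC ? Rn : Rm + 1;
  }
  int main() {
    // cset w0, <cc> assembles as csinc w0, wzr, wzr, <inverted cc>:
    assert(CSInc(/*CC=*/!true, 0, 0) == 1);  // cset of a true condition -> 1
    assert(CSInc(/*CC=*/!false, 0, 0) == 0); // cset of a false condition -> 0
    return 0;
  }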
-//===----------------------------------------------------------------------===// - -let isReturn = 1, isTerminator = 1, isBarrier = 1 in { -def RET : BranchReg<0b0010, "ret", []>; -def DRPS : SpecialReturn<0b0101, "drps">; -def ERET : SpecialReturn<0b0100, "eret">; -} // isReturn = 1, isTerminator = 1, isBarrier = 1 - -// Default to the LR register. -def : InstAlias<"ret", (RET LR)>; - -let isCall = 1, Defs = [LR], Uses = [SP] in { -def BLR : BranchReg<0b0001, "blr", [(ARM64call GPR64:$Rn)]>; -} // isCall - -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { -def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; -} // isBranch, isTerminator, isBarrier, isIndirectBranch - -// Create a separate pseudo-instruction for codegen to use so that we don't -// flag lr as used in every function. It'll be restored before the RET by the -// epilogue if it's legitimately used. -def RET_ReallyLR : Pseudo<(outs), (ins), [(ARM64retflag)]> { - let isTerminator = 1; - let isBarrier = 1; - let isReturn = 1; -} - -// This is a directive-like pseudo-instruction. The purpose is to insert an -// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction -// (which in the usual case is a BLR). -let hasSideEffects = 1 in -def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> { - let AsmString = ".tlsdesccall $sym"; -} - -// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It -// gets expanded to two MCInsts during lowering. -let isCall = 1, Defs = [LR] in -def TLSDESC_BLR - : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym), - [(ARM64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>; - -def : Pat<(ARM64tlsdesc_call GPR64:$dest, texternalsym:$sym), - (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>; -//===----------------------------------------------------------------------===// -// Conditional branch (immediate) instruction. -//===----------------------------------------------------------------------===// -def Bcc : BranchCond; - -//===----------------------------------------------------------------------===// -// Compare-and-branch instructions. -//===----------------------------------------------------------------------===// -defm CBZ : CmpBranch<0, "cbz", ARM64cbz>; -defm CBNZ : CmpBranch<1, "cbnz", ARM64cbnz>; - -//===----------------------------------------------------------------------===// -// Test-bit-and-branch instructions. -//===----------------------------------------------------------------------===// -def TBZ : TestBranch<0, "tbz", ARM64tbz>; -def TBNZ : TestBranch<1, "tbnz", ARM64tbnz>; - -//===----------------------------------------------------------------------===// -// Unconditional branch (immediate) instructions. -//===----------------------------------------------------------------------===// -let isBranch = 1, isTerminator = 1, isBarrier = 1 in { -def B : BranchImm<0, "b", [(br bb:$addr)]>; -} // isBranch, isTerminator, isBarrier - -let isCall = 1, Defs = [LR], Uses = [SP] in { -def BL : CallImm<1, "bl", [(ARM64call tglobaladdr:$addr)]>; -} // isCall -def : Pat<(ARM64call texternalsym:$func), (BL texternalsym:$func)>; - -//===----------------------------------------------------------------------===// -// Exception generation instructions. 
-//===----------------------------------------------------------------------===// -def BRK : ExceptionGeneration<0b001, 0b00, "brk">; -def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">; -def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">; -def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">; -def HLT : ExceptionGeneration<0b010, 0b00, "hlt">; -def HVC : ExceptionGeneration<0b000, 0b10, "hvc">; -def SMC : ExceptionGeneration<0b000, 0b11, "smc">; -def SVC : ExceptionGeneration<0b000, 0b01, "svc">; - -// DCPSn defaults to an immediate operand of zero if unspecified. -def : InstAlias<"dcps1", (DCPS1 0)>; -def : InstAlias<"dcps2", (DCPS2 0)>; -def : InstAlias<"dcps3", (DCPS3 0)>; - -//===----------------------------------------------------------------------===// -// Load instructions. -//===----------------------------------------------------------------------===// - -// Pair (indexed, offset) -def LDPWi : LoadPairOffset<0b00, 0, GPR32, am_indexed32simm7, "ldp">; -def LDPXi : LoadPairOffset<0b10, 0, GPR64, am_indexed64simm7, "ldp">; -def LDPSi : LoadPairOffset<0b00, 1, FPR32, am_indexed32simm7, "ldp">; -def LDPDi : LoadPairOffset<0b01, 1, FPR64, am_indexed64simm7, "ldp">; -def LDPQi : LoadPairOffset<0b10, 1, FPR128, am_indexed128simm7, "ldp">; - -def LDPSWi : LoadPairOffset<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">; - -// Pair (pre-indexed) -def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, am_indexed32simm7, "ldp">; -def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, am_indexed64simm7, "ldp">; -def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, am_indexed32simm7, "ldp">; -def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, am_indexed64simm7, "ldp">; -def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, am_indexed128simm7, "ldp">; - -def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">; - -// Pair (post-indexed) -def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">; -def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">; -def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">; -def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">; -def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">; - -def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">; - - -// Pair (no allocate) -def LDNPWi : LoadPairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "ldnp">; -def LDNPXi : LoadPairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "ldnp">; -def LDNPSi : LoadPairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "ldnp">; -def LDNPDi : LoadPairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "ldnp">; -def LDNPQi : LoadPairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "ldnp">; - -//--- -// (register offset) -//--- - -let AddedComplexity = 10 in { -// Integer -def LDRBBro : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", - [(set GPR32:$Rt, (zextloadi8 ro_indexed8:$addr))]>; -def LDRHHro : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", - [(set GPR32:$Rt, (zextloadi16 ro_indexed16:$addr))]>; -def LDRWro : Load32RO<0b10, 0, 0b01, GPR32, "ldr", - [(set GPR32:$Rt, (load ro_indexed32:$addr))]>; -def LDRXro : Load64RO<0b11, 0, 0b01, GPR64, "ldr", - [(set GPR64:$Rt, (load ro_indexed64:$addr))]>; - -// Floating-point -def LDRBro : Load8RO<0b00, 1, 0b01, FPR8, "ldr", - [(set FPR8:$Rt, (load ro_indexed8:$addr))]>; -def LDRHro : Load16RO<0b01, 1, 0b01, FPR16, "ldr", - [(set FPR16:$Rt, (load ro_indexed16:$addr))]>; -def LDRSro : Load32RO<0b10, 1, 0b01, FPR32, "ldr", - [(set (f32 FPR32:$Rt), (load ro_indexed32:$addr))]>; -def LDRDro : Load64RO<0b11, 1, 0b01, FPR64, "ldr", - [(set (f64 FPR64:$Rt), (load 
ro_indexed64:$addr))]>; -def LDRQro : Load128RO<0b00, 1, 0b11, FPR128, "ldr", []> { - let mayLoad = 1; -} - -// For regular load, we do not have any alignment requirement. -// Thus, it is safe to directly map the vector loads with interesting -// addressing modes. -// FIXME: We could do the same for bitconvert to floating point vectors. -def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))), - (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub)>; -def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub)>; -def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))), - (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub)>; -def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub)>; -def : Pat <(v2i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))), - (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub)>; -def : Pat <(v4i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub)>; -def : Pat <(v1i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))), - (LDRDro ro_indexed64:$addr)>; -def : Pat <(v2i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (LDRDro ro_indexed64:$addr), dsub)>; - -// Match all load 64 bits width whose type is compatible with FPR64 -def : Pat<(v2f32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v1f64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v8i8 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v4i16 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v2i32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; -def : Pat<(v1i64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>; - -// Match all load 128 bits width whose type is compatible with FPR128 -def : Pat<(v4f32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v2f64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v16i8 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v8i16 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v4i32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(v2i64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; -def : Pat<(f128 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>; - -// Load sign-extended half-word -def LDRSHWro : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", - [(set GPR32:$Rt, (sextloadi16 ro_indexed16:$addr))]>; -def LDRSHXro : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", - [(set GPR64:$Rt, (sextloadi16 ro_indexed16:$addr))]>; - -// Load sign-extended byte -def LDRSBWro : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", - [(set GPR32:$Rt, (sextloadi8 ro_indexed8:$addr))]>; -def LDRSBXro : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", - [(set GPR64:$Rt, (sextloadi8 ro_indexed8:$addr))]>; - -// Load sign-extended word -def LDRSWro : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", - [(set GPR64:$Rt, (sextloadi32 ro_indexed32:$addr))]>; - -// Pre-fetch. 
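The ro_indexed patterns above fold a register index, optionally sign- or zero-extended and scaled by the access size, into the load itself; the pre- and post-indexed pair loads earlier in this hunk instead write the updated base back to the register. A sketch of the register-offset effective-address arithmetic (C++; values hypothetical):

  #include <cstdint>
  int main() {
    uint64_t Base = 0x1000;                     // Xn
    int32_t Index = -2;                         // Wm, sign-extended
    // ldr x0, [x1, w2, sxtw #3] computes base + (sext(index) * 8):
    uint64_t EA = Base + (int64_t)Index * 8;
    return EA == 0x0ff0 ? 0 : 1;
  }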
-def PRFMro : PrefetchRO<0b11, 0, 0b10, "prfm",
-                        [(ARM64Prefetch imm:$Rt, ro_indexed64:$addr)]>;
-
-// zextload -> i64
-def : Pat<(i64 (zextloadi8 ro_indexed8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi16 ro_indexed16:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>;
-
-// zextloadi1 -> zextloadi8
-def : Pat<(i32 (zextloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>;
-def : Pat<(i64 (zextloadi1 ro_indexed8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>;
-
-// extload -> zextload
-def : Pat<(i32 (extloadi16 ro_indexed16:$addr)), (LDRHHro ro_indexed16:$addr)>;
-def : Pat<(i32 (extloadi8 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>;
-def : Pat<(i32 (extloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>;
-def : Pat<(i64 (extloadi32 ro_indexed32:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRWro ro_indexed32:$addr), sub_32)>;
-def : Pat<(i64 (extloadi16 ro_indexed16:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>;
-def : Pat<(i64 (extloadi8 ro_indexed8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>;
-def : Pat<(i64 (extloadi1 ro_indexed8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>;
-
-} // AddedComplexity = 10
-
-//---
-// (unsigned immediate)
-//---
-def LDRXui : LoadUI<0b11, 0, 0b01, GPR64, am_indexed64, "ldr",
-                    [(set GPR64:$Rt, (load am_indexed64:$addr))]>;
-def LDRWui : LoadUI<0b10, 0, 0b01, GPR32, am_indexed32, "ldr",
-                    [(set GPR32:$Rt, (load am_indexed32:$addr))]>;
-def LDRBui : LoadUI<0b00, 1, 0b01, FPR8, am_indexed8, "ldr",
-                    [(set FPR8:$Rt, (load am_indexed8:$addr))]>;
-def LDRHui : LoadUI<0b01, 1, 0b01, FPR16, am_indexed16, "ldr",
-                    [(set FPR16:$Rt, (load am_indexed16:$addr))]>;
-def LDRSui : LoadUI<0b10, 1, 0b01, FPR32, am_indexed32, "ldr",
-                    [(set (f32 FPR32:$Rt), (load am_indexed32:$addr))]>;
-def LDRDui : LoadUI<0b11, 1, 0b01, FPR64, am_indexed64, "ldr",
-                    [(set (f64 FPR64:$Rt), (load am_indexed64:$addr))]>;
-def LDRQui : LoadUI<0b00, 1, 0b11, FPR128, am_indexed128, "ldr",
-                    [(set (f128 FPR128:$Rt), (load am_indexed128:$addr))]>;
-
-// For regular load, we do not have any alignment requirement.
-// Thus, it is safe to directly map the vector loads with interesting
-// addressing modes.
-// FIXME: We could do the same for bitconvert to floating point vectors.
-def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))),
-           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
-                          (LDRBui am_indexed8:$addr), bsub)>;
-def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))),
-           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
-                          (LDRBui am_indexed8:$addr), bsub)>;
-def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))),
-           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
-                          (LDRHui am_indexed16:$addr), hsub)>;
-def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))),
-           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
-                          (LDRHui am_indexed16:$addr), hsub)>;
-def : Pat <(v2i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))),
-           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
-                          (LDRSui am_indexed32:$addr), ssub)>;
-def : Pat <(v4i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))),
-           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
-                          (LDRSui am_indexed32:$addr), ssub)>;
-def : Pat <(v1i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))),
-           (LDRDui am_indexed64:$addr)>;
-def : Pat <(v2i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))),
-           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
-                          (LDRDui am_indexed64:$addr), dsub)>;
-
-// Match all load 64 bits width whose type is compatible with FPR64
-def : Pat<(v2f32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v1f64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v8i8 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v4i16 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v2i32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v1i64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-
-// Match all load 128 bits width whose type is compatible with FPR128
-def : Pat<(v4f32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v2f64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v16i8 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v8i16 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v4i32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v2i64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(f128 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-
-def LDRHHui : LoadUI<0b01, 0, 0b01, GPR32, am_indexed16, "ldrh",
-                     [(set GPR32:$Rt, (zextloadi16 am_indexed16:$addr))]>;
-def LDRBBui : LoadUI<0b00, 0, 0b01, GPR32, am_indexed8, "ldrb",
-                     [(set GPR32:$Rt, (zextloadi8 am_indexed8:$addr))]>;
-// zextload -> i64
-def : Pat<(i64 (zextloadi8 am_indexed8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi16 am_indexed16:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>;
-
-// zextloadi1 -> zextloadi8
-def : Pat<(i32 (zextloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>;
-def : Pat<(i64 (zextloadi1 am_indexed8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>;
-
-// extload -> zextload
-def : Pat<(i32 (extloadi16 am_indexed16:$addr)), (LDRHHui am_indexed16:$addr)>;
-def : Pat<(i32 (extloadi8 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>;
-def : Pat<(i32 (extloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>;
-def : Pat<(i64 (extloadi32 am_indexed32:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>;
-def : Pat<(i64 (extloadi16 am_indexed16:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>;
-def : Pat<(i64 (extloadi8 am_indexed8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>;
-def : Pat<(i64 (extloadi1 am_indexed8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>;
-
-// load sign-extended half-word
-def LDRSHWui : LoadUI<0b01, 0, 0b11, GPR32, am_indexed16, "ldrsh",
-                      [(set GPR32:$Rt, (sextloadi16 am_indexed16:$addr))]>;
-def LDRSHXui : LoadUI<0b01, 0, 0b10, GPR64, am_indexed16, "ldrsh",
-                      [(set GPR64:$Rt, (sextloadi16 am_indexed16:$addr))]>;
-
-// load sign-extended byte
-def LDRSBWui : LoadUI<0b00, 0, 0b11, GPR32, am_indexed8, "ldrsb",
-                      [(set GPR32:$Rt, (sextloadi8 am_indexed8:$addr))]>;
-def LDRSBXui : LoadUI<0b00, 0, 0b10, GPR64, am_indexed8, "ldrsb",
-                      [(set GPR64:$Rt, (sextloadi8 am_indexed8:$addr))]>;
-
-// load sign-extended word
-def LDRSWui : LoadUI<0b10, 0, 0b10, GPR64, am_indexed32, "ldrsw",
-                     [(set GPR64:$Rt, (sextloadi32 am_indexed32:$addr))]>;
-
-// load zero-extended word
-def : Pat<(i64 (zextloadi32 am_indexed32:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>;
-
-// Pre-fetch.
-def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
-                        [(ARM64Prefetch imm:$Rt, am_indexed64:$addr)]>;
-
-//---
-// (literal)
-def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">;
-def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">;
-def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">;
-def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">;
-def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">;
-
-// load sign-extended word
-def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">;
-
-// prefetch
-def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
-//                 [(ARM64Prefetch imm:$Rt, tglobaladdr:$label)]>;
-
-//---
-// (unscaled immediate)
-def LDURXi : LoadUnscaled<0b11, 0, 0b01, GPR64, am_unscaled64, "ldur",
-                          [(set GPR64:$Rt, (load am_unscaled64:$addr))]>;
-def LDURWi : LoadUnscaled<0b10, 0, 0b01, GPR32, am_unscaled32, "ldur",
-                          [(set GPR32:$Rt, (load am_unscaled32:$addr))]>;
-def LDURBi : LoadUnscaled<0b00, 1, 0b01, FPR8, am_unscaled8, "ldur",
-                          [(set FPR8:$Rt, (load am_unscaled8:$addr))]>;
-def LDURHi : LoadUnscaled<0b01, 1, 0b01, FPR16, am_unscaled16, "ldur",
-                          [(set FPR16:$Rt, (load am_unscaled16:$addr))]>;
-def LDURSi : LoadUnscaled<0b10, 1, 0b01, FPR32, am_unscaled32, "ldur",
-                          [(set (f32 FPR32:$Rt), (load am_unscaled32:$addr))]>;
-def LDURDi : LoadUnscaled<0b11, 1, 0b01, FPR64, am_unscaled64, "ldur",
-                          [(set (f64 FPR64:$Rt), (load am_unscaled64:$addr))]>;
-def LDURQi : LoadUnscaled<0b00, 1, 0b11, FPR128, am_unscaled128, "ldur",
-                          [(set (v2f64 FPR128:$Rt), (load am_unscaled128:$addr))]>;
-
-def LDURHHi
-    : LoadUnscaled<0b01, 0, 0b01, GPR32, am_unscaled16, "ldurh",
-                   [(set GPR32:$Rt, (zextloadi16 am_unscaled16:$addr))]>;
-def LDURBBi
-    : LoadUnscaled<0b00, 0, 0b01, GPR32, am_unscaled8, "ldurb",
-                   [(set GPR32:$Rt, (zextloadi8 am_unscaled8:$addr))]>;
-
-// Match all load 64 bits width whose type is compatible with FPR64
-def : Pat<(v2f32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v1f64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v8i8 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v4i16 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v2i32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v1i64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-
-// Match all load 128 bits width whose type is compatible with FPR128
-def : Pat<(v4f32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v2f64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v16i8 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v8i16 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v4i32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v2i64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(f128 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-
-// anyext -> zext
-def : Pat<(i32 (extloadi16 am_unscaled16:$addr)), (LDURHHi am_unscaled16:$addr)>;
-def : Pat<(i32 (extloadi8 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>;
-def : Pat<(i32 (extloadi1 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>;
-def : Pat<(i64 (extloadi32 am_unscaled32:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>;
-def : Pat<(i64 (extloadi16 am_unscaled16:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>;
-def : Pat<(i64 (extloadi8 am_unscaled8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-def : Pat<(i64 (extloadi1 am_unscaled8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-// unscaled zext
-def : Pat<(i32 (zextloadi16 am_unscaled16:$addr)),
-          (LDURHHi am_unscaled16:$addr)>;
-def : Pat<(i32 (zextloadi8 am_unscaled8:$addr)),
-          (LDURBBi am_unscaled8:$addr)>;
-def : Pat<(i32 (zextloadi1 am_unscaled8:$addr)),
-          (LDURBBi am_unscaled8:$addr)>;
-def : Pat<(i64 (zextloadi32 am_unscaled32:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi1 am_unscaled8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-
-
-//---
-// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
-
-// Define new assembler match classes as we want to only match these when
-// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
-// associate a DiagnosticType either, as we want the diagnostic for the
-// canonical form (the scaled operand) to take precedence.
-def MemoryUnscaledFB8Operand : AsmOperandClass {
-  let Name = "MemoryUnscaledFB8";
-  let RenderMethod = "addMemoryUnscaledOperands";
-}
-def MemoryUnscaledFB16Operand : AsmOperandClass {
-  let Name = "MemoryUnscaledFB16";
-  let RenderMethod = "addMemoryUnscaledOperands";
-}
-def MemoryUnscaledFB32Operand : AsmOperandClass {
-  let Name = "MemoryUnscaledFB32";
-  let RenderMethod = "addMemoryUnscaledOperands";
-}
-def MemoryUnscaledFB64Operand : AsmOperandClass {
-  let Name = "MemoryUnscaledFB64";
-  let RenderMethod = "addMemoryUnscaledOperands";
-}
-def MemoryUnscaledFB128Operand : AsmOperandClass {
-  let Name = "MemoryUnscaledFB128";
-  let RenderMethod = "addMemoryUnscaledOperands";
-}
-def am_unscaled_fb8 : Operand<i64> {
-  let ParserMatchClass = MemoryUnscaledFB8Operand;
-  let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled_fb16 : Operand<i64> {
-  let ParserMatchClass = MemoryUnscaledFB16Operand;
-  let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled_fb32 : Operand<i64> {
-  let ParserMatchClass = MemoryUnscaledFB32Operand;
-  let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled_fb64 : Operand<i64> {
-  let ParserMatchClass = MemoryUnscaledFB64Operand;
-  let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled_fb128 : Operand<i64> {
-  let ParserMatchClass = MemoryUnscaledFB128Operand;
-  let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def : InstAlias<"ldr $Rt, $addr", (LDURXi GPR64:$Rt, am_unscaled_fb64:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURWi GPR32:$Rt, am_unscaled_fb32:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURBi FPR8:$Rt, am_unscaled_fb8:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURHi FPR16:$Rt, am_unscaled_fb16:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURSi FPR32:$Rt, am_unscaled_fb32:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURDi FPR64:$Rt, am_unscaled_fb64:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURQi FPR128:$Rt, am_unscaled_fb128:$addr)>;
-
-// zextload -> i64
-def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)),
-          (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>;
-
-// load sign-extended half-word
-def LDURSHWi
-    : LoadUnscaled<0b01, 0, 0b11, GPR32, am_unscaled16, "ldursh",
-                   [(set GPR32:$Rt, (sextloadi16 am_unscaled16:$addr))]>;
-def LDURSHXi
-    : LoadUnscaled<0b01, 0, 0b10, GPR64, am_unscaled16, "ldursh",
-                   [(set GPR64:$Rt, (sextloadi16 am_unscaled16:$addr))]>;
-
-// load sign-extended byte
-def LDURSBWi
-    : LoadUnscaled<0b00, 0, 0b11, GPR32, am_unscaled8, "ldursb",
-                   [(set GPR32:$Rt, (sextloadi8 am_unscaled8:$addr))]>;
-def LDURSBXi
-    : LoadUnscaled<0b00, 0, 0b10, GPR64, am_unscaled8, "ldursb",
-                   [(set GPR64:$Rt, (sextloadi8 am_unscaled8:$addr))]>;
-
-// load sign-extended word
-def LDURSWi
-    : LoadUnscaled<0b10, 0, 0b10, GPR64, am_unscaled32, "ldursw",
-                   [(set GPR64:$Rt, (sextloadi32 am_unscaled32:$addr))]>;
-
-// zero and sign extending aliases from generic LDR* mnemonics to LDUR*.
-def : InstAlias<"ldrb $Rt, $addr", (LDURBBi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrh $Rt, $addr", (LDURHHi GPR32:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsb $Rt, $addr", (LDURSBWi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrsb $Rt, $addr", (LDURSBXi GPR64:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"ldrsh $Rt, $addr", (LDURSHWi GPR32:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsh $Rt, $addr", (LDURSHXi GPR64:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"ldrsw $Rt, $addr", (LDURSWi GPR64:$Rt, am_unscaled_fb32:$addr)>; - -// Pre-fetch. -def PRFUMi : PrefetchUnscaled<0b11, 0, 0b10, "prfum", - [(ARM64Prefetch imm:$Rt, am_unscaled64:$addr)]>; - -//--- -// (unscaled immediate, unprivileged) -def LDTRXi : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">; -def LDTRWi : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">; - -def LDTRHi : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">; -def LDTRBi : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">; - -// load sign-extended half-word -def LDTRSHWi : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">; -def LDTRSHXi : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">; - -// load sign-extended byte -def LDTRSBWi : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">; -def LDTRSBXi : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">; - -// load sign-extended word -def LDTRSWi : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">; - -//--- -// (immediate pre-indexed) -def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">; -def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">; -def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">; -def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">; -def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">; -def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">; -def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">; - -// load sign-extended half-word -def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">; -def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">; - -// load sign-extended byte -def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">; -def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">; - -// load zero-extended byte -def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">; -def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">; - -// load sign-extended word -def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">; - -// ISel pseudos and patterns. See expanded comment on LoadPreIdxPseudo. 
-def LDRDpre_isel : LoadPreIdxPseudo;
-def LDRSpre_isel : LoadPreIdxPseudo;
-def LDRXpre_isel : LoadPreIdxPseudo;
-def LDRWpre_isel : LoadPreIdxPseudo;
-def LDRHHpre_isel : LoadPreIdxPseudo;
-def LDRBBpre_isel : LoadPreIdxPseudo;
-
-def LDRSWpre_isel : LoadPreIdxPseudo;
-def LDRSHWpre_isel : LoadPreIdxPseudo;
-def LDRSHXpre_isel : LoadPreIdxPseudo;
-def LDRSBWpre_isel : LoadPreIdxPseudo;
-def LDRSBXpre_isel : LoadPreIdxPseudo;
-
-//---
-// (immediate post-indexed)
-def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">;
-def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">;
-def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">;
-def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">;
-def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">;
-def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">;
-def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">;
-
-// load sign-extended half-word
-def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
-def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">;
-
-// load sign-extended byte
-def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
-def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">;
-
-// load zero-extended byte
-def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">;
-def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">;
-
-// load sign-extended word
-def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">;
-
-// ISel pseudos and patterns. See expanded comment on LoadPostIdxPseudo.
-def LDRDpost_isel : LoadPostIdxPseudo;
-def LDRSpost_isel : LoadPostIdxPseudo;
-def LDRXpost_isel : LoadPostIdxPseudo;
-def LDRWpost_isel : LoadPostIdxPseudo;
-def LDRHHpost_isel : LoadPostIdxPseudo;
-def LDRBBpost_isel : LoadPostIdxPseudo;
-
-def LDRSWpost_isel : LoadPostIdxPseudo;
-def LDRSHWpost_isel : LoadPostIdxPseudo;
-def LDRSHXpost_isel : LoadPostIdxPseudo;
-def LDRSBWpost_isel : LoadPostIdxPseudo;
-def LDRSBXpost_isel : LoadPostIdxPseudo;
-
-//===----------------------------------------------------------------------===//
-// Store instructions.
-//===----------------------------------------------------------------------===//
-
-// Pair (indexed, offset)
-// FIXME: Use dedicated range-checked addressing mode operand here.
-def STPWi : StorePairOffset<0b00, 0, GPR32, am_indexed32simm7, "stp">;
-def STPXi : StorePairOffset<0b10, 0, GPR64, am_indexed64simm7, "stp">;
-def STPSi : StorePairOffset<0b00, 1, FPR32, am_indexed32simm7, "stp">;
-def STPDi : StorePairOffset<0b01, 1, FPR64, am_indexed64simm7, "stp">;
-def STPQi : StorePairOffset<0b10, 1, FPR128, am_indexed128simm7, "stp">;
-
-// Pair (pre-indexed)
-def STPWpre : StorePairPreIdx<0b00, 0, GPR32, am_indexed32simm7, "stp">;
-def STPXpre : StorePairPreIdx<0b10, 0, GPR64, am_indexed64simm7, "stp">;
-def STPSpre : StorePairPreIdx<0b00, 1, FPR32, am_indexed32simm7, "stp">;
-def STPDpre : StorePairPreIdx<0b01, 1, FPR64, am_indexed64simm7, "stp">;
-def STPQpre : StorePairPreIdx<0b10, 1, FPR128, am_indexed128simm7, "stp">;
-
-// Pair (post-indexed)
-def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">;
-def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">;
-def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">;
-def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">;
-def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">;
-
-// Pair (no allocate)
-def STNPWi : StorePairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "stnp">;
-def STNPXi : StorePairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "stnp">;
-def STNPSi : StorePairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "stnp">;
-def STNPDi : StorePairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "stnp">;
-def STNPQi : StorePairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "stnp">;
-
-//---
-// (Register offset)
-
-let AddedComplexity = 10 in {
-
-// Integer
-def STRHHro : Store16RO<0b01, 0, 0b00, GPR32, "strh",
-                        [(truncstorei16 GPR32:$Rt, ro_indexed16:$addr)]>;
-def STRBBro : Store8RO<0b00, 0, 0b00, GPR32, "strb",
-                       [(truncstorei8 GPR32:$Rt, ro_indexed8:$addr)]>;
-def STRWro : Store32RO<0b10, 0, 0b00, GPR32, "str",
-                       [(store GPR32:$Rt, ro_indexed32:$addr)]>;
-def STRXro : Store64RO<0b11, 0, 0b00, GPR64, "str",
-                       [(store GPR64:$Rt, ro_indexed64:$addr)]>;
-
-// truncstore i64
-def : Pat<(truncstorei8 GPR64:$Rt, ro_indexed8:$addr),
-          (STRBBro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed8:$addr)>;
-def : Pat<(truncstorei16 GPR64:$Rt, ro_indexed16:$addr),
-          (STRHHro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed16:$addr)>;
-def : Pat<(truncstorei32 GPR64:$Rt, ro_indexed32:$addr),
-          (STRWro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed32:$addr)>;
-
-
-// Floating-point
-def STRBro : Store8RO<0b00, 1, 0b00, FPR8, "str",
-                      [(store FPR8:$Rt, ro_indexed8:$addr)]>;
-def STRHro : Store16RO<0b01, 1, 0b00, FPR16, "str",
-                       [(store FPR16:$Rt, ro_indexed16:$addr)]>;
-def STRSro : Store32RO<0b10, 1, 0b00, FPR32, "str",
-                       [(store (f32 FPR32:$Rt), ro_indexed32:$addr)]>;
-def STRDro : Store64RO<0b11, 1, 0b00, FPR64, "str",
-                       [(store (f64 FPR64:$Rt), ro_indexed64:$addr)]>;
-def STRQro : Store128RO<0b00, 1, 0b10, FPR128, "str", []> {
-  let mayStore = 1;
-}
-
-// Match all store 64 bits width whose type is compatible with FPR64
-def : Pat<(store (v2f32 FPR64:$Rn), ro_indexed64:$addr),
-          (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v1f64 FPR64:$Rn), ro_indexed64:$addr),
-          (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v8i8 FPR64:$Rn), ro_indexed64:$addr),
-          (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v4i16 FPR64:$Rn), ro_indexed64:$addr),
-          (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v2i32 FPR64:$Rn), ro_indexed64:$addr),
-          (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v1i64 FPR64:$Rn), ro_indexed64:$addr),
-          (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-
-// Match all store 128 bits width whose type is compatible with FPR128
-def : Pat<(store (v4f32 FPR128:$Rn), ro_indexed128:$addr),
-          (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v2f64 FPR128:$Rn), ro_indexed128:$addr),
-          (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v16i8 FPR128:$Rn), ro_indexed128:$addr),
-          (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v8i16 FPR128:$Rn), ro_indexed128:$addr),
-          (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v4i32 FPR128:$Rn), ro_indexed128:$addr),
-          (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v2i64 FPR128:$Rn), ro_indexed128:$addr),
-          (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (f128 FPR128:$Rn), ro_indexed128:$addr),
-          (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-
-//---
-// (unsigned immediate)
-def STRXui : StoreUI<0b11, 0, 0b00, GPR64, am_indexed64, "str",
-                     [(store GPR64:$Rt, am_indexed64:$addr)]>;
-def STRWui : StoreUI<0b10, 0, 0b00, GPR32, am_indexed32, "str",
-                     [(store GPR32:$Rt, am_indexed32:$addr)]>;
-def STRBui : StoreUI<0b00, 1, 0b00, FPR8, am_indexed8, "str",
-                     [(store FPR8:$Rt, am_indexed8:$addr)]>;
-def STRHui : StoreUI<0b01, 1, 0b00, FPR16, am_indexed16, "str",
-                     [(store FPR16:$Rt, am_indexed16:$addr)]>;
-def STRSui : StoreUI<0b10, 1, 0b00, FPR32, am_indexed32, "str",
-                     [(store (f32 FPR32:$Rt), am_indexed32:$addr)]>;
-def STRDui : StoreUI<0b11, 1, 0b00, FPR64, am_indexed64, "str",
-                     [(store (f64 FPR64:$Rt), am_indexed64:$addr)]>;
-def STRQui : StoreUI<0b00, 1, 0b10, FPR128, am_indexed128, "str", []> {
-  let mayStore = 1;
-}
-
-// Match all store 64 bits width whose type is compatible with FPR64
-def : Pat<(store (v2f32 FPR64:$Rn), am_indexed64:$addr),
-          (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v1f64 FPR64:$Rn), am_indexed64:$addr),
-          (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v8i8 FPR64:$Rn), am_indexed64:$addr),
-          (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v4i16 FPR64:$Rn), am_indexed64:$addr),
-          (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v2i32 FPR64:$Rn), am_indexed64:$addr),
-          (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v1i64 FPR64:$Rn), am_indexed64:$addr),
-          (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-
-// Match all store 128 bits width whose type is compatible with FPR128
-def : Pat<(store (v4f32 FPR128:$Rn), am_indexed128:$addr),
-          (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v2f64 FPR128:$Rn), am_indexed128:$addr),
-          (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v16i8 FPR128:$Rn), am_indexed128:$addr),
-          (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v8i16 FPR128:$Rn), am_indexed128:$addr),
-          (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v4i32 FPR128:$Rn), am_indexed128:$addr),
-          (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v2i64 FPR128:$Rn), am_indexed128:$addr),
-          (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (f128 FPR128:$Rn), am_indexed128:$addr),
-          (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-
-def STRHHui : StoreUI<0b01, 0, 0b00, GPR32, am_indexed16, "strh",
-                      [(truncstorei16 GPR32:$Rt, am_indexed16:$addr)]>;
-def STRBBui : StoreUI<0b00, 0, 0b00, GPR32, am_indexed8, "strb",
-                      [(truncstorei8 GPR32:$Rt, am_indexed8:$addr)]>;
-
-// truncstore i64
-def : Pat<(truncstorei32 GPR64:$Rt, am_indexed32:$addr),
-          (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed32:$addr)>;
-def : Pat<(truncstorei16 GPR64:$Rt, am_indexed16:$addr),
-          (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed16:$addr)>;
-def : Pat<(truncstorei8 GPR64:$Rt, am_indexed8:$addr),
-          (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed8:$addr)>;
-
-} // AddedComplexity = 10
-
-//---
-// (unscaled immediate)
-def STURXi : StoreUnscaled<0b11, 0, 0b00, GPR64, am_unscaled64, "stur",
-                           [(store GPR64:$Rt, am_unscaled64:$addr)]>;
-def STURWi : StoreUnscaled<0b10, 0, 0b00, GPR32, am_unscaled32, "stur",
-                           [(store GPR32:$Rt, am_unscaled32:$addr)]>;
-def STURBi : StoreUnscaled<0b00, 1, 0b00, FPR8, am_unscaled8, "stur",
-                           [(store FPR8:$Rt, am_unscaled8:$addr)]>;
-def STURHi : StoreUnscaled<0b01, 1, 0b00, FPR16, am_unscaled16, "stur",
-                           [(store FPR16:$Rt, am_unscaled16:$addr)]>;
-def STURSi : StoreUnscaled<0b10, 1, 0b00, FPR32, am_unscaled32, "stur",
-                           [(store (f32 FPR32:$Rt), am_unscaled32:$addr)]>;
-def STURDi : StoreUnscaled<0b11, 1, 0b00, FPR64, am_unscaled64, "stur",
-                           [(store (f64 FPR64:$Rt), am_unscaled64:$addr)]>;
-def STURQi : StoreUnscaled<0b00, 1, 0b10, FPR128, am_unscaled128, "stur",
-                           [(store (v2f64 FPR128:$Rt), am_unscaled128:$addr)]>;
-def STURHHi : StoreUnscaled<0b01, 0, 0b00, GPR32, am_unscaled16, "sturh",
-                            [(truncstorei16 GPR32:$Rt, am_unscaled16:$addr)]>;
-def STURBBi : StoreUnscaled<0b00, 0, 0b00, GPR32, am_unscaled8, "sturb",
-                            [(truncstorei8 GPR32:$Rt, am_unscaled8:$addr)]>;
-
-// Match all store 64 bits width whose type is compatible with FPR64
-def : Pat<(store (v2f32 FPR64:$Rn), am_unscaled64:$addr),
-          (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v1f64 FPR64:$Rn), am_unscaled64:$addr),
-          (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v8i8 FPR64:$Rn), am_unscaled64:$addr),
-          (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v4i16 FPR64:$Rn), am_unscaled64:$addr),
-          (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v2i32 FPR64:$Rn), am_unscaled64:$addr),
-          (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v1i64 FPR64:$Rn), am_unscaled64:$addr),
-          (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-
-// Match all store 128 bits width whose type is compatible with FPR128
-def : Pat<(store (v4f32 FPR128:$Rn), am_unscaled128:$addr),
-          (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v2f64 FPR128:$Rn), am_unscaled128:$addr),
-          (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v16i8 FPR128:$Rn), am_unscaled128:$addr),
-          (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v8i16 FPR128:$Rn), am_unscaled128:$addr),
-          (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v4i32 FPR128:$Rn), am_unscaled128:$addr),
-          (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v2i64 FPR128:$Rn), am_unscaled128:$addr),
-          (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (f128 FPR128:$Rn), am_unscaled128:$addr),
-          (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-
-// unscaled i64 truncating stores
-def : Pat<(truncstorei32 GPR64:$Rt, am_unscaled32:$addr),
-          (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled32:$addr)>;
-def : Pat<(truncstorei16 GPR64:$Rt, am_unscaled16:$addr),
-          (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled16:$addr)>;
-def : Pat<(truncstorei8 GPR64:$Rt, am_unscaled8:$addr),
-          (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled8:$addr)>;
-
-//---
-// STR mnemonics fall back to STUR for negative or unaligned offsets.
-def : InstAlias<"str $Rt, $addr", (STURXi GPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURWi GPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURBi FPR8:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURHi FPR16:$Rt, am_unscaled_fb16:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURSi FPR32:$Rt, am_unscaled_fb32:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURDi FPR64:$Rt, am_unscaled_fb64:$addr)>; -def : InstAlias<"str $Rt, $addr", (STURQi FPR128:$Rt, am_unscaled_fb128:$addr)>; - -def : InstAlias<"strb $Rt, $addr", (STURBBi GPR32:$Rt, am_unscaled_fb8:$addr)>; -def : InstAlias<"strh $Rt, $addr", (STURHHi GPR32:$Rt, am_unscaled_fb16:$addr)>; - -//--- -// (unscaled immediate, unprivileged) -def STTRWi : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">; -def STTRXi : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">; - -def STTRHi : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">; -def STTRBi : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">; - -//--- -// (immediate pre-indexed) -def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str">; -def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str">; -def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str">; -def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str">; -def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str">; -def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str">; -def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str">; - -def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb">; -def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh">; - -// ISel pseudos and patterns. See expanded comment on StorePreIdxPseudo. -defm STRDpre : StorePreIdxPseudo; -defm STRSpre : StorePreIdxPseudo; -defm STRXpre : StorePreIdxPseudo; -defm STRWpre : StorePreIdxPseudo; -defm STRHHpre : StorePreIdxPseudo; -defm STRBBpre : StorePreIdxPseudo; -// truncstore i64 -def : Pat<(pre_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRWpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(pre_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRHHpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; -def : Pat<(pre_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off), - (STRBBpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr, - simm9:$off)>; - -//--- -// (immediate post-indexed) -def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str">; -def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str">; -def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str">; -def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str">; -def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str">; -def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str">; -def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str">; - -def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb">; -def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh">; - -// ISel pseudos and patterns. See expanded comment on StorePostIdxPseudo. 
-defm STRDpost : StorePostIdxPseudo;
-defm STRSpost : StorePostIdxPseudo;
-defm STRXpost : StorePostIdxPseudo;
-defm STRWpost : StorePostIdxPseudo;
-defm STRHHpost : StorePostIdxPseudo;
-defm STRBBpost : StorePostIdxPseudo;
-// truncstore i64
-def : Pat<(post_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off),
-          (STRWpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
-                         simm9:$off)>;
-def : Pat<(post_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off),
-          (STRHHpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
-                          simm9:$off)>;
-def : Pat<(post_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off),
-          (STRBBpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
-                          simm9:$off)>;
-
-
-//===----------------------------------------------------------------------===//
-// Load/store exclusive instructions.
-//===----------------------------------------------------------------------===//
-
-def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">;
-def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">;
-def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
-def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
-
-def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
-def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
-def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
-def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
-
-def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
-def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
-def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
-def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
-
-def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">;
-def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
-def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
-def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
-
-def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
-def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
-def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
-def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
-
-def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
-def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
-def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
-def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
-
-def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
-def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
-
-def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
-def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
-
-def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
-def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
-
-def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
-def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
-
-//===----------------------------------------------------------------------===//
-// Scaled floating point to integer conversion instructions.
-//===----------------------------------------------------------------------===//
-
-defm FCVTAS : FPToInteger<0b00, 0b100, "fcvtas", int_arm64_neon_fcvtas>;
-defm FCVTAU : FPToInteger<0b00, 0b101, "fcvtau", int_arm64_neon_fcvtau>;
-defm FCVTMS : FPToInteger<0b10, 0b000, "fcvtms", int_arm64_neon_fcvtms>;
-defm FCVTMU : FPToInteger<0b10, 0b001, "fcvtmu", int_arm64_neon_fcvtmu>;
-defm FCVTNS : FPToInteger<0b00, 0b000, "fcvtns", int_arm64_neon_fcvtns>;
-defm FCVTNU : FPToInteger<0b00, 0b001, "fcvtnu", int_arm64_neon_fcvtnu>;
-defm FCVTPS : FPToInteger<0b01, 0b000, "fcvtps", int_arm64_neon_fcvtps>;
-defm FCVTPU : FPToInteger<0b01, 0b001, "fcvtpu", int_arm64_neon_fcvtpu>;
-defm FCVTZS : FPToInteger<0b11, 0b000, "fcvtzs", fp_to_sint>;
-defm FCVTZU : FPToInteger<0b11, 0b001, "fcvtzu", fp_to_uint>;
-let isCodeGenOnly = 1 in {
-defm FCVTZS_Int : FPToInteger<0b11, 0b000, "fcvtzs", int_arm64_neon_fcvtzs>;
-defm FCVTZU_Int : FPToInteger<0b11, 0b001, "fcvtzu", int_arm64_neon_fcvtzu>;
-}
-
-//===----------------------------------------------------------------------===//
-// Scaled integer to floating point conversion instructions.
-//===----------------------------------------------------------------------===//
-
-defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
-defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
-
-//===----------------------------------------------------------------------===//
-// Unscaled integer to floating point conversion instruction.
-//===----------------------------------------------------------------------===//
-
-defm FMOV : UnscaledConversion<"fmov">;
-
-def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
-def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
-
-def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
-
-def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-
-def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), (COPY_TO_REGCLASS GPR32:$Xn,
-                                               FPR32)>;
-def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), (COPY_TO_REGCLASS FPR32:$Xn,
-                                               GPR32)>;
-def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), (COPY_TO_REGCLASS GPR64:$Xn,
-                                               FPR64)>;
-def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), (COPY_TO_REGCLASS FPR64:$Xn,
-                                               GPR64)>;
-
-//===----------------------------------------------------------------------===//
-// Floating point conversion instruction.
-//===----------------------------------------------------------------------===//
-
-defm FCVT : FPConversion<"fcvt">;
-
-def : Pat<(f32_to_f16 FPR32:$Rn),
-          (i32 (COPY_TO_REGCLASS
-                   (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
-                   GPR32))>;
-
-def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
-                          [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
-
-//===----------------------------------------------------------------------===//
-// Floating point single operand instructions.
-//===----------------------------------------------------------------------===//
-
-defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
-defm FMOV : SingleOperandFPData<0b0000, "fmov">;
-defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
-defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>;
-defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
-defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
-defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_arm64_neon_frintn>;
-defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
-
-def : Pat<(v1f64 (int_arm64_neon_frintn (v1f64 FPR64:$Rn))),
-          (FRINTNDr FPR64:$Rn)>;
-
-// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
-// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
-//
-// TODO: We should really model the FPSR flags correctly. This is really ugly.
-let hasSideEffects = 1 in {
-defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
-}
-
-defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
-
-let SchedRW = [WriteFDiv] in {
-defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
-}
-
-//===----------------------------------------------------------------------===//
-// Floating point two operand instructions.
-//===----------------------------------------------------------------------===//
-
-defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
-let SchedRW = [WriteFDiv] in {
-defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
-}
-defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_arm64_neon_fmaxnm>;
-defm FMAX : TwoOperandFPData<0b0100, "fmax", ARM64fmax>;
-defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_arm64_neon_fminnm>;
-defm FMIN : TwoOperandFPData<0b0101, "fmin", ARM64fmin>;
-let SchedRW = [WriteFMul] in {
-defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
-defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
-}
-defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
-
-def : Pat<(v1f64 (ARM64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (ARM64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_arm64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_arm64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
-          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
-
-//===----------------------------------------------------------------------===//
-// Floating point three operand instructions.
-//===----------------------------------------------------------------------===//
-
-defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;
-defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
-     TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
-defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
-     TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
-defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
-     TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
-
-// The following def pats catch the case where the LHS of an FMA is negated.
-// The TriOpFrag above catches the case where the middle operand is negated.
-def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Rd)),
-          (FMSUBSrrr FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
-
-def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Rd)),
-          (FMSUBDrrr FPR64:$Rd, FPR64:$Rn, FPR64:$Rm)>;
-
-//===----------------------------------------------------------------------===//
-// Floating point comparison instructions.
-//===----------------------------------------------------------------------===//
-
-defm FCMPE : FPComparison<1, "fcmpe">;
-defm FCMP : FPComparison<0, "fcmp", ARM64fcmp>;
-
-//===----------------------------------------------------------------------===//
-// Floating point conditional comparison instructions.
-//===----------------------------------------------------------------------===//
-
-defm FCCMPE : FPCondComparison<1, "fccmpe">;
-defm FCCMP : FPCondComparison<0, "fccmp">;
-
-//===----------------------------------------------------------------------===//
-// Floating point conditional select instruction.
-//===----------------------------------------------------------------------===//
-
-defm FCSEL : FPCondSelect<"fcsel">;
-
-// CSEL instructions providing f128 types need to be handled by a
-// pseudo-instruction since the eventual code will need to introduce basic
-// blocks and control flow.
-def F128CSEL : Pseudo<(outs FPR128:$Rd),
-                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
-                      [(set (f128 FPR128:$Rd),
-                            (ARM64csel FPR128:$Rn, FPR128:$Rm,
-                                       (i32 imm:$cond), CPSR))]> {
-  let Uses = [CPSR];
-  let usesCustomInserter = 1;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Floating point immediate move.
-//===----------------------------------------------------------------------===//
-
-let isReMaterializable = 1 in {
-defm FMOV : FPMoveImmediate<"fmov">;
-}
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD two vector instructions.
-//===----------------------------------------------------------------------===//
-
-defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_arm64_neon_abs>;
-defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_arm64_neon_cls>;
-defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
-defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", ARM64cmeqz>;
-defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", ARM64cmgez>;
-defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", ARM64cmgtz>;
-defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", ARM64cmlez>;
-defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", ARM64cmltz>;
-defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
-defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
-
-defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>;
-defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", ARM64fcmgez>;
-defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>;
-defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", ARM64fcmlez>;
-defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", ARM64fcmltz>;
-defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_arm64_neon_fcvtas>;
-defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_arm64_neon_fcvtau>;
-defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
-def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (v4i16 V64:$Rn))),
-          (FCVTLv4i16 V64:$Rn)>;
-def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
-                                                              (i64 4)))),
-          (FCVTLv8i16 V128:$Rn)>;
-def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
-def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
-                                                    (i64 2))))),
-          (FCVTLv4i32 V128:$Rn)>;
-
-defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_arm64_neon_fcvtms>;
-defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_arm64_neon_fcvtmu>;
-defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_arm64_neon_fcvtns>;
-defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_arm64_neon_fcvtnu>;
-defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
-def : Pat<(v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
-          (FCVTNv4i16 V128:$Rn)>;
-def : Pat<(concat_vectors V64:$Rd,
-                          (v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
-          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
-def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
-          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_arm64_neon_fcvtps>;
-defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_arm64_neon_fcvtpu>;
-defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
-                                        int_arm64_neon_fcvtxn>;
-defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
-defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
-let isCodeGenOnly = 1 in {
-defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs",
-                                       int_arm64_neon_fcvtzs>;
-defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu",
-                                       int_arm64_neon_fcvtzu>;
-}
-defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
-defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_arm64_neon_frecpe>;
-defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>;
-defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
-defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
-defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_arm64_neon_frintn>;
-defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
-defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
-defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
-defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_arm64_neon_frsqrte>;
-defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
-defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
-                             UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
-defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
-// Aliases for MVN -> NOT.
-def : InstAlias<"mvn.8b $Vd, $Vn", (NOTv8i8 V64:$Vd, V64:$Vn)>;
-def : InstAlias<"mvn.16b $Vd, $Vn", (NOTv16i8 V128:$Vd, V128:$Vn)>;
-def : InstAlias<"mvn $Vd.8b, $Vn.8b", (NOTv8i8 V64:$Vd, V64:$Vn)>;
-def : InstAlias<"mvn $Vd.16b, $Vn.16b", (NOTv16i8 V128:$Vd, V128:$Vn)>;
-
-def : Pat<(ARM64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>;
-def : Pat<(ARM64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
-def : Pat<(ARM64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>;
-def : Pat<(ARM64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
-def : Pat<(ARM64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>;
-def : Pat<(ARM64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
-def : Pat<(ARM64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;
-
-def : Pat<(ARM64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(ARM64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(ARM64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(ARM64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(ARM64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(ARM64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(ARM64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-
-def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-
-defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_arm64_neon_rbit>;
-defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", ARM64rev16>;
-defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", ARM64rev32>;
-defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", ARM64rev64>;
-defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
-       BinOpFrag<(add node:$LHS, (int_arm64_neon_saddlp node:$RHS))> >;
-defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_arm64_neon_saddlp>;
-defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
-defm SHLL : SIMDVectorLShiftLongBySizeBHS;
-defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_arm64_neon_sqabs>;
-defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_arm64_neon_sqneg>;
-defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_arm64_neon_sqxtn>;
-defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_arm64_neon_sqxtun>;
-defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_arm64_neon_suqadd>;
-defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
-       BinOpFrag<(add node:$LHS, (int_arm64_neon_uaddlp node:$RHS))> >;
-defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
-                                int_arm64_neon_uaddlp>;
-defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
-defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_arm64_neon_uqxtn>;
-defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_arm64_neon_urecpe>;
-defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_arm64_neon_ursqrte>;
-defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_arm64_neon_usqadd>;
-defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
-
-def : Pat<(v2f32 (ARM64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
-def : Pat<(v4f32 (ARM64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
-
-// Patterns for vector long shift (by element width). These need to match all
-// three of zext, sext and anyext so it's easier to pull the patterns out of the
-// definition.
-multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
-  def : Pat<(ARM64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
-            (SHLLv8i8 V64:$Rn)>;
-  def : Pat<(ARM64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
-            (SHLLv16i8 V128:$Rn)>;
-  def : Pat<(ARM64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
-            (SHLLv4i16 V64:$Rn)>;
-  def : Pat<(ARM64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
-            (SHLLv8i16 V128:$Rn)>;
-  def : Pat<(ARM64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
-            (SHLLv2i32 V64:$Rn)>;
-  def : Pat<(ARM64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
-            (SHLLv4i32 V128:$Rn)>;
-}
-
-defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
-defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
-defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD three vector instructions.
-//===----------------------------------------------------------------------===//
-
-defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
-defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_arm64_neon_addp>;
-defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", ARM64cmeq>;
-defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", ARM64cmge>;
-defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", ARM64cmgt>;
-defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", ARM64cmhi>;
-defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", ARM64cmhs>;
-defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", ARM64cmtst>;
-defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_arm64_neon_fabd>;
-defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_arm64_neon_facge>;
-defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_arm64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_arm64_neon_addp>;
-defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
-defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>;
-defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>;
-defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>;
-defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
-defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_arm64_neon_fmaxnmp>;
-defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_arm64_neon_fmaxnm>;
-defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_arm64_neon_fmaxp>;
-defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", ARM64fmax>;
-defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_arm64_neon_fminnmp>;
-defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_arm64_neon_fminnm>;
-defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_arm64_neon_fminp>;
-defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", ARM64fmin>;
-
-// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
-// instruction expects the addend first, while the fma intrinsic puts it last.
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; - -// The following def pats catch the case where the LHS of an FMA is negated. -// The TriOpFrag above catches the case where the middle operand is negated. -def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)), - (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>; - -def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), - (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>; - -def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), - (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; - -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_arm64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_arm64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; -defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", - TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; -defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", - TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >; -defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>; -defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_arm64_neon_pmul>; -defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_sabd node:$MHS, node:$RHS))> >; -defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_arm64_neon_sabd>; -defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_arm64_neon_shadd>; -defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_arm64_neon_shsub>; -defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_arm64_neon_smaxp>; -defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_arm64_neon_smax>; -defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_arm64_neon_sminp>; -defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_arm64_neon_smin>; -defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_arm64_neon_sqsub>; -defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_arm64_neon_srhadd>; -defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_arm64_neon_sshl>; -defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>; -defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba", - TriOpFrag<(add node:$LHS, (int_arm64_neon_uabd node:$MHS, node:$RHS))> >; -defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_arm64_neon_uabd>; -defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_arm64_neon_uhadd>; -defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_arm64_neon_uhsub>; -defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_arm64_neon_umaxp>; -defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_arm64_neon_umax>; -defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_arm64_neon_uminp>; -defm UMIN : 
SIMDThreeSameVectorBHS<1,0b01101,"umin", int_arm64_neon_umin>;
-defm UQADD  : SIMDThreeSameVector<1,0b00001,"uqadd", int_arm64_neon_uqadd>;
-defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_arm64_neon_uqrshl>;
-defm UQSHL  : SIMDThreeSameVector<1,0b01001,"uqshl", int_arm64_neon_uqshl>;
-defm UQSUB  : SIMDThreeSameVector<1,0b00101,"uqsub", int_arm64_neon_uqsub>;
-defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_arm64_neon_urhadd>;
-defm URSHL  : SIMDThreeSameVector<1,0b01010,"urshl", int_arm64_neon_urshl>;
-defm USHL   : SIMDThreeSameVector<1,0b01000,"ushl", int_arm64_neon_ushl>;
-
-defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
-defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
-                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
-defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
-defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", ARM64bit>;
-defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
-    TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
-defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
-defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
-                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
-defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
-
-// FIXME: the .16b and .8b variants should be emitted by the
-// AsmWriter. TableGen's AsmWriter-generator doesn't deal with variant syntaxes
-// in aliases yet though.
-def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
-                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
-def : InstAlias<"{mov\t$dst.8h, $src.8h|mov.8h\t$dst, $src}",
-                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
-def : InstAlias<"{mov\t$dst.4s, $src.4s|mov.4s\t$dst, $src}",
-                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
-def : InstAlias<"{mov\t$dst.2d, $src.2d|mov.2d\t$dst, $src}",
-                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
-
-def : InstAlias<"{mov\t$dst.8b, $src.8b|mov.8b\t$dst, $src}",
-                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
-def : InstAlias<"{mov\t$dst.4h, $src.4h|mov.4h\t$dst, $src}",
-                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
-def : InstAlias<"{mov\t$dst.2s, $src.2s|mov.2s\t$dst, $src}",
-                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
-def : InstAlias<"{mov\t$dst.1d, $src.1d|mov.1d\t$dst, $src}",
-                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
-
-def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
-                "|cmls.8b\t$dst, $src1, $src2}",
-                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
-                "|cmls.16b\t$dst, $src1, $src2}",
-                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
-                "|cmls.4h\t$dst, $src1, $src2}",
-                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
-                "|cmls.8h\t$dst, $src1, $src2}",
-                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
-                "|cmls.2s\t$dst, $src1, $src2}",
-                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
-                "|cmls.4s\t$dst, $src1, $src2}",
-                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
-                "|cmls.2d\t$dst, $src1, $src2}",
-                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
-                "|cmlo.8b\t$dst, $src1, $src2}",
-                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
-                "|cmlo.16b\t$dst, $src1, $src2}",
-                (CMHIv16i8
V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" # - "|cmlo.4h\t$dst, $src1, $src2}", - (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" # - "|cmlo.8h\t$dst, $src1, $src2}", - (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" # - "|cmlo.2s\t$dst, $src1, $src2}", - (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" # - "|cmlo.4s\t$dst, $src1, $src2}", - (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" # - "|cmlo.2d\t$dst, $src1, $src2}", - (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" # - "|cmle.8b\t$dst, $src1, $src2}", - (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" # - "|cmle.16b\t$dst, $src1, $src2}", - (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" # - "|cmle.4h\t$dst, $src1, $src2}", - (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" # - "|cmle.8h\t$dst, $src1, $src2}", - (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" # - "|cmle.2s\t$dst, $src1, $src2}", - (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" # - "|cmle.4s\t$dst, $src1, $src2}", - (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" # - "|cmle.2d\t$dst, $src1, $src2}", - (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" # - "|cmlt.8b\t$dst, $src1, $src2}", - (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" # - "|cmlt.16b\t$dst, $src1, $src2}", - (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" # - "|cmlt.4h\t$dst, $src1, $src2}", - (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" # - "|cmlt.8h\t$dst, $src1, $src2}", - (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" # - "|cmlt.2s\t$dst, $src1, $src2}", - (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" # - "|cmlt.4s\t$dst, $src1, $src2}", - (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # - "|cmlt.2d\t$dst, $src1, $src2}", - (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # - "|fcmle.2s\t$dst, $src1, $src2}", - (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" # - "|fcmle.4s\t$dst, $src1, $src2}", - (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # - "|fcmle.2d\t$dst, $src1, $src2}", - (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # - "|fcmlt.2s\t$dst, $src1, $src2}", - (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" # - "|fcmlt.4s\t$dst, $src1, $src2}", - (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # - 
"|fcmlt.2d\t$dst, $src1, $src2}", - (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # - "|facle.2s\t$dst, $src1, $src2}", - (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" # - "|facle.4s\t$dst, $src1, $src2}", - (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # - "|facle.2d\t$dst, $src1, $src2}", - (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # - "|faclt.2s\t$dst, $src1, $src2}", - (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; -def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" # - "|faclt.4s\t$dst, $src1, $src2}", - (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>; -def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" # - "|faclt.2d\t$dst, $src1, $src2}", - (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three scalar instructions. -//===----------------------------------------------------------------------===// - -defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>; -defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", ARM64cmeq>; -defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", ARM64cmge>; -defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", ARM64cmgt>; -defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", ARM64cmhi>; -defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", ARM64cmhs>; -defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", ARM64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_arm64_sisd_fabd>; -def : Pat<(v1f64 (int_arm64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), - (FABD64 FPR64:$Rn, FPR64:$Rm)>; -defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", - int_arm64_neon_facge>; -defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", - int_arm64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_arm64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_arm64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_arm64_neon_frsqrts>; -defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_arm64_neon_sqadd>; -defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_arm64_neon_sqrdmulh>; -defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_arm64_neon_sqrshl>; -defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_arm64_neon_sqshl>; -defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_arm64_neon_sqsub>; -defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_arm64_neon_srshl>; -defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_arm64_neon_sshl>; -defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>; -defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_arm64_neon_uqadd>; -defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_arm64_neon_uqrshl>; -defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_arm64_neon_uqshl>; -defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_arm64_neon_uqsub>; -defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_arm64_neon_urshl>; -defm USHL : SIMDThreeScalarD< 1, 0b01000, 
"ushl", int_arm64_neon_ushl>; - -def : InstAlias<"cmls $dst, $src1, $src2", - (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmle $dst, $src1, $src2", - (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmlo $dst, $src1, $src2", - (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"cmlt $dst, $src1, $src2", - (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"fcmle $dst, $src1, $src2", - (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"fcmle $dst, $src1, $src2", - (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"fcmlt $dst, $src1, $src2", - (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"fcmlt $dst, $src1, $src2", - (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"facle $dst, $src1, $src2", - (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"facle $dst, $src1, $src2", - (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; -def : InstAlias<"faclt $dst, $src1, $src2", - (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>; -def : InstAlias<"faclt $dst, $src1, $src2", - (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD three scalar instructions (mixed operands). -//===----------------------------------------------------------------------===// -defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull", - int_arm64_neon_sqdmulls_scalar>; -defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">; -defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">; - -def : Pat<(i64 (int_arm64_neon_sqadd (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), - (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; -def : Pat<(i64 (int_arm64_neon_sqsub (i64 FPR64:$Rd), - (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (i32 FPR32:$Rm))))), - (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>; - -//===----------------------------------------------------------------------===// -// Advanced SIMD two scalar instructions. 
-//===----------------------------------------------------------------------===// - -defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_arm64_neon_abs>; -defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", ARM64cmeqz>; -defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", ARM64cmgez>; -defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", ARM64cmgtz>; -defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", ARM64cmlez>; -defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", ARM64cmltz>; -defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>; -defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", ARM64fcmgez>; -defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>; -defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", ARM64fcmlez>; -defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", ARM64fcmltz>; -defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; -defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; -defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; -defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">; -defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">; -defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">; -defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">; -defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">; -def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; -defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">; -defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">; -defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">; -defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; -defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; -defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", - UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", ARM64sitof>; -defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_arm64_neon_sqabs>; -defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_arm64_neon_sqneg>; -defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_arm64_neon_scalar_sqxtn>; -defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_arm64_neon_scalar_sqxtun>; -defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", - int_arm64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", ARM64uitof>; -defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_arm64_neon_scalar_uqxtn>; -defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", - int_arm64_neon_usqadd>; - -def : Pat<(v1i64 (int_arm64_neon_fcvtas (v1f64 FPR64:$Rn))), - (FCVTASv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtau (v1f64 FPR64:$Rn))), - (FCVTAUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtms (v1f64 FPR64:$Rn))), - (FCVTMSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtmu (v1f64 FPR64:$Rn))), - (FCVTMUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtns (v1f64 FPR64:$Rn))), - (FCVTNSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtnu (v1f64 FPR64:$Rn))), - (FCVTNUv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtps (v1f64 FPR64:$Rn))), - (FCVTPSv1i64 FPR64:$Rn)>; -def : Pat<(v1i64 (int_arm64_neon_fcvtpu (v1f64 FPR64:$Rn))), - (FCVTPUv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frecpe (f32 FPR32:$Rn))), - (FRECPEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frecpe (f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frecpe (v1f64 FPR64:$Rn))), - (FRECPEv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frecpx (f32 FPR32:$Rn))), - (FRECPXv1i32 FPR32:$Rn)>; 
-def : Pat<(f64 (int_arm64_neon_frecpx (f64 FPR64:$Rn))), - (FRECPXv1i64 FPR64:$Rn)>; - -def : Pat<(f32 (int_arm64_neon_frsqrte (f32 FPR32:$Rn))), - (FRSQRTEv1i32 FPR32:$Rn)>; -def : Pat<(f64 (int_arm64_neon_frsqrte (f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; -def : Pat<(v1f64 (int_arm64_neon_frsqrte (v1f64 FPR64:$Rn))), - (FRSQRTEv1i64 FPR64:$Rn)>; - -// If an integer is about to be converted to a floating point value, -// just load it on the floating point unit. -// Here are the patterns for 8 and 16-bits to float. -// 8-bits -> float. -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), bsub))>; -// 16-bits -> float. -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub))>; -def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), - (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), hsub))>; -// 32-bits are handled in target specific dag combine: -// performIntToFpCombine. -// 64-bits integer to 32-bits floating point, not possible with -// UCVTF on floating point registers (both source and destination -// must have the same size). - -// Here are the patterns for 8, 16, 32, and 64-bits to double. -// 8-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBro ro_indexed8:$addr), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRBui am_indexed8:$addr), bsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURBi am_unscaled8:$addr), bsub))>; -// 16-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHro ro_indexed16:$addr), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRHui am_indexed16:$addr), hsub))>; -def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURHi am_unscaled16:$addr), hsub))>; -// 32-bits -> double. -def : Pat <(f64 (uint_to_fp (i32 (load ro_indexed32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSro ro_indexed32:$addr), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 (load am_indexed32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDRSui am_indexed32:$addr), ssub))>; -def : Pat <(f64 (uint_to_fp (i32 (load am_unscaled32:$addr)))), - (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (LDURSi am_unscaled32:$addr), ssub))>; -// 64-bits -> double are handled in target specific dag combine: -// performIntToFpCombine. 
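In C terms the payoff of these load patterns is conversions like the following (a sketch; the listed assembly is what the patterns are expected to select, not verified output):

    #include <stdint.h>

    /* The narrow integer is loaded straight into a SIMD register and
       converted there, avoiding a GPR->FPR transfer:
         ldr b0, [x0] ; ucvtf s0, s0   (8 bits  -> float)
         ldr h0, [x0] ; ucvtf d0, d0   (16 bits -> double) */
    float  u8_to_f32(const uint8_t *p)   { return (float)*p;  }
    double u16_to_f64(const uint16_t *p) { return (double)*p; }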
- -//===----------------------------------------------------------------------===// -// Advanced SIMD three different-sized vector instructions. -//===----------------------------------------------------------------------===// - -defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_arm64_neon_addhn>; -defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_arm64_neon_subhn>; -defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_arm64_neon_raddhn>; -defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_arm64_neon_rsubhn>; -defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_arm64_neon_pmull>; -defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal", - int_arm64_neon_sabd>; -defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", - int_arm64_neon_sabd>; -defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl", - BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>; -defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw", - BinOpFrag<(add node:$LHS, (sext node:$RHS))>>; -defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_arm64_neon_smull>; -defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal", - int_arm64_neon_sqadd>; -defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl", - int_arm64_neon_sqsub>; -defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull", - int_arm64_neon_sqdmull>; -defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl", - BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>; -defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", - BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; -defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", - int_arm64_neon_uabd>; -defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - int_arm64_neon_uabd>; -defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", - BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; -defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", - BinOpFrag<(add node:$LHS, (zext node:$RHS))>>; -defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_arm64_neon_umull>; -defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl", - BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>; -defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw", - BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>; - -// Patterns for 64-bit pmull -def : Pat<(int_arm64_neon_pmull64 V64:$Rn, V64:$Rm), - (PMULLv1i64 V64:$Rn, V64:$Rm)>; -def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), - (vector_extract (v2i64 V128:$Rm), (i64 1))), - (PMULLv2i64 V128:$Rn, V128:$Rm)>; - -// CodeGen patterns for addhn and subhn instructions, which can actually be -// written in LLVM IR without too much difficulty. 
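For instance, the narrowing-add idiom looks like this from C (a sketch with ACLE intrinsics; vaddhn_s16 produces exactly the add+lshr+trunc shape the patterns below match):

    #include <arm_neon.h>

    /* addhn.8b: add, keep the high 8 bits of each 16-bit lane, narrow.
       As IR: trunc(lshr(add(a, b), 8)) on <8 x i16>. */
    int8x8_t addhn(int16x8_t a, int16x8_t b) {
      return vaddhn_s16(a, b);
    }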
- -// ADDHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))), - (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 16))))), - (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 32))))), - (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 8))))), - (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 16))))), - (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm), - (i32 32))))), - (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - -// SUBHN -def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))), - (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 16))))), - (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 32))))), - (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v8i8 V64:$Rd), - (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 8))))), - (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v4i16 V64:$Rd), - (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 16))))), - (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; -def : Pat<(concat_vectors (v2i32 V64:$Rd), - (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm), - (i32 32))))), - (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub), - V128:$Rn, V128:$Rm)>; - -//---------------------------------------------------------------------------- -// AdvSIMD bitwise extract from vector instruction. -//---------------------------------------------------------------------------- - -defm EXT : SIMDBitwiseExtract<"ext">; - -def : Pat<(v4i16 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v8i16 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v2f32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))), - (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>; -def : Pat<(v4i32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v4f32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2i64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; -def : Pat<(v2f64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))), - (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>; - -// We use EXT to handle extract_subvector to copy the upper 64-bits of a -// 128-bit vector. 
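Concretely (C sketch, same hedging as above): taking the high half of a 128-bit vector has no dedicated instruction, so the patterns below reuse EXT with both operands equal and a byte offset of 8:

    #include <arm_neon.h>

    /* extract_subvector of the upper half; expected lowering per the
       patterns below: ext v0.16b, v0.16b, v0.16b, #8. */
    int8x8_t high_half(int8x16_t v) {
      return vget_high_s8(v);
    }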
-def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; -def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))), - (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>; - - -//---------------------------------------------------------------------------- -// AdvSIMD zip vector -//---------------------------------------------------------------------------- - -defm TRN1 : SIMDZipVector<0b010, "trn1", ARM64trn1>; -defm TRN2 : SIMDZipVector<0b110, "trn2", ARM64trn2>; -defm UZP1 : SIMDZipVector<0b001, "uzp1", ARM64uzp1>; -defm UZP2 : SIMDZipVector<0b101, "uzp2", ARM64uzp2>; -defm ZIP1 : SIMDZipVector<0b011, "zip1", ARM64zip1>; -defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>; - -//---------------------------------------------------------------------------- -// AdvSIMD TBL/TBX instructions -//---------------------------------------------------------------------------- - -defm TBL : SIMDTableLookup< 0, "tbl">; -defm TBX : SIMDTableLookupTied<1, "tbx">; - -def : Pat<(v8i8 (int_arm64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), - (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))), - (TBLv16i8One V128:$Ri, V128:$Rn)>; - -def : Pat<(v8i8 (int_arm64_neon_tbx1 (v8i8 V64:$Rd), - (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))), - (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>; -def : Pat<(v16i8 (int_arm64_neon_tbx1 (v16i8 V128:$Rd), - (v16i8 V128:$Ri), (v16i8 V128:$Rn))), - (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>; - - -//---------------------------------------------------------------------------- -// AdvSIMD scalar CPY instruction -//---------------------------------------------------------------------------- - -defm CPY : SIMDScalarCPY<"cpy">; - -//---------------------------------------------------------------------------- -// AdvSIMD scalar pairwise instructions -//---------------------------------------------------------------------------- - -defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; -defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">; -defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; -defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; -defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; -defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; -def : Pat<(i64 (int_arm64_neon_saddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(i64 (int_arm64_neon_uaddv (v2i64 V128:$Rn))), - (ADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v2f32 V64:$Rn))), - (FADDPv2i32p V64:$Rn)>; -def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))), - (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>; -def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))), - (FADDPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))), - (FMAXNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))), - (FMAXNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))), - 
(FMAXPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))), - (FMAXPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))), - (FMINNMPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))), - (FMINNMPv2i64p V128:$Rn)>; -def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))), - (FMINPv2i32p V64:$Rn)>; -def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))), - (FMINPv2i64p V128:$Rn)>; - -//---------------------------------------------------------------------------- -// AdvSIMD INS/DUP instructions -//---------------------------------------------------------------------------- - -def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>; -def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>; -def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>; -def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>; -def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>; -def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>; -def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>; - -def DUPv2i64lane : SIMDDup64FromElement; -def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>; -def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>; -def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>; -def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>; -def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>; -def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>; - -def : Pat<(v2f32 (ARM64dup (f32 FPR32:$Rn))), - (v2f32 (DUPv2i32lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), - (i64 0)))>; -def : Pat<(v4f32 (ARM64dup (f32 FPR32:$Rn))), - (v4f32 (DUPv4i32lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub), - (i64 0)))>; -def : Pat<(v2f64 (ARM64dup (f64 FPR64:$Rn))), - (v2f64 (DUPv2i64lane - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub), - (i64 0)))>; - -def : Pat<(v2f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), - (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v4f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)), - (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>; -def : Pat<(v2f64 (ARM64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)), - (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>; - -defm SMOV : SMov; -defm UMOV : UMov; - -def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), - (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8), - (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), - (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>; -def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), - (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; - -// Extracting i8 or i16 elements will have the zero-extend transformed to -// an 'and' mask by type legalization since neither i8 nor i16 are legal types -// for ARM64. Match these patterns here since UMOV already zeroes out the high -// bits of the destination register. 
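From C, such a lane read is a cast-free return (sketch; the `and` inserted by type legalization is what the patterns below fold into a single umov):

    #include <arm_neon.h>
    #include <stdint.h>

    /* i8 lane reads legalize to extract + (and x, 0xff); UMOV already
       zeroes the high bits, so one umov w0, v0.b[3] suffices. */
    uint32_t byte_lane3(uint8x16_t v) {
      return vgetq_lane_u8(v, 3);
    }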
-def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), - (i32 0xff)), - (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>; -def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx), - (i32 0xffff)), - (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>; - -defm INS : SIMDIns; - -def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)), - (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>; -def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)), - (EXTRACT_SUBREG - (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>; - -def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)), - (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>; -def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)), - (EXTRACT_SUBREG - (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>; - -def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))), - (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - (i32 FPR32:$Rn), ssub))>; -def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))), - (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), - (i32 FPR32:$Rn), ssub))>; -def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), - (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - (i64 FPR64:$Rn), dsub))>; - -def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; -def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), - (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; -def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>; - -def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn), - (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), - (EXTRACT_SUBREG - (INSvi32lane - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), - VectorIndexS:$imm, - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), - (i64 0)), - dsub)>; -def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn), - (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))), - (INSvi32lane - V128:$Rn, VectorIndexS:$imm, - (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)), - (i64 0))>; -def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn), - (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))), - (INSvi64lane - V128:$Rn, VectorIndexD:$imm, - (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)), - (i64 0))>; - -// Copy an element at a constant index in one vector into a constant indexed -// element of another. -// FIXME refactor to a shared class/dev parameterized on vector type, vector -// index type and INS extension -def : Pat<(v16i8 (int_arm64_neon_vcopy_lane - (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs), - VectorIndexB:$idx2)), - (v16i8 (INSvi8lane - V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2) - )>; -def : Pat<(v8i16 (int_arm64_neon_vcopy_lane - (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs), - VectorIndexH:$idx2)), - (v8i16 (INSvi16lane - V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2) - )>; -def : Pat<(v4i32 (int_arm64_neon_vcopy_lane - (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs), - VectorIndexS:$idx2)), - (v4i32 (INSvi32lane - V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2) - )>; -def : Pat<(v2i64 (int_arm64_neon_vcopy_lane - (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs), - VectorIndexD:$idx2)), - (v2i64 (INSvi64lane - V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2) - )>; - -// Floating point vector extractions are codegen'd as either a sequence of -// subregister extractions, possibly fed by an INS if the lane number is -// anything other than zero. 
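In C (sketch): lane 0 costs nothing beyond the subregister read, while a nonzero lane needs the extra lane move the patterns below generate:

    #include <arm_neon.h>

    /* Lane 0 is a plain subregister extraction (no instruction);
       lane 1 is expected to need one lane-to-lane move first. */
    float lane0(float32x4_t v) { return vgetq_lane_f32(v, 0); }
    float lane1(float32x4_t v) { return vgetq_lane_f32(v, 1); }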
-def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
-          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
-def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
-          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
-def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
-          (f64 (EXTRACT_SUBREG
-                  (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
-                               V128:$Rn, VectorIndexD:$idx),
-                  dsub))>;
-def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
-          (f32 (EXTRACT_SUBREG
-                  (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
-                               V128:$Rn, VectorIndexS:$idx),
-                  ssub))>;
-
-// All concat_vectors operations are canonicalised to act on i64 vectors for
-// ARM64. In the general case we need an instruction, which had just as well be
-// INS.
-class ConcatPat<ValueType DstTy, ValueType SrcTy>
-  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
-        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
-                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
-
-def : ConcatPat<v2i64, v1i64>;
-def : ConcatPat<v2f64, v1f64>;
-def : ConcatPat<v4i32, v2i32>;
-def : ConcatPat<v4f32, v2f32>;
-def : ConcatPat<v8i16, v4i16>;
-def : ConcatPat<v16i8, v8i8>;
-
-// If the high lanes are undef, though, we can just ignore them:
-class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
-  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
-        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
-
-def : ConcatUndefPat<v2i64, v1i64>;
-def : ConcatUndefPat<v2f64, v1f64>;
-def : ConcatUndefPat<v4i32, v2i32>;
-def : ConcatUndefPat<v4f32, v2f32>;
-def : ConcatUndefPat<v8i16, v4i16>;
-def : ConcatUndefPat<v16i8, v8i8>;
-
-//----------------------------------------------------------------------------
-// AdvSIMD across lanes instructions
-//----------------------------------------------------------------------------
-
-defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
-defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
-defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
-defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
-defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
-defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
-defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
-defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>;
-defm FMAXV   : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>;
-defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>;
-defm FMINV   : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>;
-
-multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
-                                          SDPatternOperator intOp> {
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-  def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
-        (i32 (SMOVvi8to32
-          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
-            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
-          (i64 0)))>;
-  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
-        (i32 (SMOVvi8to32
-          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
-            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
-          (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
-        (i32 (SMOVvi8to32
-          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
-            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
-          (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
-        (i32 (SMOVvi8to32
-          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
-            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
-          (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it as smov already
-// performed it
-def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
-        (i32 (SMOVvi16to32
-          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
-            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
-          (i64 0)))>;
-def : Pat<(i32 (intOp
(v4i16 V64:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - (i64 0)))>; -// If there is a sign extension after this intrinsic, consume it as smov already -// performed it -def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; -} - -multiclass SIMDAcrossLanesUnsignedIntrinsic { -// If there is a masking operation keeping only what has been actually -// generated, consume it. - def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub), - ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub), - ssub))>; - -// If there is a masking operation keeping only what has been actually -// generated, consume it. -def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub), - ssub))>; -// If there is a masking operation keeping only what has been actually -// generated, consume it. 
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub), - ssub))>; - -def : Pat<(i32 (intOp (v4i32 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub), - ssub))>; - -} - -multiclass SIMDAcrossLanesSignedLongIntrinsic { - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), - (i64 0)))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (SMOVvi16to32 - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), - (i64 0)))>; - -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), - ssub))>; - -def : Pat<(i64 (intOp (v4i32 V128:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), - dsub))>; -} - -multiclass SIMDAcrossLanesUnsignedLongIntrinsic { - def : Pat<(i32 (intOp (v8i8 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub), - ssub))>; -def : Pat<(i32 (intOp (v16i8 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub), - ssub))>; - -def : Pat<(i32 (intOp (v4i16 V64:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub), - ssub))>; -def : Pat<(i32 (intOp (v8i16 V128:$Rn))), - (i32 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub), - ssub))>; - -def : Pat<(i64 (intOp (v4i32 V128:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (!cast(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub), - dsub))>; -} - -defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_arm64_neon_saddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_saddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_arm64_neon_uaddv>; -// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm -def : Pat<(i32 (int_arm64_neon_uaddv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_arm64_neon_smaxv>; -def : Pat<(i32 (int_arm64_neon_smaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_arm64_neon_sminv>; -def : Pat<(i32 (int_arm64_neon_sminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_arm64_neon_umaxv>; -def : Pat<(i32 (int_arm64_neon_umaxv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : 
SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_arm64_neon_uminv>; -def : Pat<(i32 (int_arm64_neon_uminv (v2i32 V64:$Rn))), - (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>; - -defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_arm64_neon_saddlv>; -defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_arm64_neon_uaddlv>; - -// The vaddlv_s32 intrinsic gets mapped to SADDLP. -def : Pat<(i64 (int_arm64_neon_saddlv (v2i32 V64:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (SADDLPv2i32_v1i64 V64:$Rn), dsub), - dsub))>; -// The vaddlv_u32 intrinsic gets mapped to UADDLP. -def : Pat<(i64 (int_arm64_neon_uaddlv (v2i32 V64:$Rn))), - (i64 (EXTRACT_SUBREG - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), - (UADDLPv2i32_v1i64 V64:$Rn), dsub), - dsub))>; - -//------------------------------------------------------------------------------ -// AdvSIMD modified immediate instructions -//------------------------------------------------------------------------------ - -// AdvSIMD BIC -defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", ARM64bici>; -// AdvSIMD ORR -defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", ARM64orri>; - - -// AdvSIMD FMOV -def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, - "fmov", ".2d", - [(set (v2f64 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; -def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, - "fmov", ".2s", - [(set (v2f32 V64:$Rd), (ARM64fmov imm0_255:$imm8))]>; -def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, - "fmov", ".4s", - [(set (v4f32 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>; - -// AdvSIMD MOVI - -// EDIT byte mask: scalar -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi", - [(set FPR64:$Rd, simdimmtype10:$imm8)]>; -// The movi_edit node has the immediate value already encoded, so we use -// a plain imm0_255 here. -def : Pat<(f64 (ARM64movi_edit imm0_255:$shift)), - (MOVID imm0_255:$shift)>; - -def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>; -def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>; - -def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>; -def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; - -// EDIT byte mask: 2d - -// The movi_edit node has the immediate value already encoded, so we use -// a plain imm0_255 in the pattern -let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, - simdimmtype10, - "movi", ".2d", - [(set (v2i64 V128:$Rd), (ARM64movi_edit imm0_255:$imm8))]>; - - -// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing. -// Complexity is added to break a tie with a plain MOVI. 
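The effect of these HasZCZ patterns shows up on code as small as this (C sketch; the movi lowering is an assumption that only holds on cores with zero-cycle zeroing):

    /* On a HasZCZ core, 0.0 is expected to come from movi v0.2d, #0
       (reading the s/d subregister) rather than an fmov from wzr/xzr. */
    float  fzero(void) { return 0.0f; }
    double dzero(void) { return 0.0;  }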
-let AddedComplexity = 1 in { -def : Pat<(f32 fpimm0), - (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>, - Requires<[HasZCZ]>; -def : Pat<(f64 fpimm0), - (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>, - Requires<[HasZCZ]>; -} - -def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>; -def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>; - -def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>; -def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>; - -// EDIT per word & halfword: 2s, 4h, 4s, & 8h -defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">; -def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))), - (MOVIv8i16 imm0_255:$imm8, imm:$shift)>; - -// EDIT per word: 2s & 4s with MSL shifter -def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s", - [(set (v2i32 V64:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", - [(set (v4i32 V128:$Rd), - (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; - -// Per byte: 8b & 16b -def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, - "movi", ".8b", - [(set (v8i8 V64:$Rd), (ARM64movi imm0_255:$imm8))]>; -def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, - "movi", ".16b", - [(set (v16i8 V128:$Rd), (ARM64movi imm0_255:$imm8))]>; - -// AdvSIMD MVNI - -// EDIT per word & halfword: 2s, 4h, 4s, & 8h -defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">; -def : Pat<(v2i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv2i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv4i32 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v4i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv4i16 imm0_255:$imm8, imm:$shift)>; -def : Pat<(v8i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))), - (MVNIv8i16 imm0_255:$imm8, imm:$shift)>; - -// EDIT per word: 2s & 4s with MSL shifter -def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s", - [(set (v2i32 V64:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; -def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", - [(set (v4i32 V128:$Rd), - (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>; - -//---------------------------------------------------------------------------- -// AdvSIMD indexed element -//---------------------------------------------------------------------------- - -let neverHasSideEffects = 1 in { - defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">; - defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">; -} - -// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the -// instruction expects the addend first, while the intrinsic expects it last. 
- -// On the other hand, there are quite a few valid combinatorial options due to -// the commutativity of multiplication and the fact that (-x) * y = x * (-y). -defm : SIMDFPIndexedSDTiedPatterns<"FMLA", - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>; -defm : SIMDFPIndexedSDTiedPatterns<"FMLA", - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>; - -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; -defm : SIMDFPIndexedSDTiedPatterns<"FMLS", - TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; - -multiclass FMLSIndexedAfterNegPatterns { - // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit - // and DUP scalar. - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (v2f32 (ARM64duplane32 - (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i32 0))), - VectorIndexS:$idx)))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; - def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), - (FMLSv2i32_indexed V64:$Rd, V64:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit - // and DUP scalar. - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64duplane32 (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm, - VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (v4f32 (ARM64duplane32 - (v4f32 (insert_subvector undef, - (v2f32 (fneg V64:$Rm)), - (i32 0))), - VectorIndexS:$idx)))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), - VectorIndexS:$idx)>; - def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), - (ARM64dup (f32 (fneg FPR32Op:$Rm))))), - (FMLSv4i32_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>; - - // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar - // (DUPLANE from 64-bit would be trivial). 
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64duplane64 (v2f64 (fneg V128:$Rm)), - VectorIndexD:$idx))), - (FMLSv2i64_indexed - V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), - (ARM64dup (f64 (fneg FPR64Op:$Rm))))), - (FMLSv2i64_indexed V128:$Rd, V128:$Rn, - (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>; - - // 2 variants for 32-bit scalar version: extract from .2s or from .4s - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v4f32 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, - V128:$Rm, VectorIndexS:$idx)>; - def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn), - (vector_extract (v2f32 (fneg V64:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn, - (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>; - - // 1 variant for 64-bit scalar version: extract from .1d or from .2d - def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn), - (vector_extract (v2f64 (fneg V128:$Rm)), - VectorIndexS:$idx))), - (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn, - V128:$Rm, VectorIndexS:$idx)>; -} - -defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm : FMLSIndexedAfterNegPatterns< - TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >; - -defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_arm64_neon_fmulx>; -defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>; - -def : Pat<(v2f32 (fmul V64:$Rn, (ARM64dup (f32 FPR32:$Rm)))), - (FMULv2i32_indexed V64:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), - (i64 0))>; -def : Pat<(v4f32 (fmul V128:$Rn, (ARM64dup (f32 FPR32:$Rm)))), - (FMULv4i32_indexed V128:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub), - (i64 0))>; -def : Pat<(v2f64 (fmul V128:$Rn, (ARM64dup (f64 FPR64:$Rm)))), - (FMULv2i64_indexed V128:$Rn, - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub), - (i64 0))>; - -defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_arm64_neon_sqdmulh>; -defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_arm64_neon_sqrdmulh>; -defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla", - TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>; -defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls", - TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>; -defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>; -defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>; -defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", - int_arm64_neon_smull>; -defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", - int_arm64_neon_sqadd>; -defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", - int_arm64_neon_sqsub>; -defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_arm64_neon_sqdmull>; -defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal", - TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl", - TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>; -defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull", - int_arm64_neon_umull>; - -// A scalar sqdmull with the second operand being a vector lane can be -// handled directly with the indexed instruction 
encoding. -def : Pat<(int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn), - (vector_extract (v4i32 V128:$Vm), - VectorIndexS:$idx)), - (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>; - -//---------------------------------------------------------------------------- -// AdvSIMD scalar shift instructions -//---------------------------------------------------------------------------- -defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">; -defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">; -defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">; -defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; -// Codegen patterns for the above. We don't put these directly on the -// instructions because TableGen's type inference can't handle the truth. -// Having the same base pattern for fp <--> int totally freaks it out. -def : Pat<(int_arm64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm), - (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm), - (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)), - (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(i64 (int_arm64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)), - (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn), - vecshiftR64:$imm)), - (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), - vecshiftR64:$imm)), - (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm), - (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(int_arm64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm), - (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), - (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(f64 (int_arm64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)), - (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), - vecshiftR64:$imm)), - (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>; -def : Pat<(v1f64 (int_arm64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), - vecshiftR64:$imm)), - (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>; - -defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", ARM64vshl>; -defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">; -defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; -defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; -defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; -defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; -defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">; -defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", ARM64srshri>; -defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra", - TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))>>; -defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", ARM64vashr>; -defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, - (ARM64vashr node:$MHS, node:$RHS))>>; -defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; -defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn", - 
int_arm64_neon_uqshrn>; -defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", ARM64urshri>; -defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra", - TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))>>; -defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", ARM64vlshr>; -defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, - (ARM64vlshr node:$MHS, node:$RHS))>>; - -//---------------------------------------------------------------------------- -// AdvSIMD vector shift instructions -//---------------------------------------------------------------------------- -defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_arm64_neon_vcvtfp2fxs>; -defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_arm64_neon_vcvtfp2fxu>; -defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", - int_arm64_neon_vcvtfxs2fp>; -defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", - int_arm64_neon_rshrn>; -defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", ARM64vshl>; -defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", - BinOpFrag<(trunc (ARM64vashr node:$LHS, node:$RHS))>>; -defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_arm64_neon_vsli>; -def : Pat<(v1i64 (int_arm64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftL64:$imm))), - (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; -defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", - int_arm64_neon_sqrshrn>; -defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun", - int_arm64_neon_sqrshrun>; -defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>; -defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>; -defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", - int_arm64_neon_sqshrn>; -defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", - int_arm64_neon_sqshrun>; -defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_arm64_neon_vsri>; -def : Pat<(v1i64 (int_arm64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), - (i32 vecshiftR64:$imm))), - (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; -defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", ARM64srshri>; -defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra", - TriOpFrag<(add node:$LHS, - (ARM64srshri node:$MHS, node:$RHS))> >; -defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", - BinOpFrag<(ARM64vshl (sext node:$LHS), node:$RHS)>>; - -defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", ARM64vashr>; -defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", - TriOpFrag<(add node:$LHS, (ARM64vashr node:$MHS, node:$RHS))>>; -defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", - int_arm64_neon_vcvtfxu2fp>; -defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", - int_arm64_neon_uqrshrn>; -defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>; -defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn", - int_arm64_neon_uqshrn>; -defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", ARM64urshri>; -defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra", - TriOpFrag<(add node:$LHS, - (ARM64urshri node:$MHS, node:$RHS))> >; -defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll", - BinOpFrag<(ARM64vshl (zext node:$LHS), node:$RHS)>>; -defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", ARM64vlshr>; -defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", - TriOpFrag<(add node:$LHS, (ARM64vlshr node:$MHS, node:$RHS))> >; - -// SHRN patterns for when a logical right shift was used instead of arithmetic -// (the 
immediate guarantees no sign bits actually end up in the result so it
-// doesn't matter).
-def : Pat<(v8i8 (trunc (ARM64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
-          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
-def : Pat<(v4i16 (trunc (ARM64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
-          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
-def : Pat<(v2i32 (trunc (ARM64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
-          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
-
-def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
-                                 (trunc (ARM64vlshr (v8i16 V128:$Rn),
-                                                    vecshiftR16Narrow:$imm)))),
-          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
-                           V128:$Rn, vecshiftR16Narrow:$imm)>;
-def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
-                                 (trunc (ARM64vlshr (v4i32 V128:$Rn),
-                                                    vecshiftR32Narrow:$imm)))),
-          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
-                           V128:$Rn, vecshiftR32Narrow:$imm)>;
-def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
-                                 (trunc (ARM64vlshr (v2i64 V128:$Rn),
-                                                    vecshiftR64Narrow:$imm)))),
-          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
-                           V128:$Rn, vecshiftR64Narrow:$imm)>;
-
-// Vector sign and zero extensions are implemented with SSHLL and USHLL.
-// Anyexts are implemented as zexts.
-def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
-def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
-def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
-def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
-def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
-def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
-def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
-def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
-def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
-// Also match an extend from the upper half of a 128 bit source register.
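[Aside, not part of the patch; the upper-half extend patterns continue right after it. The safety claim in the SHRN comment above can be checked exhaustively for the 16-to-8-bit case. A standalone C++ sketch, assuming the 1..8 immediate range implied by vecshiftR16Narrow:

    #include <cassert>
    #include <cstdint>

    // Low byte of (x >> imm) for a logical shift.
    static uint8_t narrow_lshr(uint16_t x, int imm) {
      return static_cast<uint8_t>(x >> imm);
    }

    // Low byte of (x >> imm) for an arithmetic shift: int16_t promotes to
    // int, so the shift sign-fills (two's complement assumed, as on ARM64).
    static uint8_t narrow_ashr(int16_t x, int imm) {
      return static_cast<uint8_t>(x >> imm);
    }

    int main() {
      // For 1 <= imm <= 8 the lowest sign-filled bit is 16 - imm >= 8, so
      // the kept byte never contains fill bits and the two shifts agree.
      for (int imm = 1; imm <= 8; ++imm)
        for (int x = 0; x <= 0xffff; ++x)
          assert(narrow_lshr(static_cast<uint16_t>(x), imm) ==
                 narrow_ashr(static_cast<int16_t>(x), imm));
      return 0;
    }

The same argument covers the 32-to-16 and 64-to-32 narrowing patterns.]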
-def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (USHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))), - (SSHLLv16i8_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (USHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))), - (SSHLLv8i16_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (USHLLv4i32_shift V128:$Rn, (i32 0))>; -def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))), - (SSHLLv4i32_shift V128:$Rn, (i32 0))>; - -// Vector shift sxtl aliases -def : InstAlias<"sxtl.8h $dst, $src1", - (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.8h, $src1.8b", - (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl.4s $dst, $src1", - (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.4s, $src1.4h", - (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl.2d $dst, $src1", - (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"sxtl $dst.2d, $src1.2s", - (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>; - -// Vector shift sxtl2 aliases -def : InstAlias<"sxtl2.8h $dst, $src1", - (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.8h, $src1.16b", - (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2.4s $dst, $src1", - (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.4s, $src1.8h", - (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2.2d $dst, $src1", - (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"sxtl2 $dst.2d, $src1.4s", - (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>; - -// Vector shift uxtl aliases -def : InstAlias<"uxtl.8h $dst, $src1", - (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.8h, $src1.8b", - (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl.4s $dst, $src1", - (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.4s, $src1.4h", - (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl.2d $dst, $src1", - (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; -def : InstAlias<"uxtl $dst.2d, $src1.2s", - (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>; - -// Vector shift uxtl2 aliases -def : InstAlias<"uxtl2.8h $dst, $src1", - (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.8h, $src1.16b", - (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2.4s $dst, $src1", - (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.4s, $src1.8h", - (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2.2d $dst, $src1", - (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; -def : InstAlias<"uxtl2 $dst.2d, $src1.4s", - (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>; - -// If an integer is about to be converted to a floating point value, -// just load it on the floating point unit. -// These patterns are more complex because floating point loads do not -// support sign extension. 
-// The sign extension has to be explicitly added and is only supported for
-// one step: byte-to-half, half-to-word, word-to-doubleword.
-// SCVTF GPR -> FPR is 9 cycles.
-// SCVTF FPR -> FPR is 4 cycles.
-// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
-// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
-// and still be faster.
-// However, this is not good for code size.
-// 8-bits -> float. 2 sizes step-up.
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 ro_indexed8:$addr)))),
-           (SCVTFv1i32 (f32 (EXTRACT_SUBREG
-                              (SSHLLv4i16_shift
-                                (f64
-                                  (EXTRACT_SUBREG
-                                    (SSHLLv8i8_shift
-                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                                     (LDRBro ro_indexed8:$addr),
-                                                     bsub),
-                                      0),
-                                    dsub)),
-                                0),
-                              ssub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_indexed8:$addr)))),
-           (SCVTFv1i32 (f32 (EXTRACT_SUBREG
-                              (SSHLLv4i16_shift
-                                (f64
-                                  (EXTRACT_SUBREG
-                                    (SSHLLv8i8_shift
-                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                                     (LDRBui am_indexed8:$addr),
-                                                     bsub),
-                                      0),
-                                    dsub)),
-                                0),
-                              ssub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_unscaled8:$addr)))),
-           (SCVTFv1i32 (f32 (EXTRACT_SUBREG
-                              (SSHLLv4i16_shift
-                                (f64
-                                  (EXTRACT_SUBREG
-                                    (SSHLLv8i8_shift
-                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                                     (LDURBi am_unscaled8:$addr),
-                                                     bsub),
-                                      0),
-                                    dsub)),
-                                0),
-                              ssub)))>, Requires<[NotForCodeSize]>;
-// 16-bits -> float. 1 size step-up.
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))),
-           (SCVTFv1i32 (f32 (EXTRACT_SUBREG
-                              (SSHLLv4i16_shift
-                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                               (LDRHro ro_indexed16:$addr),
-                                               hsub),
-                                0),
-                              ssub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))),
-           (SCVTFv1i32 (f32 (EXTRACT_SUBREG
-                              (SSHLLv4i16_shift
-                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                               (LDRHui am_indexed16:$addr),
-                                               hsub),
-                                0),
-                              ssub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))),
-           (SCVTFv1i32 (f32 (EXTRACT_SUBREG
-                              (SSHLLv4i16_shift
-                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                               (LDURHi am_unscaled16:$addr),
-                                               hsub),
-                                0),
-                              ssub)))>, Requires<[NotForCodeSize]>;
-// 32-bits to 32-bits are handled in target specific dag combine:
-// performIntToFpCombine.
-// 64-bits integer to 32-bits floating point, not possible with
-// SCVTF on floating point registers (both source and destination
-// must have the same size).
-
-// Here are the patterns for 8, 16, 32, and 64-bits to double.
-// 8-bits -> double. 3 size step-up: give up.
-// 16-bits -> double. 2 size step.
-def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))),
-           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
-                              (SSHLLv2i32_shift
-                                (f64
-                                  (EXTRACT_SUBREG
-                                    (SSHLLv4i16_shift
-                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                                     (LDRHro ro_indexed16:$addr),
-                                                     hsub),
-                                      0),
-                                    dsub)),
-                                0),
-                              dsub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))),
-           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
-                              (SSHLLv2i32_shift
-                                (f64
-                                  (EXTRACT_SUBREG
-                                    (SSHLLv4i16_shift
-                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                                     (LDRHui am_indexed16:$addr),
-                                                     hsub),
-                                      0),
-                                    dsub)),
-                                0),
-                              dsub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))),
-           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
-                              (SSHLLv2i32_shift
-                                (f64
-                                  (EXTRACT_SUBREG
-                                    (SSHLLv4i16_shift
-                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                                     (LDURHi am_unscaled16:$addr),
-                                                     hsub),
-                                      0),
-                                    dsub)),
-                                0),
-                              dsub)))>, Requires<[NotForCodeSize]>;
-// 32-bits -> double. 1 size step-up.
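[Aside, not part of the patch; the 32-bit load-and-convert patterns follow. Spelling out the arithmetic behind "and still be faster", using the cycle counts quoted in the comment above: the GPR route costs one 9-cycle SCVTF, while the worst FPR route (8-bit source, two size steps) costs 2 + 2 + 4 = 8 cycles for two SXTLs plus one FPR-to-FPR SCVTF, and the one-step 16-bit case costs 2 + 4 = 6. Both beat 9, at the price of the extra instructions that the NotForCodeSize predicate guards against.]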
-def : Pat <(f64 (sint_to_fp (i32 (load ro_indexed32:$addr)))),
-           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
-                              (SSHLLv2i32_shift
-                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                               (LDRSro ro_indexed32:$addr),
-                                               ssub),
-                                0),
-                              dsub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f64 (sint_to_fp (i32 (load am_indexed32:$addr)))),
-           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
-                              (SSHLLv2i32_shift
-                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                               (LDRSui am_indexed32:$addr),
-                                               ssub),
-                                0),
-                              dsub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f64 (sint_to_fp (i32 (load am_unscaled32:$addr)))),
-           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
-                              (SSHLLv2i32_shift
-                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
-                                               (LDURSi am_unscaled32:$addr),
-                                               ssub),
-                                0),
-                              dsub)))>, Requires<[NotForCodeSize]>;
-// 64-bits -> double are handled in target specific dag combine:
-// performIntToFpCombine.
-
-
-//----------------------------------------------------------------------------
-// AdvSIMD Load-Store Structure
-//----------------------------------------------------------------------------
-defm LD1 : SIMDLd1Multiple<"ld1">;
-defm LD2 : SIMDLd2Multiple<"ld2">;
-defm LD3 : SIMDLd3Multiple<"ld3">;
-defm LD4 : SIMDLd4Multiple<"ld4">;
-
-defm ST1 : SIMDSt1Multiple<"st1">;
-defm ST2 : SIMDSt2Multiple<"st2">;
-defm ST3 : SIMDSt3Multiple<"st3">;
-defm ST4 : SIMDSt4Multiple<"st4">;
-
-class Ld1Pat<ValueType ty, Instruction INST>
-  : Pat<(ty (load am_simdnoindex:$vaddr)), (INST am_simdnoindex:$vaddr)>;
-
-def : Ld1Pat<v16i8, LD1Onev16b>;
-def : Ld1Pat<v8i16, LD1Onev8h>;
-def : Ld1Pat<v4i32, LD1Onev4s>;
-def : Ld1Pat<v2i64, LD1Onev2d>;
-def : Ld1Pat<v8i8,  LD1Onev8b>;
-def : Ld1Pat<v4i16, LD1Onev4h>;
-def : Ld1Pat<v2i32, LD1Onev2s>;
-def : Ld1Pat<v1i64, LD1Onev1d>;
-
-class St1Pat<ValueType ty, Instruction INST>
-  : Pat<(store ty:$Vt, am_simdnoindex:$vaddr),
-        (INST ty:$Vt, am_simdnoindex:$vaddr)>;
-
-def : St1Pat<v16i8, ST1Onev16b>;
-def : St1Pat<v8i16, ST1Onev8h>;
-def : St1Pat<v4i32, ST1Onev4s>;
-def : St1Pat<v2i64, ST1Onev2d>;
-def : St1Pat<v8i8,  ST1Onev8b>;
-def : St1Pat<v4i16, ST1Onev4h>;
-def : St1Pat<v2i32, ST1Onev2s>;
-def : St1Pat<v1i64, ST1Onev1d>;
-
-//---
-// Single-element
-//---
-
-defm LD1R  : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
-defm LD2R  : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
-defm LD3R  : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
-defm LD4R  : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
-let mayLoad = 1, neverHasSideEffects = 1 in {
-defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
-defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
-defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
-defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
-defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
-defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
-defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
-defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
-defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
-defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
-defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
-defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
-defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
-defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
-defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
-defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
-}
-
-def : Pat<(v8i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))),
-          (LD1Rv8b am_simdnoindex:$vaddr)>;
-def : Pat<(v16i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))),
-          (LD1Rv16b am_simdnoindex:$vaddr)>;
-def : Pat<(v4i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))),
-          (LD1Rv4h am_simdnoindex:$vaddr)>;
-def : Pat<(v8i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))),
-          (LD1Rv8h am_simdnoindex:$vaddr)>;
-def : Pat<(v2i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))),
-          (LD1Rv2s am_simdnoindex:$vaddr)>;
-def : Pat<(v4i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))),
-          (LD1Rv4s am_simdnoindex:$vaddr)>;
-def : Pat<(v2i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))),
-          (LD1Rv2d am_simdnoindex:$vaddr)>;
-def : Pat<(v1i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))),
-          (LD1Rv1d am_simdnoindex:$vaddr)>;
-// Grab the floating point version too
-def : Pat<(v2f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))),
-          (LD1Rv2s am_simdnoindex:$vaddr)>;
-def : Pat<(v4f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))),
-          (LD1Rv4s am_simdnoindex:$vaddr)>;
-def : Pat<(v2f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))),
-          (LD1Rv2d am_simdnoindex:$vaddr)>;
-def : Pat<(v1f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))),
-          (LD1Rv1d am_simdnoindex:$vaddr)>;
-
-class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
-                    ValueType VTy, ValueType STy, Instruction LD1>
-  : Pat<(vector_insert (VTy VecListOne128:$Rd),
-                       (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx),
-        (LD1 VecListOne128:$Rd, VecIndex:$idx, am_simdnoindex:$vaddr)>;
-
-def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
-def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
-def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
-def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
-def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
-def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
-
-class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
-                   ValueType VTy, ValueType STy, Instruction LD1>
-  : Pat<(vector_insert (VTy VecListOne64:$Rd),
-                       (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx),
-        (EXTRACT_SUBREG
-            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
-                 VecIndex:$idx, am_simdnoindex:$vaddr),
-            dsub)>;
-
-def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
-def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
-def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
-def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
-
-
-defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
-defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
-defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
-defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
-
-// Stores
-defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
-defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
-defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
-defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
-
-let AddedComplexity = 8 in
-class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
-                    ValueType VTy, ValueType STy, Instruction ST1>
-  : Pat<(scalar_store
-             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
-             am_simdnoindex:$vaddr),
-        (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr)>;
-
-def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
-def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
-def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
-def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
-def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
-def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
-
-let AddedComplexity = 8 in
-class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
-                   ValueType VTy, ValueType STy, Instruction ST1>
-  : Pat<(scalar_store
-             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
-             am_simdnoindex:$vaddr),
-        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
-             VecIndex:$idx, am_simdnoindex:$vaddr)>;
-
-def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8,  i32, ST1i8>;
-def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
-def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
-def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
-
-let mayStore = 1, neverHasSideEffects = 1 in {
-defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
-defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
-defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
-defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
-defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
-defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
-defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
-defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
-defm ST4 : SIMDStSingleB<1,
0b001, "st4", VecListFourb, GPR64pi4>; -defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; -defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; -defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; -} - -defm ST1 : SIMDLdSt1SingleAliases<"st1">; -defm ST2 : SIMDLdSt2SingleAliases<"st2">; -defm ST3 : SIMDLdSt3SingleAliases<"st3">; -defm ST4 : SIMDLdSt4SingleAliases<"st4">; - -//---------------------------------------------------------------------------- -// Crypto extensions -//---------------------------------------------------------------------------- - -def AESErr : AESTiedInst<0b0100, "aese", int_arm64_crypto_aese>; -def AESDrr : AESTiedInst<0b0101, "aesd", int_arm64_crypto_aesd>; -def AESMCrr : AESInst< 0b0110, "aesmc", int_arm64_crypto_aesmc>; -def AESIMCrr : AESInst< 0b0111, "aesimc", int_arm64_crypto_aesimc>; - -def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_arm64_crypto_sha1c>; -def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_arm64_crypto_sha1p>; -def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_arm64_crypto_sha1m>; -def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_arm64_crypto_sha1su0>; -def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_arm64_crypto_sha256h>; -def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_arm64_crypto_sha256h2>; -def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_arm64_crypto_sha256su1>; - -def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_arm64_crypto_sha1h>; -def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_arm64_crypto_sha1su1>; -def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_arm64_crypto_sha256su0>; - -//---------------------------------------------------------------------------- -// Compiler-pseudos -//---------------------------------------------------------------------------- -// FIXME: Like for X86, these should go in their own separate .td file. - -// Any instruction that defines a 32-bit result leaves the high half of the -// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may -// be copying from a truncate. But any other 32-bit operation will zero-extend -// up to 64 bits. -// FIXME: X86 also checks for CMOV here. Do we need something similar? -def def32 : PatLeaf<(i32 GPR32:$src), [{ - return N->getOpcode() != ISD::TRUNCATE && - N->getOpcode() != TargetOpcode::EXTRACT_SUBREG && - N->getOpcode() != ISD::CopyFromReg; -}]>; - -// In the case of a 32-bit def that is known to implicitly zero-extend, -// we can use a SUBREG_TO_REG. -def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; - -// For an anyext, we don't care what the high bits are, so we can perform an -// INSERT_SUBREF into an IMPLICIT_DEF. -def : Pat<(i64 (anyext GPR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; - -// When we need to explicitly zero-extend, we use an unsigned bitfield move -// instruction (UBFM) on the enclosing super-reg. -def : Pat<(i64 (zext GPR32:$src)), - (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; - -// To sign extend, we use a signed bitfield move instruction (SBFM) on the -// containing super-reg. 
-def : Pat<(i64 (sext GPR32:$src)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>; -def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>; -def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>; - -def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_sext_i8 imm0_31:$imm)))>; -def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i8 imm0_63:$imm)))>; - -def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)), - (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)), - (i64 (i32shift_sext_i16 imm0_31:$imm)))>; -def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)), - (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i16 imm0_63:$imm)))>; - -def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 (i64shift_a imm0_63:$imm)), - (i64 (i64shift_sext_i32 imm0_63:$imm)))>; - -// sra patterns have an AddedComplexity of 10, so make sure we have a higher -// AddedComplexity for the following patterns since we want to match sext + sra -// patterns before we attempt to match a single sra node. -let AddedComplexity = 20 in { -// We support all sext + sra combinations which preserve at least one bit of the -// original value which is to be sign extended. E.g. we support shifts up to -// bitwidth-1 bits. -def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)), - (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>; - -def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)), - (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>; -def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)), - (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>; - -def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)), - (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32), - (i64 imm0_31:$imm), 31)>; -} // AddedComplexity = 20 - -// To truncate, we can simply extract from a subregister. -def : Pat<(i32 (trunc GPR64sp:$src)), - (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>; - -// __builtin_trap() uses the BRK instruction on ARM64. -def : Pat<(trap), (BRK 1)>; - -// Conversions within AdvSIMD types in the same register size are free. 
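[Aside, not part of the patch; the AdvSIMD bitconvert patterns continue below. A minimal standalone C++ model of what the UBFMXri/SBFMXri lowerings above compute — with immr = 0 and imms = 31 for the full-width extensions, and smaller imms for the sext_inreg forms:

    #include <cassert>
    #include <cstdint>

    // UBFM x, 0, 31: keep bits 0..31, zero-fill upward -> zext i32 to i64.
    static uint64_t ubfm_0_31(uint64_t x) { return x & 0xffffffffull; }

    // SBFM x, 0, imms: keep bits 0..imms, sign-extend from bit imms
    // (two's-complement wraparound assumed, as on ARM64).
    static int64_t sbfm_0(uint64_t x, int imms) {
      uint64_t field = x & ((2ull << imms) - 1); // bits 0..imms
      uint64_t sign  = 1ull << imms;
      return static_cast<int64_t>((field ^ sign) - sign);
    }

    int main() {
      assert(ubfm_0_31(0xdeadbeefcafebabeull) == 0xcafebabeull); // zext
      assert(sbfm_0(0x00000000fffffff6ull, 31) == -10);          // sext i32
      assert(sbfm_0(0x000000000000fff6ull, 15) == -10);          // sext_inreg i16
      assert(sbfm_0(0x00000000000000f6ull, 7)  == -10);          // sext_inreg i8
      assert(sbfm_0(1, 0) == -1);                                // sext_inreg i1
      return 0;
    }

The zext-of-def32 case needs no UBFM at all, which is exactly why the SUBREG_TO_REG pattern above is profitable.]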
- -def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>; -def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>; - -def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>; -def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>; - -def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>; -def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>; - -def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>; -def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>; - -def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>; -def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; - -def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; -def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>; - -def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>; - - -def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>; -def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>; - -def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>; -def : 
Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>; -def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>; - -def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>; -def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>; - -def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>; -def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>; - -def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>; -def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>; - -def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>; -def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>; - -def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>; -def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>; - -def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; -def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))), - (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; - -// A 64-bit subvector insert to the first 128-bit vector position -// is a subregister copy that needs no instruction. 
-def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>; -def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)), - (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>; - -// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64 -// or v2f32. -def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)), - (vector_extract (v2i64 FPR128:$Rn), (i64 1)))), - (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>; -def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)), - (vector_extract (v2f64 FPR128:$Rn), (i64 1)))), - (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>; - // vector_extract on 64-bit vectors gets promoted to a 128 bit vector, - // so we match on v4f32 here, not v2f32. This will also catch adding - // the low two lanes of a true v4f32 vector. -def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)), - (vector_extract (v4f32 FPR128:$Rn), (i64 1))), - (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>; - -// Scalar 64-bit shifts in FPR64 registers. -def : Pat<(i64 (int_arm64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(i64 (int_arm64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), - (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; - -// Tail call return handling. These are all compiler pseudo-instructions, -// so no encoding information or anything like that. -let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst), []>; - def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst), []>; -} - -def : Pat<(ARM64tcret tcGPR64:$dst), (TCRETURNri tcGPR64:$dst)>; -def : Pat<(ARM64tcret (i64 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>; -def : Pat<(ARM64tcret (i64 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>; - -include "ARM64InstrAtomics.td" diff --git a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp b/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp deleted file mode 100644 index c0031a4..0000000 --- a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp +++ /dev/null @@ -1,947 +0,0 @@ -//===-- ARM64LoadStoreOptimizer.cpp - ARM64 load/store opt. pass --*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a pass that performs load / store related peephole -// optimizations. This pass should be run after register allocation. 
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64-ldst-opt"
-#include "ARM64InstrInfo.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-/// ARM64LoadStoreOpt - Post-register allocation pass to combine
-/// load / store instructions to form ldp / stp instructions.
-
-STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
-STATISTIC(NumPostFolded, "Number of post-index updates folded");
-STATISTIC(NumPreFolded, "Number of pre-index updates folded");
-STATISTIC(NumUnscaledPairCreated,
-          "Number of load/store from unscaled generated");
-
-static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true),
-                                    cl::Hidden);
-static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20),
-                                   cl::Hidden);
-
-// Placeholder while testing unscaled load/store combining
-static cl::opt<bool>
-EnableARM64UnscaledMemOp("arm64-unscaled-mem-op", cl::Hidden,
-                         cl::desc("Allow ARM64 unscaled load/store combining"),
-                         cl::init(true));
-
-namespace {
-struct ARM64LoadStoreOpt : public MachineFunctionPass {
-  static char ID;
-  ARM64LoadStoreOpt() : MachineFunctionPass(ID) {}
-
-  const ARM64InstrInfo *TII;
-  const TargetRegisterInfo *TRI;
-
-  // Scan the instructions looking for a load/store that can be combined
-  // with the current instruction into a load/store pair.
-  // Return the matching instruction if one is found, else MBB->end().
-  // If a matching instruction is found, mergeForward is set to true if the
-  // merge is to remove the first instruction and replace the second with
-  // a pair-wise insn, and false if the reverse is true.
-  MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I,
-                                               bool &mergeForward,
-                                               unsigned Limit);
-  // Merge the two instructions indicated into a single pair-wise instruction.
-  // If mergeForward is true, erase the first instruction and fold its
-  // operation into the second. If false, the reverse. Return the instruction
-  // following the first instruction (which may change during processing).
-  MachineBasicBlock::iterator
-  mergePairedInsns(MachineBasicBlock::iterator I,
-                   MachineBasicBlock::iterator Paired, bool mergeForward);
-
-  // Scan the instruction list to find a base register update that can
-  // be combined with the current instruction (a load or store) using
-  // pre or post indexed addressing with writeback. Scan forwards.
-  MachineBasicBlock::iterator
-  findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit,
-                                int Value);
-
-  // Scan the instruction list to find a base register update that can
-  // be combined with the current instruction (a load or store) using
-  // pre or post indexed addressing with writeback. Scan backwards.
-  MachineBasicBlock::iterator
-  findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit);
-
-  // Merge a pre-index base register update into a ld/st instruction.
-  MachineBasicBlock::iterator
-  mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
-                        MachineBasicBlock::iterator Update);
-
-  // Merge a post-index base register update into a ld/st instruction.
-  MachineBasicBlock::iterator
-  mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
-                         MachineBasicBlock::iterator Update);
-
-  bool optimizeBlock(MachineBasicBlock &MBB);
-
-  virtual bool runOnMachineFunction(MachineFunction &Fn);
-
-  virtual const char *getPassName() const {
-    return "ARM64 load / store optimization pass";
-  }
-
-private:
-  int getMemSize(MachineInstr *MemMI);
-};
-char ARM64LoadStoreOpt::ID = 0;
-}
-
-static bool isUnscaledLdst(unsigned Opc) {
-  switch (Opc) {
-  default:
-    return false;
-  case ARM64::STURSi:
-    return true;
-  case ARM64::STURDi:
-    return true;
-  case ARM64::STURQi:
-    return true;
-  case ARM64::STURWi:
-    return true;
-  case ARM64::STURXi:
-    return true;
-  case ARM64::LDURSi:
-    return true;
-  case ARM64::LDURDi:
-    return true;
-  case ARM64::LDURQi:
-    return true;
-  case ARM64::LDURWi:
-    return true;
-  case ARM64::LDURXi:
-    return true;
-  }
-}
-
-// Size in bytes of the data moved by an unscaled load or store
-int ARM64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
-  switch (MemMI->getOpcode()) {
-  default:
-    llvm_unreachable("Opcode has unknown size!");
-  case ARM64::STRSui:
-  case ARM64::STURSi:
-    return 4;
-  case ARM64::STRDui:
-  case ARM64::STURDi:
-    return 8;
-  case ARM64::STRQui:
-  case ARM64::STURQi:
-    return 16;
-  case ARM64::STRWui:
-  case ARM64::STURWi:
-    return 4;
-  case ARM64::STRXui:
-  case ARM64::STURXi:
-    return 8;
-  case ARM64::LDRSui:
-  case ARM64::LDURSi:
-    return 4;
-  case ARM64::LDRDui:
-  case ARM64::LDURDi:
-    return 8;
-  case ARM64::LDRQui:
-  case ARM64::LDURQi:
-    return 16;
-  case ARM64::LDRWui:
-  case ARM64::LDURWi:
-    return 4;
-  case ARM64::LDRXui:
-  case ARM64::LDURXi:
-    return 8;
-  }
-}
-
-static unsigned getMatchingPairOpcode(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Opcode has no pairwise equivalent!");
-  case ARM64::STRSui:
-  case ARM64::STURSi:
-    return ARM64::STPSi;
-  case ARM64::STRDui:
-  case ARM64::STURDi:
-    return ARM64::STPDi;
-  case ARM64::STRQui:
-  case ARM64::STURQi:
-    return ARM64::STPQi;
-  case ARM64::STRWui:
-  case ARM64::STURWi:
-    return ARM64::STPWi;
-  case ARM64::STRXui:
-  case ARM64::STURXi:
-    return ARM64::STPXi;
-  case ARM64::LDRSui:
-  case ARM64::LDURSi:
-    return ARM64::LDPSi;
-  case ARM64::LDRDui:
-  case ARM64::LDURDi:
-    return ARM64::LDPDi;
-  case ARM64::LDRQui:
-  case ARM64::LDURQi:
-    return ARM64::LDPQi;
-  case ARM64::LDRWui:
-  case ARM64::LDURWi:
-    return ARM64::LDPWi;
-  case ARM64::LDRXui:
-  case ARM64::LDURXi:
-    return ARM64::LDPXi;
-  }
-}
-
-static unsigned getPreIndexedOpcode(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Opcode has no pre-indexed equivalent!");
-  case ARM64::STRSui: return ARM64::STRSpre;
-  case ARM64::STRDui: return ARM64::STRDpre;
-  case ARM64::STRQui: return ARM64::STRQpre;
-  case ARM64::STRWui: return ARM64::STRWpre;
-  case ARM64::STRXui: return ARM64::STRXpre;
-  case ARM64::LDRSui: return ARM64::LDRSpre;
-  case ARM64::LDRDui: return ARM64::LDRDpre;
-  case ARM64::LDRQui: return ARM64::LDRQpre;
-  case ARM64::LDRWui: return ARM64::LDRWpre;
-  case ARM64::LDRXui: return ARM64::LDRXpre;
-  }
-}
-
-static unsigned getPostIndexedOpcode(unsigned Opc) {
-  switch (Opc) {
-  default:
-    llvm_unreachable("Opcode has no post-indexed equivalent!");
-  case ARM64::STRSui:
-    return ARM64::STRSpost;
-  case ARM64::STRDui:
-    return ARM64::STRDpost;
-  case
ARM64::STRQui: - return ARM64::STRQpost; - case ARM64::STRWui: - return ARM64::STRWpost; - case ARM64::STRXui: - return ARM64::STRXpost; - case ARM64::LDRSui: - return ARM64::LDRSpost; - case ARM64::LDRDui: - return ARM64::LDRDpost; - case ARM64::LDRQui: - return ARM64::LDRQpost; - case ARM64::LDRWui: - return ARM64::LDRWpost; - case ARM64::LDRXui: - return ARM64::LDRXpost; - } -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, - bool mergeForward) { - MachineBasicBlock::iterator NextI = I; - ++NextI; - // If NextI is the second of the two instructions to be merged, we need - // to skip one further. Either way we merge will invalidate the iterator, - // and we don't need to scan the new instruction, as it's a pairwise - // instruction, which we're not considering for further action anyway. - if (NextI == Paired) - ++NextI; - - bool IsUnscaled = isUnscaledLdst(I->getOpcode()); - int OffsetStride = IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(I) : 1; - - unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); - // Insert our new paired instruction after whichever of the paired - // instructions mergeForward indicates. - MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I; - // Also based on mergeForward is from where we copy the base register operand - // so we get the flags compatible with the input code. - MachineOperand &BaseRegOp = - mergeForward ? Paired->getOperand(1) : I->getOperand(1); - - // Which register is Rt and which is Rt2 depends on the offset order. - MachineInstr *RtMI, *Rt2MI; - if (I->getOperand(2).getImm() == - Paired->getOperand(2).getImm() + OffsetStride) { - RtMI = Paired; - Rt2MI = I; - } else { - RtMI = I; - Rt2MI = Paired; - } - // Handle Unscaled - int OffsetImm = RtMI->getOperand(2).getImm(); - if (IsUnscaled && EnableARM64UnscaledMemOp) - OffsetImm /= OffsetStride; - - // Construct the new instruction. - MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, - I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(RtMI->getOperand(0)) - .addOperand(Rt2MI->getOperand(0)) - .addOperand(BaseRegOp) - .addImm(OffsetImm); - (void)MIB; - - // FIXME: Do we need/want to copy the mem operands from the source - // instructions? Probably. What uses them after this? - - DEBUG(dbgs() << "Creating pair load/store. Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Paired->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions. - I->eraseFromParent(); - Paired->eraseFromParent(); - - return NextI; -} - -/// trackRegDefsUses - Remember what registers the specified instruction uses -/// and modifies. 
-static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs,
-                             BitVector &UsedRegs,
-                             const TargetRegisterInfo *TRI) {
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    MachineOperand &MO = MI->getOperand(i);
-    if (MO.isRegMask())
-      ModifiedRegs.setBitsNotInMask(MO.getRegMask());
-
-    if (!MO.isReg())
-      continue;
-    unsigned Reg = MO.getReg();
-    if (MO.isDef()) {
-      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
-        ModifiedRegs.set(*AI);
-    } else {
-      assert(MO.isUse() && "Reg operand not a def and not a use?!?");
-      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
-        UsedRegs.set(*AI);
-    }
-  }
-}
-
-static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) {
-  if (!IsUnscaled && (Offset > 63 || Offset < -64))
-    return false;
-  if (IsUnscaled) {
-    // Convert the byte-offset used by unscaled into an "element" offset used
-    // by the scaled pair load/store instructions.
-    int elemOffset = Offset / OffsetStride;
-    if (elemOffset > 63 || elemOffset < -64)
-      return false;
-  }
-  return true;
-}
-
-// Do alignment, specialized to power of 2 and for signed ints,
-// avoiding having to do a C-style cast from uint64_t to int when
-// using RoundUpToAlignment from include/llvm/Support/MathExtras.h.
-// FIXME: Move this function to include/MathExtras.h?
-static int alignTo(int Num, int PowOf2) {
-  return (Num + PowOf2 - 1) & ~(PowOf2 - 1);
-}
-
-/// findMatchingInsn - Scan the instructions looking for a load/store that can
-/// be combined with the current instruction into a load/store pair.
-MachineBasicBlock::iterator
-ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
-                                    bool &mergeForward, unsigned Limit) {
-  MachineBasicBlock::iterator E = I->getParent()->end();
-  MachineBasicBlock::iterator MBBI = I;
-  MachineInstr *FirstMI = I;
-  ++MBBI;
-
-  int Opc = FirstMI->getOpcode();
-  bool mayLoad = FirstMI->mayLoad();
-  bool IsUnscaled = isUnscaledLdst(Opc);
-  unsigned Reg = FirstMI->getOperand(0).getReg();
-  unsigned BaseReg = FirstMI->getOperand(1).getReg();
-  int Offset = FirstMI->getOperand(2).getImm();
-
-  // Early exit if the first instruction modifies the base register.
-  // e.g., ldr x0, [x0]
-  // Early exit if the offset is not possible to match (6 bits of positive
-  // range, plus allow an extra one in case we find a later insn that matches
-  // with Offset-1).
-  if (FirstMI->modifiesRegister(BaseReg, TRI))
-    return E;
-  int OffsetStride =
-      IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(FirstMI) : 1;
-  if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
-    return E;
-
-  // Track which registers have been modified and used between the first insn
-  // (inclusive) and the second insn.
-  BitVector ModifiedRegs, UsedRegs;
-  ModifiedRegs.resize(TRI->getNumRegs());
-  UsedRegs.resize(TRI->getNumRegs());
-  for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
-    MachineInstr *MI = MBBI;
-    // Skip DBG_VALUE instructions. Otherwise debug info can affect the
-    // optimization by changing how far we scan.
-    if (MI->isDebugValue())
-      continue;
-
-    // Now that we know this is a real instruction, count it.
-    ++Count;
-
-    if (Opc == MI->getOpcode() && MI->getOperand(2).isImm()) {
-      // If we've found another instruction with the same opcode, check to see
-      // if the base and offset are compatible with our starting instruction.
-      // These instructions all have scaled immediate operands, so we just
-      // check for +1/-1.
Make sure to check the new instruction offset is - // actually an immediate and not a symbolic reference destined for - // a relocation. - // - // Pairwise instructions have a 7-bit signed offset field. Single insns - // have a 12-bit unsigned offset field. To be a valid combine, the - // final offset must be in range. - unsigned MIBaseReg = MI->getOperand(1).getReg(); - int MIOffset = MI->getOperand(2).getImm(); - if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || - (Offset + OffsetStride == MIOffset))) { - int MinOffset = Offset < MIOffset ? Offset : MIOffset; - // If this is a volatile load/store that otherwise matched, stop looking - // as something is going on that we don't have enough information to - // safely transform. Similarly, stop if we see a hint to avoid pairs. - if (MI->hasOrderedMemoryRef() || TII->isLdStPairSuppressed(MI)) - return E; - // If the resultant immediate offset of merging these instructions - // is out of range for a pairwise instruction, bail and keep looking. - bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); - if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - // If the alignment requirements of the paired (scaled) instruction - // can't express the offset of the unscaled input, bail and keep - // looking. - if (IsUnscaled && EnableARM64UnscaledMemOp && - (alignTo(MinOffset, OffsetStride) != MinOffset)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - // If the destination register of the loads is the same register, bail - // and keep looking. A load-pair instruction with both destination - // registers the same is UNPREDICTABLE and will result in an exception. - if (mayLoad && Reg == MI->getOperand(0).getReg()) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - continue; - } - - // If the Rt of the second instruction was not modified or used between - // the two instructions, we can combine the second into the first. - if (!ModifiedRegs[MI->getOperand(0).getReg()] && - !UsedRegs[MI->getOperand(0).getReg()]) { - mergeForward = false; - return MBBI; - } - - // Likewise, if the Rt of the first instruction is not modified or used - // between the two instructions, we can combine the first into the - // second. - if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && - !UsedRegs[FirstMI->getOperand(0).getReg()]) { - mergeForward = true; - return MBBI; - } - // Unable to combine these instructions due to interference in between. - // Keep looking. - } - } - - // If the instruction wasn't a matching load or store, but does (or can) - // modify memory, stop searching, as we don't have alias analysis or - // anything like that to tell us whether the access is tromping on the - // locations we care about. The big one we want to catch is calls. - // - // FIXME: Theoretically, we can do better than that for SP and FP based - // references since we can effectively know where those are touching. It's - // unclear if it's worth the extra code, though. Most paired instructions - // will be sequential, perhaps with a few intervening non-memory related - // instructions. - if (MI->mayStore() || MI->isCall()) - return E; - // Likewise, if we're matching a store instruction, we don't want to - // move across a load, as it may be reading the same location. - if (FirstMI->mayStore() && MI->mayLoad()) - return E; - - // Update modified / uses register lists. 
- trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg]) - return E; - } - return E; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && - "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; - // Return the instruction following the merged instruction, which is - // the instruction following our unmerged load. Unless that's the add/sub - // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; - - int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into pre-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) - Value = -Value; - - unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); - (void)MIB; - - DEBUG(dbgs() << "Creating pre-indexed load/store."); - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == ARM64::ADDXri || - Update->getOpcode() == ARM64::SUBXri) && - "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; - // Return the instruction following the merged instruction, which is - // the instruction following our unmerged load. Unless that's the add/sub - // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; - - int Value = Update->getOperand(2).getImm(); - assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into post-indexed load / store"); - if (Update->getOpcode() == ARM64::SUBXri) - Value = -Value; - - unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); - (void)MIB; - - DEBUG(dbgs() << "Creating post-indexed load/store."); - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - -static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, - int Offset) { - switch (MI->getOpcode()) { - default: - break; - case ARM64::SUBXri: - // Negate the offset for a SUB instruction. 
- Offset *= -1; - // FALLTHROUGH - case ARM64::ADDXri: - // Make sure it's a vanilla immediate operand, not a relocation or - // anything else we can't handle. - if (!MI->getOperand(2).isImm()) - break; - // Watch out for 1 << 12 shifted value. - if (ARM64_AM::getShiftValue(MI->getOperand(3).getImm())) - break; - // If the instruction has the base register as source and dest and the - // immediate will fit in a signed 9-bit integer, then we have a match. - if (MI->getOperand(0).getReg() == BaseReg && - MI->getOperand(1).getReg() == BaseReg && - MI->getOperand(2).getImm() <= 255 && - MI->getOperand(2).getImm() >= -256) { - // If we have a non-zero Offset, we check that it matches the amount - // we're adding to the register. - if (!Offset || Offset == MI->getOperand(2).getImm()) - return true; - } - break; - } - return false; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, - unsigned Limit, int Value) { - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineInstr *MemMI = I; - MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm() * - TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); - - // If the base register overlaps the destination register, we can't - // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; - - // Scan forward looking for post-index opportunities. - // Updating instructions can't be formed if the memory insn already - // has an offset other than the value we're looking for. - if (Offset != Value) - return E; - - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - ++MBBI; - for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) { - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - - // Now that we know this is a real instruction, count it. - ++Count; - - // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, Value)) - return MBBI; - - // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is used or modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) - return E; - } - return E; -} - -MachineBasicBlock::iterator -ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, - unsigned Limit) { - MachineBasicBlock::iterator B = I->getParent()->begin(); - MachineBasicBlock::iterator E = I->getParent()->end(); - MachineInstr *MemMI = I; - MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm(); - unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); - - // If the load/store is the first instruction in the block, there's obviously - // not any matching update. Ditto if the memory offset isn't zero.
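isMatchingUpdateInsn above folds three requirements into one test: the add/sub must use the base register as both source and destination, carry a plain unshifted immediate, and describe an update that fits the signed 9-bit writeback field, with SUBXri counted as a negative update. A self-contained restatement of just the immediate arithmetic (matchesUpdate is an illustrative name; passing WantedOffset == 0 plays the same accept-any-amount role as the function's Offset parameter):

#include <cassert>

static bool matchesUpdate(bool IsSub, int Imm, int WantedOffset) {
  // The raw add/sub immediate must fit the signed 9-bit index field.
  if (Imm < -256 || Imm > 255)
    return false;
  // A SUBXri moves the base register downward.
  int Delta = IsSub ? -Imm : Imm;
  // Zero means the caller accepts any update amount (pure post-index scan).
  return WantedOffset == 0 || WantedOffset == Delta;
}

int main() {
  assert(matchesUpdate(false, 16, 16));  // add xN, xN, #16 folds as +16
  assert(matchesUpdate(true, 16, -16));  // sub xN, xN, #16 folds as -16
  assert(!matchesUpdate(false, 300, 0)); // immediate exceeds the 9-bit field
  return 0;
}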
- if (MBBI == B || Offset != 0) - return E; - // If the base register overlaps the destination register, we can't - // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; - - // Track which registers have been modified and used between the first insn - // (inclusive) and the second insn. - BitVector ModifiedRegs, UsedRegs; - ModifiedRegs.resize(TRI->getNumRegs()); - UsedRegs.resize(TRI->getNumRegs()); - --MBBI; - for (unsigned Count = 0; MBBI != B && Count < Limit; --MBBI) { - MachineInstr *MI = MBBI; - // Skip DBG_VALUE instructions. Otherwise debug info can affect the - // optimization by changing how far we scan. - if (MI->isDebugValue()) - continue; - - // Now that we know this is a real instruction, count it. - ++Count; - - // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, RegSize)) - return MBBI; - - // Update the status of what the instruction clobbered and used. - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - - // Otherwise, if the base register is used or modified, we have no match, so - // return early. - if (ModifiedRegs[BaseReg] || UsedRegs[BaseReg]) - return E; - } - return E; -} - -bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { - bool Modified = false; - // Two transformations to do here: - // 1) Find loads and stores that can be merged into a single load or store - // pair instruction. - // e.g., - // ldr x0, [x2] - // ldr x1, [x2, #8] - // ; becomes - // ldp x0, x1, [x2] - // 2) Find base register updates that can be merged into the load or store - // as a base-reg writeback. - // e.g., - // ldr x0, [x2] - // add x2, x2, #4 - // ; becomes - // ldr x0, [x2], #4 - - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - MachineInstr *MI = MBBI; - switch (MI->getOpcode()) { - default: - // Just move on to the next instruction. - ++MBBI; - break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { - // If this is a volatile load/store, don't mess with it. - if (MI->hasOrderedMemoryRef()) { - ++MBBI; - break; - } - // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { - ++MBBI; - break; - } - // Check if this load/store has a hint to avoid pair formation. - // MachineMemOperands hints are set by the ARM64StorePairSuppress pass. - if (TII->isLdStPairSuppressed(MI)) { - ++MBBI; - break; - } - // Look ahead up to ScanLimit instructions for a pairable instruction. - bool mergeForward = false; - MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, mergeForward, ScanLimit); - if (Paired != E) { - // Merge the loads into a pair. Keeping the iterator straight is a - // pain, so we let the merge routine tell us what the next instruction - // is after it's done mucking about. - MBBI = mergePairedInsns(MBBI, Paired, mergeForward); - - Modified = true; - ++NumPairCreated; - if (isUnscaledLdst(MI->getOpcode())) - ++NumUnscaledPairCreated; - break; - } - ++MBBI; - break; - } - // FIXME: Do the other instructions.
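One detail worth calling out in the pairing loop above: a successful merge erases the instruction MBBI points at, so the merge routines hand back the next valid iterator and the loop advances MBBI itself only on the no-change paths. A toy model of that contract over std::list, standing in for a MachineBasicBlock's instruction list:

#include <iterator>
#include <list>

struct Insn { int Opcode; };
using Block = std::list<Insn>;

// Erase the instruction and its successor (a pretend "pair merge") and hand
// back the first iterator after the erased range, the same contract the merge
// routines in this pass follow.
static Block::iterator tryMerge(Block &B, Block::iterator I) {
  Block::iterator Next = std::next(I);
  if (Next == B.end())
    return Next; // nothing left to pair with
  ++Next;
  B.erase(I, Next); // std::list::erase leaves Next valid
  return Next;
}

int main() {
  Block B{{1}, {2}, {3}, {4}};
  for (Block::iterator I = B.begin(); I != B.end();)
    I = tryMerge(B, I); // never advance a possibly-erased iterator
  return B.empty() ? 0 : 1;
}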
- } - } - - for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); - MBBI != E;) { - MachineInstr *MI = MBBI; - // Do update merging. It's simpler to keep this separate from the above - // switch, though not strictly necessary. - int Opc = MI->getOpcode(); - switch (Opc) { - default: - // Just move on to the next instruction. - ++MBBI; - break; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STRQui: - case ARM64::STRXui: - case ARM64::STRWui: - case ARM64::LDRSui: - case ARM64::LDRDui: - case ARM64::LDRQui: - case ARM64::LDRXui: - case ARM64::LDRWui: - // do the unscaled versions as well - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURWi: - case ARM64::LDURXi: { - // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { - ++MBBI; - break; - } - // Look ahead up to ScanLimit instructions for a mergable instruction. - MachineBasicBlock::iterator Update = - findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePostIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPostFolded; - break; - } - // Don't know how to handle pre/post-index versions, so move to the next - // instruction. - if (isUnscaledLdst(Opc)) { - ++MBBI; - break; - } - - // Look back to try to find a pre-index instruction. For example, - // add x0, x0, #8 - // ldr x1, [x0] - // merged into: - // ldr x1, [x0, #8]! - Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPreFolded; - break; - } - - // Look forward to try to find a pre-index instruction. For example, - // ldr x1, [x0, #64] - // add x0, x0, #64 - // merged into: - // ldr x1, [x0, #64]! - - // The immediate in the load/store is scaled by the size of the register - // being loaded. The immediate in the add we're looking for, - // however, is not, so adjust here. - int Value = MI->getOperand(2).getImm() * - TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) - ->getSize(); - Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); - if (Update != E) { - // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); - Modified = true; - ++NumPreFolded; - break; - } - - // Nothing found. Just move to the next instruction. - ++MBBI; - break; - } - // FIXME: Do the other instructions. - } - } - - return Modified; -} - -bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - // Early exit if pass disabled. - if (!DoLoadStoreOpt) - return false; - - const TargetMachine &TM = Fn.getTarget(); - TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); - - bool Modified = false; - for (auto &MBB : Fn) - Modified |= optimizeBlock(MBB); - - return Modified; -} - -// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep -// loads and stores near one another? - -/// createARM64LoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass.
-FunctionPass *llvm::createARM64LoadStoreOptimizationPass() { - return new ARM64LoadStoreOpt(); -} diff --git a/lib/Target/ARM64/ARM64MCInstLower.cpp b/lib/Target/ARM64/ARM64MCInstLower.cpp deleted file mode 100644 index 01dc229..0000000 --- a/lib/Target/ARM64/ARM64MCInstLower.cpp +++ /dev/null @@ -1,201 +0,0 @@ -//===-- ARM64MCInstLower.cpp - Convert ARM64 MachineInstr to an MCInst---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains code to lower ARM64 MachineInstrs to their corresponding -// MCInst records. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCInstLower.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/Support/CodeGen.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -ARM64MCInstLower::ARM64MCInstLower(MCContext &ctx, Mangler &mang, - AsmPrinter &printer) - : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {} - -MCSymbol * -ARM64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { - return Printer.getSymbol(MO.getGlobal()); -} - -MCSymbol * -ARM64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const { - return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); -} - -MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, - MCSymbol *Sym) const { - // FIXME: We would like an efficient form for this, so we don't have to do a - // lot of extra uniquing. 
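Both the Darwin and ELF lowerings below finish the same way: wrap the MCSymbol in a symbol-reference expression and, when the operand carries a constant addend, fold it in with an add node, which is what shows up as sym+8 in printed assembly. A toy, string-level picture of that last step (symbolPlusOffset is purely illustrative; MC builds a real expression tree via MCBinaryExpr::CreateAdd):

#include <cstdio>
#include <string>

static std::string symbolPlusOffset(const std::string &Sym, long Offset) {
  // No addend: the bare symbol reference. Otherwise append the constant,
  // mirroring the (SymRef + Const) expression the lowering code builds.
  return Offset == 0 ? Sym : Sym + "+" + std::to_string(Offset);
}

int main() {
  std::printf("%s\n", symbolPlusOffset("_foo", 8).c_str()); // prints _foo+8
  return 0;
}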
- MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; - if ((MO.getTargetFlags() & ARM64II::MO_GOT) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_GOTPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; - else - assert(0 && "Unexpected target flags with MO_GOT on GV operand"); - } else if ((MO.getTargetFlags() & ARM64II::MO_TLS) != 0) { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_TLVPPAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF; - else - llvm_unreachable("Unexpected target flags with MO_TLS on GV operand"); - } else { - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefKind = MCSymbolRefExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == - ARM64II::MO_PAGEOFF) - RefKind = MCSymbolRefExpr::VK_PAGEOFF; - } - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); - if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - return MCOperand::CreateExpr(Expr); -} - -MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO, - MCSymbol *Sym) const { - uint32_t RefFlags = 0; - - if (MO.getTargetFlags() & ARM64II::MO_GOT) - RefFlags |= ARM64MCExpr::VK_GOT; - else if (MO.getTargetFlags() & ARM64II::MO_TLS) { - TLSModel::Model Model; - if (MO.isGlobal()) { - const GlobalValue *GV = MO.getGlobal(); - Model = Printer.TM.getTLSModel(GV); - } else { - assert(MO.isSymbol() && - StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" && - "unexpected external TLS symbol"); - Model = TLSModel::GeneralDynamic; - } - switch (Model) { - case TLSModel::InitialExec: - RefFlags |= ARM64MCExpr::VK_GOTTPREL; - break; - case TLSModel::LocalExec: - RefFlags |= ARM64MCExpr::VK_TPREL; - break; - case TLSModel::LocalDynamic: - RefFlags |= ARM64MCExpr::VK_DTPREL; - break; - case TLSModel::GeneralDynamic: - RefFlags |= ARM64MCExpr::VK_TLSDESC; - break; - } - } else { - // No modifier means this is a generic reference, classified as absolute for - // the cases where it matters (:abs_g0: etc). 
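The Darwin selection just shown is a pure function of two axes: how the symbol is reached (GOT, TLV, or a direct reference) and which half of the address the relocation wants (the 4KiB page versus the offset within it). Restated as a standalone function, simplified to the page/pageoff cases and using illustrative enumerators in place of the MCSymbolRefExpr variant kinds:

enum class Ref { Page, PageOff, GotPage, GotPageOff, TlvpPage, TlvpPageOff };

static Ref darwinRefKind(bool IsGOT, bool IsTLS, bool WantsPage) {
  if (IsGOT)
    return WantsPage ? Ref::GotPage : Ref::GotPageOff;
  if (IsTLS)
    return WantsPage ? Ref::TlvpPage : Ref::TlvpPageOff;
  return WantsPage ? Ref::Page : Ref::PageOff;
}

int main() {
  // adrp of a GOT slot resolves the page of the slot, not of the variable.
  return darwinRefKind(true, false, true) == Ref::GotPage ? 0 : 1;
}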
- RefFlags |= ARM64MCExpr::VK_ABS; - } - - if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE) - RefFlags |= ARM64MCExpr::VK_PAGE; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGEOFF) - RefFlags |= ARM64MCExpr::VK_PAGEOFF; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G3) - RefFlags |= ARM64MCExpr::VK_G3; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G2) - RefFlags |= ARM64MCExpr::VK_G2; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G1) - RefFlags |= ARM64MCExpr::VK_G1; - else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G0) - RefFlags |= ARM64MCExpr::VK_G0; - - if (MO.getTargetFlags() & ARM64II::MO_NC) - RefFlags |= ARM64MCExpr::VK_NC; - - const MCExpr *Expr = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx); - if (!MO.isJTI() && MO.getOffset()) - Expr = MCBinaryExpr::CreateAdd( - Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx); - - ARM64MCExpr::VariantKind RefKind; - RefKind = static_cast<ARM64MCExpr::VariantKind>(RefFlags); - Expr = ARM64MCExpr::Create(Expr, RefKind, Ctx); - - return MCOperand::CreateExpr(Expr); -} - -MCOperand ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO, - MCSymbol *Sym) const { - if (TargetTriple.isOSDarwin()) - return lowerSymbolOperandDarwin(MO, Sym); - - assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target"); - return lowerSymbolOperandELF(MO, Sym); -} - -bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO, - MCOperand &MCOp) const { - switch (MO.getType()) { - default: - assert(0 && "unknown operand type"); - case MachineOperand::MO_Register: - // Ignore all implicit register operands. - if (MO.isImplicit()) - return false; - MCOp = MCOperand::CreateReg(MO.getReg()); - break; - case MachineOperand::MO_RegisterMask: - // Regmasks are like implicit defs. - return false; - case MachineOperand::MO_Immediate: - MCOp = MCOperand::CreateImm(MO.getImm()); - break; - case MachineOperand::MO_MachineBasicBlock: - MCOp = MCOperand::CreateExpr( - MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx)); - break; - case MachineOperand::MO_GlobalAddress: - MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); - break; - case MachineOperand::MO_ExternalSymbol: - MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); - break; - case MachineOperand::MO_JumpTableIndex: - MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex())); - break; - case MachineOperand::MO_ConstantPoolIndex: - MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex())); - break; - case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand( - MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress())); - break; - } - return true; -} - -void ARM64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { - OutMI.setOpcode(MI->getOpcode()); - - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MCOperand MCOp; - if (lowerOperand(MI->getOperand(i), MCOp)) - OutMI.addOperand(MCOp); - } -} diff --git a/lib/Target/ARM64/ARM64MCInstLower.h b/lib/Target/ARM64/ARM64MCInstLower.h deleted file mode 100644 index 7e3a2c8..0000000 --- a/lib/Target/ARM64/ARM64MCInstLower.h +++ /dev/null @@ -1,52 +0,0 @@ -//===-- ARM64MCInstLower.h - Lower MachineInstr to MCInst ----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details.
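lowerOperand above returns a bool rather than an MCOperand because some machine operands legitimately lower to nothing: implicit register operands and regmasks are dropped, and Lower appends only when the callee reports success. The same shape in miniature, with toy operand types rather than the MachineOperand API:

#include <vector>

struct MOp { enum Kind { Reg, ImplicitReg, Imm, RegMask } K; long Val; };
struct MCOp { long Val; };

static bool lowerOp(const MOp &MO, MCOp &Out) {
  switch (MO.K) {
  case MOp::ImplicitReg: // ignore implicit register operands
  case MOp::RegMask:     // regmasks are like implicit defs
    return false;
  case MOp::Reg:
  case MOp::Imm:
    Out = MCOp{MO.Val};
    return true;
  }
  return false;
}

int main() {
  const std::vector<MOp> In{{MOp::Reg, 1}, {MOp::ImplicitReg, 31}, {MOp::Imm, 8}};
  std::vector<MCOp> Out;
  for (const MOp &MO : In) {
    MCOp Op;
    if (lowerOp(MO, Op)) // only append when something was actually lowered
      Out.push_back(Op);
  }
  return Out.size() == 2 ? 0 : 1; // the implicit register was skipped
}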
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64_MCINSTLOWER_H -#define ARM64_MCINSTLOWER_H - -#include "llvm/ADT/Triple.h" -#include "llvm/Support/Compiler.h" - -namespace llvm { -class AsmPrinter; -class MCAsmInfo; -class MCContext; -class MCInst; -class MCOperand; -class MCSymbol; -class MachineInstr; -class MachineModuleInfoMachO; -class MachineOperand; -class Mangler; - -/// ARM64MCInstLower - This class is used to lower a MachineInstr -/// into an MCInst. -class LLVM_LIBRARY_VISIBILITY ARM64MCInstLower { - MCContext &Ctx; - AsmPrinter &Printer; - Triple TargetTriple; - -public: - ARM64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer); - - bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const; - void Lower(const MachineInstr *MI, MCInst &OutMI) const; - - MCOperand lowerSymbolOperandDarwin(const MachineOperand &MO, - MCSymbol *Sym) const; - MCOperand lowerSymbolOperandELF(const MachineOperand &MO, - MCSymbol *Sym) const; - MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; - - MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; - MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; -}; -} - -#endif diff --git a/lib/Target/ARM64/ARM64MachineFunctionInfo.h b/lib/Target/ARM64/ARM64MachineFunctionInfo.h deleted file mode 100644 index 02bf7cf..0000000 --- a/lib/Target/ARM64/ARM64MachineFunctionInfo.h +++ /dev/null @@ -1,139 +0,0 @@ -//===- ARM64MachineFunctionInfo.h - ARM64 machine function info -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares ARM64-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64MACHINEFUNCTIONINFO_H -#define ARM64MACHINEFUNCTIONINFO_H - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/MC/MCLinkerOptimizationHint.h" - -namespace llvm { - -/// ARM64FunctionInfo - This class is derived from MachineFunctionInfo and -/// contains private ARM64-specific information for each MachineFunction. -class ARM64FunctionInfo : public MachineFunctionInfo { - - /// HasStackFrame - True if this function has a stack frame. Set by - /// processFunctionBeforeCalleeSavedScan(). - bool HasStackFrame; - - /// \brief Amount of stack frame size, not including callee-saved registers. - unsigned LocalStackSize; - - /// \brief Number of TLS accesses using the special (combinable) - /// _TLS_MODULE_BASE_ symbol. - unsigned NumLocalDynamicTLSAccesses; - - /// \brief FrameIndex for start of varargs area for arguments passed on the - /// stack. - int VarArgsStackIndex; - - /// \brief FrameIndex for start of varargs area for arguments passed in - /// general purpose registers. - int VarArgsGPRIndex; - - /// \brief Size of the varargs area for arguments passed in general purpose - /// registers. - unsigned VarArgsGPRSize; - - /// \brief FrameIndex for start of varargs area for arguments passed in - /// floating-point registers. - int VarArgsFPRIndex; - - /// \brief Size of the varargs area for arguments passed in floating-point - /// registers.
- unsigned VarArgsFPRSize; - -public: - ARM64FunctionInfo() - : HasStackFrame(false), NumLocalDynamicTLSAccesses(0), - VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), - VarArgsFPRIndex(0), VarArgsFPRSize(0) {} - - explicit ARM64FunctionInfo(MachineFunction &MF) - : HasStackFrame(false), NumLocalDynamicTLSAccesses(0), - VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0), - VarArgsFPRIndex(0), VarArgsFPRSize(0) { - (void)MF; - } - - bool hasStackFrame() const { return HasStackFrame; } - void setHasStackFrame(bool s) { HasStackFrame = s; } - - void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } - unsigned getLocalStackSize() const { return LocalStackSize; } - - void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; } - unsigned getNumLocalDynamicTLSAccesses() const { - return NumLocalDynamicTLSAccesses; - } - - int getVarArgsStackIndex() const { return VarArgsStackIndex; } - void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; } - - int getVarArgsGPRIndex() const { return VarArgsGPRIndex; } - void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; } - - unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; } - void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; } - - int getVarArgsFPRIndex() const { return VarArgsFPRIndex; } - void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; } - - unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; } - void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; } - - typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions; - - const SetOfInstructions &getLOHRelated() const { return LOHRelated; } - - // Shortcuts for LOH related types. - class MILOHDirective { - MCLOHType Kind; - - /// Arguments of this directive. Order matters. - SmallVector<const MachineInstr *, 3> Args; - - public: - typedef SmallVectorImpl<const MachineInstr *> LOHArgs; - - MILOHDirective(MCLOHType Kind, const LOHArgs &Args) - : Kind(Kind), Args(Args.begin(), Args.end()) { - assert(isValidMCLOHType(Kind) && "Invalid LOH directive type!"); - } - - MCLOHType getKind() const { return Kind; } - const LOHArgs &getArgs() const { return Args; } - }; - - typedef MILOHDirective::LOHArgs MILOHArgs; - typedef SmallVector<MILOHDirective, 32> MILOHContainer; - - const MILOHContainer &getLOHContainer() const { return LOHContainerSet; } - - /// Add a LOH directive of this @p Kind and this @p Args. - void addLOHDirective(MCLOHType Kind, const MILOHArgs &Args) { - LOHContainerSet.push_back(MILOHDirective(Kind, Args)); - LOHRelated.insert(Args.begin(), Args.end()); - } - -private: - // Hold the lists of LOHs. - MILOHContainer LOHContainerSet; - SetOfInstructions LOHRelated; -}; -} // End llvm namespace - -#endif // ARM64MACHINEFUNCTIONINFO_H diff --git a/lib/Target/ARM64/ARM64PerfectShuffle.h b/lib/Target/ARM64/ARM64PerfectShuffle.h deleted file mode 100644 index 6759236..0000000 --- a/lib/Target/ARM64/ARM64PerfectShuffle.h +++ /dev/null @@ -1,6586 +0,0 @@ -//===-- ARM64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file, which was autogenerated by llvm-PerfectShuffle, contains data -// for the optimal way to build a perfect shuffle using AdvSIMD instructions.
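A reading aid for the table that follows: each 32-bit entry packs a 4-bit opcode plus two 13-bit operand IDs that index back into the same table, so a shuffle unfolds into a small tree of AdvSIMD operations, while each entry's comment carries the operation and total cost in human-readable form. The shifts and masks below mirror the decode used by LLVM's GeneratePerfectShuffle helper; treat this as a sketch for reading the data, not the generator's specification:

#include <cstdint>
#include <cstdio>

int main() {
  // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS -- an entry from the table below.
  const uint32_t PFEntry = 1543503974U;
  unsigned OpNum = (PFEntry >> 26) & 0x0F;             // which AdvSIMD op
  unsigned LHSID = (PFEntry >> 13) & ((1u << 13) - 1); // left operand entry
  unsigned RHSID = PFEntry & ((1u << 13) - 1);         // right operand entry
  std::printf("op=%u lhs=%u rhs=%u\n", OpNum, LHSID, RHSID);
  return 0;
}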
-// -//===----------------------------------------------------------------------===// - -// 31 entries have cost 0 -// 242 entries have cost 1 -// 1447 entries have cost 2 -// 3602 entries have cost 3 -// 1237 entries have cost 4 -// 2 entries have cost 5 - -// This table is 6561*4 = 26244 bytes in size. -static const unsigned PerfectShuffleTable[6561+1] = { - 135053414U, // <0,0,0,0>: Cost 1 vdup0 LHS - 1543503974U, // <0,0,0,1>: Cost 2 vext2 <0,0,0,0>, LHS - 2618572962U, // <0,0,0,2>: Cost 3 vext2 <0,2,0,0>, <0,2,0,0> - 2568054923U, // <0,0,0,3>: Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1476398390U, // <0,0,0,4>: Cost 2 vext1 <0,0,0,0>, RHS - 2550140624U, // <0,0,0,5>: Cost 3 vext1 <0,0,0,0>, <5,1,7,3> - 2550141434U, // <0,0,0,6>: Cost 3 vext1 <0,0,0,0>, <6,2,7,3> - 2591945711U, // <0,0,0,7>: Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,0,u>: Cost 1 vdup0 LHS - 2886516736U, // <0,0,1,0>: Cost 3 vzipl LHS, <0,0,0,0> - 1812775014U, // <0,0,1,1>: Cost 2 vzipl LHS, LHS - 1618133094U, // <0,0,1,2>: Cost 2 vext3 <1,2,3,0>, LHS - 2625209292U, // <0,0,1,3>: Cost 3 vext2 <1,3,0,0>, <1,3,0,0> - 2886558034U, // <0,0,1,4>: Cost 3 vzipl LHS, <0,4,1,5> - 2617246864U, // <0,0,1,5>: Cost 3 vext2 <0,0,0,0>, <1,5,3,7> - 3659723031U, // <0,0,1,6>: Cost 4 vext1 <6,0,0,1>, <6,0,0,1> - 2591953904U, // <0,0,1,7>: Cost 3 vext1 <7,0,0,1>, <7,0,0,1> - 1812775581U, // <0,0,1,u>: Cost 2 vzipl LHS, LHS - 3020734464U, // <0,0,2,0>: Cost 3 vtrnl LHS, <0,0,0,0> - 3020734474U, // <0,0,2,1>: Cost 3 vtrnl LHS, <0,0,1,1> - 1946992742U, // <0,0,2,2>: Cost 2 vtrnl LHS, LHS - 2631181989U, // <0,0,2,3>: Cost 3 vext2 <2,3,0,0>, <2,3,0,0> - 3020734668U, // <0,0,2,4>: Cost 3 vtrnl LHS, <0,2,4,6> - 3826550569U, // <0,0,2,5>: Cost 4 vuzpl <0,2,0,2>, <2,4,5,6> - 2617247674U, // <0,0,2,6>: Cost 3 vext2 <0,0,0,0>, <2,6,3,7> - 2591962097U, // <0,0,2,7>: Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1946992796U, // <0,0,2,u>: Cost 2 vtrnl LHS, LHS - 2635163787U, // <0,0,3,0>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2686419196U, // <0,0,3,1>: Cost 3 vext3 <0,3,1,0>, <0,3,1,0> - 2686492933U, // <0,0,3,2>: Cost 3 vext3 <0,3,2,0>, <0,3,2,0> - 2617248156U, // <0,0,3,3>: Cost 3 vext2 <0,0,0,0>, <3,3,3,3> - 2617248258U, // <0,0,3,4>: Cost 3 vext2 <0,0,0,0>, <3,4,5,6> - 3826551298U, // <0,0,3,5>: Cost 4 vuzpl <0,2,0,2>, <3,4,5,6> - 3690990200U, // <0,0,3,6>: Cost 4 vext2 <0,0,0,0>, <3,6,0,7> - 3713551042U, // <0,0,3,7>: Cost 4 vext2 <3,7,0,0>, <3,7,0,0> - 2635163787U, // <0,0,3,u>: Cost 3 vext2 <3,0,0,0>, <3,0,0,0> - 2617248658U, // <0,0,4,0>: Cost 3 vext2 <0,0,0,0>, <4,0,5,1> - 2888450150U, // <0,0,4,1>: Cost 3 vzipl <0,4,1,5>, LHS - 3021570150U, // <0,0,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS - 3641829519U, // <0,0,4,3>: Cost 4 vext1 <3,0,0,4>, <3,0,0,4> - 3021570252U, // <0,0,4,4>: Cost 3 vtrnl <0,2,4,6>, <0,2,4,6> - 1543507254U, // <0,0,4,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752810294U, // <0,0,4,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 3786998152U, // <0,0,4,7>: Cost 4 vext3 <4,7,5,0>, <0,4,7,5> - 1543507497U, // <0,0,4,u>: Cost 2 vext2 <0,0,0,0>, RHS - 2684354972U, // <0,0,5,0>: Cost 3 vext3 <0,0,0,0>, <0,5,0,7> - 2617249488U, // <0,0,5,1>: Cost 3 vext2 <0,0,0,0>, <5,1,7,3> - 3765617070U, // <0,0,5,2>: Cost 4 vext3 <1,2,3,0>, <0,5,2,7> - 3635865780U, // <0,0,5,3>: Cost 4 vext1 <2,0,0,5>, <3,0,4,5> - 2617249734U, // <0,0,5,4>: Cost 3 vext2 <0,0,0,0>, <5,4,7,6> - 2617249796U, // <0,0,5,5>: Cost 3 vext2 <0,0,0,0>, <5,5,5,5> - 2718712274U, // <0,0,5,6>: Cost 3 vext3 <5,6,7,0>, <0,5,6,7> - 2617249960U, // <0,0,5,7>: Cost 3 vext2 <0,0,0,0>, <5,7,5,7> - 2720039396U, 
// <0,0,5,u>: Cost 3 vext3 <5,u,7,0>, <0,5,u,7> - 2684355053U, // <0,0,6,0>: Cost 3 vext3 <0,0,0,0>, <0,6,0,7> - 3963609190U, // <0,0,6,1>: Cost 4 vzipl <0,6,2,7>, LHS - 2617250298U, // <0,0,6,2>: Cost 3 vext2 <0,0,0,0>, <6,2,7,3> - 3796435464U, // <0,0,6,3>: Cost 4 vext3 <6,3,7,0>, <0,6,3,7> - 3659762998U, // <0,0,6,4>: Cost 4 vext1 <6,0,0,6>, RHS - 3659763810U, // <0,0,6,5>: Cost 4 vext1 <6,0,0,6>, <5,6,7,0> - 2617250616U, // <0,0,6,6>: Cost 3 vext2 <0,0,0,0>, <6,6,6,6> - 2657727309U, // <0,0,6,7>: Cost 3 vext2 <6,7,0,0>, <6,7,0,0> - 2658390942U, // <0,0,6,u>: Cost 3 vext2 <6,u,0,0>, <6,u,0,0> - 2659054575U, // <0,0,7,0>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 3635880854U, // <0,0,7,1>: Cost 4 vext1 <2,0,0,7>, <1,2,3,0> - 3635881401U, // <0,0,7,2>: Cost 4 vext1 <2,0,0,7>, <2,0,0,7> - 3734787298U, // <0,0,7,3>: Cost 4 vext2 <7,3,0,0>, <7,3,0,0> - 2617251174U, // <0,0,7,4>: Cost 3 vext2 <0,0,0,0>, <7,4,5,6> - 3659772002U, // <0,0,7,5>: Cost 4 vext1 <6,0,0,7>, <5,6,7,0> - 3659772189U, // <0,0,7,6>: Cost 4 vext1 <6,0,0,7>, <6,0,0,7> - 2617251436U, // <0,0,7,7>: Cost 3 vext2 <0,0,0,0>, <7,7,7,7> - 2659054575U, // <0,0,7,u>: Cost 3 vext2 <7,0,0,0>, <7,0,0,0> - 135053414U, // <0,0,u,0>: Cost 1 vdup0 LHS - 1817419878U, // <0,0,u,1>: Cost 2 vzipl LHS, LHS - 1947435110U, // <0,0,u,2>: Cost 2 vtrnl LHS, LHS - 2568120467U, // <0,0,u,3>: Cost 3 vext1 <3,0,0,u>, <3,0,0,u> - 1476463926U, // <0,0,u,4>: Cost 2 vext1 <0,0,0,u>, RHS - 1543510170U, // <0,0,u,5>: Cost 2 vext2 <0,0,0,0>, RHS - 2752813210U, // <0,0,u,6>: Cost 3 vuzpl <0,2,0,2>, RHS - 2592011255U, // <0,0,u,7>: Cost 3 vext1 <7,0,0,u>, <7,0,0,u> - 135053414U, // <0,0,u,u>: Cost 1 vdup0 LHS - 2618581002U, // <0,1,0,0>: Cost 3 vext2 <0,2,0,1>, <0,0,1,1> - 1557446758U, // <0,1,0,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2618581155U, // <0,1,0,2>: Cost 3 vext2 <0,2,0,1>, <0,2,0,1> - 2690548468U, // <0,1,0,3>: Cost 3 vext3 <1,0,3,0>, <1,0,3,0> - 2626543954U, // <0,1,0,4>: Cost 3 vext2 <1,5,0,1>, <0,4,1,5> - 4094985216U, // <0,1,0,5>: Cost 4 vtrnl <0,2,0,2>, <1,3,5,7> - 2592019278U, // <0,1,0,6>: Cost 3 vext1 <7,0,1,0>, <6,7,0,1> - 2592019448U, // <0,1,0,7>: Cost 3 vext1 <7,0,1,0>, <7,0,1,0> - 1557447325U, // <0,1,0,u>: Cost 2 vext2 <2,3,0,1>, LHS - 1476476938U, // <0,1,1,0>: Cost 2 vext1 <0,0,1,1>, <0,0,1,1> - 2886517556U, // <0,1,1,1>: Cost 3 vzipl LHS, <1,1,1,1> - 2886517654U, // <0,1,1,2>: Cost 3 vzipl LHS, <1,2,3,0> - 2886517720U, // <0,1,1,3>: Cost 3 vzipl LHS, <1,3,1,3> - 1476480310U, // <0,1,1,4>: Cost 2 vext1 <0,0,1,1>, RHS - 2886558864U, // <0,1,1,5>: Cost 3 vzipl LHS, <1,5,3,7> - 2550223354U, // <0,1,1,6>: Cost 3 vext1 <0,0,1,1>, <6,2,7,3> - 2550223856U, // <0,1,1,7>: Cost 3 vext1 <0,0,1,1>, <7,0,0,1> - 1476482862U, // <0,1,1,u>: Cost 2 vext1 <0,0,1,1>, LHS - 1494401126U, // <0,1,2,0>: Cost 2 vext1 <3,0,1,2>, LHS - 3020735284U, // <0,1,2,1>: Cost 3 vtrnl LHS, <1,1,1,1> - 2562172349U, // <0,1,2,2>: Cost 3 vext1 <2,0,1,2>, <2,0,1,2> - 835584U, // <0,1,2,3>: Cost 0 copy LHS - 1494404406U, // <0,1,2,4>: Cost 2 vext1 <3,0,1,2>, RHS - 3020735488U, // <0,1,2,5>: Cost 3 vtrnl LHS, <1,3,5,7> - 2631190458U, // <0,1,2,6>: Cost 3 vext2 <2,3,0,1>, <2,6,3,7> - 1518294010U, // <0,1,2,7>: Cost 2 vext1 <7,0,1,2>, <7,0,1,2> - 835584U, // <0,1,2,u>: Cost 0 copy LHS - 2692318156U, // <0,1,3,0>: Cost 3 vext3 <1,3,0,0>, <1,3,0,0> - 2691875800U, // <0,1,3,1>: Cost 3 vext3 <1,2,3,0>, <1,3,1,3> - 2691875806U, // <0,1,3,2>: Cost 3 vext3 <1,2,3,0>, <1,3,2,0> - 2692539367U, // <0,1,3,3>: Cost 3 vext3 <1,3,3,0>, <1,3,3,0> - 2562182454U, // <0,1,3,4>: Cost 3 vext1 <2,0,1,3>, RHS - 
2691875840U, // <0,1,3,5>: Cost 3 vext3 <1,2,3,0>, <1,3,5,7> - 2692760578U, // <0,1,3,6>: Cost 3 vext3 <1,3,6,0>, <1,3,6,0> - 2639817411U, // <0,1,3,7>: Cost 3 vext2 <3,7,0,1>, <3,7,0,1> - 2691875863U, // <0,1,3,u>: Cost 3 vext3 <1,2,3,0>, <1,3,u,3> - 2568159334U, // <0,1,4,0>: Cost 3 vext1 <3,0,1,4>, LHS - 4095312692U, // <0,1,4,1>: Cost 4 vtrnl <0,2,4,6>, <1,1,1,1> - 2568160934U, // <0,1,4,2>: Cost 3 vext1 <3,0,1,4>, <2,3,0,1> - 2568161432U, // <0,1,4,3>: Cost 3 vext1 <3,0,1,4>, <3,0,1,4> - 2568162614U, // <0,1,4,4>: Cost 3 vext1 <3,0,1,4>, RHS - 1557450038U, // <0,1,4,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754235702U, // <0,1,4,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 2592052220U, // <0,1,4,7>: Cost 3 vext1 <7,0,1,4>, <7,0,1,4> - 1557450281U, // <0,1,4,u>: Cost 2 vext2 <2,3,0,1>, RHS - 3765617775U, // <0,1,5,0>: Cost 4 vext3 <1,2,3,0>, <1,5,0,1> - 2647781007U, // <0,1,5,1>: Cost 3 vext2 <5,1,0,1>, <5,1,0,1> - 3704934138U, // <0,1,5,2>: Cost 4 vext2 <2,3,0,1>, <5,2,3,0> - 2691875984U, // <0,1,5,3>: Cost 3 vext3 <1,2,3,0>, <1,5,3,7> - 2657734598U, // <0,1,5,4>: Cost 3 vext2 <6,7,0,1>, <5,4,7,6> - 2650435539U, // <0,1,5,5>: Cost 3 vext2 <5,5,0,1>, <5,5,0,1> - 2651099172U, // <0,1,5,6>: Cost 3 vext2 <5,6,0,1>, <5,6,0,1> - 2651762805U, // <0,1,5,7>: Cost 3 vext2 <5,7,0,1>, <5,7,0,1> - 2691876029U, // <0,1,5,u>: Cost 3 vext3 <1,2,3,0>, <1,5,u,7> - 2592063590U, // <0,1,6,0>: Cost 3 vext1 <7,0,1,6>, LHS - 3765617871U, // <0,1,6,1>: Cost 4 vext3 <1,2,3,0>, <1,6,1,7> - 2654417337U, // <0,1,6,2>: Cost 3 vext2 <6,2,0,1>, <6,2,0,1> - 3765617889U, // <0,1,6,3>: Cost 4 vext3 <1,2,3,0>, <1,6,3,7> - 2592066870U, // <0,1,6,4>: Cost 3 vext1 <7,0,1,6>, RHS - 3765617907U, // <0,1,6,5>: Cost 4 vext3 <1,2,3,0>, <1,6,5,7> - 2657071869U, // <0,1,6,6>: Cost 3 vext2 <6,6,0,1>, <6,6,0,1> - 1583993678U, // <0,1,6,7>: Cost 2 vext2 <6,7,0,1>, <6,7,0,1> - 1584657311U, // <0,1,6,u>: Cost 2 vext2 <6,u,0,1>, <6,u,0,1> - 2657735672U, // <0,1,7,0>: Cost 3 vext2 <6,7,0,1>, <7,0,1,0> - 2657735808U, // <0,1,7,1>: Cost 3 vext2 <6,7,0,1>, <7,1,7,1> - 2631193772U, // <0,1,7,2>: Cost 3 vext2 <2,3,0,1>, <7,2,3,0> - 2661053667U, // <0,1,7,3>: Cost 3 vext2 <7,3,0,1>, <7,3,0,1> - 2657736038U, // <0,1,7,4>: Cost 3 vext2 <6,7,0,1>, <7,4,5,6> - 3721524621U, // <0,1,7,5>: Cost 4 vext2 <5,1,0,1>, <7,5,1,0> - 2657736158U, // <0,1,7,6>: Cost 3 vext2 <6,7,0,1>, <7,6,1,0> - 2657736300U, // <0,1,7,7>: Cost 3 vext2 <6,7,0,1>, <7,7,7,7> - 2657736322U, // <0,1,7,u>: Cost 3 vext2 <6,7,0,1>, <7,u,1,2> - 1494450278U, // <0,1,u,0>: Cost 2 vext1 <3,0,1,u>, LHS - 1557452590U, // <0,1,u,1>: Cost 2 vext2 <2,3,0,1>, LHS - 2754238254U, // <0,1,u,2>: Cost 3 vuzpl <0,4,1,5>, LHS - 835584U, // <0,1,u,3>: Cost 0 copy LHS - 1494453558U, // <0,1,u,4>: Cost 2 vext1 <3,0,1,u>, RHS - 1557452954U, // <0,1,u,5>: Cost 2 vext2 <2,3,0,1>, RHS - 2754238618U, // <0,1,u,6>: Cost 3 vuzpl <0,4,1,5>, RHS - 1518343168U, // <0,1,u,7>: Cost 2 vext1 <7,0,1,u>, <7,0,1,u> - 835584U, // <0,1,u,u>: Cost 0 copy LHS - 2752299008U, // <0,2,0,0>: Cost 3 vuzpl LHS, <0,0,0,0> - 1544847462U, // <0,2,0,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678557286U, // <0,2,0,2>: Cost 2 vuzpl LHS, LHS - 2696521165U, // <0,2,0,3>: Cost 3 vext3 <2,0,3,0>, <2,0,3,0> - 2752340172U, // <0,2,0,4>: Cost 3 vuzpl LHS, <0,2,4,6> - 2691876326U, // <0,2,0,5>: Cost 3 vext3 <1,2,3,0>, <2,0,5,7> - 2618589695U, // <0,2,0,6>: Cost 3 vext2 <0,2,0,2>, <0,6,2,7> - 2592093185U, // <0,2,0,7>: Cost 3 vext1 <7,0,2,0>, <7,0,2,0> - 1678557340U, // <0,2,0,u>: Cost 2 vuzpl LHS, LHS - 2618589942U, // <0,2,1,0>: Cost 3 vext2 <0,2,0,2>, <1,0,3,2> - 
2752299828U, // <0,2,1,1>: Cost 3 vuzpl LHS, <1,1,1,1> - 2886518376U, // <0,2,1,2>: Cost 3 vzipl LHS, <2,2,2,2> - 2752299766U, // <0,2,1,3>: Cost 3 vuzpl LHS, <1,0,3,2> - 2550295862U, // <0,2,1,4>: Cost 3 vext1 <0,0,2,1>, RHS - 2752340992U, // <0,2,1,5>: Cost 3 vuzpl LHS, <1,3,5,7> - 2886559674U, // <0,2,1,6>: Cost 3 vzipl LHS, <2,6,3,7> - 3934208106U, // <0,2,1,7>: Cost 4 vuzpr <7,0,1,2>, <0,1,2,7> - 2752340771U, // <0,2,1,u>: Cost 3 vuzpl LHS, <1,0,u,2> - 1476558868U, // <0,2,2,0>: Cost 2 vext1 <0,0,2,2>, <0,0,2,2> - 2226628029U, // <0,2,2,1>: Cost 3 vrev <2,0,1,2> - 2752300648U, // <0,2,2,2>: Cost 3 vuzpl LHS, <2,2,2,2> - 3020736114U, // <0,2,2,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476562230U, // <0,2,2,4>: Cost 2 vext1 <0,0,2,2>, RHS - 2550304464U, // <0,2,2,5>: Cost 3 vext1 <0,0,2,2>, <5,1,7,3> - 2618591162U, // <0,2,2,6>: Cost 3 vext2 <0,2,0,2>, <2,6,3,7> - 2550305777U, // <0,2,2,7>: Cost 3 vext1 <0,0,2,2>, <7,0,0,2> - 1476564782U, // <0,2,2,u>: Cost 2 vext1 <0,0,2,2>, LHS - 2618591382U, // <0,2,3,0>: Cost 3 vext2 <0,2,0,2>, <3,0,1,2> - 2752301206U, // <0,2,3,1>: Cost 3 vuzpl LHS, <3,0,1,2> - 3826043121U, // <0,2,3,2>: Cost 4 vuzpl LHS, <3,1,2,3> - 2752301468U, // <0,2,3,3>: Cost 3 vuzpl LHS, <3,3,3,3> - 2618591746U, // <0,2,3,4>: Cost 3 vext2 <0,2,0,2>, <3,4,5,6> - 2752301570U, // <0,2,3,5>: Cost 3 vuzpl LHS, <3,4,5,6> - 3830688102U, // <0,2,3,6>: Cost 4 vuzpl LHS, <3,2,6,3> - 2698807012U, // <0,2,3,7>: Cost 3 vext3 <2,3,7,0>, <2,3,7,0> - 2752301269U, // <0,2,3,u>: Cost 3 vuzpl LHS, <3,0,u,2> - 2562261094U, // <0,2,4,0>: Cost 3 vext1 <2,0,2,4>, LHS - 4095313828U, // <0,2,4,1>: Cost 4 vtrnl <0,2,4,6>, <2,6,1,3> - 2226718152U, // <0,2,4,2>: Cost 3 vrev <2,0,2,4> - 2568235169U, // <0,2,4,3>: Cost 3 vext1 <3,0,2,4>, <3,0,2,4> - 2562264374U, // <0,2,4,4>: Cost 3 vext1 <2,0,2,4>, RHS - 1544850742U, // <0,2,4,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678560566U, // <0,2,4,6>: Cost 2 vuzpl LHS, RHS - 2592125957U, // <0,2,4,7>: Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1678560584U, // <0,2,4,u>: Cost 2 vuzpl LHS, RHS - 2691876686U, // <0,2,5,0>: Cost 3 vext3 <1,2,3,0>, <2,5,0,7> - 2618592976U, // <0,2,5,1>: Cost 3 vext2 <0,2,0,2>, <5,1,7,3> - 3765618528U, // <0,2,5,2>: Cost 4 vext3 <1,2,3,0>, <2,5,2,7> - 3765618536U, // <0,2,5,3>: Cost 4 vext3 <1,2,3,0>, <2,5,3,6> - 2618593222U, // <0,2,5,4>: Cost 3 vext2 <0,2,0,2>, <5,4,7,6> - 2752303108U, // <0,2,5,5>: Cost 3 vuzpl LHS, <5,5,5,5> - 2618593378U, // <0,2,5,6>: Cost 3 vext2 <0,2,0,2>, <5,6,7,0> - 2824785206U, // <0,2,5,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 2824785207U, // <0,2,5,u>: Cost 3 vuzpr <1,0,3,2>, RHS - 2752303950U, // <0,2,6,0>: Cost 3 vuzpl LHS, <6,7,0,1> - 3830690081U, // <0,2,6,1>: Cost 4 vuzpl LHS, <6,0,1,2> - 2618593786U, // <0,2,6,2>: Cost 3 vext2 <0,2,0,2>, <6,2,7,3> - 2691876794U, // <0,2,6,3>: Cost 3 vext3 <1,2,3,0>, <2,6,3,7> - 2752303990U, // <0,2,6,4>: Cost 3 vuzpl LHS, <6,7,4,5> - 3830690445U, // <0,2,6,5>: Cost 4 vuzpl LHS, <6,4,5,6> - 2752303928U, // <0,2,6,6>: Cost 3 vuzpl LHS, <6,6,6,6> - 2657743695U, // <0,2,6,7>: Cost 3 vext2 <6,7,0,2>, <6,7,0,2> - 2691876839U, // <0,2,6,u>: Cost 3 vext3 <1,2,3,0>, <2,6,u,7> - 2659070961U, // <0,2,7,0>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 2659734594U, // <0,2,7,1>: Cost 3 vext2 <7,1,0,2>, <7,1,0,2> - 3734140051U, // <0,2,7,2>: Cost 4 vext2 <7,2,0,2>, <7,2,0,2> - 2701166596U, // <0,2,7,3>: Cost 3 vext3 <2,7,3,0>, <2,7,3,0> - 2662389094U, // <0,2,7,4>: Cost 3 vext2 <7,5,0,2>, <7,4,5,6> - 2662389126U, // <0,2,7,5>: Cost 3 vext2 <7,5,0,2>, <7,5,0,2> - 3736794583U, // <0,2,7,6>: Cost 4 vext2 <7,6,0,2>, 
<7,6,0,2> - 2752304748U, // <0,2,7,7>: Cost 3 vuzpl LHS, <7,7,7,7> - 2659070961U, // <0,2,7,u>: Cost 3 vext2 <7,0,0,2>, <7,0,0,2> - 1476608026U, // <0,2,u,0>: Cost 2 vext1 <0,0,2,u>, <0,0,2,u> - 1544853294U, // <0,2,u,1>: Cost 2 vext2 <0,2,0,2>, LHS - 1678563118U, // <0,2,u,2>: Cost 2 vuzpl LHS, LHS - 3021178482U, // <0,2,u,3>: Cost 3 vtrnl LHS, <2,2,3,3> - 1476611382U, // <0,2,u,4>: Cost 2 vext1 <0,0,2,u>, RHS - 1544853658U, // <0,2,u,5>: Cost 2 vext2 <0,2,0,2>, RHS - 1678563482U, // <0,2,u,6>: Cost 2 vuzpl LHS, RHS - 2824785449U, // <0,2,u,7>: Cost 3 vuzpr <1,0,3,2>, RHS - 1678563172U, // <0,2,u,u>: Cost 2 vuzpl LHS, LHS - 2556329984U, // <0,3,0,0>: Cost 3 vext1 <1,0,3,0>, <0,0,0,0> - 2686421142U, // <0,3,0,1>: Cost 3 vext3 <0,3,1,0>, <3,0,1,2> - 2562303437U, // <0,3,0,2>: Cost 3 vext1 <2,0,3,0>, <2,0,3,0> - 4094986652U, // <0,3,0,3>: Cost 4 vtrnl <0,2,0,2>, <3,3,3,3> - 2556333366U, // <0,3,0,4>: Cost 3 vext1 <1,0,3,0>, RHS - 4094986754U, // <0,3,0,5>: Cost 4 vtrnl <0,2,0,2>, <3,4,5,6> - 3798796488U, // <0,3,0,6>: Cost 4 vext3 <6,7,3,0>, <3,0,6,7> - 3776530634U, // <0,3,0,7>: Cost 4 vext3 <3,0,7,0>, <3,0,7,0> - 2556335918U, // <0,3,0,u>: Cost 3 vext1 <1,0,3,0>, LHS - 2886518934U, // <0,3,1,0>: Cost 3 vzipl LHS, <3,0,1,2> - 2556338933U, // <0,3,1,1>: Cost 3 vext1 <1,0,3,1>, <1,0,3,1> - 2691877105U, // <0,3,1,2>: Cost 3 vext3 <1,2,3,0>, <3,1,2,3> - 2886519196U, // <0,3,1,3>: Cost 3 vzipl LHS, <3,3,3,3> - 2886519298U, // <0,3,1,4>: Cost 3 vzipl LHS, <3,4,5,6> - 4095740418U, // <0,3,1,5>: Cost 4 vtrnl <0,3,1,4>, <3,4,5,6> - 3659944242U, // <0,3,1,6>: Cost 4 vext1 <6,0,3,1>, <6,0,3,1> - 3769600286U, // <0,3,1,7>: Cost 4 vext3 <1,u,3,0>, <3,1,7,3> - 2886519582U, // <0,3,1,u>: Cost 3 vzipl LHS, <3,u,1,2> - 1482604646U, // <0,3,2,0>: Cost 2 vext1 <1,0,3,2>, LHS - 1482605302U, // <0,3,2,1>: Cost 2 vext1 <1,0,3,2>, <1,0,3,2> - 2556348008U, // <0,3,2,2>: Cost 3 vext1 <1,0,3,2>, <2,2,2,2> - 3020736924U, // <0,3,2,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482607926U, // <0,3,2,4>: Cost 2 vext1 <1,0,3,2>, RHS - 3020737026U, // <0,3,2,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598154746U, // <0,3,2,6>: Cost 3 vext1 , <6,2,7,3> - 2598155258U, // <0,3,2,7>: Cost 3 vext1 , <7,0,1,2> - 1482610478U, // <0,3,2,u>: Cost 2 vext1 <1,0,3,2>, LHS - 3692341398U, // <0,3,3,0>: Cost 4 vext2 <0,2,0,3>, <3,0,1,2> - 2635851999U, // <0,3,3,1>: Cost 3 vext2 <3,1,0,3>, <3,1,0,3> - 3636069840U, // <0,3,3,2>: Cost 4 vext1 <2,0,3,3>, <2,0,3,3> - 2691877276U, // <0,3,3,3>: Cost 3 vext3 <1,2,3,0>, <3,3,3,3> - 3961522690U, // <0,3,3,4>: Cost 4 vzipl <0,3,1,4>, <3,4,5,6> - 3826797058U, // <0,3,3,5>: Cost 4 vuzpl <0,2,3,5>, <3,4,5,6> - 3703622282U, // <0,3,3,6>: Cost 4 vext2 <2,1,0,3>, <3,6,2,7> - 3769600452U, // <0,3,3,7>: Cost 4 vext3 <1,u,3,0>, <3,3,7,7> - 2640497430U, // <0,3,3,u>: Cost 3 vext2 <3,u,0,3>, <3,u,0,3> - 3962194070U, // <0,3,4,0>: Cost 4 vzipl <0,4,1,5>, <3,0,1,2> - 2232617112U, // <0,3,4,1>: Cost 3 vrev <3,0,1,4> - 2232690849U, // <0,3,4,2>: Cost 3 vrev <3,0,2,4> - 4095314332U, // <0,3,4,3>: Cost 4 vtrnl <0,2,4,6>, <3,3,3,3> - 3962194434U, // <0,3,4,4>: Cost 4 vzipl <0,4,1,5>, <3,4,5,6> - 2691877378U, // <0,3,4,5>: Cost 3 vext3 <1,2,3,0>, <3,4,5,6> - 3826765110U, // <0,3,4,6>: Cost 4 vuzpl <0,2,3,1>, RHS - 3665941518U, // <0,3,4,7>: Cost 4 vext1 <7,0,3,4>, <7,0,3,4> - 2691877405U, // <0,3,4,u>: Cost 3 vext3 <1,2,3,0>, <3,4,u,6> - 3630112870U, // <0,3,5,0>: Cost 4 vext1 <1,0,3,5>, LHS - 3630113526U, // <0,3,5,1>: Cost 4 vext1 <1,0,3,5>, <1,0,3,2> - 4035199734U, // <0,3,5,2>: Cost 4 vzipr <1,4,0,5>, <1,0,3,2> - 3769600578U, // 
<0,3,5,3>: Cost 4 vext3 <1,u,3,0>, <3,5,3,7> - 2232846516U, // <0,3,5,4>: Cost 3 vrev <3,0,4,5> - 3779037780U, // <0,3,5,5>: Cost 4 vext3 <3,4,5,0>, <3,5,5,7> - 2718714461U, // <0,3,5,6>: Cost 3 vext3 <5,6,7,0>, <3,5,6,7> - 2706106975U, // <0,3,5,7>: Cost 3 vext3 <3,5,7,0>, <3,5,7,0> - 2233141464U, // <0,3,5,u>: Cost 3 vrev <3,0,u,5> - 2691877496U, // <0,3,6,0>: Cost 3 vext3 <1,2,3,0>, <3,6,0,7> - 3727511914U, // <0,3,6,1>: Cost 4 vext2 <6,1,0,3>, <6,1,0,3> - 3765619338U, // <0,3,6,2>: Cost 4 vext3 <1,2,3,0>, <3,6,2,7> - 3765619347U, // <0,3,6,3>: Cost 4 vext3 <1,2,3,0>, <3,6,3,7> - 3765987996U, // <0,3,6,4>: Cost 4 vext3 <1,2,u,0>, <3,6,4,7> - 3306670270U, // <0,3,6,5>: Cost 4 vrev <3,0,5,6> - 3792456365U, // <0,3,6,6>: Cost 4 vext3 <5,6,7,0>, <3,6,6,6> - 2706770608U, // <0,3,6,7>: Cost 3 vext3 <3,6,7,0>, <3,6,7,0> - 2706844345U, // <0,3,6,u>: Cost 3 vext3 <3,6,u,0>, <3,6,u,0> - 3769600707U, // <0,3,7,0>: Cost 4 vext3 <1,u,3,0>, <3,7,0,1> - 2659742787U, // <0,3,7,1>: Cost 3 vext2 <7,1,0,3>, <7,1,0,3> - 3636102612U, // <0,3,7,2>: Cost 4 vext1 <2,0,3,7>, <2,0,3,7> - 3769600740U, // <0,3,7,3>: Cost 4 vext3 <1,u,3,0>, <3,7,3,7> - 3769600747U, // <0,3,7,4>: Cost 4 vext3 <1,u,3,0>, <3,7,4,5> - 3769600758U, // <0,3,7,5>: Cost 4 vext3 <1,u,3,0>, <3,7,5,7> - 3659993400U, // <0,3,7,6>: Cost 4 vext1 <6,0,3,7>, <6,0,3,7> - 3781176065U, // <0,3,7,7>: Cost 4 vext3 <3,7,7,0>, <3,7,7,0> - 2664388218U, // <0,3,7,u>: Cost 3 vext2 <7,u,0,3>, <7,u,0,3> - 1482653798U, // <0,3,u,0>: Cost 2 vext1 <1,0,3,u>, LHS - 1482654460U, // <0,3,u,1>: Cost 2 vext1 <1,0,3,u>, <1,0,3,u> - 2556397160U, // <0,3,u,2>: Cost 3 vext1 <1,0,3,u>, <2,2,2,2> - 3021179292U, // <0,3,u,3>: Cost 3 vtrnl LHS, <3,3,3,3> - 1482657078U, // <0,3,u,4>: Cost 2 vext1 <1,0,3,u>, RHS - 3021179394U, // <0,3,u,5>: Cost 3 vtrnl LHS, <3,4,5,6> - 2598203898U, // <0,3,u,6>: Cost 3 vext1 , <6,2,7,3> - 2708097874U, // <0,3,u,7>: Cost 3 vext3 <3,u,7,0>, <3,u,7,0> - 1482659630U, // <0,3,u,u>: Cost 2 vext1 <1,0,3,u>, LHS - 2617278468U, // <0,4,0,0>: Cost 3 vext2 <0,0,0,4>, <0,0,0,4> - 2618605670U, // <0,4,0,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2618605734U, // <0,4,0,2>: Cost 3 vext2 <0,2,0,4>, <0,2,0,4> - 3642091695U, // <0,4,0,3>: Cost 4 vext1 <3,0,4,0>, <3,0,4,0> - 2753134796U, // <0,4,0,4>: Cost 3 vuzpl <0,2,4,6>, <0,2,4,6> - 2718714770U, // <0,4,0,5>: Cost 3 vext3 <5,6,7,0>, <4,0,5,1> - 3021245750U, // <0,4,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS - 3665982483U, // <0,4,0,7>: Cost 4 vext1 <7,0,4,0>, <7,0,4,0> - 3021245768U, // <0,4,0,u>: Cost 3 vtrnl <0,2,0,2>, RHS - 2568355942U, // <0,4,1,0>: Cost 3 vext1 <3,0,4,1>, LHS - 3692348212U, // <0,4,1,1>: Cost 4 vext2 <0,2,0,4>, <1,1,1,1> - 3692348310U, // <0,4,1,2>: Cost 4 vext2 <0,2,0,4>, <1,2,3,0> - 2568358064U, // <0,4,1,3>: Cost 3 vext1 <3,0,4,1>, <3,0,4,1> - 2568359222U, // <0,4,1,4>: Cost 3 vext1 <3,0,4,1>, RHS - 1812778294U, // <0,4,1,5>: Cost 2 vzipl LHS, RHS - 3022671158U, // <0,4,1,6>: Cost 3 vtrnl <0,4,1,5>, RHS - 2592248852U, // <0,4,1,7>: Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1812778537U, // <0,4,1,u>: Cost 2 vzipl LHS, RHS - 2568364134U, // <0,4,2,0>: Cost 3 vext1 <3,0,4,2>, LHS - 2238573423U, // <0,4,2,1>: Cost 3 vrev <4,0,1,2> - 3692349032U, // <0,4,2,2>: Cost 4 vext2 <0,2,0,4>, <2,2,2,2> - 2631214761U, // <0,4,2,3>: Cost 3 vext2 <2,3,0,4>, <2,3,0,4> - 2568367414U, // <0,4,2,4>: Cost 3 vext1 <3,0,4,2>, RHS - 2887028022U, // <0,4,2,5>: Cost 3 vzipl <0,2,0,2>, RHS - 1946996022U, // <0,4,2,6>: Cost 2 vtrnl LHS, RHS - 2592257045U, // <0,4,2,7>: Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 1946996040U, // <0,4,2,u>: 
Cost 2 vtrnl LHS, RHS - 3692349590U, // <0,4,3,0>: Cost 4 vext2 <0,2,0,4>, <3,0,1,2> - 3826878614U, // <0,4,3,1>: Cost 4 vuzpl <0,2,4,6>, <3,0,1,2> - 3826878625U, // <0,4,3,2>: Cost 4 vuzpl <0,2,4,6>, <3,0,2,4> - 3692349852U, // <0,4,3,3>: Cost 4 vext2 <0,2,0,4>, <3,3,3,3> - 3692349954U, // <0,4,3,4>: Cost 4 vext2 <0,2,0,4>, <3,4,5,6> - 3826878978U, // <0,4,3,5>: Cost 4 vuzpl <0,2,4,6>, <3,4,5,6> - 4095200566U, // <0,4,3,6>: Cost 4 vtrnl <0,2,3,1>, RHS - 3713583814U, // <0,4,3,7>: Cost 4 vext2 <3,7,0,4>, <3,7,0,4> - 3692350238U, // <0,4,3,u>: Cost 4 vext2 <0,2,0,4>, <3,u,1,2> - 2550464552U, // <0,4,4,0>: Cost 3 vext1 <0,0,4,4>, <0,0,4,4> - 3962194914U, // <0,4,4,1>: Cost 4 vzipl <0,4,1,5>, <4,1,5,0> - 3693677631U, // <0,4,4,2>: Cost 4 vext2 <0,4,0,4>, <4,2,6,3> - 3642124467U, // <0,4,4,3>: Cost 4 vext1 <3,0,4,4>, <3,0,4,4> - 2718715088U, // <0,4,4,4>: Cost 3 vext3 <5,6,7,0>, <4,4,4,4> - 2618608950U, // <0,4,4,5>: Cost 3 vext2 <0,2,0,4>, RHS - 2753137974U, // <0,4,4,6>: Cost 3 vuzpl <0,2,4,6>, RHS - 3666015255U, // <0,4,4,7>: Cost 4 vext1 <7,0,4,4>, <7,0,4,4> - 2618609193U, // <0,4,4,u>: Cost 3 vext2 <0,2,0,4>, RHS - 2568388710U, // <0,4,5,0>: Cost 3 vext1 <3,0,4,5>, LHS - 2568389526U, // <0,4,5,1>: Cost 3 vext1 <3,0,4,5>, <1,2,3,0> - 3636159963U, // <0,4,5,2>: Cost 4 vext1 <2,0,4,5>, <2,0,4,5> - 2568390836U, // <0,4,5,3>: Cost 3 vext1 <3,0,4,5>, <3,0,4,5> - 2568391990U, // <0,4,5,4>: Cost 3 vext1 <3,0,4,5>, RHS - 2718715180U, // <0,4,5,5>: Cost 3 vext3 <5,6,7,0>, <4,5,5,6> - 1618136374U, // <0,4,5,6>: Cost 2 vext3 <1,2,3,0>, RHS - 2592281624U, // <0,4,5,7>: Cost 3 vext1 <7,0,4,5>, <7,0,4,5> - 1618136392U, // <0,4,5,u>: Cost 2 vext3 <1,2,3,0>, RHS - 2550480938U, // <0,4,6,0>: Cost 3 vext1 <0,0,4,6>, <0,0,4,6> - 3826880801U, // <0,4,6,1>: Cost 4 vuzpl <0,2,4,6>, <6,0,1,2> - 2562426332U, // <0,4,6,2>: Cost 3 vext1 <2,0,4,6>, <2,0,4,6> - 3786190181U, // <0,4,6,3>: Cost 4 vext3 <4,6,3,0>, <4,6,3,0> - 2718715252U, // <0,4,6,4>: Cost 3 vext3 <5,6,7,0>, <4,6,4,6> - 3826881165U, // <0,4,6,5>: Cost 4 vuzpl <0,2,4,6>, <6,4,5,6> - 2712669568U, // <0,4,6,6>: Cost 3 vext3 <4,6,6,0>, <4,6,6,0> - 2657760081U, // <0,4,6,7>: Cost 3 vext2 <6,7,0,4>, <6,7,0,4> - 2718715284U, // <0,4,6,u>: Cost 3 vext3 <5,6,7,0>, <4,6,u,2> - 3654090854U, // <0,4,7,0>: Cost 4 vext1 <5,0,4,7>, LHS - 3934229326U, // <0,4,7,1>: Cost 4 vuzpr <7,0,1,4>, <6,7,0,1> - 3734156437U, // <0,4,7,2>: Cost 4 vext2 <7,2,0,4>, <7,2,0,4> - 3734820070U, // <0,4,7,3>: Cost 4 vext2 <7,3,0,4>, <7,3,0,4> - 3654094134U, // <0,4,7,4>: Cost 4 vext1 <5,0,4,7>, RHS - 2713259464U, // <0,4,7,5>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2713333201U, // <0,4,7,6>: Cost 3 vext3 <4,7,6,0>, <4,7,6,0> - 3654095866U, // <0,4,7,7>: Cost 4 vext1 <5,0,4,7>, <7,0,1,2> - 2713259464U, // <0,4,7,u>: Cost 3 vext3 <4,7,5,0>, <4,7,5,0> - 2568413286U, // <0,4,u,0>: Cost 3 vext1 <3,0,4,u>, LHS - 2618611502U, // <0,4,u,1>: Cost 3 vext2 <0,2,0,4>, LHS - 2753140526U, // <0,4,u,2>: Cost 3 vuzpl <0,2,4,6>, LHS - 2568415415U, // <0,4,u,3>: Cost 3 vext1 <3,0,4,u>, <3,0,4,u> - 2568416566U, // <0,4,u,4>: Cost 3 vext1 <3,0,4,u>, RHS - 1817423158U, // <0,4,u,5>: Cost 2 vzipl LHS, RHS - 1947438390U, // <0,4,u,6>: Cost 2 vtrnl LHS, RHS - 2592306203U, // <0,4,u,7>: Cost 3 vext1 <7,0,4,u>, <7,0,4,u> - 1947438408U, // <0,4,u,u>: Cost 2 vtrnl LHS, RHS - 3630219264U, // <0,5,0,0>: Cost 4 vext1 <1,0,5,0>, <0,0,0,0> - 2625912934U, // <0,5,0,1>: Cost 3 vext2 <1,4,0,5>, LHS - 3692355748U, // <0,5,0,2>: Cost 4 vext2 <0,2,0,5>, <0,2,0,2> - 3693019384U, // <0,5,0,3>: Cost 4 vext2 <0,3,0,5>, <0,3,0,5> - 
3630222646U, // <0,5,0,4>: Cost 4 vext1 <1,0,5,0>, RHS - 3699655062U, // <0,5,0,5>: Cost 4 vext2 <1,4,0,5>, <0,5,0,1> - 2718715508U, // <0,5,0,6>: Cost 3 vext3 <5,6,7,0>, <5,0,6,1> - 3087011126U, // <0,5,0,7>: Cost 3 vtrnr <0,0,0,0>, RHS - 2625913501U, // <0,5,0,u>: Cost 3 vext2 <1,4,0,5>, LHS - 1500659814U, // <0,5,1,0>: Cost 2 vext1 <4,0,5,1>, LHS - 2886520528U, // <0,5,1,1>: Cost 3 vzipl LHS, <5,1,7,3> - 2574403176U, // <0,5,1,2>: Cost 3 vext1 <4,0,5,1>, <2,2,2,2> - 2574403734U, // <0,5,1,3>: Cost 3 vext1 <4,0,5,1>, <3,0,1,2> - 1500662674U, // <0,5,1,4>: Cost 2 vext1 <4,0,5,1>, <4,0,5,1> - 2886520836U, // <0,5,1,5>: Cost 3 vzipl LHS, <5,5,5,5> - 2886520930U, // <0,5,1,6>: Cost 3 vzipl LHS, <5,6,7,0> - 2718715600U, // <0,5,1,7>: Cost 3 vext3 <5,6,7,0>, <5,1,7,3> - 1500665646U, // <0,5,1,u>: Cost 2 vext1 <4,0,5,1>, LHS - 2556493926U, // <0,5,2,0>: Cost 3 vext1 <1,0,5,2>, LHS - 2244546120U, // <0,5,2,1>: Cost 3 vrev <5,0,1,2> - 3692357256U, // <0,5,2,2>: Cost 4 vext2 <0,2,0,5>, <2,2,5,7> - 2568439994U, // <0,5,2,3>: Cost 3 vext1 <3,0,5,2>, <3,0,5,2> - 2556497206U, // <0,5,2,4>: Cost 3 vext1 <1,0,5,2>, RHS - 3020738564U, // <0,5,2,5>: Cost 3 vtrnl LHS, <5,5,5,5> - 4027877161U, // <0,5,2,6>: Cost 4 vzipr <0,2,0,2>, <2,4,5,6> - 3093220662U, // <0,5,2,7>: Cost 3 vtrnr <1,0,3,2>, RHS - 3093220663U, // <0,5,2,u>: Cost 3 vtrnr <1,0,3,2>, RHS - 3699656854U, // <0,5,3,0>: Cost 4 vext2 <1,4,0,5>, <3,0,1,2> - 3699656927U, // <0,5,3,1>: Cost 4 vext2 <1,4,0,5>, <3,1,0,3> - 3699657006U, // <0,5,3,2>: Cost 4 vext2 <1,4,0,5>, <3,2,0,1> - 3699657116U, // <0,5,3,3>: Cost 4 vext2 <1,4,0,5>, <3,3,3,3> - 2637859284U, // <0,5,3,4>: Cost 3 vext2 <3,4,0,5>, <3,4,0,5> - 3790319453U, // <0,5,3,5>: Cost 4 vext3 <5,3,5,0>, <5,3,5,0> - 3699657354U, // <0,5,3,6>: Cost 4 vext2 <1,4,0,5>, <3,6,2,7> - 2716725103U, // <0,5,3,7>: Cost 3 vext3 <5,3,7,0>, <5,3,7,0> - 2716798840U, // <0,5,3,u>: Cost 3 vext3 <5,3,u,0>, <5,3,u,0> - 2661747602U, // <0,5,4,0>: Cost 3 vext2 <7,4,0,5>, <4,0,5,1> - 3630252810U, // <0,5,4,1>: Cost 4 vext1 <1,0,5,4>, <1,0,5,4> - 3636225507U, // <0,5,4,2>: Cost 4 vext1 <2,0,5,4>, <2,0,5,4> - 3716910172U, // <0,5,4,3>: Cost 4 vext2 <4,3,0,5>, <4,3,0,5> - 3962195892U, // <0,5,4,4>: Cost 4 vzipl <0,4,1,5>, <5,4,5,6> - 2625916214U, // <0,5,4,5>: Cost 3 vext2 <1,4,0,5>, RHS - 3718901071U, // <0,5,4,6>: Cost 4 vext2 <4,6,0,5>, <4,6,0,5> - 2718715846U, // <0,5,4,7>: Cost 3 vext3 <5,6,7,0>, <5,4,7,6> - 2625916457U, // <0,5,4,u>: Cost 3 vext2 <1,4,0,5>, RHS - 3791278034U, // <0,5,5,0>: Cost 4 vext3 <5,5,0,0>, <5,5,0,0> - 3791351771U, // <0,5,5,1>: Cost 4 vext3 <5,5,1,0>, <5,5,1,0> - 3318386260U, // <0,5,5,2>: Cost 4 vrev <5,0,2,5> - 3791499245U, // <0,5,5,3>: Cost 4 vext3 <5,5,3,0>, <5,5,3,0> - 3318533734U, // <0,5,5,4>: Cost 4 vrev <5,0,4,5> - 2718715908U, // <0,5,5,5>: Cost 3 vext3 <5,6,7,0>, <5,5,5,5> - 2657767522U, // <0,5,5,6>: Cost 3 vext2 <6,7,0,5>, <5,6,7,0> - 2718715928U, // <0,5,5,7>: Cost 3 vext3 <5,6,7,0>, <5,5,7,7> - 2718715937U, // <0,5,5,u>: Cost 3 vext3 <5,6,7,0>, <5,5,u,7> - 2592358502U, // <0,5,6,0>: Cost 3 vext1 <7,0,5,6>, LHS - 3792015404U, // <0,5,6,1>: Cost 4 vext3 <5,6,1,0>, <5,6,1,0> - 3731509754U, // <0,5,6,2>: Cost 4 vext2 <6,7,0,5>, <6,2,7,3> - 3785748546U, // <0,5,6,3>: Cost 4 vext3 <4,5,6,0>, <5,6,3,4> - 2592361782U, // <0,5,6,4>: Cost 3 vext1 <7,0,5,6>, RHS - 2592362594U, // <0,5,6,5>: Cost 3 vext1 <7,0,5,6>, <5,6,7,0> - 3785748576U, // <0,5,6,6>: Cost 4 vext3 <4,5,6,0>, <5,6,6,7> - 1644974178U, // <0,5,6,7>: Cost 2 vext3 <5,6,7,0>, <5,6,7,0> - 1645047915U, // <0,5,6,u>: Cost 2 
vext3 <5,6,u,0>, <5,6,u,0> - 2562506854U, // <0,5,7,0>: Cost 3 vext1 <2,0,5,7>, LHS - 2562507670U, // <0,5,7,1>: Cost 3 vext1 <2,0,5,7>, <1,2,3,0> - 2562508262U, // <0,5,7,2>: Cost 3 vext1 <2,0,5,7>, <2,0,5,7> - 3636250774U, // <0,5,7,3>: Cost 4 vext1 <2,0,5,7>, <3,0,1,2> - 2562510134U, // <0,5,7,4>: Cost 3 vext1 <2,0,5,7>, RHS - 2718716072U, // <0,5,7,5>: Cost 3 vext3 <5,6,7,0>, <5,7,5,7> - 2718716074U, // <0,5,7,6>: Cost 3 vext3 <5,6,7,0>, <5,7,6,0> - 2719379635U, // <0,5,7,7>: Cost 3 vext3 <5,7,7,0>, <5,7,7,0> - 2562512686U, // <0,5,7,u>: Cost 3 vext1 <2,0,5,7>, LHS - 1500717158U, // <0,5,u,0>: Cost 2 vext1 <4,0,5,u>, LHS - 2625918766U, // <0,5,u,1>: Cost 3 vext2 <1,4,0,5>, LHS - 2719674583U, // <0,5,u,2>: Cost 3 vext3 <5,u,2,0>, <5,u,2,0> - 2568489152U, // <0,5,u,3>: Cost 3 vext1 <3,0,5,u>, <3,0,5,u> - 1500720025U, // <0,5,u,4>: Cost 2 vext1 <4,0,5,u>, <4,0,5,u> - 2625919130U, // <0,5,u,5>: Cost 3 vext2 <1,4,0,5>, RHS - 2586407243U, // <0,5,u,6>: Cost 3 vext1 <6,0,5,u>, <6,0,5,u> - 1646301444U, // <0,5,u,7>: Cost 2 vext3 <5,u,7,0>, <5,u,7,0> - 1646375181U, // <0,5,u,u>: Cost 2 vext3 <5,u,u,0>, <5,u,u,0> - 2586411110U, // <0,6,0,0>: Cost 3 vext1 <6,0,6,0>, LHS - 2619949158U, // <0,6,0,1>: Cost 3 vext2 <0,4,0,6>, LHS - 2619949220U, // <0,6,0,2>: Cost 3 vext2 <0,4,0,6>, <0,2,0,2> - 3785748789U, // <0,6,0,3>: Cost 4 vext3 <4,5,6,0>, <6,0,3,4> - 2619949386U, // <0,6,0,4>: Cost 3 vext2 <0,4,0,6>, <0,4,0,6> - 2586415202U, // <0,6,0,5>: Cost 3 vext1 <6,0,6,0>, <5,6,7,0> - 2586415436U, // <0,6,0,6>: Cost 3 vext1 <6,0,6,0>, <6,0,6,0> - 2952793398U, // <0,6,0,7>: Cost 3 vzipr <0,0,0,0>, RHS - 2619949725U, // <0,6,0,u>: Cost 3 vext2 <0,4,0,6>, LHS - 2562531430U, // <0,6,1,0>: Cost 3 vext1 <2,0,6,1>, LHS - 3693691700U, // <0,6,1,1>: Cost 4 vext2 <0,4,0,6>, <1,1,1,1> - 2886521338U, // <0,6,1,2>: Cost 3 vzipl LHS, <6,2,7,3> - 3693691864U, // <0,6,1,3>: Cost 4 vext2 <0,4,0,6>, <1,3,1,3> - 2562534710U, // <0,6,1,4>: Cost 3 vext1 <2,0,6,1>, RHS - 2580450932U, // <0,6,1,5>: Cost 3 vext1 <5,0,6,1>, <5,0,6,1> - 2886521656U, // <0,6,1,6>: Cost 3 vzipl LHS, <6,6,6,6> - 2966736182U, // <0,6,1,7>: Cost 3 vzipr <2,3,0,1>, RHS - 2966736183U, // <0,6,1,u>: Cost 3 vzipr <2,3,0,1>, RHS - 1500741734U, // <0,6,2,0>: Cost 2 vext1 <4,0,6,2>, LHS - 2250518817U, // <0,6,2,1>: Cost 3 vrev <6,0,1,2> - 2574485096U, // <0,6,2,2>: Cost 3 vext1 <4,0,6,2>, <2,2,2,2> - 2631894694U, // <0,6,2,3>: Cost 3 vext2 <2,4,0,6>, <2,3,0,1> - 1500744604U, // <0,6,2,4>: Cost 2 vext1 <4,0,6,2>, <4,0,6,2> - 2574487248U, // <0,6,2,5>: Cost 3 vext1 <4,0,6,2>, <5,1,7,3> - 3020739384U, // <0,6,2,6>: Cost 3 vtrnl LHS, <6,6,6,6> - 2954136886U, // <0,6,2,7>: Cost 3 vzipr <0,2,0,2>, RHS - 1500747566U, // <0,6,2,u>: Cost 2 vext1 <4,0,6,2>, LHS - 3693693078U, // <0,6,3,0>: Cost 4 vext2 <0,4,0,6>, <3,0,1,2> - 3705637136U, // <0,6,3,1>: Cost 4 vext2 <2,4,0,6>, <3,1,5,7> - 3705637192U, // <0,6,3,2>: Cost 4 vext2 <2,4,0,6>, <3,2,3,0> - 3693693340U, // <0,6,3,3>: Cost 4 vext2 <0,4,0,6>, <3,3,3,3> - 2637867477U, // <0,6,3,4>: Cost 3 vext2 <3,4,0,6>, <3,4,0,6> - 3705637424U, // <0,6,3,5>: Cost 4 vext2 <2,4,0,6>, <3,5,1,7> - 3666154056U, // <0,6,3,6>: Cost 4 vext1 <7,0,6,3>, <6,3,7,0> - 2722697800U, // <0,6,3,7>: Cost 3 vext3 <6,3,7,0>, <6,3,7,0> - 2722771537U, // <0,6,3,u>: Cost 3 vext3 <6,3,u,0>, <6,3,u,0> - 2562556006U, // <0,6,4,0>: Cost 3 vext1 <2,0,6,4>, LHS - 4095316257U, // <0,6,4,1>: Cost 4 vtrnl <0,2,4,6>, <6,0,1,2> - 2562557420U, // <0,6,4,2>: Cost 3 vext1 <2,0,6,4>, <2,0,6,4> - 3636299926U, // <0,6,4,3>: Cost 4 vext1 <2,0,6,4>, <3,0,1,2> - 
-  2562559286U, // <0,6,4,4>: Cost 3 vext1 <2,0,6,4>, RHS
-  2619952438U, // <0,6,4,5>: Cost 3 vext2 <0,4,0,6>, RHS
-  2723287696U, // <0,6,4,6>: Cost 3 vext3 <6,4,6,0>, <6,4,6,0>
-  4027895094U, // <0,6,4,7>: Cost 4 vzipr <0,2,0,4>, RHS
-  2619952681U, // <0,6,4,u>: Cost 3 vext2 <0,4,0,6>, RHS
-  2718716594U, // <0,6,5,0>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
-  3648250774U, // <0,6,5,1>: Cost 4 vext1 <4,0,6,5>, <1,2,3,0>
-  3792458436U, // <0,6,5,2>: Cost 4 vext3 <5,6,7,0>, <6,5,2,7>
-  3705638767U, // <0,6,5,3>: Cost 5 vext2 <2,4,0,6>, <5,3,7,0>
-  3648252831U, // <0,6,5,4>: Cost 4 vext1 <4,0,6,5>, <4,0,6,5>
-  3797619416U, // <0,6,5,5>: Cost 4 vext3 <6,5,5,0>, <6,5,5,0>
-  3792458472U, // <0,6,5,6>: Cost 4 vext3 <5,6,7,0>, <6,5,6,7>
-  4035202358U, // <0,6,5,7>: Cost 4 vzipr <1,4,0,5>, RHS
-  2718716594U, // <0,6,5,u>: Cost 3 vext3 <5,6,7,0>, <6,5,0,7>
-  3786412796U, // <0,6,6,0>: Cost 4 vext3 <4,6,6,0>, <6,6,0,0>
-  3792458504U, // <0,6,6,1>: Cost 4 vext3 <5,6,7,0>, <6,6,1,3>
-  3728200126U, // <0,6,6,2>: Cost 4 vext2 <6,2,0,6>, <6,2,0,6>
-  3798135575U, // <0,6,6,3>: Cost 4 vext3 <6,6,3,0>, <6,6,3,0>
-  3786412836U, // <0,6,6,4>: Cost 4 vext3 <4,6,6,0>, <6,6,4,4>
-  3792458543U, // <0,6,6,5>: Cost 4 vext3 <5,6,7,0>, <6,6,5,6>
-  2718716728U, // <0,6,6,6>: Cost 3 vext3 <5,6,7,0>, <6,6,6,6>
-  2718716738U, // <0,6,6,7>: Cost 3 vext3 <5,6,7,0>, <6,6,7,7>
-  2718716747U, // <0,6,6,u>: Cost 3 vext3 <5,6,7,0>, <6,6,u,7>
-  2718716750U, // <0,6,7,0>: Cost 3 vext3 <5,6,7,0>, <6,7,0,1>
-  2724909910U, // <0,6,7,1>: Cost 3 vext3 <6,7,1,0>, <6,7,1,0>
-  3636323823U, // <0,6,7,2>: Cost 4 vext1 <2,0,6,7>, <2,0,6,7>
-  2725057384U, // <0,6,7,3>: Cost 3 vext3 <6,7,3,0>, <6,7,3,0>
-  2718716790U, // <0,6,7,4>: Cost 3 vext3 <5,6,7,0>, <6,7,4,5>
-  2718716800U, // <0,6,7,5>: Cost 3 vext3 <5,6,7,0>, <6,7,5,6>
-  3792458629U, // <0,6,7,6>: Cost 4 vext3 <5,6,7,0>, <6,7,6,2>
-  2725352332U, // <0,6,7,7>: Cost 3 vext3 <6,7,7,0>, <6,7,7,0>
-  2718716822U, // <0,6,7,u>: Cost 3 vext3 <5,6,7,0>, <6,7,u,1>
-  1500790886U, // <0,6,u,0>: Cost 2 vext1 <4,0,6,u>, LHS
-  2619954990U, // <0,6,u,1>: Cost 3 vext2 <0,4,0,6>, LHS
-  2562590192U, // <0,6,u,2>: Cost 3 vext1 <2,0,6,u>, <2,0,6,u>
-  2725721017U, // <0,6,u,3>: Cost 3 vext3 <6,u,3,0>, <6,u,3,0>
-  1500793762U, // <0,6,u,4>: Cost 2 vext1 <4,0,6,u>, <4,0,6,u>
-  2619955354U, // <0,6,u,5>: Cost 3 vext2 <0,4,0,6>, RHS
-  2725942228U, // <0,6,u,6>: Cost 3 vext3 <6,u,6,0>, <6,u,6,0>
-  2954186038U, // <0,6,u,7>: Cost 3 vzipr <0,2,0,u>, RHS
-  1500796718U, // <0,6,u,u>: Cost 2 vext1 <4,0,6,u>, LHS
-  2256401391U, // <0,7,0,0>: Cost 3 vrev <7,0,0,0>
-  2632564838U, // <0,7,0,1>: Cost 3 vext2 <2,5,0,7>, LHS
-  2256548865U, // <0,7,0,2>: Cost 3 vrev <7,0,2,0>
-  3700998396U, // <0,7,0,3>: Cost 4 vext2 <1,6,0,7>, <0,3,1,0>
-  2718716952U, // <0,7,0,4>: Cost 3 vext3 <5,6,7,0>, <7,0,4,5>
-  2718716962U, // <0,7,0,5>: Cost 3 vext3 <5,6,7,0>, <7,0,5,6>
-  2621284845U, // <0,7,0,6>: Cost 3 vext2 <0,6,0,7>, <0,6,0,7>
-  3904685542U, // <0,7,0,7>: Cost 4 vuzpr <2,0,5,7>, <2,0,5,7>
-  2632565405U, // <0,7,0,u>: Cost 3 vext2 <2,5,0,7>, LHS
-  2256409584U, // <0,7,1,0>: Cost 3 vrev <7,0,0,1>
-  3706307380U, // <0,7,1,1>: Cost 4 vext2 <2,5,0,7>, <1,1,1,1>
-  2632565654U, // <0,7,1,2>: Cost 3 vext2 <2,5,0,7>, <1,2,3,0>
-  3769603168U, // <0,7,1,3>: Cost 4 vext3 <1,u,3,0>, <7,1,3,5>
-  2256704532U, // <0,7,1,4>: Cost 3 vrev <7,0,4,1>
-  3769603184U, // <0,7,1,5>: Cost 4 vext3 <1,u,3,0>, <7,1,5,3>
-  3700999366U, // <0,7,1,6>: Cost 4 vext2 <1,6,0,7>, <1,6,0,7>
-  2886522476U, // <0,7,1,7>: Cost 3 vzipl LHS, <7,7,7,7>
-  2256999480U, // <0,7,1,u>: Cost 3 vrev <7,0,u,1>
-  2586501222U, // <0,7,2,0>: Cost 3 vext1 <6,0,7,2>, LHS
-  1182749690U, // <0,7,2,1>: Cost 2 vrev <7,0,1,2>
-  3636356595U, // <0,7,2,2>: Cost 4 vext1 <2,0,7,2>, <2,0,7,2>
-  2727711916U, // <0,7,2,3>: Cost 3 vext3 <7,2,3,0>, <7,2,3,0>
-  2586504502U, // <0,7,2,4>: Cost 3 vext1 <6,0,7,2>, RHS
-  2632566606U, // <0,7,2,5>: Cost 3 vext2 <2,5,0,7>, <2,5,0,7>
-  2586505559U, // <0,7,2,6>: Cost 3 vext1 <6,0,7,2>, <6,0,7,2>
-  3020740204U, // <0,7,2,7>: Cost 3 vtrnl LHS, <7,7,7,7>
-  1183265849U, // <0,7,2,u>: Cost 2 vrev <7,0,u,2>
-  3701000342U, // <0,7,3,0>: Cost 4 vext2 <1,6,0,7>, <3,0,1,2>
-  3706308849U, // <0,7,3,1>: Cost 4 vext2 <2,5,0,7>, <3,1,2,3>
-  3330315268U, // <0,7,3,2>: Cost 4 vrev <7,0,2,3>
-  3706309020U, // <0,7,3,3>: Cost 4 vext2 <2,5,0,7>, <3,3,3,3>
-  3706309122U, // <0,7,3,4>: Cost 4 vext2 <2,5,0,7>, <3,4,5,6>
-  3712281127U, // <0,7,3,5>: Cost 4 vext2 <3,5,0,7>, <3,5,0,7>
-  2639202936U, // <0,7,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
-  3802412321U, // <0,7,3,7>: Cost 4 vext3 <7,3,7,0>, <7,3,7,0>
-  2640530202U, // <0,7,3,u>: Cost 3 vext2 <3,u,0,7>, <3,u,0,7>
-  3654287462U, // <0,7,4,0>: Cost 4 vext1 <5,0,7,4>, LHS
-  2256507900U, // <0,7,4,1>: Cost 3 vrev <7,0,1,4>
-  2256581637U, // <0,7,4,2>: Cost 3 vrev <7,0,2,4>
-  3660262008U, // <0,7,4,3>: Cost 4 vext1 <6,0,7,4>, <3,6,0,7>
-  3786413405U, // <0,7,4,4>: Cost 4 vext3 <4,6,6,0>, <7,4,4,6>
-  2632568118U, // <0,7,4,5>: Cost 3 vext2 <2,5,0,7>, RHS
-  3718917457U, // <0,7,4,6>: Cost 4 vext2 <4,6,0,7>, <4,6,0,7>
-  3787003255U, // <0,7,4,7>: Cost 4 vext3 <4,7,5,0>, <7,4,7,5>
-  2632568361U, // <0,7,4,u>: Cost 3 vext2 <2,5,0,7>, RHS
-  3706310268U, // <0,7,5,0>: Cost 4 vext2 <2,5,0,7>, <5,0,7,0>
-  3792459156U, // <0,7,5,1>: Cost 4 vext3 <5,6,7,0>, <7,5,1,7>
-  3330331654U, // <0,7,5,2>: Cost 4 vrev <7,0,2,5>
-  3722899255U, // <0,7,5,3>: Cost 4 vext2 <5,3,0,7>, <5,3,0,7>
-  2256737304U, // <0,7,5,4>: Cost 3 vrev <7,0,4,5>
-  3724226521U, // <0,7,5,5>: Cost 4 vext2 <5,5,0,7>, <5,5,0,7>
-  2718717377U, // <0,7,5,6>: Cost 3 vext3 <5,6,7,0>, <7,5,6,7>
-  2729997763U, // <0,7,5,7>: Cost 3 vext3 <7,5,7,0>, <7,5,7,0>
-  2720044499U, // <0,7,5,u>: Cost 3 vext3 <5,u,7,0>, <7,5,u,7>
-  3712946517U, // <0,7,6,0>: Cost 4 vext2 <3,6,0,7>, <6,0,7,0>
-  2256524286U, // <0,7,6,1>: Cost 3 vrev <7,0,1,6>
-  3792459246U, // <0,7,6,2>: Cost 4 vext3 <5,6,7,0>, <7,6,2,7>
-  3796440567U, // <0,7,6,3>: Cost 4 vext3 <6,3,7,0>, <7,6,3,7>
-  3654307126U, // <0,7,6,4>: Cost 4 vext1 <5,0,7,6>, RHS
-  2656457394U, // <0,7,6,5>: Cost 3 vext2 <6,5,0,7>, <6,5,0,7>
-  3792459281U, // <0,7,6,6>: Cost 4 vext3 <5,6,7,0>, <7,6,6,6>
-  2730661396U, // <0,7,6,7>: Cost 3 vext3 <7,6,7,0>, <7,6,7,0>
-  2658448293U, // <0,7,6,u>: Cost 3 vext2 <6,u,0,7>, <6,u,0,7>
-  3787003431U, // <0,7,7,0>: Cost 4 vext3 <4,7,5,0>, <7,7,0,1>
-  3654312854U, // <0,7,7,1>: Cost 4 vext1 <5,0,7,7>, <1,2,3,0>
-  3654313446U, // <0,7,7,2>: Cost 4 vext1 <5,0,7,7>, <2,0,5,7>
-  3804771905U, // <0,7,7,3>: Cost 4 vext3 <7,7,3,0>, <7,7,3,0>
-  3654315318U, // <0,7,7,4>: Cost 4 vext1 <5,0,7,7>, RHS
-  3654315651U, // <0,7,7,5>: Cost 4 vext1 <5,0,7,7>, <5,0,7,7>
-  3660288348U, // <0,7,7,6>: Cost 4 vext1 <6,0,7,7>, <6,0,7,7>
-  2718717548U, // <0,7,7,7>: Cost 3 vext3 <5,6,7,0>, <7,7,7,7>
-  2664420990U, // <0,7,7,u>: Cost 3 vext2 <7,u,0,7>, <7,u,0,7>
-  2256466935U, // <0,7,u,0>: Cost 3 vrev <7,0,0,u>
-  1182798848U, // <0,7,u,1>: Cost 2 vrev <7,0,1,u>
-  2256614409U, // <0,7,u,2>: Cost 3 vrev <7,0,2,u>
-  2731693714U, // <0,7,u,3>: Cost 3 vext3 <7,u,3,0>, <7,u,3,0>
-  2256761883U, // <0,7,u,4>: Cost 3 vrev <7,0,4,u>
-  2632571034U, // <0,7,u,5>: Cost 3 vext2 <2,5,0,7>, RHS
-  2669066421U, // <0,7,u,6>: Cost 3 vext2 ,
-  2731988662U, // <0,7,u,7>: Cost 3 vext3 <7,u,7,0>, <7,u,7,0>
-  1183315007U, // <0,7,u,u>: Cost 2 vrev <7,0,u,u>
-  135053414U, // <0,u,0,0>: Cost 1 vdup0 LHS
-  1544896614U, // <0,u,0,1>: Cost 2 vext2 <0,2,0,u>, LHS
-  1678999654U, // <0,u,0,2>: Cost 2 vuzpl LHS, LHS
-  2691880677U, // <0,u,0,3>: Cost 3 vext3 <1,2,3,0>,
-  1476988214U, // <0,u,0,4>: Cost 2 vext1 <0,0,u,0>, RHS
-  2718791419U, // <0,u,0,5>: Cost 3 vext3 <5,6,u,0>,
-  3021248666U, // <0,u,0,6>: Cost 3 vtrnl <0,2,0,2>, RHS
-  2592535607U, // <0,u,0,7>: Cost 3 vext1 <7,0,u,0>, <7,0,u,0>
-  135053414U, // <0,u,0,u>: Cost 1 vdup0 LHS
-  1476993097U, // <0,u,1,0>: Cost 2 vext1 <0,0,u,1>, <0,0,u,1>
-  1812780846U, // <0,u,1,1>: Cost 2 vzipl LHS, LHS
-  1618138926U, // <0,u,1,2>: Cost 2 vext3 <1,2,3,0>, LHS
-  2752742134U, // <0,u,1,3>: Cost 3 vuzpl LHS, <1,0,3,2>
-  1476996406U, // <0,u,1,4>: Cost 2 vext1 <0,0,u,1>, RHS
-  1812781210U, // <0,u,1,5>: Cost 2 vzipl LHS, RHS
-  2887006416U, // <0,u,1,6>: Cost 3 vzipl LHS,
-  2966736200U, // <0,u,1,7>: Cost 3 vzipr <2,3,0,1>, RHS
-  1812781413U, // <0,u,1,u>: Cost 2 vzipl LHS, LHS
-  1482973286U, // <0,u,2,0>: Cost 2 vext1 <1,0,u,2>, LHS
-  1482973987U, // <0,u,2,1>: Cost 2 vext1 <1,0,u,2>, <1,0,u,2>
-  1946998574U, // <0,u,2,2>: Cost 2 vtrnl LHS, LHS
-  835584U, // <0,u,2,3>: Cost 0 copy LHS
-  1482976566U, // <0,u,2,4>: Cost 2 vext1 <1,0,u,2>, RHS
-  3020781631U, // <0,u,2,5>: Cost 3 vtrnl LHS,
-  1946998938U, // <0,u,2,6>: Cost 2 vtrnl LHS, RHS
-  1518810169U, // <0,u,2,7>: Cost 2 vext1 <7,0,u,2>, <7,0,u,2>
-  835584U, // <0,u,2,u>: Cost 0 copy LHS
-  2618640534U, // <0,u,3,0>: Cost 3 vext2 <0,2,0,u>, <3,0,1,2>
-  2752743574U, // <0,u,3,1>: Cost 3 vuzpl LHS, <3,0,1,2>
-  2636556597U, // <0,u,3,2>: Cost 3 vext2 <3,2,0,u>, <3,2,0,u>
-  2752743836U, // <0,u,3,3>: Cost 3 vuzpl LHS, <3,3,3,3>
-  2618640898U, // <0,u,3,4>: Cost 3 vext2 <0,2,0,u>, <3,4,5,6>
-  2752743938U, // <0,u,3,5>: Cost 3 vuzpl LHS, <3,4,5,6>
-  2639202936U, // <0,u,3,6>: Cost 3 vext2 <3,6,0,7>, <3,6,0,7>
-  2639874762U, // <0,u,3,7>: Cost 3 vext2 <3,7,0,u>, <3,7,0,u>
-  2752743637U, // <0,u,3,u>: Cost 3 vuzpl LHS, <3,0,u,2>
-  2562703462U, // <0,u,4,0>: Cost 3 vext1 <2,0,u,4>, LHS
-  2888455982U, // <0,u,4,1>: Cost 3 vzipl <0,4,1,5>, LHS
-  3021575982U, // <0,u,4,2>: Cost 3 vtrnl <0,2,4,6>, LHS
-  2568677591U, // <0,u,4,3>: Cost 3 vext1 <3,0,u,4>, <3,0,u,4>
-  2562706742U, // <0,u,4,4>: Cost 3 vext1 <2,0,u,4>, RHS
-  1544899894U, // <0,u,4,5>: Cost 2 vext2 <0,2,0,u>, RHS
-  1679002934U, // <0,u,4,6>: Cost 2 vuzpl LHS, RHS
-  2718718033U, // <0,u,4,7>: Cost 3 vext3 <5,6,7,0>,
-  1679002952U, // <0,u,4,u>: Cost 2 vuzpl LHS, RHS
-  2568683622U, // <0,u,5,0>: Cost 3 vext1 <3,0,u,5>, LHS
-  2568684438U, // <0,u,5,1>: Cost 3 vext1 <3,0,u,5>, <1,2,3,0>
-  3765622902U, // <0,u,5,2>: Cost 4 vext3 <1,2,3,0>,
-  2691881087U, // <0,u,5,3>: Cost 3 vext3 <1,2,3,0>,
-  2568686902U, // <0,u,5,4>: Cost 3 vext1 <3,0,u,5>, RHS
-  2650492890U, // <0,u,5,5>: Cost 3 vext2 <5,5,0,u>, <5,5,0,u>
-  1618139290U, // <0,u,5,6>: Cost 2 vext3 <1,2,3,0>, RHS
-  2824834358U, // <0,u,5,7>: Cost 3 vuzpr <1,0,3,u>, RHS
-  1618139308U, // <0,u,5,u>: Cost 2 vext3 <1,2,3,0>, RHS
-  2592579686U, // <0,u,6,0>: Cost 3 vext1 <7,0,u,6>, LHS
-  2262496983U, // <0,u,6,1>: Cost 3 vrev
-  2654474688U, // <0,u,6,2>: Cost 3 vext2 <6,2,0,u>, <6,2,0,u>
-  2691881168U, // <0,u,6,3>: Cost 3 vext3 <1,2,3,0>,
-  2592582966U, // <0,u,6,4>: Cost 3 vext1 <7,0,u,6>, RHS
-  2656465587U, // <0,u,6,5>: Cost 3 vext2 <6,5,0,u>, <6,5,0,u>
-  2657129220U, // <0,u,6,6>: Cost 3 vext2 <6,6,0,u>, <6,6,0,u>
-  1584051029U, // <0,u,6,7>: Cost 2 vext2 <6,7,0,u>, <6,7,0,u>
-  1584714662U, // <0,u,6,u>: Cost 2 vext2 <6,u,0,u>, <6,u,0,u>
-  2562728038U, // <0,u,7,0>: Cost 3 vext1 <2,0,u,7>, LHS
-  2562728854U, // <0,u,7,1>: Cost 3 vext1 <2,0,u,7>, <1,2,3,0>
-  2562729473U, // <0,u,7,2>: Cost 3 vext1 <2,0,u,7>, <2,0,u,7>
-  2661111018U, // <0,u,7,3>: Cost 3 vext2 <7,3,0,u>, <7,3,0,u>
-  2562731318U, // <0,u,7,4>: Cost 3 vext1 <2,0,u,7>, RHS
-  2718718258U, // <0,u,7,5>: Cost 3 vext3 <5,6,7,0>,
-  2586620261U, // <0,u,7,6>: Cost 3 vext1 <6,0,u,7>, <6,0,u,7>
-  2657793644U, // <0,u,7,7>: Cost 3 vext2 <6,7,0,u>, <7,7,7,7>
-  2562733870U, // <0,u,7,u>: Cost 3 vext1 <2,0,u,7>, LHS
-  135053414U, // <0,u,u,0>: Cost 1 vdup0 LHS
-  1544902446U, // <0,u,u,1>: Cost 2 vext2 <0,2,0,u>, LHS
-  1679005486U, // <0,u,u,2>: Cost 2 vuzpl LHS, LHS
-  835584U, // <0,u,u,3>: Cost 0 copy LHS
-  1483025718U, // <0,u,u,4>: Cost 2 vext1 <1,0,u,u>, RHS
-  1544902810U, // <0,u,u,5>: Cost 2 vext2 <0,2,0,u>, RHS
-  1679005850U, // <0,u,u,6>: Cost 2 vuzpl LHS, RHS
-  1518859327U, // <0,u,u,7>: Cost 2 vext1 <7,0,u,u>, <7,0,u,u>
-  835584U, // <0,u,u,u>: Cost 0 copy LHS
-  2689744896U, // <1,0,0,0>: Cost 3 vext3 <0,u,1,1>, <0,0,0,0>
-  1610694666U, // <1,0,0,1>: Cost 2 vext3 <0,0,1,1>, <0,0,1,1>
-  2689744916U, // <1,0,0,2>: Cost 3 vext3 <0,u,1,1>, <0,0,2,2>
-  2619310332U, // <1,0,0,3>: Cost 3 vext2 <0,3,1,0>, <0,3,1,0>
-  2684657701U, // <1,0,0,4>: Cost 3 vext3 <0,0,4,1>, <0,0,4,1>
-  2620637598U, // <1,0,0,5>: Cost 3 vext2 <0,5,1,0>, <0,5,1,0>
-  3708977654U, // <1,0,0,6>: Cost 4 vext2 <3,0,1,0>, <0,6,1,7>
-  3666351168U, // <1,0,0,7>: Cost 4 vext1 <7,1,0,0>, <7,1,0,0>
-  1611210825U, // <1,0,0,u>: Cost 2 vext3 <0,0,u,1>, <0,0,u,1>
-  2556780646U, // <1,0,1,0>: Cost 3 vext1 <1,1,0,1>, LHS
-  2556781355U, // <1,0,1,1>: Cost 3 vext1 <1,1,0,1>, <1,1,0,1>
-  1616003174U, // <1,0,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
-  3693052888U, // <1,0,1,3>: Cost 4 vext2 <0,3,1,0>, <1,3,1,3>
-  2556783926U, // <1,0,1,4>: Cost 3 vext1 <1,1,0,1>, RHS
-  2580672143U, // <1,0,1,5>: Cost 3 vext1 <5,1,0,1>, <5,1,0,1>
-  2724839566U, // <1,0,1,6>: Cost 3 vext3 <6,7,0,1>, <0,1,6,7>
-  3654415354U, // <1,0,1,7>: Cost 4 vext1 <5,1,0,1>, <7,0,1,2>
-  1616003228U, // <1,0,1,u>: Cost 2 vext3 <0,u,1,1>, LHS
-  2685690019U, // <1,0,2,0>: Cost 3 vext3 <0,2,0,1>, <0,2,0,1>
-  2685763756U, // <1,0,2,1>: Cost 3 vext3 <0,2,1,1>, <0,2,1,1>
-  2698297524U, // <1,0,2,2>: Cost 3 vext3 <2,3,0,1>, <0,2,2,0>
-  2685911230U, // <1,0,2,3>: Cost 3 vext3 <0,2,3,1>, <0,2,3,1>
-  2689745100U, // <1,0,2,4>: Cost 3 vext3 <0,u,1,1>, <0,2,4,6>
-  3764814038U, // <1,0,2,5>: Cost 4 vext3 <1,1,1,1>, <0,2,5,7>
-  2724839640U, // <1,0,2,6>: Cost 3 vext3 <6,7,0,1>, <0,2,6,0>
-  2592625658U, // <1,0,2,7>: Cost 3 vext1 <7,1,0,2>, <7,0,1,2>
-  2686279915U, // <1,0,2,u>: Cost 3 vext3 <0,2,u,1>, <0,2,u,1>
-  3087843328U, // <1,0,3,0>: Cost 3 vtrnr LHS, <0,0,0,0>
-  3087843338U, // <1,0,3,1>: Cost 3 vtrnr LHS, <0,0,1,1>
-  67944550U, // <1,0,3,2>: Cost 1 vrev LHS
-  2568743135U, // <1,0,3,3>: Cost 3 vext1 <3,1,0,3>, <3,1,0,3>
-  2562772278U, // <1,0,3,4>: Cost 3 vext1 <2,1,0,3>, RHS
-  4099850454U, // <1,0,3,5>: Cost 4 vtrnl <1,0,3,2>, <0,2,5,7>
-  3704998538U, // <1,0,3,6>: Cost 4 vext2 <2,3,1,0>, <3,6,2,7>
-  2592633923U, // <1,0,3,7>: Cost 3 vext1 <7,1,0,3>, <7,1,0,3>
-  68386972U, // <1,0,3,u>: Cost 1 vrev LHS
-  2620640146U, // <1,0,4,0>: Cost 3 vext2 <0,5,1,0>, <4,0,5,1>
-  2689745234U, // <1,0,4,1>: Cost 3 vext3 <0,u,1,1>, <0,4,1,5>
-  2689745244U, // <1,0,4,2>: Cost 3 vext3 <0,u,1,1>, <0,4,2,6>
-  3760980320U, // <1,0,4,3>: Cost 4 vext3 <0,4,3,1>, <0,4,3,1>
-  3761054057U, // <1,0,4,4>: Cost 4 vext3 <0,4,4,1>, <0,4,4,1>
-  2619313462U, // <1,0,4,5>: Cost 3 vext2 <0,3,1,0>, RHS
-  3761201531U, // <1,0,4,6>: Cost 4 vext3 <0,4,6,1>, <0,4,6,1>
-  3666383940U, // <1,0,4,7>: Cost 4 vext1 <7,1,0,4>, <7,1,0,4>
-  2619313705U, // <1,0,4,u>: Cost 3 vext2 <0,3,1,0>, RHS
-  4029300736U, // <1,0,5,0>: Cost 4 vzipr <0,4,1,5>, <0,0,0,0>
-  2895249510U, // <1,0,5,1>: Cost 3 vzipl <1,5,3,7>, LHS
-  3028287590U, // <1,0,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
-  3642501345U, // <1,0,5,3>: Cost 4 vext1 <3,1,0,5>, <3,1,0,5>
-  2215592058U, // <1,0,5,4>: Cost 3 vrev <0,1,4,5>
-  3724242907U, // <1,0,5,5>: Cost 4 vext2 <5,5,1,0>, <5,5,1,0>
-  3724906540U, // <1,0,5,6>: Cost 4 vext2 <5,6,1,0>, <5,6,1,0>
-  3911118134U, // <1,0,5,7>: Cost 4 vuzpr <3,1,3,0>, RHS
-  3028287644U, // <1,0,5,u>: Cost 3 vtrnl <1,3,5,7>, LHS
-  3762086375U, // <1,0,6,0>: Cost 4 vext3 <0,6,0,1>, <0,6,0,1>
-  2698297846U, // <1,0,6,1>: Cost 3 vext3 <2,3,0,1>, <0,6,1,7>
-  3760022015U, // <1,0,6,2>: Cost 4 vext3 <0,2,u,1>, <0,6,2,7>
-  3642509538U, // <1,0,6,3>: Cost 4 vext1 <3,1,0,6>, <3,1,0,6>
-  3762381323U, // <1,0,6,4>: Cost 4 vext3 <0,6,4,1>, <0,6,4,1>
-  3730215604U, // <1,0,6,5>: Cost 4 vext2 <6,5,1,0>, <6,5,1,0>
-  3730879237U, // <1,0,6,6>: Cost 4 vext2 <6,6,1,0>, <6,6,1,0>
-  2657801046U, // <1,0,6,7>: Cost 3 vext2 <6,7,1,0>, <6,7,1,0>
-  2658464679U, // <1,0,6,u>: Cost 3 vext2 <6,u,1,0>, <6,u,1,0>
-  2659128312U, // <1,0,7,0>: Cost 3 vext2 <7,0,1,0>, <7,0,1,0>
-  4047898278U, // <1,0,7,1>: Cost 4 vzipr <3,5,1,7>, <2,3,0,1>
-  2215460970U, // <1,0,7,2>: Cost 3 vrev <0,1,2,7>
-  3734861035U, // <1,0,7,3>: Cost 4 vext2 <7,3,1,0>, <7,3,1,0>
-  3731543398U, // <1,0,7,4>: Cost 4 vext2 <6,7,1,0>, <7,4,5,6>
-  3736188301U, // <1,0,7,5>: Cost 4 vext2 <7,5,1,0>, <7,5,1,0>
-  2663110110U, // <1,0,7,6>: Cost 3 vext2 <7,6,1,0>, <7,6,1,0>
-  3731543660U, // <1,0,7,7>: Cost 4 vext2 <6,7,1,0>, <7,7,7,7>
-  2664437376U, // <1,0,7,u>: Cost 3 vext2 <7,u,1,0>, <7,u,1,0>
-  3087884288U, // <1,0,u,0>: Cost 3 vtrnr LHS, <0,0,0,0>
-  1616003730U, // <1,0,u,1>: Cost 2 vext3 <0,u,1,1>, <0,u,1,1>
-  67985515U, // <1,0,u,2>: Cost 1 vrev LHS
-  2689893028U, // <1,0,u,3>: Cost 3 vext3 <0,u,3,1>, <0,u,3,1>
-  2689745586U, // <1,0,u,4>: Cost 3 vext3 <0,u,1,1>, <0,u,4,6>
-  2619316378U, // <1,0,u,5>: Cost 3 vext2 <0,3,1,0>, RHS
-  2669082807U, // <1,0,u,6>: Cost 3 vext2 ,
-  2592674888U, // <1,0,u,7>: Cost 3 vext1 <7,1,0,u>, <7,1,0,u>
-  68427937U, // <1,0,u,u>: Cost 1 vrev LHS
-  1543585802U, // <1,1,0,0>: Cost 2 vext2 <0,0,1,1>, <0,0,1,1>
-  1548894310U, // <1,1,0,1>: Cost 2 vext2 <0,u,1,1>, LHS
-  2618654892U, // <1,1,0,2>: Cost 3 vext2 <0,2,1,1>, <0,2,1,1>
-  2689745654U, // <1,1,0,3>: Cost 3 vext3 <0,u,1,1>, <1,0,3,2>
-  2622636370U, // <1,1,0,4>: Cost 3 vext2 <0,u,1,1>, <0,4,1,5>
-  2620645791U, // <1,1,0,5>: Cost 3 vext2 <0,5,1,1>, <0,5,1,1>
-  3696378367U, // <1,1,0,6>: Cost 4 vext2 <0,u,1,1>, <0,6,2,7>
-  3666424905U, // <1,1,0,7>: Cost 4 vext1 <7,1,1,0>, <7,1,1,0>
-  1548894866U, // <1,1,0,u>: Cost 2 vext2 <0,u,1,1>, <0,u,1,1>
-  1483112550U, // <1,1,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
-  202162278U, // <1,1,1,1>: Cost 1 vdup1 LHS
-  2622636950U, // <1,1,1,2>: Cost 3 vext2 <0,u,1,1>, <1,2,3,0>
-  2622637016U, // <1,1,1,3>: Cost 3 vext2 <0,u,1,1>, <1,3,1,3>
-  1483115830U, // <1,1,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
-  2622637200U, // <1,1,1,5>: Cost 3 vext2 <0,u,1,1>, <1,5,3,7>
-  2622637263U, // <1,1,1,6>: Cost 3 vext2 <0,u,1,1>, <1,6,1,7>
-  2592691274U, // <1,1,1,7>: Cost 3 vext1 <7,1,1,1>, <7,1,1,1>
-  202162278U, // <1,1,1,u>: Cost 1 vdup1 LHS
-  2550890588U, // <1,1,2,0>: Cost 3 vext1 <0,1,1,2>, <0,1,1,2>
-  2617329183U, // <1,1,2,1>: Cost 3 vext2 <0,0,1,1>, <2,1,3,1>
-  2622637672U, // <1,1,2,2>: Cost 3 vext2 <0,u,1,1>, <2,2,2,2>
-  2622637734U, // <1,1,2,3>: Cost 3 vext2 <0,u,1,1>, <2,3,0,1>
-  2550893878U, // <1,1,2,4>: Cost 3 vext1 <0,1,1,2>, RHS
-  3696379744U, // <1,1,2,5>: Cost 4 vext2 <0,u,1,1>, <2,5,2,7>
-  2622638010U, // <1,1,2,6>: Cost 3 vext2 <0,u,1,1>, <2,6,3,7>
-  3804554170U, // <1,1,2,7>: Cost 4 vext3 <7,7,0,1>, <1,2,7,0>
-  2622638139U, // <1,1,2,u>: Cost 3 vext2 <0,u,1,1>, <2,u,0,1>
-  2622638230U, // <1,1,3,0>: Cost 3 vext2 <0,u,1,1>, <3,0,1,2>
-  3087844148U, // <1,1,3,1>: Cost 3 vtrnr LHS, <1,1,1,1>
-  4161585244U, // <1,1,3,2>: Cost 4 vtrnr LHS, <0,1,1,2>
-  2014101606U, // <1,1,3,3>: Cost 2 vtrnr LHS, LHS
-  2622638594U, // <1,1,3,4>: Cost 3 vext2 <0,u,1,1>, <3,4,5,6>
-  2689745920U, // <1,1,3,5>: Cost 3 vext3 <0,u,1,1>, <1,3,5,7>
-  3763487753U, // <1,1,3,6>: Cost 4 vext3 <0,u,1,1>, <1,3,6,7>
-  2592707660U, // <1,1,3,7>: Cost 3 vext1 <7,1,1,3>, <7,1,1,3>
-  2014101611U, // <1,1,3,u>: Cost 2 vtrnr LHS, LHS
-  2556878950U, // <1,1,4,0>: Cost 3 vext1 <1,1,1,4>, LHS
-  2221335351U, // <1,1,4,1>: Cost 3 vrev <1,1,1,4>
-  3696380988U, // <1,1,4,2>: Cost 4 vext2 <0,u,1,1>, <4,2,6,0>
-  3763487805U, // <1,1,4,3>: Cost 4 vext3 <0,u,1,1>, <1,4,3,5>
-  2556882230U, // <1,1,4,4>: Cost 3 vext1 <1,1,1,4>, RHS
-  1548897590U, // <1,1,4,5>: Cost 2 vext2 <0,u,1,1>, RHS
-  2758184246U, // <1,1,4,6>: Cost 3 vuzpl <1,1,1,1>, RHS
-  3666457677U, // <1,1,4,7>: Cost 4 vext1 <7,1,1,4>, <7,1,1,4>
-  1548897833U, // <1,1,4,u>: Cost 2 vext2 <0,u,1,1>, RHS
-  2693653615U, // <1,1,5,0>: Cost 3 vext3 <1,5,0,1>, <1,5,0,1>
-  2617331408U, // <1,1,5,1>: Cost 3 vext2 <0,0,1,1>, <5,1,7,3>
-  4029302934U, // <1,1,5,2>: Cost 4 vzipr <0,4,1,5>, <3,0,1,2>
-  2689746064U, // <1,1,5,3>: Cost 3 vext3 <0,u,1,1>, <1,5,3,7>
-  2221564755U, // <1,1,5,4>: Cost 3 vrev <1,1,4,5>
-  2955559250U, // <1,1,5,5>: Cost 3 vzipr <0,4,1,5>, <0,4,1,5>
-  2617331810U, // <1,1,5,6>: Cost 3 vext2 <0,0,1,1>, <5,6,7,0>
-  2825293110U, // <1,1,5,7>: Cost 3 vuzpr <1,1,1,1>, RHS
-  2689746109U, // <1,1,5,u>: Cost 3 vext3 <0,u,1,1>, <1,5,u,7>
-  3696382241U, // <1,1,6,0>: Cost 4 vext2 <0,u,1,1>, <6,0,1,2>
-  2689746127U, // <1,1,6,1>: Cost 3 vext3 <0,u,1,1>, <1,6,1,7>
-  2617332218U, // <1,1,6,2>: Cost 3 vext2 <0,0,1,1>, <6,2,7,3>
-  3763487969U, // <1,1,6,3>: Cost 4 vext3 <0,u,1,1>, <1,6,3,7>
-  3696382605U, // <1,1,6,4>: Cost 4 vext2 <0,u,1,1>, <6,4,5,6>
-  4029309266U, // <1,1,6,5>: Cost 4 vzipr <0,4,1,6>, <0,4,1,5>
-  2617332536U, // <1,1,6,6>: Cost 3 vext2 <0,0,1,1>, <6,6,6,6>
-  2724840702U, // <1,1,6,7>: Cost 3 vext3 <6,7,0,1>, <1,6,7,0>
-  2725504263U, // <1,1,6,u>: Cost 3 vext3 <6,u,0,1>, <1,6,u,0>
-  2617332720U, // <1,1,7,0>: Cost 3 vext2 <0,0,1,1>, <7,0,0,1>
-  2659800138U, // <1,1,7,1>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
-  3691074717U, // <1,1,7,2>: Cost 4 vext2 <0,0,1,1>, <7,2,1,3>
-  4167811174U, // <1,1,7,3>: Cost 4 vtrnr <1,1,5,7>, LHS
-  2617333094U, // <1,1,7,4>: Cost 3 vext2 <0,0,1,1>, <7,4,5,6>
-  3295396702U, // <1,1,7,5>: Cost 4 vrev <1,1,5,7>
-  3803891014U, // <1,1,7,6>: Cost 4 vext3 <7,6,0,1>, <1,7,6,0>
-  2617333356U, // <1,1,7,7>: Cost 3 vext2 <0,0,1,1>, <7,7,7,7>
-  2659800138U, // <1,1,7,u>: Cost 3 vext2 <7,1,1,1>, <7,1,1,1>
-  1483112550U, // <1,1,u,0>: Cost 2 vext1 <1,1,1,1>, LHS
-  202162278U, // <1,1,u,1>: Cost 1 vdup1 LHS
-  2622642056U, // <1,1,u,2>: Cost 3 vext2 <0,u,1,1>,
-  2014142566U, // <1,1,u,3>: Cost 2 vtrnr LHS, LHS
-  1483115830U, // <1,1,u,4>: Cost 2 vext1 <1,1,1,1>, RHS
-  1548900506U, // <1,1,u,5>: Cost 2 vext2 <0,u,1,1>, RHS
-  2622642384U, // <1,1,u,6>: Cost 3 vext2 <0,u,1,1>,
-  2825293353U, // <1,1,u,7>: Cost 3 vuzpr <1,1,1,1>, RHS
-  202162278U, // <1,1,u,u>: Cost 1 vdup1 LHS
-  2635251712U, // <1,2,0,0>: Cost 3 vext2 <3,0,1,2>, <0,0,0,0>
-  1561509990U, // <1,2,0,1>: Cost 2 vext2 <3,0,1,2>, LHS
-  2618663085U, // <1,2,0,2>: Cost 3 vext2 <0,2,1,2>, <0,2,1,2>
-  2696529358U, // <1,2,0,3>: Cost 3 vext3 <2,0,3,1>, <2,0,3,1>
-  2635252050U, // <1,2,0,4>: Cost 3 vext2 <3,0,1,2>, <0,4,1,5>
-  3769533926U, // <1,2,0,5>: Cost 4 vext3 <1,u,2,1>, <2,0,5,7>
-  2621317617U, // <1,2,0,6>: Cost 3 vext2 <0,6,1,2>, <0,6,1,2>
-  2659140170U, // <1,2,0,7>: Cost 3 vext2 <7,0,1,2>, <0,7,2,1>
-  1561510557U, // <1,2,0,u>: Cost 2 vext2 <3,0,1,2>, LHS
-  2623308516U, // <1,2,1,0>: Cost 3 vext2 <1,0,1,2>, <1,0,1,2>
-  2635252532U, // <1,2,1,1>: Cost 3 vext2 <3,0,1,2>, <1,1,1,1>
-  2631271318U, // <1,2,1,2>: Cost 3 vext2 <2,3,1,2>, <1,2,3,0>
-  2958180454U, // <1,2,1,3>: Cost 3 vzipr <0,u,1,1>, LHS
-  2550959414U, // <1,2,1,4>: Cost 3 vext1 <0,1,2,1>, RHS
-  2635252880U, // <1,2,1,5>: Cost 3 vext2 <3,0,1,2>, <1,5,3,7>
-  2635252952U, // <1,2,1,6>: Cost 3 vext2 <3,0,1,2>, <1,6,2,7>
-  3732882731U, // <1,2,1,7>: Cost 4 vext2 <7,0,1,2>, <1,7,3,0>
-  2958180459U, // <1,2,1,u>: Cost 3 vzipr <0,u,1,1>, LHS
-  2629281213U, // <1,2,2,0>: Cost 3 vext2 <2,0,1,2>, <2,0,1,2>
-  2635253280U, // <1,2,2,1>: Cost 3 vext2 <3,0,1,2>, <2,1,3,2>
-  2618664552U, // <1,2,2,2>: Cost 3 vext2 <0,2,1,2>, <2,2,2,2>
-  2689746546U, // <1,2,2,3>: Cost 3 vext3 <0,u,1,1>, <2,2,3,3>
-  3764815485U, // <1,2,2,4>: Cost 4 vext3 <1,1,1,1>, <2,2,4,5>
-  3760023176U, // <1,2,2,5>: Cost 4 vext3 <0,2,u,1>, <2,2,5,7>
-  2635253690U, // <1,2,2,6>: Cost 3 vext2 <3,0,1,2>, <2,6,3,7>
-  2659141610U, // <1,2,2,7>: Cost 3 vext2 <7,0,1,2>, <2,7,0,1>
-  2689746591U, // <1,2,2,u>: Cost 3 vext3 <0,u,1,1>, <2,2,u,3>
-  403488870U, // <1,2,3,0>: Cost 1 vext1 LHS, LHS
-  1477231350U, // <1,2,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
-  1477232232U, // <1,2,3,2>: Cost 2 vext1 LHS, <2,2,2,2>
-  1477233052U, // <1,2,3,3>: Cost 2 vext1 LHS, <3,3,3,3>
-  403492150U, // <1,2,3,4>: Cost 1 vext1 LHS, RHS
-  1525010128U, // <1,2,3,5>: Cost 2 vext1 LHS, <5,1,7,3>
-  1525010938U, // <1,2,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
-  1525011450U, // <1,2,3,7>: Cost 2 vext1 LHS, <7,0,1,2>
-  403494702U, // <1,2,3,u>: Cost 1 vext1 LHS, LHS
-  2641226607U, // <1,2,4,0>: Cost 3 vext2 <4,0,1,2>, <4,0,1,2>
-  3624723446U, // <1,2,4,1>: Cost 4 vext1 <0,1,2,4>, <1,3,4,6>
-  3301123609U, // <1,2,4,2>: Cost 4 vrev <2,1,2,4>
-  2598759198U, // <1,2,4,3>: Cost 3 vext1 , <3,u,1,2>
-  2659142864U, // <1,2,4,4>: Cost 3 vext2 <7,0,1,2>, <4,4,4,4>
-  1561513270U, // <1,2,4,5>: Cost 2 vext2 <3,0,1,2>, RHS
-  2659143028U, // <1,2,4,6>: Cost 3 vext2 <7,0,1,2>, <4,6,4,6>
-  2659143112U, // <1,2,4,7>: Cost 3 vext2 <7,0,1,2>, <4,7,5,0>
-  1561513513U, // <1,2,4,u>: Cost 2 vext2 <3,0,1,2>, RHS
-  2550988902U, // <1,2,5,0>: Cost 3 vext1 <0,1,2,5>, LHS
-  2550989824U, // <1,2,5,1>: Cost 3 vext1 <0,1,2,5>, <1,3,5,7>
-  3624732264U, // <1,2,5,2>: Cost 4 vext1 <0,1,2,5>, <2,2,2,2>
-  2955559014U, // <1,2,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
-  2550992182U, // <1,2,5,4>: Cost 3 vext1 <0,1,2,5>, RHS
-  2659143684U, // <1,2,5,5>: Cost 3 vext2 <7,0,1,2>, <5,5,5,5>
-  2659143778U, // <1,2,5,6>: Cost 3 vext2 <7,0,1,2>, <5,6,7,0>
-  2659143848U, // <1,2,5,7>: Cost 3 vext2 <7,0,1,2>, <5,7,5,7>
-  2550994734U, // <1,2,5,u>: Cost 3 vext1 <0,1,2,5>, LHS
-  2700289945U, // <1,2,6,0>: Cost 3 vext3 <2,6,0,1>, <2,6,0,1>
-  2635256232U, // <1,2,6,1>: Cost 3 vext2 <3,0,1,2>, <6,1,7,2>
-  2659144186U, // <1,2,6,2>: Cost 3 vext2 <7,0,1,2>, <6,2,7,3>
-  2689746874U, // <1,2,6,3>: Cost 3 vext3 <0,u,1,1>, <2,6,3,7>
-  3763488705U, // <1,2,6,4>: Cost 4 vext3 <0,u,1,1>, <2,6,4,5>
-  3763488716U, // <1,2,6,5>: Cost 4 vext3 <0,u,1,1>, <2,6,5,7>
-  2659144504U, // <1,2,6,6>: Cost 3 vext2 <7,0,1,2>, <6,6,6,6>
-  2657817432U, // <1,2,6,7>: Cost 3 vext2 <6,7,1,2>, <6,7,1,2>
-  2689746919U, // <1,2,6,u>: Cost 3 vext3 <0,u,1,1>, <2,6,u,7>
-  1585402874U, // <1,2,7,0>: Cost 2 vext2 <7,0,1,2>, <7,0,1,2>
-  2659144770U, // <1,2,7,1>: Cost 3 vext2 <7,0,1,2>, <7,1,0,2>
-  3708998858U, // <1,2,7,2>: Cost 4 vext2 <3,0,1,2>, <7,2,6,3>
-  2635257059U, // <1,2,7,3>: Cost 3 vext2 <3,0,1,2>, <7,3,0,1>
-  2659145062U, // <1,2,7,4>: Cost 3 vext2 <7,0,1,2>, <7,4,5,6>
-  3732886916U, // <1,2,7,5>: Cost 4 vext2 <7,0,1,2>, <7,5,0,0>
-  3732886998U, // <1,2,7,6>: Cost 4 vext2 <7,0,1,2>, <7,6,0,1>
-  2659145255U, // <1,2,7,7>: Cost 3 vext2 <7,0,1,2>, <7,7,0,1>
-  1590711938U, // <1,2,7,u>: Cost 2 vext2 <7,u,1,2>, <7,u,1,2>
-  403529835U, // <1,2,u,0>: Cost 1 vext1 LHS, LHS
-  1477272310U, // <1,2,u,1>: Cost 2 vext1 LHS, <1,0,3,2>
-  1477273192U, // <1,2,u,2>: Cost 2 vext1 LHS, <2,2,2,2>
-  1477273750U, // <1,2,u,3>: Cost 2 vext1 LHS, <3,0,1,2>
-  403533110U, // <1,2,u,4>: Cost 1 vext1 LHS, RHS
-  1561516186U, // <1,2,u,5>: Cost 2 vext2 <3,0,1,2>, RHS
-  1525051898U, // <1,2,u,6>: Cost 2 vext1 LHS, <6,2,7,3>
-  1525052410U, // <1,2,u,7>: Cost 2 vext1 LHS, <7,0,1,2>
-  403535662U, // <1,2,u,u>: Cost 1 vext1 LHS, LHS
-  2819407872U, // <1,3,0,0>: Cost 3 vuzpr LHS, <0,0,0,0>
-  1551564902U, // <1,3,0,1>: Cost 2 vext2 <1,3,1,3>, LHS
-  2819408630U, // <1,3,0,2>: Cost 3 vuzpr LHS, <1,0,3,2>
-  2619334911U, // <1,3,0,3>: Cost 3 vext2 <0,3,1,3>, <0,3,1,3>
-  2625306962U, // <1,3,0,4>: Cost 3 vext2 <1,3,1,3>, <0,4,1,5>
-  3832725879U, // <1,3,0,5>: Cost 4 vuzpl <1,2,3,0>, <0,4,5,6>
-  3699048959U, // <1,3,0,6>: Cost 4 vext2 <1,3,1,3>, <0,6,2,7>
-  3776538827U, // <1,3,0,7>: Cost 4 vext3 <3,0,7,1>, <3,0,7,1>
-  1551565469U, // <1,3,0,u>: Cost 2 vext2 <1,3,1,3>, LHS
-  2618671862U, // <1,3,1,0>: Cost 3 vext2 <0,2,1,3>, <1,0,3,2>
-  2819408692U, // <1,3,1,1>: Cost 3 vuzpr LHS, <1,1,1,1>
-  2624643975U, // <1,3,1,2>: Cost 3 vext2 <1,2,1,3>, <1,2,1,3>
-  1745666150U, // <1,3,1,3>: Cost 2 vuzpr LHS, LHS
-  2557005110U, // <1,3,1,4>: Cost 3 vext1 <1,1,3,1>, RHS
-  2625307792U, // <1,3,1,5>: Cost 3 vext2 <1,3,1,3>, <1,5,3,7>
-  3698386127U, // <1,3,1,6>: Cost 4 vext2 <1,2,1,3>, <1,6,1,7>
-  2592838748U, // <1,3,1,7>: Cost 3 vext1 <7,1,3,1>, <7,1,3,1>
-  1745666155U, // <1,3,1,u>: Cost 2 vuzpr LHS, LHS
-  2819408790U, // <1,3,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
-  2625308193U, // <1,3,2,1>: Cost 3 vext2 <1,3,1,3>, <2,1,3,3>
-  2819408036U, // <1,3,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
-  2819851890U, // <1,3,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
-  2819408794U, // <1,3,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
-  3893149890U, // <1,3,2,5>: Cost 4 vuzpr LHS, <0,2,3,5>
-  2819408076U, // <1,3,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
-  3772041583U, // <1,3,2,7>: Cost 4 vext3 <2,3,0,1>, <3,2,7,3>
-  2819408042U, // <1,3,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
-  1483276390U, // <1,3,3,0>: Cost 2 vext1 <1,1,3,3>, LHS
-  1483277128U, // <1,3,3,1>: Cost 2 vext1 <1,1,3,3>, <1,1,3,3>
-  2557019752U, // <1,3,3,2>: Cost 3 vext1 <1,1,3,3>, <2,2,2,2>
-  2819408856U, // <1,3,3,3>: Cost 3 vuzpr LHS, <1,3,1,3>
-  1483279670U, // <1,3,3,4>: Cost 2 vext1 <1,1,3,3>, RHS
-  2819409614U, // <1,3,3,5>: Cost 3 vuzpr LHS, <2,3,4,5>
-  2598826490U, // <1,3,3,6>: Cost 3 vext1 , <6,2,7,3>
-  3087844352U, // <1,3,3,7>: Cost 3 vtrnr LHS, <1,3,5,7>
-  1483282222U, // <1,3,3,u>: Cost 2 vext1 <1,1,3,3>, LHS
-  2568970342U, // <1,3,4,0>: Cost 3 vext1 <3,1,3,4>, LHS
-  2568971224U, // <1,3,4,1>: Cost 3 vext1 <3,1,3,4>, <1,3,1,3>
-  3832761290U, // <1,3,4,2>: Cost 4 vuzpl <1,2,3,4>, <4,1,2,3>
-  2233428219U, // <1,3,4,3>: Cost 3 vrev <3,1,3,4>
-  2568973622U, // <1,3,4,4>: Cost 3 vext1 <3,1,3,4>, RHS
-  1551568182U, // <1,3,4,5>: Cost 2 vext2 <1,3,1,3>, RHS
-  2819410434U, // <1,3,4,6>: Cost 3 vuzpr LHS, <3,4,5,6>
-  3666605151U, // <1,3,4,7>: Cost 4 vext1 <7,1,3,4>, <7,1,3,4>
-  1551568425U, // <1,3,4,u>: Cost 2 vext2 <1,3,1,3>, RHS
-  2563006566U, // <1,3,5,0>: Cost 3 vext1 <2,1,3,5>, LHS
-  2568979456U, // <1,3,5,1>: Cost 3 vext1 <3,1,3,5>, <1,3,5,7>
-  2563008035U, // <1,3,5,2>: Cost 3 vext1 <2,1,3,5>, <2,1,3,5>
-  2233436412U, // <1,3,5,3>: Cost 3 vrev <3,1,3,5>
-  2563009846U, // <1,3,5,4>: Cost 3 vext1 <2,1,3,5>, RHS
-  2867187716U, // <1,3,5,5>: Cost 3 vuzpr LHS, <5,5,5,5>
-  2655834214U, // <1,3,5,6>: Cost 3 vext2 <6,4,1,3>, <5,6,7,4>
-  1745669430U, // <1,3,5,7>: Cost 2 vuzpr LHS, RHS
-  1745669431U, // <1,3,5,u>: Cost 2 vuzpr LHS, RHS
-  2867187810U, // <1,3,6,0>: Cost 3 vuzpr LHS, <5,6,7,0>
-  3699052931U, // <1,3,6,1>: Cost 4 vext2 <1,3,1,3>, <6,1,3,1>
-  2654507460U, // <1,3,6,2>: Cost 3 vext2 <6,2,1,3>, <6,2,1,3>
-  3766291091U, // <1,3,6,3>: Cost 4 vext3 <1,3,3,1>, <3,6,3,7>
-  2655834726U, // <1,3,6,4>: Cost 3 vext2 <6,4,1,3>, <6,4,1,3>
-  3923384562U, // <1,3,6,5>: Cost 4 vuzpr <5,1,7,3>,
-  2657161992U, // <1,3,6,6>: Cost 3 vext2 <6,6,1,3>, <6,6,1,3>
-  2819852218U, // <1,3,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
-  2819852219U, // <1,3,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
-  2706926275U, // <1,3,7,0>: Cost 3 vext3 <3,7,0,1>, <3,7,0,1>
-  2659816524U, // <1,3,7,1>: Cost 3 vext2 <7,1,1,3>, <7,1,1,3>
-  3636766245U, // <1,3,7,2>: Cost 4 vext1 <2,1,3,7>, <2,1,3,7>
-  2867187903U, // <1,3,7,3>: Cost 3 vuzpr LHS, <5,7,u,3>
-  2625312102U, // <1,3,7,4>: Cost 3 vext2 <1,3,1,3>, <7,4,5,6>
-  2867188598U, // <1,3,7,5>: Cost 3 vuzpr LHS, <6,7,4,5>
-  3728250344U, // <1,3,7,6>: Cost 4 vext2 <6,2,1,3>, <7,6,2,1>
-  2867187880U, // <1,3,7,7>: Cost 3 vuzpr LHS, <5,7,5,7>
-  2707516171U, // <1,3,7,u>: Cost 3 vext3 <3,7,u,1>, <3,7,u,1>
-  1483317350U, // <1,3,u,0>: Cost 2 vext1 <1,1,3,u>, LHS
-  1483318093U, // <1,3,u,1>: Cost 2 vext1 <1,1,3,u>, <1,1,3,u>
-  2819410718U, // <1,3,u,2>: Cost 3 vuzpr LHS, <3,u,1,2>
-  1745666717U, // <1,3,u,3>: Cost 2 vuzpr LHS, LHS
-  1483320630U, // <1,3,u,4>: Cost 2 vext1 <1,1,3,u>, RHS
-  1551571098U, // <1,3,u,5>: Cost 2 vext2 <1,3,1,3>, RHS
-  2819410758U, // <1,3,u,6>: Cost 3 vuzpr LHS, <3,u,5,6>
-  1745669673U, // <1,3,u,7>: Cost 2 vuzpr LHS, RHS
-  1745666722U, // <1,3,u,u>: Cost 2 vuzpr LHS, LHS
-  2617352205U, // <1,4,0,0>: Cost 3 vext2 <0,0,1,4>, <0,0,1,4>
-  2619342950U, // <1,4,0,1>: Cost 3 vext2 <0,3,1,4>, LHS
-  3692421295U, // <1,4,0,2>: Cost 4 vext2 <0,2,1,4>, <0,2,1,4>
-  2619343104U, // <1,4,0,3>: Cost 3 vext2 <0,3,1,4>, <0,3,1,4>
-  2617352530U, // <1,4,0,4>: Cost 3 vext2 <0,0,1,4>, <0,4,1,5>
-  1634880402U, // <1,4,0,5>: Cost 2 vext3 <4,0,5,1>, <4,0,5,1>
-  2713930652U, // <1,4,0,6>: Cost 3 vext3 <4,u,5,1>, <4,0,6,2>
-  3732898396U, // <1,4,0,7>: Cost 4 vext2 <7,0,1,4>, <0,7,4,1>
-  1635101613U, // <1,4,0,u>: Cost 2 vext3 <4,0,u,1>, <4,0,u,1>
-  3693085430U, // <1,4,1,0>: Cost 4 vext2 <0,3,1,4>, <1,0,3,2>
-  2623988535U, // <1,4,1,1>: Cost 3 vext2 <1,1,1,4>, <1,1,1,4>
-  3693085590U, // <1,4,1,2>: Cost 4 vext2 <0,3,1,4>, <1,2,3,0>
-  3692422134U, // <1,4,1,3>: Cost 4 vext2 <0,2,1,4>, <1,3,4,6>
-  3693085726U, // <1,4,1,4>: Cost 4 vext2 <0,3,1,4>, <1,4,0,1>
-  2892401974U, // <1,4,1,5>: Cost 3 vzipl <1,1,1,1>, RHS
-  3026619702U, // <1,4,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
-  3800206324U, // <1,4,1,7>: Cost 4 vext3 <7,0,4,1>, <4,1,7,0>
-  2892402217U, // <1,4,1,u>: Cost 3 vzipl <1,1,1,1>, RHS
-  3966978927U, // <1,4,2,0>: Cost 4 vzipl <1,2,3,4>, <4,0,1,2>
-  3966979018U, // <1,4,2,1>: Cost 4 vzipl <1,2,3,4>, <4,1,2,3>
-  3693086312U, // <1,4,2,2>: Cost 4 vext2 <0,3,1,4>, <2,2,2,2>
-  2635269798U, // <1,4,2,3>: Cost 3 vext2 <3,0,1,4>, <2,3,0,1>
-  3966979280U, // <1,4,2,4>: Cost 4 vzipl <1,2,3,4>, <4,4,4,4>
-  2893204790U, // <1,4,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
-  3693086650U, // <1,4,2,6>: Cost 4 vext2 <0,3,1,4>, <2,6,3,7>
-  3666662502U, // <1,4,2,7>: Cost 4 vext1 <7,1,4,2>, <7,1,4,2>
-  2893205033U, // <1,4,2,u>: Cost 3 vzipl <1,2,3,0>, RHS
-  2563063910U, // <1,4,3,0>: Cost 3 vext1 <2,1,4,3>, LHS
-  2563064730U, // <1,4,3,1>: Cost 3 vext1 <2,1,4,3>, <1,2,3,4>
-  2563065386U, // <1,4,3,2>: Cost 3 vext1 <2,1,4,3>, <2,1,4,3>
-  3693087132U, // <1,4,3,3>: Cost 4 vext2 <0,3,1,4>, <3,3,3,3>
-  2619345410U, // <1,4,3,4>: Cost 3 vext2 <0,3,1,4>, <3,4,5,6>
-  3087843666U, // <1,4,3,5>: Cost 3 vtrnr LHS, <0,4,1,5>
-  3087843676U, // <1,4,3,6>: Cost 3 vtrnr LHS, <0,4,2,6>
-  3666670695U, // <1,4,3,7>: Cost 4 vext1 <7,1,4,3>, <7,1,4,3>
-  3087843669U, // <1,4,3,u>: Cost 3 vtrnr LHS, <0,4,1,u>
-  2620672914U, // <1,4,4,0>: Cost 3 vext2 <0,5,1,4>, <4,0,5,1>
-  3630842706U, // <1,4,4,1>: Cost 4 vext1 <1,1,4,4>, <1,1,4,4>
-  3313069003U, // <1,4,4,2>: Cost 4 vrev <4,1,2,4>
-  3642788100U, // <1,4,4,3>: Cost 4 vext1 <3,1,4,4>, <3,1,4,4>
-  2713930960U, // <1,4,4,4>: Cost 3 vext3 <4,u,5,1>, <4,4,4,4>
-  2619346230U, // <1,4,4,5>: Cost 3 vext2 <0,3,1,4>, RHS
-  2713930980U, // <1,4,4,6>: Cost 3 vext3 <4,u,5,1>, <4,4,6,6>
-  3736882642U, // <1,4,4,7>: Cost 4 vext2 <7,6,1,4>, <4,7,6,1>
-  2619346473U, // <1,4,4,u>: Cost 3 vext2 <0,3,1,4>, RHS
-  2557108326U, // <1,4,5,0>: Cost 3 vext1 <1,1,4,5>, LHS
-  2557109075U, // <1,4,5,1>: Cost 3 vext1 <1,1,4,5>, <1,1,4,5>
-  2598913774U, // <1,4,5,2>: Cost 3 vext1 , <2,3,u,1>
-  3630852246U, // <1,4,5,3>: Cost 4 vext1 <1,1,4,5>, <3,0,1,2>
-  2557111606U, // <1,4,5,4>: Cost 3 vext1 <1,1,4,5>, RHS
-  2895252790U, // <1,4,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
-  1616006454U, // <1,4,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
-  3899059510U, // <1,4,5,7>: Cost 4 vuzpr <1,1,1,4>, RHS
-  1616006472U, // <1,4,5,u>: Cost 2 vext3 <0,u,1,1>, RHS
-  2557116518U, // <1,4,6,0>: Cost 3 vext1 <1,1,4,6>, LHS
-  2557117236U, // <1,4,6,1>: Cost 3 vext1 <1,1,4,6>, <1,1,1,1>
-  3630859880U, // <1,4,6,2>: Cost 4 vext1 <1,1,4,6>, <2,2,2,2>
-  2569062550U, // <1,4,6,3>: Cost 3 vext1 <3,1,4,6>, <3,0,1,2>
-  2557119798U, // <1,4,6,4>: Cost 3 vext1 <1,1,4,6>, RHS
-  3763490174U, // <1,4,6,5>: Cost 4 vext3 <0,u,1,1>, <4,6,5,7>
-  3763490183U, // <1,4,6,6>: Cost 4 vext3 <0,u,1,1>, <4,6,6,7>
-  2712751498U, // <1,4,6,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
-  2557122350U, // <1,4,6,u>: Cost 3 vext1 <1,1,4,6>, LHS
-  2659161084U, // <1,4,7,0>: Cost 3 vext2 <7,0,1,4>, <7,0,1,4>
-  3732903040U, // <1,4,7,1>: Cost 4 vext2 <7,0,1,4>, <7,1,7,1>
-  3734230174U, // <1,4,7,2>: Cost 4 vext2 <7,2,1,4>, <7,2,1,4>
-  3734893807U, // <1,4,7,3>: Cost 4 vext2 <7,3,1,4>, <7,3,1,4>
-  3660729654U, // <1,4,7,4>: Cost 4 vext1 <6,1,4,7>, RHS
-  3786493384U, // <1,4,7,5>: Cost 4 vext3 <4,6,7,1>, <4,7,5,0>
-  2713341394U, // <1,4,7,6>: Cost 3 vext3 <4,7,6,1>, <4,7,6,1>
-  3660731386U, // <1,4,7,7>: Cost 4 vext1 <6,1,4,7>, <7,0,1,2>
-  2664470148U, // <1,4,7,u>: Cost 3 vext2 <7,u,1,4>, <7,u,1,4>
-  2557132902U, // <1,4,u,0>: Cost 3 vext1 <1,1,4,u>, LHS
-  2619348782U, // <1,4,u,1>: Cost 3 vext2 <0,3,1,4>, LHS
-  2563106351U, // <1,4,u,2>: Cost 3 vext1 <2,1,4,u>, <2,1,4,u>
-  2713783816U, // <1,4,u,3>: Cost 3 vext3 <4,u,3,1>, <4,u,3,1>
-  2622666815U, // <1,4,u,4>: Cost 3 vext2 <0,u,1,4>,
-  1640189466U, // <1,4,u,5>: Cost 2 vext3 <4,u,5,1>, <4,u,5,1>
-  1616006697U, // <1,4,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
-  2712751498U, // <1,4,u,7>: Cost 3 vext3 <4,6,7,1>, <4,6,7,1>
-  1616006715U, // <1,4,u,u>: Cost 2 vext3 <0,u,1,1>, RHS
-  2620014592U, // <1,5,0,0>: Cost 3 vext2 <0,4,1,5>, <0,0,0,0>
-  1546272870U, // <1,5,0,1>: Cost 2 vext2 <0,4,1,5>, LHS
-  2618687664U, // <1,5,0,2>: Cost 3 vext2 <0,2,1,5>, <0,2,1,5>
-  3693093120U, // <1,5,0,3>: Cost 4 vext2 <0,3,1,5>, <0,3,1,4>
-  1546273106U, // <1,5,0,4>: Cost 2 vext2 <0,4,1,5>, <0,4,1,5>
-  2620678563U, // <1,5,0,5>: Cost 3 vext2 <0,5,1,5>, <0,5,1,5>
-  2714668660U, // <1,5,0,6>: Cost 3 vext3 <5,0,6,1>, <5,0,6,1>
-  3772042877U, // <1,5,0,7>: Cost 4 vext3 <2,3,0,1>, <5,0,7,1>
-  1546273437U, // <1,5,0,u>: Cost 2 vext2 <0,4,1,5>, LHS
-  2620015350U, // <1,5,1,0>: Cost 3 vext2 <0,4,1,5>, <1,0,3,2>
-  2620015412U, // <1,5,1,1>: Cost 3 vext2 <0,4,1,5>, <1,1,1,1>
-  2620015510U, // <1,5,1,2>: Cost 3 vext2 <0,4,1,5>, <1,2,3,0>
-  2618688512U, // <1,5,1,3>: Cost 3 vext2 <0,2,1,5>, <1,3,5,7>
-  2620015677U, // <1,5,1,4>: Cost 3 vext2 <0,4,1,5>, <1,4,3,5>
-  2620015727U, // <1,5,1,5>: Cost 3 vext2 <0,4,1,5>, <1,5,0,1>
-  2620015859U, // <1,5,1,6>: Cost 3 vext2 <0,4,1,5>, <1,6,5,7>
-  3093728566U, // <1,5,1,7>: Cost 3 vtrnr <1,1,1,1>, RHS
-  2620015981U, // <1,5,1,u>: Cost 3 vext2 <0,4,1,5>, <1,u,1,3>
-  3692430816U, // <1,5,2,0>: Cost 4 vext2 <0,2,1,5>, <2,0,5,1>
-  2620016163U, // <1,5,2,1>: Cost 3 vext2 <0,4,1,5>, <2,1,3,5>
-  2620016232U, // <1,5,2,2>: Cost 3 vext2 <0,4,1,5>, <2,2,2,2>
-  2620016294U, // <1,5,2,3>: Cost 3 vext2 <0,4,1,5>, <2,3,0,1>
-  3693758221U, // <1,5,2,4>: Cost 4 vext2 <0,4,1,5>, <2,4,2,5>
-  3692431209U, // <1,5,2,5>: Cost 4 vext2 <0,2,1,5>, <2,5,3,7>
-  2620016570U, // <1,5,2,6>: Cost 3 vext2 <0,4,1,5>, <2,6,3,7>
-  4173598006U, // <1,5,2,7>: Cost 4 vtrnr <2,1,3,2>, RHS
-  2620016699U, // <1,5,2,u>: Cost 3 vext2 <0,4,1,5>, <2,u,0,1>
-  2620016790U, // <1,5,3,0>: Cost 3 vext2 <0,4,1,5>, <3,0,1,2>
-  2569110672U, // <1,5,3,1>: Cost 3 vext1 <3,1,5,3>, <1,5,3,7>
-  3693758785U, // <1,5,3,2>: Cost 4 vext2 <0,4,1,5>, <3,2,2,2>
-  2620017052U, // <1,5,3,3>: Cost 3 vext2 <0,4,1,5>, <3,3,3,3>
-  2620017154U, // <1,5,3,4>: Cost 3 vext2 <0,4,1,5>, <3,4,5,6>
-  3135623172U, // <1,5,3,5>: Cost 3 vtrnr LHS, <5,5,5,5>
-  4161587048U, // <1,5,3,6>: Cost 4 vtrnr LHS, <2,5,3,6>
-  2014104886U, // <1,5,3,7>: Cost 2 vtrnr LHS, RHS
-  2014104887U, // <1,5,3,u>: Cost 2 vtrnr LHS, RHS
-  2620017554U, // <1,5,4,0>: Cost 3 vext2 <0,4,1,5>, <4,0,5,1>
-  2620017634U, // <1,5,4,1>: Cost 3 vext2 <0,4,1,5>, <4,1,5,0>
-  3693759551U, // <1,5,4,2>: Cost 4 vext2 <0,4,1,5>, <4,2,6,3>
-  3642861837U, // <1,5,4,3>: Cost 4 vext1 <3,1,5,4>, <3,1,5,4>
-  2575092710U, // <1,5,4,4>: Cost 3 vext1 <4,1,5,4>, <4,1,5,4>
-  1546276150U, // <1,5,4,5>: Cost 2 vext2 <0,4,1,5>, RHS
-  2759855414U, // <1,5,4,6>: Cost 3 vuzpl <1,3,5,7>, RHS
-  2713931718U, // <1,5,4,7>: Cost 3 vext3 <4,u,5,1>, <5,4,7,6>
-  1546276393U, // <1,5,4,u>: Cost 2 vext2 <0,4,1,5>, RHS
-  2557182054U, // <1,5,5,0>: Cost 3 vext1 <1,1,5,5>, LHS
-  2557182812U, // <1,5,5,1>: Cost 3 vext1 <1,1,5,5>, <1,1,5,5>
-  3630925347U, // <1,5,5,2>: Cost 4 vext1 <1,1,5,5>, <2,1,3,5>
-  4029301675U, // <1,5,5,3>: Cost 4 vzipr <0,4,1,5>, <1,2,5,3>
-  2557185334U, // <1,5,5,4>: Cost 3 vext1 <1,1,5,5>, RHS
-  2713931780U, // <1,5,5,5>: Cost 3 vext3 <4,u,5,1>, <5,5,5,5>
-  2667794530U, // <1,5,5,6>: Cost 3 vext2 , <5,6,7,0>
-  2713931800U, // <1,5,5,7>: Cost 3 vext3 <4,u,5,1>, <5,5,7,7>
-  2557187886U, // <1,5,5,u>: Cost 3 vext1 <1,1,5,5>, LHS
-  2718208036U, // <1,5,6,0>: Cost 3 vext3 <5,6,0,1>, <5,6,0,1>
-  2620019115U, // <1,5,6,1>: Cost 3 vext2 <0,4,1,5>, <6,1,7,5>
-  2667794938U, // <1,5,6,2>: Cost 3 vext2 , <6,2,7,3>
-  3787673666U, // <1,5,6,3>: Cost 4 vext3 <4,u,5,1>, <5,6,3,4>
-  3693761165U, // <1,5,6,4>: Cost 4 vext2 <0,4,1,5>, <6,4,5,6>
-  3319279297U, // <1,5,6,5>: Cost 4 vrev <5,1,5,6>
-  2667795256U, // <1,5,6,6>: Cost 3 vext2 , <6,6,6,6>
-  2713931874U, // <1,5,6,7>: Cost 3 vext3 <4,u,5,1>, <5,6,7,0>
-  2713931883U, // <1,5,6,u>: Cost 3 vext3 <4,u,5,1>, <5,6,u,0>
-  2557198438U, // <1,5,7,0>: Cost 3 vext1 <1,1,5,7>, LHS
-  2557199156U, // <1,5,7,1>: Cost 3 vext1 <1,1,5,7>, <1,1,1,1>
-  2569143974U, // <1,5,7,2>: Cost 3 vext1 <3,1,5,7>, <2,3,0,1>
-  2569144592U, // <1,5,7,3>: Cost 3 vext1 <3,1,5,7>, <3,1,5,7>
-  2557201718U, // <1,5,7,4>: Cost 3 vext1 <1,1,5,7>, RHS
-  2713931944U, // <1,5,7,5>: Cost 3 vext3 <4,u,5,1>, <5,7,5,7>
-  3787673770U, // <1,5,7,6>: Cost 4 vext3 <4,u,5,1>, <5,7,6,0>
-  2719387828U, // <1,5,7,7>: Cost 3 vext3 <5,7,7,1>, <5,7,7,1>
-  2557204270U, // <1,5,7,u>: Cost 3 vext1 <1,1,5,7>, LHS
-  2620020435U, // <1,5,u,0>: Cost 3 vext2 <0,4,1,5>,
-  1546278702U, // <1,5,u,1>: Cost 2 vext2 <0,4,1,5>, LHS
-  2620020616U, // <1,5,u,2>: Cost 3 vext2 <0,4,1,5>,
-  2620020668U, // <1,5,u,3>: Cost 3 vext2 <0,4,1,5>,
-  1594054682U, // <1,5,u,4>: Cost 2 vext2 ,
-  1546279066U, // <1,5,u,5>: Cost 2 vext2 <0,4,1,5>, RHS
-  2620020944U, // <1,5,u,6>: Cost 3 vext2 <0,4,1,5>,
-  2014145846U, // <1,5,u,7>: Cost 2 vtrnr LHS, RHS
-  2014145847U, // <1,5,u,u>: Cost 2 vtrnr LHS, RHS
-  3692437504U, // <1,6,0,0>: Cost 4 vext2 <0,2,1,6>, <0,0,0,0>
-  2618695782U, // <1,6,0,1>: Cost 3 vext2 <0,2,1,6>, LHS
-  2618695857U, // <1,6,0,2>: Cost 3 vext2 <0,2,1,6>, <0,2,1,6>
-  3794161970U, // <1,6,0,3>: Cost 4 vext3 <6,0,3,1>, <6,0,3,1>
-  2620023122U, // <1,6,0,4>: Cost 3 vext2 <0,4,1,6>, <0,4,1,5>
-  2620686756U, // <1,6,0,5>: Cost 3 vext2 <0,5,1,6>, <0,5,1,6>
-  2621350389U, // <1,6,0,6>: Cost 3 vext2 <0,6,1,6>, <0,6,1,6>
-  4028599606U, // <1,6,0,7>: Cost 4 vzipr <0,3,1,0>, RHS
-  2618696349U, // <1,6,0,u>: Cost 3 vext2 <0,2,1,6>, LHS
-  3692438262U, // <1,6,1,0>: Cost 4 vext2 <0,2,1,6>, <1,0,3,2>
-  2625995572U, // <1,6,1,1>: Cost 3 vext2 <1,4,1,6>, <1,1,1,1>
-  3692438422U, // <1,6,1,2>: Cost 4 vext2 <0,2,1,6>, <1,2,3,0>
-  3692438488U, // <1,6,1,3>: Cost 4 vext2 <0,2,1,6>, <1,3,1,3>
-  2625995820U, // <1,6,1,4>: Cost 3 vext2 <1,4,1,6>, <1,4,1,6>
-  3692438672U, // <1,6,1,5>: Cost 4 vext2 <0,2,1,6>, <1,5,3,7>
-  3692438720U, // <1,6,1,6>: Cost 4 vext2 <0,2,1,6>, <1,6,0,1>
-  2958183734U, // <1,6,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
-  2958183735U, // <1,6,1,u>: Cost 3 vzipr <0,u,1,1>, RHS
-  2721526201U, // <1,6,2,0>: Cost 3 vext3 <6,2,0,1>, <6,2,0,1>
-  3692439097U, // <1,6,2,1>: Cost 4 vext2 <0,2,1,6>, <2,1,6,0>
-  3692439144U, // <1,6,2,2>: Cost 4 vext2 <0,2,1,6>, <2,2,2,2>
-  3692439206U, // <1,6,2,3>: Cost 4 vext2 <0,2,1,6>, <2,3,0,1>
-  3636948278U, // <1,6,2,4>: Cost 4 vext1 <2,1,6,2>, RHS
-  3787674092U, // <1,6,2,5>: Cost 4 vext3 <4,u,5,1>, <6,2,5,7>
-  2618697658U, // <1,6,2,6>: Cost 3 vext2 <0,2,1,6>, <2,6,3,7>
-  2970799414U, // <1,6,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
-  2970799415U, // <1,6,2,u>: Cost 3 vzipr <3,0,1,2>, RHS
-  2563211366U, // <1,6,3,0>: Cost 3 vext1 <2,1,6,3>, LHS
-  3699738854U, // <1,6,3,1>: Cost 4 vext2 <1,4,1,6>, <3,1,1,1>
-  2563212860U, // <1,6,3,2>: Cost 3 vext1 <2,1,6,3>, <2,1,6,3>
-  3692439964U, // <1,6,3,3>: Cost 4 vext2 <0,2,1,6>, <3,3,3,3>
-  2563214646U, // <1,6,3,4>: Cost 3 vext1 <2,1,6,3>, RHS
-  4191820018U, // <1,6,3,5>: Cost 4 vtrnr <5,1,7,3>,
-  2587103648U, // <1,6,3,6>: Cost 3 vext1 <6,1,6,3>, <6,1,6,3>
-  3087845306U, // <1,6,3,7>: Cost 3 vtrnr LHS, <2,6,3,7>
-  3087845307U, // <1,6,3,u>: Cost 3 vtrnr LHS, <2,6,3,u>
-  3693767570U, // <1,6,4,0>: Cost 4 vext2 <0,4,1,6>, <4,0,5,1>
-  3693767650U, // <1,6,4,1>: Cost 4 vext2 <0,4,1,6>, <4,1,5,0>
-  3636962877U, // <1,6,4,2>: Cost 4 vext1 <2,1,6,4>, <2,1,6,4>
-  3325088134U, // <1,6,4,3>: Cost 4 vrev <6,1,3,4>
-  3693767898U, // <1,6,4,4>: Cost 4 vext2 <0,4,1,6>, <4,4,5,5>
-  2618699062U, // <1,6,4,5>: Cost 3 vext2 <0,2,1,6>, RHS
-  3833670966U, // <1,6,4,6>: Cost 4 vuzpl <1,3,6,7>, RHS
-  4028632374U, // <1,6,4,7>: Cost 4 vzipr <0,3,1,4>, RHS
-  2618699305U, // <1,6,4,u>: Cost 3 vext2 <0,2,1,6>, RHS
-  3693768264U, // <1,6,5,0>: Cost 4 vext2 <0,4,1,6>, <5,0,1,2>
-  3630998373U, // <1,6,5,1>: Cost 4 vext1 <1,1,6,5>, <1,1,6,5>
-  3636971070U, // <1,6,5,2>: Cost 4 vext1 <2,1,6,5>, <2,1,6,5>
-  3642943767U, // <1,6,5,3>: Cost 4 vext1 <3,1,6,5>, <3,1,6,5>
-  3693768628U, // <1,6,5,4>: Cost 4 vext2 <0,4,1,6>, <5,4,5,6>
-  3732918276U, // <1,6,5,5>: Cost 4 vext2 <7,0,1,6>, <5,5,5,5>
-  2620690530U, // <1,6,5,6>: Cost 3 vext2 <0,5,1,6>, <5,6,7,0>
-  2955562294U, // <1,6,5,7>: Cost 3 vzipr <0,4,1,5>, RHS
-  2955562295U, // <1,6,5,u>: Cost 3 vzipr <0,4,1,5>, RHS
-  2724180733U, // <1,6,6,0>: Cost 3 vext3 <6,6,0,1>, <6,6,0,1>
-  3631006566U, // <1,6,6,1>: Cost 4 vext1 <1,1,6,6>, <1,1,6,6>
-  3631007674U, // <1,6,6,2>: Cost 4 vext1 <1,1,6,6>, <2,6,3,7>
-  3692442184U, // <1,6,6,3>: Cost 4 vext2 <0,2,1,6>, <6,3,7,0>
-  3631009078U, // <1,6,6,4>: Cost 4 vext1 <1,1,6,6>, RHS
-  3787674416U, // <1,6,6,5>: Cost 4 vext3 <4,u,5,1>, <6,6,5,7>
-  2713932600U, // <1,6,6,6>: Cost 3 vext3 <4,u,5,1>, <6,6,6,6>
-  2713932610U, // <1,6,6,7>: Cost 3 vext3 <4,u,5,1>, <6,6,7,7>
-  2713932619U, // <1,6,6,u>: Cost 3 vext3 <4,u,5,1>, <6,6,u,7>
-  1651102542U, // <1,6,7,0>: Cost 2 vext3 <6,7,0,1>, <6,7,0,1>
-  2724918103U, // <1,6,7,1>: Cost 3 vext3 <6,7,1,1>, <6,7,1,1>
-  2698302306U, // <1,6,7,2>: Cost 3 vext3 <2,3,0,1>, <6,7,2,3>
-  3642960153U, // <1,6,7,3>: Cost 4 vext1 <3,1,6,7>, <3,1,6,7>
-  2713932662U, // <1,6,7,4>: Cost 3 vext3 <4,u,5,1>, <6,7,4,5>
-  2725213051U, // <1,6,7,5>: Cost 3 vext3 <6,7,5,1>, <6,7,5,1>
-  2724844426U, // <1,6,7,6>: Cost 3 vext3 <6,7,0,1>, <6,7,6,7>
-  4035956022U, // <1,6,7,7>: Cost 4 vzipr <1,5,1,7>, RHS
-  1651692438U, // <1,6,7,u>: Cost 2 vext3 <6,7,u,1>, <6,7,u,1>
-  1651766175U, // <1,6,u,0>: Cost 2 vext3 <6,u,0,1>, <6,u,0,1>
-  2618701614U, // <1,6,u,1>: Cost 3 vext2 <0,2,1,6>, LHS
-  3135663508U, // <1,6,u,2>: Cost 3 vtrnr LHS, <4,6,u,2>
-  3692443580U, // <1,6,u,3>: Cost 4 vext2 <0,2,1,6>,
-  2713932743U, // <1,6,u,4>: Cost 3 vext3 <4,u,5,1>, <6,u,4,5>
-  2618701978U, // <1,6,u,5>: Cost 3 vext2 <0,2,1,6>, RHS
-  2622683344U, // <1,6,u,6>: Cost 3 vext2 <0,u,1,6>,
-  3087886266U, // <1,6,u,7>: Cost 3 vtrnr LHS, <2,6,3,7>
-  1652356071U, // <1,6,u,u>: Cost 2 vext3 <6,u,u,1>, <6,u,u,1>
-  2726171632U, // <1,7,0,0>: Cost 3 vext3 <7,0,0,1>, <7,0,0,1>
-  2626666598U, // <1,7,0,1>: Cost 3 vext2 <1,5,1,7>, LHS
-  3695100067U, // <1,7,0,2>: Cost 4 vext2 <0,6,1,7>, <0,2,0,1>
-  3707044102U, // <1,7,0,3>: Cost 4 vext2 <2,6,1,7>, <0,3,2,1>
-  2726466580U, // <1,7,0,4>: Cost 3 vext3 <7,0,4,1>, <7,0,4,1>
-  3654921933U, // <1,7,0,5>: Cost 4 vext1 <5,1,7,0>, <5,1,7,0>
-  2621358582U, // <1,7,0,6>: Cost 3 vext2 <0,6,1,7>, <0,6,1,7>
-  2622022215U, // <1,7,0,7>: Cost 3 vext2 <0,7,1,7>, <0,7,1,7>
-  2626667165U, // <1,7,0,u>: Cost 3 vext2 <1,5,1,7>, LHS
-  2593128550U, // <1,7,1,0>: Cost 3 vext1 <7,1,7,1>, LHS
-  2626667316U, // <1,7,1,1>: Cost 3 vext2 <1,5,1,7>, <1,1,1,1>
-  3700409238U, // <1,7,1,2>: Cost 4 vext2 <1,5,1,7>, <1,2,3,0>
-  2257294428U, // <1,7,1,3>: Cost 3 vrev <7,1,3,1>
-  2593131830U, // <1,7,1,4>: Cost 3 vext1 <7,1,7,1>, RHS
-  2626667646U, // <1,7,1,5>: Cost 3 vext2 <1,5,1,7>, <1,5,1,7>
-  2627331279U, // <1,7,1,6>: Cost 3 vext2 <1,6,1,7>, <1,6,1,7>
-  2593133696U, // <1,7,1,7>: Cost 3 vext1 <7,1,7,1>, <7,1,7,1>
-  2628658545U, // <1,7,1,u>: Cost 3 vext2 <1,u,1,7>, <1,u,1,7>
-  2587164774U, // <1,7,2,0>: Cost 3 vext1 <6,1,7,2>, LHS
-  3701073445U, // <1,7,2,1>: Cost 4 vext2 <1,6,1,7>, <2,1,3,7>
-  3700409960U, // <1,7,2,2>: Cost 4 vext2 <1,5,1,7>, <2,2,2,2>
-  2638612134U, // <1,7,2,3>: Cost 3 vext2 <3,5,1,7>, <2,3,0,1>
-  2587168054U, // <1,7,2,4>: Cost 3 vext1 <6,1,7,2>, RHS
-  3706382167U, // <1,7,2,5>: Cost 4 vext2 <2,5,1,7>, <2,5,1,7>
-  2587169192U, // <1,7,2,6>: Cost 3 vext1 <6,1,7,2>, <6,1,7,2>
-  3660911610U, // <1,7,2,7>: Cost 4 vext1 <6,1,7,2>, <7,0,1,2>
-  2587170606U, // <1,7,2,u>: Cost 3 vext1 <6,1,7,2>, LHS
-  1507459174U, // <1,7,3,0>: Cost 2 vext1 <5,1,7,3>, LHS
-  2569257984U, // <1,7,3,1>: Cost 3 vext1 <3,1,7,3>, <1,3,5,7>
-  2581202536U, // <1,7,3,2>: Cost 3 vext1 <5,1,7,3>, <2,2,2,2>
-  2569259294U, // <1,7,3,3>: Cost 3 vext1 <3,1,7,3>, <3,1,7,3>
-  1507462454U, // <1,7,3,4>: Cost 2 vext1 <5,1,7,3>, RHS
-  1507462864U, // <1,7,3,5>: Cost 2 vext1 <5,1,7,3>, <5,1,7,3>
-  2581205498U, // <1,7,3,6>: Cost 3 vext1 <5,1,7,3>, <6,2,7,3>
-  2581206010U, // <1,7,3,7>: Cost 3 vext1 <5,1,7,3>, <7,0,1,2>
-  1507465006U, // <1,7,3,u>: Cost 2 vext1 <5,1,7,3>, LHS
-  2728826164U, // <1,7,4,0>: Cost 3 vext3 <7,4,0,1>, <7,4,0,1>
-  3654951732U, // <1,7,4,1>: Cost 4 vext1 <5,1,7,4>, <1,1,1,1>
-  3330987094U, // <1,7,4,2>: Cost 4 vrev <7,1,2,4>
-  3331060831U, // <1,7,4,3>: Cost 4 vrev <7,1,3,4>
-  3787674971U, // <1,7,4,4>: Cost 4 vext3 <4,u,5,1>, <7,4,4,4>
-  2626669878U, // <1,7,4,5>: Cost 3 vext2 <1,5,1,7>, RHS
-  3785979241U, // <1,7,4,6>: Cost 4 vext3 <4,6,0,1>, <7,4,6,0>
-  3787085176U, // <1,7,4,7>: Cost 4 vext3 <4,7,6,1>, <7,4,7,6>
-  2626670121U, // <1,7,4,u>: Cost 3 vext2 <1,5,1,7>, RHS
-  2569273446U, // <1,7,5,0>: Cost 3 vext1 <3,1,7,5>, LHS
-  2569274368U, // <1,7,5,1>: Cost 3 vext1 <3,1,7,5>, <1,3,5,7>
-  3643016808U, // <1,7,5,2>: Cost 4 vext1 <3,1,7,5>, <2,2,2,2>
-  2569275680U, // <1,7,5,3>: Cost 3 vext1 <3,1,7,5>, <3,1,7,5>
-  2569276726U, // <1,7,5,4>: Cost 3 vext1 <3,1,7,5>, RHS
-  4102034790U, // <1,7,5,5>: Cost 4 vtrnl <1,3,5,7>, <7,4,5,6>
-  2651222067U, // <1,7,5,6>: Cost 3 vext2 <5,6,1,7>, <5,6,1,7>
-  3899378998U, // <1,7,5,7>: Cost 4 vuzpr <1,1,5,7>, RHS
-  2569279278U, // <1,7,5,u>: Cost 3 vext1 <3,1,7,5>, LHS
-  2730153430U, // <1,7,6,0>: Cost 3 vext3 <7,6,0,1>, <7,6,0,1>
-  2724845022U, // <1,7,6,1>: Cost 3 vext3 <6,7,0,1>, <7,6,1,0>
-  3643025338U, // <1,7,6,2>: Cost 4 vext1 <3,1,7,6>, <2,6,3,7>
-  3643025697U, // <1,7,6,3>: Cost 4 vext1 <3,1,7,6>, <3,1,7,6>
-  3643026742U, // <1,7,6,4>: Cost 4 vext1 <3,1,7,6>, RHS
-  3654971091U, // <1,7,6,5>: Cost 4 vext1 <5,1,7,6>, <5,1,7,6>
-  3787675153U, // <1,7,6,6>: Cost 4 vext3 <4,u,5,1>, <7,6,6,6>
-  2724845076U, // <1,7,6,7>: Cost 3 vext3 <6,7,0,1>, <7,6,7,0>
-  2725508637U, // <1,7,6,u>: Cost 3 vext3 <6,u,0,1>, <7,6,u,0>
-  2730817063U, // <1,7,7,0>: Cost 3 vext3 <7,7,0,1>, <7,7,0,1>
-  3631088436U, // <1,7,7,1>: Cost 4 vext1 <1,1,7,7>, <1,1,1,1>
-  3660949158U, // <1,7,7,2>: Cost 4 vext1 <6,1,7,7>, <2,3,0,1>
-  3801904705U, // <1,7,7,3>: Cost 4 vext3 <7,3,0,1>, <7,7,3,0>
-  3631090998U, // <1,7,7,4>: Cost 4 vext1 <1,1,7,7>, RHS
-  2662503828U, // <1,7,7,5>: Cost 3 vext2 <7,5,1,7>, <7,5,1,7>
-  3660951981U, // <1,7,7,6>: Cost 4 vext1 <6,1,7,7>, <6,1,7,7>
-  2713933420U, // <1,7,7,7>: Cost 3 vext3 <4,u,5,1>, <7,7,7,7>
-  2731406959U, // <1,7,7,u>: Cost 3 vext3 <7,7,u,1>, <7,7,u,1>
-  1507500134U, // <1,7,u,0>: Cost 2 vext1 <5,1,7,u>, LHS
-  2626672430U, // <1,7,u,1>: Cost 3 vext2 <1,5,1,7>, LHS
-  2581243496U, // <1,7,u,2>: Cost 3 vext1 <5,1,7,u>, <2,2,2,2>
-  2569300259U, // <1,7,u,3>: Cost 3 vext1 <3,1,7,u>, <3,1,7,u>
-  1507503414U, // <1,7,u,4>: Cost 2 vext1 <5,1,7,u>, RHS
-  1507503829U, // <1,7,u,5>: Cost 2 vext1 <5,1,7,u>, <5,1,7,u>
-  2581246458U, // <1,7,u,6>: Cost 3 vext1 <5,1,7,u>, <6,2,7,3>
-  2581246970U, // <1,7,u,7>: Cost 3 vext1 <5,1,7,u>, <7,0,1,2>
-  1507505966U, // <1,7,u,u>: Cost 2 vext1 <5,1,7,u>, LHS
-  1543643153U, // <1,u,0,0>: Cost 2 vext2 <0,0,1,u>, <0,0,1,u>
-  1546297446U, // <1,u,0,1>: Cost 2 vext2 <0,4,1,u>, LHS
-  2819448852U, // <1,u,0,2>: Cost 3 vuzpr LHS, <0,0,2,2>
-  2619375876U, // <1,u,0,3>: Cost 3 vext2 <0,3,1,u>, <0,3,1,u>
-  1546297685U, // <1,u,0,4>: Cost 2 vext2 <0,4,1,u>, <0,4,1,u>
-  1658771190U, // <1,u,0,5>: Cost 2 vext3 ,
-  2736789248U, // <1,u,0,6>: Cost 3 vext3 ,
-  2659189376U, // <1,u,0,7>: Cost 3 vext2 <7,0,1,u>, <0,7,u,1>
-  1546298013U, // <1,u,0,u>: Cost 2 vext2 <0,4,1,u>, LHS
-  1483112550U, // <1,u,1,0>: Cost 2 vext1 <1,1,1,1>, LHS
-  202162278U, // <1,u,1,1>: Cost 1 vdup1 LHS
-  1616009006U, // <1,u,1,2>: Cost 2 vext3 <0,u,1,1>, LHS
-  1745707110U, // <1,u,1,3>: Cost 2 vuzpr LHS, LHS
-  1483115830U, // <1,u,1,4>: Cost 2 vext1 <1,1,1,1>, RHS
-  2620040336U, // <1,u,1,5>: Cost 3 vext2 <0,4,1,u>, <1,5,3,7>
-  3026622618U, // <1,u,1,6>: Cost 3 vtrnl <1,1,1,1>, RHS
-  2958183752U, // <1,u,1,7>: Cost 3 vzipr <0,u,1,1>, RHS
-  202162278U, // <1,u,1,u>: Cost 1 vdup1 LHS
-  2819449750U, // <1,u,2,0>: Cost 3 vuzpr LHS, <1,2,3,0>
-  2893207342U, // <1,u,2,1>: Cost 3 vzipl <1,2,3,0>, LHS
-  2819448996U, // <1,u,2,2>: Cost 3 vuzpr LHS, <0,2,0,2>
-  2819450482U, // <1,u,2,3>: Cost 3 vuzpr LHS, <2,2,3,3>
-  2819449754U, // <1,u,2,4>: Cost 3 vuzpr LHS, <1,2,3,4>
-  2893207706U, // <1,u,2,5>: Cost 3 vzipl <1,2,3,0>, RHS
-  2819449036U, // <1,u,2,6>: Cost 3 vuzpr LHS, <0,2,4,6>
-  2970799432U, // <1,u,2,7>: Cost 3 vzipr <3,0,1,2>, RHS
-  2819449002U, // <1,u,2,u>: Cost 3 vuzpr LHS, <0,2,0,u>
-  403931292U, // <1,u,3,0>: Cost 1 vext1 LHS, LHS
-  1477673718U, // <1,u,3,1>: Cost 2 vext1 LHS, <1,0,3,2>
-  115726126U, // <1,u,3,2>: Cost 1 vrev LHS
-  2014102173U, // <1,u,3,3>: Cost 2 vtrnr LHS, LHS
-  403934518U, // <1,u,3,4>: Cost 1 vext1 LHS, RHS
-  1507536601U, // <1,u,3,5>: Cost 2 vext1 <5,1,u,3>, <5,1,u,3>
-  1525453306U, // <1,u,3,6>: Cost 2 vext1 LHS, <6,2,7,3>
-  2014105129U, // <1,u,3,7>: Cost 2 vtrnr LHS, RHS
-  403937070U, // <1,u,3,u>: Cost 1 vext1 LHS, LHS
-  2620042157U, // <1,u,4,0>: Cost 3 vext2 <0,4,1,u>, <4,0,u,1>
-  2620042237U, // <1,u,4,1>: Cost 3 vext2 <0,4,1,u>, <4,1,u,0>
-  2263217967U, // <1,u,4,2>: Cost 3 vrev
-  2569341224U, // <1,u,4,3>: Cost 3 vext1 <3,1,u,4>, <3,1,u,4>
-  2569342262U, // <1,u,4,4>: Cost 3 vext1 <3,1,u,4>, RHS
-  1546300726U, // <1,u,4,5>: Cost 2 vext2 <0,4,1,u>, RHS
-  2819449180U, // <1,u,4,6>: Cost 3 vuzpr LHS, <0,4,2,6>
-  2724845649U, // <1,u,4,7>: Cost 3 vext3 <6,7,0,1>,
-  1546300969U, // <1,u,4,u>: Cost 2 vext2 <0,4,1,u>, RHS
-  2551431270U, // <1,u,5,0>: Cost 3 vext1 <0,1,u,5>, LHS
-  2551432192U, // <1,u,5,1>: Cost 3 vext1 <0,1,u,5>, <1,3,5,7>
-  3028293422U, // <1,u,5,2>: Cost 3 vtrnl <1,3,5,7>, LHS
-  2955559068U, // <1,u,5,3>: Cost 3 vzipr <0,4,1,5>, LHS
-  2551434550U, // <1,u,5,4>: Cost 3 vext1 <0,1,u,5>, RHS
-  2895255706U, // <1,u,5,5>: Cost 3 vzipl <1,5,3,7>, RHS
-  1616009370U, // <1,u,5,6>: Cost 2 vext3 <0,u,1,1>, RHS
-  1745710390U, // <1,u,5,7>: Cost 2 vuzpr LHS, RHS
-  1745710391U, // <1,u,5,u>: Cost 2 vuzpr LHS, RHS
-  2653221159U, // <1,u,6,0>: Cost 3 vext2 <6,0,1,u>, <6,0,1,u>
-  2725509303U, // <1,u,6,1>: Cost 3 vext3 <6,u,0,1>,
-  2659193338U, // <1,u,6,2>: Cost 3 vext2 <7,0,1,u>, <6,2,7,3>
-  2689751248U, // <1,u,6,3>: Cost 3 vext3 <0,u,1,1>,
-  2867228774U, // <1,u,6,4>: Cost 3 vuzpr LHS, <5,6,7,4>
-  3764820194U, // <1,u,6,5>: Cost 4 vext3 <1,1,1,1>,
-  2657202957U, // <1,u,6,6>: Cost 3 vext2 <6,6,1,u>, <6,6,1,u>
-  2819450810U, // <1,u,6,7>: Cost 3 vuzpr LHS, <2,6,3,7>
-  2819450811U, // <1,u,6,u>: Cost 3 vuzpr LHS, <2,6,3,u>
-  1585452032U, // <1,u,7,0>: Cost 2 vext2 <7,0,1,u>, <7,0,1,u>
-  2557420340U, // <1,u,7,1>: Cost 3 vext1 <1,1,u,7>, <1,1,1,1>
-  2569365158U, // <1,u,7,2>: Cost 3 vext1 <3,1,u,7>, <2,3,0,1>
-  2569365803U, // <1,u,7,3>: Cost 3 vext1 <3,1,u,7>, <3,1,u,7>
-  2557422902U, // <1,u,7,4>: Cost 3 vext1 <1,1,u,7>, RHS
-  2662512021U, // <1,u,7,5>: Cost 3 vext2 <7,5,1,u>, <7,5,1,u>
-  2724845884U, // <1,u,7,6>: Cost 3 vext3 <6,7,0,1>,
-  2659194476U, // <1,u,7,7>: Cost 3 vext2 <7,0,1,u>, <7,7,7,7>
-  1590761096U, // <1,u,7,u>: Cost 2 vext2 <7,u,1,u>, <7,u,1,u>
-  403972257U, // <1,u,u,0>: Cost 1 vext1 LHS, LHS
-  202162278U, // <1,u,u,1>: Cost 1 vdup1 LHS
-  115767091U, // <1,u,u,2>: Cost 1 vrev LHS
-  1745707677U, // <1,u,u,3>: Cost 2 vuzpr LHS, LHS
-  403975478U, // <1,u,u,4>: Cost 1 vext1 LHS, RHS
-  1546303642U, // <1,u,u,5>: Cost 2 vext2 <0,4,1,u>, RHS
-  1616009613U, // <1,u,u,6>: Cost 2 vext3 <0,u,1,1>, RHS
-  1745710633U, // <1,u,u,7>: Cost 2 vuzpr LHS, RHS
-  403978030U, // <1,u,u,u>: Cost 1 vext1 LHS, LHS
-  2551463936U, // <2,0,0,0>: Cost 3 vext1 <0,2,0,0>, <0,0,0,0>
-  2685698058U, // <2,0,0,1>: Cost 3 vext3 <0,2,0,2>, <0,0,1,1>
-  1610776596U, // <2,0,0,2>: Cost 2 vext3 <0,0,2,2>, <0,0,2,2>
-  2619384069U, // <2,0,0,3>: Cost 3 vext2 <0,3,2,0>, <0,3,2,0>
-  2551467318U, // <2,0,0,4>: Cost 3 vext1 <0,2,0,0>, RHS
-  3899836596U, // <2,0,0,5>: Cost 4 vuzpr <1,2,3,0>, <3,0,4,5>
-  2621374968U, // <2,0,0,6>: Cost 3 vext2 <0,6,2,0>, <0,6,2,0>
-  4168271334U, // <2,0,0,7>: Cost 4 vtrnr <1,2,3,0>, <2,0,5,7>
-  1611219018U, // <2,0,0,u>: Cost 2 vext3 <0,0,u,2>, <0,0,u,2>
-  2551472138U, // <2,0,1,0>: Cost 3 vext1 <0,2,0,1>, <0,0,1,1>
-  2690564186U, // <2,0,1,1>: Cost 3 vext3 <1,0,3,2>, <0,1,1,0>
-  1611956326U, // <2,0,1,2>: Cost 2 vext3 <0,2,0,2>, LHS
-  2826092646U, // <2,0,1,3>: Cost 3 vuzpr <1,2,3,0>, LHS
-  2551475510U, // <2,0,1,4>: Cost 3 vext1 <0,2,0,1>, RHS
-  3692463248U, // <2,0,1,5>: Cost 4 vext2 <0,2,2,0>, <1,5,3,7>
-  2587308473U, // <2,0,1,6>: Cost 3 vext1 <6,2,0,1>, <6,2,0,1>
-  3661050874U, // <2,0,1,7>: Cost 4 vext1 <6,2,0,1>, <7,0,1,2>
-  1611956380U, // <2,0,1,u>: Cost 2 vext3 <0,2,0,2>, LHS
-  1477738598U, // <2,0,2,0>: Cost 2 vext1 <0,2,0,2>, LHS
-  2551481078U, // <2,0,2,1>: Cost 3 vext1 <0,2,0,2>, <1,0,3,2>
-  2551481796U, // <2,0,2,2>: Cost 3 vext1 <0,2,0,2>, <2,0,2,0>
-  2551482518U, // <2,0,2,3>: Cost 3 vext1 <0,2,0,2>, <3,0,1,2>
-  1477741878U, // <2,0,2,4>: Cost 2 vext1 <0,2,0,2>, RHS
-  2551484112U, // <2,0,2,5>: Cost 3 vext1 <0,2,0,2>, <5,1,7,3>
-  2551484759U, // <2,0,2,6>: Cost 3 vext1 <0,2,0,2>, <6,0,7,2>
-  2551485434U, // <2,0,2,7>: Cost 3 vext1 <0,2,0,2>, <7,0,1,2>
-  1477744430U, // <2,0,2,u>: Cost 2 vext1 <0,2,0,2>, LHS
-  2953625600U, // <2,0,3,0>: Cost 3 vzipr LHS, <0,0,0,0>
-  2953627302U, // <2,0,3,1>: Cost 3 vzipr LHS, <2,3,0,1>
-  2953625764U, // <2,0,3,2>: Cost 3 vzipr LHS, <0,2,0,2>
-  4027369695U, // <2,0,3,3>: Cost 4 vzipr LHS, <3,1,0,3>
-  3625233718U, // <2,0,3,4>: Cost 4 vext1 <0,2,0,3>, RHS
-  3899836110U, // <2,0,3,5>: Cost 4 vuzpr <1,2,3,0>, <2,3,4,5>
-  4032012618U, // <2,0,3,6>: Cost 4 vzipr LHS, <0,4,0,6>
-  3899835392U, // <2,0,3,7>: Cost 4 vuzpr <1,2,3,0>, <1,3,5,7>
-  2953625770U, // <2,0,3,u>: Cost 3 vzipr LHS, <0,2,0,u>
-  2551496806U, // <2,0,4,0>: Cost 3 vext1 <0,2,0,4>, LHS
-  2685698386U, // <2,0,4,1>: Cost 3 vext3 <0,2,0,2>, <0,4,1,5>
-  2685698396U, // <2,0,4,2>: Cost 3 vext3 <0,2,0,2>, <0,4,2,6>
-  3625240726U, // <2,0,4,3>: Cost 4 vext1 <0,2,0,4>, <3,0,1,2>
-  2551500086U, // <2,0,4,4>: Cost 3 vext1 <0,2,0,4>, RHS
-  2618723638U, // <2,0,4,5>: Cost 3 vext2 <0,2,2,0>, RHS
-  2765409590U, // <2,0,4,6>: Cost 3 vuzpl <2,3,0,1>, RHS
-  3799990664U, // <2,0,4,7>: Cost 4 vext3 <7,0,1,2>, <0,4,7,5>
-  2685698450U, // <2,0,4,u>: Cost 3 vext3 <0,2,0,2>, <0,4,u,6>
-  3625246822U, // <2,0,5,0>: Cost 4 vext1 <0,2,0,5>, LHS
-  3289776304U, // <2,0,5,1>: Cost 4 vrev <0,2,1,5>
-  2690564526U, // <2,0,5,2>: Cost 3 vext3 <1,0,3,2>, <0,5,2,7>
-  3289923778U, // <2,0,5,3>: Cost 4 vrev <0,2,3,5>
-  2216255691U, // <2,0,5,4>: Cost 3 vrev <0,2,4,5>
-  3726307332U, // <2,0,5,5>: Cost 4 vext2 <5,u,2,0>, <5,5,5,5>
-  3726307426U, // <2,0,5,6>: Cost 4 vext2 <5,u,2,0>, <5,6,7,0>
-  2826095926U, // <2,0,5,7>: Cost 3 vuzpr <1,2,3,0>, RHS
-  2216550639U, // <2,0,5,u>: Cost 3 vrev <0,2,u,5>
-  4162420736U, // <2,0,6,0>: Cost 4 vtrnr <0,2,4,6>, <0,0,0,0>
-  2901885030U, // <2,0,6,1>: Cost 3 vzipl <2,6,3,7>, LHS
-  2685698559U, // <2,0,6,2>: Cost 3 vext3 <0,2,0,2>, <0,6,2,7>
-  3643173171U, // <2,0,6,3>: Cost 4 vext1 <3,2,0,6>, <3,2,0,6>
-  2216263884U, // <2,0,6,4>: Cost 3 vrev <0,2,4,6>
-  3730289341U, // <2,0,6,5>: Cost 4 vext2 <6,5,2,0>, <6,5,2,0>
-  3726308152U, // <2,0,6,6>: Cost 4 vext2 <5,u,2,0>, <6,6,6,6>
-  3899836346U, // <2,0,6,7>: Cost 4 vuzpr <1,2,3,0>, <2,6,3,7>
-  2216558832U, // <2,0,6,u>: Cost 3 vrev <0,2,u,6>
-  2659202049U, // <2,0,7,0>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
-  3726308437U, // <2,0,7,1>: Cost 4 vext2 <5,u,2,0>, <7,1,2,3>
-  2726249034U, // <2,0,7,2>: Cost 3 vext3 <7,0,1,2>, <0,7,2,1>
-  3734934772U, // <2,0,7,3>: Cost 4 vext2 <7,3,2,0>, <7,3,2,0>
-  3726308710U, // <2,0,7,4>: Cost 4 vext2 <5,u,2,0>, <7,4,5,6>
-  3726308814U, // <2,0,7,5>: Cost 4 vext2 <5,u,2,0>, <7,5,u,2>
-  3736925671U, // <2,0,7,6>: Cost 4 vext2 <7,6,2,0>, <7,6,2,0>
-  3726308972U, // <2,0,7,7>: Cost 4 vext2 <5,u,2,0>, <7,7,7,7>
-  2659202049U, // <2,0,7,u>: Cost 3 vext2 <7,0,2,0>, <7,0,2,0>
-  1477787750U, // <2,0,u,0>: Cost 2 vext1 <0,2,0,u>, LHS
-  2953668262U, // <2,0,u,1>: Cost 3 vzipr LHS, <2,3,0,1>
-  1611956893U, // <2,0,u,2>: Cost 2 vext3 <0,2,0,2>, LHS
-  2551531670U, // <2,0,u,3>: Cost 3 vext1 <0,2,0,u>, <3,0,1,2>
-  1477791030U, // <2,0,u,4>: Cost 2 vext1 <0,2,0,u>, RHS
-  2618726554U, // <2,0,u,5>: Cost 3 vext2 <0,2,2,0>, RHS
-  2765412506U, // <2,0,u,6>: Cost 3 vuzpl <2,3,0,1>, RHS
-  2826096169U, // <2,0,u,7>: Cost 3 vuzpr <1,2,3,0>, RHS
-  1611956947U, // <2,0,u,u>: Cost 2 vext3 <0,2,0,2>, LHS
-  2569453670U, // <2,1,0,0>: Cost 3 vext1 <3,2,1,0>, LHS
-  2619392102U, // <2,1,0,1>: Cost 3 vext2 <0,3,2,1>, LHS
-  3759440619U, // <2,1,0,2>: Cost 4 vext3 <0,2,0,2>, <1,0,2,0>
-  1616823030U, // <2,1,0,3>: Cost 2 vext3 <1,0,3,2>, <1,0,3,2>
-  2569456950U, // <2,1,0,4>: Cost 3 vext1 <3,2,1,0>, RHS
-  2690712328U, // <2,1,0,5>: Cost 3 vext3 <1,0,5,2>, <1,0,5,2>
-  3661115841U, // <2,1,0,6>: Cost 4 vext1 <6,2,1,0>, <6,2,1,0>
-  2622046794U, // <2,1,0,7>: Cost 3 vext2 <0,7,2,1>, <0,7,2,1>
-  1617191715U, // <2,1,0,u>: Cost 2 vext3 <1,0,u,2>, <1,0,u,2>
-  2551545958U, // <2,1,1,0>: Cost 3 vext1 <0,2,1,1>, LHS
-  2685698868U, // <2,1,1,1>: Cost 3 vext3 <0,2,0,2>, <1,1,1,1>
-  2628682646U, // <2,1,1,2>: Cost 3 vext2 <1,u,2,1>, <1,2,3,0>
-  2685698888U, // <2,1,1,3>: Cost 3 vext3 <0,2,0,2>, <1,1,3,3>
-  2551549238U, // <2,1,1,4>: Cost 3 vext1 <0,2,1,1>, RHS
-  3693134992U, // <2,1,1,5>: Cost 4 vext2 <0,3,2,1>, <1,5,3,7>
-  3661124034U, // <2,1,1,6>: Cost 4 vext1 <6,2,1,1>, <6,2,1,1>
-  3625292794U, // <2,1,1,7>: Cost 4 vext1 <0,2,1,1>, <7,0,1,2>
-  2685698933U, // <2,1,1,u>: Cost 3 vext3 <0,2,0,2>, <1,1,u,3>
-  2551554150U, // <2,1,2,0>: Cost 3 vext1 <0,2,1,2>, LHS
-  3893649571U, // <2,1,2,1>: Cost 4 vuzpr <0,2,0,1>, <0,2,0,1>
-  2551555688U, // <2,1,2,2>: Cost 3 vext1 <0,2,1,2>, <2,2,2,2>
-  2685698966U, // <2,1,2,3>: Cost 3 vext3 <0,2,0,2>, <1,2,3,0>
-  2551557430U, // <2,1,2,4>: Cost 3 vext1 <0,2,1,2>, RHS
-  3763422123U, // <2,1,2,5>: Cost 4 vext3 <0,u,0,2>, <1,2,5,3>
-  3693135802U, // <2,1,2,6>: Cost 4 vext2 <0,3,2,1>, <2,6,3,7>
-  2726249402U, // <2,1,2,7>: Cost 3 vext3 <7,0,1,2>, <1,2,7,0>
-  2685699011U, // <2,1,2,u>: Cost 3 vext3 <0,2,0,2>, <1,2,u,0>
-  2551562342U, // <2,1,3,0>: Cost 3 vext1 <0,2,1,3>, LHS
-  2953625610U, // <2,1,3,1>: Cost 3 vzipr LHS, <0,0,1,1>
-  2953627798U, // <2,1,3,2>: Cost 3 vzipr LHS, <3,0,1,2>
-  2953626584U, // <2,1,3,3>: Cost 3 vzipr LHS, <1,3,1,3>
-  2551565622U, // <2,1,3,4>: Cost 3 vext1 <0,2,1,3>, RHS
-  2953625938U, // <2,1,3,5>: Cost 3 vzipr LHS, <0,4,1,5>
-  2587398596U, // <2,1,3,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
-  4032013519U, // <2,1,3,7>: Cost 4 vzipr LHS, <1,6,1,7>
-  2953625617U, // <2,1,3,u>: Cost 3 vzipr LHS, <0,0,1,u>
-  2690565154U, // <2,1,4,0>: Cost 3 vext3 <1,0,3,2>, <1,4,0,5>
-  3625313270U, // <2,1,4,1>: Cost 4 vext1 <0,2,1,4>, <1,3,4,6>
-  3771532340U, // <2,1,4,2>: Cost 4 vext3 <2,2,2,2>, <1,4,2,5>
-  1148404634U, // <2,1,4,3>: Cost 2 vrev <1,2,3,4>
-  3625315638U, // <2,1,4,4>: Cost 4 vext1 <0,2,1,4>, RHS
-  2619395382U, // <2,1,4,5>: Cost 3 vext2 <0,3,2,1>, RHS
-  3837242678U, // <2,1,4,6>: Cost 4 vuzpl <2,0,1,2>, RHS
-  3799991394U, // <2,1,4,7>: Cost 4 vext3 <7,0,1,2>, <1,4,7,6>
-  1148773319U, // <2,1,4,u>: Cost 2 vrev <1,2,u,4>
-  2551578726U, // <2,1,5,0>: Cost 3 vext1 <0,2,1,5>, LHS
-  2551579648U, // <2,1,5,1>: Cost 3 vext1 <0,2,1,5>, <1,3,5,7>
-  3625321952U, // <2,1,5,2>: Cost 4 vext1 <0,2,1,5>, <2,0,5,1>
-  2685699216U, // <2,1,5,3>: Cost 3 vext3 <0,2,0,2>, <1,5,3,7>
-  2551582006U, // <2,1,5,4>: Cost 3 vext1 <0,2,1,5>, RHS
-  3740913668U, // <2,1,5,5>: Cost 4 vext2 , <5,5,5,5>
-  3661156806U, // <2,1,5,6>: Cost 4 vext1 <6,2,1,5>, <6,2,1,5>
-  3893652790U, // <2,1,5,7>: Cost 4 vuzpr <0,2,0,1>, RHS
-  2685699261U, // <2,1,5,u>: Cost 3 vext3 <0,2,0,2>, <1,5,u,7>
-  2551586918U, // <2,1,6,0>: Cost 3 vext1 <0,2,1,6>, LHS
-  3625329398U, // <2,1,6,1>: Cost 4 vext1 <0,2,1,6>, <1,0,3,2>
-  2551588794U, // <2,1,6,2>: Cost 3 vext1 <0,2,1,6>, <2,6,3,7>
-  3088679014U, // <2,1,6,3>: Cost 3 vtrnr <0,2,4,6>, LHS
-  2551590198U, // <2,1,6,4>: Cost 3 vext1 <0,2,1,6>, RHS
-  4029382994U, // <2,1,6,5>: Cost 4 vzipr <0,4,2,6>, <0,4,1,5>
-  3625333560U, // <2,1,6,6>: Cost 4 vext1 <0,2,1,6>, <6,6,6,6>
-  3731624800U, // <2,1,6,7>: Cost 4 vext2 <6,7,2,1>, <6,7,2,1>
-  2551592750U, // <2,1,6,u>: Cost 3 vext1 <0,2,1,6>, LHS
-  2622051322U, // <2,1,7,0>: Cost 3 vext2 <0,7,2,1>, <7,0,1,2>
-  3733615699U, // <2,1,7,1>: Cost 4 vext2 <7,1,2,1>, <7,1,2,1>
-  3795125538U, // <2,1,7,2>: Cost 4 vext3 <6,1,7,2>, <1,7,2,0>
-  2222171037U, // <2,1,7,3>: Cost 3 vrev <1,2,3,7>
-  3740915046U, // <2,1,7,4>: Cost 4 vext2 , <7,4,5,6>
-  3296060335U, // <2,1,7,5>: Cost 4 vrev <1,2,5,7>
-  3736933864U, // <2,1,7,6>: Cost 4 vext2 <7,6,2,1>, <7,6,2,1>
-  3805300055U, // <2,1,7,7>: Cost 4 vext3 <7,u,1,2>, <1,7,7,u>
-  2669827714U, // <2,1,7,u>: Cost 3 vext2 , <7,u,1,2>
-  2551603302U, // <2,1,u,0>: Cost 3 vext1 <0,2,1,u>, LHS
-  2953666570U, // <2,1,u,1>: Cost 3 vzipr LHS, <0,0,1,1>
-  2953668758U, // <2,1,u,2>: Cost 3 vzipr LHS, <3,0,1,2>
-  1148437406U, // <2,1,u,3>: Cost 2 vrev <1,2,3,u>
-  2551606582U, // <2,1,u,4>: Cost 3 vext1 <0,2,1,u>, RHS
-  2953666898U, // <2,1,u,5>: Cost 3 vzipr LHS, <0,4,1,5>
-  2587398596U, // <2,1,u,6>: Cost 3 vext1 <6,2,1,3>, <6,2,1,3>
-  2669828370U, // <2,1,u,7>: Cost 3 vext2 ,
-  1148806091U, // <2,1,u,u>: Cost 2 vrev <1,2,u,u>
-  1543667732U, // <2,2,0,0>: Cost 2 vext2 <0,0,2,2>, <0,0,2,2>
-  1548976230U, // <2,2,0,1>: Cost 2 vext2 <0,u,2,2>, LHS
-  2685699524U, // <2,2,0,2>: Cost 3 vext3 <0,2,0,2>, <2,0,2,0>
-  2685699535U, // <2,2,0,3>: Cost 3 vext3 <0,2,0,2>, <2,0,3,2>
-  2551614774U, // <2,2,0,4>: Cost 3 vext1 <0,2,2,0>, RHS
-  3704422830U, // <2,2,0,5>: Cost 4 vext2 <2,2,2,2>, <0,5,2,7>
-  3893657642U, // <2,2,0,6>: Cost 4 vuzpr <0,2,0,2>, <0,0,4,6>
-  3770574323U, // <2,2,0,7>: Cost 4 vext3 <2,0,7,2>, <2,0,7,2>
-  1548976796U, // <2,2,0,u>: Cost 2 vext2 <0,u,2,2>, <0,u,2,2>
-  2622718710U, // <2,2,1,0>: Cost 3 vext2 <0,u,2,2>, <1,0,3,2>
-  2622718772U, // <2,2,1,1>: Cost 3 vext2 <0,u,2,2>, <1,1,1,1>
-  2622718870U, // <2,2,1,2>: Cost 3 vext2 <0,u,2,2>, <1,2,3,0>
-  2819915878U, // <2,2,1,3>: Cost 3 vuzpr <0,2,0,2>, LHS
-  3625364790U, // <2,2,1,4>: Cost 4 vext1 <0,2,2,1>, RHS
-  2622719120U, // <2,2,1,5>: Cost 3 vext2 <0,u,2,2>, <1,5,3,7>
-  3760031292U, // <2,2,1,6>: Cost 4 vext3 <0,2,u,2>, <2,1,6,3>
-  3667170468U, // <2,2,1,7>: Cost 4 vext1 <7,2,2,1>, <7,2,2,1>
-  2819915883U, // <2,2,1,u>: Cost 3 vuzpr <0,2,0,2>, LHS
-  1489829990U, // <2,2,2,0>: Cost 2 vext1 <2,2,2,2>, LHS
-  2563572470U, // <2,2,2,1>: Cost 3 vext1 <2,2,2,2>, <1,0,3,2>
-  269271142U, // <2,2,2,2>: Cost 1 vdup2 LHS
-  2685699698U, // <2,2,2,3>: Cost 3 vext3 <0,2,0,2>, <2,2,3,3>
-  1489833270U, // <2,2,2,4>: Cost 2 vext1 <2,2,2,2>, RHS
-  2685699720U, // <2,2,2,5>: Cost 3 vext3 <0,2,0,2>, <2,2,5,7>
-  2622719930U, // <2,2,2,6>: Cost 3 vext2 <0,u,2,2>, <2,6,3,7>
-  2593436837U, // <2,2,2,7>: Cost 3 vext1 <7,2,2,2>, <7,2,2,2>
-  269271142U, // <2,2,2,u>: Cost 1 vdup2 LHS
-  2685699750U, // <2,2,3,0>: Cost 3 vext3 <0,2,0,2>, <2,3,0,1>
-  2690565806U, // <2,2,3,1>: Cost 3 vext3 <1,0,3,2>, <2,3,1,0>
-  2953627240U, // <2,2,3,2>: Cost 3 vzipr LHS, <2,2,2,2>
-  1879883878U, // <2,2,3,3>: Cost 2 vzipr LHS, LHS
-  2685699790U, // <2,2,3,4>: Cost 3 vext3 <0,2,0,2>, <2,3,4,5>
-  3893659342U, // <2,2,3,5>: Cost 4 vuzpr <0,2,0,2>, <2,3,4,5>
-  2958270812U, // <2,2,3,6>: Cost 3 vzipr LHS, <0,4,2,6>
-  2593445030U, // <2,2,3,7>: Cost 3 vext1 <7,2,2,3>, <7,2,2,3>
-  1879883883U, // <2,2,3,u>: Cost 2 vzipr LHS, LHS
-  2551644262U, // <2,2,4,0>: Cost 3 vext1 <0,2,2,4>, LHS
-  3625386742U, // <2,2,4,1>: Cost 4 vext1 <0,2,2,4>, <1,0,3,2>
-  2551645902U, // <2,2,4,2>: Cost 3 vext1 <0,2,2,4>, <2,3,4,5>
-  3759441686U, // <2,2,4,3>: Cost 4 vext3 <0,2,0,2>, <2,4,3,5>
-  2551647542U, // <2,2,4,4>: Cost 3 vext1 <0,2,2,4>, RHS
- 1548979510U, // <2,2,4,5>: Cost 2 vext2 <0,u,2,2>, RHS - 2764901686U, // <2,2,4,6>: Cost 3 vuzpl <2,2,2,2>, RHS - 3667195047U, // <2,2,4,7>: Cost 4 vext1 <7,2,2,4>, <7,2,2,4> - 1548979753U, // <2,2,4,u>: Cost 2 vext2 <0,u,2,2>, RHS - 3696463432U, // <2,2,5,0>: Cost 4 vext2 <0,u,2,2>, <5,0,1,2> - 2617413328U, // <2,2,5,1>: Cost 3 vext2 <0,0,2,2>, <5,1,7,3> - 2685699936U, // <2,2,5,2>: Cost 3 vext3 <0,2,0,2>, <2,5,2,7> - 4027383910U, // <2,2,5,3>: Cost 4 vzipr <0,1,2,5>, LHS - 2228201085U, // <2,2,5,4>: Cost 3 vrev <2,2,4,5> - 2617413636U, // <2,2,5,5>: Cost 3 vext2 <0,0,2,2>, <5,5,5,5> - 2617413730U, // <2,2,5,6>: Cost 3 vext2 <0,0,2,2>, <5,6,7,0> - 2819919158U, // <2,2,5,7>: Cost 3 vuzpr <0,2,0,2>, RHS - 2819919159U, // <2,2,5,u>: Cost 3 vuzpr <0,2,0,2>, RHS - 3625402554U, // <2,2,6,0>: Cost 4 vext1 <0,2,2,6>, <0,2,2,6> - 3760031652U, // <2,2,6,1>: Cost 4 vext3 <0,2,u,2>, <2,6,1,3> - 2617414138U, // <2,2,6,2>: Cost 3 vext2 <0,0,2,2>, <6,2,7,3> - 2685700026U, // <2,2,6,3>: Cost 3 vext3 <0,2,0,2>, <2,6,3,7> - 3625405750U, // <2,2,6,4>: Cost 4 vext1 <0,2,2,6>, RHS - 3760031692U, // <2,2,6,5>: Cost 4 vext3 <0,2,u,2>, <2,6,5,7> - 3088679116U, // <2,2,6,6>: Cost 3 vtrnr <0,2,4,6>, <0,2,4,6> - 2657891169U, // <2,2,6,7>: Cost 3 vext2 <6,7,2,2>, <6,7,2,2> - 2685700071U, // <2,2,6,u>: Cost 3 vext3 <0,2,0,2>, <2,6,u,7> - 2726250474U, // <2,2,7,0>: Cost 3 vext3 <7,0,1,2>, <2,7,0,1> - 3704427616U, // <2,2,7,1>: Cost 4 vext2 <2,2,2,2>, <7,1,3,5> - 2660545701U, // <2,2,7,2>: Cost 3 vext2 <7,2,2,2>, <7,2,2,2> - 4030718054U, // <2,2,7,3>: Cost 4 vzipr <0,6,2,7>, LHS - 2617415014U, // <2,2,7,4>: Cost 3 vext2 <0,0,2,2>, <7,4,5,6> - 3302033032U, // <2,2,7,5>: Cost 4 vrev <2,2,5,7> - 3661246929U, // <2,2,7,6>: Cost 4 vext1 <6,2,2,7>, <6,2,2,7> - 2617415276U, // <2,2,7,7>: Cost 3 vext2 <0,0,2,2>, <7,7,7,7> - 2731558962U, // <2,2,7,u>: Cost 3 vext3 <7,u,1,2>, <2,7,u,1> - 1489829990U, // <2,2,u,0>: Cost 2 vext1 <2,2,2,2>, LHS - 1548982062U, // <2,2,u,1>: Cost 2 vext2 <0,u,2,2>, LHS - 269271142U, // <2,2,u,2>: Cost 1 vdup2 LHS - 1879924838U, // <2,2,u,3>: Cost 2 vzipr LHS, LHS - 1489833270U, // <2,2,u,4>: Cost 2 vext1 <2,2,2,2>, RHS - 1548982426U, // <2,2,u,5>: Cost 2 vext2 <0,u,2,2>, RHS - 2953666908U, // <2,2,u,6>: Cost 3 vzipr LHS, <0,4,2,6> - 2819919401U, // <2,2,u,7>: Cost 3 vuzpr <0,2,0,2>, RHS - 269271142U, // <2,2,u,u>: Cost 1 vdup2 LHS - 1544339456U, // <2,3,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 470597734U, // <2,3,0,1>: Cost 1 vext2 LHS, LHS - 1548984484U, // <2,3,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 2619408648U, // <2,3,0,3>: Cost 3 vext2 <0,3,2,3>, <0,3,2,3> - 1548984658U, // <2,3,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2665857454U, // <2,3,0,5>: Cost 3 vext2 LHS, <0,5,2,7> - 2622726655U, // <2,3,0,6>: Cost 3 vext2 LHS, <0,6,2,7> - 2593494188U, // <2,3,0,7>: Cost 3 vext1 <7,2,3,0>, <7,2,3,0> - 470598301U, // <2,3,0,u>: Cost 1 vext2 LHS, LHS - 1544340214U, // <2,3,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544340276U, // <2,3,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544340374U, // <2,3,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1548985304U, // <2,3,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 2551696694U, // <2,3,1,4>: Cost 3 vext1 <0,2,3,1>, RHS - 1548985488U, // <2,3,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2622727375U, // <2,3,1,6>: Cost 3 vext2 LHS, <1,6,1,7> - 2665858347U, // <2,3,1,7>: Cost 3 vext2 LHS, <1,7,3,0> - 1548985709U, // <2,3,1,u>: Cost 2 vext2 LHS, <1,u,1,3> - 2622727613U, // <2,3,2,0>: Cost 3 vext2 LHS, <2,0,1,2> - 2622727711U, // <2,3,2,1>: Cost 3 vext2 LHS, <2,1,3,1> - 1544341096U, // <2,3,2,2>: Cost 2 vext2 LHS, 
<2,2,2,2> - 1544341158U, // <2,3,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 2622727958U, // <2,3,2,4>: Cost 3 vext2 LHS, <2,4,3,5> - 2622728032U, // <2,3,2,5>: Cost 3 vext2 LHS, <2,5,2,7> - 1548986298U, // <2,3,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 2665859050U, // <2,3,2,7>: Cost 3 vext2 LHS, <2,7,0,1> - 1548986427U, // <2,3,2,u>: Cost 2 vext2 LHS, <2,u,0,1> - 1548986518U, // <2,3,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 2622728415U, // <2,3,3,1>: Cost 3 vext2 LHS, <3,1,0,3> - 1489913458U, // <2,3,3,2>: Cost 2 vext1 <2,2,3,3>, <2,2,3,3> - 1544341916U, // <2,3,3,3>: Cost 2 vext2 LHS, <3,3,3,3> - 1548986882U, // <2,3,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2665859632U, // <2,3,3,5>: Cost 3 vext2 LHS, <3,5,1,7> - 2234304870U, // <2,3,3,6>: Cost 3 vrev <3,2,6,3> - 2958271632U, // <2,3,3,7>: Cost 3 vzipr LHS, <1,5,3,7> - 1548987166U, // <2,3,3,u>: Cost 2 vext2 LHS, <3,u,1,2> - 1483948134U, // <2,3,4,0>: Cost 2 vext1 <1,2,3,4>, LHS - 1483948954U, // <2,3,4,1>: Cost 2 vext1 <1,2,3,4>, <1,2,3,4> - 2622729276U, // <2,3,4,2>: Cost 3 vext2 LHS, <4,2,6,0> - 2557692054U, // <2,3,4,3>: Cost 3 vext1 <1,2,3,4>, <3,0,1,2> - 1483951414U, // <2,3,4,4>: Cost 2 vext1 <1,2,3,4>, RHS - 470601014U, // <2,3,4,5>: Cost 1 vext2 LHS, RHS - 1592118644U, // <2,3,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 2593526960U, // <2,3,4,7>: Cost 3 vext1 <7,2,3,4>, <7,2,3,4> - 470601257U, // <2,3,4,u>: Cost 1 vext2 LHS, RHS - 2551726182U, // <2,3,5,0>: Cost 3 vext1 <0,2,3,5>, LHS - 1592118992U, // <2,3,5,1>: Cost 2 vext2 LHS, <5,1,7,3> - 2665860862U, // <2,3,5,2>: Cost 3 vext2 LHS, <5,2,3,4> - 2551728642U, // <2,3,5,3>: Cost 3 vext1 <0,2,3,5>, <3,4,5,6> - 1592119238U, // <2,3,5,4>: Cost 2 vext2 LHS, <5,4,7,6> - 1592119300U, // <2,3,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1592119394U, // <2,3,5,6>: Cost 2 vext2 LHS, <5,6,7,0> - 1592119464U, // <2,3,5,7>: Cost 2 vext2 LHS, <5,7,5,7> - 1592119545U, // <2,3,5,u>: Cost 2 vext2 LHS, <5,u,5,7> - 2622730529U, // <2,3,6,0>: Cost 3 vext2 LHS, <6,0,1,2> - 2557707164U, // <2,3,6,1>: Cost 3 vext1 <1,2,3,6>, <1,2,3,6> - 1592119802U, // <2,3,6,2>: Cost 2 vext2 LHS, <6,2,7,3> - 2665861682U, // <2,3,6,3>: Cost 3 vext2 LHS, <6,3,4,5> - 2622730893U, // <2,3,6,4>: Cost 3 vext2 LHS, <6,4,5,6> - 2665861810U, // <2,3,6,5>: Cost 3 vext2 LHS, <6,5,0,7> - 1592120120U, // <2,3,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592120142U, // <2,3,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1592120223U, // <2,3,6,u>: Cost 2 vext2 LHS, <6,u,0,1> - 1592120314U, // <2,3,7,0>: Cost 2 vext2 LHS, <7,0,1,2> - 2659890261U, // <2,3,7,1>: Cost 3 vext2 <7,1,2,3>, <7,1,2,3> - 2660553894U, // <2,3,7,2>: Cost 3 vext2 <7,2,2,3>, <7,2,2,3> - 2665862371U, // <2,3,7,3>: Cost 3 vext2 LHS, <7,3,0,1> - 1592120678U, // <2,3,7,4>: Cost 2 vext2 LHS, <7,4,5,6> - 2665862534U, // <2,3,7,5>: Cost 3 vext2 LHS, <7,5,0,2> - 2665862614U, // <2,3,7,6>: Cost 3 vext2 LHS, <7,6,0,1> - 1592120940U, // <2,3,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1592120962U, // <2,3,7,u>: Cost 2 vext2 LHS, <7,u,1,2> - 1548990163U, // <2,3,u,0>: Cost 2 vext2 LHS, - 470603566U, // <2,3,u,1>: Cost 1 vext2 LHS, LHS - 1548990341U, // <2,3,u,2>: Cost 2 vext2 LHS, - 1548990396U, // <2,3,u,3>: Cost 2 vext2 LHS, - 1548990527U, // <2,3,u,4>: Cost 2 vext2 LHS, - 470603930U, // <2,3,u,5>: Cost 1 vext2 LHS, RHS - 1548990672U, // <2,3,u,6>: Cost 2 vext2 LHS, - 1592121600U, // <2,3,u,7>: Cost 2 vext2 LHS, - 470604133U, // <2,3,u,u>: Cost 1 vext2 LHS, LHS - 2617425942U, // <2,4,0,0>: Cost 3 vext2 <0,0,2,4>, <0,0,2,4> - 2618753126U, // <2,4,0,1>: Cost 3 vext2 <0,2,2,4>, LHS - 2618753208U, // <2,4,0,2>: Cost 3 vext2 <0,2,2,4>, 
<0,2,2,4> - 2619416841U, // <2,4,0,3>: Cost 3 vext2 <0,3,2,4>, <0,3,2,4> - 2587593628U, // <2,4,0,4>: Cost 3 vext1 <6,2,4,0>, <4,0,6,2> - 2712832914U, // <2,4,0,5>: Cost 3 vext3 <4,6,u,2>, <4,0,5,1> - 1634962332U, // <2,4,0,6>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> - 3799993252U, // <2,4,0,7>: Cost 4 vext3 <7,0,1,2>, <4,0,7,1> - 1634962332U, // <2,4,0,u>: Cost 2 vext3 <4,0,6,2>, <4,0,6,2> - 2619417334U, // <2,4,1,0>: Cost 3 vext2 <0,3,2,4>, <1,0,3,2> - 3692495668U, // <2,4,1,1>: Cost 4 vext2 <0,2,2,4>, <1,1,1,1> - 2625389466U, // <2,4,1,2>: Cost 3 vext2 <1,3,2,4>, <1,2,3,4> - 2826125414U, // <2,4,1,3>: Cost 3 vuzpr <1,2,3,4>, LHS - 3699794995U, // <2,4,1,4>: Cost 4 vext2 <1,4,2,4>, <1,4,2,4> - 3692496016U, // <2,4,1,5>: Cost 4 vext2 <0,2,2,4>, <1,5,3,7> - 3763424238U, // <2,4,1,6>: Cost 4 vext3 <0,u,0,2>, <4,1,6,3> - 3667317942U, // <2,4,1,7>: Cost 4 vext1 <7,2,4,1>, <7,2,4,1> - 2826125419U, // <2,4,1,u>: Cost 3 vuzpr <1,2,3,4>, LHS - 2629371336U, // <2,4,2,0>: Cost 3 vext2 <2,0,2,4>, <2,0,2,4> - 3699131946U, // <2,4,2,1>: Cost 4 vext2 <1,3,2,4>, <2,1,4,3> - 2630698602U, // <2,4,2,2>: Cost 3 vext2 <2,2,2,4>, <2,2,2,4> - 2618754766U, // <2,4,2,3>: Cost 3 vext2 <0,2,2,4>, <2,3,4,5> - 2826126234U, // <2,4,2,4>: Cost 3 vuzpr <1,2,3,4>, <1,2,3,4> - 2899119414U, // <2,4,2,5>: Cost 3 vzipl <2,2,2,2>, RHS - 3033337142U, // <2,4,2,6>: Cost 3 vtrnl <2,2,2,2>, RHS - 3800214597U, // <2,4,2,7>: Cost 4 vext3 <7,0,4,2>, <4,2,7,0> - 2899119657U, // <2,4,2,u>: Cost 3 vzipl <2,2,2,2>, RHS - 2635344033U, // <2,4,3,0>: Cost 3 vext2 <3,0,2,4>, <3,0,2,4> - 4032012325U, // <2,4,3,1>: Cost 4 vzipr LHS, <0,0,4,1> - 3692497228U, // <2,4,3,2>: Cost 4 vext2 <0,2,2,4>, <3,2,3,4> - 3692497308U, // <2,4,3,3>: Cost 4 vext2 <0,2,2,4>, <3,3,3,3> - 3001404624U, // <2,4,3,4>: Cost 3 vzipr LHS, <4,4,4,4> - 2953627342U, // <2,4,3,5>: Cost 3 vzipr LHS, <2,3,4,5> - 2953625804U, // <2,4,3,6>: Cost 3 vzipr LHS, <0,2,4,6> - 3899868160U, // <2,4,3,7>: Cost 4 vuzpr <1,2,3,4>, <1,3,5,7> - 2953625806U, // <2,4,3,u>: Cost 3 vzipr LHS, <0,2,4,u> - 2710916266U, // <2,4,4,0>: Cost 3 vext3 <4,4,0,2>, <4,4,0,2> - 3899869648U, // <2,4,4,1>: Cost 4 vuzpr <1,2,3,4>, <3,4,0,1> - 3899869658U, // <2,4,4,2>: Cost 4 vuzpr <1,2,3,4>, <3,4,1,2> - 3899868930U, // <2,4,4,3>: Cost 4 vuzpr <1,2,3,4>, <2,4,1,3> - 2712833232U, // <2,4,4,4>: Cost 3 vext3 <4,6,u,2>, <4,4,4,4> - 2618756406U, // <2,4,4,5>: Cost 3 vext2 <0,2,2,4>, RHS - 2765737270U, // <2,4,4,6>: Cost 3 vuzpl <2,3,4,5>, RHS - 4168304426U, // <2,4,4,7>: Cost 4 vtrnr <1,2,3,4>, <2,4,5,7> - 2618756649U, // <2,4,4,u>: Cost 3 vext2 <0,2,2,4>, RHS - 2551800011U, // <2,4,5,0>: Cost 3 vext1 <0,2,4,5>, <0,2,4,5> - 2569716470U, // <2,4,5,1>: Cost 3 vext1 <3,2,4,5>, <1,0,3,2> - 2563745405U, // <2,4,5,2>: Cost 3 vext1 <2,2,4,5>, <2,2,4,5> - 2569718102U, // <2,4,5,3>: Cost 3 vext1 <3,2,4,5>, <3,2,4,5> - 2551803190U, // <2,4,5,4>: Cost 3 vext1 <0,2,4,5>, RHS - 3625545732U, // <2,4,5,5>: Cost 4 vext1 <0,2,4,5>, <5,5,5,5> - 1611959606U, // <2,4,5,6>: Cost 2 vext3 <0,2,0,2>, RHS - 2826128694U, // <2,4,5,7>: Cost 3 vuzpr <1,2,3,4>, RHS - 1611959624U, // <2,4,5,u>: Cost 2 vext3 <0,2,0,2>, RHS - 1478066278U, // <2,4,6,0>: Cost 2 vext1 <0,2,4,6>, LHS - 2551808758U, // <2,4,6,1>: Cost 3 vext1 <0,2,4,6>, <1,0,3,2> - 2551809516U, // <2,4,6,2>: Cost 3 vext1 <0,2,4,6>, <2,0,6,4> - 2551810198U, // <2,4,6,3>: Cost 3 vext1 <0,2,4,6>, <3,0,1,2> - 1478069558U, // <2,4,6,4>: Cost 2 vext1 <0,2,4,6>, RHS - 2901888310U, // <2,4,6,5>: Cost 3 vzipl <2,6,3,7>, RHS - 2551812920U, // <2,4,6,6>: Cost 3 vext1 <0,2,4,6>, <6,6,6,6> - 
2726251914U, // <2,4,6,7>: Cost 3 vext3 <7,0,1,2>, <4,6,7,1> - 1478072110U, // <2,4,6,u>: Cost 2 vext1 <0,2,4,6>, LHS - 2659234821U, // <2,4,7,0>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> - 3786722726U, // <2,4,7,1>: Cost 4 vext3 <4,7,1,2>, <4,7,1,2> - 3734303911U, // <2,4,7,2>: Cost 4 vext2 <7,2,2,4>, <7,2,2,4> - 3734967544U, // <2,4,7,3>: Cost 4 vext2 <7,3,2,4>, <7,3,2,4> - 3727005030U, // <2,4,7,4>: Cost 4 vext2 <6,0,2,4>, <7,4,5,6> - 2726251976U, // <2,4,7,5>: Cost 3 vext3 <7,0,1,2>, <4,7,5,0> - 2726251986U, // <2,4,7,6>: Cost 3 vext3 <7,0,1,2>, <4,7,6,1> - 3727005292U, // <2,4,7,7>: Cost 4 vext2 <6,0,2,4>, <7,7,7,7> - 2659234821U, // <2,4,7,u>: Cost 3 vext2 <7,0,2,4>, <7,0,2,4> - 1478082662U, // <2,4,u,0>: Cost 2 vext1 <0,2,4,u>, LHS - 2618758958U, // <2,4,u,1>: Cost 3 vext2 <0,2,2,4>, LHS - 2551826024U, // <2,4,u,2>: Cost 3 vext1 <0,2,4,u>, <2,2,2,2> - 2551826582U, // <2,4,u,3>: Cost 3 vext1 <0,2,4,u>, <3,0,1,2> - 1478085942U, // <2,4,u,4>: Cost 2 vext1 <0,2,4,u>, RHS - 2953668302U, // <2,4,u,5>: Cost 3 vzipr LHS, <2,3,4,5> - 1611959849U, // <2,4,u,6>: Cost 2 vext3 <0,2,0,2>, RHS - 2826128937U, // <2,4,u,7>: Cost 3 vuzpr <1,2,3,4>, RHS - 1611959867U, // <2,4,u,u>: Cost 2 vext3 <0,2,0,2>, RHS - 3691839488U, // <2,5,0,0>: Cost 4 vext2 <0,1,2,5>, <0,0,0,0> - 2618097766U, // <2,5,0,1>: Cost 3 vext2 <0,1,2,5>, LHS - 2620088484U, // <2,5,0,2>: Cost 3 vext2 <0,4,2,5>, <0,2,0,2> - 2619425034U, // <2,5,0,3>: Cost 3 vext2 <0,3,2,5>, <0,3,2,5> - 2620088667U, // <2,5,0,4>: Cost 3 vext2 <0,4,2,5>, <0,4,2,5> - 2620752300U, // <2,5,0,5>: Cost 3 vext2 <0,5,2,5>, <0,5,2,5> - 3693830655U, // <2,5,0,6>: Cost 4 vext2 <0,4,2,5>, <0,6,2,7> - 3094531382U, // <2,5,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS - 2618098333U, // <2,5,0,u>: Cost 3 vext2 <0,1,2,5>, LHS - 3691840246U, // <2,5,1,0>: Cost 4 vext2 <0,1,2,5>, <1,0,3,2> - 3691840308U, // <2,5,1,1>: Cost 4 vext2 <0,1,2,5>, <1,1,1,1> - 2626061206U, // <2,5,1,2>: Cost 3 vext2 <1,4,2,5>, <1,2,3,0> - 2618098688U, // <2,5,1,3>: Cost 3 vext2 <0,1,2,5>, <1,3,5,7> - 2626061364U, // <2,5,1,4>: Cost 3 vext2 <1,4,2,5>, <1,4,2,5> - 3691840656U, // <2,5,1,5>: Cost 4 vext2 <0,1,2,5>, <1,5,3,7> - 3789082310U, // <2,5,1,6>: Cost 4 vext3 <5,1,6,2>, <5,1,6,2> - 2712833744U, // <2,5,1,7>: Cost 3 vext3 <4,6,u,2>, <5,1,7,3> - 2628715896U, // <2,5,1,u>: Cost 3 vext2 <1,u,2,5>, <1,u,2,5> - 3693831613U, // <2,5,2,0>: Cost 4 vext2 <0,4,2,5>, <2,0,1,2> - 4026698642U, // <2,5,2,1>: Cost 4 vzipr <0,0,2,2>, <4,0,5,1> - 2632033896U, // <2,5,2,2>: Cost 3 vext2 <2,4,2,5>, <2,2,2,2> - 3691841190U, // <2,5,2,3>: Cost 4 vext2 <0,1,2,5>, <2,3,0,1> - 2632034061U, // <2,5,2,4>: Cost 3 vext2 <2,4,2,5>, <2,4,2,5> - 3691841352U, // <2,5,2,5>: Cost 4 vext2 <0,1,2,5>, <2,5,0,1> - 3691841466U, // <2,5,2,6>: Cost 4 vext2 <0,1,2,5>, <2,6,3,7> - 3088354614U, // <2,5,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS - 3088354615U, // <2,5,2,u>: Cost 3 vtrnr <0,2,0,2>, RHS - 2557829222U, // <2,5,3,0>: Cost 3 vext1 <1,2,5,3>, LHS - 2557830059U, // <2,5,3,1>: Cost 3 vext1 <1,2,5,3>, <1,2,5,3> - 2575746766U, // <2,5,3,2>: Cost 3 vext1 <4,2,5,3>, <2,3,4,5> - 3691841948U, // <2,5,3,3>: Cost 4 vext2 <0,1,2,5>, <3,3,3,3> - 2619427330U, // <2,5,3,4>: Cost 3 vext2 <0,3,2,5>, <3,4,5,6> - 2581720847U, // <2,5,3,5>: Cost 3 vext1 <5,2,5,3>, <5,2,5,3> - 2953628162U, // <2,5,3,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2953626624U, // <2,5,3,7>: Cost 3 vzipr LHS, <1,3,5,7> - 2953626625U, // <2,5,3,u>: Cost 3 vzipr LHS, <1,3,5,u> - 2569781350U, // <2,5,4,0>: Cost 3 vext1 <3,2,5,4>, LHS - 3631580076U, // <2,5,4,1>: Cost 4 vext1 <1,2,5,4>, <1,2,5,4> - 
2569782990U, // <2,5,4,2>: Cost 3 vext1 <3,2,5,4>, <2,3,4,5> - 2569783646U, // <2,5,4,3>: Cost 3 vext1 <3,2,5,4>, <3,2,5,4> - 2569784630U, // <2,5,4,4>: Cost 3 vext1 <3,2,5,4>, RHS - 2618101046U, // <2,5,4,5>: Cost 3 vext2 <0,1,2,5>, RHS - 3893905922U, // <2,5,4,6>: Cost 4 vuzpr <0,2,3,5>, <3,4,5,6> - 3094564150U, // <2,5,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS - 2618101289U, // <2,5,4,u>: Cost 3 vext2 <0,1,2,5>, RHS - 2551873638U, // <2,5,5,0>: Cost 3 vext1 <0,2,5,5>, LHS - 3637560320U, // <2,5,5,1>: Cost 4 vext1 <2,2,5,5>, <1,3,5,7> - 3637560966U, // <2,5,5,2>: Cost 4 vext1 <2,2,5,5>, <2,2,5,5> - 3723030343U, // <2,5,5,3>: Cost 4 vext2 <5,3,2,5>, <5,3,2,5> - 2551876918U, // <2,5,5,4>: Cost 3 vext1 <0,2,5,5>, RHS - 2712834052U, // <2,5,5,5>: Cost 3 vext3 <4,6,u,2>, <5,5,5,5> - 4028713474U, // <2,5,5,6>: Cost 4 vzipr <0,3,2,5>, <3,4,5,6> - 2712834072U, // <2,5,5,7>: Cost 3 vext3 <4,6,u,2>, <5,5,7,7> - 2712834081U, // <2,5,5,u>: Cost 3 vext3 <4,6,u,2>, <5,5,u,7> - 2575769702U, // <2,5,6,0>: Cost 3 vext1 <4,2,5,6>, LHS - 3631596462U, // <2,5,6,1>: Cost 4 vext1 <1,2,5,6>, <1,2,5,6> - 2655924730U, // <2,5,6,2>: Cost 3 vext2 <6,4,2,5>, <6,2,7,3> - 3643541856U, // <2,5,6,3>: Cost 4 vext1 <3,2,5,6>, <3,2,5,6> - 2655924849U, // <2,5,6,4>: Cost 3 vext2 <6,4,2,5>, <6,4,2,5> - 3787755607U, // <2,5,6,5>: Cost 4 vext3 <4,u,6,2>, <5,6,5,7> - 4029385218U, // <2,5,6,6>: Cost 4 vzipr <0,4,2,6>, <3,4,5,6> - 3088682294U, // <2,5,6,7>: Cost 3 vtrnr <0,2,4,6>, RHS - 3088682295U, // <2,5,6,u>: Cost 3 vtrnr <0,2,4,6>, RHS - 2563833958U, // <2,5,7,0>: Cost 3 vext1 <2,2,5,7>, LHS - 2551890678U, // <2,5,7,1>: Cost 3 vext1 <0,2,5,7>, <1,0,3,2> - 2563835528U, // <2,5,7,2>: Cost 3 vext1 <2,2,5,7>, <2,2,5,7> - 3637577878U, // <2,5,7,3>: Cost 4 vext1 <2,2,5,7>, <3,0,1,2> - 2563837238U, // <2,5,7,4>: Cost 3 vext1 <2,2,5,7>, RHS - 2712834216U, // <2,5,7,5>: Cost 3 vext3 <4,6,u,2>, <5,7,5,7> - 2712834220U, // <2,5,7,6>: Cost 3 vext3 <4,6,u,2>, <5,7,6,2> - 4174449974U, // <2,5,7,7>: Cost 4 vtrnr <2,2,5,7>, RHS - 2563839790U, // <2,5,7,u>: Cost 3 vext1 <2,2,5,7>, LHS - 2563842150U, // <2,5,u,0>: Cost 3 vext1 <2,2,5,u>, LHS - 2618103598U, // <2,5,u,1>: Cost 3 vext2 <0,1,2,5>, LHS - 2563843721U, // <2,5,u,2>: Cost 3 vext1 <2,2,5,u>, <2,2,5,u> - 2569816418U, // <2,5,u,3>: Cost 3 vext1 <3,2,5,u>, <3,2,5,u> - 2622748735U, // <2,5,u,4>: Cost 3 vext2 <0,u,2,5>, - 2618103962U, // <2,5,u,5>: Cost 3 vext2 <0,1,2,5>, RHS - 2953669122U, // <2,5,u,6>: Cost 3 vzipr LHS, <3,4,5,6> - 2953667584U, // <2,5,u,7>: Cost 3 vzipr LHS, <1,3,5,7> - 2618104165U, // <2,5,u,u>: Cost 3 vext2 <0,1,2,5>, LHS - 2620096512U, // <2,6,0,0>: Cost 3 vext2 <0,4,2,6>, <0,0,0,0> - 1546354790U, // <2,6,0,1>: Cost 2 vext2 <0,4,2,6>, LHS - 2620096676U, // <2,6,0,2>: Cost 3 vext2 <0,4,2,6>, <0,2,0,2> - 3693838588U, // <2,6,0,3>: Cost 4 vext2 <0,4,2,6>, <0,3,1,0> - 1546355036U, // <2,6,0,4>: Cost 2 vext2 <0,4,2,6>, <0,4,2,6> - 3694502317U, // <2,6,0,5>: Cost 4 vext2 <0,5,2,6>, <0,5,2,6> - 2551911246U, // <2,6,0,6>: Cost 3 vext1 <0,2,6,0>, <6,7,0,1> - 2720723287U, // <2,6,0,7>: Cost 3 vext3 <6,0,7,2>, <6,0,7,2> - 1546355357U, // <2,6,0,u>: Cost 2 vext2 <0,4,2,6>, LHS - 2620097270U, // <2,6,1,0>: Cost 3 vext2 <0,4,2,6>, <1,0,3,2> - 2620097332U, // <2,6,1,1>: Cost 3 vext2 <0,4,2,6>, <1,1,1,1> - 2620097430U, // <2,6,1,2>: Cost 3 vext2 <0,4,2,6>, <1,2,3,0> - 2820243558U, // <2,6,1,3>: Cost 3 vuzpr <0,2,4,6>, LHS - 2620097598U, // <2,6,1,4>: Cost 3 vext2 <0,4,2,6>, <1,4,3,6> - 2620097680U, // <2,6,1,5>: Cost 3 vext2 <0,4,2,6>, <1,5,3,7> - 3693839585U, // <2,6,1,6>: Cost 4 vext2 
<0,4,2,6>, <1,6,3,7> - 2721386920U, // <2,6,1,7>: Cost 3 vext3 <6,1,7,2>, <6,1,7,2> - 2820243563U, // <2,6,1,u>: Cost 3 vuzpr <0,2,4,6>, LHS - 2714014137U, // <2,6,2,0>: Cost 3 vext3 <4,u,6,2>, <6,2,0,1> - 2712834500U, // <2,6,2,1>: Cost 3 vext3 <4,6,u,2>, <6,2,1,3> - 2620098152U, // <2,6,2,2>: Cost 3 vext2 <0,4,2,6>, <2,2,2,2> - 2620098214U, // <2,6,2,3>: Cost 3 vext2 <0,4,2,6>, <2,3,0,1> - 2632042254U, // <2,6,2,4>: Cost 3 vext2 <2,4,2,6>, <2,4,2,6> - 2712834540U, // <2,6,2,5>: Cost 3 vext3 <4,6,u,2>, <6,2,5,7> - 2820243660U, // <2,6,2,6>: Cost 3 vuzpr <0,2,4,6>, <0,2,4,6> - 2958265654U, // <2,6,2,7>: Cost 3 vzipr <0,u,2,2>, RHS - 2620098619U, // <2,6,2,u>: Cost 3 vext2 <0,4,2,6>, <2,u,0,1> - 2620098710U, // <2,6,3,0>: Cost 3 vext2 <0,4,2,6>, <3,0,1,2> - 3893986982U, // <2,6,3,1>: Cost 4 vuzpr <0,2,4,6>, <2,3,0,1> - 2569848762U, // <2,6,3,2>: Cost 3 vext1 <3,2,6,3>, <2,6,3,7> - 2620098972U, // <2,6,3,3>: Cost 3 vext2 <0,4,2,6>, <3,3,3,3> - 2620099074U, // <2,6,3,4>: Cost 3 vext2 <0,4,2,6>, <3,4,5,6> - 3893987022U, // <2,6,3,5>: Cost 4 vuzpr <0,2,4,6>, <2,3,4,5> - 3001404644U, // <2,6,3,6>: Cost 3 vzipr LHS, <4,4,6,6> - 1879887158U, // <2,6,3,7>: Cost 2 vzipr LHS, RHS - 1879887159U, // <2,6,3,u>: Cost 2 vzipr LHS, RHS - 2620099484U, // <2,6,4,0>: Cost 3 vext2 <0,4,2,6>, <4,0,6,2> - 2620099566U, // <2,6,4,1>: Cost 3 vext2 <0,4,2,6>, <4,1,6,3> - 2620099644U, // <2,6,4,2>: Cost 3 vext2 <0,4,2,6>, <4,2,6,0> - 3643599207U, // <2,6,4,3>: Cost 4 vext1 <3,2,6,4>, <3,2,6,4> - 2575830080U, // <2,6,4,4>: Cost 3 vext1 <4,2,6,4>, <4,2,6,4> - 1546358070U, // <2,6,4,5>: Cost 2 vext2 <0,4,2,6>, RHS - 2667875700U, // <2,6,4,6>: Cost 3 vext2 , <4,6,4,6> - 4028042550U, // <2,6,4,7>: Cost 4 vzipr <0,2,2,4>, RHS - 1546358313U, // <2,6,4,u>: Cost 2 vext2 <0,4,2,6>, RHS - 3693841992U, // <2,6,5,0>: Cost 4 vext2 <0,4,2,6>, <5,0,1,2> - 2667876048U, // <2,6,5,1>: Cost 3 vext2 , <5,1,7,3> - 2712834756U, // <2,6,5,2>: Cost 3 vext3 <4,6,u,2>, <6,5,2,7> - 3643607400U, // <2,6,5,3>: Cost 4 vext1 <3,2,6,5>, <3,2,6,5> - 2252091873U, // <2,6,5,4>: Cost 3 vrev <6,2,4,5> - 2667876356U, // <2,6,5,5>: Cost 3 vext2 , <5,5,5,5> - 2667876450U, // <2,6,5,6>: Cost 3 vext2 , <5,6,7,0> - 2820246838U, // <2,6,5,7>: Cost 3 vuzpr <0,2,4,6>, RHS - 2820246839U, // <2,6,5,u>: Cost 3 vuzpr <0,2,4,6>, RHS - 2563899494U, // <2,6,6,0>: Cost 3 vext1 <2,2,6,6>, LHS - 3893988683U, // <2,6,6,1>: Cost 4 vuzpr <0,2,4,6>, <4,6,0,1> - 2563901072U, // <2,6,6,2>: Cost 3 vext1 <2,2,6,6>, <2,2,6,6> - 3893987236U, // <2,6,6,3>: Cost 4 vuzpr <0,2,4,6>, <2,6,1,3> - 2563902774U, // <2,6,6,4>: Cost 3 vext1 <2,2,6,6>, RHS - 3893988723U, // <2,6,6,5>: Cost 4 vuzpr <0,2,4,6>, <4,6,4,5> - 2712834872U, // <2,6,6,6>: Cost 3 vext3 <4,6,u,2>, <6,6,6,6> - 2955644214U, // <2,6,6,7>: Cost 3 vzipr <0,4,2,6>, RHS - 2955644215U, // <2,6,6,u>: Cost 3 vzipr <0,4,2,6>, RHS - 2712834894U, // <2,6,7,0>: Cost 3 vext3 <4,6,u,2>, <6,7,0,1> - 2724926296U, // <2,6,7,1>: Cost 3 vext3 <6,7,1,2>, <6,7,1,2> - 2725000033U, // <2,6,7,2>: Cost 3 vext3 <6,7,2,2>, <6,7,2,2> - 2702365544U, // <2,6,7,3>: Cost 3 vext3 <3,0,1,2>, <6,7,3,0> - 2712834934U, // <2,6,7,4>: Cost 3 vext3 <4,6,u,2>, <6,7,4,5> - 3776107393U, // <2,6,7,5>: Cost 4 vext3 <3,0,1,2>, <6,7,5,7> - 2725294981U, // <2,6,7,6>: Cost 3 vext3 <6,7,6,2>, <6,7,6,2> - 2726253452U, // <2,6,7,7>: Cost 3 vext3 <7,0,1,2>, <6,7,7,0> - 2712834966U, // <2,6,7,u>: Cost 3 vext3 <4,6,u,2>, <6,7,u,1> - 2620102355U, // <2,6,u,0>: Cost 3 vext2 <0,4,2,6>, - 1546360622U, // <2,6,u,1>: Cost 2 vext2 <0,4,2,6>, LHS - 2620102536U, // <2,6,u,2>: Cost 3 
vext2 <0,4,2,6>, - 2820244125U, // <2,6,u,3>: Cost 3 vuzpr <0,2,4,6>, LHS - 1594136612U, // <2,6,u,4>: Cost 2 vext2 , - 1546360986U, // <2,6,u,5>: Cost 2 vext2 <0,4,2,6>, RHS - 2620102864U, // <2,6,u,6>: Cost 3 vext2 <0,4,2,6>, - 1879928118U, // <2,6,u,7>: Cost 2 vzipr LHS, RHS - 1879928119U, // <2,6,u,u>: Cost 2 vzipr LHS, RHS - 2726179825U, // <2,7,0,0>: Cost 3 vext3 <7,0,0,2>, <7,0,0,2> - 1652511738U, // <2,7,0,1>: Cost 2 vext3 <7,0,1,2>, <7,0,1,2> - 2621431972U, // <2,7,0,2>: Cost 3 vext2 <0,6,2,7>, <0,2,0,2> - 2257949868U, // <2,7,0,3>: Cost 3 vrev <7,2,3,0> - 2726474773U, // <2,7,0,4>: Cost 3 vext3 <7,0,4,2>, <7,0,4,2> - 2620768686U, // <2,7,0,5>: Cost 3 vext2 <0,5,2,7>, <0,5,2,7> - 2621432319U, // <2,7,0,6>: Cost 3 vext2 <0,6,2,7>, <0,6,2,7> - 2599760953U, // <2,7,0,7>: Cost 3 vext1 , <7,0,u,2> - 1653027897U, // <2,7,0,u>: Cost 2 vext3 <7,0,u,2>, <7,0,u,2> - 2639348470U, // <2,7,1,0>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> - 3695174452U, // <2,7,1,1>: Cost 4 vext2 <0,6,2,7>, <1,1,1,1> - 3695174550U, // <2,7,1,2>: Cost 4 vext2 <0,6,2,7>, <1,2,3,0> - 3694511104U, // <2,7,1,3>: Cost 4 vext2 <0,5,2,7>, <1,3,5,7> - 3713090594U, // <2,7,1,4>: Cost 4 vext2 <3,6,2,7>, <1,4,0,5> - 3693184144U, // <2,7,1,5>: Cost 4 vext2 <0,3,2,7>, <1,5,3,7> - 2627405016U, // <2,7,1,6>: Cost 3 vext2 <1,6,2,7>, <1,6,2,7> - 3799995519U, // <2,7,1,7>: Cost 4 vext3 <7,0,1,2>, <7,1,7,0> - 2639348470U, // <2,7,1,u>: Cost 3 vext2 <3,6,2,7>, <1,0,3,2> - 3695175101U, // <2,7,2,0>: Cost 4 vext2 <0,6,2,7>, <2,0,1,2> - 3643655168U, // <2,7,2,1>: Cost 4 vext1 <3,2,7,2>, <1,3,5,7> - 2257892517U, // <2,7,2,2>: Cost 3 vrev <7,2,2,2> - 3695175334U, // <2,7,2,3>: Cost 4 vext2 <0,6,2,7>, <2,3,0,1> - 3695175465U, // <2,7,2,4>: Cost 4 vext2 <0,6,2,7>, <2,4,5,6> - 2632714080U, // <2,7,2,5>: Cost 3 vext2 <2,5,2,7>, <2,5,2,7> - 2633377713U, // <2,7,2,6>: Cost 3 vext2 <2,6,2,7>, <2,6,2,7> - 3695175658U, // <2,7,2,7>: Cost 4 vext2 <0,6,2,7>, <2,7,0,1> - 2634704979U, // <2,7,2,u>: Cost 3 vext2 <2,u,2,7>, <2,u,2,7> - 1514094694U, // <2,7,3,0>: Cost 2 vext1 <6,2,7,3>, LHS - 2569921680U, // <2,7,3,1>: Cost 3 vext1 <3,2,7,3>, <1,5,3,7> - 2587838056U, // <2,7,3,2>: Cost 3 vext1 <6,2,7,3>, <2,2,2,2> - 2569922927U, // <2,7,3,3>: Cost 3 vext1 <3,2,7,3>, <3,2,7,3> - 1514097974U, // <2,7,3,4>: Cost 2 vext1 <6,2,7,3>, RHS - 2581868321U, // <2,7,3,5>: Cost 3 vext1 <5,2,7,3>, <5,2,7,3> - 1514099194U, // <2,7,3,6>: Cost 2 vext1 <6,2,7,3>, <6,2,7,3> - 2587841530U, // <2,7,3,7>: Cost 3 vext1 <6,2,7,3>, <7,0,1,2> - 1514100526U, // <2,7,3,u>: Cost 2 vext1 <6,2,7,3>, LHS - 2708706617U, // <2,7,4,0>: Cost 3 vext3 <4,0,6,2>, <7,4,0,6> - 3649643418U, // <2,7,4,1>: Cost 4 vext1 <4,2,7,4>, <1,2,3,4> - 3649644330U, // <2,7,4,2>: Cost 4 vext1 <4,2,7,4>, <2,4,5,7> - 2257982640U, // <2,7,4,3>: Cost 3 vrev <7,2,3,4> - 3649645641U, // <2,7,4,4>: Cost 4 vext1 <4,2,7,4>, <4,2,7,4> - 2621435190U, // <2,7,4,5>: Cost 3 vext2 <0,6,2,7>, RHS - 2712835441U, // <2,7,4,6>: Cost 3 vext3 <4,6,u,2>, <7,4,6,u> - 3799995762U, // <2,7,4,7>: Cost 4 vext3 <7,0,1,2>, <7,4,7,0> - 2621435433U, // <2,7,4,u>: Cost 3 vext2 <0,6,2,7>, RHS - 2729497990U, // <2,7,5,0>: Cost 3 vext3 <7,5,0,2>, <7,5,0,2> - 3643679744U, // <2,7,5,1>: Cost 4 vext1 <3,2,7,5>, <1,3,5,7> - 3637708424U, // <2,7,5,2>: Cost 4 vext1 <2,2,7,5>, <2,2,5,7> - 3643681137U, // <2,7,5,3>: Cost 4 vext1 <3,2,7,5>, <3,2,7,5> - 2599800118U, // <2,7,5,4>: Cost 3 vext1 , RHS - 3786577334U, // <2,7,5,5>: Cost 4 vext3 <4,6,u,2>, <7,5,5,5> - 3786577345U, // <2,7,5,6>: Cost 4 vext3 <4,6,u,2>, <7,5,6,7> - 2599802214U, // <2,7,5,7>: Cost 3 
vext1 , <7,4,5,6> - 2599802670U, // <2,7,5,u>: Cost 3 vext1 , LHS - 2581889126U, // <2,7,6,0>: Cost 3 vext1 <5,2,7,6>, LHS - 3643687936U, // <2,7,6,1>: Cost 4 vext1 <3,2,7,6>, <1,3,5,7> - 2663240186U, // <2,7,6,2>: Cost 3 vext2 <7,6,2,7>, <6,2,7,3> - 3643689330U, // <2,7,6,3>: Cost 4 vext1 <3,2,7,6>, <3,2,7,6> - 2581892406U, // <2,7,6,4>: Cost 3 vext1 <5,2,7,6>, RHS - 2581892900U, // <2,7,6,5>: Cost 3 vext1 <5,2,7,6>, <5,2,7,6> - 2587865597U, // <2,7,6,6>: Cost 3 vext1 <6,2,7,6>, <6,2,7,6> - 3786577428U, // <2,7,6,7>: Cost 4 vext3 <4,6,u,2>, <7,6,7,0> - 2581894958U, // <2,7,6,u>: Cost 3 vext1 <5,2,7,6>, LHS - 2726254119U, // <2,7,7,0>: Cost 3 vext3 <7,0,1,2>, <7,7,0,1> - 3804640817U, // <2,7,7,1>: Cost 4 vext3 <7,7,1,2>, <7,7,1,2> - 3637724826U, // <2,7,7,2>: Cost 4 vext1 <2,2,7,7>, <2,2,7,7> - 3734992123U, // <2,7,7,3>: Cost 4 vext2 <7,3,2,7>, <7,3,2,7> - 2552040758U, // <2,7,7,4>: Cost 3 vext1 <0,2,7,7>, RHS - 3799995992U, // <2,7,7,5>: Cost 4 vext3 <7,0,1,2>, <7,7,5,5> - 2663241198U, // <2,7,7,6>: Cost 3 vext2 <7,6,2,7>, <7,6,2,7> - 2712835692U, // <2,7,7,7>: Cost 3 vext3 <4,6,u,2>, <7,7,7,7> - 2731562607U, // <2,7,7,u>: Cost 3 vext3 <7,u,1,2>, <7,7,u,1> - 1514135654U, // <2,7,u,0>: Cost 2 vext1 <6,2,7,u>, LHS - 1657820802U, // <2,7,u,1>: Cost 2 vext3 <7,u,1,2>, <7,u,1,2> - 2587879016U, // <2,7,u,2>: Cost 3 vext1 <6,2,7,u>, <2,2,2,2> - 2569963892U, // <2,7,u,3>: Cost 3 vext1 <3,2,7,u>, <3,2,7,u> - 1514138934U, // <2,7,u,4>: Cost 2 vext1 <6,2,7,u>, RHS - 2621438106U, // <2,7,u,5>: Cost 3 vext2 <0,6,2,7>, RHS - 1514140159U, // <2,7,u,6>: Cost 2 vext1 <6,2,7,u>, <6,2,7,u> - 2587882490U, // <2,7,u,7>: Cost 3 vext1 <6,2,7,u>, <7,0,1,2> - 1514141486U, // <2,7,u,u>: Cost 2 vext1 <6,2,7,u>, LHS - 1544380416U, // <2,u,0,0>: Cost 2 vext2 LHS, <0,0,0,0> - 470638699U, // <2,u,0,1>: Cost 1 vext2 LHS, LHS - 1544380580U, // <2,u,0,2>: Cost 2 vext2 LHS, <0,2,0,2> - 1658631909U, // <2,u,0,3>: Cost 2 vext3 , - 1544380754U, // <2,u,0,4>: Cost 2 vext2 LHS, <0,4,1,5> - 2665898414U, // <2,u,0,5>: Cost 3 vext2 LHS, <0,5,2,7> - 1658853120U, // <2,u,0,6>: Cost 2 vext3 , - 3094531625U, // <2,u,0,7>: Cost 3 vtrnr <1,2,3,0>, RHS - 470639261U, // <2,u,0,u>: Cost 1 vext2 LHS, LHS - 1544381174U, // <2,u,1,0>: Cost 2 vext2 LHS, <1,0,3,2> - 1544381236U, // <2,u,1,1>: Cost 2 vext2 LHS, <1,1,1,1> - 1544381334U, // <2,u,1,2>: Cost 2 vext2 LHS, <1,2,3,0> - 1544381400U, // <2,u,1,3>: Cost 2 vext2 LHS, <1,3,1,3> - 2618123325U, // <2,u,1,4>: Cost 3 vext2 LHS, <1,4,3,5> - 1544381584U, // <2,u,1,5>: Cost 2 vext2 LHS, <1,5,3,7> - 2618123489U, // <2,u,1,6>: Cost 3 vext2 LHS, <1,6,3,7> - 2726254427U, // <2,u,1,7>: Cost 3 vext3 <7,0,1,2>, - 1544381823U, // <2,u,1,u>: Cost 2 vext2 LHS, <1,u,3,3> - 1478328422U, // <2,u,2,0>: Cost 2 vext1 <0,2,u,2>, LHS - 2618123807U, // <2,u,2,1>: Cost 3 vext2 LHS, <2,1,3,1> - 269271142U, // <2,u,2,2>: Cost 1 vdup2 LHS - 1544382118U, // <2,u,2,3>: Cost 2 vext2 LHS, <2,3,0,1> - 1478331702U, // <2,u,2,4>: Cost 2 vext1 <0,2,u,2>, RHS - 2618124136U, // <2,u,2,5>: Cost 3 vext2 LHS, <2,5,3,6> - 1544382394U, // <2,u,2,6>: Cost 2 vext2 LHS, <2,6,3,7> - 3088354857U, // <2,u,2,7>: Cost 3 vtrnr <0,2,0,2>, RHS - 269271142U, // <2,u,2,u>: Cost 1 vdup2 LHS - 1544382614U, // <2,u,3,0>: Cost 2 vext2 LHS, <3,0,1,2> - 2953627374U, // <2,u,3,1>: Cost 3 vzipr LHS, <2,3,u,1> - 1490282143U, // <2,u,3,2>: Cost 2 vext1 <2,2,u,3>, <2,2,u,3> - 1879883932U, // <2,u,3,3>: Cost 2 vzipr LHS, LHS - 1544382978U, // <2,u,3,4>: Cost 2 vext2 LHS, <3,4,5,6> - 2953627378U, // <2,u,3,5>: Cost 3 vzipr LHS, <2,3,u,5> - 1514172931U, // 
<2,u,3,6>: Cost 2 vext1 <6,2,u,3>, <6,2,u,3> - 1879887176U, // <2,u,3,7>: Cost 2 vzipr LHS, RHS - 1879883937U, // <2,u,3,u>: Cost 2 vzipr LHS, LHS - 1484316774U, // <2,u,4,0>: Cost 2 vext1 <1,2,u,4>, LHS - 1484317639U, // <2,u,4,1>: Cost 2 vext1 <1,2,u,4>, <1,2,u,4> - 2552088270U, // <2,u,4,2>: Cost 3 vext1 <0,2,u,4>, <2,3,4,5> - 1190213513U, // <2,u,4,3>: Cost 2 vrev - 1484320054U, // <2,u,4,4>: Cost 2 vext1 <1,2,u,4>, RHS - 470641974U, // <2,u,4,5>: Cost 1 vext2 LHS, RHS - 1592159604U, // <2,u,4,6>: Cost 2 vext2 LHS, <4,6,4,6> - 3094564393U, // <2,u,4,7>: Cost 3 vtrnr <1,2,3,4>, RHS - 470642217U, // <2,u,4,u>: Cost 1 vext2 LHS, RHS - 2552094959U, // <2,u,5,0>: Cost 3 vext1 <0,2,u,5>, <0,2,u,5> - 1592159952U, // <2,u,5,1>: Cost 2 vext2 LHS, <5,1,7,3> - 2564040353U, // <2,u,5,2>: Cost 3 vext1 <2,2,u,5>, <2,2,u,5> - 2690275455U, // <2,u,5,3>: Cost 3 vext3 <0,u,u,2>, - 1592160198U, // <2,u,5,4>: Cost 2 vext2 LHS, <5,4,7,6> - 1592160260U, // <2,u,5,5>: Cost 2 vext2 LHS, <5,5,5,5> - 1611962522U, // <2,u,5,6>: Cost 2 vext3 <0,2,0,2>, RHS - 1592160424U, // <2,u,5,7>: Cost 2 vext2 LHS, <5,7,5,7> - 1611962540U, // <2,u,5,u>: Cost 2 vext3 <0,2,0,2>, RHS - 1478361190U, // <2,u,6,0>: Cost 2 vext1 <0,2,u,6>, LHS - 2552103670U, // <2,u,6,1>: Cost 3 vext1 <0,2,u,6>, <1,0,3,2> - 1592160762U, // <2,u,6,2>: Cost 2 vext2 LHS, <6,2,7,3> - 2685704400U, // <2,u,6,3>: Cost 3 vext3 <0,2,0,2>, - 1478364470U, // <2,u,6,4>: Cost 2 vext1 <0,2,u,6>, RHS - 2901891226U, // <2,u,6,5>: Cost 3 vzipl <2,6,3,7>, RHS - 1592161080U, // <2,u,6,6>: Cost 2 vext2 LHS, <6,6,6,6> - 1592161102U, // <2,u,6,7>: Cost 2 vext2 LHS, <6,7,0,1> - 1478367022U, // <2,u,6,u>: Cost 2 vext1 <0,2,u,6>, LHS - 1592161274U, // <2,u,7,0>: Cost 2 vext2 LHS, <7,0,1,2> - 2659931226U, // <2,u,7,1>: Cost 3 vext2 <7,1,2,u>, <7,1,2,u> - 2564056739U, // <2,u,7,2>: Cost 3 vext1 <2,2,u,7>, <2,2,u,7> - 2665903331U, // <2,u,7,3>: Cost 3 vext2 LHS, <7,3,0,1> - 1592161638U, // <2,u,7,4>: Cost 2 vext2 LHS, <7,4,5,6> - 2665903494U, // <2,u,7,5>: Cost 3 vext2 LHS, <7,5,0,2> - 2587947527U, // <2,u,7,6>: Cost 3 vext1 <6,2,u,7>, <6,2,u,7> - 1592161900U, // <2,u,7,7>: Cost 2 vext2 LHS, <7,7,7,7> - 1592161922U, // <2,u,7,u>: Cost 2 vext2 LHS, <7,u,1,2> - 1478377574U, // <2,u,u,0>: Cost 2 vext1 <0,2,u,u>, LHS - 470644526U, // <2,u,u,1>: Cost 1 vext2 LHS, LHS - 269271142U, // <2,u,u,2>: Cost 1 vdup2 LHS - 1879924892U, // <2,u,u,3>: Cost 2 vzipr LHS, LHS - 1478380854U, // <2,u,u,4>: Cost 2 vext1 <0,2,u,u>, RHS - 470644890U, // <2,u,u,5>: Cost 1 vext2 LHS, RHS - 1611962765U, // <2,u,u,6>: Cost 2 vext3 <0,2,0,2>, RHS - 1879928136U, // <2,u,u,7>: Cost 2 vzipr LHS, RHS - 470645093U, // <2,u,u,u>: Cost 1 vext2 LHS, LHS - 1611448320U, // <3,0,0,0>: Cost 2 vext3 LHS, <0,0,0,0> - 1611890698U, // <3,0,0,1>: Cost 2 vext3 LHS, <0,0,1,1> - 1611890708U, // <3,0,0,2>: Cost 2 vext3 LHS, <0,0,2,2> - 3763576860U, // <3,0,0,3>: Cost 4 vext3 LHS, <0,0,3,1> - 2689835045U, // <3,0,0,4>: Cost 3 vext3 LHS, <0,0,4,1> - 3698508206U, // <3,0,0,5>: Cost 4 vext2 <1,2,3,0>, <0,5,2,7> - 3763576887U, // <3,0,0,6>: Cost 4 vext3 LHS, <0,0,6,1> - 3667678434U, // <3,0,0,7>: Cost 4 vext1 <7,3,0,0>, <7,3,0,0> - 1616093258U, // <3,0,0,u>: Cost 2 vext3 LHS, <0,0,u,2> - 1490337894U, // <3,0,1,0>: Cost 2 vext1 <2,3,0,1>, LHS - 2685632602U, // <3,0,1,1>: Cost 3 vext3 LHS, <0,1,1,0> - 537706598U, // <3,0,1,2>: Cost 1 vext3 LHS, LHS - 2624766936U, // <3,0,1,3>: Cost 3 vext2 <1,2,3,0>, <1,3,1,3> - 1490341174U, // <3,0,1,4>: Cost 2 vext1 <2,3,0,1>, RHS - 2624767120U, // <3,0,1,5>: Cost 3 vext2 <1,2,3,0>, <1,5,3,7> - 
2732966030U, // <3,0,1,6>: Cost 3 vext3 LHS, <0,1,6,7> - 2593944803U, // <3,0,1,7>: Cost 3 vext1 <7,3,0,1>, <7,3,0,1> - 537706652U, // <3,0,1,u>: Cost 1 vext3 LHS, LHS - 1611890852U, // <3,0,2,0>: Cost 2 vext3 LHS, <0,2,0,2> - 2685632684U, // <3,0,2,1>: Cost 3 vext3 LHS, <0,2,1,1> - 2685632692U, // <3,0,2,2>: Cost 3 vext3 LHS, <0,2,2,0> - 2685632702U, // <3,0,2,3>: Cost 3 vext3 LHS, <0,2,3,1> - 1611890892U, // <3,0,2,4>: Cost 2 vext3 LHS, <0,2,4,6> - 2732966102U, // <3,0,2,5>: Cost 3 vext3 LHS, <0,2,5,7> - 2624767930U, // <3,0,2,6>: Cost 3 vext2 <1,2,3,0>, <2,6,3,7> - 2685632744U, // <3,0,2,7>: Cost 3 vext3 LHS, <0,2,7,7> - 1611890924U, // <3,0,2,u>: Cost 2 vext3 LHS, <0,2,u,2> - 2624768150U, // <3,0,3,0>: Cost 3 vext2 <1,2,3,0>, <3,0,1,2> - 2685632764U, // <3,0,3,1>: Cost 3 vext3 LHS, <0,3,1,0> - 2685632774U, // <3,0,3,2>: Cost 3 vext3 LHS, <0,3,2,1> - 2624768412U, // <3,0,3,3>: Cost 3 vext2 <1,2,3,0>, <3,3,3,3> - 2624768514U, // <3,0,3,4>: Cost 3 vext2 <1,2,3,0>, <3,4,5,6> - 3702491714U, // <3,0,3,5>: Cost 4 vext2 <1,u,3,0>, <3,5,3,7> - 2624768632U, // <3,0,3,6>: Cost 3 vext2 <1,2,3,0>, <3,6,0,7> - 3702491843U, // <3,0,3,7>: Cost 4 vext2 <1,u,3,0>, <3,7,0,1> - 2686959934U, // <3,0,3,u>: Cost 3 vext3 <0,3,u,3>, <0,3,u,3> - 2689835336U, // <3,0,4,0>: Cost 3 vext3 LHS, <0,4,0,4> - 1611891026U, // <3,0,4,1>: Cost 2 vext3 LHS, <0,4,1,5> - 1611891036U, // <3,0,4,2>: Cost 2 vext3 LHS, <0,4,2,6> - 3763577184U, // <3,0,4,3>: Cost 4 vext3 LHS, <0,4,3,1> - 2689835374U, // <3,0,4,4>: Cost 3 vext3 LHS, <0,4,4,6> - 1551027510U, // <3,0,4,5>: Cost 2 vext2 <1,2,3,0>, RHS - 2666573172U, // <3,0,4,6>: Cost 3 vext2 , <4,6,4,6> - 3667711206U, // <3,0,4,7>: Cost 4 vext1 <7,3,0,4>, <7,3,0,4> - 1616093586U, // <3,0,4,u>: Cost 2 vext3 LHS, <0,4,u,6> - 2685190556U, // <3,0,5,0>: Cost 3 vext3 LHS, <0,5,0,7> - 2666573520U, // <3,0,5,1>: Cost 3 vext2 , <5,1,7,3> - 3040886886U, // <3,0,5,2>: Cost 3 vtrnl <3,4,5,6>, LHS - 3625912834U, // <3,0,5,3>: Cost 4 vext1 <0,3,0,5>, <3,4,5,6> - 2666573766U, // <3,0,5,4>: Cost 3 vext2 , <5,4,7,6> - 2666573828U, // <3,0,5,5>: Cost 3 vext2 , <5,5,5,5> - 2732966354U, // <3,0,5,6>: Cost 3 vext3 LHS, <0,5,6,7> - 2666573992U, // <3,0,5,7>: Cost 3 vext2 , <5,7,5,7> - 3040886940U, // <3,0,5,u>: Cost 3 vtrnl <3,4,5,6>, LHS - 2685190637U, // <3,0,6,0>: Cost 3 vext3 LHS, <0,6,0,7> - 2732966390U, // <3,0,6,1>: Cost 3 vext3 LHS, <0,6,1,7> - 2689835519U, // <3,0,6,2>: Cost 3 vext3 LHS, <0,6,2,7> - 3667724438U, // <3,0,6,3>: Cost 4 vext1 <7,3,0,6>, <3,0,1,2> - 3763577355U, // <3,0,6,4>: Cost 4 vext3 LHS, <0,6,4,1> - 3806708243U, // <3,0,6,5>: Cost 4 vext3 LHS, <0,6,5,0> - 2666574648U, // <3,0,6,6>: Cost 3 vext2 , <6,6,6,6> - 2657948520U, // <3,0,6,7>: Cost 3 vext2 <6,7,3,0>, <6,7,3,0> - 2689835573U, // <3,0,6,u>: Cost 3 vext3 LHS, <0,6,u,7> - 2666574842U, // <3,0,7,0>: Cost 3 vext2 , <7,0,1,2> - 2685633095U, // <3,0,7,1>: Cost 3 vext3 LHS, <0,7,1,7> - 2660603052U, // <3,0,7,2>: Cost 3 vext2 <7,2,3,0>, <7,2,3,0> - 3643844997U, // <3,0,7,3>: Cost 4 vext1 <3,3,0,7>, <3,3,0,7> - 2666575206U, // <3,0,7,4>: Cost 3 vext2 , <7,4,5,6> - 3655790391U, // <3,0,7,5>: Cost 4 vext1 <5,3,0,7>, <5,3,0,7> - 3731690968U, // <3,0,7,6>: Cost 4 vext2 <6,7,3,0>, <7,6,0,3> - 2666575468U, // <3,0,7,7>: Cost 3 vext2 , <7,7,7,7> - 2664584850U, // <3,0,7,u>: Cost 3 vext2 <7,u,3,0>, <7,u,3,0> - 1616093834U, // <3,0,u,0>: Cost 2 vext3 LHS, <0,u,0,2> - 1611891346U, // <3,0,u,1>: Cost 2 vext3 LHS, <0,u,1,1> - 537707165U, // <3,0,u,2>: Cost 1 vext3 LHS, LHS - 2689835684U, // <3,0,u,3>: Cost 3 vext3 LHS, <0,u,3,1> - 
1616093874U, // <3,0,u,4>: Cost 2 vext3 LHS, <0,u,4,6> - 1551030426U, // <3,0,u,5>: Cost 2 vext2 <1,2,3,0>, RHS - 2624772304U, // <3,0,u,6>: Cost 3 vext2 <1,2,3,0>, - 2594002154U, // <3,0,u,7>: Cost 3 vext1 <7,3,0,u>, <7,3,0,u> - 537707219U, // <3,0,u,u>: Cost 1 vext3 LHS, LHS - 2552201318U, // <3,1,0,0>: Cost 3 vext1 <0,3,1,0>, LHS - 2618802278U, // <3,1,0,1>: Cost 3 vext2 <0,2,3,1>, LHS - 2618802366U, // <3,1,0,2>: Cost 3 vext2 <0,2,3,1>, <0,2,3,1> - 1611449078U, // <3,1,0,3>: Cost 2 vext3 LHS, <1,0,3,2> - 2552204598U, // <3,1,0,4>: Cost 3 vext1 <0,3,1,0>, RHS - 2732966663U, // <3,1,0,5>: Cost 3 vext3 LHS, <1,0,5,1> - 3906258396U, // <3,1,0,6>: Cost 4 vuzpr <2,3,0,1>, <2,0,4,6> - 3667752171U, // <3,1,0,7>: Cost 4 vext1 <7,3,1,0>, <7,3,1,0> - 1611891491U, // <3,1,0,u>: Cost 2 vext3 LHS, <1,0,u,2> - 2689835819U, // <3,1,1,0>: Cost 3 vext3 LHS, <1,1,0,1> - 1611449140U, // <3,1,1,1>: Cost 2 vext3 LHS, <1,1,1,1> - 2624775063U, // <3,1,1,2>: Cost 3 vext2 <1,2,3,1>, <1,2,3,1> - 1611891528U, // <3,1,1,3>: Cost 2 vext3 LHS, <1,1,3,3> - 2689835859U, // <3,1,1,4>: Cost 3 vext3 LHS, <1,1,4,5> - 2689835868U, // <3,1,1,5>: Cost 3 vext3 LHS, <1,1,5,5> - 3763577701U, // <3,1,1,6>: Cost 4 vext3 LHS, <1,1,6,5> - 3765273452U, // <3,1,1,7>: Cost 4 vext3 <1,1,7,3>, <1,1,7,3> - 1611891573U, // <3,1,1,u>: Cost 2 vext3 LHS, <1,1,u,3> - 2629420494U, // <3,1,2,0>: Cost 3 vext2 <2,0,3,1>, <2,0,3,1> - 2689835911U, // <3,1,2,1>: Cost 3 vext3 LHS, <1,2,1,3> - 2564163248U, // <3,1,2,2>: Cost 3 vext1 <2,3,1,2>, <2,3,1,2> - 1611449238U, // <3,1,2,3>: Cost 2 vext3 LHS, <1,2,3,0> - 2564164918U, // <3,1,2,4>: Cost 3 vext1 <2,3,1,2>, RHS - 2689835947U, // <3,1,2,5>: Cost 3 vext3 LHS, <1,2,5,3> - 3692545978U, // <3,1,2,6>: Cost 4 vext2 <0,2,3,1>, <2,6,3,7> - 2732966842U, // <3,1,2,7>: Cost 3 vext3 LHS, <1,2,7,0> - 1611891651U, // <3,1,2,u>: Cost 2 vext3 LHS, <1,2,u,0> - 1484456038U, // <3,1,3,0>: Cost 2 vext1 <1,3,1,3>, LHS - 1611891672U, // <3,1,3,1>: Cost 2 vext3 LHS, <1,3,1,3> - 2685633502U, // <3,1,3,2>: Cost 3 vext3 LHS, <1,3,2,0> - 2685633512U, // <3,1,3,3>: Cost 3 vext3 LHS, <1,3,3,1> - 1484459318U, // <3,1,3,4>: Cost 2 vext1 <1,3,1,3>, RHS - 1611891712U, // <3,1,3,5>: Cost 2 vext3 LHS, <1,3,5,7> - 2689836041U, // <3,1,3,6>: Cost 3 vext3 LHS, <1,3,6,7> - 2733409294U, // <3,1,3,7>: Cost 3 vext3 LHS, <1,3,7,3> - 1611891735U, // <3,1,3,u>: Cost 2 vext3 LHS, <1,3,u,3> - 2552234086U, // <3,1,4,0>: Cost 3 vext1 <0,3,1,4>, LHS - 2732966955U, // <3,1,4,1>: Cost 3 vext3 LHS, <1,4,1,5> - 2732966964U, // <3,1,4,2>: Cost 3 vext3 LHS, <1,4,2,5> - 2685633597U, // <3,1,4,3>: Cost 3 vext3 LHS, <1,4,3,5> - 2552237366U, // <3,1,4,4>: Cost 3 vext1 <0,3,1,4>, RHS - 2618805558U, // <3,1,4,5>: Cost 3 vext2 <0,2,3,1>, RHS - 2769472822U, // <3,1,4,6>: Cost 3 vuzpl <3,0,1,2>, RHS - 3667784943U, // <3,1,4,7>: Cost 4 vext1 <7,3,1,4>, <7,3,1,4> - 2685633642U, // <3,1,4,u>: Cost 3 vext3 LHS, <1,4,u,5> - 2689836143U, // <3,1,5,0>: Cost 3 vext3 LHS, <1,5,0,1> - 2564187280U, // <3,1,5,1>: Cost 3 vext1 <2,3,1,5>, <1,5,3,7> - 2564187827U, // <3,1,5,2>: Cost 3 vext1 <2,3,1,5>, <2,3,1,5> - 1611891856U, // <3,1,5,3>: Cost 2 vext3 LHS, <1,5,3,7> - 2689836183U, // <3,1,5,4>: Cost 3 vext3 LHS, <1,5,4,5> - 3759375522U, // <3,1,5,5>: Cost 4 vext3 LHS, <1,5,5,7> - 3720417378U, // <3,1,5,6>: Cost 4 vext2 <4,u,3,1>, <5,6,7,0> - 2832518454U, // <3,1,5,7>: Cost 3 vuzpr <2,3,0,1>, RHS - 1611891901U, // <3,1,5,u>: Cost 2 vext3 LHS, <1,5,u,7> - 3763578048U, // <3,1,6,0>: Cost 4 vext3 LHS, <1,6,0,1> - 2689836239U, // <3,1,6,1>: Cost 3 vext3 LHS, <1,6,1,7> - 
2732967128U, // <3,1,6,2>: Cost 3 vext3 LHS, <1,6,2,7> - 2685633761U, // <3,1,6,3>: Cost 3 vext3 LHS, <1,6,3,7> - 3763578088U, // <3,1,6,4>: Cost 4 vext3 LHS, <1,6,4,5> - 2689836275U, // <3,1,6,5>: Cost 3 vext3 LHS, <1,6,5,7> - 3763578108U, // <3,1,6,6>: Cost 4 vext3 LHS, <1,6,6,7> - 2732967166U, // <3,1,6,7>: Cost 3 vext3 LHS, <1,6,7,0> - 2685633806U, // <3,1,6,u>: Cost 3 vext3 LHS, <1,6,u,7> - 3631972454U, // <3,1,7,0>: Cost 4 vext1 <1,3,1,7>, LHS - 2659947612U, // <3,1,7,1>: Cost 3 vext2 <7,1,3,1>, <7,1,3,1> - 4036102294U, // <3,1,7,2>: Cost 4 vzipr <1,5,3,7>, <3,0,1,2> - 3095396454U, // <3,1,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS - 3631975734U, // <3,1,7,4>: Cost 4 vext1 <1,3,1,7>, RHS - 2222982144U, // <3,1,7,5>: Cost 3 vrev <1,3,5,7> - 3296797705U, // <3,1,7,6>: Cost 4 vrev <1,3,6,7> - 3720418924U, // <3,1,7,7>: Cost 4 vext2 <4,u,3,1>, <7,7,7,7> - 3095396459U, // <3,1,7,u>: Cost 3 vtrnr <1,3,5,7>, LHS - 1484496998U, // <3,1,u,0>: Cost 2 vext1 <1,3,1,u>, LHS - 1611892077U, // <3,1,u,1>: Cost 2 vext3 LHS, <1,u,1,3> - 2685633907U, // <3,1,u,2>: Cost 3 vext3 LHS, <1,u,2,0> - 1611892092U, // <3,1,u,3>: Cost 2 vext3 LHS, <1,u,3,0> - 1484500278U, // <3,1,u,4>: Cost 2 vext1 <1,3,1,u>, RHS - 1611892117U, // <3,1,u,5>: Cost 2 vext3 LHS, <1,u,5,7> - 2685633950U, // <3,1,u,6>: Cost 3 vext3 LHS, <1,u,6,7> - 2832518697U, // <3,1,u,7>: Cost 3 vuzpr <2,3,0,1>, RHS - 1611892140U, // <3,1,u,u>: Cost 2 vext3 LHS, <1,u,u,3> - 2623455232U, // <3,2,0,0>: Cost 3 vext2 <1,0,3,2>, <0,0,0,0> - 1549713510U, // <3,2,0,1>: Cost 2 vext2 <1,0,3,2>, LHS - 2689836484U, // <3,2,0,2>: Cost 3 vext3 LHS, <2,0,2,0> - 2685633997U, // <3,2,0,3>: Cost 3 vext3 LHS, <2,0,3,0> - 2623455570U, // <3,2,0,4>: Cost 3 vext2 <1,0,3,2>, <0,4,1,5> - 2732967398U, // <3,2,0,5>: Cost 3 vext3 LHS, <2,0,5,7> - 2689836524U, // <3,2,0,6>: Cost 3 vext3 LHS, <2,0,6,4> - 2229044964U, // <3,2,0,7>: Cost 3 vrev <2,3,7,0> - 1549714077U, // <3,2,0,u>: Cost 2 vext2 <1,0,3,2>, LHS - 1549714166U, // <3,2,1,0>: Cost 2 vext2 <1,0,3,2>, <1,0,3,2> - 2623456052U, // <3,2,1,1>: Cost 3 vext2 <1,0,3,2>, <1,1,1,1> - 2623456150U, // <3,2,1,2>: Cost 3 vext2 <1,0,3,2>, <1,2,3,0> - 2685634079U, // <3,2,1,3>: Cost 3 vext3 LHS, <2,1,3,1> - 2552286518U, // <3,2,1,4>: Cost 3 vext1 <0,3,2,1>, RHS - 2623456400U, // <3,2,1,5>: Cost 3 vext2 <1,0,3,2>, <1,5,3,7> - 2689836604U, // <3,2,1,6>: Cost 3 vext3 LHS, <2,1,6,3> - 3667834101U, // <3,2,1,7>: Cost 4 vext1 <7,3,2,1>, <7,3,2,1> - 1155385070U, // <3,2,1,u>: Cost 2 vrev <2,3,u,1> - 2689836629U, // <3,2,2,0>: Cost 3 vext3 LHS, <2,2,0,1> - 2689836640U, // <3,2,2,1>: Cost 3 vext3 LHS, <2,2,1,3> - 1611449960U, // <3,2,2,2>: Cost 2 vext3 LHS, <2,2,2,2> - 1611892338U, // <3,2,2,3>: Cost 2 vext3 LHS, <2,2,3,3> - 2689836669U, // <3,2,2,4>: Cost 3 vext3 LHS, <2,2,4,5> - 2689836680U, // <3,2,2,5>: Cost 3 vext3 LHS, <2,2,5,7> - 2689836688U, // <3,2,2,6>: Cost 3 vext3 LHS, <2,2,6,6> - 3763578518U, // <3,2,2,7>: Cost 4 vext3 LHS, <2,2,7,3> - 1611892383U, // <3,2,2,u>: Cost 2 vext3 LHS, <2,2,u,3> - 1611450022U, // <3,2,3,0>: Cost 2 vext3 LHS, <2,3,0,1> - 2685191854U, // <3,2,3,1>: Cost 3 vext3 LHS, <2,3,1,0> - 2685191865U, // <3,2,3,2>: Cost 3 vext3 LHS, <2,3,2,2> - 2685191875U, // <3,2,3,3>: Cost 3 vext3 LHS, <2,3,3,3> - 1611450062U, // <3,2,3,4>: Cost 2 vext3 LHS, <2,3,4,5> - 2732967635U, // <3,2,3,5>: Cost 3 vext3 LHS, <2,3,5,1> - 2732967645U, // <3,2,3,6>: Cost 3 vext3 LHS, <2,3,6,2> - 2732967652U, // <3,2,3,7>: Cost 3 vext3 LHS, <2,3,7,0> - 1611450094U, // <3,2,3,u>: Cost 2 vext3 LHS, <2,3,u,1> - 2558279782U, // <3,2,4,0>: Cost 3 vext1 
<1,3,2,4>, LHS - 2558280602U, // <3,2,4,1>: Cost 3 vext1 <1,3,2,4>, <1,2,3,4> - 2732967692U, // <3,2,4,2>: Cost 3 vext3 LHS, <2,4,2,4> - 2685634326U, // <3,2,4,3>: Cost 3 vext3 LHS, <2,4,3,5> - 2558283062U, // <3,2,4,4>: Cost 3 vext1 <1,3,2,4>, RHS - 1549716790U, // <3,2,4,5>: Cost 2 vext2 <1,0,3,2>, RHS - 2689836844U, // <3,2,4,6>: Cost 3 vext3 LHS, <2,4,6,0> - 2229077736U, // <3,2,4,7>: Cost 3 vrev <2,3,7,4> - 1549717033U, // <3,2,4,u>: Cost 2 vext2 <1,0,3,2>, RHS - 2552316006U, // <3,2,5,0>: Cost 3 vext1 <0,3,2,5>, LHS - 2228643507U, // <3,2,5,1>: Cost 3 vrev <2,3,1,5> - 2689836896U, // <3,2,5,2>: Cost 3 vext3 LHS, <2,5,2,7> - 2685634408U, // <3,2,5,3>: Cost 3 vext3 LHS, <2,5,3,6> - 1155122894U, // <3,2,5,4>: Cost 2 vrev <2,3,4,5> - 2665263108U, // <3,2,5,5>: Cost 3 vext2 , <5,5,5,5> - 2689836932U, // <3,2,5,6>: Cost 3 vext3 LHS, <2,5,6,7> - 2665263272U, // <3,2,5,7>: Cost 3 vext2 , <5,7,5,7> - 1155417842U, // <3,2,5,u>: Cost 2 vrev <2,3,u,5> - 2689836953U, // <3,2,6,0>: Cost 3 vext3 LHS, <2,6,0,1> - 2689836964U, // <3,2,6,1>: Cost 3 vext3 LHS, <2,6,1,3> - 2689836976U, // <3,2,6,2>: Cost 3 vext3 LHS, <2,6,2,6> - 1611892666U, // <3,2,6,3>: Cost 2 vext3 LHS, <2,6,3,7> - 2689836993U, // <3,2,6,4>: Cost 3 vext3 LHS, <2,6,4,5> - 2689837004U, // <3,2,6,5>: Cost 3 vext3 LHS, <2,6,5,7> - 2689837013U, // <3,2,6,6>: Cost 3 vext3 LHS, <2,6,6,7> - 2665263950U, // <3,2,6,7>: Cost 3 vext2 , <6,7,0,1> - 1611892711U, // <3,2,6,u>: Cost 2 vext3 LHS, <2,6,u,7> - 2665264122U, // <3,2,7,0>: Cost 3 vext2 , <7,0,1,2> - 2623460419U, // <3,2,7,1>: Cost 3 vext2 <1,0,3,2>, <7,1,0,3> - 4169138340U, // <3,2,7,2>: Cost 4 vtrnr <1,3,5,7>, <0,2,0,2> - 2962358374U, // <3,2,7,3>: Cost 3 vzipr <1,5,3,7>, LHS - 2665264486U, // <3,2,7,4>: Cost 3 vext2 , <7,4,5,6> - 2228954841U, // <3,2,7,5>: Cost 3 vrev <2,3,5,7> - 2229028578U, // <3,2,7,6>: Cost 3 vrev <2,3,6,7> - 2665264748U, // <3,2,7,7>: Cost 3 vext2 , <7,7,7,7> - 2962358379U, // <3,2,7,u>: Cost 3 vzipr <1,5,3,7>, LHS - 1611892795U, // <3,2,u,0>: Cost 2 vext3 LHS, <2,u,0,1> - 1549719342U, // <3,2,u,1>: Cost 2 vext2 <1,0,3,2>, LHS - 1611449960U, // <3,2,u,2>: Cost 2 vext3 LHS, <2,2,2,2> - 1611892824U, // <3,2,u,3>: Cost 2 vext3 LHS, <2,u,3,3> - 1611892835U, // <3,2,u,4>: Cost 2 vext3 LHS, <2,u,4,5> - 1549719706U, // <3,2,u,5>: Cost 2 vext2 <1,0,3,2>, RHS - 2689837168U, // <3,2,u,6>: Cost 3 vext3 LHS, <2,u,6,0> - 2665265408U, // <3,2,u,7>: Cost 3 vext2 , - 1611892867U, // <3,2,u,u>: Cost 2 vext3 LHS, <2,u,u,1> - 2685192331U, // <3,3,0,0>: Cost 3 vext3 LHS, <3,0,0,0> - 1611450518U, // <3,3,0,1>: Cost 2 vext3 LHS, <3,0,1,2> - 2685634717U, // <3,3,0,2>: Cost 3 vext3 LHS, <3,0,2,0> - 2564294806U, // <3,3,0,3>: Cost 3 vext1 <2,3,3,0>, <3,0,1,2> - 2685634736U, // <3,3,0,4>: Cost 3 vext3 LHS, <3,0,4,1> - 2732968122U, // <3,3,0,5>: Cost 3 vext3 LHS, <3,0,5,2> - 3763579075U, // <3,3,0,6>: Cost 4 vext3 LHS, <3,0,6,2> - 4034053264U, // <3,3,0,7>: Cost 4 vzipr <1,2,3,0>, <1,5,3,7> - 1611450581U, // <3,3,0,u>: Cost 2 vext3 LHS, <3,0,u,2> - 2685192415U, // <3,3,1,0>: Cost 3 vext3 LHS, <3,1,0,3> - 1550385992U, // <3,3,1,1>: Cost 2 vext2 <1,1,3,3>, <1,1,3,3> - 2685192433U, // <3,3,1,2>: Cost 3 vext3 LHS, <3,1,2,3> - 2685634808U, // <3,3,1,3>: Cost 3 vext3 LHS, <3,1,3,1> - 2558332214U, // <3,3,1,4>: Cost 3 vext1 <1,3,3,1>, RHS - 2685634828U, // <3,3,1,5>: Cost 3 vext3 LHS, <3,1,5,3> - 3759376661U, // <3,3,1,6>: Cost 4 vext3 LHS, <3,1,6,3> - 2703477022U, // <3,3,1,7>: Cost 3 vext3 <3,1,7,3>, <3,1,7,3> - 1555031423U, // <3,3,1,u>: Cost 2 vext2 <1,u,3,3>, <1,u,3,3> - 2564309094U, // 
<3,3,2,0>: Cost 3 vext1 <2,3,3,2>, LHS - 2630100513U, // <3,3,2,1>: Cost 3 vext2 <2,1,3,3>, <2,1,3,3> - 1557022322U, // <3,3,2,2>: Cost 2 vext2 <2,2,3,3>, <2,2,3,3> - 2685192520U, // <3,3,2,3>: Cost 3 vext3 LHS, <3,2,3,0> - 2564312374U, // <3,3,2,4>: Cost 3 vext1 <2,3,3,2>, RHS - 2732968286U, // <3,3,2,5>: Cost 3 vext3 LHS, <3,2,5,4> - 2685634918U, // <3,3,2,6>: Cost 3 vext3 LHS, <3,2,6,3> - 2704140655U, // <3,3,2,7>: Cost 3 vext3 <3,2,7,3>, <3,2,7,3> - 1561004120U, // <3,3,2,u>: Cost 2 vext2 <2,u,3,3>, <2,u,3,3> - 1496547430U, // <3,3,3,0>: Cost 2 vext1 <3,3,3,3>, LHS - 2624129256U, // <3,3,3,1>: Cost 3 vext2 <1,1,3,3>, <3,1,1,3> - 2630764866U, // <3,3,3,2>: Cost 3 vext2 <2,2,3,3>, <3,2,2,3> - 336380006U, // <3,3,3,3>: Cost 1 vdup3 LHS - 1496550710U, // <3,3,3,4>: Cost 2 vext1 <3,3,3,3>, RHS - 2732968368U, // <3,3,3,5>: Cost 3 vext3 LHS, <3,3,5,5> - 2624129683U, // <3,3,3,6>: Cost 3 vext2 <1,1,3,3>, <3,6,3,7> - 2594182400U, // <3,3,3,7>: Cost 3 vext1 <7,3,3,3>, <7,3,3,3> - 336380006U, // <3,3,3,u>: Cost 1 vdup3 LHS - 2558353510U, // <3,3,4,0>: Cost 3 vext1 <1,3,3,4>, LHS - 2558354411U, // <3,3,4,1>: Cost 3 vext1 <1,3,3,4>, <1,3,3,4> - 2564327108U, // <3,3,4,2>: Cost 3 vext1 <2,3,3,4>, <2,3,3,4> - 2564327938U, // <3,3,4,3>: Cost 3 vext1 <2,3,3,4>, <3,4,5,6> - 2960343962U, // <3,3,4,4>: Cost 3 vzipr <1,2,3,4>, <1,2,3,4> - 1611893250U, // <3,3,4,5>: Cost 2 vext3 LHS, <3,4,5,6> - 2771619126U, // <3,3,4,6>: Cost 3 vuzpl <3,3,3,3>, RHS - 4034086032U, // <3,3,4,7>: Cost 4 vzipr <1,2,3,4>, <1,5,3,7> - 1611893277U, // <3,3,4,u>: Cost 2 vext3 LHS, <3,4,u,6> - 2558361702U, // <3,3,5,0>: Cost 3 vext1 <1,3,3,5>, LHS - 2558362604U, // <3,3,5,1>: Cost 3 vext1 <1,3,3,5>, <1,3,3,5> - 2558363342U, // <3,3,5,2>: Cost 3 vext1 <1,3,3,5>, <2,3,4,5> - 2732968512U, // <3,3,5,3>: Cost 3 vext3 LHS, <3,5,3,5> - 2558364982U, // <3,3,5,4>: Cost 3 vext1 <1,3,3,5>, RHS - 3101279950U, // <3,3,5,5>: Cost 3 vtrnr <2,3,4,5>, <2,3,4,5> - 2665934946U, // <3,3,5,6>: Cost 3 vext2 , <5,6,7,0> - 2826636598U, // <3,3,5,7>: Cost 3 vuzpr <1,3,1,3>, RHS - 2826636599U, // <3,3,5,u>: Cost 3 vuzpr <1,3,1,3>, RHS - 2732968568U, // <3,3,6,0>: Cost 3 vext3 LHS, <3,6,0,7> - 3763579521U, // <3,3,6,1>: Cost 4 vext3 LHS, <3,6,1,7> - 2732968586U, // <3,3,6,2>: Cost 3 vext3 LHS, <3,6,2,7> - 2732968595U, // <3,3,6,3>: Cost 3 vext3 LHS, <3,6,3,7> - 2732968604U, // <3,3,6,4>: Cost 3 vext3 LHS, <3,6,4,7> - 3763579557U, // <3,3,6,5>: Cost 4 vext3 LHS, <3,6,5,7> - 2732968621U, // <3,3,6,6>: Cost 3 vext3 LHS, <3,6,6,6> - 2657973099U, // <3,3,6,7>: Cost 3 vext2 <6,7,3,3>, <6,7,3,3> - 2658636732U, // <3,3,6,u>: Cost 3 vext2 <6,u,3,3>, <6,u,3,3> - 2558378086U, // <3,3,7,0>: Cost 3 vext1 <1,3,3,7>, LHS - 2558378990U, // <3,3,7,1>: Cost 3 vext1 <1,3,3,7>, <1,3,3,7> - 2564351687U, // <3,3,7,2>: Cost 3 vext1 <2,3,3,7>, <2,3,3,7> - 2661291264U, // <3,3,7,3>: Cost 3 vext2 <7,3,3,3>, <7,3,3,3> - 2558381366U, // <3,3,7,4>: Cost 3 vext1 <1,3,3,7>, RHS - 2732968694U, // <3,3,7,5>: Cost 3 vext3 LHS, <3,7,5,7> - 3781126907U, // <3,3,7,6>: Cost 4 vext3 <3,7,6,3>, <3,7,6,3> - 3095397376U, // <3,3,7,7>: Cost 3 vtrnr <1,3,5,7>, <1,3,5,7> - 2558383918U, // <3,3,7,u>: Cost 3 vext1 <1,3,3,7>, LHS - 1496547430U, // <3,3,u,0>: Cost 2 vext1 <3,3,3,3>, LHS - 1611893534U, // <3,3,u,1>: Cost 2 vext3 LHS, <3,u,1,2> - 1592858504U, // <3,3,u,2>: Cost 2 vext2 , - 336380006U, // <3,3,u,3>: Cost 1 vdup3 LHS - 1496550710U, // <3,3,u,4>: Cost 2 vext1 <3,3,3,3>, RHS - 1611893574U, // <3,3,u,5>: Cost 2 vext3 LHS, <3,u,5,6> - 2690280268U, // <3,3,u,6>: Cost 3 vext3 LHS, <3,u,6,3> - 
2826636841U, // <3,3,u,7>: Cost 3 vuzpr <1,3,1,3>, RHS - 336380006U, // <3,3,u,u>: Cost 1 vdup3 LHS - 2624798720U, // <3,4,0,0>: Cost 3 vext2 <1,2,3,4>, <0,0,0,0> - 1551056998U, // <3,4,0,1>: Cost 2 vext2 <1,2,3,4>, LHS - 2624798884U, // <3,4,0,2>: Cost 3 vext2 <1,2,3,4>, <0,2,0,2> - 3693232384U, // <3,4,0,3>: Cost 4 vext2 <0,3,3,4>, <0,3,1,4> - 2624799058U, // <3,4,0,4>: Cost 3 vext2 <1,2,3,4>, <0,4,1,5> - 1659227026U, // <3,4,0,5>: Cost 2 vext3 LHS, <4,0,5,1> - 1659227036U, // <3,4,0,6>: Cost 2 vext3 LHS, <4,0,6,2> - 3667973382U, // <3,4,0,7>: Cost 4 vext1 <7,3,4,0>, <7,3,4,0> - 1551057565U, // <3,4,0,u>: Cost 2 vext2 <1,2,3,4>, LHS - 2624799478U, // <3,4,1,0>: Cost 3 vext2 <1,2,3,4>, <1,0,3,2> - 2624799540U, // <3,4,1,1>: Cost 3 vext2 <1,2,3,4>, <1,1,1,1> - 1551057818U, // <3,4,1,2>: Cost 2 vext2 <1,2,3,4>, <1,2,3,4> - 2624799704U, // <3,4,1,3>: Cost 3 vext2 <1,2,3,4>, <1,3,1,3> - 2564377910U, // <3,4,1,4>: Cost 3 vext1 <2,3,4,1>, RHS - 2689838050U, // <3,4,1,5>: Cost 3 vext3 LHS, <4,1,5,0> - 2689838062U, // <3,4,1,6>: Cost 3 vext3 LHS, <4,1,6,3> - 2628117807U, // <3,4,1,7>: Cost 3 vext2 <1,7,3,4>, <1,7,3,4> - 1555039616U, // <3,4,1,u>: Cost 2 vext2 <1,u,3,4>, <1,u,3,4> - 3626180710U, // <3,4,2,0>: Cost 4 vext1 <0,3,4,2>, LHS - 2624800298U, // <3,4,2,1>: Cost 3 vext2 <1,2,3,4>, <2,1,4,3> - 2624800360U, // <3,4,2,2>: Cost 3 vext2 <1,2,3,4>, <2,2,2,2> - 2624800422U, // <3,4,2,3>: Cost 3 vext2 <1,2,3,4>, <2,3,0,1> - 2624800514U, // <3,4,2,4>: Cost 3 vext2 <1,2,3,4>, <2,4,1,3> - 2709965878U, // <3,4,2,5>: Cost 3 vext3 <4,2,5,3>, <4,2,5,3> - 2689838140U, // <3,4,2,6>: Cost 3 vext3 LHS, <4,2,6,0> - 2634090504U, // <3,4,2,7>: Cost 3 vext2 <2,7,3,4>, <2,7,3,4> - 2689838158U, // <3,4,2,u>: Cost 3 vext3 LHS, <4,2,u,0> - 2624800918U, // <3,4,3,0>: Cost 3 vext2 <1,2,3,4>, <3,0,1,2> - 2636081403U, // <3,4,3,1>: Cost 3 vext2 <3,1,3,4>, <3,1,3,4> - 2636745036U, // <3,4,3,2>: Cost 3 vext2 <3,2,3,4>, <3,2,3,4> - 2624801180U, // <3,4,3,3>: Cost 3 vext2 <1,2,3,4>, <3,3,3,3> - 2624801232U, // <3,4,3,4>: Cost 3 vext2 <1,2,3,4>, <3,4,0,1> - 2905836854U, // <3,4,3,5>: Cost 3 vzipl <3,3,3,3>, RHS - 3040054582U, // <3,4,3,6>: Cost 3 vtrnl <3,3,3,3>, RHS - 3702524611U, // <3,4,3,7>: Cost 4 vext2 <1,u,3,4>, <3,7,0,1> - 2624801566U, // <3,4,3,u>: Cost 3 vext2 <1,2,3,4>, <3,u,1,2> - 2564399206U, // <3,4,4,0>: Cost 3 vext1 <2,3,4,4>, LHS - 2564400026U, // <3,4,4,1>: Cost 3 vext1 <2,3,4,4>, <1,2,3,4> - 2564400845U, // <3,4,4,2>: Cost 3 vext1 <2,3,4,4>, <2,3,4,4> - 2570373542U, // <3,4,4,3>: Cost 3 vext1 <3,3,4,4>, <3,3,4,4> - 1659227344U, // <3,4,4,4>: Cost 2 vext3 LHS, <4,4,4,4> - 1551060278U, // <3,4,4,5>: Cost 2 vext2 <1,2,3,4>, RHS - 1659227364U, // <3,4,4,6>: Cost 2 vext3 LHS, <4,4,6,6> - 3668006154U, // <3,4,4,7>: Cost 4 vext1 <7,3,4,4>, <7,3,4,4> - 1551060521U, // <3,4,4,u>: Cost 2 vext2 <1,2,3,4>, RHS - 1490665574U, // <3,4,5,0>: Cost 2 vext1 <2,3,4,5>, LHS - 2689838341U, // <3,4,5,1>: Cost 3 vext3 LHS, <4,5,1,3> - 1490667214U, // <3,4,5,2>: Cost 2 vext1 <2,3,4,5>, <2,3,4,5> - 2564409494U, // <3,4,5,3>: Cost 3 vext1 <2,3,4,5>, <3,0,1,2> - 1490668854U, // <3,4,5,4>: Cost 2 vext1 <2,3,4,5>, RHS - 2689838381U, // <3,4,5,5>: Cost 3 vext3 LHS, <4,5,5,7> - 537709878U, // <3,4,5,6>: Cost 1 vext3 LHS, RHS - 2594272523U, // <3,4,5,7>: Cost 3 vext1 <7,3,4,5>, <7,3,4,5> - 537709896U, // <3,4,5,u>: Cost 1 vext3 LHS, RHS - 2689838411U, // <3,4,6,0>: Cost 3 vext3 LHS, <4,6,0,1> - 2558444534U, // <3,4,6,1>: Cost 3 vext1 <1,3,4,6>, <1,3,4,6> - 2666607098U, // <3,4,6,2>: Cost 3 vext2 , <6,2,7,3> - 2558446082U, // <3,4,6,3>: 
Cost 3 vext1 <1,3,4,6>, <3,4,5,6> - 1659227508U, // <3,4,6,4>: Cost 2 vext3 LHS, <4,6,4,6> - 2689838462U, // <3,4,6,5>: Cost 3 vext3 LHS, <4,6,5,7> - 2689838471U, // <3,4,6,6>: Cost 3 vext3 LHS, <4,6,6,7> - 2657981292U, // <3,4,6,7>: Cost 3 vext2 <6,7,3,4>, <6,7,3,4> - 1659227540U, // <3,4,6,u>: Cost 2 vext3 LHS, <4,6,u,2> - 2666607610U, // <3,4,7,0>: Cost 3 vext2 , <7,0,1,2> - 3702527072U, // <3,4,7,1>: Cost 4 vext2 <1,u,3,4>, <7,1,3,5> - 2660635824U, // <3,4,7,2>: Cost 3 vext2 <7,2,3,4>, <7,2,3,4> - 3644139945U, // <3,4,7,3>: Cost 4 vext1 <3,3,4,7>, <3,3,4,7> - 2666607974U, // <3,4,7,4>: Cost 3 vext2 , <7,4,5,6> - 2732969416U, // <3,4,7,5>: Cost 3 vext3 LHS, <4,7,5,0> - 2732969425U, // <3,4,7,6>: Cost 3 vext3 LHS, <4,7,6,0> - 2666608236U, // <3,4,7,7>: Cost 3 vext2 , <7,7,7,7> - 2664617622U, // <3,4,7,u>: Cost 3 vext2 <7,u,3,4>, <7,u,3,4> - 1490690150U, // <3,4,u,0>: Cost 2 vext1 <2,3,4,u>, LHS - 1551062830U, // <3,4,u,1>: Cost 2 vext2 <1,2,3,4>, LHS - 1490691793U, // <3,4,u,2>: Cost 2 vext1 <2,3,4,u>, <2,3,4,u> - 2624804796U, // <3,4,u,3>: Cost 3 vext2 <1,2,3,4>, - 1490693430U, // <3,4,u,4>: Cost 2 vext1 <2,3,4,u>, RHS - 1551063194U, // <3,4,u,5>: Cost 2 vext2 <1,2,3,4>, RHS - 537710121U, // <3,4,u,6>: Cost 1 vext3 LHS, RHS - 2594297102U, // <3,4,u,7>: Cost 3 vext1 <7,3,4,u>, <7,3,4,u> - 537710139U, // <3,4,u,u>: Cost 1 vext3 LHS, RHS - 3692576768U, // <3,5,0,0>: Cost 4 vext2 <0,2,3,5>, <0,0,0,0> - 2618835046U, // <3,5,0,1>: Cost 3 vext2 <0,2,3,5>, LHS - 2618835138U, // <3,5,0,2>: Cost 3 vext2 <0,2,3,5>, <0,2,3,5> - 3692577024U, // <3,5,0,3>: Cost 4 vext2 <0,2,3,5>, <0,3,1,4> - 2689838690U, // <3,5,0,4>: Cost 3 vext3 LHS, <5,0,4,1> - 2732969579U, // <3,5,0,5>: Cost 3 vext3 LHS, <5,0,5,1> - 2732969588U, // <3,5,0,6>: Cost 3 vext3 LHS, <5,0,6,1> - 2246963055U, // <3,5,0,7>: Cost 3 vrev <5,3,7,0> - 2618835613U, // <3,5,0,u>: Cost 3 vext2 <0,2,3,5>, LHS - 2594308198U, // <3,5,1,0>: Cost 3 vext1 <7,3,5,1>, LHS - 3692577588U, // <3,5,1,1>: Cost 4 vext2 <0,2,3,5>, <1,1,1,1> - 2624807835U, // <3,5,1,2>: Cost 3 vext2 <1,2,3,5>, <1,2,3,5> - 2625471468U, // <3,5,1,3>: Cost 3 vext2 <1,3,3,5>, <1,3,3,5> - 2626135101U, // <3,5,1,4>: Cost 3 vext2 <1,4,3,5>, <1,4,3,5> - 2594311888U, // <3,5,1,5>: Cost 3 vext1 <7,3,5,1>, <5,1,7,3> - 3699877107U, // <3,5,1,6>: Cost 4 vext2 <1,4,3,5>, <1,6,5,7> - 1641680592U, // <3,5,1,7>: Cost 2 vext3 <5,1,7,3>, <5,1,7,3> - 1641754329U, // <3,5,1,u>: Cost 2 vext3 <5,1,u,3>, <5,1,u,3> - 3692578274U, // <3,5,2,0>: Cost 4 vext2 <0,2,3,5>, <2,0,5,3> - 2630116899U, // <3,5,2,1>: Cost 3 vext2 <2,1,3,5>, <2,1,3,5> - 3692578408U, // <3,5,2,2>: Cost 4 vext2 <0,2,3,5>, <2,2,2,2> - 2625472206U, // <3,5,2,3>: Cost 3 vext2 <1,3,3,5>, <2,3,4,5> - 2632107798U, // <3,5,2,4>: Cost 3 vext2 <2,4,3,5>, <2,4,3,5> - 2715938575U, // <3,5,2,5>: Cost 3 vext3 <5,2,5,3>, <5,2,5,3> - 3692578746U, // <3,5,2,6>: Cost 4 vext2 <0,2,3,5>, <2,6,3,7> - 2716086049U, // <3,5,2,7>: Cost 3 vext3 <5,2,7,3>, <5,2,7,3> - 2634762330U, // <3,5,2,u>: Cost 3 vext2 <2,u,3,5>, <2,u,3,5> - 3692578966U, // <3,5,3,0>: Cost 4 vext2 <0,2,3,5>, <3,0,1,2> - 2636089596U, // <3,5,3,1>: Cost 3 vext2 <3,1,3,5>, <3,1,3,5> - 3699214668U, // <3,5,3,2>: Cost 4 vext2 <1,3,3,5>, <3,2,3,4> - 2638080412U, // <3,5,3,3>: Cost 3 vext2 <3,4,3,5>, <3,3,3,3> - 2618837506U, // <3,5,3,4>: Cost 3 vext2 <0,2,3,5>, <3,4,5,6> - 2832844494U, // <3,5,3,5>: Cost 3 vuzpr <2,3,4,5>, <2,3,4,5> - 4033415682U, // <3,5,3,6>: Cost 4 vzipr <1,1,3,3>, <3,4,5,6> - 3095072054U, // <3,5,3,7>: Cost 3 vtrnr <1,3,1,3>, RHS - 3095072055U, // <3,5,3,u>: Cost 3 vtrnr 
- 2600304742U, // <3,5,4,0>: Cost 3 vext1 , LHS
- 3763580815U, // <3,5,4,1>: Cost 4 vext3 LHS, <5,4,1,5>
- 2564474582U, // <3,5,4,2>: Cost 3 vext1 <2,3,5,4>, <2,3,5,4>
- 3699879044U, // <3,5,4,3>: Cost 4 vext2 <1,4,3,5>, <4,3,5,0>
- 2600308022U, // <3,5,4,4>: Cost 3 vext1 , RHS
- 2618838326U, // <3,5,4,5>: Cost 3 vext2 <0,2,3,5>, RHS
- 2772454710U, // <3,5,4,6>: Cost 3 vuzpl <3,4,5,6>, RHS
- 1659228102U, // <3,5,4,7>: Cost 2 vext3 LHS, <5,4,7,6>
- 1659228111U, // <3,5,4,u>: Cost 2 vext3 LHS, <5,4,u,6>
- 2570453094U, // <3,5,5,0>: Cost 3 vext1 <3,3,5,5>, LHS
- 2624810704U, // <3,5,5,1>: Cost 3 vext2 <1,2,3,5>, <5,1,7,3>
- 2570454734U, // <3,5,5,2>: Cost 3 vext1 <3,3,5,5>, <2,3,4,5>
- 2570455472U, // <3,5,5,3>: Cost 3 vext1 <3,3,5,5>, <3,3,5,5>
- 2570456374U, // <3,5,5,4>: Cost 3 vext1 <3,3,5,5>, RHS
- 1659228164U, // <3,5,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
- 2732969998U, // <3,5,5,6>: Cost 3 vext3 LHS, <5,5,6,6>
- 1659228184U, // <3,5,5,7>: Cost 2 vext3 LHS, <5,5,7,7>
- 1659228193U, // <3,5,5,u>: Cost 2 vext3 LHS, <5,5,u,7>
- 2732970020U, // <3,5,6,0>: Cost 3 vext3 LHS, <5,6,0,1>
- 2732970035U, // <3,5,6,1>: Cost 3 vext3 LHS, <5,6,1,7>
- 2564490968U, // <3,5,6,2>: Cost 3 vext1 <2,3,5,6>, <2,3,5,6>
- 2732970050U, // <3,5,6,3>: Cost 3 vext3 LHS, <5,6,3,4>
- 2732970060U, // <3,5,6,4>: Cost 3 vext3 LHS, <5,6,4,5>
- 2732970071U, // <3,5,6,5>: Cost 3 vext3 LHS, <5,6,5,7>
- 2732970080U, // <3,5,6,6>: Cost 3 vext3 LHS, <5,6,6,7>
- 1659228258U, // <3,5,6,7>: Cost 2 vext3 LHS, <5,6,7,0>
- 1659228267U, // <3,5,6,u>: Cost 2 vext3 LHS, <5,6,u,0>
- 1484783718U, // <3,5,7,0>: Cost 2 vext1 <1,3,5,7>, LHS
- 1484784640U, // <3,5,7,1>: Cost 2 vext1 <1,3,5,7>, <1,3,5,7>
- 2558527080U, // <3,5,7,2>: Cost 3 vext1 <1,3,5,7>, <2,2,2,2>
- 2558527638U, // <3,5,7,3>: Cost 3 vext1 <1,3,5,7>, <3,0,1,2>
- 1484786998U, // <3,5,7,4>: Cost 2 vext1 <1,3,5,7>, RHS
- 1659228328U, // <3,5,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
- 2732970154U, // <3,5,7,6>: Cost 3 vext3 LHS, <5,7,6,0>
- 2558531180U, // <3,5,7,7>: Cost 3 vext1 <1,3,5,7>, <7,7,7,7>
- 1484789550U, // <3,5,7,u>: Cost 2 vext1 <1,3,5,7>, LHS
- 1484791910U, // <3,5,u,0>: Cost 2 vext1 <1,3,5,u>, LHS
- 1484792833U, // <3,5,u,1>: Cost 2 vext1 <1,3,5,u>, <1,3,5,u>
- 2558535272U, // <3,5,u,2>: Cost 3 vext1 <1,3,5,u>, <2,2,2,2>
- 2558535830U, // <3,5,u,3>: Cost 3 vext1 <1,3,5,u>, <3,0,1,2>
- 1484795190U, // <3,5,u,4>: Cost 2 vext1 <1,3,5,u>, RHS
- 1659228409U, // <3,5,u,5>: Cost 2 vext3 LHS, <5,u,5,7>
- 2772457626U, // <3,5,u,6>: Cost 3 vuzpl <3,4,5,6>, RHS
- 1646326023U, // <3,5,u,7>: Cost 2 vext3 <5,u,7,3>, <5,u,7,3>
- 1484797742U, // <3,5,u,u>: Cost 2 vext1 <1,3,5,u>, LHS
- 2558541926U, // <3,6,0,0>: Cost 3 vext1 <1,3,6,0>, LHS
- 2689839393U, // <3,6,0,1>: Cost 3 vext3 LHS, <6,0,1,2>
- 2689839404U, // <3,6,0,2>: Cost 3 vext3 LHS, <6,0,2,4>
- 3706519808U, // <3,6,0,3>: Cost 4 vext2 <2,5,3,6>, <0,3,1,4>
- 2689839420U, // <3,6,0,4>: Cost 3 vext3 LHS, <6,0,4,2>
- 2732970314U, // <3,6,0,5>: Cost 3 vext3 LHS, <6,0,5,7>
- 2732970316U, // <3,6,0,6>: Cost 3 vext3 LHS, <6,0,6,0>
- 2960313654U, // <3,6,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
- 2689839456U, // <3,6,0,u>: Cost 3 vext3 LHS, <6,0,u,2>
- 3763581290U, // <3,6,1,0>: Cost 4 vext3 LHS, <6,1,0,3>
- 3763581297U, // <3,6,1,1>: Cost 4 vext3 LHS, <6,1,1,1>
- 2624816028U, // <3,6,1,2>: Cost 3 vext2 <1,2,3,6>, <1,2,3,6>
- 3763581315U, // <3,6,1,3>: Cost 4 vext3 LHS, <6,1,3,1>
- 2626143294U, // <3,6,1,4>: Cost 3 vext2 <1,4,3,6>, <1,4,3,6>
- 3763581335U, // <3,6,1,5>: Cost 4 vext3 LHS, <6,1,5,3>
- 2721321376U, // <3,6,1,6>: Cost 3 vext3 <6,1,6,3>, <6,1,6,3>
- 2721395113U, // <3,6,1,7>: Cost 3 vext3 <6,1,7,3>, <6,1,7,3>
- 2628797826U, // <3,6,1,u>: Cost 3 vext2 <1,u,3,6>, <1,u,3,6>
- 2594390118U, // <3,6,2,0>: Cost 3 vext1 <7,3,6,2>, LHS
- 2721616324U, // <3,6,2,1>: Cost 3 vext3 <6,2,1,3>, <6,2,1,3>
- 2630788725U, // <3,6,2,2>: Cost 3 vext2 <2,2,3,6>, <2,2,3,6>
- 3763581395U, // <3,6,2,3>: Cost 4 vext3 LHS, <6,2,3,0>
- 2632115991U, // <3,6,2,4>: Cost 3 vext2 <2,4,3,6>, <2,4,3,6>
- 2632779624U, // <3,6,2,5>: Cost 3 vext2 <2,5,3,6>, <2,5,3,6>
- 2594394618U, // <3,6,2,6>: Cost 3 vext1 <7,3,6,2>, <6,2,7,3>
- 1648316922U, // <3,6,2,7>: Cost 2 vext3 <6,2,7,3>, <6,2,7,3>
- 1648390659U, // <3,6,2,u>: Cost 2 vext3 <6,2,u,3>, <6,2,u,3>
- 3693914262U, // <3,6,3,0>: Cost 4 vext2 <0,4,3,6>, <3,0,1,2>
- 3638281176U, // <3,6,3,1>: Cost 4 vext1 <2,3,6,3>, <1,3,1,3>
- 3696568678U, // <3,6,3,2>: Cost 4 vext2 <0,u,3,6>, <3,2,6,3>
- 2638088604U, // <3,6,3,3>: Cost 3 vext2 <3,4,3,6>, <3,3,3,3>
- 2632780290U, // <3,6,3,4>: Cost 3 vext2 <2,5,3,6>, <3,4,5,6>
- 3712494145U, // <3,6,3,5>: Cost 4 vext2 <3,5,3,6>, <3,5,3,6>
- 3698559612U, // <3,6,3,6>: Cost 4 vext2 <1,2,3,6>, <3,6,1,2>
- 2959674678U, // <3,6,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
- 2959674679U, // <3,6,3,u>: Cost 3 vzipr <1,1,3,3>, RHS
- 3763581536U, // <3,6,4,0>: Cost 4 vext3 LHS, <6,4,0,6>
- 2722943590U, // <3,6,4,1>: Cost 3 vext3 <6,4,1,3>, <6,4,1,3>
- 2732970609U, // <3,6,4,2>: Cost 3 vext3 LHS, <6,4,2,5>
- 3698560147U, // <3,6,4,3>: Cost 4 vext2 <1,2,3,6>, <4,3,6,6>
- 2732970628U, // <3,6,4,4>: Cost 3 vext3 LHS, <6,4,4,6>
- 2689839757U, // <3,6,4,5>: Cost 3 vext3 LHS, <6,4,5,6>
- 2732970640U, // <3,6,4,6>: Cost 3 vext3 LHS, <6,4,6,0>
- 2960346422U, // <3,6,4,7>: Cost 3 vzipr <1,2,3,4>, RHS
- 2689839784U, // <3,6,4,u>: Cost 3 vext3 LHS, <6,4,u,6>
- 2576498790U, // <3,6,5,0>: Cost 3 vext1 <4,3,6,5>, LHS
- 3650241270U, // <3,6,5,1>: Cost 4 vext1 <4,3,6,5>, <1,0,3,2>
- 2732970692U, // <3,6,5,2>: Cost 3 vext3 LHS, <6,5,2,7>
- 2576501250U, // <3,6,5,3>: Cost 3 vext1 <4,3,6,5>, <3,4,5,6>
- 2576501906U, // <3,6,5,4>: Cost 3 vext1 <4,3,6,5>, <4,3,6,5>
- 3650244622U, // <3,6,5,5>: Cost 4 vext1 <4,3,6,5>, <5,5,6,6>
- 4114633528U, // <3,6,5,6>: Cost 4 vtrnl <3,4,5,6>, <6,6,6,6>
- 2732970735U, // <3,6,5,7>: Cost 3 vext3 LHS, <6,5,7,5>
- 2576504622U, // <3,6,5,u>: Cost 3 vext1 <4,3,6,5>, LHS
- 2732970749U, // <3,6,6,0>: Cost 3 vext3 LHS, <6,6,0,1>
- 2724270856U, // <3,6,6,1>: Cost 3 vext3 <6,6,1,3>, <6,6,1,3>
- 2624819706U, // <3,6,6,2>: Cost 3 vext2 <1,2,3,6>, <6,2,7,3>
- 3656223234U, // <3,6,6,3>: Cost 4 vext1 <5,3,6,6>, <3,4,5,6>
- 2732970788U, // <3,6,6,4>: Cost 3 vext3 LHS, <6,6,4,4>
- 2732970800U, // <3,6,6,5>: Cost 3 vext3 LHS, <6,6,5,7>
- 1659228984U, // <3,6,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1659228994U, // <3,6,6,7>: Cost 2 vext3 LHS, <6,6,7,7>
- 1659229003U, // <3,6,6,u>: Cost 2 vext3 LHS, <6,6,u,7>
- 1659229006U, // <3,6,7,0>: Cost 2 vext3 LHS, <6,7,0,1>
- 2558600201U, // <3,6,7,1>: Cost 3 vext1 <1,3,6,7>, <1,3,6,7>
- 2558601146U, // <3,6,7,2>: Cost 3 vext1 <1,3,6,7>, <2,6,3,7>
- 2725081963U, // <3,6,7,3>: Cost 3 vext3 <6,7,3,3>, <6,7,3,3>
- 1659229046U, // <3,6,7,4>: Cost 2 vext3 LHS, <6,7,4,5>
- 2715423611U, // <3,6,7,5>: Cost 3 vext3 <5,1,7,3>, <6,7,5,1>
- 2722059141U, // <3,6,7,6>: Cost 3 vext3 <6,2,7,3>, <6,7,6,2>
- 2962361654U, // <3,6,7,7>: Cost 3 vzipr <1,5,3,7>, RHS
- 1659229078U, // <3,6,7,u>: Cost 2 vext3 LHS, <6,7,u,1>
- 1659229087U, // <3,6,u,0>: Cost 2 vext3 LHS, <6,u,0,1>
- 2689840041U, // <3,6,u,1>: Cost 3 vext3 LHS, <6,u,1,2>
- 2558609339U, // <3,6,u,2>: Cost 3 vext1 <1,3,6,u>, <2,6,3,u>
- 2576525853U, // <3,6,u,3>: Cost 3 vext1 <4,3,6,u>, <3,4,u,6>
- 1659229127U, // <3,6,u,4>: Cost 2 vext3 LHS, <6,u,4,5>
- 2689840081U, // <3,6,u,5>: Cost 3 vext3 LHS, <6,u,5,6>
- 1659228984U, // <3,6,u,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1652298720U, // <3,6,u,7>: Cost 2 vext3 <6,u,7,3>, <6,u,7,3>
- 1659229159U, // <3,6,u,u>: Cost 2 vext3 LHS, <6,u,u,1>
- 2626813952U, // <3,7,0,0>: Cost 3 vext2 <1,5,3,7>, <0,0,0,0>
- 1553072230U, // <3,7,0,1>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626814116U, // <3,7,0,2>: Cost 3 vext2 <1,5,3,7>, <0,2,0,2>
- 3700556028U, // <3,7,0,3>: Cost 4 vext2 <1,5,3,7>, <0,3,1,0>
- 2626814290U, // <3,7,0,4>: Cost 3 vext2 <1,5,3,7>, <0,4,1,5>
- 2582507375U, // <3,7,0,5>: Cost 3 vext1 <5,3,7,0>, <5,3,7,0>
- 2588480072U, // <3,7,0,6>: Cost 3 vext1 <6,3,7,0>, <6,3,7,0>
- 2732971055U, // <3,7,0,7>: Cost 3 vext3 LHS, <7,0,7,1>
- 1553072797U, // <3,7,0,u>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626814710U, // <3,7,1,0>: Cost 3 vext2 <1,5,3,7>, <1,0,3,2>
- 2626814772U, // <3,7,1,1>: Cost 3 vext2 <1,5,3,7>, <1,1,1,1>
- 2626814870U, // <3,7,1,2>: Cost 3 vext2 <1,5,3,7>, <1,2,3,0>
- 2625487854U, // <3,7,1,3>: Cost 3 vext2 <1,3,3,7>, <1,3,3,7>
- 2582514998U, // <3,7,1,4>: Cost 3 vext1 <5,3,7,1>, RHS
- 1553073296U, // <3,7,1,5>: Cost 2 vext2 <1,5,3,7>, <1,5,3,7>
- 2627478753U, // <3,7,1,6>: Cost 3 vext2 <1,6,3,7>, <1,6,3,7>
- 2727367810U, // <3,7,1,7>: Cost 3 vext3 <7,1,7,3>, <7,1,7,3>
- 1555064195U, // <3,7,1,u>: Cost 2 vext2 <1,u,3,7>, <1,u,3,7>
- 2588491878U, // <3,7,2,0>: Cost 3 vext1 <6,3,7,2>, LHS
- 3700557318U, // <3,7,2,1>: Cost 4 vext2 <1,5,3,7>, <2,1,0,3>
- 2626815592U, // <3,7,2,2>: Cost 3 vext2 <1,5,3,7>, <2,2,2,2>
- 2626815654U, // <3,7,2,3>: Cost 3 vext2 <1,5,3,7>, <2,3,0,1>
- 2588495158U, // <3,7,2,4>: Cost 3 vext1 <6,3,7,2>, RHS
- 2632787817U, // <3,7,2,5>: Cost 3 vext2 <2,5,3,7>, <2,5,3,7>
- 1559709626U, // <3,7,2,6>: Cost 2 vext2 <2,6,3,7>, <2,6,3,7>
- 2728031443U, // <3,7,2,7>: Cost 3 vext3 <7,2,7,3>, <7,2,7,3>
- 1561036892U, // <3,7,2,u>: Cost 2 vext2 <2,u,3,7>, <2,u,3,7>
- 2626816150U, // <3,7,3,0>: Cost 3 vext2 <1,5,3,7>, <3,0,1,2>
- 2626816268U, // <3,7,3,1>: Cost 3 vext2 <1,5,3,7>, <3,1,5,3>
- 2633451878U, // <3,7,3,2>: Cost 3 vext2 <2,6,3,7>, <3,2,6,3>
- 2626816412U, // <3,7,3,3>: Cost 3 vext2 <1,5,3,7>, <3,3,3,3>
- 2626816514U, // <3,7,3,4>: Cost 3 vext2 <1,5,3,7>, <3,4,5,6>
- 2638760514U, // <3,7,3,5>: Cost 3 vext2 <3,5,3,7>, <3,5,3,7>
- 2639424147U, // <3,7,3,6>: Cost 3 vext2 <3,6,3,7>, <3,6,3,7>
- 2826961920U, // <3,7,3,7>: Cost 3 vuzpr <1,3,5,7>, <1,3,5,7>
- 2626816798U, // <3,7,3,u>: Cost 3 vext2 <1,5,3,7>, <3,u,1,2>
- 2582536294U, // <3,7,4,0>: Cost 3 vext1 <5,3,7,4>, LHS
- 2582537360U, // <3,7,4,1>: Cost 3 vext1 <5,3,7,4>, <1,5,3,7>
- 2588510138U, // <3,7,4,2>: Cost 3 vext1 <6,3,7,4>, <2,6,3,7>
- 3700558996U, // <3,7,4,3>: Cost 4 vext2 <1,5,3,7>, <4,3,6,7>
- 2582539574U, // <3,7,4,4>: Cost 3 vext1 <5,3,7,4>, RHS
- 1553075510U, // <3,7,4,5>: Cost 2 vext2 <1,5,3,7>, RHS
- 2588512844U, // <3,7,4,6>: Cost 3 vext1 <6,3,7,4>, <6,3,7,4>
- 2564625766U, // <3,7,4,7>: Cost 3 vext1 <2,3,7,4>, <7,4,5,6>
- 1553075753U, // <3,7,4,u>: Cost 2 vext2 <1,5,3,7>, RHS
- 2732971398U, // <3,7,5,0>: Cost 3 vext3 LHS, <7,5,0,2>
- 2626817744U, // <3,7,5,1>: Cost 3 vext2 <1,5,3,7>, <5,1,7,3>
- 3700559649U, // <3,7,5,2>: Cost 4 vext2 <1,5,3,7>, <5,2,7,3>
- 2626817903U, // <3,7,5,3>: Cost 3 vext2 <1,5,3,7>, <5,3,7,0>
- 2258728203U, // <3,7,5,4>: Cost 3 vrev <7,3,4,5>
- 2732971446U, // <3,7,5,5>: Cost 3 vext3 LHS, <7,5,5,5>
- 2732971457U, // <3,7,5,6>: Cost 3 vext3 LHS, <7,5,6,7>
- 2826964278U, // <3,7,5,7>: Cost 3 vuzpr <1,3,5,7>, RHS
- 2826964279U, // <3,7,5,u>: Cost 3 vuzpr <1,3,5,7>, RHS
- 2732971478U, // <3,7,6,0>: Cost 3 vext3 LHS, <7,6,0,1>
- 2732971486U, // <3,7,6,1>: Cost 3 vext3 LHS, <7,6,1,0>
- 2633454074U, // <3,7,6,2>: Cost 3 vext2 <2,6,3,7>, <6,2,7,3>
- 2633454152U, // <3,7,6,3>: Cost 3 vext2 <2,6,3,7>, <6,3,7,0>
- 2732971518U, // <3,7,6,4>: Cost 3 vext3 LHS, <7,6,4,5>
- 2732971526U, // <3,7,6,5>: Cost 3 vext3 LHS, <7,6,5,4>
- 2732971537U, // <3,7,6,6>: Cost 3 vext3 LHS, <7,6,6,6>
- 2732971540U, // <3,7,6,7>: Cost 3 vext3 LHS, <7,6,7,0>
- 2726041124U, // <3,7,6,u>: Cost 3 vext3 <6,u,7,3>, <7,6,u,7>
- 2570616934U, // <3,7,7,0>: Cost 3 vext1 <3,3,7,7>, LHS
- 2570617856U, // <3,7,7,1>: Cost 3 vext1 <3,3,7,7>, <1,3,5,7>
- 2564646635U, // <3,7,7,2>: Cost 3 vext1 <2,3,7,7>, <2,3,7,7>
- 2570619332U, // <3,7,7,3>: Cost 3 vext1 <3,3,7,7>, <3,3,7,7>
- 2570620214U, // <3,7,7,4>: Cost 3 vext1 <3,3,7,7>, RHS
- 2582564726U, // <3,7,7,5>: Cost 3 vext1 <5,3,7,7>, <5,3,7,7>
- 2588537423U, // <3,7,7,6>: Cost 3 vext1 <6,3,7,7>, <6,3,7,7>
- 1659229804U, // <3,7,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1659229804U, // <3,7,7,u>: Cost 2 vext3 LHS, <7,7,7,7>
- 2626819795U, // <3,7,u,0>: Cost 3 vext2 <1,5,3,7>,
- 1553078062U, // <3,7,u,1>: Cost 2 vext2 <1,5,3,7>, LHS
- 2626819973U, // <3,7,u,2>: Cost 3 vext2 <1,5,3,7>,
- 2826961565U, // <3,7,u,3>: Cost 3 vuzpr <1,3,5,7>, LHS
- 2626820159U, // <3,7,u,4>: Cost 3 vext2 <1,5,3,7>,
- 1553078426U, // <3,7,u,5>: Cost 2 vext2 <1,5,3,7>, RHS
- 1595545808U, // <3,7,u,6>: Cost 2 vext2 ,
- 1659229804U, // <3,7,u,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1553078629U, // <3,7,u,u>: Cost 2 vext2 <1,5,3,7>, LHS
- 1611448320U, // <3,u,0,0>: Cost 2 vext3 LHS, <0,0,0,0>
- 1611896531U, // <3,u,0,1>: Cost 2 vext3 LHS,
- 1659672284U, // <3,u,0,2>: Cost 2 vext3 LHS,
- 1616099045U, // <3,u,0,3>: Cost 2 vext3 LHS,
- 2685638381U, // <3,u,0,4>: Cost 3 vext3 LHS,
- 1663874806U, // <3,u,0,5>: Cost 2 vext3 LHS,
- 1663874816U, // <3,u,0,6>: Cost 2 vext3 LHS,
- 2960313672U, // <3,u,0,7>: Cost 3 vzipr <1,2,3,0>, RHS
- 1611896594U, // <3,u,0,u>: Cost 2 vext3 LHS,
- 1549763324U, // <3,u,1,0>: Cost 2 vext2 <1,0,3,u>, <1,0,3,u>
- 1550426957U, // <3,u,1,1>: Cost 2 vext2 <1,1,3,u>, <1,1,3,u>
- 537712430U, // <3,u,1,2>: Cost 1 vext3 LHS, LHS
- 1616541495U, // <3,u,1,3>: Cost 2 vext3 LHS,
- 1490930998U, // <3,u,1,4>: Cost 2 vext1 <2,3,u,1>, RHS
- 1553081489U, // <3,u,1,5>: Cost 2 vext2 <1,5,3,u>, <1,5,3,u>
- 2627486946U, // <3,u,1,6>: Cost 3 vext2 <1,6,3,u>, <1,6,3,u>
- 1659230043U, // <3,u,1,7>: Cost 2 vext3 LHS,
- 537712484U, // <3,u,1,u>: Cost 1 vext3 LHS, LHS
- 1611890852U, // <3,u,2,0>: Cost 2 vext3 LHS, <0,2,0,2>
- 2624833102U, // <3,u,2,1>: Cost 3 vext2 <1,2,3,u>, <2,1,u,3>
- 1557063287U, // <3,u,2,2>: Cost 2 vext2 <2,2,3,u>, <2,2,3,u>
- 1616099205U, // <3,u,2,3>: Cost 2 vext3 LHS,
- 1611890892U, // <3,u,2,4>: Cost 2 vext3 LHS, <0,2,4,6>
- 2689841054U, // <3,u,2,5>: Cost 3 vext3 LHS,
- 1559717819U, // <3,u,2,6>: Cost 2 vext2 <2,6,3,u>, <2,6,3,u>
- 1659230124U, // <3,u,2,7>: Cost 2 vext3 LHS,
- 1616541618U, // <3,u,2,u>: Cost 2 vext3 LHS,
- 1611896764U, // <3,u,3,0>: Cost 2 vext3 LHS,
- 1484973079U, // <3,u,3,1>: Cost 2 vext1 <1,3,u,3>, <1,3,u,3>
- 2685638607U, // <3,u,3,2>: Cost 3 vext3 LHS,
- 336380006U, // <3,u,3,3>: Cost 1 vdup3 LHS
- 1611896804U, // <3,u,3,4>: Cost 2 vext3 LHS,
- 1616541679U, // <3,u,3,5>: Cost 2 vext3 LHS,
- 2690283512U, // <3,u,3,6>: Cost 3 vext3 LHS,
- 2959674696U, // <3,u,3,7>: Cost 3 vzipr <1,1,3,3>, RHS
- 336380006U, // <3,u,3,u>: Cost 1 vdup3 LHS
- 2558722150U, // <3,u,4,0>: Cost 3 vext1 <1,3,u,4>, LHS
- 1659672602U, // <3,u,4,1>: Cost 2 vext3 LHS,
- 1659672612U, // <3,u,4,2>: Cost 2 vext3 LHS,
- 2689841196U, // <3,u,4,3>: Cost 3 vext3 LHS,
- 1659227344U, // <3,u,4,4>: Cost 2 vext3 LHS, <4,4,4,4>
- 1611896895U, // <3,u,4,5>: Cost 2 vext3 LHS,
- 1663875144U, // <3,u,4,6>: Cost 2 vext3 LHS,
- 1659230289U, // <3,u,4,7>: Cost 2 vext3 LHS,
- 1611896922U, // <3,u,4,u>: Cost 2 vext3 LHS,
- 1490960486U, // <3,u,5,0>: Cost 2 vext1 <2,3,u,5>, LHS
- 2689841261U, // <3,u,5,1>: Cost 3 vext3 LHS,
- 1490962162U, // <3,u,5,2>: Cost 2 vext1 <2,3,u,5>, <2,3,u,5>
- 1616541823U, // <3,u,5,3>: Cost 2 vext3 LHS,
- 1490963766U, // <3,u,5,4>: Cost 2 vext1 <2,3,u,5>, RHS
- 1659228164U, // <3,u,5,5>: Cost 2 vext3 LHS, <5,5,5,5>
- 537712794U, // <3,u,5,6>: Cost 1 vext3 LHS, RHS
- 1659230371U, // <3,u,5,7>: Cost 2 vext3 LHS,
- 537712812U, // <3,u,5,u>: Cost 1 vext3 LHS, RHS
- 2689841327U, // <3,u,6,0>: Cost 3 vext3 LHS,
- 2558739482U, // <3,u,6,1>: Cost 3 vext1 <1,3,u,6>, <1,3,u,6>
- 2689841351U, // <3,u,6,2>: Cost 3 vext3 LHS,
- 1616099536U, // <3,u,6,3>: Cost 2 vext3 LHS,
- 1659227508U, // <3,u,6,4>: Cost 2 vext3 LHS, <4,6,4,6>
- 2690283746U, // <3,u,6,5>: Cost 3 vext3 LHS,
- 1659228984U, // <3,u,6,6>: Cost 2 vext3 LHS, <6,6,6,6>
- 1659230445U, // <3,u,6,7>: Cost 2 vext3 LHS,
- 1616099581U, // <3,u,6,u>: Cost 2 vext3 LHS,
- 1485004902U, // <3,u,7,0>: Cost 2 vext1 <1,3,u,7>, LHS
- 1485005851U, // <3,u,7,1>: Cost 2 vext1 <1,3,u,7>, <1,3,u,7>
- 2558748264U, // <3,u,7,2>: Cost 3 vext1 <1,3,u,7>, <2,2,2,2>
- 3095397021U, // <3,u,7,3>: Cost 3 vtrnr <1,3,5,7>, LHS
- 1485008182U, // <3,u,7,4>: Cost 2 vext1 <1,3,u,7>, RHS
- 1659228328U, // <3,u,7,5>: Cost 2 vext3 LHS, <5,7,5,7>
- 2722060599U, // <3,u,7,6>: Cost 3 vext3 <6,2,7,3>,
- 1659229804U, // <3,u,7,7>: Cost 2 vext3 LHS, <7,7,7,7>
- 1485010734U, // <3,u,7,u>: Cost 2 vext1 <1,3,u,7>, LHS
- 1616099665U, // <3,u,u,0>: Cost 2 vext3 LHS,
- 1611897179U, // <3,u,u,1>: Cost 2 vext3 LHS,
- 537712997U, // <3,u,u,2>: Cost 1 vext3 LHS, LHS
- 336380006U, // <3,u,u,3>: Cost 1 vdup3 LHS
- 1616099705U, // <3,u,u,4>: Cost 2 vext3 LHS,
- 1611897219U, // <3,u,u,5>: Cost 2 vext3 LHS,
- 537713037U, // <3,u,u,6>: Cost 1 vext3 LHS, RHS
- 1659230607U, // <3,u,u,7>: Cost 2 vext3 LHS,
- 537713051U, // <3,u,u,u>: Cost 1 vext3 LHS, LHS
- 2691907584U, // <4,0,0,0>: Cost 3 vext3 <1,2,3,4>, <0,0,0,0>
- 2691907594U, // <4,0,0,1>: Cost 3 vext3 <1,2,3,4>, <0,0,1,1>
- 2691907604U, // <4,0,0,2>: Cost 3 vext3 <1,2,3,4>, <0,0,2,2>
- 3709862144U, // <4,0,0,3>: Cost 4 vext2 <3,1,4,0>, <0,3,1,4>
- 2684682280U, // <4,0,0,4>: Cost 3 vext3 <0,0,4,4>, <0,0,4,4>
- 3694600633U, // <4,0,0,5>: Cost 4 vext2 <0,5,4,0>, <0,5,4,0>
- 3291431290U, // <4,0,0,6>: Cost 4 vrev <0,4,6,0>
- 3668342067U, // <4,0,0,7>: Cost 4 vext1 <7,4,0,0>, <7,4,0,0>
- 2691907657U, // <4,0,0,u>: Cost 3 vext3 <1,2,3,4>, <0,0,u,1>
- 2570715238U, // <4,0,1,0>: Cost 3 vext1 <3,4,0,1>, LHS
- 2570716058U, // <4,0,1,1>: Cost 3 vext1 <3,4,0,1>, <1,2,3,4>
- 1618165862U, // <4,0,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2570717648U, // <4,0,1,3>: Cost 3 vext1 <3,4,0,1>, <3,4,0,1>
- 2570718518U, // <4,0,1,4>: Cost 3 vext1 <3,4,0,1>, RHS
- 2594607206U, // <4,0,1,5>: Cost 3 vext1 <7,4,0,1>, <5,6,7,4>
- 3662377563U, // <4,0,1,6>: Cost 4 vext1 <6,4,0,1>, <6,4,0,1>
- 2594608436U, // <4,0,1,7>: Cost 3 vext1 <7,4,0,1>, <7,4,0,1>
- 1618165916U, // <4,0,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2685714598U, // <4,0,2,0>: Cost 3 vext3 <0,2,0,4>, <0,2,0,4>
- 3759530159U, // <4,0,2,1>: Cost 4 vext3 <0,2,1,4>, <0,2,1,4>
- 2685862072U, // <4,0,2,2>: Cost 3 vext3 <0,2,2,4>, <0,2,2,4>
- 2631476937U, // <4,0,2,3>: Cost 3 vext2 <2,3,4,0>, <2,3,4,0>
- 2685714636U, // <4,0,2,4>: Cost 3 vext3 <0,2,0,4>, <0,2,4,6>
- 3765649622U, // <4,0,2,5>: Cost 4 vext3 <1,2,3,4>, <0,2,5,7>
- 2686157020U, // <4,0,2,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
- 3668358453U, // <4,0,2,7>: Cost 4 vext1 <7,4,0,2>, <7,4,0,2>
- 2686304494U, // <4,0,2,u>: Cost 3 vext3 <0,2,u,4>, <0,2,u,4>
- 3632529510U, // <4,0,3,0>: Cost 4 vext1 <1,4,0,3>, LHS
- 2686451968U, // <4,0,3,1>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
- 2686525705U, // <4,0,3,2>: Cost 3 vext3 <0,3,2,4>, <0,3,2,4>
- 3760341266U, // <4,0,3,3>: Cost 4 vext3 <0,3,3,4>, <0,3,3,4>
- 3632532790U, // <4,0,3,4>: Cost 4 vext1 <1,4,0,3>, RHS
- 3913254606U, // <4,0,3,5>: Cost 4 vuzpr <3,4,5,0>, <2,3,4,5>
- 3705219740U, // <4,0,3,6>: Cost 4 vext2 <2,3,4,0>, <3,6,4,7>
- 3713845990U, // <4,0,3,7>: Cost 4 vext2 <3,7,4,0>, <3,7,4,0>
- 2686451968U, // <4,0,3,u>: Cost 3 vext3 <0,3,1,4>, <0,3,1,4>
- 2552823910U, // <4,0,4,0>: Cost 3 vext1 <0,4,0,4>, LHS
- 2691907922U, // <4,0,4,1>: Cost 3 vext3 <1,2,3,4>, <0,4,1,5>
- 2691907932U, // <4,0,4,2>: Cost 3 vext3 <1,2,3,4>, <0,4,2,6>
- 3626567830U, // <4,0,4,3>: Cost 4 vext1 <0,4,0,4>, <3,0,1,2>
- 2552827190U, // <4,0,4,4>: Cost 3 vext1 <0,4,0,4>, RHS
- 2631478582U, // <4,0,4,5>: Cost 3 vext2 <2,3,4,0>, RHS
- 3626570017U, // <4,0,4,6>: Cost 4 vext1 <0,4,0,4>, <6,0,1,2>
- 3668374839U, // <4,0,4,7>: Cost 4 vext1 <7,4,0,4>, <7,4,0,4>
- 2552829742U, // <4,0,4,u>: Cost 3 vext1 <0,4,0,4>, LHS
- 2558804070U, // <4,0,5,0>: Cost 3 vext1 <1,4,0,5>, LHS
- 1839644774U, // <4,0,5,1>: Cost 2 vzipl RHS, LHS
- 2913386660U, // <4,0,5,2>: Cost 3 vzipl RHS, <0,2,0,2>
- 2570750420U, // <4,0,5,3>: Cost 3 vext1 <3,4,0,5>, <3,4,0,5>
- 2558807350U, // <4,0,5,4>: Cost 3 vext1 <1,4,0,5>, RHS
- 3987128750U, // <4,0,5,5>: Cost 4 vzipl RHS, <0,5,2,7>
- 3987128822U, // <4,0,5,6>: Cost 4 vzipl RHS, <0,6,1,7>
- 2594641208U, // <4,0,5,7>: Cost 3 vext1 <7,4,0,5>, <7,4,0,5>
- 1839645341U, // <4,0,5,u>: Cost 2 vzipl RHS, LHS
- 2552840294U, // <4,0,6,0>: Cost 3 vext1 <0,4,0,6>, LHS
- 3047604234U, // <4,0,6,1>: Cost 3 vtrnl RHS, <0,0,1,1>
- 1973862502U, // <4,0,6,2>: Cost 2 vtrnl RHS, LHS
- 2570758613U, // <4,0,6,3>: Cost 3 vext1 <3,4,0,6>, <3,4,0,6>
- 2552843574U, // <4,0,6,4>: Cost 3 vext1 <0,4,0,6>, RHS
- 2217664887U, // <4,0,6,5>: Cost 3 vrev <0,4,5,6>
- 3662418528U, // <4,0,6,6>: Cost 4 vext1 <6,4,0,6>, <6,4,0,6>
- 2658022257U, // <4,0,6,7>: Cost 3 vext2 <6,7,4,0>, <6,7,4,0>
- 1973862556U, // <4,0,6,u>: Cost 2 vtrnl RHS, LHS
- 3731764218U, // <4,0,7,0>: Cost 4 vext2 <6,7,4,0>, <7,0,1,2>
- 3988324454U, // <4,0,7,1>: Cost 4 vzipl <4,7,5,0>, LHS
- 4122034278U, // <4,0,7,2>: Cost 4 vtrnl <4,6,7,1>, LHS
- 3735082246U, // <4,0,7,3>: Cost 4 vext2 <7,3,4,0>, <7,3,4,0>
- 3731764536U, // <4,0,7,4>: Cost 4 vext2 <6,7,4,0>, <7,4,0,5>
- 3937145718U, // <4,0,7,5>: Cost 4 vuzpr <7,4,5,0>, <6,7,4,5>
- 3737073145U, // <4,0,7,6>: Cost 4 vext2 <7,6,4,0>, <7,6,4,0>
- 3731764844U, // <4,0,7,7>: Cost 4 vext2 <6,7,4,0>, <7,7,7,7>
- 4122034332U, // <4,0,7,u>: Cost 4 vtrnl <4,6,7,1>, LHS
- 2552856678U, // <4,0,u,0>: Cost 3 vext1 <0,4,0,u>, LHS
- 1841635430U, // <4,0,u,1>: Cost 2 vzipl RHS, LHS
- 1618166429U, // <4,0,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2570774999U, // <4,0,u,3>: Cost 3 vext1 <3,4,0,u>, <3,4,0,u>
- 2552859958U, // <4,0,u,4>: Cost 3 vext1 <0,4,0,u>, RHS
- 2631481498U, // <4,0,u,5>: Cost 3 vext2 <2,3,4,0>, RHS
- 2686157020U, // <4,0,u,6>: Cost 3 vext3 <0,2,6,4>, <0,2,6,4>
- 2594665787U, // <4,0,u,7>: Cost 3 vext1 <7,4,0,u>, <7,4,0,u>
- 1618166483U, // <4,0,u,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2617548837U, // <4,1,0,0>: Cost 3 vext2 <0,0,4,1>, <0,0,4,1>
- 2622857318U, // <4,1,0,1>: Cost 3 vext2 <0,u,4,1>, LHS
- 3693281484U, // <4,1,0,2>: Cost 4 vext2 <0,3,4,1>, <0,2,4,6>
- 2691908342U, // <4,1,0,3>: Cost 3 vext3 <1,2,3,4>, <1,0,3,2>
- 2622857554U, // <4,1,0,4>: Cost 3 vext2 <0,u,4,1>, <0,4,1,5>
- 3764470538U, // <4,1,0,5>: Cost 4 vext3 <1,0,5,4>, <1,0,5,4>
- 3695272459U, // <4,1,0,6>: Cost 4 vext2 <0,6,4,1>, <0,6,4,1>
- 3733094980U, // <4,1,0,7>: Cost 4 vext2 <7,0,4,1>, <0,7,1,4>
- 2622857885U, // <4,1,0,u>: Cost 3 vext2 <0,u,4,1>, LHS
- 3696599798U, // <4,1,1,0>: Cost 4 vext2 <0,u,4,1>, <1,0,3,2>
- 2691097399U, // <4,1,1,1>: Cost 3 vext3 <1,1,1,4>, <1,1,1,4>
- 2631484314U, // <4,1,1,2>: Cost 3 vext2 <2,3,4,1>, <1,2,3,4>
- 2691908424U, // <4,1,1,3>: Cost 3 vext3 <1,2,3,4>, <1,1,3,3>
- 3696600125U, // <4,1,1,4>: Cost 4 vext2 <0,u,4,1>, <1,4,3,5>
- 3696600175U, // <4,1,1,5>: Cost 4 vext2 <0,u,4,1>, <1,5,0,1>
- 3696600307U, // <4,1,1,6>: Cost 4 vext2 <0,u,4,1>, <1,6,5,7>
- 3668423997U, // <4,1,1,7>: Cost 4 vext1 <7,4,1,1>, <7,4,1,1>
- 2691908469U, // <4,1,1,u>: Cost 3 vext3 <1,2,3,4>, <1,1,u,3>
- 2570797158U, // <4,1,2,0>: Cost 3 vext1 <3,4,1,2>, LHS
- 2570797978U, // <4,1,2,1>: Cost 3 vext1 <3,4,1,2>, <1,2,3,4>
- 3696600680U, // <4,1,2,2>: Cost 4 vext2 <0,u,4,1>, <2,2,2,2>
- 1618166682U, // <4,1,2,3>: Cost 2 vext3 <1,2,3,4>, <1,2,3,4>
- 2570800438U, // <4,1,2,4>: Cost 3 vext1 <3,4,1,2>, RHS
- 3765650347U, // <4,1,2,5>: Cost 4 vext3 <1,2,3,4>, <1,2,5,3>
- 3696601018U, // <4,1,2,6>: Cost 4 vext2 <0,u,4,1>, <2,6,3,7>
- 3668432190U, // <4,1,2,7>: Cost 4 vext1 <7,4,1,2>, <7,4,1,2>
- 1618535367U, // <4,1,2,u>: Cost 2 vext3 <1,2,u,4>, <1,2,u,4>
- 2564833382U, // <4,1,3,0>: Cost 3 vext1 <2,4,1,3>, LHS
- 2691908568U, // <4,1,3,1>: Cost 3 vext3 <1,2,3,4>, <1,3,1,3>
- 2691908578U, // <4,1,3,2>: Cost 3 vext3 <1,2,3,4>, <1,3,2,4>
- 2692572139U, // <4,1,3,3>: Cost 3 vext3 <1,3,3,4>, <1,3,3,4>
- 2564836662U, // <4,1,3,4>: Cost 3 vext1 <2,4,1,3>, RHS
- 2691908608U, // <4,1,3,5>: Cost 3 vext3 <1,2,3,4>, <1,3,5,7>
- 2588725862U, // <4,1,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 3662468090U, // <4,1,3,7>: Cost 4 vext1 <6,4,1,3>, <7,0,1,2>
- 2691908631U, // <4,1,3,u>: Cost 3 vext3 <1,2,3,4>, <1,3,u,3>
- 3760194590U, // <4,1,4,0>: Cost 4 vext3 <0,3,1,4>, <1,4,0,1>
- 3693947874U, // <4,1,4,1>: Cost 4 vext2 <0,4,4,1>, <4,1,5,0>
- 3765650484U, // <4,1,4,2>: Cost 4 vext3 <1,2,3,4>, <1,4,2,5>
- 3113877606U, // <4,1,4,3>: Cost 3 vtrnr <4,4,4,4>, LHS
- 3760194630U, // <4,1,4,4>: Cost 4 vext3 <0,3,1,4>, <1,4,4,5>
- 2622860598U, // <4,1,4,5>: Cost 3 vext2 <0,u,4,1>, RHS
- 3297436759U, // <4,1,4,6>: Cost 4 vrev <1,4,6,4>
- 3800007772U, // <4,1,4,7>: Cost 4 vext3 <7,0,1,4>, <1,4,7,0>
- 2622860841U, // <4,1,4,u>: Cost 3 vext2 <0,u,4,1>, RHS
- 1479164006U, // <4,1,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
- 2552906486U, // <4,1,5,1>: Cost 3 vext1 <0,4,1,5>, <1,0,3,2>
- 2552907299U, // <4,1,5,2>: Cost 3 vext1 <0,4,1,5>, <2,1,3,5>
- 2552907926U, // <4,1,5,3>: Cost 3 vext1 <0,4,1,5>, <3,0,1,2>
- 1479167286U, // <4,1,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
- 2913387664U, // <4,1,5,5>: Cost 3 vzipl RHS, <1,5,3,7>
- 2600686074U, // <4,1,5,6>: Cost 3 vext1 , <6,2,7,3>
- 2600686586U, // <4,1,5,7>: Cost 3 vext1 , <7,0,1,2>
- 1479169838U, // <4,1,5,u>: Cost 2 vext1 <0,4,1,5>, LHS
- 2552914022U, // <4,1,6,0>: Cost 3 vext1 <0,4,1,6>, LHS
- 2558886708U, // <4,1,6,1>: Cost 3 vext1 <1,4,1,6>, <1,1,1,1>
- 4028205206U, // <4,1,6,2>: Cost 4 vzipr <0,2,4,6>, <3,0,1,2>
- 3089858662U, // <4,1,6,3>: Cost 3 vtrnr <0,4,2,6>, LHS
- 2552917302U, // <4,1,6,4>: Cost 3 vext1 <0,4,1,6>, RHS
- 2223637584U, // <4,1,6,5>: Cost 3 vrev <1,4,5,6>
- 4121347081U, // <4,1,6,6>: Cost 4 vtrnl RHS, <1,3,6,7>
- 3721155406U, // <4,1,6,7>: Cost 4 vext2 <5,0,4,1>, <6,7,0,1>
- 2552919854U, // <4,1,6,u>: Cost 3 vext1 <0,4,1,6>, LHS
- 2659357716U, // <4,1,7,0>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
- 3733763173U, // <4,1,7,1>: Cost 4 vext2 <7,1,4,1>, <7,1,4,1>
- 3734426806U, // <4,1,7,2>: Cost 4 vext2 <7,2,4,1>, <7,2,4,1>
- 2695226671U, // <4,1,7,3>: Cost 3 vext3 <1,7,3,4>, <1,7,3,4>
- 3721155942U, // <4,1,7,4>: Cost 4 vext2 <5,0,4,1>, <7,4,5,6>
- 3721155976U, // <4,1,7,5>: Cost 4 vext2 <5,0,4,1>, <7,5,0,4>
- 3662500458U, // <4,1,7,6>: Cost 4 vext1 <6,4,1,7>, <6,4,1,7>
- 3721156204U, // <4,1,7,7>: Cost 4 vext2 <5,0,4,1>, <7,7,7,7>
- 2659357716U, // <4,1,7,u>: Cost 3 vext2 <7,0,4,1>, <7,0,4,1>
- 1479188582U, // <4,1,u,0>: Cost 2 vext1 <0,4,1,u>, LHS
- 2552931062U, // <4,1,u,1>: Cost 3 vext1 <0,4,1,u>, <1,0,3,2>
- 2552931944U, // <4,1,u,2>: Cost 3 vext1 <0,4,1,u>, <2,2,2,2>
- 1622148480U, // <4,1,u,3>: Cost 2 vext3 <1,u,3,4>, <1,u,3,4>
- 1479191862U, // <4,1,u,4>: Cost 2 vext1 <0,4,1,u>, RHS
- 2622863514U, // <4,1,u,5>: Cost 3 vext2 <0,u,4,1>, RHS
- 2588725862U, // <4,1,u,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 2600686586U, // <4,1,u,7>: Cost 3 vext1 , <7,0,1,2>
- 1479194414U, // <4,1,u,u>: Cost 2 vext1 <0,4,1,u>, LHS
- 2617557030U, // <4,2,0,0>: Cost 3 vext2 <0,0,4,2>, <0,0,4,2>
- 2622865510U, // <4,2,0,1>: Cost 3 vext2 <0,u,4,2>, LHS
- 2622865612U, // <4,2,0,2>: Cost 3 vext2 <0,u,4,2>, <0,2,4,6>
- 3693289753U, // <4,2,0,3>: Cost 4 vext2 <0,3,4,2>, <0,3,4,2>
- 2635473244U, // <4,2,0,4>: Cost 3 vext2 <3,0,4,2>, <0,4,2,6>
- 3765650918U, // <4,2,0,5>: Cost 4 vext3 <1,2,3,4>, <2,0,5,7>
- 2696775148U, // <4,2,0,6>: Cost 3 vext3 <2,0,6,4>, <2,0,6,4>
- 3695944285U, // <4,2,0,7>: Cost 4 vext2 <0,7,4,2>, <0,7,4,2>
- 2622866077U, // <4,2,0,u>: Cost 3 vext2 <0,u,4,2>, LHS
- 3696607990U, // <4,2,1,0>: Cost 4 vext2 <0,u,4,2>, <1,0,3,2>
- 3696608052U, // <4,2,1,1>: Cost 4 vext2 <0,u,4,2>, <1,1,1,1>
- 3696608150U, // <4,2,1,2>: Cost 4 vext2 <0,u,4,2>, <1,2,3,0>
- 3895574630U, // <4,2,1,3>: Cost 4 vuzpr <0,4,u,2>, LHS
- 2691909162U, // <4,2,1,4>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
- 3696608400U, // <4,2,1,5>: Cost 4 vext2 <0,u,4,2>, <1,5,3,7>
- 3760784956U, // <4,2,1,6>: Cost 4 vext3 <0,4,0,4>, <2,1,6,3>
- 3773908549U, // <4,2,1,7>: Cost 5 vext3 <2,5,7,4>, <2,1,7,3>
- 2691909162U, // <4,2,1,u>: Cost 3 vext3 <1,2,3,4>, <2,1,4,3>
- 3696608748U, // <4,2,2,0>: Cost 4 vext2 <0,u,4,2>, <2,0,6,4>
- 3696608828U, // <4,2,2,1>: Cost 4 vext2 <0,u,4,2>, <2,1,6,3>
- 2691909224U, // <4,2,2,2>: Cost 3 vext3 <1,2,3,4>, <2,2,2,2>
- 2691909234U, // <4,2,2,3>: Cost 3 vext3 <1,2,3,4>, <2,2,3,3>
- 3759605368U, // <4,2,2,4>: Cost 4 vext3 <0,2,2,4>, <2,2,4,0>
- 3696609156U, // <4,2,2,5>: Cost 4 vext2 <0,u,4,2>, <2,5,6,7>
- 3760785040U, // <4,2,2,6>: Cost 4 vext3 <0,4,0,4>, <2,2,6,6>
- 3668505927U, // <4,2,2,7>: Cost 4 vext1 <7,4,2,2>, <7,4,2,2>
- 2691909279U, // <4,2,2,u>: Cost 3 vext3 <1,2,3,4>, <2,2,u,3>
- 2691909286U, // <4,2,3,0>: Cost 3 vext3 <1,2,3,4>, <2,3,0,1>
- 3764840111U, // <4,2,3,1>: Cost 4 vext3 <1,1,1,4>, <2,3,1,1>
- 3765651129U, // <4,2,3,2>: Cost 4 vext3 <1,2,3,4>, <2,3,2,2>
- 2698544836U, // <4,2,3,3>: Cost 3 vext3 <2,3,3,4>, <2,3,3,4>
- 2685863630U, // <4,2,3,4>: Cost 3 vext3 <0,2,2,4>, <2,3,4,5>
- 2698692310U, // <4,2,3,5>: Cost 3 vext3 <2,3,5,4>, <2,3,5,4>
- 3772507871U, // <4,2,3,6>: Cost 4 vext3 <2,3,6,4>, <2,3,6,4>
- 2698839784U, // <4,2,3,7>: Cost 3 vext3 <2,3,7,4>, <2,3,7,4>
- 2691909358U, // <4,2,3,u>: Cost 3 vext3 <1,2,3,4>, <2,3,u,1>
- 2564915302U, // <4,2,4,0>: Cost 3 vext1 <2,4,2,4>, LHS
- 2564916122U, // <4,2,4,1>: Cost 3 vext1 <2,4,2,4>, <1,2,3,4>
- 2564917004U, // <4,2,4,2>: Cost 3 vext1 <2,4,2,4>, <2,4,2,4>
- 2699208469U, // <4,2,4,3>: Cost 3 vext3 <2,4,3,4>, <2,4,3,4>
- 2564918582U, // <4,2,4,4>: Cost 3 vext1 <2,4,2,4>, RHS
- 2622868790U, // <4,2,4,5>: Cost 3 vext2 <0,u,4,2>, RHS
- 2229667632U, // <4,2,4,6>: Cost 3 vrev <2,4,6,4>
- 3800082229U, // <4,2,4,7>: Cost 4 vext3 <7,0,2,4>, <2,4,7,0>
- 2622869033U, // <4,2,4,u>: Cost 3 vext2 <0,u,4,2>, RHS
- 2552979558U, // <4,2,5,0>: Cost 3 vext1 <0,4,2,5>, LHS
- 2558952342U, // <4,2,5,1>: Cost 3 vext1 <1,4,2,5>, <1,2,3,0>
- 2564925032U, // <4,2,5,2>: Cost 3 vext1 <2,4,2,5>, <2,2,2,2>
- 2967060582U, // <4,2,5,3>: Cost 3 vzipr <2,3,4,5>, LHS
- 2552982838U, // <4,2,5,4>: Cost 3 vext1 <0,4,2,5>, RHS
- 3987130190U, // <4,2,5,5>: Cost 4 vzipl RHS, <2,5,0,7>
- 2913388474U, // <4,2,5,6>: Cost 3 vzipl RHS, <2,6,3,7>
- 3895577910U, // <4,2,5,7>: Cost 4 vuzpr <0,4,u,2>, RHS
- 2552985390U, // <4,2,5,u>: Cost 3 vext1 <0,4,2,5>, LHS
- 1479245926U, // <4,2,6,0>: Cost 2 vext1 <0,4,2,6>, LHS
- 2552988406U, // <4,2,6,1>: Cost 3 vext1 <0,4,2,6>, <1,0,3,2>
- 2552989288U, // <4,2,6,2>: Cost 3 vext1 <0,4,2,6>, <2,2,2,2>
- 2954461286U, // <4,2,6,3>: Cost 3 vzipr <0,2,4,6>, LHS
- 1479249206U, // <4,2,6,4>: Cost 2 vext1 <0,4,2,6>, RHS
- 2229610281U, // <4,2,6,5>: Cost 3 vrev <2,4,5,6>
- 2600767994U, // <4,2,6,6>: Cost 3 vext1 , <6,2,7,3>
- 2600768506U, // <4,2,6,7>: Cost 3 vext1 , <7,0,1,2>
- 1479251758U, // <4,2,6,u>: Cost 2 vext1 <0,4,2,6>, LHS
- 2659365909U, // <4,2,7,0>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
- 3733771366U, // <4,2,7,1>: Cost 4 vext2 <7,1,4,2>, <7,1,4,2>
- 3734434999U, // <4,2,7,2>: Cost 4 vext2 <7,2,4,2>, <7,2,4,2>
- 2701199368U, // <4,2,7,3>: Cost 3 vext3 <2,7,3,4>, <2,7,3,4>
- 4175774618U, // <4,2,7,4>: Cost 4 vtrnr <2,4,5,7>, <1,2,3,4>
- 3303360298U, // <4,2,7,5>: Cost 4 vrev <2,4,5,7>
- 3727136217U, // <4,2,7,6>: Cost 4 vext2 <6,0,4,2>, <7,6,0,4>
- 3727136364U, // <4,2,7,7>: Cost 4 vext2 <6,0,4,2>, <7,7,7,7>
- 2659365909U, // <4,2,7,u>: Cost 3 vext2 <7,0,4,2>, <7,0,4,2>
- 1479262310U, // <4,2,u,0>: Cost 2 vext1 <0,4,2,u>, LHS
- 2553004790U, // <4,2,u,1>: Cost 3 vext1 <0,4,2,u>, <1,0,3,2>
- 2553005672U, // <4,2,u,2>: Cost 3 vext1 <0,4,2,u>, <2,2,2,2>
- 2954477670U, // <4,2,u,3>: Cost 3 vzipr <0,2,4,u>, LHS
- 1479265590U, // <4,2,u,4>: Cost 2 vext1 <0,4,2,u>, RHS
- 2622871706U, // <4,2,u,5>: Cost 3 vext2 <0,u,4,2>, RHS
- 2229700404U, // <4,2,u,6>: Cost 3 vrev <2,4,6,u>
- 2600784890U, // <4,2,u,7>: Cost 3 vext1 , <7,0,1,2>
- 1479268142U, // <4,2,u,u>: Cost 2 vext1 <0,4,2,u>, LHS
- 3765651595U, // <4,3,0,0>: Cost 4 vext3 <1,2,3,4>, <3,0,0,0>
- 2691909782U, // <4,3,0,1>: Cost 3 vext3 <1,2,3,4>, <3,0,1,2>
- 2702452897U, // <4,3,0,2>: Cost 3 vext3 <3,0,2,4>, <3,0,2,4>
- 3693297946U, // <4,3,0,3>: Cost 4 vext2 <0,3,4,3>, <0,3,4,3>
- 3760711856U, // <4,3,0,4>: Cost 4 vext3 <0,3,u,4>, <3,0,4,1>
- 2235533820U, // <4,3,0,5>: Cost 3 vrev <3,4,5,0>
- 3309349381U, // <4,3,0,6>: Cost 4 vrev <3,4,6,0>
- 3668563278U, // <4,3,0,7>: Cost 4 vext1 <7,4,3,0>, <7,4,3,0>
- 2691909845U, // <4,3,0,u>: Cost 3 vext3 <1,2,3,4>, <3,0,u,2>
- 2235173328U, // <4,3,1,0>: Cost 3 vrev <3,4,0,1>
- 3764840678U, // <4,3,1,1>: Cost 4 vext3 <1,1,1,4>, <3,1,1,1>
- 2630173594U, // <4,3,1,2>: Cost 3 vext2 <2,1,4,3>, <1,2,3,4>
- 2703190267U, // <4,3,1,3>: Cost 3 vext3 <3,1,3,4>, <3,1,3,4>
- 3760195840U, // <4,3,1,4>: Cost 4 vext3 <0,3,1,4>, <3,1,4,0>
- 3765651724U, // <4,3,1,5>: Cost 4 vext3 <1,2,3,4>, <3,1,5,3>
- 3309357574U, // <4,3,1,6>: Cost 4 vrev <3,4,6,1>
- 3769633054U, // <4,3,1,7>: Cost 4 vext3 <1,u,3,4>, <3,1,7,3>
- 2703558952U, // <4,3,1,u>: Cost 3 vext3 <3,1,u,4>, <3,1,u,4>
- 3626770534U, // <4,3,2,0>: Cost 4 vext1 <0,4,3,2>, LHS
- 2630174250U, // <4,3,2,1>: Cost 3 vext2 <2,1,4,3>, <2,1,4,3>
- 3765651777U, // <4,3,2,2>: Cost 4 vext3 <1,2,3,4>, <3,2,2,2>
- 2703853900U, // <4,3,2,3>: Cost 3 vext3 <3,2,3,4>, <3,2,3,4>
- 3626773814U, // <4,3,2,4>: Cost 4 vext1 <0,4,3,2>, RHS
- 2704001374U, // <4,3,2,5>: Cost 3 vext3 <3,2,5,4>, <3,2,5,4>
- 3765651814U, // <4,3,2,6>: Cost 4 vext3 <1,2,3,4>, <3,2,6,3>
- 3769633135U, // <4,3,2,7>: Cost 4 vext3 <1,u,3,4>, <3,2,7,3>
- 2634819681U, // <4,3,2,u>: Cost 3 vext2 <2,u,4,3>, <2,u,4,3>
- 3765651839U, // <4,3,3,0>: Cost 4 vext3 <1,2,3,4>, <3,3,0,1>
- 3765651848U, // <4,3,3,1>: Cost 4 vext3 <1,2,3,4>, <3,3,1,1>
- 3710552404U, // <4,3,3,2>: Cost 4 vext2 <3,2,4,3>, <3,2,4,3>
- 2691910044U, // <4,3,3,3>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
- 2704591270U, // <4,3,3,4>: Cost 3 vext3 <3,3,4,4>, <3,3,4,4>
- 3769633202U, // <4,3,3,5>: Cost 4 vext3 <1,u,3,4>, <3,3,5,7>
- 3703917212U, // <4,3,3,6>: Cost 4 vext2 <2,1,4,3>, <3,6,4,7>
- 3769633220U, // <4,3,3,7>: Cost 4 vext3 <1,u,3,4>, <3,3,7,7>
- 2691910044U, // <4,3,3,u>: Cost 3 vext3 <1,2,3,4>, <3,3,3,3>
- 2691910096U, // <4,3,4,0>: Cost 3 vext3 <1,2,3,4>, <3,4,0,1>
- 2691910106U, // <4,3,4,1>: Cost 3 vext3 <1,2,3,4>, <3,4,1,2>
- 2564990741U, // <4,3,4,2>: Cost 3 vext1 <2,4,3,4>, <2,4,3,4>
- 3765651946U, // <4,3,4,3>: Cost 4 vext3 <1,2,3,4>, <3,4,3,0>
- 2691910136U, // <4,3,4,4>: Cost 3 vext3 <1,2,3,4>, <3,4,4,5>
- 2686454274U, // <4,3,4,5>: Cost 3 vext3 <0,3,1,4>, <3,4,5,6>
- 2235640329U, // <4,3,4,6>: Cost 3 vrev <3,4,6,4>
- 3801483792U, // <4,3,4,7>: Cost 4 vext3 <7,2,3,4>, <3,4,7,2>
- 2691910168U, // <4,3,4,u>: Cost 3 vext3 <1,2,3,4>, <3,4,u,1>
- 2559025254U, // <4,3,5,0>: Cost 3 vext1 <1,4,3,5>, LHS
- 2559026237U, // <4,3,5,1>: Cost 3 vext1 <1,4,3,5>, <1,4,3,5>
- 2564998862U, // <4,3,5,2>: Cost 3 vext1 <2,4,3,5>, <2,3,4,5>
- 2570971548U, // <4,3,5,3>: Cost 3 vext1 <3,4,3,5>, <3,3,3,3>
- 2559028534U, // <4,3,5,4>: Cost 3 vext1 <1,4,3,5>, RHS
- 4163519477U, // <4,3,5,5>: Cost 4 vtrnr <0,4,1,5>, <1,3,4,5>
- 3309390346U, // <4,3,5,6>: Cost 4 vrev <3,4,6,5>
- 2706139747U, // <4,3,5,7>: Cost 3 vext3 <3,5,7,4>, <3,5,7,4>
- 2559031086U, // <4,3,5,u>: Cost 3 vext1 <1,4,3,5>, LHS
- 2559033446U, // <4,3,6,0>: Cost 3 vext1 <1,4,3,6>, LHS
- 2559034430U, // <4,3,6,1>: Cost 3 vext1 <1,4,3,6>, <1,4,3,6>
- 2565007127U, // <4,3,6,2>: Cost 3 vext1 <2,4,3,6>, <2,4,3,6>
- 2570979740U, // <4,3,6,3>: Cost 3 vext1 <3,4,3,6>, <3,3,3,3>
- 2559036726U, // <4,3,6,4>: Cost 3 vext1 <1,4,3,6>, RHS
- 1161841154U, // <4,3,6,5>: Cost 2 vrev <3,4,5,6>
- 4028203932U, // <4,3,6,6>: Cost 4 vzipr <0,2,4,6>, <1,2,3,6>
- 2706803380U, // <4,3,6,7>: Cost 3 vext3 <3,6,7,4>, <3,6,7,4>
- 1162062365U, // <4,3,6,u>: Cost 2 vrev <3,4,u,6>
- 3769633475U, // <4,3,7,0>: Cost 4 vext3 <1,u,3,4>, <3,7,0,1>
- 3769633488U, // <4,3,7,1>: Cost 4 vext3 <1,u,3,4>, <3,7,1,5>
- 3638757144U, // <4,3,7,2>: Cost 4 vext1 <2,4,3,7>, <2,4,3,7>
- 3769633508U, // <4,3,7,3>: Cost 4 vext3 <1,u,3,4>, <3,7,3,7>
- 3769633515U, // <4,3,7,4>: Cost 4 vext3 <1,u,3,4>, <3,7,4,5>
- 3769633526U, // <4,3,7,5>: Cost 4 vext3 <1,u,3,4>, <3,7,5,7>
- 3662647932U, // <4,3,7,6>: Cost 4 vext1 <6,4,3,7>, <6,4,3,7>
- 3781208837U, // <4,3,7,7>: Cost 4 vext3 <3,7,7,4>, <3,7,7,4>
- 3769633547U, // <4,3,7,u>: Cost 4 vext3 <1,u,3,4>, <3,7,u,1>
- 2559049830U, // <4,3,u,0>: Cost 3 vext1 <1,4,3,u>, LHS
- 2691910430U, // <4,3,u,1>: Cost 3 vext3 <1,2,3,4>, <3,u,1,2>
- 2565023513U, // <4,3,u,2>: Cost 3 vext1 <2,4,3,u>, <2,4,3,u>
- 2707835698U, // <4,3,u,3>: Cost 3 vext3 <3,u,3,4>, <3,u,3,4>
- 2559053110U, // <4,3,u,4>: Cost 3 vext1 <1,4,3,u>, RHS
- 1161857540U, // <4,3,u,5>: Cost 2 vrev <3,4,5,u>
- 2235673101U, // <4,3,u,6>: Cost 3 vrev <3,4,6,u>
- 2708130646U, // <4,3,u,7>: Cost 3 vext3 <3,u,7,4>, <3,u,7,4>
- 1162078751U, // <4,3,u,u>: Cost 2 vrev <3,4,u,u>
- 2617573416U, // <4,4,0,0>: Cost 3 vext2 <0,0,4,4>, <0,0,4,4>
- 1570373734U, // <4,4,0,1>: Cost 2 vext2 <4,4,4,4>, LHS
- 2779676774U, // <4,4,0,2>: Cost 3 vuzpl <4,6,4,6>, LHS
- 3760196480U, // <4,4,0,3>: Cost 4 vext3 <0,3,1,4>, <4,0,3,1>
- 2576977100U, // <4,4,0,4>: Cost 3 vext1 <4,4,4,0>, <4,4,4,0>
- 2718747538U, // <4,4,0,5>: Cost 3 vext3 <5,6,7,4>, <4,0,5,1>
- 2718747548U, // <4,4,0,6>: Cost 3 vext3 <5,6,7,4>, <4,0,6,2>
- 3668637015U, // <4,4,0,7>: Cost 4 vext1 <7,4,4,0>, <7,4,4,0>
- 1570374301U, // <4,4,0,u>: Cost 2 vext2 <4,4,4,4>, LHS
- 2644116214U, // <4,4,1,0>: Cost 3 vext2 <4,4,4,4>, <1,0,3,2>
- 2644116276U, // <4,4,1,1>: Cost 3 vext2 <4,4,4,4>, <1,1,1,1>
- 2691910602U, // <4,4,1,2>: Cost 3 vext3 <1,2,3,4>, <4,1,2,3>
- 2644116440U, // <4,4,1,3>: Cost 3 vext2 <4,4,4,4>, <1,3,1,3>
- 2711227356U, // <4,4,1,4>: Cost 3 vext3 <4,4,4,4>, <4,1,4,3>
- 2709310438U, // <4,4,1,5>: Cost 3 vext3 <4,1,5,4>, <4,1,5,4>
- 3765652462U, // <4,4,1,6>: Cost 4 vext3 <1,2,3,4>, <4,1,6,3>
- 3768970231U, // <4,4,1,7>: Cost 4 vext3 <1,7,3,4>, <4,1,7,3>
- 2695891968U, // <4,4,1,u>: Cost 3 vext3 <1,u,3,4>, <4,1,u,3>
- 3703260634U, // <4,4,2,0>: Cost 4 vext2 <2,0,4,4>, <2,0,4,4>
- 3765652499U, // <4,4,2,1>: Cost 4 vext3 <1,2,3,4>, <4,2,1,4>
- 2644117096U, // <4,4,2,2>: Cost 3 vext2 <4,4,4,4>, <2,2,2,2>
- 2631509709U, // <4,4,2,3>: Cost 3 vext2 <2,3,4,4>, <2,3,4,4>
- 2644117269U, // <4,4,2,4>: Cost 3 vext2 <4,4,4,4>, <2,4,3,4>
- 3705251698U, // <4,4,2,5>: Cost 4 vext2 <2,3,4,4>, <2,5,4,7>
- 2710047808U, // <4,4,2,6>: Cost 3 vext3 <4,2,6,4>, <4,2,6,4>
- 3783863369U, // <4,4,2,7>: Cost 4 vext3 <4,2,7,4>, <4,2,7,4>
- 2634827874U, // <4,4,2,u>: Cost 3 vext2 <2,u,4,4>, <2,u,4,4>
- 2644117654U, // <4,4,3,0>: Cost 3 vext2 <4,4,4,4>, <3,0,1,2>
- 3638797210U, // <4,4,3,1>: Cost 4 vext1 <2,4,4,3>, <1,2,3,4>
- 3638798082U, // <4,4,3,2>: Cost 4 vext1 <2,4,4,3>, <2,4,1,3>
- 2637482406U, // <4,4,3,3>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
- 2638146039U, // <4,4,3,4>: Cost 3 vext2 <3,4,4,4>, <3,4,4,4>
- 3913287374U, // <4,4,3,5>: Cost 4 vuzpr <3,4,5,4>, <2,3,4,5>
- 3765652625U, // <4,4,3,6>: Cost 4 vext3 <1,2,3,4>, <4,3,6,4>
- 3713878762U, // <4,4,3,7>: Cost 4 vext2 <3,7,4,4>, <3,7,4,4>
- 2637482406U, // <4,4,3,u>: Cost 3 vext2 <3,3,4,4>, <3,3,4,4>
- 1503264870U, // <4,4,4,0>: Cost 2 vext1 <4,4,4,4>, LHS
- 2577007514U, // <4,4,4,1>: Cost 3 vext1 <4,4,4,4>, <1,2,3,4>
- 2577008232U, // <4,4,4,2>: Cost 3 vext1 <4,4,4,4>, <2,2,2,2>
- 2571037175U, // <4,4,4,3>: Cost 3 vext1 <3,4,4,4>, <3,4,4,4>
- 161926454U, // <4,4,4,4>: Cost 1 vdup0 RHS
- 1570377014U, // <4,4,4,5>: Cost 2 vext2 <4,4,4,4>, RHS
- 2779680054U, // <4,4,4,6>: Cost 3 vuzpl <4,6,4,6>, RHS
- 2594927963U, // <4,4,4,7>: Cost 3 vext1 <7,4,4,4>, <7,4,4,4>
- 161926454U, // <4,4,4,u>: Cost 1 vdup0 RHS
- 2571042918U, // <4,4,5,0>: Cost 3 vext1 <3,4,4,5>, LHS
- 2571043738U, // <4,4,5,1>: Cost 3 vext1 <3,4,4,5>, <1,2,3,4>
- 3638814495U, // <4,4,5,2>: Cost 4 vext1 <2,4,4,5>, <2,4,4,5>
- 2571045368U, // <4,4,5,3>: Cost 3 vext1 <3,4,4,5>, <3,4,4,5>
- 2571046198U, // <4,4,5,4>: Cost 3 vext1 <3,4,4,5>, RHS
- 1839648054U, // <4,4,5,5>: Cost 2 vzipl RHS, RHS
- 1618169142U, // <4,4,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 2594936156U, // <4,4,5,7>: Cost 3 vext1 <7,4,4,5>, <7,4,4,5>
- 1618169160U, // <4,4,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
- 2553135206U, // <4,4,6,0>: Cost 3 vext1 <0,4,4,6>, LHS
- 3626877686U, // <4,4,6,1>: Cost 4 vext1 <0,4,4,6>, <1,0,3,2>
- 2565080782U, // <4,4,6,2>: Cost 3 vext1 <2,4,4,6>, <2,3,4,5>
- 2571053561U, // <4,4,6,3>: Cost 3 vext1 <3,4,4,6>, <3,4,4,6>
- 2553138486U, // <4,4,6,4>: Cost 3 vext1 <0,4,4,6>, RHS
- 2241555675U, // <4,4,6,5>: Cost 3 vrev <4,4,5,6>
- 1973865782U, // <4,4,6,6>: Cost 2 vtrnl RHS, RHS
- 2658055029U, // <4,4,6,7>: Cost 3 vext2 <6,7,4,4>, <6,7,4,4>
- 1973865800U, // <4,4,6,u>: Cost 2 vtrnl RHS, RHS
- 2644120570U, // <4,4,7,0>: Cost 3 vext2 <4,4,4,4>, <7,0,1,2>
- 3638829978U, // <4,4,7,1>: Cost 4 vext1 <2,4,4,7>, <1,2,3,4>
- 3638830881U, // <4,4,7,2>: Cost 4 vext1 <2,4,4,7>, <2,4,4,7>
- 3735115018U, // <4,4,7,3>: Cost 4 vext2 <7,3,4,4>, <7,3,4,4>
- 2662036827U, // <4,4,7,4>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
- 2713292236U, // <4,4,7,5>: Cost 3 vext3 <4,7,5,4>, <4,7,5,4>
- 2713365973U, // <4,4,7,6>: Cost 3 vext3 <4,7,6,4>, <4,7,6,4>
- 2644121196U, // <4,4,7,7>: Cost 3 vext2 <4,4,4,4>, <7,7,7,7>
- 2662036827U, // <4,4,7,u>: Cost 3 vext2 <7,4,4,4>, <7,4,4,4>
- 1503297638U, // <4,4,u,0>: Cost 2 vext1 <4,4,4,u>, LHS
- 1570379566U, // <4,4,u,1>: Cost 2 vext2 <4,4,4,4>, LHS
- 2779682606U, // <4,4,u,2>: Cost 3 vuzpl <4,6,4,6>, LHS
- 2571069947U, // <4,4,u,3>: Cost 3 vext1 <3,4,4,u>, <3,4,4,u>
- 161926454U, // <4,4,u,4>: Cost 1 vdup0 RHS
- 1841638710U, // <4,4,u,5>: Cost 2 vzipl RHS, RHS
- 1618169385U, // <4,4,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 2594960735U, // <4,4,u,7>: Cost 3 vext1 <7,4,4,u>, <7,4,4,u>
- 161926454U, // <4,4,u,u>: Cost 1 vdup0 RHS
- 2631516160U, // <4,5,0,0>: Cost 3 vext2 <2,3,4,5>, <0,0,0,0>
- 1557774438U, // <4,5,0,1>: Cost 2 vext2 <2,3,4,5>, LHS
- 2618908875U, // <4,5,0,2>: Cost 3 vext2 <0,2,4,5>, <0,2,4,5>
- 2571078140U, // <4,5,0,3>: Cost 3 vext1 <3,4,5,0>, <3,4,5,0>
- 2626871634U, // <4,5,0,4>: Cost 3 vext2 <1,5,4,5>, <0,4,1,5>
- 3705258414U, // <4,5,0,5>: Cost 4 vext2 <2,3,4,5>, <0,5,2,7>
- 2594968438U, // <4,5,0,6>: Cost 3 vext1 <7,4,5,0>, <6,7,4,5>
- 2594968928U, // <4,5,0,7>: Cost 3 vext1 <7,4,5,0>, <7,4,5,0>
- 1557775005U, // <4,5,0,u>: Cost 2 vext2 <2,3,4,5>, LHS
- 2631516918U, // <4,5,1,0>: Cost 3 vext2 <2,3,4,5>, <1,0,3,2>
- 2624217939U, // <4,5,1,1>: Cost 3 vext2 <1,1,4,5>, <1,1,4,5>
- 2631517078U, // <4,5,1,2>: Cost 3 vext2 <2,3,4,5>, <1,2,3,0>
- 2821341286U, // <4,5,1,3>: Cost 3 vuzpr <0,4,1,5>, LHS
- 3895086054U, // <4,5,1,4>: Cost 4 vuzpr <0,4,1,5>, <4,1,5,4>
- 2626872471U, // <4,5,1,5>: Cost 3 vext2 <1,5,4,5>, <1,5,4,5>
- 3895083131U, // <4,5,1,6>: Cost 4 vuzpr <0,4,1,5>, <0,1,4,6>
- 2718748368U, // <4,5,1,7>: Cost 3 vext3 <5,6,7,4>, <5,1,7,3>
- 2821341291U, // <4,5,1,u>: Cost 3 vuzpr <0,4,1,5>, LHS
- 2571092070U, // <4,5,2,0>: Cost 3 vext1 <3,4,5,2>, LHS
- 3699287585U, // <4,5,2,1>: Cost 4 vext2 <1,3,4,5>, <2,1,3,3>
- 2630854269U, // <4,5,2,2>: Cost 3 vext2 <2,2,4,5>, <2,2,4,5>
- 1557776078U, // <4,5,2,3>: Cost 2 vext2 <2,3,4,5>, <2,3,4,5>
- 2631517974U, // <4,5,2,4>: Cost 3 vext2 <2,3,4,5>, <2,4,3,5>
- 3692652384U, // <4,5,2,5>: Cost 4 vext2 <0,2,4,5>, <2,5,2,7>
- 2631518138U, // <4,5,2,6>: Cost 3 vext2 <2,3,4,5>, <2,6,3,7>
- 4164013366U, // <4,5,2,7>: Cost 4 vtrnr <0,4,u,2>, RHS
- 1561094243U, // <4,5,2,u>: Cost 2 vext2 <2,u,4,5>, <2,u,4,5>
- 2631518358U, // <4,5,3,0>: Cost 3 vext2 <2,3,4,5>, <3,0,1,2>
- 3895084710U, // <4,5,3,1>: Cost 4 vuzpr <0,4,1,5>, <2,3,0,1>
- 2631518540U, // <4,5,3,2>: Cost 3 vext2 <2,3,4,5>, <3,2,3,4>
- 2631518620U, // <4,5,3,3>: Cost 3 vext2 <2,3,4,5>, <3,3,3,3>
- 2631518716U, // <4,5,3,4>: Cost 3 vext2 <2,3,4,5>, <3,4,5,0>
- 2631518784U, // <4,5,3,5>: Cost 3 vext2 <2,3,4,5>, <3,5,3,5>
- 2658060980U, // <4,5,3,6>: Cost 3 vext2 <6,7,4,5>, <3,6,7,4>
- 2640145131U, // <4,5,3,7>: Cost 3 vext2 <3,7,4,5>, <3,7,4,5>
- 2631519006U, // <4,5,3,u>: Cost 3 vext2 <2,3,4,5>, <3,u,1,2>
- 2571108454U, // <4,5,4,0>: Cost 3 vext1 <3,4,5,4>, LHS
- 3632907342U, // <4,5,4,1>: Cost 4 vext1 <1,4,5,4>, <1,4,5,4>
- 2571110094U, // <4,5,4,2>: Cost 3 vext1 <3,4,5,4>, <2,3,4,5>
- 2571110912U, // <4,5,4,3>: Cost 3 vext1 <3,4,5,4>, <3,4,5,4>
- 2571111734U, // <4,5,4,4>: Cost 3 vext1 <3,4,5,4>, RHS
- 1557777718U, // <4,5,4,5>: Cost 2 vext2 <2,3,4,5>, RHS
- 2645454195U, // <4,5,4,6>: Cost 3 vext2 <4,6,4,5>, <4,6,4,5>
- 2718748614U, // <4,5,4,7>: Cost 3 vext3 <5,6,7,4>, <5,4,7,6>
- 1557777961U, // <4,5,4,u>: Cost 2 vext2 <2,3,4,5>, RHS
- 1503346790U, // <4,5,5,0>: Cost 2 vext1 <4,4,5,5>, LHS
- 2913398480U, // <4,5,5,1>: Cost 3 vzipl RHS, <5,1,7,3>
- 2631519998U, // <4,5,5,2>: Cost 3 vext2 <2,3,4,5>, <5,2,3,4>
- 2577090710U, // <4,5,5,3>: Cost 3 vext1 <4,4,5,5>, <3,0,1,2>
- 1503349978U, // <4,5,5,4>: Cost 2 vext1 <4,4,5,5>, <4,4,5,5>
- 2631520260U, // <4,5,5,5>: Cost 3 vext2 <2,3,4,5>, <5,5,5,5>
- 2913390690U, // <4,5,5,6>: Cost 3 vzipl RHS, <5,6,7,0>
- 2821344566U, // <4,5,5,7>: Cost 3 vuzpr <0,4,1,5>, RHS
- 1503352622U, // <4,5,5,u>: Cost 2 vext1 <4,4,5,5>, LHS
- 1497383014U, // <4,5,6,0>: Cost 2 vext1 <3,4,5,6>, LHS
- 2559181904U, // <4,5,6,1>: Cost 3 vext1 <1,4,5,6>, <1,4,5,6>
- 2565154601U, // <4,5,6,2>: Cost 3 vext1 <2,4,5,6>, <2,4,5,6>
- 1497385474U, // <4,5,6,3>: Cost 2 vext1 <3,4,5,6>, <3,4,5,6>
- 1497386294U, // <4,5,6,4>: Cost 2 vext1 <3,4,5,6>, RHS
- 3047608324U, // <4,5,6,5>: Cost 3 vtrnl RHS, <5,5,5,5>
- 2571129656U, // <4,5,6,6>: Cost 3 vext1 <3,4,5,6>, <6,6,6,6>
- 27705344U, // <4,5,6,7>: Cost 0 copy RHS
- 27705344U, // <4,5,6,u>: Cost 0 copy RHS
- 2565161062U, // <4,5,7,0>: Cost 3 vext1 <2,4,5,7>, LHS
- 2565161882U, // <4,5,7,1>: Cost 3 vext1 <2,4,5,7>, <1,2,3,4>
- 2565162794U, // <4,5,7,2>: Cost 3 vext1 <2,4,5,7>, <2,4,5,7>
- 2661381387U, // <4,5,7,3>: Cost 3 vext2 <7,3,4,5>, <7,3,4,5>
- 2565164342U, // <4,5,7,4>: Cost 3 vext1 <2,4,5,7>, RHS
- 2718748840U, // <4,5,7,5>: Cost 3 vext3 <5,6,7,4>, <5,7,5,7>
- 2718748846U, // <4,5,7,6>: Cost 3 vext3 <5,6,7,4>, <5,7,6,4>
- 2719412407U, // <4,5,7,7>: Cost 3 vext3 <5,7,7,4>, <5,7,7,4>
- 2565166894U, // <4,5,7,u>: Cost 3 vext1 <2,4,5,7>, LHS
- 1497399398U, // <4,5,u,0>: Cost 2 vext1 <3,4,5,u>, LHS
- 1557780270U, // <4,5,u,1>: Cost 2 vext2 <2,3,4,5>, LHS
- 2631522181U, // <4,5,u,2>: Cost 3 vext2 <2,3,4,5>,
- 1497401860U, // <4,5,u,3>: Cost 2 vext1 <3,4,5,u>, <3,4,5,u>
- 1497402678U, // <4,5,u,4>: Cost 2 vext1 <3,4,5,u>, RHS
- 1557780634U, // <4,5,u,5>: Cost 2 vext2 <2,3,4,5>, RHS
- 2631522512U, // <4,5,u,6>: Cost 3 vext2 <2,3,4,5>,
- 27705344U, // <4,5,u,7>: Cost 0 copy RHS
- 27705344U, // <4,5,u,u>: Cost 0 copy RHS
- 2618916864U, // <4,6,0,0>: Cost 3 vext2 <0,2,4,6>, <0,0,0,0>
- 1545175142U, // <4,6,0,1>: Cost 2 vext2 <0,2,4,6>, LHS
- 1545175244U, // <4,6,0,2>: Cost 2 vext2 <0,2,4,6>, <0,2,4,6>
- 3692658940U, // <4,6,0,3>: Cost 4 vext2 <0,2,4,6>, <0,3,1,0>
- 2618917202U, // <4,6,0,4>: Cost 3 vext2 <0,2,4,6>, <0,4,1,5>
- 3852910806U, // <4,6,0,5>: Cost 4 vuzpl RHS, <0,2,5,7>
- 2253525648U, // <4,6,0,6>: Cost 3 vrev <6,4,6,0>
- 4040764726U, // <4,6,0,7>: Cost 4 vzipr <2,3,4,0>, RHS
- 1545175709U, // <4,6,0,u>: Cost 2 vext2 <0,2,4,6>, LHS
- 2618917622U, // <4,6,1,0>: Cost 3 vext2 <0,2,4,6>, <1,0,3,2>
- 2618917684U, // <4,6,1,1>: Cost 3 vext2 <0,2,4,6>, <1,1,1,1>
- 2618917782U, // <4,6,1,2>: Cost 3 vext2 <0,2,4,6>, <1,2,3,0>
- 2618917848U, // <4,6,1,3>: Cost 3 vext2 <0,2,4,6>, <1,3,1,3>
- 3692659773U, // <4,6,1,4>: Cost 4 vext2 <0,2,4,6>, <1,4,3,5>
- 2618918032U, // <4,6,1,5>: Cost 3 vext2 <0,2,4,6>, <1,5,3,7>
- 3692659937U, // <4,6,1,6>: Cost 4 vext2 <0,2,4,6>, <1,6,3,7>
- 4032146742U, // <4,6,1,7>: Cost 4 vzipr <0,u,4,1>, RHS
- 2618918253U, // <4,6,1,u>: Cost 3 vext2 <0,2,4,6>, <1,u,1,3>
- 2618918380U, // <4,6,2,0>: Cost 3 vext2 <0,2,4,6>, <2,0,6,4>
- 2618918460U, // <4,6,2,1>: Cost 3 vext2 <0,2,4,6>, <2,1,6,3>
- 2618918504U, // <4,6,2,2>: Cost 3 vext2 <0,2,4,6>, <2,2,2,2>
- 2618918566U, // <4,6,2,3>: Cost 3 vext2 <0,2,4,6>, <2,3,0,1>
- 2618918679U, // <4,6,2,4>: Cost 3 vext2 <0,2,4,6>, <2,4,3,6>
- 2618918788U, // <4,6,2,5>: Cost 3 vext2 <0,2,4,6>, <2,5,6,7>
- 2618918842U, // <4,6,2,6>: Cost 3 vext2 <0,2,4,6>, <2,6,3,7>
- 2718749178U, // <4,6,2,7>: Cost 3 vext3 <5,6,7,4>, <6,2,7,3>
- 2618918971U, // <4,6,2,u>: Cost 3 vext2 <0,2,4,6>, <2,u,0,1>
- 2618919062U, // <4,6,3,0>: Cost 3 vext2 <0,2,4,6>, <3,0,1,2>
- 2636171526U, // <4,6,3,1>: Cost 3 vext2 <3,1,4,6>, <3,1,4,6>
- 3692661057U, // <4,6,3,2>: Cost 4 vext2 <0,2,4,6>, <3,2,2,2>
- 2618919324U, // <4,6,3,3>: Cost 3 vext2 <0,2,4,6>, <3,3,3,3>
- 2618919426U, // <4,6,3,4>: Cost 3 vext2 <0,2,4,6>, <3,4,5,6>
- 2638826058U, // <4,6,3,5>: Cost 3 vext2 <3,5,4,6>, <3,5,4,6>
- 3913303030U, // <4,6,3,6>: Cost 4 vuzpr <3,4,5,6>, <1,3,4,6>
- 2722730572U, // <4,6,3,7>: Cost 3 vext3 <6,3,7,4>, <6,3,7,4>
- 2618919710U, // <4,6,3,u>: Cost 3 vext2 <0,2,4,6>, <3,u,1,2>
- 2565210214U, // <4,6,4,0>: Cost 3 vext1 <2,4,6,4>, LHS
- 2718749286U, // <4,6,4,1>: Cost 3 vext3 <5,6,7,4>, <6,4,1,3>
- 2565211952U, // <4,6,4,2>: Cost 3 vext1 <2,4,6,4>, <2,4,6,4>
- 2571184649U, // <4,6,4,3>: Cost 3 vext1 <3,4,6,4>, <3,4,6,4>
- 2565213494U, // <4,6,4,4>: Cost 3 vext1 <2,4,6,4>, RHS
- 1545178422U, // <4,6,4,5>: Cost 2 vext2 <0,2,4,6>, RHS
- 1705430326U, // <4,6,4,6>: Cost 2 vuzpl RHS, RHS
- 2595075437U, // <4,6,4,7>: Cost 3 vext1 <7,4,6,4>, <7,4,6,4>
- 1545178665U, // <4,6,4,u>: Cost 2 vext2 <0,2,4,6>, RHS
- 2565218406U, // <4,6,5,0>: Cost 3 vext1 <2,4,6,5>, LHS
- 2645462736U, // <4,6,5,1>: Cost 3 vext2 <4,6,4,6>, <5,1,7,3>
- 2913399290U, // <4,6,5,2>: Cost 3 vzipl RHS, <6,2,7,3>
- 3913305394U, // <4,6,5,3>: Cost 4 vuzpr <3,4,5,6>, <4,5,6,3>
- 2645462982U, // <4,6,5,4>: Cost 3 vext2 <4,6,4,6>, <5,4,7,6>
- 2779172868U, // <4,6,5,5>: Cost 3 vuzpl RHS, <5,5,5,5>
- 2913391416U, // <4,6,5,6>: Cost 3 vzipl RHS, <6,6,6,6>
- 2821426486U, // <4,6,5,7>: Cost 3 vuzpr <0,4,2,6>, RHS
- 2821426487U, // <4,6,5,u>: Cost 3 vuzpr <0,4,2,6>, RHS
- 1503428710U, // <4,6,6,0>: Cost 2 vext1 <4,4,6,6>, LHS
- 2577171190U, // <4,6,6,1>: Cost 3 vext1 <4,4,6,6>, <1,0,3,2>
- 2645463546U, // <4,6,6,2>: Cost 3 vext2 <4,6,4,6>, <6,2,7,3>
- 2577172630U, // <4,6,6,3>: Cost 3 vext1 <4,4,6,6>, <3,0,1,2>
- 1503431908U, // <4,6,6,4>: Cost 2 vext1 <4,4,6,6>, <4,4,6,6>
- 2253501069U, // <4,6,6,5>: Cost 3 vrev <6,4,5,6>
- 2618921784U, // <4,6,6,6>: Cost 3 vext2 <0,2,4,6>, <6,6,6,6>
- 2954464566U, // <4,6,6,7>: Cost 3 vzipr <0,2,4,6>, RHS
- 1503434542U, // <4,6,6,u>: Cost 2 vext1 <4,4,6,6>, LHS
- 2645464058U, // <4,6,7,0>: Cost 3 vext2 <4,6,4,6>, <7,0,1,2>
- 2779173882U, // <4,6,7,1>: Cost 3 vuzpl RHS, <7,0,1,2>
- 3638978355U, // <4,6,7,2>: Cost 4 vext1 <2,4,6,7>, <2,4,6,7>
- 2725090156U, // <4,6,7,3>: Cost 3 vext3 <6,7,3,4>, <6,7,3,4>
- 2645464422U, // <4,6,7,4>: Cost 3 vext2 <4,6,4,6>, <7,4,5,6>
- 2779174246U, // <4,6,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
- 3852915914U, // <4,6,7,6>: Cost 4 vuzpl RHS, <7,2,6,3>
- 2779174508U, // <4,6,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
- 2779173945U, // <4,6,7,u>: Cost 3 vuzpl RHS, <7,0,u,2>
- 1503445094U, // <4,6,u,0>: Cost 2 vext1 <4,4,6,u>, LHS
- 1545180974U, // <4,6,u,1>: Cost 2 vext2 <0,2,4,6>, LHS
- 1705432878U, // <4,6,u,2>: Cost 2 vuzpl RHS, LHS
- 2618922940U, // <4,6,u,3>: Cost 3 vext2 <0,2,4,6>,
- 1503448294U, // <4,6,u,4>: Cost 2 vext1 <4,4,6,u>, <4,4,6,u>
- 1545181338U, // <4,6,u,5>: Cost 2 vext2 <0,2,4,6>, RHS
- 1705433242U, // <4,6,u,6>: Cost 2 vuzpl RHS, RHS
- 2954480950U, // <4,6,u,7>: Cost 3 vzipr <0,2,4,u>, RHS
- 1545181541U, // <4,6,u,u>: Cost 2 vext2 <0,2,4,6>, LHS
- 3706601472U, // <4,7,0,0>: Cost 4 vext2 <2,5,4,7>, <0,0,0,0>
- 2632859750U, // <4,7,0,1>: Cost 3 vext2 <2,5,4,7>, LHS
- 2726343685U, // <4,7,0,2>: Cost 3 vext3 <7,0,2,4>, <7,0,2,4>
- 3701293312U, // <4,7,0,3>: Cost 4 vext2 <1,6,4,7>, <0,3,1,4>
- 3706601810U, // <4,7,0,4>: Cost 4 vext2 <2,5,4,7>, <0,4,1,5>
- 2259424608U, // <4,7,0,5>: Cost 3 vrev <7,4,5,0>
- 3695321617U, // <4,7,0,6>: Cost 4 vext2 <0,6,4,7>, <0,6,4,7>
- 3800454194U, // <4,7,0,7>: Cost 4 vext3 <7,0,7,4>, <7,0,7,4>
- 2632860317U, // <4,7,0,u>: Cost 3 vext2 <2,5,4,7>, LHS
- 2259064116U, // <4,7,1,0>: Cost 3 vrev <7,4,0,1>
- 3700630324U, // <4,7,1,1>: Cost 4 vext2 <1,5,4,7>, <1,1,1,1>
- 2632860570U, // <4,7,1,2>: Cost 3 vext2 <2,5,4,7>, <1,2,3,4>
- 3769635936U, // <4,7,1,3>: Cost 4 vext3 <1,u,3,4>, <7,1,3,5>
- 3656920374U, // <4,7,1,4>: Cost 4 vext1 <5,4,7,1>, RHS
- 3700630681U, // <4,7,1,5>: Cost 4 vext2 <1,5,4,7>, <1,5,4,7>
- 3701294314U, // <4,7,1,6>: Cost 4 vext2 <1,6,4,7>, <1,6,4,7>
- 3793818754U, // <4,7,1,7>: Cost 4 vext3 <5,u,7,4>, <7,1,7,3>
- 2259654012U, // <4,7,1,u>: Cost 3 vrev <7,4,u,1>
- 3656925286U, // <4,7,2,0>: Cost 4 vext1 <5,4,7,2>, LHS
- 3706603050U, // <4,7,2,1>: Cost 4 vext2 <2,5,4,7>, <2,1,4,3>
- 3706603112U, // <4,7,2,2>: Cost 4 vext2 <2,5,4,7>, <2,2,2,2>
- 2727744688U, // <4,7,2,3>: Cost 3 vext3 <7,2,3,4>, <7,2,3,4>
- 3705939745U, // <4,7,2,4>: Cost 4 vext2 <2,4,4,7>, <2,4,4,7>
- 2632861554U, // <4,7,2,5>: Cost 3 vext2 <2,5,4,7>, <2,5,4,7>
- 3706603450U, // <4,7,2,6>: Cost 4 vext2 <2,5,4,7>, <2,6,3,7>
- 3792491731U, // <4,7,2,7>: Cost 4 vext3 <5,6,7,4>, <7,2,7,3>
- 2634852453U, // <4,7,2,u>: Cost 3 vext2 <2,u,4,7>, <2,u,4,7>
- 3706603670U, // <4,7,3,0>: Cost 4 vext2 <2,5,4,7>, <3,0,1,2>
- 3662906266U, // <4,7,3,1>: Cost 4 vext1 <6,4,7,3>, <1,2,3,4>
- 3725183326U, // <4,7,3,2>: Cost 4 vext2 <5,6,4,7>, <3,2,5,4>
- 3706603932U, // <4,7,3,3>: Cost 4 vext2 <2,5,4,7>, <3,3,3,3>
- 3701295618U, // <4,7,3,4>: Cost 4 vext2 <1,6,4,7>, <3,4,5,6>
- 2638834251U, // <4,7,3,5>: Cost 3 vext2 <3,5,4,7>, <3,5,4,7>
- 2639497884U, // <4,7,3,6>: Cost 3 vext2 <3,6,4,7>, <3,6,4,7>
- 3802445093U, // <4,7,3,7>: Cost 4 vext3 <7,3,7,4>, <7,3,7,4>
- 2640825150U, // <4,7,3,u>: Cost 3 vext2 <3,u,4,7>, <3,u,4,7>
- 2718750004U, // <4,7,4,0>: Cost 3 vext3 <5,6,7,4>, <7,4,0,1>
- 3706604490U, // <4,7,4,1>: Cost 4 vext2 <2,5,4,7>, <4,1,2,3>
- 3656943474U, // <4,7,4,2>: Cost 4 vext1 <5,4,7,4>, <2,5,4,7>
- 3779884371U, // <4,7,4,3>: Cost 4 vext3 <3,5,7,4>, <7,4,3,5>
- 2259383643U, // <4,7,4,4>: Cost 3 vrev <7,4,4,4>
- 2632863030U, // <4,7,4,5>: Cost 3 vext2 <2,5,4,7>, RHS
- 2259531117U, // <4,7,4,6>: Cost 3 vrev <7,4,6,4>
- 3907340074U, // <4,7,4,7>: Cost 4 vuzpr <2,4,5,7>, <2,4,5,7>
- 2632863273U, // <4,7,4,u>: Cost 3 vext2 <2,5,4,7>, RHS
- 2913391610U, // <4,7,5,0>: Cost 3 vzipl RHS, <7,0,1,2>
- 3645006848U, // <4,7,5,1>: Cost 4 vext1 <3,4,7,5>, <1,3,5,7>
- 2589181646U, // <4,7,5,2>: Cost 3 vext1 <6,4,7,5>, <2,3,4,5>
- 3645008403U, // <4,7,5,3>: Cost 4 vext1 <3,4,7,5>, <3,4,7,5>
- 2913391974U, // <4,7,5,4>: Cost 3 vzipl RHS, <7,4,5,6>
- 2583211973U, // <4,7,5,5>: Cost 3 vext1 <5,4,7,5>, <5,4,7,5>
- 2589184670U, // <4,7,5,6>: Cost 3 vext1 <6,4,7,5>, <6,4,7,5>
- 2913392236U, // <4,7,5,7>: Cost 3 vzipl RHS, <7,7,7,7>
- 2913392258U, // <4,7,5,u>: Cost 3 vzipl RHS, <7,u,1,2>
- 1509474406U, // <4,7,6,0>: Cost 2 vext1 <5,4,7,6>, LHS
- 3047609338U, // <4,7,6,1>: Cost 3 vtrnl RHS, <7,0,1,2>
- 2583217768U, // <4,7,6,2>: Cost 3 vext1 <5,4,7,6>, <2,2,2,2>
- 2583218326U, // <4,7,6,3>: Cost 3 vext1 <5,4,7,6>, <3,0,1,2>
- 1509477686U, // <4,7,6,4>: Cost 2 vext1 <5,4,7,6>, RHS
- 1509478342U, // <4,7,6,5>: Cost 2 vext1 <5,4,7,6>, <5,4,7,6>
- 2583220730U, // <4,7,6,6>: Cost 3 vext1 <5,4,7,6>, <6,2,7,3>
- 3047609964U, // <4,7,6,7>: Cost 3 vtrnl RHS, <7,7,7,7>
- 1509480238U, // <4,7,6,u>: Cost 2 vext1 <5,4,7,6>, LHS
- 3650994278U, // <4,7,7,0>: Cost 4 vext1 <4,4,7,7>, LHS
- 3650995098U, // <4,7,7,1>: Cost 4 vext1 <4,4,7,7>, <1,2,3,4>
- 3650996010U, // <4,7,7,2>: Cost 4 vext1 <4,4,7,7>, <2,4,5,7>
- 3804804677U, // <4,7,7,3>: Cost 4 vext3 <7,7,3,4>, <7,7,3,4>
- 3650997486U, // <4,7,7,4>: Cost 4 vext1 <4,4,7,7>, <4,4,7,7>
- 2662725039U, // <4,7,7,5>: Cost 3 vext2 <7,5,4,7>, <7,5,4,7>
- 3662942880U, // <4,7,7,6>: Cost 4 vext1 <6,4,7,7>, <6,4,7,7>
- 2718750316U, // <4,7,7,7>: Cost 3 vext3 <5,6,7,4>, <7,7,7,7>
- 2664715938U, // <4,7,7,u>: Cost 3 vext2 <7,u,4,7>, <7,u,4,7>
- 1509490790U, // <4,7,u,0>: Cost 2 vext1 <5,4,7,u>, LHS
- 2632865582U, // <4,7,u,1>: Cost 3 vext2 <2,5,4,7>, LHS
- 2583234152U, // <4,7,u,2>: Cost 3 vext1 <5,4,7,u>, <2,2,2,2>
- 2583234710U, // <4,7,u,3>: Cost 3 vext1 <5,4,7,u>, <3,0,1,2>
- 1509494070U, // <4,7,u,4>: Cost 2 vext1 <5,4,7,u>, RHS
- 1509494728U, // <4,7,u,5>: Cost 2 vext1 <5,4,7,u>, <5,4,7,u>
- 2583237114U, // <4,7,u,6>: Cost 3 vext1 <5,4,7,u>, <6,2,7,3>
- 3047757420U, // <4,7,u,7>: Cost 3 vtrnl RHS, <7,7,7,7>
- 1509496622U, // <4,7,u,u>: Cost 2 vext1 <5,4,7,u>, LHS
- 2618933248U, // <4,u,0,0>: Cost 3 vext2 <0,2,4,u>, <0,0,0,0>
- 1545191526U, // <4,u,0,1>: Cost 2 vext2 <0,2,4,u>, LHS
- 1545191630U, // <4,u,0,2>: Cost 2 vext2 <0,2,4,u>, <0,2,4,u>
- 2691913445U, // <4,u,0,3>: Cost 3 vext3 <1,2,3,4>,
- 2618933586U, // <4,u,0,4>: Cost 3 vext2 <0,2,4,u>, <0,4,1,5>
- 2265397305U, // <4,u,0,5>: Cost 3 vrev
- 2595189625U, // <4,u,0,6>: Cost 3 vext1 <7,4,u,0>, <6,7,4,u>
- 2595190139U, // <4,u,0,7>: Cost 3 vext1 <7,4,u,0>, <7,4,u,0>
- 1545192093U, // <4,u,0,u>: Cost 2 vext2 <0,2,4,u>, LHS
- 2618934006U, // <4,u,1,0>: Cost 3 vext2 <0,2,4,u>, <1,0,3,2>
- 2618934068U, // <4,u,1,1>: Cost 3 vext2 <0,2,4,u>, <1,1,1,1>
- 1618171694U, // <4,u,1,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 2618934232U, // <4,u,1,3>: Cost 3 vext2 <0,2,4,u>, <1,3,1,3>
- 2695894848U, // <4,u,1,4>: Cost 3 vext3 <1,u,3,4>,
- 2618934416U, // <4,u,1,5>: Cost 3 vext2 <0,2,4,u>, <1,5,3,7>
- 3692676321U, // <4,u,1,6>: Cost 4 vext2 <0,2,4,u>, <1,6,3,7>
- 2718750555U, // <4,u,1,7>: Cost 3 vext3 <5,6,7,4>,
- 1618171748U, // <4,u,1,u>: Cost 2 vext3 <1,2,3,4>, LHS
- 2553397350U, // <4,u,2,0>: Cost 3 vext1 <0,4,u,2>, LHS
- 2630215215U, // <4,u,2,1>: Cost 3 vext2 <2,1,4,u>, <2,1,4,u>
- 2618934888U, // <4,u,2,2>: Cost 3 vext2 <0,2,4,u>, <2,2,2,2>
- 1557800657U, // <4,u,2,3>: Cost 2 vext2 <2,3,4,u>, <2,3,4,u>
- 2618935065U, // <4,u,2,4>: Cost 3 vext2 <0,2,4,u>, <2,4,3,u>
- 2733864859U, // <4,u,2,5>: Cost 3 vext3 ,
- 2618935226U, // <4,u,2,6>: Cost 3 vext2 <0,2,4,u>, <2,6,3,7>
- 2718750636U, // <4,u,2,7>: Cost 3 vext3 <5,6,7,4>,
- 1561118822U, // <4,u,2,u>: Cost 2 vext2 <2,u,4,u>, <2,u,4,u>
- 2618935446U, // <4,u,3,0>: Cost 3 vext2 <0,2,4,u>, <3,0,1,2>
- 2779318422U, // <4,u,3,1>: Cost 3 vuzpl RHS, <3,0,1,2>
- 2636851545U, // <4,u,3,2>: Cost 3 vext2 <3,2,4,u>, <3,2,4,u>
- 2618935708U, // <4,u,3,3>: Cost 3 vext2 <0,2,4,u>, <3,3,3,3>
- 2618935810U, // <4,u,3,4>: Cost 3 vext2 <0,2,4,u>, <3,4,5,6>
- 2691913711U, // <4,u,3,5>: Cost 3 vext3 <1,2,3,4>,
- 2588725862U, // <4,u,3,6>: Cost 3 vext1 <6,4,1,3>, <6,4,1,3>
- 2640169710U, // <4,u,3,7>: Cost 3 vext2 <3,7,4,u>, <3,7,4,u>
- 2618936094U, // <4,u,3,u>: Cost 3 vext2 <0,2,4,u>, <3,u,1,2>
- 1503559782U, // <4,u,4,0>: Cost 2 vext1 <4,4,u,4>, LHS
- 2692282391U, // <4,u,4,1>: Cost 3 vext3 <1,2,u,4>,
- 2565359426U, // <4,u,4,2>: Cost 3 vext1 <2,4,u,4>, <2,4,u,4>
- 2571332123U, // <4,u,4,3>: Cost 3 vext1 <3,4,u,4>, <3,4,u,4>
- 161926454U, // <4,u,4,4>: Cost 1 vdup0 RHS
- 1545194806U, // <4,u,4,5>: Cost 2 vext2 <0,2,4,u>, RHS
- 1705577782U, // <4,u,4,6>: Cost 2 vuzpl RHS, RHS
- 2718750801U, // <4,u,4,7>: Cost 3 vext3 <5,6,7,4>,
- 161926454U, // <4,u,4,u>: Cost 1 vdup0 RHS
- 1479164006U, // <4,u,5,0>: Cost 2 vext1 <0,4,1,5>, LHS
- 1839650606U, // <4,u,5,1>: Cost 2 vzipl RHS, LHS
- 2565367502U, // <4,u,5,2>: Cost 3 vext1 <2,4,u,5>, <2,3,4,5>
- 3089777309U, // <4,u,5,3>: Cost 3 vtrnr <0,4,1,5>, LHS
- 1479167286U, // <4,u,5,4>: Cost 2 vext1 <0,4,1,5>, RHS
- 1839650970U, // <4,u,5,5>: Cost 2 vzipl RHS, RHS
- 1618172058U, // <4,u,5,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 3089780265U, // <4,u,5,7>: Cost 3 vtrnr <0,4,1,5>, RHS
- 1618172076U, // <4,u,5,u>: Cost 2 vext3 <1,2,3,4>, RHS
- 1479688294U, // <4,u,6,0>: Cost 2 vext1 <0,4,u,6>, LHS
- 2553430774U, // <4,u,6,1>: Cost 3 vext1 <0,4,u,6>, <1,0,3,2>
- 1973868334U, // <4,u,6,2>: Cost 2 vtrnl RHS, LHS
- 1497606685U, // <4,u,6,3>: Cost 2 vext1 <3,4,u,6>, <3,4,u,6>
- 1479691574U, // <4,u,6,4>: Cost 2 vext1 <0,4,u,6>, RHS
- 1509552079U, // <4,u,6,5>: Cost 2 vext1 <5,4,u,6>, <5,4,u,6>
- 1973868698U, // <4,u,6,6>: Cost 2 vtrnl RHS, RHS
- 27705344U, // <4,u,6,7>: Cost 0 copy RHS
- 27705344U, // <4,u,6,u>: Cost 0 copy RHS
- 2565382246U, // <4,u,7,0>: Cost 3 vext1 <2,4,u,7>, LHS
- 2565383066U, // <4,u,7,1>: Cost 3 vext1 <2,4,u,7>, <1,2,3,4>
- 2565384005U, // <4,u,7,2>: Cost 3 vext1 <2,4,u,7>, <2,4,u,7>
- 2661405966U, // <4,u,7,3>: Cost 3 vext2 <7,3,4,u>, <7,3,4,u>
- 2565385526U, // <4,u,7,4>: Cost 3 vext1 <2,4,u,7>, RHS
- 2779321702U, // <4,u,7,5>: Cost 3 vuzpl RHS, <7,4,5,6>
- 2589274793U, // <4,u,7,6>: Cost 3 vext1 <6,4,u,7>, <6,4,u,7>
- 2779321964U, // <4,u,7,7>: Cost 3 vuzpl RHS, <7,7,7,7>
- 2565388078U, // <4,u,7,u>: Cost 3 vext1 <2,4,u,7>, LHS
- 1479704678U, // <4,u,u,0>: Cost 2 vext1 <0,4,u,u>, LHS
- 1545197358U, // <4,u,u,1>: Cost 2 vext2 <0,2,4,u>, LHS
- 1618172261U, // <4,u,u,2>: Cost 2 vext3 <1,2,3,4>, LHS
- 1497623071U, // <4,u,u,3>: Cost 2 vext1 <3,4,u,u>, <3,4,u,u>
- 161926454U, // <4,u,u,4>: Cost 1 vdup0 RHS
- 1545197722U, // <4,u,u,5>: Cost 2 vext2 <0,2,4,u>, RHS
- 1618172301U, // <4,u,u,6>: Cost 2 vext3 <1,2,3,4>, RHS
- 27705344U, // <4,u,u,7>: Cost 0 copy RHS
- 27705344U, // <4,u,u,u>: Cost 0 copy RHS
- 2687123456U, // <5,0,0,0>: Cost 3 vext3 <0,4,1,5>, <0,0,0,0>
2687123466U, // <5,0,0,1>: Cost 3 vext3 <0,4,1,5>, <0,0,1,1> - 2687123476U, // <5,0,0,2>: Cost 3 vext3 <0,4,1,5>, <0,0,2,2> - 3710599434U, // <5,0,0,3>: Cost 4 vext2 <3,2,5,0>, <0,3,2,5> - 2642166098U, // <5,0,0,4>: Cost 3 vext2 <4,1,5,0>, <0,4,1,5> - 3657060306U, // <5,0,0,5>: Cost 4 vext1 <5,5,0,0>, <5,5,0,0> - 3292094923U, // <5,0,0,6>: Cost 4 vrev <0,5,6,0> - 3669005700U, // <5,0,0,7>: Cost 4 vext1 <7,5,0,0>, <7,5,0,0> - 2687123530U, // <5,0,0,u>: Cost 3 vext3 <0,4,1,5>, <0,0,u,2> - 2559434854U, // <5,0,1,0>: Cost 3 vext1 <1,5,0,1>, LHS - 2559435887U, // <5,0,1,1>: Cost 3 vext1 <1,5,0,1>, <1,5,0,1> - 1613381734U, // <5,0,1,2>: Cost 2 vext3 <0,4,1,5>, LHS - 3698656256U, // <5,0,1,3>: Cost 4 vext2 <1,2,5,0>, <1,3,5,7> - 2559438134U, // <5,0,1,4>: Cost 3 vext1 <1,5,0,1>, RHS - 2583326675U, // <5,0,1,5>: Cost 3 vext1 <5,5,0,1>, <5,5,0,1> - 3715908851U, // <5,0,1,6>: Cost 4 vext2 <4,1,5,0>, <1,6,5,7> - 3657069562U, // <5,0,1,7>: Cost 4 vext1 <5,5,0,1>, <7,0,1,2> - 1613381788U, // <5,0,1,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2686017700U, // <5,0,2,0>: Cost 3 vext3 <0,2,4,5>, <0,2,0,2> - 2685796528U, // <5,0,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> - 2698625208U, // <5,0,2,2>: Cost 3 vext3 <2,3,4,5>, <0,2,2,4> - 2685944002U, // <5,0,2,3>: Cost 3 vext3 <0,2,3,5>, <0,2,3,5> - 2686017739U, // <5,0,2,4>: Cost 3 vext3 <0,2,4,5>, <0,2,4,5> - 2686091476U, // <5,0,2,5>: Cost 3 vext3 <0,2,5,5>, <0,2,5,5> - 2725167324U, // <5,0,2,6>: Cost 3 vext3 <6,7,4,5>, <0,2,6,4> - 2595280230U, // <5,0,2,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> - 2686312687U, // <5,0,2,u>: Cost 3 vext3 <0,2,u,5>, <0,2,u,5> - 3760128248U, // <5,0,3,0>: Cost 4 vext3 <0,3,0,5>, <0,3,0,5> - 3759685888U, // <5,0,3,1>: Cost 4 vext3 <0,2,3,5>, <0,3,1,4> - 2686533898U, // <5,0,3,2>: Cost 3 vext3 <0,3,2,5>, <0,3,2,5> - 3760349459U, // <5,0,3,3>: Cost 4 vext3 <0,3,3,5>, <0,3,3,5> - 2638187004U, // <5,0,3,4>: Cost 3 vext2 <3,4,5,0>, <3,4,5,0> - 3776348452U, // <5,0,3,5>: Cost 4 vext3 <3,0,4,5>, <0,3,5,4> - 3713256094U, // <5,0,3,6>: Cost 4 vext2 <3,6,5,0>, <3,6,5,0> - 3914064896U, // <5,0,3,7>: Cost 4 vuzpr <3,5,7,0>, <1,3,5,7> - 2686976320U, // <5,0,3,u>: Cost 3 vext3 <0,3,u,5>, <0,3,u,5> - 2559459430U, // <5,0,4,0>: Cost 3 vext1 <1,5,0,4>, LHS - 1613381970U, // <5,0,4,1>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> - 2687123804U, // <5,0,4,2>: Cost 3 vext3 <0,4,1,5>, <0,4,2,6> - 3761013092U, // <5,0,4,3>: Cost 4 vext3 <0,4,3,5>, <0,4,3,5> - 2559462710U, // <5,0,4,4>: Cost 3 vext1 <1,5,0,4>, RHS - 2638187830U, // <5,0,4,5>: Cost 3 vext2 <3,4,5,0>, RHS - 3761234303U, // <5,0,4,6>: Cost 4 vext3 <0,4,6,5>, <0,4,6,5> - 2646150600U, // <5,0,4,7>: Cost 3 vext2 <4,7,5,0>, <4,7,5,0> - 1613381970U, // <5,0,4,u>: Cost 2 vext3 <0,4,1,5>, <0,4,1,5> - 3766763926U, // <5,0,5,0>: Cost 4 vext3 <1,4,0,5>, <0,5,0,1> - 2919268454U, // <5,0,5,1>: Cost 3 vzipl <5,5,5,5>, LHS - 3053486182U, // <5,0,5,2>: Cost 3 vtrnl <5,5,5,5>, LHS - 3723210589U, // <5,0,5,3>: Cost 4 vext2 <5,3,5,0>, <5,3,5,0> - 3766763966U, // <5,0,5,4>: Cost 4 vext3 <1,4,0,5>, <0,5,4,5> - 2650796031U, // <5,0,5,5>: Cost 3 vext2 <5,5,5,0>, <5,5,5,0> - 3719893090U, // <5,0,5,6>: Cost 4 vext2 <4,7,5,0>, <5,6,7,0> - 3914067254U, // <5,0,5,7>: Cost 4 vuzpr <3,5,7,0>, RHS - 2919269021U, // <5,0,5,u>: Cost 3 vzipl <5,5,5,5>, LHS - 4047519744U, // <5,0,6,0>: Cost 4 vzipr <3,4,5,6>, <0,0,0,0> - 2920038502U, // <5,0,6,1>: Cost 3 vzipl <5,6,7,0>, LHS - 3759759871U, // <5,0,6,2>: Cost 4 vext3 <0,2,4,5>, <0,6,2,7> - 3645164070U, // <5,0,6,3>: Cost 4 vext1 <3,5,0,6>, <3,5,0,6> - 3762414095U, // <5,0,6,4>: Cost 4 vext3 
<0,6,4,5>, <0,6,4,5> - 3993780690U, // <5,0,6,5>: Cost 4 vzipl <5,6,7,0>, <0,5,6,7> - 3719893816U, // <5,0,6,6>: Cost 4 vext2 <4,7,5,0>, <6,6,6,6> - 2662077302U, // <5,0,6,7>: Cost 3 vext2 <7,4,5,0>, <6,7,4,5> - 2920039069U, // <5,0,6,u>: Cost 3 vzipl <5,6,7,0>, LHS - 2565455974U, // <5,0,7,0>: Cost 3 vext1 <2,5,0,7>, LHS - 2565456790U, // <5,0,7,1>: Cost 3 vext1 <2,5,0,7>, <1,2,3,0> - 2565457742U, // <5,0,7,2>: Cost 3 vext1 <2,5,0,7>, <2,5,0,7> - 3639199894U, // <5,0,7,3>: Cost 4 vext1 <2,5,0,7>, <3,0,1,2> - 2565459254U, // <5,0,7,4>: Cost 3 vext1 <2,5,0,7>, RHS - 2589347938U, // <5,0,7,5>: Cost 3 vext1 <6,5,0,7>, <5,6,7,0> - 2589348530U, // <5,0,7,6>: Cost 3 vext1 <6,5,0,7>, <6,5,0,7> - 4188456422U, // <5,0,7,7>: Cost 4 vtrnr RHS, <2,0,5,7> - 2565461806U, // <5,0,7,u>: Cost 3 vext1 <2,5,0,7>, LHS - 2687124106U, // <5,0,u,0>: Cost 3 vext3 <0,4,1,5>, <0,u,0,2> - 1616036502U, // <5,0,u,1>: Cost 2 vext3 <0,u,1,5>, <0,u,1,5> - 1613382301U, // <5,0,u,2>: Cost 2 vext3 <0,4,1,5>, LHS - 2689925800U, // <5,0,u,3>: Cost 3 vext3 <0,u,3,5>, <0,u,3,5> - 2687124146U, // <5,0,u,4>: Cost 3 vext3 <0,4,1,5>, <0,u,4,6> - 2638190746U, // <5,0,u,5>: Cost 3 vext2 <3,4,5,0>, RHS - 2589356723U, // <5,0,u,6>: Cost 3 vext1 <6,5,0,u>, <6,5,0,u> - 2595280230U, // <5,0,u,7>: Cost 3 vext1 <7,5,0,2>, <7,4,5,6> - 1613382355U, // <5,0,u,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2646818816U, // <5,1,0,0>: Cost 3 vext2 <4,u,5,1>, <0,0,0,0> - 1573077094U, // <5,1,0,1>: Cost 2 vext2 <4,u,5,1>, LHS - 2646818980U, // <5,1,0,2>: Cost 3 vext2 <4,u,5,1>, <0,2,0,2> - 2687124214U, // <5,1,0,3>: Cost 3 vext3 <0,4,1,5>, <1,0,3,2> - 2641510738U, // <5,1,0,4>: Cost 3 vext2 <4,0,5,1>, <0,4,1,5> - 2641510814U, // <5,1,0,5>: Cost 3 vext2 <4,0,5,1>, <0,5,1,0> - 3720561142U, // <5,1,0,6>: Cost 4 vext2 <4,u,5,1>, <0,6,1,7> - 3298141357U, // <5,1,0,7>: Cost 4 vrev <1,5,7,0> - 1573077661U, // <5,1,0,u>: Cost 2 vext2 <4,u,5,1>, LHS - 2223891567U, // <5,1,1,0>: Cost 3 vrev <1,5,0,1> - 2687124276U, // <5,1,1,1>: Cost 3 vext3 <0,4,1,5>, <1,1,1,1> - 2646819734U, // <5,1,1,2>: Cost 3 vext2 <4,u,5,1>, <1,2,3,0> - 2687124296U, // <5,1,1,3>: Cost 3 vext3 <0,4,1,5>, <1,1,3,3> - 2691326803U, // <5,1,1,4>: Cost 3 vext3 <1,1,4,5>, <1,1,4,5> - 2691400540U, // <5,1,1,5>: Cost 3 vext3 <1,1,5,5>, <1,1,5,5> - 3765216101U, // <5,1,1,6>: Cost 4 vext3 <1,1,6,5>, <1,1,6,5> - 3765289838U, // <5,1,1,7>: Cost 4 vext3 <1,1,7,5>, <1,1,7,5> - 2687124341U, // <5,1,1,u>: Cost 3 vext3 <0,4,1,5>, <1,1,u,3> - 3297641584U, // <5,1,2,0>: Cost 4 vrev <1,5,0,2> - 3763520391U, // <5,1,2,1>: Cost 4 vext3 <0,u,1,5>, <1,2,1,3> - 2646820456U, // <5,1,2,2>: Cost 3 vext2 <4,u,5,1>, <2,2,2,2> - 2687124374U, // <5,1,2,3>: Cost 3 vext3 <0,4,1,5>, <1,2,3,0> - 2691990436U, // <5,1,2,4>: Cost 3 vext3 <1,2,4,5>, <1,2,4,5> - 2687124395U, // <5,1,2,5>: Cost 3 vext3 <0,4,1,5>, <1,2,5,3> - 2646820794U, // <5,1,2,6>: Cost 3 vext2 <4,u,5,1>, <2,6,3,7> - 3808199610U, // <5,1,2,7>: Cost 4 vext3 , <1,2,7,0> - 2687124419U, // <5,1,2,u>: Cost 3 vext3 <0,4,1,5>, <1,2,u,0> - 2577440870U, // <5,1,3,0>: Cost 3 vext1 <4,5,1,3>, LHS - 2687124440U, // <5,1,3,1>: Cost 3 vext3 <0,4,1,5>, <1,3,1,3> - 3759686627U, // <5,1,3,2>: Cost 4 vext3 <0,2,3,5>, <1,3,2,5> - 2692580332U, // <5,1,3,3>: Cost 3 vext3 <1,3,3,5>, <1,3,3,5> - 2687124469U, // <5,1,3,4>: Cost 3 vext3 <0,4,1,5>, <1,3,4,5> - 2685207552U, // <5,1,3,5>: Cost 3 vext3 <0,1,2,5>, <1,3,5,7> - 3760866313U, // <5,1,3,6>: Cost 4 vext3 <0,4,1,5>, <1,3,6,7> - 2692875280U, // <5,1,3,7>: Cost 3 vext3 <1,3,7,5>, <1,3,7,5> - 2687124503U, // <5,1,3,u>: Cost 3 vext3 <0,4,1,5>, 
<1,3,u,3> - 1567771538U, // <5,1,4,0>: Cost 2 vext2 <4,0,5,1>, <4,0,5,1> - 2693096491U, // <5,1,4,1>: Cost 3 vext3 <1,4,1,5>, <1,4,1,5> - 2693170228U, // <5,1,4,2>: Cost 3 vext3 <1,4,2,5>, <1,4,2,5> - 2687124541U, // <5,1,4,3>: Cost 3 vext3 <0,4,1,5>, <1,4,3,5> - 2646822096U, // <5,1,4,4>: Cost 3 vext2 <4,u,5,1>, <4,4,4,4> - 1573080374U, // <5,1,4,5>: Cost 2 vext2 <4,u,5,1>, RHS - 2646822260U, // <5,1,4,6>: Cost 3 vext2 <4,u,5,1>, <4,6,4,6> - 3298174129U, // <5,1,4,7>: Cost 4 vrev <1,5,7,4> - 1573080602U, // <5,1,4,u>: Cost 2 vext2 <4,u,5,1>, <4,u,5,1> - 2687124591U, // <5,1,5,0>: Cost 3 vext3 <0,4,1,5>, <1,5,0,1> - 2646822543U, // <5,1,5,1>: Cost 3 vext2 <4,u,5,1>, <5,1,0,1> - 3760866433U, // <5,1,5,2>: Cost 4 vext3 <0,4,1,5>, <1,5,2,1> - 2687124624U, // <5,1,5,3>: Cost 3 vext3 <0,4,1,5>, <1,5,3,7> - 2687124631U, // <5,1,5,4>: Cost 3 vext3 <0,4,1,5>, <1,5,4,5> - 2646822916U, // <5,1,5,5>: Cost 3 vext2 <4,u,5,1>, <5,5,5,5> - 2646823010U, // <5,1,5,6>: Cost 3 vext2 <4,u,5,1>, <5,6,7,0> - 2646823080U, // <5,1,5,7>: Cost 3 vext2 <4,u,5,1>, <5,7,5,7> - 2687124663U, // <5,1,5,u>: Cost 3 vext3 <0,4,1,5>, <1,5,u,1> - 2553577574U, // <5,1,6,0>: Cost 3 vext1 <0,5,1,6>, LHS - 3763520719U, // <5,1,6,1>: Cost 4 vext3 <0,u,1,5>, <1,6,1,7> - 2646823418U, // <5,1,6,2>: Cost 3 vext2 <4,u,5,1>, <6,2,7,3> - 3760866529U, // <5,1,6,3>: Cost 4 vext3 <0,4,1,5>, <1,6,3,7> - 2553580854U, // <5,1,6,4>: Cost 3 vext1 <0,5,1,6>, RHS - 2687124723U, // <5,1,6,5>: Cost 3 vext3 <0,4,1,5>, <1,6,5,7> - 2646823736U, // <5,1,6,6>: Cost 3 vext2 <4,u,5,1>, <6,6,6,6> - 2646823758U, // <5,1,6,7>: Cost 3 vext2 <4,u,5,1>, <6,7,0,1> - 2646823839U, // <5,1,6,u>: Cost 3 vext2 <4,u,5,1>, <6,u,0,1> - 2559557734U, // <5,1,7,0>: Cost 3 vext1 <1,5,1,7>, LHS - 2559558452U, // <5,1,7,1>: Cost 3 vext1 <1,5,1,7>, <1,1,1,1> - 2571503270U, // <5,1,7,2>: Cost 3 vext1 <3,5,1,7>, <2,3,0,1> - 2040971366U, // <5,1,7,3>: Cost 2 vtrnr RHS, LHS - 2559561014U, // <5,1,7,4>: Cost 3 vext1 <1,5,1,7>, RHS - 2595393232U, // <5,1,7,5>: Cost 3 vext1 <7,5,1,7>, <5,1,7,3> - 4188455035U, // <5,1,7,6>: Cost 4 vtrnr RHS, <0,1,4,6> - 2646824556U, // <5,1,7,7>: Cost 3 vext2 <4,u,5,1>, <7,7,7,7> - 2040971371U, // <5,1,7,u>: Cost 2 vtrnr RHS, LHS - 1591662326U, // <5,1,u,0>: Cost 2 vext2 , - 1573082926U, // <5,1,u,1>: Cost 2 vext2 <4,u,5,1>, LHS - 2695824760U, // <5,1,u,2>: Cost 3 vext3 <1,u,2,5>, <1,u,2,5> - 2040979558U, // <5,1,u,3>: Cost 2 vtrnr RHS, LHS - 2687124874U, // <5,1,u,4>: Cost 3 vext3 <0,4,1,5>, <1,u,4,5> - 1573083290U, // <5,1,u,5>: Cost 2 vext2 <4,u,5,1>, RHS - 2646825168U, // <5,1,u,6>: Cost 3 vext2 <4,u,5,1>, - 2646825216U, // <5,1,u,7>: Cost 3 vext2 <4,u,5,1>, - 2040979563U, // <5,1,u,u>: Cost 2 vtrnr RHS, LHS - 3702652928U, // <5,2,0,0>: Cost 4 vext2 <1,u,5,2>, <0,0,0,0> - 2628911206U, // <5,2,0,1>: Cost 3 vext2 <1,u,5,2>, LHS - 2641518756U, // <5,2,0,2>: Cost 3 vext2 <4,0,5,2>, <0,2,0,2> - 3759760847U, // <5,2,0,3>: Cost 4 vext3 <0,2,4,5>, <2,0,3,2> - 3760866775U, // <5,2,0,4>: Cost 4 vext3 <0,4,1,5>, <2,0,4,1> - 3759539680U, // <5,2,0,5>: Cost 4 vext3 <0,2,1,5>, <2,0,5,1> - 3760866796U, // <5,2,0,6>: Cost 4 vext3 <0,4,1,5>, <2,0,6,4> - 3304114054U, // <5,2,0,7>: Cost 4 vrev <2,5,7,0> - 2628911773U, // <5,2,0,u>: Cost 3 vext2 <1,u,5,2>, LHS - 2623603464U, // <5,2,1,0>: Cost 3 vext2 <1,0,5,2>, <1,0,5,2> - 3698008921U, // <5,2,1,1>: Cost 4 vext2 <1,1,5,2>, <1,1,5,2> - 3633325603U, // <5,2,1,2>: Cost 4 vext1 <1,5,2,1>, <2,1,3,5> - 2687125027U, // <5,2,1,3>: Cost 3 vext3 <0,4,1,5>, <2,1,3,5> - 3633327414U, // <5,2,1,4>: Cost 4 vext1 <1,5,2,1>, RHS - 
3759539760U, // <5,2,1,5>: Cost 4 vext3 <0,2,1,5>, <2,1,5,0> - 3760866876U, // <5,2,1,6>: Cost 4 vext3 <0,4,1,5>, <2,1,6,3> - 3304122247U, // <5,2,1,7>: Cost 4 vrev <2,5,7,1> - 2687125072U, // <5,2,1,u>: Cost 3 vext3 <0,4,1,5>, <2,1,u,5> - 3633332326U, // <5,2,2,0>: Cost 4 vext1 <1,5,2,2>, LHS - 3759760992U, // <5,2,2,1>: Cost 4 vext3 <0,2,4,5>, <2,2,1,3> - 2687125096U, // <5,2,2,2>: Cost 3 vext3 <0,4,1,5>, <2,2,2,2> - 2687125106U, // <5,2,2,3>: Cost 3 vext3 <0,4,1,5>, <2,2,3,3> - 2697963133U, // <5,2,2,4>: Cost 3 vext3 <2,2,4,5>, <2,2,4,5> - 3759466120U, // <5,2,2,5>: Cost 4 vext3 <0,2,0,5>, <2,2,5,7> - 3760866960U, // <5,2,2,6>: Cost 4 vext3 <0,4,1,5>, <2,2,6,6> - 3771926168U, // <5,2,2,7>: Cost 4 vext3 <2,2,7,5>, <2,2,7,5> - 2687125151U, // <5,2,2,u>: Cost 3 vext3 <0,4,1,5>, <2,2,u,3> - 2687125158U, // <5,2,3,0>: Cost 3 vext3 <0,4,1,5>, <2,3,0,1> - 2698405555U, // <5,2,3,1>: Cost 3 vext3 <2,3,1,5>, <2,3,1,5> - 2577516238U, // <5,2,3,2>: Cost 3 vext1 <4,5,2,3>, <2,3,4,5> - 3759687365U, // <5,2,3,3>: Cost 4 vext3 <0,2,3,5>, <2,3,3,5> - 1624884942U, // <5,2,3,4>: Cost 2 vext3 <2,3,4,5>, <2,3,4,5> - 2698700503U, // <5,2,3,5>: Cost 3 vext3 <2,3,5,5>, <2,3,5,5> - 3772368608U, // <5,2,3,6>: Cost 4 vext3 <2,3,4,5>, <2,3,6,5> - 3702655716U, // <5,2,3,7>: Cost 4 vext2 <1,u,5,2>, <3,7,3,7> - 1625179890U, // <5,2,3,u>: Cost 2 vext3 <2,3,u,5>, <2,3,u,5> - 2641521555U, // <5,2,4,0>: Cost 3 vext2 <4,0,5,2>, <4,0,5,2> - 3772368642U, // <5,2,4,1>: Cost 4 vext3 <2,3,4,5>, <2,4,1,3> - 2699142925U, // <5,2,4,2>: Cost 3 vext3 <2,4,2,5>, <2,4,2,5> - 2698626838U, // <5,2,4,3>: Cost 3 vext3 <2,3,4,5>, <2,4,3,5> - 2698626848U, // <5,2,4,4>: Cost 3 vext3 <2,3,4,5>, <2,4,4,6> - 2628914486U, // <5,2,4,5>: Cost 3 vext2 <1,u,5,2>, RHS - 2645503353U, // <5,2,4,6>: Cost 3 vext2 <4,6,5,2>, <4,6,5,2> - 3304146826U, // <5,2,4,7>: Cost 4 vrev <2,5,7,4> - 2628914729U, // <5,2,4,u>: Cost 3 vext2 <1,u,5,2>, RHS - 2553643110U, // <5,2,5,0>: Cost 3 vext1 <0,5,2,5>, LHS - 3758950227U, // <5,2,5,1>: Cost 4 vext3 <0,1,2,5>, <2,5,1,3> - 3759761248U, // <5,2,5,2>: Cost 4 vext3 <0,2,4,5>, <2,5,2,7> - 2982396006U, // <5,2,5,3>: Cost 3 vzipr <4,u,5,5>, LHS - 2553646390U, // <5,2,5,4>: Cost 3 vext1 <0,5,2,5>, RHS - 2553647108U, // <5,2,5,5>: Cost 3 vext1 <0,5,2,5>, <5,5,5,5> - 3760867204U, // <5,2,5,6>: Cost 4 vext3 <0,4,1,5>, <2,5,6,7> - 3702657141U, // <5,2,5,7>: Cost 4 vext2 <1,u,5,2>, <5,7,0,1> - 2982396011U, // <5,2,5,u>: Cost 3 vzipr <4,u,5,5>, LHS - 3627393126U, // <5,2,6,0>: Cost 4 vext1 <0,5,2,6>, LHS - 3760867236U, // <5,2,6,1>: Cost 4 vext3 <0,4,1,5>, <2,6,1,3> - 2645504506U, // <5,2,6,2>: Cost 3 vext2 <4,6,5,2>, <6,2,7,3> - 2687125434U, // <5,2,6,3>: Cost 3 vext3 <0,4,1,5>, <2,6,3,7> - 2700617665U, // <5,2,6,4>: Cost 3 vext3 <2,6,4,5>, <2,6,4,5> - 3760867276U, // <5,2,6,5>: Cost 4 vext3 <0,4,1,5>, <2,6,5,7> - 3763521493U, // <5,2,6,6>: Cost 4 vext3 <0,u,1,5>, <2,6,6,7> - 3719246670U, // <5,2,6,7>: Cost 4 vext2 <4,6,5,2>, <6,7,0,1> - 2687125479U, // <5,2,6,u>: Cost 3 vext3 <0,4,1,5>, <2,6,u,7> - 2565603430U, // <5,2,7,0>: Cost 3 vext1 <2,5,2,7>, LHS - 2553660150U, // <5,2,7,1>: Cost 3 vext1 <0,5,2,7>, <1,0,3,2> - 2565605216U, // <5,2,7,2>: Cost 3 vext1 <2,5,2,7>, <2,5,2,7> - 2961178726U, // <5,2,7,3>: Cost 3 vzipr <1,3,5,7>, LHS - 2565606710U, // <5,2,7,4>: Cost 3 vext1 <2,5,2,7>, RHS - 4034920552U, // <5,2,7,5>: Cost 4 vzipr <1,3,5,7>, <0,1,2,5> - 3114713292U, // <5,2,7,6>: Cost 3 vtrnr RHS, <0,2,4,6> - 3702658668U, // <5,2,7,7>: Cost 4 vext2 <1,u,5,2>, <7,7,7,7> - 2961178731U, // <5,2,7,u>: Cost 3 vzipr <1,3,5,7>, LHS - 
2687125563U, // <5,2,u,0>: Cost 3 vext3 <0,4,1,5>, <2,u,0,1> - 2628917038U, // <5,2,u,1>: Cost 3 vext2 <1,u,5,2>, LHS - 2565613409U, // <5,2,u,2>: Cost 3 vext1 <2,5,2,u>, <2,5,2,u> - 2687125592U, // <5,2,u,3>: Cost 3 vext3 <0,4,1,5>, <2,u,3,3> - 1628203107U, // <5,2,u,4>: Cost 2 vext3 <2,u,4,5>, <2,u,4,5> - 2628917402U, // <5,2,u,5>: Cost 3 vext2 <1,u,5,2>, RHS - 2702092405U, // <5,2,u,6>: Cost 3 vext3 <2,u,6,5>, <2,u,6,5> - 3304179598U, // <5,2,u,7>: Cost 4 vrev <2,5,7,u> - 1628498055U, // <5,2,u,u>: Cost 2 vext3 <2,u,u,5>, <2,u,u,5> - 3760867467U, // <5,3,0,0>: Cost 4 vext3 <0,4,1,5>, <3,0,0,0> - 2687125654U, // <5,3,0,1>: Cost 3 vext3 <0,4,1,5>, <3,0,1,2> - 3759761565U, // <5,3,0,2>: Cost 4 vext3 <0,2,4,5>, <3,0,2,0> - 3633391766U, // <5,3,0,3>: Cost 4 vext1 <1,5,3,0>, <3,0,1,2> - 2687125680U, // <5,3,0,4>: Cost 3 vext3 <0,4,1,5>, <3,0,4,1> - 3760277690U, // <5,3,0,5>: Cost 4 vext3 <0,3,2,5>, <3,0,5,2> - 3310013014U, // <5,3,0,6>: Cost 4 vrev <3,5,6,0> - 2236344927U, // <5,3,0,7>: Cost 3 vrev <3,5,7,0> - 2687125717U, // <5,3,0,u>: Cost 3 vext3 <0,4,1,5>, <3,0,u,2> - 3760867551U, // <5,3,1,0>: Cost 4 vext3 <0,4,1,5>, <3,1,0,3> - 3760867558U, // <5,3,1,1>: Cost 4 vext3 <0,4,1,5>, <3,1,1,1> - 2624938923U, // <5,3,1,2>: Cost 3 vext2 <1,2,5,3>, <1,2,5,3> - 2703198460U, // <5,3,1,3>: Cost 3 vext3 <3,1,3,5>, <3,1,3,5> - 3760867587U, // <5,3,1,4>: Cost 4 vext3 <0,4,1,5>, <3,1,4,3> - 2636219536U, // <5,3,1,5>: Cost 3 vext2 <3,1,5,3>, <1,5,3,7> - 3698681075U, // <5,3,1,6>: Cost 4 vext2 <1,2,5,3>, <1,6,5,7> - 2703493408U, // <5,3,1,7>: Cost 3 vext3 <3,1,7,5>, <3,1,7,5> - 2628920721U, // <5,3,1,u>: Cost 3 vext2 <1,u,5,3>, <1,u,5,3> - 3766765870U, // <5,3,2,0>: Cost 4 vext3 <1,4,0,5>, <3,2,0,1> - 3698681379U, // <5,3,2,1>: Cost 4 vext2 <1,2,5,3>, <2,1,3,5> - 3760867649U, // <5,3,2,2>: Cost 4 vext3 <0,4,1,5>, <3,2,2,2> - 2698627404U, // <5,3,2,3>: Cost 3 vext3 <2,3,4,5>, <3,2,3,4> - 2703935830U, // <5,3,2,4>: Cost 3 vext3 <3,2,4,5>, <3,2,4,5> - 2698627422U, // <5,3,2,5>: Cost 3 vext3 <2,3,4,5>, <3,2,5,4> - 3760867686U, // <5,3,2,6>: Cost 4 vext3 <0,4,1,5>, <3,2,6,3> - 3769788783U, // <5,3,2,7>: Cost 4 vext3 <1,u,5,5>, <3,2,7,3> - 2701945209U, // <5,3,2,u>: Cost 3 vext3 <2,u,4,5>, <3,2,u,4> - 3760867711U, // <5,3,3,0>: Cost 4 vext3 <0,4,1,5>, <3,3,0,1> - 2636220684U, // <5,3,3,1>: Cost 3 vext2 <3,1,5,3>, <3,1,5,3> - 3772369298U, // <5,3,3,2>: Cost 4 vext3 <2,3,4,5>, <3,3,2,2> - 2687125916U, // <5,3,3,3>: Cost 3 vext3 <0,4,1,5>, <3,3,3,3> - 2704599463U, // <5,3,3,4>: Cost 3 vext3 <3,3,4,5>, <3,3,4,5> - 2704673200U, // <5,3,3,5>: Cost 3 vext3 <3,3,5,5>, <3,3,5,5> - 3709962935U, // <5,3,3,6>: Cost 4 vext2 <3,1,5,3>, <3,6,7,7> - 3772369346U, // <5,3,3,7>: Cost 4 vext3 <2,3,4,5>, <3,3,7,5> - 2704894411U, // <5,3,3,u>: Cost 3 vext3 <3,3,u,5>, <3,3,u,5> - 2704968148U, // <5,3,4,0>: Cost 3 vext3 <3,4,0,5>, <3,4,0,5> - 3698682850U, // <5,3,4,1>: Cost 4 vext2 <1,2,5,3>, <4,1,5,0> - 2642857014U, // <5,3,4,2>: Cost 3 vext2 <4,2,5,3>, <4,2,5,3> - 2705189359U, // <5,3,4,3>: Cost 3 vext3 <3,4,3,5>, <3,4,3,5> - 2705263096U, // <5,3,4,4>: Cost 3 vext3 <3,4,4,5>, <3,4,4,5> - 2685946370U, // <5,3,4,5>: Cost 3 vext3 <0,2,3,5>, <3,4,5,6> - 3779152394U, // <5,3,4,6>: Cost 4 vext3 <3,4,6,5>, <3,4,6,5> - 2236377699U, // <5,3,4,7>: Cost 3 vrev <3,5,7,4> - 2687126045U, // <5,3,4,u>: Cost 3 vext3 <0,4,1,5>, <3,4,u,6> - 2571632742U, // <5,3,5,0>: Cost 3 vext1 <3,5,3,5>, LHS - 2559689870U, // <5,3,5,1>: Cost 3 vext1 <1,5,3,5>, <1,5,3,5> - 2571634382U, // <5,3,5,2>: Cost 3 vext1 <3,5,3,5>, <2,3,4,5> - 2571635264U, // <5,3,5,3>: 
Cost 3 vext1 <3,5,3,5>, <3,5,3,5> - 2571636022U, // <5,3,5,4>: Cost 3 vext1 <3,5,3,5>, RHS - 2559692804U, // <5,3,5,5>: Cost 3 vext1 <1,5,3,5>, <5,5,5,5> - 3720581218U, // <5,3,5,6>: Cost 4 vext2 <4,u,5,3>, <5,6,7,0> - 2236385892U, // <5,3,5,7>: Cost 3 vrev <3,5,7,5> - 2571638574U, // <5,3,5,u>: Cost 3 vext1 <3,5,3,5>, LHS - 2565668966U, // <5,3,6,0>: Cost 3 vext1 <2,5,3,6>, LHS - 3633439887U, // <5,3,6,1>: Cost 4 vext1 <1,5,3,6>, <1,5,3,6> - 2565670760U, // <5,3,6,2>: Cost 3 vext1 <2,5,3,6>, <2,5,3,6> - 2565671426U, // <5,3,6,3>: Cost 3 vext1 <2,5,3,6>, <3,4,5,6> - 2565672246U, // <5,3,6,4>: Cost 3 vext1 <2,5,3,6>, RHS - 3639414630U, // <5,3,6,5>: Cost 4 vext1 <2,5,3,6>, <5,3,6,0> - 4047521640U, // <5,3,6,6>: Cost 4 vzipr <3,4,5,6>, <2,5,3,6> - 2725169844U, // <5,3,6,7>: Cost 3 vext3 <6,7,4,5>, <3,6,7,4> - 2565674798U, // <5,3,6,u>: Cost 3 vext1 <2,5,3,6>, LHS - 1485963366U, // <5,3,7,0>: Cost 2 vext1 <1,5,3,7>, LHS - 1485964432U, // <5,3,7,1>: Cost 2 vext1 <1,5,3,7>, <1,5,3,7> - 2559706728U, // <5,3,7,2>: Cost 3 vext1 <1,5,3,7>, <2,2,2,2> - 2559707286U, // <5,3,7,3>: Cost 3 vext1 <1,5,3,7>, <3,0,1,2> - 1485966646U, // <5,3,7,4>: Cost 2 vext1 <1,5,3,7>, RHS - 2559708880U, // <5,3,7,5>: Cost 3 vext1 <1,5,3,7>, <5,1,7,3> - 2601513466U, // <5,3,7,6>: Cost 3 vext1 , <6,2,7,3> - 3114714112U, // <5,3,7,7>: Cost 3 vtrnr RHS, <1,3,5,7> - 1485969198U, // <5,3,7,u>: Cost 2 vext1 <1,5,3,7>, LHS - 1485971558U, // <5,3,u,0>: Cost 2 vext1 <1,5,3,u>, LHS - 1485972625U, // <5,3,u,1>: Cost 2 vext1 <1,5,3,u>, <1,5,3,u> - 2559714920U, // <5,3,u,2>: Cost 3 vext1 <1,5,3,u>, <2,2,2,2> - 2559715478U, // <5,3,u,3>: Cost 3 vext1 <1,5,3,u>, <3,0,1,2> - 1485974838U, // <5,3,u,4>: Cost 2 vext1 <1,5,3,u>, RHS - 2687126342U, // <5,3,u,5>: Cost 3 vext3 <0,4,1,5>, <3,u,5,6> - 2601521658U, // <5,3,u,6>: Cost 3 vext1 , <6,2,7,3> - 2236410471U, // <5,3,u,7>: Cost 3 vrev <3,5,7,u> - 1485977390U, // <5,3,u,u>: Cost 2 vext1 <1,5,3,u>, LHS - 3627491430U, // <5,4,0,0>: Cost 4 vext1 <0,5,4,0>, LHS - 2636890214U, // <5,4,0,1>: Cost 3 vext2 <3,2,5,4>, LHS - 3703333028U, // <5,4,0,2>: Cost 4 vext2 <2,0,5,4>, <0,2,0,2> - 3782249348U, // <5,4,0,3>: Cost 4 vext3 <4,0,3,5>, <4,0,3,5> - 2642198866U, // <5,4,0,4>: Cost 3 vext2 <4,1,5,4>, <0,4,1,5> - 2687126418U, // <5,4,0,5>: Cost 3 vext3 <0,4,1,5>, <4,0,5,1> - 2242243887U, // <5,4,0,6>: Cost 3 vrev <4,5,6,0> - 3316059448U, // <5,4,0,7>: Cost 4 vrev <4,5,7,0> - 2636890781U, // <5,4,0,u>: Cost 3 vext2 <3,2,5,4>, LHS - 2241809658U, // <5,4,1,0>: Cost 3 vrev <4,5,0,1> - 3698025307U, // <5,4,1,1>: Cost 4 vext2 <1,1,5,4>, <1,1,5,4> - 3698688940U, // <5,4,1,2>: Cost 4 vext2 <1,2,5,4>, <1,2,5,4> - 3698689024U, // <5,4,1,3>: Cost 4 vext2 <1,2,5,4>, <1,3,5,7> - 3700016206U, // <5,4,1,4>: Cost 4 vext2 <1,4,5,4>, <1,4,5,4> - 2687126498U, // <5,4,1,5>: Cost 3 vext3 <0,4,1,5>, <4,1,5,0> - 3760868336U, // <5,4,1,6>: Cost 4 vext3 <0,4,1,5>, <4,1,6,5> - 3316067641U, // <5,4,1,7>: Cost 4 vrev <4,5,7,1> - 2242399554U, // <5,4,1,u>: Cost 3 vrev <4,5,u,1> - 3703334371U, // <5,4,2,0>: Cost 4 vext2 <2,0,5,4>, <2,0,5,4> - 3703998004U, // <5,4,2,1>: Cost 4 vext2 <2,1,5,4>, <2,1,5,4> - 3704661637U, // <5,4,2,2>: Cost 4 vext2 <2,2,5,4>, <2,2,5,4> - 2636891854U, // <5,4,2,3>: Cost 3 vext2 <3,2,5,4>, <2,3,4,5> - 3705988903U, // <5,4,2,4>: Cost 4 vext2 <2,4,5,4>, <2,4,5,4> - 2698628150U, // <5,4,2,5>: Cost 3 vext3 <2,3,4,5>, <4,2,5,3> - 3760868415U, // <5,4,2,6>: Cost 4 vext3 <0,4,1,5>, <4,2,6,3> - 3783871562U, // <5,4,2,7>: Cost 4 vext3 <4,2,7,5>, <4,2,7,5> - 2666752099U, // <5,4,2,u>: Cost 3 vext2 , <2,u,4,5> - 
3639459942U, // <5,4,3,0>: Cost 4 vext1 <2,5,4,3>, LHS - 3709970701U, // <5,4,3,1>: Cost 4 vext2 <3,1,5,4>, <3,1,5,4> - 2636892510U, // <5,4,3,2>: Cost 3 vext2 <3,2,5,4>, <3,2,5,4> - 3710634396U, // <5,4,3,3>: Cost 4 vext2 <3,2,5,4>, <3,3,3,3> - 2638219776U, // <5,4,3,4>: Cost 3 vext2 <3,4,5,4>, <3,4,5,4> - 3766987908U, // <5,4,3,5>: Cost 4 vext3 <1,4,3,5>, <4,3,5,0> - 2710719634U, // <5,4,3,6>: Cost 3 vext3 <4,3,6,5>, <4,3,6,5> - 3914097664U, // <5,4,3,7>: Cost 4 vuzpr <3,5,7,4>, <1,3,5,7> - 2640874308U, // <5,4,3,u>: Cost 3 vext2 <3,u,5,4>, <3,u,5,4> - 2583642214U, // <5,4,4,0>: Cost 3 vext1 <5,5,4,4>, LHS - 2642201574U, // <5,4,4,1>: Cost 3 vext2 <4,1,5,4>, <4,1,5,4> - 3710635062U, // <5,4,4,2>: Cost 4 vext2 <3,2,5,4>, <4,2,5,3> - 3717270664U, // <5,4,4,3>: Cost 4 vext2 <4,3,5,4>, <4,3,5,4> - 2713963728U, // <5,4,4,4>: Cost 3 vext3 <4,u,5,5>, <4,4,4,4> - 1637567706U, // <5,4,4,5>: Cost 2 vext3 <4,4,5,5>, <4,4,5,5> - 2242276659U, // <5,4,4,6>: Cost 3 vrev <4,5,6,4> - 2646183372U, // <5,4,4,7>: Cost 3 vext2 <4,7,5,4>, <4,7,5,4> - 1637788917U, // <5,4,4,u>: Cost 2 vext3 <4,4,u,5>, <4,4,u,5> - 2559762534U, // <5,4,5,0>: Cost 3 vext1 <1,5,4,5>, LHS - 2559763607U, // <5,4,5,1>: Cost 3 vext1 <1,5,4,5>, <1,5,4,5> - 2698628366U, // <5,4,5,2>: Cost 3 vext3 <2,3,4,5>, <4,5,2,3> - 3633506454U, // <5,4,5,3>: Cost 4 vext1 <1,5,4,5>, <3,0,1,2> - 2559765814U, // <5,4,5,4>: Cost 3 vext1 <1,5,4,5>, RHS - 2583654395U, // <5,4,5,5>: Cost 3 vext1 <5,5,4,5>, <5,5,4,5> - 1613385014U, // <5,4,5,6>: Cost 2 vext3 <0,4,1,5>, RHS - 3901639990U, // <5,4,5,7>: Cost 4 vuzpr <1,5,0,4>, RHS - 1613385032U, // <5,4,5,u>: Cost 2 vext3 <0,4,1,5>, RHS - 2559770726U, // <5,4,6,0>: Cost 3 vext1 <1,5,4,6>, LHS - 2559771648U, // <5,4,6,1>: Cost 3 vext1 <1,5,4,6>, <1,3,5,7> - 3633514088U, // <5,4,6,2>: Cost 4 vext1 <1,5,4,6>, <2,2,2,2> - 2571717122U, // <5,4,6,3>: Cost 3 vext1 <3,5,4,6>, <3,4,5,6> - 2559774006U, // <5,4,6,4>: Cost 3 vext1 <1,5,4,6>, RHS - 2712636796U, // <5,4,6,5>: Cost 3 vext3 <4,6,5,5>, <4,6,5,5> - 3760868743U, // <5,4,6,6>: Cost 4 vext3 <0,4,1,5>, <4,6,6,7> - 2712784270U, // <5,4,6,7>: Cost 3 vext3 <4,6,7,5>, <4,6,7,5> - 2559776558U, // <5,4,6,u>: Cost 3 vext1 <1,5,4,6>, LHS - 2565750886U, // <5,4,7,0>: Cost 3 vext1 <2,5,4,7>, LHS - 2565751706U, // <5,4,7,1>: Cost 3 vext1 <2,5,4,7>, <1,2,3,4> - 2565752690U, // <5,4,7,2>: Cost 3 vext1 <2,5,4,7>, <2,5,4,7> - 2571725387U, // <5,4,7,3>: Cost 3 vext1 <3,5,4,7>, <3,5,4,7> - 2565754166U, // <5,4,7,4>: Cost 3 vext1 <2,5,4,7>, RHS - 3114713426U, // <5,4,7,5>: Cost 3 vtrnr RHS, <0,4,1,5> - 94817590U, // <5,4,7,6>: Cost 1 vrev RHS - 2595616175U, // <5,4,7,7>: Cost 3 vext1 <7,5,4,7>, <7,5,4,7> - 94965064U, // <5,4,7,u>: Cost 1 vrev RHS - 2559787110U, // <5,4,u,0>: Cost 3 vext1 <1,5,4,u>, LHS - 2559788186U, // <5,4,u,1>: Cost 3 vext1 <1,5,4,u>, <1,5,4,u> - 2242014483U, // <5,4,u,2>: Cost 3 vrev <4,5,2,u> - 2667419628U, // <5,4,u,3>: Cost 3 vext2 , - 2559790390U, // <5,4,u,4>: Cost 3 vext1 <1,5,4,u>, RHS - 1640222238U, // <5,4,u,5>: Cost 2 vext3 <4,u,5,5>, <4,u,5,5> - 94825783U, // <5,4,u,6>: Cost 1 vrev RHS - 2714111536U, // <5,4,u,7>: Cost 3 vext3 <4,u,7,5>, <4,u,7,5> - 94973257U, // <5,4,u,u>: Cost 1 vrev RHS - 2646851584U, // <5,5,0,0>: Cost 3 vext2 <4,u,5,5>, <0,0,0,0> - 1573109862U, // <5,5,0,1>: Cost 2 vext2 <4,u,5,5>, LHS - 2646851748U, // <5,5,0,2>: Cost 3 vext2 <4,u,5,5>, <0,2,0,2> - 3760279130U, // <5,5,0,3>: Cost 4 vext3 <0,3,2,5>, <5,0,3,2> - 2687127138U, // <5,5,0,4>: Cost 3 vext3 <0,4,1,5>, <5,0,4,1> - 2248142847U, // <5,5,0,5>: Cost 3 vrev <5,5,5,0> - 
3720593910U, // <5,5,0,6>: Cost 4 vext2 <4,u,5,5>, <0,6,1,7> - 4182502710U, // <5,5,0,7>: Cost 4 vtrnr <3,5,7,0>, RHS - 1573110429U, // <5,5,0,u>: Cost 2 vext2 <4,u,5,5>, LHS - 2646852342U, // <5,5,1,0>: Cost 3 vext2 <4,u,5,5>, <1,0,3,2> - 2624291676U, // <5,5,1,1>: Cost 3 vext2 <1,1,5,5>, <1,1,5,5> - 2646852502U, // <5,5,1,2>: Cost 3 vext2 <4,u,5,5>, <1,2,3,0> - 2646852568U, // <5,5,1,3>: Cost 3 vext2 <4,u,5,5>, <1,3,1,3> - 2715217591U, // <5,5,1,4>: Cost 3 vext3 <5,1,4,5>, <5,1,4,5> - 2628936848U, // <5,5,1,5>: Cost 3 vext2 <1,u,5,5>, <1,5,3,7> - 3698033907U, // <5,5,1,6>: Cost 4 vext2 <1,1,5,5>, <1,6,5,7> - 2713964240U, // <5,5,1,7>: Cost 3 vext3 <4,u,5,5>, <5,1,7,3> - 2628937107U, // <5,5,1,u>: Cost 3 vext2 <1,u,5,5>, <1,u,5,5> - 3645497446U, // <5,5,2,0>: Cost 4 vext1 <3,5,5,2>, LHS - 3760869099U, // <5,5,2,1>: Cost 4 vext3 <0,4,1,5>, <5,2,1,3> - 2646853224U, // <5,5,2,2>: Cost 3 vext2 <4,u,5,5>, <2,2,2,2> - 2698628862U, // <5,5,2,3>: Cost 3 vext3 <2,3,4,5>, <5,2,3,4> - 3772370694U, // <5,5,2,4>: Cost 4 vext3 <2,3,4,5>, <5,2,4,3> - 2713964303U, // <5,5,2,5>: Cost 3 vext3 <4,u,5,5>, <5,2,5,3> - 2646853562U, // <5,5,2,6>: Cost 3 vext2 <4,u,5,5>, <2,6,3,7> - 4038198272U, // <5,5,2,7>: Cost 4 vzipr <1,u,5,2>, <1,3,5,7> - 2701946667U, // <5,5,2,u>: Cost 3 vext3 <2,u,4,5>, <5,2,u,4> - 2646853782U, // <5,5,3,0>: Cost 3 vext2 <4,u,5,5>, <3,0,1,2> - 3698034922U, // <5,5,3,1>: Cost 4 vext2 <1,1,5,5>, <3,1,1,5> - 3702679919U, // <5,5,3,2>: Cost 4 vext2 <1,u,5,5>, <3,2,7,3> - 2637564336U, // <5,5,3,3>: Cost 3 vext2 <3,3,5,5>, <3,3,5,5> - 2646854146U, // <5,5,3,4>: Cost 3 vext2 <4,u,5,5>, <3,4,5,6> - 2638891602U, // <5,5,3,5>: Cost 3 vext2 <3,5,5,5>, <3,5,5,5> - 3702680247U, // <5,5,3,6>: Cost 4 vext2 <1,u,5,5>, <3,6,7,7> - 3702680259U, // <5,5,3,7>: Cost 4 vext2 <1,u,5,5>, <3,7,0,1> - 2646854430U, // <5,5,3,u>: Cost 3 vext2 <4,u,5,5>, <3,u,1,2> - 2646854546U, // <5,5,4,0>: Cost 3 vext2 <4,u,5,5>, <4,0,5,1> - 2642209767U, // <5,5,4,1>: Cost 3 vext2 <4,1,5,5>, <4,1,5,5> - 3711306806U, // <5,5,4,2>: Cost 4 vext2 <3,3,5,5>, <4,2,5,3> - 3645516369U, // <5,5,4,3>: Cost 4 vext1 <3,5,5,4>, <3,5,5,4> - 1570458842U, // <5,5,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> - 1573113142U, // <5,5,4,5>: Cost 2 vext2 <4,u,5,5>, RHS - 2645527932U, // <5,5,4,6>: Cost 3 vext2 <4,6,5,5>, <4,6,5,5> - 2713964486U, // <5,5,4,7>: Cost 3 vext3 <4,u,5,5>, <5,4,7,6> - 1573113374U, // <5,5,4,u>: Cost 2 vext2 <4,u,5,5>, <4,u,5,5> - 1509982310U, // <5,5,5,0>: Cost 2 vext1 <5,5,5,5>, LHS - 2646855376U, // <5,5,5,1>: Cost 3 vext2 <4,u,5,5>, <5,1,7,3> - 2583725672U, // <5,5,5,2>: Cost 3 vext1 <5,5,5,5>, <2,2,2,2> - 2583726230U, // <5,5,5,3>: Cost 3 vext1 <5,5,5,5>, <3,0,1,2> - 1509985590U, // <5,5,5,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,5,5,5>: Cost 1 vdup1 RHS - 2646855778U, // <5,5,5,6>: Cost 3 vext2 <4,u,5,5>, <5,6,7,0> - 2646855848U, // <5,5,5,7>: Cost 3 vext2 <4,u,5,5>, <5,7,5,7> - 229035318U, // <5,5,5,u>: Cost 1 vdup1 RHS - 2577760358U, // <5,5,6,0>: Cost 3 vext1 <4,5,5,6>, LHS - 3633587361U, // <5,5,6,1>: Cost 4 vext1 <1,5,5,6>, <1,5,5,6> - 2646856186U, // <5,5,6,2>: Cost 3 vext2 <4,u,5,5>, <6,2,7,3> - 3633588738U, // <5,5,6,3>: Cost 4 vext1 <1,5,5,6>, <3,4,5,6> - 2718535756U, // <5,5,6,4>: Cost 3 vext3 <5,6,4,5>, <5,6,4,5> - 2644202223U, // <5,5,6,5>: Cost 3 vext2 <4,4,5,5>, <6,5,7,5> - 2973780482U, // <5,5,6,6>: Cost 3 vzipr <3,4,5,6>, <3,4,5,6> - 2646856526U, // <5,5,6,7>: Cost 3 vext2 <4,u,5,5>, <6,7,0,1> - 2646856607U, // <5,5,6,u>: Cost 3 vext2 <4,u,5,5>, <6,u,0,1> - 2571796582U, // <5,5,7,0>: Cost 3 vext1 
<3,5,5,7>, LHS - 3633595392U, // <5,5,7,1>: Cost 4 vext1 <1,5,5,7>, <1,3,5,7> - 2571798222U, // <5,5,7,2>: Cost 3 vext1 <3,5,5,7>, <2,3,4,5> - 2571799124U, // <5,5,7,3>: Cost 3 vext1 <3,5,5,7>, <3,5,5,7> - 2571799862U, // <5,5,7,4>: Cost 3 vext1 <3,5,5,7>, RHS - 3114717188U, // <5,5,7,5>: Cost 3 vtrnr RHS, <5,5,5,5> - 4034923010U, // <5,5,7,6>: Cost 4 vzipr <1,3,5,7>, <3,4,5,6> - 2040974646U, // <5,5,7,7>: Cost 2 vtrnr RHS, RHS - 2040974647U, // <5,5,7,u>: Cost 2 vtrnr RHS, RHS - 1509982310U, // <5,5,u,0>: Cost 2 vext1 <5,5,5,5>, LHS - 1573115694U, // <5,5,u,1>: Cost 2 vext2 <4,u,5,5>, LHS - 2571806414U, // <5,5,u,2>: Cost 3 vext1 <3,5,5,u>, <2,3,4,5> - 2571807317U, // <5,5,u,3>: Cost 3 vext1 <3,5,5,u>, <3,5,5,u> - 1509985590U, // <5,5,u,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,5,u,5>: Cost 1 vdup1 RHS - 2646857936U, // <5,5,u,6>: Cost 3 vext2 <4,u,5,5>, - 2040982838U, // <5,5,u,7>: Cost 2 vtrnr RHS, RHS - 229035318U, // <5,5,u,u>: Cost 1 vdup1 RHS - 2638233600U, // <5,6,0,0>: Cost 3 vext2 <3,4,5,6>, <0,0,0,0> - 1564491878U, // <5,6,0,1>: Cost 2 vext2 <3,4,5,6>, LHS - 2632261796U, // <5,6,0,2>: Cost 3 vext2 <2,4,5,6>, <0,2,0,2> - 2638233856U, // <5,6,0,3>: Cost 3 vext2 <3,4,5,6>, <0,3,1,4> - 2638233938U, // <5,6,0,4>: Cost 3 vext2 <3,4,5,6>, <0,4,1,5> - 3706003885U, // <5,6,0,5>: Cost 4 vext2 <2,4,5,6>, <0,5,2,6> - 3706003967U, // <5,6,0,6>: Cost 4 vext2 <2,4,5,6>, <0,6,2,7> - 4047473974U, // <5,6,0,7>: Cost 4 vzipr <3,4,5,0>, RHS - 1564492445U, // <5,6,0,u>: Cost 2 vext2 <3,4,5,6>, LHS - 2638234358U, // <5,6,1,0>: Cost 3 vext2 <3,4,5,6>, <1,0,3,2> - 2638234420U, // <5,6,1,1>: Cost 3 vext2 <3,4,5,6>, <1,1,1,1> - 2638234518U, // <5,6,1,2>: Cost 3 vext2 <3,4,5,6>, <1,2,3,0> - 2638234584U, // <5,6,1,3>: Cost 3 vext2 <3,4,5,6>, <1,3,1,3> - 2626290768U, // <5,6,1,4>: Cost 3 vext2 <1,4,5,6>, <1,4,5,6> - 2638234768U, // <5,6,1,5>: Cost 3 vext2 <3,4,5,6>, <1,5,3,7> - 3700032719U, // <5,6,1,6>: Cost 4 vext2 <1,4,5,6>, <1,6,1,7> - 2982366518U, // <5,6,1,7>: Cost 3 vzipr <4,u,5,1>, RHS - 2628945300U, // <5,6,1,u>: Cost 3 vext2 <1,u,5,6>, <1,u,5,6> - 3706004925U, // <5,6,2,0>: Cost 4 vext2 <2,4,5,6>, <2,0,1,2> - 3711976966U, // <5,6,2,1>: Cost 4 vext2 <3,4,5,6>, <2,1,0,3> - 2638235240U, // <5,6,2,2>: Cost 3 vext2 <3,4,5,6>, <2,2,2,2> - 2638235302U, // <5,6,2,3>: Cost 3 vext2 <3,4,5,6>, <2,3,0,1> - 2632263465U, // <5,6,2,4>: Cost 3 vext2 <2,4,5,6>, <2,4,5,6> - 2638235496U, // <5,6,2,5>: Cost 3 vext2 <3,4,5,6>, <2,5,3,6> - 2638235578U, // <5,6,2,6>: Cost 3 vext2 <3,4,5,6>, <2,6,3,7> - 2713965050U, // <5,6,2,7>: Cost 3 vext3 <4,u,5,5>, <6,2,7,3> - 2634917997U, // <5,6,2,u>: Cost 3 vext2 <2,u,5,6>, <2,u,5,6> - 2638235798U, // <5,6,3,0>: Cost 3 vext2 <3,4,5,6>, <3,0,1,2> - 3711977695U, // <5,6,3,1>: Cost 4 vext2 <3,4,5,6>, <3,1,0,3> - 3710650720U, // <5,6,3,2>: Cost 4 vext2 <3,2,5,6>, <3,2,5,6> - 2638236060U, // <5,6,3,3>: Cost 3 vext2 <3,4,5,6>, <3,3,3,3> - 1564494338U, // <5,6,3,4>: Cost 2 vext2 <3,4,5,6>, <3,4,5,6> - 2638236234U, // <5,6,3,5>: Cost 3 vext2 <3,4,5,6>, <3,5,4,6> - 3711978104U, // <5,6,3,6>: Cost 4 vext2 <3,4,5,6>, <3,6,0,7> - 4034227510U, // <5,6,3,7>: Cost 4 vzipr <1,2,5,3>, RHS - 1567148870U, // <5,6,3,u>: Cost 2 vext2 <3,u,5,6>, <3,u,5,6> - 2577817702U, // <5,6,4,0>: Cost 3 vext1 <4,5,6,4>, LHS - 3700034544U, // <5,6,4,1>: Cost 4 vext2 <1,4,5,6>, <4,1,6,5> - 2723033713U, // <5,6,4,2>: Cost 3 vext3 <6,4,2,5>, <6,4,2,5> - 2638236818U, // <5,6,4,3>: Cost 3 vext2 <3,4,5,6>, <4,3,6,5> - 2644208859U, // <5,6,4,4>: Cost 3 vext2 <4,4,5,6>, <4,4,5,6> - 1564495158U, // <5,6,4,5>: Cost 
2 vext2 <3,4,5,6>, RHS - 2645536125U, // <5,6,4,6>: Cost 3 vext2 <4,6,5,6>, <4,6,5,6> - 2723402398U, // <5,6,4,7>: Cost 3 vext3 <6,4,7,5>, <6,4,7,5> - 1564495401U, // <5,6,4,u>: Cost 2 vext2 <3,4,5,6>, RHS - 2577825894U, // <5,6,5,0>: Cost 3 vext1 <4,5,6,5>, LHS - 2662125264U, // <5,6,5,1>: Cost 3 vext2 <7,4,5,6>, <5,1,7,3> - 3775836867U, // <5,6,5,2>: Cost 4 vext3 <2,u,6,5>, <6,5,2,6> - 3711979343U, // <5,6,5,3>: Cost 4 vext2 <3,4,5,6>, <5,3,3,4> - 2650181556U, // <5,6,5,4>: Cost 3 vext2 <5,4,5,6>, <5,4,5,6> - 2662125572U, // <5,6,5,5>: Cost 3 vext2 <7,4,5,6>, <5,5,5,5> - 2638237732U, // <5,6,5,6>: Cost 3 vext2 <3,4,5,6>, <5,6,0,1> - 2982399286U, // <5,6,5,7>: Cost 3 vzipr <4,u,5,5>, RHS - 2982399287U, // <5,6,5,u>: Cost 3 vzipr <4,u,5,5>, RHS - 2583806054U, // <5,6,6,0>: Cost 3 vext1 <5,5,6,6>, LHS - 3711979910U, // <5,6,6,1>: Cost 4 vext2 <3,4,5,6>, <6,1,3,4> - 2662126074U, // <5,6,6,2>: Cost 3 vext2 <7,4,5,6>, <6,2,7,3> - 2583808514U, // <5,6,6,3>: Cost 3 vext1 <5,5,6,6>, <3,4,5,6> - 2583809334U, // <5,6,6,4>: Cost 3 vext1 <5,5,6,6>, RHS - 2583810062U, // <5,6,6,5>: Cost 3 vext1 <5,5,6,6>, <5,5,6,6> - 2638238520U, // <5,6,6,6>: Cost 3 vext2 <3,4,5,6>, <6,6,6,6> - 2973781302U, // <5,6,6,7>: Cost 3 vzipr <3,4,5,6>, RHS - 2973781303U, // <5,6,6,u>: Cost 3 vzipr <3,4,5,6>, RHS - 430358630U, // <5,6,7,0>: Cost 1 vext1 RHS, LHS - 1504101110U, // <5,6,7,1>: Cost 2 vext1 RHS, <1,0,3,2> - 1504101992U, // <5,6,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1504102550U, // <5,6,7,3>: Cost 2 vext1 RHS, <3,0,1,2> - 430361910U, // <5,6,7,4>: Cost 1 vext1 RHS, RHS - 1504104390U, // <5,6,7,5>: Cost 2 vext1 RHS, <5,4,7,6> - 1504105272U, // <5,6,7,6>: Cost 2 vext1 RHS, <6,6,6,6> - 1504106092U, // <5,6,7,7>: Cost 2 vext1 RHS, <7,7,7,7> - 430364462U, // <5,6,7,u>: Cost 1 vext1 RHS, LHS - 430366822U, // <5,6,u,0>: Cost 1 vext1 RHS, LHS - 1564497710U, // <5,6,u,1>: Cost 2 vext2 <3,4,5,6>, LHS - 1504110184U, // <5,6,u,2>: Cost 2 vext1 RHS, <2,2,2,2> - 1504110742U, // <5,6,u,3>: Cost 2 vext1 RHS, <3,0,1,2> - 430370103U, // <5,6,u,4>: Cost 1 vext1 RHS, RHS - 1564498074U, // <5,6,u,5>: Cost 2 vext2 <3,4,5,6>, RHS - 1504113146U, // <5,6,u,6>: Cost 2 vext1 RHS, <6,2,7,3> - 1504113658U, // <5,6,u,7>: Cost 2 vext1 RHS, <7,0,1,2> - 430372654U, // <5,6,u,u>: Cost 1 vext1 RHS, LHS - 2625634304U, // <5,7,0,0>: Cost 3 vext2 <1,3,5,7>, <0,0,0,0> - 1551892582U, // <5,7,0,1>: Cost 2 vext2 <1,3,5,7>, LHS - 2625634468U, // <5,7,0,2>: Cost 3 vext2 <1,3,5,7>, <0,2,0,2> - 2571889247U, // <5,7,0,3>: Cost 3 vext1 <3,5,7,0>, <3,5,7,0> - 2625634642U, // <5,7,0,4>: Cost 3 vext2 <1,3,5,7>, <0,4,1,5> - 2595778728U, // <5,7,0,5>: Cost 3 vext1 <7,5,7,0>, <5,7,5,7> - 3699376639U, // <5,7,0,6>: Cost 4 vext2 <1,3,5,7>, <0,6,2,7> - 2260235715U, // <5,7,0,7>: Cost 3 vrev <7,5,7,0> - 1551893149U, // <5,7,0,u>: Cost 2 vext2 <1,3,5,7>, LHS - 2625635062U, // <5,7,1,0>: Cost 3 vext2 <1,3,5,7>, <1,0,3,2> - 2624308020U, // <5,7,1,1>: Cost 3 vext2 <1,1,5,7>, <1,1,1,1> - 2625635222U, // <5,7,1,2>: Cost 3 vext2 <1,3,5,7>, <1,2,3,0> - 1551893504U, // <5,7,1,3>: Cost 2 vext2 <1,3,5,7>, <1,3,5,7> - 2571898166U, // <5,7,1,4>: Cost 3 vext1 <3,5,7,1>, RHS - 2625635472U, // <5,7,1,5>: Cost 3 vext2 <1,3,5,7>, <1,5,3,7> - 2627626227U, // <5,7,1,6>: Cost 3 vext2 <1,6,5,7>, <1,6,5,7> - 3702031684U, // <5,7,1,7>: Cost 4 vext2 <1,7,5,7>, <1,7,5,7> - 1555211669U, // <5,7,1,u>: Cost 2 vext2 <1,u,5,7>, <1,u,5,7> - 2629617126U, // <5,7,2,0>: Cost 3 vext2 <2,0,5,7>, <2,0,5,7> - 3699377670U, // <5,7,2,1>: Cost 4 vext2 <1,3,5,7>, <2,1,0,3> - 2625635944U, // <5,7,2,2>: Cost 3 vext2 
<1,3,5,7>, <2,2,2,2> - 2625636006U, // <5,7,2,3>: Cost 3 vext2 <1,3,5,7>, <2,3,0,1> - 2632271658U, // <5,7,2,4>: Cost 3 vext2 <2,4,5,7>, <2,4,5,7> - 2625636201U, // <5,7,2,5>: Cost 3 vext2 <1,3,5,7>, <2,5,3,7> - 2625636282U, // <5,7,2,6>: Cost 3 vext2 <1,3,5,7>, <2,6,3,7> - 3708004381U, // <5,7,2,7>: Cost 4 vext2 <2,7,5,7>, <2,7,5,7> - 2625636411U, // <5,7,2,u>: Cost 3 vext2 <1,3,5,7>, <2,u,0,1> - 2625636502U, // <5,7,3,0>: Cost 3 vext2 <1,3,5,7>, <3,0,1,2> - 2625636604U, // <5,7,3,1>: Cost 3 vext2 <1,3,5,7>, <3,1,3,5> - 3699378478U, // <5,7,3,2>: Cost 4 vext2 <1,3,5,7>, <3,2,0,1> - 2625636764U, // <5,7,3,3>: Cost 3 vext2 <1,3,5,7>, <3,3,3,3> - 2625636866U, // <5,7,3,4>: Cost 3 vext2 <1,3,5,7>, <3,4,5,6> - 2625636959U, // <5,7,3,5>: Cost 3 vext2 <1,3,5,7>, <3,5,7,0> - 3699378808U, // <5,7,3,6>: Cost 4 vext2 <1,3,5,7>, <3,6,0,7> - 2640235254U, // <5,7,3,7>: Cost 3 vext2 <3,7,5,7>, <3,7,5,7> - 2625637150U, // <5,7,3,u>: Cost 3 vext2 <1,3,5,7>, <3,u,1,2> - 2571919462U, // <5,7,4,0>: Cost 3 vext1 <3,5,7,4>, LHS - 2571920384U, // <5,7,4,1>: Cost 3 vext1 <3,5,7,4>, <1,3,5,7> - 3699379260U, // <5,7,4,2>: Cost 4 vext2 <1,3,5,7>, <4,2,6,0> - 2571922019U, // <5,7,4,3>: Cost 3 vext1 <3,5,7,4>, <3,5,7,4> - 2571922742U, // <5,7,4,4>: Cost 3 vext1 <3,5,7,4>, RHS - 1551895862U, // <5,7,4,5>: Cost 2 vext2 <1,3,5,7>, RHS - 2846277980U, // <5,7,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> - 2646207951U, // <5,7,4,7>: Cost 3 vext2 <4,7,5,7>, <4,7,5,7> - 1551896105U, // <5,7,4,u>: Cost 2 vext2 <1,3,5,7>, RHS - 2583871590U, // <5,7,5,0>: Cost 3 vext1 <5,5,7,5>, LHS - 2652180176U, // <5,7,5,1>: Cost 3 vext2 <5,7,5,7>, <5,1,7,3> - 2625638177U, // <5,7,5,2>: Cost 3 vext2 <1,3,5,7>, <5,2,7,3> - 2625638262U, // <5,7,5,3>: Cost 3 vext2 <1,3,5,7>, <5,3,7,7> - 2583874870U, // <5,7,5,4>: Cost 3 vext1 <5,5,7,5>, RHS - 2846281732U, // <5,7,5,5>: Cost 3 vuzpr RHS, <5,5,5,5> - 2651517015U, // <5,7,5,6>: Cost 3 vext2 <5,6,5,7>, <5,6,5,7> - 1772539190U, // <5,7,5,7>: Cost 2 vuzpr RHS, RHS - 1772539191U, // <5,7,5,u>: Cost 2 vuzpr RHS, RHS - 2846281826U, // <5,7,6,0>: Cost 3 vuzpr RHS, <5,6,7,0> - 3699380615U, // <5,7,6,1>: Cost 4 vext2 <1,3,5,7>, <6,1,3,5> - 2846281108U, // <5,7,6,2>: Cost 3 vuzpr RHS, <4,6,u,2> - 2589854210U, // <5,7,6,3>: Cost 3 vext1 <6,5,7,6>, <3,4,5,6> - 2846281830U, // <5,7,6,4>: Cost 3 vuzpr RHS, <5,6,7,4> - 2725467658U, // <5,7,6,5>: Cost 3 vext3 <6,7,u,5>, <7,6,5,u> - 2846281076U, // <5,7,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> - 2846279610U, // <5,7,6,7>: Cost 3 vuzpr RHS, <2,6,3,7> - 2846279611U, // <5,7,6,u>: Cost 3 vuzpr RHS, <2,6,3,u> - 1510146150U, // <5,7,7,0>: Cost 2 vext1 <5,5,7,7>, LHS - 2846282574U, // <5,7,7,1>: Cost 3 vuzpr RHS, <6,7,0,1> - 2583889512U, // <5,7,7,2>: Cost 3 vext1 <5,5,7,7>, <2,2,2,2> - 2846281919U, // <5,7,7,3>: Cost 3 vuzpr RHS, <5,7,u,3> - 1510149430U, // <5,7,7,4>: Cost 2 vext1 <5,5,7,7>, RHS - 1510150168U, // <5,7,7,5>: Cost 2 vext1 <5,5,7,7>, <5,5,7,7> - 2583892474U, // <5,7,7,6>: Cost 3 vext1 <5,5,7,7>, <6,2,7,3> - 2625640044U, // <5,7,7,7>: Cost 3 vext2 <1,3,5,7>, <7,7,7,7> - 1510151982U, // <5,7,7,u>: Cost 2 vext1 <5,5,7,7>, LHS - 1510154342U, // <5,7,u,0>: Cost 2 vext1 <5,5,7,u>, LHS - 1551898414U, // <5,7,u,1>: Cost 2 vext2 <1,3,5,7>, LHS - 2625640325U, // <5,7,u,2>: Cost 3 vext2 <1,3,5,7>, - 1772536477U, // <5,7,u,3>: Cost 2 vuzpr RHS, LHS - 1510157622U, // <5,7,u,4>: Cost 2 vext1 <5,5,7,u>, RHS - 1551898778U, // <5,7,u,5>: Cost 2 vext2 <1,3,5,7>, RHS - 2625640656U, // <5,7,u,6>: Cost 3 vext2 <1,3,5,7>, - 1772539433U, // <5,7,u,7>: Cost 2 vuzpr RHS, RHS - 1551898981U, // 
<5,7,u,u>: Cost 2 vext2 <1,3,5,7>, LHS - 2625642496U, // <5,u,0,0>: Cost 3 vext2 <1,3,5,u>, <0,0,0,0> - 1551900774U, // <5,u,0,1>: Cost 2 vext2 <1,3,5,u>, LHS - 2625642660U, // <5,u,0,2>: Cost 3 vext2 <1,3,5,u>, <0,2,0,2> - 2698630885U, // <5,u,0,3>: Cost 3 vext3 <2,3,4,5>, - 2687129325U, // <5,u,0,4>: Cost 3 vext3 <0,4,1,5>, - 2689783542U, // <5,u,0,5>: Cost 3 vext3 <0,u,1,5>, - 2266134675U, // <5,u,0,6>: Cost 3 vrev - 2595853772U, // <5,u,0,7>: Cost 3 vext1 <7,5,u,0>, <7,5,u,0> - 1551901341U, // <5,u,0,u>: Cost 2 vext2 <1,3,5,u>, LHS - 2625643254U, // <5,u,1,0>: Cost 3 vext2 <1,3,5,u>, <1,0,3,2> - 2625643316U, // <5,u,1,1>: Cost 3 vext2 <1,3,5,u>, <1,1,1,1> - 1613387566U, // <5,u,1,2>: Cost 2 vext3 <0,4,1,5>, LHS - 1551901697U, // <5,u,1,3>: Cost 2 vext2 <1,3,5,u>, <1,3,5,u> - 2626307154U, // <5,u,1,4>: Cost 3 vext2 <1,4,5,u>, <1,4,5,u> - 2689783622U, // <5,u,1,5>: Cost 3 vext3 <0,u,1,5>, - 2627634420U, // <5,u,1,6>: Cost 3 vext2 <1,6,5,u>, <1,6,5,u> - 2982366536U, // <5,u,1,7>: Cost 3 vzipr <4,u,5,1>, RHS - 1613387620U, // <5,u,1,u>: Cost 2 vext3 <0,4,1,5>, LHS - 2846286742U, // <5,u,2,0>: Cost 3 vuzpr RHS, <1,2,3,0> - 2685796528U, // <5,u,2,1>: Cost 3 vext3 <0,2,1,5>, <0,2,1,5> - 2625644136U, // <5,u,2,2>: Cost 3 vext2 <1,3,5,u>, <2,2,2,2> - 2687129480U, // <5,u,2,3>: Cost 3 vext3 <0,4,1,5>, - 2632279851U, // <5,u,2,4>: Cost 3 vext2 <2,4,5,u>, <2,4,5,u> - 2625644394U, // <5,u,2,5>: Cost 3 vext2 <1,3,5,u>, <2,5,3,u> - 2625644474U, // <5,u,2,6>: Cost 3 vext2 <1,3,5,u>, <2,6,3,7> - 2713966508U, // <5,u,2,7>: Cost 3 vext3 <4,u,5,5>, - 2625644603U, // <5,u,2,u>: Cost 3 vext2 <1,3,5,u>, <2,u,0,1> - 2687129532U, // <5,u,3,0>: Cost 3 vext3 <0,4,1,5>, - 2636261649U, // <5,u,3,1>: Cost 3 vext2 <3,1,5,u>, <3,1,5,u> - 2636925282U, // <5,u,3,2>: Cost 3 vext2 <3,2,5,u>, <3,2,5,u> - 2625644956U, // <5,u,3,3>: Cost 3 vext2 <1,3,5,u>, <3,3,3,3> - 1564510724U, // <5,u,3,4>: Cost 2 vext2 <3,4,5,u>, <3,4,5,u> - 2625645160U, // <5,u,3,5>: Cost 3 vext2 <1,3,5,u>, <3,5,u,0> - 2734610422U, // <5,u,3,6>: Cost 3 vext3 , - 2640243447U, // <5,u,3,7>: Cost 3 vext2 <3,7,5,u>, <3,7,5,u> - 1567165256U, // <5,u,3,u>: Cost 2 vext2 <3,u,5,u>, <3,u,5,u> - 1567828889U, // <5,u,4,0>: Cost 2 vext2 <4,0,5,u>, <4,0,5,u> - 1661163546U, // <5,u,4,1>: Cost 2 vext3 , - 2734463012U, // <5,u,4,2>: Cost 3 vext3 , - 2698631212U, // <5,u,4,3>: Cost 3 vext3 <2,3,4,5>, - 1570458842U, // <5,u,4,4>: Cost 2 vext2 <4,4,5,5>, <4,4,5,5> - 1551904054U, // <5,u,4,5>: Cost 2 vext2 <1,3,5,u>, RHS - 2846286172U, // <5,u,4,6>: Cost 3 vuzpr RHS, <0,4,2,6> - 2646216144U, // <5,u,4,7>: Cost 3 vext2 <4,7,5,u>, <4,7,5,u> - 1551904297U, // <5,u,4,u>: Cost 2 vext2 <1,3,5,u>, RHS - 1509982310U, // <5,u,5,0>: Cost 2 vext1 <5,5,5,5>, LHS - 2560058555U, // <5,u,5,1>: Cost 3 vext1 <1,5,u,5>, <1,5,u,5> - 2698926194U, // <5,u,5,2>: Cost 3 vext3 <2,3,u,5>, - 2698631295U, // <5,u,5,3>: Cost 3 vext3 <2,3,4,5>, - 1509985590U, // <5,u,5,4>: Cost 2 vext1 <5,5,5,5>, RHS - 229035318U, // <5,u,5,5>: Cost 1 vdup1 RHS - 1613387930U, // <5,u,5,6>: Cost 2 vext3 <0,4,1,5>, RHS - 1772547382U, // <5,u,5,7>: Cost 2 vuzpr RHS, RHS - 229035318U, // <5,u,5,u>: Cost 1 vdup1 RHS - 2566037606U, // <5,u,6,0>: Cost 3 vext1 <2,5,u,6>, LHS - 2920044334U, // <5,u,6,1>: Cost 3 vzipl <5,6,7,0>, LHS - 2566039445U, // <5,u,6,2>: Cost 3 vext1 <2,5,u,6>, <2,5,u,6> - 2687129808U, // <5,u,6,3>: Cost 3 vext3 <0,4,1,5>, - 2566040886U, // <5,u,6,4>: Cost 3 vext1 <2,5,u,6>, RHS - 2920044698U, // <5,u,6,5>: Cost 3 vzipl <5,6,7,0>, RHS - 2846289268U, // <5,u,6,6>: Cost 3 vuzpr RHS, <4,6,4,6> - 
2973781320U, // <5,u,6,7>: Cost 3 vzipr <3,4,5,6>, RHS - 2687129853U, // <5,u,6,u>: Cost 3 vext3 <0,4,1,5>, - 430506086U, // <5,u,7,0>: Cost 1 vext1 RHS, LHS - 1486333117U, // <5,u,7,1>: Cost 2 vext1 <1,5,u,7>, <1,5,u,7> - 1504249448U, // <5,u,7,2>: Cost 2 vext1 RHS, <2,2,2,2> - 2040971933U, // <5,u,7,3>: Cost 2 vtrnr RHS, LHS - 430509384U, // <5,u,7,4>: Cost 1 vext1 RHS, RHS - 1504251600U, // <5,u,7,5>: Cost 2 vext1 RHS, <5,1,7,3> - 118708378U, // <5,u,7,6>: Cost 1 vrev RHS - 2040974889U, // <5,u,7,7>: Cost 2 vtrnr RHS, RHS - 430511918U, // <5,u,7,u>: Cost 1 vext1 RHS, LHS - 430514278U, // <5,u,u,0>: Cost 1 vext1 RHS, LHS - 1551906606U, // <5,u,u,1>: Cost 2 vext2 <1,3,5,u>, LHS - 1613388133U, // <5,u,u,2>: Cost 2 vext3 <0,4,1,5>, LHS - 1772544669U, // <5,u,u,3>: Cost 2 vuzpr RHS, LHS - 430517577U, // <5,u,u,4>: Cost 1 vext1 RHS, RHS - 229035318U, // <5,u,u,5>: Cost 1 vdup1 RHS - 118716571U, // <5,u,u,6>: Cost 1 vrev RHS - 1772547625U, // <5,u,u,7>: Cost 2 vuzpr RHS, RHS - 430520110U, // <5,u,u,u>: Cost 1 vext1 RHS, LHS - 2686025728U, // <6,0,0,0>: Cost 3 vext3 <0,2,4,6>, <0,0,0,0> - 2686025738U, // <6,0,0,1>: Cost 3 vext3 <0,2,4,6>, <0,0,1,1> - 2686025748U, // <6,0,0,2>: Cost 3 vext3 <0,2,4,6>, <0,0,2,2> - 3779084320U, // <6,0,0,3>: Cost 4 vext3 <3,4,5,6>, <0,0,3,5> - 2642903388U, // <6,0,0,4>: Cost 3 vext2 <4,2,6,0>, <0,4,2,6> - 3657723939U, // <6,0,0,5>: Cost 4 vext1 <5,6,0,0>, <5,6,0,0> - 3926676514U, // <6,0,0,6>: Cost 4 vuzpr <5,6,7,0>, <7,0,5,6> - 3926675786U, // <6,0,0,7>: Cost 4 vuzpr <5,6,7,0>, <6,0,5,7> - 2686025802U, // <6,0,0,u>: Cost 3 vext3 <0,2,4,6>, <0,0,u,2> - 2566070374U, // <6,0,1,0>: Cost 3 vext1 <2,6,0,1>, LHS - 3759767642U, // <6,0,1,1>: Cost 4 vext3 <0,2,4,6>, <0,1,1,0> - 1612284006U, // <6,0,1,2>: Cost 2 vext3 <0,2,4,6>, LHS - 2583988738U, // <6,0,1,3>: Cost 3 vext1 <5,6,0,1>, <3,4,5,6> - 2566073654U, // <6,0,1,4>: Cost 3 vext1 <2,6,0,1>, RHS - 2583990308U, // <6,0,1,5>: Cost 3 vext1 <5,6,0,1>, <5,6,0,1> - 2589963005U, // <6,0,1,6>: Cost 3 vext1 <6,6,0,1>, <6,6,0,1> - 2595935702U, // <6,0,1,7>: Cost 3 vext1 <7,6,0,1>, <7,6,0,1> - 1612284060U, // <6,0,1,u>: Cost 2 vext3 <0,2,4,6>, LHS - 2686025892U, // <6,0,2,0>: Cost 3 vext3 <0,2,4,6>, <0,2,0,2> - 2685804721U, // <6,0,2,1>: Cost 3 vext3 <0,2,1,6>, <0,2,1,6> - 3759620282U, // <6,0,2,2>: Cost 4 vext3 <0,2,2,6>, <0,2,2,6> - 2705342658U, // <6,0,2,3>: Cost 3 vext3 <3,4,5,6>, <0,2,3,5> - 1612284108U, // <6,0,2,4>: Cost 2 vext3 <0,2,4,6>, <0,2,4,6> - 3706029956U, // <6,0,2,5>: Cost 4 vext2 <2,4,6,0>, <2,5,6,7> - 2686173406U, // <6,0,2,6>: Cost 3 vext3 <0,2,6,6>, <0,2,6,6> - 3651769338U, // <6,0,2,7>: Cost 4 vext1 <4,6,0,2>, <7,0,1,2> - 1612579056U, // <6,0,2,u>: Cost 2 vext3 <0,2,u,6>, <0,2,u,6> - 3706030230U, // <6,0,3,0>: Cost 4 vext2 <2,4,6,0>, <3,0,1,2> - 2705342720U, // <6,0,3,1>: Cost 3 vext3 <3,4,5,6>, <0,3,1,4> - 2705342730U, // <6,0,3,2>: Cost 3 vext3 <3,4,5,6>, <0,3,2,5> - 3706030492U, // <6,0,3,3>: Cost 4 vext2 <2,4,6,0>, <3,3,3,3> - 2644896258U, // <6,0,3,4>: Cost 3 vext2 <4,5,6,0>, <3,4,5,6> - 3718638154U, // <6,0,3,5>: Cost 4 vext2 <4,5,6,0>, <3,5,4,6> - 3729918619U, // <6,0,3,6>: Cost 4 vext2 <6,4,6,0>, <3,6,4,6> - 3926672384U, // <6,0,3,7>: Cost 4 vuzpr <5,6,7,0>, <1,3,5,7> - 2705342784U, // <6,0,3,u>: Cost 3 vext3 <3,4,5,6>, <0,3,u,5> - 2687058250U, // <6,0,4,0>: Cost 3 vext3 <0,4,0,6>, <0,4,0,6> - 2686026066U, // <6,0,4,1>: Cost 3 vext3 <0,2,4,6>, <0,4,1,5> - 1613463900U, // <6,0,4,2>: Cost 2 vext3 <0,4,2,6>, <0,4,2,6> - 3761021285U, // <6,0,4,3>: Cost 4 vext3 <0,4,3,6>, <0,4,3,6> - 2687353198U, // 
<6,0,4,4>: Cost 3 vext3 <0,4,4,6>, <0,4,4,6> - 2632289590U, // <6,0,4,5>: Cost 3 vext2 <2,4,6,0>, RHS - 2645560704U, // <6,0,4,6>: Cost 3 vext2 <4,6,6,0>, <4,6,6,0> - 2646224337U, // <6,0,4,7>: Cost 3 vext2 <4,7,6,0>, <4,7,6,0> - 1613906322U, // <6,0,4,u>: Cost 2 vext3 <0,4,u,6>, <0,4,u,6> - 3651788902U, // <6,0,5,0>: Cost 4 vext1 <4,6,0,5>, LHS - 2687795620U, // <6,0,5,1>: Cost 3 vext3 <0,5,1,6>, <0,5,1,6> - 3761611181U, // <6,0,5,2>: Cost 4 vext3 <0,5,2,6>, <0,5,2,6> - 3723284326U, // <6,0,5,3>: Cost 4 vext2 <5,3,6,0>, <5,3,6,0> - 2646224838U, // <6,0,5,4>: Cost 3 vext2 <4,7,6,0>, <5,4,7,6> - 3718639630U, // <6,0,5,5>: Cost 4 vext2 <4,5,6,0>, <5,5,6,6> - 2652196962U, // <6,0,5,6>: Cost 3 vext2 <5,7,6,0>, <5,6,7,0> - 2852932918U, // <6,0,5,7>: Cost 3 vuzpr <5,6,7,0>, RHS - 2852932919U, // <6,0,5,u>: Cost 3 vuzpr <5,6,7,0>, RHS - 2852933730U, // <6,0,6,0>: Cost 3 vuzpr <5,6,7,0>, <5,6,7,0> - 2925985894U, // <6,0,6,1>: Cost 3 vzipl <6,6,6,6>, LHS - 3060203622U, // <6,0,6,2>: Cost 3 vtrnl <6,6,6,6>, LHS - 3718640178U, // <6,0,6,3>: Cost 4 vext2 <4,5,6,0>, <6,3,4,5> - 2656178832U, // <6,0,6,4>: Cost 3 vext2 <6,4,6,0>, <6,4,6,0> - 3725939378U, // <6,0,6,5>: Cost 4 vext2 <5,7,6,0>, <6,5,0,7> - 2657506098U, // <6,0,6,6>: Cost 3 vext2 <6,6,6,0>, <6,6,6,0> - 2619020110U, // <6,0,6,7>: Cost 3 vext2 <0,2,6,0>, <6,7,0,1> - 2925986461U, // <6,0,6,u>: Cost 3 vzipl <6,6,6,6>, LHS - 2572091494U, // <6,0,7,0>: Cost 3 vext1 <3,6,0,7>, LHS - 2572092310U, // <6,0,7,1>: Cost 3 vext1 <3,6,0,7>, <1,2,3,0> - 2980495524U, // <6,0,7,2>: Cost 3 vzipr RHS, <0,2,0,2> - 2572094072U, // <6,0,7,3>: Cost 3 vext1 <3,6,0,7>, <3,6,0,7> - 2572094774U, // <6,0,7,4>: Cost 3 vext1 <3,6,0,7>, RHS - 4054238242U, // <6,0,7,5>: Cost 4 vzipr RHS, <1,4,0,5> - 3645837653U, // <6,0,7,6>: Cost 4 vext1 <3,6,0,7>, <6,0,7,0> - 4054239054U, // <6,0,7,7>: Cost 4 vzipr RHS, <2,5,0,7> - 2572097326U, // <6,0,7,u>: Cost 3 vext1 <3,6,0,7>, LHS - 2686026378U, // <6,0,u,0>: Cost 3 vext3 <0,2,4,6>, <0,u,0,2> - 2686026386U, // <6,0,u,1>: Cost 3 vext3 <0,2,4,6>, <0,u,1,1> - 1612284573U, // <6,0,u,2>: Cost 2 vext3 <0,2,4,6>, LHS - 2705343144U, // <6,0,u,3>: Cost 3 vext3 <3,4,5,6>, <0,u,3,5> - 1616265906U, // <6,0,u,4>: Cost 2 vext3 <0,u,4,6>, <0,u,4,6> - 2632292506U, // <6,0,u,5>: Cost 3 vext2 <2,4,6,0>, RHS - 2590020356U, // <6,0,u,6>: Cost 3 vext1 <6,6,0,u>, <6,6,0,u> - 2852933161U, // <6,0,u,7>: Cost 3 vuzpr <5,6,7,0>, RHS - 1612284627U, // <6,0,u,u>: Cost 2 vext3 <0,2,4,6>, LHS - 2595995750U, // <6,1,0,0>: Cost 3 vext1 <7,6,1,0>, LHS - 2646229094U, // <6,1,0,1>: Cost 3 vext2 <4,7,6,1>, LHS - 3694092492U, // <6,1,0,2>: Cost 4 vext2 <0,4,6,1>, <0,2,4,6> - 2686026486U, // <6,1,0,3>: Cost 3 vext3 <0,2,4,6>, <1,0,3,2> - 2595999030U, // <6,1,0,4>: Cost 3 vext1 <7,6,1,0>, RHS - 3767730952U, // <6,1,0,5>: Cost 4 vext3 <1,5,4,6>, <1,0,5,2> - 2596000590U, // <6,1,0,6>: Cost 3 vext1 <7,6,1,0>, <6,7,0,1> - 2596001246U, // <6,1,0,7>: Cost 3 vext1 <7,6,1,0>, <7,6,1,0> - 2686026531U, // <6,1,0,u>: Cost 3 vext3 <0,2,4,6>, <1,0,u,2> - 3763602219U, // <6,1,1,0>: Cost 4 vext3 <0,u,2,6>, <1,1,0,1> - 2686026548U, // <6,1,1,1>: Cost 3 vext3 <0,2,4,6>, <1,1,1,1> - 3764929346U, // <6,1,1,2>: Cost 4 vext3 <1,1,2,6>, <1,1,2,6> - 2686026568U, // <6,1,1,3>: Cost 3 vext3 <0,2,4,6>, <1,1,3,3> - 2691334996U, // <6,1,1,4>: Cost 3 vext3 <1,1,4,6>, <1,1,4,6> - 3760874332U, // <6,1,1,5>: Cost 4 vext3 <0,4,1,6>, <1,1,5,5> - 3765224294U, // <6,1,1,6>: Cost 4 vext3 <1,1,6,6>, <1,1,6,6> - 3669751263U, // <6,1,1,7>: Cost 4 vext1 <7,6,1,1>, <7,6,1,1> - 2686026613U, // <6,1,1,u>: Cost 3 
vext3 <0,2,4,6>, <1,1,u,3> - 2554208358U, // <6,1,2,0>: Cost 3 vext1 <0,6,1,2>, LHS - 3763602311U, // <6,1,2,1>: Cost 4 vext3 <0,u,2,6>, <1,2,1,3> - 3639895971U, // <6,1,2,2>: Cost 4 vext1 <2,6,1,2>, <2,6,1,2> - 2686026646U, // <6,1,2,3>: Cost 3 vext3 <0,2,4,6>, <1,2,3,0> - 2554211638U, // <6,1,2,4>: Cost 3 vext1 <0,6,1,2>, RHS - 3760874411U, // <6,1,2,5>: Cost 4 vext3 <0,4,1,6>, <1,2,5,3> - 2554212858U, // <6,1,2,6>: Cost 3 vext1 <0,6,1,2>, <6,2,7,3> - 3802973114U, // <6,1,2,7>: Cost 4 vext3 <7,4,5,6>, <1,2,7,0> - 2686026691U, // <6,1,2,u>: Cost 3 vext3 <0,2,4,6>, <1,2,u,0> - 2566160486U, // <6,1,3,0>: Cost 3 vext1 <2,6,1,3>, LHS - 2686026712U, // <6,1,3,1>: Cost 3 vext3 <0,2,4,6>, <1,3,1,3> - 2686026724U, // <6,1,3,2>: Cost 3 vext3 <0,2,4,6>, <1,3,2,6> - 3759768552U, // <6,1,3,3>: Cost 4 vext3 <0,2,4,6>, <1,3,3,1> - 2692662262U, // <6,1,3,4>: Cost 3 vext3 <1,3,4,6>, <1,3,4,6> - 2686026752U, // <6,1,3,5>: Cost 3 vext3 <0,2,4,6>, <1,3,5,7> - 2590053128U, // <6,1,3,6>: Cost 3 vext1 <6,6,1,3>, <6,6,1,3> - 3663795194U, // <6,1,3,7>: Cost 4 vext1 <6,6,1,3>, <7,0,1,2> - 2686026775U, // <6,1,3,u>: Cost 3 vext3 <0,2,4,6>, <1,3,u,3> - 2641587099U, // <6,1,4,0>: Cost 3 vext2 <4,0,6,1>, <4,0,6,1> - 2693104684U, // <6,1,4,1>: Cost 3 vext3 <1,4,1,6>, <1,4,1,6> - 3639912357U, // <6,1,4,2>: Cost 4 vext1 <2,6,1,4>, <2,6,1,4> - 2687206462U, // <6,1,4,3>: Cost 3 vext3 <0,4,2,6>, <1,4,3,6> - 3633941814U, // <6,1,4,4>: Cost 4 vext1 <1,6,1,4>, RHS - 2693399632U, // <6,1,4,5>: Cost 3 vext3 <1,4,5,6>, <1,4,5,6> - 3765077075U, // <6,1,4,6>: Cost 4 vext3 <1,1,4,6>, <1,4,6,0> - 2646232530U, // <6,1,4,7>: Cost 3 vext2 <4,7,6,1>, <4,7,6,1> - 2687206507U, // <6,1,4,u>: Cost 3 vext3 <0,4,2,6>, <1,4,u,6> - 2647559796U, // <6,1,5,0>: Cost 3 vext2 <5,0,6,1>, <5,0,6,1> - 3765077118U, // <6,1,5,1>: Cost 4 vext3 <1,1,4,6>, <1,5,1,7> - 3767583878U, // <6,1,5,2>: Cost 4 vext3 <1,5,2,6>, <1,5,2,6> - 2686026896U, // <6,1,5,3>: Cost 3 vext3 <0,2,4,6>, <1,5,3,7> - 2693989528U, // <6,1,5,4>: Cost 3 vext3 <1,5,4,6>, <1,5,4,6> - 3767805089U, // <6,1,5,5>: Cost 4 vext3 <1,5,5,6>, <1,5,5,6> - 2652868706U, // <6,1,5,6>: Cost 3 vext2 <5,u,6,1>, <5,6,7,0> - 3908250934U, // <6,1,5,7>: Cost 4 vuzpr <2,6,0,1>, RHS - 2686026941U, // <6,1,5,u>: Cost 3 vext3 <0,2,4,6>, <1,5,u,7> - 2554241126U, // <6,1,6,0>: Cost 3 vext1 <0,6,1,6>, LHS - 3763602639U, // <6,1,6,1>: Cost 4 vext3 <0,u,2,6>, <1,6,1,7> - 3759547607U, // <6,1,6,2>: Cost 4 vext3 <0,2,1,6>, <1,6,2,6> - 3115221094U, // <6,1,6,3>: Cost 3 vtrnr <4,6,4,6>, LHS - 2554244406U, // <6,1,6,4>: Cost 3 vext1 <0,6,1,6>, RHS - 3760874739U, // <6,1,6,5>: Cost 4 vext3 <0,4,1,6>, <1,6,5,7> - 2554245944U, // <6,1,6,6>: Cost 3 vext1 <0,6,1,6>, <6,6,6,6> - 3719975758U, // <6,1,6,7>: Cost 4 vext2 <4,7,6,1>, <6,7,0,1> - 3115221099U, // <6,1,6,u>: Cost 3 vtrnr <4,6,4,6>, LHS - 2560221286U, // <6,1,7,0>: Cost 3 vext1 <1,6,1,7>, LHS - 2560222415U, // <6,1,7,1>: Cost 3 vext1 <1,6,1,7>, <1,6,1,7> - 2980497558U, // <6,1,7,2>: Cost 3 vzipr RHS, <3,0,1,2> - 3103211622U, // <6,1,7,3>: Cost 3 vtrnr <2,6,3,7>, LHS - 2560224566U, // <6,1,7,4>: Cost 3 vext1 <1,6,1,7>, RHS - 2980495698U, // <6,1,7,5>: Cost 3 vzipr RHS, <0,4,1,5> - 3633967526U, // <6,1,7,6>: Cost 4 vext1 <1,6,1,7>, <6,1,7,0> - 4054237686U, // <6,1,7,7>: Cost 4 vzipr RHS, <0,6,1,7> - 2560227118U, // <6,1,7,u>: Cost 3 vext1 <1,6,1,7>, LHS - 2560229478U, // <6,1,u,0>: Cost 3 vext1 <1,6,1,u>, LHS - 2686027117U, // <6,1,u,1>: Cost 3 vext3 <0,2,4,6>, <1,u,1,3> - 2686027129U, // <6,1,u,2>: Cost 3 vext3 <0,2,4,6>, <1,u,2,6> - 2686027132U, // <6,1,u,3>: Cost 3 
vext3 <0,2,4,6>, <1,u,3,0> - 2687206795U, // <6,1,u,4>: Cost 3 vext3 <0,4,2,6>, <1,u,4,6> - 2686027157U, // <6,1,u,5>: Cost 3 vext3 <0,2,4,6>, <1,u,5,7> - 2590094093U, // <6,1,u,6>: Cost 3 vext1 <6,6,1,u>, <6,6,1,u> - 2596066790U, // <6,1,u,7>: Cost 3 vext1 <7,6,1,u>, <7,6,1,u> - 2686027177U, // <6,1,u,u>: Cost 3 vext3 <0,2,4,6>, <1,u,u,0> - 2646900736U, // <6,2,0,0>: Cost 3 vext2 <4,u,6,2>, <0,0,0,0> - 1573159014U, // <6,2,0,1>: Cost 2 vext2 <4,u,6,2>, LHS - 2646900900U, // <6,2,0,2>: Cost 3 vext2 <4,u,6,2>, <0,2,0,2> - 3759769037U, // <6,2,0,3>: Cost 4 vext3 <0,2,4,6>, <2,0,3,0> - 2641592668U, // <6,2,0,4>: Cost 3 vext2 <4,0,6,2>, <0,4,2,6> - 3779085794U, // <6,2,0,5>: Cost 4 vext3 <3,4,5,6>, <2,0,5,3> - 2686027244U, // <6,2,0,6>: Cost 3 vext3 <0,2,4,6>, <2,0,6,4> - 3669816807U, // <6,2,0,7>: Cost 4 vext1 <7,6,2,0>, <7,6,2,0> - 1573159581U, // <6,2,0,u>: Cost 2 vext2 <4,u,6,2>, LHS - 2230527897U, // <6,2,1,0>: Cost 3 vrev <2,6,0,1> - 2646901556U, // <6,2,1,1>: Cost 3 vext2 <4,u,6,2>, <1,1,1,1> - 2646901654U, // <6,2,1,2>: Cost 3 vext2 <4,u,6,2>, <1,2,3,0> - 2847047782U, // <6,2,1,3>: Cost 3 vuzpr <4,6,u,2>, LHS - 3771049517U, // <6,2,1,4>: Cost 4 vext3 <2,1,4,6>, <2,1,4,6> - 2646901904U, // <6,2,1,5>: Cost 3 vext2 <4,u,6,2>, <1,5,3,7> - 2686027324U, // <6,2,1,6>: Cost 3 vext3 <0,2,4,6>, <2,1,6,3> - 3669825000U, // <6,2,1,7>: Cost 4 vext1 <7,6,2,1>, <7,6,2,1> - 2231117793U, // <6,2,1,u>: Cost 3 vrev <2,6,u,1> - 3763603029U, // <6,2,2,0>: Cost 4 vext3 <0,u,2,6>, <2,2,0,1> - 3759769184U, // <6,2,2,1>: Cost 4 vext3 <0,2,4,6>, <2,2,1,3> - 2686027368U, // <6,2,2,2>: Cost 3 vext3 <0,2,4,6>, <2,2,2,2> - 2686027378U, // <6,2,2,3>: Cost 3 vext3 <0,2,4,6>, <2,2,3,3> - 2697971326U, // <6,2,2,4>: Cost 3 vext3 <2,2,4,6>, <2,2,4,6> - 3759769224U, // <6,2,2,5>: Cost 4 vext3 <0,2,4,6>, <2,2,5,7> - 2698118800U, // <6,2,2,6>: Cost 3 vext3 <2,2,6,6>, <2,2,6,6> - 3920794092U, // <6,2,2,7>: Cost 4 vuzpr <4,6,u,2>, <6,2,5,7> - 2686027423U, // <6,2,2,u>: Cost 3 vext3 <0,2,4,6>, <2,2,u,3> - 2686027430U, // <6,2,3,0>: Cost 3 vext3 <0,2,4,6>, <2,3,0,1> - 3759769262U, // <6,2,3,1>: Cost 4 vext3 <0,2,4,6>, <2,3,1,0> - 2698487485U, // <6,2,3,2>: Cost 3 vext3 <2,3,2,6>, <2,3,2,6> - 2705344196U, // <6,2,3,3>: Cost 3 vext3 <3,4,5,6>, <2,3,3,4> - 2686027470U, // <6,2,3,4>: Cost 3 vext3 <0,2,4,6>, <2,3,4,5> - 2698708696U, // <6,2,3,5>: Cost 3 vext3 <2,3,5,6>, <2,3,5,6> - 2724660961U, // <6,2,3,6>: Cost 3 vext3 <6,6,6,6>, <2,3,6,6> - 2729232104U, // <6,2,3,7>: Cost 3 vext3 <7,4,5,6>, <2,3,7,4> - 2686027502U, // <6,2,3,u>: Cost 3 vext3 <0,2,4,6>, <2,3,u,1> - 1567853468U, // <6,2,4,0>: Cost 2 vext2 <4,0,6,2>, <4,0,6,2> - 3759769351U, // <6,2,4,1>: Cost 4 vext3 <0,2,4,6>, <2,4,1,u> - 2699151118U, // <6,2,4,2>: Cost 3 vext3 <2,4,2,6>, <2,4,2,6> - 2686027543U, // <6,2,4,3>: Cost 3 vext3 <0,2,4,6>, <2,4,3,6> - 2699298592U, // <6,2,4,4>: Cost 3 vext3 <2,4,4,6>, <2,4,4,6> - 1573162294U, // <6,2,4,5>: Cost 2 vext2 <4,u,6,2>, RHS - 2686027564U, // <6,2,4,6>: Cost 3 vext3 <0,2,4,6>, <2,4,6,0> - 3719982547U, // <6,2,4,7>: Cost 4 vext2 <4,7,6,2>, <4,7,6,2> - 1573162532U, // <6,2,4,u>: Cost 2 vext2 <4,u,6,2>, <4,u,6,2> - 3779086154U, // <6,2,5,0>: Cost 4 vext3 <3,4,5,6>, <2,5,0,3> - 2646904528U, // <6,2,5,1>: Cost 3 vext2 <4,u,6,2>, <5,1,7,3> - 3759769440U, // <6,2,5,2>: Cost 4 vext3 <0,2,4,6>, <2,5,2,7> - 2699888488U, // <6,2,5,3>: Cost 3 vext3 <2,5,3,6>, <2,5,3,6> - 2230855617U, // <6,2,5,4>: Cost 3 vrev <2,6,4,5> - 2646904836U, // <6,2,5,5>: Cost 3 vext2 <4,u,6,2>, <5,5,5,5> - 2646904930U, // <6,2,5,6>: Cost 3 vext2 <4,u,6,2>, 
<5,6,7,0> - 2847051062U, // <6,2,5,7>: Cost 3 vuzpr <4,6,u,2>, RHS - 2700257173U, // <6,2,5,u>: Cost 3 vext3 <2,5,u,6>, <2,5,u,6> - 2687207321U, // <6,2,6,0>: Cost 3 vext3 <0,4,2,6>, <2,6,0,1> - 2686027684U, // <6,2,6,1>: Cost 3 vext3 <0,2,4,6>, <2,6,1,3> - 2566260656U, // <6,2,6,2>: Cost 3 vext1 <2,6,2,6>, <2,6,2,6> - 2685806522U, // <6,2,6,3>: Cost 3 vext3 <0,2,1,6>, <2,6,3,7> - 2687207361U, // <6,2,6,4>: Cost 3 vext3 <0,4,2,6>, <2,6,4,5> - 2686027724U, // <6,2,6,5>: Cost 3 vext3 <0,2,4,6>, <2,6,5,7> - 2646905656U, // <6,2,6,6>: Cost 3 vext2 <4,u,6,2>, <6,6,6,6> - 2646905678U, // <6,2,6,7>: Cost 3 vext2 <4,u,6,2>, <6,7,0,1> - 2686027751U, // <6,2,6,u>: Cost 3 vext3 <0,2,4,6>, <2,6,u,7> - 2554323046U, // <6,2,7,0>: Cost 3 vext1 <0,6,2,7>, LHS - 2572239606U, // <6,2,7,1>: Cost 3 vext1 <3,6,2,7>, <1,0,3,2> - 2566268849U, // <6,2,7,2>: Cost 3 vext1 <2,6,2,7>, <2,6,2,7> - 1906753638U, // <6,2,7,3>: Cost 2 vzipr RHS, LHS - 2554326326U, // <6,2,7,4>: Cost 3 vext1 <0,6,2,7>, RHS - 3304687564U, // <6,2,7,5>: Cost 4 vrev <2,6,5,7> - 2980495708U, // <6,2,7,6>: Cost 3 vzipr RHS, <0,4,2,6> - 2646906476U, // <6,2,7,7>: Cost 3 vext2 <4,u,6,2>, <7,7,7,7> - 1906753643U, // <6,2,7,u>: Cost 2 vzipr RHS, LHS - 1591744256U, // <6,2,u,0>: Cost 2 vext2 , - 1573164846U, // <6,2,u,1>: Cost 2 vext2 <4,u,6,2>, LHS - 2701805650U, // <6,2,u,2>: Cost 3 vext3 <2,u,2,6>, <2,u,2,6> - 1906761830U, // <6,2,u,3>: Cost 2 vzipr RHS, LHS - 2686027875U, // <6,2,u,4>: Cost 3 vext3 <0,2,4,6>, <2,u,4,5> - 1573165210U, // <6,2,u,5>: Cost 2 vext2 <4,u,6,2>, RHS - 2686322800U, // <6,2,u,6>: Cost 3 vext3 <0,2,u,6>, <2,u,6,0> - 2847051305U, // <6,2,u,7>: Cost 3 vuzpr <4,6,u,2>, RHS - 1906761835U, // <6,2,u,u>: Cost 2 vzipr RHS, LHS - 3759769739U, // <6,3,0,0>: Cost 4 vext3 <0,2,4,6>, <3,0,0,0> - 2686027926U, // <6,3,0,1>: Cost 3 vext3 <0,2,4,6>, <3,0,1,2> - 2686027937U, // <6,3,0,2>: Cost 3 vext3 <0,2,4,6>, <3,0,2,4> - 3640027286U, // <6,3,0,3>: Cost 4 vext1 <2,6,3,0>, <3,0,1,2> - 2687207601U, // <6,3,0,4>: Cost 3 vext3 <0,4,2,6>, <3,0,4,2> - 2705344698U, // <6,3,0,5>: Cost 3 vext3 <3,4,5,6>, <3,0,5,2> - 3663917847U, // <6,3,0,6>: Cost 4 vext1 <6,6,3,0>, <6,6,3,0> - 2237008560U, // <6,3,0,7>: Cost 3 vrev <3,6,7,0> - 2686027989U, // <6,3,0,u>: Cost 3 vext3 <0,2,4,6>, <3,0,u,2> - 3759769823U, // <6,3,1,0>: Cost 4 vext3 <0,2,4,6>, <3,1,0,3> - 3759769830U, // <6,3,1,1>: Cost 4 vext3 <0,2,4,6>, <3,1,1,1> - 3759769841U, // <6,3,1,2>: Cost 4 vext3 <0,2,4,6>, <3,1,2,3> - 3759769848U, // <6,3,1,3>: Cost 4 vext3 <0,2,4,6>, <3,1,3,1> - 2703280390U, // <6,3,1,4>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> - 3759769868U, // <6,3,1,5>: Cost 4 vext3 <0,2,4,6>, <3,1,5,3> - 3704063194U, // <6,3,1,6>: Cost 4 vext2 <2,1,6,3>, <1,6,3,0> - 3767732510U, // <6,3,1,7>: Cost 4 vext3 <1,5,4,6>, <3,1,7,3> - 2703280390U, // <6,3,1,u>: Cost 3 vext3 <3,1,4,6>, <3,1,4,6> - 3704063468U, // <6,3,2,0>: Cost 4 vext2 <2,1,6,3>, <2,0,6,4> - 2630321724U, // <6,3,2,1>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> - 3759769921U, // <6,3,2,2>: Cost 4 vext3 <0,2,4,6>, <3,2,2,2> - 3759769928U, // <6,3,2,3>: Cost 4 vext3 <0,2,4,6>, <3,2,3,0> - 3704063767U, // <6,3,2,4>: Cost 4 vext2 <2,1,6,3>, <2,4,3,6> - 3704063876U, // <6,3,2,5>: Cost 4 vext2 <2,1,6,3>, <2,5,6,7> - 2636957626U, // <6,3,2,6>: Cost 3 vext2 <3,2,6,3>, <2,6,3,7> - 3777907058U, // <6,3,2,7>: Cost 4 vext3 <3,2,7,6>, <3,2,7,6> - 2630321724U, // <6,3,2,u>: Cost 3 vext2 <2,1,6,3>, <2,1,6,3> - 3759769983U, // <6,3,3,0>: Cost 4 vext3 <0,2,4,6>, <3,3,0,1> - 3710036245U, // <6,3,3,1>: Cost 4 vext2 <3,1,6,3>, <3,1,6,3> - 2636958054U, // 
- 2636958054U, // <6,3,3,2>: Cost 3 vext2 <3,2,6,3>, <3,2,6,3>
- 2686028188U, // <6,3,3,3>: Cost 3 vext3 <0,2,4,6>, <3,3,3,3>
- 2704607656U, // <6,3,3,4>: Cost 3 vext3 <3,3,4,6>, <3,3,4,6>
- 3773041072U, // <6,3,3,5>: Cost 4 vext3 <2,4,4,6>, <3,3,5,5>
- 3711363731U, // <6,3,3,6>: Cost 4 vext2 <3,3,6,3>, <3,6,3,7>
- 3767732676U, // <6,3,3,7>: Cost 4 vext3 <1,5,4,6>, <3,3,7,7>
- 2707999179U, // <6,3,3,u>: Cost 3 vext3 <3,u,5,6>, <3,3,u,5>
- 2584232038U, // <6,3,4,0>: Cost 3 vext1 <5,6,3,4>, LHS
- 2642267118U, // <6,3,4,1>: Cost 3 vext2 <4,1,6,3>, <4,1,6,3>
- 2642930751U, // <6,3,4,2>: Cost 3 vext2 <4,2,6,3>, <4,2,6,3>
- 2705197552U, // <6,3,4,3>: Cost 3 vext3 <3,4,3,6>, <3,4,3,6>
- 2584235318U, // <6,3,4,4>: Cost 3 vext1 <5,6,3,4>, RHS
- 1631603202U, // <6,3,4,5>: Cost 2 vext3 <3,4,5,6>, <3,4,5,6>
- 2654211444U, // <6,3,4,6>: Cost 3 vext2 <6,1,6,3>, <4,6,4,6>
- 2237041332U, // <6,3,4,7>: Cost 3 vrev <3,6,7,4>
- 1631824413U, // <6,3,4,u>: Cost 2 vext3 <3,4,u,6>, <3,4,u,6>
- 3640066150U, // <6,3,5,0>: Cost 4 vext1 <2,6,3,5>, LHS
- 3772746288U, // <6,3,5,1>: Cost 4 vext3 <2,4,0,6>, <3,5,1,7>
- 3640067790U, // <6,3,5,2>: Cost 4 vext1 <2,6,3,5>, <2,3,4,5>
- 3773041216U, // <6,3,5,3>: Cost 4 vext3 <2,4,4,6>, <3,5,3,5>
- 2705934922U, // <6,3,5,4>: Cost 3 vext3 <3,5,4,6>, <3,5,4,6>
- 3773041236U, // <6,3,5,5>: Cost 4 vext3 <2,4,4,6>, <3,5,5,7>
- 3779086940U, // <6,3,5,6>: Cost 4 vext3 <3,4,5,6>, <3,5,6,6>
- 3767732831U, // <6,3,5,7>: Cost 4 vext3 <1,5,4,6>, <3,5,7,0>
- 2706229870U, // <6,3,5,u>: Cost 3 vext3 <3,5,u,6>, <3,5,u,6>
- 2602164326U, // <6,3,6,0>: Cost 3 vext1 , LHS
- 2654212512U, // <6,3,6,1>: Cost 3 vext2 <6,1,6,3>, <6,1,6,3>
- 2566334393U, // <6,3,6,2>: Cost 3 vext1 <2,6,3,6>, <2,6,3,6>
- 3704066588U, // <6,3,6,3>: Cost 4 vext2 <2,1,6,3>, <6,3,2,1>
- 2602167524U, // <6,3,6,4>: Cost 3 vext1 , <4,4,6,6>
- 3710702321U, // <6,3,6,5>: Cost 4 vext2 <3,2,6,3>, <6,5,7,7>
- 2724661933U, // <6,3,6,6>: Cost 3 vext3 <6,6,6,6>, <3,6,6,6>
- 3710702465U, // <6,3,6,7>: Cost 4 vext2 <3,2,6,3>, <6,7,5,7>
- 2602170158U, // <6,3,6,u>: Cost 3 vext1 , LHS
- 1492598886U, // <6,3,7,0>: Cost 2 vext1 <2,6,3,7>, LHS
- 2560369889U, // <6,3,7,1>: Cost 3 vext1 <1,6,3,7>, <1,6,3,7>
- 1492600762U, // <6,3,7,2>: Cost 2 vext1 <2,6,3,7>, <2,6,3,7>
- 2566342806U, // <6,3,7,3>: Cost 3 vext1 <2,6,3,7>, <3,0,1,2>
- 1492602166U, // <6,3,7,4>: Cost 2 vext1 <2,6,3,7>, RHS
- 2602176208U, // <6,3,7,5>: Cost 3 vext1 , <5,1,7,3>
- 2566345210U, // <6,3,7,6>: Cost 3 vext1 <2,6,3,7>, <6,2,7,3>
- 2980496528U, // <6,3,7,7>: Cost 3 vzipr RHS, <1,5,3,7>
- 1492604718U, // <6,3,7,u>: Cost 2 vext1 <2,6,3,7>, LHS
- 1492607078U, // <6,3,u,0>: Cost 2 vext1 <2,6,3,u>, LHS
- 2686028574U, // <6,3,u,1>: Cost 3 vext3 <0,2,4,6>, <3,u,1,2>
- 1492608955U, // <6,3,u,2>: Cost 2 vext1 <2,6,3,u>, <2,6,3,u>
- 2566350998U, // <6,3,u,3>: Cost 3 vext1 <2,6,3,u>, <3,0,1,2>
- 1492610358U, // <6,3,u,4>: Cost 2 vext1 <2,6,3,u>, RHS
- 1634257734U, // <6,3,u,5>: Cost 2 vext3 <3,u,5,6>, <3,u,5,6>
- 2566353489U, // <6,3,u,6>: Cost 3 vext1 <2,6,3,u>, <6,3,u,0>
- 2980504720U, // <6,3,u,7>: Cost 3 vzipr RHS, <1,5,3,7>
- 1492612910U, // <6,3,u,u>: Cost 2 vext1 <2,6,3,u>, LHS
- 3703406592U, // <6,4,0,0>: Cost 4 vext2 <2,0,6,4>, <0,0,0,0>
- 2629664870U, // <6,4,0,1>: Cost 3 vext2 <2,0,6,4>, LHS
- 2629664972U, // <6,4,0,2>: Cost 3 vext2 <2,0,6,4>, <0,2,4,6>
- 3779087232U, // <6,4,0,3>: Cost 4 vext3 <3,4,5,6>, <4,0,3,1>
- 2642936156U, // <6,4,0,4>: Cost 3 vext2 <4,2,6,4>, <0,4,2,6>
- 2712570770U, // <6,4,0,5>: Cost 3 vext3 <4,6,4,6>, <4,0,5,1>
- 2687208348U, // <6,4,0,6>: Cost 3 vext3 <0,4,2,6>, <4,0,6,2>
- 3316723081U, // <6,4,0,7>: Cost 4 vrev <4,6,7,0>
- 2629665437U, // <6,4,0,u>: Cost 3 vext2 <2,0,6,4>, LHS
- 2242473291U, // <6,4,1,0>: Cost 3 vrev <4,6,0,1>
- 3700089652U, // <6,4,1,1>: Cost 4 vext2 <1,4,6,4>, <1,1,1,1>
- 3703407510U, // <6,4,1,2>: Cost 4 vext2 <2,0,6,4>, <1,2,3,0>
- 2852962406U, // <6,4,1,3>: Cost 3 vuzpr <5,6,7,4>, LHS
- 3628166454U, // <6,4,1,4>: Cost 4 vext1 <0,6,4,1>, RHS
- 3760876514U, // <6,4,1,5>: Cost 4 vext3 <0,4,1,6>, <4,1,5,0>
- 2687208430U, // <6,4,1,6>: Cost 3 vext3 <0,4,2,6>, <4,1,6,3>
- 3316731274U, // <6,4,1,7>: Cost 4 vrev <4,6,7,1>
- 2243063187U, // <6,4,1,u>: Cost 3 vrev <4,6,u,1>
- 2629666284U, // <6,4,2,0>: Cost 3 vext2 <2,0,6,4>, <2,0,6,4>
- 3703408188U, // <6,4,2,1>: Cost 4 vext2 <2,0,6,4>, <2,1,6,3>
- 3703408232U, // <6,4,2,2>: Cost 4 vext2 <2,0,6,4>, <2,2,2,2>
- 3703408294U, // <6,4,2,3>: Cost 4 vext2 <2,0,6,4>, <2,3,0,1>
- 2632320816U, // <6,4,2,4>: Cost 3 vext2 <2,4,6,4>, <2,4,6,4>
- 2923384118U, // <6,4,2,5>: Cost 3 vzipl <6,2,7,3>, RHS
- 2687208508U, // <6,4,2,6>: Cost 3 vext3 <0,4,2,6>, <4,2,6,0>
- 3760950341U, // <6,4,2,7>: Cost 4 vext3 <0,4,2,6>, <4,2,7,0>
- 2634975348U, // <6,4,2,u>: Cost 3 vext2 <2,u,6,4>, <2,u,6,4>
- 3703408790U, // <6,4,3,0>: Cost 4 vext2 <2,0,6,4>, <3,0,1,2>
- 3316305238U, // <6,4,3,1>: Cost 4 vrev <4,6,1,3>
- 3703408947U, // <6,4,3,2>: Cost 4 vext2 <2,0,6,4>, <3,2,0,6>
- 3703409052U, // <6,4,3,3>: Cost 4 vext2 <2,0,6,4>, <3,3,3,3>
- 2644929026U, // <6,4,3,4>: Cost 3 vext2 <4,5,6,4>, <3,4,5,6>
- 3718670922U, // <6,4,3,5>: Cost 4 vext2 <4,5,6,4>, <3,5,4,6>
- 2705345682U, // <6,4,3,6>: Cost 3 vext3 <3,4,5,6>, <4,3,6,5>
- 3926705152U, // <6,4,3,7>: Cost 4 vuzpr <5,6,7,4>, <1,3,5,7>
- 2668817222U, // <6,4,3,u>: Cost 3 vext2 , <3,u,5,6>
- 2590277734U, // <6,4,4,0>: Cost 3 vext1 <6,6,4,4>, LHS
- 3716017135U, // <6,4,4,1>: Cost 4 vext2 <4,1,6,4>, <4,1,6,4>
- 2642938944U, // <6,4,4,2>: Cost 3 vext2 <4,2,6,4>, <4,2,6,4>
- 3717344401U, // <6,4,4,3>: Cost 4 vext2 <4,3,6,4>, <4,3,6,4>
- 2712571088U, // <6,4,4,4>: Cost 3 vext3 <4,6,4,6>, <4,4,4,4>
- 2629668150U, // <6,4,4,5>: Cost 3 vext2 <2,0,6,4>, RHS
- 1637649636U, // <6,4,4,6>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
- 2646257109U, // <6,4,4,7>: Cost 3 vext2 <4,7,6,4>, <4,7,6,4>
- 1637649636U, // <6,4,4,u>: Cost 2 vext3 <4,4,6,6>, <4,4,6,6>
- 2566398054U, // <6,4,5,0>: Cost 3 vext1 <2,6,4,5>, LHS
- 3760876805U, // <6,4,5,1>: Cost 4 vext3 <0,4,1,6>, <4,5,1,3>
- 2566399937U, // <6,4,5,2>: Cost 3 vext1 <2,6,4,5>, <2,6,4,5>
- 2584316418U, // <6,4,5,3>: Cost 3 vext1 <5,6,4,5>, <3,4,5,6>
- 2566401334U, // <6,4,5,4>: Cost 3 vext1 <2,6,4,5>, RHS
- 2584318028U, // <6,4,5,5>: Cost 3 vext1 <5,6,4,5>, <5,6,4,5>
- 1612287286U, // <6,4,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 2852965686U, // <6,4,5,7>: Cost 3 vuzpr <5,6,7,4>, RHS
- 1612287304U, // <6,4,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 1504608358U, // <6,4,6,0>: Cost 2 vext1 <4,6,4,6>, LHS
- 2578350838U, // <6,4,6,1>: Cost 3 vext1 <4,6,4,6>, <1,0,3,2>
- 2578351720U, // <6,4,6,2>: Cost 3 vext1 <4,6,4,6>, <2,2,2,2>
- 2578352278U, // <6,4,6,3>: Cost 3 vext1 <4,6,4,6>, <3,0,1,2>
- 1504611638U, // <6,4,6,4>: Cost 2 vext1 <4,6,4,6>, RHS
- 2578353872U, // <6,4,6,5>: Cost 3 vext1 <4,6,4,6>, <5,1,7,3>
- 2578354682U, // <6,4,6,6>: Cost 3 vext1 <4,6,4,6>, <6,2,7,3>
- 2578355194U, // <6,4,6,7>: Cost 3 vext1 <4,6,4,6>, <7,0,1,2>
- 1504614190U, // <6,4,6,u>: Cost 2 vext1 <4,6,4,6>, LHS
- 2572386406U, // <6,4,7,0>: Cost 3 vext1 <3,6,4,7>, LHS
- 2572387226U, // <6,4,7,1>: Cost 3 vext1 <3,6,4,7>, <1,2,3,4>
- 3640157902U, // <6,4,7,2>: Cost 4 vext1 <2,6,4,7>, <2,3,4,5>
- 2572389020U, // <6,4,7,3>: Cost 3 vext1 <3,6,4,7>, <3,6,4,7>
- 2572389686U, // <6,4,7,4>: Cost 3 vext1 <3,6,4,7>, RHS
- 2980497102U, // <6,4,7,5>: Cost 3 vzipr RHS, <2,3,4,5>
- 2980495564U, // <6,4,7,6>: Cost 3 vzipr RHS, <0,2,4,6>
- 4054239090U, // <6,4,7,7>: Cost 4 vzipr RHS, <2,5,4,7>
- 2572392238U, // <6,4,7,u>: Cost 3 vext1 <3,6,4,7>, LHS
- 1504608358U, // <6,4,u,0>: Cost 2 vext1 <4,6,4,6>, LHS
- 2629670702U, // <6,4,u,1>: Cost 3 vext2 <2,0,6,4>, LHS
- 2566424516U, // <6,4,u,2>: Cost 3 vext1 <2,6,4,u>, <2,6,4,u>
- 2584340994U, // <6,4,u,3>: Cost 3 vext1 <5,6,4,u>, <3,4,5,6>
- 1640156694U, // <6,4,u,4>: Cost 2 vext3 <4,u,4,6>, <4,u,4,6>
- 2629671066U, // <6,4,u,5>: Cost 3 vext2 <2,0,6,4>, RHS
- 1612287529U, // <6,4,u,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 2852965929U, // <6,4,u,7>: Cost 3 vuzpr <5,6,7,4>, RHS
- 1612287547U, // <6,4,u,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 3708723200U, // <6,5,0,0>: Cost 4 vext2 <2,u,6,5>, <0,0,0,0>
- 2634981478U, // <6,5,0,1>: Cost 3 vext2 <2,u,6,5>, LHS
- 3694125260U, // <6,5,0,2>: Cost 4 vext2 <0,4,6,5>, <0,2,4,6>
- 3779087962U, // <6,5,0,3>: Cost 4 vext3 <3,4,5,6>, <5,0,3,2>
- 3760877154U, // <6,5,0,4>: Cost 4 vext3 <0,4,1,6>, <5,0,4,1>
- 4195110916U, // <6,5,0,5>: Cost 4 vtrnr <5,6,7,0>, <5,5,5,5>
- 3696779775U, // <6,5,0,6>: Cost 4 vext2 <0,u,6,5>, <0,6,2,7>
- 1175212130U, // <6,5,0,7>: Cost 2 vrev <5,6,7,0>
- 1175285867U, // <6,5,0,u>: Cost 2 vrev <5,6,u,0>
- 2248445988U, // <6,5,1,0>: Cost 3 vrev <5,6,0,1>
- 3698107237U, // <6,5,1,1>: Cost 4 vext2 <1,1,6,5>, <1,1,6,5>
- 3708724118U, // <6,5,1,2>: Cost 4 vext2 <2,u,6,5>, <1,2,3,0>
- 3908575334U, // <6,5,1,3>: Cost 4 vuzpr <2,6,4,5>, LHS
- 3716023376U, // <6,5,1,4>: Cost 4 vext2 <4,1,6,5>, <1,4,5,6>
- 3708724368U, // <6,5,1,5>: Cost 4 vext2 <2,u,6,5>, <1,5,3,7>
- 3767733960U, // <6,5,1,6>: Cost 4 vext3 <1,5,4,6>, <5,1,6,4>
- 2712571600U, // <6,5,1,7>: Cost 3 vext3 <4,6,4,6>, <5,1,7,3>
- 2712571609U, // <6,5,1,u>: Cost 3 vext3 <4,6,4,6>, <5,1,u,3>
- 2578391142U, // <6,5,2,0>: Cost 3 vext1 <4,6,5,2>, LHS
- 3704079934U, // <6,5,2,1>: Cost 4 vext2 <2,1,6,5>, <2,1,6,5>
- 3708724840U, // <6,5,2,2>: Cost 4 vext2 <2,u,6,5>, <2,2,2,2>
- 3705407182U, // <6,5,2,3>: Cost 4 vext2 <2,3,6,5>, <2,3,4,5>
- 2578394422U, // <6,5,2,4>: Cost 3 vext1 <4,6,5,2>, RHS
- 3717351272U, // <6,5,2,5>: Cost 4 vext2 <4,3,6,5>, <2,5,3,6>
- 2634983354U, // <6,5,2,6>: Cost 3 vext2 <2,u,6,5>, <2,6,3,7>
- 3115486518U, // <6,5,2,7>: Cost 3 vtrnr <4,6,u,2>, RHS
- 2634983541U, // <6,5,2,u>: Cost 3 vext2 <2,u,6,5>, <2,u,6,5>
- 3708725398U, // <6,5,3,0>: Cost 4 vext2 <2,u,6,5>, <3,0,1,2>
- 3710052631U, // <6,5,3,1>: Cost 4 vext2 <3,1,6,5>, <3,1,6,5>
- 3708725606U, // <6,5,3,2>: Cost 4 vext2 <2,u,6,5>, <3,2,6,3>
- 3708725660U, // <6,5,3,3>: Cost 4 vext2 <2,u,6,5>, <3,3,3,3>
- 2643610114U, // <6,5,3,4>: Cost 3 vext2 <4,3,6,5>, <3,4,5,6>
- 3717352010U, // <6,5,3,5>: Cost 4 vext2 <4,3,6,5>, <3,5,4,6>
- 3773632358U, // <6,5,3,6>: Cost 4 vext3 <2,5,3,6>, <5,3,6,0>
- 2248978533U, // <6,5,3,7>: Cost 3 vrev <5,6,7,3>
- 2249052270U, // <6,5,3,u>: Cost 3 vrev <5,6,u,3>
- 2596323430U, // <6,5,4,0>: Cost 3 vext1 <7,6,5,4>, LHS
- 3716025328U, // <6,5,4,1>: Cost 4 vext2 <4,1,6,5>, <4,1,6,5>
- 3716688961U, // <6,5,4,2>: Cost 4 vext2 <4,2,6,5>, <4,2,6,5>
- 2643610770U, // <6,5,4,3>: Cost 3 vext2 <4,3,6,5>, <4,3,6,5>
- 2596326710U, // <6,5,4,4>: Cost 3 vext1 <7,6,5,4>, RHS
- 2634984758U, // <6,5,4,5>: Cost 3 vext2 <2,u,6,5>, RHS
- 3767734199U, // <6,5,4,6>: Cost 4 vext3 <1,5,4,6>, <5,4,6,0>
- 1643696070U, // <6,5,4,7>: Cost 2 vext3 <5,4,7,6>, <5,4,7,6>
- 1643769807U, // <6,5,4,u>: Cost 2 vext3 <5,4,u,6>, <5,4,u,6>
- 2578415718U, // <6,5,5,0>: Cost 3 vext1 <4,6,5,5>, LHS
- 3652158198U, // <6,5,5,1>: Cost 4 vext1 <4,6,5,5>, <1,0,3,2>
- 3652159080U, // <6,5,5,2>: Cost 4 vext1 <4,6,5,5>, <2,2,2,2>
- 3652159638U, // <6,5,5,3>: Cost 4 vext1 <4,6,5,5>, <3,0,1,2>
- 2578418998U, // <6,5,5,4>: Cost 3 vext1 <4,6,5,5>, RHS
- 2712571908U, // <6,5,5,5>: Cost 3 vext3 <4,6,4,6>, <5,5,5,5>
- 2718027790U, // <6,5,5,6>: Cost 3 vext3 <5,5,6,6>, <5,5,6,6>
- 2712571928U, // <6,5,5,7>: Cost 3 vext3 <4,6,4,6>, <5,5,7,7>
- 2712571937U, // <6,5,5,u>: Cost 3 vext3 <4,6,4,6>, <5,5,u,7>
- 2705346596U, // <6,5,6,0>: Cost 3 vext3 <3,4,5,6>, <5,6,0,1>
- 3767144496U, // <6,5,6,1>: Cost 4 vext3 <1,4,5,6>, <5,6,1,4>
- 3773116473U, // <6,5,6,2>: Cost 4 vext3 <2,4,5,6>, <5,6,2,4>
- 2705346626U, // <6,5,6,3>: Cost 3 vext3 <3,4,5,6>, <5,6,3,4>
- 2705346636U, // <6,5,6,4>: Cost 3 vext3 <3,4,5,6>, <5,6,4,5>
- 3908577217U, // <6,5,6,5>: Cost 4 vuzpr <2,6,4,5>, <2,6,4,5>
- 2578428728U, // <6,5,6,6>: Cost 3 vext1 <4,6,5,6>, <6,6,6,6>
- 2712572002U, // <6,5,6,7>: Cost 3 vext3 <4,6,4,6>, <5,6,7,0>
- 2705346668U, // <6,5,6,u>: Cost 3 vext3 <3,4,5,6>, <5,6,u,1>
- 2560516198U, // <6,5,7,0>: Cost 3 vext1 <1,6,5,7>, LHS
- 2560517363U, // <6,5,7,1>: Cost 3 vext1 <1,6,5,7>, <1,6,5,7>
- 2566490060U, // <6,5,7,2>: Cost 3 vext1 <2,6,5,7>, <2,6,5,7>
- 3634260118U, // <6,5,7,3>: Cost 4 vext1 <1,6,5,7>, <3,0,1,2>
- 2560519478U, // <6,5,7,4>: Cost 3 vext1 <1,6,5,7>, RHS
- 2980498650U, // <6,5,7,5>: Cost 3 vzipr RHS, <4,4,5,5>
- 2980497922U, // <6,5,7,6>: Cost 3 vzipr RHS, <3,4,5,6>
- 3103214902U, // <6,5,7,7>: Cost 3 vtrnr <2,6,3,7>, RHS
- 2560522030U, // <6,5,7,u>: Cost 3 vext1 <1,6,5,7>, LHS
- 2560524390U, // <6,5,u,0>: Cost 3 vext1 <1,6,5,u>, LHS
- 2560525556U, // <6,5,u,1>: Cost 3 vext1 <1,6,5,u>, <1,6,5,u>
- 2566498253U, // <6,5,u,2>: Cost 3 vext1 <2,6,5,u>, <2,6,5,u>
- 2646931439U, // <6,5,u,3>: Cost 3 vext2 <4,u,6,5>,
- 2560527670U, // <6,5,u,4>: Cost 3 vext1 <1,6,5,u>, RHS
- 2634987674U, // <6,5,u,5>: Cost 3 vext2 <2,u,6,5>, RHS
- 2980506114U, // <6,5,u,6>: Cost 3 vzipr RHS, <3,4,5,6>
- 1175277674U, // <6,5,u,7>: Cost 2 vrev <5,6,7,u>
- 1175351411U, // <6,5,u,u>: Cost 2 vrev <5,6,u,u>
- 2578448486U, // <6,6,0,0>: Cost 3 vext1 <4,6,6,0>, LHS
- 1573191782U, // <6,6,0,1>: Cost 2 vext2 <4,u,6,6>, LHS
- 2686030124U, // <6,6,0,2>: Cost 3 vext3 <0,2,4,6>, <6,0,2,4>
- 3779088690U, // <6,6,0,3>: Cost 4 vext3 <3,4,5,6>, <6,0,3,1>
- 2687209788U, // <6,6,0,4>: Cost 3 vext3 <0,4,2,6>, <6,0,4,2>
- 3652194000U, // <6,6,0,5>: Cost 4 vext1 <4,6,6,0>, <5,1,7,3>
- 2254852914U, // <6,6,0,6>: Cost 3 vrev <6,6,6,0>
- 4041575734U, // <6,6,0,7>: Cost 4 vzipr <2,4,6,0>, RHS
- 1573192349U, // <6,6,0,u>: Cost 2 vext2 <4,u,6,6>, LHS
- 2646934262U, // <6,6,1,0>: Cost 3 vext2 <4,u,6,6>, <1,0,3,2>
- 2646934324U, // <6,6,1,1>: Cost 3 vext2 <4,u,6,6>, <1,1,1,1>
- 2646934422U, // <6,6,1,2>: Cost 3 vext2 <4,u,6,6>, <1,2,3,0>
- 2846785638U, // <6,6,1,3>: Cost 3 vuzpr <4,6,4,6>, LHS
- 3760951694U, // <6,6,1,4>: Cost 4 vext3 <0,4,2,6>, <6,1,4,3>
- 2646934672U, // <6,6,1,5>: Cost 3 vext2 <4,u,6,6>, <1,5,3,7>
- 2712572320U, // <6,6,1,6>: Cost 3 vext3 <4,6,4,6>, <6,1,6,3>
- 3775549865U, // <6,6,1,7>: Cost 4 vext3 <2,u,2,6>, <6,1,7,3>
- 2846785643U, // <6,6,1,u>: Cost 3 vuzpr <4,6,4,6>, LHS
- 3759772094U, // <6,6,2,0>: Cost 4 vext3 <0,2,4,6>, <6,2,0,6>
- 3704751676U, // <6,6,2,1>: Cost 4 vext2 <2,2,6,6>, <2,1,6,3>
- 2631009936U, // <6,6,2,2>: Cost 3 vext2 <2,2,6,6>, <2,2,6,6>
- 2646935206U, // <6,6,2,3>: Cost 3 vext2 <4,u,6,6>, <2,3,0,1>
- 3759772127U, // <6,6,2,4>: Cost 4 vext3 <0,2,4,6>, <6,2,4,3>
- 3704752004U, // <6,6,2,5>: Cost 4 vext2 <2,2,6,6>, <2,5,6,7>
- 2646935482U, // <6,6,2,6>: Cost 3 vext2 <4,u,6,6>, <2,6,3,7>
- 2712572410U, // <6,6,2,7>: Cost 3 vext3 <4,6,4,6>, <6,2,7,3>
- 2712572419U, // <6,6,2,u>: Cost 3 vext3 <4,6,4,6>, <6,2,u,3>
- 2646935702U, // <6,6,3,0>: Cost 3 vext2 <4,u,6,6>, <3,0,1,2>
- 3777024534U, // <6,6,3,1>: Cost 4 vext3 <3,1,4,6>, <6,3,1,4>
- 3704752453U, // <6,6,3,2>: Cost 4 vext2 <2,2,6,6>, <3,2,2,6>
- 2646935964U, // <6,6,3,3>: Cost 3 vext2 <4,u,6,6>, <3,3,3,3>
- 2705347122U, // <6,6,3,4>: Cost 3 vext3 <3,4,5,6>, <6,3,4,5>
- 3779678778U, // <6,6,3,5>: Cost 4 vext3 <3,5,4,6>, <6,3,5,4>
- 2657553069U, // <6,6,3,6>: Cost 3 vext2 <6,6,6,6>, <3,6,6,6>
- 4039609654U, // <6,6,3,7>: Cost 4 vzipr <2,1,6,3>, RHS
- 2708001366U, // <6,6,3,u>: Cost 3 vext3 <3,u,5,6>, <6,3,u,5>
- 2578481254U, // <6,6,4,0>: Cost 3 vext1 <4,6,6,4>, LHS
- 3652223734U, // <6,6,4,1>: Cost 4 vext1 <4,6,6,4>, <1,0,3,2>
- 3760951922U, // <6,6,4,2>: Cost 4 vext3 <0,4,2,6>, <6,4,2,6>
- 3779089019U, // <6,6,4,3>: Cost 4 vext3 <3,4,5,6>, <6,4,3,6>
- 1570540772U, // <6,6,4,4>: Cost 2 vext2 <4,4,6,6>, <4,4,6,6>
- 1573195062U, // <6,6,4,5>: Cost 2 vext2 <4,u,6,6>, RHS
- 2712572560U, // <6,6,4,6>: Cost 3 vext3 <4,6,4,6>, <6,4,6,0>
- 2723410591U, // <6,6,4,7>: Cost 3 vext3 <6,4,7,6>, <6,4,7,6>
- 1573195304U, // <6,6,4,u>: Cost 2 vext2 <4,u,6,6>, <4,u,6,6>
- 3640287334U, // <6,6,5,0>: Cost 4 vext1 <2,6,6,5>, LHS
- 2646937296U, // <6,6,5,1>: Cost 3 vext2 <4,u,6,6>, <5,1,7,3>
- 3640289235U, // <6,6,5,2>: Cost 4 vext1 <2,6,6,5>, <2,6,6,5>
- 3720679279U, // <6,6,5,3>: Cost 4 vext2 <4,u,6,6>, <5,3,7,0>
- 2646937542U, // <6,6,5,4>: Cost 3 vext2 <4,u,6,6>, <5,4,7,6>
- 2646937604U, // <6,6,5,5>: Cost 3 vext2 <4,u,6,6>, <5,5,5,5>
- 2646937698U, // <6,6,5,6>: Cost 3 vext2 <4,u,6,6>, <5,6,7,0>
- 2846788918U, // <6,6,5,7>: Cost 3 vuzpr <4,6,4,6>, RHS
- 2846788919U, // <6,6,5,u>: Cost 3 vuzpr <4,6,4,6>, RHS
- 1516699750U, // <6,6,6,0>: Cost 2 vext1 <6,6,6,6>, LHS
- 2590442230U, // <6,6,6,1>: Cost 3 vext1 <6,6,6,6>, <1,0,3,2>
- 2646938106U, // <6,6,6,2>: Cost 3 vext2 <4,u,6,6>, <6,2,7,3>
- 2590443670U, // <6,6,6,3>: Cost 3 vext1 <6,6,6,6>, <3,0,1,2>
- 1516703030U, // <6,6,6,4>: Cost 2 vext1 <6,6,6,6>, RHS
- 2590445264U, // <6,6,6,5>: Cost 3 vext1 <6,6,6,6>, <5,1,7,3>
- 296144182U, // <6,6,6,6>: Cost 1 vdup2 RHS
- 2712572738U, // <6,6,6,7>: Cost 3 vext3 <4,6,4,6>, <6,6,7,7>
- 296144182U, // <6,6,6,u>: Cost 1 vdup2 RHS
- 2566561894U, // <6,6,7,0>: Cost 3 vext1 <2,6,6,7>, LHS
- 3634332924U, // <6,6,7,1>: Cost 4 vext1 <1,6,6,7>, <1,6,6,7>
- 2566563797U, // <6,6,7,2>: Cost 3 vext1 <2,6,6,7>, <2,6,6,7>
- 2584480258U, // <6,6,7,3>: Cost 3 vext1 <5,6,6,7>, <3,4,5,6>
- 2566565174U, // <6,6,7,4>: Cost 3 vext1 <2,6,6,7>, RHS
- 2717438846U, // <6,6,7,5>: Cost 3 vext3 <5,4,7,6>, <6,7,5,4>
- 2980500280U, // <6,6,7,6>: Cost 3 vzipr RHS, <6,6,6,6>
- 1906756918U, // <6,6,7,7>: Cost 2 vzipr RHS, RHS
- 1906756919U, // <6,6,7,u>: Cost 2 vzipr RHS, RHS
- 1516699750U, // <6,6,u,0>: Cost 2 vext1 <6,6,6,6>, LHS
- 1573197614U, // <6,6,u,1>: Cost 2 vext2 <4,u,6,6>, LHS
- 2566571990U, // <6,6,u,2>: Cost 3 vext1 <2,6,6,u>, <2,6,6,u>
- 2846786205U, // <6,6,u,3>: Cost 3 vuzpr <4,6,4,6>, LHS
- 1516703030U, // <6,6,u,4>: Cost 2 vext1 <6,6,6,6>, RHS
- 1573197978U, // <6,6,u,5>: Cost 2 vext2 <4,u,6,6>, RHS
- 296144182U, // <6,6,u,6>: Cost 1 vdup2 RHS
- 1906765110U, // <6,6,u,7>: Cost 2 vzipr RHS, RHS
- 296144182U, // <6,6,u,u>: Cost 1 vdup2 RHS
- 1571209216U, // <6,7,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
- 497467494U, // <6,7,0,1>: Cost 1 vext2 RHS, LHS
- 1571209380U, // <6,7,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
- 2644951292U, // <6,7,0,3>: Cost 3 vext2 RHS, <0,3,1,0>
- 1571209554U, // <6,7,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
- 1510756450U, // <6,7,0,5>: Cost 2 vext1 <5,6,7,0>, <5,6,7,0>
- 2644951542U, // <6,7,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
- 2584499194U, // <6,7,0,7>: Cost 3 vext1 <5,6,7,0>, <7,0,1,2>
- 497468061U, // <6,7,0,u>: Cost 1 vext2 RHS, LHS
- 1571209974U, // <6,7,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
- 1571210036U, // <6,7,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
- 1571210134U, // <6,7,1,2>: Cost 2 vext2 RHS, <1,2,3,0>
- 1571210200U, // <6,7,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
- 2644952098U, // <6,7,1,4>: Cost 3 vext2 RHS, <1,4,0,5>
- 1571210384U, // <6,7,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
- 2644952271U, // <6,7,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
- 2578535418U, // <6,7,1,7>: Cost 3 vext1 <4,6,7,1>, <7,0,1,2>
- 1571210605U, // <6,7,1,u>: Cost 2 vext2 RHS, <1,u,1,3>
- 2644952509U, // <6,7,2,0>: Cost 3 vext2 RHS, <2,0,1,2>
- 2644952582U, // <6,7,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
- 1571210856U, // <6,7,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
- 1571210918U, // <6,7,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
- 2644952828U, // <6,7,2,4>: Cost 3 vext2 RHS, <2,4,0,6>
- 2633009028U, // <6,7,2,5>: Cost 3 vext2 <2,5,6,7>, <2,5,6,7>
- 1571211194U, // <6,7,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
- 2668840938U, // <6,7,2,7>: Cost 3 vext2 RHS, <2,7,0,1>
- 1571211323U, // <6,7,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
- 1571211414U, // <6,7,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
- 2644953311U, // <6,7,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
- 2644953390U, // <6,7,3,2>: Cost 3 vext2 RHS, <3,2,0,1>
- 1571211676U, // <6,7,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
- 1571211778U, // <6,7,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
- 2644953648U, // <6,7,3,5>: Cost 3 vext2 RHS, <3,5,1,7>
- 2644953720U, // <6,7,3,6>: Cost 3 vext2 RHS, <3,6,0,7>
- 2644953795U, // <6,7,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
- 1571212062U, // <6,7,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
- 1573202834U, // <6,7,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
- 2644954058U, // <6,7,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
- 2644954166U, // <6,7,4,2>: Cost 3 vext2 RHS, <4,2,5,3>
- 2644954258U, // <6,7,4,3>: Cost 3 vext2 RHS, <4,3,6,5>
- 1571212496U, // <6,7,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
- 497470774U, // <6,7,4,5>: Cost 1 vext2 RHS, RHS
- 1573203316U, // <6,7,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 2646281688U, // <6,7,4,7>: Cost 3 vext2 <4,7,6,7>, <4,7,6,7>
- 497471017U, // <6,7,4,u>: Cost 1 vext2 RHS, RHS
- 2644954696U, // <6,7,5,0>: Cost 3 vext2 RHS, <5,0,1,2>
- 1573203664U, // <6,7,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 2644954878U, // <6,7,5,2>: Cost 3 vext2 RHS, <5,2,3,4>
- 2644954991U, // <6,7,5,3>: Cost 3 vext2 RHS, <5,3,7,0>
- 1571213254U, // <6,7,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
- 1571213316U, // <6,7,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
- 1571213410U, // <6,7,5,6>: Cost 2 vext2 RHS, <5,6,7,0>
- 1573204136U, // <6,7,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
- 1573204217U, // <6,7,5,u>: Cost 2 vext2 RHS, <5,u,5,7>
- 2644955425U, // <6,7,6,0>: Cost 3 vext2 RHS, <6,0,1,2>
- 2644955561U, // <6,7,6,1>: Cost 3 vext2 RHS, <6,1,7,3>
- 1573204474U, // <6,7,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 2644955698U, // <6,7,6,3>: Cost 3 vext2 RHS, <6,3,4,5>
- 2644955789U, // <6,7,6,4>: Cost 3 vext2 RHS, <6,4,5,6>
- 2644955889U, // <6,7,6,5>: Cost 3 vext2 RHS, <6,5,7,7>
- 1571214136U, // <6,7,6,6>: Cost 2 vext2 RHS, <6,6,6,6>
- 1571214158U, // <6,7,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
- 1573204895U, // <6,7,6,u>: Cost 2 vext2 RHS, <6,u,0,1>
- 1573204986U, // <6,7,7,0>: Cost 2 vext2 RHS, <7,0,1,2>
- 2572608656U, // <6,7,7,1>: Cost 3 vext1 <3,6,7,7>, <1,5,3,7>
- 2644956362U, // <6,7,7,2>: Cost 3 vext2 RHS, <7,2,6,3>
- 2572610231U, // <6,7,7,3>: Cost 3 vext1 <3,6,7,7>, <3,6,7,7>
- 1573205350U, // <6,7,7,4>: Cost 2 vext2 RHS, <7,4,5,6>
- 2646947220U, // <6,7,7,5>: Cost 3 vext2 RHS, <7,5,1,7>
- 1516786498U, // <6,7,7,6>: Cost 2 vext1 <6,6,7,7>, <6,6,7,7>
- 1571214956U, // <6,7,7,7>: Cost 2 vext2 RHS, <7,7,7,7>
- 1573205634U, // <6,7,7,u>: Cost 2 vext2 RHS, <7,u,1,2>
- 1571215059U, // <6,7,u,0>: Cost 2 vext2 RHS,
- 497473326U, // <6,7,u,1>: Cost 1 vext2 RHS, LHS
- 1571215237U, // <6,7,u,2>: Cost 2 vext2 RHS,
- 1571215292U, // <6,7,u,3>: Cost 2 vext2 RHS,
- 1571215423U, // <6,7,u,4>: Cost 2 vext2 RHS,
- 497473690U, // <6,7,u,5>: Cost 1 vext2 RHS, RHS
- 1571215568U, // <6,7,u,6>: Cost 2 vext2 RHS,
- 1573206272U, // <6,7,u,7>: Cost 2 vext2 RHS,
- 497473893U, // <6,7,u,u>: Cost 1 vext2 RHS, LHS
- 1571217408U, // <6,u,0,0>: Cost 2 vext2 RHS, <0,0,0,0>
- 497475686U, // <6,u,0,1>: Cost 1 vext2 RHS, LHS
- 1571217572U, // <6,u,0,2>: Cost 2 vext2 RHS, <0,2,0,2>
- 2689865445U, // <6,u,0,3>: Cost 3 vext3 <0,u,2,6>,
- 1571217746U, // <6,u,0,4>: Cost 2 vext2 RHS, <0,4,1,5>
- 1510830187U, // <6,u,0,5>: Cost 2 vext1 <5,6,u,0>, <5,6,u,0>
- 2644959734U, // <6,u,0,6>: Cost 3 vext2 RHS, <0,6,1,7>
- 1193130221U, // <6,u,0,7>: Cost 2 vrev
- 497476253U, // <6,u,0,u>: Cost 1 vext2 RHS, LHS
- 1571218166U, // <6,u,1,0>: Cost 2 vext2 RHS, <1,0,3,2>
- 1571218228U, // <6,u,1,1>: Cost 2 vext2 RHS, <1,1,1,1>
- 1612289838U, // <6,u,1,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 1571218392U, // <6,u,1,3>: Cost 2 vext2 RHS, <1,3,1,3>
- 2566663478U, // <6,u,1,4>: Cost 3 vext1 <2,6,u,1>, RHS
- 1571218576U, // <6,u,1,5>: Cost 2 vext2 RHS, <1,5,3,7>
- 2644960463U, // <6,u,1,6>: Cost 3 vext2 RHS, <1,6,1,7>
- 2717439835U, // <6,u,1,7>: Cost 3 vext3 <5,4,7,6>,
- 1612289892U, // <6,u,1,u>: Cost 2 vext3 <0,2,4,6>, LHS
- 1504870502U, // <6,u,2,0>: Cost 2 vext1 <4,6,u,2>, LHS
- 2644960774U, // <6,u,2,1>: Cost 3 vext2 RHS, <2,1,0,3>
- 1571219048U, // <6,u,2,2>: Cost 2 vext2 RHS, <2,2,2,2>
- 1571219110U, // <6,u,2,3>: Cost 2 vext2 RHS, <2,3,0,1>
- 1504873782U, // <6,u,2,4>: Cost 2 vext1 <4,6,u,2>, RHS
- 2633017221U, // <6,u,2,5>: Cost 3 vext2 <2,5,6,u>, <2,5,6,u>
- 1571219386U, // <6,u,2,6>: Cost 2 vext2 RHS, <2,6,3,7>
- 2712573868U, // <6,u,2,7>: Cost 3 vext3 <4,6,4,6>,
- 1571219515U, // <6,u,2,u>: Cost 2 vext2 RHS, <2,u,0,1>
- 1571219606U, // <6,u,3,0>: Cost 2 vext2 RHS, <3,0,1,2>
- 2644961503U, // <6,u,3,1>: Cost 3 vext2 RHS, <3,1,0,3>
- 2566678499U, // <6,u,3,2>: Cost 3 vext1 <2,6,u,3>, <2,6,u,3>
- 1571219868U, // <6,u,3,3>: Cost 2 vext2 RHS, <3,3,3,3>
- 1571219970U, // <6,u,3,4>: Cost 2 vext2 RHS, <3,4,5,6>
- 2689865711U, // <6,u,3,5>: Cost 3 vext3 <0,u,2,6>,
- 2708002806U, // <6,u,3,6>: Cost 3 vext3 <3,u,5,6>,
- 2644961987U, // <6,u,3,7>: Cost 3 vext2 RHS, <3,7,0,1>
- 1571220254U, // <6,u,3,u>: Cost 2 vext2 RHS, <3,u,1,2>
- 1571220370U, // <6,u,4,0>: Cost 2 vext2 RHS, <4,0,5,1>
- 2644962250U, // <6,u,4,1>: Cost 3 vext2 RHS, <4,1,2,3>
- 1661245476U, // <6,u,4,2>: Cost 2 vext3 ,
- 2686031917U, // <6,u,4,3>: Cost 3 vext3 <0,2,4,6>,
- 1571220688U, // <6,u,4,4>: Cost 2 vext2 RHS, <4,4,4,4>
- 497478967U, // <6,u,4,5>: Cost 1 vext2 RHS, RHS
- 1571220852U, // <6,u,4,6>: Cost 2 vext2 RHS, <4,6,4,6>
- 1661614161U, // <6,u,4,7>: Cost 2 vext3 ,
- 497479209U, // <6,u,4,u>: Cost 1 vext2 RHS, RHS
- 2566692966U, // <6,u,5,0>: Cost 3 vext1 <2,6,u,5>, LHS
- 1571221200U, // <6,u,5,1>: Cost 2 vext2 RHS, <5,1,7,3>
- 2566694885U, // <6,u,5,2>: Cost 3 vext1 <2,6,u,5>, <2,6,u,5>
- 2689865855U, // <6,u,5,3>: Cost 3 vext3 <0,u,2,6>,
- 1571221446U, // <6,u,5,4>: Cost 2 vext2 RHS, <5,4,7,6>
- 1571221508U, // <6,u,5,5>: Cost 2 vext2 RHS, <5,5,5,5>
- 1612290202U, // <6,u,5,6>: Cost 2 vext3 <0,2,4,6>, RHS
- 1571221672U, // <6,u,5,7>: Cost 2 vext2 RHS, <5,7,5,7>
- 1612290220U, // <6,u,5,u>: Cost 2 vext3 <0,2,4,6>, RHS
- 1504903270U, // <6,u,6,0>: Cost 2 vext1 <4,6,u,6>, LHS
- 2644963752U, // <6,u,6,1>: Cost 3 vext2 RHS, <6,1,7,2>
- 1571222010U, // <6,u,6,2>: Cost 2 vext2 RHS, <6,2,7,3>
- 2686032080U, // <6,u,6,3>: Cost 3 vext3 <0,2,4,6>,
- 1504906550U, // <6,u,6,4>: Cost 2 vext1 <4,6,u,6>, RHS
- 2644964079U, // <6,u,6,5>: Cost 3 vext2 RHS, <6,5,7,5>
- 296144182U, // <6,u,6,6>: Cost 1 vdup2 RHS
- 1571222350U, // <6,u,6,7>: Cost 2 vext2 RHS, <6,7,0,1>
- 296144182U, // <6,u,6,u>: Cost 1 vdup2 RHS
- 1492967526U, // <6,u,7,0>: Cost 2 vext1 <2,6,u,7>, LHS
- 2560738574U, // <6,u,7,1>: Cost 3 vext1 <1,6,u,7>, <1,6,u,7>
- 1492969447U, // <6,u,7,2>: Cost 2 vext1 <2,6,u,7>, <2,6,u,7>
- 1906753692U, // <6,u,7,3>: Cost 2 vzipr RHS, LHS
- 1492970806U, // <6,u,7,4>: Cost 2 vext1 <2,6,u,7>, RHS
- 2980495761U, // <6,u,7,5>: Cost 3 vzipr RHS, <0,4,u,5>
- 1516860235U, // <6,u,7,6>: Cost 2 vext1 <6,6,u,7>, <6,6,u,7>
- 1906756936U, // <6,u,7,7>: Cost 2 vzipr RHS, RHS
- 1492973358U, // <6,u,7,u>: Cost 2 vext1 <2,6,u,7>, LHS
- 1492975718U, // <6,u,u,0>: Cost 2 vext1 <2,6,u,u>, LHS
- 497481518U, // <6,u,u,1>: Cost 1 vext2 RHS, LHS
- 1612290405U, // <6,u,u,2>: Cost 2 vext3 <0,2,4,6>, LHS
- 1571223484U, // <6,u,u,3>: Cost 2 vext2 RHS,
- 1492978998U, // <6,u,u,4>: Cost 2 vext1 <2,6,u,u>, RHS
- 497481882U, // <6,u,u,5>: Cost 1 vext2 RHS, RHS
- 296144182U, // <6,u,u,6>: Cost 1 vdup2 RHS
- 1906765128U, // <6,u,u,7>: Cost 2 vzipr RHS, RHS
- 497482085U, // <6,u,u,u>: Cost 1 vext2 RHS, LHS
- 1638318080U, // <7,0,0,0>: Cost 2 vext3 RHS, <0,0,0,0>
- 1638318090U, // <7,0,0,1>: Cost 2 vext3 RHS, <0,0,1,1>
- 1638318100U, // <7,0,0,2>: Cost 2 vext3 RHS, <0,0,2,2>
- 3646442178U, // <7,0,0,3>: Cost 4 vext1 <3,7,0,0>, <3,7,0,0>
- 2712059941U, // <7,0,0,4>: Cost 3 vext3 RHS, <0,0,4,1>
- 2651603364U, // <7,0,0,5>: Cost 3 vext2 <5,6,7,0>, <0,5,1,6>
- 2590618445U, // <7,0,0,6>: Cost 3 vext1 <6,7,0,0>, <6,7,0,0>
- 3785801798U, // <7,0,0,7>: Cost 4 vext3 RHS, <0,0,7,7>
- 1638318153U, // <7,0,0,u>: Cost 2 vext3 RHS, <0,0,u,1>
- 1516879974U, // <7,0,1,0>: Cost 2 vext1 <6,7,0,1>, LHS
- 2693922911U, // <7,0,1,1>: Cost 3 vext3 <1,5,3,7>, <0,1,1,5>
- 564576358U, // <7,0,1,2>: Cost 1 vext3 RHS, LHS
- 2638996480U, // <7,0,1,3>: Cost 3 vext2 <3,5,7,0>, <1,3,5,7>
- 1516883254U, // <7,0,1,4>: Cost 2 vext1 <6,7,0,1>, RHS
- 2649613456U, // <7,0,1,5>: Cost 3 vext2 <5,3,7,0>, <1,5,3,7>
- 1516884814U, // <7,0,1,6>: Cost 2 vext1 <6,7,0,1>, <6,7,0,1>
- 2590626808U, // <7,0,1,7>: Cost 3 vext1 <6,7,0,1>, <7,0,1,0>
- 564576412U, // <7,0,1,u>: Cost 1 vext3 RHS, LHS
- 1638318244U, // <7,0,2,0>: Cost 2 vext3 RHS, <0,2,0,2>
- 2692743344U, // <7,0,2,1>: Cost 3 vext3 <1,3,5,7>, <0,2,1,5>
- 2712060084U, // <7,0,2,2>: Cost 3 vext3 RHS, <0,2,2,0>
- 2712060094U, // <7,0,2,3>: Cost 3 vext3 RHS, <0,2,3,1>
- 1638318284U, // <7,0,2,4>: Cost 2 vext3 RHS, <0,2,4,6>
- 2712060118U, // <7,0,2,5>: Cost 3 vext3 RHS, <0,2,5,7>
- 2651604922U, // <7,0,2,6>: Cost 3 vext2 <5,6,7,0>, <2,6,3,7>
- 2686255336U, // <7,0,2,7>: Cost 3 vext3 <0,2,7,7>, <0,2,7,7>
- 1638318316U, // <7,0,2,u>: Cost 2 vext3 RHS, <0,2,u,2>
- 2651605142U, // <7,0,3,0>: Cost 3 vext2 <5,6,7,0>, <3,0,1,2>
- 2712060156U, // <7,0,3,1>: Cost 3 vext3 RHS, <0,3,1,0>
- 2712060165U, // <7,0,3,2>: Cost 3 vext3 RHS, <0,3,2,0>
- 2651605404U, // <7,0,3,3>: Cost 3 vext2 <5,6,7,0>, <3,3,3,3>
- 2651605506U, // <7,0,3,4>: Cost 3 vext2 <5,6,7,0>, <3,4,5,6>
- 2638998111U, // <7,0,3,5>: Cost 3 vext2 <3,5,7,0>, <3,5,7,0>
- 2639661744U, // <7,0,3,6>: Cost 3 vext2 <3,6,7,0>, <3,6,7,0>
- 3712740068U, // <7,0,3,7>: Cost 4 vext2 <3,5,7,0>, <3,7,3,7>
- 2640989010U, // <7,0,3,u>: Cost 3 vext2 <3,u,7,0>, <3,u,7,0>
- 2712060232U, // <7,0,4,0>: Cost 3 vext3 RHS, <0,4,0,4>
- 1638318418U, // <7,0,4,1>: Cost 2 vext3 RHS, <0,4,1,5>
- 1638318428U, // <7,0,4,2>: Cost 2 vext3 RHS, <0,4,2,6>
- 3646474950U, // <7,0,4,3>: Cost 4 vext1 <3,7,0,4>, <3,7,0,4>
- 2712060270U, // <7,0,4,4>: Cost 3 vext3 RHS, <0,4,4,6>
- 1577864502U, // <7,0,4,5>: Cost 2 vext2 <5,6,7,0>, RHS
- 2651606388U, // <7,0,4,6>: Cost 3 vext2 <5,6,7,0>, <4,6,4,6>
- 3787792776U, // <7,0,4,7>: Cost 4 vext3 RHS, <0,4,7,5>
- 1638318481U, // <7,0,4,u>: Cost 2 vext3 RHS, <0,4,u,5>
- 2590654566U, // <7,0,5,0>: Cost 3 vext1 <6,7,0,5>, LHS
- 2651606736U, // <7,0,5,1>: Cost 3 vext2 <5,6,7,0>, <5,1,7,3>
- 2712060334U, // <7,0,5,2>: Cost 3 vext3 RHS, <0,5,2,7>
- 2649616239U, // <7,0,5,3>: Cost 3 vext2 <5,3,7,0>, <5,3,7,0>
- 2651606982U, // <7,0,5,4>: Cost 3 vext2 <5,6,7,0>, <5,4,7,6>
- 2651607044U, // <7,0,5,5>: Cost 3 vext2 <5,6,7,0>, <5,5,5,5>
- 1577865314U, // <7,0,5,6>: Cost 2 vext2 <5,6,7,0>, <5,6,7,0>
- 2651607208U, // <7,0,5,7>: Cost 3 vext2 <5,6,7,0>, <5,7,5,7>
- 1579192580U, // <7,0,5,u>: Cost 2 vext2 <5,u,7,0>, <5,u,7,0>
- 2688393709U, // <7,0,6,0>: Cost 3 vext3 <0,6,0,7>, <0,6,0,7>
- 2712060406U, // <7,0,6,1>: Cost 3 vext3 RHS, <0,6,1,7>
- 2688541183U, // <7,0,6,2>: Cost 3 vext3 <0,6,2,7>, <0,6,2,7>
- 2655588936U, // <7,0,6,3>: Cost 3 vext2 <6,3,7,0>, <6,3,7,0>
- 3762430481U, // <7,0,6,4>: Cost 4 vext3 <0,6,4,7>, <0,6,4,7>
- 2651607730U, // <7,0,6,5>: Cost 3 vext2 <5,6,7,0>, <6,5,0,7>
- 2651607864U, // <7,0,6,6>: Cost 3 vext2 <5,6,7,0>, <6,6,6,6>
- 2651607886U, // <7,0,6,7>: Cost 3 vext2 <5,6,7,0>, <6,7,0,1>
- 2688983605U, // <7,0,6,u>: Cost 3 vext3 <0,6,u,7>, <0,6,u,7>
- 2651608058U, // <7,0,7,0>: Cost 3 vext2 <5,6,7,0>, <7,0,1,2>
- 2932703334U, // <7,0,7,1>: Cost 3 vzipl <7,7,7,7>, LHS
- 3066921062U, // <7,0,7,2>: Cost 3 vtrnl <7,7,7,7>, LHS
- 3712742678U, // <7,0,7,3>: Cost 4 vext2 <3,5,7,0>, <7,3,5,7>
- 2651608422U, // <7,0,7,4>: Cost 3 vext2 <5,6,7,0>, <7,4,5,6>
- 2651608513U, // <7,0,7,5>: Cost 3 vext2 <5,6,7,0>, <7,5,6,7>
- 2663552532U, // <7,0,7,6>: Cost 3 vext2 <7,6,7,0>, <7,6,7,0>
- 2651608684U, // <7,0,7,7>: Cost 3 vext2 <5,6,7,0>, <7,7,7,7>
- 2651608706U, // <7,0,7,u>: Cost 3 vext2 <5,6,7,0>, <7,u,1,2>
- 1638318730U, // <7,0,u,0>: Cost 2 vext3 RHS, <0,u,0,2>
- 1638318738U, // <7,0,u,1>: Cost 2 vext3 RHS, <0,u,1,1>
- 564576925U, // <7,0,u,2>: Cost 1 vext3 RHS, LHS
- 2572765898U, // <7,0,u,3>: Cost 3 vext1 <3,7,0,u>, <3,7,0,u>
- 1638318770U, // <7,0,u,4>: Cost 2 vext3 RHS, <0,u,4,6>
- 1577867418U, // <7,0,u,5>: Cost 2 vext2 <5,6,7,0>, RHS
- 1516942165U, // <7,0,u,6>: Cost 2 vext1 <6,7,0,u>, <6,7,0,u>
- 2651609344U, // <7,0,u,7>: Cost 3 vext2 <5,6,7,0>,
- 564576979U, // <7,0,u,u>: Cost 1 vext3 RHS, LHS
- 2590687334U, // <7,1,0,0>: Cost 3 vext1 <6,7,1,0>, LHS
- 2639003750U, // <7,1,0,1>: Cost 3 vext2 <3,5,7,1>, LHS
- 2793357414U, // <7,1,0,2>: Cost 3 vuzpl <7,0,1,2>, LHS
- 1638318838U, // <7,1,0,3>: Cost 2 vext3 RHS, <1,0,3,2>
- 2590690614U, // <7,1,0,4>: Cost 3 vext1 <6,7,1,0>, RHS
- 2712060679U, // <7,1,0,5>: Cost 3 vext3 RHS, <1,0,5,1>
- 2590692182U, // <7,1,0,6>: Cost 3 vext1 <6,7,1,0>, <6,7,1,0>
- 3785802521U, // <7,1,0,7>: Cost 4 vext3 RHS, <1,0,7,1>
- 1638318883U, // <7,1,0,u>: Cost 2 vext3 RHS, <1,0,u,2>
- 2712060715U, // <7,1,1,0>: Cost 3 vext3 RHS, <1,1,0,1>
- 1638318900U, // <7,1,1,1>: Cost 2 vext3 RHS, <1,1,1,1>
- 3774300994U, // <7,1,1,2>: Cost 4 vext3 <2,6,3,7>, <1,1,2,6>
- 1638318920U, // <7,1,1,3>: Cost 2 vext3 RHS, <1,1,3,3>
- 2712060755U, // <7,1,1,4>: Cost 3 vext3 RHS, <1,1,4,5>
- 2691416926U, // <7,1,1,5>: Cost 3 vext3 <1,1,5,7>, <1,1,5,7>
- 2590700375U, // <7,1,1,6>: Cost 3 vext1 <6,7,1,1>, <6,7,1,1>
- 3765158766U, // <7,1,1,7>: Cost 4 vext3 <1,1,5,7>, <1,1,7,5>
- 1638318965U, // <7,1,1,u>: Cost 2 vext3 RHS, <1,1,u,3>
- 2712060796U, // <7,1,2,0>: Cost 3 vext3 RHS, <1,2,0,1>
- 2712060807U, // <7,1,2,1>: Cost 3 vext3 RHS, <1,2,1,3>
- 3712747112U, // <7,1,2,2>: Cost 4 vext2 <3,5,7,1>, <2,2,2,2>
- 1638318998U, // <7,1,2,3>: Cost 2 vext3 RHS, <1,2,3,0>
- 2712060836U, // <7,1,2,4>: Cost 3 vext3 RHS, <1,2,4,5>
- 2712060843U, // <7,1,2,5>: Cost 3 vext3 RHS, <1,2,5,3>
- 2590708568U, // <7,1,2,6>: Cost 3 vext1 <6,7,1,2>, <6,7,1,2>
- 2735948730U, // <7,1,2,7>: Cost 3 vext3 RHS, <1,2,7,0>
- 1638319043U, // <7,1,2,u>: Cost 2 vext3 RHS, <1,2,u,0>
- 2712060876U, // <7,1,3,0>: Cost 3 vext3 RHS, <1,3,0,0>
- 1638319064U, // <7,1,3,1>: Cost 2 vext3 RHS, <1,3,1,3>
- 2712060894U, // <7,1,3,2>: Cost 3 vext3 RHS, <1,3,2,0>
- 2692596718U, // <7,1,3,3>: Cost 3 vext3 <1,3,3,7>, <1,3,3,7>
- 2712060917U, // <7,1,3,4>: Cost 3 vext3 RHS, <1,3,4,5>
- 1619002368U, // <7,1,3,5>: Cost 2 vext3 <1,3,5,7>, <1,3,5,7>
- 2692817929U, // <7,1,3,6>: Cost 3 vext3 <1,3,6,7>, <1,3,6,7>
- 2735948814U, // <7,1,3,7>: Cost 3 vext3 RHS, <1,3,7,3>
- 1619223579U, // <7,1,3,u>: Cost 2 vext3 <1,3,u,7>, <1,3,u,7>
- 2712060962U, // <7,1,4,0>: Cost 3 vext3 RHS, <1,4,0,5>
- 2712060971U, // <7,1,4,1>: Cost 3 vext3 RHS, <1,4,1,5>
- 2712060980U, // <7,1,4,2>: Cost 3 vext3 RHS, <1,4,2,5>
- 2712060989U, // <7,1,4,3>: Cost 3 vext3 RHS, <1,4,3,5>
- 3785802822U, // <7,1,4,4>: Cost 4 vext3 RHS, <1,4,4,5>
- 2639007030U, // <7,1,4,5>: Cost 3 vext2 <3,5,7,1>, RHS
- 2645642634U, // <7,1,4,6>: Cost 3 vext2 <4,6,7,1>, <4,6,7,1>
- 3719384520U, // <7,1,4,7>: Cost 4 vext2 <4,6,7,1>, <4,7,5,0>
- 2639007273U, // <7,1,4,u>: Cost 3 vext2 <3,5,7,1>, RHS
- 2572812390U, // <7,1,5,0>: Cost 3 vext1 <3,7,1,5>, LHS
- 2693776510U, // <7,1,5,1>: Cost 3 vext3 <1,5,1,7>, <1,5,1,7>
- 3774301318U, // <7,1,5,2>: Cost 4 vext3 <2,6,3,7>, <1,5,2,6>
- 1620182160U, // <7,1,5,3>: Cost 2 vext3 <1,5,3,7>, <1,5,3,7>
- 2572815670U, // <7,1,5,4>: Cost 3 vext1 <3,7,1,5>, RHS
- 3766486178U, // <7,1,5,5>: Cost 4 vext3 <1,3,5,7>, <1,5,5,7>
- 2651615331U, // <7,1,5,6>: Cost 3 vext2 <5,6,7,1>, <5,6,7,1>
- 2652278964U, // <7,1,5,7>: Cost 3 vext2 <5,7,7,1>, <5,7,7,1>
- 1620550845U, // <7,1,5,u>: Cost 2 vext3 <1,5,u,7>, <1,5,u,7>
- 3768108230U, // <7,1,6,0>: Cost 4 vext3 <1,6,0,7>, <1,6,0,7>
- 2694440143U, // <7,1,6,1>: Cost 3 vext3 <1,6,1,7>, <1,6,1,7>
- 2712061144U, // <7,1,6,2>: Cost 3 vext3 RHS, <1,6,2,7>
- 2694587617U, // <7,1,6,3>: Cost 3 vext3 <1,6,3,7>, <1,6,3,7>
- 3768403178U, // <7,1,6,4>: Cost 4 vext3 <1,6,4,7>, <1,6,4,7>
- 2694735091U, // <7,1,6,5>: Cost 3 vext3 <1,6,5,7>, <1,6,5,7>
- 3768550652U, // <7,1,6,6>: Cost 4 vext3 <1,6,6,7>, <1,6,6,7>
- 2652279630U, // <7,1,6,7>: Cost 3 vext2 <5,7,7,1>, <6,7,0,1>
- 2694956302U, // <7,1,6,u>: Cost 3 vext3 <1,6,u,7>, <1,6,u,7>
- 2645644282U, // <7,1,7,0>: Cost 3 vext2 <4,6,7,1>, <7,0,1,2>
- 2859062094U, // <7,1,7,1>: Cost 3 vuzpr <6,7,0,1>, <6,7,0,1>
- 3779462437U, // <7,1,7,2>: Cost 4 vext3 <3,5,1,7>, <1,7,2,3>
- 3121938534U, // <7,1,7,3>: Cost 3 vtrnr <5,7,5,7>, LHS
- 2554916150U, // <7,1,7,4>: Cost 3 vext1 <0,7,1,7>, RHS
- 3769140548U, // <7,1,7,5>: Cost 4 vext3 <1,7,5,7>, <1,7,5,7>
- 3726022164U, // <7,1,7,6>: Cost 4 vext2 <5,7,7,1>, <7,6,7,0>
- 2554918508U, // <7,1,7,7>: Cost 3 vext1 <0,7,1,7>, <7,7,7,7>
- 3121938539U, // <7,1,7,u>: Cost 3 vtrnr <5,7,5,7>, LHS
- 2572836966U, // <7,1,u,0>: Cost 3 vext1 <3,7,1,u>, LHS
- 1638319469U, // <7,1,u,1>: Cost 2 vext3 RHS, <1,u,1,3>
- 2712061299U, // <7,1,u,2>: Cost 3 vext3 RHS, <1,u,2,0>
- 1622173059U, // <7,1,u,3>: Cost 2 vext3 <1,u,3,7>, <1,u,3,7>
- 2572840246U, // <7,1,u,4>: Cost 3 vext1 <3,7,1,u>, RHS
- 1622320533U, // <7,1,u,5>: Cost 2 vext3 <1,u,5,7>, <1,u,5,7>
- 2696136094U, // <7,1,u,6>: Cost 3 vext3 <1,u,6,7>, <1,u,6,7>
- 2859060777U, // <7,1,u,7>: Cost 3 vuzpr <6,7,0,1>, RHS
- 1622541744U, // <7,1,u,u>: Cost 2 vext3 <1,u,u,7>, <1,u,u,7>
- 2712061364U, // <7,2,0,0>: Cost 3 vext3 RHS, <2,0,0,2>
- 2712061373U, // <7,2,0,1>: Cost 3 vext3 RHS, <2,0,1,2>
- 2712061380U, // <7,2,0,2>: Cost 3 vext3 RHS, <2,0,2,0>
- 2712061389U, // <7,2,0,3>: Cost 3 vext3 RHS, <2,0,3,0>
- 2712061404U, // <7,2,0,4>: Cost 3 vext3 RHS, <2,0,4,6>
- 2696725990U, // <7,2,0,5>: Cost 3 vext3 <2,0,5,7>, <2,0,5,7>
- 2712061417U, // <7,2,0,6>: Cost 3 vext3 RHS, <2,0,6,1>
- 3785803251U, // <7,2,0,7>: Cost 4 vext3 RHS, <2,0,7,2>
- 2696947201U, // <7,2,0,u>: Cost 3 vext3 <2,0,u,7>, <2,0,u,7>
- 2712061446U, // <7,2,1,0>: Cost 3 vext3 RHS, <2,1,0,3>
- 3785803276U, // <7,2,1,1>: Cost 4 vext3 RHS, <2,1,1,0>
- 3785803285U, // <7,2,1,2>: Cost 4 vext3 RHS, <2,1,2,0>
- 2712061471U, // <7,2,1,3>: Cost 3 vext3 RHS, <2,1,3,1>
- 2712061482U, // <7,2,1,4>: Cost 3 vext3 RHS, <2,1,4,3>
- 3766486576U, // <7,2,1,5>: Cost 4 vext3 <1,3,5,7>, <2,1,5,0>
- 2712061500U, // <7,2,1,6>: Cost 3 vext3 RHS, <2,1,6,3>
- 2602718850U, // <7,2,1,7>: Cost 3 vext1 , <7,u,1,2>
- 2712061516U, // <7,2,1,u>: Cost 3 vext3 RHS, <2,1,u,1>
- 2712061525U, // <7,2,2,0>: Cost 3 vext3 RHS, <2,2,0,1>
- 2712061536U, // <7,2,2,1>: Cost 3 vext3 RHS, <2,2,1,3>
- 1638319720U, // <7,2,2,2>: Cost 2 vext3 RHS, <2,2,2,2>
- 1638319730U, // <7,2,2,3>: Cost 2 vext3 RHS, <2,2,3,3>
- 2712061565U, // <7,2,2,4>: Cost 3 vext3 RHS, <2,2,4,5>
- 2698053256U, // <7,2,2,5>: Cost 3 vext3 <2,2,5,7>, <2,2,5,7>
- 2712061584U, // <7,2,2,6>: Cost 3 vext3 RHS, <2,2,6,6>
- 3771795096U, // <7,2,2,7>: Cost 4 vext3 <2,2,5,7>, <2,2,7,5>
- 1638319775U, // <7,2,2,u>: Cost 2 vext3 RHS, <2,2,u,3>
- 1638319782U, // <7,2,3,0>: Cost 2 vext3 RHS, <2,3,0,1>
- 2693924531U, // <7,2,3,1>: Cost 3 vext3 <1,5,3,7>, <2,3,1,5>
- 2700560061U, // <7,2,3,2>: Cost 3 vext3 <2,6,3,7>, <2,3,2,6>
- 2693924551U, // <7,2,3,3>: Cost 3 vext3 <1,5,3,7>, <2,3,3,7>
- 1638319822U, // <7,2,3,4>: Cost 2 vext3 RHS, <2,3,4,5>
- 2698716889U, // <7,2,3,5>: Cost 3 vext3 <2,3,5,7>, <2,3,5,7>
- 2712061665U, // <7,2,3,6>: Cost 3 vext3 RHS, <2,3,6,6>
- 2735949540U, // <7,2,3,7>: Cost 3 vext3 RHS, <2,3,7,0>
- 1638319854U, // <7,2,3,u>: Cost 2 vext3 RHS, <2,3,u,1>
- 2712061692U, // <7,2,4,0>: Cost 3 vext3 RHS, <2,4,0,6>
- 2712061698U, // <7,2,4,1>: Cost 3 vext3 RHS, <2,4,1,3>
- 2712061708U, // <7,2,4,2>: Cost 3 vext3 RHS, <2,4,2,4>
- 2712061718U, // <7,2,4,3>: Cost 3 vext3 RHS, <2,4,3,5>
- 2712061728U, // <7,2,4,4>: Cost 3 vext3 RHS, <2,4,4,6>
- 2699380522U, // <7,2,4,5>: Cost 3 vext3 <2,4,5,7>, <2,4,5,7>
- 2712061740U, // <7,2,4,6>: Cost 3 vext3 RHS, <2,4,6,0>
- 3809691445U, // <7,2,4,7>: Cost 4 vext3 RHS, <2,4,7,0>
- 2699601733U, // <7,2,4,u>: Cost 3 vext3 <2,4,u,7>, <2,4,u,7>
- 2699675470U, // <7,2,5,0>: Cost 3 vext3 <2,5,0,7>, <2,5,0,7>
- 3766486867U, // <7,2,5,1>: Cost 4 vext3 <1,3,5,7>, <2,5,1,3>
- 2699822944U, // <7,2,5,2>: Cost 3 vext3 <2,5,2,7>, <2,5,2,7>
- 2692745065U, // <7,2,5,3>: Cost 3 vext3 <1,3,5,7>, <2,5,3,7>
- 2699970418U, // <7,2,5,4>: Cost 3 vext3 <2,5,4,7>, <2,5,4,7>
- 3766486907U, // <7,2,5,5>: Cost 4 vext3 <1,3,5,7>, <2,5,5,7>
- 2700117892U, // <7,2,5,6>: Cost 3 vext3 <2,5,6,7>, <2,5,6,7>
- 3771795334U, // <7,2,5,7>: Cost 4 vext3 <2,2,5,7>, <2,5,7,0>
- 2692745110U, // <7,2,5,u>: Cost 3 vext3 <1,3,5,7>, <2,5,u,7>
- 2572894310U, // <7,2,6,0>: Cost 3 vext1 <3,7,2,6>, LHS
- 2712061860U, // <7,2,6,1>: Cost 3 vext3 RHS, <2,6,1,3>
- 2700486577U, // <7,2,6,2>: Cost 3 vext3 <2,6,2,7>, <2,6,2,7>
- 1626818490U, // <7,2,6,3>: Cost 2 vext3 <2,6,3,7>, <2,6,3,7>
- 2572897590U, // <7,2,6,4>: Cost 3 vext1 <3,7,2,6>, RHS
- 2700707788U, // <7,2,6,5>: Cost 3 vext3 <2,6,5,7>, <2,6,5,7>
- 2700781525U, // <7,2,6,6>: Cost 3 vext3 <2,6,6,7>, <2,6,6,7>
- 3774597086U, // <7,2,6,7>: Cost 4 vext3 <2,6,7,7>, <2,6,7,7>
- 1627187175U, // <7,2,6,u>: Cost 2 vext3 <2,6,u,7>, <2,6,u,7>
- 2735949802U, // <7,2,7,0>: Cost 3 vext3 RHS, <2,7,0,1>
- 3780200434U, // <7,2,7,1>: Cost 4 vext3 <3,6,2,7>, <2,7,1,0>
- 3773564928U, // <7,2,7,2>: Cost 4 vext3 <2,5,2,7>, <2,7,2,5>
- 2986541158U, // <7,2,7,3>: Cost 3 vzipr <5,5,7,7>, LHS
- 2554989878U, // <7,2,7,4>: Cost 3 vext1 <0,7,2,7>, RHS
- 3775113245U, // <7,2,7,5>: Cost 4 vext3 <2,7,5,7>, <2,7,5,7>
- 4060283228U, // <7,2,7,6>: Cost 4 vzipr <5,5,7,7>, <0,4,2,6>
- 2554992236U, // <7,2,7,7>: Cost 3 vext1 <0,7,2,7>, <7,7,7,7>
- 2986541163U, // <7,2,7,u>: Cost 3 vzipr <5,5,7,7>, LHS
- 1638320187U, // <7,2,u,0>: Cost 2 vext3 RHS, <2,u,0,1>
- 2693924936U, // <7,2,u,1>: Cost 3 vext3 <1,5,3,7>, <2,u,1,5>
- 1638319720U, // <7,2,u,2>: Cost 2 vext3 RHS, <2,2,2,2>
- 1628145756U, // <7,2,u,3>: Cost 2 vext3 <2,u,3,7>, <2,u,3,7>
- 1638320227U, // <7,2,u,4>: Cost 2 vext3 RHS, <2,u,4,5>
- 2702035054U, // <7,2,u,5>: Cost 3 vext3 <2,u,5,7>, <2,u,5,7>
- 2702108791U, // <7,2,u,6>: Cost 3 vext3 <2,u,6,7>, <2,u,6,7>
- 2735949945U, // <7,2,u,7>: Cost 3 vext3 RHS, <2,u,7,0>
- 1628514441U, // <7,2,u,u>: Cost 2 vext3 <2,u,u,7>, <2,u,u,7>
- 2712062091U, // <7,3,0,0>: Cost 3 vext3 RHS, <3,0,0,0>
- 1638320278U, // <7,3,0,1>: Cost 2 vext3 RHS, <3,0,1,2>
- 2712062109U, // <7,3,0,2>: Cost 3 vext3 RHS, <3,0,2,0>
- 2590836886U, // <7,3,0,3>: Cost 3 vext1 <6,7,3,0>, <3,0,1,2>
- 2712062128U, // <7,3,0,4>: Cost 3 vext3 RHS, <3,0,4,1>
- 2712062138U, // <7,3,0,5>: Cost 3 vext3 RHS, <3,0,5,2>
- 2590839656U, // <7,3,0,6>: Cost 3 vext1 <6,7,3,0>, <6,7,3,0>
- 3311414017U, // <7,3,0,7>: Cost 4 vrev <3,7,7,0>
- 1638320341U, // <7,3,0,u>: Cost 2 vext3 RHS, <3,0,u,2>
- 2237164227U, // <7,3,1,0>: Cost 3 vrev <3,7,0,1>
- 2712062182U, // <7,3,1,1>: Cost 3 vext3 RHS, <3,1,1,1>
- 2712062193U, // <7,3,1,2>: Cost 3 vext3 RHS, <3,1,2,3>
- 2692745468U, // <7,3,1,3>: Cost 3 vext3 <1,3,5,7>, <3,1,3,5>
- 2712062214U, // <7,3,1,4>: Cost 3 vext3 RHS, <3,1,4,6>
- 2693925132U, // <7,3,1,5>: Cost 3 vext3 <1,5,3,7>, <3,1,5,3>
- 3768183059U, // <7,3,1,6>: Cost 4 vext3 <1,6,1,7>, <3,1,6,1>
- 2692745504U, // <7,3,1,7>: Cost 3 vext3 <1,3,5,7>, <3,1,7,5>
- 2696063273U, // <7,3,1,u>: Cost 3 vext3 <1,u,5,7>, <3,1,u,5>
- 2712062254U, // <7,3,2,0>: Cost 3 vext3 RHS, <3,2,0,1>
- 2712062262U, // <7,3,2,1>: Cost 3 vext3 RHS, <3,2,1,0>
- 2712062273U, // <7,3,2,2>: Cost 3 vext3 RHS, <3,2,2,2>
- 2712062280U, // <7,3,2,3>: Cost 3 vext3 RHS, <3,2,3,0>
- 2712062294U, // <7,3,2,4>: Cost 3 vext3 RHS, <3,2,4,5>
- 2712062302U, // <7,3,2,5>: Cost 3 vext3 RHS, <3,2,5,4>
- 2700560742U, // <7,3,2,6>: Cost 3 vext3 <2,6,3,7>, <3,2,6,3>
- 2712062319U, // <7,3,2,7>: Cost 3 vext3 RHS, <3,2,7,3>
- 2712062325U, // <7,3,2,u>: Cost 3 vext3 RHS, <3,2,u,0>
- 2712062335U, // <7,3,3,0>: Cost 3 vext3 RHS, <3,3,0,1>
- 2636368158U, // <7,3,3,1>: Cost 3 vext2 <3,1,7,3>, <3,1,7,3>
- 2637031791U, // <7,3,3,2>: Cost 3 vext2 <3,2,7,3>, <3,2,7,3>
- 1638320540U, // <7,3,3,3>: Cost 2 vext3 RHS, <3,3,3,3>
- 2712062374U, // <7,3,3,4>: Cost 3 vext3 RHS, <3,3,4,4>
- 2704689586U, // <7,3,3,5>: Cost 3 vext3 <3,3,5,7>, <3,3,5,7>
- 2590864235U, // <7,3,3,6>: Cost 3 vext1 <6,7,3,3>, <6,7,3,3>
- 2704837060U, // <7,3,3,7>: Cost 3 vext3 <3,3,7,7>, <3,3,7,7>
- 1638320540U, // <7,3,3,u>: Cost 2 vext3 RHS, <3,3,3,3>
- 2712062416U, // <7,3,4,0>: Cost 3 vext3 RHS, <3,4,0,1>
- 2712062426U, // <7,3,4,1>: Cost 3 vext3 RHS, <3,4,1,2>
- 2566981640U, // <7,3,4,2>: Cost 3 vext1 <2,7,3,4>, <2,7,3,4>
- 2712062447U, // <7,3,4,3>: Cost 3 vext3 RHS, <3,4,3,5>
- 2712062456U, // <7,3,4,4>: Cost 3 vext3 RHS, <3,4,4,5>
- 1638320642U, // <7,3,4,5>: Cost 2 vext3 RHS, <3,4,5,6>
- 2648313204U, // <7,3,4,6>: Cost 3 vext2 <5,1,7,3>, <4,6,4,6>
- 3311446789U, // <7,3,4,7>: Cost 4 vrev <3,7,7,4>
- 1638320669U, // <7,3,4,u>: Cost 2 vext3 RHS, <3,4,u,6>
- 2602819686U, // <7,3,5,0>: Cost 3 vext1 , LHS
- 1574571728U, // <7,3,5,1>: Cost 2 vext2 <5,1,7,3>, <5,1,7,3>
- 2648977185U, // <7,3,5,2>: Cost 3 vext2 <5,2,7,3>, <5,2,7,3>
- 2705869378U, // <7,3,5,3>: Cost 3 vext3 <3,5,3,7>, <3,5,3,7>
- 2237491947U, // <7,3,5,4>: Cost 3 vrev <3,7,4,5>
- 2706016852U, // <7,3,5,5>: Cost 3 vext3 <3,5,5,7>, <3,5,5,7>
- 2648313954U, // <7,3,5,6>: Cost 3 vext2 <5,1,7,3>, <5,6,7,0>
- 2692745823U, // <7,3,5,7>: Cost 3 vext3 <1,3,5,7>, <3,5,7,0>
- 1579217159U, // <7,3,5,u>: Cost 2 vext2 <5,u,7,3>, <5,u,7,3>
- 2706311800U, // <7,3,6,0>: Cost 3 vext3 <3,6,0,7>, <3,6,0,7>
- 2654286249U, // <7,3,6,1>: Cost 3 vext2 <6,1,7,3>, <6,1,7,3>
- 1581208058U, // <7,3,6,2>: Cost 2 vext2 <6,2,7,3>, <6,2,7,3>
- 2706533011U, // <7,3,6,3>: Cost 3 vext3 <3,6,3,7>, <3,6,3,7>
- 2706606748U, // <7,3,6,4>: Cost 3 vext3 <3,6,4,7>, <3,6,4,7>
- 3780422309U, // <7,3,6,5>: Cost 4 vext3 <3,6,5,7>, <3,6,5,7>
- 2712062637U, // <7,3,6,6>: Cost 3 vext3 RHS, <3,6,6,6>
- 2706827959U, // <7,3,6,7>: Cost 3 vext3 <3,6,7,7>, <3,6,7,7>
- 1585189856U, // <7,3,6,u>: Cost 2 vext2 <6,u,7,3>, <6,u,7,3>
- 2693925571U, // <7,3,7,0>: Cost 3 vext3 <1,5,3,7>, <3,7,0,1>
- 2693925584U, // <7,3,7,1>: Cost 3 vext3 <1,5,3,7>, <3,7,1,5>
- 2700561114U, // <7,3,7,2>: Cost 3 vext3 <2,6,3,7>, <3,7,2,6>
- 2572978916U, // <7,3,7,3>: Cost 3 vext1 <3,7,3,7>, <3,7,3,7>
- 2693925611U, // <7,3,7,4>: Cost 3 vext3 <1,5,3,7>, <3,7,4,5>
- 2707344118U, // <7,3,7,5>: Cost 3 vext3 <3,7,5,7>, <3,7,5,7>
- 2654950894U, // <7,3,7,6>: Cost 3 vext2 <6,2,7,3>, <7,6,2,7>
- 2648315500U, // <7,3,7,7>: Cost 3 vext2 <5,1,7,3>, <7,7,7,7>
- 2693925643U, // <7,3,7,u>: Cost 3 vext3 <1,5,3,7>, <3,7,u,1>
- 2237221578U, // <7,3,u,0>: Cost 3 vrev <3,7,0,u>
- 1638320926U, // <7,3,u,1>: Cost 2 vext3 RHS, <3,u,1,2>
- 1593153452U, // <7,3,u,2>: Cost 2 vext2 ,
- 1638320540U, // <7,3,u,3>: Cost 2 vext3 RHS, <3,3,3,3>
- 2237516526U, // <7,3,u,4>: Cost 3 vrev <3,7,4,u>
- 1638320966U, // <7,3,u,5>: Cost 2 vext3 RHS, <3,u,5,6>
- 2712062796U, // <7,3,u,6>: Cost 3 vext3 RHS, <3,u,6,3>
- 2692967250U, // <7,3,u,7>: Cost 3 vext3 <1,3,u,7>, <3,u,7,0>
- 1638320989U, // <7,3,u,u>: Cost 2 vext3 RHS, <3,u,u,2>
- 2651635712U, // <7,4,0,0>: Cost 3 vext2 <5,6,7,4>, <0,0,0,0>
- 1577893990U, // <7,4,0,1>: Cost 2 vext2 <5,6,7,4>, LHS
- 2651635876U, // <7,4,0,2>: Cost 3 vext2 <5,6,7,4>, <0,2,0,2>
- 3785804672U, // <7,4,0,3>: Cost 4 vext3 RHS, <4,0,3,1>
- 2651636050U, // <7,4,0,4>: Cost 3 vext2 <5,6,7,4>, <0,4,1,5>
- 1638468498U, // <7,4,0,5>: Cost 2 vext3 RHS, <4,0,5,1>
- 1638468508U, // <7,4,0,6>: Cost 2 vext3 RHS, <4,0,6,2>
- 3787795364U, // <7,4,0,7>: Cost 4 vext3 RHS, <4,0,7,1>
- 1640459181U, // <7,4,0,u>: Cost 2 vext3 RHS, <4,0,u,1>
- 2651636470U, // <7,4,1,0>: Cost 3 vext2 <5,6,7,4>, <1,0,3,2>
- 2651636532U, // <7,4,1,1>: Cost 3 vext2 <5,6,7,4>, <1,1,1,1>
- 2712062922U, // <7,4,1,2>: Cost 3 vext3 RHS, <4,1,2,3>
- 2639029248U, // <7,4,1,3>: Cost 3 vext2 <3,5,7,4>, <1,3,5,7>
- 2712062940U, // <7,4,1,4>: Cost 3 vext3 RHS, <4,1,4,3>
- 2712062946U, // <7,4,1,5>: Cost 3 vext3 RHS, <4,1,5,0>
- 2712062958U, // <7,4,1,6>: Cost 3 vext3 RHS, <4,1,6,3>
- 3785804791U, // <7,4,1,7>: Cost 4 vext3 RHS, <4,1,7,3>
- 2712062973U, // <7,4,1,u>: Cost 3 vext3 RHS, <4,1,u,0>
- 3785804807U, // <7,4,2,0>: Cost 4 vext3 RHS, <4,2,0,1>
- 3785804818U, // <7,4,2,1>: Cost 4 vext3 RHS, <4,2,1,3>
- 2651637352U, // <7,4,2,2>: Cost 3 vext2 <5,6,7,4>, <2,2,2,2>
- 2651637414U, // <7,4,2,3>: Cost 3 vext2 <5,6,7,4>, <2,3,0,1>
- 3716753194U, // <7,4,2,4>: Cost 4 vext2 <4,2,7,4>, <2,4,5,7>
- 2712063030U, // <7,4,2,5>: Cost 3 vext3 RHS, <4,2,5,3>
- 2712063036U, // <7,4,2,6>: Cost 3 vext3 RHS, <4,2,6,0>
- 3773123658U, // <7,4,2,7>: Cost 4 vext3 <2,4,5,7>, <4,2,7,5>
- 2712063054U, // <7,4,2,u>: Cost 3 vext3 RHS, <4,2,u,0>
- 2651637910U, // <7,4,3,0>: Cost 3 vext2 <5,6,7,4>, <3,0,1,2>
- 3712772348U, // <7,4,3,1>: Cost 4 vext2 <3,5,7,4>, <3,1,3,5>
- 3785804906U, // <7,4,3,2>: Cost 4 vext3 RHS, <4,3,2,1>
- 2651638172U, // <7,4,3,3>: Cost 3 vext2 <5,6,7,4>, <3,3,3,3>
- 2651638274U, // <7,4,3,4>: Cost 3 vext2 <5,6,7,4>, <3,4,5,6>
- 2639030883U, // <7,4,3,5>: Cost 3 vext2 <3,5,7,4>, <3,5,7,4>
- 2712063122U, // <7,4,3,6>: Cost 3 vext3 RHS, <4,3,6,5>
- 3712772836U, // <7,4,3,7>: Cost 4 vext2 <3,5,7,4>, <3,7,3,7>
- 2641021782U, // <7,4,3,u>: Cost 3 vext2 <3,u,7,4>, <3,u,7,4>
- 2714053802U, // <7,4,4,0>: Cost 3 vext3 RHS, <4,4,0,2>
- 3785804978U, // <7,4,4,1>: Cost 4 vext3 RHS, <4,4,1,1>
- 3716754505U, // <7,4,4,2>: Cost 4 vext2 <4,2,7,4>, <4,2,7,4>
- 3785804998U, // <7,4,4,3>: Cost 4 vext3 RHS, <4,4,3,3>
- 1638321360U, // <7,4,4,4>: Cost 2 vext3 RHS, <4,4,4,4>
- 1638468826U, // <7,4,4,5>: Cost 2 vext3 RHS, <4,4,5,5>
- 1638468836U, // <7,4,4,6>: Cost 2 vext3 RHS, <4,4,6,6>
- 3785215214U, // <7,4,4,7>: Cost 4 vext3 <4,4,7,7>, <4,4,7,7>
- 1640459509U, // <7,4,4,u>: Cost 2 vext3 RHS, <4,4,u,5>
- 1517207654U, // <7,4,5,0>: Cost 2 vext1 <6,7,4,5>, LHS
- 2573034640U, // <7,4,5,1>: Cost 3 vext1 <3,7,4,5>, <1,5,3,7>
- 2712063246U, // <7,4,5,2>: Cost 3 vext3 RHS, <4,5,2,3>
- 2573036267U, // <7,4,5,3>: Cost 3 vext1 <3,7,4,5>, <3,7,4,5>
- 1517210934U, // <7,4,5,4>: Cost 2 vext1 <6,7,4,5>, RHS
- 2711989549U, // <7,4,5,5>: Cost 3 vext3 <4,5,5,7>, <4,5,5,7>
- 564579638U, // <7,4,5,6>: Cost 1 vext3 RHS, RHS
- 2651639976U, // <7,4,5,7>: Cost 3 vext2 <5,6,7,4>, <5,7,5,7>
- 564579656U, // <7,4,5,u>: Cost 1 vext3 RHS, RHS
- 2712063307U, // <7,4,6,0>: Cost 3 vext3 RHS, <4,6,0,1>
- 3767668056U, // <7,4,6,1>: Cost 4 vext3 <1,5,3,7>, <4,6,1,5>
- 2651640314U, // <7,4,6,2>: Cost 3 vext2 <5,6,7,4>, <6,2,7,3>
- 2655621708U, // <7,4,6,3>: Cost 3 vext2 <6,3,7,4>, <6,3,7,4>
- 1638468980U, // <7,4,6,4>: Cost 2 vext3 RHS, <4,6,4,6>
- 2712063358U, // <7,4,6,5>: Cost 3 vext3 RHS, <4,6,5,7>
- 2712063367U, // <7,4,6,6>: Cost 3 vext3 RHS, <4,6,6,7>
- 2712210826U, // <7,4,6,7>: Cost 3 vext3 RHS, <4,6,7,1>
- 1638469012U, // <7,4,6,u>: Cost 2 vext3 RHS, <4,6,u,2>
- 2651640826U, // <7,4,7,0>: Cost 3 vext2 <5,6,7,4>, <7,0,1,2>
- 3773713830U, // <7,4,7,1>: Cost 4 vext3 <2,5,4,7>, <4,7,1,2>
- 3773713842U, // <7,4,7,2>: Cost 4 vext3 <2,5,4,7>, <4,7,2,5>
- 3780349372U, // <7,4,7,3>: Cost 4 vext3 <3,6,4,7>, <4,7,3,6>
- 2651641140U, // <7,4,7,4>: Cost 3 vext2 <5,6,7,4>, <7,4,0,1>
- 2712210888U, // <7,4,7,5>: Cost 3 vext3 RHS, <4,7,5,0>
- 2712210898U, // <7,4,7,6>: Cost 3 vext3 RHS, <4,7,6,1>
- 2651641452U, // <7,4,7,7>: Cost 3 vext2 <5,6,7,4>, <7,7,7,7>
- 2713538026U, // <7,4,7,u>: Cost 3 vext3 <4,7,u,7>, <4,7,u,7>
- 1517232230U, // <7,4,u,0>: Cost 2 vext1 <6,7,4,u>, LHS
- 1577899822U, // <7,4,u,1>: Cost 2 vext2 <5,6,7,4>, LHS
- 2712063489U, // <7,4,u,2>: Cost 3 vext3 RHS, <4,u,2,3>
- 2573060846U, // <7,4,u,3>: Cost 3 vext1 <3,7,4,u>, <3,7,4,u>
- 1640312342U, // <7,4,u,4>: Cost 2 vext3 RHS, <4,u,4,6>
- 1638469146U, // <7,4,u,5>: Cost 2 vext3 RHS, <4,u,5,1>
- 564579881U, // <7,4,u,6>: Cost 1 vext3 RHS, RHS
- 2714054192U, // <7,4,u,7>: Cost 3 vext3 RHS, <4,u,7,5>
- 564579899U, // <7,4,u,u>: Cost 1 vext3 RHS, RHS
- 2579038310U, // <7,5,0,0>: Cost 3 vext1 <4,7,5,0>, LHS
- 2636382310U, // <7,5,0,1>: Cost 3 vext2 <3,1,7,5>, LHS
- 2796339302U, // <7,5,0,2>: Cost 3 vuzpl <7,4,5,6>, LHS
- 3646810719U, // <7,5,0,3>: Cost 4 vext1 <3,7,5,0>, <3,5,7,0>
- 2712063586U, // <7,5,0,4>: Cost 3 vext3 RHS, <5,0,4,1>
- 2735951467U, // <7,5,0,5>: Cost 3 vext3 RHS, <5,0,5,1>
- 2735951476U, // <7,5,0,6>: Cost 3 vext3 RHS, <5,0,6,1>
- 2579043322U, // <7,5,0,7>: Cost 3 vext1 <4,7,5,0>, <7,0,1,2>
- 2636382877U, // <7,5,0,u>: Cost 3 vext2 <3,1,7,5>, LHS
- 2712211087U, // <7,5,1,0>: Cost 3 vext3 RHS, <5,1,0,1>
- 3698180916U, // <7,5,1,1>: Cost 4 vext2 <1,1,7,5>, <1,1,1,1>
- 3710124950U, // <7,5,1,2>: Cost 4 vext2 <3,1,7,5>, <1,2,3,0>
- 2636383232U, // <7,5,1,3>: Cost 3 vext2 <3,1,7,5>, <1,3,5,7>
- 2712211127U, // <7,5,1,4>: Cost 3 vext3 RHS, <5,1,4,5>
- 2590994128U, // <7,5,1,5>: Cost 3 vext1 <6,7,5,1>, <5,1,7,3>
- 2590995323U, // <7,5,1,6>: Cost 3 vext1 <6,7,5,1>, <6,7,5,1>
- 1638469328U, // <7,5,1,7>: Cost 2 vext3 RHS, <5,1,7,3>
- 1638469337U, // <7,5,1,u>: Cost 2 vext3 RHS, <5,1,u,3>
- 3785805536U, // <7,5,2,0>: Cost 4 vext3 RHS, <5,2,0,1>
- 3785805544U, // <7,5,2,1>: Cost 4 vext3 RHS, <5,2,1,0>
- 3704817288U, // <7,5,2,2>: Cost 4 vext2 <2,2,7,5>, <2,2,5,7>
- 2712063742U, // <7,5,2,3>: Cost 3 vext3 RHS, <5,2,3,4>
- 3716761386U, // <7,5,2,4>: Cost 4 vext2 <4,2,7,5>, <2,4,5,7>
- 2714054415U, // <7,5,2,5>: Cost 3 vext3 RHS, <5,2,5,3>
- 3774304024U, // <7,5,2,6>: Cost 4 vext3 <2,6,3,7>, <5,2,6,3>
- 2712063777U, // <7,5,2,7>: Cost 3 vext3 RHS, <5,2,7,3>
- 2712063787U, // <7,5,2,u>: Cost 3 vext3 RHS, <5,2,u,4>
- 3634888806U, // <7,5,3,0>: Cost 4 vext1 <1,7,5,3>, LHS
- 2636384544U, // <7,5,3,1>: Cost 3 vext2 <3,1,7,5>, <3,1,7,5>
- 3710790001U, // <7,5,3,2>: Cost 4 vext2 <3,2,7,5>, <3,2,7,5>
- 3710126492U, // <7,5,3,3>: Cost 4 vext2 <3,1,7,5>, <3,3,3,3>
- 3634892086U, // <7,5,3,4>: Cost 4 vext1 <1,7,5,3>, RHS
- 2639039076U, // <7,5,3,5>: Cost 3 vext2 <3,5,7,5>, <3,5,7,5>
- 3713444533U, // <7,5,3,6>: Cost 4 vext2 <3,6,7,5>, <3,6,7,5>
- 2693926767U, // <7,5,3,7>: Cost 3 vext3 <1,5,3,7>, <5,3,7,0>
- 2712063864U, // <7,5,3,u>: Cost 3 vext3 RHS, <5,3,u,0>
- 2579071078U, // <7,5,4,0>: Cost 3 vext1 <4,7,5,4>, LHS
- 3646841856U, // <7,5,4,1>: Cost 4 vext1 <3,7,5,4>, <1,3,5,7>
- 3716762698U, // <7,5,4,2>: Cost 4 vext2 <4,2,7,5>, <4,2,7,5>
- 3646843491U, // <7,5,4,3>: Cost 4 vext1 <3,7,5,4>, <3,5,7,4>
- 2579074358U, // <7,5,4,4>: Cost 3 vext1 <4,7,5,4>, RHS
- 2636385590U, // <7,5,4,5>: Cost 3 vext2 <3,1,7,5>, RHS
- 2645675406U, // <7,5,4,6>: Cost 3 vext2 <4,6,7,5>, <4,6,7,5>
- 1638322118U, // <7,5,4,7>: Cost 2 vext3 RHS, <5,4,7,6>
- 1638469583U, // <7,5,4,u>: Cost 2 vext3 RHS, <5,4,u,6>
- 2714054611U, // <7,5,5,0>: Cost 3 vext3 RHS, <5,5,0,1>
- 2652974800U, // <7,5,5,1>: Cost 3 vext2 <5,u,7,5>, <5,1,7,3>
- 3710127905U, // <7,5,5,2>: Cost 4 vext2 <3,1,7,5>, <5,2,7,3>
- 3785805808U, // <7,5,5,3>: Cost 4 vext3 RHS, <5,5,3,3>
- 2712211450U, // <7,5,5,4>: Cost 3 vext3 RHS, <5,5,4,4>
- 1638322180U, // <7,5,5,5>: Cost 2 vext3 RHS, <5,5,5,5>
- 2712064014U, // <7,5,5,6>: Cost 3 vext3 RHS, <5,5,6,6>
- 1638469656U, // <7,5,5,7>: Cost 2 vext3 RHS, <5,5,7,7>
- 1638469665U, // <7,5,5,u>: Cost 2 vext3 RHS, <5,5,u,7>
- 2712064036U, // <7,5,6,0>: Cost 3 vext3 RHS, <5,6,0,1>
- 2714054707U, // <7,5,6,1>: Cost 3 vext3 RHS, <5,6,1,7>
- 3785805879U, // <7,5,6,2>: Cost 4 vext3 RHS, <5,6,2,2>
- 2712064066U, // <7,5,6,3>: Cost 3 vext3 RHS, <5,6,3,4>
- 2712064076U, // <7,5,6,4>: Cost 3 vext3 RHS, <5,6,4,5>
- 2714054743U, // <7,5,6,5>: Cost 3 vext3 RHS, <5,6,5,7>
- 2712064096U, // <7,5,6,6>: Cost 3 vext3 RHS, <5,6,6,7>
- 1638322274U, // <7,5,6,7>: Cost 2 vext3 RHS, <5,6,7,0>
- 1638469739U, // <7,5,6,u>: Cost 2 vext3 RHS, <5,6,u,0>
- 1511325798U, // <7,5,7,0>: Cost 2 vext1 <5,7,5,7>, LHS
- 2692747392U, // <7,5,7,1>: Cost 3 vext3 <1,3,5,7>, <5,7,1,3>
- 2585069160U, // <7,5,7,2>: Cost 3 vext1 <5,7,5,7>, <2,2,2,2>
- 2573126390U, // <7,5,7,3>: Cost 3 vext1 <3,7,5,7>, <3,7,5,7>
- 1511329078U, // <7,5,7,4>: Cost 2 vext1 <5,7,5,7>, RHS
- 1638469800U, // <7,5,7,5>: Cost 2 vext3 RHS, <5,7,5,7>
- 2712211626U, // <7,5,7,6>: Cost 3 vext3 RHS, <5,7,6,0>
- 2712211636U, // <7,5,7,7>: Cost 3 vext3 RHS, <5,7,7,1>
- 1638469823U, // <7,5,7,u>: Cost 2 vext3 RHS, <5,7,u,3>
- 1511333990U, // <7,5,u,0>: Cost 2 vext1 <5,7,5,u>, LHS
- 2636388142U, // <7,5,u,1>: Cost 3 vext2 <3,1,7,5>, LHS
- 2712211671U, // <7,5,u,2>: Cost 3 vext3 RHS, <5,u,2,0>
- 2573134583U, // <7,5,u,3>: Cost 3 vext1 <3,7,5,u>, <3,7,5,u>
- 1511337270U, // <7,5,u,4>: Cost 2 vext1 <5,7,5,u>, RHS
- 1638469881U, // <7,5,u,5>: Cost 2 vext3 RHS, <5,u,5,7>
- 2712064258U, // <7,5,u,6>: Cost 3 vext3 RHS, <5,u,6,7>
- 1638469892U, // <7,5,u,7>: Cost 2 vext3 RHS, <5,u,7,0>
- 1638469904U, // <7,5,u,u>: Cost 2 vext3 RHS, <5,u,u,3>
- 2650324992U, // <7,6,0,0>: Cost 3 vext2 <5,4,7,6>, <0,0,0,0>
- 1576583270U, // <7,6,0,1>: Cost 2 vext2 <5,4,7,6>, LHS
- 2712064300U, // <7,6,0,2>: Cost 3 vext3 RHS, <6,0,2,4>
- 2255295336U, // <7,6,0,3>: Cost 3 vrev <6,7,3,0>
- 2712064316U, // <7,6,0,4>: Cost 3 vext3 RHS, <6,0,4,2>
- 2585088098U, // <7,6,0,5>: Cost 3 vext1 <5,7,6,0>, <5,6,7,0>
- 2735952204U, // <7,6,0,6>: Cost 3 vext3 RHS, <6,0,6,0>
- 2712211799U, // <7,6,0,7>: Cost 3 vext3 RHS, <6,0,7,2>
- 1576583837U, // <7,6,0,u>: Cost 2 vext2 <5,4,7,6>, LHS
- 1181340494U, // <7,6,1,0>: Cost 2 vrev <6,7,0,1>
- 2650325812U, // <7,6,1,1>: Cost 3 vext2 <5,4,7,6>, <1,1,1,1>
- 2650325910U, // <7,6,1,2>: Cost 3 vext2 <5,4,7,6>, <1,2,3,0>
- 2650325976U, // <7,6,1,3>: Cost 3 vext2 <5,4,7,6>, <1,3,1,3>
- 2579123510U, // <7,6,1,4>: Cost 3 vext1 <4,7,6,1>, RHS
- 2650326160U, // <7,6,1,5>: Cost 3 vext2 <5,4,7,6>, <1,5,3,7>
- 2714055072U, // <7,6,1,6>: Cost 3 vext3 RHS, <6,1,6,3>
- 2712064425U, // <7,6,1,7>: Cost 3 vext3 RHS, <6,1,7,3>
- 1181930390U, // <7,6,1,u>: Cost 2 vrev <6,7,u,1>
- 2712211897U, // <7,6,2,0>: Cost 3 vext3 RHS, <6,2,0,1>
- 2714055108U, // <7,6,2,1>: Cost 3 vext3 RHS, <6,2,1,3>
- 2650326632U, // <7,6,2,2>: Cost 3 vext2 <5,4,7,6>, <2,2,2,2>
- 2650326694U, // <7,6,2,3>: Cost 3 vext2 <5,4,7,6>, <2,3,0,1>
- 2714055137U, // <7,6,2,4>: Cost 3 vext3 RHS, <6,2,4,5>
- 2714055148U, // <7,6,2,5>: Cost 3 vext3 RHS, <6,2,5,7>
- 2650326970U, // <7,6,2,6>: Cost 3 vext2 <5,4,7,6>, <2,6,3,7>
- 1638470138U, // <7,6,2,7>: Cost 2 vext3 RHS, <6,2,7,3>
- 1638470147U, // <7,6,2,u>: Cost 2 vext3 RHS, <6,2,u,3>
- 2650327190U, // <7,6,3,0>: Cost 3 vext2 <5,4,7,6>, <3,0,1,2>
- 2255172441U, // <7,6,3,1>: Cost 3 vrev <6,7,1,3>
- 2255246178U, // <7,6,3,2>: Cost 3 vrev <6,7,2,3>
- 2650327452U, // <7,6,3,3>: Cost 3 vext2 <5,4,7,6>, <3,3,3,3>
- 2712064562U, // <7,6,3,4>: Cost 3 vext3 RHS, <6,3,4,5>
- 2650327627U, // <7,6,3,5>: Cost 3 vext2 <5,4,7,6>, <3,5,4,7>
- 3713452726U, // <7,6,3,6>: Cost 4 vext2 <3,6,7,6>, <3,6,7,6>
- 2700563016U, // <7,6,3,7>: Cost 3 vext3 <2,6,3,7>, <6,3,7,0>
- 2712064593U, // <7,6,3,u>: Cost 3 vext3 RHS, <6,3,u,0>
- 2650327954U, // <7,6,4,0>: Cost 3 vext2 <5,4,7,6>, <4,0,5,1>
- 2735952486U, // <7,6,4,1>: Cost 3 vext3 RHS, <6,4,1,3>
- 2735952497U, // <7,6,4,2>: Cost 3 vext3 RHS, <6,4,2,5>
- 2255328108U, // <7,6,4,3>: Cost 3 vrev <6,7,3,4>
- 2712212100U, // <7,6,4,4>: Cost 3 vext3 RHS, <6,4,4,6>
- 1576586550U, // <7,6,4,5>: Cost 2 vext2 <5,4,7,6>, RHS
- 2714055312U, // <7,6,4,6>: Cost 3 vext3 RHS, <6,4,6,0>
- 2712212126U, // <7,6,4,7>: Cost 3 vext3 RHS, <6,4,7,5>
- 1576586793U, // <7,6,4,u>: Cost 2 vext2 <5,4,7,6>, RHS
- 2579152998U, // <7,6,5,0>: Cost 3 vext1 <4,7,6,5>, LHS
- 2650328784U, // <7,6,5,1>: Cost 3 vext2 <5,4,7,6>, <5,1,7,3>
- 2714055364U, // <7,6,5,2>: Cost 3 vext3 RHS, <6,5,2,7>
- 3785806538U, // <7,6,5,3>: Cost 4 vext3 RHS, <6,5,3,4>
- 1576587206U, // <7,6,5,4>: Cost 2 vext2 <5,4,7,6>, <5,4,7,6>
- 2650329092U, // <7,6,5,5>: Cost 3 vext2 <5,4,7,6>, <5,5,5,5>
- 2650329186U, // <7,6,5,6>: Cost 3 vext2 <5,4,7,6>, <5,6,7,0>
- 2712064753U, // <7,6,5,7>: Cost 3 vext3 RHS, <6,5,7,7>
- 1181963162U, // <7,6,5,u>: Cost 2 vrev <6,7,u,5>
- 2714055421U, // <7,6,6,0>: Cost 3 vext3 RHS, <6,6,0,1>
- 2714055432U, // <7,6,6,1>: Cost 3 vext3 RHS, <6,6,1,3>
- 2650329594U, // <7,6,6,2>: Cost 3 vext2 <5,4,7,6>, <6,2,7,3>
- 3785806619U, // <7,6,6,3>: Cost 4 vext3 RHS, <6,6,3,4>
- 2712212260U, // <7,6,6,4>: Cost 3 vext3 RHS, <6,6,4,4>
- 2714055472U, // <7,6,6,5>: Cost 3 vext3 RHS, <6,6,5,7>
- 1638323000U, // <7,6,6,6>: Cost 2 vext3 RHS, <6,6,6,6>
- 1638470466U, // <7,6,6,7>: Cost 2 vext3 RHS, <6,6,7,7>
- 1638470475U, // <7,6,6,u>: Cost 2 vext3 RHS, <6,6,u,7>
- 1638323022U, // <7,6,7,0>: Cost 2 vext3 RHS, <6,7,0,1>
- 2712064854U, // <7,6,7,1>: Cost 3 vext3 RHS, <6,7,1,0>
- 2712064865U, // <7,6,7,2>: Cost 3 vext3 RHS, <6,7,2,2>
- 2712064872U, // <7,6,7,3>: Cost 3 vext3 RHS, <6,7,3,0>
- 1638323062U, // <7,6,7,4>: Cost 2 vext3 RHS, <6,7,4,5>
- 2712064894U, // <7,6,7,5>: Cost 3 vext3 RHS, <6,7,5,4>
- 2712064905U, // <7,6,7,6>: Cost 3 vext3 RHS, <6,7,6,6>
- 2712064915U, // <7,6,7,7>: Cost 3 vext3 RHS, <6,7,7,7>
- 1638323094U, // <7,6,7,u>: Cost 2 vext3 RHS, <6,7,u,1>
- 1638470559U, // <7,6,u,0>: Cost 2 vext3 RHS, <6,u,0,1>
- 1576589102U, // <7,6,u,1>: Cost 2 vext2 <5,4,7,6>, LHS
- 2712212402U, // <7,6,u,2>: Cost 3 vext3 RHS, <6,u,2,2>
- 2712212409U, // <7,6,u,3>: Cost 3 vext3 RHS, <6,u,3,0>
- 1638470599U, // <7,6,u,4>: Cost 2 vext3 RHS, <6,u,4,5>
- 1576589466U, // <7,6,u,5>: Cost 2 vext2 <5,4,7,6>, RHS
- 1638323000U, // <7,6,u,6>: Cost 2 vext3 RHS, <6,6,6,6>
- 1638470624U, // <7,6,u,7>: Cost 2 vext3 RHS, <6,u,7,3>
- 1638470631U, // <7,6,u,u>: Cost 2 vext3 RHS, <6,u,u,1>
2712065007U, // <7,7,0,0>: Cost 3 vext3 RHS, <7,0,0,0> - 1638323194U, // <7,7,0,1>: Cost 2 vext3 RHS, <7,0,1,2> - 2712065025U, // <7,7,0,2>: Cost 3 vext3 RHS, <7,0,2,0> - 3646958337U, // <7,7,0,3>: Cost 4 vext1 <3,7,7,0>, <3,7,7,0> - 2712065044U, // <7,7,0,4>: Cost 3 vext3 RHS, <7,0,4,1> - 2585161907U, // <7,7,0,5>: Cost 3 vext1 <5,7,7,0>, <5,7,7,0> - 2591134604U, // <7,7,0,6>: Cost 3 vext1 <6,7,7,0>, <6,7,7,0> - 2591134714U, // <7,7,0,7>: Cost 3 vext1 <6,7,7,0>, <7,0,1,2> - 1638323257U, // <7,7,0,u>: Cost 2 vext3 RHS, <7,0,u,2> - 2712065091U, // <7,7,1,0>: Cost 3 vext3 RHS, <7,1,0,3> - 2712065098U, // <7,7,1,1>: Cost 3 vext3 RHS, <7,1,1,1> - 2712065109U, // <7,7,1,2>: Cost 3 vext3 RHS, <7,1,2,3> - 2692748384U, // <7,7,1,3>: Cost 3 vext3 <1,3,5,7>, <7,1,3,5> - 2585169206U, // <7,7,1,4>: Cost 3 vext1 <5,7,7,1>, RHS - 2693928048U, // <7,7,1,5>: Cost 3 vext3 <1,5,3,7>, <7,1,5,3> - 2585170766U, // <7,7,1,6>: Cost 3 vext1 <5,7,7,1>, <6,7,0,1> - 2735953024U, // <7,7,1,7>: Cost 3 vext3 RHS, <7,1,7,1> - 2695918731U, // <7,7,1,u>: Cost 3 vext3 <1,u,3,7>, <7,1,u,3> - 3770471574U, // <7,7,2,0>: Cost 4 vext3 <2,0,5,7>, <7,2,0,5> - 3785807002U, // <7,7,2,1>: Cost 4 vext3 RHS, <7,2,1,0> - 2712065189U, // <7,7,2,2>: Cost 3 vext3 RHS, <7,2,2,2> - 2712065196U, // <7,7,2,3>: Cost 3 vext3 RHS, <7,2,3,0> - 3773125818U, // <7,7,2,4>: Cost 4 vext3 <2,4,5,7>, <7,2,4,5> - 3766490305U, // <7,7,2,5>: Cost 4 vext3 <1,3,5,7>, <7,2,5,3> - 2700563658U, // <7,7,2,6>: Cost 3 vext3 <2,6,3,7>, <7,2,6,3> - 2735953107U, // <7,7,2,7>: Cost 3 vext3 RHS, <7,2,7,3> - 2701890780U, // <7,7,2,u>: Cost 3 vext3 <2,u,3,7>, <7,2,u,3> - 2712065251U, // <7,7,3,0>: Cost 3 vext3 RHS, <7,3,0,1> - 3766490350U, // <7,7,3,1>: Cost 4 vext3 <1,3,5,7>, <7,3,1,3> - 3774305530U, // <7,7,3,2>: Cost 4 vext3 <2,6,3,7>, <7,3,2,6> - 2637728196U, // <7,7,3,3>: Cost 3 vext2 <3,3,7,7>, <3,3,7,7> - 2712065291U, // <7,7,3,4>: Cost 3 vext3 RHS, <7,3,4,5> - 2585186486U, // <7,7,3,5>: Cost 3 vext1 <5,7,7,3>, <5,7,7,3> - 2639719095U, // <7,7,3,6>: Cost 3 vext2 <3,6,7,7>, <3,6,7,7> - 2640382728U, // <7,7,3,7>: Cost 3 vext2 <3,7,7,7>, <3,7,7,7> - 2641046361U, // <7,7,3,u>: Cost 3 vext2 <3,u,7,7>, <3,u,7,7> - 2712212792U, // <7,7,4,0>: Cost 3 vext3 RHS, <7,4,0,5> - 3646989312U, // <7,7,4,1>: Cost 4 vext1 <3,7,7,4>, <1,3,5,7> - 3785807176U, // <7,7,4,2>: Cost 4 vext3 RHS, <7,4,2,3> - 3646991109U, // <7,7,4,3>: Cost 4 vext1 <3,7,7,4>, <3,7,7,4> - 2712065371U, // <7,7,4,4>: Cost 3 vext3 RHS, <7,4,4,4> - 1638323558U, // <7,7,4,5>: Cost 2 vext3 RHS, <7,4,5,6> - 2712212845U, // <7,7,4,6>: Cost 3 vext3 RHS, <7,4,6,4> - 2591167846U, // <7,7,4,7>: Cost 3 vext1 <6,7,7,4>, <7,4,5,6> - 1638323585U, // <7,7,4,u>: Cost 2 vext3 RHS, <7,4,u,6> - 2585198694U, // <7,7,5,0>: Cost 3 vext1 <5,7,7,5>, LHS - 2712212884U, // <7,7,5,1>: Cost 3 vext3 RHS, <7,5,1,7> - 3711471393U, // <7,7,5,2>: Cost 4 vext2 <3,3,7,7>, <5,2,7,3> - 2649673590U, // <7,7,5,3>: Cost 3 vext2 <5,3,7,7>, <5,3,7,7> - 2712065455U, // <7,7,5,4>: Cost 3 vext3 RHS, <7,5,4,7> - 1577259032U, // <7,7,5,5>: Cost 2 vext2 <5,5,7,7>, <5,5,7,7> - 2712065473U, // <7,7,5,6>: Cost 3 vext3 RHS, <7,5,6,7> - 2712212936U, // <7,7,5,7>: Cost 3 vext3 RHS, <7,5,7,5> - 1579249931U, // <7,7,5,u>: Cost 2 vext2 <5,u,7,7>, <5,u,7,7> - 2591178854U, // <7,7,6,0>: Cost 3 vext1 <6,7,7,6>, LHS - 2735953374U, // <7,7,6,1>: Cost 3 vext3 RHS, <7,6,1,0> - 2712212974U, // <7,7,6,2>: Cost 3 vext3 RHS, <7,6,2,7> - 2655646287U, // <7,7,6,3>: Cost 3 vext2 <6,3,7,7>, <6,3,7,7> - 2591182134U, // <7,7,6,4>: Cost 3 vext1 <6,7,7,6>, RHS - 2656973553U, // 
<7,7,6,5>: Cost 3 vext2 <6,5,7,7>, <6,5,7,7> - 1583895362U, // <7,7,6,6>: Cost 2 vext2 <6,6,7,7>, <6,6,7,7> - 2712065556U, // <7,7,6,7>: Cost 3 vext3 RHS, <7,6,7,0> - 1585222628U, // <7,7,6,u>: Cost 2 vext2 <6,u,7,7>, <6,u,7,7> - 1523417190U, // <7,7,7,0>: Cost 2 vext1 <7,7,7,7>, LHS - 2597159670U, // <7,7,7,1>: Cost 3 vext1 <7,7,7,7>, <1,0,3,2> - 2597160552U, // <7,7,7,2>: Cost 3 vext1 <7,7,7,7>, <2,2,2,2> - 2597161110U, // <7,7,7,3>: Cost 3 vext1 <7,7,7,7>, <3,0,1,2> - 1523420470U, // <7,7,7,4>: Cost 2 vext1 <7,7,7,7>, RHS - 2651002296U, // <7,7,7,5>: Cost 3 vext2 <5,5,7,7>, <7,5,5,7> - 2657637906U, // <7,7,7,6>: Cost 3 vext2 <6,6,7,7>, <7,6,6,7> - 363253046U, // <7,7,7,7>: Cost 1 vdup3 RHS - 363253046U, // <7,7,7,u>: Cost 1 vdup3 RHS - 1523417190U, // <7,7,u,0>: Cost 2 vext1 <7,7,7,7>, LHS - 1638471298U, // <7,7,u,1>: Cost 2 vext3 RHS, <7,u,1,2> - 2712213132U, // <7,7,u,2>: Cost 3 vext3 RHS, <7,u,2,3> - 2712213138U, // <7,7,u,3>: Cost 3 vext3 RHS, <7,u,3,0> - 1523420470U, // <7,7,u,4>: Cost 2 vext1 <7,7,7,7>, RHS - 1638471338U, // <7,7,u,5>: Cost 2 vext3 RHS, <7,u,5,6> - 1595840756U, // <7,7,u,6>: Cost 2 vext2 , - 363253046U, // <7,7,u,7>: Cost 1 vdup3 RHS - 363253046U, // <7,7,u,u>: Cost 1 vdup3 RHS - 1638318080U, // <7,u,0,0>: Cost 2 vext3 RHS, <0,0,0,0> - 1638323923U, // <7,u,0,1>: Cost 2 vext3 RHS, - 1662211804U, // <7,u,0,2>: Cost 2 vext3 RHS, - 1638323941U, // <7,u,0,3>: Cost 2 vext3 RHS, - 2712065773U, // <7,u,0,4>: Cost 3 vext3 RHS, - 1662359286U, // <7,u,0,5>: Cost 2 vext3 RHS, - 1662359296U, // <7,u,0,6>: Cost 2 vext3 RHS, - 2987150664U, // <7,u,0,7>: Cost 3 vzipr <5,6,7,0>, RHS - 1638323986U, // <7,u,0,u>: Cost 2 vext3 RHS, - 1517469798U, // <7,u,1,0>: Cost 2 vext1 <6,7,u,1>, LHS - 1638318900U, // <7,u,1,1>: Cost 2 vext3 RHS, <1,1,1,1> - 564582190U, // <7,u,1,2>: Cost 1 vext3 RHS, LHS - 1638324023U, // <7,u,1,3>: Cost 2 vext3 RHS, - 1517473078U, // <7,u,1,4>: Cost 2 vext1 <6,7,u,1>, RHS - 2693928777U, // <7,u,1,5>: Cost 3 vext3 <1,5,3,7>, - 1517474710U, // <7,u,1,6>: Cost 2 vext1 <6,7,u,1>, <6,7,u,1> - 1640462171U, // <7,u,1,7>: Cost 2 vext3 RHS, - 564582244U, // <7,u,1,u>: Cost 1 vext3 RHS, LHS - 1638318244U, // <7,u,2,0>: Cost 2 vext3 RHS, <0,2,0,2> - 2712065907U, // <7,u,2,1>: Cost 3 vext3 RHS, - 1638319720U, // <7,u,2,2>: Cost 2 vext3 RHS, <2,2,2,2> - 1638324101U, // <7,u,2,3>: Cost 2 vext3 RHS, - 1638318284U, // <7,u,2,4>: Cost 2 vext3 RHS, <0,2,4,6> - 2712065947U, // <7,u,2,5>: Cost 3 vext3 RHS, - 2700564387U, // <7,u,2,6>: Cost 3 vext3 <2,6,3,7>, - 1640314796U, // <7,u,2,7>: Cost 2 vext3 RHS, - 1638324146U, // <7,u,2,u>: Cost 2 vext3 RHS, - 1638324156U, // <7,u,3,0>: Cost 2 vext3 RHS, - 1638319064U, // <7,u,3,1>: Cost 2 vext3 RHS, <1,3,1,3> - 2700564435U, // <7,u,3,2>: Cost 3 vext3 <2,6,3,7>, - 1638320540U, // <7,u,3,3>: Cost 2 vext3 RHS, <3,3,3,3> - 1638324196U, // <7,u,3,4>: Cost 2 vext3 RHS, - 1638324207U, // <7,u,3,5>: Cost 2 vext3 RHS, - 2700564472U, // <7,u,3,6>: Cost 3 vext3 <2,6,3,7>, - 2695919610U, // <7,u,3,7>: Cost 3 vext3 <1,u,3,7>, - 1638324228U, // <7,u,3,u>: Cost 2 vext3 RHS, - 2712066061U, // <7,u,4,0>: Cost 3 vext3 RHS, - 1662212122U, // <7,u,4,1>: Cost 2 vext3 RHS, - 1662212132U, // <7,u,4,2>: Cost 2 vext3 RHS, - 2712066092U, // <7,u,4,3>: Cost 3 vext3 RHS, - 1638321360U, // <7,u,4,4>: Cost 2 vext3 RHS, <4,4,4,4> - 1638324287U, // <7,u,4,5>: Cost 2 vext3 RHS, - 1662359624U, // <7,u,4,6>: Cost 2 vext3 RHS, - 1640314961U, // <7,u,4,7>: Cost 2 vext3 RHS, - 1638324314U, // <7,u,4,u>: Cost 2 vext3 RHS, - 1517502566U, // <7,u,5,0>: Cost 2 vext1 <6,7,u,5>, 
LHS - 1574612693U, // <7,u,5,1>: Cost 2 vext2 <5,1,7,u>, <5,1,7,u> - 2712066162U, // <7,u,5,2>: Cost 3 vext3 RHS, - 1638324351U, // <7,u,5,3>: Cost 2 vext3 RHS, - 1576603592U, // <7,u,5,4>: Cost 2 vext2 <5,4,7,u>, <5,4,7,u> - 1577267225U, // <7,u,5,5>: Cost 2 vext2 <5,5,7,u>, <5,5,7,u> - 564582554U, // <7,u,5,6>: Cost 1 vext3 RHS, RHS - 1640462499U, // <7,u,5,7>: Cost 2 vext3 RHS, - 564582572U, // <7,u,5,u>: Cost 1 vext3 RHS, RHS - 2712066223U, // <7,u,6,0>: Cost 3 vext3 RHS, - 2712066238U, // <7,u,6,1>: Cost 3 vext3 RHS, - 1581249023U, // <7,u,6,2>: Cost 2 vext2 <6,2,7,u>, <6,2,7,u> - 1638324432U, // <7,u,6,3>: Cost 2 vext3 RHS, - 1638468980U, // <7,u,6,4>: Cost 2 vext3 RHS, <4,6,4,6> - 2712066274U, // <7,u,6,5>: Cost 3 vext3 RHS, - 1583903555U, // <7,u,6,6>: Cost 2 vext2 <6,6,7,u>, <6,6,7,u> - 1640315117U, // <7,u,6,7>: Cost 2 vext3 RHS, - 1638324477U, // <7,u,6,u>: Cost 2 vext3 RHS, - 1638471936U, // <7,u,7,0>: Cost 2 vext3 RHS, - 2692970763U, // <7,u,7,1>: Cost 3 vext3 <1,3,u,7>, - 2700933399U, // <7,u,7,2>: Cost 3 vext3 <2,6,u,7>, - 2573347601U, // <7,u,7,3>: Cost 3 vext1 <3,7,u,7>, <3,7,u,7> - 1638471976U, // <7,u,7,4>: Cost 2 vext3 RHS, - 1511551171U, // <7,u,7,5>: Cost 2 vext1 <5,7,u,7>, <5,7,u,7> - 2712213815U, // <7,u,7,6>: Cost 3 vext3 RHS, - 363253046U, // <7,u,7,7>: Cost 1 vdup3 RHS - 363253046U, // <7,u,7,u>: Cost 1 vdup3 RHS - 1638324561U, // <7,u,u,0>: Cost 2 vext3 RHS, - 1638324571U, // <7,u,u,1>: Cost 2 vext3 RHS, - 564582757U, // <7,u,u,2>: Cost 1 vext3 RHS, LHS - 1638324587U, // <7,u,u,3>: Cost 2 vext3 RHS, - 1638324601U, // <7,u,u,4>: Cost 2 vext3 RHS, - 1638324611U, // <7,u,u,5>: Cost 2 vext3 RHS, - 564582797U, // <7,u,u,6>: Cost 1 vext3 RHS, RHS - 363253046U, // <7,u,u,7>: Cost 1 vdup3 RHS - 564582811U, // <7,u,u,u>: Cost 1 vext3 RHS, LHS - 135053414U, // : Cost 1 vdup0 LHS - 1611489290U, // : Cost 2 vext3 LHS, <0,0,1,1> - 1611489300U, // : Cost 2 vext3 LHS, <0,0,2,2> - 2568054923U, // : Cost 3 vext1 <3,0,0,0>, <3,0,0,0> - 1481706806U, // : Cost 2 vext1 <0,u,0,0>, RHS - 2555449040U, // : Cost 3 vext1 <0,u,0,0>, <5,1,7,3> - 2591282078U, // : Cost 3 vext1 <6,u,0,0>, <6,u,0,0> - 2591945711U, // : Cost 3 vext1 <7,0,0,0>, <7,0,0,0> - 135053414U, // : Cost 1 vdup0 LHS - 1493655654U, // : Cost 2 vext1 <2,u,0,1>, LHS - 1860550758U, // : Cost 2 vzipl LHS, LHS - 537747563U, // : Cost 1 vext3 LHS, LHS - 2625135576U, // : Cost 3 vext2 <1,2,u,0>, <1,3,1,3> - 1493658934U, // : Cost 2 vext1 <2,u,0,1>, RHS - 2625135760U, // : Cost 3 vext2 <1,2,u,0>, <1,5,3,7> - 1517548447U, // : Cost 2 vext1 <6,u,0,1>, <6,u,0,1> - 2591290362U, // : Cost 3 vext1 <6,u,0,1>, <7,0,1,2> - 537747612U, // : Cost 1 vext3 LHS, LHS - 1611489444U, // : Cost 2 vext3 LHS, <0,2,0,2> - 2685231276U, // : Cost 3 vext3 LHS, <0,2,1,1> - 1994768486U, // : Cost 2 vtrnl LHS, LHS - 2685231294U, // : Cost 3 vext3 LHS, <0,2,3,1> - 1611489484U, // : Cost 2 vext3 LHS, <0,2,4,6> - 2712068310U, // : Cost 3 vext3 RHS, <0,2,5,7> - 2625136570U, // : Cost 3 vext2 <1,2,u,0>, <2,6,3,7> - 2591962097U, // : Cost 3 vext1 <7,0,0,2>, <7,0,0,2> - 1611489516U, // : Cost 2 vext3 LHS, <0,2,u,2> - 2954067968U, // : Cost 3 vzipr LHS, <0,0,0,0> - 2685231356U, // : Cost 3 vext3 LHS, <0,3,1,0> - 72589981U, // : Cost 1 vrev LHS - 2625137052U, // : Cost 3 vext2 <1,2,u,0>, <3,3,3,3> - 2625137154U, // : Cost 3 vext2 <1,2,u,0>, <3,4,5,6> - 2639071848U, // : Cost 3 vext2 <3,5,u,0>, <3,5,u,0> - 2639735481U, // : Cost 3 vext2 <3,6,u,0>, <3,6,u,0> - 2597279354U, // : Cost 3 vext1 <7,u,0,3>, <7,u,0,3> - 73032403U, // : Cost 1 vrev LHS - 2687074636U, // : 
Cost 3 vext3 <0,4,0,u>, <0,4,0,u> - 1611489618U, // : Cost 2 vext3 LHS, <0,4,1,5> - 1611489628U, // : Cost 2 vext3 LHS, <0,4,2,6> - 3629222038U, // : Cost 4 vext1 <0,u,0,4>, <3,0,1,2> - 2555481398U, // : Cost 3 vext1 <0,u,0,4>, RHS - 1551396150U, // : Cost 2 vext2 <1,2,u,0>, RHS - 2651680116U, // : Cost 3 vext2 <5,6,u,0>, <4,6,4,6> - 2646150600U, // : Cost 3 vext2 <4,7,5,0>, <4,7,5,0> - 1611932050U, // : Cost 2 vext3 LHS, <0,4,u,6> - 2561458278U, // : Cost 3 vext1 <1,u,0,5>, LHS - 1863532646U, // : Cost 2 vzipl RHS, LHS - 2712068526U, // : Cost 3 vext3 RHS, <0,5,2,7> - 2649689976U, // : Cost 3 vext2 <5,3,u,0>, <5,3,u,0> - 2220237489U, // : Cost 3 vrev <0,u,4,5> - 2651680772U, // : Cost 3 vext2 <5,6,u,0>, <5,5,5,5> - 1577939051U, // : Cost 2 vext2 <5,6,u,0>, <5,6,u,0> - 2830077238U, // : Cost 3 vuzpr <1,u,3,0>, RHS - 1579266317U, // : Cost 2 vext2 <5,u,u,0>, <5,u,u,0> - 2555494502U, // : Cost 3 vext1 <0,u,0,6>, LHS - 2712068598U, // : Cost 3 vext3 RHS, <0,6,1,7> - 1997750374U, // : Cost 2 vtrnl RHS, LHS - 2655662673U, // : Cost 3 vext2 <6,3,u,0>, <6,3,u,0> - 2555497782U, // : Cost 3 vext1 <0,u,0,6>, RHS - 2651681459U, // : Cost 3 vext2 <5,6,u,0>, <6,5,0,u> - 2651681592U, // : Cost 3 vext2 <5,6,u,0>, <6,6,6,6> - 2651681614U, // : Cost 3 vext2 <5,6,u,0>, <6,7,0,1> - 1997750428U, // : Cost 2 vtrnl RHS, LHS - 2567446630U, // : Cost 3 vext1 <2,u,0,7>, LHS - 2567447446U, // : Cost 3 vext1 <2,u,0,7>, <1,2,3,0> - 2567448641U, // : Cost 3 vext1 <2,u,0,7>, <2,u,0,7> - 2573421338U, // : Cost 3 vext1 <3,u,0,7>, <3,u,0,7> - 2567449910U, // : Cost 3 vext1 <2,u,0,7>, RHS - 2651682242U, // : Cost 3 vext2 <5,6,u,0>, <7,5,6,u> - 2591339429U, // : Cost 3 vext1 <6,u,0,7>, <6,u,0,7> - 2651682412U, // : Cost 3 vext2 <5,6,u,0>, <7,7,7,7> - 2567452462U, // : Cost 3 vext1 <2,u,0,7>, LHS - 135053414U, // : Cost 1 vdup0 LHS - 1611489938U, // : Cost 2 vext3 LHS, <0,u,1,1> - 537748125U, // : Cost 1 vext3 LHS, LHS - 2685674148U, // : Cost 3 vext3 LHS, <0,u,3,1> - 1611932338U, // : Cost 2 vext3 LHS, <0,u,4,6> - 1551399066U, // : Cost 2 vext2 <1,2,u,0>, RHS - 1517605798U, // : Cost 2 vext1 <6,u,0,u>, <6,u,0,u> - 2830077481U, // : Cost 3 vuzpr <1,u,3,0>, RHS - 537748179U, // : Cost 1 vext3 LHS, LHS - 1544101961U, // : Cost 2 vext2 <0,0,u,1>, <0,0,u,1> - 1558036582U, // : Cost 2 vext2 <2,3,u,1>, LHS - 2619171051U, // : Cost 3 vext2 <0,2,u,1>, <0,2,u,1> - 1611490038U, // : Cost 2 vext3 LHS, <1,0,3,2> - 2555522358U, // : Cost 3 vext1 <0,u,1,0>, RHS - 2712068871U, // : Cost 3 vext3 RHS, <1,0,5,1> - 2591355815U, // : Cost 3 vext1 <6,u,1,0>, <6,u,1,0> - 2597328512U, // : Cost 3 vext1 <7,u,1,0>, <7,u,1,0> - 1611490083U, // : Cost 2 vext3 LHS, <1,0,u,2> - 1481785446U, // : Cost 2 vext1 <0,u,1,1>, LHS - 202162278U, // : Cost 1 vdup1 LHS - 2555528808U, // : Cost 3 vext1 <0,u,1,1>, <2,2,2,2> - 1611490120U, // : Cost 2 vext3 LHS, <1,1,3,3> - 1481788726U, // : Cost 2 vext1 <0,u,1,1>, RHS - 2689876828U, // : Cost 3 vext3 LHS, <1,1,5,5> - 2591364008U, // : Cost 3 vext1 <6,u,1,1>, <6,u,1,1> - 2592691274U, // : Cost 3 vext1 <7,1,1,1>, <7,1,1,1> - 202162278U, // : Cost 1 vdup1 LHS - 1499709542U, // : Cost 2 vext1 <3,u,1,2>, LHS - 2689876871U, // : Cost 3 vext3 LHS, <1,2,1,3> - 2631116445U, // : Cost 3 vext2 <2,2,u,1>, <2,2,u,1> - 835584U, // : Cost 0 copy LHS - 1499712822U, // : Cost 2 vext1 <3,u,1,2>, RHS - 2689876907U, // : Cost 3 vext3 LHS, <1,2,5,3> - 2631780282U, // : Cost 3 vext2 <2,3,u,1>, <2,6,3,7> - 1523603074U, // : Cost 2 vext1 <7,u,1,2>, <7,u,1,2> - 835584U, // : Cost 0 copy LHS - 1487773798U, // : Cost 2 vext1 <1,u,1,3>, LHS - 
1611490264U, // : Cost 2 vext3 LHS, <1,3,1,3> - 2685232094U, // : Cost 3 vext3 LHS, <1,3,2,0> - 2018746470U, // : Cost 2 vtrnr LHS, LHS - 1487777078U, // : Cost 2 vext1 <1,u,1,3>, RHS - 1611490304U, // : Cost 2 vext3 LHS, <1,3,5,7> - 2685674505U, // : Cost 3 vext3 LHS, <1,3,6,7> - 2640407307U, // : Cost 3 vext2 <3,7,u,1>, <3,7,u,1> - 1611490327U, // : Cost 2 vext3 LHS, <1,3,u,3> - 1567992749U, // : Cost 2 vext2 <4,0,u,1>, <4,0,u,1> - 2693121070U, // : Cost 3 vext3 <1,4,1,u>, <1,4,1,u> - 2693194807U, // : Cost 3 vext3 <1,4,2,u>, <1,4,2,u> - 1152386432U, // : Cost 2 vrev <1,u,3,4> - 2555555126U, // : Cost 3 vext1 <0,u,1,4>, RHS - 1558039862U, // : Cost 2 vext2 <2,3,u,1>, RHS - 2645716371U, // : Cost 3 vext2 <4,6,u,1>, <4,6,u,1> - 2597361284U, // : Cost 3 vext1 <7,u,1,4>, <7,u,1,4> - 1152755117U, // : Cost 2 vrev <1,u,u,4> - 1481818214U, // : Cost 2 vext1 <0,u,1,5>, LHS - 2555560694U, // : Cost 3 vext1 <0,u,1,5>, <1,0,3,2> - 2555561576U, // : Cost 3 vext1 <0,u,1,5>, <2,2,2,2> - 1611490448U, // : Cost 2 vext3 LHS, <1,5,3,7> - 1481821494U, // : Cost 2 vext1 <0,u,1,5>, RHS - 2651025435U, // : Cost 3 vext2 <5,5,u,1>, <5,5,u,1> - 2651689068U, // : Cost 3 vext2 <5,6,u,1>, <5,6,u,1> - 2823966006U, // : Cost 3 vuzpr <0,u,1,1>, RHS - 1611932861U, // : Cost 2 vext3 LHS, <1,5,u,7> - 2555568230U, // : Cost 3 vext1 <0,u,1,6>, LHS - 2689877199U, // : Cost 3 vext3 LHS, <1,6,1,7> - 2712069336U, // : Cost 3 vext3 RHS, <1,6,2,7> - 2685232353U, // : Cost 3 vext3 LHS, <1,6,3,7> - 2555571510U, // : Cost 3 vext1 <0,u,1,6>, RHS - 2689877235U, // : Cost 3 vext3 LHS, <1,6,5,7> - 2657661765U, // : Cost 3 vext2 <6,6,u,1>, <6,6,u,1> - 1584583574U, // : Cost 2 vext2 <6,7,u,1>, <6,7,u,1> - 1585247207U, // : Cost 2 vext2 <6,u,u,1>, <6,u,u,1> - 2561548390U, // : Cost 3 vext1 <1,u,1,7>, LHS - 2561549681U, // : Cost 3 vext1 <1,u,1,7>, <1,u,1,7> - 2573493926U, // : Cost 3 vext1 <3,u,1,7>, <2,3,0,1> - 2042962022U, // : Cost 2 vtrnr RHS, LHS - 2561551670U, // : Cost 3 vext1 <1,u,1,7>, RHS - 2226300309U, // : Cost 3 vrev <1,u,5,7> - 2658325990U, // : Cost 3 vext2 <6,7,u,1>, <7,6,1,u> - 2658326124U, // : Cost 3 vext2 <6,7,u,1>, <7,7,7,7> - 2042962027U, // : Cost 2 vtrnr RHS, LHS - 1481842790U, // : Cost 2 vext1 <0,u,1,u>, LHS - 202162278U, // : Cost 1 vdup1 LHS - 2685674867U, // : Cost 3 vext3 LHS, <1,u,2,0> - 835584U, // : Cost 0 copy LHS - 1481846070U, // : Cost 2 vext1 <0,u,1,u>, RHS - 1611933077U, // : Cost 2 vext3 LHS, <1,u,5,7> - 2685674910U, // : Cost 3 vext3 LHS, <1,u,6,7> - 1523652232U, // : Cost 2 vext1 <7,u,1,u>, <7,u,1,u> - 835584U, // : Cost 0 copy LHS - 1544110154U, // : Cost 2 vext2 <0,0,u,2>, <0,0,u,2> - 1545437286U, // : Cost 2 vext2 <0,2,u,2>, LHS - 1545437420U, // : Cost 2 vext2 <0,2,u,2>, <0,2,u,2> - 2685232589U, // : Cost 3 vext3 LHS, <2,0,3,0> - 2619179346U, // : Cost 3 vext2 <0,2,u,2>, <0,4,1,5> - 2712069606U, // : Cost 3 vext3 RHS, <2,0,5,7> - 2689877484U, // : Cost 3 vext3 LHS, <2,0,6,4> - 2659656273U, // : Cost 3 vext2 <7,0,u,2>, <0,7,2,u> - 1545437853U, // : Cost 2 vext2 <0,2,u,2>, LHS - 1550082851U, // : Cost 2 vext2 <1,0,u,2>, <1,0,u,2> - 2619179828U, // : Cost 3 vext2 <0,2,u,2>, <1,1,1,1> - 2619179926U, // : Cost 3 vext2 <0,2,u,2>, <1,2,3,0> - 2685232671U, // : Cost 3 vext3 LHS, <2,1,3,1> - 2555604278U, // : Cost 3 vext1 <0,u,2,1>, RHS - 2619180176U, // : Cost 3 vext2 <0,2,u,2>, <1,5,3,7> - 2689877564U, // : Cost 3 vext3 LHS, <2,1,6,3> - 2602718850U, // : Cost 3 vext1 , <7,u,1,2> - 1158703235U, // : Cost 2 vrev <2,u,u,1> - 1481867366U, // : Cost 2 vext1 <0,u,2,2>, LHS - 2555609846U, // : Cost 3 vext1 
<0,u,2,2>, <1,0,3,2> - 269271142U, // : Cost 1 vdup2 LHS - 1611490930U, // : Cost 2 vext3 LHS, <2,2,3,3> - 1481870646U, // : Cost 2 vext1 <0,u,2,2>, RHS - 2689877640U, // : Cost 3 vext3 LHS, <2,2,5,7> - 2619180986U, // : Cost 3 vext2 <0,2,u,2>, <2,6,3,7> - 2593436837U, // : Cost 3 vext1 <7,2,2,2>, <7,2,2,2> - 269271142U, // : Cost 1 vdup2 LHS - 408134301U, // : Cost 1 vext1 LHS, LHS - 1481876214U, // : Cost 2 vext1 LHS, <1,0,3,2> - 1481877096U, // : Cost 2 vext1 LHS, <2,2,2,2> - 1880326246U, // : Cost 2 vzipr LHS, LHS - 408137014U, // : Cost 1 vext1 LHS, RHS - 1529654992U, // : Cost 2 vext1 LHS, <5,1,7,3> - 1529655802U, // : Cost 2 vext1 LHS, <6,2,7,3> - 1529656314U, // : Cost 2 vext1 LHS, <7,0,1,2> - 408139566U, // : Cost 1 vext1 LHS, LHS - 1567853468U, // : Cost 2 vext2 <4,0,6,2>, <4,0,6,2> - 2561598362U, // : Cost 3 vext1 <1,u,2,4>, <1,2,3,4> - 2555627214U, // : Cost 3 vext1 <0,u,2,4>, <2,3,4,5> - 2685232918U, // : Cost 3 vext3 LHS, <2,4,3,5> - 2555628854U, // : Cost 3 vext1 <0,u,2,4>, RHS - 1545440566U, // : Cost 2 vext2 <0,2,u,2>, RHS - 1571982740U, // : Cost 2 vext2 <4,6,u,2>, <4,6,u,2> - 2592125957U, // : Cost 3 vext1 <7,0,2,4>, <7,0,2,4> - 1545440809U, // : Cost 2 vext2 <0,2,u,2>, RHS - 2555633766U, // : Cost 3 vext1 <0,u,2,5>, LHS - 2561606550U, // : Cost 3 vext1 <1,u,2,5>, <1,2,3,0> - 2689877856U, // : Cost 3 vext3 LHS, <2,5,2,7> - 2685233000U, // : Cost 3 vext3 LHS, <2,5,3,6> - 1158441059U, // : Cost 2 vrev <2,u,4,5> - 2645725188U, // : Cost 3 vext2 <4,6,u,2>, <5,5,5,5> - 2689877892U, // : Cost 3 vext3 LHS, <2,5,6,7> - 2823900470U, // : Cost 3 vuzpr <0,u,0,2>, RHS - 1158736007U, // : Cost 2 vrev <2,u,u,5> - 1481900134U, // : Cost 2 vext1 <0,u,2,6>, LHS - 2555642614U, // : Cost 3 vext1 <0,u,2,6>, <1,0,3,2> - 2555643496U, // : Cost 3 vext1 <0,u,2,6>, <2,2,2,2> - 1611491258U, // : Cost 2 vext3 LHS, <2,6,3,7> - 1481903414U, // : Cost 2 vext1 <0,u,2,6>, RHS - 2689877964U, // : Cost 3 vext3 LHS, <2,6,5,7> - 2689877973U, // : Cost 3 vext3 LHS, <2,6,6,7> - 2645726030U, // : Cost 3 vext2 <4,6,u,2>, <6,7,0,1> - 1611933671U, // : Cost 2 vext3 LHS, <2,6,u,7> - 1585919033U, // : Cost 2 vext2 <7,0,u,2>, <7,0,u,2> - 2573566710U, // : Cost 3 vext1 <3,u,2,7>, <1,0,3,2> - 2567596115U, // : Cost 3 vext1 <2,u,2,7>, <2,u,2,7> - 1906901094U, // : Cost 2 vzipr RHS, LHS - 2555653430U, // : Cost 3 vext1 <0,u,2,7>, RHS - 2800080230U, // : Cost 3 vuzpl LHS, <7,4,5,6> - 2980643164U, // : Cost 3 vzipr RHS, <0,4,2,6> - 2645726828U, // : Cost 3 vext2 <4,6,u,2>, <7,7,7,7> - 1906901099U, // : Cost 2 vzipr RHS, LHS - 408175266U, // : Cost 1 vext1 LHS, LHS - 1545443118U, // : Cost 2 vext2 <0,2,u,2>, LHS - 269271142U, // : Cost 1 vdup2 LHS - 1611491416U, // : Cost 2 vext3 LHS, <2,u,3,3> - 408177974U, // : Cost 1 vext1 LHS, RHS - 1545443482U, // : Cost 2 vext2 <0,2,u,2>, RHS - 1726339226U, // : Cost 2 vuzpl LHS, RHS - 1529697274U, // : Cost 2 vext1 LHS, <7,0,1,2> - 408180526U, // : Cost 1 vext1 LHS, LHS - 1544781824U, // : Cost 2 vext2 LHS, <0,0,0,0> - 471040156U, // : Cost 1 vext2 LHS, LHS - 1544781988U, // : Cost 2 vext2 LHS, <0,2,0,2> - 2618523900U, // : Cost 3 vext2 LHS, <0,3,1,0> - 1544782162U, // : Cost 2 vext2 LHS, <0,4,1,5> - 2238188352U, // : Cost 3 vrev <3,u,5,0> - 2623169023U, // : Cost 3 vext2 LHS, <0,6,2,7> - 2238335826U, // : Cost 3 vrev <3,u,7,0> - 471040669U, // : Cost 1 vext2 LHS, LHS - 1544782582U, // : Cost 2 vext2 LHS, <1,0,3,2> - 1544782644U, // : Cost 2 vext2 LHS, <1,1,1,1> - 1544782742U, // : Cost 2 vext2 LHS, <1,2,3,0> - 1544782808U, // : Cost 2 vext2 LHS, <1,3,1,3> - 2618524733U, // : Cost 
3 vext2 LHS, <1,4,3,5> - 1544782992U, // : Cost 2 vext2 LHS, <1,5,3,7> - 2618524897U, // : Cost 3 vext2 LHS, <1,6,3,7> - 2703517987U, // : Cost 3 vext3 <3,1,7,u>, <3,1,7,u> - 1544783213U, // : Cost 2 vext2 LHS, <1,u,1,3> - 1529716838U, // : Cost 2 vext1 , LHS - 1164167966U, // : Cost 2 vrev <3,u,1,2> - 1544783464U, // : Cost 2 vext2 LHS, <2,2,2,2> - 1544783526U, // : Cost 2 vext2 LHS, <2,3,0,1> - 1529720118U, // : Cost 2 vext1 , RHS - 2618525544U, // : Cost 3 vext2 LHS, <2,5,3,6> - 1544783802U, // : Cost 2 vext2 LHS, <2,6,3,7> - 2704181620U, // : Cost 3 vext3 <3,2,7,u>, <3,2,7,u> - 1544783931U, // : Cost 2 vext2 LHS, <2,u,0,1> - 1544784022U, // : Cost 2 vext2 LHS, <3,0,1,2> - 1487922559U, // : Cost 2 vext1 <1,u,3,3>, <1,u,3,3> - 1493895256U, // : Cost 2 vext1 <2,u,3,3>, <2,u,3,3> - 336380006U, // : Cost 1 vdup3 LHS - 1544784386U, // : Cost 2 vext2 LHS, <3,4,5,6> - 2824054478U, // : Cost 3 vuzpr LHS, <2,3,4,5> - 2238286668U, // : Cost 3 vrev <3,u,6,3> - 2954069136U, // : Cost 3 vzipr LHS, <1,5,3,7> - 336380006U, // : Cost 1 vdup3 LHS - 1487929446U, // : Cost 2 vext1 <1,u,3,4>, LHS - 1487930752U, // : Cost 2 vext1 <1,u,3,4>, <1,u,3,4> - 2623171644U, // : Cost 3 vext2 LHS, <4,2,6,0> - 2561673366U, // : Cost 3 vext1 <1,u,3,4>, <3,0,1,2> - 1487932726U, // : Cost 2 vext1 <1,u,3,4>, RHS - 471043382U, // : Cost 1 vext2 LHS, RHS - 1592561012U, // : Cost 2 vext2 LHS, <4,6,4,6> - 2238368598U, // : Cost 3 vrev <3,u,7,4> - 471043625U, // : Cost 1 vext2 LHS, RHS - 2555707494U, // : Cost 3 vext1 <0,u,3,5>, LHS - 1574645465U, // : Cost 2 vext2 <5,1,u,3>, <5,1,u,3> - 2567653106U, // : Cost 3 vext1 <2,u,3,5>, <2,3,u,5> - 2555709954U, // : Cost 3 vext1 <0,u,3,5>, <3,4,5,6> - 1592561606U, // : Cost 2 vext2 LHS, <5,4,7,6> - 1592561668U, // : Cost 2 vext2 LHS, <5,5,5,5> - 1592561762U, // : Cost 2 vext2 LHS, <5,6,7,0> - 1750314294U, // : Cost 2 vuzpr LHS, RHS - 1750314295U, // : Cost 2 vuzpr LHS, RHS - 2623172897U, // : Cost 3 vext2 LHS, <6,0,1,2> - 2561688962U, // : Cost 3 vext1 <1,u,3,6>, <1,u,3,6> - 1581281795U, // : Cost 2 vext2 <6,2,u,3>, <6,2,u,3> - 2706541204U, // : Cost 3 vext3 <3,6,3,u>, <3,6,3,u> - 2623173261U, // : Cost 3 vext2 LHS, <6,4,5,6> - 1164495686U, // : Cost 2 vrev <3,u,5,6> - 1592562488U, // : Cost 2 vext2 LHS, <6,6,6,6> - 1592562510U, // : Cost 2 vext2 LHS, <6,7,0,1> - 1164716897U, // : Cost 2 vrev <3,u,u,6> - 1487954022U, // : Cost 2 vext1 <1,u,3,7>, LHS - 1487955331U, // : Cost 2 vext1 <1,u,3,7>, <1,u,3,7> - 1493928028U, // : Cost 2 vext1 <2,u,3,7>, <2,u,3,7> - 2561697942U, // : Cost 3 vext1 <1,u,3,7>, <3,0,1,2> - 1487957302U, // : Cost 2 vext1 <1,u,3,7>, RHS - 2707352311U, // : Cost 3 vext3 <3,7,5,u>, <3,7,5,u> - 2655024623U, // : Cost 3 vext2 <6,2,u,3>, <7,6,2,u> - 1592563308U, // : Cost 2 vext2 LHS, <7,7,7,7> - 1487959854U, // : Cost 2 vext1 <1,u,3,7>, LHS - 1544787667U, // : Cost 2 vext2 LHS, - 471045934U, // : Cost 1 vext2 LHS, LHS - 1549432709U, // : Cost 2 vext2 LHS, - 336380006U, // : Cost 1 vdup3 LHS - 1544788031U, // : Cost 2 vext2 LHS, - 471046298U, // : Cost 1 vext2 LHS, RHS - 1549433040U, // : Cost 2 vext2 LHS, - 1750314537U, // : Cost 2 vuzpr LHS, RHS - 471046501U, // : Cost 1 vext2 LHS, LHS - 2625167360U, // : Cost 3 vext2 <1,2,u,4>, <0,0,0,0> - 1551425638U, // : Cost 2 vext2 <1,2,u,4>, LHS - 2619195630U, // : Cost 3 vext2 <0,2,u,4>, <0,2,u,4> - 2619343104U, // : Cost 3 vext2 <0,3,1,4>, <0,3,1,4> - 2625167698U, // : Cost 3 vext2 <1,2,u,4>, <0,4,1,5> - 1638329234U, // : Cost 2 vext3 RHS, <4,0,5,1> - 1638329244U, // : Cost 2 vext3 RHS, <4,0,6,2> - 3787803556U, // : Cost 4 
vext3 RHS, <4,0,7,1> - 1551426205U, // : Cost 2 vext2 <1,2,u,4>, LHS - 2555748454U, // : Cost 3 vext1 <0,u,4,1>, LHS - 2625168180U, // : Cost 3 vext2 <1,2,u,4>, <1,1,1,1> - 1551426503U, // : Cost 2 vext2 <1,2,u,4>, <1,2,u,4> - 2625168344U, // : Cost 3 vext2 <1,2,u,4>, <1,3,1,3> - 2555751734U, // : Cost 3 vext1 <0,u,4,1>, RHS - 1860554038U, // : Cost 2 vzipl LHS, RHS - 2689879022U, // : Cost 3 vext3 LHS, <4,1,6,3> - 2592248852U, // : Cost 3 vext1 <7,0,4,1>, <7,0,4,1> - 1555408301U, // : Cost 2 vext2 <1,u,u,4>, <1,u,u,4> - 2555756646U, // : Cost 3 vext1 <0,u,4,2>, LHS - 2625168943U, // : Cost 3 vext2 <1,2,u,4>, <2,1,4,u> - 2625169000U, // : Cost 3 vext2 <1,2,u,4>, <2,2,2,2> - 2619197134U, // : Cost 3 vext2 <0,2,u,4>, <2,3,4,5> - 2555759926U, // : Cost 3 vext1 <0,u,4,2>, RHS - 2712071222U, // : Cost 3 vext3 RHS, <4,2,5,3> - 1994771766U, // : Cost 2 vtrnl LHS, RHS - 2592257045U, // : Cost 3 vext1 <7,0,4,2>, <7,0,4,2> - 1994771784U, // : Cost 2 vtrnl LHS, RHS - 2625169558U, // : Cost 3 vext2 <1,2,u,4>, <3,0,1,2> - 2567709594U, // : Cost 3 vext1 <2,u,4,3>, <1,2,3,4> - 2567710817U, // : Cost 3 vext1 <2,u,4,3>, <2,u,4,3> - 2625169820U, // : Cost 3 vext2 <1,2,u,4>, <3,3,3,3> - 2625169922U, // : Cost 3 vext2 <1,2,u,4>, <3,4,5,6> - 2954069710U, // : Cost 3 vzipr LHS, <2,3,4,5> - 2954068172U, // : Cost 3 vzipr LHS, <0,2,4,6> - 3903849472U, // : Cost 4 vuzpr <1,u,3,4>, <1,3,5,7> - 2954068174U, // : Cost 3 vzipr LHS, <0,2,4,u> - 1505919078U, // : Cost 2 vext1 <4,u,4,4>, LHS - 2567717831U, // : Cost 3 vext1 <2,u,4,4>, <1,2,u,4> - 2567719010U, // : Cost 3 vext1 <2,u,4,4>, <2,u,4,4> - 2570373542U, // : Cost 3 vext1 <3,3,4,4>, <3,3,4,4> - 161926454U, // : Cost 1 vdup0 RHS - 1551428918U, // : Cost 2 vext2 <1,2,u,4>, RHS - 1638329572U, // : Cost 2 vext3 RHS, <4,4,6,6> - 2594927963U, // : Cost 3 vext1 <7,4,4,4>, <7,4,4,4> - 161926454U, // : Cost 1 vdup0 RHS - 1493983334U, // : Cost 2 vext1 <2,u,4,5>, LHS - 2689879301U, // : Cost 3 vext3 LHS, <4,5,1,3> - 1493985379U, // : Cost 2 vext1 <2,u,4,5>, <2,u,4,5> - 2567727254U, // : Cost 3 vext1 <2,u,4,5>, <3,0,1,2> - 1493986614U, // : Cost 2 vext1 <2,u,4,5>, RHS - 1863535926U, // : Cost 2 vzipl RHS, RHS - 537750838U, // : Cost 1 vext3 LHS, RHS - 2830110006U, // : Cost 3 vuzpr <1,u,3,4>, RHS - 537750856U, // : Cost 1 vext3 LHS, RHS - 1482047590U, // : Cost 2 vext1 <0,u,4,6>, LHS - 2555790070U, // : Cost 3 vext1 <0,u,4,6>, <1,0,3,2> - 2555790952U, // : Cost 3 vext1 <0,u,4,6>, <2,2,2,2> - 2555791510U, // : Cost 3 vext1 <0,u,4,6>, <3,0,1,2> - 1482050870U, // : Cost 2 vext1 <0,u,4,6>, RHS - 2689879422U, // : Cost 3 vext3 LHS, <4,6,5,7> - 1997753654U, // : Cost 2 vtrnl RHS, RHS - 2712071562U, // : Cost 3 vext3 RHS, <4,6,7,1> - 1482053422U, // : Cost 2 vext1 <0,u,4,6>, LHS - 2567741542U, // : Cost 3 vext1 <2,u,4,7>, LHS - 2567742362U, // : Cost 3 vext1 <2,u,4,7>, <1,2,3,4> - 2567743589U, // : Cost 3 vext1 <2,u,4,7>, <2,u,4,7> - 2573716286U, // : Cost 3 vext1 <3,u,4,7>, <3,u,4,7> - 2567744822U, // : Cost 3 vext1 <2,u,4,7>, RHS - 2712071624U, // : Cost 3 vext3 RHS, <4,7,5,0> - 96808489U, // : Cost 1 vrev RHS - 2651715180U, // : Cost 3 vext2 <5,6,u,4>, <7,7,7,7> - 96955963U, // : Cost 1 vrev RHS - 1482063974U, // : Cost 2 vext1 <0,u,4,u>, LHS - 1551431470U, // : Cost 2 vext2 <1,2,u,4>, LHS - 1494009958U, // : Cost 2 vext1 <2,u,4,u>, <2,u,4,u> - 2555807894U, // : Cost 3 vext1 <0,u,4,u>, <3,0,1,2> - 161926454U, // : Cost 1 vdup0 RHS - 1551431834U, // : Cost 2 vext2 <1,2,u,4>, RHS - 537751081U, // : Cost 1 vext3 LHS, RHS - 2830110249U, // : Cost 3 vuzpr <1,u,3,4>, RHS - 
537751099U, // : Cost 1 vext3 LHS, RHS - 2631811072U, // : Cost 3 vext2 <2,3,u,5>, <0,0,0,0> - 1558069350U, // : Cost 2 vext2 <2,3,u,5>, LHS - 2619203823U, // : Cost 3 vext2 <0,2,u,5>, <0,2,u,5> - 2619867456U, // : Cost 3 vext2 <0,3,u,5>, <0,3,u,5> - 1546273106U, // : Cost 2 vext2 <0,4,1,5>, <0,4,1,5> - 2733010539U, // : Cost 3 vext3 LHS, <5,0,5,1> - 2597622682U, // : Cost 3 vext1 <7,u,5,0>, <6,7,u,5> - 1176539396U, // : Cost 2 vrev <5,u,7,0> - 1558069917U, // : Cost 2 vext2 <2,3,u,5>, LHS - 1505968230U, // : Cost 2 vext1 <4,u,5,1>, LHS - 2624512887U, // : Cost 3 vext2 <1,1,u,5>, <1,1,u,5> - 2631811990U, // : Cost 3 vext2 <2,3,u,5>, <1,2,3,0> - 2618541056U, // : Cost 3 vext2 <0,1,u,5>, <1,3,5,7> - 1505971510U, // : Cost 2 vext1 <4,u,5,1>, RHS - 2627167419U, // : Cost 3 vext2 <1,5,u,5>, <1,5,u,5> - 2579714554U, // : Cost 3 vext1 <4,u,5,1>, <6,2,7,3> - 1638330064U, // : Cost 2 vext3 RHS, <5,1,7,3> - 1638477529U, // : Cost 2 vext3 RHS, <5,1,u,3> - 2561802342U, // : Cost 3 vext1 <1,u,5,2>, LHS - 2561803264U, // : Cost 3 vext1 <1,u,5,2>, <1,3,5,7> - 2631149217U, // : Cost 3 vext2 <2,2,u,5>, <2,2,u,5> - 1558071026U, // : Cost 2 vext2 <2,3,u,5>, <2,3,u,5> - 2561805622U, // : Cost 3 vext1 <1,u,5,2>, RHS - 2714062607U, // : Cost 3 vext3 RHS, <5,2,5,3> - 2631813050U, // : Cost 3 vext2 <2,3,u,5>, <2,6,3,7> - 3092335926U, // : Cost 3 vtrnr <0,u,0,2>, RHS - 1561389191U, // : Cost 2 vext2 <2,u,u,5>, <2,u,u,5> - 2561810534U, // : Cost 3 vext1 <1,u,5,3>, LHS - 2561811857U, // : Cost 3 vext1 <1,u,5,3>, <1,u,5,3> - 2631813474U, // : Cost 3 vext2 <2,3,u,5>, <3,2,5,u> - 2631813532U, // : Cost 3 vext2 <2,3,u,5>, <3,3,3,3> - 2619869698U, // : Cost 3 vext2 <0,3,u,5>, <3,4,5,6> - 3001847002U, // : Cost 3 vzipr LHS, <4,4,5,5> - 2954070530U, // : Cost 3 vzipr LHS, <3,4,5,6> - 2018749750U, // : Cost 2 vtrnr LHS, RHS - 2018749751U, // : Cost 2 vtrnr LHS, RHS - 2573762662U, // : Cost 3 vext1 <3,u,5,4>, LHS - 2620017634U, // : Cost 3 vext2 <0,4,1,5>, <4,1,5,0> - 2573764338U, // : Cost 3 vext1 <3,u,5,4>, <2,3,u,5> - 2573765444U, // : Cost 3 vext1 <3,u,5,4>, <3,u,5,4> - 1570680053U, // : Cost 2 vext2 <4,4,u,5>, <4,4,u,5> - 1558072630U, // : Cost 2 vext2 <2,3,u,5>, RHS - 2645749143U, // : Cost 3 vext2 <4,6,u,5>, <4,6,u,5> - 1638330310U, // : Cost 2 vext3 RHS, <5,4,7,6> - 1558072873U, // : Cost 2 vext2 <2,3,u,5>, RHS - 1506000998U, // : Cost 2 vext1 <4,u,5,5>, LHS - 2561827984U, // : Cost 3 vext1 <1,u,5,5>, <1,5,3,7> - 2579744360U, // : Cost 3 vext1 <4,u,5,5>, <2,2,2,2> - 2579744918U, // : Cost 3 vext1 <4,u,5,5>, <3,0,1,2> - 1506004278U, // : Cost 2 vext1 <4,u,5,5>, RHS - 229035318U, // : Cost 1 vdup1 RHS - 2712072206U, // : Cost 3 vext3 RHS, <5,5,6,6> - 1638330392U, // : Cost 2 vext3 RHS, <5,5,7,7> - 229035318U, // : Cost 1 vdup1 RHS - 1500037222U, // : Cost 2 vext1 <3,u,5,6>, LHS - 2561836436U, // : Cost 3 vext1 <1,u,5,6>, <1,u,5,6> - 2567809133U, // : Cost 3 vext1 <2,u,5,6>, <2,u,5,6> - 1500040006U, // : Cost 2 vext1 <3,u,5,6>, <3,u,5,6> - 1500040502U, // : Cost 2 vext1 <3,u,5,6>, RHS - 2714062935U, // : Cost 3 vext3 RHS, <5,6,5,7> - 2712072288U, // : Cost 3 vext3 RHS, <5,6,6,7> - 27705344U, // : Cost 0 copy RHS - 27705344U, // : Cost 0 copy RHS - 1488101478U, // : Cost 2 vext1 <1,u,5,7>, LHS - 1488102805U, // : Cost 2 vext1 <1,u,5,7>, <1,u,5,7> - 2561844840U, // : Cost 3 vext1 <1,u,5,7>, <2,2,2,2> - 2561845398U, // : Cost 3 vext1 <1,u,5,7>, <3,0,1,2> - 1488104758U, // : Cost 2 vext1 <1,u,5,7>, RHS - 1638330536U, // : Cost 2 vext3 RHS, <5,7,5,7> - 2712072362U, // : Cost 3 vext3 RHS, <5,7,6,0> - 2042965302U, // : Cost 2 
vtrnr RHS, RHS - 1488107310U, // : Cost 2 vext1 <1,u,5,7>, LHS - 1488109670U, // : Cost 2 vext1 <1,u,5,u>, LHS - 1488110998U, // : Cost 2 vext1 <1,u,5,u>, <1,u,5,u> - 2561853032U, // : Cost 3 vext1 <1,u,5,u>, <2,2,2,2> - 1500056392U, // : Cost 2 vext1 <3,u,5,u>, <3,u,5,u> - 1488112950U, // : Cost 2 vext1 <1,u,5,u>, RHS - 229035318U, // : Cost 1 vdup1 RHS - 2954111490U, // : Cost 3 vzipr LHS, <3,4,5,6> - 27705344U, // : Cost 0 copy RHS - 27705344U, // : Cost 0 copy RHS - 2619211776U, // : Cost 3 vext2 <0,2,u,6>, <0,0,0,0> - 1545470054U, // : Cost 2 vext2 <0,2,u,6>, LHS - 1545470192U, // : Cost 2 vext2 <0,2,u,6>, <0,2,u,6> - 2255958969U, // : Cost 3 vrev <6,u,3,0> - 1546797458U, // : Cost 2 vext2 <0,4,u,6>, <0,4,u,6> - 2720624971U, // : Cost 3 vext3 <6,0,5,u>, <6,0,5,u> - 2256180180U, // : Cost 3 vrev <6,u,6,0> - 2960682294U, // : Cost 3 vzipr <1,2,u,0>, RHS - 1545470621U, // : Cost 2 vext2 <0,2,u,6>, LHS - 1182004127U, // : Cost 2 vrev <6,u,0,1> - 2619212596U, // : Cost 3 vext2 <0,2,u,6>, <1,1,1,1> - 2619212694U, // : Cost 3 vext2 <0,2,u,6>, <1,2,3,0> - 2619212760U, // : Cost 3 vext2 <0,2,u,6>, <1,3,1,3> - 2626511979U, // : Cost 3 vext2 <1,4,u,6>, <1,4,u,6> - 2619212944U, // : Cost 3 vext2 <0,2,u,6>, <1,5,3,7> - 2714063264U, // : Cost 3 vext3 RHS, <6,1,6,3> - 2967326006U, // : Cost 3 vzipr <2,3,u,1>, RHS - 1182594023U, // : Cost 2 vrev <6,u,u,1> - 1506050150U, // : Cost 2 vext1 <4,u,6,2>, LHS - 2579792630U, // : Cost 3 vext1 <4,u,6,2>, <1,0,3,2> - 2619213416U, // : Cost 3 vext2 <0,2,u,6>, <2,2,2,2> - 2619213478U, // : Cost 3 vext2 <0,2,u,6>, <2,3,0,1> - 1506053430U, // : Cost 2 vext1 <4,u,6,2>, RHS - 2633148309U, // : Cost 3 vext2 <2,5,u,6>, <2,5,u,6> - 2619213754U, // : Cost 3 vext2 <0,2,u,6>, <2,6,3,7> - 1638330874U, // : Cost 2 vext3 RHS, <6,2,7,3> - 1638478339U, // : Cost 2 vext3 RHS, <6,2,u,3> - 2619213974U, // : Cost 3 vext2 <0,2,u,6>, <3,0,1,2> - 2255836074U, // : Cost 3 vrev <6,u,1,3> - 2255909811U, // : Cost 3 vrev <6,u,2,3> - 2619214236U, // : Cost 3 vext2 <0,2,u,6>, <3,3,3,3> - 1564715549U, // : Cost 2 vext2 <3,4,u,6>, <3,4,u,6> - 2639121006U, // : Cost 3 vext2 <3,5,u,6>, <3,5,u,6> - 3001847012U, // : Cost 3 vzipr LHS, <4,4,6,6> - 1880329526U, // : Cost 2 vzipr LHS, RHS - 1880329527U, // : Cost 2 vzipr LHS, RHS - 2567864422U, // : Cost 3 vext1 <2,u,6,4>, LHS - 2733011558U, // : Cost 3 vext3 LHS, <6,4,1,3> - 2567866484U, // : Cost 3 vext1 <2,u,6,4>, <2,u,6,4> - 2638458005U, // : Cost 3 vext2 <3,4,u,6>, <4,3,6,u> - 1570540772U, // : Cost 2 vext2 <4,4,6,6>, <4,4,6,6> - 1545473334U, // : Cost 2 vext2 <0,2,u,6>, RHS - 1572015512U, // : Cost 2 vext2 <4,6,u,6>, <4,6,u,6> - 2960715062U, // : Cost 3 vzipr <1,2,u,4>, RHS - 1545473577U, // : Cost 2 vext2 <0,2,u,6>, RHS - 2567872614U, // : Cost 3 vext1 <2,u,6,5>, LHS - 2645757648U, // : Cost 3 vext2 <4,6,u,6>, <5,1,7,3> - 2567874490U, // : Cost 3 vext1 <2,u,6,5>, <2,6,3,7> - 2576501250U, // : Cost 3 vext1 <4,3,6,5>, <3,4,5,6> - 1576660943U, // : Cost 2 vext2 <5,4,u,6>, <5,4,u,6> - 2645757956U, // : Cost 3 vext2 <4,6,u,6>, <5,5,5,5> - 2645758050U, // : Cost 3 vext2 <4,6,u,6>, <5,6,7,0> - 2824080694U, // : Cost 3 vuzpr <0,u,2,6>, RHS - 1182626795U, // : Cost 2 vrev <6,u,u,5> - 1506082918U, // : Cost 2 vext1 <4,u,6,6>, LHS - 2579825398U, // : Cost 3 vext1 <4,u,6,6>, <1,0,3,2> - 2645758458U, // : Cost 3 vext2 <4,6,u,6>, <6,2,7,3> - 2579826838U, // : Cost 3 vext1 <4,u,6,6>, <3,0,1,2> - 1506086198U, // : Cost 2 vext1 <4,u,6,6>, RHS - 2579828432U, // : Cost 3 vext1 <4,u,6,6>, <5,1,7,3> - 296144182U, // : Cost 1 vdup2 RHS - 1638331202U, // : Cost 2 
vext3 RHS, <6,6,7,7> - 296144182U, // : Cost 1 vdup2 RHS - 432349286U, // : Cost 1 vext1 RHS, LHS - 1506091766U, // : Cost 2 vext1 RHS, <1,0,3,2> - 1506092648U, // : Cost 2 vext1 RHS, <2,2,2,2> - 1506093206U, // : Cost 2 vext1 RHS, <3,0,1,2> - 432352809U, // : Cost 1 vext1 RHS, RHS - 1506094800U, // : Cost 2 vext1 RHS, <5,1,7,3> - 1506095610U, // : Cost 2 vext1 RHS, <6,2,7,3> - 1906904374U, // : Cost 2 vzipr RHS, RHS - 432355118U, // : Cost 1 vext1 RHS, LHS - 432357478U, // : Cost 1 vext1 RHS, LHS - 1545475886U, // : Cost 2 vext2 <0,2,u,6>, LHS - 1506100840U, // : Cost 2 vext1 RHS, <2,2,2,2> - 1506101398U, // : Cost 2 vext1 RHS, <3,0,1,2> - 432361002U, // : Cost 1 vext1 RHS, RHS - 1545476250U, // : Cost 2 vext2 <0,2,u,6>, RHS - 296144182U, // : Cost 1 vdup2 RHS - 1880370486U, // : Cost 2 vzipr LHS, RHS - 432363310U, // : Cost 1 vext1 RHS, LHS - 1571356672U, // : Cost 2 vext2 RHS, <0,0,0,0> - 497614950U, // : Cost 1 vext2 RHS, LHS - 1571356836U, // : Cost 2 vext2 RHS, <0,2,0,2> - 2573880146U, // : Cost 3 vext1 <3,u,7,0>, <3,u,7,0> - 1571357010U, // : Cost 2 vext2 RHS, <0,4,1,5> - 1512083716U, // : Cost 2 vext1 <5,u,7,0>, <5,u,7,0> - 2621874741U, // : Cost 3 vext2 <0,6,u,7>, <0,6,u,7> - 2585826298U, // : Cost 3 vext1 <5,u,7,0>, <7,0,1,2> - 497615517U, // : Cost 1 vext2 RHS, LHS - 1571357430U, // : Cost 2 vext2 RHS, <1,0,3,2> - 1571357492U, // : Cost 2 vext2 RHS, <1,1,1,1> - 1571357590U, // : Cost 2 vext2 RHS, <1,2,3,0> - 1552114715U, // : Cost 2 vext2 <1,3,u,7>, <1,3,u,7> - 2573888822U, // : Cost 3 vext1 <3,u,7,1>, RHS - 1553441981U, // : Cost 2 vext2 <1,5,u,7>, <1,5,u,7> - 2627847438U, // : Cost 3 vext2 <1,6,u,7>, <1,6,u,7> - 2727408775U, // : Cost 3 vext3 <7,1,7,u>, <7,1,7,u> - 1555432880U, // : Cost 2 vext2 <1,u,u,7>, <1,u,u,7> - 2629838337U, // : Cost 3 vext2 <2,0,u,7>, <2,0,u,7> - 1188058754U, // : Cost 2 vrev <7,u,1,2> - 1571358312U, // : Cost 2 vext2 RHS, <2,2,2,2> - 1571358374U, // : Cost 2 vext2 RHS, <2,3,0,1> - 2632492869U, // : Cost 3 vext2 <2,4,u,7>, <2,4,u,7> - 2633156502U, // : Cost 3 vext2 <2,5,u,7>, <2,5,u,7> - 1560078311U, // : Cost 2 vext2 <2,6,u,7>, <2,6,u,7> - 2728072408U, // : Cost 3 vext3 <7,2,7,u>, <7,2,7,u> - 1561405577U, // : Cost 2 vext2 <2,u,u,7>, <2,u,u,7> - 1571358870U, // : Cost 2 vext2 RHS, <3,0,1,2> - 2627184913U, // : Cost 3 vext2 <1,5,u,7>, <3,1,5,u> - 2633820523U, // : Cost 3 vext2 <2,6,u,7>, <3,2,6,u> - 1571359132U, // : Cost 2 vext2 RHS, <3,3,3,3> - 1571359234U, // : Cost 2 vext2 RHS, <3,4,5,6> - 1512108295U, // : Cost 2 vext1 <5,u,7,3>, <5,u,7,3> - 1518080992U, // : Cost 2 vext1 <6,u,7,3>, <6,u,7,3> - 2640456465U, // : Cost 3 vext2 <3,7,u,7>, <3,7,u,7> - 1571359518U, // : Cost 2 vext2 RHS, <3,u,1,2> - 1571359634U, // : Cost 2 vext2 RHS, <4,0,5,1> - 2573911067U, // : Cost 3 vext1 <3,u,7,4>, <1,3,u,7> - 2645101622U, // : Cost 3 vext2 RHS, <4,2,5,3> - 2573912918U, // : Cost 3 vext1 <3,u,7,4>, <3,u,7,4> - 1571359952U, // : Cost 2 vext2 RHS, <4,4,4,4> - 497618248U, // : Cost 1 vext2 RHS, RHS - 1571360116U, // : Cost 2 vext2 RHS, <4,6,4,6> - 2645102024U, // : Cost 3 vext2 RHS, <4,7,5,0> - 497618473U, // : Cost 1 vext2 RHS, RHS - 2645102152U, // : Cost 3 vext2 RHS, <5,0,1,2> - 1571360464U, // : Cost 2 vext2 RHS, <5,1,7,3> - 2645102334U, // : Cost 3 vext2 RHS, <5,2,3,4> - 2645102447U, // : Cost 3 vext2 RHS, <5,3,7,0> - 1571360710U, // : Cost 2 vext2 RHS, <5,4,7,6> - 1571360772U, // : Cost 2 vext2 RHS, <5,5,5,5> - 1571360866U, // : Cost 2 vext2 RHS, <5,6,7,0> - 1571360936U, // : Cost 2 vext2 RHS, <5,7,5,7> - 1571361017U, // : Cost 2 vext2 RHS, <5,u,5,7> - 
1530044518U, // : Cost 2 vext1 , LHS - 2645103016U, // : Cost 3 vext2 RHS, <6,1,7,2> - 1571361274U, // : Cost 2 vext2 RHS, <6,2,7,3> - 2645103154U, // : Cost 3 vext2 RHS, <6,3,4,5> - 1530047798U, // : Cost 2 vext1 , RHS - 1188386474U, // : Cost 2 vrev <7,u,5,6> - 1571361592U, // : Cost 2 vext2 RHS, <6,6,6,6> - 1571361614U, // : Cost 2 vext2 RHS, <6,7,0,1> - 1571361695U, // : Cost 2 vext2 RHS, <6,u,0,1> - 1571361786U, // : Cost 2 vext2 RHS, <7,0,1,2> - 2573935616U, // : Cost 3 vext1 <3,u,7,7>, <1,3,5,7> - 2645103781U, // : Cost 3 vext2 RHS, <7,2,2,2> - 2573937497U, // : Cost 3 vext1 <3,u,7,7>, <3,u,7,7> - 1571362150U, // : Cost 2 vext2 RHS, <7,4,5,6> - 1512141067U, // : Cost 2 vext1 <5,u,7,7>, <5,u,7,7> - 1518113764U, // : Cost 2 vext1 <6,u,7,7>, <6,u,7,7> - 363253046U, // : Cost 1 vdup3 RHS - 363253046U, // : Cost 1 vdup3 RHS - 1571362515U, // : Cost 2 vext2 RHS, - 497620782U, // : Cost 1 vext2 RHS, LHS - 1571362693U, // : Cost 2 vext2 RHS, - 1571362748U, // : Cost 2 vext2 RHS, - 1571362879U, // : Cost 2 vext2 RHS, - 497621146U, // : Cost 1 vext2 RHS, RHS - 1571363024U, // : Cost 2 vext2 RHS, - 363253046U, // : Cost 1 vdup3 RHS - 497621349U, // : Cost 1 vext2 RHS, LHS - 135053414U, // : Cost 1 vdup0 LHS - 471081121U, // : Cost 1 vext2 LHS, LHS - 1544822948U, // : Cost 2 vext2 LHS, <0,2,0,2> - 1616140005U, // : Cost 2 vext3 LHS, - 1544823122U, // : Cost 2 vext2 LHS, <0,4,1,5> - 1512157453U, // : Cost 2 vext1 <5,u,u,0>, <5,u,u,0> - 1662220032U, // : Cost 2 vext3 RHS, - 1194457487U, // : Cost 2 vrev - 471081629U, // : Cost 1 vext2 LHS, LHS - 1544823542U, // : Cost 2 vext2 LHS, <1,0,3,2> - 202162278U, // : Cost 1 vdup1 LHS - 537753390U, // : Cost 1 vext3 LHS, LHS - 1544823768U, // : Cost 2 vext2 LHS, <1,3,1,3> - 1494248758U, // : Cost 2 vext1 <2,u,u,1>, RHS - 1544823952U, // : Cost 2 vext2 LHS, <1,5,3,7> - 1518138343U, // : Cost 2 vext1 <6,u,u,1>, <6,u,u,1> - 1640322907U, // : Cost 2 vext3 RHS, - 537753444U, // : Cost 1 vext3 LHS, LHS - 1482309734U, // : Cost 2 vext1 <0,u,u,2>, LHS - 1194031451U, // : Cost 2 vrev - 269271142U, // : Cost 1 vdup2 LHS - 835584U, // : Cost 0 copy LHS - 1482313014U, // : Cost 2 vext1 <0,u,u,2>, RHS - 2618566504U, // : Cost 3 vext2 LHS, <2,5,3,6> - 1544824762U, // : Cost 2 vext2 LHS, <2,6,3,7> - 1638479788U, // : Cost 2 vext3 RHS, - 835584U, // : Cost 0 copy LHS - 408576723U, // : Cost 1 vext1 LHS, LHS - 1482318582U, // : Cost 2 vext1 LHS, <1,0,3,2> - 120371557U, // : Cost 1 vrev LHS - 336380006U, // : Cost 1 vdup3 LHS - 408579382U, // : Cost 1 vext1 LHS, RHS - 1616140271U, // : Cost 2 vext3 LHS, - 1530098170U, // : Cost 2 vext1 LHS, <6,2,7,3> - 1880329544U, // : Cost 2 vzipr LHS, RHS - 408581934U, // : Cost 1 vext1 LHS, LHS - 1488298086U, // : Cost 2 vext1 <1,u,u,4>, LHS - 1488299437U, // : Cost 2 vext1 <1,u,u,4>, <1,u,u,4> - 1659271204U, // : Cost 2 vext3 LHS, - 1194195311U, // : Cost 2 vrev - 161926454U, // : Cost 1 vdup0 RHS - 471084342U, // : Cost 1 vext2 LHS, RHS - 1571368308U, // : Cost 2 vext2 RHS, <4,6,4,6> - 1640323153U, // : Cost 2 vext3 RHS, - 471084585U, // : Cost 1 vext2 LHS, RHS - 1494278246U, // : Cost 2 vext1 <2,u,u,5>, LHS - 1571368656U, // : Cost 2 vext2 RHS, <5,1,7,3> - 1494280327U, // : Cost 2 vext1 <2,u,u,5>, <2,u,u,5> - 1616140415U, // : Cost 2 vext3 LHS, - 1494281526U, // : Cost 2 vext1 <2,u,u,5>, RHS - 229035318U, // : Cost 1 vdup1 RHS - 537753754U, // : Cost 1 vext3 LHS, RHS - 1750355254U, // : Cost 2 vuzpr LHS, RHS - 537753772U, // : Cost 1 vext3 LHS, RHS - 1482342502U, // : Cost 2 vext1 <0,u,u,6>, LHS - 2556084982U, // : Cost 3 vext1 
<0,u,u,6>, <1,0,3,2> - 1571369466U, // : Cost 2 vext2 RHS, <6,2,7,3> - 1611938000U, // : Cost 2 vext3 LHS, - 1482345782U, // : Cost 2 vext1 <0,u,u,6>, RHS - 1194359171U, // : Cost 2 vrev - 296144182U, // : Cost 1 vdup2 RHS - 27705344U, // : Cost 0 copy RHS - 27705344U, // : Cost 0 copy RHS - 432496742U, // : Cost 1 vext1 RHS, LHS - 1488324016U, // : Cost 2 vext1 <1,u,u,7>, <1,u,u,7> - 1494296713U, // : Cost 2 vext1 <2,u,u,7>, <2,u,u,7> - 1906901148U, // : Cost 2 vzipr RHS, LHS - 432500283U, // : Cost 1 vext1 RHS, RHS - 1506242256U, // : Cost 2 vext1 RHS, <5,1,7,3> - 120699277U, // : Cost 1 vrev RHS - 363253046U, // : Cost 1 vdup3 RHS - 432502574U, // : Cost 1 vext1 RHS, LHS - 408617688U, // : Cost 1 vext1 LHS, LHS - 471086894U, // : Cost 1 vext2 LHS, LHS - 537753957U, // : Cost 1 vext3 LHS, LHS - 835584U, // : Cost 0 copy LHS - 408620342U, // : Cost 1 vext1 LHS, RHS - 471087258U, // : Cost 1 vext2 LHS, RHS - 537753997U, // : Cost 1 vext3 LHS, RHS - 27705344U, // : Cost 0 copy RHS - 835584U, // : Cost 0 copy LHS - 0 -};
diff --git a/lib/Target/ARM64/ARM64PromoteConstant.cpp b/lib/Target/ARM64/ARM64PromoteConstant.cpp
deleted file mode 100644
index 9fbaedb..0000000
--- a/lib/Target/ARM64/ARM64PromoteConstant.cpp
+++ /dev/null
@@ -1,585 +0,0 @@
-
-//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ARM64PromoteConstant pass, which promotes constants
-// to global variables when this is likely to be more efficient.
-// Currently only types related to constant vectors (i.e., constant vectors,
-// arrays of constant vectors, constant structures with a constant vector
-// field, etc.) are promoted to global variables.
-// Indeed, constant vectors are likely to be lowered into the target constant
-// pool during instruction selection.
-// Therefore, the access will remain the same (a memory load), but the
-// structure types are not split into different constant pool accesses for
-// each field.
-// The bonus side effect is that the created globals may be merged by the
-// global merge pass.
-//
-// FIXME: This pass may be useful for other targets too.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64-promote-const"
-#include "ARM64.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-
-using namespace llvm;
-
-// Stress testing mode - disable heuristics.
-static cl::opt<bool> Stress("arm64-stress-promote-const", cl::Hidden,
-                            cl::desc("Promote all vector constants"));
-
-STATISTIC(NumPromoted, "Number of promoted constants");
-STATISTIC(NumPromotedUses, "Number of promoted constant uses");
-
-//===----------------------------------------------------------------------===//
-//                       ARM64PromoteConstant
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// Promotes interesting constants into global variables.
-/// The motivating example is:
-/// static const uint16_t TableA[32] = {
-///   41944, 40330, 38837, 37450, 36158, 34953, 33826, 32768,
-///   31776, 30841, 29960, 29128, 28340, 27595, 26887, 26215,
-///   25576, 24967, 24386, 23832, 23302, 22796, 22311, 21846,
-///   21400, 20972, 20561, 20165, 19785, 19419, 19066, 18725,
-/// };
-///
-/// uint8x16x4_t LoadStatic(void) {
-///   uint8x16x4_t ret;
-///   ret.val[0] = vld1q_u16(TableA + 0);
-///   ret.val[1] = vld1q_u16(TableA + 8);
-///   ret.val[2] = vld1q_u16(TableA + 16);
-///   ret.val[3] = vld1q_u16(TableA + 24);
-///   return ret;
-/// }
-///
-/// The constants in that example are folded into the uses. Thus, 4 different
-/// constants are created.
-/// As their type is a vector, the cheapest way to create them is to load them
-/// from memory.
-/// Therefore the final assembly has 4 different loads.
-/// With this pass enabled, only one load is issued for the constants.
-class ARM64PromoteConstant : public ModulePass {
-
-public:
-  static char ID;
-  ARM64PromoteConstant() : ModulePass(ID) {}
-
-  virtual const char *getPassName() const { return "ARM64 Promote Constant"; }
-
-  /// Iterate over the functions and promote the interesting constants into
-  /// global variables with module scope.
-  bool runOnModule(Module &M) {
-    DEBUG(dbgs() << getPassName() << '\n');
-    bool Changed = false;
-    for (auto &MF: M) {
-      Changed |= runOnFunction(MF);
-    }
-    return Changed;
-  }
-
-private:
-  /// Look for interesting constants used within the given function.
-  /// Promote them into global variables, and load these global variables
-  /// within the related function, so that the number of inserted loads is
-  /// minimal.
-  bool runOnFunction(Function &F);
-
-  // This transformation requires dominator info.
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.setPreservesCFG();
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addPreserved<DominatorTreeWrapperPass>();
-  }
-
-  /// Type to store a list of Users.
-  typedef SmallVector<Value::user_iterator, 4> Users;
-  /// Map an insertion point to all the uses it dominates.
-  typedef DenseMap<Instruction *, Users> InsertionPoints;
-  /// Map a function to the required insertion point of load for a
-  /// global variable.
-  typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
-
-  /// Find the closest point that dominates the given Use.
-  Instruction *findInsertionPoint(Value::user_iterator &Use);
-
-  /// Check if the given insertion point is dominated by an existing
-  /// insertion point.
-  /// If true, the given use is added to the list of dominated uses for
-  /// the related existing point.
-  /// \param NewPt the insertion point to be checked
-  /// \param UseIt the use to be added into the list of dominated uses
-  /// \param InsertPts existing insertion points
-  /// \pre NewPt and all instructions in InsertPts belong to the same function
-  /// \return true if one of the insertion points in InsertPts dominates NewPt,
-  ///         false otherwise
-  bool isDominated(Instruction *NewPt, Value::user_iterator &UseIt,
-                   InsertionPoints &InsertPts);
-
-  /// Check if the given insertion point can be merged with an existing
-  /// insertion point in a common dominator.
-  /// If true, the given use is added to the list of the created insertion
-  /// point.
-  /// \param NewPt the insertion point to be checked
-  /// \param UseIt the use to be added into the list of dominated uses
-  /// \param InsertPts existing insertion points
-  /// \pre NewPt and all instructions in InsertPts belong to the same function
-  /// \pre isDominated returns false for the exact same parameters.
-  /// \return true if there exists an insertion point in InsertPts that could
-  ///         have been merged with NewPt in a common dominator,
-  ///         false otherwise
-  bool tryAndMerge(Instruction *NewPt, Value::user_iterator &UseIt,
-                   InsertionPoints &InsertPts);
-
-  /// Compute the minimal insertion points to dominate all the interesting
-  /// uses of Val.
-  /// Insertion points are grouped per function, and each insertion point
-  /// contains a list of all the uses it dominates within the related function.
-  /// \param Val constant to be examined
-  /// \param[out] InsPtsPerFunc output storage of the analysis
-  void computeInsertionPoints(Constant *Val,
-                              InsertionPointsPerFunc &InsPtsPerFunc);
-
-  /// Insert a definition of a new global variable at each point contained in
-  /// InsPtsPerFunc and update the related uses (also contained in
-  /// InsPtsPerFunc).
-  bool insertDefinitions(Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc);
-
-  /// Compute the minimal insertion points to dominate all the interesting
-  /// uses of Val and insert a definition of a new global variable
-  /// at these points.
-  /// Also update the uses of Val accordingly.
-  /// Currently a use of Val is considered interesting if:
-  /// - Val is not UndefValue
-  /// - Val is not zeroinitialized
-  /// - Replacing Val by a load of a global variable is valid.
-  /// \see shouldConvert for more details
-  bool computeAndInsertDefinitions(Constant *Val);
-
-  /// Promote the given constant into a global variable if it is expected to
-  /// be profitable.
-  /// \return true if Cst has been promoted
-  bool promoteConstant(Constant *Cst);
-
-  /// Transfer the list of dominated uses of IPI to NewPt in InsertPts.
-  /// Append UseIt to this list and delete the entry of IPI in InsertPts.
-  static void appendAndTransferDominatedUses(Instruction *NewPt,
-                                             Value::user_iterator &UseIt,
-                                             InsertionPoints::iterator &IPI,
-                                             InsertionPoints &InsertPts) {
-    // Record the dominated use.
-    IPI->second.push_back(UseIt);
-    // Transfer the dominated uses of IPI to NewPt.
-    // Inserting into the DenseMap may invalidate the existing iterator.
-    // Keep a copy of the key to find the iterator to erase.
-    Instruction *OldInstr = IPI->first;
-    InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second));
-    // Erase IPI.
-    IPI = InsertPts.find(OldInstr);
-    InsertPts.erase(IPI);
-  }
-};
-} // end anonymous namespace
-
-char ARM64PromoteConstant::ID = 0;
-
-namespace llvm {
-void initializeARM64PromoteConstantPass(PassRegistry &);
-}
-
-INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const",
-                      "ARM64 Promote Constant Pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const",
-                    "ARM64 Promote Constant Pass", false, false)
-
-ModulePass *llvm::createARM64PromoteConstantPass() {
-  return new ARM64PromoteConstant();
-}
-
-/// Check if the given type uses a vector type.
-static bool isConstantUsingVectorTy(const Type *CstTy) {
-  if (CstTy->isVectorTy())
-    return true;
-  if (CstTy->isStructTy()) {
-    for (unsigned EltIdx = 0, EndEltIdx = CstTy->getStructNumElements();
-         EltIdx < EndEltIdx; ++EltIdx)
-      if (isConstantUsingVectorTy(CstTy->getStructElementType(EltIdx)))
-        return true;
-  } else if (CstTy->isArrayTy())
-    return isConstantUsingVectorTy(CstTy->getArrayElementType());
-  return false;
-}
-
-/// Check if the given use (Instruction + OpIdx) of Cst should be converted
-/// into a load of a global variable initialized with Cst.
-/// A use should be converted if it is legal to do so.
-/// For instance, it is not legal to turn the mask operand of a shuffle vector
-/// into a load of a global variable.
-static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
-                             unsigned OpIdx) {
-  // shufflevector instruction expects a const for the mask argument, i.e., the
-  // third argument. Do not promote this use in that case.
-  if (isa<ShuffleVectorInst>(Instr) && OpIdx == 2)
-    return false;
-
-  // extractvalue instruction expects a const idx.
-  if (isa<ExtractValueInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // insertvalue instruction expects a const idx.
-  if (isa<InsertValueInst>(Instr) && OpIdx > 1)
-    return false;
-
-  if (isa<AllocaInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Alignment argument must be constant.
-  if (isa<LoadInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Alignment argument must be constant.
-  if (isa<StoreInst>(Instr) && OpIdx > 1)
-    return false;
-
-  // Index must be constant.
-  if (isa<GetElementPtrInst>(Instr) && OpIdx > 0)
-    return false;
-
-  // Personality function and filters must be constant.
-  // Give up on that instruction.
-  if (isa<LandingPadInst>(Instr))
-    return false;
-
-  // switch instruction expects constants to compare to.
-  if (isa<SwitchInst>(Instr))
-    return false;
-
-  // Expected address must be a constant.
-  if (isa<IndirectBrInst>(Instr))
-    return false;
-
-  // Do not mess with intrinsics.
-  if (isa<IntrinsicInst>(Instr))
-    return false;
-
-  // Do not mess with inline asm.
-  const CallInst *CI = dyn_cast<CallInst>(Instr);
-  if (CI && isa<InlineAsm>(CI->getCalledValue()))
-    return false;
-
-  return true;
-}
-
-/// Check if the given Cst should be converted into
-/// a load of a global variable initialized with Cst.
-/// A constant should be converted if it is likely that the materialization of
-/// the constant will be tricky. Thus, we give up on zero or undef values.
-///
-/// \todo Currently, accept only vector-related types.
-/// Also we give up on all simple vector types to keep the existing
-/// behavior. Otherwise, we should push here all the checks of the lowering of
-/// BUILD_VECTOR.
-/// By giving up, we lose the potential benefit of merging
-/// constants via global merge and the fact that the same constant is stored
-/// only once with this method (versus once per function that uses the
-/// constant, for the regular approach, even for floats).
-/// Again, the simplest solution would be to promote every
-/// constant and rematerialize them when they are actually cheap to create.
-static bool shouldConvert(const Constant *Cst) {
-  if (isa<UndefValue>(Cst))
-    return false;
-
-  // FIXME: In some cases, it may be interesting to promote in memory
-  // a zero initialized constant.
-  // E.g., when the type of Cst requires more instructions than the
-  // adrp/add/load sequence or when this sequence can be shared by several
-  // instances of Cst.
-  // Ideally, we could promote this into a global and rematerialize the
-  // constant when it was a bad idea.
-  if (Cst->isZeroValue())
-    return false;
-
-  if (Stress)
-    return true;
-
-  // FIXME: see function \todo
-  if (Cst->getType()->isVectorTy())
-    return false;
-  return isConstantUsingVectorTy(Cst->getType());
-}
-
-Instruction *
-ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
-  // If this user is a phi, the insertion point is in the related
-  // incoming basic block.
-  PHINode *PhiInst = dyn_cast<PHINode>(*Use);
-  Instruction *InsertionPoint;
-  if (PhiInst)
-    InsertionPoint =
-        PhiInst->getIncomingBlock(Use.getOperandNo())->getTerminator();
-  else
-    InsertionPoint = dyn_cast<Instruction>(*Use);
-  assert(InsertionPoint && "User is not an instruction!");
-  return InsertionPoint;
-}
-
-bool ARM64PromoteConstant::isDominated(Instruction *NewPt,
-                                       Value::user_iterator &UseIt,
-                                       InsertionPoints &InsertPts) {
-
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
-      *NewPt->getParent()->getParent()).getDomTree();
-
-  // Traverse all the existing insertion points and check if one dominates
-  // NewPt.
-  for (InsertionPoints::iterator IPI = InsertPts.begin(),
-                                 EndIPI = InsertPts.end();
-       IPI != EndIPI; ++IPI) {
-    if (NewPt == IPI->first || DT.dominates(IPI->first, NewPt) ||
-        // When IPI->first is a terminator instruction, DT may think that
-        // the result is defined on the edge.
-        // Here we are testing the insertion point, not the definition.
-        (IPI->first->getParent() != NewPt->getParent() &&
-         DT.dominates(IPI->first->getParent(), NewPt->getParent()))) {
-      // No need to insert this point.
-      // Record the dominated use.
-      DEBUG(dbgs() << "Insertion point dominated by:\n");
-      DEBUG(IPI->first->print(dbgs()));
-      DEBUG(dbgs() << '\n');
-      IPI->second.push_back(UseIt);
-      return true;
-    }
-  }
-  return false;
-}
-
-bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt,
-                                       Value::user_iterator &UseIt,
-                                       InsertionPoints &InsertPts) {
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
-      *NewPt->getParent()->getParent()).getDomTree();
-  BasicBlock *NewBB = NewPt->getParent();
-
-  // Traverse all the existing insertion points and check if one is dominated
-  // by NewPt and thus useless, or if one can be combined with NewPt into a
-  // common dominator.
-  for (InsertionPoints::iterator IPI = InsertPts.begin(),
-                                 EndIPI = InsertPts.end();
-       IPI != EndIPI; ++IPI) {
-    BasicBlock *CurBB = IPI->first->getParent();
-    if (NewBB == CurBB) {
-      // Instructions are in the same block.
-      // By construction, NewPt is dominating the other.
-      // Indeed, isDominated returned false with the exact same arguments.
-bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt,
-                                       Value::user_iterator &UseIt,
-                                       InsertionPoints &InsertPts) {
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
-      *NewPt->getParent()->getParent()).getDomTree();
-  BasicBlock *NewBB = NewPt->getParent();
-
-  // Traverse all the existing insertion points and check if one is dominated
-  // by NewPt, and thus useless, or can be combined with NewPt into a common
-  // dominator.
-  for (InsertionPoints::iterator IPI = InsertPts.begin(),
-                                 EndIPI = InsertPts.end();
-       IPI != EndIPI; ++IPI) {
-    BasicBlock *CurBB = IPI->first->getParent();
-    if (NewBB == CurBB) {
-      // Instructions are in the same block.
-      // By construction, NewPt is dominating the other.
-      // Indeed, isDominated returned false with the exact same arguments.
-      DEBUG(dbgs() << "Merge insertion point with:\n");
-      DEBUG(IPI->first->print(dbgs()));
-      DEBUG(dbgs() << "\nat considered insertion point.\n");
-      appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
-      return true;
-    }
-
-    // Look for a common dominator.
-    BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB);
-    // If none exists, we cannot merge these two points.
-    if (!CommonDominator)
-      continue;
-
-    if (CommonDominator != NewBB) {
-      // By construction, the CommonDominator cannot be CurBB.
-      assert(CommonDominator != CurBB &&
-             "Instruction has not been rejected during isDominated check!");
-      // Take the last instruction of the CommonDominator as insertion point.
-      NewPt = CommonDominator->getTerminator();
-    }
-    // else, CommonDominator is the block of NewBB, hence NewBB is the last
-    // possible insertion point in that block.
-    DEBUG(dbgs() << "Merge insertion point with:\n");
-    DEBUG(IPI->first->print(dbgs()));
-    DEBUG(dbgs() << '\n');
-    DEBUG(NewPt->print(dbgs()));
-    DEBUG(dbgs() << '\n');
-    appendAndTransferDominatedUses(NewPt, UseIt, IPI, InsertPts);
-    return true;
-  }
-  return false;
-}
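The hoisting step in tryAndMerge boils down to a single findNearestCommonDominator query. As a standalone sketch, assuming the two candidate points live in different blocks (mergedInsertionPoint is an illustrative name, not part of the pass):

    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    // Illustrative only: pick a single point from which a load would reach
    // both A and B, mirroring the CommonDominator logic above.
    static Instruction *mergedInsertionPoint(Instruction *A, Instruction *B,
                                             DominatorTree &DT) {
      BasicBlock *Dom =
          DT.findNearestCommonDominator(A->getParent(), B->getParent());
      if (!Dom)
        return nullptr; // No common dominator: the points cannot be merged.
      if (Dom == A->getParent())
        return A; // A's block dominates B's block, so A itself works.
      if (Dom == B->getParent())
        return B;
      // Otherwise the last instruction of the common dominator is the
      // latest point that still dominates both uses.
      return Dom->getTerminator();
    }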
-void ARM64PromoteConstant::computeInsertionPoints(
-    Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
-  DEBUG(dbgs() << "** Compute insertion points **\n");
-  for (Value::user_iterator UseIt = Val->user_begin(),
-                            EndUseIt = Val->user_end();
-       UseIt != EndUseIt; ++UseIt) {
-    // If the user is not an Instruction, we cannot modify it.
-    if (!isa<Instruction>(*UseIt))
-      continue;
-
-    // Filter out uses that should not be converted.
-    if (!shouldConvertUse(Val, cast<Instruction>(*UseIt),
-                          UseIt.getOperandNo()))
-      continue;
-
-    DEBUG(dbgs() << "Considered use, opidx " << UseIt.getOperandNo() << ":\n");
-    DEBUG((*UseIt)->print(dbgs()));
-    DEBUG(dbgs() << '\n');
-
-    Instruction *InsertionPoint = findInsertionPoint(UseIt);
-
-    DEBUG(dbgs() << "Considered insertion point:\n");
-    DEBUG(InsertionPoint->print(dbgs()));
-    DEBUG(dbgs() << '\n');
-
-    // Check if the current insertion point is useless, i.e., it is dominated
-    // by another one.
-    InsertionPoints &InsertPts =
-        InsPtsPerFunc[InsertionPoint->getParent()->getParent()];
-    if (isDominated(InsertionPoint, UseIt, InsertPts))
-      continue;
-    // This insertion point is useful; check if we can merge some insertion
-    // points in a common dominator or if NewPt dominates an existing one.
-    if (tryAndMerge(InsertionPoint, UseIt, InsertPts))
-      continue;
-
-    DEBUG(dbgs() << "Keep considered insertion point\n");
-
-    // It is definitely useful by its own.
-    InsertPts[InsertionPoint].push_back(UseIt);
-  }
-}
-
-bool
-ARM64PromoteConstant::insertDefinitions(Constant *Cst,
-                                        InsertionPointsPerFunc &InsPtsPerFunc) {
-  // We will create one global variable per Module.
-  DenseMap<Module *, GlobalVariable *> ModuleToMergedGV;
-  bool HasChanged = false;
-
-  // Traverse all insertion points in all the functions.
-  for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
-                                        EndIt = InsPtsPerFunc.end();
-       FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
-    InsertionPoints &InsertPts = FctToInstPtsIt->second;
-// Do more checks for debug purposes.
-#ifndef NDEBUG
-    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
-        *FctToInstPtsIt->first).getDomTree();
-#endif
-    GlobalVariable *PromotedGV;
-    assert(!InsertPts.empty() && "Empty uses do not need a definition");
-
-    Module *M = FctToInstPtsIt->first->getParent();
-    DenseMap<Module *, GlobalVariable *>::iterator MapIt =
-        ModuleToMergedGV.find(M);
-    if (MapIt == ModuleToMergedGV.end()) {
-      PromotedGV = new GlobalVariable(
-          *M, Cst->getType(), true, GlobalValue::InternalLinkage, 0,
-          "_PromotedConst", 0, GlobalVariable::NotThreadLocal);
-      PromotedGV->setInitializer(Cst);
-      ModuleToMergedGV[M] = PromotedGV;
-      DEBUG(dbgs() << "Global replacement: ");
-      DEBUG(PromotedGV->print(dbgs()));
-      DEBUG(dbgs() << '\n');
-      ++NumPromoted;
-      HasChanged = true;
-    } else {
-      PromotedGV = MapIt->second;
-    }
-
-    for (InsertionPoints::iterator IPI = InsertPts.begin(),
-                                   EndIPI = InsertPts.end();
-         IPI != EndIPI; ++IPI) {
-      // Create the load of the global variable.
-      IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
-      LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
-      DEBUG(dbgs() << "**********\n");
-      DEBUG(dbgs() << "New def: ");
-      DEBUG(LoadedCst->print(dbgs()));
-      DEBUG(dbgs() << '\n');
-
-      // Update the dominated uses.
-      Users &DominatedUsers = IPI->second;
-      for (Users::iterator UseIt = DominatedUsers.begin(),
-                           EndIt = DominatedUsers.end();
-           UseIt != EndIt; ++UseIt) {
-#ifndef NDEBUG
-        assert((DT.dominates(LoadedCst, cast<Instruction>(**UseIt)) ||
-                (isa<PHINode>(**UseIt) &&
-                 DT.dominates(LoadedCst, findInsertionPoint(*UseIt)))) &&
-               "Inserted definition does not dominate all its uses!");
-#endif
-        DEBUG(dbgs() << "Use to update " << UseIt->getOperandNo() << ":");
-        DEBUG((*UseIt)->print(dbgs()));
-        DEBUG(dbgs() << '\n');
-        (*UseIt)->setOperand(UseIt->getOperandNo(), LoadedCst);
-        ++NumPromotedUses;
-      }
-    }
-  }
-  return HasChanged;
-}
-
-bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
-  InsertionPointsPerFunc InsertPtsPerFunc;
-  computeInsertionPoints(Val, InsertPtsPerFunc);
-  return insertDefinitions(Val, InsertPtsPerFunc);
-}
-
-bool ARM64PromoteConstant::promoteConstant(Constant *Cst) {
-  assert(Cst && "Given variable is not a valid constant.");
-
-  if (!shouldConvert(Cst))
-    return false;
-
-  DEBUG(dbgs() << "******************************\n");
-  DEBUG(dbgs() << "Candidate constant: ");
-  DEBUG(Cst->print(dbgs()));
-  DEBUG(dbgs() << '\n');
-
-  return computeAndInsertDefinitions(Cst);
-}
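Per insertion point, the rewrite itself is small: create the internal constant global once, load it at the chosen point, and redirect each dominated use to the load. A minimal sketch against the same 3.5-era APIs (rewriteSingleUse is an illustrative name; the pass additionally reuses one global per module and batches uses per insertion point):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Illustrative only: materialize Cst once as an internal constant
    // global, then satisfy one use through a load inserted at InsertPt.
    static void rewriteSingleUse(Constant *Cst, Instruction *InsertPt,
                                 Instruction *UserInst, unsigned OpIdx,
                                 Module &M) {
      GlobalVariable *GV = new GlobalVariable(
          M, Cst->getType(), /*isConstant=*/true,
          GlobalValue::InternalLinkage, Cst, "_PromotedConst");
      IRBuilder<> Builder(InsertPt); // Inserts before InsertPt, as the pass does.
      LoadInst *Load = Builder.CreateLoad(GV);
      UserInst->setOperand(OpIdx, Load); // Load dominates the use by construction.
    }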
-bool ARM64PromoteConstant::runOnFunction(Function &F) {
-  // Look for instructions using constant vectors.
-  // Promote each such constant to a global variable.
-  // Create as few loads of this variable as possible and update the uses
-  // accordingly.
-  bool LocalChange = false;
-  SmallSet<Constant *, 8> AlreadyChecked;
-
-  for (auto &MBB : F) {
-    for (auto &MI : MBB) {
-      // Traverse the operands, looking for constant vectors.
-      // Replace them by a load of a global variable of constant vector type.
-      for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands();
-           OpIdx != EndOpIdx; ++OpIdx) {
-        Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx));
-        // There is no point in promoting global values; they are already
-        // global. Do not promote constant expressions either, as they may
-        // require some code expansion.
-        if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
-            AlreadyChecked.insert(Cst))
-          LocalChange |= promoteConstant(Cst);
-      }
-    }
-  }
-  return LocalChange;
-}
diff --git a/lib/Target/ARM64/ARM64RegisterInfo.cpp b/lib/Target/ARM64/ARM64RegisterInfo.cpp
deleted file mode 100644
index 4c7fc8a..0000000
--- a/lib/Target/ARM64/ARM64RegisterInfo.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-//===- ARM64RegisterInfo.cpp - ARM64 Register Information ----------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64RegisterInfo.h"
-#include "ARM64FrameLowering.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64Subtarget.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetOptions.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "ARM64GenRegisterInfo.inc"
-
-using namespace llvm;
-
-ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii,
-                                     const ARM64Subtarget *sti)
-    : ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {}
-
-const uint16_t *
-ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
-  assert(MF && "Invalid MachineFunction pointer.");
-  if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
-    return CSR_ARM64_AllRegs_SaveList;
-  else
-    return CSR_ARM64_AAPCS_SaveList;
-}
-
-const uint32_t *
-ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
-  if (CC == CallingConv::AnyReg)
-    return CSR_ARM64_AllRegs_RegMask;
-  else
-    return CSR_ARM64_AAPCS_RegMask;
-}
-
-const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const {
-  if (STI->isTargetDarwin())
-    return CSR_ARM64_TLS_Darwin_RegMask;
-
-  assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
-  return CSR_ARM64_TLS_ELF_RegMask;
-}
-
-const uint32_t *
-ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
-  // This should return a register mask that is the same as that returned by
-  // getCallPreservedMask but that additionally preserves the register used for
-  // the first i64 argument (which must also be the register used to return a
-  // single i64 return value).
-  //
-  // In case that the calling convention does not use the same register for
-  // both, the function should return NULL
(does not currently apply) - return CSR_ARM64_AAPCS_ThisReturn_RegMask; -} - -BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - // FIXME: avoid re-calculating this everytime. - BitVector Reserved(getNumRegs()); - Reserved.set(ARM64::SP); - Reserved.set(ARM64::XZR); - Reserved.set(ARM64::WSP); - Reserved.set(ARM64::WZR); - - if (TFI->hasFP(MF) || STI->isTargetDarwin()) { - Reserved.set(ARM64::FP); - Reserved.set(ARM64::W29); - } - - if (STI->isTargetDarwin()) { - Reserved.set(ARM64::X18); // Platform register - Reserved.set(ARM64::W18); - } - - if (hasBasePointer(MF)) { - Reserved.set(ARM64::X19); - Reserved.set(ARM64::W19); - } - - return Reserved; -} - -bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF, - unsigned Reg) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (Reg) { - default: - break; - case ARM64::SP: - case ARM64::XZR: - case ARM64::WSP: - case ARM64::WZR: - return true; - case ARM64::X18: - case ARM64::W18: - return STI->isTargetDarwin(); - case ARM64::FP: - case ARM64::W29: - return TFI->hasFP(MF) || STI->isTargetDarwin(); - case ARM64::W19: - case ARM64::X19: - return hasBasePointer(MF); - } - - return false; -} - -const TargetRegisterClass * -ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const { - return &ARM64::GPR64RegClass; -} - -const TargetRegisterClass * -ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { - if (RC == &ARM64::CCRRegClass) - return NULL; // Can't copy CPSR. - return RC; -} - -unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; } - -bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - // In the presence of variable sized objects, if the fixed stack size is - // large enough that referencing from the FP won't result in things being - // in range relatively often, we can use a base pointer to allow access - // from the other direction like the SP normally works. - if (MFI->hasVarSizedObjects()) { - // Conservatively estimate whether the negative offset from the frame - // pointer will be sufficient to reach. If a function has a smallish - // frame, it's less likely to have lots of spills and callee saved - // space, so it's all more likely to be within range of the frame pointer. - // If it's wrong, we'll materialize the constant and still get to the - // object; it's just suboptimal. Negative offsets use the unscaled - // load/store instructions, which have a 9-bit signed immediate. - if (MFI->getLocalFrameSize() < 256) - return false; - return true; - } - - return false; -} - -unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - return TFI->hasFP(MF) ? 
ARM64::FP : ARM64::SP; -} - -bool -ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { - return true; -} - -bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF) - const { - return true; -} - -bool -ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // ARM64FrameLowering::resolveFrameIndexReference() can always fall back - // to the stack pointer, so only put the emergency spill slot next to the - // FP when there's no better way to access it (SP or base pointer). - return MFI->hasVarSizedObjects() && !hasBasePointer(MF); -} - -bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) - const { - return true; -} - -bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - // Only consider eliminating leaf frames. - if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) && - MFI->adjustsStack())) - return true; - return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); -} - -/// needsFrameBaseReg - Returns true if the instruction's frame index -/// reference would be better served by a base register other than FP -/// or SP. Used by LocalStackFrameAllocation to determine which frame index -/// references it should create new base registers for. -bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI, - int64_t Offset) const { - for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) - assert(i < MI->getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - - // It's the load/store FI references that cause issues, as it can be difficult - // to materialize the offset if it won't fit in the literal field. Estimate - // based on the size of the local frame and some conservative assumptions - // about the rest of the stack frame (note, this is pre-regalloc, so - // we don't know everything for certain yet) whether this offset is likely - // to be out of range of the immediate. Return true if so. - - // We only generate virtual base registers for loads and stores, so - // return false for everything else. - if (!MI->mayLoad() && !MI->mayStore()) - return false; - - // Without a virtual base register, if the function has variable sized - // objects, all fixed-size local references will be via the frame pointer, - // Approximate the offset and see if it's legal for the instruction. - // Note that the incoming offset is based on the SP value at function entry, - // so it'll be negative. - MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - // Estimate an offset from the frame pointer. - // Conservatively assume all GPR callee-saved registers get pushed. - // FP, LR, X19-X28, D8-D15. 64-bits each. - int64_t FPOffset = Offset - 16 * 20; - // Estimate an offset from the stack pointer. - // The incoming offset is relating to the SP at the start of the function, - // but when we access the local it'll be relative to the SP after local - // allocation, so adjust our SP-relative offset by that allocation size. - Offset += MFI->getLocalFrameSize(); - // Assume that we'll have at least some spill slots allocated. - // FIXME: This is a total SWAG number. We should run some statistics - // and pick a real one. - Offset += 128; // 128 bytes of spill slots - - // If there is a frame pointer, try using it. 
-  // The FP is only available if there is no dynamic realignment. We
-  // don't know for sure yet whether we'll need that, so we guess based
-  // on whether there are any local variables that would trigger it.
-  if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
-    return false;
-
-  // If we can reference via the stack pointer or base pointer, try that.
-  // FIXME: This (and the code that resolves the references) can be improved
-  //        to only disallow SP relative references in the live range of
-  //        the VLA(s). In practice, it's unclear how much difference that
-  //        would make, but it may be worth doing.
-  if (isFrameOffsetLegal(MI, Offset))
-    return false;
-
-  // The offset likely isn't legal; we want to allocate a virtual base
-  // register.
-  return true;
-}
-
-bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
-                                           int64_t Offset) const {
-  assert(Offset <= INT_MAX && "Offset too big to fit in int.");
-  assert(MI && "Unable to get the legal offset for nil instruction.");
-  int SaveOffset = Offset;
-  return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal;
-}
-
-/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
-/// at the beginning of the basic block.
-void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
-                                                     unsigned BaseReg,
-                                                     int FrameIdx,
-                                                     int64_t Offset) const {
-  MachineBasicBlock::iterator Ins = MBB->begin();
-  DebugLoc DL; // Defaults to "unknown".
-  if (Ins != MBB->end())
-    DL = Ins->getDebugLoc();
-
-  const MCInstrDesc &MCID = TII->get(ARM64::ADDXri);
-  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-  const MachineFunction &MF = *MBB->getParent();
-  MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
-  unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0);
-
-  BuildMI(*MBB, Ins, DL, MCID, BaseReg)
-      .addFrameIndex(FrameIdx)
-      .addImm(Offset)
-      .addImm(Shifter);
-}
-
-void ARM64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
-                                          int64_t Offset) const {
-  int Off = Offset; // ARM doesn't need the general 64-bit offsets.
-  unsigned i = 0;
-
-  while (!MI.getOperand(i).isFI()) {
-    ++i;
-    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
-  }
-  bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII);
-  assert(Done && "Unable to resolve frame index!");
-  (void)Done;
-}
-
-void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
-                                            int SPAdj, unsigned FIOperandNum,
-                                            RegScavenger *RS) const {
-  assert(SPAdj == 0 && "Unexpected");
-
-  MachineInstr &MI = *II;
-  MachineBasicBlock &MBB = *MI.getParent();
-  MachineFunction &MF = *MBB.getParent();
-  const ARM64FrameLowering *TFI = static_cast<const ARM64FrameLowering *>(
-      MF.getTarget().getFrameLowering());
-
-  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
-  unsigned FrameReg;
-  int Offset;
-
-  // Special handling of dbg_value, stackmap and patchpoint instructions.
- if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP || - MI.getOpcode() == TargetOpcode::PATCHPOINT) { - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, - /*PreferFP=*/true); - Offset += MI.getOperand(FIOperandNum + 1).getImm(); - MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } - - // Modify MI as necessary to handle as much of 'Offset' as possible - Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg); - if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII)) - return; - - assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) && - "Emergency spill slot is out of reach"); - - // If we get here, the immediate doesn't fit into the instruction. We folded - // as much as possible above. Handle the rest, providing a register that is - // SP+LargeImm. - unsigned ScratchReg = - MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass); - emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII); - MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true); -} - -namespace llvm { - -unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (RC->getID()) { - default: - return 0; - case ARM64::GPR32RegClassID: - case ARM64::GPR32spRegClassID: - case ARM64::GPR32allRegClassID: - case ARM64::GPR64spRegClassID: - case ARM64::GPR64allRegClassID: - case ARM64::GPR64RegClassID: - case ARM64::GPR32commonRegClassID: - case ARM64::GPR64commonRegClassID: - return 32 - 1 // XZR/SP - - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP - - STI->isTargetDarwin() // X18 reserved as platform register - - hasBasePointer(MF); // X19 - case ARM64::FPR8RegClassID: - case ARM64::FPR16RegClassID: - case ARM64::FPR32RegClassID: - case ARM64::FPR64RegClassID: - case ARM64::FPR128RegClassID: - return 32; - - case ARM64::DDRegClassID: - case ARM64::DDDRegClassID: - case ARM64::DDDDRegClassID: - case ARM64::QQRegClassID: - case ARM64::QQQRegClassID: - case ARM64::QQQQRegClassID: - return 32; - - case ARM64::FPR128_loRegClassID: - return 16; - } -} - -} // namespace llvm diff --git a/lib/Target/ARM64/ARM64RegisterInfo.h b/lib/Target/ARM64/ARM64RegisterInfo.h deleted file mode 100644 index 31d9242..0000000 --- a/lib/Target/ARM64/ARM64RegisterInfo.h +++ /dev/null @@ -1,101 +0,0 @@ -//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 implementation of the MRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64REGISTERINFO_H -#define LLVM_TARGET_ARM64REGISTERINFO_H - -#define GET_REGINFO_HEADER -#include "ARM64GenRegisterInfo.inc" - -namespace llvm { - -class ARM64InstrInfo; -class ARM64Subtarget; -class MachineFunction; -class RegScavenger; -class TargetRegisterClass; - -struct ARM64RegisterInfo : public ARM64GenRegisterInfo { -private: - const ARM64InstrInfo *TII; - const ARM64Subtarget *STI; - -public: - ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti); - - bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; - - /// Code Generation virtual methods... - const uint16_t * - getCalleeSavedRegs(const MachineFunction *MF = 0) const override; - const uint32_t *getCallPreservedMask(CallingConv::ID) const override; - - unsigned getCSRFirstUseCost() const { - // The cost will be compared against BlockFrequency where entry has the - // value of 1 << 14. A value of 5 will choose to spill or split really - // cold path instead of using a callee-saved register. - return 5; - } - - // Calls involved in thread-local variable lookup save more registers than - // normal calls, so they need a different mask to represent this. - const uint32_t *getTLSCallPreservedMask() const; - - /// getThisReturnPreservedMask - Returns a call preserved mask specific to the - /// case that 'returned' is on an i64 first argument if the calling convention - /// is one that can (partially) model this attribute with a preserved mask - /// (i.e. it is a calling convention that uses the same register for the first - /// i64 argument and an i64 return value) - /// - /// Should return NULL in the case that the calling convention does not have - /// this property - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; - - BitVector getReservedRegs(const MachineFunction &MF) const override; - const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, - unsigned Kind = 0) const override; - const TargetRegisterClass * - getCrossCopyRegClass(const TargetRegisterClass *RC) const override; - - bool requiresRegisterScavenging(const MachineFunction &MF) const override; - bool useFPForScavengingIndex(const MachineFunction &MF) const override; - bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; - - bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; - bool isFrameOffsetLegal(const MachineInstr *MI, - int64_t Offset) const override; - void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, - int FrameIdx, - int64_t Offset) const override; - void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, - int64_t Offset) const override; - void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, - unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; - bool cannotEliminateFrame(const MachineFunction &MF) const; - - bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override; - bool hasBasePointer(const MachineFunction &MF) const; - unsigned getBaseRegister() const; - - // Debug information queries. 
- unsigned getFrameRegister(const MachineFunction &MF) const override; - - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const override; -}; - -} // end namespace llvm - -#endif // LLVM_TARGET_ARM64REGISTERINFO_H diff --git a/lib/Target/ARM64/ARM64RegisterInfo.td b/lib/Target/ARM64/ARM64RegisterInfo.td deleted file mode 100644 index 96001c5..0000000 --- a/lib/Target/ARM64/ARM64RegisterInfo.td +++ /dev/null @@ -1,561 +0,0 @@ -//===- ARM64RegisterInfo.td - Describe the ARM64 Regisers --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - - -class ARM64Reg enc, string n, list subregs = [], - list altNames = []> - : Register { - let HWEncoding = enc; - let Namespace = "ARM64"; - let SubRegs = subregs; -} - -let Namespace = "ARM64" in { - def sub_32 : SubRegIndex<32>; - - def bsub : SubRegIndex<8>; - def hsub : SubRegIndex<16>; - def ssub : SubRegIndex<32>; - def dsub : SubRegIndex<32>; - def qhisub : SubRegIndex<64>; - def qsub : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def dsub0 : SubRegIndex<64>; - def dsub1 : SubRegIndex<64>; - def dsub2 : SubRegIndex<64>; - def dsub3 : SubRegIndex<64>; - // Note: Code depends on these having consecutive numbers - def qsub0 : SubRegIndex<128>; - def qsub1 : SubRegIndex<128>; - def qsub2 : SubRegIndex<128>; - def qsub3 : SubRegIndex<128>; -} - -let Namespace = "ARM64" in { - def vreg : RegAltNameIndex; - def vlist1 : RegAltNameIndex; -} - -//===----------------------------------------------------------------------===// -// Registers -//===----------------------------------------------------------------------===// -def W0 : ARM64Reg<0, "w0" >, DwarfRegNum<[0]>; -def W1 : ARM64Reg<1, "w1" >, DwarfRegNum<[1]>; -def W2 : ARM64Reg<2, "w2" >, DwarfRegNum<[2]>; -def W3 : ARM64Reg<3, "w3" >, DwarfRegNum<[3]>; -def W4 : ARM64Reg<4, "w4" >, DwarfRegNum<[4]>; -def W5 : ARM64Reg<5, "w5" >, DwarfRegNum<[5]>; -def W6 : ARM64Reg<6, "w6" >, DwarfRegNum<[6]>; -def W7 : ARM64Reg<7, "w7" >, DwarfRegNum<[7]>; -def W8 : ARM64Reg<8, "w8" >, DwarfRegNum<[8]>; -def W9 : ARM64Reg<9, "w9" >, DwarfRegNum<[9]>; -def W10 : ARM64Reg<10, "w10">, DwarfRegNum<[10]>; -def W11 : ARM64Reg<11, "w11">, DwarfRegNum<[11]>; -def W12 : ARM64Reg<12, "w12">, DwarfRegNum<[12]>; -def W13 : ARM64Reg<13, "w13">, DwarfRegNum<[13]>; -def W14 : ARM64Reg<14, "w14">, DwarfRegNum<[14]>; -def W15 : ARM64Reg<15, "w15">, DwarfRegNum<[15]>; -def W16 : ARM64Reg<16, "w16">, DwarfRegNum<[16]>; -def W17 : ARM64Reg<17, "w17">, DwarfRegNum<[17]>; -def W18 : ARM64Reg<18, "w18">, DwarfRegNum<[18]>; -def W19 : ARM64Reg<19, "w19">, DwarfRegNum<[19]>; -def W20 : ARM64Reg<20, "w20">, DwarfRegNum<[20]>; -def W21 : ARM64Reg<21, "w21">, DwarfRegNum<[21]>; -def W22 : ARM64Reg<22, "w22">, DwarfRegNum<[22]>; -def W23 : ARM64Reg<23, "w23">, DwarfRegNum<[23]>; -def W24 : ARM64Reg<24, "w24">, DwarfRegNum<[24]>; -def W25 : ARM64Reg<25, "w25">, DwarfRegNum<[25]>; -def W26 : ARM64Reg<26, "w26">, DwarfRegNum<[26]>; -def W27 : ARM64Reg<27, "w27">, DwarfRegNum<[27]>; -def W28 : ARM64Reg<28, "w28">, DwarfRegNum<[28]>; -def W29 : ARM64Reg<29, "w29">, DwarfRegNum<[29]>; -def W30 : ARM64Reg<30, "w30">, DwarfRegNum<[30]>; -def WSP : ARM64Reg<31, "wsp">, DwarfRegNum<[31]>; 
-def WZR : ARM64Reg<31, "wzr">, DwarfRegAlias; - -let SubRegIndices = [sub_32] in { -def X0 : ARM64Reg<0, "x0", [W0]>, DwarfRegAlias; -def X1 : ARM64Reg<1, "x1", [W1]>, DwarfRegAlias; -def X2 : ARM64Reg<2, "x2", [W2]>, DwarfRegAlias; -def X3 : ARM64Reg<3, "x3", [W3]>, DwarfRegAlias; -def X4 : ARM64Reg<4, "x4", [W4]>, DwarfRegAlias; -def X5 : ARM64Reg<5, "x5", [W5]>, DwarfRegAlias; -def X6 : ARM64Reg<6, "x6", [W6]>, DwarfRegAlias; -def X7 : ARM64Reg<7, "x7", [W7]>, DwarfRegAlias; -def X8 : ARM64Reg<8, "x8", [W8]>, DwarfRegAlias; -def X9 : ARM64Reg<9, "x9", [W9]>, DwarfRegAlias; -def X10 : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias; -def X11 : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias; -def X12 : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias; -def X13 : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias; -def X14 : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias; -def X15 : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias; -def X16 : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias; -def X17 : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias; -def X18 : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias; -def X19 : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias; -def X20 : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias; -def X21 : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias; -def X22 : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias; -def X23 : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias; -def X24 : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias; -def X25 : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias; -def X26 : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias; -def X27 : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias; -def X28 : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias; -def FP : ARM64Reg<29, "fp", [W29]>, DwarfRegAlias; -def LR : ARM64Reg<30, "lr", [W30]>, DwarfRegAlias; -def SP : ARM64Reg<31, "sp", [WSP]>, DwarfRegAlias; -def XZR : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias; -} - -// Condition code register. -def CPSR : ARM64Reg<0, "cpsr">; - -// GPR register classes with the intersections of GPR32/GPR32sp and -// GPR64/GPR64sp for use by the coalescer. -def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> { - let AltOrders = [(rotl GPR32common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64common : RegisterClass<"ARM64", [i64], 64, - (add (sequence "X%u", 0, 28), FP, LR)> { - let AltOrders = [(rotl GPR64common, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -// GPR register classes which exclude SP/WSP. -def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> { - let AltOrders = [(rotl GPR32, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> { - let AltOrders = [(rotl GPR64, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include SP/WSP. -def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> { - let AltOrders = [(rotl GPR32sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} -def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> { - let AltOrders = [(rotl GPR64sp, 8)]; - let AltOrderSelect = [{ return 1; }]; -} - -// GPR register classes which include WZR/XZR AND SP/WSP. This is not a -// constraint used by any instructions, it is used as a common super-class. -def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>; -def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>; - -// For tail calls, we can't use callee-saved registers, as they are restored -// to the saved value before the tail call, which would clobber a call address. 
-// This is for indirect tail calls to store the address of the destination. -def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21, - X22, X23, X24, X25, X26, - X27, X28)>; - -// GPR register classes for post increment ammount of vector load/store that -// has alternate printing when Rm=31 and prints a constant immediate value -// equal to the total number of bytes transferred. -def GPR64pi1 : RegisterOperand; -def GPR64pi2 : RegisterOperand; -def GPR64pi3 : RegisterOperand; -def GPR64pi4 : RegisterOperand; -def GPR64pi6 : RegisterOperand; -def GPR64pi8 : RegisterOperand; -def GPR64pi12 : RegisterOperand; -def GPR64pi16 : RegisterOperand; -def GPR64pi24 : RegisterOperand; -def GPR64pi32 : RegisterOperand; -def GPR64pi48 : RegisterOperand; -def GPR64pi64 : RegisterOperand; - -// Condition code regclass. -def CCR : RegisterClass<"ARM64", [i32], 32, (add CPSR)> { - let CopyCost = -1; // Don't allow copying of status registers. - - // CCR is not allocatable. - let isAllocatable = 0; -} - -//===----------------------------------------------------------------------===// -// Floating Point Scalar Registers -//===----------------------------------------------------------------------===// - -def B0 : ARM64Reg<0, "b0">, DwarfRegNum<[64]>; -def B1 : ARM64Reg<1, "b1">, DwarfRegNum<[65]>; -def B2 : ARM64Reg<2, "b2">, DwarfRegNum<[66]>; -def B3 : ARM64Reg<3, "b3">, DwarfRegNum<[67]>; -def B4 : ARM64Reg<4, "b4">, DwarfRegNum<[68]>; -def B5 : ARM64Reg<5, "b5">, DwarfRegNum<[69]>; -def B6 : ARM64Reg<6, "b6">, DwarfRegNum<[70]>; -def B7 : ARM64Reg<7, "b7">, DwarfRegNum<[71]>; -def B8 : ARM64Reg<8, "b8">, DwarfRegNum<[72]>; -def B9 : ARM64Reg<9, "b9">, DwarfRegNum<[73]>; -def B10 : ARM64Reg<10, "b10">, DwarfRegNum<[74]>; -def B11 : ARM64Reg<11, "b11">, DwarfRegNum<[75]>; -def B12 : ARM64Reg<12, "b12">, DwarfRegNum<[76]>; -def B13 : ARM64Reg<13, "b13">, DwarfRegNum<[77]>; -def B14 : ARM64Reg<14, "b14">, DwarfRegNum<[78]>; -def B15 : ARM64Reg<15, "b15">, DwarfRegNum<[79]>; -def B16 : ARM64Reg<16, "b16">, DwarfRegNum<[80]>; -def B17 : ARM64Reg<17, "b17">, DwarfRegNum<[81]>; -def B18 : ARM64Reg<18, "b18">, DwarfRegNum<[82]>; -def B19 : ARM64Reg<19, "b19">, DwarfRegNum<[83]>; -def B20 : ARM64Reg<20, "b20">, DwarfRegNum<[84]>; -def B21 : ARM64Reg<21, "b21">, DwarfRegNum<[85]>; -def B22 : ARM64Reg<22, "b22">, DwarfRegNum<[86]>; -def B23 : ARM64Reg<23, "b23">, DwarfRegNum<[87]>; -def B24 : ARM64Reg<24, "b24">, DwarfRegNum<[88]>; -def B25 : ARM64Reg<25, "b25">, DwarfRegNum<[89]>; -def B26 : ARM64Reg<26, "b26">, DwarfRegNum<[90]>; -def B27 : ARM64Reg<27, "b27">, DwarfRegNum<[91]>; -def B28 : ARM64Reg<28, "b28">, DwarfRegNum<[92]>; -def B29 : ARM64Reg<29, "b29">, DwarfRegNum<[93]>; -def B30 : ARM64Reg<30, "b30">, DwarfRegNum<[94]>; -def B31 : ARM64Reg<31, "b31">, DwarfRegNum<[95]>; - -let SubRegIndices = [bsub] in { -def H0 : ARM64Reg<0, "h0", [B0]>, DwarfRegAlias; -def H1 : ARM64Reg<1, "h1", [B1]>, DwarfRegAlias; -def H2 : ARM64Reg<2, "h2", [B2]>, DwarfRegAlias; -def H3 : ARM64Reg<3, "h3", [B3]>, DwarfRegAlias; -def H4 : ARM64Reg<4, "h4", [B4]>, DwarfRegAlias; -def H5 : ARM64Reg<5, "h5", [B5]>, DwarfRegAlias; -def H6 : ARM64Reg<6, "h6", [B6]>, DwarfRegAlias; -def H7 : ARM64Reg<7, "h7", [B7]>, DwarfRegAlias; -def H8 : ARM64Reg<8, "h8", [B8]>, DwarfRegAlias; -def H9 : ARM64Reg<9, "h9", [B9]>, DwarfRegAlias; -def H10 : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias; -def H11 : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias; -def H12 : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias; -def H13 : ARM64Reg<13, 
"h13", [B13]>, DwarfRegAlias; -def H14 : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias; -def H15 : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias; -def H16 : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias; -def H17 : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias; -def H18 : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias; -def H19 : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias; -def H20 : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias; -def H21 : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias; -def H22 : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias; -def H23 : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias; -def H24 : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias; -def H25 : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias; -def H26 : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias; -def H27 : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias; -def H28 : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias; -def H29 : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias; -def H30 : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias; -def H31 : ARM64Reg<31, "h31", [B31]>, DwarfRegAlias; -} - -let SubRegIndices = [hsub] in { -def S0 : ARM64Reg<0, "s0", [H0]>, DwarfRegAlias; -def S1 : ARM64Reg<1, "s1", [H1]>, DwarfRegAlias; -def S2 : ARM64Reg<2, "s2", [H2]>, DwarfRegAlias; -def S3 : ARM64Reg<3, "s3", [H3]>, DwarfRegAlias; -def S4 : ARM64Reg<4, "s4", [H4]>, DwarfRegAlias; -def S5 : ARM64Reg<5, "s5", [H5]>, DwarfRegAlias; -def S6 : ARM64Reg<6, "s6", [H6]>, DwarfRegAlias; -def S7 : ARM64Reg<7, "s7", [H7]>, DwarfRegAlias; -def S8 : ARM64Reg<8, "s8", [H8]>, DwarfRegAlias; -def S9 : ARM64Reg<9, "s9", [H9]>, DwarfRegAlias; -def S10 : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias; -def S11 : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias; -def S12 : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias; -def S13 : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias; -def S14 : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias; -def S15 : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias; -def S16 : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias; -def S17 : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias; -def S18 : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias; -def S19 : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias; -def S20 : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias; -def S21 : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias; -def S22 : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias; -def S23 : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias; -def S24 : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias; -def S25 : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias; -def S26 : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias; -def S27 : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias; -def S28 : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias; -def S29 : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias; -def S30 : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias; -def S31 : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias; -} - -let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in { -def D0 : ARM64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias; -def D1 : ARM64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias; -def D2 : ARM64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias; -def D3 : ARM64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias; -def D4 : ARM64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias; -def D5 : ARM64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias; -def D6 : ARM64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias; -def D7 : ARM64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias; -def D8 : ARM64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias; -def D9 : ARM64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias; -def D10 : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias; -def D11 : ARM64Reg<11, "d11", [S11], ["v11", ""]>, 
DwarfRegAlias; -def D12 : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias; -def D13 : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias; -def D14 : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias; -def D15 : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias; -def D16 : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias; -def D17 : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias; -def D18 : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias; -def D19 : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias; -def D20 : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias; -def D21 : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias; -def D22 : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias; -def D23 : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias; -def D24 : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias; -def D25 : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias; -def D26 : ARM64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias; -def D27 : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias; -def D28 : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias; -def D29 : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias; -def D30 : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias; -def D31 : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias; -} - -let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in { -def Q0 : ARM64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias; -def Q1 : ARM64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias; -def Q2 : ARM64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias; -def Q3 : ARM64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias; -def Q4 : ARM64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias; -def Q5 : ARM64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias; -def Q6 : ARM64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias; -def Q7 : ARM64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias; -def Q8 : ARM64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias; -def Q9 : ARM64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias; -def Q10 : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias; -def Q11 : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias; -def Q12 : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias; -def Q13 : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias; -def Q14 : ARM64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias; -def Q15 : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias; -def Q16 : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias; -def Q17 : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias; -def Q18 : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias; -def Q19 : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias; -def Q20 : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias; -def Q21 : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias; -def Q22 : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias; -def Q23 : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias; -def Q24 : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias; -def Q25 : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias; -def Q26 : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias; -def Q27 : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias; -def Q28 : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias; -def Q29 : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias; -def Q30 : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias; -def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias; -} - -def FPR8 : 
RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> { - let Size = 8; -} -def FPR16 : RegisterClass<"ARM64", [untyped], 16, (sequence "H%u", 0, 31)> { - let Size = 16; -} -def FPR32 : RegisterClass<"ARM64", [f32, i32], 32,(sequence "S%u", 0, 31)>; -def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32, - v1i64], - 64, (sequence "D%u", 0, 31)>; -// We don't (yet) have an f128 legal type, so don't use that here. We -// normalize 128-bit vectors to v2f64 for arg passing and such, so use -// that here. -def FPR128 : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128], - 128, (sequence "Q%u", 0, 31)>; - -// The lower 16 vector registers. Some instructions can only take registers -// in this range. -def FPR128_lo : RegisterClass<"ARM64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc FPR128, 16)>; - -// Pairs, triples, and quads of 64-bit vector registers. -def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>; -def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2)]>; -def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3], - [(rotl FPR64, 0), (rotl FPR64, 1), - (rotl FPR64, 2), (rotl FPR64, 3)]>; -def DD : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> { - let Size = 128; -} -def DDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> { - let Size = 196; -} -def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> { - let Size = 256; -} - -// Pairs, triples, and quads of 128-bit vector registers. -def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>; -def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2)]>; -def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3], - [(rotl FPR128, 0), (rotl FPR128, 1), - (rotl FPR128, 2), (rotl FPR128, 3)]>; -def QQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> { - let Size = 256; -} -def QQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> { - let Size = 384; -} -def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> { - let Size = 512; -} - - -// Vector operand versions of the FP registers. Alternate name printing and -// assmebler matching. -def VectorRegAsmOperand : AsmOperandClass { let Name = "VectorReg"; } -let ParserMatchClass = VectorRegAsmOperand in { -def V64 : RegisterOperand; -def V128 : RegisterOperand; -def V128_lo : RegisterOperand; -} - -class TypedVecListAsmOperand - : AsmOperandClass { - let Name = "TypedVectorList" # count # "_" # lanes # kind; - - let PredicateMethod - = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>"; - let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">"; -} - -class TypedVecListRegOperand - : RegisterOperand">; - -multiclass VectorList { - // With implicit types (probably on instruction instead). E.g. 
{ v0, v1 } - def _64AsmOperand : AsmOperandClass { - let Name = NAME # "64"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList64Operands<" # count # ">"; - } - - def "64" : RegisterOperand { - let ParserMatchClass = !cast(NAME # "_64AsmOperand"); - } - - def _128AsmOperand : AsmOperandClass { - let Name = NAME # "128"; - let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">"; - let RenderMethod = "addVectorList128Operands<" # count # ">"; - } - - def "128" : RegisterOperand { - let ParserMatchClass = !cast(NAME # "_128AsmOperand"); - } - - // 64-bit register lists with explicit type. - - // { v0.8b, v1.8b } - def _8bAsmOperand : TypedVecListAsmOperand; - def "8b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_8bAsmOperand"); - } - - // { v0.4h, v1.4h } - def _4hAsmOperand : TypedVecListAsmOperand; - def "4h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_4hAsmOperand"); - } - - // { v0.2s, v1.2s } - def _2sAsmOperand : TypedVecListAsmOperand; - def "2s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_2sAsmOperand"); - } - - // { v0.1d, v1.1d } - def _1dAsmOperand : TypedVecListAsmOperand; - def "1d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_1dAsmOperand"); - } - - // 128-bit register lists with explicit type - - // { v0.16b, v1.16b } - def _16bAsmOperand : TypedVecListAsmOperand; - def "16b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_16bAsmOperand"); - } - - // { v0.8h, v1.8h } - def _8hAsmOperand : TypedVecListAsmOperand; - def "8h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_8hAsmOperand"); - } - - // { v0.4s, v1.4s } - def _4sAsmOperand : TypedVecListAsmOperand; - def "4s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_4sAsmOperand"); - } - - // { v0.2d, v1.2d } - def _2dAsmOperand : TypedVecListAsmOperand; - def "2d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_2dAsmOperand"); - } - - // { v0.b, v1.b } - def _bAsmOperand : TypedVecListAsmOperand; - def "b" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_bAsmOperand"); - } - - // { v0.h, v1.h } - def _hAsmOperand : TypedVecListAsmOperand; - def "h" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_hAsmOperand"); - } - - // { v0.s, v1.s } - def _sAsmOperand : TypedVecListAsmOperand; - def "s" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_sAsmOperand"); - } - - // { v0.d, v1.d } - def _dAsmOperand : TypedVecListAsmOperand; - def "d" : TypedVecListRegOperand { - let ParserMatchClass = !cast(NAME # "_dAsmOperand"); - } - - -} - -defm VecListOne : VectorList<1, FPR64, FPR128>; -defm VecListTwo : VectorList<2, DD, QQ>; -defm VecListThree : VectorList<3, DDD, QQQ>; -defm VecListFour : VectorList<4, DDDD, QQQQ>; - - -// Register operand versions of the scalar FP registers. -def FPR16Op : RegisterOperand; -def FPR32Op : RegisterOperand; -def FPR64Op : RegisterOperand; -def FPR128Op : RegisterOperand; diff --git a/lib/Target/ARM64/ARM64SchedCyclone.td b/lib/Target/ARM64/ARM64SchedCyclone.td deleted file mode 100644 index 65c68b3..0000000 --- a/lib/Target/ARM64/ARM64SchedCyclone.td +++ /dev/null @@ -1,852 +0,0 @@ -//=- ARMSchedCyclone.td - ARM64 Cyclone Scheduling Defs ------*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for ARM64 Cyclone to support -// instruction scheduling and other instruction cost heuristics. -// -//===----------------------------------------------------------------------===// - -def CycloneModel : SchedMachineModel { - let IssueWidth = 6; // 6 micro-ops are dispatched per cycle. - let MicroOpBufferSize = 192; // Based on the reorder buffer. - let LoadLatency = 4; // Optimistic load latency. - let MispredictPenalty = 16; // 14-19 cycles are typical. -} - -//===----------------------------------------------------------------------===// -// Define each kind of processor resource and number available on Cyclone. - -// 4 integer pipes -def CyUnitI : ProcResource<4> { - let BufferSize = 48; -} - -// 2 branch units: I[0..1] -def CyUnitB : ProcResource<2> { - let Super = CyUnitI; - let BufferSize = 24; -} - -// 1 indirect-branch unit: I[0] -def CyUnitBR : ProcResource<1> { - let Super = CyUnitB; -} - -// 2 shifter pipes: I[2..3] -// When an instruction consumes a CyUnitIS, it also consumes a CyUnitI -def CyUnitIS : ProcResource<2> { - let Super = CyUnitI; - let BufferSize = 24; -} - -// 1 mul pipe: I[0] -def CyUnitIM : ProcResource<1> { - let Super = CyUnitBR; - let BufferSize = 32; -} - -// 1 div pipe: I[1] -def CyUnitID : ProcResource<1> { - let Super = CyUnitB; - let BufferSize = 16; -} - -// 1 integer division unit. This is driven by the ID pipe, but only -// consumes the pipe for one cycle at issue and another cycle at writeback. -def CyUnitIntDiv : ProcResource<1>; - -// 2 ld/st pipes. -def CyUnitLS : ProcResource<2> { - let BufferSize = 28; -} - -// 3 fp/vector pipes. -def CyUnitV : ProcResource<3> { - let BufferSize = 48; -} -// 2 fp/vector arithmetic and multiply pipes: V[0-1] -def CyUnitVM : ProcResource<2> { - let Super = CyUnitV; - let BufferSize = 32; -} -// 1 fp/vector division/sqrt pipe: V[2] -def CyUnitVD : ProcResource<1> { - let Super = CyUnitV; - let BufferSize = 16; -} -// 1 fp compare pipe: V[0] -def CyUnitVC : ProcResource<1> { - let Super = CyUnitVM; - let BufferSize = 16; -} - -// 2 fp division/square-root units. These are driven by the VD pipe, -// but only consume the pipe for one cycle at issue and a cycle at writeback. -def CyUnitFloatDiv : ProcResource<2>; - -//===----------------------------------------------------------------------===// -// Define scheduler read/write resources and latency on Cyclone. -// This mirrors sections 7.7-7.9 of the Tuning Guide v1.0.1. - -let SchedModel = CycloneModel in { - -//--- -// 7.8.1. Moves -//--- - -// A single nop micro-op (uX). -def WriteX : SchedWriteRes<[]> { let Latency = 0; } - -// Move zero is a register rename (to machine register zero). -// The move is replaced by a single nop micro-op. -// MOVZ Rd, #0 -// AND Rd, Rzr, #imm -def WriteZPred : SchedPredicate<[{TII->isGPRZero(MI)}]>; -def WriteImmZ : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : InstRW<[WriteImmZ], (instrs MOVZWi,MOVZXi,ANDWri,ANDXri)>; - -// Move GPR is a register rename and single nop micro-op. -// ORR Xd, XZR, Xm -// ADD Xd, Xn, #0 -def WriteIMovPred : SchedPredicate<[{TII->isGPRCopy(MI)}]>; -def WriteVMovPred : SchedPredicate<[{TII->isFPRCopy(MI)}]>; -def WriteMov : SchedWriteVariant<[ - SchedVar, - SchedVar, - SchedVar]>; -def : InstRW<[WriteMov], (instrs COPY,ORRXrr,ADDXrr)>; - -// Move non-zero immediate is an integer ALU op. -// MOVN,MOVZ,MOVK -def : WriteRes; - -//--- -// 7.8.2-7.8.5. 
Arithmetic and Logical, Comparison, Conditional, -// Shifts and Bitfield Operations -//--- - -// ADR,ADRP -// ADD(S)ri,SUB(S)ri,AND(S)ri,EORri,ORRri -// ADD(S)rr,SUB(S)rr,AND(S)rr,BIC(S)rr,EONrr,EORrr,ORNrr,ORRrr -// ADC(S),SBC(S) -// Aliases: CMN, CMP, TST -// -// Conditional operations. -// CCMNi,CCMPi,CCMNr,CCMPr, -// CSEL,CSINC,CSINV,CSNEG -// -// Bit counting and reversal operations. -// CLS,CLZ,RBIT,REV,REV16,REV32 -def : WriteRes; - -// ADD with shifted register operand is a single micro-op that -// consumes a shift pipeline for two cycles. -// ADD(S)rs,SUB(S)rs,AND(S)rs,BIC(S)rs,EONrs,EORrs,ORNrs,ORRrs -// EXAMPLE: ADDrs Xn, Xm LSL #imm -def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; -} - -// ADD with extended register operand is the same as shifted reg operand. -// ADD(S)re,SUB(S)re -// EXAMPLE: ADDXre Xn, Xm, UXTB #1 -def : WriteRes { - let Latency = 2; - let ResourceCycles = [2]; -} - -// Variable shift and bitfield operations. -// ASRV,LSLV,LSRV,RORV,BFM,SBFM,UBFM -def : WriteRes; - -// EXTR Shifts a pair of registers and requires two micro-ops. -// The second micro-op is delayed, as modeled by ReadExtrHi. -// EXTR Xn, Xm, #imm -def : WriteRes { - let Latency = 2; - let NumMicroOps = 2; -} - -// EXTR's first register read is delayed by one cycle, effectively -// shortening its writer's latency. -// EXTR Xn, Xm, #imm -def : ReadAdvance; - -//--- -// 7.8.6. Multiplies -//--- - -// MUL/MNEG are aliases for MADD/MSUB. -// MADDW,MSUBW,SMADDL,SMSUBL,UMADDL,UMSUBL -def : WriteRes { - let Latency = 4; -} -// MADDX,MSUBX,SMULH,UMULH -def : WriteRes { - let Latency = 5; -} - -//--- -// 7.8.7. Divide -//--- - -// 32-bit divide takes 7-13 cycles. 10 cycles covers a 20-bit quotient. -// The ID pipe is consumed for 2 cycles: issue and writeback. -// SDIVW,UDIVW -def : WriteRes { - let Latency = 10; - let ResourceCycles = [2, 10]; -} -// 64-bit divide takes 7-21 cycles. 13 cycles covers a 32-bit quotient. -// The ID pipe is consumed for 2 cycles: issue and writeback. -// SDIVX,UDIVX -def : WriteRes { - let Latency = 13; - let ResourceCycles = [2, 13]; -} - -//--- -// 7.8.8,7.8.10. Load/Store, single element -//--- - -// Integer loads take 4 cycles and use one LS unit for one cycle. -def : WriteRes { - let Latency = 4; -} - -// Store-load forwarding is 4 cycles. -// -// Note: The store-exclusive sequence incorporates this -// latency. However, general heuristics should not model the -// dependence between a store and subsequent may-alias load because -// hardware speculation works. -def : WriteRes { - let Latency = 4; -} - -// Load from base address plus an optionally scaled register offset. -// Rt latency is latency WriteIS + WriteLD. -// EXAMPLE: LDR Xn, Xm [, lsl 3] -def CyWriteLDIdx : SchedWriteVariant<[ - SchedVar, // Load from scaled register. - SchedVar]>; // Load from register offset. -def : SchedAlias; // Map ARM64->Cyclone type. - -// EXAMPLE: STR Xn, Xm [, lsl 3] -def CyWriteSTIdx : SchedWriteVariant<[ - SchedVar, // Store to scaled register. - SchedVar]>; // Store to register offset. -def : SchedAlias; // Map ARM64->Cyclone type. - -// Read the (unshifted) base register Xn in the second micro-op one cycle later. -// EXAMPLE: LDR Xn, Xm [, lsl 3] -def ReadBaseRS : SchedReadAdvance<1>; -def CyReadAdrBase : SchedReadVariant<[ - SchedVar, // Read base reg after shifting offset. - SchedVar]>; // Read base reg with no shift. -def : SchedAlias; // Map ARM64->Cyclone type. - -//--- -// 7.8.9,7.8.11. 
Load/Store, paired -//--- - -// Address pre/post increment is a simple ALU op with one cycle latency. -def : WriteRes; - -// LDP high register write is fused with the load, but a nop micro-op remains. -def : WriteRes { - let Latency = 4; -} - -// STP is a vector op and store, except for QQ, which is just two stores. -def : SchedAlias; -def : InstRW<[WriteST, WriteST], (instrs STPQi)>; - -//--- -// 7.8.13. Branches -//--- - -// Branches take a single micro-op. -// The misprediction penalty is defined as a SchedMachineModel property. -def : WriteRes {let Latency = 0;} -def : WriteRes {let Latency = 0;} - -//--- -// 7.8.14. Never-issued Instructions, Barrier and Hint Operations -//--- - -// NOP,SEV,SEVL,WFE,WFI,YIELD -def : WriteRes {let Latency = 0;} -// ISB -def : InstRW<[WriteI], (instrs ISB)>; -// SLREX,DMB,DSB -def : WriteRes; - -// System instructions get an invalid latency because the latency of -// other operations across them is meaningless. -def : WriteRes {let Latency = -1;} - -//===----------------------------------------------------------------------===// -// 7.9 Vector Unit Instructions - -// Simple vector operations take 2 cycles. -def : WriteRes {let Latency = 2;} - -// Define some longer latency vector op types for Cyclone. -def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;} -def CyWriteV4 : SchedWriteRes<[CyUnitV]> {let Latency = 4;} -def CyWriteV5 : SchedWriteRes<[CyUnitV]> {let Latency = 5;} -def CyWriteV6 : SchedWriteRes<[CyUnitV]> {let Latency = 6;} - -// Simple floating-point operations take 2 cycles. -def : WriteRes {let Latency = 2;} - -//--- -// 7.9.1 Vector Moves -//--- - -// TODO: Add Cyclone-specific zero-cycle zeros. LLVM currently -// generates expensive int-float conversion instead: -// FMOVDi Dd, #0.0 -// FMOVv2f64ns Vd.2d, #0.0 - -// FMOVSi,FMOVDi -def : WriteRes {let Latency = 2;} - -// MOVI,MVNI are WriteV -// FMOVv2f32ns,FMOVv2f64ns,FMOVv4f32ns are WriteV - -// Move FPR is a register rename and single nop micro-op. -// ORR.16b Vd,Vn,Vn -// COPY is handled above in the WriteMov Variant. -def WriteVMov : SchedWriteVariant<[ - SchedVar, - SchedVar]>; -def : InstRW<[WriteVMov], (instrs ORRv16i8)>; - -// FMOVSr,FMOVDr are WriteF. - -// MOV V,V is a WriteV. - -// CPY D,V[x] is a WriteV - -// INS V[x],V[y] is a WriteV. - -// FMOVWSr,FMOVXDr,FMOVXDHighr -def : SchedAlias; - -// FMOVSWr,FMOVDXr -def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>; - -// INS V[x],R -def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>; -def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>; - -// SMOV,UMOV R,V[x] -def CyWriteCopyToGPR : WriteSequence<[WriteLD, WriteI]>; -def : InstRW<[CyWriteCopyToGPR], (instregex "SMOVv","UMOVv")>; - -// DUP V,R -def : InstRW<[CyWriteCopyToFPR], (instregex "DUPv")>; - -// DUP V,V[x] is a WriteV. - -//--- -// 7.9.2 Integer Arithmetic, Logical, and Comparisons -//--- - -// BIC,ORR V,#imm are WriteV - -def : InstRW<[CyWriteV3], (instregex "ABSv")>; - -// MVN,NEG,NOT are WriteV - -def : InstRW<[CyWriteV3], (instregex "SQABSv","SQNEGv")>; - -// ADDP is a WriteV. -def CyWriteVADDLP : SchedWriteRes<[CyUnitV]> {let Latency = 2;} -def : InstRW<[CyWriteVADDLP], (instregex "SADDLPv","UADDLPv")>; - -def : InstRW<[CyWriteV3], - (instregex "ADDVv","SMAXVv","UMAXVv","SMINVv","UMINVv")>; - -def : InstRW<[CyWriteV3], (instregex "SADDLV","UADDLV")>; - -// ADD,SUB are WriteV - -// Forward declare. -def CyWriteVABD : SchedWriteRes<[CyUnitV]> {let Latency = 3;} - -// Add/Diff and accumulate uses the vector multiply unit. 
-def CyWriteVAccum : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} -def CyReadVAccum : SchedReadAdvance<1, - [CyWriteVAccum, CyWriteVADDLP, CyWriteVABD]>; - -def : InstRW<[CyWriteVAccum, CyReadVAccum], - (instregex "SADALP","UADALP")>; - -def : InstRW<[CyWriteVAccum, CyReadVAccum], - (instregex "SABAv","UABAv","SABALv","UABALv")>; - -def : InstRW<[CyWriteV3], (instregex "SQADDv","SQSUBv","UQADDv","UQSUBv")>; - -def : InstRW<[CyWriteV3], (instregex "SUQADDv","USQADDv")>; - -def : InstRW<[CyWriteV4], (instregex "ADDHNv","RADDHNv", "RSUBHNv", "SUBHNv")>; - -// WriteV includes: -// AND,BIC,CMTST,EOR,ORN,ORR -// ADDP -// SHADD,SHSUB,SRHADD,UHADD,UHSUB,URHADD -// SADDL,SSUBL,UADDL,USUBL -// SADDW,SSUBW,UADDW,USUBW - -def : InstRW<[CyWriteV3], (instregex "CMEQv","CMGEv","CMGTv", - "CMLEv","CMLTv", - "CMHIv","CMHSv")>; - -def : InstRW<[CyWriteV3], (instregex "SMAXv","SMINv","UMAXv","UMINv", - "SMAXPv","SMINPv","UMAXPv","UMINPv")>; - -def : InstRW<[CyWriteVABD], (instregex "SABDv","UABDv", - "SABDLv","UABDLv")>; - -//--- -// 7.9.3 Floating Point Arithmetic and Comparisons -//--- - -// FABS,FNEG are WriteF - -def : InstRW<[CyWriteV4], (instrs FADDPv2i32p)>; -def : InstRW<[CyWriteV5], (instrs FADDPv2i64p)>; - -def : InstRW<[CyWriteV3], (instregex "FMAXPv2i","FMAXNMPv2i", - "FMINPv2i","FMINNMPv2i")>; - -def : InstRW<[CyWriteV4], (instregex "FMAXVv","FMAXNMVv","FMINVv","FMINNMVv")>; - -def : InstRW<[CyWriteV4], (instrs FADDSrr,FADDv2f32,FADDv4f32, - FSUBSrr,FSUBv2f32,FSUBv4f32, - FADDPv2f32,FADDPv4f32, - FABD32,FABDv2f32,FABDv4f32)>; -def : InstRW<[CyWriteV5], (instrs FADDDrr,FADDv2f64, - FSUBDrr,FSUBv2f64, - FADDPv2f64, - FABD64,FABDv2f64)>; - -def : InstRW<[CyWriteV3], (instregex "FCMEQ","FCMGT","FCMLE","FCMLT")>; - -def : InstRW<[CyWriteV3], (instregex "FACGE","FACGT", - "FMAXS","FMAXD","FMAXv", - "FMINS","FMIND","FMINv", - "FMAXNMS","FMAXNMD","FMAXNMv", - "FMINNMS","FMINNMD","FMINNMv", - "FMAXPv2f","FMAXPv4f", - "FMINPv2f","FMINPv4f", - "FMAXNMPv2f","FMAXNMPv4f", - "FMINNMPv2f","FMINNMPv4f")>; - -// FCMP,FCMPE,FCCMP,FCCMPE -def : WriteRes {let Latency = 4;} - -// FCSEL is a WriteF. - -//--- -// 7.9.4 Shifts and Bitfield Operations -//--- - -// SHL is a WriteV - -def CyWriteVSHR : SchedWriteRes<[CyUnitV]> {let Latency = 2;} -def : InstRW<[CyWriteVSHR], (instregex "SSHRv","USHRv")>; - -def CyWriteVSRSHR : SchedWriteRes<[CyUnitV]> {let Latency = 3;} -def : InstRW<[CyWriteVSRSHR], (instregex "SRSHRv","URSHRv")>; - -// Shift and accumulate uses the vector multiply unit. -def CyWriteVShiftAcc : SchedWriteRes<[CyUnitVM]> {let Latency = 3;} -def CyReadVShiftAcc : SchedReadAdvance<1, - [CyWriteVShiftAcc, CyWriteVSHR, CyWriteVSRSHR]>; -def : InstRW<[CyWriteVShiftAcc, CyReadVShiftAcc], - (instregex "SRSRAv","SSRAv","URSRAv","USRAv")>; - -// SSHL,USHL are WriteV. - -def : InstRW<[CyWriteV3], (instregex "SRSHLv","URSHLv")>; - -// SQSHL,SQSHLU,UQSHL are WriteV. - -def : InstRW<[CyWriteV3], (instregex "SQRSHLv","UQRSHLv")>; - -// WriteV includes: -// SHLL,SSHLL,USHLL -// SLI,SRI -// BIF,BIT,BSL -// EXT -// CLS,CLZ,CNT,RBIT,REV16,REV32,REV64,XTN -// XTN2 - -def : InstRW<[CyWriteV4], - (instregex "RSHRNv","SHRNv", - "SQRSHRNv","SQRSHRUNv","SQSHRNv","SQSHRUNv", - "UQRSHRNv","UQSHRNv","SQXTNv","SQXTUNv","UQXTNv")>; - -//--- -// 7.9.5 Multiplication -//--- - -def CyWriteVMul : SchedWriteRes<[CyUnitVM]> { let Latency = 4;} -def : InstRW<[CyWriteVMul], (instregex "MULv","SMULLv","UMULLv", - "SQDMULLv","SQDMULHv","SQRDMULHv")>; - -// FMUL,FMULX,FNMUL default to WriteFMul. 
-def : WriteRes { let Latency = 4;}
-
-def CyWriteV64Mul : SchedWriteRes<[CyUnitVM]> { let Latency = 5;}
-def : InstRW<[CyWriteV64Mul], (instrs FMULDrr,FMULv2f64,FMULv2i64_indexed,
-                                      FNMULDrr,FMULX64,FMULXv2f64,
-                                      FMULXv2i64_indexed)>;
-
-def CyReadVMulAcc : SchedReadAdvance<1, [CyWriteVMul, CyWriteV64Mul]>;
-def : InstRW<[CyWriteVMul, CyReadVMulAcc],
-             (instregex "MLA","MLS","SMLAL","SMLSL","UMLAL","UMLSL",
-                        "SQDMLAL","SQDMLSL")>;
-
-def CyWriteSMul : SchedWriteRes<[CyUnitVM]> { let Latency = 8;}
-def CyWriteDMul : SchedWriteRes<[CyUnitVM]> { let Latency = 10;}
-def CyReadSMul : SchedReadAdvance<4, [CyWriteSMul]>;
-def CyReadDMul : SchedReadAdvance<5, [CyWriteDMul]>;
-
-def : InstRW<[CyWriteSMul, CyReadSMul],
-             (instrs FMADDSrrr,FMSUBSrrr,FNMADDSrrr,FNMSUBSrrr,
-                     FMLAv2f32,FMLAv4f32,
-                     FMLAv1i32_indexed,FMLAv1i64_indexed,FMLAv2i32_indexed)>;
-def : InstRW<[CyWriteDMul, CyReadDMul],
-             (instrs FMADDDrrr,FMSUBDrrr,FNMADDDrrr,FNMSUBDrrr,
-                     FMLAv2f64,FMLAv2i64_indexed,
-                     FMLSv2f64,FMLSv2i64_indexed)>;
-
-def CyWritePMUL : SchedWriteRes<[CyUnitVD]> { let Latency = 3; }
-def : InstRW<[CyWritePMUL], (instregex "PMULv", "PMULLv")>;
-
-//---
-// 7.9.6 Divide and Square Root
-//---
-
-// FDIV,FSQRT
-// TODO: Add 64-bit variant with 19 cycle latency.
-// TODO: Specialize FSQRT for longer latency.
-def : WriteRes {
-  let Latency = 17;
-  let ResourceCycles = [2, 17];
-}
-
-def : InstRW<[CyWriteV4], (instregex "FRECPEv","FRECPXv","URECPEv","URSQRTEv")>;
-
-def WriteFRSQRTE : SchedWriteRes<[CyUnitVM]> { let Latency = 4; }
-def : InstRW<[WriteFRSQRTE], (instregex "FRSQRTEv")>;
-
-def WriteFRECPS : SchedWriteRes<[CyUnitVM]> { let Latency = 8; }
-def WriteFRSQRTS : SchedWriteRes<[CyUnitVM]> { let Latency = 10; }
-def : InstRW<[WriteFRECPS], (instregex "FRECPSv")>;
-def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
-
-//---
-// 7.9.7 Integer-FP Conversions
-//---
-
-// FCVT lengthen f16/f32
-def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
-
-// FCVT,FCVTN,FCVTXN
-// SCVTF,UCVTF V,V
-// FRINT(AIMNPXZ) V,V
-def : WriteRes {let Latency = 4;}
-
-// SCVT/UCVT S/D, Rd = VLD5+V4: 9 cycles.
-def CyWriteCvtToFPR : WriteSequence<[WriteVLD, CyWriteV4]>;
-def : InstRW<[CyWriteCvtToFPR], (instregex "[SU]CVTF[SU][WX][SD]r")>;
-
-// FCVT Rd, S/D = V6+LD4: 10 cycles
-def CyWriteCvtToGPR : WriteSequence<[CyWriteV6, WriteLD]>;
-def : InstRW<[CyWriteCvtToGPR], (instregex "FCVT[AMNPZ][SU][SU][WX][SD]r")>;
-
-// FCVTL is a WriteV
-
-//---
-// 7.9.8-7.9.10 Cryptography, Data Transposition, Table Lookup
-//---
-
-def CyWriteCrypto2 : SchedWriteRes<[CyUnitVD]> {let Latency = 2;}
-def : InstRW<[CyWriteCrypto2], (instrs AESIMCrr, AESMCrr, SHA1Hrr,
-                                       AESDrr, AESErr, SHA1SU1rr, SHA256SU0rr,
-                                       SHA1SU0rrr)>;
-
-def CyWriteCrypto3 : SchedWriteRes<[CyUnitVD]> {let Latency = 3;}
-def : InstRW<[CyWriteCrypto3], (instrs SHA256SU1rrr)>;
-
-def CyWriteCrypto6 : SchedWriteRes<[CyUnitVD]> {let Latency = 6;}
-def : InstRW<[CyWriteCrypto6], (instrs SHA1Crrr, SHA1Mrrr, SHA1Prrr,
-                                       SHA256Hrrr,SHA256H2rrr)>;
-
-// TRN,UZP,ZIP are WriteV.
-
-// TBL,TBX are WriteV.
-
-//---
-// 7.9.11-7.9.14 Load/Store, single element and paired
-//---
-
-// Loading into the vector unit takes 5 cycles vs 4 for integer loads.
-def : WriteRes {
-  let Latency = 5;
-}
-
-// Store-load forwarding is 4 cycles.
-def : WriteRes {
-  let Latency = 4;
-}
-
-// WriteVLDPair/VSTPair sequences are expanded by the target description.
-
-//---
-// 7.9.15 Load, element operations
-//---
-
-// Only the first WriteVLD and WriteAdr for writeback match def operands.
-// Subsequent WriteVLDs consume resources. Since all loaded values have the -// same latency, this is acceptable. - -// Vd is read 5 cycles after issuing the vector load. -def : ReadAdvance; - -def : InstRW<[WriteVLD], - (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr], - (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -// Register writes from the load's high half are fused micro-ops. -def : InstRW<[WriteVLD], - (instregex "LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr], - (instregex "LD1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD], - (instregex "LD1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLD, WriteVLD], - (instregex "LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD], - (instregex "LD1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVLD, WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr, WriteVLD, WriteVLD, WriteVLD], - (instregex "LD1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD], - (instregex "LD1i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr], - (instregex "LD1i(8|16|32)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD], (instrs LD1i64)>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr],(instrs LD1i64_POST)>; - -def : InstRW<[WriteVLDShuffle], - (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr], - (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[WriteVLDShuffle, WriteV], - (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], - (instregex "LD2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle], - (instregex "LD2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], - (instregex "LD2i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], - (instregex "LD2i(8|16|32)_POST")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV], - (instregex "LD2i64$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV], - (instregex "LD2i64_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteV], - (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV], - (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV], - (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV], - (instregex "LD3Threev(8b|4h|2s)_POST")>; -def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle], - (instregex "LD3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV], - (instregex "LD3i(8|16|32)$")>; -def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV], - (instregex "LD3i(8|16|32)_POST")>; - -def : 
InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
-             (instregex "LD3i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
-             (instregex "LD3i64_POST")>;
-
-def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
-             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
-             (instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
-
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
-             (instrs LD3Rv1d,LD3Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
-             (instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
-
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
-             (instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
-             (instregex "LD4Fourv(8b|4h|2s)_POST")>;
-def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
-              WriteVLDPairShuffle, WriteVLDPairShuffle],
-             (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
-              WriteVLDPairShuffle, WriteVLDPairShuffle],
-             (instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
-
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
-             (instregex "LD4i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
-             (instregex "LD4i(8|16|32)_POST")>;
-
-
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
-             (instrs LD4i64)>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
-             (instrs LD4i64_POST)>;
-
-def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
-             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
-             (instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
-
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
-             (instrs LD4Rv1d,LD4Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
-             (instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
-
-//---
-// 7.9.16 Store, element operations
-//---
-
-// Only the WriteAdr for writeback matches a def operand.
-// Subsequent WriteVSTs only consume resources.
- -def : InstRW<[WriteVST], - (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST], - (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], - (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], - (instregex "ST1Twov(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST], - (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST], - (instregex "ST1Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVST], - (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVST], - (instregex "ST1Threev(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST, WriteVST], - (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST], - (instregex "ST1Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST1Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], - (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVST, WriteVST, WriteVST, WriteVST], - (instregex "ST1Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST1i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST1i(8|16|32)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instrs ST1i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST1i64_POST)>; - -def : InstRW<[WriteVSTShuffle], - (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], - (instregex "ST2Twov(8b|4h|2s)_POST")>; -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST2Twov(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST2i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST2i(8|16|32)_POST")>; -def : InstRW<[WriteVSTShuffle], (instrs ST2i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instrs ST2i64_POST)>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(8b|4h|2s)_POST")>; -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle, WriteVSTShuffle], - (instregex "ST3Threev(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTShuffle], (instregex "ST3i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTShuffle], (instregex "ST3i(8|16|32)_POST")>; - -def :InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64)>; -def :InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle], (instrs ST3i64_POST)>; - -def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(8b|4h|2s|1d)_POST")>; -def : InstRW<[WriteVSTPairShuffle, WriteVSTPairShuffle, - WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle, WriteVSTPairShuffle, - WriteVSTPairShuffle, WriteVSTPairShuffle], - (instregex "ST4Fourv(16b|8h|4s|2d)_POST")>; - -def : InstRW<[WriteVSTPairShuffle], (instregex "ST4i(8|16|32)$")>; -def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex 
"ST4i(8|16|32)_POST")>; - -def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>; -def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>; - -} // SchedModel = CycloneModel diff --git a/lib/Target/ARM64/ARM64Schedule.td b/lib/Target/ARM64/ARM64Schedule.td deleted file mode 100644 index 52f9262..0000000 --- a/lib/Target/ARM64/ARM64Schedule.td +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -// Define TII for use in SchedVariant Predicates. -// const MachineInstr *MI and const TargetSchedModel *SchedModel -// are defined by default. -def : PredicateProlog<[{ - const ARM64InstrInfo *TII = - static_cast(SchedModel->getInstrInfo()); - (void)TII; -}]>; - -// ARM64 Scheduler Definitions - -def WriteImm : SchedWrite; // MOVN, MOVZ -// TODO: Provide variants for MOV32/64imm Pseudos that dynamically -// select the correct sequence of WriteImms. - -def WriteI : SchedWrite; // ALU -def WriteISReg : SchedWrite; // ALU of Shifted-Reg -def WriteIEReg : SchedWrite; // ALU of Extended-Reg -def WriteExtr : SchedWrite; // EXTR shifts a reg pair -def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair -def WriteIS : SchedWrite; // Shift/Scale -def WriteID32 : SchedWrite; // 32-bit Divide -def WriteID64 : SchedWrite; // 64-bit Divide -def WriteIM32 : SchedWrite; // 32-bit Multiply -def WriteIM64 : SchedWrite; // 64-bit Multiply -def WriteBr : SchedWrite; // Branch -def WriteBrReg : SchedWrite; // Indirect Branch - -def WriteLD : SchedWrite; // Load from base addr plus immediate offset -def WriteST : SchedWrite; // Store to base addr plus immediate offset -def WriteSTP : SchedWrite; // Store a register pair. -def WriteAdr : SchedWrite; // Address pre/post increment. - -def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled). -def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled). -def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST. - -// ScaledIdxPred is true if a WriteLDIdx operand will be -// scaled. Subtargets can use this to dynamically select resources and -// latency for WriteLDIdx and ReadAdrBase. -def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>; - -// Serialized two-level address load. -// EXAMPLE: LOADGot -def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>; - -// Serialized two-level address lookup. -// EXAMPLE: MOVaddr... -def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>; - -// The second register of a load-pair. -// LDP,LDPSW,LDNP,LDXP,LDAXP -def WriteLDHi : SchedWrite; - -// Store-exclusive is a store followed by a dependent load. -def WriteSTX : WriteSequence<[WriteST, WriteLD]>; - -def WriteSys : SchedWrite; // Long, variable latency system ops. -def WriteBarrier : SchedWrite; // Memory barrier. -def WriteHint : SchedWrite; // Hint instruction. - -def WriteF : SchedWrite; // General floating-point ops. -def WriteFCmp : SchedWrite; // Floating-point compare. -def WriteFCvt : SchedWrite; // Float conversion. -def WriteFCopy : SchedWrite; // Float-int register copy. -def WriteFImm : SchedWrite; // Floating-point immediate. -def WriteFMul : SchedWrite; // Floating-point multiply. -def WriteFDiv : SchedWrite; // Floating-point division. 
-
-def WriteV : SchedWrite; // Vector ops.
-def WriteVLD : SchedWrite; // Vector loads.
-def WriteVST : SchedWrite; // Vector stores.
-
-// Read the unwritten lanes of the VLD's destination registers.
-def ReadVLD : SchedRead;
-
-// Sequential vector load and shuffle.
-def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
-def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
-
-// Store a shuffled vector.
-def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
-def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp b/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp
deleted file mode 100644
index 79d507f..0000000
--- a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ARM64SelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64-selectiondag-info"
-#include "ARM64TargetMachine.h"
-using namespace llvm;
-
-ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM)
-    : TargetSelectionDAGInfo(TM),
-      Subtarget(&TM.getSubtarget<ARM64Subtarget>()) {}
-
-ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {}
-
-SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset(
-    SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
-    SDValue Size, unsigned Align, bool isVolatile,
-    MachinePointerInfo DstPtrInfo) const {
-  // Check to see if there is a specialized entry-point for memory zeroing.
-  ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
-  ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
-  const char *bzeroEntry =
-      (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0;
-  // For small size (< 256), it is not beneficial to use bzero
-  // instead of memset.
-  if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
-    const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>(
-        DAG.getTarget().getTargetLowering());
-
-    EVT IntPtr = TLI.getPointerTy();
-    Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
-    TargetLowering::ArgListTy Args;
-    TargetLowering::ArgListEntry Entry;
-    Entry.Node = Dst;
-    Entry.Ty = IntPtrTy;
-    Args.push_back(Entry);
-    Entry.Node = Size;
-    Args.push_back(Entry);
-    TargetLowering::CallLoweringInfo CLI(
-        Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
-        0, CallingConv::C, /*isTailCall=*/false,
-        /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
-        DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
-    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
-    return CallResult.second;
-  }
-  return SDValue();
-}
diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.h b/lib/Target/ARM64/ARM64SelectionDAGInfo.h
deleted file mode 100644
index 770775f..0000000
--- a/lib/Target/ARM64/ARM64SelectionDAGInfo.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-// -//===----------------------------------------------------------------------===// -// -// This file defines the ARM64 subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64SELECTIONDAGINFO_H -#define ARM64SELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can - /// make the right decision when generating code for different targets. - const ARM64Subtarget *Subtarget; - -public: - explicit ARM64SelectionDAGInfo(const TargetMachine &TM); - ~ARM64SelectionDAGInfo(); - - SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, - SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVolatile, - MachinePointerInfo DstPtrInfo) const override; -}; -} - -#endif diff --git a/lib/Target/ARM64/ARM64StorePairSuppress.cpp b/lib/Target/ARM64/ARM64StorePairSuppress.cpp deleted file mode 100644 index 6521d13..0000000 --- a/lib/Target/ARM64/ARM64StorePairSuppress.cpp +++ /dev/null @@ -1,167 +0,0 @@ -//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass identifies floating point stores that should not be combined into -// store pairs. Later we may do the same for floating point loads. -// ===---------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-stp-suppress" -#include "ARM64InstrInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineTraceMetrics.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetSchedule.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { -class ARM64StorePairSuppress : public MachineFunctionPass { - const ARM64InstrInfo *TII; - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - MachineFunction *MF; - TargetSchedModel SchedModel; - MachineTraceMetrics *Traces; - MachineTraceMetrics::Ensemble *MinInstr; - -public: - static char ID; - ARM64StorePairSuppress() : MachineFunctionPass(ID) {} - - virtual const char *getPassName() const override { - return "ARM64 Store Pair Suppression"; - } - - bool runOnMachineFunction(MachineFunction &F) override; - -private: - bool shouldAddSTPToBlock(const MachineBasicBlock *BB); - - bool isNarrowFPStore(const MachineInstr &MI); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; -char ARM64StorePairSuppress::ID = 0; -} // anonymous - -FunctionPass *llvm::createARM64StorePairSuppressPass() { - return new ARM64StorePairSuppress(); -} - -/// Return true if an STP can be added to this block without increasing the -/// critical resource height. STP is good to form in Ld/St limited blocks and -/// bad to form in float-point limited blocks. This is true independent of the -/// critical path. 
If the critical path is longer than the resource height, the -/// extra vector ops can limit physreg renaming. Otherwise, it could simply -/// oversaturate the vector units. -bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) { - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - - MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB); - unsigned ResLength = BBTrace.getResourceLength(); - - // Get the machine model's scheduling class for STPQi. - // Bypass TargetSchedule's SchedClass resolution since we only have an opcode. - unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass(); - const MCSchedClassDesc *SCDesc = - SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx); - - // If a subtarget does not define resources for STPQi, bail here. - if (SCDesc->isValid() && !SCDesc->isVariant()) { - unsigned ResLenWithSTP = BBTrace.getResourceLength( - ArrayRef(), SCDesc); - if (ResLenWithSTP > ResLength) { - DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber() - << " resources " << ResLength << " -> " << ResLenWithSTP - << "\n"); - return false; - } - } - return true; -} - -/// Return true if this is a floating-point store smaller than the V reg. On -/// cyclone, these require a vector shuffle before storing a pair. -/// Ideally we would call getMatchingPairOpcode() and have the machine model -/// tell us if it's profitable with no cpu knowledge here. -/// -/// FIXME: We plan to develop a decent Target abstraction for simple loads and -/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer. -bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) { - switch (MI.getOpcode()) { - default: - return false; - case ARM64::STRSui: - case ARM64::STRDui: - case ARM64::STURSi: - case ARM64::STURDi: - return true; - } -} - -bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) { - MF = &mf; - TII = static_cast(MF->getTarget().getInstrInfo()); - TRI = MF->getTarget().getRegisterInfo(); - MRI = &MF->getRegInfo(); - const TargetSubtargetInfo &ST = - MF->getTarget().getSubtarget(); - SchedModel.init(*ST.getSchedModel(), &ST, TII); - - Traces = &getAnalysis(); - MinInstr = 0; - - DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n'); - - if (!SchedModel.hasInstrSchedModel()) { - DEBUG(dbgs() << " Skipping pass: no machine model present.\n"); - return false; - } - - // Check for a sequence of stores to the same base address. We don't need to - // precisely determine whether a store pair can be formed. But we do want to - // filter out most situations where we can't form store pairs to avoid - // computing trace metrics in those cases. - for (auto &MBB: *MF) { - bool SuppressSTP = false; - unsigned PrevBaseReg = 0; - for (auto &MI: MBB) { - if (!isNarrowFPStore(MI)) - continue; - unsigned BaseReg; - unsigned Offset; - if (TII->getLdStBaseRegImmOfs(&MI, BaseReg, Offset, TRI)) { - if (PrevBaseReg == BaseReg) { - // If this block can take STPs, skip ahead to the next block. - if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent())) - break; - // Otherwise, continue unpairing the stores in this block. - DEBUG(dbgs() << "Unpairing store " << MI << "\n"); - SuppressSTP = true; - TII->suppressLdStPair(&MI); - } - PrevBaseReg = BaseReg; - } else - PrevBaseReg = 0; - } - } - // This pass just sets some internal MachineMemOperand flags. It can't really - // invalidate anything. 
-
-  return false;
-}
diff --git a/lib/Target/ARM64/ARM64Subtarget.cpp b/lib/Target/ARM64/ARM64Subtarget.cpp
deleted file mode 100644
index 14b5444..0000000
--- a/lib/Target/ARM64/ARM64Subtarget.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ARM64 specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64InstrInfo.h"
-#include "ARM64Subtarget.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_CTOR
-#define GET_SUBTARGETINFO_TARGET_DESC
-#include "ARM64GenSubtargetInfo.inc"
-
-using namespace llvm;
-
-ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU,
-                               const std::string &FS)
-    : ARM64GenSubtargetInfo(TT, CPU, FS), HasZeroCycleRegMove(false),
-      HasZeroCycleZeroing(false), CPUString(CPU), TargetTriple(TT) {
-  // Determine default and user-specified characteristics
-
-  if (CPUString.empty())
-    // We default to Cyclone for now.
-    CPUString = "cyclone";
-
-  ParseSubtargetFeatures(CPUString, FS);
-}
-
-/// ClassifyGlobalReference - Find the target operand flags that describe
-/// how a global value should be referenced for the current subtarget.
-unsigned char
-ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
-                                        const TargetMachine &TM) const {
-
-  // Determine whether this is a reference to a definition or a declaration.
-  // Materializable GVs (in JIT lazy compilation mode) do not require an extra
-  // load from stub.
-  bool isDecl = GV->hasAvailableExternallyLinkage();
-  if (GV->isDeclaration() && !GV->isMaterializable())
-    isDecl = true;
-
-  // MachO large model always goes via a GOT, simply to get a single 8-byte
-  // absolute relocation on all global addresses.
-  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
-    return ARM64II::MO_GOT;
-
-  // The small code model's direct accesses use ADRP, which cannot necessarily
-  // produce the value 0 (if the code is above 4GB). Therefore they must use the
-  // GOT.
-  if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl)
-    return ARM64II::MO_GOT;
-
-  // If symbol visibility is hidden, the extra load is not needed if
-  // the symbol is definitely defined in the current translation unit.
-
-  // The handling of non-hidden symbols in PIC mode is rather target-dependent:
-  //   + On MachO, if the symbol is defined in this module the GOT can be
-  //     skipped.
-  //   + On ELF, the R_AARCH64_COPY relocation means that even symbols actually
-  //     defined could end up in unexpected places. Use a GOT.
-  if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) {
-    if (isTargetMachO())
-      return (isDecl || GV->isWeakForLinker()) ? ARM64II::MO_GOT
-                                               : ARM64II::MO_NO_FLAG;
-    else
-      return ARM64II::MO_GOT;
-  }
-
-  return ARM64II::MO_NO_FLAG;
-}
-
-/// This function returns the name of a function which has an interface
-/// like the non-standard bzero function, if such a function exists on
-/// the current subtarget and it is considered preferable over
-/// memset with zero passed as the second argument.
Otherwise it -/// returns null. -const char *ARM64Subtarget::getBZeroEntry() const { - // At the moment, always prefer bzero. - return "bzero"; -} - -void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, - MachineInstr *begin, MachineInstr *end, - unsigned NumRegionInstrs) const { - // LNT run (at least on Cyclone) showed reasonably significant gains for - // bi-directional scheduling. 253.perlbmk. - Policy.OnlyTopDown = false; - Policy.OnlyBottomUp = false; -} diff --git a/lib/Target/ARM64/ARM64Subtarget.h b/lib/Target/ARM64/ARM64Subtarget.h deleted file mode 100644 index 1cbd79e..0000000 --- a/lib/Target/ARM64/ARM64Subtarget.h +++ /dev/null @@ -1,87 +0,0 @@ -//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the ARM64 specific subclass of TargetSubtarget. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64SUBTARGET_H -#define ARM64SUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" -#include "ARM64RegisterInfo.h" -#include - -#define GET_SUBTARGETINFO_HEADER -#include "ARM64GenSubtargetInfo.inc" - -namespace llvm { -class GlobalValue; -class StringRef; - -class ARM64Subtarget : public ARM64GenSubtargetInfo { -protected: - // HasZeroCycleRegMove - Has zero-cycle register mov instructions. - bool HasZeroCycleRegMove; - - // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. - bool HasZeroCycleZeroing; - - /// CPUString - String name of used CPU. - std::string CPUString; - - /// TargetTriple - What processor and OS we're targeting. - Triple TargetTriple; - -public: - /// This constructor initializes the data members to match that - /// of the specified triple. - ARM64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); - - bool enableMachineScheduler() const override { return true; } - - bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } - - bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } - - bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - - bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } - - bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } - - bool isCyclone() const { return CPUString == "cyclone"; } - - /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size - /// that still makes it profitable to inline the call. - unsigned getMaxInlineSizeThreshold() const { return 64; } - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - /// ClassifyGlobalReference - Find the target operand flags that describe - /// how a global value should be referenced for the current subtarget. - unsigned char ClassifyGlobalReference(const GlobalValue *GV, - const TargetMachine &TM) const; - - /// This function returns the name of a function which has an interface - /// like the non-standard bzero function, if such a function exists on - /// the current subtarget and it is considered prefereable over - /// memset with zero passed as the second argument. Otherwise it - /// returns null. 
- const char *getBZeroEntry() const; - - void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, - MachineInstr *end, unsigned NumRegionInstrs) const; -}; -} // End llvm namespace - -#endif // ARM64SUBTARGET_H diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/ARM64/ARM64TargetMachine.cpp deleted file mode 100644 index 101dc25..0000000 --- a/lib/Target/ARM64/ARM64TargetMachine.cpp +++ /dev/null @@ -1,157 +0,0 @@ -//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64TargetMachine.h" -#include "llvm/PassManager.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Transforms/Scalar.h" -using namespace llvm; - -static cl::opt EnableCCMP("arm64-ccmp", - cl::desc("Enable the CCMP formation pass"), - cl::init(true)); - -static cl::opt EnableStPairSuppress("arm64-stp-suppress", cl::Hidden, - cl::desc("Suppress STP for ARM64"), - cl::init(true)); - -static cl::opt -EnablePromoteConstant("arm64-promote-const", cl::Hidden, - cl::desc("Enable the promote constant pass"), - cl::init(true)); - -static cl::opt -EnableCollectLOH("arm64-collect-loh", cl::Hidden, - cl::desc("Enable the pass that emits the linker" - " optimization hints (LOH)"), - cl::init(true)); - -extern "C" void LLVMInitializeARM64Target() { - // Register the target. - RegisterTargetMachine X(TheARM64Target); -} - -/// TargetMachine ctor - Create an ARM64 architecture model. -/// -ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), - DL(Subtarget.isTargetMachO() ? "e-m:o-i64:64-i128:128-n32:64-S128" - : "e-m:e-i64:64-i128:128-n32:64-S128"), - InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget), - TSInfo(*this) { - initAsmInfo(); -} - -namespace { -/// ARM64 Code Generator Pass Configuration Options. -class ARM64PassConfig : public TargetPassConfig { -public: - ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} - - ARM64TargetMachine &getARM64TargetMachine() const { - return getTM(); - } - - virtual bool addPreISel(); - virtual bool addInstSelector(); - virtual bool addILPOpts(); - virtual bool addPreRegAlloc(); - virtual bool addPostRegAlloc(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); -}; -} // namespace - -void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) { - // Add first the target-independent BasicTTI pass, then our ARM64 pass. This - // allows the ARM64 pass to delegate to the target independent layer when - // appropriate. 
-  PM.add(createBasicTargetTransformInfoPass(this));
-  PM.add(createARM64TargetTransformInfoPass(this));
-}
-
-TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) {
-  return new ARM64PassConfig(this, PM);
-}
-
-// Pass Pipeline Configuration
-bool ARM64PassConfig::addPreISel() {
-  // Run promote constant before global merge, so that the promoted constants
-  // get a chance to be merged.
-  if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
-    addPass(createARM64PromoteConstantPass());
-  if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createGlobalMergePass(TM));
-  if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createARM64AddressTypePromotionPass());
-  return false;
-}
-
-bool ARM64PassConfig::addInstSelector() {
-  addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel()));
-
-  // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
-  // references to _TLS_MODULE_BASE_ as possible).
-  if (TM->getSubtarget<ARM64Subtarget>().isTargetELF() &&
-      getOptLevel() != CodeGenOpt::None)
-    addPass(createARM64CleanupLocalDynamicTLSPass());
-
-  return false;
-}
-
-bool ARM64PassConfig::addILPOpts() {
-  if (EnableCCMP)
-    addPass(createARM64ConditionalCompares());
-  addPass(&EarlyIfConverterID);
-  if (EnableStPairSuppress)
-    addPass(createARM64StorePairSuppressPass());
-  return true;
-}
-
-bool ARM64PassConfig::addPreRegAlloc() {
-  // Use AdvSIMD scalar instructions whenever profitable.
-  addPass(createARM64AdvSIMDScalar());
-  return true;
-}
-
-bool ARM64PassConfig::addPostRegAlloc() {
-  // Change dead register definitions to refer to the zero register.
-  addPass(createARM64DeadRegisterDefinitions());
-  return true;
-}
-
-bool ARM64PassConfig::addPreSched2() {
-  // Expand some pseudo instructions to allow proper scheduling.
-  addPass(createARM64ExpandPseudoPass());
-  // Use load/store pair instructions when possible.
-  addPass(createARM64LoadStoreOptimizationPass());
-  return true;
-}
-
-bool ARM64PassConfig::addPreEmitPass() {
-  // Relax conditional branch instructions if they're otherwise out of
-  // range of their destination.
-  addPass(createARM64BranchRelaxation());
-  if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH)
-    addPass(createARM64CollectLOHPass());
-  return true;
-}
diff --git a/lib/Target/ARM64/ARM64TargetMachine.h b/lib/Target/ARM64/ARM64TargetMachine.h
deleted file mode 100644
index 8274550..0000000
--- a/lib/Target/ARM64/ARM64TargetMachine.h
+++ /dev/null
@@ -1,69 +0,0 @@
-//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the ARM64 specific subclass of TargetMachine.
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64TARGETMACHINE_H -#define ARM64TARGETMACHINE_H - -#include "ARM64InstrInfo.h" -#include "ARM64ISelLowering.h" -#include "ARM64Subtarget.h" -#include "ARM64FrameLowering.h" -#include "ARM64SelectionDAGInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCStreamer.h" - -namespace llvm { - -class ARM64TargetMachine : public LLVMTargetMachine { -protected: - ARM64Subtarget Subtarget; - -private: - const DataLayout DL; - ARM64InstrInfo InstrInfo; - ARM64TargetLowering TLInfo; - ARM64FrameLowering FrameLowering; - ARM64SelectionDAGInfo TSInfo; - -public: - ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, - CodeModel::Model CM, CodeGenOpt::Level OL); - - const ARM64Subtarget *getSubtargetImpl() const override { return &Subtarget; } - const ARM64TargetLowering *getTargetLowering() const override { - return &TLInfo; - } - const DataLayout *getDataLayout() const override { return &DL; } - const ARM64FrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - const ARM64InstrInfo *getInstrInfo() const override { return &InstrInfo; } - const ARM64RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - const ARM64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - // Pass Pipeline Configuration - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - - /// \brief Register ARM64 analysis passes with a pass manager. - void addAnalysisPasses(PassManagerBase &PM) override; -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.cpp b/lib/Target/ARM64/ARM64TargetObjectFile.cpp deleted file mode 100644 index cde01e5..0000000 --- a/lib/Target/ARM64/ARM64TargetObjectFile.cpp +++ /dev/null @@ -1,52 +0,0 @@ -//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "ARM64TargetObjectFile.h" -#include "ARM64TargetMachine.h" -#include "llvm/IR/Mangler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Dwarf.h" -using namespace llvm; -using namespace dwarf; - -void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx, - const TargetMachine &TM) { - TargetLoweringObjectFileELF::Initialize(Ctx, TM); - InitializeELF(TM.Options.UseInitArray); -} - -const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference( - const GlobalValue *GV, unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, MachineModuleInfo *MMI, - MCStreamer &Streamer) const { - // On Darwin, we can reference dwarf symbols with foo@GOT-., which - // is an indirect pc-relative reference. The default implementation - // won't reference using the GOT, so we need this target-specific - // version. 
- if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { - const MCSymbol *Sym = TM.getSymbol(GV, Mang); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext()); - MCSymbol *PCSym = getContext().CreateTempSymbol(); - Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); - return MCBinaryExpr::CreateSub(Res, PC, getContext()); - } - - return TargetLoweringObjectFileMachO::getTTypeGlobalReference( - GV, Encoding, Mang, TM, MMI, Streamer); -} - -MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol( - const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, - MachineModuleInfo *MMI) const { - return TM.getSymbol(GV, Mang); -} diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.h b/lib/Target/ARM64/ARM64TargetObjectFile.h deleted file mode 100644 index 62446f9..0000000 --- a/lib/Target/ARM64/ARM64TargetObjectFile.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- ARM64TargetObjectFile.h - ARM64 Object Info -*- C++ -------------*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H -#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H - -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Target/TargetLoweringObjectFile.h" - -namespace llvm { -class ARM64TargetMachine; - -/// This implementation is used for AArch64 ELF targets (Linux in particular). -class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF { - void Initialize(MCContext &Ctx, const TargetMachine &TM) override; -}; - -/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin. -class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO { -public: - const MCExpr *getTTypeGlobalReference(const GlobalValue *GV, - unsigned Encoding, Mangler &Mang, - const TargetMachine &TM, - MachineModuleInfo *MMI, - MCStreamer &Streamer) const override; - - MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang, - const TargetMachine &TM, - MachineModuleInfo *MMI) const override; -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp deleted file mode 100644 index 9b598d7..0000000 --- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp +++ /dev/null @@ -1,326 +0,0 @@ -//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// \file -/// This file implements a TargetTransformInfo analysis pass specific to the -/// ARM64 target machine. It uses the target's detailed information to provide -/// more precise answers to certain TTI queries, while letting the target -/// independent and default TTI implementations handle the rest. 
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64tti"
-#include "ARM64.h"
-#include "ARM64TargetMachine.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/CostTable.h"
-#include "llvm/Target/TargetLowering.h"
-using namespace llvm;
-
-// Declare the pass initialization routine locally as target-specific passes
-// don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeARM64TTIPass(PassRegistry &);
-}
-
-namespace {
-
-class ARM64TTI final : public ImmutablePass, public TargetTransformInfo {
-  const ARM64TargetMachine *TM;
-  const ARM64Subtarget *ST;
-  const ARM64TargetLowering *TLI;
-
-  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
-  /// are set if the result needs to be inserted and/or extracted from vectors.
-  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
-  ARM64TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
-    llvm_unreachable("This pass cannot be directly constructed");
-  }
-
-  ARM64TTI(const ARM64TargetMachine *TM)
-      : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
-        TLI(TM->getTargetLowering()) {
-    initializeARM64TTIPass(*PassRegistry::getPassRegistry());
-  }
-
-  void initializePass() override { pushTTIStack(this); }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    TargetTransformInfo::getAnalysisUsage(AU);
-  }
-
-  /// Pass identification.
-  static char ID;
-
-  /// Provide necessary pointer adjustments for the two base classes.
-  void *getAdjustedAnalysisPointer(const void *ID) override {
-    if (ID == &TargetTransformInfo::ID)
-      return (TargetTransformInfo *)this;
-    return this;
-  }
-
-  /// \name Scalar TTI Implementations
-  /// @{
-
-  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
-  PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
-
-  /// @}
-
-  /// \name Vector TTI Implementations
-  /// @{
-
-  unsigned getNumberOfRegisters(bool Vector) const override {
-    if (Vector)
-      return 32;
-
-    return 31;
-  }
-
-  unsigned getRegisterBitWidth(bool Vector) const override {
-    if (Vector)
-      return 128;
-
-    return 64;
-  }
-
-  unsigned getMaximumUnrollFactor() const override { return 2; }
-
-  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
-      override;
-
-  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
-      override;
-
-  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
-                                  OperandValueKind Opd1Info = OK_AnyValue,
-                                  OperandValueKind Opd2Info = OK_AnyValue) const
-      override;
-
-  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
-
-  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
-      override;
-
-  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                           unsigned AddressSpace) const override;
-  /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti",
-                   "ARM64 Target Transform Info", true, true, false)
-char ARM64TTI::ID = 0;
-
-ImmutablePass *
-llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) {
-  return new ARM64TTI(TM);
-}
-
-unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
-  assert(Ty->isIntegerTy());
-
-  unsigned BitSize = Ty->getPrimitiveSizeInBits();
-  if (BitSize == 0)
-    return ~0U;
-
-
int64_t Val = Imm.getSExtValue(); - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) - return 1; - - if ((int64_t)Val < 0) - Val = ~Val; - if (BitSize == 32) - Val &= (1LL << 32) - 1; - - unsigned LZ = countLeadingZeros((uint64_t)Val); - unsigned Shift = (63 - LZ) / 16; - // MOVZ is free so return true for one or fewer MOVK. - return (Shift == 0) ? 1 : Shift; -} - -ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const { - assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); - if (TyWidth == 32 || TyWidth == 64) - return PSK_FastHardware; - // TODO: ARM64TargetLowering::LowerCTPOP() supports 128bit popcount. - return PSK_Software; -} - -unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - int ISD = TLI->InstructionOpcodeToISD(Opcode); - assert(ISD && "Invalid opcode"); - - EVT SrcTy = TLI->getValueType(Src); - EVT DstTy = TLI->getValueType(Dst); - - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); - - static const TypeConversionCostTblEntry ConversionTbl[] = { - // LowerVectorINT_TO_FP: - { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, - { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, - // LowerVectorFP_TO_INT - { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 }, - }; - - int Idx = ConvertCostTableLookup( - ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); - if (Idx != -1) - return ConversionTbl[Idx].Cost; - - return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); -} - -unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - assert(Val->isVectorTy() && "This must be a vector type"); - - if (Index != -1U) { - // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(Val); - - // This type is legalized to a scalar type. - if (!LT.second.isVector()) - return 0; - - // The type may be split. Normalize the index to the new type. - unsigned Width = LT.second.getVectorNumElements(); - Index = Index % Width; - - // The element at index zero is already inside the vector. - if (Index == 0) - return 0; - } - - // All other insert/extracts cost this much. - return 2; -} - -unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Opd1Info, - OperandValueKind Opd2Info) const { - // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(Ty); - - int ISD = TLI->InstructionOpcodeToISD(Opcode); - - switch (ISD) { - default: - return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info, - Opd2Info); - case ISD::ADD: - case ISD::MUL: - case ISD::XOR: - case ISD::OR: - case ISD::AND: - // These nodes are marked as 'custom' for combining purposes only. - // We know that they are legal. 
See LowerAdd in ISelLowering.
-    return 1 * LT.first;
-  }
-}
-
-unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
-  // Address computations in vectorized code with non-consecutive addresses will
-  // likely result in more instructions compared to scalar code where the
-  // computation can more often be merged into the index mode. The resulting
-  // extra micro-ops can significantly decrease throughput.
-  unsigned NumVectorInstToHideOverhead = 10;
-
-  if (Ty->isVectorTy() && IsComplex)
-    return NumVectorInstToHideOverhead;
-
-  // In many cases the address computation is not merged into the instruction
-  // addressing mode.
-  return 1;
-}
-
-unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                      Type *CondTy) const {
-
-  int ISD = TLI->InstructionOpcodeToISD(Opcode);
-  // We don't lower vector selects that are wider than the register width well.
-  if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
-    // We would need this many instructions to hide the scalarization happening.
-    unsigned AmortizationCost = 20;
-    static const TypeConversionCostTblEntry
-    VectorSelectTbl[] = {
-      { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
-      { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
-      { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
-      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
-      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
-      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
-    };
-
-    EVT SelCondTy = TLI->getValueType(CondTy);
-    EVT SelValTy = TLI->getValueType(ValTy);
-    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
-      int Idx =
-          ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
-                                 SelValTy.getSimpleVT());
-      if (Idx != -1)
-        return VectorSelectTbl[Idx].Cost;
-    }
-  }
-  return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
-}
-
-unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                   unsigned Alignment,
-                                   unsigned AddressSpace) const {
-  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
-
-  if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
-      Src->getVectorElementType()->isIntegerTy(64)) {
-    // Unaligned stores are extremely inefficient. We don't split
-    // unaligned v2i64 stores because of the negative impact that has been
-    // shown in practice on inlined memcpy code.
-    // We make v2i64 stores expensive so that we will only vectorize if there
-    // are 6 other instructions getting vectorized.
-    unsigned AmortizationCost = 6;
-
-    return LT.first * 2 * AmortizationCost;
-  }
-
-  if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
-      Src->getVectorNumElements() < 8) {
-    // We scalarize the loads/stores because there is no v.4b register and we
-    // have to promote the elements to v.4h.
-    unsigned NumVecElts = Src->getVectorNumElements();
-    unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
-    // We generate 2 instructions per vector element.
-    return NumVectorizableInstsToAmortize * NumVecElts * 2;
-  }
-
-  return LT.first;
-}
diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
deleted file mode 100644
index 38a61d8..0000000
--- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
+++ /dev/null
@@ -1,4832 +0,0 @@
-//===-- ARM64AsmParser.cpp - Parse ARM64 assembly to MCInst instructions --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License.
See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" -#include "llvm/MC/MCParser/MCAsmParser.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Twine.h" -#include -using namespace llvm; - -namespace { - -class ARM64Operand; - -class ARM64AsmParser : public MCTargetAsmParser { -public: - typedef SmallVectorImpl OperandVector; - -private: - StringRef Mnemonic; ///< Instruction mnemonic. - MCSubtargetInfo &STI; - MCAsmParser &Parser; - - MCAsmParser &getParser() const { return Parser; } - MCAsmLexer &getLexer() const { return Parser.getLexer(); } - - SMLoc getLoc() const { return Parser.getTok().getLoc(); } - - bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); - unsigned parseCondCodeString(StringRef Cond); - bool parseCondCode(OperandVector &Operands, bool invertCondCode); - int tryParseRegister(); - int tryMatchVectorRegister(StringRef &Kind); - bool parseOptionalShift(OperandVector &Operands); - bool parseOptionalExtend(OperandVector &Operands); - bool parseRegister(OperandVector &Operands); - bool parseMemory(OperandVector &Operands); - bool parseSymbolicImmVal(const MCExpr *&ImmVal); - bool parseVectorList(OperandVector &Operands); - bool parseOperand(OperandVector &Operands, bool isCondCode, - bool invertCondCode); - - void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } - bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } - bool showMatchError(SMLoc Loc, unsigned ErrCode); - - bool parseDirectiveWord(unsigned Size, SMLoc L); - bool parseDirectiveTLSDescCall(SMLoc L); - - bool parseDirectiveLOH(StringRef LOH, SMLoc L); - - bool validateInstruction(MCInst &Inst, SmallVectorImpl &Loc); - bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, MCStreamer &Out, - unsigned &ErrorInfo, bool MatchingInlineAsm); -/// @name Auto-generated Match Functions -/// { - -#define GET_ASSEMBLER_HEADER -#include "ARM64GenAsmMatcher.inc" - - /// } - - OperandMatchResultTy tryParseNoIndexMemory(OperandVector &Operands); - OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands); - OperandMatchResultTy tryParseSystemRegister(OperandVector &Operands); - OperandMatchResultTy tryParseCPSRField(OperandVector &Operands); - OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands); - OperandMatchResultTy tryParsePrefetch(OperandVector &Operands); - OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands); - OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands); - OperandMatchResultTy tryParseFPImm(OperandVector &Operands); - bool tryParseVectorRegister(OperandVector &Operands); - -public: - enum ARM64MatchResultTy { - Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY, -#define 
GET_OPERAND_DIAGNOSTIC_TYPES -#include "ARM64GenAsmMatcher.inc" - }; - ARM64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { - MCAsmParserExtension::Initialize(_Parser); - } - - virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, OperandVector &Operands); - virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); - virtual bool ParseDirective(AsmToken DirectiveID); - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); - - static bool classifySymbolRef(const MCExpr *Expr, - ARM64MCExpr::VariantKind &ELFRefKind, - MCSymbolRefExpr::VariantKind &DarwinRefKind, - const MCConstantExpr *&Addend); -}; -} // end anonymous namespace - -namespace { - -/// ARM64Operand - Instances of this class represent a parsed ARM64 machine -/// instruction. -class ARM64Operand : public MCParsedAsmOperand { -public: - enum MemIdxKindTy { - ImmediateOffset, // pre-indexed, no writeback - RegisterOffset // register offset, with optional extend - }; - -private: - enum KindTy { - k_Immediate, - k_Memory, - k_Register, - k_VectorList, - k_VectorIndex, - k_Token, - k_SysCR, - k_Prefetch, - k_Shifter, - k_Extend, - k_FPImm, - k_Barrier, - k_SystemRegister, - k_CPSRField - } Kind; - - SMLoc StartLoc, EndLoc, OffsetLoc; - - struct TokOp { - const char *Data; - unsigned Length; - bool IsSuffix; // Is the operand actually a suffix on the mnemonic. - }; - - struct RegOp { - unsigned RegNum; - bool isVector; - }; - - struct VectorListOp { - unsigned RegNum; - unsigned Count; - unsigned NumElements; - unsigned ElementKind; - }; - - struct VectorIndexOp { - unsigned Val; - }; - - struct ImmOp { - const MCExpr *Val; - }; - - struct FPImmOp { - unsigned Val; // Encoded 8-bit representation. - }; - - struct BarrierOp { - unsigned Val; // Not the enum since not all values have names. - }; - - struct SystemRegisterOp { - // 16-bit immediate, usually from the ARM64SYS::SystermRegister enum, - // but not limited to those values. - uint16_t Val; - }; - - struct CPSRFieldOp { - ARM64SYS::CPSRField Field; - }; - - struct SysCRImmOp { - unsigned Val; - }; - - struct PrefetchOp { - unsigned Val; - }; - - struct ShifterOp { - unsigned Val; - }; - - struct ExtendOp { - unsigned Val; - }; - - // This is for all forms of ARM64 address expressions - struct MemOp { - unsigned BaseRegNum, OffsetRegNum; - ARM64_AM::ExtendType ExtType; - unsigned ShiftVal; - bool ExplicitShift; - const MCExpr *OffsetImm; - MemIdxKindTy Mode; - }; - - union { - struct TokOp Tok; - struct RegOp Reg; - struct VectorListOp VectorList; - struct VectorIndexOp VectorIndex; - struct ImmOp Imm; - struct FPImmOp FPImm; - struct BarrierOp Barrier; - struct SystemRegisterOp SystemRegister; - struct CPSRFieldOp CPSRField; - struct SysCRImmOp SysCRImm; - struct PrefetchOp Prefetch; - struct ShifterOp Shifter; - struct ExtendOp Extend; - struct MemOp Mem; - }; - - // Keep the MCContext around as the MCExprs may need manipulated during - // the add<>Operands() calls. 
- MCContext &Ctx; - - ARM64Operand(KindTy K, MCContext &_Ctx) - : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} - -public: - ARM64Operand(const ARM64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { - Kind = o.Kind; - StartLoc = o.StartLoc; - EndLoc = o.EndLoc; - switch (Kind) { - case k_Token: - Tok = o.Tok; - break; - case k_Immediate: - Imm = o.Imm; - break; - case k_FPImm: - FPImm = o.FPImm; - break; - case k_Barrier: - Barrier = o.Barrier; - break; - case k_SystemRegister: - SystemRegister = o.SystemRegister; - break; - case k_CPSRField: - CPSRField = o.CPSRField; - break; - case k_Register: - Reg = o.Reg; - break; - case k_VectorList: - VectorList = o.VectorList; - break; - case k_VectorIndex: - VectorIndex = o.VectorIndex; - break; - case k_SysCR: - SysCRImm = o.SysCRImm; - break; - case k_Prefetch: - Prefetch = o.Prefetch; - break; - case k_Memory: - Mem = o.Mem; - break; - case k_Shifter: - Shifter = o.Shifter; - break; - case k_Extend: - Extend = o.Extend; - break; - } - } - - /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } - /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } - /// getOffsetLoc - Get the location of the offset of this memory operand. - SMLoc getOffsetLoc() const { return OffsetLoc; } - - StringRef getToken() const { - assert(Kind == k_Token && "Invalid access!"); - return StringRef(Tok.Data, Tok.Length); - } - - bool isTokenSuffix() const { - assert(Kind == k_Token && "Invalid access!"); - return Tok.IsSuffix; - } - - const MCExpr *getImm() const { - assert(Kind == k_Immediate && "Invalid access!"); - return Imm.Val; - } - - unsigned getFPImm() const { - assert(Kind == k_FPImm && "Invalid access!"); - return FPImm.Val; - } - - unsigned getBarrier() const { - assert(Kind == k_Barrier && "Invalid access!"); - return Barrier.Val; - } - - uint16_t getSystemRegister() const { - assert(Kind == k_SystemRegister && "Invalid access!"); - return SystemRegister.Val; - } - - ARM64SYS::CPSRField getCPSRField() const { - assert(Kind == k_CPSRField && "Invalid access!"); - return CPSRField.Field; - } - - unsigned getReg() const { - assert(Kind == k_Register && "Invalid access!"); - return Reg.RegNum; - } - - unsigned getVectorListStart() const { - assert(Kind == k_VectorList && "Invalid access!"); - return VectorList.RegNum; - } - - unsigned getVectorListCount() const { - assert(Kind == k_VectorList && "Invalid access!"); - return VectorList.Count; - } - - unsigned getVectorIndex() const { - assert(Kind == k_VectorIndex && "Invalid access!"); - return VectorIndex.Val; - } - - unsigned getSysCR() const { - assert(Kind == k_SysCR && "Invalid access!"); - return SysCRImm.Val; - } - - unsigned getPrefetch() const { - assert(Kind == k_Prefetch && "Invalid access!"); - return Prefetch.Val; - } - - unsigned getShifter() const { - assert(Kind == k_Shifter && "Invalid access!"); - return Shifter.Val; - } - - unsigned getExtend() const { - assert(Kind == k_Extend && "Invalid access!"); - return Extend.Val; - } - - bool isImm() const { return Kind == k_Immediate; } - bool isSImm9() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -256 && Val < 256); - } - bool isSImm7s4() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -256 
&& Val <= 252 && (Val & 3) == 0); - } - bool isSImm7s8() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -512 && Val <= 504 && (Val & 7) == 0); - } - bool isSImm7s16() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0); - } - bool isImm0_7() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 8); - } - bool isImm1_8() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val > 0 && Val < 9); - } - bool isImm0_15() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 16); - } - bool isImm1_16() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val > 0 && Val < 17); - } - bool isImm0_31() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 32); - } - bool isImm1_31() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 32); - } - bool isImm1_32() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 33); - } - bool isImm0_63() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 64); - } - bool isImm1_63() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 64); - } - bool isImm1_64() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 1 && Val < 65); - } - bool isImm0_127() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 128); - } - bool isImm0_255() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 256); - } - bool isImm0_65535() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - int64_t Val = MCE->getValue(); - return (Val >= 0 && Val < 65536); - } - bool isLogicalImm32() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - return ARM64_AM::isLogicalImmediate(MCE->getValue(), 32); - } - bool isLogicalImm64() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - return 
ARM64_AM::isLogicalImmediate(MCE->getValue(), 64); - } - bool isSIMDImmType10() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return false; - return ARM64_AM::isAdvSIMDModImmType10(MCE->getValue()); - } - bool isBranchTarget26() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2)); - } - bool isBranchTarget19() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2)); - } - bool isBranchTarget14() const { - if (!isImm()) - return false; - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) - return true; - int64_t Val = MCE->getValue(); - if (Val & 0x3) - return false; - return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2)); - } - - bool isMovWSymbol(ArrayRef AllowedModifiers) const { - if (!isImm()) - return false; - - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!ARM64AsmParser::classifySymbolRef(getImm(), ELFRefKind, DarwinRefKind, - Addend)) { - return false; - } - if (DarwinRefKind != MCSymbolRefExpr::VK_None) - return false; - - for (unsigned i = 0; i != AllowedModifiers.size(); ++i) { - if (ELFRefKind == AllowedModifiers[i]) - return Addend == 0; - } - - return false; - } - - bool isMovZSymbolG3() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2, - ARM64MCExpr::VK_TPREL_G2, - ARM64MCExpr::VK_DTPREL_G2 }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G1, - ARM64MCExpr::VK_GOTTPREL_G1, - ARM64MCExpr::VK_TPREL_G1, - ARM64MCExpr::VK_DTPREL_G1, }; - return isMovWSymbol(Variants); - } - - bool isMovZSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G0, - ARM64MCExpr::VK_TPREL_G0, - ARM64MCExpr::VK_DTPREL_G0 }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG2() const { - static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2_NC }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG1() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G1_NC, ARM64MCExpr::VK_TPREL_G1_NC, - ARM64MCExpr::VK_DTPREL_G1_NC - }; - return isMovWSymbol(Variants); - } - - bool isMovKSymbolG0() const { - static ARM64MCExpr::VariantKind Variants[] = { - ARM64MCExpr::VK_ABS_G0_NC, ARM64MCExpr::VK_GOTTPREL_G0_NC, - ARM64MCExpr::VK_TPREL_G0_NC, ARM64MCExpr::VK_DTPREL_G0_NC - }; - return isMovWSymbol(Variants); - } - - bool isFPImm() const { return Kind == k_FPImm; } - bool isBarrier() const { return Kind == k_Barrier; } - bool isSystemRegister() const { - if (Kind == k_SystemRegister) - return true; - // SPSel is legal for both the system register and the CPSR-field - // variants of MSR, so special case that. Fugly. 
-    return (Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel);
-  }
-  bool isSystemCPSRField() const { return Kind == k_CPSRField; }
-  bool isReg() const { return Kind == k_Register && !Reg.isVector; }
-  bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
-
-  /// Is this a vector list with the type implicit (presumably attached to the
-  /// instruction itself)?
-  template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const {
-    return Kind == k_VectorList && VectorList.Count == NumRegs &&
-           !VectorList.ElementKind;
-  }
-
-  template <unsigned NumRegs, unsigned NumElements, char ElementKind>
-  bool isTypedVectorList() const {
-    if (Kind != k_VectorList)
-      return false;
-    if (VectorList.Count != NumRegs)
-      return false;
-    if (VectorList.ElementKind != ElementKind)
-      return false;
-    return VectorList.NumElements == NumElements;
-  }
-
-  bool isVectorIndexB() const {
-    return Kind == k_VectorIndex && VectorIndex.Val < 16;
-  }
-  bool isVectorIndexH() const {
-    return Kind == k_VectorIndex && VectorIndex.Val < 8;
-  }
-  bool isVectorIndexS() const {
-    return Kind == k_VectorIndex && VectorIndex.Val < 4;
-  }
-  bool isVectorIndexD() const {
-    return Kind == k_VectorIndex && VectorIndex.Val < 2;
-  }
-  bool isToken() const { return Kind == k_Token; }
-  bool isTokenEqual(StringRef Str) const {
-    return Kind == k_Token && getToken() == Str;
-  }
-  bool isMem() const { return Kind == k_Memory; }
-  bool isSysCR() const { return Kind == k_SysCR; }
-  bool isPrefetch() const { return Kind == k_Prefetch; }
-  bool isShifter() const { return Kind == k_Shifter; }
-  bool isExtend() const {
-    // lsl is an alias for UXTX but will be parsed as a k_Shifter operand.
-    if (isShifter()) {
-      ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
-      return ST == ARM64_AM::LSL;
-    }
-    return Kind == k_Extend;
-  }
-  bool isExtend64() const {
-    if (Kind != k_Extend)
-      return false;
-    // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class).
-    ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val);
-    return ET != ARM64_AM::UXTX && ET != ARM64_AM::SXTX;
-  }
-  bool isExtendLSL64() const {
-    // lsl is an alias for UXTX but will be parsed as a k_Shifter operand.
-    if (isShifter()) {
-      ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
-      return ST == ARM64_AM::LSL;
-    }
-    if (Kind != k_Extend)
-      return false;
-    ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val);
-    return ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX;
-  }
-
-  bool isArithmeticShifter() const {
-    if (!isShifter())
-      return false;
-
-    // An arithmetic shifter is LSL, LSR, or ASR.
-    ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
-    return ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR;
-  }
-
-  bool isMovImm32Shifter() const {
-    if (!isShifter())
-      return false;
-
-    // A 32-bit MOVi shifter is LSL of 0 or 16.
-    ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
-    if (ST != ARM64_AM::LSL)
-      return false;
-    uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val);
-    return (Val == 0 || Val == 16);
-  }
-
-  bool isMovImm64Shifter() const {
-    if (!isShifter())
-      return false;
-
-    // A 64-bit MOVi shifter is LSL of 0, 16, 32, or 48.
-    ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
-    if (ST != ARM64_AM::LSL)
-      return false;
-    uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val);
-    return (Val == 0 || Val == 16 || Val == 32 || Val == 48);
-  }
-
-  bool isAddSubShifter() const {
-    if (!isShifter())
-      return false;
-
-    // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'.
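
(Aside, for readers of the shifter predicates above and below: Shifter.Val packs the shift type and the shift amount into a single immediate via the ARM64_AM helpers. The following is a self-contained model of that packing under an assumed bit layout; the authoritative encoding lives in MCTargetDesc/ARM64AddressingModes.h, and the names here are illustrative only.)

    // Assumed layout for illustration: shift type above bit 6, amount in [5:0].
    enum ShiftType { LSL, LSR, ASR, ROR, MSL };

    static unsigned getShifterImm(ShiftType ST, unsigned Amount) {
      return (unsigned(ST) << 6) | (Amount & 0x3f);
    }
    static ShiftType getShiftType(unsigned Imm) { return ShiftType(Imm >> 6); }
    static unsigned getShiftValue(unsigned Imm) { return Imm & 0x3f; }

    // Under that model, the ADD/SUB check that follows reduces to:
    static bool isAddSubShifterVal(unsigned Val) {
      return getShiftType(Val) == LSL &&
             (getShiftValue(Val) == 0 || getShiftValue(Val) == 12);
    }
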
-    unsigned Val = Shifter.Val;
-    return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
-           (ARM64_AM::getShiftValue(Val) == 0 ||
-            ARM64_AM::getShiftValue(Val) == 12);
-  }
-
-  bool isLogicalVecShifter() const {
-    if (!isShifter())
-      return false;
-
-    // A logical vector shifter is a left shift by 0, 8, 16, or 24.
-    unsigned Val = Shifter.Val;
-    unsigned Shift = ARM64_AM::getShiftValue(Val);
-    return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
-           (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24);
-  }
-
-  bool isLogicalVecHalfWordShifter() const {
-    if (!isLogicalVecShifter())
-      return false;
-
-    // A logical vector half-word shifter is a left shift by 0 or 8.
-    unsigned Val = Shifter.Val;
-    unsigned Shift = ARM64_AM::getShiftValue(Val);
-    return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
-           (Shift == 0 || Shift == 8);
-  }
-
-  bool isMoveVecShifter() const {
-    if (!isShifter())
-      return false;
-
-    // A move vector shifter is an MSL shift of 8 or 16.
-    unsigned Val = Shifter.Val;
-    unsigned Shift = ARM64_AM::getShiftValue(Val);
-    return ARM64_AM::getShiftType(Val) == ARM64_AM::MSL &&
-           (Shift == 8 || Shift == 16);
-  }
-
-  bool isMemoryRegisterOffset8() const {
-    return isMem() && Mem.Mode == RegisterOffset && Mem.ShiftVal == 0;
-  }
-
-  bool isMemoryRegisterOffset16() const {
-    return isMem() && Mem.Mode == RegisterOffset &&
-           (Mem.ShiftVal == 0 || Mem.ShiftVal == 1);
-  }
-
-  bool isMemoryRegisterOffset32() const {
-    return isMem() && Mem.Mode == RegisterOffset &&
-           (Mem.ShiftVal == 0 || Mem.ShiftVal == 2);
-  }
-
-  bool isMemoryRegisterOffset64() const {
-    return isMem() && Mem.Mode == RegisterOffset &&
-           (Mem.ShiftVal == 0 || Mem.ShiftVal == 3);
-  }
-
-  bool isMemoryRegisterOffset128() const {
-    return isMem() && Mem.Mode == RegisterOffset &&
-           (Mem.ShiftVal == 0 || Mem.ShiftVal == 4);
-  }
-
-  bool isMemoryUnscaled() const {
-    if (!isMem())
-      return false;
-    if (Mem.Mode != ImmediateOffset)
-      return false;
-    if (!Mem.OffsetImm)
-      return true;
-    // Make sure the immediate value is valid.
-    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
-    if (!CE)
-      return false;
-    // The offset must fit in a signed 9-bit unscaled immediate.
-    int64_t Value = CE->getValue();
-    return (Value >= -256 && Value < 256);
-  }
-  // Fallback unscaled operands are for aliases of LDR/STR that fall back
-  // to LDUR/STUR when the offset is not legal for the former but is for
-  // the latter. As such, in addition to checking for being a legal unscaled
-  // address, also check that it is not a legal scaled address. This avoids
-  // ambiguity in the matcher.
-  bool isMemoryUnscaledFB8() const {
-    return isMemoryUnscaled() && !isMemoryIndexed8();
-  }
-  bool isMemoryUnscaledFB16() const {
-    return isMemoryUnscaled() && !isMemoryIndexed16();
-  }
-  bool isMemoryUnscaledFB32() const {
-    return isMemoryUnscaled() && !isMemoryIndexed32();
-  }
-  bool isMemoryUnscaledFB64() const {
-    return isMemoryUnscaled() && !isMemoryIndexed64();
-  }
-  bool isMemoryUnscaledFB128() const {
-    return isMemoryUnscaled() && !isMemoryIndexed128();
-  }
-  bool isMemoryIndexed(unsigned Scale) const {
-    if (!isMem())
-      return false;
-    if (Mem.Mode != ImmediateOffset)
-      return false;
-    if (!Mem.OffsetImm)
-      return true;
-    // Make sure the immediate value is valid.
-    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
-
-    if (CE) {
-      // The offset must be a positive multiple of the scale and in range of
-      // encoding with a 12-bit immediate.
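
(The rule the comment above describes is compact enough to state on its own. A minimal sketch with a hypothetical helper name, not part of the patch: the unsigned-offset LDR/STR forms take a non-negative offset that is a multiple of the access size, and the quotient must fit in 12 bits.)

    #include <cstdint>

    // Offset/Scale must be a whole number in [0, 4095].
    static bool isValidScaledOffset(int64_t Offset, int64_t Scale) {
      return Offset >= 0 && (Offset % Scale) == 0 && Offset <= 4095 * Scale;
    }
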
- int64_t Value = CE->getValue(); - return (Value >= 0 && (Value % Scale) == 0 && Value <= (4095 * Scale)); - } - - // If it's not a constant, check for some expressions we know. - const MCExpr *Expr = Mem.OffsetImm; - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, - Addend)) { - // If we don't understand the expression, assume the best and - // let the fixup and relocation code deal with it. - return true; - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_GOT_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { - // Note that we don't range-check the addend. It's adjusted modulo page - // size when converted, so there is no "out of range" condition when using - // @pageoff. - int64_t Value = Addend ? Addend->getValue() : 0; - return Value >= 0 && (Value % Scale) == 0; - } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) { - // @gotpageoff/@tlvppageoff can only be used directly, not with an addend. - return Addend == 0; - } - - return false; - } - bool isMemoryIndexed128() const { return isMemoryIndexed(16); } - bool isMemoryIndexed64() const { return isMemoryIndexed(8); } - bool isMemoryIndexed32() const { return isMemoryIndexed(4); } - bool isMemoryIndexed16() const { return isMemoryIndexed(2); } - bool isMemoryIndexed8() const { return isMemoryIndexed(1); } - bool isMemoryNoIndex() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - - // Make sure the immediate value is valid. Only zero is allowed. 
- const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - if (!CE || CE->getValue() != 0) - return false; - return true; - } - bool isMemorySIMDNoIndex() const { - if (!isMem()) - return false; - if (Mem.Mode != ImmediateOffset) - return false; - return Mem.OffsetImm == 0; - } - bool isMemoryIndexedSImm9() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return Value >= -256 && Value <= 255; - } - bool isMemoryIndexed32SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 4) == 0) && Value >= -256 && Value <= 252; - } - bool isMemoryIndexed64SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 8) == 0) && Value >= -512 && Value <= 504; - } - bool isMemoryIndexed128SImm7() const { - if (!isMem() || Mem.Mode != ImmediateOffset) - return false; - if (!Mem.OffsetImm) - return true; - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant pre-indexed offset!"); - int64_t Value = CE->getValue(); - return ((Value % 16) == 0) && Value >= -1024 && Value <= 1008; - } - - bool isAdrpLabel() const { - // Validation was handled during parsing, so we just sanity check that - // something didn't go haywire. - return isImm(); - } - - bool isAdrLabel() const { - // Validation was handled during parsing, so we just sanity check that - // something didn't go haywire. - return isImm(); - } - - void addExpr(MCInst &Inst, const MCExpr *Expr) const { - // Add as immediates when possible. Null MCExpr = 0. 
- if (Expr == 0) - Inst.addOperand(MCOperand::CreateImm(0)); - else if (const MCConstantExpr *CE = dyn_cast(Expr)) - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - else - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - void addRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } - - void addVectorRegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateReg(getReg())); - } - - template - void addVectorList64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::D0, ARM64::D0_D1, - ARM64::D0_D1_D2, ARM64::D0_D1_D2_D3 }; - unsigned FirstReg = FirstRegs[NumRegs - 1]; - - Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); - } - - template - void addVectorList128Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { ARM64::Q0, ARM64::Q0_Q1, - ARM64::Q0_Q1_Q2, ARM64::Q0_Q1_Q2_Q3 }; - unsigned FirstReg = FirstRegs[NumRegs - 1]; - - Inst.addOperand( - MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0)); - } - - void addVectorIndexBOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexHOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexSOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addVectorIndexDOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); - } - - void addImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // If this is a pageoff symrefexpr with an addend, adjust the addend - // to be only the page-offset portion. Otherwise, just add the expr - // as-is. 
- addExpr(Inst, getImm()); - } - - void addAdrpLabelOperands(MCInst &Inst, unsigned N) const { - addImmOperands(Inst, N); - } - - void addAdrLabelOperands(MCInst &Inst, unsigned N) const { - addImmOperands(Inst, N); - } - - void addSImm9Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addSImm7s4Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4)); - } - - void addSImm7s8Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8)); - } - - void addSImm7s16Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16)); - } - - void addImm0_7Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_8Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_15Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_16Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_31Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_31Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm1_32Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue())); - } - - void addImm0_63Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - assert(MCE && "Invalid constant immediate operand!"); - 
  Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
-  }
-
-  void addImm1_63Operands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    assert(MCE && "Invalid constant immediate operand!");
-    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
-  }
-
-  void addImm1_64Operands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    assert(MCE && "Invalid constant immediate operand!");
-    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
-  }
-
-  void addImm0_127Operands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    assert(MCE && "Invalid constant immediate operand!");
-    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
-  }
-
-  void addImm0_255Operands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    assert(MCE && "Invalid constant immediate operand!");
-    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
-  }
-
-  void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    assert(MCE && "Invalid constant immediate operand!");
-    Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
-  }
-
-  void addLogicalImm32Operands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    assert(MCE && "Invalid logical immediate operand!");
-    uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 32);
-    Inst.addOperand(MCOperand::CreateImm(encoding));
-  }
-
-  void addLogicalImm64Operands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    assert(MCE && "Invalid logical immediate operand!");
-    uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 64);
-    Inst.addOperand(MCOperand::CreateImm(encoding));
-  }
-
-  void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    assert(MCE && "Invalid immediate operand!");
-    uint64_t encoding = ARM64_AM::encodeAdvSIMDModImmType10(MCE->getValue());
-    Inst.addOperand(MCOperand::CreateImm(encoding));
-  }
-
-  void addBranchTarget26Operands(MCInst &Inst, unsigned N) const {
-    // Branch operands don't encode the low bits, so shift them off
-    // here. If it's a label, however, just put it on directly as there's
-    // not enough information now to do anything.
-    assert(N == 1 && "Invalid number of operands!");
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
-    if (!MCE) {
-      addExpr(Inst, getImm());
-      return;
-    }
-    assert(MCE && "Invalid constant immediate operand!");
-    Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
-  }
-
-  void addBranchTarget19Operands(MCInst &Inst, unsigned N) const {
-    // Branch operands don't encode the low bits, so shift them off
-    // here. If it's a label, however, just put it on directly as there's
-    // not enough information now to do anything.
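
(The addBranchTargetNOperands methods here and the isBranchTargetN predicates earlier share one rule: a constant displacement must be 4-byte aligned and, once the two implicit low bits are dropped, must fit a signed N-bit field. A standalone sketch of that range check, under a hypothetical helper name:)

    #include <cstdint>

    // NBits is the width of the instruction's word-offset field:
    // 26 for B/BL, 19 for conditional branches, 14 for TBZ/TBNZ.
    static bool fitsBranchTarget(int64_t ByteOff, unsigned NBits) {
      if (ByteOff & 0x3)
        return false; // must be word-aligned
      int64_t Min = -(int64_t(1) << (NBits - 1)) * 4;      // e.g. -(0x2000000 << 2)
      int64_t Max = ((int64_t(1) << (NBits - 1)) - 1) * 4; // e.g. 0x1ffffff << 2
      return ByteOff >= Min && ByteOff <= Max;
    }
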
- assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) { - addExpr(Inst, getImm()); - return; - } - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); - } - - void addBranchTarget14Operands(MCInst &Inst, unsigned N) const { - // Branch operands don't encode the low bits, so shift them off - // here. If it's a label, however, just put it on directly as there's - // not enough information now to do anything. - assert(N == 1 && "Invalid number of operands!"); - const MCConstantExpr *MCE = dyn_cast(getImm()); - if (!MCE) { - addExpr(Inst, getImm()); - return; - } - assert(MCE && "Invalid constant immediate operand!"); - Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2)); - } - - void addFPImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getFPImm())); - } - - void addBarrierOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getBarrier())); - } - - void addSystemRegisterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - if (Kind == k_SystemRegister) - Inst.addOperand(MCOperand::CreateImm(getSystemRegister())); - else { - assert(Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel); - Inst.addOperand(MCOperand::CreateImm(ARM64SYS::SPSel)); - } - } - - void addSystemCPSRFieldOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getCPSRField())); - } - - void addSysCROperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getSysCR())); - } - - void addPrefetchOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getPrefetch())); - } - - void addShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addArithmeticShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMovImm32ShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMovImm64ShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addAddSubShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addLogicalVecShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addLogicalVecHalfWordShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addMoveVecShifterOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getShifter())); - } - - void addExtendOperands(MCInst &Inst, unsigned N) const { - assert(N 
== 1 && "Invalid number of operands!"); - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL); - unsigned imm = getArithExtendImm(ARM64_AM::UXTX, - ARM64_AM::getShiftValue(getShifter())); - Inst.addOperand(MCOperand::CreateImm(imm)); - } else - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addExtend64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addExtendLSL64Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // lsl is an alias for UXTX but will be a parsed as a k_Shifter operand. - if (isShifter()) { - assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL); - unsigned imm = getArithExtendImm(ARM64_AM::UXTX, - ARM64_AM::getShiftValue(getShifter())); - Inst.addOperand(MCOperand::CreateImm(imm)); - } else - Inst.addOperand(MCOperand::CreateImm(getExtend())); - } - - void addMemoryRegisterOffsetOperands(MCInst &Inst, unsigned N, bool DoShift) { - assert(N == 3 && "Invalid number of operands!"); - - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - Inst.addOperand(MCOperand::CreateReg(Mem.OffsetRegNum)); - unsigned ExtendImm = ARM64_AM::getMemExtendImm(Mem.ExtType, DoShift); - Inst.addOperand(MCOperand::CreateImm(ExtendImm)); - } - - void addMemoryRegisterOffset8Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ExplicitShift); - } - - void addMemoryRegisterOffset16Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 1); - } - - void addMemoryRegisterOffset32Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 2); - } - - void addMemoryRegisterOffset64Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 3); - } - - void addMemoryRegisterOffset128Operands(MCInst &Inst, unsigned N) { - addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 4); - } - - void addMemoryIndexedOperands(MCInst &Inst, unsigned N, - unsigned Scale) const { - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - if (!Mem.OffsetImm) { - // There isn't an offset. - Inst.addOperand(MCOperand::CreateImm(0)); - return; - } - - // Add the offset operand. - if (const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm)) { - assert(CE->getValue() % Scale == 0 && - "Offset operand must be multiple of the scale!"); - - // The MCInst offset operand doesn't include the low bits (like the - // instruction encoding). - Inst.addOperand(MCOperand::CreateImm(CE->getValue() / Scale)); - } - - // If this is a pageoff symrefexpr with an addend, the linker will - // do the scaling of the addend. - // - // Otherwise we don't know what this is, so just add the scaling divide to - // the expression and let the MC fixup evaluation code deal with it. 
- const MCExpr *Expr = Mem.OffsetImm; - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (Scale > 1 && - (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, - Addend) || - (Addend != 0 && DarwinRefKind != MCSymbolRefExpr::VK_PAGEOFF))) { - Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(Scale, Ctx), - Ctx); - } - - Inst.addOperand(MCOperand::CreateExpr(Expr)); - } - - void addMemoryUnscaledOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryUnscaled() && "Invalid number of operands!"); - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - // Add the offset operand. - if (!Mem.OffsetImm) - Inst.addOperand(MCOperand::CreateImm(0)); - else { - // Only constant offsets supported. - const MCConstantExpr *CE = cast(Mem.OffsetImm); - Inst.addOperand(MCOperand::CreateImm(CE->getValue())); - } - } - - void addMemoryIndexed128Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed128() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 16); - } - - void addMemoryIndexed64Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed64() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 8); - } - - void addMemoryIndexed32Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed32() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 4); - } - - void addMemoryIndexed16Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed16() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 2); - } - - void addMemoryIndexed8Operands(MCInst &Inst, unsigned N) const { - assert(N == 2 && isMemoryIndexed8() && "Invalid number of operands!"); - addMemoryIndexedOperands(Inst, N, 1); - } - - void addMemoryNoIndexOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemoryNoIndex() && "Invalid number of operands!"); - // Add the base register operand (the offset is always zero, so ignore it). - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - } - - void addMemorySIMDNoIndexOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && isMemorySIMDNoIndex() && "Invalid number of operands!"); - // Add the base register operand (the offset is always zero, so ignore it). - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - } - - void addMemoryWritebackIndexedOperands(MCInst &Inst, unsigned N, - unsigned Scale) const { - assert(N == 2 && "Invalid number of operands!"); - - // Add the base register operand. - Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum)); - - // Add the offset operand. 
- int64_t Offset = 0; - if (Mem.OffsetImm) { - const MCConstantExpr *CE = dyn_cast(Mem.OffsetImm); - assert(CE && "Non-constant indexed offset operand!"); - Offset = CE->getValue(); - } - - if (Scale != 1) { - assert(Offset % Scale == 0 && - "Offset operand must be a multiple of the scale!"); - Offset /= Scale; - } - - Inst.addOperand(MCOperand::CreateImm(Offset)); - } - - void addMemoryIndexedSImm9Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 1); - } - - void addMemoryIndexed32SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 4); - } - - void addMemoryIndexed64SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 8); - } - - void addMemoryIndexed128SImm7Operands(MCInst &Inst, unsigned N) const { - addMemoryWritebackIndexedOperands(Inst, N, 16); - } - - virtual void print(raw_ostream &OS) const; - - static ARM64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Token, Ctx); - Op->Tok.Data = Str.data(); - Op->Tok.Length = Str.size(); - Op->Tok.IsSuffix = IsSuffix; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, - SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Register, Ctx); - Op->Reg.RegNum = RegNum; - Op->Reg.isVector = isVector; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateVectorList(unsigned RegNum, unsigned Count, - unsigned NumElements, char ElementKind, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorList, Ctx); - Op->VectorList.RegNum = RegNum; - Op->VectorList.Count = Count; - Op->VectorList.NumElements = NumElements; - Op->VectorList.ElementKind = ElementKind; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_VectorIndex, Ctx); - Op->VectorIndex.Val = Idx; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Immediate, Ctx); - Op->Imm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_FPImm, Ctx); - Op->FPImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Barrier, Ctx); - Op->Barrier.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateSystemRegister(uint16_t Val, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SystemRegister, Ctx); - Op->SystemRegister.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateCPSRField(ARM64SYS::CPSRField Field, SMLoc S, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_CPSRField, Ctx); - Op->CPSRField.Field = Field; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateMem(unsigned BaseRegNum, const MCExpr *Off, - SMLoc S, SMLoc E, SMLoc OffsetLoc, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx); - Op->Mem.BaseRegNum = BaseRegNum; - Op->Mem.OffsetRegNum = 0; - 
Op->Mem.OffsetImm = Off; - Op->Mem.ExtType = ARM64_AM::UXTX; - Op->Mem.ShiftVal = 0; - Op->Mem.ExplicitShift = false; - Op->Mem.Mode = ImmediateOffset; - Op->OffsetLoc = OffsetLoc; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateRegOffsetMem(unsigned BaseReg, unsigned OffsetReg, - ARM64_AM::ExtendType ExtType, - unsigned ShiftVal, bool ExplicitShift, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx); - Op->Mem.BaseRegNum = BaseReg; - Op->Mem.OffsetRegNum = OffsetReg; - Op->Mem.OffsetImm = 0; - Op->Mem.ExtType = ExtType; - Op->Mem.ShiftVal = ShiftVal; - Op->Mem.ExplicitShift = ExplicitShift; - Op->Mem.Mode = RegisterOffset; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_SysCR, Ctx); - Op->SysCRImm.Val = Val; - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Prefetch, Ctx); - Op->Prefetch.Val = Val; - Op->StartLoc = S; - Op->EndLoc = S; - return Op; - } - - static ARM64Operand *CreateShifter(ARM64_AM::ShiftType ShOp, unsigned Val, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Shifter, Ctx); - Op->Shifter.Val = ARM64_AM::getShifterImm(ShOp, Val); - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } - - static ARM64Operand *CreateExtend(ARM64_AM::ExtendType ExtOp, unsigned Val, - SMLoc S, SMLoc E, MCContext &Ctx) { - ARM64Operand *Op = new ARM64Operand(k_Extend, Ctx); - Op->Extend.Val = ARM64_AM::getArithExtendImm(ExtOp, Val); - Op->StartLoc = S; - Op->EndLoc = E; - return Op; - } -}; - -} // end anonymous namespace. 
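
(With the Create* factories above, assembling an operand list is mechanical. A hypothetical usage sketch, not taken from the patch; it assumes an MCContext and source locations are at hand, and that the anonymous-namespace classes are visible:)

    // Push "x0" and "#42" roughly the way ParseInstruction would after
    // matching those tokens.
    static void pushExampleOperands(ARM64AsmParser::OperandVector &Operands,
                                    MCContext &Ctx, SMLoc S, SMLoc E) {
      Operands.push_back(
          ARM64Operand::CreateReg(ARM64::X0, /*isVector=*/false, S, E, Ctx));
      const MCExpr *Imm = MCConstantExpr::Create(42, Ctx);
      Operands.push_back(ARM64Operand::CreateImm(Imm, S, E, Ctx));
    }
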
- -void ARM64Operand::print(raw_ostream &OS) const { - switch (Kind) { - case k_FPImm: - OS << ""; - break; - case k_Barrier: { - const char *Name = - ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)getBarrier()); - OS << ""; - break; - } - case k_SystemRegister: { - const char *Name = ARM64SYS::getSystemRegisterName( - (ARM64SYS::SystemRegister)getSystemRegister()); - OS << ""; - break; - } - case k_CPSRField: { - const char *Name = ARM64SYS::getCPSRFieldName(getCPSRField()); - OS << ""; - break; - } - case k_Immediate: - getImm()->print(OS); - break; - case k_Memory: - OS << ""; - break; - case k_Register: - OS << ""; - break; - case k_VectorList: { - OS << ""; - break; - } - case k_VectorIndex: - OS << ""; - break; - case k_Token: - OS << "'" << getToken() << "'"; - break; - case k_SysCR: - OS << "c" << getSysCR(); - break; - case k_Prefetch: - OS << ""; - break; - case k_Shifter: { - unsigned Val = getShifter(); - OS << "<" << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #" - << ARM64_AM::getShiftValue(Val) << ">"; - break; - } - case k_Extend: { - unsigned Val = getExtend(); - OS << "<" << ARM64_AM::getExtendName(ARM64_AM::getArithExtendType(Val)) - << " #" << ARM64_AM::getArithShiftValue(Val) << ">"; - break; - } - } -} - -/// @name Auto-generated Match Functions -/// { - -static unsigned MatchRegisterName(StringRef Name); - -/// } - -static unsigned matchVectorRegName(StringRef Name) { - return StringSwitch(Name) - .Case("v0", ARM64::Q0) - .Case("v1", ARM64::Q1) - .Case("v2", ARM64::Q2) - .Case("v3", ARM64::Q3) - .Case("v4", ARM64::Q4) - .Case("v5", ARM64::Q5) - .Case("v6", ARM64::Q6) - .Case("v7", ARM64::Q7) - .Case("v8", ARM64::Q8) - .Case("v9", ARM64::Q9) - .Case("v10", ARM64::Q10) - .Case("v11", ARM64::Q11) - .Case("v12", ARM64::Q12) - .Case("v13", ARM64::Q13) - .Case("v14", ARM64::Q14) - .Case("v15", ARM64::Q15) - .Case("v16", ARM64::Q16) - .Case("v17", ARM64::Q17) - .Case("v18", ARM64::Q18) - .Case("v19", ARM64::Q19) - .Case("v20", ARM64::Q20) - .Case("v21", ARM64::Q21) - .Case("v22", ARM64::Q22) - .Case("v23", ARM64::Q23) - .Case("v24", ARM64::Q24) - .Case("v25", ARM64::Q25) - .Case("v26", ARM64::Q26) - .Case("v27", ARM64::Q27) - .Case("v28", ARM64::Q28) - .Case("v29", ARM64::Q29) - .Case("v30", ARM64::Q30) - .Case("v31", ARM64::Q31) - .Default(0); -} - -static bool isValidVectorKind(StringRef Name) { - return StringSwitch(Name.lower()) - .Case(".8b", true) - .Case(".16b", true) - .Case(".4h", true) - .Case(".8h", true) - .Case(".2s", true) - .Case(".4s", true) - .Case(".1d", true) - .Case(".2d", true) - .Case(".1q", true) - // Accept the width neutral ones, too, for verbose syntax. If those - // aren't used in the right places, the token operand won't match so - // all will work out. 
- .Case(".b", true) - .Case(".h", true) - .Case(".s", true) - .Case(".d", true) - .Default(false); -} - -static void parseValidVectorKind(StringRef Name, unsigned &NumElements, - char &ElementKind) { - assert(isValidVectorKind(Name)); - - ElementKind = Name.lower()[Name.size() - 1]; - NumElements = 0; - - if (Name.size() == 2) - return; - - // Parse the lane count - Name = Name.drop_front(); - while (isdigit(Name.front())) { - NumElements = 10 * NumElements + (Name.front() - '0'); - Name = Name.drop_front(); - } -} - -bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, - SMLoc &EndLoc) { - StartLoc = getLoc(); - RegNo = tryParseRegister(); - EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1); - return (RegNo == (unsigned)-1); -} - -/// tryParseRegister - Try to parse a register name. The token must be an -/// Identifier when called, and if it is a register name the token is eaten and -/// the register is added to the operand list. -int ARM64AsmParser::tryParseRegister() { - const AsmToken &Tok = Parser.getTok(); - assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - - std::string lowerCase = Tok.getString().lower(); - unsigned RegNum = MatchRegisterName(lowerCase); - // Also handle a few aliases of registers. - if (RegNum == 0) - RegNum = StringSwitch(lowerCase) - .Case("x29", ARM64::FP) - .Case("x30", ARM64::LR) - .Case("x31", ARM64::XZR) - .Case("w31", ARM64::WZR) - .Default(0); - - if (RegNum == 0) - return -1; - - Parser.Lex(); // Eat identifier token. - return RegNum; -} - -/// tryMatchVectorRegister - Try to parse a vector register name with optional -/// kind specifier. If it is a register specifier, eat the token and return it. -int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind) { - if (Parser.getTok().isNot(AsmToken::Identifier)) { - TokError("vector register expected"); - return -1; - } - - StringRef Name = Parser.getTok().getString(); - // If there is a kind specifier, it's separated from the register name by - // a '.'. - size_t Start = 0, Next = Name.find('.'); - StringRef Head = Name.slice(Start, Next); - unsigned RegNum = matchVectorRegName(Head); - if (RegNum) { - if (Next != StringRef::npos) { - Kind = Name.slice(Next, StringRef::npos); - if (!isValidVectorKind(Kind)) { - TokError("invalid vector kind qualifier"); - return -1; - } - } - Parser.Lex(); // Eat the register token. - return RegNum; - } - return -1; -} - -static int MatchSysCRName(StringRef Name) { - // Use the same layout as the tablegen'erated register name matcher. Ugly, - // but efficient. - switch (Name.size()) { - default: - break; - case 2: - if (Name[0] != 'c' && Name[0] != 'C') - return -1; - switch (Name[1]) { - default: - return -1; - case '0': - return 0; - case '1': - return 1; - case '2': - return 2; - case '3': - return 3; - case '4': - return 4; - case '5': - return 5; - case '6': - return 6; - case '7': - return 7; - case '8': - return 8; - case '9': - return 9; - } - break; - case 3: - if ((Name[0] != 'c' && Name[0] != 'C') || Name[1] != '1') - return -1; - switch (Name[2]) { - default: - return -1; - case '0': - return 10; - case '1': - return 11; - case '2': - return 12; - case '3': - return 13; - case '4': - return 14; - case '5': - return 15; - } - break; - } - - llvm_unreachable("Unhandled SysCR operand string!"); - return -1; -} - -/// tryParseSysCROperand - Try to parse a system instruction CR operand name. 
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
-  SMLoc S = getLoc();
-  const AsmToken &Tok = Parser.getTok();
-  if (Tok.isNot(AsmToken::Identifier))
-    return MatchOperand_NoMatch;
-
-  int Num = MatchSysCRName(Tok.getString());
-  if (Num == -1)
-    return MatchOperand_NoMatch;
-
-  Parser.Lex(); // Eat identifier token.
-  Operands.push_back(ARM64Operand::CreateSysCR(Num, S, getLoc(), getContext()));
-  return MatchOperand_Success;
-}
-
-/// tryParsePrefetch - Try to parse a prefetch operand.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) {
-  SMLoc S = getLoc();
-  const AsmToken &Tok = Parser.getTok();
-  // Either an identifier for named values or a 5-bit immediate.
-  if (Tok.is(AsmToken::Hash)) {
-    Parser.Lex(); // Eat hash token.
-    const MCExpr *ImmVal;
-    if (getParser().parseExpression(ImmVal))
-      return MatchOperand_ParseFail;
-
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
-    if (!MCE) {
-      TokError("immediate value expected for prefetch operand");
-      return MatchOperand_ParseFail;
-    }
-    unsigned prfop = MCE->getValue();
-    if (prfop > 31) {
-      TokError("prefetch operand out of range, [0,31] expected");
-      return MatchOperand_ParseFail;
-    }
-
-    Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext()));
-    return MatchOperand_Success;
-  }
-
-  if (Tok.isNot(AsmToken::Identifier)) {
-    TokError("pre-fetch hint expected");
-    return MatchOperand_ParseFail;
-  }
-
-  unsigned prfop = StringSwitch<unsigned>(Tok.getString())
-                       .Case("pldl1keep", ARM64_AM::PLDL1KEEP)
-                       .Case("pldl1strm", ARM64_AM::PLDL1STRM)
-                       .Case("pldl2keep", ARM64_AM::PLDL2KEEP)
-                       .Case("pldl2strm", ARM64_AM::PLDL2STRM)
-                       .Case("pldl3keep", ARM64_AM::PLDL3KEEP)
-                       .Case("pldl3strm", ARM64_AM::PLDL3STRM)
-                       .Case("pstl1keep", ARM64_AM::PSTL1KEEP)
-                       .Case("pstl1strm", ARM64_AM::PSTL1STRM)
-                       .Case("pstl2keep", ARM64_AM::PSTL2KEEP)
-                       .Case("pstl2strm", ARM64_AM::PSTL2STRM)
-                       .Case("pstl3keep", ARM64_AM::PSTL3KEEP)
-                       .Case("pstl3strm", ARM64_AM::PSTL3STRM)
-                       .Default(0xff);
-  if (prfop == 0xff) {
-    TokError("pre-fetch hint expected");
-    return MatchOperand_ParseFail;
-  }
-
-  Parser.Lex(); // Eat identifier token.
-  Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext()));
-  return MatchOperand_Success;
-}
-
-/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
-/// instruction.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
-  SMLoc S = getLoc();
-  const MCExpr *Expr;
-  if (parseSymbolicImmVal(Expr))
-    return MatchOperand_ParseFail;
-
-  ARM64MCExpr::VariantKind ELFRefKind;
-  MCSymbolRefExpr::VariantKind DarwinRefKind;
-  const MCConstantExpr *Addend;
-  if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
-    Error(S, "modified label reference + constant expected");
-    return MatchOperand_ParseFail;
-  }
-
-  if (DarwinRefKind == MCSymbolRefExpr::VK_None &&
-      ELFRefKind == ARM64MCExpr::VK_INVALID) {
-    // No modifier was specified at all; this is the syntax for an ELF basic
-    // ADRP relocation (unfortunately).
-    Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_ABS_PAGE, getContext());
-  } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE ||
-              DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) &&
-             Addend != 0) {
-    Error(S, "gotpage label reference may not have an addend");
-    return MatchOperand_ParseFail;
-  } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE &&
-             DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE &&
-             DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
-             ELFRefKind != ARM64MCExpr::VK_GOT_PAGE &&
-             ELFRefKind != ARM64MCExpr::VK_GOTTPREL_PAGE &&
-             ELFRefKind != ARM64MCExpr::VK_TLSDESC_PAGE) {
-    // The operand must be an @page or @gotpage qualified symbolref.
-    Error(S, "page or gotpage label reference expected");
-    return MatchOperand_ParseFail;
-  }
-
-  // We have a label reference possibly with addend. The addend is a raw value
-  // here. The linker will adjust it to only reference the page.
-  SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-  Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
-
-  return MatchOperand_Success;
-}
-
-/// tryParseAdrLabel - Parse and validate a source label for the ADR
-/// instruction.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
-  SMLoc S = getLoc();
-  const MCExpr *Expr;
-  if (getParser().parseExpression(Expr))
-    return MatchOperand_ParseFail;
-
-  // The operand must be an un-qualified assembler local symbolref.
-  // FIXME: wrong for ELF.
-  if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Expr)) {
-    // FIXME: Should reference the MachineAsmInfo to get the private prefix.
-    bool isTemporary = SRE->getSymbol().getName().startswith("L");
-    if (!isTemporary || SRE->getKind() != MCSymbolRefExpr::VK_None) {
-      Error(S, "unqualified, assembler-local label name expected");
-      return MatchOperand_ParseFail;
-    }
-  }
-
-  SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-  Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
-
-  return MatchOperand_Success;
-}
-
-/// tryParseFPImm - A floating point immediate expression operand.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseFPImm(OperandVector &Operands) {
-  SMLoc S = getLoc();
-
-  if (Parser.getTok().isNot(AsmToken::Hash))
-    return MatchOperand_NoMatch;
-  Parser.Lex(); // Eat the '#'.
-
-  // Handle negation, as that still comes through as a separate token.
-  bool isNegative = false;
-  if (Parser.getTok().is(AsmToken::Minus)) {
-    isNegative = true;
-    Parser.Lex();
-  }
-  const AsmToken &Tok = Parser.getTok();
-  if (Tok.is(AsmToken::Real)) {
-    APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
-    uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
-    // If we had a '-' in front, toggle the sign bit.
-    IntVal ^= (uint64_t)isNegative << 63;
-    int Val = ARM64_AM::getFP64Imm(APInt(64, IntVal));
-    Parser.Lex(); // Eat the token.
-    // Check for out of range values. As an exception, we let Zero through,
-    // as we handle that special case in post-processing before matching in
-    // order to use the zero register for it.
-    if (Val == -1 && !RealVal.isZero()) {
-      TokError("floating point value out of range");
-      return MatchOperand_ParseFail;
-    }
-    Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext()));
-    return MatchOperand_Success;
-  }
-  if (Tok.is(AsmToken::Integer)) {
-    int64_t Val;
-    if (!isNegative && Tok.getString().startswith("0x")) {
-      Val = Tok.getIntVal();
-      if (Val > 255 || Val < 0) {
-        TokError("encoded floating point value out of range");
-        return MatchOperand_ParseFail;
-      }
-    } else {
-      APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
-      uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
-      // If we had a '-' in front, toggle the sign bit.
-      IntVal ^= (uint64_t)isNegative << 63;
-      Val = ARM64_AM::getFP64Imm(APInt(64, IntVal));
-    }
-    Parser.Lex(); // Eat the token.
-    Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext()));
-    return MatchOperand_Success;
-  }
-
-  TokError("invalid floating point immediate");
-  return MatchOperand_ParseFail;
-}
-
-/// parseCondCodeString - Parse a Condition Code string.
-unsigned ARM64AsmParser::parseCondCodeString(StringRef Cond) {
-  unsigned CC = StringSwitch<unsigned>(Cond)
-                    .Case("eq", ARM64CC::EQ)
-                    .Case("ne", ARM64CC::NE)
-                    .Case("cs", ARM64CC::CS)
-                    .Case("hs", ARM64CC::CS)
-                    .Case("cc", ARM64CC::CC)
-                    .Case("lo", ARM64CC::CC)
-                    .Case("mi", ARM64CC::MI)
-                    .Case("pl", ARM64CC::PL)
-                    .Case("vs", ARM64CC::VS)
-                    .Case("vc", ARM64CC::VC)
-                    .Case("hi", ARM64CC::HI)
-                    .Case("ls", ARM64CC::LS)
-                    .Case("ge", ARM64CC::GE)
-                    .Case("lt", ARM64CC::LT)
-                    .Case("gt", ARM64CC::GT)
-                    .Case("le", ARM64CC::LE)
-                    .Case("al", ARM64CC::AL)
-                    // Upper case works too. Not mixed case, though.
-                    .Case("EQ", ARM64CC::EQ)
-                    .Case("NE", ARM64CC::NE)
-                    .Case("CS", ARM64CC::CS)
-                    .Case("HS", ARM64CC::CS)
-                    .Case("CC", ARM64CC::CC)
-                    .Case("LO", ARM64CC::CC)
-                    .Case("MI", ARM64CC::MI)
-                    .Case("PL", ARM64CC::PL)
-                    .Case("VS", ARM64CC::VS)
-                    .Case("VC", ARM64CC::VC)
-                    .Case("HI", ARM64CC::HI)
-                    .Case("LS", ARM64CC::LS)
-                    .Case("GE", ARM64CC::GE)
-                    .Case("LT", ARM64CC::LT)
-                    .Case("GT", ARM64CC::GT)
-                    .Case("LE", ARM64CC::LE)
-                    .Case("AL", ARM64CC::AL)
-                    .Default(~0U);
-  return CC;
-}
-
-/// parseCondCode - Parse a Condition Code operand.
-bool ARM64AsmParser::parseCondCode(OperandVector &Operands,
-                                   bool invertCondCode) {
-  SMLoc S = getLoc();
-  const AsmToken &Tok = Parser.getTok();
-  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
-
-  StringRef Cond = Tok.getString();
-  unsigned CC = parseCondCodeString(Cond);
-  if (CC == ~0U)
-    return TokError("invalid condition code");
-  Parser.Lex(); // Eat identifier token.
-
-  if (invertCondCode)
-    CC = ARM64CC::getInvertedCondCode(ARM64CC::CondCode(CC));
-
-  const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext());
-  Operands.push_back(
-      ARM64Operand::CreateImm(CCExpr, S, getLoc(), getContext()));
-  return false;
-}
-
-/// parseOptionalShift - Some operands take an optional shift argument. Parse
-/// them if present.
-bool ARM64AsmParser::parseOptionalShift(OperandVector &Operands) {
-  const AsmToken &Tok = Parser.getTok();
-  ARM64_AM::ShiftType ShOp = StringSwitch<ARM64_AM::ShiftType>(Tok.getString())
-                                 .Case("lsl", ARM64_AM::LSL)
-                                 .Case("lsr", ARM64_AM::LSR)
-                                 .Case("asr", ARM64_AM::ASR)
-                                 .Case("ror", ARM64_AM::ROR)
-                                 .Case("msl", ARM64_AM::MSL)
-                                 .Case("LSL", ARM64_AM::LSL)
-                                 .Case("LSR", ARM64_AM::LSR)
-                                 .Case("ASR", ARM64_AM::ASR)
-                                 .Case("ROR", ARM64_AM::ROR)
-                                 .Case("MSL", ARM64_AM::MSL)
-                                 .Default(ARM64_AM::InvalidShift);
-  if (ShOp == ARM64_AM::InvalidShift)
-    return true;
-
-  SMLoc S = Tok.getLoc();
-  Parser.Lex();
-
-  // We expect a number here.
-  if (getLexer().isNot(AsmToken::Hash))
-    return TokError("immediate value expected for shifter operand");
-  Parser.Lex(); // Eat the '#'.
-
-  SMLoc ExprLoc = getLoc();
-  const MCExpr *ImmVal;
-  if (getParser().parseExpression(ImmVal))
-    return true;
-
-  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
-  if (!MCE)
-    return TokError("immediate value expected for shifter operand");
-
-  if ((MCE->getValue() & 0x3f) != MCE->getValue())
-    return Error(ExprLoc, "immediate value too large for shifter operand");
-
-  SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-  Operands.push_back(
-      ARM64Operand::CreateShifter(ShOp, MCE->getValue(), S, E, getContext()));
-  return false;
-}
-
-/// parseOptionalExtend - Some operands take an optional extend argument. Parse
-/// them if present.
-bool ARM64AsmParser::parseOptionalExtend(OperandVector &Operands) {
-  const AsmToken &Tok = Parser.getTok();
-  ARM64_AM::ExtendType ExtOp =
-      StringSwitch<ARM64_AM::ExtendType>(Tok.getString())
-          .Case("uxtb", ARM64_AM::UXTB)
-          .Case("uxth", ARM64_AM::UXTH)
-          .Case("uxtw", ARM64_AM::UXTW)
-          .Case("uxtx", ARM64_AM::UXTX)
-          .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX
-          .Case("sxtb", ARM64_AM::SXTB)
-          .Case("sxth", ARM64_AM::SXTH)
-          .Case("sxtw", ARM64_AM::SXTW)
-          .Case("sxtx", ARM64_AM::SXTX)
-          .Case("UXTB", ARM64_AM::UXTB)
-          .Case("UXTH", ARM64_AM::UXTH)
-          .Case("UXTW", ARM64_AM::UXTW)
-          .Case("UXTX", ARM64_AM::UXTX)
-          .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX
-          .Case("SXTB", ARM64_AM::SXTB)
-          .Case("SXTH", ARM64_AM::SXTH)
-          .Case("SXTW", ARM64_AM::SXTW)
-          .Case("SXTX", ARM64_AM::SXTX)
-          .Default(ARM64_AM::InvalidExtend);
-  if (ExtOp == ARM64_AM::InvalidExtend)
-    return true;
-
-  SMLoc S = Tok.getLoc();
-  Parser.Lex();
-
-  if (getLexer().is(AsmToken::EndOfStatement) ||
-      getLexer().is(AsmToken::Comma)) {
-    SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-    Operands.push_back(
-        ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext()));
-    return false;
-  }
-
-  if (getLexer().isNot(AsmToken::Hash)) {
-    SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-    Operands.push_back(
-        ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext()));
-    return false;
-  }
-
-  Parser.Lex(); // Eat the '#'.
-
-  const MCExpr *ImmVal;
-  if (getParser().parseExpression(ImmVal))
-    return true;
-
-  const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
-  if (!MCE)
-    return TokError("immediate value expected for extend operand");
-
-  SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-  Operands.push_back(
-      ARM64Operand::CreateExtend(ExtOp, MCE->getValue(), S, E, getContext()));
-  return false;
-}
-
-/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
-/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
-bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
-                                   OperandVector &Operands) {
-  if (Name.find('.') != StringRef::npos)
-    return TokError("invalid operand");
-
-  Mnemonic = Name;
-  Operands.push_back(
-      ARM64Operand::CreateToken("sys", false, NameLoc, getContext()));
-
-  const AsmToken &Tok = Parser.getTok();
-  StringRef Op = Tok.getString();
-  SMLoc S = Tok.getLoc();
-
-  const MCExpr *Expr = 0;
-
-#define SYS_ALIAS(op1, Cn, Cm, op2)                                    \
-  do {                                                                 \
-    Expr = MCConstantExpr::Create(op1, getContext());                  \
-    Operands.push_back(                                                \
-        ARM64Operand::CreateImm(Expr, S, getLoc(), getContext()));     \
-    Operands.push_back(                                                \
-        ARM64Operand::CreateSysCR(Cn, S, getLoc(), getContext()));     \
-    Operands.push_back(                                                \
-        ARM64Operand::CreateSysCR(Cm, S, getLoc(), getContext()));     \
-    Expr = MCConstantExpr::Create(op2, getContext());                  \
-    Operands.push_back(                                                \
-        ARM64Operand::CreateImm(Expr, S, getLoc(), getContext()));     \
-  } while (0)
-
-  if (Mnemonic == "ic") {
-    if (!Op.compare_lower("ialluis")) {
-      // SYS #0, C7, C1, #0
-      SYS_ALIAS(0, 7, 1, 0);
-    } else if (!Op.compare_lower("iallu")) {
-      // SYS #0, C7, C5, #0
-      SYS_ALIAS(0, 7, 5, 0);
-    } else if (!Op.compare_lower("ivau")) {
-      // SYS #3, C7, C5, #1
-      SYS_ALIAS(3, 7, 5, 1);
-    } else {
-      return TokError("invalid operand for IC instruction");
-    }
-  } else if (Mnemonic == "dc") {
-    if (!Op.compare_lower("zva")) {
-      // SYS #3, C7, C4, #1
-      SYS_ALIAS(3, 7, 4, 1);
-    } else if (!Op.compare_lower("ivac")) {
-      // SYS #0, C7, C6, #1
-      SYS_ALIAS(0, 7, 6, 1);
-    } else if (!Op.compare_lower("isw")) {
-      // SYS #0, C7, C6, #2
-      SYS_ALIAS(0, 7, 6, 2);
-    } else if (!Op.compare_lower("cvac")) {
-      // SYS #3, C7, C10, #1
-      SYS_ALIAS(3, 7, 10, 1);
-    } else if (!Op.compare_lower("csw")) {
-      // SYS #0, C7, C10, #2
-      SYS_ALIAS(0, 7, 10, 2);
-    } else if (!Op.compare_lower("cvau")) {
-      // SYS #3, C7, C11, #1
-      SYS_ALIAS(3, 7, 11, 1);
-    } else if (!Op.compare_lower("civac")) {
-      // SYS #3, C7, C14, #1
-      SYS_ALIAS(3, 7, 14, 1);
-    } else if (!Op.compare_lower("cisw")) {
-      // SYS #0, C7, C14, #2
-      SYS_ALIAS(0, 7, 14, 2);
-    } else {
-      return TokError("invalid operand for DC instruction");
-    }
-  } else if (Mnemonic == "at") {
-    if (!Op.compare_lower("s1e1r")) {
-      // SYS #0, C7, C8, #0
-      SYS_ALIAS(0, 7, 8, 0);
-    } else if (!Op.compare_lower("s1e2r")) {
-      // SYS #4, C7, C8, #0
-      SYS_ALIAS(4, 7, 8, 0);
-    } else if (!Op.compare_lower("s1e3r")) {
-      // SYS #6, C7, C8, #0
-      SYS_ALIAS(6, 7, 8, 0);
-    } else if (!Op.compare_lower("s1e1w")) {
-      // SYS #0, C7, C8, #1
-      SYS_ALIAS(0, 7, 8, 1);
-    } else if (!Op.compare_lower("s1e2w")) {
-      // SYS #4, C7, C8, #1
-      SYS_ALIAS(4, 7, 8, 1);
-    } else if (!Op.compare_lower("s1e3w")) {
-      // SYS #6, C7, C8, #1
-      SYS_ALIAS(6, 7, 8, 1);
-    } else if (!Op.compare_lower("s1e0r")) {
-      // SYS #0, C7, C8, #2
-      SYS_ALIAS(0, 7, 8, 2);
-    } else if (!Op.compare_lower("s1e0w")) {
-      // SYS #0, C7, C8, #3
-      SYS_ALIAS(0, 7, 8, 3);
-    } else if (!Op.compare_lower("s12e1r")) {
-      // SYS #4, C7, C8, #4
-      SYS_ALIAS(4, 7, 8, 4);
-    } else if (!Op.compare_lower("s12e1w")) {
-      // SYS #4, C7, C8, #5
-      SYS_ALIAS(4, 7, 8, 5);
-    } else if (!Op.compare_lower("s12e0r")) {
-      // SYS #4, C7, C8, #6
-      SYS_ALIAS(4, 7, 8, 6);
-    } else if (!Op.compare_lower("s12e0w")) {
-      // SYS #4, C7, C8, #7
-      SYS_ALIAS(4, 7, 8, 7);
-    } else {
-      return TokError("invalid operand for AT instruction");
-    }
-  } else if (Mnemonic == "tlbi") {
-    if (!Op.compare_lower("vmalle1is")) {
-      // SYS #0, C8, C3, #0
-      SYS_ALIAS(0, 8, 3, 0);
-    } else if (!Op.compare_lower("alle2is")) {
-      // SYS #4, C8, C3, #0
-      SYS_ALIAS(4, 8, 3, 0);
-    } else if (!Op.compare_lower("alle3is")) {
-      // SYS #6, C8, C3, #0
-      SYS_ALIAS(6, 8, 3, 0);
-    } else if (!Op.compare_lower("vae1is")) {
-      // SYS #0, C8, C3, #1
-      SYS_ALIAS(0, 8, 3, 1);
-    } else if (!Op.compare_lower("vae2is")) {
-      // SYS #4, C8, C3, #1
-      SYS_ALIAS(4, 8, 3, 1);
-    } else if (!Op.compare_lower("vae3is")) {
-      // SYS #6, C8, C3, #1
-      SYS_ALIAS(6, 8, 3, 1);
-    } else if (!Op.compare_lower("aside1is")) {
-      // SYS #0, C8, C3, #2
-      SYS_ALIAS(0, 8, 3, 2);
-    } else if (!Op.compare_lower("vaae1is")) {
-      // SYS #0, C8, C3, #3
-      SYS_ALIAS(0, 8, 3, 3);
-    } else if (!Op.compare_lower("alle1is")) {
-      // SYS #4, C8, C3, #4
-      SYS_ALIAS(4, 8, 3, 4);
-    } else if (!Op.compare_lower("vale1is")) {
-      // SYS #0, C8, C3, #5
-      SYS_ALIAS(0, 8, 3, 5);
-    } else if (!Op.compare_lower("vaale1is")) {
-      // SYS #0, C8, C3, #7
-      SYS_ALIAS(0, 8, 3, 7);
-    } else if (!Op.compare_lower("vmalle1")) {
-      // SYS #0, C8, C7, #0
-      SYS_ALIAS(0, 8, 7, 0);
-    } else if (!Op.compare_lower("alle2")) {
-      // SYS #4, C8, C7, #0
-      SYS_ALIAS(4, 8, 7, 0);
-    } else if (!Op.compare_lower("vale2is")) {
-      // SYS #4, C8, C3, #5
-      SYS_ALIAS(4, 8, 3, 5);
-    } else if (!Op.compare_lower("vale3is")) {
-      // SYS #6, C8, C3, #5
-      SYS_ALIAS(6, 8, 3, 5);
-    } else if (!Op.compare_lower("alle3")) {
-      // SYS #6, C8, C7, #0
-      SYS_ALIAS(6, 8, 7, 0);
-    } else if (!Op.compare_lower("vae1")) {
-      // SYS #0, C8, C7, #1
-      SYS_ALIAS(0, 8, 7, 1);
-    } else if (!Op.compare_lower("vae2")) {
-      // SYS #4, C8, C7, #1
-      SYS_ALIAS(4, 8, 7, 1);
-    } else if (!Op.compare_lower("vae3")) {
-      // SYS #6, C8, C7, #1
-      SYS_ALIAS(6, 8, 7, 1);
-    } else if (!Op.compare_lower("aside1")) {
-      // SYS #0, C8, C7, #2
-      SYS_ALIAS(0, 8, 7, 2);
-    } else if (!Op.compare_lower("vaae1")) {
-      // SYS #0, C8, C7, #3
-      SYS_ALIAS(0, 8, 7, 3);
-    } else if (!Op.compare_lower("alle1")) {
-      // SYS #4, C8, C7, #4
-      SYS_ALIAS(4, 8, 7, 4);
-    } else if (!Op.compare_lower("vale1")) {
-      // SYS #0, C8, C7, #5
-      SYS_ALIAS(0, 8, 7, 5);
-    } else if (!Op.compare_lower("vale2")) {
-      // SYS #4, C8, C7, #5
-      SYS_ALIAS(4, 8, 7, 5);
-    } else if (!Op.compare_lower("vale3")) {
-      // SYS #6, C8, C7, #5
-      SYS_ALIAS(6, 8, 7, 5);
-    } else if (!Op.compare_lower("vaale1")) {
-      // SYS #0, C8, C7, #7
-      SYS_ALIAS(0, 8, 7, 7);
-    } else if (!Op.compare_lower("ipas2e1")) {
-      // SYS #4, C8, C4, #1
-      SYS_ALIAS(4, 8, 4, 1);
-    } else if (!Op.compare_lower("ipas2le1")) {
-      // SYS #4, C8, C4, #5
-      SYS_ALIAS(4, 8, 4, 5);
-    } else if (!Op.compare_lower("vmalls12e1")) {
-      // SYS #4, C8, C7, #6
-      SYS_ALIAS(4, 8, 7, 6);
-    } else if (!Op.compare_lower("vmalls12e1is")) {
-      // SYS #4, C8, C3, #6
-      SYS_ALIAS(4, 8, 3, 6);
-    } else {
-      return TokError("invalid operand for TLBI instruction");
-    }
-  }
-
-#undef SYS_ALIAS
-
-  Parser.Lex(); // Eat operand.
-
-  // Check for the optional register operand.
-  if (getLexer().is(AsmToken::Comma)) {
-    Parser.Lex(); // Eat comma.
-
-    if (Tok.isNot(AsmToken::Identifier) || parseRegister(Operands))
-      return TokError("expected register operand");
-  }
-
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
-    Parser.eatToEndOfStatement();
-    return TokError("unexpected token in argument list");
-  }
-
-  Parser.Lex(); // Consume the EndOfStatement
-  return false;
-}
-
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
-  const AsmToken &Tok = Parser.getTok();
-
-  // Can be either a #imm style literal or an option name.
-  if (Tok.is(AsmToken::Hash)) {
-    // Immediate operand.
-    Parser.Lex(); // Eat the '#'
-    const MCExpr *ImmVal;
-    SMLoc ExprLoc = getLoc();
-    if (getParser().parseExpression(ImmVal))
-      return MatchOperand_ParseFail;
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
-    if (!MCE) {
-      Error(ExprLoc, "immediate value expected for barrier operand");
-      return MatchOperand_ParseFail;
-    }
-    if (MCE->getValue() < 0 || MCE->getValue() > 15) {
-      Error(ExprLoc, "barrier operand out of range");
-      return MatchOperand_ParseFail;
-    }
-    Operands.push_back(
-        ARM64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
-    return MatchOperand_Success;
-  }
-
-  if (Tok.isNot(AsmToken::Identifier)) {
-    TokError("invalid operand for instruction");
-    return MatchOperand_ParseFail;
-  }
-
-  unsigned Opt = StringSwitch<unsigned>(Tok.getString())
-                     .Case("oshld", ARM64SYS::OSHLD)
-                     .Case("oshst", ARM64SYS::OSHST)
-                     .Case("osh", ARM64SYS::OSH)
-                     .Case("nshld", ARM64SYS::NSHLD)
-                     .Case("nshst", ARM64SYS::NSHST)
-                     .Case("nsh", ARM64SYS::NSH)
-                     .Case("ishld", ARM64SYS::ISHLD)
-                     .Case("ishst", ARM64SYS::ISHST)
-                     .Case("ish", ARM64SYS::ISH)
-                     .Case("ld", ARM64SYS::LD)
-                     .Case("st", ARM64SYS::ST)
-                     .Case("sy", ARM64SYS::SY)
-                     .Default(ARM64SYS::InvalidBarrier);
-  if (Opt == ARM64SYS::InvalidBarrier) {
-    TokError("invalid barrier option name");
-    return MatchOperand_ParseFail;
-  }
-
-  // The only valid named option for ISB is 'sy'.
-  if (Mnemonic == "isb" && Opt != ARM64SYS::SY) {
-    TokError("'sy' or #imm operand expected");
-    return MatchOperand_ParseFail;
-  }
-
-  Operands.push_back(ARM64Operand::CreateBarrier(Opt, getLoc(), getContext()));
-  Parser.Lex(); // Consume the option
-
-  return MatchOperand_Success;
-}
-
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseSystemRegister(OperandVector &Operands) {
-  const AsmToken &Tok = Parser.getTok();
-
-  // It can be specified as a symbolic name.
-  if (Tok.isNot(AsmToken::Identifier))
-    return MatchOperand_NoMatch;
-
-  auto ID = Tok.getString().lower();
-  ARM64SYS::SystemRegister Reg =
-      StringSwitch<ARM64SYS::SystemRegister>(ID)
-          .Case("spsr_el1", ARM64SYS::SPSR_svc)
-          .Case("spsr_svc", ARM64SYS::SPSR_svc)
-          .Case("elr_el1", ARM64SYS::ELR_EL1)
-          .Case("sp_el0", ARM64SYS::SP_EL0)
-          .Case("spsel", ARM64SYS::SPSel)
-          .Case("daif", ARM64SYS::DAIF)
-          .Case("currentel", ARM64SYS::CurrentEL)
-          .Case("nzcv", ARM64SYS::NZCV)
-          .Case("fpcr", ARM64SYS::FPCR)
-          .Case("fpsr", ARM64SYS::FPSR)
-          .Case("dspsr", ARM64SYS::DSPSR)
-          .Case("dlr", ARM64SYS::DLR)
-          .Case("spsr_el2", ARM64SYS::SPSR_hyp)
-          .Case("spsr_hyp", ARM64SYS::SPSR_hyp)
-          .Case("elr_el2", ARM64SYS::ELR_EL2)
-          .Case("sp_el1", ARM64SYS::SP_EL1)
-          .Case("spsr_irq", ARM64SYS::SPSR_irq)
-          .Case("spsr_abt", ARM64SYS::SPSR_abt)
-          .Case("spsr_und", ARM64SYS::SPSR_und)
-          .Case("spsr_fiq", ARM64SYS::SPSR_fiq)
-          .Case("spsr_el3", ARM64SYS::SPSR_EL3)
-          .Case("elr_el3", ARM64SYS::ELR_EL3)
-          .Case("sp_el2", ARM64SYS::SP_EL2)
-          .Case("midr_el1", ARM64SYS::MIDR_EL1)
-          .Case("ctr_el0", ARM64SYS::CTR_EL0)
-          .Case("mpidr_el1", ARM64SYS::MPIDR_EL1)
-          .Case("ecoidr_el1", ARM64SYS::ECOIDR_EL1)
-          .Case("dczid_el0", ARM64SYS::DCZID_EL0)
-          .Case("mvfr0_el1", ARM64SYS::MVFR0_EL1)
-          .Case("mvfr1_el1", ARM64SYS::MVFR1_EL1)
-          .Case("id_aa64pfr0_el1", ARM64SYS::ID_AA64PFR0_EL1)
-          .Case("id_aa64pfr1_el1", ARM64SYS::ID_AA64PFR1_EL1)
-          .Case("id_aa64dfr0_el1", ARM64SYS::ID_AA64DFR0_EL1)
-          .Case("id_aa64dfr1_el1", ARM64SYS::ID_AA64DFR1_EL1)
-          .Case("id_aa64isar0_el1", ARM64SYS::ID_AA64ISAR0_EL1)
-          .Case("id_aa64isar1_el1", ARM64SYS::ID_AA64ISAR1_EL1)
-          .Case("id_aa64mmfr0_el1", ARM64SYS::ID_AA64MMFR0_EL1)
-          .Case("id_aa64mmfr1_el1", ARM64SYS::ID_AA64MMFR1_EL1)
-          .Case("ccsidr_el1", ARM64SYS::CCSIDR_EL1)
-          .Case("clidr_el1", ARM64SYS::CLIDR_EL1)
-          .Case("aidr_el1", ARM64SYS::AIDR_EL1)
-          .Case("csselr_el1", ARM64SYS::CSSELR_EL1)
-          .Case("vpidr_el2", ARM64SYS::VPIDR_EL2)
-          .Case("vmpidr_el2", ARM64SYS::VMPIDR_EL2)
-          .Case("sctlr_el1", ARM64SYS::SCTLR_EL1)
-          .Case("sctlr_el2", ARM64SYS::SCTLR_EL2)
-          .Case("sctlr_el3", ARM64SYS::SCTLR_EL3)
-          .Case("actlr_el1", ARM64SYS::ACTLR_EL1)
-          .Case("actlr_el2", ARM64SYS::ACTLR_EL2)
-          .Case("actlr_el3", ARM64SYS::ACTLR_EL3)
-          .Case("cpacr_el1", ARM64SYS::CPACR_EL1)
-          .Case("cptr_el2", ARM64SYS::CPTR_EL2)
-          .Case("cptr_el3", ARM64SYS::CPTR_EL3)
-          .Case("scr_el3", ARM64SYS::SCR_EL3)
-          .Case("hcr_el2", ARM64SYS::HCR_EL2)
-          .Case("mdcr_el2", ARM64SYS::MDCR_EL2)
-          .Case("mdcr_el3", ARM64SYS::MDCR_EL3)
-          .Case("hstr_el2", ARM64SYS::HSTR_EL2)
-          .Case("hacr_el2", ARM64SYS::HACR_EL2)
-          .Case("ttbr0_el1", ARM64SYS::TTBR0_EL1)
-          .Case("ttbr1_el1", ARM64SYS::TTBR1_EL1)
-          .Case("ttbr0_el2", ARM64SYS::TTBR0_EL2)
-          .Case("ttbr0_el3", ARM64SYS::TTBR0_EL3)
-          .Case("vttbr_el2", ARM64SYS::VTTBR_EL2)
-          .Case("tcr_el1", ARM64SYS::TCR_EL1)
-          .Case("tcr_el2", ARM64SYS::TCR_EL2)
-          .Case("tcr_el3", ARM64SYS::TCR_EL3)
-          .Case("vtcr_el2", ARM64SYS::VTCR_EL2)
-          .Case("adfsr_el1", ARM64SYS::ADFSR_EL1)
-          .Case("aifsr_el1", ARM64SYS::AIFSR_EL1)
-          .Case("adfsr_el2", ARM64SYS::ADFSR_EL2)
-          .Case("aifsr_el2", ARM64SYS::AIFSR_EL2)
-          .Case("adfsr_el3", ARM64SYS::ADFSR_EL3)
-          .Case("aifsr_el3", ARM64SYS::AIFSR_EL3)
-          .Case("esr_el1", ARM64SYS::ESR_EL1)
-          .Case("esr_el2", ARM64SYS::ESR_EL2)
-          .Case("esr_el3", ARM64SYS::ESR_EL3)
-          .Case("far_el1", ARM64SYS::FAR_EL1)
-          .Case("far_el2", ARM64SYS::FAR_EL2)
-          .Case("far_el3", ARM64SYS::FAR_EL3)
-          .Case("hpfar_el2", ARM64SYS::HPFAR_EL2)
-          .Case("par_el1", ARM64SYS::PAR_EL1)
-          .Case("mair_el1", ARM64SYS::MAIR_EL1)
-          .Case("mair_el2", ARM64SYS::MAIR_EL2)
-          .Case("mair_el3", ARM64SYS::MAIR_EL3)
-          .Case("amair_el1", ARM64SYS::AMAIR_EL1)
-          .Case("amair_el2", ARM64SYS::AMAIR_EL2)
-          .Case("amair_el3", ARM64SYS::AMAIR_EL3)
-          .Case("vbar_el1", ARM64SYS::VBAR_EL1)
-          .Case("vbar_el2", ARM64SYS::VBAR_EL2)
-          .Case("vbar_el3", ARM64SYS::VBAR_EL3)
-          .Case("rvbar_el1", ARM64SYS::RVBAR_EL1)
-          .Case("rvbar_el2", ARM64SYS::RVBAR_EL2)
-          .Case("rvbar_el3", ARM64SYS::RVBAR_EL3)
-          .Case("isr_el1", ARM64SYS::ISR_EL1)
-          .Case("contextidr_el1", ARM64SYS::CONTEXTIDR_EL1)
-          .Case("tpidr_el0", ARM64SYS::TPIDR_EL0)
-          .Case("tpidrro_el0", ARM64SYS::TPIDRRO_EL0)
-          .Case("tpidr_el1", ARM64SYS::TPIDR_EL1)
-          .Case("tpidr_el2", ARM64SYS::TPIDR_EL2)
-          .Case("tpidr_el3", ARM64SYS::TPIDR_EL3)
-          .Case("teecr32_el1", ARM64SYS::TEECR32_EL1)
-          .Case("cntfrq_el0", ARM64SYS::CNTFRQ_EL0)
-          .Case("cntpct_el0", ARM64SYS::CNTPCT_EL0)
-          .Case("cntvct_el0", ARM64SYS::CNTVCT_EL0)
-          .Case("cntvoff_el2", ARM64SYS::CNTVOFF_EL2)
-          .Case("cntkctl_el1", ARM64SYS::CNTKCTL_EL1)
-          .Case("cnthctl_el2", ARM64SYS::CNTHCTL_EL2)
-          .Case("cntp_tval_el0", ARM64SYS::CNTP_TVAL_EL0)
-          .Case("cntp_ctl_el0", ARM64SYS::CNTP_CTL_EL0)
-          .Case("cntp_cval_el0", ARM64SYS::CNTP_CVAL_EL0)
-          .Case("cntv_tval_el0", ARM64SYS::CNTV_TVAL_EL0)
-          .Case("cntv_ctl_el0", ARM64SYS::CNTV_CTL_EL0)
-          .Case("cntv_cval_el0", ARM64SYS::CNTV_CVAL_EL0)
-          .Case("cnthp_tval_el2", ARM64SYS::CNTHP_TVAL_EL2)
-          .Case("cnthp_ctl_el2", ARM64SYS::CNTHP_CTL_EL2)
-          .Case("cnthp_cval_el2", ARM64SYS::CNTHP_CVAL_EL2)
-          .Case("cntps_tval_el1", ARM64SYS::CNTPS_TVAL_EL1)
-          .Case("cntps_ctl_el1", ARM64SYS::CNTPS_CTL_EL1)
-          .Case("cntps_cval_el1", ARM64SYS::CNTPS_CVAL_EL1)
-          .Case("dacr32_el2", ARM64SYS::DACR32_EL2)
-          .Case("ifsr32_el2", ARM64SYS::IFSR32_EL2)
-          .Case("teehbr32_el1", ARM64SYS::TEEHBR32_EL1)
-          .Case("sder32_el3", ARM64SYS::SDER32_EL3)
-          .Case("fpexc32_el2", ARM64SYS::FPEXC32_EL2)
-          .Case("current_el", ARM64SYS::CurrentEL)
-          .Case("pmevcntr0_el0", ARM64SYS::PMEVCNTR0_EL0)
-          .Case("pmevcntr1_el0", ARM64SYS::PMEVCNTR1_EL0)
-          .Case("pmevcntr2_el0", ARM64SYS::PMEVCNTR2_EL0)
-          .Case("pmevcntr3_el0", ARM64SYS::PMEVCNTR3_EL0)
-          .Case("pmevcntr4_el0", ARM64SYS::PMEVCNTR4_EL0)
-          .Case("pmevcntr5_el0", ARM64SYS::PMEVCNTR5_EL0)
-          .Case("pmevcntr6_el0", ARM64SYS::PMEVCNTR6_EL0)
-          .Case("pmevcntr7_el0", ARM64SYS::PMEVCNTR7_EL0)
-          .Case("pmevcntr8_el0", ARM64SYS::PMEVCNTR8_EL0)
-          .Case("pmevcntr9_el0", ARM64SYS::PMEVCNTR9_EL0)
-          .Case("pmevcntr10_el0", ARM64SYS::PMEVCNTR10_EL0)
-          .Case("pmevcntr11_el0", ARM64SYS::PMEVCNTR11_EL0)
-          .Case("pmevcntr12_el0", ARM64SYS::PMEVCNTR12_EL0)
-          .Case("pmevcntr13_el0", ARM64SYS::PMEVCNTR13_EL0)
-          .Case("pmevcntr14_el0", ARM64SYS::PMEVCNTR14_EL0)
-          .Case("pmevcntr15_el0", ARM64SYS::PMEVCNTR15_EL0)
-          .Case("pmevcntr16_el0", ARM64SYS::PMEVCNTR16_EL0)
-          .Case("pmevcntr17_el0", ARM64SYS::PMEVCNTR17_EL0)
-          .Case("pmevcntr18_el0", ARM64SYS::PMEVCNTR18_EL0)
-          .Case("pmevcntr19_el0", ARM64SYS::PMEVCNTR19_EL0)
-          .Case("pmevcntr20_el0", ARM64SYS::PMEVCNTR20_EL0)
-          .Case("pmevcntr21_el0", ARM64SYS::PMEVCNTR21_EL0)
-          .Case("pmevcntr22_el0", ARM64SYS::PMEVCNTR22_EL0)
-          .Case("pmevcntr23_el0", ARM64SYS::PMEVCNTR23_EL0)
-          .Case("pmevcntr24_el0", ARM64SYS::PMEVCNTR24_EL0)
-          .Case("pmevcntr25_el0", ARM64SYS::PMEVCNTR25_EL0)
-          .Case("pmevcntr26_el0", ARM64SYS::PMEVCNTR26_EL0)
-          .Case("pmevcntr27_el0", ARM64SYS::PMEVCNTR27_EL0)
-          .Case("pmevcntr28_el0", ARM64SYS::PMEVCNTR28_EL0)
-          .Case("pmevcntr29_el0", ARM64SYS::PMEVCNTR29_EL0)
-          .Case("pmevcntr30_el0", ARM64SYS::PMEVCNTR30_EL0)
-          .Case("pmevtyper0_el0", ARM64SYS::PMEVTYPER0_EL0)
-          .Case("pmevtyper1_el0", ARM64SYS::PMEVTYPER1_EL0)
-          .Case("pmevtyper2_el0", ARM64SYS::PMEVTYPER2_EL0)
-          .Case("pmevtyper3_el0", ARM64SYS::PMEVTYPER3_EL0)
-          .Case("pmevtyper4_el0", ARM64SYS::PMEVTYPER4_EL0)
-          .Case("pmevtyper5_el0", ARM64SYS::PMEVTYPER5_EL0)
-          .Case("pmevtyper6_el0", ARM64SYS::PMEVTYPER6_EL0)
-          .Case("pmevtyper7_el0", ARM64SYS::PMEVTYPER7_EL0)
-          .Case("pmevtyper8_el0", ARM64SYS::PMEVTYPER8_EL0)
-          .Case("pmevtyper9_el0", ARM64SYS::PMEVTYPER9_EL0)
-          .Case("pmevtyper10_el0", ARM64SYS::PMEVTYPER10_EL0)
-          .Case("pmevtyper11_el0", ARM64SYS::PMEVTYPER11_EL0)
-          .Case("pmevtyper12_el0", ARM64SYS::PMEVTYPER12_EL0)
-          .Case("pmevtyper13_el0", ARM64SYS::PMEVTYPER13_EL0)
-          .Case("pmevtyper14_el0", ARM64SYS::PMEVTYPER14_EL0)
-          .Case("pmevtyper15_el0", ARM64SYS::PMEVTYPER15_EL0)
-          .Case("pmevtyper16_el0", ARM64SYS::PMEVTYPER16_EL0)
-          .Case("pmevtyper17_el0", ARM64SYS::PMEVTYPER17_EL0)
-          .Case("pmevtyper18_el0", ARM64SYS::PMEVTYPER18_EL0)
-          .Case("pmevtyper19_el0", ARM64SYS::PMEVTYPER19_EL0)
-          .Case("pmevtyper20_el0", ARM64SYS::PMEVTYPER20_EL0)
-          .Case("pmevtyper21_el0", ARM64SYS::PMEVTYPER21_EL0)
-          .Case("pmevtyper22_el0", ARM64SYS::PMEVTYPER22_EL0)
-          .Case("pmevtyper23_el0", ARM64SYS::PMEVTYPER23_EL0)
-          .Case("pmevtyper24_el0", ARM64SYS::PMEVTYPER24_EL0)
-          .Case("pmevtyper25_el0", ARM64SYS::PMEVTYPER25_EL0)
-          .Case("pmevtyper26_el0", ARM64SYS::PMEVTYPER26_EL0)
-          .Case("pmevtyper27_el0", ARM64SYS::PMEVTYPER27_EL0)
-          .Case("pmevtyper28_el0", ARM64SYS::PMEVTYPER28_EL0)
-          .Case("pmevtyper29_el0", ARM64SYS::PMEVTYPER29_EL0)
-          .Case("pmevtyper30_el0", ARM64SYS::PMEVTYPER30_EL0)
-          .Case("pmccfiltr_el0", ARM64SYS::PMCCFILTR_EL0)
-          .Case("rmr_el3", ARM64SYS::RMR_EL3)
-          .Case("rmr_el2", ARM64SYS::RMR_EL2)
-          .Case("rmr_el1", ARM64SYS::RMR_EL1)
-          .Case("cpm_ioacc_ctl_el3", ARM64SYS::CPM_IOACC_CTL_EL3)
-          .Case("mdccsr_el0", ARM64SYS::MDCCSR_EL0)
-          .Case("mdccint_el1", ARM64SYS::MDCCINT_EL1)
-          .Case("dbgdtr_el0", ARM64SYS::DBGDTR_EL0)
-          .Case("dbgdtrrx_el0", ARM64SYS::DBGDTRRX_EL0)
-          .Case("dbgdtrtx_el0", ARM64SYS::DBGDTRTX_EL0)
-          .Case("dbgvcr32_el2", ARM64SYS::DBGVCR32_EL2)
-          .Case("osdtrrx_el1", ARM64SYS::OSDTRRX_EL1)
-          .Case("mdscr_el1", ARM64SYS::MDSCR_EL1)
-          .Case("osdtrtx_el1", ARM64SYS::OSDTRTX_EL1)
-          .Case("oseccr_el11", ARM64SYS::OSECCR_EL11)
-          .Case("dbgbvr0_el1", ARM64SYS::DBGBVR0_EL1)
-          .Case("dbgbvr1_el1", ARM64SYS::DBGBVR1_EL1)
-          .Case("dbgbvr2_el1", ARM64SYS::DBGBVR2_EL1)
-          .Case("dbgbvr3_el1", ARM64SYS::DBGBVR3_EL1)
-          .Case("dbgbvr4_el1", ARM64SYS::DBGBVR4_EL1)
-          .Case("dbgbvr5_el1", ARM64SYS::DBGBVR5_EL1)
-          .Case("dbgbvr6_el1", ARM64SYS::DBGBVR6_EL1)
-          .Case("dbgbvr7_el1", ARM64SYS::DBGBVR7_EL1)
-          .Case("dbgbvr8_el1", ARM64SYS::DBGBVR8_EL1)
-          .Case("dbgbvr9_el1", ARM64SYS::DBGBVR9_EL1)
-          .Case("dbgbvr10_el1", ARM64SYS::DBGBVR10_EL1)
-          .Case("dbgbvr11_el1", ARM64SYS::DBGBVR11_EL1)
-          .Case("dbgbvr12_el1", ARM64SYS::DBGBVR12_EL1)
-          .Case("dbgbvr13_el1", ARM64SYS::DBGBVR13_EL1)
-          .Case("dbgbvr14_el1", ARM64SYS::DBGBVR14_EL1)
-          .Case("dbgbvr15_el1", ARM64SYS::DBGBVR15_EL1)
-          .Case("dbgbcr0_el1", ARM64SYS::DBGBCR0_EL1)
-          .Case("dbgbcr1_el1", ARM64SYS::DBGBCR1_EL1)
-          .Case("dbgbcr2_el1", ARM64SYS::DBGBCR2_EL1)
-          .Case("dbgbcr3_el1", ARM64SYS::DBGBCR3_EL1)
-          .Case("dbgbcr4_el1", ARM64SYS::DBGBCR4_EL1)
-          .Case("dbgbcr5_el1", ARM64SYS::DBGBCR5_EL1)
-          .Case("dbgbcr6_el1", ARM64SYS::DBGBCR6_EL1)
-          .Case("dbgbcr7_el1", ARM64SYS::DBGBCR7_EL1)
-          .Case("dbgbcr8_el1", ARM64SYS::DBGBCR8_EL1)
-          .Case("dbgbcr9_el1", ARM64SYS::DBGBCR9_EL1)
-          .Case("dbgbcr10_el1", ARM64SYS::DBGBCR10_EL1)
-          .Case("dbgbcr11_el1", ARM64SYS::DBGBCR11_EL1)
-          .Case("dbgbcr12_el1", ARM64SYS::DBGBCR12_EL1)
-          .Case("dbgbcr13_el1", ARM64SYS::DBGBCR13_EL1)
-          .Case("dbgbcr14_el1", ARM64SYS::DBGBCR14_EL1)
-          .Case("dbgbcr15_el1", ARM64SYS::DBGBCR15_EL1)
-          .Case("dbgwvr0_el1", ARM64SYS::DBGWVR0_EL1)
-          .Case("dbgwvr1_el1", ARM64SYS::DBGWVR1_EL1)
-          .Case("dbgwvr2_el1", ARM64SYS::DBGWVR2_EL1)
-          .Case("dbgwvr3_el1", ARM64SYS::DBGWVR3_EL1)
-          .Case("dbgwvr4_el1", ARM64SYS::DBGWVR4_EL1)
-          .Case("dbgwvr5_el1", ARM64SYS::DBGWVR5_EL1)
-          .Case("dbgwvr6_el1", ARM64SYS::DBGWVR6_EL1)
-          .Case("dbgwvr7_el1", ARM64SYS::DBGWVR7_EL1)
-          .Case("dbgwvr8_el1", ARM64SYS::DBGWVR8_EL1)
-          .Case("dbgwvr9_el1", ARM64SYS::DBGWVR9_EL1)
-          .Case("dbgwvr10_el1", ARM64SYS::DBGWVR10_EL1)
-          .Case("dbgwvr11_el1", ARM64SYS::DBGWVR11_EL1)
-          .Case("dbgwvr12_el1", ARM64SYS::DBGWVR12_EL1)
-          .Case("dbgwvr13_el1", ARM64SYS::DBGWVR13_EL1)
-          .Case("dbgwvr14_el1", ARM64SYS::DBGWVR14_EL1)
-          .Case("dbgwvr15_el1", ARM64SYS::DBGWVR15_EL1)
-          .Case("dbgwcr0_el1", ARM64SYS::DBGWCR0_EL1)
-          .Case("dbgwcr1_el1", ARM64SYS::DBGWCR1_EL1)
-          .Case("dbgwcr2_el1", ARM64SYS::DBGWCR2_EL1)
-          .Case("dbgwcr3_el1", ARM64SYS::DBGWCR3_EL1)
-          .Case("dbgwcr4_el1", ARM64SYS::DBGWCR4_EL1)
-          .Case("dbgwcr5_el1", ARM64SYS::DBGWCR5_EL1)
-          .Case("dbgwcr6_el1", ARM64SYS::DBGWCR6_EL1)
-          .Case("dbgwcr7_el1", ARM64SYS::DBGWCR7_EL1)
-          .Case("dbgwcr8_el1", ARM64SYS::DBGWCR8_EL1)
-          .Case("dbgwcr9_el1", ARM64SYS::DBGWCR9_EL1)
-          .Case("dbgwcr10_el1", ARM64SYS::DBGWCR10_EL1)
-          .Case("dbgwcr11_el1", ARM64SYS::DBGWCR11_EL1)
-          .Case("dbgwcr12_el1", ARM64SYS::DBGWCR12_EL1)
-          .Case("dbgwcr13_el1", ARM64SYS::DBGWCR13_EL1)
-          .Case("dbgwcr14_el1", ARM64SYS::DBGWCR14_EL1)
-          .Case("dbgwcr15_el1", ARM64SYS::DBGWCR15_EL1)
-          .Case("mdrar_el1", ARM64SYS::MDRAR_EL1)
-          .Case("oslar_el1", ARM64SYS::OSLAR_EL1)
-          .Case("oslsr_el1", ARM64SYS::OSLSR_EL1)
-          .Case("osdlr_el1", ARM64SYS::OSDLR_EL1)
-          .Case("dbgprcr_el1", ARM64SYS::DBGPRCR_EL1)
-          .Case("dbgclaimset_el1", ARM64SYS::DBGCLAIMSET_EL1)
-          .Case("dbgclaimclr_el1", ARM64SYS::DBGCLAIMCLR_EL1)
-          .Case("dbgauthstatus_el1", ARM64SYS::DBGAUTHSTATUS_EL1)
-          .Case("dbgdevid2", ARM64SYS::DBGDEVID2)
-          .Case("dbgdevid1", ARM64SYS::DBGDEVID1)
-          .Case("dbgdevid0", ARM64SYS::DBGDEVID0)
-          .Case("id_pfr0_el1", ARM64SYS::ID_PFR0_EL1)
-          .Case("id_pfr1_el1", ARM64SYS::ID_PFR1_EL1)
-          .Case("id_dfr0_el1", ARM64SYS::ID_DFR0_EL1)
-          .Case("id_afr0_el1", ARM64SYS::ID_AFR0_EL1)
-          .Case("id_isar0_el1", ARM64SYS::ID_ISAR0_EL1)
-          .Case("id_isar1_el1", ARM64SYS::ID_ISAR1_EL1)
-          .Case("id_isar2_el1", ARM64SYS::ID_ISAR2_EL1)
-          .Case("id_isar3_el1", ARM64SYS::ID_ISAR3_EL1)
-          .Case("id_isar4_el1", ARM64SYS::ID_ISAR4_EL1)
-          .Case("id_isar5_el1", ARM64SYS::ID_ISAR5_EL1)
-          .Case("afsr1_el1", ARM64SYS::AFSR1_EL1)
-          .Case("afsr0_el1", ARM64SYS::AFSR0_EL1)
-          .Case("revidr_el1", ARM64SYS::REVIDR_EL1)
-          .Default(ARM64SYS::InvalidSystemReg);
-  if (Reg != ARM64SYS::InvalidSystemReg) {
-    // We matched a reg name, so create the operand.
-    Operands.push_back(
-        ARM64Operand::CreateSystemRegister(Reg, getLoc(), getContext()));
-    Parser.Lex(); // Consume the register name.
-    return MatchOperand_Success;
-  }
-
-  // Or we may have an identifier that encodes the sub-operands.
-  // For example, s3_2_c15_c0_0.
-  unsigned op0, op1, CRn, CRm, op2;
-  std::string Desc = ID;
-  if (std::sscanf(Desc.c_str(), "s%u_%u_c%u_c%u_%u", &op0, &op1, &CRn, &CRm,
-                  &op2) != 5)
-    return MatchOperand_NoMatch;
-  if ((op0 != 2 && op0 != 3) || op1 > 7 || CRn > 15 || CRm > 15 || op2 > 7)
-    return MatchOperand_NoMatch;
-
-  unsigned Val = op0 << 14 | op1 << 11 | CRn << 7 | CRm << 3 | op2;
-  Operands.push_back(
-      ARM64Operand::CreateSystemRegister(Val, getLoc(), getContext()));
-  Parser.Lex(); // Consume the register name.
-
-  return MatchOperand_Success;
-}
-
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseCPSRField(OperandVector &Operands) {
-  const AsmToken &Tok = Parser.getTok();
-
-  if (Tok.isNot(AsmToken::Identifier))
-    return MatchOperand_NoMatch;
-
-  ARM64SYS::CPSRField Field =
-      StringSwitch<ARM64SYS::CPSRField>(Tok.getString().lower())
-          .Case("spsel", ARM64SYS::cpsr_SPSel)
-          .Case("daifset", ARM64SYS::cpsr_DAIFSet)
-          .Case("daifclr", ARM64SYS::cpsr_DAIFClr)
-          .Default(ARM64SYS::InvalidCPSRField);
-  if (Field == ARM64SYS::InvalidCPSRField)
-    return MatchOperand_NoMatch;
-  Operands.push_back(
-      ARM64Operand::CreateCPSRField(Field, getLoc(), getContext()));
-  Parser.Lex(); // Consume the field name.
-
-  return MatchOperand_Success;
-}
-
-/// tryParseVectorRegister - Parse a vector register operand.
-bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
-  if (Parser.getTok().isNot(AsmToken::Identifier))
-    return true;
-
-  SMLoc S = getLoc();
-  // Check for a vector register specifier first.
-  StringRef Kind;
-  int64_t Reg = tryMatchVectorRegister(Kind);
-  if (Reg == -1)
-    return true;
-  Operands.push_back(
-      ARM64Operand::CreateReg(Reg, true, S, getLoc(), getContext()));
-  // If there was an explicit qualifier, that goes on as a literal text
-  // operand.
-  if (!Kind.empty())
-    Operands.push_back(ARM64Operand::CreateToken(Kind, false, S, getContext()));
-
-  // If there is an index specifier following the register, parse that too.
-  if (Parser.getTok().is(AsmToken::LBrac)) {
-    SMLoc SIdx = getLoc();
-    Parser.Lex(); // Eat left bracket token.
-
-    const MCExpr *ImmVal;
-    if (getParser().parseExpression(ImmVal))
-      return false;
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
-    if (!MCE) {
-      TokError("immediate value expected for vector index");
-      return false;
-    }
-
-    SMLoc E = getLoc();
-    if (Parser.getTok().isNot(AsmToken::RBrac)) {
-      Error(E, "']' expected");
-      return false;
-    }
-
-    Parser.Lex(); // Eat right bracket token.
-
-    Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E,
-                                                       getContext()));
-  }
-
-  return false;
-}
-
-/// parseRegister - Parse a non-vector register operand.
-bool ARM64AsmParser::parseRegister(OperandVector &Operands) {
-  SMLoc S = getLoc();
-  // Try for a vector register.
-  if (!tryParseVectorRegister(Operands))
-    return false;
-
-  // Try for a scalar register.
-  int64_t Reg = tryParseRegister();
-  if (Reg == -1)
-    return true;
-  Operands.push_back(
-      ARM64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
-
-  // A small number of instructions (FMOVXDhighr, for example) have "[1]"
-  // as a string token in the instruction itself.
-  if (getLexer().getKind() == AsmToken::LBrac) {
-    SMLoc LBracS = getLoc();
-    Parser.Lex();
-    const AsmToken &Tok = Parser.getTok();
-    if (Tok.is(AsmToken::Integer)) {
-      SMLoc IntS = getLoc();
-      int64_t Val = Tok.getIntVal();
-      if (Val == 1) {
-        Parser.Lex();
-        if (getLexer().getKind() == AsmToken::RBrac) {
-          SMLoc RBracS = getLoc();
-          Parser.Lex();
-          Operands.push_back(
-              ARM64Operand::CreateToken("[", false, LBracS, getContext()));
-          Operands.push_back(
-              ARM64Operand::CreateToken("1", false, IntS, getContext()));
-          Operands.push_back(
-              ARM64Operand::CreateToken("]", false, RBracS, getContext()));
-          return false;
-        }
-      }
-    }
-  }
-
-  return false;
-}
-
-/// tryParseNoIndexMemory - Custom parser method for memory operands that
-/// do not allow base register writeback modes, or that handle writeback
-/// separately from the memory operand (like the AdvSIMD ldX/stX
-/// instructions).
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseNoIndexMemory(OperandVector &Operands) {
-  if (Parser.getTok().isNot(AsmToken::LBrac))
-    return MatchOperand_NoMatch;
-  SMLoc S = getLoc();
-  Parser.Lex(); // Eat left bracket token.
-
-  const AsmToken &BaseRegTok = Parser.getTok();
-  if (BaseRegTok.isNot(AsmToken::Identifier)) {
-    Error(BaseRegTok.getLoc(), "register expected");
-    return MatchOperand_ParseFail;
-  }
-
-  int64_t Reg = tryParseRegister();
-  if (Reg == -1) {
-    Error(BaseRegTok.getLoc(), "register expected");
-    return MatchOperand_ParseFail;
-  }
-
-  SMLoc E = getLoc();
-  if (Parser.getTok().isNot(AsmToken::RBrac)) {
-    Error(E, "']' expected");
-    return MatchOperand_ParseFail;
-  }
-
-  Parser.Lex(); // Eat right bracket token.
-
-  Operands.push_back(ARM64Operand::CreateMem(Reg, 0, S, E, E, getContext()));
-  return MatchOperand_Success;
-}
-
-/// parseMemory - Parse a memory operand for a basic load/store instruction.
-bool ARM64AsmParser::parseMemory(OperandVector &Operands) {
-  assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket");
-  SMLoc S = getLoc();
-  Parser.Lex(); // Eat left bracket token.
-
-  const AsmToken &BaseRegTok = Parser.getTok();
-  if (BaseRegTok.isNot(AsmToken::Identifier))
-    return Error(BaseRegTok.getLoc(), "register expected");
-
-  int64_t Reg = tryParseRegister();
-  if (Reg == -1)
-    return Error(BaseRegTok.getLoc(), "register expected");
-
-  // If there is an offset expression, parse it.
-  const MCExpr *OffsetExpr = 0;
-  SMLoc OffsetLoc;
-  if (Parser.getTok().is(AsmToken::Comma)) {
-    Parser.Lex(); // Eat the comma.
-    OffsetLoc = getLoc();
-
-    // Register offset.
-    const AsmToken &OffsetRegTok = Parser.getTok();
-    int Reg2 = OffsetRegTok.is(AsmToken::Identifier) ? tryParseRegister() : -1;
-    if (Reg2 != -1) {
-      // Default shift is LSL, with an omitted shift amount. We use the third
-      // bit of the extend value to indicate presence/omission of the
-      // immediate offset.
-      ARM64_AM::ExtendType ExtOp = ARM64_AM::UXTX;
-      int64_t ShiftVal = 0;
-      bool ExplicitShift = false;
-
-      if (Parser.getTok().is(AsmToken::Comma)) {
-        // Embedded extend operand.
-        Parser.Lex(); // Eat the comma.
-
-        SMLoc ExtLoc = getLoc();
-        const AsmToken &Tok = Parser.getTok();
-        ExtOp = StringSwitch<ARM64_AM::ExtendType>(Tok.getString())
-                    .Case("uxtw", ARM64_AM::UXTW)
-                    .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX
-                    .Case("sxtw", ARM64_AM::SXTW)
-                    .Case("sxtx", ARM64_AM::SXTX)
-                    .Case("UXTW", ARM64_AM::UXTW)
-                    .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX
-                    .Case("SXTW", ARM64_AM::SXTW)
-                    .Case("SXTX", ARM64_AM::SXTX)
-                    .Default(ARM64_AM::InvalidExtend);
-        if (ExtOp == ARM64_AM::InvalidExtend)
-          return Error(ExtLoc, "expected valid extend operation");
-
-        Parser.Lex(); // Eat the extend op.
-
-        if (getLexer().is(AsmToken::RBrac)) {
-          // No immediate operand.
-          if (ExtOp == ARM64_AM::UXTX)
-            return Error(ExtLoc, "LSL extend requires immediate operand");
-        } else if (getLexer().is(AsmToken::Hash)) {
-          // Immediate operand.
-          Parser.Lex(); // Eat the '#'
-          const MCExpr *ImmVal;
-          SMLoc ExprLoc = getLoc();
-          if (getParser().parseExpression(ImmVal))
-            return true;
-          const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
-          if (!MCE)
-            return TokError("immediate value expected for extend operand");
-
-          ExplicitShift = true;
-          ShiftVal = MCE->getValue();
-          if (ShiftVal < 0 || ShiftVal > 4)
-            return Error(ExprLoc, "immediate operand out of range");
-        } else
-          return Error(getLoc(), "expected immediate operand");
-      }
-
-      if (Parser.getTok().isNot(AsmToken::RBrac))
-        return Error(getLoc(), "']' expected");
-
-      Parser.Lex(); // Eat right bracket token.
-
-      SMLoc E = getLoc();
-      Operands.push_back(ARM64Operand::CreateRegOffsetMem(
-          Reg, Reg2, ExtOp, ShiftVal, ExplicitShift, S, E, getContext()));
-      return false;
-
-      // Immediate expressions.
-    } else if (Parser.getTok().is(AsmToken::Hash)) {
-      Parser.Lex(); // Eat hash token.
-
-      if (parseSymbolicImmVal(OffsetExpr))
-        return true;
-    } else {
-      // FIXME: We really should make sure that we're dealing with a LDR/STR
-      // instruction that can legally have a symbolic expression here.
-      // Symbol reference.
-      if (Parser.getTok().isNot(AsmToken::Identifier) &&
-          Parser.getTok().isNot(AsmToken::String))
-        return Error(getLoc(), "identifier or immediate expression expected");
-      if (getParser().parseExpression(OffsetExpr))
-        return true;
-      // If this is a plain ref, make sure a legal variant kind was specified.
-      // Otherwise, it's a more complicated expression and we have to just
-      // assume it's OK and let the relocation stuff puke if it's not.
-      ARM64MCExpr::VariantKind ELFRefKind;
-      MCSymbolRefExpr::VariantKind DarwinRefKind;
-      const MCConstantExpr *Addend;
-      if (classifySymbolRef(OffsetExpr, ELFRefKind, DarwinRefKind, Addend) &&
-          Addend == 0) {
-        assert(ELFRefKind == ARM64MCExpr::VK_INVALID &&
-               "ELF symbol modifiers not supported here yet");
-
-        switch (DarwinRefKind) {
-        default:
-          return Error(getLoc(), "expected @pageoff or @gotpageoff modifier");
-        case MCSymbolRefExpr::VK_GOTPAGEOFF:
-        case MCSymbolRefExpr::VK_PAGEOFF:
-        case MCSymbolRefExpr::VK_TLVPPAGEOFF:
-          // These are what we're expecting.
-          break;
-        }
-      }
-    }
-  }
-
-  SMLoc E = getLoc();
-  if (Parser.getTok().isNot(AsmToken::RBrac))
-    return Error(E, "']' expected");
-
-  Parser.Lex(); // Eat right bracket token.
-
-  // Create the memory operand.
-  Operands.push_back(
-      ARM64Operand::CreateMem(Reg, OffsetExpr, S, E, OffsetLoc, getContext()));
-
-  // Check for a '!', indicating pre-indexed addressing with writeback.
-  if (Parser.getTok().is(AsmToken::Exclaim)) {
-    // There needs to have been an immediate or wback doesn't make sense.
-    if (!OffsetExpr)
-      return Error(E, "missing offset for pre-indexed addressing");
-    // Pre-indexed with writeback must have a constant expression for the
-    // offset. FIXME: Theoretically, we'd like to allow fixups so long
-    // as they don't require a relocation.
-    if (!isa<MCConstantExpr>(OffsetExpr))
-      return Error(OffsetLoc, "constant immediate expression expected");
-
-    // Create the Token operand for the '!'.
-    Operands.push_back(ARM64Operand::CreateToken(
-        "!", false, Parser.getTok().getLoc(), getContext()));
-    Parser.Lex(); // Eat the '!' token.
-  }
-
-  return false;
-}
-
-bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
-  bool HasELFModifier = false;
-  ARM64MCExpr::VariantKind RefKind;
-
-  if (Parser.getTok().is(AsmToken::Colon)) {
-    Parser.Lex(); // Eat ':'
-    HasELFModifier = true;
-
-    if (Parser.getTok().isNot(AsmToken::Identifier)) {
-      Error(Parser.getTok().getLoc(),
-            "expect relocation specifier in operand after ':'");
-      return true;
-    }
-
-    std::string LowerCase = Parser.getTok().getIdentifier().lower();
-    RefKind = StringSwitch<ARM64MCExpr::VariantKind>(LowerCase)
-                  .Case("lo12", ARM64MCExpr::VK_LO12)
-                  .Case("abs_g3", ARM64MCExpr::VK_ABS_G3)
-                  .Case("abs_g2", ARM64MCExpr::VK_ABS_G2)
-                  .Case("abs_g2_nc", ARM64MCExpr::VK_ABS_G2_NC)
-                  .Case("abs_g1", ARM64MCExpr::VK_ABS_G1)
-                  .Case("abs_g1_nc", ARM64MCExpr::VK_ABS_G1_NC)
-                  .Case("abs_g0", ARM64MCExpr::VK_ABS_G0)
-                  .Case("abs_g0_nc", ARM64MCExpr::VK_ABS_G0_NC)
-                  .Case("dtprel_g2", ARM64MCExpr::VK_DTPREL_G2)
-                  .Case("dtprel_g1", ARM64MCExpr::VK_DTPREL_G1)
-                  .Case("dtprel_g1_nc", ARM64MCExpr::VK_DTPREL_G1_NC)
-                  .Case("dtprel_g0", ARM64MCExpr::VK_DTPREL_G0)
-                  .Case("dtprel_g0_nc", ARM64MCExpr::VK_DTPREL_G0_NC)
-                  .Case("dtprel_lo12", ARM64MCExpr::VK_DTPREL_LO12)
-                  .Case("dtprel_lo12_nc", ARM64MCExpr::VK_DTPREL_LO12_NC)
-                  .Case("tprel_g2", ARM64MCExpr::VK_TPREL_G2)
-                  .Case("tprel_g1", ARM64MCExpr::VK_TPREL_G1)
-                  .Case("tprel_g1_nc", ARM64MCExpr::VK_TPREL_G1_NC)
-                  .Case("tprel_g0", ARM64MCExpr::VK_TPREL_G0)
-                  .Case("tprel_g0_nc", ARM64MCExpr::VK_TPREL_G0_NC)
-                  .Case("tprel_lo12", ARM64MCExpr::VK_TPREL_LO12)
-                  .Case("tprel_lo12_nc", ARM64MCExpr::VK_TPREL_LO12_NC)
-                  .Case("tlsdesc_lo12", ARM64MCExpr::VK_TLSDESC_LO12)
-                  .Case("got", ARM64MCExpr::VK_GOT_PAGE)
-                  .Case("got_lo12", ARM64MCExpr::VK_GOT_LO12)
-                  .Case("gottprel", ARM64MCExpr::VK_GOTTPREL_PAGE)
-                  .Case("gottprel_lo12", ARM64MCExpr::VK_GOTTPREL_LO12_NC)
-                  .Case("gottprel_g1", ARM64MCExpr::VK_GOTTPREL_G1)
-                  .Case("gottprel_g0_nc", ARM64MCExpr::VK_GOTTPREL_G0_NC)
-                  .Case("tlsdesc", ARM64MCExpr::VK_TLSDESC_PAGE)
-                  .Default(ARM64MCExpr::VK_INVALID);
-
-    if (RefKind == ARM64MCExpr::VK_INVALID) {
-      Error(Parser.getTok().getLoc(),
-            "expect relocation specifier in operand after ':'");
-      return true;
-    }
-
-    Parser.Lex(); // Eat identifier
-
-    if (Parser.getTok().isNot(AsmToken::Colon)) {
-      Error(Parser.getTok().getLoc(), "expect ':' after relocation specifier");
-      return true;
-    }
-    Parser.Lex(); // Eat ':'
-  }
-
-  if (getParser().parseExpression(ImmVal))
-    return true;
-
-  if (HasELFModifier)
-    ImmVal = ARM64MCExpr::Create(ImmVal, RefKind, getContext());
-
-  return false;
-}
-
-/// parseVectorList - Parse a vector list operand for AdvSIMD instructions.
-bool ARM64AsmParser::parseVectorList(OperandVector &Operands) {
-  assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Curly");
-  SMLoc S = getLoc();
-  Parser.Lex(); // Eat left curly token.
-  StringRef Kind;
-  int64_t FirstReg = tryMatchVectorRegister(Kind);
-  if (FirstReg == -1)
-    return Error(getLoc(), "vector register expected");
-  int64_t PrevReg = FirstReg;
-  unsigned Count = 1;
-  while (Parser.getTok().isNot(AsmToken::RCurly)) {
-    if (Parser.getTok().is(AsmToken::EndOfStatement))
-      Error(getLoc(), "'}' expected");
-
-    if (Parser.getTok().isNot(AsmToken::Comma))
-      return Error(getLoc(), "',' expected");
-    Parser.Lex(); // Eat the comma token.
-
-    SMLoc Loc = getLoc();
-    StringRef NextKind;
-    int64_t Reg = tryMatchVectorRegister(NextKind);
-    if (Reg == -1)
-      return Error(Loc, "vector register expected");
-    // Any kind suffixes must match on all regs in the list.
-    if (Kind != NextKind)
-      return Error(Loc, "mismatched register size suffix");
-
-    // Registers must be incremental (with wraparound at 31).
-    if (getContext().getRegisterInfo()->getEncodingValue(Reg) !=
-        (getContext().getRegisterInfo()->getEncodingValue(PrevReg) + 1) % 32)
-      return Error(Loc, "registers must be sequential");
-
-    PrevReg = Reg;
-    ++Count;
-  }
-  Parser.Lex(); // Eat the '}' token.
-
-  unsigned NumElements = 0;
-  char ElementKind = 0;
-  if (!Kind.empty())
-    parseValidVectorKind(Kind, NumElements, ElementKind);
-
-  Operands.push_back(ARM64Operand::CreateVectorList(
-      FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext()));
-
-  // If there is an index specifier following the list, parse that too.
-  if (Parser.getTok().is(AsmToken::LBrac)) {
-    SMLoc SIdx = getLoc();
-    Parser.Lex(); // Eat left bracket token.
-
-    const MCExpr *ImmVal;
-    if (getParser().parseExpression(ImmVal))
-      return false;
-    const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
-    if (!MCE) {
-      TokError("immediate value expected for vector index");
-      return false;
-    }
-
-    SMLoc E = getLoc();
-    if (Parser.getTok().isNot(AsmToken::RBrac)) {
-      Error(E, "']' expected");
-      return false;
-    }
-
-    Parser.Lex(); // Eat right bracket token.
-
-    Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E,
-                                                       getContext()));
-  }
-  return false;
-}
-
-/// parseOperand - Parse an ARM64 instruction operand. For now this parses the
-/// operand regardless of the mnemonic.
-bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
-                                  bool invertCondCode) {
-  // Check if the current operand has a custom associated parser; if so, try to
-  // custom parse the operand, or fall back to the general approach.
-  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
-  if (ResTy == MatchOperand_Success)
-    return false;
-  // If there wasn't a custom match, try the generic matcher below. Otherwise,
-  // there was a match, but an error occurred, in which case, just return that
-  // the operand parsing failed.
-  if (ResTy == MatchOperand_ParseFail)
-    return true;
-
-  // Nothing custom, so do general case parsing.
-  SMLoc S, E;
-  switch (getLexer().getKind()) {
-  default: {
-    SMLoc S = getLoc();
-    const MCExpr *Expr;
-    if (parseSymbolicImmVal(Expr))
-      return Error(S, "invalid operand");
-
-    SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-    Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
-    return false;
-  }
-  case AsmToken::LBrac:
-    return parseMemory(Operands);
-  case AsmToken::LCurly:
-    return parseVectorList(Operands);
-  case AsmToken::Identifier: {
-    // If we're expecting a Condition Code operand, then just parse that.
-    if (isCondCode)
-      return parseCondCode(Operands, invertCondCode);
-
-    // If it's a register name, parse it.
-    if (!parseRegister(Operands))
-      return false;
-
-    // This could be an optional "shift" operand.
-    if (!parseOptionalShift(Operands))
-      return false;
-
-    // Or maybe it could be an optional "extend" operand.
-    if (!parseOptionalExtend(Operands))
-      return false;
-
-    // This was not a register so parse other operands that start with an
-    // identifier (like labels) as expressions and create them as immediates.
-    const MCExpr *IdVal;
-    S = getLoc();
-    if (getParser().parseExpression(IdVal))
-      return true;
-
-    E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-    Operands.push_back(ARM64Operand::CreateImm(IdVal, S, E, getContext()));
-    return false;
-  }
-  case AsmToken::Hash: {
-    // #42 -> immediate.
-    S = getLoc();
-    Parser.Lex();
-
-    // The only Real that should come through here is a literal #0.0 for
-    // the fcmp[e] r, #0.0 instructions. They expect raw token operands,
-    // so convert the value.
-    const AsmToken &Tok = Parser.getTok();
-    if (Tok.is(AsmToken::Real)) {
-      APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
-      uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
-      if (IntVal != 0 || (Mnemonic != "fcmp" && Mnemonic != "fcmpe"))
-        return TokError("unexpected floating point literal");
-      Parser.Lex(); // Eat the token.
-
-      Operands.push_back(
-          ARM64Operand::CreateToken("#0", false, S, getContext()));
-      Operands.push_back(
-          ARM64Operand::CreateToken(".0", false, S, getContext()));
-      return false;
-    }
-
-    const MCExpr *ImmVal;
-    if (parseSymbolicImmVal(ImmVal))
-      return true;
-
-    E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
-    Operands.push_back(ARM64Operand::CreateImm(ImmVal, S, E, getContext()));
-    return false;
-  }
-  }
-}
-
-/// ParseInstruction - Parse an ARM64 instruction mnemonic followed by its
-/// operands.
-bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
-                                      StringRef Name, SMLoc NameLoc,
-                                      OperandVector &Operands) {
-  // Create the leading tokens for the mnemonic, split by '.' characters.
-  size_t Start = 0, Next = Name.find('.');
-  StringRef Head = Name.slice(Start, Next);
-
-  // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction.
-  if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi")
-    return parseSysAlias(Head, NameLoc, Operands);
-
-  Operands.push_back(
-      ARM64Operand::CreateToken(Head, false, NameLoc, getContext()));
-  Mnemonic = Head;
-
-  // Handle condition codes for a branch mnemonic.
-  if (Head == "b" && Next != StringRef::npos) {
-    Start = Next;
-    Next = Name.find('.', Start + 1);
-    Head = Name.slice(Start + 1, Next);
-
-    SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
-                                            (Head.data() - Name.data()));
-    unsigned CC = parseCondCodeString(Head);
-    if (CC == ~0U)
-      return Error(SuffixLoc, "invalid condition code");
-    const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext());
-    Operands.push_back(
-        ARM64Operand::CreateImm(CCExpr, NameLoc, NameLoc, getContext()));
-  }
-
-  // Add the remaining tokens in the mnemonic.
-  while (Next != StringRef::npos) {
-    Start = Next;
-    Next = Name.find('.', Start + 1);
-    Head = Name.slice(Start, Next);
-    SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
-                                            (Head.data() - Name.data()) + 1);
-    Operands.push_back(
-        ARM64Operand::CreateToken(Head, true, SuffixLoc, getContext()));
-  }
-
-  // Conditional compare instructions have a Condition Code operand, which needs
-  // to be parsed and an immediate operand created.
-  bool condCodeFourthOperand =
-      (Head == "ccmp" || Head == "ccmn" || Head == "fccmp" ||
-       Head == "fccmpe" || Head == "fcsel" || Head == "csel" ||
-       Head == "csinc" || Head == "csinv" || Head == "csneg");
-
-  // These instructions are aliases to some of the conditional select
-  // instructions. However, the condition code is inverted in the aliased
-  // instruction.
-  //
-  // FIXME: Is this the correct way to handle these? Or should the parser
-  // generate the aliased instructions directly?
-  bool condCodeSecondOperand = (Head == "cset" || Head == "csetm");
-  bool condCodeThirdOperand =
-      (Head == "cinc" || Head == "cinv" || Head == "cneg");
-
-  // Read the remaining operands.
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
-    // Read the first operand.
-    if (parseOperand(Operands, false, false)) {
-      Parser.eatToEndOfStatement();
-      return true;
-    }
-
-    unsigned N = 2;
-    while (getLexer().is(AsmToken::Comma)) {
-      Parser.Lex(); // Eat the comma.
-
-      // Parse and remember the operand.
-      if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) ||
-                                     (N == 3 && condCodeThirdOperand) ||
-                                     (N == 2 && condCodeSecondOperand),
-                       condCodeSecondOperand || condCodeThirdOperand)) {
-        Parser.eatToEndOfStatement();
-        return true;
-      }
-
-      ++N;
-    }
-  }
-
-  if (getLexer().isNot(AsmToken::EndOfStatement)) {
-    SMLoc Loc = Parser.getTok().getLoc();
-    Parser.eatToEndOfStatement();
-    return Error(Loc, "unexpected token in argument list");
-  }
-
-  Parser.Lex(); // Consume the EndOfStatement
-  return false;
-}
-
-/// isFPR32Register - Check if a register is in the FPR32 register class.
-/// (The parser does not have the target register info to check the register
-/// class directly.)
-static bool isFPR32Register(unsigned Reg) {
-  using namespace ARM64;
-  switch (Reg) {
-  default:
-    break;
-  case S0: case S1: case S2: case S3: case S4: case S5: case S6:
-  case S7: case S8: case S9: case S10: case S11: case S12: case S13:
-  case S14: case S15: case S16: case S17: case S18: case S19: case S20:
-  case S21: case S22: case S23: case S24: case S25: case S26: case S27:
-  case S28: case S29: case S30: case S31:
-    return true;
-  }
-  return false;
-}
-
-/// isGPR32Register - Check if a register is in the GPR32sp register class.
-/// (The parser does not have the target register info to check the register
-/// class directly.)
-static bool isGPR32Register(unsigned Reg) {
-  using namespace ARM64;
-  switch (Reg) {
-  default:
-    break;
-  case W0: case W1: case W2: case W3: case W4: case W5: case W6:
-  case W7: case W8: case W9: case W10: case W11: case W12: case W13:
-  case W14: case W15: case W16: case W17: case W18: case W19: case W20:
-  case W21: case W22: case W23: case W24: case W25: case W26: case W27:
-  case W28: case W29: case W30: case WSP:
-    return true;
-  }
-  return false;
-}
-
-static bool isGPR64Reg(unsigned Reg) {
-  using namespace ARM64;
-  switch (Reg) {
-  case X0: case X1: case X2: case X3: case X4: case X5: case X6:
-  case X7: case X8: case X9: case X10: case X11: case X12: case X13:
-  case X14: case X15: case X16: case X17: case X18: case X19: case X20:
-  case X21: case X22: case X23: case X24: case X25: case X26: case X27:
-  case X28: case FP: case LR: case SP: case XZR:
-    return true;
-  default:
-    return false;
-  }
-}
-
-
-// FIXME: This entire function is a giant hack to provide us with decent
-// operand range validation/diagnostics until TableGen/MC can be extended
-// to support autogeneration of this kind of validation.
-bool ARM64AsmParser::validateInstruction(MCInst &Inst, - SmallVectorImpl &Loc) { - const MCRegisterInfo *RI = getContext().getRegisterInfo(); - // Check for indexed addressing modes w/ the base register being the - // same as a destination/source register or pair load where - // the Rt == Rt2. All of those are undefined behaviour. - switch (Inst.getOpcode()) { - case ARM64::LDPSWpre: - case ARM64::LDPWpost: - case ARM64::LDPWpre: - case ARM64::LDPXpost: - case ARM64::LDPXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - unsigned Rn = Inst.getOperand(2).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable LDP instruction, writeback base " - "is also a destination"); - if (RI->isSubRegisterEq(Rn, Rt2)) - return Error(Loc[1], "unpredictable LDP instruction, writeback base " - "is also a destination"); - // FALLTHROUGH - } - case ARM64::LDPDpost: - case ARM64::LDPDpre: - case ARM64::LDPQpost: - case ARM64::LDPQpre: - case ARM64::LDPSpost: - case ARM64::LDPSpre: - case ARM64::LDPSWpost: - case ARM64::LDPDi: - case ARM64::LDPQi: - case ARM64::LDPSi: - case ARM64::LDPSWi: - case ARM64::LDPWi: - case ARM64::LDPXi: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - if (Rt == Rt2) - return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt"); - break; - } - case ARM64::STPDpost: - case ARM64::STPDpre: - case ARM64::STPQpost: - case ARM64::STPQpre: - case ARM64::STPSpost: - case ARM64::STPSpre: - case ARM64::STPWpost: - case ARM64::STPWpre: - case ARM64::STPXpost: - case ARM64::STPXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rt2 = Inst.getOperand(1).getReg(); - unsigned Rn = Inst.getOperand(2).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable STP instruction, writeback base " - "is also a source"); - if (RI->isSubRegisterEq(Rn, Rt2)) - return Error(Loc[1], "unpredictable STP instruction, writeback base " - "is also a source"); - break; - } - case ARM64::LDRBBpre: - case ARM64::LDRBpre: - case ARM64::LDRHHpre: - case ARM64::LDRHpre: - case ARM64::LDRSBWpre: - case ARM64::LDRSBXpre: - case ARM64::LDRSHWpre: - case ARM64::LDRSHXpre: - case ARM64::LDRSWpre: - case ARM64::LDRWpre: - case ARM64::LDRXpre: - case ARM64::LDRBBpost: - case ARM64::LDRBpost: - case ARM64::LDRHHpost: - case ARM64::LDRHpost: - case ARM64::LDRSBWpost: - case ARM64::LDRSBXpost: - case ARM64::LDRSHWpost: - case ARM64::LDRSHXpost: - case ARM64::LDRSWpost: - case ARM64::LDRWpost: - case ARM64::LDRXpost: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rn = Inst.getOperand(1).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable LDR instruction, writeback base " - "is also a source"); - break; - } - case ARM64::STRBBpost: - case ARM64::STRBpost: - case ARM64::STRHHpost: - case ARM64::STRHpost: - case ARM64::STRWpost: - case ARM64::STRXpost: - case ARM64::STRBBpre: - case ARM64::STRBpre: - case ARM64::STRHHpre: - case ARM64::STRHpre: - case ARM64::STRWpre: - case ARM64::STRXpre: { - unsigned Rt = Inst.getOperand(0).getReg(); - unsigned Rn = Inst.getOperand(1).getReg(); - if (RI->isSubRegisterEq(Rn, Rt)) - return Error(Loc[0], "unpredictable STR instruction, writeback base " - "is also a source"); - break; - } - } - - // Now check immediate ranges. Separate from the above as there is overlap - // in the instructions being checked and this keeps the nested conditionals - // to a minimum. 
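
The logical-immediate check that opens the next switch reads the shift type and amount back out of one packed "shifter" immediate. The exact packing belongs to ARM64_AM in ARM64AddressingModes.h; the sketch below only assumes the common layout (amount in the low six bits, type above it) to make the `ST == LSL && shifter > 31` test concrete:

// Assumed packing, for illustration only: amount in bits [5:0], type above.
enum ShiftType { LSL = 0, LSR, ASR, ROR };
static unsigned packShifter(ShiftType ST, unsigned Amount) {
  return (unsigned(ST) << 6) | (Amount & 0x3f);
}
static ShiftType shiftTypeOf(unsigned Packed) { return ShiftType(Packed >> 6); }
static unsigned shiftAmountOf(unsigned Packed) { return Packed & 0x3f; }
// Under this layout, packShifter(LSL, 32) on a W-register logical op would
// trip the "shift value out of range" diagnostic below.
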
- switch (Inst.getOpcode()) { - case ARM64::ANDWrs: - case ARM64::ANDSWrs: - case ARM64::EORWrs: - case ARM64::ORRWrs: { - if (!Inst.getOperand(3).isImm()) - return Error(Loc[3], "immediate value expected"); - int64_t shifter = Inst.getOperand(3).getImm(); - ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(shifter); - if (ST == ARM64_AM::LSL && shifter > 31) - return Error(Loc[3], "shift value out of range"); - return false; - } - case ARM64::ADDSWri: - case ARM64::ADDSXri: - case ARM64::ADDWri: - case ARM64::ADDXri: - case ARM64::SUBSWri: - case ARM64::SUBSXri: - case ARM64::SUBWri: - case ARM64::SUBXri: { - if (!Inst.getOperand(3).isImm()) - return Error(Loc[3], "immediate value expected"); - int64_t shifter = Inst.getOperand(3).getImm(); - if (shifter != 0 && shifter != 12) - return Error(Loc[3], "shift value out of range"); - // The imm12 operand can be an expression. Validate that it's legit. - // FIXME: We really, really want to allow arbitrary expressions here - // and resolve the value and validate the result at fixup time, but - // that's hard as we have long since lost any source information we - // need to generate good diagnostics by that point. - if (Inst.getOpcode() == ARM64::ADDXri && Inst.getOperand(2).isExpr()) { - const MCExpr *Expr = Inst.getOperand(2).getExpr(); - ARM64MCExpr::VariantKind ELFRefKind; - MCSymbolRefExpr::VariantKind DarwinRefKind; - const MCConstantExpr *Addend; - if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) { - return Error(Loc[2], "invalid immediate expression"); - } - - if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF || - DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF || - ELFRefKind == ARM64MCExpr::VK_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 || - ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC || - ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) { - // Note that we don't range-check the addend. It's adjusted - // modulo page size when converted, so there is no "out of range" - // condition when using @pageoff. Any validity checking for the value - // was done in the is*() predicate function. - return false; - } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF) { - // @gotpageoff can only be used directly, not with an addend. - return Addend != 0; - } - - // Otherwise, we're not sure, so don't allow it for now. - return Error(Loc[2], "invalid immediate expression"); - } - - // If it's anything but an immediate, it's not legit. 
- if (!Inst.getOperand(2).isImm()) - return Error(Loc[2], "invalid immediate expression"); - int64_t imm = Inst.getOperand(2).getImm(); - if (imm > 4095 || imm < 0) - return Error(Loc[2], "immediate value out of range"); - return false; - } - case ARM64::LDRBpre: - case ARM64::LDRHpre: - case ARM64::LDRSBWpre: - case ARM64::LDRSBXpre: - case ARM64::LDRSHWpre: - case ARM64::LDRSHXpre: - case ARM64::LDRWpre: - case ARM64::LDRXpre: - case ARM64::LDRSpre: - case ARM64::LDRDpre: - case ARM64::LDRQpre: - case ARM64::STRBpre: - case ARM64::STRHpre: - case ARM64::STRWpre: - case ARM64::STRXpre: - case ARM64::STRSpre: - case ARM64::STRDpre: - case ARM64::STRQpre: - case ARM64::LDRBpost: - case ARM64::LDRHpost: - case ARM64::LDRSBWpost: - case ARM64::LDRSBXpost: - case ARM64::LDRSHWpost: - case ARM64::LDRSHXpost: - case ARM64::LDRWpost: - case ARM64::LDRXpost: - case ARM64::LDRSpost: - case ARM64::LDRDpost: - case ARM64::LDRQpost: - case ARM64::STRBpost: - case ARM64::STRHpost: - case ARM64::STRWpost: - case ARM64::STRXpost: - case ARM64::STRSpost: - case ARM64::STRDpost: - case ARM64::STRQpost: - case ARM64::LDTRXi: - case ARM64::LDTRWi: - case ARM64::LDTRHi: - case ARM64::LDTRBi: - case ARM64::LDTRSHWi: - case ARM64::LDTRSHXi: - case ARM64::LDTRSBWi: - case ARM64::LDTRSBXi: - case ARM64::LDTRSWi: - case ARM64::STTRWi: - case ARM64::STTRXi: - case ARM64::STTRHi: - case ARM64::STTRBi: - case ARM64::LDURWi: - case ARM64::LDURXi: - case ARM64::LDURSi: - case ARM64::LDURDi: - case ARM64::LDURQi: - case ARM64::LDURHi: - case ARM64::LDURBi: - case ARM64::LDURSHWi: - case ARM64::LDURSHXi: - case ARM64::LDURSBWi: - case ARM64::LDURSBXi: - case ARM64::LDURSWi: - case ARM64::PRFUMi: - case ARM64::STURWi: - case ARM64::STURXi: - case ARM64::STURSi: - case ARM64::STURDi: - case ARM64::STURQi: - case ARM64::STURHi: - case ARM64::STURBi: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(2).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t offset = Inst.getOperand(2).getImm(); - if (offset > 255 || offset < -256) - return Error(Loc[1], "offset value out of range"); - return false; - } - case ARM64::LDRSro: - case ARM64::LDRWro: - case ARM64::LDRSWro: - case ARM64::STRWro: - case ARM64::STRSro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRDro: - case ARM64::LDRQro: - case ARM64::LDRXro: - case ARM64::PRFMro: - case ARM64::STRXro: - case ARM64::STRDro: - case ARM64::STRQro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. 
- if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRHro: - case ARM64::LDRHHro: - case ARM64::LDRSHWro: - case ARM64::LDRSHXro: - case ARM64::STRHro: - case ARM64::STRHHro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDRBro: - case ARM64::LDRBBro: - case ARM64::LDRSBWro: - case ARM64::LDRSBXro: - case ARM64::STRBro: - case ARM64::STRBBro: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. - if (!Inst.getOperand(3).isImm()) - return Error(Loc[1], "immediate value expected"); - int64_t shift = Inst.getOperand(3).getImm(); - ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift); - if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX && - type != ARM64_AM::SXTW && type != ARM64_AM::SXTX) - return Error(Loc[1], "shift type invalid"); - return false; - } - case ARM64::LDPWi: - case ARM64::LDPXi: - case ARM64::LDPSi: - case ARM64::LDPDi: - case ARM64::LDPQi: - case ARM64::LDPSWi: - case ARM64::STPWi: - case ARM64::STPXi: - case ARM64::STPSi: - case ARM64::STPDi: - case ARM64::STPQi: - case ARM64::LDPWpre: - case ARM64::LDPXpre: - case ARM64::LDPSpre: - case ARM64::LDPDpre: - case ARM64::LDPQpre: - case ARM64::LDPSWpre: - case ARM64::STPWpre: - case ARM64::STPXpre: - case ARM64::STPSpre: - case ARM64::STPDpre: - case ARM64::STPQpre: - case ARM64::LDPWpost: - case ARM64::LDPXpost: - case ARM64::LDPSpost: - case ARM64::LDPDpost: - case ARM64::LDPQpost: - case ARM64::LDPSWpost: - case ARM64::STPWpost: - case ARM64::STPXpost: - case ARM64::STPSpost: - case ARM64::STPDpost: - case ARM64::STPQpost: - case ARM64::LDNPWi: - case ARM64::LDNPXi: - case ARM64::LDNPSi: - case ARM64::LDNPDi: - case ARM64::LDNPQi: - case ARM64::STNPWi: - case ARM64::STNPXi: - case ARM64::STNPSi: - case ARM64::STNPDi: - case ARM64::STNPQi: { - // FIXME: Should accept expressions and error in fixup evaluation - // if out of range. 
-    if (!Inst.getOperand(3).isImm())
-      return Error(Loc[2], "immediate value expected");
-    int64_t offset = Inst.getOperand(3).getImm();
-    if (offset > 63 || offset < -64)
-      return Error(Loc[2], "offset value out of range");
-    return false;
-  }
-  default:
-    return false;
-  }
-}
-
-static void rewriteMOV(ARM64AsmParser::OperandVector &Operands,
-                       StringRef mnemonic, uint64_t imm, unsigned shift,
-                       MCContext &Context) {
-  ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
-  ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
-  Operands[0] =
-      ARM64Operand::CreateToken(mnemonic, false, Op->getStartLoc(), Context);
-
-  const MCExpr *NewImm = MCConstantExpr::Create(imm >> shift, Context);
-  Operands[2] = ARM64Operand::CreateImm(NewImm, Op2->getStartLoc(),
-                                        Op2->getEndLoc(), Context);
-
-  Operands.push_back(ARM64Operand::CreateShifter(
-      ARM64_AM::LSL, shift, Op2->getStartLoc(), Op2->getEndLoc(), Context));
-  delete Op2;
-  delete Op;
-}
-
-bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
-  switch (ErrCode) {
-  case Match_MissingFeature:
-    return Error(Loc,
-                 "instruction requires a CPU feature not currently enabled");
-  case Match_InvalidOperand:
-    return Error(Loc, "invalid operand for instruction");
-  case Match_InvalidSuffix:
-    return Error(Loc, "invalid type suffix for instruction");
-  case Match_InvalidMemoryIndexedSImm9:
-    return Error(Loc, "index must be an integer in range [-256,255].");
-  case Match_InvalidMemoryIndexed32SImm7:
-    return Error(Loc, "index must be a multiple of 4 in range [-256,252].");
-  case Match_InvalidMemoryIndexed64SImm7:
-    return Error(Loc, "index must be a multiple of 8 in range [-512,504].");
-  case Match_InvalidMemoryIndexed128SImm7:
-    return Error(Loc, "index must be a multiple of 16 in range [-1024,1008].");
-  case Match_InvalidMemoryIndexed8:
-    return Error(Loc, "index must be an integer in range [0,4095].");
-  case Match_InvalidMemoryIndexed16:
-    return Error(Loc, "index must be a multiple of 2 in range [0,8190].");
-  case Match_InvalidMemoryIndexed32:
-    return Error(Loc, "index must be a multiple of 4 in range [0,16380].");
-  case Match_InvalidMemoryIndexed64:
-    return Error(Loc, "index must be a multiple of 8 in range [0,32760].");
-  case Match_InvalidMemoryIndexed128:
-    return Error(Loc, "index must be a multiple of 16 in range [0,65520].");
-  case Match_InvalidImm1_8:
-    return Error(Loc, "immediate must be an integer in range [1,8].");
-  case Match_InvalidImm1_16:
-    return Error(Loc, "immediate must be an integer in range [1,16].");
-  case Match_InvalidImm1_32:
-    return Error(Loc, "immediate must be an integer in range [1,32].");
-  case Match_InvalidImm1_64:
-    return Error(Loc, "immediate must be an integer in range [1,64].");
-  case Match_MnemonicFail:
-    return Error(Loc, "unrecognized instruction mnemonic");
-  default:
-    assert(0 && "unexpected error code!");
-    return Error(Loc, "invalid instruction format");
-  }
-}
-
-bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
-                                             OperandVector &Operands,
-                                             MCStreamer &Out,
-                                             unsigned &ErrorInfo,
-                                             bool MatchingInlineAsm) {
-  assert(!Operands.empty() && "Unexpected empty operand list!");
-  ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
-  assert(Op->isToken() && "Leading operand should always be a mnemonic!");
-
-  StringRef Tok = Op->getToken();
-  // Translate CMN/CMP pseudos to ADDS/SUBS with zero register destination.
-  // This needs to be done before the special handling of ADD/SUB immediates.
-  if (Tok == "cmp" || Tok == "cmn") {
-    // Replace the opcode with either ADDS or SUBS.
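
In assembly terms the rewrite is `cmp x1, #4` becoming `subs xzr, x1, #4` and `cmn w2, w3` becoming `adds wzr, w2, w3`: a comparison is an add/subtract that only sets flags, so the result is steered into the zero register. A string-level equivalent of the operand surgery, purely for exposition:

#include <string>
#include <vector>

// {"cmp", "x1", "#4"} becomes {"subs", "xzr", "x1", "#4"}; register width
// picks WZR vs XZR, as the enum-based code below does via isGPR32Register().
static void rewriteCompareAlias(std::vector<std::string> &Ops) {
  bool Is64Bit = !Ops[1].empty() && Ops[1][0] == 'x';
  Ops[0] = (Ops[0] == "cmp") ? "subs" : "adds";
  Ops.insert(Ops.begin() + 1, Is64Bit ? "xzr" : "wzr");
}
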
-    const char *Repl = StringSwitch<const char *>(Tok)
-                           .Case("cmp", "subs")
-                           .Case("cmn", "adds")
-                           .Default(0);
-    assert(Repl && "Unknown compare instruction");
-    delete Operands[0];
-    Operands[0] = ARM64Operand::CreateToken(Repl, false, IDLoc, getContext());
-
-    // Insert WZR or XZR as destination operand.
-    ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]);
-    unsigned ZeroReg;
-    if (RegOp->isReg() &&
-        (isGPR32Register(RegOp->getReg()) || RegOp->getReg() == ARM64::WZR))
-      ZeroReg = ARM64::WZR;
-    else
-      ZeroReg = ARM64::XZR;
-    Operands.insert(
-        Operands.begin() + 1,
-        ARM64Operand::CreateReg(ZeroReg, false, IDLoc, IDLoc, getContext()));
-    // Update since we modified it above.
-    ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
-    Tok = Op->getToken();
-  }
-
-  unsigned NumOperands = Operands.size();
-
-  if (Tok == "mov" && NumOperands == 3) {
-    // The MOV mnemonic is aliased to movn/movz, depending on the value of
-    // the immediate being instantiated.
-    // FIXME: Catching this here is a total hack, and we should use tblgen
-    // support to implement this instead as soon as it is available.
-
-    ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
-    if (Op2->isImm()) {
-      if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op2->getImm())) {
-        uint64_t Val = CE->getValue();
-        uint64_t NVal = ~Val;
-
-        // If this is a 32-bit register and the value has none of the upper
-        // set, clear the complemented upper 32-bits so the logic below works
-        // for 32-bit registers too.
-        ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
-        if (Op1->isReg() && isGPR32Register(Op1->getReg()) &&
-            (Val & 0xFFFFFFFFULL) == Val)
-          NVal &= 0x00000000FFFFFFFFULL;
-
-        // MOVZ Rd, imm << 0
-        if ((Val & 0xFFFF) == Val)
-          rewriteMOV(Operands, "movz", Val, 0, getContext());
-
-        // MOVZ Rd, imm << 16
-        else if ((Val & 0xFFFF0000ULL) == Val)
-          rewriteMOV(Operands, "movz", Val, 16, getContext());
-
-        // MOVZ Rd, imm << 32
-        else if ((Val & 0xFFFF00000000ULL) == Val)
-          rewriteMOV(Operands, "movz", Val, 32, getContext());
-
-        // MOVZ Rd, imm << 48
-        else if ((Val & 0xFFFF000000000000ULL) == Val)
-          rewriteMOV(Operands, "movz", Val, 48, getContext());
-
-        // MOVN Rd, ~(imm << 0)
-        else if ((NVal & 0xFFFFULL) == NVal)
-          rewriteMOV(Operands, "movn", NVal, 0, getContext());
-
-        // MOVN Rd, ~(imm << 16)
-        else if ((NVal & 0xFFFF0000ULL) == NVal)
-          rewriteMOV(Operands, "movn", NVal, 16, getContext());
-
-        // MOVN Rd, ~(imm << 32)
-        else if ((NVal & 0xFFFF00000000ULL) == NVal)
-          rewriteMOV(Operands, "movn", NVal, 32, getContext());
-
-        // MOVN Rd, ~(imm << 48)
-        else if ((NVal & 0xFFFF000000000000ULL) == NVal)
-          rewriteMOV(Operands, "movn", NVal, 48, getContext());
-      }
-    }
-  } else if (NumOperands == 4) {
-    if (Tok == "add" || Tok == "adds" || Tok == "sub" || Tok == "subs") {
-      // Handle the uimm24 immediate form, where the shift is not specified.
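
An unshifted ADD/SUB immediate is accepted up to 24 bits because the encoding has a 12-bit field plus an optional LSL #12: `add x0, x1, #0x456000` becomes imm12 = 0x456 with LSL #12, while any value with bits set in both halves is rejected. The decision the following hunk implements, as a standalone function:

#include <cstdint>

// Split a plain immediate into (imm12, shift); false means unencodable.
static bool splitAddSubImm(uint64_t Val, uint64_t &Imm12, unsigned &Shift) {
  if (Val >= (1ULL << 24))
    return false;             // too large even with the shifted form
  if (Val < (1ULL << 12)) {
    Imm12 = Val;
    Shift = 0;                // fits the field directly
    return true;
  }
  if ((Val & 0xfff) == 0) {
    Imm12 = Val >> 12;
    Shift = 12;               // low half clear: use LSL #12
    return true;
  }
  return false;               // bits in both halves: "immediate value is too large"
}
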
-      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
-      if (Op3->isImm()) {
-        if (const MCConstantExpr *CE =
-                dyn_cast<MCConstantExpr>(Op3->getImm())) {
-          uint64_t Val = CE->getValue();
-          if (Val >= (1 << 24)) {
-            Error(IDLoc, "immediate value is too large");
-            return true;
-          }
-          if (Val < (1 << 12)) {
-            Operands.push_back(ARM64Operand::CreateShifter(
-                ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
-          } else if ((Val & 0xfff) == 0) {
-            delete Operands[3];
-            CE = MCConstantExpr::Create(Val >> 12, getContext());
-            Operands[3] =
-                ARM64Operand::CreateImm(CE, IDLoc, IDLoc, getContext());
-            Operands.push_back(ARM64Operand::CreateShifter(
-                ARM64_AM::LSL, 12, IDLoc, IDLoc, getContext()));
-          } else {
-            Error(IDLoc, "immediate value is too large");
-            return true;
-          }
-        } else {
-          Operands.push_back(ARM64Operand::CreateShifter(
-              ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
-        }
-      }
-
-      // FIXME: Horrible hack to handle the LSL -> UBFM alias.
-    } else if (NumOperands == 4 && Tok == "lsl") {
-      ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
-      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
-      if (Op2->isReg() && Op3->isImm()) {
-        const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
-        if (Op3CE) {
-          uint64_t Op3Val = Op3CE->getValue();
-          uint64_t NewOp3Val = 0;
-          uint64_t NewOp4Val = 0;
-          if (isGPR32Register(Op2->getReg()) || Op2->getReg() == ARM64::WZR) {
-            NewOp3Val = (32 - Op3Val) & 0x1f;
-            NewOp4Val = 31 - Op3Val;
-          } else {
-            NewOp3Val = (64 - Op3Val) & 0x3f;
-            NewOp4Val = 63 - Op3Val;
-          }
-
-          const MCExpr *NewOp3 =
-              MCConstantExpr::Create(NewOp3Val, getContext());
-          const MCExpr *NewOp4 =
-              MCConstantExpr::Create(NewOp4Val, getContext());
-
-          Operands[0] = ARM64Operand::CreateToken(
-              "ubfm", false, Op->getStartLoc(), getContext());
-          Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
-                                                Op3->getEndLoc(), getContext());
-          Operands.push_back(ARM64Operand::CreateImm(
-              NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext()));
-          delete Op3;
-          delete Op;
-        }
-      }
-
-      // FIXME: Horrible hack to handle the optional LSL shift for vector
-      // instructions.
-    } else if (NumOperands == 4 && (Tok == "bic" || Tok == "orr")) {
-      ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
-      ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
-      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
-      if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) ||
-          (Op1->isVectorReg() && Op2->isToken() && Op3->isImm()))
-        Operands.push_back(ARM64Operand::CreateShifter(ARM64_AM::LSL, 0, IDLoc,
-                                                       IDLoc, getContext()));
-    } else if (NumOperands == 4 && (Tok == "movi" || Tok == "mvni")) {
-      ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
-      ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
-      ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
-      if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) ||
-          (Op1->isVectorReg() && Op2->isToken() && Op3->isImm())) {
-        StringRef Suffix = Op1->isToken() ? Op1->getToken() : Op2->getToken();
-        // Canonicalize on lower-case for ease of comparison.
-        std::string CanonicalSuffix = Suffix.lower();
-        if (Tok != "movi" ||
-            (CanonicalSuffix != ".1d" && CanonicalSuffix != ".2d" &&
-             CanonicalSuffix != ".8b" && CanonicalSuffix != ".16b"))
-          Operands.push_back(ARM64Operand::CreateShifter(
-              ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
-      }
-    }
-  } else if (NumOperands == 5) {
-    // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
-    // UBFIZ -> UBFM aliases.
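
The insert aliases map onto BFM by converting (lsb, width) into the rotate/top-bit pair the hardware encoding uses: `bfi x0, x1, #8, #4` is `bfm x0, x1, #56, #3`. The arithmetic the next hunk applies, isolated:

#include <cstdint>

// BFI/SBFIZ/UBFIZ -> BFM/SBFM/UBFM operand conversion:
//   immr = (regsize - lsb) mod regsize   (rotate amount)
//   imms = width - 1                     (index of the field's top bit)
static void insertAliasToBFM(bool Is32Bit, uint64_t Lsb, uint64_t Width,
                             uint64_t &ImmR, uint64_t &ImmS) {
  unsigned RegSize = Is32Bit ? 32 : 64;
  ImmR = (RegSize - Lsb) & (RegSize - 1);
  ImmS = Width - 1;
}
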
- if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") { - ARM64Operand *Op1 = static_cast(Operands[1]); - ARM64Operand *Op3 = static_cast(Operands[3]); - ARM64Operand *Op4 = static_cast(Operands[4]); - - if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); - const MCConstantExpr *Op4CE = dyn_cast(Op4->getImm()); - - if (Op3CE && Op4CE) { - uint64_t Op3Val = Op3CE->getValue(); - uint64_t Op4Val = Op4CE->getValue(); - - uint64_t NewOp3Val = 0; - if (isGPR32Register(Op1->getReg())) - NewOp3Val = (32 - Op3Val) & 0x1f; - else - NewOp3Val = (64 - Op3Val) & 0x3f; - - uint64_t NewOp4Val = Op4Val - 1; - - const MCExpr *NewOp3 = - MCConstantExpr::Create(NewOp3Val, getContext()); - const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); - Operands[4] = ARM64Operand::CreateImm(NewOp4, Op4->getStartLoc(), - Op4->getEndLoc(), getContext()); - if (Tok == "bfi") - Operands[0] = ARM64Operand::CreateToken( - "bfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "sbfiz") - Operands[0] = ARM64Operand::CreateToken( - "sbfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "ubfiz") - Operands[0] = ARM64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - else - llvm_unreachable("No valid mnemonic for alias?"); - - delete Op; - delete Op3; - delete Op4; - } - } - - // FIXME: Horrible hack to handle the BFXIL->BFM, SBFX->SBFM, and - // UBFX -> UBFM aliases. - } else if (NumOperands == 5 && - (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) { - ARM64Operand *Op1 = static_cast(Operands[1]); - ARM64Operand *Op3 = static_cast(Operands[3]); - ARM64Operand *Op4 = static_cast(Operands[4]); - - if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast(Op3->getImm()); - const MCConstantExpr *Op4CE = dyn_cast(Op4->getImm()); - - if (Op3CE && Op4CE) { - uint64_t Op3Val = Op3CE->getValue(); - uint64_t Op4Val = Op4CE->getValue(); - uint64_t NewOp4Val = Op3Val + Op4Val - 1; - - if (NewOp4Val >= Op3Val) { - const MCExpr *NewOp4 = - MCConstantExpr::Create(NewOp4Val, getContext()); - Operands[4] = ARM64Operand::CreateImm( - NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); - if (Tok == "bfxil") - Operands[0] = ARM64Operand::CreateToken( - "bfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "sbfx") - Operands[0] = ARM64Operand::CreateToken( - "sbfm", false, Op->getStartLoc(), getContext()); - else if (Tok == "ubfx") - Operands[0] = ARM64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - else - llvm_unreachable("No valid mnemonic for alias?"); - - delete Op; - delete Op4; - } - } - } - } - } - // FIXME: Horrible hack for tbz and tbnz with Wn register operand. - // InstAlias can't quite handle this since the reg classes aren't - // subclasses. - if (NumOperands == 4 && (Tok == "tbz" || Tok == "tbnz")) { - ARM64Operand *Op = static_cast(Operands[2]); - if (Op->isImm()) { - if (const MCConstantExpr *OpCE = dyn_cast(Op->getImm())) { - if (OpCE->getValue() < 32) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. 
- ARM64Operand *Op = static_cast(Operands[1]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[1] = ARM64Operand::CreateReg( - Reg, false, Op->getStartLoc(), Op->getEndLoc(), getContext()); - delete Op; - } - } - } - } - } - // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands. - // InstAlias can't quite handle this since the reg classes aren't - // subclasses. - if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast(Operands[2]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; - } - } - // FIXME: Likewise for [su]xt[bh] with a Xd dst operand - else if (NumOperands == 3 && - (Tok == "sxtb" || Tok == "uxtb" || Tok == "sxth" || Tok == "uxth")) { - ARM64Operand *Op = static_cast(Operands[1]); - if (Op->isReg() && isGPR64Reg(Op->getReg())) { - // The source register can be Wn here, but the matcher expects a - // GPR64. Twiddle it here if necessary. - ARM64Operand *Op = static_cast(Operands[2]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; - } - } - } - - // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. - if (NumOperands == 3 && Tok == "fmov") { - ARM64Operand *RegOp = static_cast(Operands[1]); - ARM64Operand *ImmOp = static_cast(Operands[2]); - if (RegOp->isReg() && ImmOp->isFPImm() && - ImmOp->getFPImm() == (unsigned)-1) { - unsigned zreg = - isFPR32Register(RegOp->getReg()) ? ARM64::WZR : ARM64::XZR; - Operands[2] = ARM64Operand::CreateReg(zreg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete ImmOp; - } - } - - // FIXME: Horrible hack to handle the literal .d[1] vector index on - // FMOV instructions. The index isn't an actual instruction operand - // but rather syntactic sugar. It really should be part of the mnemonic, - // not the operand, but whatever. - if ((NumOperands == 5) && Tok == "fmov") { - // If the last operand is a vectorindex of '1', then replace it with - // a '[' '1' ']' token sequence, which is what the matcher - // (annoyingly) expects for a literal vector index operand. - ARM64Operand *Op = static_cast(Operands[NumOperands - 1]); - if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) { - SMLoc Loc = Op->getStartLoc(); - Operands.pop_back(); - Operands.push_back( - ARM64Operand::CreateToken("[", false, Loc, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("1", false, Loc, getContext())); - Operands.push_back( - ARM64Operand::CreateToken("]", false, Loc, getContext())); - } else if (Op->isReg()) { - // Similarly, check the destination operand for the GPR->High-lane - // variant. 
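
The `.d[1]` on these FMOV forms is syntax rather than a real operand: the generated matcher expects the literal tokens '[' '1' ']', so a parsed lane-index operand has to be re-expanded into them, for both the lane-to-GPR form (`fmov x0, v1.d[1]`) and the GPR-to-high-lane form handled next. A token-level picture of the rewrite, for exposition:

#include <string>
#include <vector>

// Replace the parsed index operand at Pos with the literal token triple.
static void expandLaneIndex(std::vector<std::string> &Tokens, size_t Pos) {
  Tokens.erase(Tokens.begin() + Pos);
  Tokens.insert(Tokens.begin() + Pos, {"[", "1", "]"});
}
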
- unsigned OpNo = NumOperands - 2; - ARM64Operand *Op = static_cast(Operands[OpNo]); - if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) { - SMLoc Loc = Op->getStartLoc(); - Operands[OpNo] = - ARM64Operand::CreateToken("[", false, Loc, getContext()); - Operands.insert( - Operands.begin() + OpNo + 1, - ARM64Operand::CreateToken("1", false, Loc, getContext())); - Operands.insert( - Operands.begin() + OpNo + 2, - ARM64Operand::CreateToken("]", false, Loc, getContext())); - } - } - } - - MCInst Inst; - // First try to match against the secondary set of tables containing the - // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2"). - unsigned MatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1); - - // If that fails, try against the alternate table containing long-form NEON: - // "fadd v0.2s, v1.2s, v2.2s" - if (MatchResult != Match_Success) - MatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0); - - switch (MatchResult) { - case Match_Success: { - // Perform range checking and other semantic validations - SmallVector OperandLocs; - NumOperands = Operands.size(); - for (unsigned i = 1; i < NumOperands; ++i) - OperandLocs.push_back(Operands[i]->getStartLoc()); - if (validateInstruction(Inst, OperandLocs)) - return true; - - Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); - return false; - } - case Match_MissingFeature: - case Match_MnemonicFail: - return showMatchError(IDLoc, MatchResult); - case Match_InvalidOperand: { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - } - // If the match failed on a suffix token operand, tweak the diagnostic - // accordingly. - if (((ARM64Operand *)Operands[ErrorInfo])->isToken() && - ((ARM64Operand *)Operands[ErrorInfo])->isTokenSuffix()) - MatchResult = Match_InvalidSuffix; - - return showMatchError(ErrorLoc, MatchResult); - } - case Match_InvalidMemoryIndexedSImm9: { - // If there is not a '!' after the memory operand that failed, we really - // want the diagnostic for the non-pre-indexed instruction variant instead. - // Be careful to check for the post-indexed variant as well, which also - // uses this match diagnostic. Also exclude the explicitly unscaled - // mnemonics, as they want the unscaled diagnostic as well. - if (Operands.size() == ErrorInfo + 1 && - !((ARM64Operand *)Operands[ErrorInfo])->isImm() && - !Tok.startswith("stur") && !Tok.startswith("ldur")) { - // whether we want an Indexed64 or Indexed32 diagnostic depends on - // the register class of the previous operand. Default to 64 in case - // we see something unexpected. - MatchResult = Match_InvalidMemoryIndexed64; - if (ErrorInfo) { - ARM64Operand *PrevOp = (ARM64Operand *)Operands[ErrorInfo - 1]; - if (PrevOp->isReg() && ARM64MCRegisterClasses[ARM64::GPR32RegClassID] - .contains(PrevOp->getReg())) - MatchResult = Match_InvalidMemoryIndexed32; - } - } - SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); - } - case Match_InvalidMemoryIndexed32: - case Match_InvalidMemoryIndexed64: - case Match_InvalidMemoryIndexed128: - // If there is a '!' after the memory operand that failed, we really - // want the diagnostic for the pre-indexed instruction variant instead. 
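
Concretely: `ldr x0, [x1, #257]!` should get the signed 9-bit pre-indexed range message, while `ldr x0, [x1, #257]` should get the scaled unsigned-offset one (a multiple of 8 in [0,32760] for an X load). The tie-breaker the code uses is simply whether the writeback marker follows the operand that failed to match:

#include <string>
#include <vector>

// Prefer the SImm9 diagnostic only when the next token is "!".
static bool preferSImm9Diag(const std::vector<std::string> &Toks,
                            size_t FailedIdx) {
  return FailedIdx + 1 < Toks.size() && Toks[FailedIdx + 1] == "!";
}
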
- if (Operands.size() > ErrorInfo + 1 && - ((ARM64Operand *)Operands[ErrorInfo + 1])->isTokenEqual("!")) - MatchResult = Match_InvalidMemoryIndexedSImm9; - // FALL THROUGH - case Match_InvalidMemoryIndexed8: - case Match_InvalidMemoryIndexed16: - case Match_InvalidMemoryIndexed32SImm7: - case Match_InvalidMemoryIndexed64SImm7: - case Match_InvalidMemoryIndexed128SImm7: - case Match_InvalidImm1_8: - case Match_InvalidImm1_16: - case Match_InvalidImm1_32: - case Match_InvalidImm1_64: { - // Any time we get here, there's nothing fancy to do. Just get the - // operand SMLoc and display the diagnostic. - SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc(); - // If it's a memory operand, the error is with the offset immediate, - // so get that location instead. - if (((ARM64Operand *)Operands[ErrorInfo])->isMem()) - ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getOffsetLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - return showMatchError(ErrorLoc, MatchResult); - } - } - - llvm_unreachable("Implement any new match types added!"); - return true; -} - -/// ParseDirective parses the arm specific directives -bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) { - StringRef IDVal = DirectiveID.getIdentifier(); - SMLoc Loc = DirectiveID.getLoc(); - if (IDVal == ".hword") - return parseDirectiveWord(2, Loc); - if (IDVal == ".word") - return parseDirectiveWord(4, Loc); - if (IDVal == ".xword") - return parseDirectiveWord(8, Loc); - if (IDVal == ".tlsdesccall") - return parseDirectiveTLSDescCall(Loc); - - return parseDirectiveLOH(IDVal, Loc); -} - -/// parseDirectiveWord -/// ::= .word [ expression (, expression)* ] -bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { - if (getLexer().isNot(AsmToken::EndOfStatement)) { - for (;;) { - const MCExpr *Value; - if (getParser().parseExpression(Value)) - return true; - - getParser().getStreamer().EmitValue(Value, Size); - - if (getLexer().is(AsmToken::EndOfStatement)) - break; - - // FIXME: Improve diagnostic. - if (getLexer().isNot(AsmToken::Comma)) - return Error(L, "unexpected token in directive"); - Parser.Lex(); - } - } - - Parser.Lex(); - return false; -} - -// parseDirectiveTLSDescCall: -// ::= .tlsdesccall symbol -bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { - StringRef Name; - if (getParser().parseIdentifier(Name)) - return Error(L, "expected symbol after directive"); - - MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext()); - Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_TLSDESC, getContext()); - - MCInst Inst; - Inst.setOpcode(ARM64::TLSDESCCALL); - Inst.addOperand(MCOperand::CreateExpr(Expr)); - - getParser().getStreamer().EmitInstruction(Inst, STI); - return false; -} - -/// ::= .loh label1, ..., labelN -/// The number of arguments depends on the loh identifier. -bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { - if (IDVal != MCLOHDirectiveName()) - return true; - MCLOHType Kind; - if (getParser().getTok().isNot(AsmToken::Identifier)) { - if (getParser().getTok().isNot(AsmToken::Integer)) - return TokError("expected an identifier or a number in directive"); - // We successfully get a numeric value for the identifier. - // Check if it is valid. - int64_t Id = getParser().getTok().getIntVal(); - Kind = (MCLOHType)Id; - // Check that Id does not overflow MCLOHType. 
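
The .loh directive carries Mach-O linker optimization hints and accepts either a registered kind name or its raw numeric id, for example `.loh AdrpAdd Lpage, Ladd` or an equivalent numeric form; the argument count depends on the kind. A sketch of the numeric-form guard, assuming only that a usable id must survive the round trip through the MCLOHType enum (isValid stands in for isValidMCLOHType()):

#include <cstdint>

// A numeric id is usable iff casting to the enum type and back is
// lossless and the value names a known kind.
static bool lohIdIsUsable(int64_t Id, bool (*isValid)(int)) {
  return Id == (int64_t)(int)Id && isValid((int)Id);
}
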
-    if (!isValidMCLOHType(Kind) || Id != Kind)
-      return TokError("invalid numeric identifier in directive");
-  } else {
-    StringRef Name = getTok().getIdentifier();
-    // We successfully parsed an identifier.
-    // Check if it is a recognized one.
-    int Id = MCLOHNameToId(Name);
-
-    if (Id == -1)
-      return TokError("invalid identifier in directive");
-    Kind = (MCLOHType)Id;
-  }
-  // Consume the identifier.
-  Lex();
-  // Get the number of arguments of this LOH.
-  int NbArgs = MCLOHIdToNbArgs(Kind);
-
-  assert(NbArgs != -1 && "Invalid number of arguments");
-
-  SmallVector<MCSymbol *, 3> Args;
-  for (int Idx = 0; Idx < NbArgs; ++Idx) {
-    StringRef Name;
-    if (getParser().parseIdentifier(Name))
-      return TokError("expected identifier in directive");
-    Args.push_back(getContext().GetOrCreateSymbol(Name));
-
-    if (Idx + 1 == NbArgs)
-      break;
-    if (getLexer().isNot(AsmToken::Comma))
-      return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
-    Lex();
-  }
-  if (getLexer().isNot(AsmToken::EndOfStatement))
-    return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
-
-  getStreamer().EmitLOHDirective((MCLOHType)Kind, Args);
-  return false;
-}
-
-bool
-ARM64AsmParser::classifySymbolRef(const MCExpr *Expr,
-                                  ARM64MCExpr::VariantKind &ELFRefKind,
-                                  MCSymbolRefExpr::VariantKind &DarwinRefKind,
-                                  const MCConstantExpr *&Addend) {
-  ELFRefKind = ARM64MCExpr::VK_INVALID;
-  DarwinRefKind = MCSymbolRefExpr::VK_None;
-
-  if (const ARM64MCExpr *AE = dyn_cast<ARM64MCExpr>(Expr)) {
-    ELFRefKind = AE->getKind();
-    Expr = AE->getSubExpr();
-  }
-
-  const MCSymbolRefExpr *SE = dyn_cast<MCSymbolRefExpr>(Expr);
-  if (SE) {
-    // It's a simple symbol reference with no addend.
-    DarwinRefKind = SE->getKind();
-    Addend = 0;
-    return true;
-  }
-
-  const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
-  if (!BE)
-    return false;
-
-  SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
-  if (!SE)
-    return false;
-  DarwinRefKind = SE->getKind();
-
-  if (BE->getOpcode() != MCBinaryExpr::Add)
-    return false;
-
-  // See if the addend is a constant, otherwise there's more going
-  // on here than we can deal with.
-  Addend = dyn_cast<MCConstantExpr>(BE->getRHS());
-  if (!Addend)
-    return false;
-
-  // It's some symbol reference + a constant addend, but really
-  // shouldn't use both Darwin and ELF syntax.
-  return ELFRefKind == ARM64MCExpr::VK_INVALID ||
-         DarwinRefKind == MCSymbolRefExpr::VK_None;
-}
-
-/// Force static initialization.
-extern "C" void LLVMInitializeARM64AsmParser() {
-  RegisterMCAsmParser<ARM64AsmParser> X(TheARM64Target);
-}
-
-#define GET_REGISTER_MATCHER
-#define GET_MATCHER_IMPLEMENTATION
-#include "ARM64GenAsmMatcher.inc"
-
-// Define this matcher function after the auto-generated include so we
-// have the match class enum definitions.
-unsigned ARM64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
-                                                    unsigned Kind) {
-  ARM64Operand *Op = static_cast<ARM64Operand *>(AsmOp);
-  // If the kind is a token for a literal immediate, check if our asm
-  // operand matches. This is for InstAliases which have a fixed-value
-  // immediate in the syntax.
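
The odd MCK__35_N names below come from TableGen's mangling of literal match tokens: non-identifier characters are escaped as _<ascii>_, and '#' is ASCII 35, so MCK__35_8 is the match class for the literal token "#8". The whole routine then reduces to comparing a parsed constant against the class's fixed value:

#include <cstdint>

// Essence of validateTargetOperandClass for literal-immediate classes.
static bool matchesLiteralImm(bool IsConstImm, int64_t Parsed,
                              int64_t Expected) {
  return IsConstImm && Parsed == Expected;
}
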
- int64_t ExpectedVal; - switch (Kind) { - default: - return Match_InvalidOperand; - case MCK__35_0: - ExpectedVal = 0; - break; - case MCK__35_1: - ExpectedVal = 1; - break; - case MCK__35_12: - ExpectedVal = 12; - break; - case MCK__35_16: - ExpectedVal = 16; - break; - case MCK__35_2: - ExpectedVal = 2; - break; - case MCK__35_24: - ExpectedVal = 24; - break; - case MCK__35_3: - ExpectedVal = 3; - break; - case MCK__35_32: - ExpectedVal = 32; - break; - case MCK__35_4: - ExpectedVal = 4; - break; - case MCK__35_48: - ExpectedVal = 48; - break; - case MCK__35_6: - ExpectedVal = 6; - break; - case MCK__35_64: - ExpectedVal = 64; - break; - case MCK__35_8: - ExpectedVal = 8; - break; - } - if (!Op->isImm()) - return Match_InvalidOperand; - const MCConstantExpr *CE = dyn_cast(Op->getImm()); - if (!CE) - return Match_InvalidOperand; - if (CE->getValue() == ExpectedVal) - return Match_Success; - return Match_InvalidOperand; -} diff --git a/lib/Target/ARM64/AsmParser/CMakeLists.txt b/lib/Target/ARM64/AsmParser/CMakeLists.txt deleted file mode 100644 index 826158b..0000000 --- a/lib/Target/ARM64/AsmParser/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64AsmParser - ARM64AsmParser.cpp - ) - diff --git a/lib/Target/ARM64/AsmParser/LLVMBuild.txt b/lib/Target/ARM64/AsmParser/LLVMBuild.txt deleted file mode 100644 index 2c8fafe..0000000 --- a/lib/Target/ARM64/AsmParser/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64AsmParser -parent = ARM64 -required_libraries = ARM64Desc ARM64Info MC MCParser Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/AsmParser/Makefile b/lib/Target/ARM64/AsmParser/Makefile deleted file mode 100644 index d25c47f..0000000 --- a/lib/Target/ARM64/AsmParser/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmParser - -# Hack: we need to include 'main' ARM target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/CMakeLists.txt b/lib/Target/ARM64/CMakeLists.txt deleted file mode 100644 index 6de861c..0000000 --- a/lib/Target/ARM64/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS ARM64.td) - -tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info) -tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter) -tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering) -tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer) -tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) -tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher) -tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel) -tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel) -tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv) -tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget) -tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler) -add_public_tablegen_target(ARM64CommonTableGen) - -add_llvm_target(ARM64CodeGen - ARM64AddressTypePromotion.cpp - ARM64AdvSIMDScalarPass.cpp - ARM64AsmPrinter.cpp - ARM64BranchRelaxation.cpp - ARM64CleanupLocalDynamicTLSPass.cpp - ARM64CollectLOH.cpp - ARM64ConditionalCompares.cpp - ARM64DeadRegisterDefinitionsPass.cpp - ARM64ExpandPseudoInsts.cpp - ARM64FastISel.cpp - ARM64FrameLowering.cpp - ARM64ISelDAGToDAG.cpp - ARM64ISelLowering.cpp - ARM64InstrInfo.cpp - ARM64LoadStoreOptimizer.cpp - ARM64MCInstLower.cpp - ARM64PromoteConstant.cpp - ARM64RegisterInfo.cpp - ARM64SelectionDAGInfo.cpp - ARM64StorePairSuppress.cpp - ARM64Subtarget.cpp - ARM64TargetMachine.cpp - ARM64TargetObjectFile.cpp - ARM64TargetTransformInfo.cpp -) - -add_dependencies(LLVMARM64CodeGen intrinsics_gen) - -add_subdirectory(TargetInfo) -add_subdirectory(AsmParser) -add_subdirectory(Disassembler) -add_subdirectory(InstPrinter) -add_subdirectory(MCTargetDesc) diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp deleted file mode 100644 index 44c501f..0000000 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp +++ /dev/null @@ -1,2142 +0,0 @@ -//===- ARM64Disassembler.cpp - Disassembler for ARM64 -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "arm64-disassembler" - -#include "ARM64Disassembler.h" -#include "ARM64Subtarget.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCFixedLenDisassembler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/MemoryObject.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" - -// Pull DecodeStatus and its enum values into the global namespace. -typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; - -// Forward declare these because the autogenerated code will reference them. -// Definitions are further down. 
-static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR128_loRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR64spRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeGPR32spRegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeQQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeDDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, - const void *Decoder); - -static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, const void *Decoder); -static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, - uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Address, - const void *Decoder); -static 
DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
-                                     uint64_t Address,
-                                     const void *Decoder);
-static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
-                                                uint32_t insn, uint64_t Address,
-                                                const void *Decoder);
-static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
-                                         uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
-                                        uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
-                                              uint64_t Address,
-                                              const void *Decoder);
-static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst,
-                                                uint32_t insn, uint64_t Address,
-                                                const void *Decoder);
-static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
-                                        uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn,
-                                       uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn,
-                                         uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn,
-                                             uint64_t Addr,
-                                             const void *Decoder);
-
-static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
-                                         uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
-                                               uint64_t Addr,
-                                               const void *Decoder);
-static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
-                                         uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
-                                               uint64_t Addr,
-                                               const void *Decoder);
-static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
-                                         uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
-                                               uint64_t Addr,
-                                               const void *Decoder);
-static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
-                                        uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
-                                         uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
-                                         uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
-                                         uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
-                                        uint64_t Addr, const void *Decoder);
-
-#include "ARM64GenDisassemblerTables.inc"
-#include "ARM64GenInstrInfo.inc"
-
-using namespace llvm;
-
-#define Success llvm::MCDisassembler::Success
-#define Fail llvm::MCDisassembler::Fail
-
-static MCDisassembler *createARM64Disassembler(const Target &T,
-                                               const MCSubtargetInfo &STI) {
-  return new ARM64Disassembler(STI);
-}
-
-DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
-                                               const MemoryObject &Region,
-                                               uint64_t Address,
-                                               raw_ostream &os,
-                                               raw_ostream &cs) const {
-  CommentStream = &cs;
-
-  uint8_t bytes[4];
-
-  Size = 0;
-  // We want to read exactly 4 bytes of data.
-  if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1)
-    return Fail;
-  Size = 4;
-
-  // Encoded as a little-endian 32-bit word in the stream.
-  uint32_t insn =
-      (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0);
-
-  // Calling the auto-generated decoder function.
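
decodeInstruction() comes from the TableGen-erated tables included earlier and returns a tri-state DecodeStatus; because Fail is the zero value, the `if (!result)` on the next line keeps both Success and SoftFail. A sketch of that collapse, under the assumption (true for MCDisassembler) that Fail == 0:

// DecodeStatus is { Fail = 0, SoftFail = 1, Success = 3 } in
// MCDisassembler; anything nonzero is reported as a decoded instruction.
static bool keepDecodeResult(unsigned Status) { return Status != 0; }
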
-  DecodeStatus result =
-      decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
-  if (!result)
-    return Fail;
-
-  return Success;
-}
-
-static MCSymbolRefExpr::VariantKind
-getVariant(uint64_t LLVMDisassembler_VariantKind) {
-  switch (LLVMDisassembler_VariantKind) {
-  case LLVMDisassembler_VariantKind_None:
-    return MCSymbolRefExpr::VK_None;
-  case LLVMDisassembler_VariantKind_ARM64_PAGE:
-    return MCSymbolRefExpr::VK_PAGE;
-  case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
-    return MCSymbolRefExpr::VK_PAGEOFF;
-  case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
-    return MCSymbolRefExpr::VK_GOTPAGE;
-  case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
-    return MCSymbolRefExpr::VK_GOTPAGEOFF;
-  case LLVMDisassembler_VariantKind_ARM64_TLVP:
-  case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
-  default:
-    assert(0 && "bad LLVMDisassembler_VariantKind");
-    return MCSymbolRefExpr::VK_None;
-  }
-}
-
-/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
-/// operand in place of the immediate Value in the MCInst. The immediate
-/// Value has not had any PC adjustment made by the caller. If the instruction
-/// is a branch that adds the PC to the immediate Value then isBranch is
-/// Success, else Fail. If the getOpInfo() function was set as part of the
-/// setupForSymbolicDisassembly() call then that function is called to get any
-/// symbolic information at the Address for this instruction. If that returns
-/// non-zero then the symbolic information it returns is used to create an
-/// MCExpr and that is added as an operand to the MCInst. If getOpInfo()
-/// returns zero and isBranch is Success then a symbol lookup for
-/// Address + Value is done and if a symbol is found an MCExpr is created with
-/// that, else an MCExpr with Address + Value is created. If getOpInfo()
-/// returns zero and isBranch is Fail then the Opcode of the MCInst is
-/// tested, and for ADRP and other instructions that help load pointers,
-/// a symbol lookup is done to see if it returns a specific reference type
-/// to add to the comment stream. This function returns Success if it adds
-/// an operand to the MCInst and Fail otherwise.
-bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value, - bool isBranch, - uint64_t InstSize, MCInst &MI, - uint32_t insn) const { - LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback(); - - struct LLVMOpInfo1 SymbolicOp; - memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); - SymbolicOp.Value = Value; - void *DisInfo = getDisInfoBlock(); - uint64_t ReferenceType; - const char *ReferenceName; - const char *Name; - LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback(); - if (!getOpInfo || - !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { - if (isBranch) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - if (Name) { - SymbolicOp.AddSymbol.Name = Name; - SymbolicOp.AddSymbol.Present = Success; - SymbolicOp.Value = 0; - } else { - SymbolicOp.Value = Address + Value; - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) - (*CommentStream) << "symbol stub for: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - } else { - return false; - } - } else if (MI.getOpcode() == ARM64::ADRP) { - if (SymbolLookUp) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP; - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - (*CommentStream) << format("0x%llx", - 0xfffffffffffff000LL & (Address + Value)); - } else { - return false; - } - } else if (MI.getOpcode() == ARM64::ADDXri || - MI.getOpcode() == ARM64::LDRXui || - MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) { - if (SymbolLookUp) { - if (MI.getOpcode() == ARM64::ADDXri) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri; - else if (MI.getOpcode() == ARM64::LDRXui) - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui; - if (MI.getOpcode() == ARM64::LDRXl) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else if (MI.getOpcode() == ARM64::ADR) { - ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR; - Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address, - &ReferenceName); - } else { - Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address, - &ReferenceName); - } - if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) - (*CommentStream) << "literal pool symbol address: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) - (*CommentStream) << "literal pool for: \"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) - (*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\""; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message) - (*CommentStream) << "Objc message: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) - (*CommentStream) << "Objc message ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) - (*CommentStream) << "Objc selector ref: " << ReferenceName; - else if (ReferenceType == - LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) - (*CommentStream) << "Objc class ref: " << ReferenceName; - // For these instructions, the SymbolLookUp() 
above is just to get the - // ReferenceType and ReferenceName. We want to make sure not to - // fall through so we don't build an MCExpr to leave the disassembly - // of the immediate values of these instructions to the InstPrinter. - return false; - } else { - return false; - } - } else { - return false; - } - } - - MCContext *Ctx = getMCContext(); - const MCExpr *Add = NULL; - if (SymbolicOp.AddSymbol.Present) { - if (SymbolicOp.AddSymbol.Name) { - StringRef Name(SymbolicOp.AddSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind); - if (Variant != MCSymbolRefExpr::VK_None) - Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx); - else - Add = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); - } - } - - const MCExpr *Sub = NULL; - if (SymbolicOp.SubtractSymbol.Present) { - if (SymbolicOp.SubtractSymbol.Name) { - StringRef Name(SymbolicOp.SubtractSymbol.Name); - MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); - Sub = MCSymbolRefExpr::Create(Sym, *Ctx); - } else { - Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); - } - } - - const MCExpr *Off = NULL; - if (SymbolicOp.Value != 0) - Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); - - const MCExpr *Expr; - if (Sub) { - const MCExpr *LHS; - if (Add) - LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); - else - LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); - else - Expr = LHS; - } else if (Add) { - if (Off != 0) - Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); - else - Expr = Add; - } else { - if (Off != 0) - Expr = Off; - else - Expr = MCConstantExpr::Create(0, *Ctx); - } - - MI.addOperand(MCOperand::CreateExpr(Expr)); - - return true; -} - -extern "C" void LLVMInitializeARM64Disassembler() { - TargetRegistry::RegisterMCDisassembler(TheARM64Target, - createARM64Disassembler); -} - -static const unsigned FPR128DecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 -}; - -static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR128DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 15) - return Fail; - return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder); -} - -static const unsigned FPR64DecoderTable[] = { - ARM64::D0, ARM64::D1, ARM64::D2, ARM64::D3, ARM64::D4, ARM64::D5, - ARM64::D6, ARM64::D7, ARM64::D8, ARM64::D9, ARM64::D10, ARM64::D11, - ARM64::D12, ARM64::D13, ARM64::D14, ARM64::D15, ARM64::D16, ARM64::D17, - ARM64::D18, ARM64::D19, ARM64::D20, ARM64::D21, ARM64::D22, ARM64::D23, - ARM64::D24, ARM64::D25, ARM64::D26, ARM64::D27, ARM64::D28, ARM64::D29, - ARM64::D30, ARM64::D31 -}; - -static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = 
FPR64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR32DecoderTable[] = { - ARM64::S0, ARM64::S1, ARM64::S2, ARM64::S3, ARM64::S4, ARM64::S5, - ARM64::S6, ARM64::S7, ARM64::S8, ARM64::S9, ARM64::S10, ARM64::S11, - ARM64::S12, ARM64::S13, ARM64::S14, ARM64::S15, ARM64::S16, ARM64::S17, - ARM64::S18, ARM64::S19, ARM64::S20, ARM64::S21, ARM64::S22, ARM64::S23, - ARM64::S24, ARM64::S25, ARM64::S26, ARM64::S27, ARM64::S28, ARM64::S29, - ARM64::S30, ARM64::S31 -}; - -static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR32DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR16DecoderTable[] = { - ARM64::H0, ARM64::H1, ARM64::H2, ARM64::H3, ARM64::H4, ARM64::H5, - ARM64::H6, ARM64::H7, ARM64::H8, ARM64::H9, ARM64::H10, ARM64::H11, - ARM64::H12, ARM64::H13, ARM64::H14, ARM64::H15, ARM64::H16, ARM64::H17, - ARM64::H18, ARM64::H19, ARM64::H20, ARM64::H21, ARM64::H22, ARM64::H23, - ARM64::H24, ARM64::H25, ARM64::H26, ARM64::H27, ARM64::H28, ARM64::H29, - ARM64::H30, ARM64::H31 -}; - -static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR16DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned FPR8DecoderTable[] = { - ARM64::B0, ARM64::B1, ARM64::B2, ARM64::B3, ARM64::B4, ARM64::B5, - ARM64::B6, ARM64::B7, ARM64::B8, ARM64::B9, ARM64::B10, ARM64::B11, - ARM64::B12, ARM64::B13, ARM64::B14, ARM64::B15, ARM64::B16, ARM64::B17, - ARM64::B18, ARM64::B19, ARM64::B20, ARM64::B21, ARM64::B22, ARM64::B23, - ARM64::B24, ARM64::B25, ARM64::B26, ARM64::B27, ARM64::B28, ARM64::B29, - ARM64::B30, ARM64::B31 -}; - -static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = FPR8DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned GPR64DecoderTable[] = { - ARM64::X0, ARM64::X1, ARM64::X2, ARM64::X3, ARM64::X4, ARM64::X5, - ARM64::X6, ARM64::X7, ARM64::X8, ARM64::X9, ARM64::X10, ARM64::X11, - ARM64::X12, ARM64::X13, ARM64::X14, ARM64::X15, ARM64::X16, ARM64::X17, - ARM64::X18, ARM64::X19, ARM64::X20, ARM64::X21, ARM64::X22, ARM64::X23, - ARM64::X24, ARM64::X25, ARM64::X26, ARM64::X27, ARM64::X28, ARM64::FP, - ARM64::LR, ARM64::XZR -}; - -static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = GPR64DecoderTable[RegNo]; - if (Register == ARM64::XZR) - Register = ARM64::SP; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned GPR32DecoderTable[] = { - ARM64::W0, ARM64::W1, ARM64::W2, ARM64::W3, ARM64::W4, ARM64::W5, - ARM64::W6, ARM64::W7, ARM64::W8, ARM64::W9, ARM64::W10, ARM64::W11, - ARM64::W12, ARM64::W13, ARM64::W14, ARM64::W15, ARM64::W16, ARM64::W17, - ARM64::W18, 
ARM64::W19, ARM64::W20, ARM64::W21, ARM64::W22, ARM64::W23, - ARM64::W24, ARM64::W25, ARM64::W26, ARM64::W27, ARM64::W28, ARM64::W29, - ARM64::W30, ARM64::WZR -}; - -static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR32DecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = GPR32DecoderTable[RegNo]; - if (Register == ARM64::WZR) - Register = ARM64::WSP; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned VectorDecoderTable[] = { - ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5, - ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11, - ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17, - ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23, - ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29, - ARM64::Q30, ARM64::Q31 -}; - -static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - - unsigned Register = VectorDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQDecoderTable[] = { - ARM64::Q0_Q1, ARM64::Q1_Q2, ARM64::Q2_Q3, ARM64::Q3_Q4, - ARM64::Q4_Q5, ARM64::Q5_Q6, ARM64::Q6_Q7, ARM64::Q7_Q8, - ARM64::Q8_Q9, ARM64::Q9_Q10, ARM64::Q10_Q11, ARM64::Q11_Q12, - ARM64::Q12_Q13, ARM64::Q13_Q14, ARM64::Q14_Q15, ARM64::Q15_Q16, - ARM64::Q16_Q17, ARM64::Q17_Q18, ARM64::Q18_Q19, ARM64::Q19_Q20, - ARM64::Q20_Q21, ARM64::Q21_Q22, ARM64::Q22_Q23, ARM64::Q23_Q24, - ARM64::Q24_Q25, ARM64::Q25_Q26, ARM64::Q26_Q27, ARM64::Q27_Q28, - ARM64::Q28_Q29, ARM64::Q29_Q30, ARM64::Q30_Q31, ARM64::Q31_Q0 -}; - -static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQQDecoderTable[] = { - ARM64::Q0_Q1_Q2, ARM64::Q1_Q2_Q3, ARM64::Q2_Q3_Q4, - ARM64::Q3_Q4_Q5, ARM64::Q4_Q5_Q6, ARM64::Q5_Q6_Q7, - ARM64::Q6_Q7_Q8, ARM64::Q7_Q8_Q9, ARM64::Q8_Q9_Q10, - ARM64::Q9_Q10_Q11, ARM64::Q10_Q11_Q12, ARM64::Q11_Q12_Q13, - ARM64::Q12_Q13_Q14, ARM64::Q13_Q14_Q15, ARM64::Q14_Q15_Q16, - ARM64::Q15_Q16_Q17, ARM64::Q16_Q17_Q18, ARM64::Q17_Q18_Q19, - ARM64::Q18_Q19_Q20, ARM64::Q19_Q20_Q21, ARM64::Q20_Q21_Q22, - ARM64::Q21_Q22_Q23, ARM64::Q22_Q23_Q24, ARM64::Q23_Q24_Q25, - ARM64::Q24_Q25_Q26, ARM64::Q25_Q26_Q27, ARM64::Q26_Q27_Q28, - ARM64::Q27_Q28_Q29, ARM64::Q28_Q29_Q30, ARM64::Q29_Q30_Q31, - ARM64::Q30_Q31_Q0, ARM64::Q31_Q0_Q1 -}; - -static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned QQQQDecoderTable[] = { - ARM64::Q0_Q1_Q2_Q3, ARM64::Q1_Q2_Q3_Q4, ARM64::Q2_Q3_Q4_Q5, - ARM64::Q3_Q4_Q5_Q6, ARM64::Q4_Q5_Q6_Q7, ARM64::Q5_Q6_Q7_Q8, - ARM64::Q6_Q7_Q8_Q9, ARM64::Q7_Q8_Q9_Q10, ARM64::Q8_Q9_Q10_Q11, - ARM64::Q9_Q10_Q11_Q12, ARM64::Q10_Q11_Q12_Q13, 
ARM64::Q11_Q12_Q13_Q14, - ARM64::Q12_Q13_Q14_Q15, ARM64::Q13_Q14_Q15_Q16, ARM64::Q14_Q15_Q16_Q17, - ARM64::Q15_Q16_Q17_Q18, ARM64::Q16_Q17_Q18_Q19, ARM64::Q17_Q18_Q19_Q20, - ARM64::Q18_Q19_Q20_Q21, ARM64::Q19_Q20_Q21_Q22, ARM64::Q20_Q21_Q22_Q23, - ARM64::Q21_Q22_Q23_Q24, ARM64::Q22_Q23_Q24_Q25, ARM64::Q23_Q24_Q25_Q26, - ARM64::Q24_Q25_Q26_Q27, ARM64::Q25_Q26_Q27_Q28, ARM64::Q26_Q27_Q28_Q29, - ARM64::Q27_Q28_Q29_Q30, ARM64::Q28_Q29_Q30_Q31, ARM64::Q29_Q30_Q31_Q0, - ARM64::Q30_Q31_Q0_Q1, ARM64::Q31_Q0_Q1_Q2 -}; - -static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = QQQQDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDecoderTable[] = { - ARM64::D0_D1, ARM64::D1_D2, ARM64::D2_D3, ARM64::D3_D4, - ARM64::D4_D5, ARM64::D5_D6, ARM64::D6_D7, ARM64::D7_D8, - ARM64::D8_D9, ARM64::D9_D10, ARM64::D10_D11, ARM64::D11_D12, - ARM64::D12_D13, ARM64::D13_D14, ARM64::D14_D15, ARM64::D15_D16, - ARM64::D16_D17, ARM64::D17_D18, ARM64::D18_D19, ARM64::D19_D20, - ARM64::D20_D21, ARM64::D21_D22, ARM64::D22_D23, ARM64::D23_D24, - ARM64::D24_D25, ARM64::D25_D26, ARM64::D26_D27, ARM64::D27_D28, - ARM64::D28_D29, ARM64::D29_D30, ARM64::D30_D31, ARM64::D31_D0 -}; - -static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = DDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDDecoderTable[] = { - ARM64::D0_D1_D2, ARM64::D1_D2_D3, ARM64::D2_D3_D4, - ARM64::D3_D4_D5, ARM64::D4_D5_D6, ARM64::D5_D6_D7, - ARM64::D6_D7_D8, ARM64::D7_D8_D9, ARM64::D8_D9_D10, - ARM64::D9_D10_D11, ARM64::D10_D11_D12, ARM64::D11_D12_D13, - ARM64::D12_D13_D14, ARM64::D13_D14_D15, ARM64::D14_D15_D16, - ARM64::D15_D16_D17, ARM64::D16_D17_D18, ARM64::D17_D18_D19, - ARM64::D18_D19_D20, ARM64::D19_D20_D21, ARM64::D20_D21_D22, - ARM64::D21_D22_D23, ARM64::D22_D23_D24, ARM64::D23_D24_D25, - ARM64::D24_D25_D26, ARM64::D25_D26_D27, ARM64::D26_D27_D28, - ARM64::D27_D28_D29, ARM64::D28_D29_D30, ARM64::D29_D30_D31, - ARM64::D30_D31_D0, ARM64::D31_D0_D1 -}; - -static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = DDDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static const unsigned DDDDDecoderTable[] = { - ARM64::D0_D1_D2_D3, ARM64::D1_D2_D3_D4, ARM64::D2_D3_D4_D5, - ARM64::D3_D4_D5_D6, ARM64::D4_D5_D6_D7, ARM64::D5_D6_D7_D8, - ARM64::D6_D7_D8_D9, ARM64::D7_D8_D9_D10, ARM64::D8_D9_D10_D11, - ARM64::D9_D10_D11_D12, ARM64::D10_D11_D12_D13, ARM64::D11_D12_D13_D14, - ARM64::D12_D13_D14_D15, ARM64::D13_D14_D15_D16, ARM64::D14_D15_D16_D17, - ARM64::D15_D16_D17_D18, ARM64::D16_D17_D18_D19, ARM64::D17_D18_D19_D20, - ARM64::D18_D19_D20_D21, ARM64::D19_D20_D21_D22, ARM64::D20_D21_D22_D23, - ARM64::D21_D22_D23_D24, ARM64::D22_D23_D24_D25, ARM64::D23_D24_D25_D26, - ARM64::D24_D25_D26_D27, ARM64::D25_D26_D27_D28, ARM64::D26_D27_D28_D29, - ARM64::D27_D28_D29_D30, ARM64::D28_D29_D30_D31, ARM64::D29_D30_D31_D0, - ARM64::D30_D31_D0_D1, ARM64::D31_D0_D1_D2 -}; - -static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t Addr, - const void *Decoder) { - if (RegNo > 31) - return Fail; - unsigned Register = 
DDDDDecoderTable[RegNo]; - Inst.addOperand(MCOperand::CreateReg(Register)); - return Success; -} - -static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(64 - Imm)); - return Success; -} - -static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - int64_t ImmVal = Imm; - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend 19-bit immediate. - if (ImmVal & (1 << (19 - 1))) - ImmVal |= ~((1LL << 19) - 1); - - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2, - Inst.getOpcode() != ARM64::LDRXl, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - return Success; -} - -static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm, - uint64_t Address, - const void *Decoder) { - Inst.addOperand(MCOperand::CreateImm(Imm | 0x8000)); - return Success; -} - -static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm, - unsigned Add) { - Inst.addOperand(MCOperand::CreateImm(Add - Imm)); - return Success; -} - -static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm, - unsigned Add) { - Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1))); - return Success; -} - -static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 64); -} - -static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x20, 64); -} - -static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 32); -} - -static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x10, 32); -} - -static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 16); -} - -static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, - const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm | 0x8, 16); -} - -static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftRImm(Inst, Imm, 8); -} - -static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 64); -} - -static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 32); -} - -static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 16); -} - -static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm, - uint64_t Addr, const void *Decoder) { - return DecodeVecShiftLImm(Inst, Imm, 8); -} - -static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned shiftHi = fieldFromInstruction(insn, 22, 2); - unsigned shiftLo = 
fieldFromInstruction(insn, 10, 6); - unsigned shift = (shiftHi << 6) | shiftLo; - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ANDWrs: - case ARM64::ANDSWrs: - case ARM64::BICWrs: - case ARM64::BICSWrs: - case ARM64::ORRWrs: - case ARM64::ORNWrs: - case ARM64::EORWrs: - case ARM64::EONWrs: - case ARM64::ADDWrs: - case ARM64::ADDSWrs: - case ARM64::SUBWrs: - case ARM64::SUBSWrs: { - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - case ARM64::ANDXrs: - case ARM64::ANDSXrs: - case ARM64::BICXrs: - case ARM64::BICSXrs: - case ARM64::ORRXrs: - case ARM64::ORNXrs: - case ARM64::EORXrs: - case ARM64::EONXrs: - case ARM64::ADDXrs: - case ARM64::ADDSXrs: - case ARM64::SUBXrs: - case ARM64::SUBSXrs: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(shift)); - return Success; -} - -static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned imm = fieldFromInstruction(insn, 5, 16); - unsigned shift = fieldFromInstruction(insn, 21, 2); - shift <<= 4; - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::MOVZWi: - case ARM64::MOVNWi: - case ARM64::MOVKWi: - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::MOVZXi: - case ARM64::MOVNXi: - case ARM64::MOVKXi: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - break; - } - - if (Inst.getOpcode() == ARM64::MOVKWi || Inst.getOpcode() == ARM64::MOVKXi) - Inst.addOperand(Inst.getOperand(0)); - - Inst.addOperand(MCOperand::CreateImm(imm)); - Inst.addOperand(MCOperand::CreateImm(shift)); - return Success; -} - -static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned offset = fieldFromInstruction(insn, 10, 12); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::PRFMui: - // Rt is an immediate in prefetch. 
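All of these decoders follow one recipe: slice fixed bit ranges out of the 32-bit instruction word with fieldFromInstruction (supplied by the TableGen-generated decoder tables), map 5-bit register numbers through the lookup tables above, and append operands in the order the MCInst expects. A minimal stand-in for such a field extractor, purely for illustration (the name and standalone form are assumptions, not the generated code):

  #include <cassert>
  #include <cstdint>

  // Return the NumBits-wide field of Insn starting at bit StartBit,
  // right-justified, e.g. Rd = fieldFromInsn(I, 0, 5) in most encodings.
  static inline uint32_t fieldFromInsn(uint32_t Insn, unsigned StartBit,
                                       unsigned NumBits) {
    assert(StartBit + NumBits <= 32 && "field out of range");
    uint32_t Mask = NumBits == 32 ? ~0U : (1U << NumBits) - 1;
    return (Insn >> StartBit) & Mask;
  }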
- Inst.addOperand(MCOperand::CreateImm(Rt)); - break; - case ARM64::STRBBui: - case ARM64::LDRBBui: - case ARM64::LDRSBWui: - case ARM64::STRHHui: - case ARM64::LDRHHui: - case ARM64::LDRSHWui: - case ARM64::STRWui: - case ARM64::LDRWui: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSBXui: - case ARM64::LDRSHXui: - case ARM64::LDRSWui: - case ARM64::STRXui: - case ARM64::LDRXui: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRQui: - case ARM64::STRQui: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRDui: - case ARM64::STRDui: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSui: - case ARM64::STRSui: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRHui: - case ARM64::STRHui: - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBui: - case ARM64::STRBui: - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - int64_t offset = fieldFromInstruction(insn, 12, 9); - - // offset is a 9-bit signed immediate, so sign extend it to - // fill the unsigned. - if (offset & (1 << (9 - 1))) - offset |= ~((1LL << 9) - 1); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::PRFUMi: - // Rt is an immediate in prefetch. - Inst.addOperand(MCOperand::CreateImm(Rt)); - break; - case ARM64::STURBBi: - case ARM64::LDURBBi: - case ARM64::LDURSBWi: - case ARM64::STURHHi: - case ARM64::LDURHHi: - case ARM64::LDURSHWi: - case ARM64::STURWi: - case ARM64::LDURWi: - case ARM64::LDTRSBWi: - case ARM64::LDTRSHWi: - case ARM64::STTRWi: - case ARM64::LDTRWi: - case ARM64::STTRHi: - case ARM64::LDTRHi: - case ARM64::LDTRBi: - case ARM64::STTRBi: - case ARM64::LDRSBWpre: - case ARM64::LDRSHWpre: - case ARM64::STRBBpre: - case ARM64::LDRBBpre: - case ARM64::STRHHpre: - case ARM64::LDRHHpre: - case ARM64::STRWpre: - case ARM64::LDRWpre: - case ARM64::LDRSBWpost: - case ARM64::LDRSHWpost: - case ARM64::STRBBpost: - case ARM64::LDRBBpost: - case ARM64::STRHHpost: - case ARM64::LDRHHpost: - case ARM64::STRWpost: - case ARM64::LDRWpost: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURSBXi: - case ARM64::LDURSHXi: - case ARM64::LDURSWi: - case ARM64::STURXi: - case ARM64::LDURXi: - case ARM64::LDTRSBXi: - case ARM64::LDTRSHXi: - case ARM64::LDTRSWi: - case ARM64::STTRXi: - case ARM64::LDTRXi: - case ARM64::LDRSBXpre: - case ARM64::LDRSHXpre: - case ARM64::STRXpre: - case ARM64::LDRSWpre: - case ARM64::LDRXpre: - case ARM64::LDRSBXpost: - case ARM64::LDRSHXpost: - case ARM64::STRXpost: - case ARM64::LDRSWpost: - case ARM64::LDRXpost: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURQi: - case ARM64::STURQi: - case ARM64::LDRQpre: - case ARM64::STRQpre: - case ARM64::LDRQpost: - case ARM64::STRQpost: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURDi: - case ARM64::STURDi: - case ARM64::LDRDpre: - case ARM64::STRDpre: - case ARM64::LDRDpost: - case ARM64::STRDpost: - DecodeFPR64RegisterClass(Inst, Rt, Addr, 
Decoder); - break; - case ARM64::LDURSi: - case ARM64::STURSi: - case ARM64::LDRSpre: - case ARM64::STRSpre: - case ARM64::LDRSpost: - case ARM64::STRSpost: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURHi: - case ARM64::STURHi: - case ARM64::LDRHpre: - case ARM64::STRHpre: - case ARM64::LDRHpost: - case ARM64::STRHpost: - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDURBi: - case ARM64::STURBi: - case ARM64::LDRBpre: - case ARM64::STRBpre: - case ARM64::LDRBpost: - case ARM64::STRBpost: - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(insn, 10, 5); - unsigned Rs = fieldFromInstruction(insn, 16, 5); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::STLXRW: - case ARM64::STLXRB: - case ARM64::STLXRH: - case ARM64::STXRW: - case ARM64::STXRB: - case ARM64::STXRH: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDARW: - case ARM64::LDARB: - case ARM64::LDARH: - case ARM64::LDAXRW: - case ARM64::LDAXRB: - case ARM64::LDAXRH: - case ARM64::LDXRW: - case ARM64::LDXRB: - case ARM64::LDXRH: - case ARM64::STLRW: - case ARM64::STLRB: - case ARM64::STLRH: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::STLXRX: - case ARM64::STXRX: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDARX: - case ARM64::LDAXRX: - case ARM64::LDXRX: - case ARM64::STLRX: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::STLXPW: - case ARM64::STXPW: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDAXPW: - case ARM64::LDXPW: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::STLXPX: - case ARM64::STXPX: - DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder); - // FALLTHROUGH - case ARM64::LDAXPX: - case ARM64::LDXPX: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - return Success; -} - -static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rt2 = fieldFromInstruction(insn, 10, 5); - int64_t offset = fieldFromInstruction(insn, 15, 7); - - // offset is a 7-bit signed immediate, so sign extend it to - // fill the unsigned. 
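The sign-extension idiom in the next two lines recurs throughout this file for 7-, 9-, 14-, 19-, 21- and 26-bit fields: test the field's top bit and OR ones into every bit above it. Hoisted into a generic helper purely for illustration (the decoders inline it per width):

  #include <cstdint>

  // Sign-extend the low Bits bits of V to a full int64_t.
  static inline int64_t signExtend(int64_t V, unsigned Bits) {
    if (V & (1LL << (Bits - 1)))  // field's sign bit set?
      V |= ~((1LL << Bits) - 1);  // replicate it into the high bits
    return V;
  }

For example, signExtend(0x7F, 7) == -1, while signExtend(0x3F, 7) == 63.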
- if (offset & (1 << (7 - 1))) - offset |= ~((1LL << 7) - 1); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LDNPXi: - case ARM64::STNPXi: - case ARM64::LDPXpost: - case ARM64::STPXpost: - case ARM64::LDPSWpost: - case ARM64::LDPXi: - case ARM64::STPXi: - case ARM64::LDPSWi: - case ARM64::LDPXpre: - case ARM64::STPXpre: - case ARM64::LDPSWpre: - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPWi: - case ARM64::STNPWi: - case ARM64::LDPWpost: - case ARM64::STPWpost: - case ARM64::LDPWi: - case ARM64::STPWi: - case ARM64::LDPWpre: - case ARM64::STPWpre: - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPQi: - case ARM64::STNPQi: - case ARM64::LDPQpost: - case ARM64::STPQpost: - case ARM64::LDPQi: - case ARM64::STPQi: - case ARM64::LDPQpre: - case ARM64::STPQpre: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPDi: - case ARM64::STNPDi: - case ARM64::LDPDpost: - case ARM64::STPDpost: - case ARM64::LDPDi: - case ARM64::STPDi: - case ARM64::LDPDpre: - case ARM64::STPDpre: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder); - break; - case ARM64::LDNPSi: - case ARM64::STNPSi: - case ARM64::LDPSpost: - case ARM64::STPSpost: - case ARM64::LDPSi: - case ARM64::STPSi: - case ARM64::LDPSpre: - case ARM64::STPSpre: - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(offset)); - return Success; -} - -static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rt = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned extendHi = fieldFromInstruction(insn, 13, 3); - unsigned extendLo = fieldFromInstruction(insn, 12, 1); - unsigned extend = 0; - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LDRSWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRXro: - case ARM64::STRXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRWro: - case ARM64::STRWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRQro: - case ARM64::STRQro: - extend = (extendHi << 1) | extendLo; - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRDro: - case ARM64::STRDro: - extend = (extendHi << 1) | extendLo; - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSro: - case ARM64::STRSro: - extend = (extendHi << 1) | extendLo; - DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRHro: - extend = (extendHi << 1) | extendLo; - DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBro: - extend = (extendHi << 1) | extendLo; - DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRBBro: - case ARM64::STRBBro: - case ARM64::LDRSBWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case 
ARM64::LDRHHro: - case ARM64::STRHHro: - case ARM64::LDRSHWro: - extend = (extendHi << 1) | extendLo; - DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSHXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LDRSBXro: - extend = (extendHi << 1) | extendLo; - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::PRFMro: - extend = (extendHi << 1) | extendLo; - Inst.addOperand(MCOperand::CreateImm(Rt)); - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - - if (extendHi == 0x3) - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - else - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(extend)); - return Success; -} - -static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Rm = fieldFromInstruction(insn, 16, 5); - unsigned extend = fieldFromInstruction(insn, 10, 6); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ADDWrx: - case ARM64::SUBWrx: - DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDSWrx: - case ARM64::SUBSWrx: - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDXrx: - case ARM64::SUBXrx: - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDSXrx: - case ARM64::SUBSXrx: - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder); - break; - case ARM64::ADDXrx64: - case ARM64::ADDSXrx64: - case ARM64::SUBXrx64: - case ARM64::SUBSXrx64: - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(extend)); - return Success; -} - -static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Datasize = fieldFromInstruction(insn, 31, 1); - unsigned imm; - - if (Datasize) { - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - imm = fieldFromInstruction(insn, 10, 13); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 64)) - return Fail; - } else { - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder); - imm = fieldFromInstruction(insn, 10, 12); - if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 32)) - return Fail; - } - Inst.addOperand(MCOperand::CreateImm(imm)); - return Success; -} - -static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned cmode = fieldFromInstruction(insn, 12, 4); - unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; - imm |= 
fieldFromInstruction(insn, 5, 5); - - if (Inst.getOpcode() == ARM64::MOVID) - DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(imm)); - - switch (Inst.getOpcode()) { - default: - break; - case ARM64::MOVIv4i16: - case ARM64::MOVIv8i16: - case ARM64::MVNIv4i16: - case ARM64::MVNIv8i16: - case ARM64::MOVIv2i32: - case ARM64::MOVIv4i32: - case ARM64::MVNIv2i32: - case ARM64::MVNIv4i32: - Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); - break; - case ARM64::MOVIv2s_msl: - case ARM64::MOVIv4s_msl: - case ARM64::MVNIv2s_msl: - case ARM64::MVNIv4s_msl: - Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108)); - break; - } - - return Success; -} - -static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned cmode = fieldFromInstruction(insn, 12, 4); - unsigned imm = fieldFromInstruction(insn, 16, 3) << 5; - imm |= fieldFromInstruction(insn, 5, 5); - - // Tied operands added twice. - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder); - - Inst.addOperand(MCOperand::CreateImm(imm)); - Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2)); - - return Success; -} - -static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - int64_t imm = fieldFromInstruction(insn, 5, 19) << 2; - imm |= fieldFromInstruction(insn, 29, 2); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend the 21-bit immediate. - if (imm & (1 << (21 - 1))) - imm |= ~((1LL << 21) - 1); - - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(imm)); - - return Success; -} - -static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - unsigned Rd = fieldFromInstruction(insn, 0, 5); - unsigned Rn = fieldFromInstruction(insn, 5, 5); - unsigned Imm = fieldFromInstruction(insn, 10, 14); - unsigned S = fieldFromInstruction(insn, 29, 1); - unsigned Datasize = fieldFromInstruction(insn, 31, 1); - - unsigned ShifterVal = (Imm >> 12) & 3; - unsigned ImmVal = Imm & 0xFFF; - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - if (ShifterVal != 0 && ShifterVal != 1) - return Fail; - - if (Datasize) { - if (Rd == 31 && !S) - DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - } else { - if (Rd == 31 && !S) - DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder); - else - DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder); - DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); - } - - if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn)) - Inst.addOperand(MCOperand::CreateImm(ImmVal)); - Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal)); - return Success; -} - -static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - int64_t imm = fieldFromInstruction(insn, 0, 26); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend the 26-bit immediate. 
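The two lines that follow perform exactly that sign extension; together with the imm << 2 passed to tryAddingSymbolicOperand, the printed branch target is the instruction's own address plus a sign-extended word offset. Restated as a self-contained sketch (the helper name is illustrative, not from this patch):

  #include <cstdint>

  // Byte target of a B/BL: Addr is the branch's own address, Imm26 the
  // raw 26-bit field. The offset is word-granular, so the reach is
  // +/-128 MiB.
  static inline uint64_t branchTarget(uint64_t Addr, uint32_t Imm26) {
    int64_t Off = Imm26;
    if (Off & (1LL << 25))        // sign bit of the 26-bit field
      Off |= ~((1LL << 26) - 1);  // sign-extend
    return Addr + (Off << 2);     // scale words to bytes
  }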
- if (imm & (1 << (26 - 1))) - imm |= ~((1LL << 26) - 1); - - if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, true, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(imm)); - - return Success; -} - -static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst, - uint32_t insn, uint64_t Addr, - const void *Decoder) { - uint64_t op1 = fieldFromInstruction(insn, 16, 3); - uint64_t op2 = fieldFromInstruction(insn, 5, 3); - uint64_t crm = fieldFromInstruction(insn, 8, 4); - - Inst.addOperand(MCOperand::CreateImm((op1 << 3) | op2)); - Inst.addOperand(MCOperand::CreateImm(crm)); - - return Success; -} - -static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5; - bit |= fieldFromInstruction(insn, 19, 5); - int64_t dst = fieldFromInstruction(insn, 5, 14); - const ARM64Disassembler *Dis = - static_cast<const ARM64Disassembler *>(Decoder); - - // Sign-extend 14-bit immediate. - if (dst & (1 << (14 - 1))) - dst |= ~((1LL << 14) - 1); - - DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); - Inst.addOperand(MCOperand::CreateImm(bit)); - if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, true, 4, Inst)) - Inst.addOperand(MCOperand::CreateImm(dst)); - - return Success; -} - -static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rd = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::ST1Onev8b_POST: - case ARM64::ST1Onev4h_POST: - case ARM64::ST1Onev2s_POST: - case ARM64::ST1Onev1d_POST: - case ARM64::LD1Onev8b_POST: - case ARM64::LD1Onev4h_POST: - case ARM64::LD1Onev2s_POST: - case ARM64::LD1Onev1d_POST: - DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Onev16b_POST: - case ARM64::ST1Onev8h_POST: - case ARM64::ST1Onev4s_POST: - case ARM64::ST1Onev2d_POST: - case ARM64::LD1Onev16b_POST: - case ARM64::LD1Onev8h_POST: - case ARM64::LD1Onev4s_POST: - case ARM64::LD1Onev2d_POST: - DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Twov8b_POST: - case ARM64::ST1Twov4h_POST: - case ARM64::ST1Twov2s_POST: - case ARM64::ST1Twov1d_POST: - case ARM64::ST2Twov8b_POST: - case ARM64::ST2Twov4h_POST: - case ARM64::ST2Twov2s_POST: - case ARM64::LD1Twov8b_POST: - case ARM64::LD1Twov4h_POST: - case ARM64::LD1Twov2s_POST: - case ARM64::LD1Twov1d_POST: - case ARM64::LD2Twov8b_POST: - case ARM64::LD2Twov4h_POST: - case ARM64::LD2Twov2s_POST: - DecodeDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Threev8b_POST: - case ARM64::ST1Threev4h_POST: - case ARM64::ST1Threev2s_POST: - case ARM64::ST1Threev1d_POST: - case ARM64::ST3Threev8b_POST: - case ARM64::ST3Threev4h_POST: - case ARM64::ST3Threev2s_POST: - case ARM64::LD1Threev8b_POST: - case ARM64::LD1Threev4h_POST: - case ARM64::LD1Threev2s_POST: - case ARM64::LD1Threev1d_POST: - case ARM64::LD3Threev8b_POST: - case ARM64::LD3Threev4h_POST: - case ARM64::LD3Threev2s_POST: - DecodeDDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Fourv8b_POST: - case ARM64::ST1Fourv4h_POST: - case ARM64::ST1Fourv2s_POST: - case ARM64::ST1Fourv1d_POST: - case ARM64::ST4Fourv8b_POST: - case ARM64::ST4Fourv4h_POST: - case ARM64::ST4Fourv2s_POST: - case ARM64::LD1Fourv8b_POST: - case ARM64::LD1Fourv4h_POST: - case ARM64::LD1Fourv2s_POST: - 
case ARM64::LD1Fourv1d_POST: - case ARM64::LD4Fourv8b_POST: - case ARM64::LD4Fourv4h_POST: - case ARM64::LD4Fourv2s_POST: - DecodeDDDDRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Twov16b_POST: - case ARM64::ST1Twov8h_POST: - case ARM64::ST1Twov4s_POST: - case ARM64::ST1Twov2d_POST: - case ARM64::ST2Twov16b_POST: - case ARM64::ST2Twov8h_POST: - case ARM64::ST2Twov4s_POST: - case ARM64::ST2Twov2d_POST: - case ARM64::LD1Twov16b_POST: - case ARM64::LD1Twov8h_POST: - case ARM64::LD1Twov4s_POST: - case ARM64::LD1Twov2d_POST: - case ARM64::LD2Twov16b_POST: - case ARM64::LD2Twov8h_POST: - case ARM64::LD2Twov4s_POST: - case ARM64::LD2Twov2d_POST: - DecodeQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Threev16b_POST: - case ARM64::ST1Threev8h_POST: - case ARM64::ST1Threev4s_POST: - case ARM64::ST1Threev2d_POST: - case ARM64::ST3Threev16b_POST: - case ARM64::ST3Threev8h_POST: - case ARM64::ST3Threev4s_POST: - case ARM64::ST3Threev2d_POST: - case ARM64::LD1Threev16b_POST: - case ARM64::LD1Threev8h_POST: - case ARM64::LD1Threev4s_POST: - case ARM64::LD1Threev2d_POST: - case ARM64::LD3Threev16b_POST: - case ARM64::LD3Threev8h_POST: - case ARM64::LD3Threev4s_POST: - case ARM64::LD3Threev2d_POST: - DecodeQQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - case ARM64::ST1Fourv16b_POST: - case ARM64::ST1Fourv8h_POST: - case ARM64::ST1Fourv4s_POST: - case ARM64::ST1Fourv2d_POST: - case ARM64::ST4Fourv16b_POST: - case ARM64::ST4Fourv8h_POST: - case ARM64::ST4Fourv4s_POST: - case ARM64::ST4Fourv2d_POST: - case ARM64::LD1Fourv16b_POST: - case ARM64::LD1Fourv8h_POST: - case ARM64::LD1Fourv4s_POST: - case ARM64::LD1Fourv2d_POST: - case ARM64::LD4Fourv16b_POST: - case ARM64::LD4Fourv8h_POST: - case ARM64::LD4Fourv4s_POST: - case ARM64::LD4Fourv2d_POST: - DecodeQQQQRegisterClass(Inst, Rd, Addr, Decoder); - break; - } - - DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - return Success; -} - -static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - uint64_t size = fieldFromInstruction(insn, 10, 2); - uint64_t S = fieldFromInstruction(insn, 12, 1); - uint64_t Q = fieldFromInstruction(insn, 30, 1); - uint64_t index = 0; - - switch (Inst.getOpcode()) { - case ARM64::ST1i8: - case ARM64::ST1i8_POST: - case ARM64::ST2i8: - case ARM64::ST2i8_POST: - case ARM64::ST3i8_POST: - case ARM64::ST3i8: - case ARM64::ST4i8_POST: - case ARM64::ST4i8: - index = (Q << 3) | (S << 2) | size; - break; - case ARM64::ST1i16: - case ARM64::ST1i16_POST: - case ARM64::ST2i16: - case ARM64::ST2i16_POST: - case ARM64::ST3i16_POST: - case ARM64::ST3i16: - case ARM64::ST4i16_POST: - case ARM64::ST4i16: - index = (Q << 2) | (S << 1) | (size >> 1); - break; - case ARM64::ST1i32: - case ARM64::ST1i32_POST: - case ARM64::ST2i32: - case ARM64::ST2i32_POST: - case ARM64::ST3i32_POST: - case ARM64::ST3i32: - case ARM64::ST4i32_POST: - case ARM64::ST4i32: - index = (Q << 1) | S; - break; - case ARM64::ST1i64: - case ARM64::ST1i64_POST: - case ARM64::ST2i64: - case ARM64::ST2i64_POST: - case ARM64::ST3i64_POST: - case ARM64::ST3i64: - case ARM64::ST4i64_POST: - case ARM64::ST4i64: - index = Q; - break; - } - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LD1Rv8b: - case ARM64::LD1Rv8b_POST: - case ARM64::LD1Rv4h: - case 
ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv2s: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv1d: - case ARM64::LD1Rv1d_POST: - DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD1Rv16b: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv8h: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv4s: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv2d: - case ARM64::LD1Rv2d_POST: - case ARM64::ST1i8: - case ARM64::ST1i8_POST: - case ARM64::ST1i16: - case ARM64::ST1i16_POST: - case ARM64::ST1i32: - case ARM64::ST1i32_POST: - case ARM64::ST1i64: - case ARM64::ST1i64_POST: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2Rv16b: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv8h: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv4s: - case ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d: - case ARM64::LD2Rv2d_POST: - case ARM64::ST2i8: - case ARM64::ST2i8_POST: - case ARM64::ST2i16: - case ARM64::ST2i16_POST: - case ARM64::ST2i32: - case ARM64::ST2i32_POST: - case ARM64::ST2i64: - case ARM64::ST2i64_POST: - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2Rv8b: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv4h: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv2s: - case ARM64::LD2Rv2s_POST: - case ARM64::LD2Rv1d: - case ARM64::LD2Rv1d_POST: - DecodeDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3Rv8b: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv4h: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv2s: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv1d: - case ARM64::LD3Rv1d_POST: - DecodeDDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3Rv16b: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv8h: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv4s: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d: - case ARM64::LD3Rv2d_POST: - case ARM64::ST3i8: - case ARM64::ST3i8_POST: - case ARM64::ST3i16: - case ARM64::ST3i16_POST: - case ARM64::ST3i32: - case ARM64::ST3i32_POST: - case ARM64::ST3i64: - case ARM64::ST3i64_POST: - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4Rv8b: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv4h: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv2s: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv1d: - case ARM64::LD4Rv1d_POST: - DecodeDDDDRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4Rv16b: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv8h: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv4s: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d: - case ARM64::LD4Rv2d_POST: - case ARM64::ST4i8: - case ARM64::ST4i8_POST: - case ARM64::ST4i16: - case ARM64::ST4i16_POST: - case ARM64::ST4i32: - case ARM64::ST4i32_POST: - case ARM64::ST4i64: - case ARM64::ST4i64_POST: - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - switch (Inst.getOpcode()) { - case ARM64::LD1Rv8b: - case ARM64::LD1Rv8b_POST: - case ARM64::LD1Rv16b: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv4h: - case ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv8h: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv4s: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv2s: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv1d: - case ARM64::LD1Rv1d_POST: - case ARM64::LD1Rv2d: - case ARM64::LD1Rv2d_POST: - case ARM64::LD2Rv8b: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv16b: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv4h: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv8h: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv2s: - case ARM64::LD2Rv2s_POST: - case ARM64::LD2Rv4s: - case 
ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d: - case ARM64::LD2Rv2d_POST: - case ARM64::LD2Rv1d: - case ARM64::LD2Rv1d_POST: - case ARM64::LD3Rv8b: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv16b: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv4h: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv8h: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv2s: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv4s: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d: - case ARM64::LD3Rv2d_POST: - case ARM64::LD3Rv1d: - case ARM64::LD3Rv1d_POST: - case ARM64::LD4Rv8b: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv16b: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv4h: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv8h: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv2s: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv4s: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d: - case ARM64::LD4Rv2d_POST: - case ARM64::LD4Rv1d: - case ARM64::LD4Rv1d_POST: - break; - default: - Inst.addOperand(MCOperand::CreateImm(index)); - } - - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - - switch (Inst.getOpcode()) { - case ARM64::ST1i8_POST: - case ARM64::ST1i16_POST: - case ARM64::ST1i32_POST: - case ARM64::ST1i64_POST: - case ARM64::LD1Rv8b_POST: - case ARM64::LD1Rv16b_POST: - case ARM64::LD1Rv4h_POST: - case ARM64::LD1Rv8h_POST: - case ARM64::LD1Rv2s_POST: - case ARM64::LD1Rv4s_POST: - case ARM64::LD1Rv1d_POST: - case ARM64::LD1Rv2d_POST: - case ARM64::ST2i8_POST: - case ARM64::ST2i16_POST: - case ARM64::ST2i32_POST: - case ARM64::ST2i64_POST: - case ARM64::LD2Rv8b_POST: - case ARM64::LD2Rv16b_POST: - case ARM64::LD2Rv4h_POST: - case ARM64::LD2Rv8h_POST: - case ARM64::LD2Rv2s_POST: - case ARM64::LD2Rv4s_POST: - case ARM64::LD2Rv2d_POST: - case ARM64::LD2Rv1d_POST: - case ARM64::ST3i8_POST: - case ARM64::ST3i16_POST: - case ARM64::ST3i32_POST: - case ARM64::ST3i64_POST: - case ARM64::LD3Rv8b_POST: - case ARM64::LD3Rv16b_POST: - case ARM64::LD3Rv4h_POST: - case ARM64::LD3Rv8h_POST: - case ARM64::LD3Rv2s_POST: - case ARM64::LD3Rv4s_POST: - case ARM64::LD3Rv2d_POST: - case ARM64::LD3Rv1d_POST: - case ARM64::ST4i8_POST: - case ARM64::ST4i16_POST: - case ARM64::ST4i32_POST: - case ARM64::ST4i64_POST: - case ARM64::LD4Rv8b_POST: - case ARM64::LD4Rv16b_POST: - case ARM64::LD4Rv4h_POST: - case ARM64::LD4Rv8h_POST: - case ARM64::LD4Rv2s_POST: - case ARM64::LD4Rv4s_POST: - case ARM64::LD4Rv2d_POST: - case ARM64::LD4Rv1d_POST: - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - return Success; -} - -static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn, - uint64_t Addr, - const void *Decoder) { - uint64_t Rt = fieldFromInstruction(insn, 0, 5); - uint64_t Rn = fieldFromInstruction(insn, 5, 5); - uint64_t Rm = fieldFromInstruction(insn, 16, 5); - uint64_t size = fieldFromInstruction(insn, 10, 2); - uint64_t S = fieldFromInstruction(insn, 12, 1); - uint64_t Q = fieldFromInstruction(insn, 30, 1); - uint64_t index = 0; - - switch (Inst.getOpcode()) { - case ARM64::LD1i8: - case ARM64::LD1i8_POST: - case ARM64::LD2i8: - case ARM64::LD2i8_POST: - case ARM64::LD3i8_POST: - case ARM64::LD3i8: - case ARM64::LD4i8_POST: - case ARM64::LD4i8: - index = (Q << 3) | (S << 2) | size; - break; - case ARM64::LD1i16: - case ARM64::LD1i16_POST: - case ARM64::LD2i16: - case ARM64::LD2i16_POST: - case ARM64::LD3i16_POST: - case ARM64::LD3i16: - case ARM64::LD4i16_POST: - case ARM64::LD4i16: - index = (Q << 2) | (S << 1) | (size >> 1); - break; - case ARM64::LD1i32: - case ARM64::LD1i32_POST: - case 
ARM64::LD2i32: - case ARM64::LD2i32_POST: - case ARM64::LD3i32_POST: - case ARM64::LD3i32: - case ARM64::LD4i32_POST: - case ARM64::LD4i32: - index = (Q << 1) | S; - break; - case ARM64::LD1i64: - case ARM64::LD1i64_POST: - case ARM64::LD2i64: - case ARM64::LD2i64_POST: - case ARM64::LD3i64_POST: - case ARM64::LD3i64: - case ARM64::LD4i64_POST: - case ARM64::LD4i64: - index = Q; - break; - } - - switch (Inst.getOpcode()) { - default: - return Fail; - case ARM64::LD1i8: - case ARM64::LD1i8_POST: - case ARM64::LD1i16: - case ARM64::LD1i16_POST: - case ARM64::LD1i32: - case ARM64::LD1i32_POST: - case ARM64::LD1i64: - case ARM64::LD1i64_POST: - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD2i8: - case ARM64::LD2i8_POST: - case ARM64::LD2i16: - case ARM64::LD2i16_POST: - case ARM64::LD2i32: - case ARM64::LD2i32_POST: - case ARM64::LD2i64: - case ARM64::LD2i64_POST: - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD3i8: - case ARM64::LD3i8_POST: - case ARM64::LD3i16: - case ARM64::LD3i16_POST: - case ARM64::LD3i32: - case ARM64::LD3i32_POST: - case ARM64::LD3i64: - case ARM64::LD3i64_POST: - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - case ARM64::LD4i8: - case ARM64::LD4i8_POST: - case ARM64::LD4i16: - case ARM64::LD4i16_POST: - case ARM64::LD4i32: - case ARM64::LD4i32_POST: - case ARM64::LD4i64: - case ARM64::LD4i64_POST: - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder); - break; - } - - Inst.addOperand(MCOperand::CreateImm(index)); - DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder); - - switch (Inst.getOpcode()) { - case ARM64::LD1i8_POST: - case ARM64::LD1i16_POST: - case ARM64::LD1i32_POST: - case ARM64::LD1i64_POST: - case ARM64::LD2i8_POST: - case ARM64::LD2i16_POST: - case ARM64::LD2i32_POST: - case ARM64::LD2i64_POST: - case ARM64::LD3i8_POST: - case ARM64::LD3i16_POST: - case ARM64::LD3i32_POST: - case ARM64::LD3i64_POST: - case ARM64::LD4i8_POST: - case ARM64::LD4i16_POST: - case ARM64::LD4i32_POST: - case ARM64::LD4i64_POST: - DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder); - break; - } - return Success; -} diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h b/lib/Target/ARM64/Disassembler/ARM64Disassembler.h deleted file mode 100644 index 35efc8d..0000000 --- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h +++ /dev/null @@ -1,54 +0,0 @@ -//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64DISASSEMBLER_H -#define ARM64DISASSEMBLER_H - -#include "llvm/MC/MCDisassembler.h" - -namespace llvm { - -class MCInst; -class MemoryObject; -class raw_ostream; - -class ARM64Disassembler : public MCDisassembler { -public: - ARM64Disassembler(const MCSubtargetInfo &STI) : MCDisassembler(STI) {} - - ~ARM64Disassembler() {} - - /// getInstruction - See MCDisassembler. 
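The getOpInfo and SymbolLookUp hooks that tryAddingSymbolicOperand (declared below) consults are whatever the client registered when it created the disassembler through the llvm-c interface; with none registered the method returns false and plain immediates are printed. A rough sketch of such a client in C (the triple, the callback body and the encoded bytes are illustrative assumptions, not part of this patch):

  #include "llvm-c/Disassembler.h"
  #include "llvm-c/Target.h"
  #include <stdint.h>
  #include <stdio.h>

  // Placeholder lookup: a real client would consult its symbol table.
  static const char *lookupCB(void *DisInfo, uint64_t ReferenceValue,
                              uint64_t *ReferenceType, uint64_t ReferencePC,
                              const char **ReferenceName) {
    *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
    *ReferenceName = NULL;
    return NULL; // no symbol known at this address
  }

  int main(void) {
    LLVMInitializeAllTargetInfos();
    LLVMInitializeAllTargetMCs();
    LLVMInitializeAllDisassemblers();
    // GetOpInfo left NULL: only the symbol-lookup path is exercised.
    LLVMDisasmContextRef DC =
        LLVMCreateDisasm("arm64-apple-darwin", /*DisInfo=*/NULL,
                         /*TagType=*/0, /*GetOpInfo=*/NULL, lookupCB);
    uint8_t Bytes[] = {0x01, 0x00, 0x00, 0x94}; // bl #4
    char Text[128];
    if (LLVMDisasmInstruction(DC, Bytes, sizeof(Bytes), /*PC=*/0x1000,
                              Text, sizeof(Text)))
      printf("%s\n", Text);
    LLVMDisasmDispose(DC);
    return 0;
  }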
- MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; - - /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic - /// operand in place of the immediate Value in the MCInst. The immediate - /// Value has not had any PC adjustment made by the caller. If the instruction - /// adds the PC to the immediate Value then InstsAddsAddressToValue is true, - /// else false. If the getOpInfo() function was set as part of the - /// setupForSymbolicDisassembly() call then that function is called to get any - /// symbolic information at the Address for this instrution. If that returns - /// non-zero then the symbolic information it returns is used to create an - /// MCExpr and that is added as an operand to the MCInst. This function - /// returns true if it adds an operand to the MCInst and false otherwise. - bool tryAddingSymbolicOperand(uint64_t Address, int Value, - bool InstsAddsAddressToValue, uint64_t InstSize, - MCInst &MI, uint32_t insn = 0) const; -}; - -} // namespace llvm - -#endif diff --git a/lib/Target/ARM64/Disassembler/CMakeLists.txt b/lib/Target/ARM64/Disassembler/CMakeLists.txt deleted file mode 100644 index ad998c2..0000000 --- a/lib/Target/ARM64/Disassembler/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64Disassembler - ARM64Disassembler.cpp - ) -# workaround for hanging compilation on MSVC8, 9 and 10 -#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) -#set_property( -# SOURCE ARMDisassembler.cpp -# PROPERTY COMPILE_FLAGS "/Od" -# ) -#endif() -add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen) diff --git a/lib/Target/ARM64/Disassembler/LLVMBuild.txt b/lib/Target/ARM64/Disassembler/LLVMBuild.txt deleted file mode 100644 index 5935ee6..0000000 --- a/lib/Target/ARM64/Disassembler/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Disassembler -parent = ARM64 -required_libraries = ARM64Desc ARM64Info MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/Disassembler/Makefile b/lib/Target/ARM64/Disassembler/Makefile deleted file mode 100644 index 479d00c..0000000 --- a/lib/Target/ARM64/Disassembler/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Disassembler - -# Hack: we need to include 'main' arm target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. 
-I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp deleted file mode 100644 index bb90707..0000000 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp +++ /dev/null @@ -1,1428 +0,0 @@ -//===-- ARM64InstPrinter.cpp - Convert ARM64 MCInst to assembly syntax ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM64 MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "ARM64InstPrinter.h" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter.inc" -#define GET_INSTRUCTION_NAME -#define PRINT_ALIAS_INSTR -#include "ARM64GenAsmWriter1.inc" - -ARM64InstPrinter::ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : MCInstPrinter(MAI, MII, MRI) { - // Initialize the set of available features. - setAvailableFeatures(STI.getFeatureBits()); -} - -ARM64AppleInstPrinter::ARM64AppleInstPrinter(const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) - : ARM64InstPrinter(MAI, MII, MRI, STI) {} - -void ARM64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - // This is for .cfi directives. - OS << getRegisterName(RegNo); -} - -void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - // Check for special encodings and print the cannonical alias instead. - - unsigned Opcode = MI->getOpcode(); - - if (Opcode == ARM64::SYS || Opcode == ARM64::SYSxt) - if (printSysAlias(MI, O)) { - printAnnotation(O, Annot); - return; - } - - // TBZ/TBNZ should print the register operand as a Wreg if the bit - // number is < 32. - if ((Opcode == ARM64::TBNZ || Opcode == ARM64::TBZ) && - MI->getOperand(1).getImm() < 32) { - MCInst newMI = *MI; - unsigned Reg = MI->getOperand(0).getReg(); - newMI.getOperand(0).setReg(getWRegFromXReg(Reg)); - printInstruction(&newMI, O); - printAnnotation(O, Annot); - return; - } - - // SBFM/UBFM should print to a nicer aliased form if possible. - if (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri || - Opcode == ARM64::UBFMXri || Opcode == ARM64::UBFMWri) { - const MCOperand &Op0 = MI->getOperand(0); - const MCOperand &Op1 = MI->getOperand(1); - const MCOperand &Op2 = MI->getOperand(2); - const MCOperand &Op3 = MI->getOperand(3); - - if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) { - bool IsSigned = (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri); - const char *AsmMnemonic = 0; - - switch (Op3.getImm()) { - default: - break; - case 7: - AsmMnemonic = IsSigned ? "sxtb" : "uxtb"; - break; - case 15: - AsmMnemonic = IsSigned ? "sxth" : "uxth"; - break; - case 31: - AsmMnemonic = IsSigned ? 
"sxtw" : "uxtw"; - break; - } - - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(Op1.getReg()); - printAnnotation(O, Annot); - return; - } - } - - // All immediate shifts are aliases, implemented using the Bitfield - // instruction. In all cases the immediate shift amount shift must be in - // the range 0 to (reg.size -1). - if (Op2.isImm() && Op3.isImm()) { - const char *AsmMnemonic = 0; - int shift = 0; - int64_t immr = Op2.getImm(); - int64_t imms = Op3.getImm(); - if (Opcode == ARM64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) { - AsmMnemonic = "lsl"; - shift = 31 - imms; - } else if (Opcode == ARM64::UBFMXri && imms != 0x3f && - ((imms + 1 == immr))) { - AsmMnemonic = "lsl"; - shift = 63 - imms; - } else if (Opcode == ARM64::UBFMWri && imms == 0x1f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::UBFMXri && imms == 0x3f) { - AsmMnemonic = "lsr"; - shift = immr; - } else if (Opcode == ARM64::SBFMWri && imms == 0x1f) { - AsmMnemonic = "asr"; - shift = immr; - } else if (Opcode == ARM64::SBFMXri && imms == 0x3f) { - AsmMnemonic = "asr"; - shift = immr; - } - if (AsmMnemonic) { - O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg()) - << ", " << getRegisterName(Op1.getReg()) << ", #" << shift; - printAnnotation(O, Annot); - return; - } - } - } - - // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift - // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be - // printed. - if ((Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi || - Opcode == ARM64::MOVNXi || Opcode == ARM64::MOVNWi) && - MI->getOperand(1).isExpr()) { - if (Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi) - O << "\tmovz\t"; - else - O << "\tmovn\t"; - - O << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(1).getExpr(); - return; - } - - if ((Opcode == ARM64::MOVKXi || Opcode == ARM64::MOVKWi) && - MI->getOperand(2).isExpr()) { - O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #" - << *MI->getOperand(2).getExpr(); - return; - } - - // ANDS WZR, Wn, #imm ==> TST Wn, #imm - // ANDS XZR, Xn, #imm ==> TST Xn, #imm - if (Opcode == ARM64::ANDSWri && MI->getOperand(0).getReg() == ARM64::WZR) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printLogicalImm32(MI, 2, O); - return; - } - if (Opcode == ARM64::ANDSXri && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printLogicalImm64(MI, 2, O); - return; - } - // ANDS WZR, Wn, Wm{, lshift #imm} ==> TST Wn{, lshift #imm} - // ANDS XZR, Xn, Xm{, lshift #imm} ==> TST Xn{, lshift #imm} - if ((Opcode == ARM64::ANDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ANDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - - // SUBS WZR, Wn, #imm ==> CMP Wn, #imm - // SUBS XZR, Xn, #imm ==> CMP Xn, #imm - if ((Opcode == ARM64::SUBSWri && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::SUBSXri && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printAddSubImm(MI, 2, O); - return; - } - // SUBS WZR, Wn, Wm{, lshift #imm} ==> CMP Wn, Wm{, lshift #imm} - // SUBS XZR, Xn, Xm{, lshift #imm} ==> CMP Xn, Xm{, lshift #imm} - if ((Opcode == ARM64::SUBSWrs && MI->getOperand(0).getReg() == 
ARM64::WZR) || - (Opcode == ARM64::SUBSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - // SUBS XZR, Xn, Wm, uxtb #imm ==> CMP Xn, uxtb #imm - // SUBS WZR, Wn, Xm, uxtb #imm ==> CMP Wn, uxtb #imm - if ((Opcode == ARM64::SUBSXrx && MI->getOperand(0).getReg() == ARM64::XZR) || - (Opcode == ARM64::SUBSWrx && MI->getOperand(0).getReg() == ARM64::WZR)) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printExtendedRegister(MI, 2, O); - return; - } - // SUBS XZR, Xn, Xm, uxtx #imm ==> CMP Xn, uxtb #imm - if (Opcode == ARM64::SUBSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", " - << getRegisterName(MI->getOperand(2).getReg()); - printExtend(MI, 3, O); - return; - } - - // ADDS WZR, Wn, #imm ==> CMN Wn, #imm - // ADDS XZR, Xn, #imm ==> CMN Xn, #imm - if ((Opcode == ARM64::ADDSWri && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ADDSXri && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printAddSubImm(MI, 2, O); - return; - } - // ADDS WZR, Wn, Wm{, lshift #imm} ==> CMN Wn, Wm{, lshift #imm} - // ADDS XZR, Xn, Xm{, lshift #imm} ==> CMN Xn, Xm{, lshift #imm} - if ((Opcode == ARM64::ADDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) || - (Opcode == ARM64::ADDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printShiftedRegister(MI, 2, O); - return; - } - // ADDS XZR, Xn, Wm, uxtb #imm ==> CMN Xn, uxtb #imm - if (Opcode == ARM64::ADDSXrx && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "; - printExtendedRegister(MI, 2, O); - return; - } - // ADDS XZR, Xn, Xm, uxtx #imm ==> CMN Xn, uxtb #imm - if (Opcode == ARM64::ADDSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) { - O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", " - << getRegisterName(MI->getOperand(2).getReg()); - printExtend(MI, 3, O); - return; - } - - if (!printAliasInstr(MI, O)) - printInstruction(MI, O); - - printAnnotation(O, Annot); -} - -static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout, - bool &IsTbx) { - switch (Opcode) { - case ARM64::TBXv8i8One: - case ARM64::TBXv8i8Two: - case ARM64::TBXv8i8Three: - case ARM64::TBXv8i8Four: - IsTbx = true; - Layout = ".8b"; - return true; - case ARM64::TBLv8i8One: - case ARM64::TBLv8i8Two: - case ARM64::TBLv8i8Three: - case ARM64::TBLv8i8Four: - IsTbx = false; - Layout = ".8b"; - return true; - case ARM64::TBXv16i8One: - case ARM64::TBXv16i8Two: - case ARM64::TBXv16i8Three: - case ARM64::TBXv16i8Four: - IsTbx = true; - Layout = ".16b"; - return true; - case ARM64::TBLv16i8One: - case ARM64::TBLv16i8Two: - case ARM64::TBLv16i8Three: - case ARM64::TBLv16i8Four: - IsTbx = false; - Layout = ".16b"; - return true; - default: - return false; - } -} - -struct LdStNInstrDesc { - unsigned Opcode; - const char *Mnemonic; - const char *Layout; - int LaneOperand; - int NaturalOffset; -}; - -static LdStNInstrDesc LdStNInstInfo[] = { - { ARM64::LD1i8, "ld1", ".b", 2, 0 }, - { ARM64::LD1i16, "ld1", ".h", 2, 0 }, - { ARM64::LD1i32, "ld1", ".s", 2, 0 }, - { ARM64::LD1i64, "ld1", ".d", 2, 0 }, - { ARM64::LD1i8_POST, "ld1", ".b", 2, 1 }, - { ARM64::LD1i16_POST, "ld1", ".h", 2, 2 }, - { ARM64::LD1i32_POST, "ld1", ".s", 2, 
4 }, - { ARM64::LD1i64_POST, "ld1", ".d", 2, 8 }, - { ARM64::LD1Rv16b, "ld1r", ".16b", 0, 0 }, - { ARM64::LD1Rv8h, "ld1r", ".8h", 0, 0 }, - { ARM64::LD1Rv4s, "ld1r", ".4s", 0, 0 }, - { ARM64::LD1Rv2d, "ld1r", ".2d", 0, 0 }, - { ARM64::LD1Rv8b, "ld1r", ".8b", 0, 0 }, - { ARM64::LD1Rv4h, "ld1r", ".4h", 0, 0 }, - { ARM64::LD1Rv2s, "ld1r", ".2s", 0, 0 }, - { ARM64::LD1Rv1d, "ld1r", ".1d", 0, 0 }, - { ARM64::LD1Rv16b_POST, "ld1r", ".16b", 0, 1 }, - { ARM64::LD1Rv8h_POST, "ld1r", ".8h", 0, 2 }, - { ARM64::LD1Rv4s_POST, "ld1r", ".4s", 0, 4 }, - { ARM64::LD1Rv2d_POST, "ld1r", ".2d", 0, 8 }, - { ARM64::LD1Rv8b_POST, "ld1r", ".8b", 0, 1 }, - { ARM64::LD1Rv4h_POST, "ld1r", ".4h", 0, 2 }, - { ARM64::LD1Rv2s_POST, "ld1r", ".2s", 0, 4 }, - { ARM64::LD1Rv1d_POST, "ld1r", ".1d", 0, 8 }, - { ARM64::LD1Onev16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Onev8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Onev4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Onev2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Onev8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Onev4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Onev2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Onev1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Onev16b_POST, "ld1", ".16b", 0, 16 }, - { ARM64::LD1Onev8h_POST, "ld1", ".8h", 0, 16 }, - { ARM64::LD1Onev4s_POST, "ld1", ".4s", 0, 16 }, - { ARM64::LD1Onev2d_POST, "ld1", ".2d", 0, 16 }, - { ARM64::LD1Onev8b_POST, "ld1", ".8b", 0, 8 }, - { ARM64::LD1Onev4h_POST, "ld1", ".4h", 0, 8 }, - { ARM64::LD1Onev2s_POST, "ld1", ".2s", 0, 8 }, - { ARM64::LD1Onev1d_POST, "ld1", ".1d", 0, 8 }, - { ARM64::LD1Twov16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Twov8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Twov4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Twov2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Twov8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Twov4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Twov2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Twov1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Twov16b_POST, "ld1", ".16b", 0, 32 }, - { ARM64::LD1Twov8h_POST, "ld1", ".8h", 0, 32 }, - { ARM64::LD1Twov4s_POST, "ld1", ".4s", 0, 32 }, - { ARM64::LD1Twov2d_POST, "ld1", ".2d", 0, 32 }, - { ARM64::LD1Twov8b_POST, "ld1", ".8b", 0, 16 }, - { ARM64::LD1Twov4h_POST, "ld1", ".4h", 0, 16 }, - { ARM64::LD1Twov2s_POST, "ld1", ".2s", 0, 16 }, - { ARM64::LD1Twov1d_POST, "ld1", ".1d", 0, 16 }, - { ARM64::LD1Threev16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Threev8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Threev4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Threev2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Threev8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Threev4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Threev2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Threev1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Threev16b_POST, "ld1", ".16b", 0, 48 }, - { ARM64::LD1Threev8h_POST, "ld1", ".8h", 0, 48 }, - { ARM64::LD1Threev4s_POST, "ld1", ".4s", 0, 48 }, - { ARM64::LD1Threev2d_POST, "ld1", ".2d", 0, 48 }, - { ARM64::LD1Threev8b_POST, "ld1", ".8b", 0, 24 }, - { ARM64::LD1Threev4h_POST, "ld1", ".4h", 0, 24 }, - { ARM64::LD1Threev2s_POST, "ld1", ".2s", 0, 24 }, - { ARM64::LD1Threev1d_POST, "ld1", ".1d", 0, 24 }, - { ARM64::LD1Fourv16b, "ld1", ".16b", 0, 0 }, - { ARM64::LD1Fourv8h, "ld1", ".8h", 0, 0 }, - { ARM64::LD1Fourv4s, "ld1", ".4s", 0, 0 }, - { ARM64::LD1Fourv2d, "ld1", ".2d", 0, 0 }, - { ARM64::LD1Fourv8b, "ld1", ".8b", 0, 0 }, - { ARM64::LD1Fourv4h, "ld1", ".4h", 0, 0 }, - { ARM64::LD1Fourv2s, "ld1", ".2s", 0, 0 }, - { ARM64::LD1Fourv1d, "ld1", ".1d", 0, 0 }, - { ARM64::LD1Fourv16b_POST, "ld1", ".16b", 0, 64 }, - { ARM64::LD1Fourv8h_POST, "ld1", ".8h", 0, 64 }, - { 
ARM64::LD1Fourv4s_POST, "ld1", ".4s", 0, 64 }, - { ARM64::LD1Fourv2d_POST, "ld1", ".2d", 0, 64 }, - { ARM64::LD1Fourv8b_POST, "ld1", ".8b", 0, 32 }, - { ARM64::LD1Fourv4h_POST, "ld1", ".4h", 0, 32 }, - { ARM64::LD1Fourv2s_POST, "ld1", ".2s", 0, 32 }, - { ARM64::LD1Fourv1d_POST, "ld1", ".1d", 0, 32 }, - { ARM64::LD2i8, "ld2", ".b", 2, 0 }, - { ARM64::LD2i16, "ld2", ".h", 2, 0 }, - { ARM64::LD2i32, "ld2", ".s", 2, 0 }, - { ARM64::LD2i64, "ld2", ".d", 2, 0 }, - { ARM64::LD2i8_POST, "ld2", ".b", 2, 2 }, - { ARM64::LD2i16_POST, "ld2", ".h", 2, 4 }, - { ARM64::LD2i32_POST, "ld2", ".s", 2, 8 }, - { ARM64::LD2i64_POST, "ld2", ".d", 2, 16 }, - { ARM64::LD2Rv16b, "ld2r", ".16b", 0, 0 }, - { ARM64::LD2Rv8h, "ld2r", ".8h", 0, 0 }, - { ARM64::LD2Rv4s, "ld2r", ".4s", 0, 0 }, - { ARM64::LD2Rv2d, "ld2r", ".2d", 0, 0 }, - { ARM64::LD2Rv8b, "ld2r", ".8b", 0, 0 }, - { ARM64::LD2Rv4h, "ld2r", ".4h", 0, 0 }, - { ARM64::LD2Rv2s, "ld2r", ".2s", 0, 0 }, - { ARM64::LD2Rv1d, "ld2r", ".1d", 0, 0 }, - { ARM64::LD2Rv16b_POST, "ld2r", ".16b", 0, 2 }, - { ARM64::LD2Rv8h_POST, "ld2r", ".8h", 0, 4 }, - { ARM64::LD2Rv4s_POST, "ld2r", ".4s", 0, 8 }, - { ARM64::LD2Rv2d_POST, "ld2r", ".2d", 0, 16 }, - { ARM64::LD2Rv8b_POST, "ld2r", ".8b", 0, 2 }, - { ARM64::LD2Rv4h_POST, "ld2r", ".4h", 0, 4 }, - { ARM64::LD2Rv2s_POST, "ld2r", ".2s", 0, 8 }, - { ARM64::LD2Rv1d_POST, "ld2r", ".1d", 0, 16 }, - { ARM64::LD2Twov16b, "ld2", ".16b", 0, 0 }, - { ARM64::LD2Twov8h, "ld2", ".8h", 0, 0 }, - { ARM64::LD2Twov4s, "ld2", ".4s", 0, 0 }, - { ARM64::LD2Twov2d, "ld2", ".2d", 0, 0 }, - { ARM64::LD2Twov8b, "ld2", ".8b", 0, 0 }, - { ARM64::LD2Twov4h, "ld2", ".4h", 0, 0 }, - { ARM64::LD2Twov2s, "ld2", ".2s", 0, 0 }, - { ARM64::LD2Twov16b_POST, "ld2", ".16b", 0, 32 }, - { ARM64::LD2Twov8h_POST, "ld2", ".8h", 0, 32 }, - { ARM64::LD2Twov4s_POST, "ld2", ".4s", 0, 32 }, - { ARM64::LD2Twov2d_POST, "ld2", ".2d", 0, 32 }, - { ARM64::LD2Twov8b_POST, "ld2", ".8b", 0, 16 }, - { ARM64::LD2Twov4h_POST, "ld2", ".4h", 0, 16 }, - { ARM64::LD2Twov2s_POST, "ld2", ".2s", 0, 16 }, - { ARM64::LD3i8, "ld3", ".b", 2, 0 }, - { ARM64::LD3i16, "ld3", ".h", 2, 0 }, - { ARM64::LD3i32, "ld3", ".s", 2, 0 }, - { ARM64::LD3i64, "ld3", ".d", 2, 0 }, - { ARM64::LD3i8_POST, "ld3", ".b", 2, 3 }, - { ARM64::LD3i16_POST, "ld3", ".h", 2, 6 }, - { ARM64::LD3i32_POST, "ld3", ".s", 2, 12 }, - { ARM64::LD3i64_POST, "ld3", ".d", 2, 24 }, - { ARM64::LD3Rv16b, "ld3r", ".16b", 0, 0 }, - { ARM64::LD3Rv8h, "ld3r", ".8h", 0, 0 }, - { ARM64::LD3Rv4s, "ld3r", ".4s", 0, 0 }, - { ARM64::LD3Rv2d, "ld3r", ".2d", 0, 0 }, - { ARM64::LD3Rv8b, "ld3r", ".8b", 0, 0 }, - { ARM64::LD3Rv4h, "ld3r", ".4h", 0, 0 }, - { ARM64::LD3Rv2s, "ld3r", ".2s", 0, 0 }, - { ARM64::LD3Rv1d, "ld3r", ".1d", 0, 0 }, - { ARM64::LD3Rv16b_POST, "ld3r", ".16b", 0, 3 }, - { ARM64::LD3Rv8h_POST, "ld3r", ".8h", 0, 6 }, - { ARM64::LD3Rv4s_POST, "ld3r", ".4s", 0, 12 }, - { ARM64::LD3Rv2d_POST, "ld3r", ".2d", 0, 24 }, - { ARM64::LD3Rv8b_POST, "ld3r", ".8b", 0, 3 }, - { ARM64::LD3Rv4h_POST, "ld3r", ".4h", 0, 6 }, - { ARM64::LD3Rv2s_POST, "ld3r", ".2s", 0, 12 }, - { ARM64::LD3Rv1d_POST, "ld3r", ".1d", 0, 24 }, - { ARM64::LD3Threev16b, "ld3", ".16b", 0, 0 }, - { ARM64::LD3Threev8h, "ld3", ".8h", 0, 0 }, - { ARM64::LD3Threev4s, "ld3", ".4s", 0, 0 }, - { ARM64::LD3Threev2d, "ld3", ".2d", 0, 0 }, - { ARM64::LD3Threev8b, "ld3", ".8b", 0, 0 }, - { ARM64::LD3Threev4h, "ld3", ".4h", 0, 0 }, - { ARM64::LD3Threev2s, "ld3", ".2s", 0, 0 }, - { ARM64::LD3Threev16b_POST, "ld3", ".16b", 0, 48 }, - { ARM64::LD3Threev8h_POST, "ld3", ".8h", 0, 48 }, - { 
ARM64::LD3Threev4s_POST, "ld3", ".4s", 0, 48 }, - { ARM64::LD3Threev2d_POST, "ld3", ".2d", 0, 48 }, - { ARM64::LD3Threev8b_POST, "ld3", ".8b", 0, 24 }, - { ARM64::LD3Threev4h_POST, "ld3", ".4h", 0, 24 }, - { ARM64::LD3Threev2s_POST, "ld3", ".2s", 0, 24 }, - { ARM64::LD4i8, "ld4", ".b", 2, 0 }, - { ARM64::LD4i16, "ld4", ".h", 2, 0 }, - { ARM64::LD4i32, "ld4", ".s", 2, 0 }, - { ARM64::LD4i64, "ld4", ".d", 2, 0 }, - { ARM64::LD4i8_POST, "ld4", ".b", 2, 4 }, - { ARM64::LD4i16_POST, "ld4", ".h", 2, 8 }, - { ARM64::LD4i32_POST, "ld4", ".s", 2, 16 }, - { ARM64::LD4i64_POST, "ld4", ".d", 2, 32 }, - { ARM64::LD4Rv16b, "ld4r", ".16b", 0, 0 }, - { ARM64::LD4Rv8h, "ld4r", ".8h", 0, 0 }, - { ARM64::LD4Rv4s, "ld4r", ".4s", 0, 0 }, - { ARM64::LD4Rv2d, "ld4r", ".2d", 0, 0 }, - { ARM64::LD4Rv8b, "ld4r", ".8b", 0, 0 }, - { ARM64::LD4Rv4h, "ld4r", ".4h", 0, 0 }, - { ARM64::LD4Rv2s, "ld4r", ".2s", 0, 0 }, - { ARM64::LD4Rv1d, "ld4r", ".1d", 0, 0 }, - { ARM64::LD4Rv16b_POST, "ld4r", ".16b", 0, 4 }, - { ARM64::LD4Rv8h_POST, "ld4r", ".8h", 0, 8 }, - { ARM64::LD4Rv4s_POST, "ld4r", ".4s", 0, 16 }, - { ARM64::LD4Rv2d_POST, "ld4r", ".2d", 0, 32 }, - { ARM64::LD4Rv8b_POST, "ld4r", ".8b", 0, 4 }, - { ARM64::LD4Rv4h_POST, "ld4r", ".4h", 0, 8 }, - { ARM64::LD4Rv2s_POST, "ld4r", ".2s", 0, 16 }, - { ARM64::LD4Rv1d_POST, "ld4r", ".1d", 0, 32 }, - { ARM64::LD4Fourv16b, "ld4", ".16b", 0, 0 }, - { ARM64::LD4Fourv8h, "ld4", ".8h", 0, 0 }, - { ARM64::LD4Fourv4s, "ld4", ".4s", 0, 0 }, - { ARM64::LD4Fourv2d, "ld4", ".2d", 0, 0 }, - { ARM64::LD4Fourv8b, "ld4", ".8b", 0, 0 }, - { ARM64::LD4Fourv4h, "ld4", ".4h", 0, 0 }, - { ARM64::LD4Fourv2s, "ld4", ".2s", 0, 0 }, - { ARM64::LD4Fourv16b_POST, "ld4", ".16b", 0, 64 }, - { ARM64::LD4Fourv8h_POST, "ld4", ".8h", 0, 64 }, - { ARM64::LD4Fourv4s_POST, "ld4", ".4s", 0, 64 }, - { ARM64::LD4Fourv2d_POST, "ld4", ".2d", 0, 64 }, - { ARM64::LD4Fourv8b_POST, "ld4", ".8b", 0, 32 }, - { ARM64::LD4Fourv4h_POST, "ld4", ".4h", 0, 32 }, - { ARM64::LD4Fourv2s_POST, "ld4", ".2s", 0, 32 }, - { ARM64::ST1i8, "st1", ".b", 1, 0 }, - { ARM64::ST1i16, "st1", ".h", 1, 0 }, - { ARM64::ST1i32, "st1", ".s", 1, 0 }, - { ARM64::ST1i64, "st1", ".d", 1, 0 }, - { ARM64::ST1i8_POST, "st1", ".b", 1, 1 }, - { ARM64::ST1i16_POST, "st1", ".h", 1, 2 }, - { ARM64::ST1i32_POST, "st1", ".s", 1, 4 }, - { ARM64::ST1i64_POST, "st1", ".d", 1, 8 }, - { ARM64::ST1Onev16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Onev8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Onev4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Onev2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Onev8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Onev4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Onev2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Onev1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Onev16b_POST, "st1", ".16b", 0, 16 }, - { ARM64::ST1Onev8h_POST, "st1", ".8h", 0, 16 }, - { ARM64::ST1Onev4s_POST, "st1", ".4s", 0, 16 }, - { ARM64::ST1Onev2d_POST, "st1", ".2d", 0, 16 }, - { ARM64::ST1Onev8b_POST, "st1", ".8b", 0, 8 }, - { ARM64::ST1Onev4h_POST, "st1", ".4h", 0, 8 }, - { ARM64::ST1Onev2s_POST, "st1", ".2s", 0, 8 }, - { ARM64::ST1Onev1d_POST, "st1", ".1d", 0, 8 }, - { ARM64::ST1Twov16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Twov8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Twov4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Twov2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Twov8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Twov4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Twov2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Twov1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Twov16b_POST, "st1", ".16b", 0, 32 }, - { ARM64::ST1Twov8h_POST, "st1", ".8h", 0, 32 }, 
- { ARM64::ST1Twov4s_POST, "st1", ".4s", 0, 32 }, - { ARM64::ST1Twov2d_POST, "st1", ".2d", 0, 32 }, - { ARM64::ST1Twov8b_POST, "st1", ".8b", 0, 16 }, - { ARM64::ST1Twov4h_POST, "st1", ".4h", 0, 16 }, - { ARM64::ST1Twov2s_POST, "st1", ".2s", 0, 16 }, - { ARM64::ST1Twov1d_POST, "st1", ".1d", 0, 16 }, - { ARM64::ST1Threev16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Threev8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Threev4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Threev2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Threev8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Threev4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Threev2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Threev1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Threev16b_POST, "st1", ".16b", 0, 48 }, - { ARM64::ST1Threev8h_POST, "st1", ".8h", 0, 48 }, - { ARM64::ST1Threev4s_POST, "st1", ".4s", 0, 48 }, - { ARM64::ST1Threev2d_POST, "st1", ".2d", 0, 48 }, - { ARM64::ST1Threev8b_POST, "st1", ".8b", 0, 24 }, - { ARM64::ST1Threev4h_POST, "st1", ".4h", 0, 24 }, - { ARM64::ST1Threev2s_POST, "st1", ".2s", 0, 24 }, - { ARM64::ST1Threev1d_POST, "st1", ".1d", 0, 24 }, - { ARM64::ST1Fourv16b, "st1", ".16b", 0, 0 }, - { ARM64::ST1Fourv8h, "st1", ".8h", 0, 0 }, - { ARM64::ST1Fourv4s, "st1", ".4s", 0, 0 }, - { ARM64::ST1Fourv2d, "st1", ".2d", 0, 0 }, - { ARM64::ST1Fourv8b, "st1", ".8b", 0, 0 }, - { ARM64::ST1Fourv4h, "st1", ".4h", 0, 0 }, - { ARM64::ST1Fourv2s, "st1", ".2s", 0, 0 }, - { ARM64::ST1Fourv1d, "st1", ".1d", 0, 0 }, - { ARM64::ST1Fourv16b_POST, "st1", ".16b", 0, 64 }, - { ARM64::ST1Fourv8h_POST, "st1", ".8h", 0, 64 }, - { ARM64::ST1Fourv4s_POST, "st1", ".4s", 0, 64 }, - { ARM64::ST1Fourv2d_POST, "st1", ".2d", 0, 64 }, - { ARM64::ST1Fourv8b_POST, "st1", ".8b", 0, 32 }, - { ARM64::ST1Fourv4h_POST, "st1", ".4h", 0, 32 }, - { ARM64::ST1Fourv2s_POST, "st1", ".2s", 0, 32 }, - { ARM64::ST1Fourv1d_POST, "st1", ".1d", 0, 32 }, - { ARM64::ST2i8, "st2", ".b", 1, 0 }, - { ARM64::ST2i16, "st2", ".h", 1, 0 }, - { ARM64::ST2i32, "st2", ".s", 1, 0 }, - { ARM64::ST2i64, "st2", ".d", 1, 0 }, - { ARM64::ST2i8_POST, "st2", ".b", 1, 2 }, - { ARM64::ST2i16_POST, "st2", ".h", 1, 4 }, - { ARM64::ST2i32_POST, "st2", ".s", 1, 8 }, - { ARM64::ST2i64_POST, "st2", ".d", 1, 16 }, - { ARM64::ST2Twov16b, "st2", ".16b", 0, 0 }, - { ARM64::ST2Twov8h, "st2", ".8h", 0, 0 }, - { ARM64::ST2Twov4s, "st2", ".4s", 0, 0 }, - { ARM64::ST2Twov2d, "st2", ".2d", 0, 0 }, - { ARM64::ST2Twov8b, "st2", ".8b", 0, 0 }, - { ARM64::ST2Twov4h, "st2", ".4h", 0, 0 }, - { ARM64::ST2Twov2s, "st2", ".2s", 0, 0 }, - { ARM64::ST2Twov16b_POST, "st2", ".16b", 0, 32 }, - { ARM64::ST2Twov8h_POST, "st2", ".8h", 0, 32 }, - { ARM64::ST2Twov4s_POST, "st2", ".4s", 0, 32 }, - { ARM64::ST2Twov2d_POST, "st2", ".2d", 0, 32 }, - { ARM64::ST2Twov8b_POST, "st2", ".8b", 0, 16 }, - { ARM64::ST2Twov4h_POST, "st2", ".4h", 0, 16 }, - { ARM64::ST2Twov2s_POST, "st2", ".2s", 0, 16 }, - { ARM64::ST3i8, "st3", ".b", 1, 0 }, - { ARM64::ST3i16, "st3", ".h", 1, 0 }, - { ARM64::ST3i32, "st3", ".s", 1, 0 }, - { ARM64::ST3i64, "st3", ".d", 1, 0 }, - { ARM64::ST3i8_POST, "st3", ".b", 1, 3 }, - { ARM64::ST3i16_POST, "st3", ".h", 1, 6 }, - { ARM64::ST3i32_POST, "st3", ".s", 1, 12 }, - { ARM64::ST3i64_POST, "st3", ".d", 1, 24 }, - { ARM64::ST3Threev16b, "st3", ".16b", 0, 0 }, - { ARM64::ST3Threev8h, "st3", ".8h", 0, 0 }, - { ARM64::ST3Threev4s, "st3", ".4s", 0, 0 }, - { ARM64::ST3Threev2d, "st3", ".2d", 0, 0 }, - { ARM64::ST3Threev8b, "st3", ".8b", 0, 0 }, - { ARM64::ST3Threev4h, "st3", ".4h", 0, 0 }, - { ARM64::ST3Threev2s, "st3", ".2s", 0, 0 }, - { ARM64::ST3Threev16b_POST, 
"st3", ".16b", 0, 48 }, - { ARM64::ST3Threev8h_POST, "st3", ".8h", 0, 48 }, - { ARM64::ST3Threev4s_POST, "st3", ".4s", 0, 48 }, - { ARM64::ST3Threev2d_POST, "st3", ".2d", 0, 48 }, - { ARM64::ST3Threev8b_POST, "st3", ".8b", 0, 24 }, - { ARM64::ST3Threev4h_POST, "st3", ".4h", 0, 24 }, - { ARM64::ST3Threev2s_POST, "st3", ".2s", 0, 24 }, - { ARM64::ST4i8, "st4", ".b", 1, 0 }, - { ARM64::ST4i16, "st4", ".h", 1, 0 }, - { ARM64::ST4i32, "st4", ".s", 1, 0 }, - { ARM64::ST4i64, "st4", ".d", 1, 0 }, - { ARM64::ST4i8_POST, "st4", ".b", 1, 4 }, - { ARM64::ST4i16_POST, "st4", ".h", 1, 8 }, - { ARM64::ST4i32_POST, "st4", ".s", 1, 16 }, - { ARM64::ST4i64_POST, "st4", ".d", 1, 32 }, - { ARM64::ST4Fourv16b, "st4", ".16b", 0, 0 }, - { ARM64::ST4Fourv8h, "st4", ".8h", 0, 0 }, - { ARM64::ST4Fourv4s, "st4", ".4s", 0, 0 }, - { ARM64::ST4Fourv2d, "st4", ".2d", 0, 0 }, - { ARM64::ST4Fourv8b, "st4", ".8b", 0, 0 }, - { ARM64::ST4Fourv4h, "st4", ".4h", 0, 0 }, - { ARM64::ST4Fourv2s, "st4", ".2s", 0, 0 }, - { ARM64::ST4Fourv16b_POST, "st4", ".16b", 0, 64 }, - { ARM64::ST4Fourv8h_POST, "st4", ".8h", 0, 64 }, - { ARM64::ST4Fourv4s_POST, "st4", ".4s", 0, 64 }, - { ARM64::ST4Fourv2d_POST, "st4", ".2d", 0, 64 }, - { ARM64::ST4Fourv8b_POST, "st4", ".8b", 0, 32 }, - { ARM64::ST4Fourv4h_POST, "st4", ".4h", 0, 32 }, - { ARM64::ST4Fourv2s_POST, "st4", ".2s", 0, 32 }, -}; - -static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { - unsigned Idx; - for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) - if (LdStNInstInfo[Idx].Opcode == Opcode) - return &LdStNInstInfo[Idx]; - - return 0; -} - -void ARM64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, - StringRef Annot) { - unsigned Opcode = MI->getOpcode(); - StringRef Layout, Mnemonic; - - bool IsTbx; - if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) { - O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t' - << getRegisterName(MI->getOperand(0).getReg(), ARM64::vreg) << ", "; - - unsigned ListOpNum = IsTbx ? 2 : 1; - printVectorList(MI, ListOpNum, O, ""); - - O << ", " - << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), ARM64::vreg); - printAnnotation(O, Annot); - return; - } - - if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { - O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; - - // Now onto the operands: first a vector list with possible lane - // specifier. E.g. { v0 }[2] - printVectorList(MI, 0, O, ""); - - if (LdStDesc->LaneOperand != 0) - O << '[' << MI->getOperand(LdStDesc->LaneOperand).getImm() << ']'; - - // Next the address: [xN] - unsigned AddrOpNum = LdStDesc->LaneOperand + 1; - unsigned AddrReg = MI->getOperand(AddrOpNum).getReg(); - O << ", [" << getRegisterName(AddrReg) << ']'; - - // Finally, there might be a post-indexed offset. 
- if (LdStDesc->NaturalOffset != 0) { - unsigned Reg = MI->getOperand(AddrOpNum + 1).getReg(); - if (Reg != ARM64::XZR) - O << ", " << getRegisterName(Reg); - else { - assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?"); - O << ", #" << LdStDesc->NaturalOffset; - } - } - - printAnnotation(O, Annot); - return; - } - - ARM64InstPrinter::printInst(MI, O, Annot); -} - -bool ARM64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { -#ifndef NDEBUG - unsigned Opcode = MI->getOpcode(); - assert((Opcode == ARM64::SYS || Opcode == ARM64::SYSxt) && - "Invalid opcode for SYS alias!"); -#endif - - const char *Asm = 0; - const MCOperand &Op1 = MI->getOperand(0); - const MCOperand &Cn = MI->getOperand(1); - const MCOperand &Cm = MI->getOperand(2); - const MCOperand &Op2 = MI->getOperand(3); - - unsigned Op1Val = Op1.getImm(); - unsigned CnVal = Cn.getImm(); - unsigned CmVal = Cm.getImm(); - unsigned Op2Val = Op2.getImm(); - - if (CnVal == 7) { - switch (CmVal) { - default: - break; - - // IC aliases - case 1: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tialluis"; - break; - case 5: - if (Op1Val == 0 && Op2Val == 0) - Asm = "ic\tiallu"; - else if (Op1Val == 3 && Op2Val == 1) - Asm = "ic\tivau"; - break; - - // DC aliases - case 4: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tzva"; - break; - case 6: - if (Op1Val == 0 && Op2Val == 1) - Asm = "dc\tivac"; - if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tisw"; - break; - case 10: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcsw"; - break; - case 11: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcvau"; - break; - case 14: - if (Op1Val == 3 && Op2Val == 1) - Asm = "dc\tcivac"; - else if (Op1Val == 0 && Op2Val == 2) - Asm = "dc\tcisw"; - break; - - // AT aliases - case 8: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e1r"; break; - case 1: Asm = "at\ts1e1w"; break; - case 2: Asm = "at\ts1e0r"; break; - case 3: Asm = "at\ts1e0w"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e2r"; break; - case 1: Asm = "at\ts1e2w"; break; - case 4: Asm = "at\ts12e1r"; break; - case 5: Asm = "at\ts12e1w"; break; - case 6: Asm = "at\ts12e0r"; break; - case 7: Asm = "at\ts12e0w"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "at\ts1e3r"; break; - case 1: Asm = "at\ts1e3w"; break; - } - break; - } - break; - } - } else if (CnVal == 8) { - // TLBI aliases - switch (CmVal) { - default: - break; - case 3: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1is"; break; - case 1: Asm = "tlbi\tvae1is"; break; - case 2: Asm = "tlbi\taside1is"; break; - case 3: Asm = "tlbi\tvaae1is"; break; - case 5: Asm = "tlbi\tvale1is"; break; - case 7: Asm = "tlbi\tvaale1is"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2is"; break; - case 1: Asm = "tlbi\tvae2is"; break; - case 4: Asm = "tlbi\talle1is"; break; - case 5: Asm = "tlbi\tvale2is"; break; - case 6: Asm = "tlbi\tvmalls12e1is"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3is"; break; - case 1: Asm = "tlbi\tvae3is"; break; - case 5: Asm = "tlbi\tvale3is"; break; - } - break; - } - break; - case 4: - switch (Op1Val) { - default: - break; - case 4: - switch (Op2Val) { - default: - break; - case 1: Asm = "tlbi\tipas2e1"; break; - case 
5: Asm = "tlbi\tipas2le1"; break; - } - break; - } - break; - case 7: - switch (Op1Val) { - default: - break; - case 0: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\tvmalle1"; break; - case 1: Asm = "tlbi\tvae1"; break; - case 2: Asm = "tlbi\taside1"; break; - case 3: Asm = "tlbi\tvaae1"; break; - case 5: Asm = "tlbi\tvale1"; break; - case 7: Asm = "tlbi\tvaale1"; break; - } - break; - case 4: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle2"; break; - case 1: Asm = "tlbi\tvae2"; break; - case 4: Asm = "tlbi\talle1"; break; - case 5: Asm = "tlbi\tvale2"; break; - case 6: Asm = "tlbi\tvmalls12e1"; break; - } - break; - case 6: - switch (Op2Val) { - default: - break; - case 0: Asm = "tlbi\talle3"; break; - case 1: Asm = "tlbi\tvae3"; break; - case 5: Asm = "tlbi\tvale3"; break; - } - break; - } - break; - } - } - - if (Asm) { - O << '\t' << Asm; - if (MI->getNumOperands() == 5) - O << ", " << getRegisterName(MI->getOperand(4).getReg()); - } - - return Asm != 0; -} - -void ARM64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg); - } else if (Op.isImm()) { - O << '#' << Op.getImm(); - } else { - assert(Op.isExpr() && "unknown operand kind in printOperand"); - O << *Op.getExpr(); - } -} - -void ARM64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, - unsigned Imm, raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - if (Op.isReg()) { - unsigned Reg = Op.getReg(); - if (Reg == ARM64::XZR) - O << "#" << Imm; - else - O << getRegisterName(Reg); - } else - assert(0 && "unknown operand kind in printPostIncOperand64"); -} - -void ARM64InstPrinter::printPostIncOperand1(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 1, O); -} - -void ARM64InstPrinter::printPostIncOperand2(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 2, O); -} - -void ARM64InstPrinter::printPostIncOperand3(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 3, O); -} - -void ARM64InstPrinter::printPostIncOperand4(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 4, O); -} - -void ARM64InstPrinter::printPostIncOperand6(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 6, O); -} - -void ARM64InstPrinter::printPostIncOperand8(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 8, O); -} - -void ARM64InstPrinter::printPostIncOperand12(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 12, O); -} - -void ARM64InstPrinter::printPostIncOperand16(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 16, O); -} - -void ARM64InstPrinter::printPostIncOperand24(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 24, O); -} - -void ARM64InstPrinter::printPostIncOperand32(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 32, O); -} - -void ARM64InstPrinter::printPostIncOperand48(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 48, O); -} - -void ARM64InstPrinter::printPostIncOperand64(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - printPostIncOperand(MI, OpNo, 64, O); -} - -void ARM64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, - 
raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isReg() && "Non-register vreg operand!"); - unsigned Reg = Op.getReg(); - O << getRegisterName(Reg, ARM64::vreg); -} - -void ARM64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNo); - assert(Op.isImm() && "System instruction C[nm] operands must be immediates!"); - O << "c" << Op.getImm(); -} - -void ARM64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - if (MO.isImm()) { - unsigned Val = (MO.getImm() & 0xfff); - assert(Val == MO.getImm() && "Add/sub immediate out of range!"); - unsigned Shift = - ARM64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm()); - O << '#' << (Val << Shift); - // Distinguish "0, lsl #12" from "0, lsl #0". - if (Val == 0 && Shift != 0) - printShifter(MI, OpNum + 1, O); - } else { - assert(MO.isExpr() && "Unexpected operand type!"); - O << *MO.getExpr(); - printShifter(MI, OpNum + 1, O); - } -} - -void ARM64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 32)); -} - -void ARM64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - uint64_t Val = MI->getOperand(OpNum).getImm(); - O << "#0x"; - O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 64)); -} - -void ARM64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - // LSL #0 should not be printed. - if (ARM64_AM::getShiftType(Val) == ARM64_AM::LSL && - ARM64_AM::getShiftValue(Val) == 0) - return; - O << ", " << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #" - << ARM64_AM::getShiftValue(Val); -} - -void ARM64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printShifter(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << getRegisterName(MI->getOperand(OpNum).getReg()); - printExtend(MI, OpNum + 1, O); -} - -void ARM64InstPrinter::printExtend(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNum).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getArithExtendType(Val); - unsigned ShiftVal = ARM64_AM::getArithShiftValue(Val); - - // If the destination or first source register operand is [W]SP, print - // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at - // all. - if (ExtType == ARM64_AM::UXTW || ExtType == ARM64_AM::UXTX) { - unsigned Dest = MI->getOperand(0).getReg(); - unsigned Src1 = MI->getOperand(1).getReg(); - if (Dest == ARM64::SP || Dest == ARM64::WSP || Src1 == ARM64::SP || - Src1 == ARM64::WSP) { - if (ShiftVal != 0) - O << ", lsl #" << ShiftVal; - return; - } - } - O << ", " << ARM64_AM::getExtendName(ExtType); - if (ShiftVal != 0) - O << " #" << ShiftVal; -} - -void ARM64InstPrinter::printDotCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - if (CC != ARM64CC::AL) - O << '.' 
<< ARM64CC::getCondCodeName(CC); -} - -void ARM64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm(); - O << ARM64CC::getCondCodeName(CC); -} - -void ARM64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']'; -} - -void ARM64InstPrinter::printImmScale4(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 4 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printImmScale8(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 8 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printImmScale16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '#' << 16 * MI->getOperand(OpNum).getImm(); -} - -void ARM64InstPrinter::printAMIndexed(const MCInst *MI, unsigned OpNum, - unsigned Scale, raw_ostream &O) { - const MCOperand MO1 = MI->getOperand(OpNum + 1); - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()); - if (MO1.isImm()) { - if (MO1.getImm() != 0) - O << ", #" << (MO1.getImm() * Scale); - } else { - assert(MO1.isExpr() && "Unexpected operand type!"); - O << ", " << *MO1.getExpr(); - } - O << ']'; -} - -void ARM64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - unsigned prfop = MI->getOperand(OpNum).getImm(); - if (ARM64_AM::isNamedPrefetchOp(prfop)) - O << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)prfop); - else - O << '#' << prfop; -} - -void ARM64InstPrinter::printMemoryPostIndexed32(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 4 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed64(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 8 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed128(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << 16 * MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #" - << MI->getOperand(OpNum + 1).getImm(); -} - -void ARM64InstPrinter::printMemoryRegOffset(const MCInst *MI, unsigned OpNum, - raw_ostream &O, int LegalShiftAmt) { - O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " - << getRegisterName(MI->getOperand(OpNum + 1).getReg()); - - unsigned Val = MI->getOperand(OpNum + 2).getImm(); - ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val); - bool DoShift = ARM64_AM::getMemDoShift(Val); - - if (ExtType == ARM64_AM::UXTX) { - if (DoShift) - O << ", lsl"; - } else - O << ", " << ARM64_AM::getExtendName(ExtType); - - if (DoShift) - O << " #" << LegalShiftAmt; - - O << "]"; -} - -void ARM64InstPrinter::printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 0); -} - -void ARM64InstPrinter::printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 1); -} - -void ARM64InstPrinter::printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 2); -} - -void 
ARM64InstPrinter::printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 3); -} - -void ARM64InstPrinter::printMemoryRegOffset128(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - printMemoryRegOffset(MI, OpNum, O, 4); -} - -void ARM64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNum); - O << '#'; - if (MO.isFPImm()) - // FIXME: Should this ever happen? - O << MO.getFPImm(); - else - O << ARM64_AM::getFPImmFloat(MO.getImm()); -} - -static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { - while (Stride--) { - switch (Reg) { - default: - assert(0 && "Vector register expected!"); - case ARM64::Q0: Reg = ARM64::Q1; break; - case ARM64::Q1: Reg = ARM64::Q2; break; - case ARM64::Q2: Reg = ARM64::Q3; break; - case ARM64::Q3: Reg = ARM64::Q4; break; - case ARM64::Q4: Reg = ARM64::Q5; break; - case ARM64::Q5: Reg = ARM64::Q6; break; - case ARM64::Q6: Reg = ARM64::Q7; break; - case ARM64::Q7: Reg = ARM64::Q8; break; - case ARM64::Q8: Reg = ARM64::Q9; break; - case ARM64::Q9: Reg = ARM64::Q10; break; - case ARM64::Q10: Reg = ARM64::Q11; break; - case ARM64::Q11: Reg = ARM64::Q12; break; - case ARM64::Q12: Reg = ARM64::Q13; break; - case ARM64::Q13: Reg = ARM64::Q14; break; - case ARM64::Q14: Reg = ARM64::Q15; break; - case ARM64::Q15: Reg = ARM64::Q16; break; - case ARM64::Q16: Reg = ARM64::Q17; break; - case ARM64::Q17: Reg = ARM64::Q18; break; - case ARM64::Q18: Reg = ARM64::Q19; break; - case ARM64::Q19: Reg = ARM64::Q20; break; - case ARM64::Q20: Reg = ARM64::Q21; break; - case ARM64::Q21: Reg = ARM64::Q22; break; - case ARM64::Q22: Reg = ARM64::Q23; break; - case ARM64::Q23: Reg = ARM64::Q24; break; - case ARM64::Q24: Reg = ARM64::Q25; break; - case ARM64::Q25: Reg = ARM64::Q26; break; - case ARM64::Q26: Reg = ARM64::Q27; break; - case ARM64::Q27: Reg = ARM64::Q28; break; - case ARM64::Q28: Reg = ARM64::Q29; break; - case ARM64::Q29: Reg = ARM64::Q30; break; - case ARM64::Q30: Reg = ARM64::Q31; break; - // Vector lists can wrap around. - case ARM64::Q31: - Reg = ARM64::Q0; - break; - } - } - return Reg; -} - -void ARM64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O, StringRef LayoutSuffix) { - unsigned Reg = MI->getOperand(OpNum).getReg(); - - O << "{ "; - - // Work out how many registers there are in the list (if there is an actual - // list). - unsigned NumRegs = 1; - if (MRI.getRegClass(ARM64::DDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQRegClassID).contains(Reg)) - NumRegs = 2; - else if (MRI.getRegClass(ARM64::DDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQRegClassID).contains(Reg)) - NumRegs = 3; - else if (MRI.getRegClass(ARM64::DDDDRegClassID).contains(Reg) || - MRI.getRegClass(ARM64::QQQQRegClassID).contains(Reg)) - NumRegs = 4; - - // Now forget about the list and find out what the first register is. - if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::dsub0)) - Reg = FirstReg; - else if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::qsub0)) - Reg = FirstReg; - - // If it's a D-reg, we need to promote it to the equivalent Q-reg before - // printing (otherwise getRegisterName fails). 
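// The getNextVectorRegister switch above walks Q0..Q31 case by case because
// MC register enum values are not guaranteed to be consecutive. Treating
// vector registers as plain 0-31 indices, the same wrap-around behaviour is
// one line of modular arithmetic -- a sketch, not the MCRegisterInfo API:
#include <cstdio>

static unsigned nextVectorReg(unsigned QIndex, unsigned Stride = 1) {
  return (QIndex + Stride) % 32; // register lists wrap from q31 back to q0
}

int main() {
  // A four-register list starting at q30 prints as { q30, q31, q0, q1 }.
  for (unsigned I = 0, R = 30; I != 4; ++I, R = nextVectorReg(R))
    std::printf("q%u ", R);
  std::printf("\n");
  return 0;
}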
- if (MRI.getRegClass(ARM64::FPR64RegClassID).contains(Reg)) { - const MCRegisterClass &FPR128RC = MRI.getRegClass(ARM64::FPR128RegClassID); - Reg = MRI.getMatchingSuperReg(Reg, ARM64::dsub, &FPR128RC); - } - - for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { - O << getRegisterName(Reg, ARM64::vreg) << LayoutSuffix; - if (i + 1 != NumRegs) - O << ", "; - } - - O << " }"; -} - -void ARM64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - printVectorList(MI, OpNum, O, ""); -} - -template <unsigned NumLanes, char LaneKind> -void ARM64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - std::string Suffix("."); - if (NumLanes) - Suffix += itostr(NumLanes) + LaneKind; - else - Suffix += LaneKind; - - printVectorList(MI, OpNum, O, Suffix); -} - -void ARM64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "[" << MI->getOperand(OpNum).getImm() << "]"; -} - -void ARM64InstPrinter::printAlignedBranchTarget(const MCInst *MI, - unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - - // If the label has already been resolved to an immediate offset (say, when - // we're running the disassembler), just print the immediate. - if (Op.isImm()) { - O << "#" << (Op.getImm() << 2); - return; - } - - // If the branch target is simply an address then print it in hex. - const MCConstantExpr *BranchTarget = - dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr()); - int64_t Address; - if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) { - O << "0x"; - O.write_hex(Address); - } else { - // Otherwise, just print the expression. - O << *MI->getOperand(OpNum).getExpr(); - } -} - -void ARM64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - - // If the label has already been resolved to an immediate offset (say, when - // we're running the disassembler), just print the immediate. - if (Op.isImm()) { - O << "#" << (Op.getImm() << 12); - return; - } - - // Otherwise, just print the expression.
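// The immediates these label printers receive are scaled: branch offsets
// count 4-byte instruction words (the "<< 2" in printAlignedBranchTarget
// above) and ADRP offsets count 4 KiB pages (the "<< 12" just below). A toy
// round trip with made-up operand values:
#include <cassert>
#include <cstdint>

int main() {
  int64_t StoredBranchImm = 2;             // operand value held in the MCInst
  assert((StoredBranchImm << 2) == 8);     // printed as #8: two instructions ahead

  int64_t StoredAdrpImm = 3;               // ADRP page delta
  assert((StoredAdrpImm << 12) == 0x3000); // printed as #12288: three pages ahead
  return 0;
}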
- O << *MI->getOperand(OpNum).getExpr(); -} - -void ARM64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)Val); - if (Name) - O << Name; - else - O << "#" << Val; -} - -void ARM64InstPrinter::printSystemRegister(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = - ARM64SYS::getSystemRegisterName((ARM64SYS::SystemRegister)Val); - if (Name) { - O << Name; - return; - } - - unsigned Op0 = 2 | ((Val >> 14) & 1); - unsigned Op1 = (Val >> 11) & 7; - unsigned CRn = (Val >> 7) & 0xf; - unsigned CRm = (Val >> 3) & 0xf; - unsigned Op2 = Val & 7; - - O << 'S' << Op0 << '_' << Op1 << "_C" << CRn << "_C" << CRm << '_' << Op2; -} - -void ARM64InstPrinter::printSystemCPSRField(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned Val = MI->getOperand(OpNo).getImm(); - const char *Name = ARM64SYS::getCPSRFieldName((ARM64SYS::CPSRField)Val); - O << Name; -} - -void ARM64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) { - unsigned RawVal = MI->getOperand(OpNo).getImm(); - uint64_t Val = ARM64_AM::decodeAdvSIMDModImmType10(RawVal); - O << format("#%#016llx", Val); -} diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h deleted file mode 100644 index ff66ff0..0000000 --- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h +++ /dev/null @@ -1,157 +0,0 @@ -//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This class prints an ARM64 MCInst to a .s file. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64INSTPRINTER_H -#define ARM64INSTPRINTER_H - -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" - -namespace llvm { - -class MCOperand; - -class ARM64InstPrinter : public MCInstPrinter { -public: - ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - - // Autogenerated by tblgen. 
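// A standalone restatement of the fallback in printSystemRegister above,
// which spells an unnamed system register as S<op0>_<op1>_C<n>_C<m>_<op2> by
// unpacking one packed immediate. The sample value happens to be the
// encoding of TPIDR_EL0 (S3_3_C13_C0_2):
#include <cstdio>

int main() {
  unsigned Val = 0x5e82;                 // packed op0:op1:CRn:CRm:op2 fields
  unsigned Op0 = 2 | ((Val >> 14) & 1);  // op0 is always 2 or 3 here
  unsigned Op1 = (Val >> 11) & 7;
  unsigned CRn = (Val >> 7) & 0xf;
  unsigned CRm = (Val >> 3) & 0xf;
  unsigned Op2 = Val & 7;
  std::printf("S%u_%u_C%u_C%u_%u\n", Op0, Op1, CRn, CRm, Op2); // S3_3_C13_C0_2
  return 0;
}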
- virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); - virtual StringRef getRegName(unsigned RegNo) const { - return getRegisterName(RegNo); - } - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); - -protected: - bool printSysAlias(const MCInst *MI, raw_ostream &O); - // Operand printers - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm, - raw_ostream &O); - void printPostIncOperand1(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand2(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand3(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand4(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand6(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand8(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand12(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand16(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand24(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand32(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand48(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printPostIncOperand64(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printDotCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAlignedBranchTarget(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printAMIndexed(const MCInst *MI, unsigned OpNum, unsigned Scale, - raw_ostream &O); - void printAMIndexed128(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 16, O); - } - - void printAMIndexed64(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 8, O); - } - - void printAMIndexed32(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 4, O); - } - - void printAMIndexed16(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 2, O); - } - - void printAMIndexed8(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 1, O); - } - void printAMUnscaled(const MCInst *MI, unsigned OpNum, raw_ostream &O) { - printAMIndexed(MI, OpNum, 1, O); - } - void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale4(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale8(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printImmScale16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printPrefetchOp(const MCInst *MI, unsigned 
OpNum, raw_ostream &O); - void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryPostIndexed32(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryPostIndexed64(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryPostIndexed128(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O, - int LegalShiftAmt); - void printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printMemoryRegOffset128(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O, - StringRef LayoutSuffix); - - /// Print a list of vector registers where the type suffix is implicit - /// (i.e. attached to the instruction rather than the registers). - void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum, - raw_ostream &O); - - template <unsigned NumLanes, char LaneKind> - void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); - - void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSystemCPSRField(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); -}; - -class ARM64AppleInstPrinter : public ARM64InstPrinter { -public: - ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, - const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); - - virtual void printInstruction(const MCInst *MI, raw_ostream &O); - virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O); - virtual StringRef getRegName(unsigned RegNo) const { - return getRegisterName(RegNo); - } - static const char *getRegisterName(unsigned RegNo, - unsigned AltIdx = ARM64::NoRegAltName); -}; -} - -#endif diff --git a/lib/Target/ARM64/InstPrinter/CMakeLists.txt b/lib/Target/ARM64/InstPrinter/CMakeLists.txt deleted file mode 100644 index b8ee12c..0000000 --- a/lib/Target/ARM64/InstPrinter/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64AsmPrinter - ARM64InstPrinter.cpp - ) - -add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen) diff --git a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt b/lib/Target/ARM64/InstPrinter/LLVMBuild.txt deleted file mode 100644 index 2ec83d2..0000000 --- a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory.
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64AsmPrinter -parent = ARM64 -required_libraries = MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/InstPrinter/Makefile b/lib/Target/ARM64/InstPrinter/Makefile deleted file mode 100644 index a59efb0..0000000 --- a/lib/Target/ARM64/InstPrinter/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM64/AsmPrinter/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64AsmPrinter - -# Hack: we need to include 'main' arm target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/LLVMBuild.txt b/lib/Target/ARM64/LLVMBuild.txt deleted file mode 100644 index 45b0628..0000000 --- a/lib/Target/ARM64/LLVMBuild.txt +++ /dev/null @@ -1,36 +0,0 @@ -;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[common] -subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo - -[component_0] -type = TargetGroup -name = ARM64 -parent = Target -has_asmparser = 1 -has_asmprinter = 1 -has_disassembler = 1 -has_jit = 1 - -[component_1] -type = Library -name = ARM64CodeGen -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h b/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h deleted file mode 100644 index 7717743..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h +++ /dev/null @@ -1,758 +0,0 @@ -//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the ARM64 addressing mode implementation stuff. 
-// -//===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H -#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H - -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/APInt.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include <cmath> - -namespace llvm { - -/// ARM64_AM - ARM64 Addressing Mode Stuff -namespace ARM64_AM { - -//===----------------------------------------------------------------------===// -// Shifts -// - -enum ShiftType { - InvalidShift = -1, - LSL = 0, - LSR = 1, - ASR = 2, - ROR = 3, - MSL = 4 -}; - -/// getShiftName - Get the string encoding for the shift type. -static inline const char *getShiftName(ARM64_AM::ShiftType ST) { - switch (ST) { - default: assert(false && "unhandled shift type!"); - case ARM64_AM::LSL: return "lsl"; - case ARM64_AM::LSR: return "lsr"; - case ARM64_AM::ASR: return "asr"; - case ARM64_AM::ROR: return "ror"; - case ARM64_AM::MSL: return "msl"; - } - return 0; -} - -/// getShiftType - Extract the shift type. -static inline ARM64_AM::ShiftType getShiftType(unsigned Imm) { - return ARM64_AM::ShiftType((Imm >> 6) & 0x7); -} - -/// getShiftValue - Extract the shift value. -static inline unsigned getShiftValue(unsigned Imm) { - return Imm & 0x3f; -} - -/// getShifterImm - Encode the shift type and amount: -/// imm: 6-bit shift amount -/// shifter: 000 ==> lsl -/// 001 ==> lsr -/// 010 ==> asr -/// 011 ==> ror -/// 100 ==> msl -/// {8-6} = shifter -/// {5-0} = imm -static inline unsigned getShifterImm(ARM64_AM::ShiftType ST, unsigned Imm) { - assert((Imm & 0x3f) == Imm && "Illegal shifted immediate value!"); - return (unsigned(ST) << 6) | (Imm & 0x3f); -} - -//===----------------------------------------------------------------------===// -// Extends -// - -enum ExtendType { - InvalidExtend = -1, - UXTB = 0, - UXTH = 1, - UXTW = 2, - UXTX = 3, - SXTB = 4, - SXTH = 5, - SXTW = 6, - SXTX = 7 -}; - -/// getExtendName - Get the string encoding for the extend type. -static inline const char *getExtendName(ARM64_AM::ExtendType ET) { - switch (ET) { - default: assert(false && "unhandled extend type!"); - case ARM64_AM::UXTB: return "uxtb"; - case ARM64_AM::UXTH: return "uxth"; - case ARM64_AM::UXTW: return "uxtw"; - case ARM64_AM::UXTX: return "uxtx"; - case ARM64_AM::SXTB: return "sxtb"; - case ARM64_AM::SXTH: return "sxth"; - case ARM64_AM::SXTW: return "sxtw"; - case ARM64_AM::SXTX: return "sxtx"; - } - return 0; -} - -/// getArithShiftValue - get the arithmetic shift value. -static inline unsigned getArithShiftValue(unsigned Imm) { - return Imm & 0x7; -} - -/// getExtendType - Extract the extend type for operands of arithmetic ops. -static inline ARM64_AM::ExtendType getArithExtendType(unsigned Imm) { - return ARM64_AM::ExtendType((Imm >> 3) & 0x7); -} - -/// getArithExtendImm - Encode the extend type and shift amount for an -/// arithmetic instruction: -/// imm: 3-bit extend amount -/// shifter: 000 ==> uxtb -/// 001 ==> uxth -/// 010 ==> uxtw -/// 011 ==> uxtx -/// 100 ==> sxtb -/// 101 ==> sxth -/// 110 ==> sxtw -/// 111 ==> sxtx -/// {5-3} = shifter -/// {2-0} = imm3 -static inline unsigned getArithExtendImm(ARM64_AM::ExtendType ET, - unsigned Imm) { - assert((Imm & 0x7) == Imm && "Illegal shifted immediate value!"); - return (unsigned(ET) << 3) | (Imm & 0x7); -} - -/// getMemDoShift - Extract the "do shift" flag value for load/store -/// instructions.
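// The packed shifter operand defined above round-trips as follows -- a
// self-contained restatement of the same bit layout, not the LLVM header:
#include <cassert>

enum ShiftType { LSL = 0, LSR = 1, ASR = 2, ROR = 3, MSL = 4 };

static unsigned packShifter(ShiftType ST, unsigned Imm) {
  return (unsigned(ST) << 6) | (Imm & 0x3f); // {8-6} = type, {5-0} = amount
}

int main() {
  unsigned Packed = packShifter(LSR, 17);
  assert(((Packed >> 6) & 0x7) == LSR); // what getShiftType extracts
  assert((Packed & 0x3f) == 17);        // what getShiftValue extracts
  return 0;
}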
-static inline bool getMemDoShift(unsigned Imm) { - return (Imm & 0x1) != 0; -} - -/// getExtendType - Extract the extend type for the offset operand of -/// loads/stores. -static inline ARM64_AM::ExtendType getMemExtendType(unsigned Imm) { - return ARM64_AM::ExtendType((Imm >> 1) & 0x7); -} - -/// getExtendImm - Encode the extend type and amount for a load/store inst: -/// doshift: should the offset be scaled by the access size -/// shifter: 000 ==> uxtb -/// 001 ==> uxth -/// 010 ==> uxtw -/// 011 ==> uxtx -/// 100 ==> sxtb -/// 101 ==> sxth -/// 110 ==> sxtw -/// 111 ==> sxtx -/// {3-1} = shifter -/// {0} = doshift -static inline unsigned getMemExtendImm(ARM64_AM::ExtendType ET, bool DoShift) { - return (unsigned(ET) << 1) | unsigned(DoShift); -} - -//===----------------------------------------------------------------------===// -// Prefetch -// - -/// Pre-fetch operator names. -/// The enum values match the encoding values: -/// prfop<4:3> 00=preload data, 10=prepare for store -/// prfop<2:1> 00=target L1 cache, 01=target L2 cache, 10=target L3 cache, -/// prfop<0> 0=non-streaming (temporal), 1=streaming (non-temporal) -enum PrefetchOp { - InvalidPrefetchOp = -1, - PLDL1KEEP = 0x00, - PLDL1STRM = 0x01, - PLDL2KEEP = 0x02, - PLDL2STRM = 0x03, - PLDL3KEEP = 0x04, - PLDL3STRM = 0x05, - PSTL1KEEP = 0x10, - PSTL1STRM = 0x11, - PSTL2KEEP = 0x12, - PSTL2STRM = 0x13, - PSTL3KEEP = 0x14, - PSTL3STRM = 0x15 -}; - -/// isNamedPrefetchOp - Check if the prefetch-op 5-bit value has a name. -static inline bool isNamedPrefetchOp(unsigned prfop) { - switch (prfop) { - default: return false; - case ARM64_AM::PLDL1KEEP: case ARM64_AM::PLDL1STRM: case ARM64_AM::PLDL2KEEP: - case ARM64_AM::PLDL2STRM: case ARM64_AM::PLDL3KEEP: case ARM64_AM::PLDL3STRM: - case ARM64_AM::PSTL1KEEP: case ARM64_AM::PSTL1STRM: case ARM64_AM::PSTL2KEEP: - case ARM64_AM::PSTL2STRM: case ARM64_AM::PSTL3KEEP: case ARM64_AM::PSTL3STRM: - return true; - } -} - - -/// getPrefetchOpName - Get the string encoding for the prefetch operator. -static inline const char *getPrefetchOpName(ARM64_AM::PrefetchOp prfop) { - switch (prfop) { - default: assert(false && "unhandled prefetch-op type!"); - case ARM64_AM::PLDL1KEEP: return "pldl1keep"; - case ARM64_AM::PLDL1STRM: return "pldl1strm"; - case ARM64_AM::PLDL2KEEP: return "pldl2keep"; - case ARM64_AM::PLDL2STRM: return "pldl2strm"; - case ARM64_AM::PLDL3KEEP: return "pldl3keep"; - case ARM64_AM::PLDL3STRM: return "pldl3strm"; - case ARM64_AM::PSTL1KEEP: return "pstl1keep"; - case ARM64_AM::PSTL1STRM: return "pstl1strm"; - case ARM64_AM::PSTL2KEEP: return "pstl2keep"; - case ARM64_AM::PSTL2STRM: return "pstl2strm"; - case ARM64_AM::PSTL3KEEP: return "pstl3keep"; - case ARM64_AM::PSTL3STRM: return "pstl3strm"; - } - return 0; -} - -static inline uint64_t ror(uint64_t elt, unsigned size) { - return ((elt & 1) << (size-1)) | (elt >> 1); -} - -/// processLogicalImmediate - Determine if an immediate value can be encoded -/// as the immediate operand of a logical instruction for the given register -/// size. If so, return true with "encoding" set to the encoded value in -/// the form N:immr:imms. -static inline bool processLogicalImmediate(uint64_t imm, unsigned regSize, - uint64_t &encoding) { - if (imm == 0ULL || imm == ~0ULL || - (regSize != 64 && (imm >> regSize != 0 || imm == ~0U))) - return false; - - unsigned size = 2; - uint64_t eltVal = imm; - - // First, determine the element size. 
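// Worked example (all steps follow from the code below): for
// imm = 0x00FF00FF00FF00FF and regSize = 64, the smallest repeating element
// is 0x00FF with size = 16; the rotation loop matches at i = 15
// (clz = 8, cto = 8), giving immr = 0, nimms low bits = 0b100111 and
// N = 0, so the returned encoding N:immr:imms is 0x027.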
-  while (size < regSize) {
-    unsigned numElts = regSize / size;
-    unsigned mask = (1ULL << size) - 1;
-    uint64_t lowestEltVal = imm & mask;
-
-    bool allMatched = true;
-    for (unsigned i = 1; i < numElts; ++i) {
-      uint64_t currEltVal = (imm >> (i*size)) & mask;
-      if (currEltVal != lowestEltVal) {
-        allMatched = false;
-        break;
-      }
-    }
-
-    if (allMatched) {
-      eltVal = lowestEltVal;
-      break;
-    }
-
-    size *= 2;
-  }
-
-  // Second, determine the rotation to make the element be: 0^m 1^n.
-  for (unsigned i = 0; i < size; ++i) {
-    eltVal = ror(eltVal, size);
-    uint32_t clz = countLeadingZeros(eltVal) - (64 - size);
-    uint32_t cto = CountTrailingOnes_64(eltVal);
-
-    if (clz + cto == size) {
-      // Encode in immr the number of RORs it would take to get *from* this
-      // element value to our target value, where i+1 is the number of RORs
-      // to go the opposite direction.
-      unsigned immr = size - (i + 1);
-
-      // If size has a 1 in the n'th bit, create a value that has zeroes in
-      // bits [0, n] and ones above that.
-      uint64_t nimms = ~(size-1) << 1;
-
-      // Or the CTO value into the low bits, which must be below the Nth
-      // bit mentioned above.
-      nimms |= (cto-1);
-
-      // Extract the seventh bit and toggle it to create the N field.
-      unsigned N = ((nimms >> 6) & 1) ^ 1;
-
-      encoding = (N << 12) | (immr << 6) | (nimms & 0x3f);
-      return true;
-    }
-  }
-
-  return false;
-}
-
-/// isLogicalImmediate - Return true if the immediate is valid for a logical
-/// immediate instruction of the given register size. Return false otherwise.
-static inline bool isLogicalImmediate(uint64_t imm, unsigned regSize) {
-  uint64_t encoding;
-  return processLogicalImmediate(imm, regSize, encoding);
-}
-
-/// encodeLogicalImmediate - Return the encoded immediate value for a logical
-/// immediate instruction of the given register size.
-static inline uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize) {
-  uint64_t encoding = 0;
-  bool res = processLogicalImmediate(imm, regSize, encoding);
-  assert(res && "invalid logical immediate");
-  (void)res;
-  return encoding;
-}
-
-/// decodeLogicalImmediate - Decode a logical immediate value in the form
-/// "N:immr:imms" (where the immr and imms fields are each 6 bits) into the
-/// integer value it represents with regSize bits.
-static inline uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize) {
-  // Extract the N, imms, and immr fields.
-  unsigned N = (val >> 12) & 1;
-  unsigned immr = (val >> 6) & 0x3f;
-  unsigned imms = val & 0x3f;
-
-  assert((regSize == 64 || N == 0) && "undefined logical immediate encoding");
-  int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
-  assert(len >= 0 && "undefined logical immediate encoding");
-  unsigned size = (1 << len);
-  unsigned R = immr & (size - 1);
-  unsigned S = imms & (size - 1);
-  assert(S != size - 1 && "undefined logical immediate encoding");
-  uint64_t pattern = (1ULL << (S + 1)) - 1;
-  for (unsigned i = 0; i < R; ++i)
-    pattern = ror(pattern, size);
-
-  // Replicate the pattern to fill the regSize.
-  while (size != regSize) {
-    pattern |= (pattern << size);
-    size *= 2;
-  }
-  return pattern;
-}
-
-/// isValidDecodeLogicalImmediate - Check to see if the logical immediate value
-/// in the form "N:immr:imms" (where the immr and imms fields are each 6 bits)
-/// is a valid encoding for an integer value with regSize bits.
-static inline bool isValidDecodeLogicalImmediate(uint64_t val,
-                                                 unsigned regSize) {
-  // Extract the N and imms fields needed for checking.
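// Continuing the worked example above: val = 0x027 gives N = 0 and
// imms = 0b100111, so len = 31 - countLeadingZeros(0b011000) = 4,
// size = 16, and S = 7 != 15 -- a valid encoding.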
-  unsigned N = (val >> 12) & 1;
-  unsigned imms = val & 0x3f;
-
-  if (regSize == 32 && N != 0) // undefined logical immediate encoding
-    return false;
-  int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
-  if (len < 0) // undefined logical immediate encoding
-    return false;
-  unsigned size = (1 << len);
-  unsigned S = imms & (size - 1);
-  if (S == size - 1) // undefined logical immediate encoding
-    return false;
-
-  return true;
-}
-
-//===----------------------------------------------------------------------===//
-// Floating-point Immediates
-//
-static inline float getFPImmFloat(unsigned Imm) {
-  // We expect an 8-bit binary encoding of a floating-point number here.
-  union {
-    uint32_t I;
-    float F;
-  } FPUnion;
-
-  uint8_t Sign = (Imm >> 7) & 0x1;
-  uint8_t Exp = (Imm >> 4) & 0x7;
-  uint8_t Mantissa = Imm & 0xf;
-
-  //   8-bit FP IEEE Float Encoding
-  //   abcd efgh aBbbbbbc defgh000 00000000 00000000
-  //
-  // where B = NOT(b);
-
-  FPUnion.I = 0;
-  FPUnion.I |= Sign << 31;
-  FPUnion.I |= ((Exp & 0x4) != 0 ? 0 : 1) << 30;
-  FPUnion.I |= ((Exp & 0x4) != 0 ? 0x1f : 0) << 25;
-  FPUnion.I |= (Exp & 0x3) << 23;
-  FPUnion.I |= Mantissa << 19;
-  return FPUnion.F;
-}
-
-/// getFP32Imm - Return an 8-bit floating-point version of the 32-bit
-/// floating-point value. If the value cannot be represented as an 8-bit
-/// floating-point value, then return -1.
-static inline int getFP32Imm(const APInt &Imm) {
-  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
-  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
-  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
-
-  // We can handle 4 bits of mantissa.
-  // mantissa = (16+UInt(e:f:g:h))/16.
-  if (Mantissa & 0x7ffff)
-    return -1;
-  Mantissa >>= 19;
-  if ((Mantissa & 0xf) != Mantissa)
-    return -1;
-
-  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
-  if (Exp < -3 || Exp > 4)
-    return -1;
-  Exp = ((Exp+3) & 0x7) ^ 4;
-
-  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
-}
-
-static inline int getFP32Imm(const APFloat &FPImm) {
-  return getFP32Imm(FPImm.bitcastToAPInt());
-}
-
-/// getFP64Imm - Return an 8-bit floating-point version of the 64-bit
-/// floating-point value. If the value cannot be represented as an 8-bit
-/// floating-point value, then return -1.
-static inline int getFP64Imm(const APInt &Imm) {
-  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
-  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;  // -1022 to 1023
-  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL;
-
-  // We can handle 4 bits of mantissa.
-  // mantissa = (16+UInt(e:f:g:h))/16.
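// Worked example of the imm8 format sign(1):exp(3):mantissa(4) used by
// getFP32Imm/getFP64Imm: 1.0 has Exp = 0 and Mantissa = 0, and
// ((0+3) & 0x7) ^ 4 == 0b111, so it encodes as imm8 = 0x70;
// getFPImmFloat(0x70) rebuilds 0x3F800000, i.e. 1.0f.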
- if (Mantissa & 0xffffffffffffULL) - return -1; - Mantissa >>= 48; - if ((Mantissa & 0xf) != Mantissa) - return -1; - - // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 - if (Exp < -3 || Exp > 4) - return -1; - Exp = ((Exp+3) & 0x7) ^ 4; - - return ((int)Sign << 7) | (Exp << 4) | Mantissa; -} - -static inline int getFP64Imm(const APFloat &FPImm) { - return getFP64Imm(FPImm.bitcastToAPInt()); -} - -//===--------------------------------------------------------------------===// -// AdvSIMD Modified Immediates -//===--------------------------------------------------------------------===// - -// 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh -static inline bool isAdvSIMDModImmType1(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffffff00ffffff00ULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType1(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType1(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 32) | EncVal; -} - -// 0x00 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 -static inline bool isAdvSIMDModImmType2(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffff00ffffff00ffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType2(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType2(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 40) | (EncVal << 8); -} - -// 0x00 abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 -static inline bool isAdvSIMDModImmType3(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xff00ffffff00ffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType3(uint64_t Imm) { - return (Imm & 0xff0000ULL) >> 16; -} - -static inline uint64_t decodeAdvSIMDModImmType3(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 16); -} - -// abcdefgh 0x00 0x00 0x00 abcdefgh 0x00 0x00 0x00 -static inline bool isAdvSIMDModImmType4(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0x00ffffff00ffffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType4(uint64_t Imm) { - return (Imm & 0xff000000ULL) >> 24; -} - -static inline uint64_t decodeAdvSIMDModImmType4(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 56) | (EncVal << 24); -} - -// 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh -static inline bool isAdvSIMDModImmType5(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (((Imm & 0x00ff0000ULL) >> 16) == (Imm & 0x000000ffULL)) && - ((Imm & 0xff00ff00ff00ff00ULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType5(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType5(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 32) | (EncVal << 16) | EncVal; -} - -// abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 abcdefgh 0x00 -static inline bool isAdvSIMDModImmType6(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (((Imm & 0xff000000ULL) >> 16) == (Imm & 0x0000ff00ULL)) && - ((Imm & 0x00ff00ff00ff00ffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType6(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType6(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 56) | (EncVal << 40) | (EncVal << 24) | (EncVal << 8); -} - -// 0x00 0x00 abcdefgh 0xFF 0x00 0x00 abcdefgh 0xFF -static inline bool isAdvSIMDModImmType7(uint64_t 
Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xffff00ffffff00ffULL) == 0x000000ff000000ffULL); -} - -static inline uint8_t encodeAdvSIMDModImmType7(uint64_t Imm) { - return (Imm & 0xff00ULL) >> 8; -} - -static inline uint64_t decodeAdvSIMDModImmType7(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 40) | (EncVal << 8) | 0x000000ff000000ffULL; -} - -// 0x00 abcdefgh 0xFF 0xFF 0x00 abcdefgh 0xFF 0xFF -static inline bool isAdvSIMDModImmType8(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm & 0xff00ffffff00ffffULL) == 0x0000ffff0000ffffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType8(uint8_t Imm) { - uint64_t EncVal = Imm; - return (EncVal << 48) | (EncVal << 16) | 0x0000ffff0000ffffULL; -} - -static inline uint8_t encodeAdvSIMDModImmType8(uint64_t Imm) { - return (Imm & 0x00ff0000ULL) >> 16; -} - -// abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh abcdefgh -static inline bool isAdvSIMDModImmType9(uint64_t Imm) { - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - ((Imm >> 48) == (Imm & 0x0000ffffULL)) && - ((Imm >> 56) == (Imm & 0x000000ffULL)); -} - -static inline uint8_t encodeAdvSIMDModImmType9(uint64_t Imm) { - return (Imm & 0xffULL); -} - -static inline uint64_t decodeAdvSIMDModImmType9(uint8_t Imm) { - uint64_t EncVal = Imm; - EncVal |= (EncVal << 8); - EncVal |= (EncVal << 16); - EncVal |= (EncVal << 32); - return EncVal; -} - -// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh -// cmode: 1110, op: 1 -static inline bool isAdvSIMDModImmType10(uint64_t Imm) { - uint64_t ByteA = Imm & 0xff00000000000000ULL; - uint64_t ByteB = Imm & 0x00ff000000000000ULL; - uint64_t ByteC = Imm & 0x0000ff0000000000ULL; - uint64_t ByteD = Imm & 0x000000ff00000000ULL; - uint64_t ByteE = Imm & 0x00000000ff000000ULL; - uint64_t ByteF = Imm & 0x0000000000ff0000ULL; - uint64_t ByteG = Imm & 0x000000000000ff00ULL; - uint64_t ByteH = Imm & 0x00000000000000ffULL; - - return (ByteA == 0ULL || ByteA == 0xff00000000000000ULL) && - (ByteB == 0ULL || ByteB == 0x00ff000000000000ULL) && - (ByteC == 0ULL || ByteC == 0x0000ff0000000000ULL) && - (ByteD == 0ULL || ByteD == 0x000000ff00000000ULL) && - (ByteE == 0ULL || ByteE == 0x00000000ff000000ULL) && - (ByteF == 0ULL || ByteF == 0x0000000000ff0000ULL) && - (ByteG == 0ULL || ByteG == 0x000000000000ff00ULL) && - (ByteH == 0ULL || ByteH == 0x00000000000000ffULL); -} - -static inline uint8_t encodeAdvSIMDModImmType10(uint64_t Imm) { - uint8_t BitA = (Imm & 0xff00000000000000ULL) != 0; - uint8_t BitB = (Imm & 0x00ff000000000000ULL) != 0; - uint8_t BitC = (Imm & 0x0000ff0000000000ULL) != 0; - uint8_t BitD = (Imm & 0x000000ff00000000ULL) != 0; - uint8_t BitE = (Imm & 0x00000000ff000000ULL) != 0; - uint8_t BitF = (Imm & 0x0000000000ff0000ULL) != 0; - uint8_t BitG = (Imm & 0x000000000000ff00ULL) != 0; - uint8_t BitH = (Imm & 0x00000000000000ffULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType10(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0xff00000000000000ULL; - if (Imm & 0x40) EncVal |= 0x00ff000000000000ULL; - if (Imm & 0x20) EncVal |= 0x0000ff0000000000ULL; - if (Imm & 0x10) EncVal |= 0x000000ff00000000ULL; - if (Imm & 0x08) EncVal |= 0x00000000ff000000ULL; - 
if (Imm & 0x04) EncVal |= 0x0000000000ff0000ULL; - if (Imm & 0x02) EncVal |= 0x000000000000ff00ULL; - if (Imm & 0x01) EncVal |= 0x00000000000000ffULL; - return EncVal; -} - -// aBbbbbbc defgh000 0x00 0x00 aBbbbbbc defgh000 0x00 0x00 -static inline bool isAdvSIMDModImmType11(uint64_t Imm) { - uint64_t BString = (Imm & 0x7E000000ULL) >> 25; - return ((Imm >> 32) == (Imm & 0xffffffffULL)) && - (BString == 0x1f || BString == 0x20) && - ((Imm & 0x0007ffff0007ffffULL) == 0); -} - -static inline uint8_t encodeAdvSIMDModImmType11(uint64_t Imm) { - uint8_t BitA = (Imm & 0x80000000ULL) != 0; - uint8_t BitB = (Imm & 0x20000000ULL) != 0; - uint8_t BitC = (Imm & 0x01000000ULL) != 0; - uint8_t BitD = (Imm & 0x00800000ULL) != 0; - uint8_t BitE = (Imm & 0x00400000ULL) != 0; - uint8_t BitF = (Imm & 0x00200000ULL) != 0; - uint8_t BitG = (Imm & 0x00100000ULL) != 0; - uint8_t BitH = (Imm & 0x00080000ULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType11(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0x80000000ULL; - if (Imm & 0x40) EncVal |= 0x3e000000ULL; - else EncVal |= 0x40000000ULL; - if (Imm & 0x20) EncVal |= 0x01000000ULL; - if (Imm & 0x10) EncVal |= 0x00800000ULL; - if (Imm & 0x08) EncVal |= 0x00400000ULL; - if (Imm & 0x04) EncVal |= 0x00200000ULL; - if (Imm & 0x02) EncVal |= 0x00100000ULL; - if (Imm & 0x01) EncVal |= 0x00080000ULL; - return (EncVal << 32) | EncVal; -} - -// aBbbbbbb bbcdefgh 0x00 0x00 0x00 0x00 0x00 0x00 -static inline bool isAdvSIMDModImmType12(uint64_t Imm) { - uint64_t BString = (Imm & 0x7fc0000000000000ULL) >> 54; - return ((BString == 0xff || BString == 0x100) && - ((Imm & 0x0000ffffffffffffULL) == 0)); -} - -static inline uint8_t encodeAdvSIMDModImmType12(uint64_t Imm) { - uint8_t BitA = (Imm & 0x8000000000000000ULL) != 0; - uint8_t BitB = (Imm & 0x0040000000000000ULL) != 0; - uint8_t BitC = (Imm & 0x0020000000000000ULL) != 0; - uint8_t BitD = (Imm & 0x0010000000000000ULL) != 0; - uint8_t BitE = (Imm & 0x0008000000000000ULL) != 0; - uint8_t BitF = (Imm & 0x0004000000000000ULL) != 0; - uint8_t BitG = (Imm & 0x0002000000000000ULL) != 0; - uint8_t BitH = (Imm & 0x0001000000000000ULL) != 0; - - uint8_t EncVal = BitA; - EncVal <<= 1; - EncVal |= BitB; - EncVal <<= 1; - EncVal |= BitC; - EncVal <<= 1; - EncVal |= BitD; - EncVal <<= 1; - EncVal |= BitE; - EncVal <<= 1; - EncVal |= BitF; - EncVal <<= 1; - EncVal |= BitG; - EncVal <<= 1; - EncVal |= BitH; - return EncVal; -} - -static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) { - uint64_t EncVal = 0; - if (Imm & 0x80) EncVal |= 0x8000000000000000ULL; - if (Imm & 0x40) EncVal |= 0x3fc0000000000000ULL; - else EncVal |= 0x4000000000000000ULL; - if (Imm & 0x20) EncVal |= 0x0020000000000000ULL; - if (Imm & 0x10) EncVal |= 0x0010000000000000ULL; - if (Imm & 0x08) EncVal |= 0x0008000000000000ULL; - if (Imm & 0x04) EncVal |= 0x0004000000000000ULL; - if (Imm & 0x02) EncVal |= 0x0002000000000000ULL; - if (Imm & 0x01) EncVal |= 0x0001000000000000ULL; - return (EncVal << 32) | EncVal; -} - -} // end namespace ARM64_AM - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp deleted file mode 100644 index 26813e2..0000000 --- 
a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp +++ /dev/null @@ -1,533 +0,0 @@ -//===-- ARM64AsmBackend.cpp - ARM64 Assembler Backend ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "ARM64.h" -#include "ARM64RegisterInfo.h" -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "llvm/ADT/Triple.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCDirectives.h" -#include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -using namespace llvm; - -namespace { - -class ARM64AsmBackend : public MCAsmBackend { - static const unsigned PCRelFlagVal = - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel; - -public: - ARM64AsmBackend(const Target &T) : MCAsmBackend() {} - - unsigned getNumFixupKinds() const { return ARM64::NumTargetFixupKinds; } - - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[ARM64::NumTargetFixupKinds] = { - // This table *must* be in the order that the fixup_* kinds are defined in - // ARM64FixupKinds.h. - // - // Name Offset (bits) Size (bits) Flags - { "fixup_arm64_pcrel_adr_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal }, - { "fixup_arm64_add_imm12", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale1", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale2", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale4", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale8", 10, 12, 0 }, - { "fixup_arm64_ldst_imm12_scale16", 10, 12, 0 }, - { "fixup_arm64_movw", 5, 16, 0 }, - { "fixup_arm64_pcrel_branch14", 5, 14, PCRelFlagVal }, - { "fixup_arm64_pcrel_imm19", 5, 19, PCRelFlagVal }, - { "fixup_arm64_pcrel_branch26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_pcrel_call26", 0, 26, PCRelFlagVal }, - { "fixup_arm64_tlsdesc_call", 0, 0, 0 } - }; - - if (Kind < FirstTargetFixupKind) - return MCAsmBackend::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; - } - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const; - - bool mayNeedRelaxation(const MCInst &Inst) const; - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const; - void relaxInstruction(const MCInst &Inst, MCInst &Res) const; - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; - - void HandleAssemblerFlag(MCAssemblerFlag Flag) {} - - unsigned getPointerSize() const { return 8; } -}; - -} // end anonymous namespace - -/// \brief The number of bytes the fixup may change. 
-static unsigned getFixupKindNumBytes(unsigned Kind) {
-  switch (Kind) {
-  default:
-    assert(0 && "Unknown fixup kind!");
-
-  case ARM64::fixup_arm64_tlsdesc_call:
-    return 0;
-
-  case FK_Data_1:
-    return 1;
-
-  case FK_Data_2:
-  case ARM64::fixup_arm64_movw:
-    return 2;
-
-  case ARM64::fixup_arm64_pcrel_branch14:
-  case ARM64::fixup_arm64_add_imm12:
-  case ARM64::fixup_arm64_ldst_imm12_scale1:
-  case ARM64::fixup_arm64_ldst_imm12_scale2:
-  case ARM64::fixup_arm64_ldst_imm12_scale4:
-  case ARM64::fixup_arm64_ldst_imm12_scale8:
-  case ARM64::fixup_arm64_ldst_imm12_scale16:
-  case ARM64::fixup_arm64_pcrel_imm19:
-    return 3;
-
-  case ARM64::fixup_arm64_pcrel_adr_imm21:
-  case ARM64::fixup_arm64_pcrel_adrp_imm21:
-  case ARM64::fixup_arm64_pcrel_branch26:
-  case ARM64::fixup_arm64_pcrel_call26:
-  case FK_Data_4:
-    return 4;
-
-  case FK_Data_8:
-    return 8;
-  }
-}
-
-static unsigned AdrImmBits(unsigned Value) {
-  unsigned lo2 = Value & 0x3;
-  unsigned hi19 = (Value & 0x1ffffc) >> 2;
-  return (hi19 << 5) | (lo2 << 29);
-}
-
-static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
-  int64_t SignedValue = static_cast<int64_t>(Value);
-  switch (Kind) {
-  default:
-    assert(false && "Unknown fixup kind!");
-  case ARM64::fixup_arm64_pcrel_adr_imm21:
-    if (SignedValue > 2097151 || SignedValue < -2097152)
-      report_fatal_error("fixup value out of range");
-    return AdrImmBits(Value & 0x1fffffULL);
-  case ARM64::fixup_arm64_pcrel_adrp_imm21:
-    return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
-  case ARM64::fixup_arm64_pcrel_imm19:
-    // Signed 21-bit immediate
-    if (SignedValue > 2097151 || SignedValue < -2097152)
-      report_fatal_error("fixup value out of range");
-    // Low two bits are not encoded.
-    return (Value >> 2) & 0x7ffff;
-  case ARM64::fixup_arm64_add_imm12:
-  case ARM64::fixup_arm64_ldst_imm12_scale1:
-    // Unsigned 12-bit immediate
-    if (Value >= 0x1000)
-      report_fatal_error("invalid imm12 fixup value");
-    return Value;
-  case ARM64::fixup_arm64_ldst_imm12_scale2:
-    // Unsigned 12-bit immediate which gets multiplied by 2
-    if (Value & 1 || Value >= 0x2000)
-      report_fatal_error("invalid imm12 fixup value");
-    return Value >> 1;
-  case ARM64::fixup_arm64_ldst_imm12_scale4:
-    // Unsigned 12-bit immediate which gets multiplied by 4
-    if (Value & 3 || Value >= 0x4000)
-      report_fatal_error("invalid imm12 fixup value");
-    return Value >> 2;
-  case ARM64::fixup_arm64_ldst_imm12_scale8:
-    // Unsigned 12-bit immediate which gets multiplied by 8
-    if (Value & 7 || Value >= 0x8000)
-      report_fatal_error("invalid imm12 fixup value");
-    return Value >> 3;
-  case ARM64::fixup_arm64_ldst_imm12_scale16:
-    // Unsigned 12-bit immediate which gets multiplied by 16
-    if (Value & 15 || Value >= 0x10000)
-      report_fatal_error("invalid imm12 fixup value");
-    return Value >> 4;
-  case ARM64::fixup_arm64_movw:
-    report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet");
-    return Value;
-  case ARM64::fixup_arm64_pcrel_branch14:
-    // Signed 16-bit immediate
-    if (SignedValue > 32767 || SignedValue < -32768)
-      report_fatal_error("fixup value out of range");
-    // Low two bits are not encoded (4-byte alignment assumed).
-    if (Value & 0x3)
-      report_fatal_error("fixup not sufficiently aligned");
-    return (Value >> 2) & 0x3fff;
-  case ARM64::fixup_arm64_pcrel_branch26:
-  case ARM64::fixup_arm64_pcrel_call26:
-    // Signed 28-bit immediate
-    if (SignedValue > 134217727 || SignedValue < -134217728)
-      report_fatal_error("fixup value out of range");
-    // Low two bits are not encoded (4-byte alignment assumed).
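// (Range-check arithmetic: 134217727 == 2^27 - 1, so the test admits any
// signed 28-bit byte offset, i.e. +/-128 MiB; shifting out the two
// alignment bits leaves the 26 bits carried by the branch26/call26 fixup.)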
-    if (Value & 0x3)
-      report_fatal_error("fixup not sufficiently aligned");
-    return (Value >> 2) & 0x3ffffff;
-  case FK_Data_1:
-  case FK_Data_2:
-  case FK_Data_4:
-  case FK_Data_8:
-    return Value;
-  }
-}
-
-void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
-                                 unsigned DataSize, uint64_t Value,
-                                 bool IsPCRel) const {
-  unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
-  if (!Value)
-    return; // Doesn't change encoding.
-  MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
-  // Apply any target-specific value adjustments.
-  Value = adjustFixupValue(Fixup.getKind(), Value);
-
-  // Shift the value into position.
-  Value <<= Info.TargetOffset;
-
-  unsigned Offset = Fixup.getOffset();
-  assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
-
-  // For each byte of the fragment that the fixup touches, mask in the
-  // bits from the fixup value.
-  for (unsigned i = 0; i != NumBytes; ++i)
-    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
-}
-
-bool ARM64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
-  return false;
-}
-
-bool ARM64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
-                                           const MCRelaxableFragment *DF,
-                                           const MCAsmLayout &Layout) const {
-  // FIXME: This isn't correct for ARM64. Just moving the "generic" logic
-  // into the targets for now.
-  //
-  // Relax if the value is too big for a (signed) i8.
-  return int64_t(Value) != int64_t(int8_t(Value));
-}
-
-void ARM64AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
-  assert(false && "ARM64AsmBackend::relaxInstruction() unimplemented");
-}
-
-bool ARM64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
-  // If the count is not 4-byte aligned, we must be writing data into the text
-  // section (otherwise we have unaligned instructions, and thus have far
-  // bigger problems), so just write zeros instead.
-  if ((Count & 3) != 0) {
-    for (uint64_t i = 0, e = (Count & 3); i != e; ++i)
-      OW->Write8(0);
-  }
-
-  // We are properly aligned, so write NOPs as requested.
-  Count /= 4;
-  for (uint64_t i = 0; i != Count; ++i)
-    OW->Write32(0xd503201f);
-  return true;
-}
-
-namespace {
-
-namespace CU {
-
-/// \brief Compact unwind encoding values.
-enum CompactUnwindEncodings {
-  /// \brief A "frameless" leaf function, where no non-volatile registers are
-  /// saved. The return remains in LR throughout the function.
-  UNWIND_ARM64_MODE_FRAMELESS = 0x02000000,
-
-  /// \brief No compact unwind encoding available. Instead the low 23-bits of
-  /// the compact unwind encoding is the offset of the DWARF FDE in the
-  /// __eh_frame section. This mode is never used in object files. It is only
-  /// generated by the linker in final linked images, which have only DWARF
-  /// info for a function.
-  UNWIND_ARM64_MODE_DWARF = 0x03000000,
-
-  /// \brief This is a standard arm64 prologue where FP/LR are immediately
-  /// pushed on the stack, then SP is copied to FP. If there are any
-  /// non-volatile registers saved, they are copied into the stack frame in
-  /// pairs in a contiguous range right below the saved FP/LR pair. Any subset
-  /// of the five X pairs and four D pairs can be saved, but the memory layout
-  /// must be in register number order.
-  UNWIND_ARM64_MODE_FRAME = 0x04000000,
-
-  /// \brief Frame register pair encodings.
-  UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001,
-  UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002,
-  UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004,
-  UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008,
-  UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010,
-  UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100,
-  UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200,
-  UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400,
-  UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800
-};
-
-} // end CU namespace
-
-// FIXME: This should be in a separate file.
-class DarwinARM64AsmBackend : public ARM64AsmBackend {
-  const MCRegisterInfo &MRI;
-
-  /// \brief Encode compact unwind stack adjustment for frameless functions.
-  /// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h.
-  /// The stack size always needs to be 16 byte aligned.
-  uint32_t encodeStackAdjustment(uint32_t StackSize) const {
-    return (StackSize / 16) << 12;
-  }
-
-public:
-  DarwinARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
-      : ARM64AsmBackend(T), MRI(MRI) {}
-
-  MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
-    return createARM64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
-                                       MachO::CPU_SUBTYPE_ARM64_ALL);
-  }
-
-  virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
-    // Any section for which the linker breaks things into atoms needs to
-    // preserve symbols, including assembler local symbols, to identify
-    // those atoms. These sections are:
-    // Sections of type:
-    //
-    //    S_CSTRING_LITERALS  (e.g. __cstring)
-    //    S_LITERAL_POINTERS  (e.g. objc selector pointers)
-    //    S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
-    //
-    // Sections named:
-    //
-    //    __TEXT,__eh_frame
-    //    __TEXT,__ustring
-    //    __DATA,__cfstring
-    //    __DATA,__objc_classrefs
-    //    __DATA,__objc_catlist
-    //
-    // FIXME: It would be better if the compiler used actual linker local
-    // symbols for each of these sections rather than preserving what
-    // are ostensibly assembler local symbols.
-    const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
-    return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
-            SMO.getType() == MachO::S_4BYTE_LITERALS ||
-            SMO.getType() == MachO::S_8BYTE_LITERALS ||
-            SMO.getType() == MachO::S_16BYTE_LITERALS ||
-            SMO.getType() == MachO::S_LITERAL_POINTERS ||
-            (SMO.getSegmentName() == "__TEXT" &&
-             (SMO.getSectionName() == "__eh_frame" ||
-              SMO.getSectionName() == "__ustring")) ||
-            (SMO.getSegmentName() == "__DATA" &&
-             (SMO.getSectionName() == "__cfstring" ||
-              SMO.getSectionName() == "__objc_classrefs" ||
-              SMO.getSectionName() == "__objc_catlist")));
-  }
-
-  /// \brief Generate the compact unwind encoding from the CFI directives.
-  virtual uint32_t
-  generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const
-      override {
-    if (Instrs.empty())
-      return CU::UNWIND_ARM64_MODE_FRAMELESS;
-
-    bool HasFP = false;
-    unsigned StackSize = 0;
-
-    uint32_t CompactUnwindEncoding = 0;
-    for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
-      const MCCFIInstruction &Inst = Instrs[i];
-
-      switch (Inst.getOperation()) {
-      default:
-        // Cannot handle this directive: bail out.
-        return CU::UNWIND_ARM64_MODE_DWARF;
-      case MCCFIInstruction::OpDefCfa: {
-        // Defines a frame pointer.
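// For context: the asserts that follow hard-code the three-directive shape
// of a standard frame-setup sequence, e.g. (illustrative offsets):
//   .cfi_def_cfa w29, 16
//   .cfi_offset w30, -8
//   .cfi_offset w29, -16
// i.e. an OpDefCfa on FP followed by OpOffset records for LR and then FP.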
- assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) == - ARM64::FP && - "Invalid frame pointer!"); - assert(i + 2 < e && "Insufficient CFI instructions to define a frame!"); - - const MCCFIInstruction &LRPush = Instrs[++i]; - assert(LRPush.getOperation() == MCCFIInstruction::OpOffset && - "Link register not pushed!"); - const MCCFIInstruction &FPPush = Instrs[++i]; - assert(FPPush.getOperation() == MCCFIInstruction::OpOffset && - "Frame pointer not pushed!"); - - unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true); - unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true); - - LRReg = getXRegFromWReg(LRReg); - FPReg = getXRegFromWReg(FPReg); - - assert(LRReg == ARM64::LR && FPReg == ARM64::FP && - "Pushing invalid registers for frame!"); - - // Indicate that the function has a frame. - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME; - HasFP = true; - break; - } - case MCCFIInstruction::OpDefCfaOffset: { - assert(StackSize == 0 && "We already have the CFA offset!"); - StackSize = std::abs(Inst.getOffset()); - break; - } - case MCCFIInstruction::OpOffset: { - // Registers are saved in pairs. We expect there to be two consecutive - // `.cfi_offset' instructions with the appropriate registers specified. - unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true); - if (i + 1 == e) - return CU::UNWIND_ARM64_MODE_DWARF; - - const MCCFIInstruction &Inst2 = Instrs[++i]; - if (Inst2.getOperation() != MCCFIInstruction::OpOffset) - return CU::UNWIND_ARM64_MODE_DWARF; - unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true); - - // N.B. The encodings must be in register number order, and the X - // registers before the D registers. - - // X19/X20 pair = 0x00000001, - // X21/X22 pair = 0x00000002, - // X23/X24 pair = 0x00000004, - // X25/X26 pair = 0x00000008, - // X27/X28 pair = 0x00000010 - Reg1 = getXRegFromWReg(Reg1); - Reg2 = getXRegFromWReg(Reg2); - - if (Reg1 == ARM64::X19 && Reg2 == ARM64::X20 && - (CompactUnwindEncoding & 0xF1E) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X19_X20_PAIR; - else if (Reg1 == ARM64::X21 && Reg2 == ARM64::X22 && - (CompactUnwindEncoding & 0xF1C) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X21_X22_PAIR; - else if (Reg1 == ARM64::X23 && Reg2 == ARM64::X24 && - (CompactUnwindEncoding & 0xF18) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X23_X24_PAIR; - else if (Reg1 == ARM64::X25 && Reg2 == ARM64::X26 && - (CompactUnwindEncoding & 0xF10) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X25_X26_PAIR; - else if (Reg1 == ARM64::X27 && Reg2 == ARM64::X28 && - (CompactUnwindEncoding & 0xF00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X27_X28_PAIR; - else { - Reg1 = getDRegFromBReg(Reg1); - Reg2 = getDRegFromBReg(Reg2); - - // D8/D9 pair = 0x00000100, - // D10/D11 pair = 0x00000200, - // D12/D13 pair = 0x00000400, - // D14/D15 pair = 0x00000800 - if (Reg1 == ARM64::D8 && Reg2 == ARM64::D9 && - (CompactUnwindEncoding & 0xE00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D8_D9_PAIR; - else if (Reg1 == ARM64::D10 && Reg2 == ARM64::D11 && - (CompactUnwindEncoding & 0xC00) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D10_D11_PAIR; - else if (Reg1 == ARM64::D12 && Reg2 == ARM64::D13 && - (CompactUnwindEncoding & 0x800) == 0) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D12_D13_PAIR; - else if (Reg1 == ARM64::D14 && Reg2 == ARM64::D15) - CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D14_D15_PAIR; - else - // A pair was pushed which we cannot handle. 
- return CU::UNWIND_ARM64_MODE_DWARF; - } - - break; - } - } - } - - if (!HasFP) { - // With compact unwind info we can only represent stack adjustments of up - // to 65520 bytes. - if (StackSize > 65520) - return CU::UNWIND_ARM64_MODE_DWARF; - - CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAMELESS; - CompactUnwindEncoding |= encodeStackAdjustment(StackSize); - } - - return CompactUnwindEncoding; - } -}; - -} // end anonymous namespace - -namespace { - -class ELFARM64AsmBackend : public ARM64AsmBackend { -public: - uint8_t OSABI; - - ELFARM64AsmBackend(const Target &T, uint8_t OSABI) - : ARM64AsmBackend(T), OSABI(OSABI) {} - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createARM64ELFObjectWriter(OS, OSABI); - } - - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; -}; - -void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, - uint64_t &Value, bool &IsResolved) { - // The ADRP instruction adds some multiple of 0x1000 to the current PC & - // ~0xfff. This means that the required offset to reach a symbol can vary by - // up to one step depending on where the ADRP is in memory. For example: - // - // ADRP x0, there - // there: - // - // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and - // we'll need that as an offset. At any other address "there" will be in the - // same page as the ADRP and the instruction should encode 0x0. Assuming the - // section isn't 0x1000-aligned, we therefore need to delegate this decision - // to the linker -- a relocation! - if ((uint32_t)Fixup.getKind() == ARM64::fixup_arm64_pcrel_adrp_imm21) - IsResolved = false; -} -} - -MCAsmBackend *llvm::createARM64AsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return new DarwinARM64AsmBackend(T, MRI); - - assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target"); - return new ELFARM64AsmBackend(T, TheTriple.getOS()); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h b/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h deleted file mode 100644 index d3c2cf7..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h +++ /dev/null @@ -1,998 +0,0 @@ -//===-- ARM64BaseInfo.h - Top level definitions for ARM64 -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains small standalone helper functions and enum definitions for -// the ARM64 target useful for the compiler back-end and the MC libraries. -// As such, it deliberately does not include references to LLVM core -// code gen types, passes, etc.. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64BASEINFO_H -#define ARM64BASEINFO_H - -#include "ARM64MCTargetDesc.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -inline static unsigned getWRegFromXReg(unsigned Reg) { - switch (Reg) { - case ARM64::X0: return ARM64::W0; - case ARM64::X1: return ARM64::W1; - case ARM64::X2: return ARM64::W2; - case ARM64::X3: return ARM64::W3; - case ARM64::X4: return ARM64::W4; - case ARM64::X5: return ARM64::W5; - case ARM64::X6: return ARM64::W6; - case ARM64::X7: return ARM64::W7; - case ARM64::X8: return ARM64::W8; - case ARM64::X9: return ARM64::W9; - case ARM64::X10: return ARM64::W10; - case ARM64::X11: return ARM64::W11; - case ARM64::X12: return ARM64::W12; - case ARM64::X13: return ARM64::W13; - case ARM64::X14: return ARM64::W14; - case ARM64::X15: return ARM64::W15; - case ARM64::X16: return ARM64::W16; - case ARM64::X17: return ARM64::W17; - case ARM64::X18: return ARM64::W18; - case ARM64::X19: return ARM64::W19; - case ARM64::X20: return ARM64::W20; - case ARM64::X21: return ARM64::W21; - case ARM64::X22: return ARM64::W22; - case ARM64::X23: return ARM64::W23; - case ARM64::X24: return ARM64::W24; - case ARM64::X25: return ARM64::W25; - case ARM64::X26: return ARM64::W26; - case ARM64::X27: return ARM64::W27; - case ARM64::X28: return ARM64::W28; - case ARM64::FP: return ARM64::W29; - case ARM64::LR: return ARM64::W30; - case ARM64::SP: return ARM64::WSP; - case ARM64::XZR: return ARM64::WZR; - } - // For anything else, return it unchanged. - return Reg; -} - -inline static unsigned getXRegFromWReg(unsigned Reg) { - switch (Reg) { - case ARM64::W0: return ARM64::X0; - case ARM64::W1: return ARM64::X1; - case ARM64::W2: return ARM64::X2; - case ARM64::W3: return ARM64::X3; - case ARM64::W4: return ARM64::X4; - case ARM64::W5: return ARM64::X5; - case ARM64::W6: return ARM64::X6; - case ARM64::W7: return ARM64::X7; - case ARM64::W8: return ARM64::X8; - case ARM64::W9: return ARM64::X9; - case ARM64::W10: return ARM64::X10; - case ARM64::W11: return ARM64::X11; - case ARM64::W12: return ARM64::X12; - case ARM64::W13: return ARM64::X13; - case ARM64::W14: return ARM64::X14; - case ARM64::W15: return ARM64::X15; - case ARM64::W16: return ARM64::X16; - case ARM64::W17: return ARM64::X17; - case ARM64::W18: return ARM64::X18; - case ARM64::W19: return ARM64::X19; - case ARM64::W20: return ARM64::X20; - case ARM64::W21: return ARM64::X21; - case ARM64::W22: return ARM64::X22; - case ARM64::W23: return ARM64::X23; - case ARM64::W24: return ARM64::X24; - case ARM64::W25: return ARM64::X25; - case ARM64::W26: return ARM64::X26; - case ARM64::W27: return ARM64::X27; - case ARM64::W28: return ARM64::X28; - case ARM64::W29: return ARM64::FP; - case ARM64::W30: return ARM64::LR; - case ARM64::WSP: return ARM64::SP; - case ARM64::WZR: return ARM64::XZR; - } - // For anything else, return it unchanged. 
- return Reg; -} - -static inline unsigned getBRegFromDReg(unsigned Reg) { - switch (Reg) { - case ARM64::D0: return ARM64::B0; - case ARM64::D1: return ARM64::B1; - case ARM64::D2: return ARM64::B2; - case ARM64::D3: return ARM64::B3; - case ARM64::D4: return ARM64::B4; - case ARM64::D5: return ARM64::B5; - case ARM64::D6: return ARM64::B6; - case ARM64::D7: return ARM64::B7; - case ARM64::D8: return ARM64::B8; - case ARM64::D9: return ARM64::B9; - case ARM64::D10: return ARM64::B10; - case ARM64::D11: return ARM64::B11; - case ARM64::D12: return ARM64::B12; - case ARM64::D13: return ARM64::B13; - case ARM64::D14: return ARM64::B14; - case ARM64::D15: return ARM64::B15; - case ARM64::D16: return ARM64::B16; - case ARM64::D17: return ARM64::B17; - case ARM64::D18: return ARM64::B18; - case ARM64::D19: return ARM64::B19; - case ARM64::D20: return ARM64::B20; - case ARM64::D21: return ARM64::B21; - case ARM64::D22: return ARM64::B22; - case ARM64::D23: return ARM64::B23; - case ARM64::D24: return ARM64::B24; - case ARM64::D25: return ARM64::B25; - case ARM64::D26: return ARM64::B26; - case ARM64::D27: return ARM64::B27; - case ARM64::D28: return ARM64::B28; - case ARM64::D29: return ARM64::B29; - case ARM64::D30: return ARM64::B30; - case ARM64::D31: return ARM64::B31; - } - // For anything else, return it unchanged. - return Reg; -} - - -static inline unsigned getDRegFromBReg(unsigned Reg) { - switch (Reg) { - case ARM64::B0: return ARM64::D0; - case ARM64::B1: return ARM64::D1; - case ARM64::B2: return ARM64::D2; - case ARM64::B3: return ARM64::D3; - case ARM64::B4: return ARM64::D4; - case ARM64::B5: return ARM64::D5; - case ARM64::B6: return ARM64::D6; - case ARM64::B7: return ARM64::D7; - case ARM64::B8: return ARM64::D8; - case ARM64::B9: return ARM64::D9; - case ARM64::B10: return ARM64::D10; - case ARM64::B11: return ARM64::D11; - case ARM64::B12: return ARM64::D12; - case ARM64::B13: return ARM64::D13; - case ARM64::B14: return ARM64::D14; - case ARM64::B15: return ARM64::D15; - case ARM64::B16: return ARM64::D16; - case ARM64::B17: return ARM64::D17; - case ARM64::B18: return ARM64::D18; - case ARM64::B19: return ARM64::D19; - case ARM64::B20: return ARM64::D20; - case ARM64::B21: return ARM64::D21; - case ARM64::B22: return ARM64::D22; - case ARM64::B23: return ARM64::D23; - case ARM64::B24: return ARM64::D24; - case ARM64::B25: return ARM64::D25; - case ARM64::B26: return ARM64::D26; - case ARM64::B27: return ARM64::D27; - case ARM64::B28: return ARM64::D28; - case ARM64::B29: return ARM64::D29; - case ARM64::B30: return ARM64::D30; - case ARM64::B31: return ARM64::D31; - } - // For anything else, return it unchanged. - return Reg; -} - -namespace ARM64CC { - -// The CondCodes constants map directly to the 4-bit encoding of the condition -// field for predicated instructions. 
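// Note the encoding layout below: each condition and its inverse differ
// only in bit 0 (EQ/NE = 0x0/0x1, CS/CC = 0x2/0x3, ..., GT/LE = 0xc/0xd),
// which is why getInvertedCondCode further down is a pure pairwise swap.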
-enum CondCode { // Meaning (integer) Meaning (floating-point) - EQ = 0x0, // Equal Equal - NE = 0x1, // Not equal Not equal, or unordered - CS = 0x2, // Carry set >, ==, or unordered - CC = 0x3, // Carry clear Less than - MI = 0x4, // Minus, negative Less than - PL = 0x5, // Plus, positive or zero >, ==, or unordered - VS = 0x6, // Overflow Unordered - VC = 0x7, // No overflow Not unordered - HI = 0x8, // Unsigned higher Greater than, or unordered - LS = 0x9, // Unsigned lower or same Less than or equal - GE = 0xa, // Greater than or equal Greater than or equal - LT = 0xb, // Less than Less than, or unordered - GT = 0xc, // Greater than Greater than - LE = 0xd, // Less than or equal <, ==, or unordered - AL = 0xe // Always (unconditional) Always (unconditional) -}; - -inline static const char *getCondCodeName(CondCode Code) { - // cond<0> is ignored when cond<3:1> = 111, where 1110 is 0xe (aka AL). - if ((Code & AL) == AL) - Code = AL; - switch (Code) { - case EQ: return "eq"; - case NE: return "ne"; - case CS: return "cs"; - case CC: return "cc"; - case MI: return "mi"; - case PL: return "pl"; - case VS: return "vs"; - case VC: return "vc"; - case HI: return "hi"; - case LS: return "ls"; - case GE: return "ge"; - case LT: return "lt"; - case GT: return "gt"; - case LE: return "le"; - case AL: return "al"; - } - llvm_unreachable("Unknown condition code"); -} - -inline static CondCode getInvertedCondCode(CondCode Code) { - switch (Code) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return NE; - case NE: return EQ; - case CS: return CC; - case CC: return CS; - case MI: return PL; - case PL: return MI; - case VS: return VC; - case VC: return VS; - case HI: return LS; - case LS: return HI; - case GE: return LT; - case LT: return GE; - case GT: return LE; - case LE: return GT; - } -} - -/// Given a condition code, return NZCV flags that would satisfy that condition. -/// The flag bits are in the format expected by the ccmp instructions. -/// Note that many different flag settings can satisfy a given condition code, -/// this function just returns one of them. -inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) { - // NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7. 
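// For example, GE requires N == V, so returning 0 (all flags clear)
// satisfies it, while LT (N != V) is satisfied by returning just N.
// Many flag settings satisfy each condition; these are simply the
// canonical choices returned below.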
- enum { N = 8, Z = 4, C = 2, V = 1 }; - switch (Code) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return Z; // Z == 1 - case NE: return 0; // Z == 0 - case CS: return C; // C == 1 - case CC: return 0; // C == 0 - case MI: return N; // N == 1 - case PL: return 0; // N == 0 - case VS: return V; // V == 1 - case VC: return 0; // V == 0 - case HI: return C; // C == 1 && Z == 0 - case LS: return 0; // C == 0 || Z == 1 - case GE: return 0; // N == V - case LT: return N; // N != V - case GT: return 0; // Z == 0 && N == V - case LE: return Z; // Z == 1 || N != V - } -} -} // end namespace ARM64CC - -namespace ARM64SYS { -enum BarrierOption { - InvalidBarrier = 0xff, - OSHLD = 0x1, - OSHST = 0x2, - OSH = 0x3, - NSHLD = 0x5, - NSHST = 0x6, - NSH = 0x7, - ISHLD = 0x9, - ISHST = 0xa, - ISH = 0xb, - LD = 0xd, - ST = 0xe, - SY = 0xf -}; - -inline static const char *getBarrierOptName(BarrierOption Opt) { - switch (Opt) { - default: return NULL; - case 0x1: return "oshld"; - case 0x2: return "oshst"; - case 0x3: return "osh"; - case 0x5: return "nshld"; - case 0x6: return "nshst"; - case 0x7: return "nsh"; - case 0x9: return "ishld"; - case 0xa: return "ishst"; - case 0xb: return "ish"; - case 0xd: return "ld"; - case 0xe: return "st"; - case 0xf: return "sy"; - } -} - -#define A64_SYSREG_ENC(op0,CRn,op2,CRm,op1) ((op0) << 14 | (op1) << 11 | \ - (CRn) << 7 | (CRm) << 3 | (op2)) -enum SystemRegister { - InvalidSystemReg = 0, - // Table in section 3.10.3 - SPSR_EL1 = 0xc200, - SPSR_svc = SPSR_EL1, - ELR_EL1 = 0xc201, - SP_EL0 = 0xc208, - SPSel = 0xc210, - CurrentEL = 0xc212, - DAIF = 0xda11, - NZCV = 0xda10, - FPCR = 0xda20, - FPSR = 0xda21, - DSPSR = 0xda28, - DLR = 0xda29, - SPSR_EL2 = 0xe200, - SPSR_hyp = SPSR_EL2, - ELR_EL2 = 0xe201, - SP_EL1 = 0xe208, - SPSR_irq = 0xe218, - SPSR_abt = 0xe219, - SPSR_und = 0xe21a, - SPSR_fiq = 0xe21b, - SPSR_EL3 = 0xf200, - ELR_EL3 = 0xf201, - SP_EL2 = 0xf208, - - - // Table in section 3.10.8 - MIDR_EL1 = 0xc000, - CTR_EL0 = 0xd801, - MPIDR_EL1 = 0xc005, - ECOIDR_EL1 = 0xc006, - DCZID_EL0 = 0xd807, - MVFR0_EL1 = 0xc018, - MVFR1_EL1 = 0xc019, - ID_AA64PFR0_EL1 = 0xc020, - ID_AA64PFR1_EL1 = 0xc021, - ID_AA64DFR0_EL1 = 0xc028, - ID_AA64DFR1_EL1 = 0xc029, - ID_AA64ISAR0_EL1 = 0xc030, - ID_AA64ISAR1_EL1 = 0xc031, - ID_AA64MMFR0_EL1 = 0xc038, - ID_AA64MMFR1_EL1 = 0xc039, - CCSIDR_EL1 = 0xc800, - CLIDR_EL1 = 0xc801, - AIDR_EL1 = 0xc807, - CSSELR_EL1 = 0xd000, - VPIDR_EL2 = 0xe000, - VMPIDR_EL2 = 0xe005, - SCTLR_EL1 = 0xc080, - SCTLR_EL2 = 0xe080, - SCTLR_EL3 = 0xf080, - ACTLR_EL1 = 0xc081, - ACTLR_EL2 = 0xe081, - ACTLR_EL3 = 0xf081, - CPACR_EL1 = 0xc082, - CPTR_EL2 = 0xe08a, - CPTR_EL3 = 0xf08a, - SCR_EL3 = 0xf088, - HCR_EL2 = 0xe088, - MDCR_EL2 = 0xe089, - MDCR_EL3 = 0xf099, - HSTR_EL2 = 0xe08b, - HACR_EL2 = 0xe08f, - TTBR0_EL1 = 0xc100, - TTBR1_EL1 = 0xc101, - TTBR0_EL2 = 0xe100, - TTBR0_EL3 = 0xf100, - VTTBR_EL2 = 0xe108, - TCR_EL1 = 0xc102, - TCR_EL2 = 0xe102, - TCR_EL3 = 0xf102, - VTCR_EL2 = 0xe10a, - ADFSR_EL1 = 0xc288, - AIFSR_EL1 = 0xc289, - ADFSR_EL2 = 0xe288, - AIFSR_EL2 = 0xe289, - ADFSR_EL3 = 0xf288, - AIFSR_EL3 = 0xf289, - ESR_EL1 = 0xc290, - ESR_EL2 = 0xe290, - ESR_EL3 = 0xf290, - FAR_EL1 = 0xc300, - FAR_EL2 = 0xe300, - FAR_EL3 = 0xf300, - HPFAR_EL2 = 0xe304, - PAR_EL1 = 0xc3a0, - MAIR_EL1 = 0xc510, - MAIR_EL2 = 0xe510, - MAIR_EL3 = 0xf510, - AMAIR_EL1 = 0xc518, - AMAIR_EL2 = 0xe518, - AMAIR_EL3 = 0xf518, - VBAR_EL1 = 0xc600, - VBAR_EL2 = 0xe600, - VBAR_EL3 = 0xf600, - RVBAR_EL1 = 0xc601, - RVBAR_EL2 = 0xe601, - RVBAR_EL3 = 0xf601, - 
ISR_EL1 = 0xc608, - CONTEXTIDR_EL1 = 0xc681, - TPIDR_EL0 = 0xde82, - TPIDRRO_EL0 = 0xde83, - TPIDR_EL1 = 0xc684, - TPIDR_EL2 = 0xe682, - TPIDR_EL3 = 0xf682, - TEECR32_EL1 = 0x9000, - CNTFRQ_EL0 = 0xdf00, - CNTPCT_EL0 = 0xdf01, - CNTVCT_EL0 = 0xdf02, - CNTVOFF_EL2 = 0xe703, - CNTKCTL_EL1 = 0xc708, - CNTHCTL_EL2 = 0xe708, - CNTP_TVAL_EL0 = 0xdf10, - CNTP_CTL_EL0 = 0xdf11, - CNTP_CVAL_EL0 = 0xdf12, - CNTV_TVAL_EL0 = 0xdf18, - CNTV_CTL_EL0 = 0xdf19, - CNTV_CVAL_EL0 = 0xdf1a, - CNTHP_TVAL_EL2 = 0xe710, - CNTHP_CTL_EL2 = 0xe711, - CNTHP_CVAL_EL2 = 0xe712, - CNTPS_TVAL_EL1 = 0xff10, - CNTPS_CTL_EL1 = 0xff11, - CNTPS_CVAL_EL1= 0xff12, - - PMEVCNTR0_EL0 = 0xdf40, - PMEVCNTR1_EL0 = 0xdf41, - PMEVCNTR2_EL0 = 0xdf42, - PMEVCNTR3_EL0 = 0xdf43, - PMEVCNTR4_EL0 = 0xdf44, - PMEVCNTR5_EL0 = 0xdf45, - PMEVCNTR6_EL0 = 0xdf46, - PMEVCNTR7_EL0 = 0xdf47, - PMEVCNTR8_EL0 = 0xdf48, - PMEVCNTR9_EL0 = 0xdf49, - PMEVCNTR10_EL0 = 0xdf4a, - PMEVCNTR11_EL0 = 0xdf4b, - PMEVCNTR12_EL0 = 0xdf4c, - PMEVCNTR13_EL0 = 0xdf4d, - PMEVCNTR14_EL0 = 0xdf4e, - PMEVCNTR15_EL0 = 0xdf4f, - PMEVCNTR16_EL0 = 0xdf50, - PMEVCNTR17_EL0 = 0xdf51, - PMEVCNTR18_EL0 = 0xdf52, - PMEVCNTR19_EL0 = 0xdf53, - PMEVCNTR20_EL0 = 0xdf54, - PMEVCNTR21_EL0 = 0xdf55, - PMEVCNTR22_EL0 = 0xdf56, - PMEVCNTR23_EL0 = 0xdf57, - PMEVCNTR24_EL0 = 0xdf58, - PMEVCNTR25_EL0 = 0xdf59, - PMEVCNTR26_EL0 = 0xdf5a, - PMEVCNTR27_EL0 = 0xdf5b, - PMEVCNTR28_EL0 = 0xdf5c, - PMEVCNTR29_EL0 = 0xdf5d, - PMEVCNTR30_EL0 = 0xdf5e, - - PMEVTYPER0_EL0 = 0xdf60, - PMEVTYPER1_EL0 = 0xdf61, - PMEVTYPER2_EL0 = 0xdf62, - PMEVTYPER3_EL0 = 0xdf63, - PMEVTYPER4_EL0 = 0xdf64, - PMEVTYPER5_EL0 = 0xdf65, - PMEVTYPER6_EL0 = 0xdf66, - PMEVTYPER7_EL0 = 0xdf67, - PMEVTYPER8_EL0 = 0xdf68, - PMEVTYPER9_EL0 = 0xdf69, - PMEVTYPER10_EL0 = 0xdf6a, - PMEVTYPER11_EL0 = 0xdf6b, - PMEVTYPER12_EL0 = 0xdf6c, - PMEVTYPER13_EL0 = 0xdf6d, - PMEVTYPER14_EL0 = 0xdf6e, - PMEVTYPER15_EL0 = 0xdf6f, - PMEVTYPER16_EL0 = 0xdf70, - PMEVTYPER17_EL0 = 0xdf71, - PMEVTYPER18_EL0 = 0xdf72, - PMEVTYPER19_EL0 = 0xdf73, - PMEVTYPER20_EL0 = 0xdf74, - PMEVTYPER21_EL0 = 0xdf75, - PMEVTYPER22_EL0 = 0xdf76, - PMEVTYPER23_EL0 = 0xdf77, - PMEVTYPER24_EL0 = 0xdf78, - PMEVTYPER25_EL0 = 0xdf79, - PMEVTYPER26_EL0 = 0xdf7a, - PMEVTYPER27_EL0 = 0xdf7b, - PMEVTYPER28_EL0 = 0xdf7c, - PMEVTYPER29_EL0 = 0xdf7d, - PMEVTYPER30_EL0 = 0xdf7e, - - PMCCFILTR_EL0 = 0xdf7f, - - RMR_EL3 = 0xf602, - RMR_EL2 = 0xd602, - RMR_EL1 = 0xce02, - - // Debug Architecture 5.3, Table 17. 
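// Each entry below packs (op0, CRn, op2, CRm, op1) with A64_SYSREG_ENC;
// e.g. MDCCSR_EL0 = A64_SYSREG_ENC(2, 0, 0, 1, 3)
//                 = (2 << 14) | (3 << 11) | (0 << 7) | (1 << 3) | 0
//                 = 0x9808.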
- MDCCSR_EL0 = A64_SYSREG_ENC(2, 0, 0, 1, 3), - MDCCINT_EL1 = A64_SYSREG_ENC(2, 0, 0, 2, 0), - DBGDTR_EL0 = A64_SYSREG_ENC(2, 0, 0, 4, 3), - DBGDTRRX_EL0 = A64_SYSREG_ENC(2, 0, 0, 5, 3), - DBGDTRTX_EL0 = DBGDTRRX_EL0, - DBGVCR32_EL2 = A64_SYSREG_ENC(2, 0, 0, 7, 4), - OSDTRRX_EL1 = A64_SYSREG_ENC(2, 0, 2, 0, 0), - MDSCR_EL1 = A64_SYSREG_ENC(2, 0, 2, 2, 0), - OSDTRTX_EL1 = A64_SYSREG_ENC(2, 0, 2, 3, 0), - OSECCR_EL11 = A64_SYSREG_ENC(2, 0, 2, 6, 0), - - DBGBVR0_EL1 = A64_SYSREG_ENC(2, 0, 4, 0, 0), - DBGBVR1_EL1 = A64_SYSREG_ENC(2, 0, 4, 1, 0), - DBGBVR2_EL1 = A64_SYSREG_ENC(2, 0, 4, 2, 0), - DBGBVR3_EL1 = A64_SYSREG_ENC(2, 0, 4, 3, 0), - DBGBVR4_EL1 = A64_SYSREG_ENC(2, 0, 4, 4, 0), - DBGBVR5_EL1 = A64_SYSREG_ENC(2, 0, 4, 5, 0), - DBGBVR6_EL1 = A64_SYSREG_ENC(2, 0, 4, 6, 0), - DBGBVR7_EL1 = A64_SYSREG_ENC(2, 0, 4, 7, 0), - DBGBVR8_EL1 = A64_SYSREG_ENC(2, 0, 4, 8, 0), - DBGBVR9_EL1 = A64_SYSREG_ENC(2, 0, 4, 9, 0), - DBGBVR10_EL1 = A64_SYSREG_ENC(2, 0, 4, 10, 0), - DBGBVR11_EL1 = A64_SYSREG_ENC(2, 0, 4, 11, 0), - DBGBVR12_EL1 = A64_SYSREG_ENC(2, 0, 4, 12, 0), - DBGBVR13_EL1 = A64_SYSREG_ENC(2, 0, 4, 13, 0), - DBGBVR14_EL1 = A64_SYSREG_ENC(2, 0, 4, 14, 0), - DBGBVR15_EL1 = A64_SYSREG_ENC(2, 0, 4, 15, 0), - - DBGBCR0_EL1 = A64_SYSREG_ENC(2, 0, 5, 0, 0), - DBGBCR1_EL1 = A64_SYSREG_ENC(2, 0, 5, 1, 0), - DBGBCR2_EL1 = A64_SYSREG_ENC(2, 0, 5, 2, 0), - DBGBCR3_EL1 = A64_SYSREG_ENC(2, 0, 5, 3, 0), - DBGBCR4_EL1 = A64_SYSREG_ENC(2, 0, 5, 4, 0), - DBGBCR5_EL1 = A64_SYSREG_ENC(2, 0, 5, 5, 0), - DBGBCR6_EL1 = A64_SYSREG_ENC(2, 0, 5, 6, 0), - DBGBCR7_EL1 = A64_SYSREG_ENC(2, 0, 5, 7, 0), - DBGBCR8_EL1 = A64_SYSREG_ENC(2, 0, 5, 8, 0), - DBGBCR9_EL1 = A64_SYSREG_ENC(2, 0, 5, 9, 0), - DBGBCR10_EL1 = A64_SYSREG_ENC(2, 0, 5, 10, 0), - DBGBCR11_EL1 = A64_SYSREG_ENC(2, 0, 5, 11, 0), - DBGBCR12_EL1 = A64_SYSREG_ENC(2, 0, 5, 12, 0), - DBGBCR13_EL1 = A64_SYSREG_ENC(2, 0, 5, 13, 0), - DBGBCR14_EL1 = A64_SYSREG_ENC(2, 0, 5, 14, 0), - DBGBCR15_EL1 = A64_SYSREG_ENC(2, 0, 5, 15, 0), - - DBGWVR0_EL1 = A64_SYSREG_ENC(2, 0, 6, 0, 0), - DBGWVR1_EL1 = A64_SYSREG_ENC(2, 0, 6, 1, 0), - DBGWVR2_EL1 = A64_SYSREG_ENC(2, 0, 6, 2, 0), - DBGWVR3_EL1 = A64_SYSREG_ENC(2, 0, 6, 3, 0), - DBGWVR4_EL1 = A64_SYSREG_ENC(2, 0, 6, 4, 0), - DBGWVR5_EL1 = A64_SYSREG_ENC(2, 0, 6, 5, 0), - DBGWVR6_EL1 = A64_SYSREG_ENC(2, 0, 6, 6, 0), - DBGWVR7_EL1 = A64_SYSREG_ENC(2, 0, 6, 7, 0), - DBGWVR8_EL1 = A64_SYSREG_ENC(2, 0, 6, 8, 0), - DBGWVR9_EL1 = A64_SYSREG_ENC(2, 0, 6, 9, 0), - DBGWVR10_EL1 = A64_SYSREG_ENC(2, 0, 6, 10, 0), - DBGWVR11_EL1 = A64_SYSREG_ENC(2, 0, 6, 11, 0), - DBGWVR12_EL1 = A64_SYSREG_ENC(2, 0, 6, 12, 0), - DBGWVR13_EL1 = A64_SYSREG_ENC(2, 0, 6, 13, 0), - DBGWVR14_EL1 = A64_SYSREG_ENC(2, 0, 6, 14, 0), - DBGWVR15_EL1 = A64_SYSREG_ENC(2, 0, 6, 15, 0), - - DBGWCR0_EL1 = A64_SYSREG_ENC(2, 0, 7, 0, 0), - DBGWCR1_EL1 = A64_SYSREG_ENC(2, 0, 7, 1, 0), - DBGWCR2_EL1 = A64_SYSREG_ENC(2, 0, 7, 2, 0), - DBGWCR3_EL1 = A64_SYSREG_ENC(2, 0, 7, 3, 0), - DBGWCR4_EL1 = A64_SYSREG_ENC(2, 0, 7, 4, 0), - DBGWCR5_EL1 = A64_SYSREG_ENC(2, 0, 7, 5, 0), - DBGWCR6_EL1 = A64_SYSREG_ENC(2, 0, 7, 6, 0), - DBGWCR7_EL1 = A64_SYSREG_ENC(2, 0, 7, 7, 0), - DBGWCR8_EL1 = A64_SYSREG_ENC(2, 0, 7, 8, 0), - DBGWCR9_EL1 = A64_SYSREG_ENC(2, 0, 7, 9, 0), - DBGWCR10_EL1 = A64_SYSREG_ENC(2, 0, 7, 10, 0), - DBGWCR11_EL1 = A64_SYSREG_ENC(2, 0, 7, 11, 0), - DBGWCR12_EL1 = A64_SYSREG_ENC(2, 0, 7, 12, 0), - DBGWCR13_EL1 = A64_SYSREG_ENC(2, 0, 7, 13, 0), - DBGWCR14_EL1 = A64_SYSREG_ENC(2, 0, 7, 14, 0), - DBGWCR15_EL1 = A64_SYSREG_ENC(2, 0, 7, 15, 0), - - MDRAR_EL1 = A64_SYSREG_ENC(2, 1, 
0, 0, 0), - OSLAR_EL1 = A64_SYSREG_ENC(2, 1, 4, 0, 0), - OSLSR_EL1 = A64_SYSREG_ENC(2, 1, 4, 1, 0), - OSDLR_EL1 = A64_SYSREG_ENC(2, 1, 4, 3, 0), - DBGPRCR_EL1 = A64_SYSREG_ENC(2, 1, 4, 4, 0), - - DBGCLAIMSET_EL1 = A64_SYSREG_ENC(2, 7, 6, 8, 0), - DBGCLAIMCLR_EL1 = A64_SYSREG_ENC(2, 7, 6, 9, 0), - DBGAUTHSTATUS_EL1 = A64_SYSREG_ENC(2, 7, 6, 14, 0), - - DBGDEVID2 = A64_SYSREG_ENC(2, 7, 7, 0, 0), - DBGDEVID1 = A64_SYSREG_ENC(2, 7, 7, 1, 0), - DBGDEVID0 = A64_SYSREG_ENC(2, 7, 7, 2, 0), - - // The following registers are defined to allow access from AArch64 to - // registers which are only used in the AArch32 architecture. - DACR32_EL2 = 0xe180, - IFSR32_EL2 = 0xe281, - TEEHBR32_EL1 = 0x9080, - SDER32_EL3 = 0xf089, - FPEXC32_EL2 = 0xe298, - - // Cyclone specific system registers - CPM_IOACC_CTL_EL3 = 0xff90, - - // Architectural system registers - ID_PFR0_EL1 = 0xc008, - ID_PFR1_EL1 = 0xc009, - ID_DFR0_EL1 = 0xc00a, - ID_AFR0_EL1 = 0xc00b, - ID_ISAR0_EL1 = 0xc010, - ID_ISAR1_EL1 = 0xc011, - ID_ISAR2_EL1 = 0xc012, - ID_ISAR3_EL1 = 0xc013, - ID_ISAR4_EL1 = 0xc014, - ID_ISAR5_EL1 = 0xc015, - AFSR1_EL1 = 0xc289, // note same as old AIFSR_EL1 - AFSR0_EL1 = 0xc288, // note same as old ADFSR_EL1 - REVIDR_EL1 = 0xc006 // note same as old ECOIDR_EL1 - -}; -#undef A64_SYSREG_ENC - -static inline const char *getSystemRegisterName(SystemRegister Reg) { - switch(Reg) { - default: return NULL; // Caller is responsible for handling invalid value. - case SPSR_EL1: return "SPSR_EL1"; - case ELR_EL1: return "ELR_EL1"; - case SP_EL0: return "SP_EL0"; - case SPSel: return "SPSel"; - case DAIF: return "DAIF"; - case CurrentEL: return "CurrentEL"; - case NZCV: return "NZCV"; - case FPCR: return "FPCR"; - case FPSR: return "FPSR"; - case DSPSR: return "DSPSR"; - case DLR: return "DLR"; - case SPSR_EL2: return "SPSR_EL2"; - case ELR_EL2: return "ELR_EL2"; - case SP_EL1: return "SP_EL1"; - case SPSR_irq: return "SPSR_irq"; - case SPSR_abt: return "SPSR_abt"; - case SPSR_und: return "SPSR_und"; - case SPSR_fiq: return "SPSR_fiq"; - case SPSR_EL3: return "SPSR_EL3"; - case ELR_EL3: return "ELR_EL3"; - case SP_EL2: return "SP_EL2"; - case MIDR_EL1: return "MIDR_EL1"; - case CTR_EL0: return "CTR_EL0"; - case MPIDR_EL1: return "MPIDR_EL1"; - case DCZID_EL0: return "DCZID_EL0"; - case MVFR0_EL1: return "MVFR0_EL1"; - case MVFR1_EL1: return "MVFR1_EL1"; - case ID_AA64PFR0_EL1: return "ID_AA64PFR0_EL1"; - case ID_AA64PFR1_EL1: return "ID_AA64PFR1_EL1"; - case ID_AA64DFR0_EL1: return "ID_AA64DFR0_EL1"; - case ID_AA64DFR1_EL1: return "ID_AA64DFR1_EL1"; - case ID_AA64ISAR0_EL1: return "ID_AA64ISAR0_EL1"; - case ID_AA64ISAR1_EL1: return "ID_AA64ISAR1_EL1"; - case ID_AA64MMFR0_EL1: return "ID_AA64MMFR0_EL1"; - case ID_AA64MMFR1_EL1: return "ID_AA64MMFR1_EL1"; - case CCSIDR_EL1: return "CCSIDR_EL1"; - case CLIDR_EL1: return "CLIDR_EL1"; - case AIDR_EL1: return "AIDR_EL1"; - case CSSELR_EL1: return "CSSELR_EL1"; - case VPIDR_EL2: return "VPIDR_EL2"; - case VMPIDR_EL2: return "VMPIDR_EL2"; - case SCTLR_EL1: return "SCTLR_EL1"; - case SCTLR_EL2: return "SCTLR_EL2"; - case SCTLR_EL3: return "SCTLR_EL3"; - case ACTLR_EL1: return "ACTLR_EL1"; - case ACTLR_EL2: return "ACTLR_EL2"; - case ACTLR_EL3: return "ACTLR_EL3"; - case CPACR_EL1: return "CPACR_EL1"; - case CPTR_EL2: return "CPTR_EL2"; - case CPTR_EL3: return "CPTR_EL3"; - case SCR_EL3: return "SCR_EL3"; - case HCR_EL2: return "HCR_EL2"; - case MDCR_EL2: return "MDCR_EL2"; - case MDCR_EL3: return "MDCR_EL3"; - case HSTR_EL2: return "HSTR_EL2"; - case HACR_EL2: return "HACR_EL2"; - 
case TTBR0_EL1: return "TTBR0_EL1"; - case TTBR1_EL1: return "TTBR1_EL1"; - case TTBR0_EL2: return "TTBR0_EL2"; - case TTBR0_EL3: return "TTBR0_EL3"; - case VTTBR_EL2: return "VTTBR_EL2"; - case TCR_EL1: return "TCR_EL1"; - case TCR_EL2: return "TCR_EL2"; - case TCR_EL3: return "TCR_EL3"; - case VTCR_EL2: return "VTCR_EL2"; - case ADFSR_EL2: return "ADFSR_EL2"; - case AIFSR_EL2: return "AIFSR_EL2"; - case ADFSR_EL3: return "ADFSR_EL3"; - case AIFSR_EL3: return "AIFSR_EL3"; - case ESR_EL1: return "ESR_EL1"; - case ESR_EL2: return "ESR_EL2"; - case ESR_EL3: return "ESR_EL3"; - case FAR_EL1: return "FAR_EL1"; - case FAR_EL2: return "FAR_EL2"; - case FAR_EL3: return "FAR_EL3"; - case HPFAR_EL2: return "HPFAR_EL2"; - case PAR_EL1: return "PAR_EL1"; - case MAIR_EL1: return "MAIR_EL1"; - case MAIR_EL2: return "MAIR_EL2"; - case MAIR_EL3: return "MAIR_EL3"; - case AMAIR_EL1: return "AMAIR_EL1"; - case AMAIR_EL2: return "AMAIR_EL2"; - case AMAIR_EL3: return "AMAIR_EL3"; - case VBAR_EL1: return "VBAR_EL1"; - case VBAR_EL2: return "VBAR_EL2"; - case VBAR_EL3: return "VBAR_EL3"; - case RVBAR_EL1: return "RVBAR_EL1"; - case RVBAR_EL2: return "RVBAR_EL2"; - case RVBAR_EL3: return "RVBAR_EL3"; - case ISR_EL1: return "ISR_EL1"; - case CONTEXTIDR_EL1: return "CONTEXTIDR_EL1"; - case TPIDR_EL0: return "TPIDR_EL0"; - case TPIDRRO_EL0: return "TPIDRRO_EL0"; - case TPIDR_EL1: return "TPIDR_EL1"; - case TPIDR_EL2: return "TPIDR_EL2"; - case TPIDR_EL3: return "TPIDR_EL3"; - case TEECR32_EL1: return "TEECR32_EL1"; - case CNTFRQ_EL0: return "CNTFRQ_EL0"; - case CNTPCT_EL0: return "CNTPCT_EL0"; - case CNTVCT_EL0: return "CNTVCT_EL0"; - case CNTVOFF_EL2: return "CNTVOFF_EL2"; - case CNTKCTL_EL1: return "CNTKCTL_EL1"; - case CNTHCTL_EL2: return "CNTHCTL_EL2"; - case CNTP_TVAL_EL0: return "CNTP_TVAL_EL0"; - case CNTP_CTL_EL0: return "CNTP_CTL_EL0"; - case CNTP_CVAL_EL0: return "CNTP_CVAL_EL0"; - case CNTV_TVAL_EL0: return "CNTV_TVAL_EL0"; - case CNTV_CTL_EL0: return "CNTV_CTL_EL0"; - case CNTV_CVAL_EL0: return "CNTV_CVAL_EL0"; - case CNTHP_TVAL_EL2: return "CNTHP_TVAL_EL2"; - case CNTHP_CTL_EL2: return "CNTHP_CTL_EL2"; - case CNTHP_CVAL_EL2: return "CNTHP_CVAL_EL2"; - case CNTPS_TVAL_EL1: return "CNTPS_TVAL_EL1"; - case CNTPS_CTL_EL1: return "CNTPS_CTL_EL1"; - case CNTPS_CVAL_EL1: return "CNTPS_CVAL_EL1"; - case DACR32_EL2: return "DACR32_EL2"; - case IFSR32_EL2: return "IFSR32_EL2"; - case TEEHBR32_EL1: return "TEEHBR32_EL1"; - case SDER32_EL3: return "SDER32_EL3"; - case FPEXC32_EL2: return "FPEXC32_EL2"; - case PMEVCNTR0_EL0: return "PMEVCNTR0_EL0"; - case PMEVCNTR1_EL0: return "PMEVCNTR1_EL0"; - case PMEVCNTR2_EL0: return "PMEVCNTR2_EL0"; - case PMEVCNTR3_EL0: return "PMEVCNTR3_EL0"; - case PMEVCNTR4_EL0: return "PMEVCNTR4_EL0"; - case PMEVCNTR5_EL0: return "PMEVCNTR5_EL0"; - case PMEVCNTR6_EL0: return "PMEVCNTR6_EL0"; - case PMEVCNTR7_EL0: return "PMEVCNTR7_EL0"; - case PMEVCNTR8_EL0: return "PMEVCNTR8_EL0"; - case PMEVCNTR9_EL0: return "PMEVCNTR9_EL0"; - case PMEVCNTR10_EL0: return "PMEVCNTR10_EL0"; - case PMEVCNTR11_EL0: return "PMEVCNTR11_EL0"; - case PMEVCNTR12_EL0: return "PMEVCNTR12_EL0"; - case PMEVCNTR13_EL0: return "PMEVCNTR13_EL0"; - case PMEVCNTR14_EL0: return "PMEVCNTR14_EL0"; - case PMEVCNTR15_EL0: return "PMEVCNTR15_EL0"; - case PMEVCNTR16_EL0: return "PMEVCNTR16_EL0"; - case PMEVCNTR17_EL0: return "PMEVCNTR17_EL0"; - case PMEVCNTR18_EL0: return "PMEVCNTR18_EL0"; - case PMEVCNTR19_EL0: return "PMEVCNTR19_EL0"; - case PMEVCNTR20_EL0: return "PMEVCNTR20_EL0"; - case PMEVCNTR21_EL0: return 
"PMEVCNTR21_EL0"; - case PMEVCNTR22_EL0: return "PMEVCNTR22_EL0"; - case PMEVCNTR23_EL0: return "PMEVCNTR23_EL0"; - case PMEVCNTR24_EL0: return "PMEVCNTR24_EL0"; - case PMEVCNTR25_EL0: return "PMEVCNTR25_EL0"; - case PMEVCNTR26_EL0: return "PMEVCNTR26_EL0"; - case PMEVCNTR27_EL0: return "PMEVCNTR27_EL0"; - case PMEVCNTR28_EL0: return "PMEVCNTR28_EL0"; - case PMEVCNTR29_EL0: return "PMEVCNTR29_EL0"; - case PMEVCNTR30_EL0: return "PMEVCNTR30_EL0"; - case PMEVTYPER0_EL0: return "PMEVTYPER0_EL0"; - case PMEVTYPER1_EL0: return "PMEVTYPER1_EL0"; - case PMEVTYPER2_EL0: return "PMEVTYPER2_EL0"; - case PMEVTYPER3_EL0: return "PMEVTYPER3_EL0"; - case PMEVTYPER4_EL0: return "PMEVTYPER4_EL0"; - case PMEVTYPER5_EL0: return "PMEVTYPER5_EL0"; - case PMEVTYPER6_EL0: return "PMEVTYPER6_EL0"; - case PMEVTYPER7_EL0: return "PMEVTYPER7_EL0"; - case PMEVTYPER8_EL0: return "PMEVTYPER8_EL0"; - case PMEVTYPER9_EL0: return "PMEVTYPER9_EL0"; - case PMEVTYPER10_EL0: return "PMEVTYPER10_EL0"; - case PMEVTYPER11_EL0: return "PMEVTYPER11_EL0"; - case PMEVTYPER12_EL0: return "PMEVTYPER12_EL0"; - case PMEVTYPER13_EL0: return "PMEVTYPER13_EL0"; - case PMEVTYPER14_EL0: return "PMEVTYPER14_EL0"; - case PMEVTYPER15_EL0: return "PMEVTYPER15_EL0"; - case PMEVTYPER16_EL0: return "PMEVTYPER16_EL0"; - case PMEVTYPER17_EL0: return "PMEVTYPER17_EL0"; - case PMEVTYPER18_EL0: return "PMEVTYPER18_EL0"; - case PMEVTYPER19_EL0: return "PMEVTYPER19_EL0"; - case PMEVTYPER20_EL0: return "PMEVTYPER20_EL0"; - case PMEVTYPER21_EL0: return "PMEVTYPER21_EL0"; - case PMEVTYPER22_EL0: return "PMEVTYPER22_EL0"; - case PMEVTYPER23_EL0: return "PMEVTYPER23_EL0"; - case PMEVTYPER24_EL0: return "PMEVTYPER24_EL0"; - case PMEVTYPER25_EL0: return "PMEVTYPER25_EL0"; - case PMEVTYPER26_EL0: return "PMEVTYPER26_EL0"; - case PMEVTYPER27_EL0: return "PMEVTYPER27_EL0"; - case PMEVTYPER28_EL0: return "PMEVTYPER28_EL0"; - case PMEVTYPER29_EL0: return "PMEVTYPER29_EL0"; - case PMEVTYPER30_EL0: return "PMEVTYPER30_EL0"; - case PMCCFILTR_EL0: return "PMCCFILTR_EL0"; - case RMR_EL3: return "RMR_EL3"; - case RMR_EL2: return "RMR_EL2"; - case RMR_EL1: return "RMR_EL1"; - case CPM_IOACC_CTL_EL3: return "CPM_IOACC_CTL_EL3"; - case MDCCSR_EL0: return "MDCCSR_EL0"; - case MDCCINT_EL1: return "MDCCINT_EL1"; - case DBGDTR_EL0: return "DBGDTR_EL0"; - case DBGDTRRX_EL0: return "DBGDTRRX_EL0"; - case DBGVCR32_EL2: return "DBGVCR32_EL2"; - case OSDTRRX_EL1: return "OSDTRRX_EL1"; - case MDSCR_EL1: return "MDSCR_EL1"; - case OSDTRTX_EL1: return "OSDTRTX_EL1"; - case OSECCR_EL11: return "OSECCR_EL11"; - case DBGBVR0_EL1: return "DBGBVR0_EL1"; - case DBGBVR1_EL1: return "DBGBVR1_EL1"; - case DBGBVR2_EL1: return "DBGBVR2_EL1"; - case DBGBVR3_EL1: return "DBGBVR3_EL1"; - case DBGBVR4_EL1: return "DBGBVR4_EL1"; - case DBGBVR5_EL1: return "DBGBVR5_EL1"; - case DBGBVR6_EL1: return "DBGBVR6_EL1"; - case DBGBVR7_EL1: return "DBGBVR7_EL1"; - case DBGBVR8_EL1: return "DBGBVR8_EL1"; - case DBGBVR9_EL1: return "DBGBVR9_EL1"; - case DBGBVR10_EL1: return "DBGBVR10_EL1"; - case DBGBVR11_EL1: return "DBGBVR11_EL1"; - case DBGBVR12_EL1: return "DBGBVR12_EL1"; - case DBGBVR13_EL1: return "DBGBVR13_EL1"; - case DBGBVR14_EL1: return "DBGBVR14_EL1"; - case DBGBVR15_EL1: return "DBGBVR15_EL1"; - case DBGBCR0_EL1: return "DBGBCR0_EL1"; - case DBGBCR1_EL1: return "DBGBCR1_EL1"; - case DBGBCR2_EL1: return "DBGBCR2_EL1"; - case DBGBCR3_EL1: return "DBGBCR3_EL1"; - case DBGBCR4_EL1: return "DBGBCR4_EL1"; - case DBGBCR5_EL1: return "DBGBCR5_EL1"; - case DBGBCR6_EL1: return "DBGBCR6_EL1"; - case DBGBCR7_EL1: 
return "DBGBCR7_EL1"; - case DBGBCR8_EL1: return "DBGBCR8_EL1"; - case DBGBCR9_EL1: return "DBGBCR9_EL1"; - case DBGBCR10_EL1: return "DBGBCR10_EL1"; - case DBGBCR11_EL1: return "DBGBCR11_EL1"; - case DBGBCR12_EL1: return "DBGBCR12_EL1"; - case DBGBCR13_EL1: return "DBGBCR13_EL1"; - case DBGBCR14_EL1: return "DBGBCR14_EL1"; - case DBGBCR15_EL1: return "DBGBCR15_EL1"; - case DBGWVR0_EL1: return "DBGWVR0_EL1"; - case DBGWVR1_EL1: return "DBGWVR1_EL1"; - case DBGWVR2_EL1: return "DBGWVR2_EL1"; - case DBGWVR3_EL1: return "DBGWVR3_EL1"; - case DBGWVR4_EL1: return "DBGWVR4_EL1"; - case DBGWVR5_EL1: return "DBGWVR5_EL1"; - case DBGWVR6_EL1: return "DBGWVR6_EL1"; - case DBGWVR7_EL1: return "DBGWVR7_EL1"; - case DBGWVR8_EL1: return "DBGWVR8_EL1"; - case DBGWVR9_EL1: return "DBGWVR9_EL1"; - case DBGWVR10_EL1: return "DBGWVR10_EL1"; - case DBGWVR11_EL1: return "DBGWVR11_EL1"; - case DBGWVR12_EL1: return "DBGWVR12_EL1"; - case DBGWVR13_EL1: return "DBGWVR13_EL1"; - case DBGWVR14_EL1: return "DBGWVR14_EL1"; - case DBGWVR15_EL1: return "DBGWVR15_EL1"; - case DBGWCR0_EL1: return "DBGWCR0_EL1"; - case DBGWCR1_EL1: return "DBGWCR1_EL1"; - case DBGWCR2_EL1: return "DBGWCR2_EL1"; - case DBGWCR3_EL1: return "DBGWCR3_EL1"; - case DBGWCR4_EL1: return "DBGWCR4_EL1"; - case DBGWCR5_EL1: return "DBGWCR5_EL1"; - case DBGWCR6_EL1: return "DBGWCR6_EL1"; - case DBGWCR7_EL1: return "DBGWCR7_EL1"; - case DBGWCR8_EL1: return "DBGWCR8_EL1"; - case DBGWCR9_EL1: return "DBGWCR9_EL1"; - case DBGWCR10_EL1: return "DBGWCR10_EL1"; - case DBGWCR11_EL1: return "DBGWCR11_EL1"; - case DBGWCR12_EL1: return "DBGWCR12_EL1"; - case DBGWCR13_EL1: return "DBGWCR13_EL1"; - case DBGWCR14_EL1: return "DBGWCR14_EL1"; - case DBGWCR15_EL1: return "DBGWCR15_EL1"; - case MDRAR_EL1: return "MDRAR_EL1"; - case OSLAR_EL1: return "OSLAR_EL1"; - case OSLSR_EL1: return "OSLSR_EL1"; - case OSDLR_EL1: return "OSDLR_EL1"; - case DBGPRCR_EL1: return "DBGPRCR_EL1"; - case DBGCLAIMSET_EL1: return "DBGCLAIMSET_EL1"; - case DBGCLAIMCLR_EL1: return "DBGCLAIMCLR_EL1"; - case DBGAUTHSTATUS_EL1: return "DBGAUTHSTATUS_EL1"; - case DBGDEVID2: return "DBGDEVID2"; - case DBGDEVID1: return "DBGDEVID1"; - case DBGDEVID0: return "DBGDEVID0"; - case ID_PFR0_EL1: return "ID_PFR0_EL1"; - case ID_PFR1_EL1: return "ID_PFR1_EL1"; - case ID_DFR0_EL1: return "ID_DFR0_EL1"; - case ID_AFR0_EL1: return "ID_AFR0_EL1"; - case ID_ISAR0_EL1: return "ID_ISAR0_EL1"; - case ID_ISAR1_EL1: return "ID_ISAR1_EL1"; - case ID_ISAR2_EL1: return "ID_ISAR2_EL1"; - case ID_ISAR3_EL1: return "ID_ISAR3_EL1"; - case ID_ISAR4_EL1: return "ID_ISAR4_EL1"; - case ID_ISAR5_EL1: return "ID_ISAR5_EL1"; - case AFSR1_EL1: return "AFSR1_EL1"; - case AFSR0_EL1: return "AFSR0_EL1"; - case REVIDR_EL1: return "REVIDR_EL1"; - } -} - -enum CPSRField { - InvalidCPSRField = 0xff, - cpsr_SPSel = 0x5, - cpsr_DAIFSet = 0x1e, - cpsr_DAIFClr = 0x1f -}; - -static inline const char *getCPSRFieldName(CPSRField Val) { - switch(Val) { - default: assert(0 && "Invalid system register value!"); - case cpsr_SPSel: return "SPSel"; - case cpsr_DAIFSet: return "DAIFSet"; - case cpsr_DAIFClr: return "DAIFClr"; - } -} - -} // end namespace ARM64SYS - -namespace ARM64II { - /// Target Operand Flag enum. - enum TOF { - //===------------------------------------------------------------------===// - // ARM64 Specific MachineOperand flags. - - MO_NO_FLAG, - - MO_FRAGMENT = 0x7, - - /// MO_PAGE - A symbol operand with this flag represents the pc-relative - /// offset of the 4K page containing the symbol. 
This is used with the - /// ADRP instruction. - MO_PAGE = 1, - - /// MO_PAGEOFF - A symbol operand with this flag represents the offset of - /// that symbol within a 4K page. This offset is added to the page address - /// to produce the complete address. - MO_PAGEOFF = 2, - - /// MO_G3 - A symbol operand with this flag (granule 3) represents the high - /// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G3 = 3, - - /// MO_G2 - A symbol operand with this flag (granule 2) represents the bits - /// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G2 = 4, - - /// MO_G1 - A symbol operand with this flag (granule 1) represents the bits - /// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G1 = 5, - - /// MO_G0 - A symbol operand with this flag (granule 0) represents the bits - /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction - MO_G0 = 6, - - /// MO_GOT - This flag indicates that a symbol operand represents the - /// address of the GOT entry for the symbol, rather than the address of - /// the symbol itself. - MO_GOT = 8, - - /// MO_NC - Indicates whether the linker is expected to check the symbol - /// reference for overflow. For example in an ADRP/ADD pair of relocations - /// the ADRP usually does check, but not the ADD. - MO_NC = 0x10, - - /// MO_TLS - Indicates that the operand being accessed is some kind of - /// thread-local symbol. On Darwin, only one type of thread-local access - /// exists (pre linker-relaxation), but on ELF the TLSModel used for the - /// referee will affect interpretation. - MO_TLS = 0x20 - }; -} // end namespace ARM64II - -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp deleted file mode 100644 index 1a132a1..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp +++ /dev/null @@ -1,237 +0,0 @@ -//===-- ARM64ELFObjectWriter.cpp - ARM64 ELF Writer -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file handles ELF-specific object emission, converting LLVM's internal -// fixups into the appropriate relocations. 
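The page and granule splits that the MO_* flags above describe are plain bit arithmetic. A standalone C++ sketch, illustrative only and not LLVM code (pageOf/granuleOf are hypothetical helpers, and ADRP's pc-relative page delta is left out), showing how each pair of relocation halves reassembles a full address:

#include <cassert>
#include <cstdint>

// 4KB page split used by ADRP (MO_PAGE) plus ADD/LDR :lo12: (MO_PAGEOFF).
uint64_t pageOf(uint64_t Addr) { return Addr & ~uint64_t(0xfff); }
uint64_t pageOffOf(uint64_t Addr) { return Addr & uint64_t(0xfff); }

// 16-bit granule N (MO_G0..MO_G3) as placed by MOVZ/MOVK with hw == N.
uint16_t granuleOf(uint64_t Addr, unsigned N) {
  assert(N < 4 && "a 64-bit address has granules 0-3");
  return uint16_t(Addr >> (16 * N));
}

int main() {
  const uint64_t Addr = 0x0000007fdeadbeefULL;
  // An ADRP/ADD pair rebuilds the address as page base plus 12-bit offset.
  assert(pageOf(Addr) + pageOffOf(Addr) == Addr);
  // A MOVZ g0 / MOVK g1..g3 chain rebuilds it 16 bits at a time.
  uint64_t Rebuilt = 0;
  for (unsigned N = 0; N != 4; ++N)
    Rebuilt |= uint64_t(granuleOf(Addr, N)) << (16 * N);
  assert(Rebuilt == Addr);
  return 0;
}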
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/ARM64FixupKinds.h"
-#include "MCTargetDesc/ARM64MCExpr.h"
-#include "MCTargetDesc/ARM64MCTargetDesc.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-namespace {
-class ARM64ELFObjectWriter : public MCELFObjectTargetWriter {
-public:
-  ARM64ELFObjectWriter(uint8_t OSABI);
-
-  virtual ~ARM64ELFObjectWriter();
-
-protected:
-  unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
-                        bool IsPCRel) const override;
-
-private:
-};
-}
-
-ARM64ELFObjectWriter::ARM64ELFObjectWriter(uint8_t OSABI)
-    : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
-                              /*HasRelocationAddend*/ true) {}
-
-ARM64ELFObjectWriter::~ARM64ELFObjectWriter() {}
-
-unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target,
-                                            const MCFixup &Fixup,
-                                            bool IsPCRel) const {
-  ARM64MCExpr::VariantKind RefKind =
-      static_cast<ARM64MCExpr::VariantKind>(Target.getRefKind());
-  ARM64MCExpr::VariantKind SymLoc = ARM64MCExpr::getSymbolLoc(RefKind);
-  bool IsNC = ARM64MCExpr::isNotChecked(RefKind);
-
-  assert((!Target.getSymA() ||
-          Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) &&
-         "Should only be expression-level modifiers here");
-
-  assert((!Target.getSymB() ||
-          Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None) &&
-         "Should only be expression-level modifiers here");
-
-  if (IsPCRel) {
-    switch ((unsigned)Fixup.getKind()) {
-    case FK_Data_2:
-      return ELF::R_AARCH64_PREL16;
-    case FK_Data_4:
-      return ELF::R_AARCH64_PREL32;
-    case FK_Data_8:
-      return ELF::R_AARCH64_PREL64;
-    case ARM64::fixup_arm64_pcrel_adr_imm21:
-      llvm_unreachable("No ELF relocations supported for ADR at the moment");
-    case ARM64::fixup_arm64_pcrel_adrp_imm21:
-      if (SymLoc == ARM64MCExpr::VK_ABS && !IsNC)
-        return ELF::R_AARCH64_ADR_PREL_PG_HI21;
-      if (SymLoc == ARM64MCExpr::VK_GOT && !IsNC)
-        return ELF::R_AARCH64_ADR_GOT_PAGE;
-      if (SymLoc == ARM64MCExpr::VK_GOTTPREL && !IsNC)
-        return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
-      if (SymLoc == ARM64MCExpr::VK_TLSDESC && !IsNC)
-        return ELF::R_AARCH64_TLSDESC_ADR_PAGE;
-      llvm_unreachable("invalid symbol kind for ADRP relocation");
-    case ARM64::fixup_arm64_pcrel_branch26:
-      return ELF::R_AARCH64_JUMP26;
-    case ARM64::fixup_arm64_pcrel_call26:
-      return ELF::R_AARCH64_CALL26;
-    case ARM64::fixup_arm64_pcrel_imm19:
-      return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
-    default:
-      llvm_unreachable("Unsupported pc-relative fixup kind");
-    }
-  } else {
-    switch ((unsigned)Fixup.getKind()) {
-    case FK_Data_2:
-      return ELF::R_AARCH64_ABS16;
-    case FK_Data_4:
-      return ELF::R_AARCH64_ABS32;
-    case FK_Data_8:
-      return ELF::R_AARCH64_ABS64;
-    case ARM64::fixup_arm64_add_imm12:
-      if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
-        return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
-      if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
-        return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
-      if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
-        return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
-      if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
-        return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
-      if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC)
-        return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
-      if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
-        return ELF::R_AARCH64_ADD_ABS_LO12_NC;
-
-      report_fatal_error("invalid fixup for add (uimm12) instruction");
-      return 0;
-    case ARM64::fixup_arm64_ldst_imm12_scale1:
-      if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) -
return ELF::R_AARCH64_LDST8_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 8-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale2: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST16_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 16-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale4: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST32_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC; - - report_fatal_error("invalid fixup for 32-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale8: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST64_ABS_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOT && IsNC) - return ELF::R_AARCH64_LD64_GOT_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC) - return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC) - return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC) - return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12; - if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC) - return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_GOTTPREL && IsNC) - return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC; - if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC) - return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC; - - report_fatal_error("invalid fixup for 64-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_ldst_imm12_scale16: - if (SymLoc == ARM64MCExpr::VK_ABS && IsNC) - return ELF::R_AARCH64_LDST128_ABS_LO12_NC; - - report_fatal_error("invalid fixup for 128-bit load/store instruction"); - return 0; - case ARM64::fixup_arm64_movw: - if (RefKind == ARM64MCExpr::VK_ABS_G3) - return ELF::R_AARCH64_MOVW_UABS_G3; - if (RefKind == ARM64MCExpr::VK_ABS_G2) - return ELF::R_AARCH64_MOVW_UABS_G2; - if (RefKind == ARM64MCExpr::VK_ABS_G2_NC) - return ELF::R_AARCH64_MOVW_UABS_G2_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G1) - return ELF::R_AARCH64_MOVW_UABS_G1; - if (RefKind == ARM64MCExpr::VK_ABS_G1_NC) - return ELF::R_AARCH64_MOVW_UABS_G1_NC; - if (RefKind == ARM64MCExpr::VK_ABS_G0) - return ELF::R_AARCH64_MOVW_UABS_G0; - if (RefKind == ARM64MCExpr::VK_ABS_G0_NC) - return ELF::R_AARCH64_MOVW_UABS_G0_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_G2) - return 
ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1; - if (RefKind == ARM64MCExpr::VK_DTPREL_G1_NC) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0; - if (RefKind == ARM64MCExpr::VK_DTPREL_G0_NC) - return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G2) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2; - if (RefKind == ARM64MCExpr::VK_TPREL_G1) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1; - if (RefKind == ARM64MCExpr::VK_TPREL_G1_NC) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC; - if (RefKind == ARM64MCExpr::VK_TPREL_G0) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0; - if (RefKind == ARM64MCExpr::VK_TPREL_G0_NC) - return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G1) - return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1; - if (RefKind == ARM64MCExpr::VK_GOTTPREL_G0_NC) - return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC; - report_fatal_error("invalid fixup for movz/movk instruction"); - return 0; - case ARM64::fixup_arm64_tlsdesc_call: - return ELF::R_AARCH64_TLSDESC_CALL; - default: - llvm_unreachable("Unknown ELF relocation type"); - } - } - - llvm_unreachable("Unimplemented fixup -> relocation"); -} - -MCObjectWriter *llvm::createARM64ELFObjectWriter(raw_ostream &OS, - uint8_t OSABI) { - MCELFObjectTargetWriter *MOTW = new ARM64ELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp deleted file mode 100644 index 97a3493..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===- lib/MC/ARM64ELFStreamer.cpp - ELF Object Output for ARM64 ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file assembles .s files and emits AArch64 ELF .o object files. Different -// from generic ELF streamer in emitting mapping symbols ($x and $d) to delimit -// regions of data and code. -// -//===----------------------------------------------------------------------===// - -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCELFStreamer.h" -#include "llvm/MC/MCELFSymbolFlags.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - -/// Extend the generic ELFStreamer class so that it can emit mapping symbols at -/// the appropriate points in the object files. 
These symbols are defined in the -/// AArch64 ELF ABI: -/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf -/// -/// In brief: $x or $d should be emitted at the start of each contiguous region -/// of A64 code or data in a section. In practice, this emission does not rely -/// on explicit assembler directives but on inherent properties of the -/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an -/// instruction). -/// -/// As a result this system is orthogonal to the DataRegion infrastructure used -/// by MachO. Beware! -class ARM64ELFStreamer : public MCELFStreamer { -public: - ARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, - MCCodeEmitter *Emitter) - : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0), - LastEMS(EMS_None) {} - - ~ARM64ELFStreamer() {} - - virtual void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) { - // We have to keep track of the mapping symbol state of any sections we - // use. Each one should start off as EMS_None, which is provided as the - // default constructor by DenseMap::lookup. - LastMappingSymbols[getPreviousSection().first] = LastEMS; - LastEMS = LastMappingSymbols.lookup(Section); - - MCELFStreamer::ChangeSection(Section, Subsection); - } - - /// This function is the one used to emit instruction data into the ELF - /// streamer. We override it to add the appropriate mapping symbol if - /// necessary. - virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { - EmitA64MappingSymbol(); - MCELFStreamer::EmitInstruction(Inst, STI); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. - virtual void EmitBytes(StringRef Data) { - EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data); - } - - /// This is one of the functions used to emit data into an ELF section, so the - /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d) - /// if necessary. - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { - EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size); - } - -private: - enum ElfMappingSymbol { - EMS_None, - EMS_A64, - EMS_Data - }; - - void EmitDataMappingSymbol() { - if (LastEMS == EMS_Data) - return; - EmitMappingSymbol("$d"); - LastEMS = EMS_Data; - } - - void EmitA64MappingSymbol() { - if (LastEMS == EMS_A64) - return; - EmitMappingSymbol("$x"); - LastEMS = EMS_A64; - } - - void EmitMappingSymbol(StringRef Name) { - MCSymbol *Start = getContext().CreateTempSymbol(); - EmitLabel(Start); - - MCSymbol *Symbol = getContext().GetOrCreateSymbol( - Name + "." 
+ Twine(MappingSymbolCounter++)); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - MCELF::SetType(SD, ELF::STT_NOTYPE); - MCELF::SetBinding(SD, ELF::STB_LOCAL); - SD.setExternal(false); - Symbol->setSection(*getCurrentSection().first); - - const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); - Symbol->setVariableValue(Value); - } - - int64_t MappingSymbolCounter; - - DenseMap LastMappingSymbols; - ElfMappingSymbol LastEMS; - - /// @} -}; -} - -namespace llvm { -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack) { - ARM64ELFStreamer *S = new ARM64ELFStreamer(Context, TAB, OS, Emitter); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); - return S; -} -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h b/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h deleted file mode 100644 index 72dadbc..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h +++ /dev/null @@ -1,26 +0,0 @@ -//===-- ARM64ELFStreamer.h - ELF Streamer for ARM64 -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements ELF streamer information for the ARM64 backend. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_AARCH64_ELF_STREAMER_H -#define LLVM_AARCH64_ELF_STREAMER_H - -#include "llvm/MC/MCELFStreamer.h" - -namespace llvm { - -MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack); -} - -#endif // ARM64_ELF_STREAMER_H diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h b/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h deleted file mode 100644 index 02eb91f..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h +++ /dev/null @@ -1,72 +0,0 @@ -//===-- ARM64FixupKinds.h - ARM64 Specific Fixup Entries --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARM64FIXUPKINDS_H -#define LLVM_ARM64FIXUPKINDS_H - -#include "llvm/MC/MCFixup.h" - -namespace llvm { -namespace ARM64 { - -enum Fixups { - // fixup_arm64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into - // an ADR instruction. - fixup_arm64_pcrel_adr_imm21 = FirstTargetFixupKind, - - // fixup_arm64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into - // an ADRP instruction. - fixup_arm64_pcrel_adrp_imm21, - - // fixup_arm64_imm12 - 12-bit fixup for add/sub instructions. - // No alignment adjustment. All value bits are encoded. - fixup_arm64_add_imm12, - - // fixup_arm64_ldst_imm12_* - unsigned 12-bit fixups for load and - // store instructions. - fixup_arm64_ldst_imm12_scale1, - fixup_arm64_ldst_imm12_scale2, - fixup_arm64_ldst_imm12_scale4, - fixup_arm64_ldst_imm12_scale8, - fixup_arm64_ldst_imm12_scale16, - - // FIXME: comment - fixup_arm64_movw, - - // fixup_arm64_pcrel_imm14 - The high 14 bits of a 21-bit pc-relative - // immediate. 
- fixup_arm64_pcrel_branch14, - - // fixup_arm64_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative - // immediate. Same encoding as fixup_arm64_pcrel_adrhi, except this - // is not used as part of a lo/hi pair and thus generates relocations - // directly when necessary. - fixup_arm64_pcrel_imm19, - - // fixup_arm64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative - // immediate. - fixup_arm64_pcrel_branch26, - - // fixup_arm64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative - // immediate. Distinguished from branch26 only on ELF. - fixup_arm64_pcrel_call26, - - // fixup_arm64_tlsdesc_call - zero-space placeholder for the ELF - // R_AARCH64_TLSDESC_CALL relocation. - fixup_arm64_tlsdesc_call, - - // Marker - LastTargetFixupKind, - NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind -}; - -} // end namespace ARM64 -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp deleted file mode 100644 index 97e0d3c..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp +++ /dev/null @@ -1,92 +0,0 @@ -//===-- ARM64MCAsmInfo.cpp - ARM64 asm properties -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the ARM64MCAsmInfo properties. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -enum AsmWriterVariantTy { - Default = -1, - Generic = 0, - Apple = 1 -}; - -static cl::opt AsmWriterVariant( - "arm64-neon-syntax", cl::init(Default), - cl::desc("Choose style of NEON code to emit from ARM64 backend:"), - cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), - clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"), - clEnumValEnd)); - -ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() { - // We prefer NEON instructions to be printed in the short form. - AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant; - - PrivateGlobalPrefix = "L"; - SeparatorString = "%%"; - CommentString = ";"; - PointerSize = CalleeSaveStackSlotSize = 8; - - AlignmentIsInBytes = false; - UsesELFSectionDirectiveForBSS = true; - SupportsDebugInformation = true; - UseDataRegionDirectives = true; - - ExceptionsType = ExceptionHandling::DwarfCFI; -} - -const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol( - const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { - // On Darwin, we can reference dwarf symbols with foo@GOT-., which - // is an indirect pc-relative reference. The default implementation - // won't reference using the GOT, so we need this target-specific - // version. - MCContext &Context = Streamer.getContext(); - const MCExpr *Res = - MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context); - MCSymbol *PCSym = Context.CreateTempSymbol(); - Streamer.EmitLabel(PCSym); - const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); - return MCBinaryExpr::CreateSub(Res, PC, Context); -} - -ARM64MCAsmInfoELF::ARM64MCAsmInfoELF() { - // We prefer NEON instructions to be printed in the short form. 
- AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant; - - PointerSize = 8; - - // ".comm align is in bytes but .align is pow-2." - AlignmentIsInBytes = false; - - CommentString = "//"; - PrivateGlobalPrefix = ".L"; - Code32Directive = ".code\t32"; - - Data16bitsDirective = "\t.hword\t"; - Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = "\t.xword\t"; - - UseDataRegionDirectives = false; - - WeakRefDirective = "\t.weak\t"; - - HasLEB128 = true; - SupportsDebugInformation = true; - - // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfCFI; -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h deleted file mode 100644 index f2d33a7..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h +++ /dev/null @@ -1,36 +0,0 @@ -//=====-- ARM64MCAsmInfo.h - ARM64 asm properties -----------*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the ARM64MCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ARM64TARGETASMINFO_H -#define ARM64TARGETASMINFO_H - -#include "llvm/MC/MCAsmInfoDarwin.h" - -namespace llvm { -class Target; -class StringRef; -class MCStreamer; -struct ARM64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit ARM64MCAsmInfoDarwin(); - virtual const MCExpr *getExprForPersonalitySymbol(const MCSymbol *Sym, - unsigned Encoding, - MCStreamer &Streamer) const; -}; - -struct ARM64MCAsmInfoELF : public MCAsmInfo { - explicit ARM64MCAsmInfoELF(); -}; - -} // namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp deleted file mode 100644 index 19559f8..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp +++ /dev/null @@ -1,563 +0,0 @@ -//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ARM64MCCodeEmitter class. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "mccodeemitter" -#include "MCTargetDesc/ARM64AddressingModes.h" -#include "MCTargetDesc/ARM64BaseInfo.h" -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCExpr.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInst.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(MCNumEmitted, "Number of MC instructions emitted."); -STATISTIC(MCNumFixups, "Number of MC fixups created."); - -namespace { - -class ARM64MCCodeEmitter : public MCCodeEmitter { - MCContext &Ctx; - - ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT - void operator=(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT -public: - ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : Ctx(ctx) {} - - ~ARM64MCCodeEmitter() {} - - // getBinaryCodeForInstr - TableGen'erated function for getting the - // binary encoding for an instruction. - uint64_t getBinaryCodeForInstr(const MCInst &MI, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAMIndexed8OpValue - Return encoding info for base register - /// and 12-bit unsigned immediate attached to a load, store or prfm - /// instruction. If operand requires a relocation, record it and - /// return zero in that part of the encoding. - template - uint32_t getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label - /// target. - uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and - /// the 2-bit shift field. - uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getCondBranchTargetOpValue - Return the encoded value for a conditional - /// branch target. - uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- - /// branch target. - uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getBranchTargetOpValue - Return the encoded value for an unconditional - /// branch target. - uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMoveWideImmOpValue - Return the encoded value for the immediate operand - /// of a MOVZ or MOVK instruction. - uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getVecShifterOpValue - Return the encoded value for the vector shifter. 
- uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getMoveVecShifterOpValue - Return the encoded value for the vector move - /// shifter (MSL). - uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getFixedPointScaleOpValue - Return the encoded value for the - // FP-to-fixed-point scale factor. - uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - /// getSIMDShift64OpValue - Return the encoded value for the - // shift-by-immediate AdvSIMD instructions. - uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; - - unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const; - - void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; } - - void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const { - // Output the constant in little endian byte order. - for (unsigned i = 0; i != Size; ++i) { - EmitByte(Val & 255, OS); - Val >>= 8; - } - } - - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; -}; - -} // end anonymous namespace - -MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx) { - return new ARM64MCCodeEmitter(MCII, STI, Ctx); -} - -/// getMachineOpValue - Return binary encoding of operand. If the machine -/// operand requires relocation, record the relocation and return zero. 
-unsigned
-ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
-                                      SmallVectorImpl<MCFixup> &Fixups,
-                                      const MCSubtargetInfo &STI) const {
-  if (MO.isReg())
-    return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
-  else {
-    assert(MO.isImm() && "did not expect relocated expression");
-    return static_cast<unsigned>(MO.getImm());
-  }
-
-  assert(0 && "Unable to encode MCOperand!");
-  return 0;
-}
-
-template <unsigned FixupKind>
-uint32_t
-ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx,
-                                         SmallVectorImpl<MCFixup> &Fixups,
-                                         const MCSubtargetInfo &STI) const {
-  unsigned BaseReg = MI.getOperand(OpIdx).getReg();
-  BaseReg = Ctx.getRegisterInfo()->getEncodingValue(BaseReg);
-
-  const MCOperand &MO = MI.getOperand(OpIdx + 1);
-  uint32_t ImmVal = 0;
-
-  if (MO.isImm())
-    ImmVal = static_cast<uint32_t>(MO.getImm());
-  else {
-    assert(MO.isExpr() && "unable to encode load/store imm operand");
-    MCFixupKind Kind = MCFixupKind(FixupKind);
-    Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
-    ++MCNumFixups;
-  }
-
-  return BaseReg | (ImmVal << 5);
-}
-
-/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
-/// target.
-uint32_t
-ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
-                                       SmallVectorImpl<MCFixup> &Fixups,
-                                       const MCSubtargetInfo &STI) const {
-  const MCOperand &MO = MI.getOperand(OpIdx);
-
-  // If the destination is an immediate, we have nothing to do.
-  if (MO.isImm())
-    return MO.getImm();
-  assert(MO.isExpr() && "Unexpected ADR target type!");
-  const MCExpr *Expr = MO.getExpr();
-
-  MCFixupKind Kind = MI.getOpcode() == ARM64::ADR
-                         ? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21)
-                         : MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21);
-  Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
-
-  MCNumFixups += 1;
-
-  // All of the information is in the fixup.
-  return 0;
-}
-
-/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and
-/// the 2-bit shift field. The shift field is stored in bits 13-14 of the
-/// return value.
-uint32_t
-ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
-                                        SmallVectorImpl<MCFixup> &Fixups,
-                                        const MCSubtargetInfo &STI) const {
-  // Suboperands are [imm, shifter].
-  const MCOperand &MO = MI.getOperand(OpIdx);
-  const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
-  assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL &&
-         "unexpected shift type for add/sub immediate");
-  unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm());
-  assert((ShiftVal == 0 || ShiftVal == 12) &&
-         "unexpected shift value for add/sub immediate");
-  if (MO.isImm())
-    return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12));
-  assert(MO.isExpr() && "Unable to encode MCOperand!");
-  const MCExpr *Expr = MO.getExpr();
-  assert(ShiftVal == 0 && "shift not allowed on add/sub immediate with fixup");
-
-  // Encode the 12 bits of the fixup.
-  MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12);
-  Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
-
-  ++MCNumFixups;
-
-  return 0;
-}
-
-/// getCondBranchTargetOpValue - Return the encoded value for a conditional
-/// branch target.
-uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue(
-    const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
-    const MCSubtargetInfo &STI) const {
-  const MCOperand &MO = MI.getOperand(OpIdx);
-
-  // If the destination is an immediate, we have nothing to do.
- if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected target type!"); - - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_imm19); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. - return 0; -} - -uint32_t -ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected movz/movk immediate"); - - Fixups.push_back(MCFixup::Create( - 0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw), MI.getLoc())); - - ++MCNumFixups; - - return 0; -} - -/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and- -/// branch target. -uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue( - const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); - - MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. - return 0; -} - -/// getBranchTargetOpValue - Return the encoded value for an unconditional -/// branch target. -uint32_t -ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - - // If the destination is an immediate, we have nothing to do. - if (MO.isImm()) - return MO.getImm(); - assert(MO.isExpr() && "Unexpected ADR target type!"); - - MCFixupKind Kind = MI.getOpcode() == ARM64::BL - ? MCFixupKind(ARM64::fixup_arm64_pcrel_call26) - : MCFixupKind(ARM64::fixup_arm64_pcrel_branch26); - Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc())); - - ++MCNumFixups; - - // All of the information is in the fixup. 
- return 0; -} - -/// getVecShifterOpValue - Return the encoded value for the vector shifter: -/// -/// 00 -> 0 -/// 01 -> 8 -/// 10 -> 16 -/// 11 -> 24 -uint32_t -ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - - switch (MO.getImm()) { - default: - break; - case 0: - return 0; - case 8: - return 1; - case 16: - return 2; - case 24: - return 3; - } - - assert(false && "Invalid value for vector shift amount!"); - return 0; -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 64 - (MO.getImm()); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 64 - (MO.getImm() | 32); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 32 - (MO.getImm() | 16); -} - -uint32_t -ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the shift amount!"); - return 16 - (MO.getImm() | 8); -} - -/// getFixedPointScaleOpValue - Return the encoded value for the -// FP-to-fixed-point scale factor. 
-uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue( - const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 64 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 64 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 32 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 16 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return 8 - MO.getImm(); -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 64; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 32; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 16; -} - -uint32_t -ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && "Expected an immediate value for the scale amount!"); - return MO.getImm() - 8; -} - -/// getMoveVecShifterOpValue - Return the encoded value for the vector move -/// shifter (MSL). -uint32_t -ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpIdx); - assert(MO.isImm() && - "Expected an immediate value for the move shift amount!"); - unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm()); - assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!"); - return ShiftVal == 8 ? 
0 : 1; -} - -unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, - const MCSubtargetInfo &STI) const { - // If one of the signed fixup kinds is applied to a MOVZ instruction, the - // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's - // job to ensure that any bits possibly affected by this are 0. This means we - // must zero out bit 30 (essentially emitting a MOVN). - MCOperand UImm16MO = MI.getOperand(1); - - // Nothing to do if there's no fixup. - if (UImm16MO.isImm()) - return EncodedValue; - - return EncodedValue & ~(1u << 30); -} - -void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - if (MI.getOpcode() == ARM64::TLSDESCCALL) { - // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the - // following (BLR) instruction. It doesn't emit any code itself so it - // doesn't go through the normal TableGenerated channels. - MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call); - Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup)); - return; - } - - uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - EmitConstant(Binary, 4, OS); - ++MCNumEmitted; // Keep track of the # of mi's emitted. -} - -#include "ARM64GenMCCodeEmitter.inc" diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp deleted file mode 100644 index d4ab140..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp +++ /dev/null @@ -1,168 +0,0 @@ -//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the assembly expression modifiers -// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...). 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "aarch64symbolrefexpr" -#include "ARM64MCExpr.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELF.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MCValue.h" -#include "llvm/Object/ELF.h" -#include "llvm/Support/ErrorHandling.h" - -using namespace llvm; - -const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind Kind, - MCContext &Ctx) { - return new (Ctx) ARM64MCExpr(Expr, Kind); -} - -StringRef ARM64MCExpr::getVariantKindName() const { - switch (static_cast(getKind())) { - case VK_CALL: return ""; - case VK_LO12: return ":lo12:"; - case VK_ABS_G3: return ":abs_g3:"; - case VK_ABS_G2: return ":abs_g2:"; - case VK_ABS_G2_NC: return ":abs_g2_nc:"; - case VK_ABS_G1: return ":abs_g1:"; - case VK_ABS_G1_NC: return ":abs_g1_nc:"; - case VK_ABS_G0: return ":abs_g0:"; - case VK_ABS_G0_NC: return ":abs_g0_nc:"; - case VK_DTPREL_G2: return ":dtprel_g2:"; - case VK_DTPREL_G1: return ":dtprel_g1:"; - case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:"; - case VK_DTPREL_G0: return ":dtprel_g0:"; - case VK_DTPREL_G0_NC: return ":dtprel_g0_nc:"; - case VK_DTPREL_LO12: return ":dtprel_lo12:"; - case VK_DTPREL_LO12_NC: return ":dtprel_lo12_nc:"; - case VK_TPREL_G2: return ":tprel_g2:"; - case VK_TPREL_G1: return ":tprel_g1:"; - case VK_TPREL_G1_NC: return ":tprel_g1_nc:"; - case VK_TPREL_G0: return ":tprel_g0:"; - case VK_TPREL_G0_NC: return ":tprel_g0_nc:"; - case VK_TPREL_LO12: return ":tprel_lo12:"; - case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:"; - case VK_TLSDESC_LO12: return ":tlsdesc_lo12:"; - case VK_ABS_PAGE: return ""; - case VK_GOT_PAGE: return ":got:"; - case VK_GOT_LO12: return ":got_lo12:"; - case VK_GOTTPREL_PAGE: return ":gottprel:"; - case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:"; - case VK_GOTTPREL_G1: return ":gottprel_g1:"; - case VK_GOTTPREL_G0_NC: return ":gottprel_g0_nc:"; - case VK_TLSDESC: return ""; - case VK_TLSDESC_PAGE: return ":tlsdesc:"; - default: - llvm_unreachable("Invalid ELF symbol kind"); - } -} - -void ARM64MCExpr::PrintImpl(raw_ostream &OS) const { - if (getKind() != VK_NONE) - OS << getVariantKindName(); - OS << *Expr; -} - -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -// FIXME: really do above: now that two backends are using it. 
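// --- Illustrative aside, not part of the patch: for an expression such as
// (foo - bar) + 8, the recursive walk below visits the nodes in this order,
// creating symbol data only at the SymbolRef leaves (the trace is hypothetical):
//   AddValueSymbolsImpl(Add)   // MCExpr::Binary -> recurse into LHS, RHS
//   AddValueSymbolsImpl(Sub)   // MCExpr::Binary -> recurse again
//   AddValueSymbolsImpl(foo)   // MCExpr::SymbolRef -> getOrCreateSymbolData
//   AddValueSymbolsImpl(bar)   // MCExpr::SymbolRef -> getOrCreateSymbolData
//   AddValueSymbolsImpl(8)     // MCExpr::Constant -> nothing to record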
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); -} - -const MCSection *ARM64MCExpr::FindAssociatedSection() const { - llvm_unreachable("FIXME: what goes here?"); -} - -bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout)) - return false; - - Res = - MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind()); - - return true; -} - -static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) { - switch (Expr->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expression"); - break; - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); - fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm); - fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: { - // We're known to be under a TLS fixup, so any symbol should be - // modified. There should be only one. - const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr); - MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol()); - MCELF::SetType(SD, ELF::STT_TLS); - break; - } - - case MCExpr::Unary: - fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm); - break; - } -} - -void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { - switch (getSymbolLoc(Kind)) { - default: - return; - case VK_DTPREL: - case VK_GOTTPREL: - case VK_TPREL: - case VK_TLSDESC: - break; - } - - fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h deleted file mode 100644 index a33fe43..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h +++ /dev/null @@ -1,162 +0,0 @@ -//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes ARM64-specific MCExprs, used for modifiers like -// ":lo12:" or ":gottprel_g1:". -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ARM64MCEXPR_H -#define LLVM_ARM64MCEXPR_H - -#include "llvm/MC/MCExpr.h" -#include "llvm/Support/ErrorHandling.h" - -namespace llvm { - -class ARM64MCExpr : public MCTargetExpr { -public: - enum VariantKind { - VK_NONE = 0x000, - - // Symbol locations specifying (roughly speaking) what calculation should be - // performed to construct the final address for the relocated - // symbol. E.g. direct, via the GOT, ...
- VK_ABS = 0x001, - VK_SABS = 0x002, - VK_GOT = 0x003, - VK_DTPREL = 0x004, - VK_GOTTPREL = 0x005, - VK_TPREL = 0x006, - VK_TLSDESC = 0x007, - VK_SymLocBits = 0x00f, - - // Variants specifying which part of the final address calculation is - // used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits for a - // MOVZ/MOVK. - VK_PAGE = 0x010, - VK_PAGEOFF = 0x020, - VK_G0 = 0x030, - VK_G1 = 0x040, - VK_G2 = 0x050, - VK_G3 = 0x060, - VK_AddressFragBits = 0x0f0, - - // Whether the final relocation is a checked one (where a linker should - // perform a range-check on the final address) or not. Note that this field - // is unfortunately sometimes omitted from the assembly syntax. E.g. :lo12: - // on its own is a non-checked relocation. We side with ELF on being - // explicit about this! - VK_NC = 0x100, - - // Convenience definitions for referring to specific textual representations - // of relocation specifiers. Note that this means the "_NC" is sometimes - // omitted in line with assembly syntax here (VK_LO12 rather than VK_LO12_NC - // since a user would write ":lo12:"). - VK_CALL = VK_ABS, - VK_ABS_PAGE = VK_ABS | VK_PAGE, - VK_ABS_G3 = VK_ABS | VK_G3, - VK_ABS_G2 = VK_ABS | VK_G2, - VK_ABS_G2_NC = VK_ABS | VK_G2 | VK_NC, - VK_ABS_G1 = VK_ABS | VK_G1, - VK_ABS_G1_NC = VK_ABS | VK_G1 | VK_NC, - VK_ABS_G0 = VK_ABS | VK_G0, - VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC, - VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC, - VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC, - VK_GOT_PAGE = VK_GOT | VK_PAGE, - VK_DTPREL_G2 = VK_DTPREL | VK_G2, - VK_DTPREL_G1 = VK_DTPREL | VK_G1, - VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC, - VK_DTPREL_G0 = VK_DTPREL | VK_G0, - VK_DTPREL_G0_NC = VK_DTPREL | VK_G0 | VK_NC, - VK_DTPREL_LO12 = VK_DTPREL | VK_PAGEOFF, - VK_DTPREL_LO12_NC = VK_DTPREL | VK_PAGEOFF | VK_NC, - VK_GOTTPREL_PAGE = VK_GOTTPREL | VK_PAGE, - VK_GOTTPREL_LO12_NC = VK_GOTTPREL | VK_PAGEOFF | VK_NC, - VK_GOTTPREL_G1 = VK_GOTTPREL | VK_G1, - VK_GOTTPREL_G0_NC = VK_GOTTPREL | VK_G0 | VK_NC, - VK_TPREL_G2 = VK_TPREL | VK_G2, - VK_TPREL_G1 = VK_TPREL | VK_G1, - VK_TPREL_G1_NC = VK_TPREL | VK_G1 | VK_NC, - VK_TPREL_G0 = VK_TPREL | VK_G0, - VK_TPREL_G0_NC = VK_TPREL | VK_G0 | VK_NC, - VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF, - VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC, - VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC, - VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE, - - VK_INVALID = 0xfff - }; - -private: - const MCExpr *Expr; - const VariantKind Kind; - - explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind) - : Expr(Expr), Kind(Kind) {} - -public: - /// @name Construction - /// @{ - - static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind, - MCContext &Ctx); - - /// @} - /// @name Accessors - /// @{ - - /// Get the kind of this expression. - VariantKind getKind() const { return static_cast<VariantKind>(Kind); } - - /// Get the expression this modifier applies to. - const MCExpr *getSubExpr() const { return Expr; } - - /// @} - /// @name VariantKind information extractors. - /// @{ - - static VariantKind getSymbolLoc(VariantKind Kind) { - return static_cast<VariantKind>(Kind & VK_SymLocBits); - } - - static VariantKind getAddressFrag(VariantKind Kind) { - return static_cast<VariantKind>(Kind & VK_AddressFragBits); - } - - static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; } - - /// @} - - /// Convert the variant kind into an ELF-appropriate modifier - /// (e.g. ":got:", ":lo12:").
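// --- Illustrative aside, not part of the patch: a composite variant kind is
// the OR of one value from each bit-group above. For example:
//   VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC = 0x004 | 0x040 | 0x100 = 0x144
//   getSymbolLoc(VK_DTPREL_G1_NC)   == VK_DTPREL  (masked with 0x00f)
//   getAddressFrag(VK_DTPREL_G1_NC) == VK_G1      (masked with 0x0f0)
//   isNotChecked(VK_DTPREL_G1_NC)   == true       (the VK_NC bit is set)
static_assert((0x004 | 0x040 | 0x100) == 0x144, "VariantKind field composition");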
- StringRef getVariantKindName() const; - - void PrintImpl(raw_ostream &OS) const; - - void AddValueSymbols(MCAssembler *) const; - - const MCSection *FindAssociatedSection() const; - - bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; - - static bool classof(const MCExpr *E) { - return E->getKind() == MCExpr::Target; - } - - static bool classof(const ARM64MCExpr *) { return true; } - -}; -} // end namespace llvm - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp deleted file mode 100644 index 8d54412..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp +++ /dev/null @@ -1,167 +0,0 @@ -//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides ARM64 specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "ARM64MCTargetDesc.h" -#include "ARM64ELFStreamer.h" -#include "ARM64MCAsmInfo.h" -#include "InstPrinter/ARM64InstPrinter.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "ARM64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "ARM64GenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "ARM64GenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createARM64MCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitARM64MCInstrInfo(X); - return X; -} - -static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitARM64MCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitARM64MCRegisterInfo(X, ARM64::LR); - return X; -} - -static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI, - StringRef TT) { - Triple TheTriple(TT); - - MCAsmInfo *MAI; - if (TheTriple.isOSDarwin()) - MAI = new ARM64MCAsmInfoDarwin(); - else { - assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF"); - MAI = new ARM64MCAsmInfoELF(); - } - - // Initial state of the frame pointer is SP. - unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0); - MAI->addInitialFrameState(Inst); - - return MAI; -} - -static MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) { - Triple TheTriple(TT); - assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) && - "Only expect Darwin and ELF targets"); - - if (CM == CodeModel::Default) - CM = CodeModel::Small; - // The default MCJIT memory managers make no guarantees about where they can - // find an executable page; JITed code needs to be able to refer to globals - // no matter how far away they are. 
- else if (CM == CodeModel::JITDefault) - CM = CodeModel::Large; - else if (CM != CodeModel::Small && CM != CodeModel::Large) - report_fatal_error("Only small and large code models are allowed on ARM64"); - - // ARM64 Darwin is always PIC. - if (TheTriple.isOSDarwin()) - RM = Reloc::PIC_; - // On ELF platforms the default static relocation model has a smart enough - // linker to cope with referencing external symbols defined in a shared - // library. Hence DynamicNoPIC doesn't need to be promoted to PIC. - else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) - RM = Reloc::Static; - - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM, OL); - return X; -} - -static MCInstPrinter *createARM64MCInstPrinter(const Target &T, - unsigned SyntaxVariant, - const MCAsmInfo &MAI, - const MCInstrInfo &MII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI) { - if (SyntaxVariant == 0) - return new ARM64InstPrinter(MAI, MII, MRI, STI); - if (SyntaxVariant == 1) - return new ARM64AppleInstPrinter(MAI, MII, MRI, STI); - - return 0; -} - -static MCStreamer *createMCStreamer(const Target &T, StringRef TT, - MCContext &Ctx, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, bool RelaxAll, - bool NoExecStack) { - Triple TheTriple(TT); - - if (TheTriple.isOSDarwin()) - return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll, - /*LabelSections*/ true); - - return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); -} - -// Force static initialization. -extern "C" void LLVMInitializeARM64TargetMC() { - // Register the MC asm info. - RegisterMCAsmInfoFn X(TheARM64Target, createARM64MCAsmInfo); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target, - createARM64MCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheARM64Target, createARM64MCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheARM64Target, createARM64MCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target, - createARM64MCSubtargetInfo); - - // Register the asm backend. - TargetRegistry::RegisterMCAsmBackend(TheARM64Target, createARM64AsmBackend); - - // Register the MC Code Emitter - TargetRegistry::RegisterMCCodeEmitter(TheARM64Target, - createARM64MCCodeEmitter); - - // Register the object streamer. - TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer); - - // Register the MCInstPrinter. - TargetRegistry::RegisterMCInstPrinter(TheARM64Target, - createARM64MCInstPrinter); -} diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h deleted file mode 100644 index 0db2b22..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides ARM64 specific target descriptions. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ARM64MCTARGETDESC_H -#define ARM64MCTARGETDESC_H - -#include "llvm/Support/DataTypes.h" -#include - -namespace llvm { -class MCAsmBackend; -class MCCodeEmitter; -class MCContext; -class MCInstrInfo; -class MCRegisterInfo; -class MCObjectWriter; -class MCSubtargetInfo; -class StringRef; -class Target; -class raw_ostream; - -extern Target TheARM64Target; - -MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII, - const MCRegisterInfo &MRI, - const MCSubtargetInfo &STI, - MCContext &Ctx); -MCAsmBackend *createARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); - -MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI); - -MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t CPUType, - uint32_t CPUSubtype); - -} // End llvm namespace - -// Defines symbolic names for ARM64 registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "ARM64GenRegisterInfo.inc" - -// Defines symbolic names for the ARM64 instructions. -// -#define GET_INSTRINFO_ENUM -#include "ARM64GenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "ARM64GenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp deleted file mode 100644 index 1733dc5..0000000 --- a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp +++ /dev/null @@ -1,396 +0,0 @@ -//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "MCTargetDesc/ARM64FixupKinds.h" -#include "MCTargetDesc/ARM64MCTargetDesc.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCAsmLayout.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCFixup.h" -#include "llvm/MC/MCMachObjectWriter.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCValue.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" -using namespace llvm; - -namespace { -class ARM64MachObjectWriter : public MCMachObjectTargetWriter { - bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType, - const MCSymbolRefExpr *Sym, - unsigned &Log2Size, const MCAssembler &Asm); - -public: - ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype) - : MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype, - /*UseAggressiveSymbolFolding=*/true) {} - - void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, - const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue); -}; -} - -bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo( - const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym, - unsigned &Log2Size, const MCAssembler &Asm) { - RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED); - Log2Size = ~0U; - - switch ((unsigned)Fixup.getKind()) { - default: - return false; - - case FK_Data_1: - Log2Size = llvm::Log2_32(1); - return true; - case FK_Data_2: - Log2Size = llvm::Log2_32(2); - return true; - case FK_Data_4: - Log2Size = llvm::Log2_32(4); - if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) - RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); - return true; - case FK_Data_8: - Log2Size = llvm::Log2_32(8); - if (Sym->getKind() == MCSymbolRefExpr::VK_GOT) - RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT); - return true; - case ARM64::fixup_arm64_add_imm12: - case ARM64::fixup_arm64_ldst_imm12_scale1: - case ARM64::fixup_arm64_ldst_imm12_scale2: - case ARM64::fixup_arm64_ldst_imm12_scale4: - case ARM64::fixup_arm64_ldst_imm12_scale8: - case ARM64::fixup_arm64_ldst_imm12_scale16: - Log2Size = llvm::Log2_32(4); - switch (Sym->getKind()) { - default: - assert(0 && "Unexpected symbol reference variant kind!"); - case MCSymbolRefExpr::VK_PAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12); - return true; - case MCSymbolRefExpr::VK_GOTPAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); - return true; - case MCSymbolRefExpr::VK_TLVPPAGEOFF: - RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); - return true; - } - case ARM64::fixup_arm64_pcrel_adrp_imm21: - Log2Size = llvm::Log2_32(4); - // This encompasses the relocation for the whole 21-bit value. 
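// --- Illustrative aside, not part of the patch: two sample classifications
// produced by getARM64FixupKindMachOInfo above, following the cases shown:
//   FK_Data_8 against a @GOT symbol    -> Log2Size = Log2_32(8) = 3,
//                                         RelocType = ARM64_RELOC_POINTER_TO_GOT
//   fixup_arm64_add_imm12 at @PAGEOFF  -> Log2Size = Log2_32(4) = 2,
//                                         RelocType = ARM64_RELOC_PAGEOFF12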
- switch (Sym->getKind()) { - default: - Asm.getContext().FatalError(Fixup.getLoc(), - "ADR/ADRP relocations must be GOT relative"); - case MCSymbolRefExpr::VK_PAGE: - RelocType = unsigned(MachO::ARM64_RELOC_PAGE21); - return true; - case MCSymbolRefExpr::VK_GOTPAGE: - RelocType = unsigned(MachO::ARM64_RELOC_GOT_LOAD_PAGE21); - return true; - case MCSymbolRefExpr::VK_TLVPPAGE: - RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); - return true; - } - return true; - case ARM64::fixup_arm64_pcrel_branch26: - case ARM64::fixup_arm64_pcrel_call26: - Log2Size = llvm::Log2_32(4); - RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26); - return true; - } -} - -void ARM64MachObjectWriter::RecordRelocation( - MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { - unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - - // See <reloc.h>. - uint32_t FixupOffset = Layout.getFragmentOffset(Fragment); - unsigned Log2Size = 0; - int64_t Value = 0; - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - unsigned Kind = Fixup.getKind(); - - FixupOffset += Fixup.getOffset(); - - // ARM64 pcrel relocation addends do not include the section offset. - if (IsPCRel) - FixedValue += FixupOffset; - - // ADRP fixups use relocations for the whole symbol value and only - // put the addend in the instruction itself. Clear out any value the - // generic code figured out from the symbol definition. - if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21 || - Kind == ARM64::fixup_arm64_pcrel_imm19) - FixedValue = 0; - - // imm19 relocations are for conditional branches, which require - // assembler local symbols. If we got here, that's not what we have, - // so complain loudly. - if (Kind == ARM64::fixup_arm64_pcrel_imm19) { - Asm.getContext().FatalError(Fixup.getLoc(), - "conditional branch requires assembler-local" - " label. '" + - Target.getSymA()->getSymbol().getName() + - "' is external."); - return; - } - - // 14-bit branch relocations should only target internal labels, and so - // should never get here. - if (Kind == ARM64::fixup_arm64_pcrel_branch14) { - Asm.getContext().FatalError(Fixup.getLoc(), - "Invalid relocation on conditional branch!"); - return; - } - - if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, - Asm)) { - Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup kind!"); - return; - } - - Value = Target.getConstant(); - - if (Target.isAbsolute()) { // constant - // FIXME: Should this always be extern? - // SymbolNum of 0 indicates the absolute section. - Type = MachO::ARM64_RELOC_UNSIGNED; - Index = 0; - - if (IsPCRel) { - IsExtern = 1; - Asm.getContext().FatalError(Fixup.getLoc(), - "PC relative absolute relocation!"); - - // FIXME: x86_64 sets the type to a branch reloc here. Should we do - // something similar? - } - } else if (Target.getSymB()) { // A - B + constant - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData &A_SD = Asm.getSymbolData(*A); - const MCSymbolData *A_Base = Asm.getAtom(&A_SD); - - const MCSymbol *B = &Target.getSymB()->getSymbol(); - MCSymbolData &B_SD = Asm.getSymbolData(*B); - const MCSymbolData *B_Base = Asm.getAtom(&B_SD); - - // Check for "_foo@got - .", which comes through here as: - // Ltmp0: - // ...
_foo@got - Ltmp0 - if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOT && - Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None && - Layout.getSymbolOffset(&B_SD) == - Layout.getFragmentOffset(Fragment) + Fixup.getOffset()) { - // SymB is the PC, so use a PC-rel pointer-to-GOT relocation. - Index = A_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_POINTER_TO_GOT; - IsPCRel = 1; - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - return; - } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - // Otherwise, neither symbol can be modified. - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation of modified symbol"); - - // We don't support PCrel relocations of differences. - if (IsPCRel) - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported pc-relative relocation of " - "difference"); - - // ARM64 always uses external relocations. If there is no symbol to use as - // a base address (a local symbol with no preceeding non-local symbol), - // error out. - // - // FIXME: We should probably just synthesize an external symbol and use - // that. - if (!A_Base) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + A->getName() + - "'. Must have non-local symbol earlier in section."); - if (!B_Base) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + B->getName() + - "'. Must have non-local symbol earlier in section."); - - if (A_Base == B_Base && A_Base) - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation with identical base"); - - Value += (A_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress( - &A_SD, Layout)) - - (A_Base == NULL || A_Base->getFragment() == NULL - ? 0 - : Writer->getSymbolAddress(A_Base, Layout)); - Value -= (B_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress( - &B_SD, Layout)) - - (B_Base == NULL || B_Base->getFragment() == NULL - ? 0 - : Writer->getSymbolAddress(B_Base, Layout)); - - Index = A_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_UNSIGNED; - - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - - Index = B_Base->getIndex(); - IsExtern = 1; - Type = MachO::ARM64_RELOC_SUBTRACTOR; - } else { // A + constant - const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); - MCSymbolData &SD = Asm.getSymbolData(*Symbol); - const MCSymbolData *Base = Asm.getAtom(&SD); - const MCSectionMachO &Section = static_cast( - Fragment->getParent()->getSection()); - - // If the symbol is a variable and we weren't able to get a Base for it - // (i.e., it's not in the symbol table associated with a section) resolve - // the relocation based its expansion instead. - if (Symbol->isVariable() && !Base) { - // If the evaluation is an absolute value, just use that directly - // to keep things easy. - int64_t Res; - if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, Writer->getSectionAddressMap())) { - FixedValue = Res; - return; - } - - // FIXME: Will the Target we already have ever have any data in it - // we need to preserve and merge with the new Target? How about - // the FixedValue? 
- if (!Symbol->getVariableValue()->EvaluateAsRelocatable(Target, &Layout)) - Asm.getContext().FatalError(Fixup.getLoc(), - "unable to resolve variable '" + - Symbol->getName() + "'"); - return RecordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, - FixedValue); - } - - // Relocations inside debug sections always use local relocations when - // possible. This seems to be done because the debugger doesn't fully - // understand relocation entries and expects to find values that - // have already been fixed up. - if (Symbol->isInSection()) { - if (Section.hasAttribute(MachO::S_ATTR_DEBUG)) - Base = 0; - } - - // ARM64 uses external relocations as much as possible. For debug sections, - // and for pointer-sized relocations (.quad), we allow section relocations. - // It's code sections that run into trouble. - if (Base) { - Index = Base->getIndex(); - IsExtern = 1; - - // Add the local offset, if needed. - if (Base != &SD) - Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); - } else if (Symbol->isInSection()) { - // Pointer-sized relocations can use a local relocation. Otherwise, - // we have to be in a debug info section. - if (!Section.hasAttribute(MachO::S_ATTR_DEBUG) && Log2Size != 3) - Asm.getContext().FatalError( - Fixup.getLoc(), - "unsupported relocation of local symbol '" + Symbol->getName() + - "'. Must have non-local symbol earlier in section."); - // Adjust the relocation to be section-relative. - // The index is the section ordinal (1-based). - const MCSectionData &SymSD = - Asm.getSectionData(SD.getSymbol().getSection()); - Index = SymSD.getOrdinal() + 1; - IsExtern = 0; - Value += Writer->getSymbolAddress(&SD, Layout); - - if (IsPCRel) - Value -= Writer->getFragmentAddress(Fragment, Layout) + - Fixup.getOffset() + (1ULL << Log2Size); - } else { - // Resolve constant variables. - if (SD.getSymbol().isVariable()) { - int64_t Res; - if (SD.getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, Writer->getSectionAddressMap())) { - FixedValue = Res; - return; - } - } - Asm.getContext().FatalError(Fixup.getLoc(), - "unsupported relocation of variable '" + - Symbol->getName() + "'"); - } - } - - // If the relocation kind is Branch26, Page21, or Pageoff12, any addend - // is represented via an Addend relocation, not encoded directly into - // the instruction. - if ((Type == MachO::ARM64_RELOC_BRANCH26 || - Type == MachO::ARM64_RELOC_PAGE21 || - Type == MachO::ARM64_RELOC_PAGEOFF12) && - Value) { - assert((Value & 0xff000000) == 0 && "Added relocation out of range!"); - - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); - - // Now set up the Addend relocation. - Type = MachO::ARM64_RELOC_ADDEND; - Index = Value; - IsPCRel = 0; - Log2Size = 2; - IsExtern = 0; - - // Put zero into the instruction itself. The addend is in the relocation. - Value = 0; - } - - // If there's any addend left to handle, encode it in the instruction. 
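// --- Illustrative aside, not part of the patch: every relocation entry this
// function emits packs r_word1 with the same shifts. A sketch of that packing
// as a stand-alone helper (the helper name is hypothetical); the field widths
// mirror Mach-O's struct relocation_info: 24-bit symbol/section index, 1-bit
// pcrel flag, 2-bit length, 1-bit extern flag, 4-bit type.
static uint32_t packRelocationWord1(unsigned Index, unsigned IsPCRel,
                                    unsigned Log2Size, unsigned IsExtern,
                                    unsigned Type) {
  return (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) |
         (IsExtern << 27) | (Type << 28);
}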
- FixedValue = Value; - - // struct relocation_info (8 bytes) - MachO::any_relocation_info MRE; - MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | - (IsExtern << 27) | (Type << 28)); - Writer->addRelocation(Fragment->getParent(), MRE); -} - -MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS, - uint32_t CPUType, - uint32_t CPUSubtype) { - return createMachObjectWriter(new ARM64MachObjectWriter(CPUType, CPUSubtype), - OS, /*IsLittleEndian=*/true); -} diff --git a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index f8665bc..0000000 --- a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -add_llvm_library(LLVMARM64Desc - ARM64AsmBackend.cpp - ARM64ELFObjectWriter.cpp - ARM64ELFStreamer.cpp - ARM64MCAsmInfo.cpp - ARM64MCCodeEmitter.cpp - ARM64MCExpr.cpp - ARM64MCTargetDesc.cpp - ARM64MachObjectWriter.cpp -) -add_dependencies(LLVMARM64Desc ARM64CommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) diff --git a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt deleted file mode 100644 index e4c74d2..0000000 --- a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Desc -parent = ARM64 -required_libraries = ARM64AsmPrinter ARM64Info MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/MCTargetDesc/Makefile b/lib/Target/ARM64/MCTargetDesc/Makefile deleted file mode 100644 index 013cc63..0000000 --- a/lib/Target/ARM64/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/ARM64/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Desc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/Makefile b/lib/Target/ARM64/Makefile deleted file mode 100644 index 5f0f307..0000000 --- a/lib/Target/ARM64/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMARM64CodeGen -TARGET = ARM64 - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \ - ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \ - ARM64GenDAGISel.inc \ - ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \ - ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \ - ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \ - ARM64GenMCPseudoLowering.inc - -DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp b/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp deleted file mode 100644 index dec09ed..0000000 --- a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===-- ARM64TargetInfo.cpp - ARM64 Target Implementation -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/Triple.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -namespace llvm { -Target TheARM64Target; -} // end namespace llvm - -extern "C" void LLVMInitializeARM64TargetInfo() { - RegisterTarget X(TheARM64Target, "arm64", - "ARM64"); -} diff --git a/lib/Target/ARM64/TargetInfo/CMakeLists.txt b/lib/Target/ARM64/TargetInfo/CMakeLists.txt deleted file mode 100644 index a0142c4..0000000 --- a/lib/Target/ARM64/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMARM64Info - ARM64TargetInfo.cpp - ) - -add_dependencies(LLVMARM64Info ARM64CommonTableGen) diff --git a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt b/lib/Target/ARM64/TargetInfo/LLVMBuild.txt deleted file mode 100644 index 5bea694..0000000 --- a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt +++ /dev/null @@ -1,24 +0,0 @@ -;===- ./lib/Target/ARM64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ARM64Info -parent = ARM64 -required_libraries = MC Support -add_to_library_groups = ARM64 - diff --git a/lib/Target/ARM64/TargetInfo/Makefile b/lib/Target/ARM64/TargetInfo/Makefile deleted file mode 100644 index 2d5a1a0..0000000 --- a/lib/Target/ARM64/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/ARM64/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMARM64Info - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. 
- -include $(LEVEL)/Makefile.common diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index afd1f51..15b574d 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -108,9 +108,9 @@ namespace { explicit CppWriter(formatted_raw_ostream &o) : ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} - virtual const char *getPassName() const { return "C++ backend"; } + const char *getPassName() const override { return "C++ backend"; } - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; void printProgram(const std::string& fname, const std::string& modName ); void printModule(const std::string& fname, const std::string& modName ); @@ -396,7 +396,7 @@ std::string CppWriter::getCppName(Type* Ty) { return I->second; // Okay, let's build a new name for this type. Start with a prefix - const char* prefix = 0; + const char* prefix = nullptr; switch (Ty->getTypeID()) { case Type::FunctionTyID: prefix = "FuncTy_"; break; case Type::StructTyID: prefix = "StructTy_"; break; @@ -1690,9 +1690,8 @@ void CppWriter::printFunctionUses(const Function* F) { // Print the function declarations for any functions encountered nl(Out) << "// Function Declarations"; nl(Out); - for (SmallPtrSet::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (Function* Fun = dyn_cast<Function>(*I)) { + for (auto *GV : gvs) { + if (Function *Fun = dyn_cast<Function>(GV)) { if (!is_inline || Fun != F) printFunctionHead(Fun); } @@ -1700,17 +1699,15 @@ void CppWriter::printFunctionUses(const Function* F) { // Print the global variable declarations for any variables encountered nl(Out) << "// Global Variable Declarations"; nl(Out); - for (SmallPtrSet::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I)) + for (auto *GV : gvs) { + if (GlobalVariable *F = dyn_cast<GlobalVariable>(GV)) printVariableHead(F); } // Print the constants found nl(Out) << "// Constant Definitions"; nl(Out); - for (SmallPtrSet::iterator I = consts.begin(), - E = consts.end(); I != E; ++I) { - printConstant(*I); + for (const auto *C : consts) { + printConstant(C); } // Process the global variables definitions now that all the constants have // initializers.
if (GenerationType != GenFunction) { nl(Out) << "// Global Variable Definitions"; nl(Out); - for (SmallPtrSet::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I)) - printVariableBody(GV); + for (const auto &GV : gvs) { + if (GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) + printVariableBody(Var); } } } diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h index 477e788..673ade7 100644 --- a/lib/Target/CppBackend/CPPTargetMachine.h +++ b/lib/Target/CppBackend/CPPTargetMachine.h @@ -28,14 +28,12 @@ struct CPPTargetMachine : public TargetMachine { CodeGenOpt::Level OL) : TargetMachine(T, TT, CPU, FS, Options) {} - virtual bool addPassesToEmitFile(PassManagerBase &PM, - formatted_raw_ostream &Out, - CodeGenFileType FileType, - bool DisableVerify, - AnalysisID StartAfter, - AnalysisID StopAfter); - - virtual const DataLayout *getDataLayout() const { return 0; } + bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out, + CodeGenFileType FileType, bool DisableVerify, + AnalysisID StartAfter, + AnalysisID StopAfter) override; + + const DataLayout *getDataLayout() const override { return nullptr; } }; extern Target TheCppBackendTarget; diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index c1b6d45..5f4a6c6 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -200,8 +200,6 @@ class Proc<string Name, SchedMachineModel Model, list<SubtargetFeature> Features> : ProcessorModel<Name, Model, Features>; -def : Proc<"hexagonv2", HexagonModel, [ArchV2]>; -def : Proc<"hexagonv3", HexagonModel, [ArchV2, ArchV3]>; def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>; def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>; diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index a588274..2e011bd 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "Hexagon.h" #include "HexagonAsmPrinter.h" #include "HexagonMachineFunctionInfo.h" @@ -56,6 +55,8 @@ using namespace llvm; +#define DEBUG_TYPE "asm-printer" + static cl::opt<bool> AlignCalls( "hexagon-align-calls", cl::Hidden, cl::init(true), cl::desc("Insert falign after call instruction for Hexagon target")); @@ -224,7 +225,7 @@ static MCInstPrinter *createHexagonMCInstPrinter(const Target &T, if (SyntaxVariant == 0) return(new HexagonInstPrinter(MAI, MII, MRI)); else - return NULL; + return nullptr; } extern "C" void LLVMInitializeHexagonAsmPrinter() { diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h index a186dc9..7fe8c57 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -30,21 +30,22 @@ namespace llvm { Subtarget = &TM.getSubtarget<HexagonSubtarget>(); } - virtual const char *getPassName() const { + const char *getPassName() const override { return "Hexagon Assembly Printer"; } - bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + bool isBlockOnlyReachableByFallthrough( + const MachineBasicBlock *MBB) const override; - virtual void EmitInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) override; void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS); + raw_ostream
&OS) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &OS); + raw_ostream &OS) override; static const char *getRegisterName(unsigned RegNo); }; diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp index 8597f11..de340e0 100644 --- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp +++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp @@ -6,7 +6,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon_cfg" #include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "hexagon_cfg" + namespace llvm { void initializeHexagonCFGOptimizerPass(PassRegistry&); } @@ -48,10 +49,10 @@ private: initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const { + const char *getPassName() const override { return "Hexagon CFG Optimizer"; } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; }; @@ -146,8 +147,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::succ_iterator SI = MBB->succ_begin(); MachineBasicBlock* FirstSucc = *SI; MachineBasicBlock* SecondSucc = *(++SI); - MachineBasicBlock* LayoutSucc = NULL; - MachineBasicBlock* JumpAroundTarget = NULL; + MachineBasicBlock* LayoutSucc = nullptr; + MachineBasicBlock* JumpAroundTarget = nullptr; if (MBB->isLayoutSuccessor(FirstSucc)) { LayoutSucc = FirstSucc; @@ -161,7 +162,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { // The target of the unconditional branch must be JumpAroundTarget. // TODO: If not, we should not invert the unconditional branch. - MachineBasicBlock* CondBranchTarget = NULL; + MachineBasicBlock* CondBranchTarget = nullptr; if ((MI->getOpcode() == Hexagon::JMP_t) || (MI->getOpcode() == Hexagon::JMP_f)) { CondBranchTarget = MI->getOperand(1).getMBB(); @@ -239,7 +240,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) { static void initializePassOnce(PassRegistry &Registry) { PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg", - &HexagonCFGOptimizer::ID, 0, false, false); + &HexagonCFGOptimizer::ID, nullptr, false, false); Registry.registerPass(*PI, true); } diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp index 60c933b..aeff680 100644 --- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp +++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp @@ -11,8 +11,6 @@ // to move them together. If we can move them next to each other we do so and // replace them with a combine instruction. 
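// --- Illustrative aside, not part of the patch, assuming standard Hexagon
// assembly syntax: the rewrite described above turns two adjacent transfers
// into the paired halves of a double register, e.g.
//   r1 = #0
//   r0 = #1
// becomes the single instruction
//   r1:0 = combine(#0, #1)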
//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-copy-combine" - #include "llvm/PassSupport.h" #include "Hexagon.h" #include "HexagonInstrInfo.h" @@ -36,6 +34,8 @@ using namespace llvm; +#define DEBUG_TYPE "hexagon-copy-combine" + static cl::opt IsCombinesDisabled("disable-merge-into-combines", cl::Hidden, cl::ZeroOrMore, @@ -68,15 +68,15 @@ public: initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { + const char *getPassName() const override { return "Hexagon Copy-To-Combine Pass"; } - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; private: MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1); @@ -262,7 +262,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, unsigned KilledOperand = 0; if (I2->killsRegister(I2UseReg)) KilledOperand = I2UseReg; - MachineInstr *KillingInstr = 0; + MachineInstr *KillingInstr = nullptr; for (; I != End; ++I) { // If the intervening instruction I: @@ -306,7 +306,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, // Track killed operands. If we move across an instruction that kills our // operand, we need to update the kill information on the moved I1. It kills // the operand now. - MachineInstr *KillingInstr = 0; + MachineInstr *KillingInstr = nullptr; unsigned KilledOperand = 0; while(++I != End) { @@ -333,7 +333,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1, // Check for an exact kill (registers match). if (I1UseReg && I->killsRegister(I1UseReg)) { - assert(KillingInstr == 0 && "Should only see one killing instruction"); + assert(!KillingInstr && "Should only see one killing instruction"); KilledOperand = I1UseReg; KillingInstr = &*I; } @@ -506,7 +506,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1, // Not safe. Stop searching. 
break; } - return 0; + return nullptr; } void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2, diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp index 8a5991f..3dafe80 100644 --- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp +++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp @@ -60,10 +60,10 @@ class HexagonExpandPredSpillCode : public MachineFunctionPass { initializeHexagonExpandPredSpillCodePass(Registry); } - const char *getPassName() const { + const char *getPassName() const override { return "Hexagon Expand Predicate Spill Code"; } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; }; @@ -187,7 +187,7 @@ static void initializePassOnce(PassRegistry &Registry) { const char *Name = "Hexagon Expand Predicate Spill Code"; PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred", &HexagonExpandPredSpillCode::ID, - 0, false, false); + nullptr, false, false); Registry.registerPass(*PI, true); } diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp index a79264b..d41939a 100644 --- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp +++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp @@ -40,11 +40,13 @@ namespace { initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; } + const char *getPassName() const override { + return "Hexagon Hardware Loop Fixup"; + } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 0ea13d4..d551ca9 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -246,7 +246,7 @@ HexagonFrameLowering::spillCalleeSavedRegisters( // unsigned SuperReg = uniqueSuperReg(Reg, TRI); bool CanUseDblStore = false; - const TargetRegisterClass* SuperRegClass = 0; + const TargetRegisterClass* SuperRegClass = nullptr; if (ContiguousRegs && (i < CSI.size()-1)) { unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI); @@ -300,7 +300,7 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters( // Check if we can use a double-word load. // unsigned SuperReg = uniqueSuperReg(Reg, TRI); - const TargetRegisterClass* SuperRegClass = 0; + const TargetRegisterClass* SuperRegClass = nullptr; bool CanUseDblLoad = false; if (ContiguousRegs && (i < CSI.size()-1)) { unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI); diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index a62c76a..446af16 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -28,25 +28,25 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - virtual bool - spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - virtual bool + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override; + + void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; + + bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - bool hasFP(const MachineFunction &MF) const; + const TargetRegisterInfo *TRI) const override; + int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + bool hasFP(const MachineFunction &MF) const override; bool hasTailCall(MachineBasicBlock &MBB) const; }; diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp index 936fb11..7f76421 100644 --- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -26,7 +26,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hwloops" #include "llvm/ADT/SmallSet.h" #include "Hexagon.h" #include "HexagonTargetMachine.h" @@ -47,6 +46,8 @@ using namespace llvm; +#define DEBUG_TYPE "hwloops" + #ifndef NDEBUG static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1)); #endif @@ -77,11 +78,11 @@ namespace { initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { return "Hexagon Hardware Loops"; } + const char *getPassName() const override { return "Hexagon Hardware Loops"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineLoopInfo>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -264,8 +265,8 @@ namespace { return Contents.ImmVal; } - void print(raw_ostream &OS, const TargetMachine *TM = 0) const { - const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0; + void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const { + const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr; if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); } if (isImm()) { OS << Contents.ImmVal; } } @@ -369,7 +370,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L, } // for (instr) SmallVector<MachineOperand, 2> Cond; - MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TB = nullptr, *FB = nullptr; bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); if (NotAnalyzed) return false; @@ -434,37 +435,37 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, "Loop must have more than one incoming edge!"); MachineBasicBlock *Backedge = *PI++; if (PI == TopMBB->pred_end()) // dead loop?
- return 0; + return nullptr; MachineBasicBlock *Incoming = *PI++; if (PI != TopMBB->pred_end()) // multiple backedges? - return 0; + return nullptr; // Make sure there is one incoming and one backedge and determine which // is which. if (L->contains(Incoming)) { if (L->contains(Backedge)) - return 0; + return nullptr; std::swap(Incoming, Backedge); } else if (!L->contains(Backedge)) - return 0; + return nullptr; // Look for the cmp instruction to determine if we can get a useful trip // count. The trip count can be either a register or an immediate. The // location of the value depends upon the type (reg or imm). MachineBasicBlock *Latch = L->getLoopLatch(); if (!Latch) - return 0; + return nullptr; unsigned IVReg = 0; int64_t IVBump = 0; MachineInstr *IVOp; bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp); if (!FoundIV) - return 0; + return nullptr; MachineBasicBlock *Preheader = L->getLoopPreheader(); - MachineOperand *InitialValue = 0; + MachineOperand *InitialValue = nullptr; MachineInstr *IV_Phi = MRI->getVRegDef(IVReg); for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) { MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB(); @@ -474,13 +475,13 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump. } if (!InitialValue) - return 0; + return nullptr; SmallVector Cond; - MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TB = nullptr, *FB = nullptr; bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); if (NotAnalyzed) - return 0; + return nullptr; MachineBasicBlock *Header = L->getHeader(); // TB must be non-null. If FB is also non-null, one of them must be @@ -489,7 +490,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, assert (TB && "Latch block without a branch?"); assert ((!FB || TB == Header || FB == Header) && "Branches not to header?"); if (!TB || (FB && TB != Header && FB != Header)) - return 0; + return nullptr; // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch // to put imm(0), followed by P in the vector Cond. @@ -505,7 +506,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2, Mask, ImmValue); if (!AnalyzedCmp) - return 0; + return nullptr; // The comparison operator type determines how we compute the loop // trip count. @@ -521,7 +522,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, bool isSwapped = false; const MachineOperand &Op1 = CondI->getOperand(1); const MachineOperand &Op2 = CondI->getOperand(2); - const MachineOperand *EndValue = 0; + const MachineOperand *EndValue = nullptr; if (Op1.isReg()) { if (Op2.isImm() || Op1.getReg() == IVReg) @@ -533,7 +534,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, } if (!EndValue) - return 0; + return nullptr; switch (CondOpc) { case Hexagon::CMPEQri: @@ -552,7 +553,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, case Hexagon::CMPbEQri_V4: case Hexagon::CMPhEQri_V4: { if (IVBump != 1) - return 0; + return nullptr; int64_t InitV, EndV; // Since the comparisons are "ri", the EndValue should be an @@ -562,26 +563,26 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, // Allow InitialValue to be a register defined with an immediate. 
if (InitialValue->isReg()) { if (!defWithImmediate(InitialValue->getReg())) - return 0; + return nullptr; InitV = getImmediate(*InitialValue); } else { assert(InitialValue->isImm()); InitV = InitialValue->getImm(); } if (InitV >= EndV) - return 0; + return nullptr; if (CondOpc == Hexagon::CMPbEQri_V4) { if (!isInt<8>(InitV) || !isInt<8>(EndV)) - return 0; + return nullptr; } else { // Hexagon::CMPhEQri_V4 if (!isInt<16>(InitV) || !isInt<16>(EndV)) - return 0; + return nullptr; } Cmp = !Negated ? Comparison::EQ : Comparison::NE; break; } default: - return 0; + return nullptr; } if (isSwapped) @@ -591,14 +592,14 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L, unsigned R = InitialValue->getReg(); MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); if (!MDT->properlyDominates(DefBB, Header)) - return 0; + return nullptr; OldInsts.push_back(MRI->getVRegDef(R)); } if (EndValue->isReg()) { unsigned R = EndValue->getReg(); MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent(); if (!MDT->properlyDominates(DefBB, Header)) - return 0; + return nullptr; } return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp); @@ -616,7 +617,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, Comparison::Kind Cmp) const { // Cannot handle comparison EQ, i.e. while (A == B). if (Cmp == Comparison::EQ) - return 0; + return nullptr; // Check if either the start or end values are an assignment of an immediate. // If so, use the immediate value rather than the register. @@ -642,11 +643,11 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // If loop executes while iv is "less" with the iv value going down, then // the iv must wrap. if (CmpLess && IVBump < 0) - return 0; + return nullptr; // If loop executes while iv is "greater" with the iv value going up, then // the iv must wrap. if (CmpGreater && IVBump > 0) - return 0; + return nullptr; if (Start->isImm() && End->isImm()) { // Both, start and end are immediates. @@ -654,15 +655,15 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, int64_t EndV = End->getImm(); int64_t Dist = EndV - StartV; if (Dist == 0) - return 0; + return nullptr; bool Exact = (Dist % IVBump) == 0; if (Cmp == Comparison::NE) { if (!Exact) - return 0; + return nullptr; if ((Dist < 0) ^ (IVBump < 0)) - return 0; + return nullptr; } // For comparisons that include the final value (i.e. include equality @@ -683,7 +684,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, uint64_t Count = Dist1; if (Count > 0xFFFFFFFFULL) - return 0; + return nullptr; return new CountValue(CountValue::CV_Immediate, Count); } @@ -695,7 +696,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // If the induction variable bump is not a power of 2, quit. // Othwerise we'd need a general integer division. if (!isPowerOf2_64(abs64(IVBump))) - return 0; + return nullptr; MachineBasicBlock *PH = Loop->getLoopPreheader(); assert (PH && "Should have a preheader by now"); @@ -766,7 +767,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop, // Hardware loops cannot handle 64-bit registers. If it's a double // register, it has to have a subregister. if (!SR && RC == &Hexagon::DoubleRegsRegClass) - return 0; + return nullptr; const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; // Compute DistR (register with the distance between Start and End). 
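For the all-immediate case, the arithmetic in computeCount above reduces to rounding the iteration distance up to a multiple of the bump and dividing. A minimal stand-alone sketch of that calculation (illustrative only, not the pass's exact code; the function name and asserts are invented):

    #include <cassert>
    #include <cstdint>

    // Trip count of "for (i = StartV; i < EndV; i += IVBump)" once both
    // bounds are immediates: StartV = 0, EndV = 10, IVBump = 2 gives
    // Dist = 10 and a count of 5. Comparisons that include equality with
    // the final value first widen the distance by one, as noted above.
    static uint64_t tripCount(int64_t StartV, int64_t EndV, int64_t IVBump) {
      int64_t Dist = EndV - StartV;
      // Non-empty range, and the IV must move toward the end value;
      // the pass has already bailed out otherwise.
      assert(Dist != 0 && IVBump != 0 && (Dist < 0) == (IVBump < 0));
      uint64_t AbsDist = Dist < 0 ? -(uint64_t)Dist : (uint64_t)Dist;
      uint64_t AbsBump = IVBump < 0 ? -(uint64_t)IVBump : (uint64_t)IVBump;
      // Round up before dividing: StartV = 0, EndV = 9, IVBump = 2 still
      // executes 5 times, i.e. (9 + 2 - 1) / 2.
      return (AbsDist + AbsBump - 1) / AbsBump;
    }

The pass additionally gives up when the result does not fit in 32 bits, which is what the 0xFFFFFFFF guard above enforces.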
@@ -1013,7 +1014,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { MachineBasicBlock *LastMBB = L->getExitingBlock(); // Don't generate hw loop if the loop has more than one exit. - if (LastMBB == 0) + if (!LastMBB) return false; MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator(); @@ -1035,7 +1036,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { SmallVector OldInsts; // Are we able to determine the trip count for the loop? CountValue *TripCount = getLoopTripCount(L, OldInsts); - if (TripCount == 0) + if (!TripCount) return false; // Is the trip count available in the preheader? @@ -1127,7 +1128,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) { if (LastI != LastMBB->end()) LastI = LastMBB->erase(LastI); SmallVector Cond; - TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL); + TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL); } } else { // Conditional branch to loop start; just delete it. @@ -1196,7 +1197,7 @@ MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) { case Hexagon::CONST64_Int_Real: return DI; } - return 0; + return nullptr; } @@ -1291,7 +1292,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { if (IndRegs.empty()) return false; - MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TB = nullptr, *FB = nullptr; SmallVector Cond; // AnalyzeBranch returns true if it fails to analyze branch. bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false); @@ -1322,7 +1323,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) { return false; SmallSet CmpRegs; - MachineOperand *CmpImmOp = 0; + MachineOperand *CmpImmOp = nullptr; // Go over all operands to the compare and look for immediate and register // operands. Assume that if the compare has a single register use and a @@ -1420,7 +1421,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( DebugLoc DL; if (!Latch || Header->hasAddressTaken()) - return 0; + return nullptr; typedef MachineBasicBlock::instr_iterator instr_iterator; @@ -1429,17 +1430,17 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( typedef std::vector MBBVector; MBBVector Preds(Header->pred_begin(), Header->pred_end()); SmallVector Tmp1; - MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TB = nullptr, *FB = nullptr; if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false)) - return 0; + return nullptr; for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { MachineBasicBlock *PB = *I; if (PB != Latch) { bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false); if (NotAnalyzed) - return 0; + return nullptr; } } @@ -1515,7 +1516,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( SmallVector Tmp2; SmallVector EmptyCond; - TB = FB = 0; + TB = FB = nullptr; for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { MachineBasicBlock *PB = *I; @@ -1525,22 +1526,22 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop( (void)NotAnalyzed; // suppress compiler warning assert (!NotAnalyzed && "Should be analyzable!"); if (TB != Header && (Tmp2.empty() || FB != Header)) - TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL); + TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL); PB->ReplaceUsesOfBlockWith(Header, NewPH); } } // It can happen that the latch block will fall through into the header. // Insert an unconditional branch to the header. 
- TB = FB = 0; + TB = FB = nullptr; bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false); (void)LatchNotAnalyzed; // suppress compiler warning assert (!LatchNotAnalyzed && "Should be analyzable!"); if (!TB && !FB) - TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL); + TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL); // Finally, the branch from the preheader to the header. - TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL); + TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL); NewPH->addSuccessor(Header); return NewPH; diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index ed8c786..dabe650 100644 --- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-isel" #include "Hexagon.h" #include "HexagonISelLowering.h" #include "HexagonTargetMachine.h" @@ -23,6 +22,8 @@ #include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "hexagon-isel" + static cl::opt MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders", @@ -61,7 +62,7 @@ public: } bool hasNumUsesBelowThresGA(SDNode *N) const; - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; // Complex Pattern Selectors. inline bool foldGlobalAddress(SDValue &N, SDValue &R); @@ -78,15 +79,15 @@ public: bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2); bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2); - virtual const char *getPassName() const { + const char *getPassName() const override { return "Hexagon DAG->DAG Pattern Instruction Selection"; } /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps); + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps) override; bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset); SDNode *SelectLoad(SDNode *N); @@ -186,7 +187,7 @@ FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM, static void initializePassOnce(PassRegistry &Registry) { const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection"; PassInfo *PI = new PassInfo(Name, "hexagon-isel", - &SelectionDAGISel::ID, 0, false, false); + &SelectionDAGISel::ID, nullptr, false, false); Registry.registerPass(*PI, true); } @@ -1238,7 +1239,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) { SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1, SDValue(Arg, 0)); Ops.push_back(SDValue(PdRs,0)); - } else if (RC == NULL && (dyn_cast(Arg) != NULL)) { + } else if (!RC && (dyn_cast(Arg) != nullptr)) { // This is immediate operand. Lower it here making sure that we DO have // const SDNode for immediate value. int32_t Val = cast(Arg)->getSExtValue(); @@ -1346,7 +1347,7 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) { SDNode *HexagonDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. 
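One pattern that repeats across this patch (hwloops above, hexagon-isel here, and in the files that follow) is moving #define DEBUG_TYPE from the top of the file to just after the includes and the using-declaration. A minimal sketch of the new layout, with a hypothetical example function; the old placement made the macro visible to every header included below it:

    #include "llvm/Support/Debug.h"

    using namespace llvm;

    // Defined last, so only DEBUG() uses in this translation unit pick
    // it up and the macro cannot collide with anything a header defines.
    #define DEBUG_TYPE "hexagon-isel"

    static void traceExample() {
      DEBUG(dbgs() << "emitted only in +Asserts builds\n");
    }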
} diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 92b794d..b8e5d24 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -39,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "hexagon-lowering" + static cl::opt EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden, cl::desc("Control jump table emission on Hexagon target")); @@ -135,7 +137,7 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT, State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo)); return false; } - llvm_unreachable(0); + llvm_unreachable(nullptr); } @@ -182,7 +184,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const uint16_t RegList[] = { + static const MCPhysReg RegList[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5 }; @@ -205,10 +207,10 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT, return false; } - static const uint16_t RegList1[] = { + static const MCPhysReg RegList1[] = { Hexagon::D1, Hexagon::D2 }; - static const uint16_t RegList2[] = { + static const MCPhysReg RegList2[] = { Hexagon::R1, Hexagon::R3 }; if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) { @@ -346,8 +348,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, - &RetOps[0], RetOps.size()); + return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps); } @@ -410,7 +411,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, int NumNamedVarArgParams = -1; if (GlobalAddressSDNode *GA = dyn_cast(Callee)) { - const Function* CalleeFn = NULL; + const Function* CalleeFn = nullptr; Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32); if ((CalleeFn = dyn_cast(GA->getGlobal()))) { @@ -520,8 +521,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Transform all store nodes into one single node because all store // nodes are independent of each other. if (!MemOpChains.empty()) { - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], - MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); } if (!isTailCall) @@ -595,9 +595,9 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (isTailCall) - return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops); - Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. @@ -817,7 +817,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Sub); SDValue Ops[2] = { ArgAdjust, CopyChain }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue @@ -916,8 +916,7 @@ const { } if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], - MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); if (isVarArg) { // This will point to the next argument passed via stack. 
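The getNode and getMergeValues rewrites in HexagonISelLowering.cpp all follow one migration: the pointer-plus-count overloads give way to overloads taking an ArrayRef. A fragment illustrating the two forms, assuming a SelectionDAG DAG, an SDLoc dl, and the NodeTys/Ops operand list built in LowerCall above:

    SmallVector<SDValue, 8> Ops;
    // ... operands pushed here ...

    // Old form: raw pointer plus an explicit element count.
    //   Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys,
    //                       &Ops[0], Ops.size());

    // New form: the container converts implicitly to ArrayRef<SDValue>,
    // which also stays well defined when Ops is empty (&Ops[0] is not).
    SDValue Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);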
@@ -1480,7 +1479,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case HexagonISD::CONST32: return "HexagonISD::CONST32"; case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP"; case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real"; diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 73da226..4f27c27 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -92,14 +92,14 @@ namespace llvm { const SmallVectorImpl &Ins, SelectionDAG& DAG) const; - virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; - virtual bool isTruncateFree(EVT VT1, EVT VT2) const; + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; - virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const; + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - virtual const char *getTargetNodeName(unsigned Opcode) const; + const char *getTargetNodeName(unsigned Opcode) const override; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; @@ -109,12 +109,12 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals) const override; SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals) const override; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, @@ -133,46 +133,45 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const override; - virtual MachineBasicBlock - *EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const override; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - virtual EVT getSetCCResultType(LLVMContext &C, EVT VT) const { + EVT getSetCCResultType(LLVMContext &C, EVT VT) const override { if (!VT.isVector()) return MVT::i1; else return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements()); } - virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + MVT VT) const override; // Intrinsics - virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, - SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; /// 
isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. /// The type may be VoidTy, in which case only return true if the addressing /// mode is legal for a load/store of any legal type. /// TODO: Handle pre/postinc as well. - virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const; - virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can /// compare a register against the immediate without having to materialize /// the immediate into a register. - virtual bool isLegalICmpImmediate(int64_t Imm) const; + bool isLegalICmpImmediate(int64_t Imm) const override; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td index d25bfa8..1057343 100644 --- a/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/lib/Target/Hexagon/HexagonInstrFormats.td @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Hexagon Intruction Flags + +// Hexagon Instruction Flags + // // *** Must match HexagonBaseInfo.h *** //===----------------------------------------------------------------------===// @@ -68,7 +68,7 @@ def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd) //===----------------------------------------------------------------------===// -// Intruction Class Declaration + +// Instruction Class Declaration + //===----------------------------------------------------------------------===// class OpcodeHexagon { @@ -104,54 +104,72 @@ class InstHexagon pattern, // Solo instructions, i.e., those that cannot be in a packet with others. bits<1> isSolo = 0; let TSFlags{5} = isSolo; + // Packed only with A or X-type instructions. + bits<1> isSoloAX = 0; + let TSFlags{6} = isSoloAX; + // Only A-type instruction in first slot or nothing. + bits<1> isSoloAin1 = 0; + let TSFlags{7} = isSoloAin1; // Predicated instructions. bits<1> isPredicated = 0; - let TSFlags{6} = isPredicated; + let TSFlags{8} = isPredicated; bits<1> isPredicatedFalse = 0; - let TSFlags{7} = isPredicatedFalse; + let TSFlags{9} = isPredicatedFalse; bits<1> isPredicatedNew = 0; - let TSFlags{8} = isPredicatedNew; + let TSFlags{10} = isPredicatedNew; + bits<1> isPredicateLate = 0; + let TSFlags{11} = isPredicateLate; // Late predicate producer insn. // New-value insn helper fields. bits<1> isNewValue = 0; - let TSFlags{9} = isNewValue; // New-value consumer insn. + let TSFlags{12} = isNewValue; // New-value consumer insn. bits<1> hasNewValue = 0; - let TSFlags{10} = hasNewValue; // New-value producer insn. + let TSFlags{13} = hasNewValue; // New-value producer insn. bits<3> opNewValue = 0; - let TSFlags{13-11} = opNewValue; // New-value produced operand. - bits<2> opNewBits = 0; - let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16. + let TSFlags{16-14} = opNewValue; // New-value produced operand. bits<1> isNVStorable = 0; - let TSFlags{16} = isNVStorable; // Store that can become new-value store. + let TSFlags{17} = isNVStorable; // Store that can become new-value store. 
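The TSFlags renumbering above is only half of the change; as the banner in this file says, the layout must match HexagonBaseInfo.h, where the same bits are decoded in C++. A hedged sketch of how such a flag is typically read back (helper names are invented, and the bit positions follow the new layout above; the authoritative masks live in HexagonBaseInfo.h):

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // TSFlags is a uint64_t carried by every MCInstrDesc; TableGen packs
    // the `let TSFlags{...}` assignments above into it bit by bit.
    static bool isPredicatedInsn(const MachineInstr *MI) {
      const uint64_t F = MI->getDesc().TSFlags;
      return (F >> 8) & 0x1;    // TSFlags{8} = isPredicated
    }
    static bool isPredicatedNewInsn(const MachineInstr *MI) {
      const uint64_t F = MI->getDesc().TSFlags;
      return (F >> 10) & 0x1;   // TSFlags{10} = isPredicatedNew
    }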
bits<1> isNVStore = 0; - let TSFlags{17} = isNVStore; // New-value store insn. + let TSFlags{18} = isNVStore; // New-value store insn. + bits<1> isCVLoadable = 0; + let TSFlags{19} = isCVLoadable; // Load that can become cur-value load. + bits<1> isCVLoad = 0; + let TSFlags{20} = isCVLoad; // Cur-value load insn. // Immediate extender helper fields. bits<1> isExtendable = 0; - let TSFlags{18} = isExtendable; // Insn may be extended. + let TSFlags{21} = isExtendable; // Insn may be extended. bits<1> isExtended = 0; - let TSFlags{19} = isExtended; // Insn must be extended. + let TSFlags{22} = isExtended; // Insn must be extended. bits<3> opExtendable = 0; - let TSFlags{22-20} = opExtendable; // Which operand may be extended. + let TSFlags{25-23} = opExtendable; // Which operand may be extended. bits<1> isExtentSigned = 0; - let TSFlags{23} = isExtentSigned; // Signed or unsigned range. + let TSFlags{26} = isExtentSigned; // Signed or unsigned range. bits<5> opExtentBits = 0; - let TSFlags{28-24} = opExtentBits; //Number of bits of range before extending. + let TSFlags{31-27} = opExtentBits; //Number of bits of range before extending. + bits<2> opExtentAlign = 0; + let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending. // If an instruction is valid on a subtarget (v2-v5), set the corresponding // bit from validSubTargets. v2 is the least significant bit. // By default, instruction is valid on all subtargets. SubTarget validSubTargets = HasV2SubT; - let TSFlags{32-29} = validSubTargets.Value; + let TSFlags{37-34} = validSubTargets.Value; // Addressing mode for load/store instructions. AddrModeType addrMode = NoAddrMode; - let TSFlags{35-33} = addrMode.Value; + let TSFlags{42-40} = addrMode.Value; // Memory access size for mem access instructions (load/store) MemAccessSize accessSize = NoMemAccess; - let TSFlags{38-36} = accessSize.Value; + let TSFlags{45-43} = accessSize.Value; + + bits<1> isTaken = 0; + let TSFlags {47} = isTaken; // Branch prediction. + + bits<1> isFP = 0; + let TSFlags {48} = isFP; // Floating-point. // Fields used for relation models. string BaseOpcode = ""; @@ -173,14 +191,14 @@ class InstHexagon pattern, } //===----------------------------------------------------------------------===// -// Intruction Classes Definitions + +// Instruction Classes Definitions + //===----------------------------------------------------------------------===// // LD Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class LDInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01> + : InstHexagon; let mayLoad = 1 in class LDInst2 pattern = [], @@ -199,16 +217,16 @@ class LDInstPost pattern = [], let mayLoad = 1 in class LD0Inst pattern = [], - string cstr = ""> - : LDInst; + string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0> + : InstHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. // ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. let mayStore = 1 in class STInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> + : InstHexagon; class STInst2 pattern = [], string cstr = ""> @@ -216,39 +234,39 @@ class STInst2 pattern = [], let mayStore = 1 in class ST0Inst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0> + : InstHexagon; // ST Instruction Class in V2/V3 can take SLOT0 only. 
// ST Instruction Class in V4 can take SLOT0 & SLOT1. // Definition of the instruction class CHANGED from V2/V3 to V4. class STInstPost pattern = [], - string cstr = ""> - : STInst; + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01> + : STInst; // SYSTEM Instruction Class in V4 can take SLOT0 only // In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1. class SYSInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0> + : InstHexagon; // ALU32 Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class ALU32Inst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : InstHexagon; // ALU64 Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4. class ALU64Inst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> + : InstHexagon; class ALU64_acc pattern = [], - string cstr = ""> - : ALU64Inst; + string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23> + : ALU64Inst; // M Instruction Class in V2/V3. @@ -256,55 +274,55 @@ class ALU64_acc pattern = [], // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. class MInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23> + : InstHexagon; // M Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4. class MInst_acc pattern = [], - string cstr = ""> - : MInst; + string cstr = "", InstrItinClass itin = M_tc_2_SLOT23> + : MInst; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. class SInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23> + : InstHexagon; // S Instruction Class in V2/V3. // XTYPE Instruction Class in V4. // Definition of the instruction class NOT CHANGED. // Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4. class SInst_acc pattern = [], - string cstr = ""> - : SInst; + string cstr = "", InstrItinClass itin = S_3op_tc_1_SLOT23> + : SInst; // J Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class JInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23> + : InstHexagon; // JR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. class JRInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2> + : InstHexagon; // CR Instruction Class in V2/V3/V4. // Definition of the instruction class NOT CHANGED. 
class CRInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3> + : InstHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class Endloop pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123> + : InstHexagon; let isCodeGenOnly = 1, isPseudo = 1 in class Pseudo pattern = [], @@ -317,39 +335,40 @@ class PseudoM pattern = [], : InstHexagon; //===----------------------------------------------------------------------===// -// Intruction Classes Definitions - +// Instruction Classes Definitions - //===----------------------------------------------------------------------===// // // ALU32 patterns //. -class ALU32_rr pattern, - string cstr = ""> - : ALU32Inst; +class ALU32_rr pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst; -class ALU32_ir pattern, - string cstr = ""> - : ALU32Inst; +class ALU32_ir pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst; -class ALU32_ri pattern, - string cstr = ""> - : ALU32Inst; +class ALU32_ri pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst; + +class ALU32_ii pattern = [], + string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123> + : ALU32Inst; -class ALU32_ii pattern, - string cstr = ""> - : ALU32Inst; // // ALU64 patterns. // -class ALU64_rr pattern, - string cstr = ""> - : ALU64Inst; +class ALU64_rr pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23> + : ALU64Inst; -class ALU64_ri pattern, - string cstr = ""> - : ALU64Inst; +class ALU64_ri pattern = [], + string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23> + : ALU64Inst; // Post increment ST Instruction. class STInstPI pattern = [], diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td index 9fda0da..d92f97b 100644 --- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td +++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// //----------------------------------------------------------------------------// -// Hexagon Intruction Flags + +// Hexagon Instruction Flags // // *** Must match BaseInfo.h *** //----------------------------------------------------------------------------// @@ -22,30 +22,30 @@ def TypeNV : IType<10>; def TypePREFIX : IType<30>; //----------------------------------------------------------------------------// -// Intruction Classes Definitions + +// Instruction Classes Definitions //----------------------------------------------------------------------------// // // NV type instructions. // class NVInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> + : InstHexagon; class NVInst_V4 pattern = [], - string cstr = ""> - : NVInst; + string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0> + : NVInst; // Definition of Post increment new value store. class NVInstPost_V4 pattern = [], - string cstr = ""> - : NVInst; + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : NVInst; // Post increment ST Instruction. let mayStore = 1 in class NVInstPI_V4 pattern = [], - string cstr = ""> - : NVInst; + string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0> + : NVInst; // New-value conditional branch. 
class NCJInst pattern = [], @@ -54,13 +54,14 @@ class NCJInst pattern = [], let mayLoad = 1, mayStore = 1 in class MEMInst pattern = [], - string cstr = ""> - : InstHexagon; + string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> + : InstHexagon; class MEMInst_V4 pattern = [], - string cstr = ""> - : MEMInst; + string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0> + : MEMInst; let isCodeGenOnly = 1 in class EXTENDERInst pattern = []> - : InstHexagon; + : InstHexagon; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 21a12de..ea6367a 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -26,13 +26,16 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-instrinfo" + #define GET_INSTRINFO_CTOR_DTOR #define GET_INSTRMAP_INFO #include "HexagonGenInstrInfo.inc" #include "HexagonGenDFAPacketizer.inc" -using namespace llvm; - /// /// Constants for Hexagon instructions. /// @@ -135,7 +138,7 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, regPos = 1; } - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) { // Due to a bug in TailMerging/CFG Optimization, we need to add a // special case handling of a predicated jump followed by an @@ -151,7 +154,7 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, if (NewTBB == NextBB) { ReverseBranchCondition(Cond); RemoveBranch(MBB); - return InsertBranch(MBB, TBB, 0, Cond, DL); + return InsertBranch(MBB, TBB, nullptr, Cond, DL); } } BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); @@ -174,8 +177,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { - TBB = NULL; - FBB = NULL; + TBB = nullptr; + FBB = nullptr; // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::instr_iterator I = MBB.instr_end(); @@ -224,7 +227,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, // Get the last instruction in the block. MachineInstr *LastInst = I; - MachineInstr *SecondLastInst = NULL; + MachineInstr *SecondLastInst = nullptr; // Find one more terminator if present. do { if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) { @@ -557,7 +560,7 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, const SmallVectorImpl &Ops, int FI) const { // Hexagon_TODO: Implement. - return(0); + return nullptr; } unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index 5da23cb..6b032c9 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -40,124 +40,121 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + const HexagonRegisterInfo &getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. 
This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - - virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; - - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; - - virtual bool analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + + bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const override; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const override; + + bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const override; + + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, + 
SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, + SmallVectorImpl &Addr, + const TargetRegisterClass *RC, + SmallVectorImpl &NewMIs) const; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + int FrameIndex) const override; + + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, + MachineInstr* MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const override { + return nullptr; } unsigned createVR(MachineFunction* MF, MVT VT) const; - virtual bool isBranch(const MachineInstr *MI) const; - virtual bool isPredicable(MachineInstr *MI) const; - virtual bool - PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Cond) const; - - virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const; - - virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const; - - virtual bool isPredicated(const MachineInstr *MI) const; - virtual bool isPredicated(unsigned Opcode) const; - virtual bool isPredicatedTrue(const MachineInstr *MI) const; - virtual bool isPredicatedTrue(unsigned Opcode) const; - virtual bool isPredicatedNew(const MachineInstr *MI) const; - virtual bool isPredicatedNew(unsigned Opcode) const; - virtual bool DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const; - virtual bool - SubsumesPredicate(const SmallVectorImpl &Pred1, - const SmallVectorImpl &Pred2) const; - - virtual bool - ReverseBranchCondition(SmallVectorImpl &Cond) const; - - virtual bool - isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles, - const BranchProbability &Probability) const; - - virtual DFAPacketizer* + bool isBranch(const MachineInstr *MI) const; + bool isPredicable(MachineInstr *MI) const override; + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl &Cond) const override; + + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + const BranchProbability &Probability) const override; + + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + const BranchProbability &Probability) const override; + + bool isPredicated(const MachineInstr *MI) const override; + bool isPredicated(unsigned Opcode) const; + bool isPredicatedTrue(const MachineInstr *MI) const; + bool isPredicatedTrue(unsigned Opcode) const; + bool isPredicatedNew(const MachineInstr *MI) const; + bool isPredicatedNew(unsigned Opcode) const; + bool DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const override; + bool SubsumesPredicate(const SmallVectorImpl &Pred1, + const SmallVectorImpl &Pred2) const override; + + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; + + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + const BranchProbability &Probability) const override; + + DFAPacketizer* CreateTargetScheduleState(const TargetMachine *TM, - const ScheduleDAG *DAG) 
const; + const ScheduleDAG *DAG) const override; - virtual bool isSchedulingBoundary(const MachineInstr *MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const; + bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; bool isValidOffset(const int Opcode, const int Offset) const; bool isValidAutoIncImm(const EVT VT, const int Offset) const; bool isMemOp(const MachineInstr *MI) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index c96aaca..4dcf101 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -768,12 +768,13 @@ class T_JMP JumpList = []> let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1, Defs = [PC], isPredicated = 1, opExtentBits = 17 in -class T_JMP_c : +class T_JMP_c : JInst<(outs ), (ins PredRegs:$src, brtarget:$dst), !if(PredNot, "if (!$src", "if ($src")# !if(isPredNew, ".new) ", ") ")#"jump"# - !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> { + !if(isPredNew, !if(isTak, ":t ", ":nt "), " ")#"$dst"> { + let isTaken = isTak; let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); let isPredicatedFalse = PredNot; let isPredicatedNew = isPredNew; @@ -784,7 +785,7 @@ class T_JMP_c : let Inst{27-24} = 0b1100; let Inst{21} = PredNot; - let Inst{12} = !if(isPredNew, isTaken, zero); + let Inst{12} = !if(isPredNew, isTak, zero); let Inst{11} = isPredNew; let Inst{9-8} = src; let Inst{23-22} = dst{16-15}; @@ -806,12 +807,13 @@ class T_JMPr } let Defs = [PC], isPredicated = 1, InputType = "reg" in -class T_JMPr_c : +class T_JMPr_c : JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst), !if(PredNot, "if (!$src", "if ($src")# !if(isPredNew, ".new) ", ") ")#"jumpr"# - !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> { + !if(isPredNew, !if(isTak, ":t ", ":nt "), " ")#"$dst"> { + let isTaken = isTak; let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), ""); let isPredicatedFalse = PredNot; let isPredicatedNew = isPredNew; @@ -823,7 +825,7 @@ class T_JMPr_c : let Inst{27-22} = 0b001101; let Inst{21} = PredNot; let Inst{20-16} = dst; - let Inst{12} = !if(isPredNew, isTaken, zero); + let Inst{12} = !if(isPredNew, isTak, zero); let Inst{11} = isPredNew; let Inst{9-8} = src; let Predicates = !if(isPredNew, [HasV3T], [HasV2T]); diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index a95fb80..db5b7ea 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -1004,13 +1004,13 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in class NVJrr_template majOp, bit NvOpNum, - bit isNegCond, bit isTaken> + bit isNegCond, bit isTak> : NVInst_V4<(outs), (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset), "if ("#!if(isNegCond, "!","")#mnemonic# "($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")# "$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:" - #!if(isTaken, "t","nt")#" $offset", + #!if(isTak, "t","nt")#" $offset", []>, Requires<[HasV4T]> { bits<5> src1; @@ -1019,6 +1019,7 @@ class NVJrr_template majOp, bit NvOpNum, bits<5> RegOp; // Non-New-Value Operand bits<11> offset; + let isTaken = isTak; let isBrTaken = !if(isTaken, "true", "false"); let isPredicatedFalse = isNegCond; @@ -1030,7 +1031,7 @@ class NVJrr_template majOp, bit NvOpNum, let Inst{25-23} = majOp; let 
Inst{22} = isNegCond; let Inst{18-16} = Ns; - let Inst{13} = isTaken; + let Inst{13} = isTak; let Inst{12-8} = RegOp; let Inst{21-20} = offset{10-9}; let Inst{7-1} = offset{8-2}; @@ -1078,13 +1079,14 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in class NVJri_template majOp, bit isNegCond, - bit isTaken> + bit isTak> : NVInst_V4<(outs), (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset), "if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:" - #!if(isTaken, "t","nt")#" $offset", + #!if(isTak, "t","nt")#" $offset", []>, Requires<[HasV4T]> { + let isTaken = isTak; let isPredicatedFalse = isNegCond; let isBrTaken = !if(isTaken, "true", "false"); @@ -1097,7 +1099,7 @@ class NVJri_template majOp, bit isNegCond, let Inst{25-23} = majOp; let Inst{22} = isNegCond; let Inst{18-16} = src1; - let Inst{13} = isTaken; + let Inst{13} = isTak; let Inst{12-8} = src2; let Inst{21-20} = offset{10-9}; let Inst{7-1} = offset{8-2}; @@ -1135,14 +1137,15 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1, let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in class NVJ_ConstImm_template majOp, string ImmVal, - bit isNegCond, bit isTaken> + bit isNegCond, bit isTak> : NVInst_V4<(outs), (ins IntRegs:$src1, brtarget:$offset), "if ("#!if(isNegCond, "!","")#mnemonic #"($src1.new, #"#ImmVal#")) jump:" - #!if(isTaken, "t","nt")#" $offset", + #!if(isTak, "t","nt")#" $offset", []>, Requires<[HasV4T]> { + let isTaken = isTak; let isPredicatedFalse = isNegCond; let isBrTaken = !if(isTaken, "true", "false"); @@ -1153,7 +1156,7 @@ class NVJ_ConstImm_template majOp, string ImmVal, let Inst{25-23} = majOp; let Inst{22} = isNegCond; let Inst{18-16} = src1; - let Inst{13} = isTaken; + let Inst{13} = isTak; let Inst{21-20} = offset{10-9}; let Inst{7-1} = offset{8-2}; } @@ -2019,9 +2022,10 @@ multiclass MemOpi_bitPats ; + def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)), + immPred:$bitend), + (addrPred (i32 IntRegs:$addr), extPred:$offset)), + (MI IntRegs:$addr, extPred:$offset, (xformFunc immPred:$bitend))>; } multiclass MemOpi_bitExtType { @@ -2065,9 +2069,10 @@ multiclass MemOpr_Pats { let AddedComplexity = 141 in // mem[bhw](Rs+#0) [+-&|]= Rt - def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)), - addrPred:$addr), - (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>; + def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)), + (i32 IntRegs:$addend)), + (addrPred (i32 IntRegs:$addr), extPred:$offset)), + (MI IntRegs:$addr, extPred:$offset, (i32 IntRegs:$addend) )>; // mem[bhw](Rs+#U6:[012]) [+-&|]= Rt let AddedComplexity = 150 in diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 51318ff..7dd6e95 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -12,17 +12,17 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "misched" - #include "HexagonMachineScheduler.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/IR/Function.h" using namespace llvm; +#define DEBUG_TYPE "misched" + /// Platform specific modifications to DAG. 
void VLIWMachineScheduler::postprocessDAG() { - SUnit* LastSequentialCall = NULL; + SUnit* LastSequentialCall = nullptr; // Currently we only catch the situation when compare gets scheduled // before preceding call. for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { @@ -398,13 +398,13 @@ SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() { for (unsigned i = 0; Available.empty(); ++i) { assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) && "permanent hazard"); (void)i; - ResourceModel->reserveResources(0); + ResourceModel->reserveResources(nullptr); bumpCycle(); releasePending(); } if (Available.size() == 1) return *Available.begin(); - return NULL; + return nullptr; } #ifndef NDEBUG @@ -424,7 +424,7 @@ void ConvergingVLIWScheduler::traceCandidate(const char *Label, /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor /// of SU, return it, otherwise return null. static SUnit *getSingleUnscheduledPred(SUnit *SU) { - SUnit *OnlyAvailablePred = 0; + SUnit *OnlyAvailablePred = nullptr; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { SUnit &Pred = *I->getSUnit(); @@ -432,7 +432,7 @@ static SUnit *getSingleUnscheduledPred(SUnit *SU) { // We found an available, but not scheduled, predecessor. If it's the // only one we have found, keep track of it... otherwise give up. if (OnlyAvailablePred && OnlyAvailablePred != &Pred) - return 0; + return nullptr; OnlyAvailablePred = &Pred; } } @@ -442,7 +442,7 @@ static SUnit *getSingleUnscheduledPred(SUnit *SU) { /// getSingleUnscheduledSucc - If there is exactly one unscheduled successor /// of SU, return it, otherwise return null. static SUnit *getSingleUnscheduledSucc(SUnit *SU) { - SUnit *OnlyAvailableSucc = 0; + SUnit *OnlyAvailableSucc = nullptr; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { SUnit &Succ = *I->getSUnit(); @@ -450,7 +450,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) { // We found an available, but not scheduled, successor. If it's the // only one we have found, keep track of it... otherwise give up. if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ) - return 0; + return nullptr; OnlyAvailableSucc = &Succ; } } @@ -639,7 +639,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); - return NULL; + return nullptr; } SUnit *SU; if (llvm::ForceTopDown) { diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index 300f1c7..99100a1 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -14,7 +14,6 @@ #ifndef HEXAGONASMPRINTER_H #define HEXAGONASMPRINTER_H -#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -57,7 +56,7 @@ class VLIWResourceModel { public: VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) : SchedModel(SM), TotalPackets(0) { - ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL); + ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM, nullptr); // This hard requirement could be relaxed, // but for now do not let it proceed. @@ -94,8 +93,9 @@ VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) : /// top-level schedule() driver. 
class VLIWMachineScheduler : public ScheduleDAGMILive { public: - VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S): - ScheduleDAGMILive(C, S) {} + VLIWMachineScheduler(MachineSchedContext *C, + std::unique_ptr S) + : ScheduleDAGMILive(C, std::move(S)) {} /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's /// time to do some work. @@ -120,7 +120,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { // Best scheduling cost. int SCost; - SchedCandidate(): SU(NULL), SCost(0) {} + SchedCandidate(): SU(nullptr), SCost(0) {} }; /// Represent the type of SchedCandidate found within a single queue. enum CandResult { @@ -153,9 +153,9 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy { /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. VLIWSchedBoundary(unsigned ID, const Twine &Name): - DAG(0), SchedModel(0), Available(ID, Name+".A"), + DAG(nullptr), SchedModel(nullptr), Available(ID, Name+".A"), Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"), - CheckPending(false), HazardRec(0), ResourceModel(0), + CheckPending(false), HazardRec(nullptr), ResourceModel(nullptr), CurrCycle(0), IssueCount(0), MinReadyCycle(UINT_MAX), MaxMinLatency(0) {} @@ -203,8 +203,9 @@ public: LogMaxQID = 2 }; - ConvergingVLIWScheduler(): - DAG(0), SchedModel(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + ConvergingVLIWScheduler() + : DAG(nullptr), SchedModel(nullptr), Top(TopQID, "TopQ"), + Bot(BotQID, "BotQ") {} virtual void initialize(ScheduleDAGMI *dag) override; diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 3e238bf..b7c03a7 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -21,7 +21,6 @@ // // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-nvj" #include "llvm/PassSupport.h" #include "Hexagon.h" #include "HexagonInstrInfo.h" @@ -47,6 +46,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "hexagon-nvj" + STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created"); static cl::opt @@ -74,16 +75,16 @@ namespace { initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { + const char *getPassName() const override { return "Hexagon NewValueJump"; } - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; private: /// \brief A handle to the branch probability pass. 
@@ -393,8 +394,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { bool MO2IsKill = false; MachineBasicBlock::iterator jmpPos; MachineBasicBlock::iterator cmpPos; - MachineInstr *cmpInstr = NULL, *jmpInstr = NULL; - MachineBasicBlock *jmpTarget = NULL; + MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr; + MachineBasicBlock *jmpTarget = nullptr; bool afterRA = false; bool isSecondOpReg = false; bool isSecondOpNewified = false; diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 5490ecd..48b6159 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -35,7 +35,6 @@ //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-peephole" #include "Hexagon.h" #include "HexagonTargetMachine.h" #include "llvm/ADT/DenseMap.h" @@ -57,6 +56,8 @@ using namespace llvm; +#define DEBUG_TYPE "hexagon-peephole" + static cl::opt DisableHexagonPeephole("disable-hexagon-peephole", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Peephole Optimization")); @@ -89,13 +90,13 @@ namespace { initializeHexagonPeepholePass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { + const char *getPassName() const override { return "Hexagon optimize redundant zero and size extends"; } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 9a20dfd..fb466d3 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -43,13 +43,12 @@ HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st) Subtarget(st) { } -const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction - *MF) - const { - static const uint16_t CalleeSavedRegsV2[] = { +const MCPhysReg * +HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const MCPhysReg CalleeSavedRegsV2[] = { Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 }; - static const uint16_t CalleeSavedRegsV3[] = { + static const MCPhysReg CalleeSavedRegsV3[] = { Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0 diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 89af7c3..648b4af 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -48,16 +48,17 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { HexagonRegisterInfo(HexagonSubtarget &st); /// Code Generation virtual methods... 
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; + const TargetRegisterClass* const* + getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const; - BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; /// determineFrameLayout - Determine the size of the frame and maximum call /// frame size. @@ -65,17 +66,17 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo { /// requiresRegisterScavenging - returns true since we may need scavenging for /// a temporary register when generating hardware loop instructions. - bool requiresRegisterScavenging(const MachineFunction &MF) const { + bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; } - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; } // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const override; unsigned getFrameRegister() const; unsigned getStackRegister() const; }; diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp index cadcb32..2b459a4 100644 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp @@ -33,13 +33,13 @@ namespace { HexagonRemoveExtendArgs() : FunctionPass(ID) { initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - const char *getPassName() const { + const char *getPassName() const override { return "Remove sign extends"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); AU.addPreserved("stack-protector"); diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index c2cfbb9..528cafc 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -7,57 +7,6 @@ // //===----------------------------------------------------------------------===// -// Functional Units -def LSUNIT : FuncUnit; // SLOT0 -def LUNIT : FuncUnit; // SLOT1 -def MUNIT : FuncUnit; // SLOT2 -def SUNIT : FuncUnit; // SLOT3 -def LOOPUNIT : FuncUnit; - -// Itinerary classes -def ALU32 : InstrItinClass; -def ALU64 : InstrItinClass; -def CR : InstrItinClass; -def J : InstrItinClass; -def JR : InstrItinClass; -def LD : InstrItinClass; -def LD0 : InstrItinClass; -def M : InstrItinClass; -def ST : InstrItinClass; -def ST0 : InstrItinClass; -def S : InstrItinClass; -def SYS : InstrItinClass; -def ENDLOOP : InstrItinClass; -def PSEUDO : InstrItinClass; -def PSEUDOM : InstrItinClass; - -def HexagonItineraries : - ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - 
InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData, - InstrStage<1, [MUNIT, SUNIT]>]> - ]>; - -def HexagonModel : SchedMachineModel { - // Max issue per cycle == bundle width. - let IssueWidth = 4; - let Itineraries = HexagonItineraries; - let LoadLatency = 1; -} - //===----------------------------------------------------------------------===// // V4 Machine Info + //===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index ef72cf4..a7d2d47 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -34,29 +34,158 @@ def SLOT3 : FuncUnit; def SLOT_ENDLOOP: FuncUnit; // Itinerary classes. -def NV_V4 : InstrItinClass; -def MEM_V4 : InstrItinClass; +def PSEUDO : InstrItinClass; +def PSEUDOM : InstrItinClass; // ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4. +def DUPLEX : InstrItinClass; def PREFIX : InstrItinClass; +def COMPOUND : InstrItinClass; + +def ALU32_2op_tc_1_SLOT0123 : InstrItinClass; +def ALU32_2op_tc_2early_SLOT0123 : InstrItinClass; +def ALU32_3op_tc_2early_SLOT0123 : InstrItinClass; +def ALU32_3op_tc_1_SLOT0123 : InstrItinClass; +def ALU32_3op_tc_2_SLOT0123 : InstrItinClass; +def ALU32_ADDI_tc_1_SLOT0123 : InstrItinClass; +def ALU64_tc_1_SLOT23 : InstrItinClass; +def ALU64_tc_1or2_SLOT23 : InstrItinClass; +def ALU64_tc_2_SLOT23 : InstrItinClass; +def ALU64_tc_2early_SLOT23 : InstrItinClass; +def ALU64_tc_3x_SLOT23 : InstrItinClass; +def CR_tc_2_SLOT3 : InstrItinClass; +def CR_tc_2early_SLOT23 : InstrItinClass; +def CR_tc_2early_SLOT3 : InstrItinClass; +def CR_tc_3x_SLOT23 : InstrItinClass; +def CR_tc_3x_SLOT3 : InstrItinClass; +def J_tc_2early_SLOT23 : InstrItinClass; +def J_tc_2early_SLOT2 : InstrItinClass; +def LD_tc_ld_SLOT01 : InstrItinClass; +def LD_tc_ld_SLOT0 : InstrItinClass; +def LD_tc_3or4stall_SLOT0 : InstrItinClass; +def M_tc_1_SLOT23 : InstrItinClass; +def M_tc_1or2_SLOT23 : InstrItinClass; +def M_tc_2_SLOT23 : InstrItinClass; +def M_tc_3_SLOT23 : InstrItinClass; +def M_tc_3x_SLOT23 : InstrItinClass; +def M_tc_3or4x_SLOT23 : InstrItinClass; +def ST_tc_st_SLOT01 : InstrItinClass; +def ST_tc_st_SLOT0 : InstrItinClass; +def ST_tc_ld_SLOT0 : InstrItinClass; +def ST_tc_3stall_SLOT0 : InstrItinClass; +def S_2op_tc_1_SLOT23 : InstrItinClass; +def S_2op_tc_2_SLOT23 : InstrItinClass; +def S_2op_tc_2early_SLOT23 : InstrItinClass; +def S_2op_tc_3or4x_SLOT23 : InstrItinClass; +def S_3op_tc_1_SLOT23 : InstrItinClass; +def S_3op_tc_1or2_SLOT23 : InstrItinClass; +def S_3op_tc_2_SLOT23 : InstrItinClass; +def S_3op_tc_2early_SLOT23 : InstrItinClass; +def S_3op_tc_3_SLOT23 : InstrItinClass; +def S_3op_tc_3x_SLOT23 : InstrItinClass; +def NCJ_tc_3or4stall_SLOT0 : InstrItinClass; +def V2LDST_tc_ld_SLOT01 : InstrItinClass; +def V2LDST_tc_st_SLOT0 : InstrItinClass; +def V2LDST_tc_st_SLOT01 : InstrItinClass; +def V4LDST_tc_ld_SLOT01 : InstrItinClass; +def V4LDST_tc_st_SLOT0 : InstrItinClass; +def V4LDST_tc_st_SLOT01 : InstrItinClass; +def J_tc_2early_SLOT0123 : InstrItinClass; +def EXTENDER_tc_1_SLOT0123 : InstrItinClass; + def HexagonItinerariesV4 : ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>,
- InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, + // ALU32 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // ALU64 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // CR -> System + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Jump (conditional/unconditional/return etc) + // CR + InstrItinData]>, + InstrItinData]>, + // J + InstrItinData]>, + // JR + InstrItinData]>, + + //Load + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // M + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Store + // ST + InstrItinData]>, + // ST0 + InstrItinData]>, + InstrItinData]>, + + // S + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // SYS + InstrItinData]>, + + // New Value Compare Jump + InstrItinData]>, + + // Mem ops - MEM_V4 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + InstrItinData]>, + + // ENDLOOP + InstrItinData]>, + + // Extender/PREFIX + InstrItinData]>, + + InstrItinData]>, InstrItinData]>, InstrItinData, InstrStage<1, [SLOT2, SLOT3]>]> diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index c37bf9f..9e1e0fd 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hexagon-selectiondag-info" #include "HexagonTargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "hexagon-selectiondag-info" + bool llvm::flag_aligned_memcpy; HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index 31f278a..8ba6108 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -25,14 +25,13 @@ public: explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM); ~HexagonSelectionDAGInfo(); - virtual SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const; + MachinePointerInfo SrcPtrInfo) const override; }; } diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index 5303f44..247207f 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -17,11 +17,10 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "xfer" - -#include "HexagonTargetMachine.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" +#include "HexagonTargetMachine.h" +#include "HexagonTargetObjectFile.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/CodeGen/MachineDominators.h" @@ -44,21 +43,22 @@ using namespace llvm; +#define DEBUG_TYPE "xfer" + namespace { class HexagonSplitConst32AndConst64 : public MachineFunctionPass 
{ - const HexagonTargetMachine& QTM; - const HexagonSubtarget &QST; + const HexagonTargetMachine &QTM; public: static char ID; - HexagonSplitConst32AndConst64(const HexagonTargetMachine& TM) - : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {} + HexagonSplitConst32AndConst64(const HexagonTargetMachine &TM) + : MachineFunctionPass(ID), QTM(TM) {} - const char *getPassName() const { + const char *getPassName() const override { return "Hexagon Split Const32s and Const64s"; } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; }; @@ -67,6 +67,12 @@ char HexagonSplitConst32AndConst64::ID = 0; bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { + const HexagonTargetObjectFile &TLOF = + (const HexagonTargetObjectFile &) + QTM.getTargetLowering()->getObjFileLowering(); + if (TLOF.IsSmallDataEnabled()) + return true; + const TargetInstrInfo *TII = QTM.getInstrInfo(); // Loop over all of the basic blocks diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp index 8608e08..9601090 100644 --- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp +++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp @@ -26,7 +26,6 @@ // // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "xfer" #include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" #include "HexagonSubtarget.h" @@ -49,6 +48,8 @@ using namespace llvm; +#define DEBUG_TYPE "xfer" + namespace llvm { void initializeHexagonSplitTFRCondSetsPass(PassRegistry&); } @@ -67,10 +68,10 @@ class HexagonSplitTFRCondSets : public MachineFunctionPass { initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const { + const char *getPassName() const override { return "Hexagon Split TFRCondSets"; } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; }; @@ -221,7 +222,8 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) { static void initializePassOnce(PassRegistry &Registry) { const char *Name = "Hexagon Split TFRCondSets"; PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr", - &HexagonSplitTFRCondSets::ID, 0, false, false); + &HexagonSplitTFRCondSets::ID, nullptr, false, + false); Registry.registerPass(*PI, true); } diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index fca6707..70c87fa 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -18,6 +18,8 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; +#define DEBUG_TYPE "hexagon-subtarget" + #define GET_SUBTARGETINFO_CTOR #define GET_SUBTARGETINFO_TARGET_DESC #include "HexagonGenSubtargetInfo.inc" diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index 9ce1fb8..b923764 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -52,7 +52,7 @@ extern "C" void LLVMInitializeHexagonTarget() { } static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) { - return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler()); + return new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>()); } static MachineSchedRegistry @@ -79,20 +79,6 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT, initAsmInfo(); } -// addPassesForOptimizations - Allow the backend (target) to
add Target -// Independent Optimization passes to the Pass Manager. -bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) { - if (getOptLevel() != CodeGenOpt::None) { - PM.add(createConstantPropagationPass()); - PM.add(createLoopSimplifyPass()); - PM.add(createDeadCodeEliminationPass()); - PM.add(createConstantPropagationPass()); - PM.add(createLoopUnrollPass()); - PM.add(createLoopStrengthReducePass()); - } - return true; -} - namespace { /// Hexagon Code Generator Pass Configuration Options. class HexagonPassConfig : public TargetPassConfig { @@ -113,16 +99,16 @@ public: return getTM<HexagonTargetMachine>(); } - virtual ScheduleDAGInstrs * - createMachineScheduler(MachineSchedContext *C) const { + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { return createVLIWMachineSched(C); } - virtual bool addInstSelector(); - virtual bool addPreRegAlloc(); - virtual bool addPostRegAlloc(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); + bool addInstSelector() override; + bool addPreRegAlloc() override; + bool addPostRegAlloc() override; + bool addPreSched2() override; + bool addPreEmitPass() override; }; } // namespace @@ -164,16 +150,12 @@ bool HexagonPassConfig::addPostRegAlloc() { bool HexagonPassConfig::addPreSched2() { const HexagonTargetMachine &TM = getHexagonTargetMachine(); - const HexagonTargetObjectFile &TLOF = - (const HexagonTargetObjectFile &)getTargetLowering()->getObjFileLowering(); addPass(createHexagonCopyToCombine()); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); - if (!TLOF.IsSmallDataEnabled()) { - addPass(createHexagonSplitConst32AndConst64(TM)); - printAndVerify("After hexagon split const32/64 pass"); - } + addPass(createHexagonSplitConst32AndConst64(TM)); + printAndVerify("After hexagon split const32/64 pass"); return true; } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index cf8f9aa..70b835e 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -41,39 +41,37 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); - virtual const HexagonInstrInfo *getInstrInfo() const { + const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; } - virtual const HexagonSubtarget *getSubtargetImpl() const { + const HexagonSubtarget *getSubtargetImpl() const override { return &Subtarget; } - virtual const HexagonRegisterInfo *getRegisterInfo() const { + const HexagonRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } - virtual const InstrItineraryData* getInstrItineraryData() const { + const InstrItineraryData* getInstrItineraryData() const override { return InstrItins; } - virtual const HexagonTargetLowering* getTargetLowering() const { + const HexagonTargetLowering* getTargetLowering() const override { return &TLInfo; } - virtual const HexagonFrameLowering* getFrameLowering() const { + const HexagonFrameLowering* getFrameLowering() const override { return &FrameLowering; } - virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const { + const HexagonSelectionDAGInfo* getSelectionDAGInfo() const override { return &TSInfo; } - virtual const DataLayout *getDataLayout() const { return &DL; } + const DataLayout *getDataLayout() const override { return &DL; } static unsigned getModuleMatchQuality(const Module &M); - // Pass Pipeline Configuration.
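The deleted addPassesForOptimizations hook above ran IR-level passes (constant propagation, loop unrolling, LSR) from inside the backend; in the 3.5 pipeline such passes belong to the optimizer pipeline the driver assembles. A sketch of scheduling the equivalent passes through a plain PassManager, assuming the usual factory declarations from llvm/Transforms/Scalar.h (addHexagonStyleOpts is a hypothetical helper name):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"

    void addHexagonStyleOpts(llvm::PassManager &PM) {
      using namespace llvm;
      PM.add(createConstantPropagationPass());
      PM.add(createLoopSimplifyPass());
      PM.add(createDeadCodeEliminationPass());
      PM.add(createLoopUnrollPass());
      PM.add(createLoopStrengthReducePass());
      // PM.run(M) then executes them over a Module M built elsewhere.
    }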
- virtual bool addPassesForOptimizations(PassManagerBase &PM); - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; }; extern bool flag_aligned_memcpy; diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index 976ff2b..87ce960 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,7 +16,6 @@ // prune the dependence. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "packets" #include "llvm/CodeGen/DFAPacketizer.h" #include "Hexagon.h" #include "HexagonMachineFunctionInfo.h" @@ -51,6 +50,8 @@ using namespace llvm; +#define DEBUG_TYPE "packets" + static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", cl::ZeroOrMore, cl::Hidden, cl::init(true), cl::desc("Allow non-solo packetization of volatile memory references")); @@ -69,7 +70,7 @@ namespace { initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachineBranchProbabilityInfo>(); @@ -79,11 +80,11 @@ namespace { MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { + const char *getPassName() const override { return "Hexagon Packetizer"; } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; }; char HexagonPacketizer::ID = 0; @@ -121,24 +122,25 @@ namespace { const MachineBranchProbabilityInfo *MBPI); // initPacketizerState - initialize some internal flags. - void initPacketizerState(); + void initPacketizerState() override; // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB); + bool ignorePseudoInstruction(MachineInstr *MI, + MachineBasicBlock *MBB) override; // isSoloInstruction - return true if instruction MI can not be packetized // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(MachineInstr *MI); + bool isSoloInstruction(MachineInstr *MI) override; // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ // together. - bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ); + bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override; // isLegalToPruneDependencies - Is it legal to prune dependence between SUI // and SUJ. - bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ); + bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; - MachineBasicBlock::iterator addToPacket(MachineInstr *MI); + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; private: bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, @@ -390,7 +392,7 @@ static bool IsLoopN(MachineInstr *MI) { /// callee-saved register.
static bool DoesModifyCalleeSavedReg(MachineInstr *MI, const TargetRegisterInfo *TRI) { - for (const uint16_t *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) { + for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) { unsigned CalleeSavedReg = *CSR; if (MI->modifiesRegister(CalleeSavedReg, TRI)) return true; @@ -603,7 +605,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI, // evaluate identically unsigned predRegNumSrc = 0; unsigned predRegNumDst = 0; - const TargetRegisterClass* predRegClass = NULL; + const TargetRegisterClass* predRegClass = nullptr; // Get predicate register used in the source instruction for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) { @@ -1172,7 +1174,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { // of that (IsCallDependent) function. Bug 6216 is opened for this. // unsigned DepReg = 0; - const TargetRegisterClass* RC = NULL; + const TargetRegisterClass* RC = nullptr; if (DepType == SDep::Data) { DepReg = SUJ->Succs[i].getReg(); RC = QRI->getMinimalPhysRegClass(DepReg); diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp index 33667f4..9942a60 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "HexagonAsmPrinter.h" #include "Hexagon.h" #include "HexagonInstPrinter.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h index d0cef68..09e3f88 100644 --- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h @@ -27,7 +27,7 @@ namespace llvm { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI), MII(MII) {} - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot); virtual StringRef getOpcodeName(unsigned Opcode) const; void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 8519cf3..f8be77c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -87,70 +87,82 @@ namespace HexagonII { // Solo instructions. SoloPos = 5, SoloMask = 0x1, + // Packed only with A or X-type instructions. + SoloAXPos = 6, + SoloAXMask = 0x1, + // Only A-type instruction in first slot or nothing. + SoloAin1Pos = 7, + SoloAin1Mask = 0x1, // Predicated instructions. - PredicatedPos = 6, + PredicatedPos = 8, PredicatedMask = 0x1, - PredicatedFalsePos = 7, + PredicatedFalsePos = 9, PredicatedFalseMask = 0x1, - PredicatedNewPos = 8, + PredicatedNewPos = 10, PredicatedNewMask = 0x1, + PredicateLatePos = 11, + PredicateLateMask = 0x1, // New-Value consumer instructions. - NewValuePos = 9, + NewValuePos = 12, NewValueMask = 0x1, - // New-Value producer instructions. - hasNewValuePos = 10, + hasNewValuePos = 13, hasNewValueMask = 0x1, - // Which operand consumes or produces a new value. 
- NewValueOpPos = 11, + NewValueOpPos = 14, NewValueOpMask = 0x7, - - // Which bits encode the new value. - NewValueBitsPos = 14, - NewValueBitsMask = 0x3, - // Stores that can become new-value stores. - mayNVStorePos = 16, + mayNVStorePos = 17, mayNVStoreMask = 0x1, - // New-value store instructions. - NVStorePos = 17, + NVStorePos = 18, NVStoreMask = 0x1, + // Loads that can become current-value loads. + mayCVLoadPos = 19, + mayCVLoadMask = 0x1, + // Current-value load instructions. + CVLoadPos = 20, + CVLoadMask = 0x1, // Extendable insns. - ExtendablePos = 18, + ExtendablePos = 21, ExtendableMask = 0x1, - // Insns must be extended. - ExtendedPos = 19, + ExtendedPos = 22, ExtendedMask = 0x1, - // Which operand may be extended. - ExtendableOpPos = 20, + ExtendableOpPos = 23, ExtendableOpMask = 0x7, - // Signed or unsigned range. - ExtentSignedPos = 23, + ExtentSignedPos = 26, ExtentSignedMask = 0x1, - // Number of bits of range before extending operand. - ExtentBitsPos = 24, + ExtentBitsPos = 27, ExtentBitsMask = 0x1f, + // Alignment power-of-two before extending operand. + ExtentAlignPos = 32, + ExtentAlignMask = 0x3, // Valid subtargets - validSubTargetPos = 29, + validSubTargetPos = 34, validSubTargetMask = 0xf, // Addressing mode for load/store instructions. - AddrModePos = 33, + AddrModePos = 40, AddrModeMask = 0x7, + // Access size for load/store instructions. + MemAccessSizePos = 43, + MemAccesSizeMask = 0x7, + + // Branch predicted taken. + TakenPos = 47, + TakenMask = 0x1, - // Access size of memory access instructions (load/store). - MemAccessSizePos = 36, - MemAccesSizeMask = 0x7 + // Floating-point instructions. + FPPos = 48, + FPMask = 0x1 }; // *** The code above must match HexagonInstrFormat*.td *** // diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp index f1a65c3..141e514 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp @@ -21,7 +21,7 @@ void HexagonMCAsmInfo::anchor() {} HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = 0; // .xword is only supported by V9. + Data64bitsDirective = nullptr; // .xword is only supported by V9. 
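The renumbered constants in the HexagonBaseInfo.h hunk above are position/mask pairs packed into each instruction's TSFlags word; with AddrModePos now at bit 40, the flags no longer fit in 32 bits, so readers must use a 64-bit word. A minimal sketch of the extraction pattern (the helper names are illustrative; the HexagonII::* constants are those defined in the enum above):

    #include <cstdint>

    // Every TSFlags query is a shift-and-mask read of the packed word.
    static unsigned getAddrMode(uint64_t TSFlags) {
      return (TSFlags >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
    }
    static bool isSoloAX(uint64_t TSFlags) {
      return (TSFlags >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask;
    }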
ZeroDirective = "\t.skip\t"; CommentString = "//"; HasLEB128 = true; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h index bd8cb76..953d804 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -19,7 +19,7 @@ namespace llvm { class HexagonMCAsmInfo : public MCAsmInfoELF { - virtual void anchor(); + void anchor() override; public: explicit HexagonMCAsmInfo(StringRef TT); }; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h index 3ca71f0..3c52d45 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h @@ -31,7 +31,7 @@ namespace llvm { public: explicit HexagonMCInst(): - MCInst(), MCID(0), packetStart(0), packetEnd(0) {}; + MCInst(), MCID(nullptr), packetStart(0), packetEnd(0) {}; HexagonMCInst(const MCInstrDesc& mcid): MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {}; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index 7f103d8..581674d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -23,6 +23,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "HexagonGenInstrInfo.inc" @@ -32,8 +34,6 @@ #define GET_REGINFO_MC_DESC #include "HexagonGenRegisterInfo.inc" -using namespace llvm; - static MCInstrInfo *createHexagonMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitHexagonMCInstrInfo(X); @@ -60,7 +60,7 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI, // VirtualFP = (R30 + #0). MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( - 0, Hexagon::R30, 0); + nullptr, Hexagon::R30, 0); MAI->addInitialFrameState(Inst); return MAI; diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 13abaf8..1b0837c 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AArch64 ARM ARM64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore +subdirectories = ARM AArch64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore ; This is a special group whose required libraries are extended (by llvm-build) ; with the best execution engine (the native JIT, if available, or the diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp index 4b12aea..acf1214 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "MSP430InstPrinter.h" #include "MSP430.h" #include "llvm/MC/MCAsmInfo.h" @@ -21,6 +20,8 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + // Include the auto-generated portion of the assembly writer. 
#include "MSP430GenAsmWriter.inc" @@ -44,7 +45,7 @@ void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo, void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, const char *Modifier) { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported"); const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { O << getRegisterName(Op.getReg()); diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index d32eb3a..5afbd20 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -25,17 +25,17 @@ namespace llvm { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); }; diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index a7e0e58..ef805bb 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -20,7 +20,7 @@ namespace llvm { class StringRef; class MSP430MCAsmInfo : public MCAsmInfoELF { - virtual void anchor(); + void anchor() override; public: explicit MSP430MCAsmInfo(StringRef TT); }; diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp index 530e6aa..72adb45 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp @@ -20,6 +20,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "MSP430GenInstrInfo.inc" @@ -29,8 +31,6 @@ #define GET_REGINFO_MC_DESC #include "MSP430GenRegisterInfo.inc" -using namespace llvm; - static MCInstrInfo *createMSP430MCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitMSP430MCInstrInfo(X); @@ -66,7 +66,7 @@ static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, const MCSubtargetInfo &STI) { if (SyntaxVariant == 0) return new MSP430InstPrinter(MAI, MII, MRI); - return 0; + return nullptr; } extern "C" void LLVMInitializeMSP430TargetMC() { diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index 91065d8..22a973e 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "MSP430.h" #include "InstPrinter/MSP430InstPrinter.h" #include "MSP430InstrInfo.h" @@ -35,27 +34,29 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + namespace { class MSP430AsmPrinter : public 
AsmPrinter { public: MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "MSP430 Assembly Printer"; } void printOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, const char* Modifier = 0); + raw_ostream &O, const char* Modifier = nullptr); void printSrcMemOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O); - void EmitInstruction(const MachineInstr *MI); + const char *ExtraCode, raw_ostream &O) override; + void EmitInstruction(const MachineInstr *MI) override; }; } // end of anonymous namespace diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index f128427..a96930a 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "msp430-branch-select" #include "MSP430.h" #include "MSP430InstrInfo.h" #include "llvm/ADT/Statistic.h" @@ -25,6 +24,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "msp430-branch-select" + STATISTIC(NumExpanded, "Number of branches expanded to long format"); namespace { @@ -35,9 +36,9 @@ namespace { /// BlockSizes - The sizes of the basic blocks in the function. std::vector BlockSizes; - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "MSP430 Branch Selector"; } }; diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp index ce078a3..82c8b29 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.cpp +++ b/lib/Target/MSP430/MSP430FrameLowering.cpp @@ -242,7 +242,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // alignment boundary. Amount = (Amount+StackAlign-1)/StackAlign*StackAlign; - MachineInstr *New = 0; + MachineInstr *New = nullptr; if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, Old->getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SPW) diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index 8370714..d464dd9 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -32,26 +32,26 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
- void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override; - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const; + bool hasFP(const MachineFunction &MF) const override; + bool hasReservedCallFrame(const MachineFunction &MF) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; }; } // End llvm namespace diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 4152829..a9b9035 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -31,6 +31,8 @@ #include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "msp430-isel" + namespace { struct MSP430ISelAddressMode { enum { @@ -52,17 +54,17 @@ namespace { unsigned Align; // CP alignment. MSP430ISelAddressMode() - : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0), - ES(0), JT(-1), Align(0) { + : BaseType(RegBase), Disp(0), GV(nullptr), CP(nullptr), + BlockAddr(nullptr), ES(nullptr), JT(-1), Align(0) { } bool hasSymbolicDisplacement() const { - return GV != 0 || CP != 0 || ES != 0 || JT != -1; + return GV != nullptr || CP != nullptr || ES != nullptr || JT != -1; } void dump() { errs() << "MSP430ISelAddressMode " << this << '\n'; - if (BaseType == RegBase && Base.Reg.getNode() != 0) { + if (BaseType == RegBase && Base.Reg.getNode() != nullptr) { errs() << "Base.Reg "; Base.Reg.getNode()->dump(); } else if (BaseType == FrameIndexBase) { @@ -99,7 +101,7 @@ namespace { Lowering(*TM.getTargetLowering()), Subtarget(*TM.getSubtargetImpl()) { } - virtual const char *getPassName() const { + const char *getPassName() const override { return "MSP430 DAG->DAG Pattern Instruction Selection"; } @@ -107,15 +109,14 @@ namespace { bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); - virtual bool - SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, - std::vector<SDValue> &OutOps); + bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, + std::vector<SDValue> &OutOps) override; // Include the pieces autogenerated from the target description.
#include "MSP430GenDAGISel.inc" private: - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; SDNode *SelectIndexedLoad(SDNode *Op); SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2, unsigned Opc8, unsigned Opc16); @@ -199,7 +200,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { case ISD::FrameIndex: if (AM.BaseType == MSP430ISelAddressMode::RegBase - && AM.Base.Reg.getNode() == 0) { + && AM.Base.Reg.getNode() == nullptr) { AM.BaseType = MSP430ISelAddressMode::FrameIndexBase; AM.Base.FrameIndex = cast(N)->getIndex(); return false; @@ -228,7 +229,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { // Start with the LHS as an addr mode. if (!MatchAddress(N.getOperand(0), AM) && // Address could not have picked a GV address for the displacement. - AM.GV == NULL && + AM.GV == nullptr && // Check to see if the LHS & C is zero. CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { AM.Disp += Offset; @@ -330,7 +331,7 @@ static bool isValidIndexedLoad(const LoadSDNode *LD) { SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) { LoadSDNode *LD = cast(N); if (!isValidIndexedLoad(LD)) - return NULL; + return nullptr; MVT VT = LD->getMemoryVT().getSimpleVT(); @@ -343,7 +344,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) { Opcode = MSP430::MOV16rm_POST; break; default: - return NULL; + return nullptr; } return CurDAG->getMachineNode(Opcode, SDLoc(N), @@ -359,7 +360,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, IsLegalToFold(N1, Op, Op, OptLevel)) { LoadSDNode *LD = cast(N1); if (!isValidIndexedLoad(LD)) - return NULL; + return nullptr; MVT VT = LD->getMemoryVT().getSimpleVT(); unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8); @@ -367,9 +368,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, MemRefs0[0] = cast(N1)->getMemOperand(); SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() }; SDNode *ResNode = - CurDAG->SelectNodeTo(Op, Opc, - VT, MVT::i16, MVT::Other, - Ops0, 3); + CurDAG->SelectNodeTo(Op, Opc, VT, MVT::i16, MVT::Other, Ops0); cast(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1); // Transfer chain. ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2)); @@ -378,7 +377,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op, return ResNode; } - return NULL; + return nullptr; } @@ -396,7 +395,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { Node->dump(CurDAG); errs() << "\n"); Node->setNodeId(-1); - return NULL; + return nullptr; } // Few custom selection stuff. 
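The DEBUG_TYPE moves repeated throughout this patch all follow one rule: the macro may only be defined after the LLVM headers are included, since the headers themselves now consume DEBUG_TYPE and must not pick up a file-local tag. The resulting shape, as a minimal sketch:

    #include "llvm/Support/Debug.h"        // headers first
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    #define DEBUG_TYPE "msp430-isel"       // then the pass-local debug tag

    static void trace() {
      // Printed only in asserts builds under -debug or -debug-only=msp430-isel.
      DEBUG(dbgs() << "selecting node\n");
    }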
@@ -484,7 +483,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = SelectCode(Node); DEBUG(errs() << "=> "); - if (ResNode == NULL || ResNode == Node) + if (ResNode == nullptr || ResNode == Node) DEBUG(Node->dump(CurDAG)); else DEBUG(ResNode->dump(CurDAG)); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index fe163d4..c5901bc 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "msp430-lower" - #include "MSP430ISelLowering.h" #include "MSP430.h" #include "MSP430MachineFunctionInfo.h" @@ -38,6 +36,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "msp430-lower" + typedef enum { NoHWMult, HWMultIntr, @@ -284,7 +284,7 @@ template <typename ArgT> static void AnalyzeArguments(CCState &State, SmallVectorImpl<CCValAssign> &ArgLocs, const SmallVectorImpl<ArgT> &Args) { - static const uint16_t RegList[] = { + static const MCPhysReg RegList[] = { MSP430::R15W, MSP430::R14W, MSP430::R13W, MSP430::R12W }; static const unsigned NbRegs = array_lengthof(RegList); @@ -462,7 +462,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, errs() << "LowerFormalArguments Unhandled argument type: " << RegVT.getSimpleVT().SimpleTy << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } case MVT::i16: unsigned VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass); @@ -568,7 +568,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(Opc, dl, MVT::Other, &RetOps[0], RetOps.size()); + return DAG.getNode(Opc, dl, MVT::Other, RetOps); } /// LowerCCCCallTo - functions arguments are copied from virtual regs to @@ -629,7 +629,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, } else { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy()); SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), @@ -659,8 +659,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Transform all store nodes into one single node because all store nodes are // independent of each other. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag in @@ -695,7 +694,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, if (InFlag.getNode()) Ops.push_back(InFlag); - Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node.
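The `&Ops[0], Ops.size()` to `Ops` rewrites in this file track SelectionDAG's switch to ArrayRef<SDValue> parameters on getNode: any contiguous container converts implicitly, and an empty vector no longer risks the undefined `&Ops[0]` dereference. A sketch reusing the SELECT_CC shape from the hunks nearby (DAG, dl, VTs and the operand values are assumed to come from the surrounding lowering code):

    SmallVector<SDValue, 4> Ops;
    Ops.push_back(One);
    Ops.push_back(Zero);
    Ops.push_back(TargetCC);
    Ops.push_back(Flag);
    // The SmallVector converts to ArrayRef<SDValue> at the call site; no
    // explicit pointer/length pair is spelled out.
    SDValue Sel = DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops);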
@@ -986,7 +985,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { Ops.push_back(Zero); Ops.push_back(TargetCC); Ops.push_back(Flag); - return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size()); + return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops); } } @@ -1009,7 +1008,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, Ops.push_back(TargetCC); Ops.push_back(Flag); - return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size()); + return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops); } SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, @@ -1148,7 +1147,7 @@ bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return NULL; + default: return nullptr; case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG"; case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG"; case MSP430ISD::RRA: return "MSP430ISD::RRA"; diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 85a861e..3ced61d 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -73,14 +73,14 @@ namespace llvm { public: explicit MSP430TargetLowering(MSP430TargetMachine &TM); - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; /// getTargetNodeName - This method returns the name of a target specific /// DAG node. - virtual const char *getTargetNodeName(unsigned Opcode) const; + const char *getTargetNodeName(unsigned Opcode) const override; SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; @@ -97,15 +97,16 @@ namespace llvm { SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; TargetLowering::ConstraintType - getConstraintType(const std::string &Constraint) const; + getConstraintType(const std::string &Constraint) const override; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const override; /// isTruncateFree - Return true if it's free to truncate a value of type /// Ty1 to type Ty2. e.g. On msp430 it's free to truncate a i16 value in /// register R15W to i8 by referencing its sub-register R15B. - virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; - virtual bool isTruncateFree(EVT VT1, EVT VT2) const; + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; /// isZExtFree - Return true if any actual instruction that defines a value /// of type Ty1 implicit zero-extends the value to Ty2 in the result @@ -115,12 +116,12 @@ namespace llvm { /// necessarily apply to truncate instructions. e.g. on msp430, all /// instructions that define 8-bit values implicit zero-extend the result /// out to 16 bits. 
- virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; - virtual bool isZExtFree(EVT VT1, EVT VT2) const; - virtual bool isZExtFree(SDValue Val, EVT VT2) const; + bool isZExtFree(Type *Ty1, Type *Ty2) const override; + bool isZExtFree(EVT VT1, EVT VT2) const override; + bool isZExtFree(SDValue Val, EVT VT2) const override; MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *BB) const override; MachineBasicBlock* EmitShiftInstr(MachineInstr *MI, MachineBasicBlock *BB) const; @@ -148,28 +149,27 @@ namespace llvm { SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; - virtual SDValue + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - virtual SDValue + SmallVectorImpl &InVals) const override; + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const; - - virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, - SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; + SmallVectorImpl &InVals) const override; + + SDValue LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + SDLoc dl, SelectionDAG &DAG) const override; + + bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; const MSP430Subtarget &Subtarget; const DataLayout *TD; diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 38f73b9..0c04ddb 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -22,11 +22,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_CTOR_DTOR #include "MSP430GenInstrInfo.inc" -using namespace llvm; - // Pin the vtable to this file. void MSP430InstrInfo::anchor() {} @@ -208,11 +208,11 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, while (std::next(I) != MBB.end()) std::next(I)->eraseFromParent(); Cond.clear(); - FBB = 0; + FBB = nullptr; // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = 0; + TBB = nullptr; I->eraseFromParent(); I = MBB.end(); continue; diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index ad2b8cc..1ffcebb 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -50,40 +50,41 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
/// - virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; } + const TargetRegisterInfo &getRegisterInfo() const { return RI; } void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; unsigned GetInstSizeInBytes(const MachineInstr *MI) const; // Branch folding goodness - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - bool isUnpredicatedTerminator(const MachineInstr *MI) const; + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; + bool isUnpredicatedTerminator(const MachineInstr *MI) const override; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, - bool AllowModify) const; + bool AllowModify) const override; - unsigned RemoveBranch(MachineBasicBlock &MBB) const; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, - DebugLoc DL) const; + DebugLoc DL) const override; }; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index f64017e..341fb64 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "msp430-reg-info" - #include "MSP430RegisterInfo.h" #include "MSP430.h" #include "MSP430MachineFunctionInfo.h" @@ -26,38 +24,40 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "msp430-reg-info" + #define GET_REGINFO_TARGET_DESC #include "MSP430GenRegisterInfo.inc" -using namespace llvm; - // FIXME: Provide proper call frame setup / destroy opcodes. 
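The MSP430 register-info hunk below rebuilds its callee-saved tables as MCPhysReg arrays, but the contract is unchanged: getCalleeSavedRegs still returns a zero-terminated list, walked exactly as the Hexagon packetizer hunk earlier in this patch does. A minimal sketch with an illustrative subset of the MSP430 registers:

    // Zero-terminated callee-saved list, as getCalleeSavedRegs() returns it.
    static const MCPhysReg CalleeSaved[] = {
      MSP430::FPW, MSP430::R5W, MSP430::R6W, 0   // trailing 0 is the sentinel
    };

    static bool clobbersCSR(const MachineInstr *MI,
                            const TargetRegisterInfo *TRI) {
      for (const MCPhysReg *CSR = CalleeSaved; *CSR; ++CSR)
        if (MI->modifiesRegister(*CSR, TRI))   // stop at the first clobber
          return true;
      return false;
    }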
MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm) : MSP430GenRegisterInfo(MSP430::PCW), TM(tm) { StackAlign = TM.getFrameLowering()->getStackAlignment(); } -const uint16_t* +const MCPhysReg* MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering(); const Function* F = MF->getFunction(); - static const uint16_t CalleeSavedRegs[] = { + static const MCPhysReg CalleeSavedRegs[] = { MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, 0 }; - static const uint16_t CalleeSavedRegsFP[] = { + static const MCPhysReg CalleeSavedRegsFP[] = { MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, 0 }; - static const uint16_t CalleeSavedRegsIntr[] = { + static const MCPhysReg CalleeSavedRegsIntr[] = { MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W, 0 }; - static const uint16_t CalleeSavedRegsIntrFP[] = { + static const MCPhysReg CalleeSavedRegsIntrFP[] = { MSP430::R5W, MSP430::R6W, MSP430::R7W, MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W, MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W, diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 78047cc..a607528 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -35,18 +35,20 @@ public: MSP430RegisterInfo(MSP430TargetMachine &tm); /// Code Generation virtual methods... - const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; const TargetRegisterClass* - getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const; + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; // Debug information queries. 
- unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp index 24f45fa..c700383 100644 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "msp430-selectiondag-info" #include "MSP430TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "msp430-selectiondag-info" + MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM) : TargetSelectionDAGInfo(TM) { } diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index edeaf34..68ad091 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -15,12 +15,14 @@ #include "MSP430.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "msp430-subtarget" + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "MSP430GenSubtargetInfo.inc" -using namespace llvm; - void MSP430Subtarget::anchor() { } MSP430Subtarget::MSP430Subtarget(const std::string &TT, diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 98a6003..50be2be 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -51,8 +51,8 @@ public: return getTM(); } - virtual bool addInstSelector(); - virtual bool addPreEmitPass(); + bool addInstSelector() override; + bool addPreEmitPass() override; }; } // namespace diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index be695a2..ea5d407 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -43,25 +43,25 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); - virtual const TargetFrameLowering *getFrameLowering() const { + const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } - virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const DataLayout *getDataLayout() const { return &DL;} - virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; } + const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; } + const DataLayout *getDataLayout() const override { return &DL;} + const MSP430Subtarget *getSubtargetImpl() const override { return &Subtarget; } - virtual const TargetRegisterInfo *getRegisterInfo() const { + const TargetRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } - virtual const MSP430TargetLowering *getTargetLowering() const { + const MSP430TargetLowering *getTargetLowering() const override { return &TLInfo; } - virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const { + const MSP430SelectionDAGInfo* getSelectionDAGInfo() const override { return &TSInfo; } - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; }; // MSP430TargetMachine. 
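The accessor conversions in these TargetMachine headers are mechanical, but the override keyword is what makes the churn worthwhile: a mismatch against the base-class signature becomes a hard compile error instead of a silently shadowing virtual. A minimal illustration with hypothetical types, not from the patch:

    struct Base {
      virtual int frameRegister() const { return 0; }
    };
    struct Good : Base {
      int frameRegister() const override { return 1; }  // checked override
    };
    // struct Bad : Base {
    //   int frameRegister() override { return 2; }
    //   // error: marked 'override' but overrides nothing (missing 'const')
    // };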
} // end namespace llvm diff --git a/lib/Target/Mips/Android.mk b/lib/Target/Mips/Android.mk index 74b8a3b..4e8831c 100644 --- a/lib/Target/Mips/Android.mk +++ b/lib/Target/Mips/Android.mk @@ -24,6 +24,7 @@ mips_codegen_SRC_FILES := \ MipsCodeEmitter.cpp \ MipsConstantIslandPass.cpp \ MipsDelaySlotFiller.cpp \ + MipsFastISel.cpp \ MipsFrameLowering.cpp \ MipsInstrInfo.cpp \ MipsISelDAGToDAG.cpp \ diff --git a/lib/Target/Mips/AsmParser/LLVMBuild.txt b/lib/Target/Mips/AsmParser/LLVMBuild.txt index e7ca243..dd8e3cf 100644 --- a/lib/Target/Mips/AsmParser/LLVMBuild.txt +++ b/lib/Target/Mips/AsmParser/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = MipsAsmParser parent = Mips -required_libraries = MC MCParser Support MipsDesc MipsInfo +required_libraries = MC MCParser MipsDesc MipsInfo Support add_to_library_groups = Mips diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 911a119..86fd386 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -29,6 +29,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-asm-parser" + namespace llvm { class MCInstrInfo; } @@ -73,10 +75,10 @@ class MipsAsmParser : public MCTargetAsmParser { bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl &Operands, MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm); + bool MatchingInlineAsm) override; /// Parse a register as used in CFI directives - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseParenSuffix(StringRef Name, SmallVectorImpl &Operands); @@ -84,11 +86,11 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseBracketSuffix(StringRef Name, SmallVectorImpl &Operands); - bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl &Operands); + bool + ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, + SmallVectorImpl &Operands) override; - bool ParseDirective(AsmToken DirectiveID); + bool ParseDirective(AsmToken DirectiveID) override; MipsAsmParser::OperandMatchResultTy parseMemOperand(SmallVectorImpl &Operands); @@ -135,6 +137,7 @@ class MipsAsmParser : public MCTargetAsmParser { SmallVectorImpl &Instructions, bool isLoad, bool isImmOpnd); bool reportParseError(StringRef ErrorMsg); + bool reportParseError(SMLoc Loc, StringRef ErrorMsg); bool parseMemOffset(const MCExpr *&Res, bool isParenExpr); bool parseRelocOperand(const MCExpr *&Res); @@ -143,7 +146,9 @@ class MipsAsmParser : public MCTargetAsmParser { bool isEvaluated(const MCExpr *Expr); bool parseSetFeature(uint64_t Feature); + bool parseDirectiveCPLoad(SMLoc Loc); bool parseDirectiveCPSetup(); + bool parseDirectiveNaN(); bool parseDirectiveSet(); bool parseDirectiveOption(); @@ -212,21 +217,22 @@ class MipsAsmParser : public MCTargetAsmParser { void setFeatureBits(unsigned Feature, StringRef FeatureString) { if (!(STI.getFeatureBits() & Feature)) { - setAvailableFeatures(ComputeAvailableFeatures( - STI.ToggleFeature(FeatureString))); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); } } void clearFeatureBits(unsigned Feature, StringRef FeatureString) { if (STI.getFeatureBits() & Feature) { - setAvailableFeatures(ComputeAvailableFeatures( - STI.ToggleFeature(FeatureString))); + setAvailableFeatures( + ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); } } public: 
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, - const MCInstrInfo &MII) + const MCInstrInfo &MII, + const MCTargetOptions &Options) : MCTargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -266,11 +272,12 @@ public: /// context). RegKind_CCR = 128, /// CCR RegKind_HWRegs = 256, /// HWRegs + RegKind_COP3 = 512, /// COP3 /// Potentially any (e.g. $1) RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCC | RegKind_MSA128 | RegKind_MSACtrl | RegKind_COP2 | RegKind_ACC | - RegKind_CCR | RegKind_HWRegs + RegKind_CCR | RegKind_HWRegs | RegKind_COP3 }; private: @@ -422,6 +429,14 @@ private: return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); } + /// Coerce the register to COP3 and return the real register for the + /// current target. + unsigned getCOP3Reg() const { + assert(isRegIdx() && (RegIdx.Kind & RegKind_COP3) && "Invalid access!"); + unsigned ClassID = Mips::COP3RegClassID; + return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index); + } + /// Coerce the register to ACC64DSP and return the real register for the /// current target. unsigned getACC64DSPReg() const { @@ -465,7 +480,7 @@ private: public: void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediate when possible. Null MCExpr = 0. - if (Expr == 0) + if (!Expr) Inst.addOperand(MCOperand::CreateImm(0)); else if (const MCConstantExpr *CE = dyn_cast(Expr)) Inst.addOperand(MCOperand::CreateImm(CE->getValue())); @@ -533,6 +548,11 @@ public: Inst.addOperand(MCOperand::CreateReg(getCOP2Reg())); } + void addCOP3AsmRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getCOP3Reg())); + } + void addACC64DSPAsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getACC64DSPReg())); @@ -573,7 +593,7 @@ public: addExpr(Inst, Expr); } - bool isReg() const { + bool isReg() const override { // As a special case until we sort out the definition of div/divu, pretend // that $0/$zero are k_PhysRegister so that MCK_ZERO works correctly. if (isGPRAsmReg() && RegIdx.Index == 0) @@ -582,16 +602,16 @@ public: return Kind == k_PhysRegister; } bool isRegIdx() const { return Kind == k_RegisterIndex; } - bool isImm() const { return Kind == k_Immediate; } + bool isImm() const override { return Kind == k_Immediate; } bool isConstantImm() const { return isImm() && dyn_cast(getImm()); } - bool isToken() const { + bool isToken() const override { // Note: It's not possible to pretend that other operand kinds are tokens. // The matcher emitter checks tokens first. return Kind == k_Token; } - bool isMem() const { return Kind == k_Memory; } + bool isMem() const override { return Kind == k_Memory; } bool isInvNum() const { return Kind == k_Immediate; } bool isLSAImm() const { if (!isConstantImm()) @@ -605,7 +625,7 @@ public: return StringRef(Tok.Data, Tok.Length); } - unsigned getReg() const { + unsigned getReg() const override { // As a special case until we sort out the definition of div/divu, pretend // that $0/$zero are k_PhysRegister so that MCK_ZERO works correctly. 
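The asm-parser operand treats register kinds as a bitmask, so a bare numeric register such as $7 stays ambiguous until an instruction's operand class coerces it, and the new COP3 kind simply contributes one more bit. A reduced sketch of that coercion shape; the physical-register numbering is fictitious and stands in for the real register-class lookup:

    #include <cassert>
    #include <cstdio>

    // Register kinds as a bitmask, mirroring the patch's RegKind_* values;
    // a numeric register like $7 is a member of several kinds at once.
    enum RegKind {
      RegKind_GPR     = 1,
      RegKind_COP2    = 64,
      RegKind_COP3    = 512, // new in the patch
      RegKind_Numeric = RegKind_GPR | RegKind_COP2 | RegKind_COP3,
    };

    struct ParsedReg {
      unsigned Kinds; // bitmask of RegKind
      unsigned Index; // 0..31 as written in the assembly
    };

    // Coerce a parsed index to a concrete class, asserting that the parse
    // actually allows the interpretation -- the getCOP3Reg() pattern.
    unsigned coerceToCOP3(const ParsedReg &R) {
      assert((R.Kinds & RegKind_COP3) && "Invalid access!");
      return 300 + R.Index; // fictitious COP3 physical-register numbering
    }

    int main() {
      ParsedReg R = {RegKind_Numeric, 7}; // "$7": GPR, COP2 or COP3
      printf("COP3 reg = %u\n", coerceToCOP3(R));
    }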
if (Kind == k_RegisterIndex && RegIdx.Index == 0 && @@ -744,6 +764,9 @@ public: bool isCOP2AsmReg() const { return isRegIdx() && RegIdx.Kind & RegKind_COP2 && RegIdx.Index <= 31; } + bool isCOP3AsmReg() const { + return isRegIdx() && RegIdx.Kind & RegKind_COP3 && RegIdx.Index <= 31; + } bool isMSA128AsmReg() const { return isRegIdx() && RegIdx.Kind & RegKind_MSA128 && RegIdx.Index <= 31; } @@ -752,11 +775,25 @@ public: } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } + SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } + SMLoc getEndLoc() const override { return EndLoc; } - virtual void print(raw_ostream &OS) const { + virtual ~MipsOperand() { + switch (Kind) { + case k_Immediate: + break; + case k_Memory: + delete Mem.Base; + break; + case k_PhysRegister: + case k_RegisterIndex: + case k_Token: + break; + } + } + + void print(raw_ostream &OS) const override { switch (Kind) { case k_Immediate: OS << "Imm<"; @@ -906,10 +943,6 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) { case Mips::LoadImm32Reg: case Mips::LoadAddr32Imm: case Mips::LoadAddr32Reg: - case Mips::SUBi: - case Mips::SUBiu: - case Mips::DSUBi: - case Mips::DSUBiu: return true; default: return false; @@ -925,30 +958,6 @@ void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, return expandLoadAddressImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Reg: return expandLoadAddressReg(Inst, IDLoc, Instructions); - case Mips::SUBi: - Instructions.push_back(MCInstBuilder(Mips::ADDi) - .addReg(Inst.getOperand(0).getReg()) - .addReg(Inst.getOperand(1).getReg()) - .addImm(-Inst.getOperand(2).getImm())); - return; - case Mips::SUBiu: - Instructions.push_back(MCInstBuilder(Mips::ADDiu) - .addReg(Inst.getOperand(0).getReg()) - .addReg(Inst.getOperand(1).getReg()) - .addImm(-Inst.getOperand(2).getImm())); - return; - case Mips::DSUBi: - Instructions.push_back(MCInstBuilder(Mips::DADDi) - .addReg(Inst.getOperand(0).getReg()) - .addReg(Inst.getOperand(1).getReg()) - .addImm(-Inst.getOperand(2).getImm())); - return; - case Mips::DSUBiu: - Instructions.push_back(MCInstBuilder(Mips::DADDiu) - .addReg(Inst.getOperand(0).getReg()) - .addReg(Inst.getOperand(1).getReg()) - .addImm(-Inst.getOperand(2).getImm())); - return; } } @@ -1586,6 +1595,8 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, RegNo = isGP64() ? Operand.getGPR64Reg() : Operand.getGPR32Reg(); } + delete &Operand; + return (RegNo == (unsigned)-1); } @@ -1624,7 +1635,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( SmallVectorImpl &Operands) { DEBUG(dbgs() << "parseMemOperand\n"); - const MCExpr *IdVal = 0; + const MCExpr *IdVal = nullptr; SMLoc S; bool isParenExpr = false; MipsAsmParser::OperandMatchResultTy Res = MatchOperand_NoMatch; @@ -1654,6 +1665,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); // Zero register assumed, add a memory operand with ZERO as its base. + // "Base" will be managed by k_Memory. 
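The new MipsOperand destructor frees only what the operand owns: a k_Memory operand deletes its base-register sub-operand, while the other kinds hold nothing heap-allocated. A cut-down tagged-union version of that ownership rule, using invented minimal types:

    #include <cstdio>

    struct Operand {
      enum OperandKind { k_Token, k_Immediate, k_Memory };
      struct MemOp { Operand *Base; long Off; };
      OperandKind Kind;
      union {
        long Imm;
        MemOp Mem;
      };

      static Operand *createImm(long V) {
        Operand *Op = new Operand();
        Op->Kind = k_Immediate;
        Op->Imm = V;
        return Op;
      }
      static Operand *createMem(Operand *Base, long Off) {
        Operand *Op = new Operand();
        Op->Kind = k_Memory;
        Op->Mem.Base = Base;
        Op->Mem.Off = Off;
        return Op;
      }

      // Only the memory case owns another operand, so only it frees one --
      // the shape of the ~MipsOperand() added by the patch.
      ~Operand() {
        if (Kind == k_Memory)
          delete Mem.Base;
      }
    };

    int main() {
      Operand *M = Operand::createMem(Operand::createImm(4), 16);
      printf("base-imm=%ld offset=%ld\n", M->Mem.Base->Imm, M->Mem.Off);
      delete M; // also frees the base, per the k_Memory case above
    }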
MipsOperand *Base = MipsOperand::CreateGPRReg( 0, getContext().getRegisterInfo(), S, E, *this); Operands.push_back(MipsOperand::CreateMem(Base, IdVal, S, E, *this)); @@ -1679,12 +1691,13 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( Parser.Lex(); // Eat the ')' token. - if (IdVal == 0) + if (!IdVal) IdVal = MCConstantExpr::Create(0, getContext()); // Replace the register operand with the memory operand. MipsOperand *op = static_cast(Operands.back()); // Remove the register from the operands. + // "op" will be managed by k_Memory. Operands.pop_back(); // Add the memory operand. if (const MCBinaryExpr *BE = dyn_cast(IdVal)) { @@ -1969,9 +1982,11 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { .Case("call_lo", MCSymbolRefExpr::VK_Mips_CALL_LO16) .Case("higher", MCSymbolRefExpr::VK_Mips_HIGHER) .Case("highest", MCSymbolRefExpr::VK_Mips_HIGHEST) + .Case("pcrel_hi", MCSymbolRefExpr::VK_Mips_PCREL_HI16) + .Case("pcrel_lo", MCSymbolRefExpr::VK_Mips_PCREL_LO16) .Default(MCSymbolRefExpr::VK_None); - assert (VK != MCSymbolRefExpr::VK_None); + assert(VK != MCSymbolRefExpr::VK_None); return VK; } @@ -2089,6 +2104,10 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { return Error(Loc, ErrorMsg); } +bool MipsAsmParser::reportParseError(SMLoc Loc, StringRef ErrorMsg) { + return Error(Loc, ErrorMsg); +} + bool MipsAsmParser::parseSetNoAtDirective() { // Line should look like: ".set noat". // set at reg to 0. @@ -2248,29 +2267,30 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) { if (getLexer().isNot(AsmToken::EndOfStatement)) return reportParseError("unexpected token in .set directive"); - switch(Feature) { - default: llvm_unreachable("Unimplemented feature"); - case Mips::FeatureDSP: - setFeatureBits(Mips::FeatureDSP, "dsp"); - getTargetStreamer().emitDirectiveSetDsp(); + switch (Feature) { + default: + llvm_unreachable("Unimplemented feature"); + case Mips::FeatureDSP: + setFeatureBits(Mips::FeatureDSP, "dsp"); + getTargetStreamer().emitDirectiveSetDsp(); break; - case Mips::FeatureMicroMips: - getTargetStreamer().emitDirectiveSetMicroMips(); + case Mips::FeatureMicroMips: + getTargetStreamer().emitDirectiveSetMicroMips(); break; - case Mips::FeatureMips16: - getTargetStreamer().emitDirectiveSetMips16(); + case Mips::FeatureMips16: + getTargetStreamer().emitDirectiveSetMips16(); break; - case Mips::FeatureMips32r2: - setFeatureBits(Mips::FeatureMips32r2, "mips32r2"); - getTargetStreamer().emitDirectiveSetMips32R2(); + case Mips::FeatureMips32r2: + setFeatureBits(Mips::FeatureMips32r2, "mips32r2"); + getTargetStreamer().emitDirectiveSetMips32R2(); break; - case Mips::FeatureMips64: - setFeatureBits(Mips::FeatureMips64, "mips64"); - getTargetStreamer().emitDirectiveSetMips64(); + case Mips::FeatureMips64: + setFeatureBits(Mips::FeatureMips64, "mips64"); + getTargetStreamer().emitDirectiveSetMips64(); break; - case Mips::FeatureMips64r2: - setFeatureBits(Mips::FeatureMips64r2, "mips64r2"); - getTargetStreamer().emitDirectiveSetMips64R2(); + case Mips::FeatureMips64r2: + setFeatureBits(Mips::FeatureMips64r2, "mips64r2"); + getTargetStreamer().emitDirectiveSetMips64R2(); break; } return false; @@ -2302,10 +2322,34 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) { return Error(Loc, ErrorStr); } - Parser.Lex(); // Eat the comma. + Parser.Lex(); // Eat the comma. 
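parseSetFeature routes each .set option through setFeatureBits, which toggles the subtarget feature only when it is not already set, so repeating a directive cannot flip the feature back off. A simplified model of that guard, with made-up feature constants in place of the generated Mips::Feature* bits:

    #include <cstdint>
    #include <cstdio>

    static const uint64_t FeatureDSP      = 1u << 0;
    static const uint64_t FeatureMips32r2 = 1u << 1;

    struct Subtarget {
      uint64_t Bits = 0;
      void toggle(uint64_t F) { Bits ^= F; }
    };

    // Like the parser's setFeatureBits(): the toggle is guarded so that
    // ".set dsp" twice in a row does not turn the feature back off.
    void setFeatureBits(Subtarget &STI, uint64_t Feature) {
      if (!(STI.Bits & Feature)) // only toggle when currently clear
        STI.toggle(Feature);
    }

    int main() {
      Subtarget STI;
      setFeatureBits(STI, FeatureDSP);
      setFeatureBits(STI, FeatureDSP); // no-op: already set
      printf("dsp=%d r2=%d\n", !!(STI.Bits & FeatureDSP),
             !!(STI.Bits & FeatureMips32r2));
    }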
return true; } +bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) { + if (Options.isReorder()) + Warning(Loc, ".cpload in reorder section"); + + // FIXME: Warn if cpload is used in Mips16 mode. + + SmallVector Reg; + OperandMatchResultTy ResTy = ParseAnyRegister(Reg); + if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) { + reportParseError("expected register containing function address"); + return false; + } + + MipsOperand *RegOpnd = static_cast(Reg[0]); + if (!RegOpnd->isGPRAsmReg()) { + reportParseError(RegOpnd->getStartLoc(), "invalid register"); + return false; + } + + getTargetStreamer().emitDirectiveCpload(RegOpnd->getGPR32Reg()); + delete RegOpnd; + return false; +} + bool MipsAsmParser::parseDirectiveCPSetup() { unsigned FuncReg; unsigned Save; @@ -2336,60 +2380,28 @@ bool MipsAsmParser::parseDirectiveCPSetup() { if (Parser.parseIdentifier(Name)) reportParseError("expected identifier"); MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - unsigned GPReg = getGPR(matchCPURegisterName("gp")); - // FIXME: The code below this point should be in the TargetStreamers. - // Only N32 and N64 emit anything for .cpsetup - // FIXME: We should only emit something for PIC mode too. - if (!isN32() && !isN64()) - return false; + getTargetStreamer().emitDirectiveCpsetup(FuncReg, Save, *Sym, SaveIsReg); + return false; +} - MCStreamer &TS = getStreamer(); - MCInst Inst; - // Either store the old $gp in a register or on the stack - if (SaveIsReg) { - // move $save, $gpreg - Inst.setOpcode(Mips::DADDu); - Inst.addOperand(MCOperand::CreateReg(Save)); - Inst.addOperand(MCOperand::CreateReg(GPReg)); - Inst.addOperand(MCOperand::CreateReg(getGPR(0))); - } else { - // sd $gpreg, offset($sp) - Inst.setOpcode(Mips::SD); - Inst.addOperand(MCOperand::CreateReg(GPReg)); - Inst.addOperand(MCOperand::CreateReg(getGPR(matchCPURegisterName("sp")))); - Inst.addOperand(MCOperand::CreateImm(Save)); - } - TS.EmitInstruction(Inst, STI); - Inst.clear(); - - const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create( - Sym->getName(), MCSymbolRefExpr::VK_Mips_GPOFF_HI, - getContext()); - const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create( - Sym->getName(), MCSymbolRefExpr::VK_Mips_GPOFF_LO, - getContext()); - // lui $gp, %hi(%neg(%gp_rel(funcSym))) - Inst.setOpcode(Mips::LUi); - Inst.addOperand(MCOperand::CreateReg(GPReg)); - Inst.addOperand(MCOperand::CreateExpr(HiExpr)); - TS.EmitInstruction(Inst, STI); - Inst.clear(); - - // addiu $gp, $gp, %lo(%neg(%gp_rel(funcSym))) - Inst.setOpcode(Mips::ADDiu); - Inst.addOperand(MCOperand::CreateReg(GPReg)); - Inst.addOperand(MCOperand::CreateReg(GPReg)); - Inst.addOperand(MCOperand::CreateExpr(LoExpr)); - TS.EmitInstruction(Inst, STI); - Inst.clear(); - - // daddu $gp, $gp, $funcreg - Inst.setOpcode(Mips::DADDu); - Inst.addOperand(MCOperand::CreateReg(GPReg)); - Inst.addOperand(MCOperand::CreateReg(GPReg)); - Inst.addOperand(MCOperand::CreateReg(FuncReg)); - TS.EmitInstruction(Inst, STI); +bool MipsAsmParser::parseDirectiveNaN() { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + const AsmToken &Tok = Parser.getTok(); + + if (Tok.getString() == "2008") { + Parser.Lex(); + getTargetStreamer().emitDirectiveNaN2008(); + return false; + } else if (Tok.getString() == "legacy") { + Parser.Lex(); + getTargetStreamer().emitDirectiveNaNLegacy(); + return false; + } + } + // If we don't recognize the option passed to the .nan + // directive (e.g. no option or unknown option), emit an error. 
+ reportParseError("invalid option in .nan directive"); return false; } @@ -2419,15 +2431,15 @@ bool MipsAsmParser::parseDirectiveSet() { Parser.eatToEndOfStatement(); return false; } else if (Tok.getString() == "micromips") { - return parseSetFeature(Mips::FeatureMicroMips); + return parseSetFeature(Mips::FeatureMicroMips); } else if (Tok.getString() == "mips32r2") { - return parseSetFeature(Mips::FeatureMips32r2); + return parseSetFeature(Mips::FeatureMips32r2); } else if (Tok.getString() == "mips64") { - return parseSetFeature(Mips::FeatureMips64); + return parseSetFeature(Mips::FeatureMips64); } else if (Tok.getString() == "mips64r2") { - return parseSetFeature(Mips::FeatureMips64r2); + return parseSetFeature(Mips::FeatureMips64r2); } else if (Tok.getString() == "dsp") { - return parseSetFeature(Mips::FeatureDSP); + return parseSetFeature(Mips::FeatureDSP); } else { // It is just an identifier, look for an assignment. parseSetAssignment(); @@ -2537,6 +2549,8 @@ bool MipsAsmParser::parseDirectiveOption() { bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); + if (IDVal == ".cpload") + return parseDirectiveCPLoad(DirectiveID.getLoc()); if (IDVal == ".dword") { parseDataDirective(8, DirectiveID.getLoc()); return false; @@ -2576,6 +2590,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { return false; } + if (IDVal == ".nan") + return parseDirectiveNaN(); + if (IDVal == ".gpword") { parseDirectiveGpWord(); return false; diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index c304ee3..bf67d71 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -7,6 +7,7 @@ tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter) tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM MipsGenFastISel.inc -gen-fast-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher) @@ -26,6 +27,7 @@ add_llvm_target(MipsCodeGen MipsCodeEmitter.cpp MipsConstantIslandPass.cpp MipsDelaySlotFiller.cpp + MipsFastISel.cpp MipsJITInfo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt index 7101c06..bb70fd3 100644 --- a/lib/Target/Mips/Disassembler/LLVMBuild.txt +++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = MipsDisassembler parent = Mips -required_libraries = MC Support MipsInfo +required_libraries = MC MipsInfo Support add_to_library_groups = Mips diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index fc3b922..95670aa 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -14,6 +14,7 @@ #include "Mips.h" #include "MipsRegisterInfo.h" #include "MipsSubtarget.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" @@ -24,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-disassembler" + typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { @@ -33,19 +36,16 @@ class MipsDisassemblerBase : public MCDisassembler { public: /// Constructor - Initializes the disassembler. 
/// - MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info, + MipsDisassemblerBase(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian) : - MCDisassembler(STI), RegInfo(Info), + MCDisassembler(STI, Ctx), IsN64(STI.getFeatureBits() & Mips::FeatureN64), isBigEndian(bigEndian) {} virtual ~MipsDisassemblerBase() {} - const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); } - bool isN64() const { return IsN64; } private: - OwningPtr RegInfo; bool IsN64; protected: bool isBigEndian; @@ -57,19 +57,23 @@ class MipsDisassembler : public MipsDisassemblerBase { public: /// Constructor - Initializes the disassembler. /// - MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info, + MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian) : - MipsDisassemblerBase(STI, Info, bigEndian) { + MipsDisassemblerBase(STI, Ctx, bigEndian) { IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips; } + bool isMips32r6() const { + return STI.getFeatureBits() & Mips::FeatureMips32r6; + } + /// getInstruction - See MCDisassembler. - virtual DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const override; }; @@ -78,17 +82,17 @@ class Mips64Disassembler : public MipsDisassemblerBase { public: /// Constructor - Initializes the disassembler. /// - Mips64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info, + Mips64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian) : - MipsDisassemblerBase(STI, Info, bigEndian) {} + MipsDisassemblerBase(STI, Ctx, bigEndian) {} /// getInstruction - See MCDisassembler. - virtual DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject ®ion, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const override; }; } // end anonymous namespace @@ -195,6 +199,11 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, @@ -205,6 +214,16 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeBranchTarget21(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeBranchTarget26(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder); + // DecodeBranchTargetMM - Decode microMIPS branch offset, which is // shifted left by 1 bit. static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, @@ -263,11 +282,40 @@ static DecodeStatus DecodeExtSize(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + /// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't /// handle. 
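This disassembler refactor stops each instance from owning its own MCRegisterInfo (the old OwningPtr field) and instead borrows the one registered in the shared MCContext. A minimal model of that ownership change; all types here are invented, and the register class is reduced to a vector of physical numbers:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct RegisterInfo {
      std::vector<unsigned> GPR32; // physical numbers for one class
    };

    struct Context {
      const RegisterInfo *RI; // owned by the context, shared by clients
      const RegisterInfo *getRegisterInfo() const { return RI; }
    };

    struct DisassemblerBase {
      Context &Ctx; // borrowed, not owned
      explicit DisassemblerBase(Context &C) : Ctx(C) {}
      Context &getContext() const { return Ctx; }
    };

    // Equivalent of the file's getReg(): class index -> physical register,
    // resolved through the context rather than a per-instance copy.
    unsigned getReg(const DisassemblerBase &Dis, unsigned RegNo) {
      const RegisterInfo *RI = Dis.getContext().getRegisterInfo();
      assert(RegNo < RI->GPR32.size());
      return RI->GPR32[RegNo];
    }

    int main() {
      RegisterInfo RI{{100, 101, 102, 103}};
      Context Ctx{&RI};
      DisassemblerBase Dis(Ctx);
      printf("reg 2 -> %u\n", getReg(Dis, 2));
    }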
template static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, const void *Decoder); + +template +static DecodeStatus +DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +template +static DecodeStatus +DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +template +static DecodeStatus +DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +template +static DecodeStatus +DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + +template +static DecodeStatus +DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + namespace llvm { extern Target TheMipselTarget, TheMipsTarget, TheMips64Target, TheMips64elTarget; @@ -275,26 +323,30 @@ extern Target TheMipselTarget, TheMipsTarget, TheMips64Target, static MCDisassembler *createMipsDisassembler( const Target &T, - const MCSubtargetInfo &STI) { - return new MipsDisassembler(STI, T.createMCRegInfo(""), true); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new MipsDisassembler(STI, Ctx, true); } static MCDisassembler *createMipselDisassembler( const Target &T, - const MCSubtargetInfo &STI) { - return new MipsDisassembler(STI, T.createMCRegInfo(""), false); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new MipsDisassembler(STI, Ctx, false); } static MCDisassembler *createMips64Disassembler( const Target &T, - const MCSubtargetInfo &STI) { - return new Mips64Disassembler(STI, T.createMCRegInfo(""), true); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new Mips64Disassembler(STI, Ctx, true); } static MCDisassembler *createMips64elDisassembler( const Target &T, - const MCSubtargetInfo &STI) { - return new Mips64Disassembler(STI, T.createMCRegInfo(""), false); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new Mips64Disassembler(STI, Ctx, false); } extern "C" void LLVMInitializeMipsDisassembler() { @@ -311,6 +363,12 @@ extern "C" void LLVMInitializeMipsDisassembler() { #include "MipsGenDisassemblerTables.inc" +static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { + const MipsDisassemblerBase *Dis = static_cast(D); + const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); + return *(RegInfo->getRegClass(RC).begin() + RegNo); +} + template static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, const void *Decoder) { @@ -357,6 +415,202 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address, return MCDisassembler::Success; } +template +static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the ADDI instruction from the earlier + // ISA's instead). 
+ // + // We have: + // 0b001000 sssss ttttt iiiiiiiiiiiiiiii + // BOVC if rs >= rt + // BEQZALC if rs == 0 && rt != 0 + // BEQC if rs < rt && rs != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + bool HasRs = false; + + if (Rs >= Rt) { + MI.setOpcode(Mips::BOVC); + HasRs = true; + } else if (Rs != 0 && Rs < Rt) { + MI.setOpcode(Mips::BEQC); + HasRs = true; + } else + MI.setOpcode(Mips::BEQZALC); + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); + + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the ADDI instruction from the earlier + // ISA's instead). + // + // We have: + // 0b011000 sssss ttttt iiiiiiiiiiiiiiii + // BNVC if rs >= rt + // BNEZALC if rs == 0 && rt != 0 + // BNEC if rs < rt && rs != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + bool HasRs = false; + + if (Rs >= Rt) { + MI.setOpcode(Mips::BNVC); + HasRs = true; + } else if (Rs != 0 && Rs < Rt) { + MI.setOpcode(Mips::BNEC); + HasRs = true; + } else + MI.setOpcode(Mips::BNEZALC); + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); + + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the BLEZL instruction from the earlier + // ISA's instead). + // + // We have: + // 0b010110 sssss ttttt iiiiiiiiiiiiiiii + // Invalid if rs == 0 + // BLEZC if rs == 0 && rt != 0 + // BGEZC if rs == rt && rt != 0 + // BGEC if rs != rt && rs != 0 && rt != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + + if (Rt == 0) + return MCDisassembler::Fail; + else if (Rs == 0) + MI.setOpcode(Mips::BLEZC); + else if (Rs == Rt) + MI.setOpcode(Mips::BGEZC); + else + return MCDisassembler::Fail; // FIXME: BGEC is not implemented yet. + + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the BGTZL instruction from the earlier + // ISA's instead). 
+ // + // We have: + // 0b010111 sssss ttttt iiiiiiiiiiiiiiii + // Invalid if rs == 0 + // BGTZC if rs == 0 && rt != 0 + // BLTZC if rs == rt && rt != 0 + // BLTC if rs != rt && rs != 0 && rt != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + + if (Rt == 0) + return MCDisassembler::Fail; + else if (Rs == 0) + MI.setOpcode(Mips::BGTZC); + else if (Rs == Rt) + MI.setOpcode(Mips::BLTZC); + else + return MCDisassembler::Fail; // FIXME: BLTC is not implemented yet. + + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + +template +static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the BGTZ instruction from the earlier + // ISA's instead). + // + // We have: + // 0b000111 sssss ttttt iiiiiiiiiiiiiiii + // BGTZ if rt == 0 + // BGTZALC if rs == 0 && rt != 0 + // BLTZALC if rs != 0 && rs == rt + // BLTUC if rs != 0 && rs != rt + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + bool HasRs = false; + bool HasRt = false; + + if (Rt == 0) { + MI.setOpcode(Mips::BGTZ); + HasRs = true; + } else if (Rs == 0) { + MI.setOpcode(Mips::BGTZALC); + HasRt = true; + } else if (Rs == Rt) { + MI.setOpcode(Mips::BLTZALC); + HasRs = true; + } else + return MCDisassembler::Fail; // BLTUC not implemented yet + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); + + if (HasRt) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + /// readInstruction - read four bytes from the MemoryObject /// and return 32 bit word sorted according to the given endianess static DecodeStatus readInstruction32(const MemoryObject ®ion, @@ -426,6 +680,15 @@ MipsDisassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; } + if (isMips32r6()) { + Result = decodeInstruction(DecoderTableMips32r6_64r632, instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + } + // Calling the auto-generated decoder function. 
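These Decode*GroupBranch helpers all follow one recipe: each MIPS32r6 compact-branch group shares a major opcode with a pre-R6 instruction, so the decoder extracts the rs/rt fields and picks the mnemonic purely from how they compare, then scales the sign-extended 16-bit offset by four. A self-contained sketch of the addi-group case, with the field layout taken from the comment blocks above and the helper names invented:

    #include <cstdint>
    #include <cstdio>

    static uint32_t field(uint32_t Insn, unsigned Lo, unsigned Width) {
      return (Insn >> Lo) & ((1u << Width) - 1);
    }

    static int32_t signExtend16(uint32_t V) { return (int32_t)(int16_t)V; }

    // BOVC when rs >= rt, BEQC when 0 < rs < rt, BEQZALC otherwise
    // (rs == 0, rt != 0) -- the DecodeAddiGroupBranch selection rule.
    const char *decodeAddiGroup(uint32_t Insn, int32_t &Offset) {
      uint32_t Rs = field(Insn, 21, 5);
      uint32_t Rt = field(Insn, 16, 5);
      Offset = signExtend16(field(Insn, 0, 16)) * 4; // word -> byte offset
      if (Rs >= Rt) return "bovc";
      if (Rs != 0 && Rs < Rt) return "beqc";
      return "beqzalc";
    }

    int main() {
      int32_t Off;
      // rs=0, rt=3, imm=-4: rs < rt and rs == 0 -> beqzalc, offset -16.
      uint32_t Insn = (0u << 21) | (3u << 16) | 0xFFFCu;
      printf("%s %d\n", decodeAddiGroup(Insn, Off), Off);
    }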
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, this, STI); @@ -469,11 +732,6 @@ Mips64Disassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; } -static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { - const MipsDisassemblerBase *Dis = static_cast(D); - return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo); -} - static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -828,12 +1086,23 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::COP2RegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTarget(MCInst &Inst, unsigned Offset, uint64_t Address, const void *Decoder) { - unsigned BranchOffset = Offset & 0xffff; - BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4; + int32_t BranchOffset = (SignExtend32<16>(Offset) << 2) + 4; Inst.addOperand(MCOperand::CreateImm(BranchOffset)); return MCDisassembler::Success; } @@ -848,12 +1117,31 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeBranchTarget21(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder) { + int32_t BranchOffset = SignExtend32<21>(Offset) << 2; + + Inst.addOperand(MCOperand::CreateImm(BranchOffset)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeBranchTarget26(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder) { + int32_t BranchOffset = SignExtend32<26>(Offset) << 2; + + Inst.addOperand(MCOperand::CreateImm(BranchOffset)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, unsigned Offset, uint64_t Address, const void *Decoder) { - unsigned BranchOffset = Offset & 0xffff; - BranchOffset = SignExtend32<18>(BranchOffset << 1); + int32_t BranchOffset = SignExtend32<16>(Offset) << 1; Inst.addOperand(MCOperand::CreateImm(BranchOffset)); return MCDisassembler::Success; } @@ -903,3 +1191,9 @@ static DecodeStatus DecodeExtSize(MCInst &Inst, Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size))); return MCDisassembler::Success; } + +static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) << 2)); + return MCDisassembler::Success; +} diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index c8f08f1..8c79751 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "MipsInstPrinter.h" #include "MCTargetDesc/MipsMCExpr.h" #include "MipsInstrInfo.h" @@ -24,6 +23,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #define PRINT_ALIAS_INSTR #include "MipsGenAsmWriter.inc" @@ -165,6 +166,8 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { case MCSymbolRefExpr::VK_Mips_GOT_LO16: OS << "%got_lo("; break; case MCSymbolRefExpr::VK_Mips_CALL_HI16: OS << "%call_hi("; break; case 
MCSymbolRefExpr::VK_Mips_CALL_LO16: OS << "%call_lo("; break; + case MCSymbolRefExpr::VK_Mips_PCREL_HI16: OS << "%pcrel_hi("; break; + case MCSymbolRefExpr::VK_Mips_PCREL_LO16: OS << "%pcrel_lo("; break; } OS << SRE->getSymbol(); diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 2b745f0..550a0f1 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -85,10 +85,12 @@ public: void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; bool printAliasInstr(const MCInst *MI, raw_ostream &OS); + void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, raw_ostream &O); private: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 0f99ecc..5375a00 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -31,7 +31,7 @@ using namespace llvm; // Prepare value for the target space for it static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - MCContext *Ctx = NULL) { + MCContext *Ctx = nullptr) { unsigned Kind = Fixup.getKind(); @@ -56,6 +56,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case Mips::fixup_MICROMIPS_GOT_PAGE: case Mips::fixup_MICROMIPS_GOT_OFST: case Mips::fixup_MICROMIPS_GOT_DISP: + case Mips::fixup_MIPS_PCLO16: break; case Mips::fixup_Mips_PC16: // So far we are only using this type for branches. @@ -80,6 +81,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case Mips::fixup_Mips_GOT_HI16: case Mips::fixup_Mips_CALL_HI16: case Mips::fixup_MICROMIPS_HI16: + case Mips::fixup_MIPS_PCHI16: // Get the 2nd 16-bits. Also add 1 if bit 15 is 1. Value = ((Value + 0x8000) >> 16) & 0xffff; break; @@ -102,6 +104,22 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, if (!isIntN(16, Value) && Ctx) Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup"); break; + case Mips::fixup_MIPS_PC21_S2: + Value -= 4; + // Forcing a signed division because Value can be negative. + Value = (int64_t) Value / 4; + // We now check if Value can be encoded as a 21-bit signed immediate. + if (!isIntN(21, Value) && Ctx) + Ctx->FatalError(Fixup.getLoc(), "out of range PC21 fixup"); + break; + case Mips::fixup_MIPS_PC26_S2: + Value -= 4; + // Forcing a signed division because Value can be negative. + Value = (int64_t) Value / 4; + // We now check if Value can be encoded as a 26-bit signed immediate. + if (!isIntN(26, Value) && Ctx) + Ctx->FatalError(Fixup.getLoc(), "out of range PC26 fixup"); + break; } return Value; @@ -189,7 +207,7 @@ void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, const MCFixupKindInfo &MipsAsmBackend:: getFixupKindInfo(MCFixupKind Kind) const { - const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = { + const static MCFixupKindInfo LittleEndianInfos[Mips::NumTargetFixupKinds] = { // This table *must* be in same the order of fixup_* kinds in // MipsFixupKinds.h. 
// @@ -229,6 +247,10 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_Mips_GOT_LO16", 0, 16, 0 }, { "fixup_Mips_CALL_HI16", 0, 16, 0 }, { "fixup_Mips_CALL_LO16", 0, 16, 0 }, + { "fixup_MIPS_PC21_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PC26_S2", 0, 26, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PCHI16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PCLO16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_MICROMIPS_26_S1", 0, 26, 0 }, { "fixup_MICROMIPS_HI16", 0, 16, 0 }, { "fixup_MICROMIPS_LO16", 0, 16, 0 }, @@ -246,12 +268,76 @@ getFixupKindInfo(MCFixupKind Kind) const { { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 } }; + const static MCFixupKindInfo BigEndianInfos[Mips::NumTargetFixupKinds] = { + // This table *must* be in same the order of fixup_* kinds in + // MipsFixupKinds.h. + // + // name offset bits flags + { "fixup_Mips_16", 16, 16, 0 }, + { "fixup_Mips_32", 0, 32, 0 }, + { "fixup_Mips_REL32", 0, 32, 0 }, + { "fixup_Mips_26", 6, 26, 0 }, + { "fixup_Mips_HI16", 16, 16, 0 }, + { "fixup_Mips_LO16", 16, 16, 0 }, + { "fixup_Mips_GPREL16", 16, 16, 0 }, + { "fixup_Mips_LITERAL", 16, 16, 0 }, + { "fixup_Mips_GOT_Global", 16, 16, 0 }, + { "fixup_Mips_GOT_Local", 16, 16, 0 }, + { "fixup_Mips_PC16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Mips_CALL16", 16, 16, 0 }, + { "fixup_Mips_GPREL32", 0, 32, 0 }, + { "fixup_Mips_SHIFT5", 21, 5, 0 }, + { "fixup_Mips_SHIFT6", 21, 5, 0 }, + { "fixup_Mips_64", 0, 64, 0 }, + { "fixup_Mips_TLSGD", 16, 16, 0 }, + { "fixup_Mips_GOTTPREL", 16, 16, 0 }, + { "fixup_Mips_TPREL_HI", 16, 16, 0 }, + { "fixup_Mips_TPREL_LO", 16, 16, 0 }, + { "fixup_Mips_TLSLDM", 16, 16, 0 }, + { "fixup_Mips_DTPREL_HI", 16, 16, 0 }, + { "fixup_Mips_DTPREL_LO", 16, 16, 0 }, + { "fixup_Mips_Branch_PCRel",16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Mips_GPOFF_HI", 16, 16, 0 }, + { "fixup_Mips_GPOFF_LO", 16, 16, 0 }, + { "fixup_Mips_GOT_PAGE", 16, 16, 0 }, + { "fixup_Mips_GOT_OFST", 16, 16, 0 }, + { "fixup_Mips_GOT_DISP", 16, 16, 0 }, + { "fixup_Mips_HIGHER", 16, 16, 0 }, + { "fixup_Mips_HIGHEST", 16, 16, 0 }, + { "fixup_Mips_GOT_HI16", 16, 16, 0 }, + { "fixup_Mips_GOT_LO16", 16, 16, 0 }, + { "fixup_Mips_CALL_HI16", 16, 16, 0 }, + { "fixup_Mips_CALL_LO16", 16, 16, 0 }, + { "fixup_MIPS_PC21_S2", 11, 21, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PC26_S2", 6, 26, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PCHI16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MIPS_PCLO16", 16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MICROMIPS_26_S1", 6, 26, 0 }, + { "fixup_MICROMIPS_HI16", 16, 16, 0 }, + { "fixup_MICROMIPS_LO16", 16, 16, 0 }, + { "fixup_MICROMIPS_GOT16", 16, 16, 0 }, + { "fixup_MICROMIPS_PC16_S1",16, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_MICROMIPS_CALL16", 16, 16, 0 }, + { "fixup_MICROMIPS_GOT_DISP", 16, 16, 0 }, + { "fixup_MICROMIPS_GOT_PAGE", 16, 16, 0 }, + { "fixup_MICROMIPS_GOT_OFST", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_GD", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_LDM", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_DTPREL_HI16", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_DTPREL_LO16", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 }, + { "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 } + }; + if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); - return Infos[Kind - FirstTargetFixupKind]; + + if (IsLittle) + return LittleEndianInfos[Kind - FirstTargetFixupKind]; + return BigEndianInfos[Kind - 
FirstTargetFixupKind]; } /// WriteNopData - Write an (optimal) nop sequence of Count bytes diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index cc5207a..bc695e6 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -37,14 +37,14 @@ public: : MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {} - MCObjectWriter *createObjectWriter(raw_ostream &OS) const; + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override; void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const; + uint64_t Value, bool IsPCRel) const override; - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const; + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; - unsigned getNumFixupKinds() const { + unsigned getNumFixupKinds() const override { return Mips::NumTargetFixupKinds; } @@ -55,7 +55,7 @@ public: /// relaxation. /// /// \param Inst - The instruction to test. - bool mayNeedRelaxation(const MCInst &Inst) const { + bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } @@ -63,7 +63,7 @@ public: /// fixup requires the associated instruction to be relaxed. bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { + const MCAsmLayout &Layout) const override { // FIXME. assert(0 && "RelaxInstruction() unimplemented"); return false; @@ -75,16 +75,16 @@ public: /// \param Inst - The instruction to relax, which may be the same /// as the output. /// \param [out] Res On return, the relaxed instruction. - void relaxInstruction(const MCInst &Inst, MCInst &Res) const {} + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {} /// @} - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const; + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, const MCValue &Target, uint64_t &Value, - bool &IsResolved); + bool &IsResolved) override; }; // class MipsAsmBackend diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 794978b..74c12ff 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -193,6 +193,18 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, case Mips::fixup_MICROMIPS_TLS_TPREL_LO16: Type = ELF::R_MICROMIPS_TLS_TPREL_LO16; break; + case Mips::fixup_MIPS_PC21_S2: + Type = ELF::R_MIPS_PC21_S2; + break; + case Mips::fixup_MIPS_PC26_S2: + Type = ELF::R_MIPS_PC26_S2; + break; + case Mips::fixup_MIPS_PCHI16: + Type = ELF::R_MIPS_PCHI16; + break; + case Mips::fixup_MIPS_PCLO16: + Type = ELF::R_MIPS_PCLO16; + break; } return Type; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index dc6192c..3079004 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -128,6 +128,18 @@ namespace Mips { // resulting in - R_MIPS_CALL_LO16 fixup_Mips_CALL_LO16, + // resulting in - R_MIPS_PC21_S2 + fixup_MIPS_PC21_S2, + + // resulting in - R_MIPS_PC26_S2 + fixup_MIPS_PC26_S2, + + // resulting in - R_MIPS_PCHI16 + fixup_MIPS_PCHI16, + + // resulting in - R_MIPS_PCLO16 + fixup_MIPS_PCLO16, + // 
resulting in - R_MICROMIPS_26_S1 fixup_MICROMIPS_26_S1, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h index 1000113..37ba0c4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h @@ -20,7 +20,7 @@ namespace llvm { class StringRef; class MipsMCAsmInfo : public MCAsmInfoELF { - virtual void anchor(); + void anchor() override; public: explicit MipsMCAsmInfo(StringRef TT); }; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index edd2146..85e0bf1 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -12,8 +12,6 @@ //===----------------------------------------------------------------------===// // -#define DEBUG_TYPE "mccodeemitter" - #include "MipsMCCodeEmitter.h" #include "MCTargetDesc/MipsFixupKinds.h" #include "MCTargetDesc/MipsMCExpr.h" @@ -28,6 +26,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" +#define DEBUG_TYPE "mccodeemitter" + #define GET_INSTRMAP_INFO #include "MipsGenInstrInfo.inc" #undef GET_INSTRMAP_INFO @@ -242,6 +242,69 @@ getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo, return 0; } +/// getBranchTarget21OpValue - Return binary encoding of the branch +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + const MCOperand &MO = MI.getOperand(OpNo); + + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm() >> 2; + + assert(MO.isExpr() && + "getBranchTarget21OpValue expects only expressions or immediates"); + + const MCExpr *Expr = MO.getExpr(); + Fixups.push_back(MCFixup::Create(0, Expr, + MCFixupKind(Mips::fixup_MIPS_PC21_S2))); + return 0; +} + +/// getBranchTarget26OpValue - Return binary encoding of the branch +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + const MCOperand &MO = MI.getOperand(OpNo); + + // If the destination is an immediate, divide by 4. + if (MO.isImm()) return MO.getImm() >> 2; + + assert(MO.isExpr() && + "getBranchTarget26OpValue expects only expressions or immediates"); + + const MCExpr *Expr = MO.getExpr(); + Fixups.push_back(MCFixup::Create(0, Expr, + MCFixupKind(Mips::fixup_MIPS_PC26_S2))); + return 0; +} + +/// getJumpOffset16OpValue - Return binary encoding of the jump +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + const MCOperand &MO = MI.getOperand(OpNo); + + if (MO.isImm()) return MO.getImm(); + + assert(MO.isExpr() && + "getJumpOffset16OpValue expects only expressions or an immediate"); + + // TODO: Push fixup. + return 0; +} + /// getJumpTargetOpValue - Return binary encoding of the jump /// target operand. If the machine operand requires relocation, /// record the relocation and return zero. 
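getBranchTarget21OpValue and its 26-bit sibling show the standard encoder split: an already-resolved immediate is scaled from bytes to words, while a symbolic target records a PC-relative fixup and encodes as zero for relocation to fill in later. A schematic version with the fixup reduced to a name/symbol pair; the 21-bit mask is added here for clarity, whereas the real code relies on the operand being in range:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Fixup { std::string Kind; std::string Sym; };

    unsigned encodeBranchTarget21(bool IsImm, long Imm,
                                  const std::string &Sym,
                                  std::vector<Fixup> &Fixups) {
      if (IsImm)
        return (unsigned)(Imm >> 2) & 0x1FFFFF; // byte -> word offset
      Fixups.push_back({"fixup_MIPS_PC21_S2", Sym});
      return 0; // placeholder; the fixup patches it later
    }

    int main() {
      std::vector<Fixup> Fixups;
      printf("imm -> 0x%x\n", encodeBranchTarget21(true, 64, "", Fixups));
      printf("sym -> 0x%x (fixups=%zu)\n",
             encodeBranchTarget21(false, 0, "foo", Fixups), Fixups.size());
    }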
@@ -417,6 +480,12 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl &Fixups, case MCSymbolRefExpr::VK_Mips_CALL_LO16: FixupKind = Mips::fixup_Mips_CALL_LO16; break; + case MCSymbolRefExpr::VK_Mips_PCREL_HI16: + FixupKind = Mips::fixup_MIPS_PCHI16; + break; + case MCSymbolRefExpr::VK_Mips_PCREL_LO16: + FixupKind = Mips::fixup_MIPS_PCLO16; + break; } // switch Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); @@ -548,5 +617,15 @@ MipsMCCodeEmitter::getLSAImmEncoding(const MCInst &MI, unsigned OpNo, return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) - 1; } -#include "MipsGenMCCodeEmitter.inc" +unsigned +MipsMCCodeEmitter::getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + assert(MI.getOperand(OpNo).isImm()); + // The immediate is encoded as 'immediate << 2'. + unsigned Res = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); + assert((Res & 3) == 0); + return Res >> 2; +} +#include "MipsGenMCCodeEmitter.inc" diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h index 49a2490..3f7daab 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h @@ -52,7 +52,7 @@ public: void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const override; // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. @@ -88,6 +88,27 @@ public: SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + // getBranchTarget21OpValue - Return binary encoding of the branch + // offset operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // getBranchTarget26OpValue - Return binary encoding of the branch + // offset operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + // getJumpOffset16OpValue - Return binary encoding of the jump + // offset operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + // getMachineOpValue - Return binary encoding of operand. If the machin // operand requires relocation, record the relocation and return zero. 
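getSimm19Lsl2Encoding asserts the operand is word-aligned and stores value/4; the matching DecodeSimm19Lsl2 sign-extends the 19-bit field and scales it back up. A round-trip check of that scheme; the sign-extension below uses the usual shift trick and assumes two's-complement arithmetic:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Assembler side: reject unaligned values, keep 19 bits of value/4.
    uint32_t encodeSimm19Lsl2(int32_t Value) {
      assert((Value & 3) == 0 && "operand must be a multiple of 4");
      return ((uint32_t)Value >> 2) & 0x7FFFF;
    }

    // Disassembler side: sign-extend from 19 bits, scale back to bytes.
    int32_t decodeSimm19Lsl2(uint32_t Field) {
      int32_t V = (int32_t)(Field << 13) >> 13;
      return V * 4;
    }

    int main() {
      for (int32_t V : {-1048576, -4, 0, 4, 1048572}) {
        uint32_t F = encodeSimm19Lsl2(V);
        printf("%8d -> 0x%05x -> %8d\n", V, F, decodeSimm19Lsl2(F));
        assert(decodeSimm19Lsl2(F) == V); // exact round trip
      }
    }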
unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, @@ -116,6 +137,10 @@ public: SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + unsigned getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp index c7ba12d..21ccc3c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mipsmcexpr" #include "MipsMCExpr.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -15,6 +14,8 @@ using namespace llvm; +#define DEBUG_TYPE "mipsmcexpr" + bool MipsMCExpr::isSupportedBinaryExpr(MCSymbolRefExpr::VariantKind VK, const MCBinaryExpr *BE) { switch (VK) { diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h index 722bba7..8d7aacd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h @@ -46,16 +46,16 @@ public: /// getSubExpr - Get the child of this expression. const MCExpr *getSubExpr() const { return Expr; } - void PrintImpl(raw_ostream &OS) const; + void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - void AddValueSymbols(MCAssembler *) const; - const MCSection *FindAssociatedSection() const { + const MCAsmLayout *Layout) const override; + void AddValueSymbols(MCAssembler *) const override; + const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } // There are no TLS MipsMCExprs at the moment. - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h index 6992d06..01d5363 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -18,7 +18,7 @@ namespace llvm { static const unsigned MIPS_NACL_BUNDLE_ALIGN = 4u; bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx, - bool *IsStore = NULL); + bool *IsStore = nullptr); bool baseRegNeedsLoadStoreMask(unsigned Reg); // This function creates an MCELFStreamer for Mips NaCl. diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index eecca68..660e5a7 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -30,6 +30,8 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "MipsGenInstrInfo.inc" @@ -39,8 +41,6 @@ #define GET_REGINFO_MC_DESC #include "MipsGenRegisterInfo.inc" -using namespace llvm; - /// Select the Mips CPU for the given triple and cpu name. 
/// FIXME: Merge with the copy in MipsSubtarget.cpp static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) { @@ -79,7 +79,7 @@ static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI = new MipsMCAsmInfo(TT); unsigned SP = MRI.getDwarfRegNum(Mips::SP, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, SP, 0); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0); MAI->addInitialFrameState(Inst); return MAI; @@ -124,12 +124,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCStreamer * createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useCFI, bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = - llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory, - InstPrint, CE, TAB, ShowInst); + MCStreamer *S = llvm::createAsmStreamer( + Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); new MipsTargetAsmStreamer(*S, OS); return S; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index 639a058..cd6be73 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -17,8 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-mc-nacl" - #include "Mips.h" #include "MipsELFStreamer.h" #include "MipsMCNaCl.h" @@ -26,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-mc-nacl" + namespace { const unsigned IndirectBranchMaskReg = Mips::T6; @@ -120,7 +120,8 @@ private: public: /// This function is the one used to emit instruction data into the ELF /// streamer. We override it to mask dangerous instructions. - virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { + void EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) override { // Sandbox indirect jumps. 
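The NaCl streamer's EmitInstruction override is the sandboxing choke point: every instruction flows through it, so dangerous ones can be prefixed with a masking instruction before they reach the output. A toy pass-through streamer showing the shape of that hook, with instructions reduced to mnemonic/register pairs and an invented mask operation:

    #include <cstdio>
    #include <cstring>

    struct Inst { const char *Op; unsigned Reg; };

    struct Streamer {
      virtual ~Streamer() = default;
      virtual void EmitInstruction(const Inst &I) {
        printf("%s $%u\n", I.Op, I.Reg);
      }
    };

    struct SandboxingStreamer : Streamer {
      void EmitInstruction(const Inst &I) override {
        if (strcmp(I.Op, "jr") == 0)              // indirect jump: mask first
          Streamer::EmitInstruction({"and-mask", I.Reg}); // stand-in mask op
        Streamer::EmitInstruction(I);
      }
    };

    int main() {
      SandboxingStreamer S;
      S.EmitInstruction({"addu", 4});
      S.EmitInstruction({"jr", 25}); // emits the mask, then the jump
    }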
if (isIndirectJump(Inst)) { if (PendingCall) diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index fb6aff2..a8fa272 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -85,6 +85,13 @@ void MipsTargetAsmStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { } void MipsTargetAsmStreamer::emitDirectiveAbiCalls() { OS << "\t.abicalls\n"; } + +void MipsTargetAsmStreamer::emitDirectiveNaN2008() { OS << "\t.nan\t2008\n"; } + +void MipsTargetAsmStreamer::emitDirectiveNaNLegacy() { + OS << "\t.nan\tlegacy\n"; +} + void MipsTargetAsmStreamer::emitDirectiveOptionPic0() { OS << "\t.option\tpic0\n"; } @@ -137,6 +144,29 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask, OS << "," << FPUTopSavedRegOff << '\n'; } +void MipsTargetAsmStreamer::emitDirectiveCpload(unsigned RegNo) { + OS << "\t.cpload\t$" + << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n"; +} + +void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, + int RegOrOffset, + const MCSymbol &Sym, + bool IsReg) { + OS << "\t.cpsetup\t$" + << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << ", "; + + if (IsReg) + OS << "$" + << StringRef(MipsInstPrinter::getRegisterName(RegOrOffset)).lower(); + else + OS << RegOrOffset; + + OS << ", "; + + OS << Sym.getName() << "\n"; +} + // This part is for ELF object output. MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI) @@ -180,6 +210,10 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, EFlags |= ELF::EF_MIPS_ABI_O32; } + // Other options. + if (Features & Mips::FeatureNaN2008) + EFlags |= ELF::EF_MIPS_NAN2008; + MCA.setELFHeaderEFlags(EFlags); } @@ -325,6 +359,21 @@ void MipsTargetELFStreamer::emitDirectiveAbiCalls() { Flags |= ELF::EF_MIPS_CPIC | ELF::EF_MIPS_PIC; MCA.setELFHeaderEFlags(Flags); } + +void MipsTargetELFStreamer::emitDirectiveNaN2008() { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags |= ELF::EF_MIPS_NAN2008; + MCA.setELFHeaderEFlags(Flags); +} + +void MipsTargetELFStreamer::emitDirectiveNaNLegacy() { + MCAssembler &MCA = getStreamer().getAssembler(); + unsigned Flags = MCA.getELFHeaderEFlags(); + Flags &= ~ELF::EF_MIPS_NAN2008; + MCA.setELFHeaderEFlags(Flags); +} + void MipsTargetELFStreamer::emitDirectiveOptionPic0() { MCAssembler &MCA = getStreamer().getAssembler(); unsigned Flags = MCA.getELFHeaderEFlags(); @@ -376,3 +425,107 @@ void MipsTargetELFStreamer::emitDirectiveSetMips64R2() { void MipsTargetELFStreamer::emitDirectiveSetDsp() { // No action required for ELF output. } + +void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) { + // .cpload $reg + // This directive expands to: + // lui $gp, %hi(_gp_disp) + // addui $gp, $gp, %lo(_gp_disp) + // addu $gp, $gp, $reg + // when support for position independent code is enabled. + if (!Pic || (isN32() || isN64())) + return; + + // There's a GNU extension controlled by -mno-shared that allows + // locally-binding symbols to be accessed using absolute addresses. + // This is currently not supported. 
When supported -mno-shared makes + // .cpload expand to: + // lui $gp, %hi(__gnu_local_gp) + // addiu $gp, $gp, %lo(__gnu_local_gp) + + StringRef SymName("_gp_disp"); + MCAssembler &MCA = getStreamer().getAssembler(); + MCSymbol *GP_Disp = MCA.getContext().GetOrCreateSymbol(SymName); + MCA.getOrCreateSymbolData(*GP_Disp); + + MCInst TmpInst; + TmpInst.setOpcode(Mips::LUi); + TmpInst.addOperand(MCOperand::CreateReg(Mips::GP)); + const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::Create( + "_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_HI, MCA.getContext()); + TmpInst.addOperand(MCOperand::CreateExpr(HiSym)); + getStreamer().EmitInstruction(TmpInst, STI); + + TmpInst.clear(); + + TmpInst.setOpcode(Mips::ADDiu); + TmpInst.addOperand(MCOperand::CreateReg(Mips::GP)); + TmpInst.addOperand(MCOperand::CreateReg(Mips::GP)); + const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::Create( + "_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_LO, MCA.getContext()); + TmpInst.addOperand(MCOperand::CreateExpr(LoSym)); + getStreamer().EmitInstruction(TmpInst, STI); + + TmpInst.clear(); + + TmpInst.setOpcode(Mips::ADDu); + TmpInst.addOperand(MCOperand::CreateReg(Mips::GP)); + TmpInst.addOperand(MCOperand::CreateReg(Mips::GP)); + TmpInst.addOperand(MCOperand::CreateReg(RegNo)); + getStreamer().EmitInstruction(TmpInst, STI); +} + +void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, + int RegOrOffset, + const MCSymbol &Sym, + bool IsReg) { + // Only N32 and N64 emit anything for .cpsetup iff PIC is set. + if (!Pic || !(isN32() || isN64())) + return; + + MCAssembler &MCA = getStreamer().getAssembler(); + MCInst Inst; + + // Either store the old $gp in a register or on the stack + if (IsReg) { + // move $save, $gpreg + Inst.setOpcode(Mips::DADDu); + Inst.addOperand(MCOperand::CreateReg(RegOrOffset)); + Inst.addOperand(MCOperand::CreateReg(Mips::GP)); + Inst.addOperand(MCOperand::CreateReg(Mips::ZERO)); + } else { + // sd $gpreg, offset($sp) + Inst.setOpcode(Mips::SD); + Inst.addOperand(MCOperand::CreateReg(Mips::GP)); + Inst.addOperand(MCOperand::CreateReg(Mips::SP)); + Inst.addOperand(MCOperand::CreateImm(RegOrOffset)); + } + getStreamer().EmitInstruction(Inst, STI); + Inst.clear(); + + const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create( + Sym.getName(), MCSymbolRefExpr::VK_Mips_GPOFF_HI, MCA.getContext()); + const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create( + Sym.getName(), MCSymbolRefExpr::VK_Mips_GPOFF_LO, MCA.getContext()); + // lui $gp, %hi(%neg(%gp_rel(funcSym))) + Inst.setOpcode(Mips::LUi); + Inst.addOperand(MCOperand::CreateReg(Mips::GP)); + Inst.addOperand(MCOperand::CreateExpr(HiExpr)); + getStreamer().EmitInstruction(Inst, STI); + Inst.clear(); + + // addiu $gp, $gp, %lo(%neg(%gp_rel(funcSym))) + Inst.setOpcode(Mips::ADDiu); + Inst.addOperand(MCOperand::CreateReg(Mips::GP)); + Inst.addOperand(MCOperand::CreateReg(Mips::GP)); + Inst.addOperand(MCOperand::CreateExpr(LoExpr)); + getStreamer().EmitInstruction(Inst, STI); + Inst.clear(); + + // daddu $gp, $gp, $funcreg + Inst.setOpcode(Mips::DADDu); + Inst.addOperand(MCOperand::CreateReg(Mips::GP)); + Inst.addOperand(MCOperand::CreateReg(Mips::GP)); + Inst.addOperand(MCOperand::CreateReg(RegNo)); + getStreamer().EmitInstruction(Inst, STI); +} diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index bcf951e..41efa47 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -13,7 +13,7 @@ TARGET = Mips # Make sure that tblgen is run, first thing. 
BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ - MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ + MipsGenAsmWriter.inc MipsGenFastISel.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \ MipsGenDisassemblerTables.inc \ diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td index 91d447a..d95f9b0 100644 --- a/lib/Target/Mips/MicroMipsInstrFPU.td +++ b/lib/Target/Mips/MicroMipsInstrFPU.td @@ -28,9 +28,9 @@ def LWXC1_MM : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, def SWXC1_MM : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM_MM<0x88>; def LUXC1_MM : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, - LWXC1_FM_MM<0x148>; + LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2; def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, - SWXC1_FM_MM<0x188>; + SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2; def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM_MM<0>; @@ -70,9 +70,9 @@ def FSQRT_MM : MMRel, ABSS_FT<"sqrt.d", AFGR64Opnd, AFGR64Opnd, II_SQRT_D, fsqrt>, ROUND_W_FM_MM<1, 0x28>; def CVT_L_S_MM : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>, - ROUND_W_FM_MM<0, 0x4>; + ROUND_W_FM_MM<0, 0x4>, INSN_MIPS3_32R2; def CVT_L_D64_MM : MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>, - ROUND_W_FM_MM<1, 0x4>; + ROUND_W_FM_MM<1, 0x4>, INSN_MIPS3_32R2; def FABS_S_MM : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>, ABS_FM_MM<0, 0xd>; @@ -95,7 +95,7 @@ def FNEG_MM : MMRel, ABSS_FT<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>, ABS_FM_MM<1, 0x2d>; def FMOV_D32_MM : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>, - ABS_FM_MM<1, 0x1>, Requires<[NotFP64bit, HasStdEnc]>; + ABS_FM_MM<1, 0x1>, AdditionalRequires<[NotFP64bit]>; def MOVZ_I_S_MM : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>, CMov_I_F_FM_MM<0x78, 0>; @@ -124,9 +124,9 @@ def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, MFC1_FM_MM<0xa0>; def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>, - MFC1_FM_MM<3>; + MFC1_FM_MM<3>, ISA_MIPS32R2; def MTHC1_MM : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>, - MFC1_FM_MM<7>; + MFC1_FM_MM<7>, ISA_MIPS32R2; def MADD_S_MM : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, MADDS_FM_MM<0x1>; diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 3f13e83..9904bc6 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -218,15 +218,20 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def MSUBU_MM : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM_MM<0x3ec>; /// Count Leading - def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM_MM<0x16c>; - def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM_MM<0x12c>; + def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM_MM<0x16c>, + ISA_MIPS32; + def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM_MM<0x12c>, + ISA_MIPS32; /// Sign Ext In Register Instructions. 
- def SEB_MM : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>, SEB_FM_MM<0x0ac>; - def SEH_MM : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>, SEB_FM_MM<0x0ec>; + def SEB_MM : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>, + SEB_FM_MM<0x0ac>, ISA_MIPS32R2; + def SEH_MM : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>, + SEB_FM_MM<0x0ec>, ISA_MIPS32R2; /// Word Swap Bytes Within Halfwords - def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>; + def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>, + ISA_MIPS32R2; def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM_MM<0x2c>; @@ -268,8 +273,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def WAIT_MM : WaitMM<"wait">, WAIT_FM_MM; def ERET_MM : MMRel, ER_FT<"eret">, ER_FM_MM<0x3cd>; def DERET_MM : MMRel, ER_FT<"deret">, ER_FM_MM<0x38d>; - def EI_MM : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM_MM<0x15d>; - def DI_MM : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM_MM<0x11d>; + def EI_MM : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM_MM<0x15d>, + ISA_MIPS32R2; + def DI_MM : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM_MM<0x11d>, + ISA_MIPS32R2; /// Trap Instructions def TEQ_MM : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM_MM<0x0>; @@ -296,5 +303,5 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { //===----------------------------------------------------------------------===// let Predicates = [InMicroMips] in { - def : InstAlias<"wait", (WAIT_MM 0x0), 1>; + def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>; } diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 10a4699..ea16331 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -15,6 +15,33 @@ include "llvm/Target/Target.td" +// The overall idea of the PredicateControl class is to chop the Predicates list +// into subsets that are usually overridden independently. This allows +// subclasses to partially override the predicates of their superclasses without +// having to re-add all the existing predicates. 
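+// For example (illustrative, not drawn verbatim from this patch): an ISA
+// adjective class that only sets the instruction-group subset, e.g.
+//   class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
+// composes with AdditionalRequires<[NotFP64bit]> on the same instruction;
+// each mixin overrides only its own sublist, and the !listconcat below
+// stitches the sublists back together into the final Predicates list.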
+class PredicateControl {
+  // Predicates for the encoding scheme in use such as HasStdEnc
+  list<Predicate> EncodingPredicates = [];
+  // Predicates for the GPR size such as IsGP64bit
+  list<Predicate> GPRPredicates = [];
+  // Predicates for the FGR size and layout such as IsFP64bit
+  list<Predicate> FGRPredicates = [];
+  // Predicates for the instruction group membership such as ISA's and ASE's
+  list<Predicate> InsnPredicates = [];
+  // Predicates for anything else
+  list<Predicate> AdditionalPredicates = [];
+  list<Predicate> Predicates = !listconcat(EncodingPredicates,
+                                           GPRPredicates,
+                                           FGRPredicates,
+                                           InsnPredicates,
+                                           AdditionalPredicates);
+}
+
+// Like Requires<> but for the AdditionalPredicates list
+class AdditionalRequires<list<Predicate> preds> {
+  list<Predicate> AdditionalPredicates = preds;
+}
+
 //===----------------------------------------------------------------------===//
 // Register File, Calling Conv, Instruction Descriptions
 //===----------------------------------------------------------------------===//
@@ -34,6 +61,8 @@ def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
                                       "General Purpose Registers are 64-bit wide.">;
 def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
                                       "Support 64-bit FP registers.">;
+def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
+                                      "IEEE 754-2008 NaN encoding.">;
 def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
                                           "true", "Only supports single precision float">;
 def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
@@ -46,33 +75,62 @@ def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
                                    "Enable eabi ABI">;
 def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", "true",
                                    "Enable vector FPU instructions.">;
-def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
-                                      "Enable 'signext in register' instructions.">;
-def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
-                                      "Enable 'conditional move' instructions.">;
-def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
-                                   "Enable 'byte/half swap' instructions.">;
-def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
-                                       "Enable 'count leading bits' instructions.">;
-def FeatureFPIdx : SubtargetFeature<"FPIdx", "HasFPIdx", "true",
-                                    "Enable 'FP indexed load/store' instructions.">;
+def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
+                                    "Mips I ISA Support [highly experimental]">;
+def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
+                                    "Mips II ISA Support [highly experimental]",
+                                    [FeatureMips1]>;
+def FeatureMips3_32 : SubtargetFeature<"mips3_32", "HasMips3_32", "true",
+                                       "Subset of MIPS-III that is also in MIPS32 "
+                                       "[highly experimental]">;
+def FeatureMips3_32r2 : SubtargetFeature<"mips3_32r2", "HasMips3_32r2", "true",
+                                         "Subset of MIPS-III that is also in MIPS32r2 "
+                                         "[highly experimental]">;
+def FeatureMips3 : SubtargetFeature<"mips3", "MipsArchVersion", "Mips3",
+                                    "MIPS III ISA Support [highly experimental]",
+                                    [FeatureMips2, FeatureMips3_32,
+                                     FeatureMips3_32r2, FeatureGP64Bit,
+                                     FeatureFP64Bit]>;
+def FeatureMips4_32 : SubtargetFeature<"mips4_32", "HasMips4_32", "true",
+                                       "Subset of MIPS-IV that is also in MIPS32 "
+                                       "[highly experimental]">;
+def FeatureMips4_32r2 : SubtargetFeature<"mips4_32r2", "HasMips4_32r2", "true",
+                                         "Subset of MIPS-IV that is also in MIPS32r2 "
+                                         "[highly experimental]">;
+def FeatureMips4 : SubtargetFeature<"mips4", "MipsArchVersion",
+                                    "Mips4", "MIPS IV ISA Support",
+                                    [FeatureMips3, FeatureMips4_32,
+                                     FeatureMips4_32r2]>;
+def FeatureMips5_32r2 :
SubtargetFeature<"mips5_32r2", "HasMips5_32r2", "true", + "Subset of MIPS-V that is also in MIPS32r2 " + "[highly experimental]">; +def FeatureMips5 : SubtargetFeature<"mips5", "MipsArchVersion", "Mips5", + "MIPS V ISA Support [highly experimental]", + [FeatureMips4, FeatureMips5_32r2]>; def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32", "Mips32 ISA Support", - [FeatureCondMov, FeatureBitCount]>; + [FeatureMips2, FeatureMips3_32, + FeatureMips4_32]>; def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", "Mips32r2", "Mips32r2 ISA Support", - [FeatureMips32, FeatureSEInReg, FeatureSwap, - FeatureFPIdx]>; -def FeatureMips4 : SubtargetFeature<"mips4", "MipsArchVersion", - "Mips4", "MIPS IV ISA Support", - [FeatureGP64Bit, FeatureFP64Bit, - FeatureCondMov]>; + [FeatureMips3_32r2, FeatureMips4_32r2, + FeatureMips5_32r2, FeatureMips32]>; +def FeatureMips32r6 : SubtargetFeature<"mips32r6", "MipsArchVersion", + "Mips32r6", + "Mips32r6 ISA Support [experimental]", + [FeatureMips32r2, FeatureFP64Bit, + FeatureNaN2008]>; def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion", "Mips64", "Mips64 ISA Support", - [FeatureMips4, FeatureMips32, FeatureFPIdx]>; + [FeatureMips5, FeatureMips32]>; def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion", "Mips64r2", "Mips64r2 ISA Support", [FeatureMips64, FeatureMips32r2]>; +def FeatureMips64r6 : SubtargetFeature<"mips64r6", "MipsArchVersion", + "Mips64r6", + "Mips64r6 ISA Support [experimental]", + [FeatureMips32r6, FeatureMips64r2, + FeatureNaN2008]>; def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true", "Mips16 mode">; @@ -97,11 +155,18 @@ def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips", class Proc Features> : Processor; +def : Proc<"mips1", [FeatureMips1, FeatureO32]>; +def : Proc<"mips2", [FeatureMips2, FeatureO32]>; def : Proc<"mips32", [FeatureMips32, FeatureO32]>; def : Proc<"mips32r2", [FeatureMips32r2, FeatureO32]>; +def : Proc<"mips32r6", [FeatureMips32r6, FeatureO32]>; + +def : Proc<"mips3", [FeatureMips3, FeatureN64]>; def : Proc<"mips4", [FeatureMips4, FeatureN64]>; +def : Proc<"mips5", [FeatureMips5, FeatureN64]>; def : Proc<"mips64", [FeatureMips64, FeatureN64]>; def : Proc<"mips64r2", [FeatureMips64r2, FeatureN64]>; +def : Proc<"mips64r6", [FeatureMips64r6, FeatureN64]>; def : Proc<"mips16", [FeatureMips16, FeatureO32]>; def : Proc<"octeon", [FeatureMips64r2, FeatureN64, FeatureCnMips]>; diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index 028b049..c01d03a 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -71,7 +71,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { } if (hasFP(MF)) BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0) - .addReg(Mips::SP); + .addReg(Mips::SP).setMIFlag(MachineInstr::FrameSetup); } diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 8ce2ced..3f7829d 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ -24,27 +24,27 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
-  void emitPrologue(MachineFunction &MF) const;
-  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+  void emitPrologue(MachineFunction &MF) const override;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
-                                     MachineBasicBlock &MBB,
-                                     MachineBasicBlock::iterator I) const;
+                                     MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I) const override;
 
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
-                                 const TargetRegisterInfo *TRI) const;
+                                 const TargetRegisterInfo *TRI) const override;
 
   bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MI,
-                                   const std::vector<CalleeSavedInfo> &CSI,
-                                   const TargetRegisterInfo *TRI) const;
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const override;
 
-  bool hasReservedCallFrame(const MachineFunction &MF) const;
+  bool hasReservedCallFrame(const MachineFunction &MF) const override;
 
   void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS) const;
+                                            RegScavenger *RS) const override;
 };
 
 } // End llvm namespace
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index d321e21..14055d6 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "mips16-hard-float"
 #include "Mips16HardFloat.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Value.h"
@@ -20,6 +19,8 @@
 #include <algorithm>
 #include <string>
 
+#define DEBUG_TYPE "mips16-hard-float"
+
 static void inlineAsmOut (LLVMContext &C, StringRef AsmString, BasicBlock *BB ) {
   std::vector<Type *> AsmArgTypes;
@@ -354,9 +355,8 @@
 };
 
 static bool isIntrinsicInline(Function *F) {
-  return std::binary_search(
-    IntrinsicInline, array_endof(IntrinsicInline),
-    F->getName());
+  return std::binary_search(std::begin(IntrinsicInline),
+                            std::end(IntrinsicInline), F->getName());
 }
 //
 // Returns of float, double and complex need to be handled with a helper
@@ -407,11 +407,11 @@ static bool fixupFPReturnAndCall
         CallInst::Create(F, Params, "", &Inst );
       } else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
           const Value* V = CI->getCalledValue();
-          const Type* T = 0;
+          const Type* T = nullptr;
           if (V) T = V->getType();
-          const PointerType *PFT=0;
+          const PointerType *PFT=nullptr;
           if (T) PFT = dyn_cast<PointerType>(T);
-          const FunctionType *FT=0;
+          const FunctionType *FT=nullptr;
           if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
           Function *F_ = CI->getCalledFunction();
           if (FT && needsFPReturnHelper(*FT) &&
diff --git a/lib/Target/Mips/Mips16HardFloat.h b/lib/Target/Mips/Mips16HardFloat.h
index b7f712a..826887e 100644
--- a/lib/Target/Mips/Mips16HardFloat.h
+++ b/lib/Target/Mips/Mips16HardFloat.h
@@ -34,11 +34,11 @@ public:
     TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {
   }
 
-  virtual const char *getPassName() const {
+  const char *getPassName() const override {
     return "MIPS16 Hard Float Pass";
   }
 
-  virtual bool runOnModule(Module &M);
+  bool runOnModule(Module &M) override;
 
 protected:
   /// Keep a pointer to the MipsSubtarget around so that we can make the right
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.cpp b/lib/Target/Mips/Mips16HardFloatInfo.cpp
index d8b685e..2eb6e5d 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.cpp
+++ b/lib/Target/Mips/Mips16HardFloatInfo.cpp
@@ -30,7 +30,7 @@ const FuncNameSignature
PredefinedFuncs[] = { { "__fixunssfsi", { FSig, NoFPRet } }, { "__fixunssfdi", { FSig, NoFPRet } }, { "__floatundisf", { NoSig, FRet } }, - { 0, { NoSig, NoFPRet } } + { nullptr, { NoSig, NoFPRet } } }; // just do a search for now. there are very few of these special cases. @@ -44,7 +44,7 @@ extern FuncSignature const *findFuncSignature(const char *name) { return &PredefinedFuncs[i].Signature; i++; } - return 0; + return nullptr; } } } diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 9e36546..4e86a27 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-isel" #include "Mips16ISelDAGToDAG.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" @@ -35,6 +34,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "mips-isel" + bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { if (!Subtarget.inMips16Mode()) return false; @@ -44,7 +45,7 @@ bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { std::pair Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, SDLoc DL, EVT Ty, bool HasLo, bool HasHi) { - SDNode *Lo = 0, *Hi = 0; + SDNode *Lo = nullptr, *Hi = nullptr; SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0), N->getOperand(1)); SDValue InFlag = SDValue(Mul, 0); @@ -224,10 +225,12 @@ bool Mips16DAGToDAGISel::selectAddr16( // If an indexed floating point load/store can be emitted, return false. const LSBaseSDNode *LS = dyn_cast(Parent); - if (LS && - (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasFPIdx()) - return false; + if (LS) { + if (LS->getMemoryVT() == MVT::f32 && Subtarget.hasMips4_32r2()) + return false; + if (LS->getMemoryVT() == MVT::f64 && Subtarget.hasMips4_32r2()) + return false; + } } Base = Addr; Offset = CurDAG->getTargetConstant(0, ValTy); @@ -297,7 +300,7 @@ std::pair Mips16DAGToDAGISel::selectNode(SDNode *Node) { if (!SDValue(Node, 1).use_empty()) ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0)); - return std::make_pair(true, (SDNode*)NULL); + return std::make_pair(true, nullptr); } case ISD::MULHS: @@ -308,7 +311,7 @@ std::pair Mips16DAGToDAGISel::selectNode(SDNode *Node) { } } - return std::make_pair(false, (SDNode*)NULL); + return std::make_pair(false, nullptr); } FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) { diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h index 49dc6e5..e653b39 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.h +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h @@ -28,16 +28,16 @@ private: SDValue getMips16SPAliasReg(); - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg); - virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, - SDValue &Offset, SDValue &Alias); + bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Alias) override; - virtual std::pair selectNode(SDNode *Node); + std::pair selectNode(SDNode *Node) override; - virtual void processFunctionAfterISel(MachineFunction &MF); + void processFunctionAfterISel(MachineFunction &MF) override; // Insert instructions to initialize the global base register in the // first MBB of the function. 
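The Mips16ISelLowering.cpp hunk below replaces a std::binary_search followed by
a separate linear std::find over Mips16IntrinsicHelper with a single
std::lower_bound probe. A minimal standalone sketch of that idiom, with an
invented Entry type, table contents, and helper name (not the types used in
the patch):

    #include <algorithm>
    #include <cstring>
    #include <iterator>

    struct Entry {
      const char *Name;
      const char *Helper;
    };

    // The table must be sorted by Name for lower_bound to be valid.
    static const Entry Table[] = {{"ceil", "__helper_ceil"},
                                  {"floor", "__helper_floor"}};

    static bool operator<(const Entry &A, const Entry &B) {
      return std::strcmp(A.Name, B.Name) < 0;
    }
    static bool operator==(const Entry &A, const Entry &B) {
      return std::strcmp(A.Name, B.Name) == 0;
    }

    // One O(log n) probe: lower_bound yields the first element that is not
    // less than Key; a match exists iff that element compares equal.
    const char *findHelper(const char *Name) {
      Entry Key = {Name, nullptr};
      const Entry *I = std::lower_bound(std::begin(Table), std::end(Table), Key);
      if (I != std::end(Table) && *I == Key)
        return I->Helper;
      return nullptr;
    }

This folds the membership test and the element lookup into one search, where
the old code paid for a binary_search and then a second pass to recover the
matching entry.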
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 5c6f302..9102450 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -10,7 +10,6 @@ // Subclass of MipsTargetLowering specialized for mips16. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-lower" #include "Mips16ISelLowering.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsRegisterInfo.h" @@ -23,6 +22,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-lower" + static cl::opt DontExpandCondPseudos16( "mips16-dont-expand-cond-pseudo", cl::init(false), @@ -353,7 +354,7 @@ unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber #define T P "0" , T1 #define P P_ static char const * vMips16Helper[MAX_STUB_NUMBER+1] = - {0, T1 }; + {nullptr, T1 }; #undef P #define P P_ "sf_" static char const * sfMips16Helper[MAX_STUB_NUMBER+1] = @@ -430,7 +431,7 @@ getOpndList(SmallVectorImpl &Ops, SelectionDAG &DAG = CLI.DAG; MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *FuncInfo = MF.getInfo(); - const char* Mips16HelperFunction = 0; + const char* Mips16HelperFunction = nullptr; bool NeedMips16Helper = false; if (Subtarget->inMips16HardFloat()) { @@ -443,8 +444,8 @@ getOpndList(SmallVectorImpl &Ops, if (ExternalSymbolSDNode *S = dyn_cast(CLI.Callee)) { Mips16Libcall Find = { RTLIB::UNKNOWN_LIBCALL, S->getSymbol() }; - if (std::binary_search(HardFloatLibCalls, array_endof(HardFloatLibCalls), - Find)) + if (std::binary_search(std::begin(HardFloatLibCalls), + std::end(HardFloatLibCalls), Find)) LookupHelper = false; else { const char *Symbol = S->getSymbol(); @@ -471,13 +472,12 @@ getOpndList(SmallVectorImpl &Ops, FuncInfo->setSaveS2(); } // one more look at list of intrinsics - if (std::binary_search(Mips16IntrinsicHelper, - array_endof(Mips16IntrinsicHelper), - IntrinsicFind)) { - const Mips16IntrinsicHelperType *h =(std::find(Mips16IntrinsicHelper, - array_endof(Mips16IntrinsicHelper), - IntrinsicFind)); - Mips16HelperFunction = h->Helper; + const Mips16IntrinsicHelperType *Helper = + std::lower_bound(std::begin(Mips16IntrinsicHelper), + std::end(Mips16IntrinsicHelper), IntrinsicFind); + if (Helper != std::end(Mips16IntrinsicHelper) && + *Helper == IntrinsicFind) { + Mips16HelperFunction = Helper->Helper; NeedMips16Helper = true; LookupHelper = false; } @@ -488,13 +488,13 @@ getOpndList(SmallVectorImpl &Ops, Mips16Libcall Find = { RTLIB::UNKNOWN_LIBCALL, G->getGlobal()->getName().data() }; - if (std::binary_search(HardFloatLibCalls, array_endof(HardFloatLibCalls), - Find)) + if (std::binary_search(std::begin(HardFloatLibCalls), + std::end(HardFloatLibCalls), Find)) LookupHelper = false; } - if (LookupHelper) Mips16HelperFunction = - getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper); - + if (LookupHelper) + Mips16HelperFunction = + getMips16HelperFunction(CLI.RetTy, CLI.getArgs(), NeedMips16Helper); } SDValue JumpTarget = Callee; diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h index 618ec90..df88333 100644 --- a/lib/Target/Mips/Mips16ISelLowering.h +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -21,17 +21,17 @@ namespace llvm { public: explicit Mips16TargetLowering(MipsTargetMachine &TM); - virtual bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - bool *Fast) const; + bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + bool *Fast) const override; - virtual MachineBasicBlock * - 
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const override; private: - virtual bool - isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const; + bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const override; void setMips16HardFloatLibCalls(); @@ -41,11 +41,12 @@ namespace llvm { const char *getMips16HelperFunction (Type* RetTy, ArgListTy &Args, bool &needHelper) const; - virtual void + void getOpndList(SmallVectorImpl &Ops, std::deque< std::pair > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, - CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + CallLoweringInfo &CLI, SDValue Callee, + SDValue Chain) const override; MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index 43c2fbd..79607de 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -29,6 +29,7 @@ using namespace llvm; +#define DEBUG_TYPE "mips16-instrinfo" Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm) : MipsInstrInfo(tm, Mips::Bimm16), diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h index e93925c..0dc0046 100644 --- a/lib/Target/Mips/Mips16InstrInfo.h +++ b/lib/Target/Mips/Mips16InstrInfo.h @@ -25,46 +25,46 @@ class Mips16InstrInfo : public MipsInstrInfo { public: explicit Mips16InstrInfo(MipsTargetMachine &TM); - virtual const MipsRegisterInfo &getRegisterInfo() const; + const MipsRegisterInfo &getRegisterInfo() const override; /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. 
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; - virtual void storeRegToStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - int64_t Offset) const; + void storeRegToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const override; - virtual void loadRegFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - int64_t Offset) const; + void loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const override; - virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - virtual unsigned getOppositeBranchOpc(unsigned Opc) const; + unsigned getOppositeBranchOpc(unsigned Opc) const override; // Adjust SP by FrameSize bytes. Save RA, S0, S1 void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, @@ -104,9 +104,9 @@ public: (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const; unsigned getInlineAsmLength(const char *Str, - const MCAsmInfo &MAI) const; + const MCAsmInfo &MAI) const override; private: - virtual unsigned getAnalyzableBrOpc(unsigned Opc) const; + unsigned getAnalyzableBrOpc(unsigned Opc) const override; void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp index 3a50ed9..dbee774 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.cpp +++ b/lib/Target/Mips/Mips16RegisterInfo.cpp @@ -39,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips16-registerinfo" + Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST) : MipsRegisterInfo(ST) {} diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h index 13e82a3..f59f1a7 100644 --- a/lib/Target/Mips/Mips16RegisterInfo.h +++ b/lib/Target/Mips/Mips16RegisterInfo.h @@ -23,24 +23,24 @@ class Mips16RegisterInfo : public MipsRegisterInfo { public: Mips16RegisterInfo(const MipsSubtarget &Subtarget); - bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const override; - bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; - bool useFPForScavengingIndex(const MachineFunction &MF) const; + bool useFPForScavengingIndex(const MachineFunction &MF) const override; bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC, - unsigned Reg) 
const; + unsigned Reg) const override; - virtual const TargetRegisterClass *intRegClass(unsigned Size) const; + const TargetRegisterClass *intRegClass(unsigned Size) const override; private: - virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, - int FrameIndex, uint64_t StackSize, - int64_t SPOffset) const; + void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const override; }; } // end namespace llvm diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td new file mode 100644 index 0000000..a3f9df5 --- /dev/null +++ b/lib/Target/Mips/Mips32r6InstrFormats.td @@ -0,0 +1,386 @@ +//=- Mips32r6InstrFormats.td - Mips32r6 Instruction Formats -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes Mips32r6 instruction formats. +// +//===----------------------------------------------------------------------===// + +class MipsR6Inst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, + PredicateControl { + let DecoderNamespace = "Mips32r6_64r6"; + let EncodingPredicates = [HasStdEnc]; +} + +//===----------------------------------------------------------------------===// +// +// Field Values +// +//===----------------------------------------------------------------------===// + +class OPGROUP Val> { + bits<6> Value = Val; +} +def OPGROUP_COP1 : OPGROUP<0b010001>; +def OPGROUP_COP2 : OPGROUP<0b010010>; +def OPGROUP_ADDI : OPGROUP<0b001000>; +def OPGROUP_AUI : OPGROUP<0b001111>; +def OPGROUP_BLEZ : OPGROUP<0b000110>; +def OPGROUP_BGTZ : OPGROUP<0b000111>; +def OPGROUP_BLEZL : OPGROUP<0b010110>; +def OPGROUP_BGTZL : OPGROUP<0b010111>; +def OPGROUP_DADDI : OPGROUP<0b011000>; +def OPGROUP_DAUI : OPGROUP<0b011101>; +def OPGROUP_PCREL : OPGROUP<0b111011>; +def OPGROUP_REGIMM : OPGROUP<0b000001>; +def OPGROUP_SPECIAL : OPGROUP<0b000000>; +def OPGROUP_SPECIAL3 : OPGROUP<0b011111>; + +class OPCODE2 Val> { + bits<2> Value = Val; +} +def OPCODE2_ADDIUPC : OPCODE2<0b00>; +def OPCODE2_LWPC : OPCODE2<0b01>; +def OPCODE2_LWUPC : OPCODE2<0b10>; + +class OPCODE5 Val> { + bits<5> Value = Val; +} +def OPCODE5_ALUIPC : OPCODE5<0b11111>; +def OPCODE5_AUIPC : OPCODE5<0b11110>; +def OPCODE5_DAHI : OPCODE5<0b00110>; +def OPCODE5_DATI : OPCODE5<0b11110>; +def OPCODE5_BC1EQZ : OPCODE5<0b01001>; +def OPCODE5_BC1NEZ : OPCODE5<0b01101>; +def OPCODE5_BC2EQZ : OPCODE5<0b01001>; +def OPCODE5_BC2NEZ : OPCODE5<0b01101>; + +class OPCODE6 Val> { + bits<6> Value = Val; +} +def OPCODE6_ALIGN : OPCODE6<0b100000>; +def OPCODE6_DALIGN : OPCODE6<0b100100>; +def OPCODE6_BITSWAP : OPCODE6<0b100000>; +def OPCODE6_DBITSWAP : OPCODE6<0b100100>; + +class FIELD_FMT Val> { + bits<5> Value = Val; +} +def FIELD_FMT_S : FIELD_FMT<0b10000>; +def FIELD_FMT_D : FIELD_FMT<0b10001>; + +class FIELD_CMP_COND Val> { + bits<5> Value = Val; +} +def FIELD_CMP_COND_F : FIELD_CMP_COND<0b00000>; +def FIELD_CMP_COND_UN : FIELD_CMP_COND<0b00001>; +def FIELD_CMP_COND_EQ : FIELD_CMP_COND<0b00010>; +def FIELD_CMP_COND_UEQ : FIELD_CMP_COND<0b00011>; +def FIELD_CMP_COND_OLT : FIELD_CMP_COND<0b00100>; +def FIELD_CMP_COND_ULT : FIELD_CMP_COND<0b00101>; +def FIELD_CMP_COND_OLE : FIELD_CMP_COND<0b00110>; +def FIELD_CMP_COND_ULE : FIELD_CMP_COND<0b00111>; +def FIELD_CMP_COND_SF : FIELD_CMP_COND<0b01000>; 
+def FIELD_CMP_COND_NGLE : FIELD_CMP_COND<0b01001>; +def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0b01010>; +def FIELD_CMP_COND_NGL : FIELD_CMP_COND<0b01011>; +def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b01100>; +def FIELD_CMP_COND_NGE : FIELD_CMP_COND<0b01101>; +def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b01110>; +def FIELD_CMP_COND_NGT : FIELD_CMP_COND<0b01111>; + +class FIELD_CMP_FORMAT Val> { + bits<5> Value = Val; +} +def FIELD_CMP_FORMAT_S : FIELD_CMP_FORMAT<0b10100>; +def FIELD_CMP_FORMAT_D : FIELD_CMP_FORMAT<0b10101>; + +//===----------------------------------------------------------------------===// +// +// Disambiguators +// +//===----------------------------------------------------------------------===// +// +// Some encodings are ambiguous except by comparing field values. + +class DecodeDisambiguates { + string DecoderMethod = !strconcat("Decode", Name); +} + +class DecodeDisambiguatedBy : DecodeDisambiguates { + string DecoderNamespace = "Mips32r6_64r6_Ambiguous"; +} + +//===----------------------------------------------------------------------===// +// +// Encoding Formats +// +//===----------------------------------------------------------------------===// + +class AUI_FM : MipsR6Inst { + bits<5> rs; + bits<5> rt; + bits<16> imm; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_AUI.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = imm; +} + +class DAUI_FM : AUI_FM { + let Inst{31-26} = OPGROUP_DAUI.Value; +} + +class COP1_2R_FM funct, FIELD_FMT Format> : MipsR6Inst { + bits<5> fs; + bits<5> fd; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_COP1.Value; + let Inst{25-21} = Format.Value; + let Inst{20-16} = 0b00000; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-0} = funct; +} + +class COP1_3R_FM funct, FIELD_FMT Format> : MipsR6Inst { + bits<5> ft; + bits<5> fs; + bits<5> fd; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_COP1.Value; + let Inst{25-21} = Format.Value; + let Inst{20-16} = ft; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-0} = funct; +} + +class COP1_BCCZ_FM : MipsR6Inst { + bits<5> ft; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_COP1.Value; + let Inst{25-21} = Operation.Value; + let Inst{20-16} = ft; + let Inst{15-0} = offset; +} + +class COP2_BCCZ_FM : MipsR6Inst { + bits<5> ct; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_COP2.Value; + let Inst{25-21} = Operation.Value; + let Inst{20-16} = ct; + let Inst{15-0} = offset; +} + +class PCREL16_FM : MipsR6Inst { + bits<5> rs; + bits<16> imm; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_PCREL.Value; + let Inst{25-21} = rs; + let Inst{20-16} = Operation.Value; + let Inst{15-0} = imm; +} + +class PCREL19_FM : MipsR6Inst { + bits<5> rs; + bits<19> imm; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_PCREL.Value; + let Inst{25-21} = rs; + let Inst{20-19} = Operation.Value; + let Inst{18-0} = imm; +} + +class SPECIAL3_2R_FM : MipsR6Inst { + bits<5> rd; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL3.Value; + let Inst{25-21} = 0b00000; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00000; + let Inst{5-0} = Operation.Value; +} + +class SPECIAL_3R_FM mulop, bits<6> funct> : MipsR6Inst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-6} = mulop; + let Inst{5-0} = funct; +} + +// This class is ambiguous with other branches: +// BEQC/BNEC 
require that rs > rt +class CMP_BRANCH_2R_OFF16_FM : MipsR6Inst { + bits<5> rs; + bits<5> rt; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = funct.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-0} = offset; +} + +// This class is ambiguous with other branches: +// BLEZC/BGEZC/BEQZALC/BNEZALC/BGTZALC require that rs == 0 && rt != 0 +// The '1R_RT' in the name means 1 register in the rt field. +class CMP_BRANCH_1R_RT_OFF16_FM : MipsR6Inst { + bits<5> rt; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = funct.Value; + let Inst{25-21} = 0b00000; + let Inst{20-16} = rt; + let Inst{15-0} = offset; +} + +// This class is ambiguous with other branches: +// BLTZC/BGTZC/BLTZALC/BGEZALC require that rs == rt && rt != 0 +// The '1R_BOTH' in the name means 1 register in both the rs and rt fields. +class CMP_BRANCH_1R_BOTH_OFF16_FM : MipsR6Inst { + bits<5> rt; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = funct.Value; + let Inst{25-21} = rt; + let Inst{20-16} = rt; + let Inst{15-0} = offset; +} + +class CMP_BRANCH_OFF21_FM funct> : MipsR6Inst { + bits<5> rs; // rs != 0 + bits<21> offset; + + bits<32> Inst; + + let Inst{31-26} = funct; + let Inst{25-21} = rs; + let Inst{20-0} = offset; +} + +class JMP_IDX_COMPACT_FM funct> : MipsR6Inst { + bits<5> rt; + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = funct; + let Inst{25-21} = 0b000000; + let Inst{20-16} = rt; + let Inst{15-0} = offset; +} + +class BRANCH_OFF26_FM funct> : MipsR6Inst { + bits<32> Inst; + bits<26> offset; + + let Inst{31-26} = funct; + let Inst{25-0} = offset; +} + +class SPECIAL3_ALIGN_FM : MipsR6Inst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<2> bp; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL3.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-8} = 0b010; + let Inst{7-6} = bp; + let Inst{5-0} = Operation.Value; +} + +class SPECIAL3_DALIGN_FM : MipsR6Inst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<3> bp; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL3.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-9} = 0b01; + let Inst{8-6} = bp; + let Inst{5-0} = Operation.Value; +} + +class REGIMM_FM : MipsR6Inst { + bits<5> rs; + bits<16> imm; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_REGIMM.Value; + let Inst{25-21} = rs; + let Inst{20-16} = Operation.Value; + let Inst{15-0} = imm; +} + +class COP1_CMP_CONDN_FM : MipsR6Inst { + bits<5> fd; + bits<5> fs; + bits<5> ft; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_COP1.Value; + let Inst{25-21} = Format.Value; + let Inst{20-16} = ft; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5} = 0; + let Inst{4-0} = Cond.Value; +} + diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td new file mode 100644 index 0000000..ffaf965 --- /dev/null +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -0,0 +1,583 @@ +//=- Mips32r6InstrInfo.td - Mips32r6 Instruction Information -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes Mips32r6 instructions. 
+// +//===----------------------------------------------------------------------===// + +include "Mips32r6InstrFormats.td" + +// Notes about removals/changes from MIPS32r6: +// Unclear: ssnop +// Reencoded: cache, pref +// Reencoded: clo, clz +// Reencoded: jr -> jalr +// Reencoded: jr.hb -> jalr.hb +// Reencoded: ldc2 +// Reencoded: ll, sc +// Reencoded: lwc2 +// Reencoded: sdbbp +// Reencoded: sdc2 +// Reencoded: swc2 +// Removed: bc1any2, bc1any4 +// Removed: bc2[ft] +// Removed: bc2f, bc2t +// Removed: bgezal +// Removed: bltzal +// Removed: c.cond.fmt, bc1[ft] +// Removed: div, divu +// Removed: jalx +// Removed: ldxc1 +// Removed: luxc1 +// Removed: lwxc1 +// Removed: madd.[ds], nmadd.[ds], nmsub.[ds], sub.[ds] +// Removed: mfhi, mflo, mthi, mtlo, madd, maddu, msub, msubu, mul +// Removed: movf, movt +// Removed: movf.fmt, movt.fmt, movn.fmt, movz.fmt +// Removed: movn, movz +// Removed: mult, multu +// Removed: prefx +// Removed: sdxc1 +// Removed: suxc1 +// Removed: swxc1 +// Rencoded: [ls][wd]c2 + +def brtarget21 : Operand { + let EncoderMethod = "getBranchTarget21OpValue"; + let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeBranchTarget21"; + let ParserMatchClass = MipsJumpTargetAsmOperand; +} + +def brtarget26 : Operand { + let EncoderMethod = "getBranchTarget26OpValue"; + let OperandType = "OPERAND_PCREL"; + let DecoderMethod = "DecodeBranchTarget26"; + let ParserMatchClass = MipsJumpTargetAsmOperand; +} + +def jmpoffset16 : Operand { + let EncoderMethod = "getJumpOffset16OpValue"; + let ParserMatchClass = MipsJumpTargetAsmOperand; +} + +def calloffset16 : Operand { + let EncoderMethod = "getJumpOffset16OpValue"; + let ParserMatchClass = MipsJumpTargetAsmOperand; +} + +//===----------------------------------------------------------------------===// +// +// Instruction Encodings +// +//===----------------------------------------------------------------------===// + +class ADDIUPC_ENC : PCREL19_FM; +class ALIGN_ENC : SPECIAL3_ALIGN_FM; +class ALUIPC_ENC : PCREL16_FM; +class AUI_ENC : AUI_FM; +class AUIPC_ENC : PCREL16_FM; + +class BALC_ENC : BRANCH_OFF26_FM<0b111010>; +class BC_ENC : BRANCH_OFF26_FM<0b110010>; +class BEQC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguates<"AddiGroupBranch">; +class BEQZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"DaddiGroupBranch">; +class BNEC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguates<"DaddiGroupBranch">; +class BNEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"DaddiGroupBranch">; + +class BLTZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, + DecodeDisambiguates<"BgtzlGroupBranch">; +class BGEZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, + DecodeDisambiguates<"BlezlGroupBranch">; +class BGTZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"BgtzGroupBranch">; + +class BLEZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"BlezlGroupBranch">; +class BLTZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM, + DecodeDisambiguates<"BgtzGroupBranch">; +class BGTZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM, + DecodeDisambiguatedBy<"BgtzlGroupBranch">; + +class BEQZC_ENC : CMP_BRANCH_OFF21_FM<0b110110>; +class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM; +class BNEZC_ENC : CMP_BRANCH_OFF21_FM<0b111110>; + +class BC1EQZ_ENC : COP1_BCCZ_FM; +class BC1NEZ_ENC : COP1_BCCZ_FM; +class BC2EQZ_ENC : COP2_BCCZ_FM; +class BC2NEZ_ENC : COP2_BCCZ_FM; + +class JIALC_ENC : JMP_IDX_COMPACT_FM<0b111110>; +class JIC_ENC : JMP_IDX_COMPACT_FM<0b110110>; + +class BITSWAP_ENC : SPECIAL3_2R_FM; +class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM; 
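+// Reminder (see the Disambiguators section of Mips32r6InstrFormats.td):
+// several of the branch encodings above and below share opcode space with
+// the ADDI/DADDI and BLEZ/BGTZ groups and can only be told apart by the
+// values of their register fields, which is what the DecodeDisambiguates
+// and DecodeDisambiguatedBy mixins on these *_ENC classes express.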
+class BNVC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguatedBy<"DaddiGroupBranch">; +class BOVC_ENC : CMP_BRANCH_2R_OFF16_FM, + DecodeDisambiguatedBy<"AddiGroupBranch">; +class DIV_ENC : SPECIAL_3R_FM<0b00010, 0b011010>; +class DIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011011>; +class MOD_ENC : SPECIAL_3R_FM<0b00011, 0b011010>; +class MODU_ENC : SPECIAL_3R_FM<0b00011, 0b011011>; +class MUH_ENC : SPECIAL_3R_FM<0b00011, 0b011000>; +class MUHU_ENC : SPECIAL_3R_FM<0b00011, 0b011001>; +class MUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b011000>; +class MULU_ENC : SPECIAL_3R_FM<0b00010, 0b011001>; + +class MADDF_S_ENC : COP1_3R_FM<0b011000, FIELD_FMT_S>; +class MADDF_D_ENC : COP1_3R_FM<0b011000, FIELD_FMT_D>; +class MSUBF_S_ENC : COP1_3R_FM<0b011001, FIELD_FMT_S>; +class MSUBF_D_ENC : COP1_3R_FM<0b011001, FIELD_FMT_D>; + +class SEL_D_ENC : COP1_3R_FM<0b010000, FIELD_FMT_D>; +class SEL_S_ENC : COP1_3R_FM<0b010000, FIELD_FMT_S>; + +class SELEQZ_ENC : SPECIAL_3R_FM<0b00000, 0b110101>; +class SELNEZ_ENC : SPECIAL_3R_FM<0b00000, 0b110111>; + +class LWPC_ENC : PCREL19_FM; +class LWUPC_ENC : PCREL19_FM; + +class MAX_S_ENC : COP1_3R_FM<0b011101, FIELD_FMT_S>; +class MAX_D_ENC : COP1_3R_FM<0b011101, FIELD_FMT_D>; +class MIN_S_ENC : COP1_3R_FM<0b011100, FIELD_FMT_S>; +class MIN_D_ENC : COP1_3R_FM<0b011100, FIELD_FMT_D>; + +class MAXA_S_ENC : COP1_3R_FM<0b011111, FIELD_FMT_S>; +class MAXA_D_ENC : COP1_3R_FM<0b011111, FIELD_FMT_D>; +class MINA_S_ENC : COP1_3R_FM<0b011110, FIELD_FMT_S>; +class MINA_D_ENC : COP1_3R_FM<0b011110, FIELD_FMT_D>; + +class SELEQZ_S_ENC : COP1_3R_FM<0b010100, FIELD_FMT_S>; +class SELEQZ_D_ENC : COP1_3R_FM<0b010100, FIELD_FMT_D>; +class SELNEZ_S_ENC : COP1_3R_FM<0b010111, FIELD_FMT_S>; +class SELNEZ_D_ENC : COP1_3R_FM<0b010111, FIELD_FMT_D>; + +class RINT_S_ENC : COP1_2R_FM<0b011010, FIELD_FMT_S>; +class RINT_D_ENC : COP1_2R_FM<0b011010, FIELD_FMT_D>; +class CLASS_S_ENC : COP1_2R_FM<0b011011, FIELD_FMT_S>; +class CLASS_D_ENC : COP1_2R_FM<0b011011, FIELD_FMT_D>; + +class CMP_CONDN_DESC_BASE { + dag OutOperandList = (outs FGROpnd:$fd); + dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft); + string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft"); + list Pattern = []; +} + +//===----------------------------------------------------------------------===// +// +// Instruction Multiclasses +// +//===----------------------------------------------------------------------===// + +multiclass CMP_CC_M { + def CMP_F_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"f", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_UN_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_EQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_OLT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"olt", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_ULT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_OLE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ole", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_ULE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_SF_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sf", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_NGLE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ngle", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM, + 
CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_NGL_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ngl", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_LT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_NGE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"nge", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_LE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_NGT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ngt", Typestr, FGROpnd>, + ISA_MIPS32R6; +} + +//===----------------------------------------------------------------------===// +// +// Instruction Descriptions +// +//===----------------------------------------------------------------------===// + +class PCREL19_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rs); + dag InOperandList = (ins simm19_lsl2:$imm); + string AsmString = !strconcat(instr_asm, "\t$rs, $imm"); + list Pattern = []; +} + +class ADDIUPC_DESC : PCREL19_DESC_BASE<"addiupc", GPR32Opnd>; +class LWPC_DESC: PCREL19_DESC_BASE<"lwpc", GPR32Opnd>; +class LWUPC_DESC: PCREL19_DESC_BASE<"lwupc", GPR32Opnd>; + +class ALIGN_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp"); + list Pattern = []; +} + +class ALIGN_DESC : ALIGN_DESC_BASE<"align", GPR32Opnd, uimm2>; + +class ALUIPC_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rs); + dag InOperandList = (ins simm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rs, $imm"); + list Pattern = []; +} + +class ALUIPC_DESC : ALUIPC_DESC_BASE<"aluipc", GPR32Opnd>; +class AUIPC_DESC : ALUIPC_DESC_BASE<"auipc", GPR32Opnd>; + +class AUI_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rs); + dag InOperandList = (ins GPROpnd:$rt, simm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm"); + list Pattern = []; +} + +class AUI_DESC : AUI_DESC_BASE<"aui", GPR32Opnd>; + +class BRANCH_DESC_BASE { + bit isBranch = 1; + bit isTerminator = 1; + bit hasDelaySlot = 0; +} + +class BC_DESC_BASE : BRANCH_DESC_BASE { + dag InOperandList = (ins opnd:$offset); + dag OutOperandList = (outs); + string AsmString = !strconcat(instr_asm, "\t$offset"); + bit isBarrier = 1; +} + +class CMP_BC_DESC_BASE : BRANCH_DESC_BASE { + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, opnd:$offset); + dag OutOperandList = (outs); + string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $offset"); + list Defs = [AT]; +} + +class CMP_CBR_EQNE_Z_DESC_BASE : BRANCH_DESC_BASE { + dag InOperandList = (ins GPROpnd:$rs, opnd:$offset); + dag OutOperandList = (outs); + string AsmString = !strconcat(instr_asm, "\t$rs, $offset"); + list Defs = [AT]; +} + +class CMP_CBR_RT_Z_DESC_BASE : BRANCH_DESC_BASE { + dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); + dag OutOperandList = (outs); + string AsmString = !strconcat(instr_asm, "\t$rt, $offset"); + list Defs = [AT]; +} + +class BALC_DESC : BC_DESC_BASE<"balc", brtarget26> { + bit isCall = 1; + list Defs = [RA]; +} + +class BC_DESC : BC_DESC_BASE<"bc", brtarget26>; +class BEQC_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR32Opnd>; +class BNEC_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR32Opnd>; + +class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd>; +class BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd>; + +class BLEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"blezc", brtarget, GPR32Opnd>; +class BGTZC_DESC : 
CMP_CBR_RT_Z_DESC_BASE<"bgtzc", brtarget, GPR32Opnd>; + +class BEQZC_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"beqzc", brtarget21, GPR32Opnd>; +class BNEZC_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"bnezc", brtarget21, GPR32Opnd>; + +class COP1_BCCZ_DESC_BASE : BRANCH_DESC_BASE { + dag InOperandList = (ins FGR64Opnd:$ft, brtarget:$offset); + dag OutOperandList = (outs); + string AsmString = instr_asm; + bit hasDelaySlot = 1; +} + +class BC1EQZ_DESC : COP1_BCCZ_DESC_BASE<"bc1eqz $ft, $offset">; +class BC1NEZ_DESC : COP1_BCCZ_DESC_BASE<"bc1nez $ft, $offset">; + +class COP2_BCCZ_DESC_BASE : BRANCH_DESC_BASE { + dag InOperandList = (ins COP2Opnd:$ct, brtarget:$offset); + dag OutOperandList = (outs); + string AsmString = instr_asm; + bit hasDelaySlot = 1; +} + +class BC2EQZ_DESC : COP2_BCCZ_DESC_BASE<"bc2eqz $ct, $offset">; +class BC2NEZ_DESC : COP2_BCCZ_DESC_BASE<"bc2nez $ct, $offset">; + +class BOVC_DESC : CMP_BC_DESC_BASE<"bovc", brtarget, GPR32Opnd>; +class BNVC_DESC : CMP_BC_DESC_BASE<"bnvc", brtarget, GPR32Opnd>; + +class JMP_IDX_COMPACT_DESC_BASE { + dag InOperandList = (ins GPROpnd:$rt, opnd:$offset); + string AsmString = !strconcat(opstr, "\t$rt, $offset"); + list Pattern = []; + bit isTerminator = 1; + bit hasDelaySlot = 0; + string DecoderMethod = "DecodeSimm16"; +} + +class JIALC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", calloffset16, + GPR32Opnd> { + bit isCall = 1; + list Defs = [RA]; +} + +class JIC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR32Opnd> { + bit isBarrier = 1; + list Defs = [AT]; +} + +class BITSWAP_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rt"); + list Pattern = []; +} + +class BITSWAP_DESC : BITSWAP_DESC_BASE<"bitswap", GPR32Opnd>; + +class DIVMOD_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list Pattern = []; +} + +class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd>; +class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd>; +class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd>; +class MODU_DESC : DIVMOD_DESC_BASE<"modu", GPR32Opnd>; + +class BEQZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"beqzalc", brtarget, GPR32Opnd> { + list Defs = [RA]; +} + +class BGEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezalc", brtarget, GPR32Opnd> { + list Defs = [RA]; +} + +class BGTZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgtzalc", brtarget, GPR32Opnd> { + list Defs = [RA]; +} + +class BLEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"blezalc", brtarget, GPR32Opnd> { + list Defs = [RA]; +} + +class BLTZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzalc", brtarget, GPR32Opnd> { + list Defs = [RA]; +} + +class BNEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bnezalc", brtarget, GPR32Opnd> { + list Defs = [RA]; +} +class MUL_R6_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); + list Pattern = []; +} + +class MUH_DESC : MUL_R6_DESC_BASE<"muh", GPR32Opnd>; +class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd>; +class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd>; +class MULU_DESC : MUL_R6_DESC_BASE<"mulu", GPR32Opnd>; + +class COP1_4R_DESC_BASE { + dag OutOperandList = (outs FGROpnd:$fd); + dag InOperandList = (ins FGROpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft); + string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft"); + list Pattern = []; + string Constraints = "$fd_in = $fd"; +} + +class 
+class SEL_D_DESC : COP1_4R_DESC_BASE<"sel.d", FGR64Opnd>;
+class SEL_S_DESC : COP1_4R_DESC_BASE<"sel.s", FGR32Opnd>;
+
+class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+  dag OutOperandList = (outs GPROpnd:$rd);
+  dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
+  string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+  list<dag> Pattern = [];
+}
+
+class SELEQZ_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR32Opnd>;
+class SELNEZ_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR32Opnd>;
+
+class MADDF_S_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>;
+class MADDF_D_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>;
+class MSUBF_S_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>;
+class MSUBF_D_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd>;
+
+class MAX_MIN_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+  dag OutOperandList = (outs FGROpnd:$fd);
+  dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
+  string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+  list<dag> Pattern = [];
+}
+
+class MAX_S_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd>;
+class MAX_D_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd>;
+class MIN_S_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd>;
+class MIN_D_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd>;
+
+class MAXA_S_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd>;
+class MAXA_D_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd>;
+class MINA_S_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd>;
+class MINA_D_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd>;
+
+class SELEQNEZ_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+  dag OutOperandList = (outs FGROpnd:$fd);
+  dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
+  string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+  list<dag> Pattern = [];
+}
+
+class SELEQZ_S_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd>;
+class SELEQZ_D_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd>;
+class SELNEZ_S_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd>;
+class SELNEZ_D_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd>;
+
+class CLASS_RINT_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+  dag OutOperandList = (outs FGROpnd:$fd);
+  dag InOperandList = (ins FGROpnd:$fs);
+  string AsmString = !strconcat(instr_asm, "\t$fd, $fs");
+  list<dag> Pattern = [];
+}
+
+class RINT_S_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd>;
+class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>;
+class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
+class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Definitions
+//
+//===----------------------------------------------------------------------===//
+
+def ADDIUPC : ADDIUPC_ENC, ADDIUPC_DESC, ISA_MIPS32R6;
+def ALIGN : ALIGN_ENC, ALIGN_DESC, ISA_MIPS32R6;
+def ALUIPC : ALUIPC_ENC, ALUIPC_DESC, ISA_MIPS32R6;
+def AUI : AUI_ENC, AUI_DESC, ISA_MIPS32R6;
+def AUIPC : AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6;
+def BALC : BALC_ENC, BALC_DESC, ISA_MIPS32R6;
+def BC1EQZ : BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6;
+def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6;
+def BC2EQZ : BC2EQZ_ENC, BC2EQZ_DESC, ISA_MIPS32R6;
+def BC2NEZ : BC2NEZ_ENC, BC2NEZ_DESC, ISA_MIPS32R6;
+def BC : BC_ENC, BC_DESC, ISA_MIPS32R6;
+def BEQC : BEQC_ENC, BEQC_DESC, ISA_MIPS32R6;
+def BEQZALC : BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6;
+def BEQZC : BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6;
+def BGEC; // Also aliased to blec with operands swapped
+def BGEUC; // Also aliased to bleuc with operands swapped
+def BGEZALC : BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6;
+def BGEZC : BGEZC_ENC, BGEZC_DESC, ISA_MIPS32R6;
+def BGTZALC : BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6;
+def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6;
+def BITSWAP : BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6;
+def BLEZALC : BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6;
+def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6;
+def BLTC; // Also aliased to bgtc with operands swapped
+def BLTUC; // Also aliased to bgtuc with operands swapped
+def BLTZALC : BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6;
+def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6;
+def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6;
+def BNEZALC : BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6;
+def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
+def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6;
+def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6;
+def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6;
+def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6;
+defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>;
+defm D : CMP_CC_M<FIELD_CMP_FORMAT_D, "d", FGR64Opnd>;
+def DIV : DIV_ENC, DIV_DESC, ISA_MIPS32R6;
+def DIVU : DIVU_ENC, DIVU_DESC, ISA_MIPS32R6;
+def JIALC : JIALC_ENC, JIALC_DESC, ISA_MIPS32R6;
+def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6;
+// def LSA; // See MSA
+def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
+def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
+def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6;
+def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6;
+def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6;
+def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6;
+def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6;
+def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6;
+def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6;
+def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6;
+def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6;
+def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6;
+def MOD : MOD_ENC, MOD_DESC, ISA_MIPS32R6;
+def MODU : MODU_ENC, MODU_DESC, ISA_MIPS32R6;
+def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6;
+def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6;
+def MUH : MUH_ENC, MUH_DESC, ISA_MIPS32R6;
+def MUHU : MUHU_ENC, MUHU_DESC, ISA_MIPS32R6;
+def MUL_R6 : MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
+def MULU : MULU_ENC, MULU_DESC, ISA_MIPS32R6;
+def NAL; // BAL with rd=0
+def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6;
+def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6;
+def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6;
+def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6;
+def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6;
+def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6;
+def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6;
+def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6;
+def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6;
+def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6;
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 7115d11..924b325 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -20,6 +20,9 @@ def uimm16_64 : Operand<i64> {
   let PrintMethod = "printUnsignedImm";
 }
 
+// Signed Operand
+def simm10_64 : Operand<i64>;
+
 // Transformation Function - get Imm - 32.
 def Subtract32 : SDNodeXForm<imm, [{
   return getImm(N, (unsigned)N->getZExtValue() - 32);
@@ -28,6 +31,11 @@ def Subtract32 : SDNodeXForm<imm, [{
 }]>;
 
+// Node immediate fits as 10-bit sign extended on target immediate.
+// e.g. seqi, snei
+def immSExt10_64 : PatLeaf<(i64 imm),
+                           [{ return isInt<10>(N->getSExtValue()); }]>;
+
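[Editor's note: immSExt10_64 above only matches immediates that survive a
10-bit sign-extension round trip, mirroring llvm::isInt<10>. A standalone
sketch of that fit test; fitsSignedBits is our hypothetical stand-in:

#include <cassert>
#include <cstdint>

// An immediate fits N signed bits iff it lies in the two's-complement
// range [-2^(N-1), 2^(N-1)).
template <unsigned N> static bool fitsSignedBits(int64_t x) {
  return x >= -(INT64_C(1) << (N - 1)) && x < (INT64_C(1) << (N - 1));
}

int main() {
  assert(fitsSignedBits<10>(511) && fitsSignedBits<10>(-512));   // in range
  assert(!fitsSignedBits<10>(512) && !fitsSignedBits<10>(-513)); // out
  return 0;
}
]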
 //===----------------------------------------------------------------------===//
 // Instructions specific format
 //===----------------------------------------------------------------------===//
@@ -53,10 +61,11 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
 //===----------------------------------------------------------------------===//
 let DecoderNamespace = "Mips64" in {
 /// Arithmetic Instructions (ALU Immediate)
-def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd>, ADDI_FM<0x18>;
+def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd>, ADDI_FM<0x18>,
+            ISA_MIPS3;
 def DADDiu : ArithLogicI<"daddiu", simm16_64, GPR64Opnd, II_DADDIU,
                          immSExt16, add>,
-             ADDI_FM<0x19>, IsAsCheapAsAMove;
+             ADDI_FM<0x19>, IsAsCheapAsAMove, ISA_MIPS3;
 
 let isCodeGenOnly = 1 in {
 def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, GPR64Opnd>,
@@ -73,12 +82,14 @@ def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64>, LUI_FM;
 }
 
 /// Arithmetic Instructions (3-Operand, R-Type)
-def DADD : ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>, ADD_FM<0, 0x2c>;
-def DADDu : ArithLogicR<"daddu", GPR64Opnd, 1, II_DADDU, add>,
-            ADD_FM<0, 0x2d>;
-def DSUBu : ArithLogicR<"dsubu", GPR64Opnd, 0, II_DSUBU, sub>,
-            ADD_FM<0, 0x2f>;
-def DSUB : ArithLogicR<"dsub", GPR64Opnd, 0, II_DSUB, sub>, ADD_FM<0, 0x2e>;
+def DADD : ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>, ADD_FM<0, 0x2c>,
+           ISA_MIPS3;
+def DADDu : ArithLogicR<"daddu", GPR64Opnd, 1, II_DADDU, add>, ADD_FM<0, 0x2d>,
+            ISA_MIPS3;
+def DSUBu : ArithLogicR<"dsubu", GPR64Opnd, 0, II_DSUBU, sub>, ADD_FM<0, 0x2f>,
+            ISA_MIPS3;
+def DSUB : ArithLogicR<"dsub", GPR64Opnd, 0, II_DSUB>, ADD_FM<0, 0x2e>,
+           ISA_MIPS3;
 
 let isCodeGenOnly = 1 in {
 def SLT64 : SetCC_R<"slt", setlt, GPR64Opnd>, ADD_FM<0, 0x2a>;
@@ -91,33 +102,32 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>;
 
 /// Shift Instructions
 def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, shl, immZExt6>,
-           SRA_FM<0x38, 0>;
+           SRA_FM<0x38, 0>, ISA_MIPS3;
 def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, srl, immZExt6>,
-           SRA_FM<0x3a, 0>;
+           SRA_FM<0x3a, 0>, ISA_MIPS3;
 def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, sra, immZExt6>,
-           SRA_FM<0x3b, 0>;
+           SRA_FM<0x3b, 0>, ISA_MIPS3;
 def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>,
-            SRLV_FM<0x14, 0>;
+            SRLV_FM<0x14, 0>, ISA_MIPS3;
 def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>,
-            SRLV_FM<0x16, 0>;
+            SRLV_FM<0x16, 0>, ISA_MIPS3;
 def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, sra>,
-            SRLV_FM<0x17, 0>;
+            SRLV_FM<0x17, 0>, ISA_MIPS3;
 def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, II_DSLL32>,
-             SRA_FM<0x3c, 0>;
+             SRA_FM<0x3c, 0>, ISA_MIPS3;
 def DSRL32 : shift_rotate_imm<"dsrl32", uimm5, GPR64Opnd, II_DSRL32>,
-             SRA_FM<0x3e, 0>;
+             SRA_FM<0x3e, 0>, ISA_MIPS3;
 def DSRA32 : shift_rotate_imm<"dsra32", uimm5, GPR64Opnd, II_DSRA32>,
-             SRA_FM<0x3f, 0>;
+             SRA_FM<0x3f, 0>, ISA_MIPS3;
 
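[Editor's note: the 6-bit doubleword shift range is split across two 5-bit
encodings: dsll/dsrl/dsra take amounts 0-31, while the *32 forms shift by
shamt+32, which is why the Subtract32 transform above folds the upper range
back onto a 5-bit field. A standalone sketch, illustrative only:

#include <cassert>
#include <cstdint>

// dsll covers shift amounts 0-31; dsll32 shifts by shamt + 32.
static uint64_t dsll(uint64_t rt, unsigned shamt)   { return rt << shamt; }
static uint64_t dsll32(uint64_t rt, unsigned shamt) { return rt << (shamt + 32); }

int main() {
  assert(dsll(1, 31) == (UINT64_C(1) << 31));
  assert(dsll32(1, 8) == (UINT64_C(1) << 40)); // effective amount 40 = 32 + 8
  return 0;
}
]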
 // Rotate Instructions
-let Predicates = [HasMips64r2, HasStdEnc] in {
-  def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, II_DROTR, rotr,
-                               immZExt6>, SRA_FM<0x3a, 1>;
-  def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, II_DROTRV, rotr>,
-               SRLV_FM<0x16, 1>;
-  def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd, II_DROTR32>,
-                SRA_FM<0x3e, 1>;
-}
+def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, II_DROTR, rotr,
+                             immZExt6>,
+            SRA_FM<0x3a, 1>, ISA_MIPS64R2;
+def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, II_DROTRV, rotr>,
+             SRLV_FM<0x16, 1>, ISA_MIPS64R2;
+def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd, II_DROTR32>,
+              SRA_FM<0x3e, 1>, ISA_MIPS64R2;
 
 /// Load and Store Instructions
 /// aligned
@@ -132,9 +142,9 @@ def SH64 : Store<"sh", GPR64Opnd, truncstorei16, II_SH>, LW_FM<0x29>;
 def SW64 : Store<"sw", GPR64Opnd, truncstorei32, II_SW>, LW_FM<0x2b>;
 }
 
-def LWu : Load<"lwu", GPR64Opnd, zextloadi32, II_LWU>, LW_FM<0x27>;
-def LD : Load<"ld", GPR64Opnd, load, II_LD>, LW_FM<0x37>;
-def SD : Store<"sd", GPR64Opnd, store, II_SD>, LW_FM<0x3f>;
+def LWu : Load<"lwu", GPR64Opnd, zextloadi32, II_LWU>, LW_FM<0x27>, ISA_MIPS3;
+def LD : Load<"ld", GPR64Opnd, load, II_LD>, LW_FM<0x37>, ISA_MIPS3;
+def SD : Store<"sd", GPR64Opnd, store, II_SD>, LW_FM<0x3f>, ISA_MIPS3;
 
 /// load/store left/right
 let isCodeGenOnly = 1 in {
@@ -144,14 +154,18 @@ def SWL64 : StoreLeftRight<"swl", MipsSWL, GPR64Opnd, II_SWL>, LW_FM<0x2a>;
 def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd, II_SWR>, LW_FM<0x2e>;
 }
 
-def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, II_LDL>, LW_FM<0x1a>;
-def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, II_LDR>, LW_FM<0x1b>;
-def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, II_SDL>, LW_FM<0x2c>;
-def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>;
+def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, II_LDL>, LW_FM<0x1a>,
+          ISA_MIPS3_NOT_32R6_64R6;
+def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, II_LDR>, LW_FM<0x1b>,
+          ISA_MIPS3_NOT_32R6_64R6;
+def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, II_SDL>, LW_FM<0x2c>,
+          ISA_MIPS3_NOT_32R6_64R6;
+def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>,
+          ISA_MIPS3_NOT_32R6_64R6;
 
 /// Load-linked, Store-conditional
-def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>;
-def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>;
+def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3;
+def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3;
 
 /// Jump and Branch Instructions
 let isCodeGenOnly = 1 in {
@@ -169,17 +183,17 @@ def TAILCALL64_R : TailCallReg<GPR64Opnd, JR64>;
 
 /// Multiply and Divide Instructions.
 def DMULT : Mult<"dmult", II_DMULT, GPR64Opnd, [HI0_64, LO0_64]>,
-            MULT_FM<0, 0x1c>;
+            MULT_FM<0, 0x1c>, ISA_MIPS3;
 def DMULTu : Mult<"dmultu", II_DMULTU, GPR64Opnd, [HI0_64, LO0_64]>,
-             MULT_FM<0, 0x1d>;
+             MULT_FM<0, 0x1d>, ISA_MIPS3;
 def PseudoDMULT : MultDivPseudo<DMULT, ACC128, GPR64Opnd, MipsMult,
                                 II_DMULT>;
 def PseudoDMULTu : MultDivPseudo<DMULTu, ACC128, GPR64Opnd, MipsMultu,
                                  II_DMULTU>;
 def DSDIV : Div<"ddiv", II_DDIV, GPR64Opnd, [HI0_64, LO0_64]>,
-            MULT_FM<0, 0x1e>;
+            MULT_FM<0, 0x1e>, ISA_MIPS3;
 def DUDIV : Div<"ddivu", II_DDIVU, GPR64Opnd, [HI0_64, LO0_64]>,
-            MULT_FM<0, 0x1f>;
+            MULT_FM<0, 0x1f>, ISA_MIPS3;
 def PseudoDSDIV : MultDivPseudo<DSDIV, ACC128, GPR64Opnd, MipsDivRem,
                                 II_DDIV, 0, 1, 1>;
 def PseudoDUDIV : MultDivPseudo<DUDIV, ACC128, GPR64Opnd, MipsDivRemU,
                                 II_DDIVU, 0, 1, 1>;
 
 def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>;
 
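[Editor's note: lld/scd above are the 64-bit load-linked/store-conditional
pair; their purpose is atomic read-modify-write retry loops. A hedged C++
illustration of the pattern they enable: on MIPS64, compare_exchange_weak
is expected to lower to an lld/scd loop, though the exact lowering is
target- and compiler-dependent.

#include <atomic>
#include <cassert>
#include <cstdint>

// Load-linked reads a doubleword; store-conditional succeeds only if the
// location was untouched in between, so the loop retries on contention.
static int64_t fetchAdd(std::atomic<int64_t> &v, int64_t delta) {
  int64_t old = v.load(std::memory_order_relaxed);
  while (!v.compare_exchange_weak(old, old + delta)) {
    // 'old' is refreshed on failure; retry, as an lld/scd loop would.
  }
  return old;
}

int main() {
  std::atomic<int64_t> counter{41};
  assert(fetchAdd(counter, 1) == 41 && counter.load() == 42);
  return 0;
}
]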
 /// Sign Ext In Register Instructions.
-def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>;
-def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>;
+def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>,
+            ISA_MIPS32R2;
+def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>,
+            ISA_MIPS32R2;
 }
 
 /// Count Leading
-def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>;
-def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>;
+def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>, ISA_MIPS64;
+def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>, ISA_MIPS64;
 
 /// Double Word Swap Bytes/HalfWords
-def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>;
-def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>;
+def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>, ISA_MIPS64R2;
+def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>, ISA_MIPS64R2;
 
 def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>;
 
@@ -229,8 +245,19 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
                 "sll\t$rd, $rt, 0", [], II_SLL>;
 }
 
+// We need the following pseudo instruction to avoid offset calculation for
+// long branches.  See the comment in file MipsLongBranch.cpp for detailed
+// explanation.
+
+// Expands to: daddiu $dst, $src, %PART($tgt - $baltgt)
+// where %PART may be %hi or %lo, depending on the relocation kind
+// that $tgt is annotated with.
+def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst),
+  (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+
 // Cavium Octeon cmMIPS instructions
-let Predicates = [HasCnMips] in {
+let EncodingPredicates = [], // FIXME: The lack of HasStdEnc is probably a bug
+    AdditionalPredicates = [HasCnMips] in {
 
 class Count1s<string opstr, RegisterOperand RO>:
   InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
@@ -254,6 +281,14 @@ class SetCC64_R<string opstr, PatFrag cond_op> :
   let TwoOperandAliasConstraint = "$rd = $rs";
 }
 
+class SetCC64_I<string opstr, PatFrag cond_op>:
+  InstSE<(outs GPR64Opnd:$rt), (ins GPR64Opnd:$rs, simm10_64:$imm10),
+         !strconcat(opstr, "\t$rt, $rs, $imm10"),
+         [(set GPR64Opnd:$rt, (cond_op GPR64Opnd:$rs, immSExt10_64:$imm10))],
+         II_SEQI_SNEI, FrmI, opstr> {
+  let TwoOperandAliasConstraint = "$rt = $rs";
+}
+
 // Unsigned Byte Add
 let Pattern = [(set GPR64Opnd:$rd,
                     (and (add GPR64Opnd:$rs, GPR64Opnd:$rt), 255))] in
@@ -287,7 +322,25 @@ def DPOP : Count1s<"dpop", GPR64Opnd>, POP_FM<0x2d>;
 
 // Set on equal/not equal
 def SEQ : SetCC64_R<"seq", seteq>, SEQ_FM<0x2a>;
+def SEQi : SetCC64_I<"seqi", seteq>, SEQI_FM<0x2e>;
 def SNE : SetCC64_R<"sne", setne>, SEQ_FM<0x2b>;
+def SNEi : SetCC64_I<"snei", setne>, SEQI_FM<0x2f>;
+
+// 192-bit x 64-bit Unsigned Multiply and Add
+let Defs = [P0, P1, P2] in
+def V3MULU: ArithLogicR<"v3mulu", GPR64Opnd, 0, II_DMUL>,
+            ADD_FM<0x1c, 0x11>;
+
+// 64-bit Unsigned Multiply and Add Move
+let Defs = [MPL0, P0, P1, P2] in
+def VMM0 : ArithLogicR<"vmm0", GPR64Opnd, 0, II_DMUL>,
+           ADD_FM<0x1c, 0x10>;
+
+// 64-bit Unsigned Multiply and Add
+let Defs = [MPL1, MPL2, P0, P1, P2] in
+def VMULU : ArithLogicR<"vmulu", GPR64Opnd, 0, II_DMUL>,
+            ADD_FM<0x1c, 0x0f>;
+
 }
 }
 
@@ -297,12 +350,10 @@ def SNE : SetCC64_R<"sne", setne>, SEQ_FM<0x2b>;
 //===----------------------------------------------------------------------===//
 
 // extended loads
-let Predicates = [HasStdEnc] in {
-  def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
-  def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
-  def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
-  def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64
addr:$src)>; -} +def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>; +def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>; +def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>; +def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>; // hi/lo relocs def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>; @@ -355,8 +406,7 @@ defm : SetgeImmPats; // truncate def : MipsPat<(i32 (trunc GPR64:$src)), - (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>, - Requires<[HasStdEnc]>; + (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>; // 32-to-64-bit extension def : MipsPat<(i64 (anyext GPR32:$src)), (SLL64_32 GPR32:$src)>; @@ -373,64 +423,59 @@ def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst, $src", - (DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, - Requires<[HasMips64]>; -def : InstAlias<"daddu $rs, $rt, $imm", - (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm), - 0>; -def : InstAlias<"dadd $rs, $rt, $imm", - (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm), - 0>; -def : InstAlias<"daddu $rs, $imm", - (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), - 0>; -def : InstAlias<"dadd $rs, $imm", - (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), - 0>; -def : InstAlias<"add $rs, $imm", - (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), - 0>; -def : InstAlias<"addu $rs, $imm", - (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), - 0>; -let isPseudo=1, usesCustomInserter=1, isCodeGenOnly=1 in { -def SUBi : MipsInst<(outs GPR32Opnd: $rt), (ins GPR32Opnd: $rs, simm16: $imm), - "sub\t$rt, $rs, $imm", [], II_DSUB, Pseudo>; -def SUBiu : MipsInst<(outs GPR32Opnd: $rt), (ins GPR32Opnd: $rs, simm16: $imm), - "subu\t$rt, $rs, $imm", [], II_DSUB, Pseudo>; -def DSUBi : MipsInst<(outs GPR64Opnd: $rt), (ins GPR64Opnd: $rs, simm16_64: $imm), - "ssub\t$rt, $rs, $imm", [], II_DSUB, Pseudo>; -def DSUBiu : MipsInst<(outs GPR64Opnd: $rt), (ins GPR64Opnd: $rs, simm16_64: $imm), - "ssubu\t$rt, $rs, $imm", [], II_DSUB, Pseudo>; -} -def : InstAlias<"dsubu $rt, $rs, $imm", - (DSUBiu GPR64Opnd:$rt, GPR64Opnd:$rs, simm16_64: $imm), - 0>; -def : InstAlias<"sub $rs, $imm", - (SUBi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), - 0>; -def : InstAlias<"subu $rs, $imm", - (SUBiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), - 0>; -def : InstAlias<"dsub $rs, $imm", - (DSUBi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), - 0>; -def : InstAlias<"dsubu $rs, $imm", - (DSUBiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), - 0>; +def : MipsInstAlias<"move $dst, $src", + (DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, + GPR_64; +def : MipsInstAlias<"daddu $rs, $rt, $imm", + (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm), + 0>; +def : MipsInstAlias<"dadd $rs, $rt, $imm", + (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm), + 0>; +def : MipsInstAlias<"daddu $rs, $imm", + (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), + 0>; +def : MipsInstAlias<"dadd $rs, $imm", + (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), + 0>; +def : MipsInstAlias<"add $rs, $imm", + (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), + 0>; +def : MipsInstAlias<"addu $rs, $imm", + (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), + 0>; +def : MipsInstAlias<"dsll $rd, $rt, $rs", + (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, + ISA_MIPS3; +def : 
MipsInstAlias<"dsubu $rt, $rs, $imm", + (DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs, + InvertedImOperand64:$imm), 0>; +def : MipsInstAlias<"dsub $rs, $imm", + (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, + InvertedImOperand64:$imm), + 0>; +def : MipsInstAlias<"dsubu $rs, $imm", + (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, + InvertedImOperand64:$imm), + 0>; +def : MipsInstAlias<"dsra $rd, $rt, $rs", + (DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, + ISA_MIPS3; +def : MipsInstAlias<"dsrl $rd, $rt, $rs", + (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, + ISA_MIPS3; /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64", Predicates = [HasMips64] in { def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>; -def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>; -def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>; -def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>; +def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3; +def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3; +def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3; } // Two operand (implicit 0 selector) versions: -def : InstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; -def : InstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; -def : InstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; -def : InstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; +def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; +def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; +def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; +def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td new file mode 100644 index 0000000..f971218 --- /dev/null +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -0,0 +1,88 @@ +//=- Mips64r6InstrInfo.td - Mips64r6 Instruction Information -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes Mips64r6 instructions. 
+//
+//===----------------------------------------------------------------------===//
+
+// Notes about removals/changes from MIPS32r6:
+// Reencoded: dclo, dclz
+// Reencoded: lld, scd
+// Removed: daddi
+// Removed: ddiv, ddivu, dmult, dmultu
+// Removed: div, divu
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Encodings
+//
+//===----------------------------------------------------------------------===//
+
+class DALIGN_ENC : SPECIAL3_DALIGN_FM<OPCODE6_DALIGN>;
+class DAUI_ENC : DAUI_FM;
+class DAHI_ENC : REGIMM_FM<OPCODE5_DAHI>;
+class DATI_ENC : REGIMM_FM<OPCODE5_DATI>;
+class DBITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_DBITSWAP>;
+class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>;
+class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>;
+class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>;
+class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>;
+class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b111000>;
+class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b111001>;
+class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b111000>;
+class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b111001>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Descriptions
+//
+//===----------------------------------------------------------------------===//
+
+class AHI_ATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+  dag OutOperandList = (outs GPROpnd:$rs);
+  dag InOperandList = (ins GPROpnd:$rt, simm16:$imm);
+  string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
+  string Constraints = "$rs = $rt";
+}
+
+class DALIGN_DESC : ALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3>;
+class DAHI_DESC : AHI_ATI_DESC_BASE<"dahi", GPR64Opnd>;
+class DATI_DESC : AHI_ATI_DESC_BASE<"dati", GPR64Opnd>;
+class DAUI_DESC : AUI_DESC_BASE<"daui", GPR64Opnd>;
+class DBITSWAP_DESC : BITSWAP_DESC_BASE<"dbitswap", GPR64Opnd>;
+class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd>;
+class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd>;
+class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd>;
+class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd>;
+class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd>;
+class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd>;
+class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd>;
+class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Definitions
+//
+//===----------------------------------------------------------------------===//
+
+def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6;
+def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
+def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
+def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
+def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6;
+def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6;
+def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6;
+// def DLSA; // See MSA
+def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6;
+def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6;
+def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6;
+def DMUHU: DMUHU_ENC, DMUHU_DESC, ISA_MIPS64R6;
+def DMUL_R6: DMUL_R6_ENC, DMUL_R6_DESC, ISA_MIPS64R6;
+def DMULU: DMULU_ENC, DMULU_DESC, ISA_MIPS64R6;
+def LDPC;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index d5df855..6df90aa 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "mips-asm-printer"
 #include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
 #include
"MCTargetDesc/MipsMCNaCl.h" @@ -52,6 +51,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-asm-printer" + MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() { return static_cast(*OutStreamer.getTargetStreamer()); } @@ -147,7 +148,8 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { // removing another test for this situation downstream in the // callchain. // - if (I->isPseudo() && !Subtarget->inMips16Mode()) + if (I->isPseudo() && !Subtarget->inMips16Mode() + && !isLongBranchPseudo(I->getOpcode())) llvm_unreachable("Pseudo opcode found in EmitInstruction()"); MCInst TmpInst0; @@ -285,9 +287,8 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { if (Subtarget->inMicroMipsMode()) TS.emitDirectiveSetMicroMips(); - // leave out until FSF available gas has micromips changes - // else - // TS.emitDirectiveSetNoMicroMips(); + else + TS.emitDirectiveSetNoMicroMips(); if (Subtarget->inMips16Mode()) TS.emitDirectiveSetMips16(); @@ -621,16 +622,29 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // TODO: Need to add -mabicalls and -mno-abicalls flags. // Currently we assume that -mabicalls is the default. - getTargetStreamer().emitDirectiveAbiCalls(); - Reloc::Model RM = Subtarget->getRelocationModel(); - if (RM == Reloc::Static && !Subtarget->hasMips64()) - getTargetStreamer().emitDirectiveOptionPic0(); + bool IsABICalls = true; + if (IsABICalls) { + getTargetStreamer().emitDirectiveAbiCalls(); + Reloc::Model RM = Subtarget->getRelocationModel(); + // FIXME: This condition should be a lot more complicated that it is here. + // Ideally it should test for properties of the ABI and not the ABI + // itself. + // For the moment, I'm only correcting enough to make MIPS-IV work. + if (RM == Reloc::Static && !Subtarget->isABI_N64()) + getTargetStreamer().emitDirectiveOptionPic0(); + } // Tell the assembler which ABI we are using std::string SectionName = std::string(".mdebug.") + getCurrentABIString(); OutStreamer.SwitchSection(OutContext.getELFSection( SectionName, ELF::SHT_PROGBITS, 0, SectionKind::getDataRel())); + // NaN: At the moment we only support: + // 1. .nan legacy (default) + // 2. .nan 2008 + Subtarget->isNaN2008() ? getTargetStreamer().emitDirectiveNaN2008() + : getTargetStreamer().emitDirectiveNaNLegacy(); + // TODO: handle O64 ABI if (Subtarget->isABI_EABI()) { @@ -824,7 +838,7 @@ void MipsAsmPrinter::EmitFPCallStub( const MCSectionELF *M = OutContext.getELFSection( ".mips16.call.fp." + std::string(Symbol), ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR, SectionKind::getText()); - OutStreamer.SwitchSection(M, 0); + OutStreamer.SwitchSection(M, nullptr); // // .align 2 // @@ -941,6 +955,12 @@ void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) { } } +bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const { + return (Opcode == Mips::LONG_BRANCH_LUi + || Opcode == Mips::LONG_BRANCH_ADDiu + || Opcode == Mips::LONG_BRANCH_DADDiu); +} + // Force static initialization. 
extern "C" void LLVMInitializeMipsAsmPrinter() { RegisterAsmPrinter X(TheMipsTarget); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 3e9093e..e82b145 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -75,6 +75,8 @@ private: void NaClAlignIndirectJumpTargets(MachineFunction &MF); + bool isLongBranchPseudo(int Opcode) const; + public: const MipsSubtarget *Subtarget; @@ -82,18 +84,18 @@ public: MipsMCInstLower MCInstLowering; explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), MCP(0), InConstantPool(false), + : AsmPrinter(TM, Streamer), MCP(nullptr), InConstantPool(false), MCInstLowering(*this) { Subtarget = &TM.getSubtarget(); } - virtual const char *getPassName() const { + const char *getPassName() const override { return "Mips Assembly Printer"; } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual void EmitConstantPool() override { + void EmitConstantPool() override { bool UsingConstantPools = (Subtarget->inMips16Mode() && Subtarget->useConstantIslands()); if (!UsingConstantPools) @@ -101,30 +103,30 @@ public: // we emit constant pools customly! } - void EmitInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) override; void printSavedRegsBitmask(); void emitFrameDirective(); const char *getCurrentABIString() const; - virtual void EmitFunctionEntryLabel(); - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd(); - virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock* - MBB) const; + void EmitFunctionEntryLabel() override; + void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; + bool isBlockOnlyReachableByFallthrough( + const MachineBasicBlock* MBB) const override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); void printUnsignedImm8(const MachineInstr *MI, int opNum, raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O); void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O); void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); - void EmitStartOfAsmFile(Module &M); - void EmitEndOfAsmFile(Module &M); + const char *Modifier = nullptr); + void EmitStartOfAsmFile(Module &M) override; + void EmitEndOfAsmFile(Module &M) override; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); }; } diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 615310f..c83d880 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -245,8 +245,8 @@ def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, def CSR_O32_FP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 20), RA, FP, (sequence "S%u", 7, 0))>; -def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64, - D23_64, D22_64, D21_64, RA_64, FP_64, GP_64, +def CSR_N32 : CalleeSavedRegs<(add D20_64, D22_64, D24_64, D26_64, D28_64, + D30_64, RA_64, FP_64, GP_64, 
(sequence "S%u_64", 7, 0))>; def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64, diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index ea49086..13fa546 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -12,7 +12,6 @@ // //===---------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsInstrInfo.h" @@ -41,6 +40,8 @@ using namespace llvm; +#define DEBUG_TYPE "jit" + STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { @@ -56,7 +57,7 @@ class MipsCodeEmitter : public MachineFunctionPass { const std::vector *MJTEs; bool IsPIC; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired (); MachineFunctionPass::getAnalysisUsage(AU); } @@ -65,13 +66,13 @@ class MipsCodeEmitter : public MachineFunctionPass { public: MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), JTI(0), II(0), TD(0), - TM(tm), MCE(mce), MCPEs(0), MJTEs(0), + : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr), + TM(tm), MCE(mce), MCPEs(nullptr), MJTEs(nullptr), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "Mips Machine Code Emitter"; } @@ -109,6 +110,12 @@ private: unsigned getBranchTargetOpValueMM(const MachineInstr &MI, unsigned OpNo) const; + unsigned getBranchTarget21OpValue(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getBranchTarget26OpValue(const MachineInstr &MI, + unsigned OpNo) const; + unsigned getJumpOffset16OpValue(const MachineInstr &MI, unsigned OpNo) const; + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const; @@ -116,6 +123,7 @@ private: unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const; /// Expand pseudo instructions with accumulator register operands. 
void expandACCInstr(MachineBasicBlock::instr_iterator MI, @@ -138,7 +146,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) { TD = Target.getDataLayout(); Subtarget = &TM.getSubtarget (); MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = 0; + MJTEs = nullptr; if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); JTI->Initialize(MF, IsPIC, Subtarget->isLittle()); MCE.setModuleInfo(&getAnalysis ()); @@ -201,6 +209,24 @@ unsigned MipsCodeEmitter::getBranchTargetOpValueMM(const MachineInstr &MI, return 0; } +unsigned MipsCodeEmitter::getBranchTarget21OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getBranchTarget26OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + +unsigned MipsCodeEmitter::getJumpOffset16OpValue(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const { MachineOperand MO = MI.getOperand(OpNo); @@ -247,6 +273,12 @@ unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI, return 0; } +unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 567eef9..7177f65 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -104,9 +104,9 @@ multiclass MovnPats, - ADD_FM<0, 0xa>; + ADD_FM<0, 0xa>, INSN_MIPS4_32; -let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { +let isCodeGenOnly = 1 in { def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, II_MOVZ>, ADD_FM<0, 0xa>; def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, II_MOVZ>, @@ -116,9 +116,9 @@ let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { } def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>, - ADD_FM<0, 0xb>; + ADD_FM<0, 0xb>, INSN_MIPS4_32; -let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { +let isCodeGenOnly = 1 in { def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, II_MOVN>, ADD_FM<0, 0xb>; def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, II_MOVN>, @@ -128,118 +128,112 @@ let Predicates = [HasStdEnc], isCodeGenOnly = 1 in { } def MOVZ_I_S : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>, - CMov_I_F_FM<18, 16>; + CMov_I_F_FM<18, 16>, INSN_MIPS4_32; let isCodeGenOnly = 1 in def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, II_MOVZ_S>, - CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]>; + CMov_I_F_FM<18, 16>, AdditionalRequires<[HasMips64]>; def MOVN_I_S : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, II_MOVN_S>, - CMov_I_F_FM<19, 16>; + CMov_I_F_FM<19, 16>, INSN_MIPS4_32; let isCodeGenOnly = 1 in def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, II_MOVN_S>, - CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]>; + CMov_I_F_FM<19, 16>, AdditionalRequires<[IsGP64bit]>; -let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVZ_I_D32 : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, - II_MOVZ_D>, CMov_I_F_FM<18, 17>; - def 
MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, - II_MOVN_D>, CMov_I_F_FM<19, 17>; -} +def MOVZ_I_D32 : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, + II_MOVZ_D>, CMov_I_F_FM<18, 17>, + INSN_MIPS4_32, FGR_32; +def MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, + II_MOVN_D>, CMov_I_F_FM<19, 17>, + INSN_MIPS4_32, FGR_32; -let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { +let DecoderNamespace = "Mips64" in { def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, II_MOVZ_D>, - CMov_I_F_FM<18, 17>; + CMov_I_F_FM<18, 17>, INSN_MIPS4_32, FGR_64; def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, II_MOVN_D>, - CMov_I_F_FM<19, 17>; + CMov_I_F_FM<19, 17>, INSN_MIPS4_32, FGR_64; let isCodeGenOnly = 1 in { def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, - II_MOVZ_D>, CMov_I_F_FM<18, 17>; + II_MOVZ_D>, CMov_I_F_FM<18, 17>, FGR_64; def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, - II_MOVN_D>, CMov_I_F_FM<19, 17>; + II_MOVN_D>, CMov_I_F_FM<19, 17>, FGR_64; } } def MOVT_I : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, II_MOVT, MipsCMovFP_T>, - CMov_F_I_FM<1>; + CMov_F_I_FM<1>, INSN_MIPS4_32; let isCodeGenOnly = 1 in def MOVT_I64 : CMov_F_I_FT<"movt", GPR64Opnd, II_MOVT, MipsCMovFP_T>, - CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]>; + CMov_F_I_FM<1>, AdditionalRequires<[IsGP64bit]>; def MOVF_I : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, II_MOVF, MipsCMovFP_F>, - CMov_F_I_FM<0>; + CMov_F_I_FM<0>, INSN_MIPS4_32; let isCodeGenOnly = 1 in def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, II_MOVF, MipsCMovFP_F>, - CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]>; + CMov_F_I_FM<0>, AdditionalRequires<[IsGP64bit]>; def MOVT_S : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, II_MOVT_S, MipsCMovFP_T>, - CMov_F_F_FM<16, 1>; + CMov_F_F_FM<16, 1>, INSN_MIPS4_32; def MOVF_S : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, II_MOVF_S, MipsCMovFP_F>, - CMov_F_F_FM<16, 0>; + CMov_F_F_FM<16, 0>, INSN_MIPS4_32; -let Predicates = [NotFP64bit, HasStdEnc] in { - def MOVT_D32 : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D, - MipsCMovFP_T>, CMov_F_F_FM<17, 1>; - def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D, - MipsCMovFP_F>, CMov_F_F_FM<17, 0>; -} +def MOVT_D32 : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D, + MipsCMovFP_T>, CMov_F_F_FM<17, 1>, + INSN_MIPS4_32, FGR_32; +def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D, + MipsCMovFP_F>, CMov_F_F_FM<17, 0>, + INSN_MIPS4_32, FGR_32; -let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { +let DecoderNamespace = "Mips64" in { def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, II_MOVT_D, MipsCMovFP_T>, - CMov_F_F_FM<17, 1>; + CMov_F_F_FM<17, 1>, INSN_MIPS4_32, FGR_64; def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, II_MOVF_D, MipsCMovFP_F>, - CMov_F_F_FM<17, 0>; + CMov_F_F_FM<17, 0>, INSN_MIPS4_32, FGR_64; } // Instantiation of conditional move patterns. 
defm : MovzPats0; defm : MovzPats1; defm : MovzPats2; -let Predicates = [HasMips64, HasStdEnc] in { - defm : MovzPats0; - defm : MovzPats0; - defm : MovzPats0; - defm : MovzPats1; - defm : MovzPats1; - defm : MovzPats1; - defm : MovzPats2; - defm : MovzPats2; - defm : MovzPats2; -} + +defm : MovzPats0, GPR_64; +defm : MovzPats0, + GPR_64; +defm : MovzPats0, + GPR_64; +defm : MovzPats1, GPR_64; +defm : MovzPats1, GPR_64; +defm : MovzPats1, GPR_64; +defm : MovzPats2, GPR_64; +defm : MovzPats2, GPR_64; +defm : MovzPats2, GPR_64; defm : MovnPats; -let Predicates = [HasMips64, HasStdEnc] in { - defm : MovnPats; - defm : MovnPats; - defm : MovnPats; -} + +defm : MovnPats, GPR_64; +defm : MovnPats, GPR_64; +defm : MovnPats, GPR_64; defm : MovzPats0; defm : MovzPats1; defm : MovnPats; -let Predicates = [HasMips64, HasStdEnc] in { - defm : MovzPats0; - defm : MovzPats1; - defm : MovnPats; -} -let Predicates = [NotFP64bit, HasStdEnc] in { - defm : MovzPats0; - defm : MovzPats1; - defm : MovnPats; -} -let Predicates = [IsFP64bit, HasStdEnc] in { - defm : MovzPats0; - defm : MovzPats0; - defm : MovzPats1; - defm : MovzPats1; - defm : MovnPats; - defm : MovnPats; -} +defm : MovzPats0, + GPR_64; +defm : MovzPats1, GPR_64; +defm : MovnPats, GPR_64; + +defm : MovzPats0, FGR_32; +defm : MovzPats1, FGR_32; +defm : MovnPats, FGR_32; + +defm : MovzPats0, FGR_64; +defm : MovzPats0, + FGR_64; +defm : MovzPats1, FGR_64; +defm : MovzPats1, FGR_64; +defm : MovnPats, FGR_64; +defm : MovnPats, FGR_64; diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index e5642ba..a37062f 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -21,8 +21,6 @@ // // -#define DEBUG_TYPE "mips-constant-islands" - #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips16InstrInfo.h" @@ -47,6 +45,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-constant-islands" + STATISTIC(NumCPEs, "Number of constpool entries"); STATISTIC(NumSplit, "Number of uncond branches inserted"); STATISTIC(NumCBrFixed, "Number of cond branches fixed"); @@ -368,14 +368,14 @@ namespace { : MachineFunctionPass(ID), TM(tm), IsPIC(TM.getRelocationModel() == Reloc::PIC_), ABI(TM.getSubtarget().getTargetABI()), - STI(&TM.getSubtarget()), MF(0), MCP(0), + STI(&TM.getSubtarget()), MF(nullptr), MCP(nullptr), PrescannedForConstants(false){} - virtual const char *getPassName() const { + const char *getPassName() const override { return "Mips Constant Islands"; } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; void doInitialPlacement(std::vector &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); @@ -628,7 +628,7 @@ MipsConstantIslands::CPEntry if (CPEs[i].CPEMI == CPEMI) return &CPEs[i]; } - return NULL; + return nullptr; } /// getCPELogAlign - Returns the required alignment of the constant pool entry @@ -1065,7 +1065,7 @@ bool MipsConstantIslands::decrementCPEReferenceCount(unsigned CPI, assert(CPE && "Unexpected!"); if (--CPE->RefCount == 0) { removeDeadCPEMI(CPEMI); - CPE->CPEMI = NULL; + CPE->CPEMI = nullptr; --NumCPEs; return true; } @@ -1098,7 +1098,7 @@ int MipsConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) if (CPEs[i].CPEMI == CPEMI) continue; // Removing CPEs can leave empty entries, skip - if (CPEs[i].CPEMI == NULL) + if (CPEs[i].CPEMI == nullptr) continue; if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(), U.NegOk)) 
{ @@ -1154,7 +1154,7 @@ int MipsConstantIslands::findLongFormInRangeCPEntry if (CPEs[i].CPEMI == CPEMI) continue; // Removing CPEs can leave empty entries, skip - if (CPEs[i].CPEMI == NULL) + if (CPEs[i].CPEMI == nullptr) continue; if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getLongFormMaxDisp(), U.NegOk)) { @@ -1486,7 +1486,7 @@ bool MipsConstantIslands::removeUnusedCPEntries() { for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) { if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) { removeDeadCPEMI(CPEs[j].CPEMI); - CPEs[j].CPEMI = NULL; + CPEs[j].CPEMI = nullptr; MadeChange = true; } } diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index eef9f38..d6c7cac 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "delay-slot-filler" - #include "MCTargetDesc/MipsMCNaCl.h" #include "Mips.h" #include "MipsInstrInfo.h" @@ -33,6 +31,8 @@ using namespace llvm; +#define DEBUG_TYPE "delay-slot-filler" + STATISTIC(FilledSlots, "Number of delay slots filled"); STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that" " are not NOP."); @@ -124,7 +124,7 @@ namespace { public: NoMemInstr() : InspectMemInstr(true) {} private: - virtual bool hasHazard_(const MachineInstr &MI) { return true; } + bool hasHazard_(const MachineInstr &MI) override { return true; } }; /// This subclass accepts loads from stacks and constant loads. @@ -132,7 +132,7 @@ namespace { public: LoadFromStackOrConst() : InspectMemInstr(false) {} private: - virtual bool hasHazard_(const MachineInstr &MI); + bool hasHazard_(const MachineInstr &MI) override; }; /// This subclass uses memory dependence information to determine whether a @@ -142,19 +142,21 @@ namespace { MemDefsUses(const MachineFrameInfo *MFI); private: - virtual bool hasHazard_(const MachineInstr &MI); + typedef PointerUnion ValueType; + + bool hasHazard_(const MachineInstr &MI) override; /// Update Defs and Uses. Return true if there exist dependences that /// disqualify the delay slot candidate between V and values in Uses and /// Defs. - bool updateDefsUses(const Value *V, bool MayStore); + bool updateDefsUses(ValueType V, bool MayStore); /// Get the list of underlying objects of MI's memory operand. bool getUnderlyingObjects(const MachineInstr &MI, - SmallVectorImpl &Objects) const; + SmallVectorImpl &Objects) const; const MachineFrameInfo *MFI; - SmallPtrSet Uses, Defs; + SmallPtrSet Uses, Defs; /// Flags indicating whether loads or stores with no underlying objects have /// been seen. 
@@ -166,11 +168,11 @@ namespace { Filler(TargetMachine &tm) : MachineFunctionPass(ID), TM(tm) { } - virtual const char *getPassName() const { + const char *getPassName() const override { return "Mips Delay Slot Filler"; } - bool runOnMachineFunction(MachineFunction &F) { + bool runOnMachineFunction(MachineFunction &F) override { bool Changed = false; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) @@ -178,7 +180,7 @@ namespace { return Changed; } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -399,16 +401,15 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) { if (MI.mayStore()) return true; - if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue()) + if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue()) return true; - const Value *V = (*MI.memoperands_begin())->getValue(); - - if (isa(V)) - return false; - - if (const PseudoSourceValue *PSV = dyn_cast(V)) - return !PSV->isConstant(0) && V != PseudoSourceValue::getStack(); + if (const PseudoSourceValue *PSV = + (*MI.memoperands_begin())->getPseudoValue()) { + if (isa(PSV)) + return false; + return !PSV->isConstant(nullptr) && PSV != PseudoSourceValue::getStack(); + } return true; } @@ -419,11 +420,11 @@ MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_) bool MemDefsUses::hasHazard_(const MachineInstr &MI) { bool HasHazard = false; - SmallVector Objs; + SmallVector Objs; // Check underlying object list. if (getUnderlyingObjects(MI, Objs)) { - for (SmallVectorImpl::const_iterator I = Objs.begin(); + for (SmallVectorImpl::const_iterator I = Objs.begin(); I != Objs.end(); ++I) HasHazard |= updateDefsUses(*I, MI.mayStore()); @@ -440,7 +441,7 @@ bool MemDefsUses::hasHazard_(const MachineInstr &MI) { return HasHazard; } -bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) { +bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) { if (MayStore) return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad; @@ -450,10 +451,20 @@ bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) { bool MemDefsUses:: getUnderlyingObjects(const MachineInstr &MI, - SmallVectorImpl &Objects) const { - if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue()) + SmallVectorImpl &Objects) const { + if (!MI.hasOneMemOperand() || + (!(*MI.memoperands_begin())->getValue() && + !(*MI.memoperands_begin())->getPseudoValue())) return false; + if (const PseudoSourceValue *PSV = + (*MI.memoperands_begin())->getPseudoValue()) { + if (!PSV->isAliased(MFI)) + return false; + Objects.push_back(PSV); + return true; + } + const Value *V = (*MI.memoperands_begin())->getValue(); SmallVector Objs; @@ -461,10 +472,7 @@ getUnderlyingObjects(const MachineInstr &MI, for (SmallVectorImpl::iterator I = Objs.begin(), E = Objs.end(); I != E; ++I) { - if (const PseudoSourceValue *PSV = dyn_cast(*I)) { - if (PSV->isAliased(MFI)) - return false; - } else if (!isIdentifiedObject(V)) + if (!isIdentifiedObject(V)) return false; Objects.push_back(*I); @@ -602,7 +610,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const { RegDefsUses RegDU(TM); bool HasMultipleSuccs = false; BB2BrMap BrMap; - OwningPtr IM; + std::unique_ptr IM; Iter Filler; // Iterate over SuccBB's predecessor list. 
@@ -636,7 +644,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const { MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const { if (B.succ_empty()) - return NULL; + return nullptr; // Select the successor with the larget edge weight. auto &Prob = getAnalysis(); @@ -645,14 +653,14 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const { const MachineBasicBlock *Dst1) { return Prob.getEdgeWeight(&B, Dst0) < Prob.getEdgeWeight(&B, Dst1); }); - return S->isLandingPad() ? NULL : S; + return S->isLandingPad() ? nullptr : S; } std::pair Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const { const MipsInstrInfo *TII = static_cast(TM.getInstrInfo()); - MachineBasicBlock *TrueBB = 0, *FalseBB = 0; + MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr; SmallVector BranchInstrs; SmallVector Cond; @@ -660,11 +668,11 @@ Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const { TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs); if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch)) - return std::make_pair(R, (MachineInstr*)NULL); + return std::make_pair(R, nullptr); if (R != MipsInstrInfo::BT_CondUncond) { if (!hasUnoccupiedSlot(BranchInstrs[0])) - return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL); + return std::make_pair(MipsInstrInfo::BT_None, nullptr); assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst))); @@ -681,7 +689,7 @@ Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const { if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst)) return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]); - return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL); + return std::make_pair(MipsInstrInfo::BT_None, nullptr); } bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ, diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp new file mode 100644 index 0000000..268a0ed --- /dev/null +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -0,0 +1,283 @@ +//===-- MipsastISel.cpp - Mips FastISel implementation +//---------------------===// + +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "MipsRegisterInfo.h" +#include "MipsISelLowering.h" +#include "MipsMachineFunction.h" +#include "MipsSubtarget.h" + +using namespace llvm; + +namespace { + +// All possible address modes. +typedef struct Address { + enum { RegBase, FrameIndexBase } BaseType; + + union { + unsigned Reg; + int FI; + } Base; + + int64_t Offset; + + // Innocuous defaults for our address. + Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; } +} Address; + +class MipsFastISel final : public FastISel { + + /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can + /// make the right decision when generating code for different targets. + const MipsSubtarget *Subtarget; + Module &M; + const TargetMachine &TM; + const TargetInstrInfo &TII; + const TargetLowering &TLI; + MipsFunctionInfo *MFI; + + // Convenience variables to avoid some queries. 
+ LLVMContext *Context; + + bool TargetSupported; + +public: + explicit MipsFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) + : FastISel(funcInfo, libInfo), + M(const_cast(*funcInfo.Fn->getParent())), + TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), + TLI(*TM.getTargetLowering()) { + Subtarget = &TM.getSubtarget(); + MFI = funcInfo.MF->getInfo(); + Context = &funcInfo.Fn->getContext(); + TargetSupported = ((Subtarget->getRelocationModel() == Reloc::PIC_) && + (Subtarget->hasMips32r2() && (Subtarget->isABI_O32()))); + } + + bool TargetSelectInstruction(const Instruction *I) override; + unsigned TargetMaterializeConstant(const Constant *C) override; + + bool ComputeAddress(const Value *Obj, Address &Addr); + +private: + bool EmitStore(MVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment = 0); + bool SelectRet(const Instruction *I); + bool SelectStore(const Instruction *I); + + bool isTypeLegal(Type *Ty, MVT &VT); + bool isLoadTypeLegal(Type *Ty, MVT &VT); + + unsigned MaterializeFP(const ConstantFP *CFP, MVT VT); + unsigned MaterializeGV(const GlobalValue *GV, MVT VT); + unsigned MaterializeInt(const Constant *C, MVT VT); + unsigned Materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); +}; + +bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) { + EVT evt = TLI.getValueType(Ty, true); + // Only handle simple types. + if (evt == MVT::Other || !evt.isSimple()) + return false; + VT = evt.getSimpleVT(); + + // Handle all legal types, i.e. a register that will directly hold this + // value. + return TLI.isTypeLegal(VT); +} + +bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { + if (isTypeLegal(Ty, VT)) + return true; + // We will extend this in a later patch: + // If this is a type than can be sign or zero-extended to a basic operation + // go ahead and accept it now. + return false; +} + +bool MipsFastISel::ComputeAddress(const Value *Obj, Address &Addr) { + // This construct looks a big awkward but it is how other ports handle this + // and as this function is more fully completed, these cases which + // return false will have additional code in them. + // + if (isa(Obj)) + return false; + else if (isa(Obj)) + return false; + Addr.Base.Reg = getRegForValue(Obj); + return Addr.Base.Reg != 0; +} + +// Materialize a constant into a register, and return the register +// number (or zero if we failed to handle it). +unsigned MipsFastISel::TargetMaterializeConstant(const Constant *C) { + EVT CEVT = TLI.getValueType(C->getType(), true); + + // Only handle simple types. + if (!CEVT.isSimple()) + return 0; + MVT VT = CEVT.getSimpleVT(); + + if (const ConstantFP *CFP = dyn_cast(C)) + return MaterializeFP(CFP, VT); + else if (const GlobalValue *GV = dyn_cast(C)) + return MaterializeGV(GV, VT); + else if (isa(C)) + return MaterializeInt(C, VT); + + return 0; +} + +bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr, + unsigned Alignment) { + // + // more cases will be handled here in following patches. + // + if (VT != MVT::i32) + return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::SW)) + .addReg(SrcReg) + .addReg(Addr.Base.Reg) + .addImm(Addr.Offset); + return true; +} + +bool MipsFastISel::SelectStore(const Instruction *I) { + Value *Op0 = I->getOperand(0); + unsigned SrcReg = 0; + + // Atomic stores need special handling. + if (cast(I)->isAtomic()) + return false; + + // Verify we have a legal type before going any further. 
+  MVT VT;
+  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+    return false;
+
+  // Get the value to be stored into a register.
+  SrcReg = getRegForValue(Op0);
+  if (SrcReg == 0)
+    return false;
+
+  // See if we can handle this address.
+  Address Addr;
+  if (!ComputeAddress(I->getOperand(1), Addr))
+    return false;
+
+  if (!EmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+    return false;
+  return true;
+}
+
+bool MipsFastISel::SelectRet(const Instruction *I) {
+  const ReturnInst *Ret = cast<ReturnInst>(I);
+
+  if (!FuncInfo.CanLowerReturn)
+    return false;
+  if (Ret->getNumOperands() > 0) {
+    return false;
+  }
+  unsigned RetOpc = Mips::RetRA;
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc));
+  return true;
+}
+
+bool MipsFastISel::TargetSelectInstruction(const Instruction *I) {
+  if (!TargetSupported)
+    return false;
+  switch (I->getOpcode()) {
+  default:
+    break;
+  case Instruction::Store:
+    return SelectStore(I);
+  case Instruction::Ret:
+    return SelectRet(I);
+  }
+  return false;
+}
+}
+
+unsigned MipsFastISel::MaterializeFP(const ConstantFP *CFP, MVT VT) {
+  return 0;
+}
+
+unsigned MipsFastISel::MaterializeGV(const GlobalValue *GV, MVT VT) {
+  // For now 32-bit only.
+  if (VT != MVT::i32)
+    return 0;
+  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+  unsigned DestReg = createResultReg(RC);
+  const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+  bool IsThreadLocal = GVar && GVar->isThreadLocal();
+  // TLS not supported at this time.
+  if (IsThreadLocal)
+    return 0;
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LW), DestReg)
+      .addReg(MFI->getGlobalBaseReg())
+      .addGlobalAddress(GV, 0, MipsII::MO_GOT);
+  return DestReg;
+}
+unsigned MipsFastISel::MaterializeInt(const Constant *C, MVT VT) {
+  if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+    return 0;
+  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+  const ConstantInt *CI = cast<ConstantInt>(C);
+  int64_t Imm;
+  if (CI->isNegative())
+    Imm = CI->getSExtValue();
+  else
+    Imm = CI->getZExtValue();
+  return Materialize32BitInt(Imm, RC);
+}
+
+unsigned MipsFastISel::Materialize32BitInt(int64_t Imm,
+                                           const TargetRegisterClass *RC) {
+  unsigned ResultReg = createResultReg(RC);
+
+  if (isInt<16>(Imm)) {
+    unsigned Opc = Mips::ADDiu;
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+        .addReg(Mips::ZERO)
+        .addImm(Imm);
+    return ResultReg;
+  } else if (isUInt<16>(Imm)) {
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi),
+            ResultReg)
+        .addReg(Mips::ZERO)
+        .addImm(Imm);
+    return ResultReg;
+  }
+  unsigned Lo = Imm & 0xFFFF;
+  unsigned Hi = (Imm >> 16) & 0xFFFF;
+  if (Lo) {
+    // Both Lo and Hi have nonzero bits.
+ unsigned TmpReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi), + TmpReg).addImm(Hi); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi), + ResultReg) + .addReg(TmpReg) + .addImm(Lo); + + } else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi), + ResultReg).addImm(Hi); + } + return ResultReg; +} + +namespace llvm { +FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo, + const TargetLibraryInfo *libInfo) { + return new MipsFastISel(funcInfo, libInfo); +} +} diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index eb9d49f..8ba35fa 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -110,7 +110,7 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const { Offset = std::max(Offset, -MFI->getObjectOffset(I)); // Conservatively assume all callee-saved registers will be saved. - for (const uint16_t *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { + for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) { unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize(); Offset = RoundUpToAlignment(Offset + Size, Size); } diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index 6a5f79d..e10a3a5 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -32,7 +32,7 @@ public: static const MipsFrameLowering *create(MipsTargetMachine &TM, const MipsSubtarget &ST); - bool hasFP(const MachineFunction &MF) const; + bool hasFP(const MachineFunction &MF) const override; protected: uint64_t estimateStackSize(const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 941aeac..90cff63 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-isel" #include "MipsISelDAGToDAG.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" @@ -36,6 +35,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "mips-isel" + //===----------------------------------------------------------------------===// // Instruction Selector Implementation //===----------------------------------------------------------------------===// @@ -182,7 +183,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { if (Node->isMachineOpcode()) { DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); Node->setNodeId(-1); - return NULL; + return nullptr; } // See if subclasses can handle this node. 
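The Materialize32BitInt helper added above follows the usual MIPS recipe for building a 32-bit constant out of 16-bit immediate fields: ADDiu from $zero when the value fits a signed 16-bit field, ORi from $zero when it fits an unsigned 16-bit field, and LUi (plus an ORi when the low half is nonzero) otherwise. As a minimal sketch of that decision logic in plain C++ (names here are illustrative only, not part of the patch or of the LLVM API):

    #include <cstdint>

    // Which instruction sequence materializes a given 32-bit constant.
    enum class Seq { AddiuZero, OriZero, LuiOnly, LuiOri };

    Seq classifyImm32(int64_t Imm) {
      if (Imm >= -32768 && Imm <= 32767)
        return Seq::AddiuZero;    // addiu rd, $zero, imm (sign-extends)
      if (Imm >= 0 && Imm <= 65535)
        return Seq::OriZero;      // ori rd, $zero, imm (zero-extends)
      uint32_t Lo = Imm & 0xFFFF;
      return Lo ? Seq::LuiOri     // lui rd, hi16; ori rd, rd, lo16
                : Seq::LuiOnly;   // lui alone suffices when lo16 == 0
    }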
@@ -201,8 +202,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
 #ifndef NDEBUG
   case ISD::LOAD:
   case ISD::STORE:
-    assert(cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
-           cast<MemSDNode>(Node)->getAlignment() &&
+    assert((Subtarget.systemSupportsUnalignedAccess() ||
+            cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
+            cast<MemSDNode>(Node)->getAlignment()) &&
            "Unexpected unaligned loads/stores.");
     break;
 #endif
@@ -212,7 +214,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
   SDNode *ResNode = SelectCode(Node);

   DEBUG(errs() << "=> ");
-  if (ResNode == NULL || ResNode == Node)
+  if (ResNode == nullptr || ResNode == Node)
     DEBUG(Node->dump(CurDAG));
   else
     DEBUG(ResNode->dump(CurDAG));
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 4546182..13becb6 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -35,11 +35,11 @@ public:
     : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}

   // Pass Name
-  virtual const char *getPassName() const {
+  const char *getPassName() const override {
     return "MIPS DAG->DAG Pattern Instruction Selection";
   }

-  virtual bool runOnMachineFunction(MachineFunction &MF);
+  bool runOnMachineFunction(MachineFunction &MF) override;

 protected:
   SDNode *getGlobalBaseReg();
@@ -110,7 +110,7 @@ private:
   /// starting at bit zero.
   virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;

-  virtual SDNode *Select(SDNode *N);
+  SDNode *Select(SDNode *N) override;

   virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;

@@ -121,9 +121,9 @@ private:
   virtual void processFunctionAfterISel(MachineFunction &MF) = 0;

-  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                            char ConstraintCode,
-                                            std::vector<SDValue> &OutOps);
+  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                    char ConstraintCode,
+                                    std::vector<SDValue> &OutOps) override;
 };

 /// createMipsISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index abf36da..bfe5ea1 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -11,7 +11,6 @@
 // selection DAG.
 //
 //===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-lower"
 #include "MipsISelLowering.h"
 #include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
@@ -39,6 +38,8 @@
 using namespace llvm;

+#define DEBUG_TYPE "mips-lower"
+
 STATISTIC(NumTailCalls, "Number of tail calls");

 static cl::opt<bool>
@@ -50,16 +51,21 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
                cl::desc("MIPS: Don't trap on integer division by zero."),
                cl::init(false));

-static const uint16_t O32IntRegs[4] = {
+cl::opt<bool>
+EnableMipsFastISel("mips-fast-isel", cl::Hidden,
+                   cl::desc("Allow mips-fast-isel to be used"),
+                   cl::init(false));
+
+static const MCPhysReg O32IntRegs[4] = {
   Mips::A0, Mips::A1, Mips::A2, Mips::A3
 };

-static const uint16_t Mips64IntRegs[8] = {
+static const MCPhysReg Mips64IntRegs[8] = {
   Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
   Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64
 };

-static const uint16_t Mips64DPRegs[8] = {
+static const MCPhysReg Mips64DPRegs[8] = {
   Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
   Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
 };

@@ -198,7 +204,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::PCKEV:             return "MipsISD::PCKEV";
   case MipsISD::PCKOD:             return "MipsISD::PCKOD";
   case MipsISD::INSVE:             return "MipsISD::INSVE";
-  default:                         return NULL;
+  default:                         return nullptr;
   }
 }

@@ -245,12 +251,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::FCOPYSIGN,          MVT::f64,   Custom);
   setOperationAction(ISD::FP_TO_SINT,         MVT::i32,   Custom);

-  if (!TM.Options.NoNaNsFPMath) {
-    setOperationAction(ISD::FABS, MVT::f32, Custom);
-    setOperationAction(ISD::FABS, MVT::f64, Custom);
-  }
-
-  if (hasMips64()) {
+  if (isGP64bit()) {
     setOperationAction(ISD::GlobalAddress,    MVT::i64,   Custom);
     setOperationAction(ISD::BlockAddress,     MVT::i64,   Custom);
     setOperationAction(ISD::GlobalTLSAddress, MVT::i64,   Custom);
@@ -262,14 +263,14 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
     setOperationAction(ISD::FP_TO_SINT,       MVT::i64,   Custom);
   }

-  if (!hasMips64()) {
+  if (!isGP64bit()) {
     setOperationAction(ISD::SHL_PARTS,        MVT::i32,   Custom);
     setOperationAction(ISD::SRA_PARTS,        MVT::i32,   Custom);
     setOperationAction(ISD::SRL_PARTS,        MVT::i32,   Custom);
   }

   setOperationAction(ISD::ADD,                MVT::i32,   Custom);
-  if (hasMips64())
+  if (isGP64bit())
     setOperationAction(ISD::ADD,              MVT::i64,   Custom);

   setOperationAction(ISD::SDIV, MVT::i32, Expand);
@@ -334,11 +335,6 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
   setOperationAction(ISD::FREM,              MVT::f32,   Expand);
   setOperationAction(ISD::FREM,              MVT::f64,   Expand);

-  if (!TM.Options.NoNaNsFPMath) {
-    setOperationAction(ISD::FNEG, MVT::f32, Expand);
-    setOperationAction(ISD::FNEG, MVT::f64, Expand);
-  }
-
   setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);

   setOperationAction(ISD::VAARG,             MVT::Other, Expand);
@@ -356,22 +352,23 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
   setInsertFencesForAtomic(true);

-  if (!Subtarget->hasSEInReg()) {
+  if (!Subtarget->hasMips32r2()) {
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
   }

-  if (!Subtarget->hasBitCount()) {
+  // MIPS16 lacks MIPS32's clz and clo instructions.
+  if (!Subtarget->hasMips32() || Subtarget->inMips16Mode())
     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+  if (!Subtarget->hasMips64())
     setOperationAction(ISD::CTLZ, MVT::i64, Expand);
-  }

-  if (!Subtarget->hasSwap()) {
+  if (!Subtarget->hasMips32r2())
     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  if (!Subtarget->hasMips64r2())
     setOperationAction(ISD::BSWAP, MVT::i64, Expand);
-  }

-  if (hasMips64()) {
+  if (isGP64bit()) {
     setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
     setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom);
     setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom);
@@ -387,7 +384,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::ADD);

-  setMinFunctionAlignment(hasMips64() ? 3 : 2);
+  setMinFunctionAlignment(isGP64bit() ? 3 : 2);

   setStackPointerRegisterToSaveRestore(isN64() ? Mips::SP_64 : Mips::SP);

@@ -406,6 +403,15 @@ const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
   return llvm::createMipsSETargetLowering(TM);
 }

+// Create a fast isel object.
+FastISel *
+MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+                                   const TargetLibraryInfo *libInfo) const {
+  if (!EnableMipsFastISel)
+    return TargetLowering::createFastISel(funcInfo, libInfo);
+  return Mips::createFastISel(funcInfo, libInfo);
+}
+
 EVT MipsTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   if (!VT.isVector())
     return MVT::i32;
@@ -779,7 +785,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
   case ISD::SETCC:              return lowerSETCC(Op, DAG);
   case ISD::VASTART:            return lowerVASTART(Op, DAG);
   case ISD::FCOPYSIGN:          return lowerFCOPYSIGN(Op, DAG);
-  case ISD::FABS:               return lowerFABS(Op, DAG);
   case ISD::FRAMEADDR:          return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:         return lowerRETURNADDR(Op, DAG);
   case ISD::EH_RETURN:          return lowerEH_RETURN(Op, DAG);
@@ -1506,7 +1511,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
                                             MipsII::MO_GPREL);
     SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL,
-                                    DAG.getVTList(MVT::i32), &GA, 1);
+                                    DAG.getVTList(MVT::i32), GA);
     SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
     return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);
   }
@@ -1572,11 +1577,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
     Entry.Ty = PtrTy;
     Args.push_back(Entry);

-    TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy,
-                  false, false, false, false, 0, CallingConv::C,
-                  /*IsTailCall=*/false, /*doesNotRet=*/false,
-                  /*isReturnValueUsed=*/true,
-                  TlsGetAddr, Args, DAG, DL);
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(DL).setChain(DAG.getEntryNode())
+      .setCallee(CallingConv::C, PtrTy, TlsGetAddr, &Args, 0);

     std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

     SDValue Ret = CallResult.first;
@@ -1765,71 +1768,12 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,

 SDValue
 MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
-  if (Subtarget->hasMips64())
+  if (Subtarget->isGP64bit())
     return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasExtractInsert());

   return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasExtractInsert());
 }

-static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
-                           bool HasExtractInsert) {
-  SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
-  SDLoc DL(Op);
-
-  // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
-  // to i32.
-  SDValue X = (Op.getValueType() == MVT::f32) ?
-    DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
-    DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
-                Const1);
-
-  // Clear MSB.
-  if (HasExtractInsert)
-    Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
-                      DAG.getRegister(Mips::ZERO, MVT::i32),
-                      DAG.getConstant(31, MVT::i32), Const1, X);
-  else {
-    SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
-    Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
-  }
-
-  if (Op.getValueType() == MVT::f32)
-    return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res);
-
-  SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
-                             Op.getOperand(0), DAG.getConstant(0, MVT::i32));
-  return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
-}
-
-static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
-                           bool HasExtractInsert) {
-  SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
-  SDLoc DL(Op);
-
-  // Bitcast to integer node.
-  SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
-
-  // Clear MSB.
-  if (HasExtractInsert)
-    Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
-                      DAG.getRegister(Mips::ZERO_64, MVT::i64),
-                      DAG.getConstant(63, MVT::i32), Const1, X);
-  else {
-    SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1);
-    Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1);
-  }
-
-  return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res);
-}
-
-SDValue
-MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
-  if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
-    return lowerFABS64(Op, DAG, Subtarget->hasExtractInsert());
-
-  return lowerFABS32(Op, DAG, Subtarget->hasExtractInsert());
-}
-
 SDValue MipsTargetLowering::
 lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   // check the depth
@@ -1931,7 +1875,7 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
   Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or);

   SDValue Ops[2] = {Lo, Hi};
-  return DAG.getMergeValues(Ops, 2, DL);
+  return DAG.getMergeValues(Ops, DL);
 }

 SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
@@ -1972,7 +1916,7 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                    ShiftRightHi);

   SDValue Ops[2] = {Lo, Hi};
-  return DAG.getMergeValues(Ops, 2, DL);
+  return DAG.getMergeValues(Ops, DL);
 }

 static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
@@ -1988,7 +1932,7 @@ static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
                             DAG.getConstant(Offset, BasePtrVT));

   SDValue Ops[] = { Chain, Ptr, Src };
-  return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
+  return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT,
                                  LD->getMemOperand());
 }

@@ -1997,6 +1941,9 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   LoadSDNode *LD = cast<LoadSDNode>(Op);
   EVT MemVT = LD->getMemoryVT();

+  if (Subtarget->systemSupportsUnalignedAccess())
+    return Op;
+
   // Return if load is aligned or if MemVT is neither i32 nor i64.
if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) || ((MemVT != MVT::i32) && (MemVT != MVT::i64))) @@ -2051,7 +1998,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32); SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32); SDValue Ops[] = { SRL, LWR.getValue(1) }; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, @@ -2066,7 +2013,7 @@ static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD, DAG.getConstant(Offset, BasePtrVT)); SDValue Ops[] = { Chain, Value, Ptr }; - return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT, + return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT, SD->getMemOperand()); } @@ -2120,7 +2067,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { EVT MemVT = SD->getMemoryVT(); // Lower unaligned integer stores. - if ((SD->getAlignment() < MemVT.getSizeInBits() / 8) && + if (!Subtarget->systemSupportsUnalignedAccess() && + (SD->getAlignment() < MemVT.getSizeInBits() / 8) && ((MemVT == MVT::i32) || (MemVT == MVT::i64))) return lowerUnalignedIntStore(SD, DAG, Subtarget->isLittle()); @@ -2177,12 +2125,12 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op, static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, - CCState &State, const uint16_t *F64Regs) { + CCState &State, const MCPhysReg *F64Regs) { static const unsigned IntRegsSize = 4, FloatRegsSize = 2; - static const uint16_t IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; - static const uint16_t F32Regs[] = { Mips::F12, Mips::F14 }; + static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 }; + static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 }; // Do not process byval args here. if (ArgFlags.isByVal()) @@ -2254,7 +2202,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT, static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const uint16_t F64Regs[] = { Mips::D6, Mips::D7 }; + static const MCPhysReg F64Regs[] = { Mips::D6, Mips::D7 }; return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); } @@ -2262,7 +2210,7 @@ static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT, static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const uint16_t F64Regs[] = { Mips::D12_64, Mips::D14_64 }; + static const MCPhysReg F64Regs[] = { Mips::D12_64, Mips::D14_64 }; return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs); } @@ -2383,7 +2331,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MipsCCInfo.analyzeCallOperands(Outs, IsVarArg, Subtarget->mipsSEUsesSoftFloat(), - Callee.getNode(), CLI.Args); + Callee.getNode(), CLI.getArgs()); // Get a count of how many bytes are to be pushed on the stack. 
 unsigned NextStackOffset = CCInfo.getNextStackOffset();
@@ -2394,6 +2342,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
                                       *MF.getInfo<MipsFunctionInfo>());

+  if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
+    report_fatal_error("failed to perform tail call elimination on a call "
+                       "site marked musttail");
+
   if (IsTailCall)
     ++NumTailCalls;
@@ -2489,8 +2441,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // Transform all store nodes into one single node because all store
   // nodes are independent of each other.
   if (!MemOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
-                        &MemOpChains[0], MemOpChains.size());
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
@@ -2544,9 +2495,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
               CLI, Callee, Chain);

   if (IsTailCall)
-    return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
+    return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops);

-  Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
+  Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops);
   SDValue InFlag = Chain.getValue(1);

   // Create the CALLSEQ_END node.
@@ -2713,18 +2664,21 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
     }
   }

-  // The mips ABIs for returning structs by value requires that we copy
-  // the sret argument into $v0 for the return. Save the argument into
-  // a virtual register so that we can access it from the return points.
-  if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
-    unsigned Reg = MipsFI->getSRetReturnReg();
-    if (!Reg) {
-      Reg = MF.getRegInfo().createVirtualRegister(
-          getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
-      MipsFI->setSRetReturnReg(Reg);
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    // The mips ABIs for returning structs by value requires that we copy
+    // the sret argument into $v0 for the return. Save the argument into
+    // a virtual register so that we can access it from the return points.
+    if (Ins[i].Flags.isSRet()) {
+      unsigned Reg = MipsFI->getSRetReturnReg();
+      if (!Reg) {
+        Reg = MF.getRegInfo().createVirtualRegister(
+            getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
+        MipsFI->setSRetReturnReg(Reg);
+      }
+      SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
+      break;
     }
-    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
   }

   if (IsVarArg)
@@ -2734,8 +2688,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
   // the size of Ins and InVals.
This only happens on vararg functions
   if (!OutChains.empty()) {
     OutChains.push_back(Chain);
-    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
-                        &OutChains[0], OutChains.size());
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
   }

   return Chain;
@@ -2820,7 +2773,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
     RetOps.push_back(Flag);

   // Return on Mips is always a "jr $ra"
-  return DAG.getNode(MipsISD::Ret, DL, MVT::Other, &RetOps[0], RetOps.size());
+  return DAG.getNode(MipsISD::Ret, DL, MVT::Other, RetOps);
 }

//===----------------------------------------------------------------------===//
@@ -2870,7 +2823,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
   Value *CallOperandVal = info.CallOperandVal;
   // If we don't have a value, we can't do a match,
   // but allow it at the lowest weight.
-  if (CallOperandVal == NULL)
+  if (!CallOperandVal)
     return CW_Default;
   Type *type = CallOperandVal->getType();
   // Look at the constraint type.
@@ -2948,12 +2901,12 @@ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
   std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg);

   if (!R.first)
-    return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+    return std::make_pair(0U, nullptr);

   if ((Prefix == "hi" || Prefix == "lo")) { // Parse hi/lo.
     // No numeric characters follow "hi" or "lo".
     if (R.second)
-      return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+      return std::make_pair(0U, nullptr);

     RC = TRI->getRegClass(Prefix == "hi" ?
                           Mips::HI32RegClassID : Mips::LO32RegClassID);
@@ -2963,7 +2916,7 @@ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {

     // No numeric characters follow the name.
     if (R.second)
-      return std::make_pair((unsigned)0, (const TargetRegisterClass *)0);
+      return std::make_pair(0U, nullptr);

     Reg = StringSwitch<unsigned>(Prefix)
           .Case("$msair", Mips::MSAIR)
@@ -2977,14 +2930,14 @@ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
           .Default(0);

     if (!Reg)
-      return std::make_pair((unsigned)0, (const TargetRegisterClass *)0);
+      return std::make_pair(0U, nullptr);

     RC = TRI->getRegClass(Mips::MSACtrlRegClassID);
     return std::make_pair(Reg, RC);
   }

   if (!R.second)
-    return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+    return std::make_pair(0U, nullptr);

   if (Prefix == "$f") { // Parse $f0-$f31.
     // If the size of FP registers is 64-bit or Reg is an even number, select
@@ -3032,7 +2985,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
       if (VT == MVT::i64 && isGP64bit())
         return std::make_pair(0U, &Mips::GPR64RegClass);
       // This will generate an error message
-      return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+      return std::make_pair(0U, nullptr);
     case 'f': // FPU or MSA register
       if (VT == MVT::v16i8)
         return std::make_pair(0U, &Mips::MSA128BRegClass);
@@ -3062,7 +3015,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
     case 'x': // register suitable for indirect jump
       // Fixme: Not triggering the use of both hi and low
       // This will generate an error message
-      return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+      return std::make_pair(0U, nullptr);
   }
 }

@@ -3081,7 +3034,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                      std::string &Constraint,
                                                      std::vector<SDValue>&Ops,
                                                      SelectionDAG &DAG) const {
-  SDValue Result(0, 0);
+  SDValue Result;

   // Only support length 1 constraints for now.
 if (Constraint.length() > 1) return;
@@ -3265,7 +3218,7 @@ static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {

 MipsTargetLowering::MipsCC::SpecialCallingConvType
 MipsTargetLowering::getSpecialCallingConv(SDValue Callee) const {
   MipsCC::SpecialCallingConvType SpecialCallingConv =
-    MipsCC::NoSpecialCallingConv;;
+    MipsCC::NoSpecialCallingConv;
   if (Subtarget->inMips16HardFloat()) {
     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
       llvm::StringRef Sym = G->getGlobal()->getName();
@@ -3321,7 +3274,7 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
       dbgs() << "Call operand #" << I << " has unhandled type "
              << EVT(ArgVT).getEVTString();
 #endif
-      llvm_unreachable(0);
+      llvm_unreachable(nullptr);
     }
   }
 }
@@ -3344,7 +3297,7 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
       continue;
     }

-    MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), 0, IsSoftFloat);
+    MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), nullptr, IsSoftFloat);

     if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo))
       continue;
@@ -3353,7 +3306,7 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
     dbgs() << "Formal Arg #" << I << " has unhandled type "
            << EVT(ArgVT).getEVTString();
 #endif
-    llvm_unreachable(0);
+    llvm_unreachable(nullptr);
   }
 }

@@ -3378,7 +3331,7 @@ analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
       dbgs() << "Call result #" << I << " has unhandled type "
              << EVT(VT).getEVTString() << '\n';
 #endif
-      llvm_unreachable(0);
+      llvm_unreachable(nullptr);
     }
   }
 }
@@ -3392,7 +3345,7 @@ analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat,

 void MipsTargetLowering::MipsCC::
 analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
               const Type *RetTy) const {
-  analyzeReturn(Outs, IsSoftFloat, 0, RetTy);
+  analyzeReturn(Outs, IsSoftFloat, nullptr, RetTy);
 }

 void MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
@@ -3426,7 +3379,7 @@ unsigned MipsTargetLowering::MipsCC::reservedArgArea() const {
   return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0;
 }

-const uint16_t *MipsTargetLowering::MipsCC::intArgRegs() const {
+const MCPhysReg *MipsTargetLowering::MipsCC::intArgRegs() const {
   return IsO32 ? O32IntRegs : Mips64IntRegs;
 }

@@ -3443,7 +3396,7 @@ llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
   return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN_VarArg;
 }

-const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const {
+const MCPhysReg *MipsTargetLowering::MipsCC::shadowRegs() const {
   return IsO32 ? O32IntRegs : Mips64DPRegs;
 }

@@ -3451,7 +3404,7 @@ void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
                                               unsigned ByValSize,
                                               unsigned Align) {
   unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs();
-  const uint16_t *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
+  const MCPhysReg *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
   assert(!(ByValSize % RegSize) && !(Align % RegSize) &&
          "Byval argument's size and alignment should be a multiple of "
          "RegSize.");
@@ -3536,21 +3489,22 @@ passByValArg(SDValue Chain, SDLoc DL,
              MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
              const MipsCC &CC, const ByValArgInfo &ByVal,
              const ISD::ArgFlagsTy &Flags, bool isLittle) const {
-  unsigned ByValSize = Flags.getByValSize();
-  unsigned Offset = 0; // Offset in # of bytes from the beginning of struct.
- unsigned RegSize = CC.regSize(); - unsigned Alignment = std::min(Flags.getByValAlign(), RegSize); - EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSize * 8); + unsigned ByValSizeInBytes = Flags.getByValSize(); + unsigned OffsetInBytes = 0; // From beginning of struct + unsigned RegSizeInBytes = CC.regSize(); + unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes); + EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); if (ByVal.NumRegs) { - const uint16_t *ArgRegs = CC.intArgRegs(); - bool LeftoverBytes = (ByVal.NumRegs * RegSize > ByValSize); + const MCPhysReg *ArgRegs = CC.intArgRegs(); + bool LeftoverBytes = (ByVal.NumRegs * RegSizeInBytes > ByValSizeInBytes); unsigned I = 0; // Copy words to registers. - for (; I < ByVal.NumRegs - LeftoverBytes; ++I, Offset += RegSize) { + for (; I < ByVal.NumRegs - LeftoverBytes; + ++I, OffsetInBytes += RegSizeInBytes) { SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); + DAG.getConstant(OffsetInBytes, PtrTy)); SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr, MachinePointerInfo(), false, false, false, Alignment); @@ -3560,38 +3514,38 @@ passByValArg(SDValue Chain, SDLoc DL, } // Return if the struct has been fully copied. - if (ByValSize == Offset) + if (ByValSizeInBytes == OffsetInBytes) return; // Copy the remainder of the byval argument with sub-word loads and shifts. if (LeftoverBytes) { - assert((ByValSize > Offset) && (ByValSize < Offset + RegSize) && - "Size of the remainder should be smaller than RegSize."); + assert((ByValSizeInBytes > OffsetInBytes) && + (ByValSizeInBytes < OffsetInBytes + RegSizeInBytes) && + "Size of the remainder should be smaller than RegSizeInBytes."); SDValue Val; - for (unsigned LoadSize = RegSize / 2, TotalSizeLoaded = 0; - Offset < ByValSize; LoadSize /= 2) { - unsigned RemSize = ByValSize - Offset; + for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0; + OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) { + unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes; - if (RemSize < LoadSize) + if (RemainingSizeInBytes < LoadSizeInBytes) continue; // Load subword. SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg, - DAG.getConstant(Offset, PtrTy)); - SDValue LoadVal = - DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, - MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), - false, false, Alignment); + DAG.getConstant(OffsetInBytes, PtrTy)); + SDValue LoadVal = DAG.getExtLoad( + ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(), + MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, Alignment); MemOpChains.push_back(LoadVal.getValue(1)); // Shift the loaded value. unsigned Shamt; if (isLittle) - Shamt = TotalSizeLoaded; + Shamt = TotalBytesLoaded * 8; else - Shamt = (RegSize - (TotalSizeLoaded + LoadSize)) * 8; + Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8; SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal, DAG.getConstant(Shamt, MVT::i32)); @@ -3601,9 +3555,9 @@ passByValArg(SDValue Chain, SDLoc DL, else Val = Shift; - Offset += LoadSize; - TotalSizeLoaded += LoadSize; - Alignment = std::min(Alignment, LoadSize); + OffsetInBytes += LoadSizeInBytes; + TotalBytesLoaded += LoadSizeInBytes; + Alignment = std::min(Alignment, LoadSizeInBytes); } unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I]; @@ -3613,14 +3567,14 @@ passByValArg(SDValue Chain, SDLoc DL, } // Copy remainder of byval arg to it with memcpy. 
-  unsigned MemCpySize = ByValSize - Offset;
+  unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes;
   SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
-                            DAG.getConstant(Offset, PtrTy));
+                            DAG.getConstant(OffsetInBytes, PtrTy));
   SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
                             DAG.getIntPtrConstant(ByVal.Address));
   Chain = DAG.getMemcpy(Chain, DL, Dst, Src,
                         DAG.getConstant(MemCpySize, PtrTy), Alignment,
                         /*isVolatile=*/false, /*AlwaysInline=*/false,
-                        MachinePointerInfo(0), MachinePointerInfo(0));
+                        MachinePointerInfo(), MachinePointerInfo());
   MemOpChains.push_back(Chain);
 }

@@ -3628,7 +3582,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
                                          const MipsCC &CC, SDValue Chain,
                                          SDLoc DL, SelectionDAG &DAG) const {
   unsigned NumRegs = CC.numIntArgRegs();
-  const uint16_t *ArgRegs = CC.intArgRegs();
+  const MCPhysReg *ArgRegs = CC.intArgRegs();
   const CCState &CCInfo = CC.getCCInfo();
   unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs);
   unsigned RegSize = CC.regSize();
@@ -3662,7 +3616,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
     SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
     SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
                                  MachinePointerInfo(), false, false, 0);
-    cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(0);
+    cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue((Value*)nullptr);
     OutChains.push_back(Store);
   }
 }
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 35dd396..4ac33bf 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -218,32 +218,38 @@ namespace llvm {

     static const MipsTargetLowering *create(MipsTargetMachine &TM);

-    virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+    /// createFastISel - This method returns a target specific FastISel object,
+    /// or null if the target does not support "fast" ISel.
+    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+                             const TargetLibraryInfo *libInfo) const override;

-    virtual void LowerOperationWrapper(SDNode *N,
-                                       SmallVectorImpl<SDValue> &Results,
-                                       SelectionDAG &DAG) const;
+    MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+
+    void LowerOperationWrapper(SDNode *N,
+                               SmallVectorImpl<SDValue> &Results,
+                               SelectionDAG &DAG) const override;

     /// LowerOperation - Provide custom lowering hooks for some operations.
-    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

     /// ReplaceNodeResults - Replace the results of node with an illegal result
     /// type with new values built out of custom code.
     ///
-    virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
-                                    SelectionDAG &DAG) const;
+    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+                            SelectionDAG &DAG) const override;

     /// getTargetNodeName - This method returns the name of a target specific
     //  DAG node.
-    virtual const char *getTargetNodeName(unsigned Opcode) const;
+    const char *getTargetNodeName(unsigned Opcode) const override;

     /// getSetCCResultType - get the ISD::SETCC result ValueType
-    EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+    EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;

-    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

-    virtual MachineBasicBlock *
-    EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+    MachineBasicBlock *
+    EmitInstrWithCustomInserter(MachineInstr *MI,
+                                MachineBasicBlock *MBB) const override;

     struct LTStr {
       bool operator()(const char *S1, const char *S2) const {
@@ -382,7 +388,7 @@ namespace llvm {
       unsigned reservedArgArea() const;

       /// Return pointer to array of integer argument registers.
-      const uint16_t *intArgRegs() const;
+      const MCPhysReg *intArgRegs() const;

       typedef SmallVectorImpl<ByValArgInfo>::const_iterator byval_iterator;
       byval_iterator byval_begin() const { return ByValArgs.begin(); }
@@ -403,7 +409,7 @@ namespace llvm {
       /// Return the function that analyzes variable argument list functions.
       llvm::CCAssignFn *varArgFn() const;

-      const uint16_t *shadowRegs() const;
+      const MCPhysReg *shadowRegs() const;

       void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
                         unsigned Align);
@@ -523,41 +529,39 @@ namespace llvm {
     void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC,
                          SDValue Chain, SDLoc DL, SelectionDAG &DAG) const;

-    virtual SDValue
+    SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            SDLoc dl, SelectionDAG &DAG,
-                           SmallVectorImpl<SDValue> &InVals) const;
+                           SmallVectorImpl<SDValue> &InVals) const override;

     SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain,
                            SDValue Arg, SDLoc DL, bool IsTailCall,
                            SelectionDAG &DAG) const;

-    virtual SDValue
-      LowerCall(TargetLowering::CallLoweringInfo &CLI,
-                SmallVectorImpl<SDValue> &InVals) const;
+    SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                      SmallVectorImpl<SDValue> &InVals) const override;

-    virtual bool
-      CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
-                     bool isVarArg,
-                     const SmallVectorImpl<ISD::OutputArg> &Outs,
-                     LLVMContext &Context) const;
-
-    virtual SDValue
-      LowerReturn(SDValue Chain,
-                  CallingConv::ID CallConv, bool isVarArg,
-                  const SmallVectorImpl<ISD::OutputArg> &Outs,
-                  const SmallVectorImpl<SDValue> &OutVals,
-                  SDLoc dl, SelectionDAG &DAG) const;
+    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                        bool isVarArg,
+                        const SmallVectorImpl<ISD::OutputArg> &Outs,
+                        LLVMContext &Context) const override;
+
+    SDValue LowerReturn(SDValue Chain,
+                        CallingConv::ID CallConv, bool isVarArg,
+                        const SmallVectorImpl<ISD::OutputArg> &Outs,
+                        const SmallVectorImpl<SDValue> &OutVals,
+                        SDLoc dl, SelectionDAG &DAG) const override;

     // Inline asm support
-    ConstraintType getConstraintType(const std::string &Constraint) const;
+    ConstraintType
+      getConstraintType(const std::string &Constraint) const override;

     /// Examine constraint string and operand type and determine a weight value.
     /// The operand object must already have been set up with the operand type.
     ConstraintWeight getSingleConstraintMatchWeight(
-      AsmOperandInfo &info, const char *constraint) const;
+      AsmOperandInfo &info, const char *constraint) const override;

     /// This function parses registers that appear in inline-asm constraints.
     /// It returns pair (0, 0) on failure.
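Most of the header churn in the hunks above replaces the pre-C++11 "virtual with matching signature" convention with the override keyword, one of the recurring cleanups in this rebase. The payoff is compile-time checking against the base-class declaration; a minimal illustration with hypothetical types (not taken from this patch):

    struct Base {
      virtual const char *getPassName() const;
    };
    struct Derived : Base {
      // With 'override', signature drift becomes a hard error: dropping
      // 'const' here would fail to compile instead of silently declaring
      // a brand-new virtual function that never gets called.
      const char *getPassName() const override;
    };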
@@ -566,33 +570,33 @@ namespace llvm {

     std::pair<unsigned, const TargetRegisterClass *>
       getRegForInlineAsmConstraint(const std::string &Constraint,
-                                   MVT VT) const;
+                                   MVT VT) const override;

     /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
     /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is
     /// true it means one of the asm constraint of the inline asm instruction
     /// being processed is 'm'.
-    virtual void LowerAsmOperandForConstraint(SDValue Op,
-                                              std::string &Constraint,
-                                              std::vector<SDValue> &Ops,
-                                              SelectionDAG &DAG) const;
+    void LowerAsmOperandForConstraint(SDValue Op,
+                                      std::string &Constraint,
+                                      std::vector<SDValue> &Ops,
+                                      SelectionDAG &DAG) const override;

-    virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+    bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;

-    virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

-    virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
-                                    unsigned SrcAlign,
-                                    bool IsMemset, bool ZeroMemset,
-                                    bool MemcpyStrSrc,
-                                    MachineFunction &MF) const;
+    EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+                            unsigned SrcAlign,
+                            bool IsMemset, bool ZeroMemset,
+                            bool MemcpyStrSrc,
+                            MachineFunction &MF) const override;

     /// isFPImmLegal - Returns true if the target can instruction select the
     /// specified FP immediate natively. If false, the legalizer will
     /// materialize the FP immediate as a load from a constant pool.
-    virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+    bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;

-    virtual unsigned getJumpTableEncoding() const;
+    unsigned getJumpTableEncoding() const override;

     MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                     unsigned Size, unsigned BinOpcode, bool Nand = false) const;
@@ -608,6 +612,11 @@ namespace llvm {
   /// Create MipsTargetLowering objects.
   const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
   const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
+
+  namespace Mips {
+    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+                             const TargetLibraryInfo *libInfo);
+  }
 }

 #endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 4b5a73e..32cda3b 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -66,6 +66,16 @@ def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">,
 def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">,
                        AssemblerPredicate<"!FeatureSingleFloat">;

+//===----------------------------------------------------------------------===//
+// Mips FGR size adjectives.
+// They are mutually exclusive.
+//===----------------------------------------------------------------------===//
+
+class FGR_32 { list<Predicate> FGRPredicates = [NotFP64bit]; }
+class FGR_64 { list<Predicate> FGRPredicates = [IsFP64bit]; }
+
+//===----------------------------------------------------------------------===//
+
 // FP immediate patterns.
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); @@ -100,10 +110,10 @@ class ADDS_FT { def _D32 : MMRel, ADDS_FT, - Requires<[NotFP64bit, HasStdEnc]>; + AdditionalRequires<[NotFP64bit]>; def _D64 : ADDS_FT, - Requires<[IsFP64bit, HasStdEnc]> { + AdditionalRequires<[IsFP64bit]> { string DecoderNamespace = "Mips64"; } } @@ -117,18 +127,18 @@ class ABSS_FT { def _D32 : MMRel, ABSS_FT, - Requires<[NotFP64bit, HasStdEnc]>; + AdditionalRequires<[NotFP64bit]>; def _D64 : ABSS_FT, - Requires<[IsFP64bit, HasStdEnc]> { + AdditionalRequires<[IsFP64bit]> { string DecoderNamespace = "Mips64"; } } multiclass ROUND_M { def _D32 : MMRel, ABSS_FT, - Requires<[NotFP64bit, HasStdEnc]>; + AdditionalRequires<[NotFP64bit]>; def _D64 : ABSS_FT, - Requires<[IsFP64bit, HasStdEnc]> { + AdditionalRequires<[IsFP64bit]> { let DecoderNamespace = "Mips64"; } } @@ -241,77 +251,75 @@ multiclass C_COND_M fmt, defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>; defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, - Requires<[NotFP64bit, HasStdEnc]>; + AdditionalRequires<[NotFP64bit]>; let DecoderNamespace = "Mips64" in defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, - Requires<[IsFP64bit, HasStdEnc]>; + AdditionalRequires<[IsFP64bit]>; //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// def ROUND_W_S : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, - ABSS_FM<0xc, 16>; + ABSS_FM<0xc, 16>, ISA_MIPS2; def TRUNC_W_S : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, - ABSS_FM<0xd, 16>; + ABSS_FM<0xd, 16>, ISA_MIPS2; def CEIL_W_S : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, - ABSS_FM<0xe, 16>; + ABSS_FM<0xe, 16>, ISA_MIPS2; def FLOOR_W_S : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, - ABSS_FM<0xf, 16>; + ABSS_FM<0xf, 16>, ISA_MIPS2; def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x24, 16>; -defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>; -defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>; -defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>; -defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>; +defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2; +defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>, ISA_MIPS2; +defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2; +defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>, ISA_MIPS2; defm CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>; -let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { +let DecoderNamespace = "Mips64" in { def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>, - ABSS_FM<0x8, 16>; + ABSS_FM<0x8, 16>, FGR_64; def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, II_ROUND>, - ABSS_FM<0x8, 17>; + ABSS_FM<0x8, 17>, FGR_64; def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64Opnd, FGR32Opnd, II_TRUNC>, - ABSS_FM<0x9, 16>; + ABSS_FM<0x9, 16>, FGR_64; def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64Opnd, FGR64Opnd, II_TRUNC>, - ABSS_FM<0x9, 17>; + ABSS_FM<0x9, 17>, FGR_64; def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64Opnd, FGR32Opnd, II_CEIL>, - ABSS_FM<0xa, 16>; + ABSS_FM<0xa, 16>, FGR_64; def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64Opnd, FGR64Opnd, II_CEIL>, - ABSS_FM<0xa, 17>; + ABSS_FM<0xa, 17>, FGR_64; def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64Opnd, FGR32Opnd, II_FLOOR>, - 
ABSS_FM<0xb, 16>; + ABSS_FM<0xb, 16>, FGR_64; def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, II_FLOOR>, - ABSS_FM<0xb, 17>; + ABSS_FM<0xb, 17>, FGR_64; } def CVT_S_W : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x20, 20>; def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>, - ABSS_FM<0x25, 16>; + ABSS_FM<0x25, 16>, INSN_MIPS3_32R2; def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>, - ABSS_FM<0x25, 17>; - -let Predicates = [NotFP64bit, HasStdEnc] in { - def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>, - ABSS_FM<0x20, 17>; - def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>, - ABSS_FM<0x21, 20>; - def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>, - ABSS_FM<0x21, 16>; -} + ABSS_FM<0x25, 17>, INSN_MIPS3_32R2; + +def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>, + ABSS_FM<0x20, 17>, FGR_32; +def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>, + ABSS_FM<0x21, 20>, FGR_32; +def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>, + ABSS_FM<0x21, 16>, FGR_32; -let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { +let DecoderNamespace = "Mips64" in { def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>, - ABSS_FM<0x20, 17>; + ABSS_FM<0x20, 17>, FGR_64; def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>, - ABSS_FM<0x20, 21>; + ABSS_FM<0x20, 21>, FGR_64; def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, II_CVT>, - ABSS_FM<0x21, 20>; + ABSS_FM<0x21, 20>, FGR_64; def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, II_CVT>, - ABSS_FM<0x21, 16>; + ABSS_FM<0x21, 16>, FGR_64; def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64Opnd, FGR64Opnd, II_CVT>, - ABSS_FM<0x21, 21>; + ABSS_FM<0x21, 21>, FGR_64; } let isPseudo = 1, isCodeGenOnly = 1 in { @@ -322,18 +330,16 @@ let isPseudo = 1, isCodeGenOnly = 1 in { def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, II_CVT>; } -let Predicates = [NoNaNsFPMath, HasStdEnc] in { - def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>, - ABSS_FM<0x5, 16>; - def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>, - ABSS_FM<0x7, 16>; - defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>; - defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>; -} +def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>, + ABSS_FM<0x5, 16>; +def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>, + ABSS_FM<0x7, 16>; +defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>; +defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>; def FSQRT_S : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>, - ABSS_FM<0x4, 16>; -defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>; + ABSS_FM<0x4, 16>, ISA_MIPS2; +defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>, ISA_MIPS2; // The odd-numbered registers are only referenced when doing loads, // stores, and moves between floating-point and integer registers. 
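The FGR_32/FGR_64 adjectives and the ISA_MIPS*/INSN_MIPS* annotations introduced in these hunks attach predicate lists to instruction definitions; an instruction is then available only on subtargets where every attached predicate holds. A rough C++ analogy of that AND-composition (illustrative only; TableGen resolves this statically and none of the names below are LLVM API):

    #include <functional>
    #include <vector>

    struct Subtarget { bool IsFP64bit; bool HasMips2; };
    using Pred = std::function<bool(const Subtarget &)>;

    // Requires/AdditionalRequires lists are effectively ANDed together,
    // which is why mutually exclusive adjectives like FGR_32/FGR_64 must
    // never appear on the same definition.
    bool isAvailable(const std::vector<Pred> &Preds, const Subtarget &ST) {
      for (const Pred &P : Preds)
        if (!P(ST))
          return false;
      return true;
    }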
@@ -348,76 +354,92 @@ def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1, def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, MFC1_FM<4>; def MFHC1 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>, - MFC1_FM<3>; + MFC1_FM<3>, ISA_MIPS32R2; def MTHC1 : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>, - MFC1_FM<7>; + MFC1_FM<7>, ISA_MIPS32R2; def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, II_DMFC1, - bitconvert>, MFC1_FM<1>; + bitconvert>, MFC1_FM<1>, ISA_MIPS3; def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, II_DMTC1, - bitconvert>, MFC1_FM<5>; + bitconvert>, MFC1_FM<5>, ISA_MIPS3; def FMOV_S : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>, ABSS_FM<0x6, 16>; def FMOV_D32 : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>, - ABSS_FM<0x6, 17>, Requires<[NotFP64bit, HasStdEnc]>; + ABSS_FM<0x6, 17>, AdditionalRequires<[NotFP64bit]>; def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>, - ABSS_FM<0x6, 17>, Requires<[IsFP64bit, HasStdEnc]> { + ABSS_FM<0x6, 17>, AdditionalRequires<[IsFP64bit]> { let DecoderNamespace = "Mips64"; } /// Floating Point Memory Instructions -let Predicates = [HasStdEnc] in { - def LWC1 : MMRel, LW_FT<"lwc1", FGR32Opnd, II_LWC1, load>, LW_FM<0x31>; - def SWC1 : MMRel, SW_FT<"swc1", FGR32Opnd, II_SWC1, store>, LW_FM<0x39>; -} - -let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in { - def LDC164 : LW_FT<"ldc1", FGR64Opnd, II_LDC1, load>, LW_FM<0x35>; - def SDC164 : SW_FT<"sdc1", FGR64Opnd, II_SDC1, store>, LW_FM<0x3d>; -} - -let Predicates = [NotFP64bit, HasStdEnc] in { - def LDC1 : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM<0x35>; - def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>; -} - -/// Cop2 Memory Instructions -let Predicates = [HasStdEnc] in { - def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>; - def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>; - def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>; - def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>; -} +def LWC1 : MMRel, LW_FT<"lwc1", FGR32Opnd, II_LWC1, load>, LW_FM<0x31>; +def SWC1 : MMRel, SW_FT<"swc1", FGR32Opnd, II_SWC1, store>, LW_FM<0x39>; + +let DecoderNamespace = "Mips64" in { + def LDC164 : LW_FT<"ldc1", FGR64Opnd, II_LDC1, load>, LW_FM<0x35>, ISA_MIPS2, + FGR_64; + def SDC164 : SW_FT<"sdc1", FGR64Opnd, II_SDC1, store>, LW_FM<0x3d>, ISA_MIPS2, + FGR_64; +} + +def LDC1 : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM<0x35>, + ISA_MIPS2, FGR_32; +def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>, + ISA_MIPS2, FGR_32; + +// Cop2 Memory Instructions +// FIXME: These aren't really FPU instructions and as such don't belong in this +// file +def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>; +def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>; +def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>, ISA_MIPS2; +def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>, ISA_MIPS2; + +// Cop3 Memory Instructions +// FIXME: These aren't really FPU instructions and as such don't belong in this +// file +def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>; +def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>; +def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>, ISA_MIPS2; +def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>, ISA_MIPS2; // Indexed 
loads and stores.
// Base register + offset register addressing mode (indicated by "x" in the
// instruction mnemonic) is disallowed under NaCl.
-let Predicates = [HasFPIdx, HasStdEnc, IsNotNaCl] in {
-  def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM<0>;
-  def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM<8>;
+let AdditionalPredicates = [IsNotNaCl] in {
+  def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM<0>,
+              INSN_MIPS4_32R2;
+  def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM<8>,
+              INSN_MIPS4_32R2;
 }

-let Predicates = [HasFPIdx, NotFP64bit, HasStdEnc, NotInMicroMips,
-                  IsNotNaCl] in {
-  def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>;
-  def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>;
+let AdditionalPredicates = [NotInMicroMips, IsNotNaCl] in {
+  def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>,
+              INSN_MIPS4_32R2, FGR_32;
+  def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>,
+              INSN_MIPS4_32R2, FGR_32;
 }

-let Predicates = [HasFPIdx, IsFP64bit, HasStdEnc],
-    DecoderNamespace="Mips64" in {
-  def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>;
-  def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>;
+let DecoderNamespace="Mips64" in {
+  def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>,
+                INSN_MIPS4_32R2, FGR_64;
+  def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>,
+                INSN_MIPS4_32R2, FGR_64;
 }

 // Load/store doubleword indexed unaligned.
-let Predicates = [NotFP64bit, HasStdEnc, IsNotNaCl] in {
-  def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>;
-  def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>;
+let AdditionalPredicates = [IsNotNaCl] in {
+  def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>,
+              INSN_MIPS5_32R2, FGR_32;
+  def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>,
+              INSN_MIPS5_32R2, FGR_32;
 }

-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace="Mips64" in {
-  def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>;
-  def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>;
+let DecoderNamespace="Mips64" in {
+  def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>,
+                INSN_MIPS5_32R2, FGR_64;
+  def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>,
+                INSN_MIPS5_32R2, FGR_64;
 }

/// Floating-point Arithmetic
@@ -434,47 +456,43 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
             ADDS_FM<0x01, 16>;
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;

-let Predicates = [HasMips32r2, HasStdEnc] in {
-  def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
-               MADDS_FM<4, 0>;
-  def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
-               MADDS_FM<5, 0>;
-}
+def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
+             MADDS_FM<4, 0>, ISA_MIPS32R2;
+def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
+             MADDS_FM<5, 0>, ISA_MIPS32R2;

-let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
+let AdditionalPredicates = [NoNaNsFPMath] in {
   def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
-                MADDS_FM<6, 0>;
+                MADDS_FM<6, 0>, ISA_MIPS32R2;
   def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
-                MADDS_FM<7, 0>;
+                MADDS_FM<7, 0>, ISA_MIPS32R2;
 }

-let Predicates
= [HasMips32r2, NotFP64bit, HasStdEnc] in { - def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>; - def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>; -} +def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, + MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_32; +def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, + MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_32; -let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in { +let AdditionalPredicates = [NoNaNsFPMath] in { def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, - MADDS_FM<6, 1>; + MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_32; def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, - MADDS_FM<7, 1>; + MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_32; } -let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in { +let isCodeGenOnly=1 in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>; + MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_64; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>; + MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_64; } -let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc], +let AdditionalPredicates = [NoNaNsFPMath], isCodeGenOnly=1 in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, - MADDS_FM<6, 1>; + MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_64; def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>, - MADDS_FM<7, 1>; + MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_64; } //===----------------------------------------------------------------------===// @@ -515,10 +533,10 @@ def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; /// Floating Point Compare def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>; def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - Requires<[NotFP64bit, HasStdEnc]>; + AdditionalRequires<[NotFP64bit]>; let DecoderNamespace = "Mips64" in def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - Requires<[IsFP64bit, HasStdEnc]>; + AdditionalRequires<[IsFP64bit]>; //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions @@ -531,9 +549,9 @@ class BuildPairF64Base : [(set RO:$dst, (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>; def BuildPairF64 : BuildPairF64Base, - Requires<[NotFP64bit, HasStdEnc]>; + AdditionalRequires<[NotFP64bit]>; def BuildPairF64_64 : BuildPairF64Base, - Requires<[IsFP64bit, HasStdEnc]>; + AdditionalRequires<[IsFP64bit]>; // This pseudo instr gets expanded into 2 mfc1 instrs after register // allocation. @@ -544,15 +562,15 @@ class ExtractElementF64Base : [(set GPR32Opnd:$dst, (MipsExtractElementF64 RO:$src, imm:$n))]>; def ExtractElementF64 : ExtractElementF64Base, - Requires<[NotFP64bit, HasStdEnc]>; + AdditionalRequires<[NotFP64bit]>; def ExtractElementF64_64 : ExtractElementF64Base, - Requires<[IsFP64bit, HasStdEnc]>; + AdditionalRequires<[IsFP64bit]>; //===----------------------------------------------------------------------===// // InstAliases. 
//===----------------------------------------------------------------------===// -def : InstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>; -def : InstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>; +def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>; +def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>; //===----------------------------------------------------------------------===// // Floating Point Patterns @@ -565,55 +583,45 @@ def : MipsPat<(f32 (sint_to_fp GPR32Opnd:$src)), def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), (TRUNC_W_S FGR32Opnd:$src)>; -let Predicates = [NotFP64bit, HasStdEnc] in { - def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)), - (PseudoCVT_D32_W GPR32Opnd:$src)>; - def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src), - (TRUNC_W_D32 AFGR64Opnd:$src)>; - def : MipsPat<(f32 (fround AFGR64Opnd:$src)), - (CVT_S_D32 AFGR64Opnd:$src)>; - def : MipsPat<(f64 (fextend FGR32Opnd:$src)), - (CVT_D32_S FGR32Opnd:$src)>; -} +def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)), + (PseudoCVT_D32_W GPR32Opnd:$src)>, FGR_32; +def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src), + (TRUNC_W_D32 AFGR64Opnd:$src)>, FGR_32; +def : MipsPat<(f32 (fround AFGR64Opnd:$src)), + (CVT_S_D32 AFGR64Opnd:$src)>, FGR_32; +def : MipsPat<(f64 (fextend FGR32Opnd:$src)), + (CVT_D32_S FGR32Opnd:$src)>, FGR_32; + +def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>, FGR_64; +def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>, FGR_64; + +def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)), + (PseudoCVT_D64_W GPR32Opnd:$src)>, FGR_64; +def : MipsPat<(f32 (sint_to_fp GPR64Opnd:$src)), + (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_lo)>, FGR_64; +def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)), + (PseudoCVT_D64_L GPR64Opnd:$src)>, FGR_64; + +def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), + (TRUNC_W_D64 FGR64Opnd:$src)>, FGR_64; +def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), + (TRUNC_L_S FGR32Opnd:$src)>, FGR_64; +def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), + (TRUNC_L_D64 FGR64Opnd:$src)>, FGR_64; -let Predicates = [IsFP64bit, HasStdEnc] in { - def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>; - def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; - - def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)), - (PseudoCVT_D64_W GPR32Opnd:$src)>; - def : MipsPat<(f32 (sint_to_fp GPR64Opnd:$src)), - (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_lo)>; - def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)), - (PseudoCVT_D64_L GPR64Opnd:$src)>; - - def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), - (TRUNC_W_D64 FGR64Opnd:$src)>; - def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), - (TRUNC_L_S FGR32Opnd:$src)>; - def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), - (TRUNC_L_D64 FGR64Opnd:$src)>; - - def : MipsPat<(f32 (fround FGR64Opnd:$src)), - (CVT_S_D64 FGR64Opnd:$src)>; - def : MipsPat<(f64 (fextend FGR32Opnd:$src)), - (CVT_D64_S FGR32Opnd:$src)>; -} +def : MipsPat<(f32 (fround FGR64Opnd:$src)), + (CVT_S_D64 FGR64Opnd:$src)>, FGR_64; +def : MipsPat<(f64 (fextend FGR32Opnd:$src)), + (CVT_D64_S FGR32Opnd:$src)>, FGR_64; // Patterns for loads/stores with a reg+imm operand. 
let AddedComplexity = 40 in { - let Predicates = [HasStdEnc] in { - def : LoadRegImmPat; - def : StoreRegImmPat; - } + def : LoadRegImmPat; + def : StoreRegImmPat; - let Predicates = [IsFP64bit, HasStdEnc] in { - def : LoadRegImmPat; - def : StoreRegImmPat; - } + def : LoadRegImmPat, FGR_64; + def : StoreRegImmPat, FGR_64; - let Predicates = [NotFP64bit, HasStdEnc] in { - def : LoadRegImmPat; - def : StoreRegImmPat; - } + def : LoadRegImmPat, FGR_32; + def : StoreRegImmPat, FGR_32; } diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index e4405ab..0377eab 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -93,8 +93,8 @@ class MipsInst pattern, // Mips32/64 Instruction Format class InstSE pattern, InstrItinClass itin, Format f, string opstr = ""> : - MipsInst { - let Predicates = [HasStdEnc]; + MipsInst, PredicateControl { + let EncodingPredicates = [HasStdEnc]; string BaseOpcode = opstr; string Arch; } @@ -109,9 +109,9 @@ class MipsPseudo pattern, // Mips32/64 Pseudo Instruction Format class PseudoSE pattern, - InstrItinClass itin = IIPseudo>: - MipsPseudo { - let Predicates = [HasStdEnc]; + InstrItinClass itin = IIPseudo> : + MipsPseudo, PredicateControl { + let EncodingPredicates = [HasStdEnc]; } // Pseudo-instructions for alternate assembly syntax (never used by codegen). @@ -545,6 +545,20 @@ class SEQ_FM funct> : StdArch { let Inst{5-0} = funct; } +class SEQI_FM funct> : StdArch { + bits<5> rs; + bits<5> rt; + bits<10> imm10; + + bits<32> Inst; + + let Inst{31-26} = 0x1c; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-6} = imm10; + let Inst{5-0} = funct; +} + //===----------------------------------------------------------------------===// // System calls format //===----------------------------------------------------------------------===// @@ -829,3 +843,12 @@ class BARRIER_FM op> : StdArch { let Inst{10-6} = op; // Operation let Inst{5-0} = 0; // SLL } + +class COP0_TLB_FM op> : StdArch { + bits<32> Inst; + + let Inst{31-26} = 0x10; // COP0 + let Inst{25} = 1; // CO + let Inst{24-6} = 0; + let Inst{5-0} = op; // Operation +} diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 0ebad05..d6da6c6 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -22,11 +22,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_CTOR_DTOR #include "MipsGenInstrInfo.inc" -using namespace llvm; - // Pin the vtable to this file. void MipsInstrInfo::anchor() {} @@ -195,7 +195,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, if (I == REnd || !isUnpredicatedTerminator(&*I)) { // This block ends with no branches (it just falls through to its succ). // Leave TBB/FBB null. - TBB = FBB = NULL; + TBB = FBB = nullptr; return BT_NoBranch; } @@ -209,7 +209,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, // Get the second to last instruction in the block. unsigned SecondLastOpc = 0; - MachineInstr *SecondLastInst = NULL; + MachineInstr *SecondLastInst = nullptr; if (++I != REnd) { SecondLastInst = &*I; diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index d9ac961..742193f 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -9,6 +9,10 @@ // // This file contains the Mips implementation of the TargetInstrInfo class. 
// +// FIXME: We need to override TargetInstrInfo::getInlineAsmLength method in +// order for MipsLongBranch pass to work correctly when the code has inline +// assembly. The returned value doesn't have to be the asm instruction's exact +// size in bytes; MipsLongBranch only expects it to be the correct upper bound. //===----------------------------------------------------------------------===// #ifndef MIPSINSTRUCTIONINFO_H @@ -47,20 +51,20 @@ public: static const MipsInstrInfo *create(MipsTargetMachine &TM); /// Branch Analysis - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const override; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const override; - virtual - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; BranchType AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, @@ -69,8 +73,8 @@ public: SmallVectorImpl &BranchInstrs) const; /// Insert nop instruction when hazard condition is found - virtual void insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should @@ -83,19 +87,19 @@ public: /// Return the number of bytes of code the specified instruction may be. unsigned GetInstSizeInBytes(const MachineInstr *MI) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override { storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0); } - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override { loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0); } diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 07c37d8..0d3cb75 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -146,26 +146,40 @@ def MipsSDR : SDNode<"MipsISD::SDR", SDTStore, //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// -def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">, - AssemblerPredicate<"FeatureSEInReg">; -def HasBitCount : Predicate<"Subtarget.hasBitCount()">, - AssemblerPredicate<"FeatureBitCount">; -def HasSwap : Predicate<"Subtarget.hasSwap()">, - AssemblerPredicate<"FeatureSwap">; -def HasCondMov : Predicate<"Subtarget.hasCondMov()">, - AssemblerPredicate<"FeatureCondMov">; -def HasFPIdx : Predicate<"Subtarget.hasFPIdx()">, - AssemblerPredicate<"FeatureFPIdx">; +def HasMips2 : Predicate<"Subtarget.hasMips2()">, + AssemblerPredicate<"FeatureMips2">; +def HasMips3_32 : Predicate<"Subtarget.hasMips3_32()">, + AssemblerPredicate<"FeatureMips3_32">; +def HasMips3_32r2 : Predicate<"Subtarget.hasMips3_32r2()">, + AssemblerPredicate<"FeatureMips3_32r2">; +def HasMips3 : Predicate<"Subtarget.hasMips3()">, + AssemblerPredicate<"FeatureMips3">; +def HasMips4_32 : Predicate<"Subtarget.hasMips4_32()">, + AssemblerPredicate<"FeatureMips4_32">; +def HasMips4_32r2 : Predicate<"Subtarget.hasMips4_32r2()">, + AssemblerPredicate<"FeatureMips4_32r2">; +def HasMips5_32r2 : Predicate<"Subtarget.hasMips5_32r2()">, + AssemblerPredicate<"FeatureMips5_32r2">; def HasMips32 : Predicate<"Subtarget.hasMips32()">, AssemblerPredicate<"FeatureMips32">; def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">, AssemblerPredicate<"FeatureMips32r2">; +def HasMips32r6 : Predicate<"Subtarget.hasMips32r6()">, + AssemblerPredicate<"FeatureMips32r6">; +def NotMips32r6 : Predicate<"!Subtarget.hasMips32r6()">, + AssemblerPredicate<"!FeatureMips32r6">; +def IsGP64bit : Predicate<"Subtarget.isGP64bit()">, + AssemblerPredicate<"FeatureGP64Bit">; +def IsGP32bit : Predicate<"!Subtarget.isGP64bit()">, + AssemblerPredicate<"!FeatureGP64Bit">; def HasMips64 : Predicate<"Subtarget.hasMips64()">, AssemblerPredicate<"FeatureMips64">; -def NotMips64 : Predicate<"!Subtarget.hasMips64()">, - AssemblerPredicate<"!FeatureMips64">; def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">, AssemblerPredicate<"FeatureMips64r2">; +def HasMips64r6 : Predicate<"Subtarget.hasMips64r6()">, + AssemblerPredicate<"FeatureMips64r6">; +def NotMips64r6 : Predicate<"!Subtarget.hasMips64r6()">, + AssemblerPredicate<"!FeatureMips64r6">; def IsN64 : Predicate<"Subtarget.isABI_N64()">, AssemblerPredicate<"FeatureN64">; def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">, @@ -176,8 +190,7 @@ def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, AssemblerPredicate<"FeatureMips32">; def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, AssemblerPredicate<"FeatureMips32">; -def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, - AssemblerPredicate<"FeatureMips32">; +def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; def NotDSP : Predicate<"!Subtarget.hasDSP()">; @@ -189,9 +202,65 @@ def IsLE : Predicate<"Subtarget.isLittle()">; def IsBE : Predicate<"!Subtarget.isLittle()">; def IsNotNaCl : Predicate<"!Subtarget.isTargetNaCl()">; -class MipsPat : Pat { - let Predicates = [HasStdEnc]; +//===----------------------------------------------------------------------===// +// Mips GPR size adjectives. +// They are mutually exclusive. 
+//===----------------------------------------------------------------------===// + +class GPR_32 { list GPRPredicates = [IsGP32bit]; } +class GPR_64 { list GPRPredicates = [IsGP64bit]; } + +//===----------------------------------------------------------------------===// +// Mips ISA/ASE membership and instruction group membership adjectives. +// They are mutually exclusive. +//===----------------------------------------------------------------------===// + +// FIXME: I'd prefer to use additive predicates to build the instruction sets +// but we are short on assembler feature bits at the moment. Using a +// subtractive predicate will hopefully keep us under the 32 predicate +// limit long enough to develop an alternative way to handle P1||P2 +// predicates. +class ISA_MIPS1_NOT_32R6_64R6 { + list InsnPredicates = [NotMips32r6, NotMips64r6]; +} +class ISA_MIPS2 { list InsnPredicates = [HasMips2]; } +class ISA_MIPS2_NOT_32R6_64R6 { + list InsnPredicates = [HasMips2, NotMips32r6, NotMips64r6]; +} +class ISA_MIPS3 { list InsnPredicates = [HasMips3]; } +class ISA_MIPS3_NOT_32R6_64R6 { + list InsnPredicates = [HasMips3, NotMips32r6, NotMips64r6]; } +class ISA_MIPS32 { list InsnPredicates = [HasMips32]; } +class ISA_MIPS32R2 { list InsnPredicates = [HasMips32r2]; } +class ISA_MIPS64 { list InsnPredicates = [HasMips64]; } +class ISA_MIPS64R2 { list InsnPredicates = [HasMips64r2]; } +class ISA_MIPS32R6 { list InsnPredicates = [HasMips32r6]; } +class ISA_MIPS64R6 { list InsnPredicates = [HasMips64r6]; } + +// The portions of MIPS-III that were also added to MIPS32 +class INSN_MIPS3_32 { list InsnPredicates = [HasMips3_32]; } + +// The portions of MIPS-III that were also added to MIPS32R2 +class INSN_MIPS3_32R2 { list InsnPredicates = [HasMips3_32r2]; } + +// The portions of MIPS-IV that were also added to MIPS32 +class INSN_MIPS4_32 { list InsnPredicates = [HasMips4_32]; } + +// The portions of MIPS-IV that were also added to MIPS32R2 +class INSN_MIPS4_32R2 { list InsnPredicates = [HasMips4_32r2]; } + +// The portions of MIPS-V that were also added to MIPS32R2 +class INSN_MIPS5_32R2 { list InsnPredicates = [HasMips5_32r2]; } + +//===----------------------------------------------------------------------===// + +class MipsPat : Pat, PredicateControl { + let EncodingPredicates = [HasStdEnc]; +} + +class MipsInstAlias : + InstAlias, PredicateControl; class IsCommutable { bit isCommutable = 1; @@ -265,6 +334,11 @@ def simm16 : Operand { let DecoderMethod= "DecodeSimm16"; } +def simm19_lsl2 : Operand { + let EncoderMethod = "getSimm19Lsl2Encoding"; + let DecoderMethod = "DecodeSimm19Lsl2"; +} + def simm20 : Operand { } @@ -284,6 +358,14 @@ def uimmz : Operand { } // Unsigned Operand +def uimm2 : Operand { + let PrintMethod = "printUnsignedImm"; +} + +def uimm3 : Operand { + let PrintMethod = "printUnsignedImm"; +} + def uimm5 : Operand { let PrintMethod = "printUnsignedImm"; } @@ -314,6 +396,10 @@ def InvertedImOperand : Operand { let ParserMatchClass = MipsInvertedImmoperand; } +def InvertedImOperand64 : Operand { + let ParserMatchClass = MipsInvertedImmoperand; +} + class mem_generic : Operand { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops ptr_rc, simm16); @@ -478,7 +564,9 @@ class shift_rotate_imm : InstSE<(outs RO:$rd), (ins RO:$rt, ImmOpnd:$shamt), !strconcat(opstr, "\t$rd, $rt, $shamt"), - [(set RO:$rd, (OpNode RO:$rt, PF:$shamt))], itin, FrmR, opstr>; + [(set RO:$rd, (OpNode RO:$rt, PF:$shamt))], itin, FrmR, opstr> { + let TwoOperandAliasConstraint = "$rt = $rd"; +} class 
shift_rotate_reg: @@ -590,7 +678,7 @@ class UncondBranch : let isTerminator = 1; let isBarrier = 1; let hasDelaySlot = 1; - let Predicates = [RelocPIC, HasStdEnc]; + let AdditionalPredicates = [RelocPIC]; let Defs = [AT]; } @@ -779,27 +867,22 @@ class EffectiveAddress : // Count Leading Ones/Zeros in Word class CountLeading0: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [(set RO:$rd, (ctlz RO:$rs))], II_CLZ, FrmR, opstr>, - Requires<[HasBitCount, HasStdEnc]>; + [(set RO:$rd, (ctlz RO:$rs))], II_CLZ, FrmR, opstr>; class CountLeading1: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [(set RO:$rd, (ctlz (not RO:$rs)))], II_CLO, FrmR, opstr>, - Requires<[HasBitCount, HasStdEnc]>; + [(set RO:$rd, (ctlz (not RO:$rs)))], II_CLO, FrmR, opstr>; // Sign Extend in Register. class SignExtInReg : InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), - [(set RO:$rd, (sext_inreg RO:$rt, vt))], itin, FrmR, opstr> { - let Predicates = [HasSEInReg, HasStdEnc]; -} + [(set RO:$rd, (sext_inreg RO:$rt, vt))], itin, FrmR, opstr>; // Subword Swap class SubwordSwap: InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], NoItinerary, FrmR, opstr> { - let Predicates = [HasSwap, HasStdEnc]; let neverHasSideEffects = 1; } @@ -814,17 +897,14 @@ class ExtBase { - let Predicates = [HasMips32r2, HasStdEnc]; -} + FrmR, opstr>, ISA_MIPS32R2; class InsBase: InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src), !strconcat(opstr, " $rt, $rs, $pos, $size"), [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))], - NoItinerary, FrmR, opstr> { - let Predicates = [HasMips32r2, HasStdEnc]; + NoItinerary, FrmR, opstr>, ISA_MIPS32R2 { let Constraints = "$src = $rt"; } @@ -915,6 +995,18 @@ let isPseudo = 1, isCodeGenOnly = 1 in { def STORE_ACC64 : Store<"", ACC64>; } +// We need these two pseudo instructions to avoid offset calculation for long +// branches. See the comment in file MipsLongBranch.cpp for detailed +// explanation. 
+ +// Expands to: lui $dst, %hi($tgt - $baltgt) +def LONG_BRANCH_LUi : PseudoSE<(outs GPR32Opnd:$dst), + (ins brtarget:$tgt, brtarget:$baltgt), []>; + +// Expands to: addiu $dst, $src, %lo($tgt - $baltgt) +def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst), + (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>; + //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -926,7 +1018,8 @@ let isPseudo = 1, isCodeGenOnly = 1 in { def ADDiu : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16, add>, ADDI_FM<0x9>, IsAsCheapAsAMove; -def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>; +def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>, + ISA_MIPS1_NOT_32R6_64R6; def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>, SLTI_FM<0xa>; def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>, @@ -949,7 +1042,7 @@ def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>, ADD_FM<0, 0x23>; let Defs = [HI0, LO0] in def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>, - ADD_FM<0x1c, 2>; + ADD_FM<0x1c, 2>, ISA_MIPS32; def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>; def SUB : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM<0, 0x22>; def SLT : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM<0, 0x2a>; @@ -977,12 +1070,11 @@ def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, sra>, SRLV_FM<7, 0>; // Rotate Instructions -let Predicates = [HasMips32r2, HasStdEnc] in { - def ROTR : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd, II_ROTR, rotr, - immZExt5>, SRA_FM<2, 1>; - def ROTRV : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd, II_ROTRV, rotr>, - SRLV_FM<6, 1>; -} +def ROTR : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd, II_ROTR, rotr, + immZExt5>, + SRA_FM<2, 1>, ISA_MIPS32R2; +def ROTRV : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd, II_ROTRV, rotr>, + SRLV_FM<6, 1>, ISA_MIPS32R2; /// Load and Store Instructions /// aligned @@ -999,11 +1091,16 @@ def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>; def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>; /// load/store left/right -let Predicates = [NotInMicroMips] in { -def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, II_LWL>, LW_FM<0x22>; -def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, II_LWR>, LW_FM<0x26>; -def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, II_SWL>, LW_FM<0x2a>; -def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>; +let EncodingPredicates = [], // FIXME: Lack of HasStdEnc is probably a bug + AdditionalPredicates = [NotInMicroMips] in { +def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, II_LWL>, LW_FM<0x22>, + ISA_MIPS1_NOT_32R6_64R6; +def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, II_LWR>, LW_FM<0x26>, + ISA_MIPS1_NOT_32R6_64R6; +def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, II_SWL>, LW_FM<0x2a>, + ISA_MIPS1_NOT_32R6_64R6; +def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>, + ISA_MIPS1_NOT_32R6_64R6; } def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM; @@ -1014,34 +1111,41 @@ def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>; def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>; def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>; -def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>; -def TGEI : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>; -def TGEIU : 
MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>; -def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>; -def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>; -def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>; +def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>, + ISA_MIPS2_NOT_32R6_64R6; +def TGEI : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>, + ISA_MIPS2_NOT_32R6_64R6; +def TGEIU : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>, + ISA_MIPS2_NOT_32R6_64R6; +def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>, + ISA_MIPS2_NOT_32R6_64R6; +def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>, + ISA_MIPS2_NOT_32R6_64R6; +def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>, + ISA_MIPS2_NOT_32R6_64R6; def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>; def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>; def TRAP : TrapBase; -def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>; -def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>; +def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32; +def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32; -def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>; -def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>; +def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2; +def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2; -let Predicates = [NotInMicroMips] in { +let EncodingPredicates = [], // FIXME: Lack of HasStdEnc is probably a bug + AdditionalPredicates = [NotInMicroMips] in { def WAIT : WAIT_FT<"wait">, WAIT_FM; /// Load-linked, Store-conditional -def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>; -def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>; +def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, ISA_MIPS2; +def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, ISA_MIPS2; } /// Jump and Branch Instructions def J : MMRel, JumpFJ, FJ<2>, - Requires<[RelocStatic, HasStdEnc]>, IsBranch; + AdditionalRequires<[RelocStatic]>, IsBranch; def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>; def BEQ : MMRel, CBranch<"beq", brtarget, seteq, GPR32Opnd>, BEQ_FM<4>; def BNE : MMRel, CBranch<"bne", brtarget, setne, GPR32Opnd>, BEQ_FM<5>; @@ -1056,7 +1160,7 @@ def BLTZ : MMRel, CBranchZero<"bltz", brtarget, setlt, GPR32Opnd>, def B : UncondBranch; def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>; -let Predicates = [NotInMicroMips, HasStdEnc] in { +let AdditionalPredicates = [NotInMicroMips] in { def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM; def JALRPseudo : JumpLinkRegPseudo; } @@ -1102,21 +1206,24 @@ def UDIV : MMRel, Div<"divu", II_DIVU, GPR32Opnd, [HI0, LO0]>, def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>; def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>; -let Predicates = [NotInMicroMips] in { +let EncodingPredicates = [], // FIXME: Lack of HasStdEnc is probably a bug + AdditionalPredicates = [NotInMicroMips] in { def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>; def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>; } /// Sign Ext In Register Instructions. 
-def SEB : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>, SEB_FM<0x10, 0x20>; -def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>, SEB_FM<0x18, 0x20>; +def SEB : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>, + SEB_FM<0x10, 0x20>, ISA_MIPS32R2; +def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>, + SEB_FM<0x18, 0x20>, ISA_MIPS32R2; /// Count Leading -def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>; -def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>; +def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>, ISA_MIPS32; +def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>, ISA_MIPS32; /// Word Swap Bytes Within Halfwords -def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>; +def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2; /// No operation. def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; @@ -1128,12 +1235,12 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; def LEA_ADDiu : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>; // MADD*/MSUB* -def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>; -def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>; -def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>; -def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>; +def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>, ISA_MIPS32; +def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>, ISA_MIPS32; +def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>, ISA_MIPS32; +def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>, ISA_MIPS32; -let Predicates = [HasStdEnc, NotDSP] in { +let AdditionalPredicates = [NotDSP] in { def PseudoMULT : MultDivPseudo; def PseudoMULTu : MultDivPseudo; def PseudoMFHI : PseudoMFLOHI; @@ -1156,8 +1263,8 @@ def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>; def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers -def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>; -def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>; +def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>, ISA_MIPS32; +def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>, ISA_MIPS32; def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>; def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>; @@ -1165,67 +1272,94 @@ class Barrier : InstSE<(outs), (ins), asmstr, [], NoItinerary, FrmOther>; def SSNOP : Barrier<"ssnop">, BARRIER_FM<1>; def EHB : Barrier<"ehb">, BARRIER_FM<3>; -def PAUSE : Barrier<"pause">, BARRIER_FM<5>, Requires<[HasMips32r2]>; +def PAUSE : Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2; + +class TLB : InstSE<(outs), (ins), asmstr, [], NoItinerary, + FrmOther>; +def TLBP : TLB<"tlbp">, COP0_TLB_FM<0x08>; +def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>; +def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>; +def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>; //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// -def : InstAlias<"move $dst, $src", - (ADDu GPR32Opnd:$dst, GPR32Opnd:$src,ZERO), 1>, - Requires<[NotMips64, NotInMicroMips]>; -def : InstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>; -def : InstAlias<"addu $rs, $rt, $imm", - (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; -def : 
InstAlias<"add $rs, $rt, $imm", - (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; -def : InstAlias<"and $rs, $rt, $imm", - (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; -def : InstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"move $dst, $src", + (ADDu GPR32Opnd:$dst, GPR32Opnd:$src,ZERO), 1>, + GPR_32 { + let AdditionalPredicates = [NotInMicroMips]; +} +def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"addu $rs, $rt, $imm", + (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; +def : MipsInstAlias<"add $rs, $rt, $imm", + (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; +def : MipsInstAlias<"and $rs, $rt, $imm", + (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; +def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>; let Predicates = [NotInMicroMips] in { -def : InstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>; -} -def : InstAlias<"jal $rs", (JALR RA, GPR32Opnd:$rs), 0>; -def : InstAlias<"jal $rd,$rs", (JALR GPR32Opnd:$rd, GPR32Opnd:$rs), 0>; -def : InstAlias<"not $rt, $rs", - (NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>; -def : InstAlias<"neg $rt, $rs", - (SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>; -def : InstAlias<"negu $rt, $rs", - (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>; -def : InstAlias<"slt $rs, $rt, $imm", - (SLTi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; -def : InstAlias<"xor $rs, $rt, $imm", - (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; -def : InstAlias<"or $rs, $rt, $imm", - (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; -def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; -def : InstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : InstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : InstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : InstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; -def : InstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>; -def : InstAlias<"bnez $rs,$offset", - (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; -def : InstAlias<"beqz $rs,$offset", - (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; -def : InstAlias<"syscall", (SYSCALL 0), 1>; - -def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; -def : InstAlias<"break", (BREAK 0, 0), 1>; -def : InstAlias<"ei", (EI ZERO), 1>; -def : InstAlias<"di", (DI ZERO), 1>; - -def : InstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; -def : InstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; -def : InstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; -def : InstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; -def : InstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; -def : InstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; -def : InstAlias<"sub, $rd, $rs, $imm", - (ADDi GPR32Opnd:$rd, GPR32Opnd:$rs, InvertedImOperand:$imm)>; -def : InstAlias<"subu, $rd, $rs, $imm", - (ADDiu GPR32Opnd:$rd, GPR32Opnd:$rs, InvertedImOperand:$imm)>; - +def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>; +} +def : MipsInstAlias<"jal $rs", (JALR RA, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"jal $rd,$rs", (JALR GPR32Opnd:$rd, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"not $rt, $rs", + (NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>; +def : MipsInstAlias<"neg $rt, $rs", + (SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>; +def : MipsInstAlias<"negu $rt", + (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 0>; +def : 
MipsInstAlias<"negu $rt, $rs", + (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>; +def : MipsInstAlias<"slt $rs, $rt, $imm", + (SLTi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; +def : MipsInstAlias<"sltu $rt, $rs, $imm", + (SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm16:$imm), 0>; +def : MipsInstAlias<"xor $rs, $rt, $imm", + (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; +def : MipsInstAlias<"or $rs, $rt, $imm", + (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; +def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; +def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; +def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"bnez $rs,$offset", + (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"beqz $rs,$offset", + (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; +def : MipsInstAlias<"syscall", (SYSCALL 0), 1>; + +def : MipsInstAlias<"break", (BREAK 0, 0), 1>; +def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; +def : MipsInstAlias<"ei", (EI ZERO), 1>; +def : MipsInstAlias<"di", (DI ZERO), 1>; + +def : MipsInstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), + 1>; +def : MipsInstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), + 1>; +def : MipsInstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"sll $rd, $rt, $rs", + (SLLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"sub, $rd, $rs, $imm", + (ADDi GPR32Opnd:$rd, GPR32Opnd:$rs, + InvertedImOperand:$imm), 0>; +def : MipsInstAlias<"sub $rs, $imm", + (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, InvertedImOperand:$imm), + 0>; +def : MipsInstAlias<"subu, $rd, $rs, $imm", + (ADDiu GPR32Opnd:$rd, GPR32Opnd:$rs, + InvertedImOperand:$imm), 0>; +def : MipsInstAlias<"subu $rs, $imm", (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, + InvertedImOperand:$imm), 0>; +def : MipsInstAlias<"sra $rd, $rt, $rs", + (SRAV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"srl $rd, $rt, $rs", + (SRLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// @@ -1271,7 +1405,7 @@ def : MipsPat<(i32 imm:$imm), // Carry MipsPatterns def : MipsPat<(subc GPR32:$lhs, GPR32:$rhs), (SUBu GPR32:$lhs, GPR32:$rhs)>; -let Predicates = [HasStdEnc, NotDSP] in { +let AdditionalPredicates = [NotDSP] in { def : MipsPat<(addc GPR32:$lhs, GPR32:$rhs), (ADDu GPR32:$lhs, GPR32:$rhs)>; def : MipsPat<(addc GPR32:$src, immSExt16:$imm), @@ -1340,14 +1474,11 @@ def : MipsPat<(not GPR32:$in), (NOR GPR32Opnd:$in, ZERO)>; // extended loads -let Predicates = [HasStdEnc] in { - def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; - def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; - def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>; -} +def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>; +def : 
MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>; +def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>; // peepholes -let Predicates = [HasStdEnc] in def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>; // brcond patterns @@ -1441,11 +1572,9 @@ def : MipsPat<(bswap GPR32:$rt), (ROTR (WSBH GPR32:$rt), 16)>; // Load halfword/word patterns. let AddedComplexity = 40 in { - let Predicates = [HasStdEnc] in { - def : LoadRegImmPat; - def : LoadRegImmPat; - def : LoadRegImmPat; - } + def : LoadRegImmPat; + def : LoadRegImmPat; + def : LoadRegImmPat; } //===----------------------------------------------------------------------===// @@ -1456,6 +1585,9 @@ include "MipsInstrFPU.td" include "Mips64InstrInfo.td" include "MipsCondMov.td" +include "Mips32r6InstrInfo.td" +include "Mips64r6InstrInfo.td" + // // Mips16 diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index d76cb1d..2072488 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "MipsJITInfo.h" #include "MipsInstrInfo.h" #include "MipsRelocations.h" @@ -25,6 +24,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "jit" + void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { unsigned NewAddr = (intptr_t)New; diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h index ecda310..c9dfd83 100644 --- a/lib/Target/Mips/MipsJITInfo.h +++ b/lib/Target/Mips/MipsJITInfo.h @@ -37,26 +37,26 @@ class MipsJITInfo : public TargetJITInfo { /// overwriting OLD with a branch to NEW. This is used for self-modifying /// code. /// - virtual void replaceMachineCodeForFunction(void *Old, void *New); + void replaceMachineCodeForFunction(void *Old, void *New) override; // getStubLayout - Returns the size and alignment of the largest call stub // on Mips. - virtual StubLayout getStubLayout(); + StubLayout getStubLayout() override; /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. - virtual void *emitFunctionStub(const Function *F, void *Fn, - JITCodeEmitter &JCE); + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; /// getLazyResolverFunction - Expose the lazy resolver to the JIT. - virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; /// relocate - Before the JIT can run a block of code that has been emitted, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. - virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char *GOTBase); + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) override; /// Initialize - Initialize internal stage for the function being JITted. void Initialize(const MachineFunction &MF, bool isPIC, diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 2b6a874..acfe76e 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -10,14 +10,9 @@ // This pass expands a branch or jump instruction into a long branch if its // offset is too large to fit into its immediate field. // -// FIXME: -// 1. Fix pc-region jump instructions which cross 256MB segment boundaries. -// 2. 
If program has inline assembly statements whose size cannot be -// determined accurately, load branch target addresses from the GOT. +// FIXME: Fix pc-region jump instructions which cross 256MB segment boundaries. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-long-branch" - #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsTargetMachine.h" @@ -33,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-long-branch" + STATISTIC(LongBranches, "Number of long branches."); static cl::opt SkipLongBranch( @@ -56,7 +53,7 @@ namespace { bool HasLongBranch; MachineInstr *Br; - MBBInfo() : Size(0), HasLongBranch(false), Br(0) {} + MBBInfo() : Size(0), HasLongBranch(false), Br(nullptr) {} }; class MipsLongBranch : public MachineFunctionPass { @@ -67,13 +64,13 @@ namespace { : MachineFunctionPass(ID), TM(tm), IsPIC(TM.getRelocationModel() == Reloc::PIC_), ABI(TM.getSubtarget().getTargetABI()), - LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 13 : 9)) {} + LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 : 9)) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "Mips Long Branch"; } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; private: void splitMBB(MachineBasicBlock *MBB); @@ -111,7 +108,7 @@ static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) { } assert(false && "This instruction does not have an MBB operand."); - return 0; + return nullptr; } // Traverse the list of instructions backwards until a non-debug instruction is @@ -267,20 +264,14 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { LongBrMBB->addSuccessor(BalTgtMBB); BalTgtMBB->addSuccessor(TgtMBB); - int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address; - unsigned BalTgtMBBSize = 5; - int64_t Offset = TgtAddress - (I.Address + I.Size - BalTgtMBBSize * 4); - int64_t Lo = SignExtend64<16>(Offset & 0xffff); - int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff); - if (ABI != MipsSubtarget::N64) { // $longbr: // addiu $sp, $sp, -8 // sw $ra, 0($sp) - // bal $baltgt // lui $at, %hi($tgt - $baltgt) - // $baltgt: + // bal $baltgt // addiu $at, $at, %lo($tgt - $baltgt) + // $baltgt: // addu $at, $ra, $at // lw $ra, 0($sp) // jr $at @@ -295,14 +286,31 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA) .addReg(Mips::SP).addImm(0); + // LUi and ADDiu instructions create 32-bit offset of the target basic + // block from the target of BAL instruction. We cannot use immediate + // value for this offset because it cannot be determined accurately when + // the program has inline assembly statements. We therefore use the + // relocation expressions %hi($tgt-$baltgt) and %lo($tgt-$baltgt) which + // are resolved during the fixup, so the values will always be correct. + // + // Since we cannot create %hi($tgt-$baltgt) and %lo($tgt-$baltgt) + // expressions at this point (it is possible only at the MC layer), + // we replace LUi and ADDiu with pseudo instructions + // LONG_BRANCH_LUi and LONG_BRANCH_ADDiu, and add both basic + // blocks as operands to these instructions. When lowering these pseudo + // instructions to LUi and ADDiu in the MC layer, we will create + // %hi($tgt-$baltgt) and %lo($tgt-$baltgt) expressions and add them as + // operands to lowered instructions. 
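For reference, a minimal standalone C++ sketch of the arithmetic the %hi($tgt-$baltgt)/%lo($tgt-$baltgt) pair performs once the fixup resolves the difference (illustrative only; the helper name and main() harness are invented here and are not part of the patch or of LLVM). Because ADDiu sign-extends its 16-bit immediate, %hi must round the offset up by 0x8000 for the lui/addiu pair to reassemble it exactly:

  #include <cassert>
  #include <cstdint>

  // Recombine a 32-bit offset the way "lui $at, %hi(Off)" followed by
  // "addiu $at, $at, %lo(Off)" does: %hi is the rounded high half,
  // %lo the sign-extended low half.
  static int32_t reassemble(int32_t Off) {
    uint32_t Hi = (uint32_t(Off) + 0x8000) >> 16; // %hi, rounded up by 0x8000
    int32_t Lo = int16_t(Off);                    // %lo, sign-extended by addiu
    return int32_t(Hi << 16) + Lo;
  }

  int main() {
    for (int32_t Off : {0x12348000, -0x100000, 0x7fff, -1})
      assert(reassemble(Off) == Off); // exact for any 32-bit offset
  }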
+ + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi), Mips::AT) + .addMBB(TgtMBB).addMBB(BalTgtMBB); MIBundleBuilder(*LongBrMBB, Pos) .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB)) - .append(BuildMI(*MF, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi)); + .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT) + .addReg(Mips::AT).addMBB(TgtMBB).addMBB(BalTgtMBB)); Pos = BalTgtMBB->begin(); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT) - .addReg(Mips::AT).addImm(Lo); BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDu), Mips::AT) .addReg(Mips::RA).addReg(Mips::AT); BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA) @@ -316,14 +324,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { // $longbr: // daddiu $sp, $sp, -16 // sd $ra, 0($sp) - // lui64 $at, %highest($tgt - $baltgt) - // daddiu $at, $at, %higher($tgt - $baltgt) + // daddiu $at, $zero, %hi($tgt - $baltgt) // dsll $at, $at, 16 - // daddiu $at, $at, %hi($tgt - $baltgt) // bal $baltgt - // dsll $at, $at, 16 - // $baltgt: // daddiu $at, $at, %lo($tgt - $baltgt) + // $baltgt: // daddu $at, $ra, $at // ld $ra, 0($sp) // jr64 $at @@ -331,9 +336,20 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { // $fallthrough: // - int64_t Higher = SignExtend64<16>(((Offset + 0x80008000) >> 32) & 0xffff); - int64_t Highest = - SignExtend64<16>(((Offset + 0x800080008000LL) >> 48) & 0xffff); + // We assume the branch is within-function, and that offset is within + // +/- 2GB. High 32 bits will therefore always be zero. + + // Note that this will work even if the offset is negative, because + // of the +1 modification that's added in that case. For example, if the + // offset is -1MB (0xFFFFFFFFFFF00000), the computation for %higher is + // + // 0xFFFFFFFFFFF00000 + 0x80008000 = 0x000000007FF08000 + // + // and the bits [47:32] are zero. For %highest + // + // 0xFFFFFFFFFFF00000 + 0x800080008000 = 0x000080007FF08000 + // + // and the bits [63:48] are zero. 
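The assumption above is easy to spot-check. The following small self-contained program (an illustration only, not part of the patch) evaluates the %higher/%highest formulas from the deleted code and confirms that both parts vanish for offsets in the assumed range, including the -1MB example worked out in the comment:

  #include <cassert>
  #include <cstdint>

  // SignExtend64<16> from llvm/Support/MathExtras.h, inlined for the sketch.
  static int64_t signExtend16(int64_t V) { return int16_t(V); }

  static int64_t higher(int64_t Off) {
    return signExtend16((Off + 0x80008000LL) >> 32);     // old %higher value
  }
  static int64_t highest(int64_t Off) {
    return signExtend16((Off + 0x800080008000LL) >> 48); // old %highest value
  }

  int main() {
    // The -1MB example from the comment, plus points near the +/- 2GB edges.
    for (int64_t Off : {-0x100000LL, 0x7fff7fffLL, -0x80000000LL, 0LL})
      assert(higher(Off) == 0 && highest(Off) == 0);
  }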
Pos = LongBrMBB->begin(); @@ -341,24 +357,21 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP_64).addImm(-16); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64) .addReg(Mips::SP_64).addImm(0); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi64), Mips::AT_64) - .addImm(Highest); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) - .addReg(Mips::AT_64).addImm(Higher); + BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu), + Mips::AT_64).addReg(Mips::ZERO_64) + .addMBB(TgtMBB, MipsII::MO_ABS_HI).addMBB(BalTgtMBB); BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64) .addReg(Mips::AT_64).addImm(16); - BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) - .addReg(Mips::AT_64).addImm(Hi); MIBundleBuilder(*LongBrMBB, Pos) .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB)) - .append(BuildMI(*MF, DL, TII->get(Mips::DSLL), Mips::AT_64) - .addReg(Mips::AT_64).addImm(16)); + .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu), + Mips::AT_64).addReg(Mips::AT_64) + .addMBB(TgtMBB, MipsII::MO_ABS_LO) + .addMBB(BalTgtMBB)); Pos = BalTgtMBB->begin(); - BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64) - .addReg(Mips::AT_64).addImm(Lo); BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDu), Mips::AT_64) .addReg(Mips::RA_64).addReg(Mips::AT_64); BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64) @@ -370,8 +383,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::SP_64).addImm(16)); } - assert(BalTgtMBBSize == BalTgtMBB->size()); - assert(LongBrMBB->size() + BalTgtMBBSize == LongBranchSeqSize); + assert(LongBrMBB->size() + BalTgtMBB->size() == LongBranchSeqSize); } else { // $longbr: // j $tgt diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 7c9a9ed..821392e 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -151,7 +151,75 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO, return MCOperand(); } +MCOperand MipsMCInstLower::createSub(MachineBasicBlock *BB1, + MachineBasicBlock *BB2, + MCSymbolRefExpr::VariantKind Kind) const { + const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::Create(BB1->getSymbol(), *Ctx); + const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::Create(BB2->getSymbol(), *Ctx); + const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Sym1, Sym2, *Ctx); + + return MCOperand::CreateExpr(MipsMCExpr::Create(Kind, Sub, *Ctx)); +} + +void MipsMCInstLower:: +lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(Mips::LUi); + + // Lower register operand. + OutMI.addOperand(LowerOperand(MI->getOperand(0))); + + // Create %hi($tgt-$baltgt). + OutMI.addOperand(createSub(MI->getOperand(1).getMBB(), + MI->getOperand(2).getMBB(), + MCSymbolRefExpr::VK_Mips_ABS_HI)); +} + +void MipsMCInstLower:: +lowerLongBranchADDiu(const MachineInstr *MI, MCInst &OutMI, int Opcode, + MCSymbolRefExpr::VariantKind Kind) const { + OutMI.setOpcode(Opcode); + + // Lower two register operands. + for (unsigned I = 0, E = 2; I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + OutMI.addOperand(LowerOperand(MO)); + } + + // Create %lo($tgt-$baltgt) or %hi($tgt-$baltgt). 
+ OutMI.addOperand(createSub(MI->getOperand(2).getMBB(), + MI->getOperand(3).getMBB(), Kind)); +} + +bool MipsMCInstLower::lowerLongBranch(const MachineInstr *MI, + MCInst &OutMI) const { + switch (MI->getOpcode()) { + default: + return false; + case Mips::LONG_BRANCH_LUi: + lowerLongBranchLUi(MI, OutMI); + return true; + case Mips::LONG_BRANCH_ADDiu: + lowerLongBranchADDiu(MI, OutMI, Mips::ADDiu, + MCSymbolRefExpr::VK_Mips_ABS_LO); + return true; + case Mips::LONG_BRANCH_DADDiu: + unsigned TargetFlags = MI->getOperand(2).getTargetFlags(); + if (TargetFlags == MipsII::MO_ABS_HI) + lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu, + MCSymbolRefExpr::VK_Mips_ABS_HI); + else if (TargetFlags == MipsII::MO_ABS_LO) + lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu, + MCSymbolRefExpr::VK_Mips_ABS_LO); + else + report_fatal_error("Unexpected flags for LONG_BRANCH_DADDiu"); + return true; + } +} + void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + if (lowerLongBranch(MI, OutMI)) + return; + OutMI.setOpcode(MI->getOpcode()); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 4570bd9..269190f 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -9,6 +9,7 @@ #ifndef MIPSMCINSTLOWER_H #define MIPSMCINSTLOWER_H +#include "MCTargetDesc/MipsMCExpr.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/Support/Compiler.h" @@ -36,6 +37,13 @@ public: private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; + MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2, + MCSymbolRefExpr::VariantKind Kind) const; + void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const; + void lowerLongBranchADDiu(const MachineInstr *MI, MCInst &OutMI, + int Opcode, + MCSymbolRefExpr::VariantKind Kind) const; + bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const; }; } diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 5722c6c..285bb14 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -65,10 +65,6 @@ def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", // Operands -def uimm2 : Operand { - let PrintMethod = "printUnsignedImm"; -} - // The immediate of an LSA instruction needs special handling // as the encoded value should be subtracted by one. 
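As a concrete gloss on that comment, here is a tiny sketch of the off-by-one mapping it describes (hypothetical helper names, not LLVM's actual encoder/decoder hooks): the assembly-level LSA shift amount 1-4 is stored in the 2-bit instruction field as 0-3, so encoding subtracts one and decoding adds it back:

  #include <cassert>

  static unsigned encodeLSAImm(unsigned Shift) { // assembly-level value -> field
    assert(Shift >= 1 && Shift <= 4 && "LSA shift amount is 1..4");
    return Shift - 1;
  }
  static unsigned decodeLSAImm(unsigned Field) { // field -> assembly-level value
    assert(Field <= 3);
    return Field + 1;
  }

  int main() {
    for (unsigned S = 1; S <= 4; ++S)
      assert(decodeLSAImm(encodeLSAImm(S)) == S); // round-trips exactly
  }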
def uimm2LSAAsmOperand : AsmOperandClass { @@ -84,10 +80,6 @@ def LSAImm : Operand { let ParserMatchClass = uimm2LSAAsmOperand; } -def uimm3 : Operand { - let PrintMethod = "printUnsignedImm8"; -} - def uimm4 : Operand { let PrintMethod = "printUnsignedImm8"; } @@ -1505,6 +1497,15 @@ class MSA_INSERT_PSEUDO_BASE : + MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, GPR32Opnd:$n, ROFS:$fs), + [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs, + GPR32Opnd:$n))]> { + bit usesCustomInserter = 1; + string Constraints = "$wd = $wd_in"; +} + class MSA_INSVE_DESC_BASE { @@ -2300,11 +2301,25 @@ class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", vinsert_v4i32, class INSERT_D_DESC : MSA_INSERT_DESC_BASE<"insert.d", vinsert_v2i64, MSA128DOpnd, GPR64Opnd>; +class INSERT_B_VIDX_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; +class INSERT_H_VIDX_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; +class INSERT_W_VIDX_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; +class INSERT_D_VIDX_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; + class INSERT_FW_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE; class INSERT_FD_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE; +class INSERT_FW_VIDX_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; +class INSERT_FD_VIDX_PSEUDO_DESC : + MSA_INSERT_VIDX_PSEUDO_BASE; + class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, MSA128BOpnd>; class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, @@ -3214,6 +3229,13 @@ let DecoderMethod = "DecodeINSVE_DF" in { def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC; def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC; +def INSERT_B_VIDX_PSEUDO : INSERT_B_VIDX_PSEUDO_DESC; +def INSERT_H_VIDX_PSEUDO : INSERT_H_VIDX_PSEUDO_DESC; +def INSERT_W_VIDX_PSEUDO : INSERT_W_VIDX_PSEUDO_DESC; +def INSERT_D_VIDX_PSEUDO : INSERT_D_VIDX_PSEUDO_DESC; +def INSERT_FW_VIDX_PSEUDO : INSERT_FW_VIDX_PSEUDO_DESC; +def INSERT_FD_VIDX_PSEUDO : INSERT_FD_VIDX_PSEUDO_DESC; + def LD_B: LD_B_ENC, LD_B_DESC; def LD_H: LD_H_ENC, LD_H_DESC; def LD_W: LD_W_ENC, LD_W_DESC; @@ -3731,3 +3753,55 @@ def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE; def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE; + +// Vector extraction with variable index +def : MSAPat<(i32 (vextract_sext_i8 v16i8:$ws, i32:$idx)), + (SRA (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_B v16i8:$ws, + i32:$idx), + sub_lo)), + GPR32), (i32 24))>; +def : MSAPat<(i32 (vextract_sext_i16 v8i16:$ws, i32:$idx)), + (SRA (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_H v8i16:$ws, + i32:$idx), + sub_lo)), + GPR32), (i32 16))>; +def : MSAPat<(i32 (vextract_sext_i32 v4i32:$ws, i32:$idx)), + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_W v4i32:$ws, + i32:$idx), + sub_lo)), + GPR32)>; +def : MSAPat<(i64 (vextract_sext_i64 v2i64:$ws, i32:$idx)), + (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (SPLAT_D v2i64:$ws, + i32:$idx), + sub_64)), + GPR64), [HasMSA, IsGP64bit]>; + +def : MSAPat<(i32 (vextract_zext_i8 v16i8:$ws, i32:$idx)), + (SRL (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_B v16i8:$ws, + i32:$idx), + sub_lo)), + GPR32), (i32 24))>; +def : MSAPat<(i32 (vextract_zext_i16 v8i16:$ws, i32:$idx)), + (SRL (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_H v8i16:$ws, + i32:$idx), + sub_lo)), + GPR32), (i32 16))>; +def : MSAPat<(i32 (vextract_zext_i32 v4i32:$ws, i32:$idx)), + (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_W v4i32:$ws, + i32:$idx), + sub_lo)), + GPR32)>; +def : MSAPat<(i64 (vextract_zext_i64 v2i64:$ws, i32:$idx)), + (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (SPLAT_D v2i64:$ws, + i32:$idx), + sub_64)), + GPR64), [HasMSA, IsGP64bit]>; + +def : MSAPat<(f32 
(vector_extract v4f32:$ws, i32:$idx)), + (f32 (EXTRACT_SUBREG (SPLAT_W v4f32:$ws, + i32:$idx), + sub_lo))>; +def : MSAPat<(f64 (vector_extract v2f64:$ws, i32:$idx)), + (f64 (EXTRACT_SUBREG (SPLAT_D v2f64:$ws, + i32:$idx), + sub_64))>; diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index dedf802..e30302e 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -27,7 +27,7 @@ FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true), MipsCallEntry::MipsCallEntry(const StringRef &N) { #ifndef NDEBUG Name = N; - Val = 0; + Val = nullptr; #endif } @@ -65,9 +65,8 @@ MipsFunctionInfo::~MipsFunctionInfo() { ++I) delete I->getValue(); - for (ValueMap::iterator - I = GlobalCallEntries.begin(), E = GlobalCallEntries.end(); I != E; ++I) - delete I->second; + for (const auto &Entry : GlobalCallEntries) + delete Entry.second; } bool MipsFunctionInfo::globalBaseRegSet() const { diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 3e14c8c..e9101cc 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -37,12 +37,12 @@ class MipsCallEntry : public PseudoSourceValue { public: explicit MipsCallEntry(const StringRef &N); explicit MipsCallEntry(const GlobalValue *V); - virtual bool isConstant(const MachineFrameInfo *) const; - virtual bool isAliased(const MachineFrameInfo *) const; - virtual bool mayAlias(const MachineFrameInfo *) const; + bool isConstant(const MachineFrameInfo *) const override; + bool isAliased(const MachineFrameInfo *) const override; + bool mayAlias(const MachineFrameInfo *) const override; private: - virtual void printCustom(raw_ostream &O) const; + void printCustom(raw_ostream &O) const override; #ifndef NDEBUG std::string Name; const GlobalValue *Val; diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp index c6abf17..03c76ea 100644 --- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp @@ -14,6 +14,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#define DEBUG_TYPE "mips-isel" + namespace llvm { bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h index fda35ae..a96862a 100644 --- a/lib/Target/Mips/MipsModuleISelDAGToDAG.h +++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.h @@ -41,15 +41,11 @@ public: TM(TM_), Subtarget(TM.getSubtarget()) {} // Pass Name - virtual const char *getPassName() const { + const char *getPassName() const override { return "MIPS DAG->DAG Pattern Instruction Selection"; } - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual SDNode *Select(SDNode *N) { - llvm_unreachable("unexpected"); - } + bool runOnMachineFunction(MachineFunction &MF) override; protected: /// Keep a pointer to the MipsSubtarget around so that we can make the right diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp index db270f3..c234049 100644 --- a/lib/Target/Mips/MipsOptimizePICCall.cpp +++ b/lib/Target/Mips/MipsOptimizePICCall.cpp @@ -12,8 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "optimize-mips-pic-call" - #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "MipsMachineFunction.h" @@ -25,6 +23,8 @@ using namespace llvm; +#define 
DEBUG_TYPE "optimize-mips-pic-call" + static cl::opt LoadTargetFromGOT("mips-load-target-from-got", cl::init(true), cl::desc("Load target address from GOT"), @@ -35,11 +35,13 @@ static cl::opt EraseGPOpnd("mips-erase-gp-opnd", cl::Hidden); namespace { +typedef PointerUnion ValueType; + typedef std::pair CntRegP; typedef RecyclingAllocator > + ScopedHashTableVal > AllocatorTy; -typedef ScopedHashTable, +typedef ScopedHashTable, AllocatorTy> ScopedHTType; class MBBInfo { @@ -59,11 +61,11 @@ class OptimizePICCall : public MachineFunctionPass { public: OptimizePICCall(TargetMachine &tm) : MachineFunctionPass(ID) {} - virtual const char *getPassName() const { return "Mips OptimizePICCall"; } + const char *getPassName() const override { return "Mips OptimizePICCall"; } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -78,18 +80,18 @@ private: /// and the underlying object in Reg and Val respectively, if the function's /// address can be resolved lazily. bool isCallViaRegister(MachineInstr &MI, unsigned &Reg, - const Value *&Val) const; + ValueType &Val) const; /// \brief Return the number of instructions that dominate the current /// instruction and load the function address from object Entry. - unsigned getCount(const Value *Entry); + unsigned getCount(ValueType Entry); /// \brief Return the destination virtual register of the last instruction /// that loads from object Entry. - unsigned getReg(const Value *Entry); + unsigned getReg(ValueType Entry); /// \brief Update ScopedHT. - void incCntAndSetReg(const Value *Entry, unsigned Reg); + void incCntAndSetReg(ValueType Entry, unsigned Reg); ScopedHTType ScopedHT; static char ID; @@ -101,13 +103,13 @@ char OptimizePICCall::ID = 0; /// Return the first MachineOperand of MI if it is a used virtual register. static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) { if (MI.getNumOperands() == 0) - return 0; + return nullptr; MachineOperand &MO = MI.getOperand(0); if (!MO.isReg() || !MO.isUse() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - return 0; + return nullptr; return &MO; } @@ -153,10 +155,10 @@ static void eraseGPOpnd(MachineInstr &MI) { } } - llvm_unreachable(0); + llvm_unreachable(nullptr); } -MBBInfo::MBBInfo(MachineDomTreeNode *N) : Node(N), HTScope(0) {} +MBBInfo::MBBInfo(MachineDomTreeNode *N) : Node(N), HTScope(nullptr) {} const MachineDomTreeNode *MBBInfo::getNode() const { return Node; } @@ -210,7 +212,7 @@ bool OptimizePICCall::visitNode(MBBInfo &MBBI) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { unsigned Reg; - const Value *Entry; + ValueType Entry; // Skip instructions that are not call instructions via registers. if (!isCallViaRegister(*I, Reg, Entry)) @@ -242,7 +244,7 @@ bool OptimizePICCall::visitNode(MBBInfo &MBBI) { } bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg, - const Value *&Val) const { + ValueType &Val) const { if (!MI.isCall()) return false; @@ -254,7 +256,7 @@ bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg, // Get the instruction that loads the function address from the GOT. 
Reg = MO->getReg(); - Val = 0; + Val = (Value*)nullptr; MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); MachineInstr *DefMI = MRI.getVRegDef(Reg); @@ -273,20 +275,22 @@ bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg, // Return the underlying object for the GOT entry in Val. assert(DefMI->hasOneMemOperand()); Val = (*DefMI->memoperands_begin())->getValue(); + if (!Val) + Val = (*DefMI->memoperands_begin())->getPseudoValue(); return true; } -unsigned OptimizePICCall::getCount(const Value *Entry) { +unsigned OptimizePICCall::getCount(ValueType Entry) { return ScopedHT.lookup(Entry).first; } -unsigned OptimizePICCall::getReg(const Value *Entry) { +unsigned OptimizePICCall::getReg(ValueType Entry) { unsigned Reg = ScopedHT.lookup(Entry).second; assert(Reg); return Reg; } -void OptimizePICCall::incCntAndSetReg(const Value *Entry, unsigned Reg) { +void OptimizePICCall::incCntAndSetReg(ValueType Entry, unsigned Reg) { CntRegP P = ScopedHT.lookup(Entry); ScopedHT.insert(Entry, std::make_pair(P.first + 1, Reg)); } diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp index fe60841..7aae964 100644 --- a/lib/Target/Mips/MipsOs16.cpp +++ b/lib/Target/Mips/MipsOs16.cpp @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-os16" #include "MipsOs16.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#define DEBUG_TYPE "mips-os16" + static cl::opt Mips32FunctionMask( "mips32-function-mask", diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h index 21beef8..55e5a81 100644 --- a/lib/Target/Mips/MipsOs16.h +++ b/lib/Target/Mips/MipsOs16.h @@ -34,11 +34,11 @@ public: } - virtual const char *getPassName() const { + const char *getPassName() const override { return "MIPS Os16 Optimization"; } - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; }; diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index d7fc93b..83d25ab 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-reg-info" - #include "MipsRegisterInfo.h" #include "Mips.h" #include "MipsAnalyzeImmediate.h" @@ -37,11 +35,13 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "mips-reg-info" + #define GET_REGINFO_TARGET_DESC #include "MipsGenRegisterInfo.inc" -using namespace llvm; - MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST) : MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {} @@ -79,8 +79,8 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, //===----------------------------------------------------------------------===// /// Mips Callee Saved Registers -const uint16_t* MipsRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const { +const MCPhysReg * +MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_SaveList; @@ -119,11 +119,11 @@ const uint32_t *MipsRegisterInfo::getMips16RetHelperMask() { BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - static const uint16_t ReservedGPR32[] = { + static const MCPhysReg ReservedGPR32[] = { Mips::ZERO, Mips::K0, Mips::K1, 
Mips::SP }; - static const uint16_t ReservedGPR64[] = { + static const MCPhysReg ReservedGPR64[] = { Mips::ZERO_64, Mips::K0_64, Mips::K1_64, Mips::SP_64 }; diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 0450c6f..b34496f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -43,30 +43,31 @@ public: /// Code Generation virtual methods... const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const; + unsigned Kind) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; - const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const uint32_t *getCallPreservedMask(CallingConv::ID) const; + MachineFunction &MF) const override; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const uint32_t *getCallPreservedMask(CallingConv::ID) const override; static const uint32_t *getMips16RetHelperMask(); - BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; - virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const override; - virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; /// Stack Frame Processing Methods void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const; /// Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const override; /// \brief Return GPR register class. virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 834e6c5..875a596 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -205,6 +205,10 @@ let Namespace = "Mips" in { foreach I = 0-31 in def COP2#I : MipsReg<#I, ""#I>; + // COP3 registers. + foreach I = 0-31 in + def COP3#I : MipsReg<#I, ""#I>; + // PC register def PC : Register<"pc">; @@ -387,6 +391,10 @@ def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>; def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>, Unallocatable; +// Coprocessor 3 registers. 
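// (COP3 mirrors the COP2 handling earlier in this file: 32 unallocatable
// i32 registers named $0..$31, present only so the assembler can parse and
// print coprocessor 3 operands, e.g. for lwc3/swc3-style instructions;
// illustrative summary rather than patch text.)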
+def COP3 : RegisterClass<"Mips", [i32], 32, (sequence "COP3%u", 0, 31)>, + Unallocatable; + // Octeon multiplier and product registers def OCTEON_MPL : RegisterClass<"Mips", [i64], 64, (add MPL0, MPL1, MPL2)>, Unallocatable; @@ -484,6 +492,10 @@ def COP2AsmOperand : MipsAsmRegOperand { let Name = "COP2AsmReg"; } +def COP3AsmOperand : MipsAsmRegOperand { + let Name = "COP3AsmReg"; +} + def HWRegsOpnd : RegisterOperand { let ParserMatchClass = HWRegsAsmOperand; } @@ -524,6 +536,10 @@ def COP2Opnd : RegisterOperand { let ParserMatchClass = COP2AsmOperand; } +def COP3Opnd : RegisterOperand { + let ParserMatchClass = COP3AsmOperand; +} + def MSA128BOpnd : RegisterOperand { let ParserMatchClass = MSA128AsmOperand; } diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 0343a47..6ad5821 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -375,7 +375,8 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const { // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. - BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO); + BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO) + .setMIFlag(MachineInstr::FrameSetup); // emit ".cfi_def_cfa_register $fp" unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 8fa9e46..5d2801f 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -25,22 +25,22 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
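  /// (Illustrative shape of the SE prologue once a frame pointer is in use,
  /// assuming a 16-bit stack size; not literal output of this patch:
  ///     addiu $sp, $sp, -StackSize      # FrameSetup
  ///     move  $fp, $sp                  # FrameSetup
  ///     .cfi_def_cfa_register $fp
  ///  Tagging both instructions MachineInstr::FrameSetup lets later passes
  ///  and unwind emission distinguish frame construction from body code.)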
- void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI) const override; - bool hasReservedCallFrame(const MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const override; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; + RegScavenger *RS) const override; unsigned ehDataReg(unsigned I) const; }; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 5b20a6c..d5385be 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-isel" #include "MipsSEISelDAGToDAG.h" #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" @@ -35,6 +34,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "mips-isel" + bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { if (Subtarget.inMips16Mode()) return false; @@ -412,7 +413,7 @@ bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { BuildVectorSDNode *Node = dyn_cast(N); - if (Node == NULL) + if (!Node) return false; APInt SplatValue, SplatUndef; @@ -813,16 +814,16 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { EVT ViaVecTy; if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector()) - return std::make_pair(false, (SDNode*)NULL); + return std::make_pair(false, nullptr); if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, !Subtarget.isLittle())) - return std::make_pair(false, (SDNode*)NULL); + return std::make_pair(false, nullptr); switch (SplatBitSize) { default: - return std::make_pair(false, (SDNode*)NULL); + return std::make_pair(false, nullptr); case 8: LdiOp = Mips::LDI_B; ViaVecTy = MVT::v16i8; @@ -842,7 +843,7 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } if (!SplatValue.isSignedIntN(10)) - return std::make_pair(false, (SDNode*)NULL); + return std::make_pair(false, nullptr); SDValue Imm = CurDAG->getTargetConstant(SplatValue, ViaVecTy.getVectorElementType()); @@ -868,7 +869,7 @@ std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } - return std::make_pair(false, (SDNode*)NULL); + return std::make_pair(false, nullptr); } FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) { diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index ba84a6d..57328d2 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -25,7 +25,7 @@ public: private: - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI, MachineFunction &MF); @@ -44,66 +44,66 @@ private: bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits) const; - virtual bool 
selectAddrRegImm(SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectAddrRegImm(SDValue Addr, SDValue &Base, + SDValue &Offset) const override; - virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectAddrRegReg(SDValue Addr, SDValue &Base, + SDValue &Offset) const override; - virtual bool selectAddrDefault(SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectAddrDefault(SDValue Addr, SDValue &Base, + SDValue &Offset) const override; - virtual bool selectIntAddr(SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectIntAddr(SDValue Addr, SDValue &Base, + SDValue &Offset) const override; - virtual bool selectAddrRegImm10(SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectAddrRegImm10(SDValue Addr, SDValue &Base, + SDValue &Offset) const; - virtual bool selectAddrRegImm12(SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectAddrRegImm12(SDValue Addr, SDValue &Base, + SDValue &Offset) const; - virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectIntAddrMM(SDValue Addr, SDValue &Base, + SDValue &Offset) const override; - virtual bool selectIntAddrMSA(SDValue Addr, SDValue &Base, - SDValue &Offset) const; + bool selectIntAddrMSA(SDValue Addr, SDValue &Base, + SDValue &Offset) const override; /// \brief Select constant vector splats. - virtual bool selectVSplat(SDNode *N, APInt &Imm) const; + bool selectVSplat(SDNode *N, APInt &Imm) const override; /// \brief Select constant vector splats whose value fits in a given integer. - virtual bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, + bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed, unsigned ImmBitSize) const; /// \brief Select constant vector splats whose value fits in a uimm1. - virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const; + bool selectVSplatUimm1(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value fits in a uimm2. - virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const; + bool selectVSplatUimm2(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value fits in a uimm3. - virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const; + bool selectVSplatUimm3(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value fits in a uimm4. - virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const; + bool selectVSplatUimm4(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value fits in a uimm5. - virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const; + bool selectVSplatUimm5(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value fits in a uimm6. - virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const; + bool selectVSplatUimm6(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value fits in a uimm8. - virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const; + bool selectVSplatUimm8(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value fits in a simm5. - virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; + bool selectVSplatSimm5(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value is a power of 2. 
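  /// (e.g. a vector splat of the constant 8 == 1 << 3 is matched here, so
  /// the selected MSA bit instruction encodes only the bit position, 3;
  /// illustrative example, not from this patch.)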
- virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value is the inverse of a /// power of 2. - virtual bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const; + bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value is a run of set bits /// ending at the most significant bit - virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; + bool selectVSplatMaskL(SDValue N, SDValue &Imm) const override; /// \brief Select constant vector splats whose value is a run of set bits /// starting at bit zero. - virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; + bool selectVSplatMaskR(SDValue N, SDValue &Imm) const override; - virtual std::pair selectNode(SDNode *Node); + std::pair selectNode(SDNode *Node) override; - virtual void processFunctionAfterISel(MachineFunction &MF); + void processFunctionAfterISel(MachineFunction &MF) override; // Insert instructions to initialize the global base register in the // first MBB of the function. diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 0dac0b7..969d730 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -10,7 +10,6 @@ // Subclass of MipsTargetLowering specialized for mips32/64. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-isel" #include "MipsSEISelLowering.h" #include "MipsRegisterInfo.h" #include "MipsTargetMachine.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-isel" + static cl::opt EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden, cl::desc("MIPS: Enable tail calls."), cl::init(false)); @@ -119,10 +120,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) if (Subtarget->hasCnMips()) setOperationAction(ISD::MUL, MVT::i64, Legal); - else if (hasMips64()) + else if (isGP64bit()) setOperationAction(ISD::MUL, MVT::i64, Custom); - if (hasMips64()) { + if (isGP64bit()) { setOperationAction(ISD::MULHS, MVT::i64, Custom); setOperationAction(ISD::MULHU, MVT::i64, Custom); } @@ -253,6 +254,16 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const { MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; + if (Subtarget->systemSupportsUnalignedAccess()) { + // MIPS32r6/MIPS64r6 is required to support unaligned access. It's + // implementation defined whether this is handled by hardware, software, or + // a hybrid of the two but it's expected that most implementations will + // handle the majority of cases in hardware. 
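  // (Typical caller-side use of this hook, as a sketch only -- TLI and AS
  // are assumed names, not code from this patch:
  //   bool Fast = false;
  //   if (TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast)
  //     ...emit an ordinary load/store rather than an LWL/LWR sequence...)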
+ if (Fast) + *Fast = true; + return true; + } + switch (SVT) { case MVT::i64: case MVT::i32: @@ -487,7 +498,8 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, Log2 == ExtendTySize) { SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT, - Op0->getVTList(), Ops, Op0->getNumOperands()); + Op0->getVTList(), + makeArrayRef(Ops, Op0->getNumOperands())); return Op0; } } @@ -507,7 +519,7 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { BuildVectorSDNode *Node = dyn_cast(N.getNode()); - if (Node == NULL) + if (!Node) return false; APInt SplatValue, SplatUndef; @@ -831,7 +843,8 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), Op0Op0->getOperand(2) }; DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT, - Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands()); + Op0Op0->getVTList(), + makeArrayRef(Ops, Op0Op0->getNumOperands())); return Op0Op0; } } @@ -1051,6 +1064,18 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return emitINSERT_FW(MI, BB); case Mips::INSERT_FD_PSEUDO: return emitINSERT_FD(MI, BB); + case Mips::INSERT_B_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 1, false); + case Mips::INSERT_H_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 2, false); + case Mips::INSERT_W_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 4, false); + case Mips::INSERT_D_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 8, false); + case Mips::INSERT_FW_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 4, true); + case Mips::INSERT_FD_VIDX_PSEUDO: + return emitINSERT_DF_VIDX(MI, BB, 8, true); case Mips::FILL_FW_PSEUDO: return emitFILL_FW(MI, BB); case Mips::FILL_FD_PSEUDO: @@ -1117,7 +1142,7 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); SDValue Ops[2] = {BP, Hi.getValue(1)}; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -1168,7 +1193,7 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, return HasLo ? Lo : Hi; SDValue Vals[] = { Lo, Hi }; - return DAG.getMergeValues(Vals, 2, DL); + return DAG.getMergeValues(Vals, DL); } @@ -1235,7 +1260,7 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I); // Create node. - SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size()); + SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); SDValue Out = (ResTys[0] == MVT::Untyped) ? 
extractLOHI(Val, DL, DAG) : Val;
 
   if (!HasChainIn)
@@ -1243,7 +1268,7 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
   assert(Val->getValueType(1) == MVT::Other);
   SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
-  return DAG.getMergeValues(Vals, 2, DL);
+  return DAG.getMergeValues(Vals, DL);
 }
 
 // Lower an MSA copy intrinsic into the specified SelectionDAG node
@@ -1280,8 +1305,8 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
   SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
                       LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
 
-  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
-                               ViaVecTy.getVectorNumElements());
+  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy,
+                     makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
 
   if (ViaVecTy != ResVecTy)
     Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);
@@ -1320,8 +1345,8 @@ static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
                         SplatValueA, SplatValueB, SplatValueA, SplatValueB,
                         SplatValueA, SplatValueB, SplatValueA, SplatValueB };
 
-  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
-                               ViaVecTy.getVectorNumElements());
+  SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy,
+                     makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
 
   if (VecTy != ViaVecTy)
     Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
@@ -1355,7 +1380,7 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
     }
   }
 
-  if (Exp2Imm.getNode() == NULL) {
+  if (!Exp2Imm.getNode()) {
     // We couldnt constant fold, do a vector shift instead
 
     // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
@@ -1735,7 +1760,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
 
     // If ResTy is v2i64 then the type legalizer will break this node down into
     // an equivalent v4i32.
-    return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, Ops);
   }
   case Intrinsic::mips_fexp2_w:
   case Intrinsic::mips_fexp2_d: {
@@ -2560,8 +2585,7 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
        ++I)
     Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy));
 
-  SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0],
-                                Ops.size());
+  SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, Ops);
 
   if (Using1stVec && Using2ndVec) {
     Op0 = Op->getOperand(0);
@@ -2885,6 +2909,131 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
   return BB;
 }
 
+// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
+//
+// For integer:
+// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
+// =>
+// (SLL $lanetmp1, $lane, <log2size)
+// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
+// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
+// (NEG $lanetmp2, $lanetmp1)
+// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
+//
+// For floating point:
+// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
+// =>
+// (SUBREG_TO_REG $wt, $fs, <subreg>)
+// (SLL $lanetmp1, $lane, <log2size)
+// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
+// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
+// (NEG $lanetmp2, $lanetmp1)
+// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
+MachineBasicBlock *
+MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
+                                         MachineBasicBlock *BB,
+                                         unsigned EltSizeInBytes,
+                                         bool IsFP) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned Wd = MI->getOperand(0).getReg();
+  unsigned SrcVecReg = MI->getOperand(1).getReg();
+  unsigned LaneReg = MI->getOperand(2).getReg();
+  unsigned SrcValReg = MI->getOperand(3).getReg();
+
+  const TargetRegisterClass *VecRC = nullptr;
+  const TargetRegisterClass *GPRRC = isGP64bit() ?
&Mips::GPR64RegClass + : &Mips::GPR32RegClass; + unsigned EltLog2Size; + unsigned InsertOp = 0; + unsigned InsveOp = 0; + switch (EltSizeInBytes) { + default: + llvm_unreachable("Unexpected size"); + case 1: + EltLog2Size = 0; + InsertOp = Mips::INSERT_B; + InsveOp = Mips::INSVE_B; + VecRC = &Mips::MSA128BRegClass; + break; + case 2: + EltLog2Size = 1; + InsertOp = Mips::INSERT_H; + InsveOp = Mips::INSVE_H; + VecRC = &Mips::MSA128HRegClass; + break; + case 4: + EltLog2Size = 2; + InsertOp = Mips::INSERT_W; + InsveOp = Mips::INSVE_W; + VecRC = &Mips::MSA128WRegClass; + break; + case 8: + EltLog2Size = 3; + InsertOp = Mips::INSERT_D; + InsveOp = Mips::INSVE_D; + VecRC = &Mips::MSA128DRegClass; + break; + } + + if (IsFP) { + unsigned Wt = RegInfo.createVirtualRegister(VecRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) + .addImm(0) + .addReg(SrcValReg) + .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo); + SrcValReg = Wt; + } + + // Convert the lane index into a byte index + if (EltSizeInBytes != 1) { + unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SLL), LaneTmp1) + .addReg(LaneReg) + .addImm(EltLog2Size); + LaneReg = LaneTmp1; + } + + // Rotate bytes around so that the desired lane is element zero + unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1) + .addReg(SrcVecReg) + .addReg(SrcVecReg) + .addReg(LaneReg); + + unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC); + if (IsFP) { + // Use insve.df to insert to element zero + BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2) + .addReg(WdTmp1) + .addImm(0) + .addReg(SrcValReg) + .addImm(0); + } else { + // Use insert.df to insert to element zero + BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2) + .addReg(WdTmp1) + .addReg(SrcValReg) + .addImm(0); + } + + // Rotate elements the rest of the way for a full rotation. + // sld.df inteprets $rt modulo the number of columns so we only need to negate + // the lane index to do this. + unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC); + BuildMI(*BB, MI, DL, TII->get(Mips::SUB), LaneTmp2) + .addReg(Mips::ZERO) + .addReg(LaneReg); + BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd) + .addReg(WdTmp2) + .addReg(WdTmp2) + .addReg(LaneTmp2); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + // Emit the FILL_FW pseudo instruction. 
// // fill_fw_pseudo $wd, $fs diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 079fbf6..03a20ef 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -30,22 +30,23 @@ namespace llvm { void addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); - virtual bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS = 0, - bool *Fast = 0) const override; + bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS = 0, + bool *Fast = nullptr) const override; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - virtual MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const override; - virtual bool isShuffleMaskLegal(const SmallVectorImpl &Mask, - EVT VT) const { + bool isShuffleMaskLegal(const SmallVectorImpl &Mask, + EVT VT) const override { return false; } - virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override { if (VT == MVT::Untyped) return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; @@ -54,16 +55,16 @@ namespace llvm { } private: - virtual bool - isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, - unsigned NextStackOffset, - const MipsFunctionInfo& FI) const; + bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, + unsigned NextStackOffset, + const MipsFunctionInfo& FI) const override; - virtual void + void getOpndList(SmallVectorImpl &Ops, std::deque< std::pair > &RegsToPass, bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, - CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const; + CallLoweringInfo &CLI, SDValue Callee, + SDValue Chain) const override; SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const; @@ -97,6 +98,11 @@ namespace llvm { /// \brief Emit the INSERT_FD pseudo instruction MachineBasicBlock *emitINSERT_FD(MachineInstr *MI, MachineBasicBlock *BB) const; + /// \brief Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction + MachineBasicBlock *emitINSERT_DF_VIDX(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned EltSizeInBytes, + bool IsFP) const; /// \brief Emit the FILL_FW pseudo instruction MachineBasicBlock *emitFILL_FW(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 094ee29..f6f364f 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -368,7 +368,7 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, if (isInt<16>(Amount))// addi sp, sp, amount BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount); else { // Expand immediate that doesn't fit in 16-bit. 
- unsigned Reg = loadImmediate(Amount, MBB, I, DL, 0); + unsigned Reg = loadImmediate(Amount, MBB, I, DL, nullptr); BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg, RegState::Kill); } } diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index 6d2dd90..aa68552 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -26,46 +26,46 @@ class MipsSEInstrInfo : public MipsInstrInfo { public: explicit MipsSEInstrInfo(MipsTargetMachine &TM); - virtual const MipsRegisterInfo &getRegisterInfo() const; + const MipsRegisterInfo &getRegisterInfo() const override; /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; - virtual void storeRegToStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - int64_t Offset) const; + void storeRegToStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const override; - virtual void loadRegFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - int64_t Offset) const; + void loadRegFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI, + int64_t Offset) const override; - virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - virtual unsigned getOppositeBranchOpc(unsigned Opc) const; + unsigned getOppositeBranchOpc(unsigned Opc) const override; /// Adjust SP by Amount bytes. 
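  /// (For example, Amount == -64 becomes a single 'addiu $sp, $sp, -64',
  /// while an Amount outside the signed 16-bit range is first materialized
  /// with loadImmediate() and then applied with 'addu', matching the .cpp
  /// hunk above.)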
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, @@ -79,7 +79,7 @@ public: unsigned *NewImm) const; private: - virtual unsigned getAnalyzableBrOpc(unsigned Opc) const; + unsigned getAnalyzableBrOpc(unsigned Opc) const override; void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opc) const; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 2ac082f..0af1a6b 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -39,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "mips-reg-info" + MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST) : MipsRegisterInfo(ST) {} @@ -187,7 +189,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, *static_cast( MBB.getParent()->getTarget().getInstrInfo()); unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL, - OffsetBitSize == 16 ? &NewImm : NULL); + OffsetBitSize == 16 ? &NewImm : nullptr); BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg) .addReg(Reg, RegState::Kill); diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h index 76cdd9d..f2f3a7e 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.h +++ b/lib/Target/Mips/MipsSERegisterInfo.h @@ -24,16 +24,16 @@ class MipsSERegisterInfo : public MipsRegisterInfo { public: MipsSERegisterInfo(const MipsSubtarget &Subtarget); - bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const override; - bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; - virtual const TargetRegisterClass *intRegClass(unsigned Size) const; + const TargetRegisterClass *intRegClass(unsigned Size) const override; private: - virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, - int FrameIndex, uint64_t StackSize, - int64_t SPOffset) const; + void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo, + int FrameIndex, uint64_t StackSize, + int64_t SPOffset) const override; }; } // end namespace llvm diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp index e4d70fc..0d4398e 100644 --- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp +++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-selectiondag-info" #include "MipsTargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "mips-selectiondag-info" + MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM) : TargetSelectionDAGInfo(TM) { } diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 143b945..74ec064 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mips-subtarget" - #include "MipsMachineFunction.h" #include "Mips.h" #include "MipsRegisterInfo.h" @@ -25,13 +23,14 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "mips-subtarget" + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "MipsGenSubtargetInfo.inc" - -using namespace llvm; - // FIXME: Maybe this should be on by default when Mips16 is specified // 
static cl::opt Mixed16_32( @@ -77,17 +76,16 @@ void MipsSubtarget::anchor() { } MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little, - Reloc::Model _RM, MipsTargetMachine *_TM) : - MipsGenSubtargetInfo(TT, CPU, FS), - MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), - IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), - HasCnMips(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), - HasSwap(false), HasBitCount(false), HasFPIdx(false), - InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), - InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), - AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), - RM(_RM), OverrideMode(NoOverride), TM(_TM), TargetTriple(TT) -{ + Reloc::Model _RM, MipsTargetMachine *_TM) + : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), + MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), + IsFP64bit(false), IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), + HasCnMips(false), IsLinux(true), HasMips3_32(false), HasMips3_32r2(false), + HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false), + InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), + InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), + AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), + RM(_RM), OverrideMode(NoOverride), TM(_TM), TargetTriple(TT) { std::string CPUName = CPU; CPUName = selectMipsCPU(TT, CPUName); @@ -109,6 +107,19 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUName); + // Don't even attempt to generate code for MIPS-I, MIPS-II, MIPS-III, and + // MIPS-V. They have not been tested and currently exist for the integrated + // assembler only. + if (MipsArchVersion == Mips1) + report_fatal_error("Code generation for MIPS-I is not implemented", false); + if (MipsArchVersion == Mips2) + report_fatal_error("Code generation for MIPS-II is not implemented", false); + if (MipsArchVersion == Mips3) + report_fatal_error("Code generation for MIPS-III is not implemented", + false); + if (MipsArchVersion == Mips5) + report_fatal_error("Code generation for MIPS-V is not implemented", false); + // Assert exactly one ABI was chosen. assert(MipsABI != UnknownABI); assert((((getFeatureBits() & Mips::FeatureO32) != 0) + @@ -126,15 +137,23 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, "See -mattr=+fp64.", false); + if (hasMips32r6()) { + StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6"; + + assert(isFP64bit()); + assert(isNaN2008()); + if (hasDSP()) + report_fatal_error(ISA + " is not compatible with the DSP ASE", false); + } + // Is the target system Linux ? if (TT.find("linux") == std::string::npos) IsLinux = false; // Set UseSmallSection. + // TODO: Investigate the IsLinux check. I suspect it's really checking for + // bare-metal. 
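  // (Background sketch, not patch text: small-section data is addressed
  // $gp-relative, roughly
  //    lw $2, %gp_rel(sym)($gp)
  //  versus a two-instruction lui/addiu absolute address, which is why it is
  //  only enabled for static relocation on non-Linux targets below.)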
UseSmallSection = !IsLinux && (RM == Reloc::Static); - // set some subtarget specific features - if (inMips16Mode()) - HasBitCount=false; } bool diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 2166b93..373f481 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -37,7 +37,10 @@ public: }; protected: - enum MipsArchEnum { Mips32, Mips32r2, Mips4, Mips64, Mips64r2 }; + enum MipsArchEnum { + Mips1, Mips2, Mips32, Mips32r2, Mips32r6, Mips3, Mips4, Mips5, Mips64, + Mips64r2, Mips64r6 + }; // Mips architecture version MipsArchEnum MipsArchVersion; @@ -56,6 +59,9 @@ protected: // IsFP64bit - The target processor has 64-bit floating point registers. bool IsFP64bit; + // IsNan2008 - IEEE 754-2008 NaN encoding. + bool IsNaN2008bit; + // IsFP64bit - General-purpose registers are 64 bits wide bool IsGP64bit; @@ -73,20 +79,20 @@ protected: /// Features related to the presence of specific instructions. - // HasSEInReg - SEB and SEH (signext in register) instructions. - bool HasSEInReg; + // HasMips3_32 - The subset of MIPS-III instructions added to MIPS32 + bool HasMips3_32; - // HasCondMov - Conditional mov (MOVZ, MOVN) instructions. - bool HasCondMov; + // HasMips3_32r2 - The subset of MIPS-III instructions added to MIPS32r2 + bool HasMips3_32r2; - // HasSwap - Byte and half swap instructions. - bool HasSwap; + // HasMips4_32 - Has the subset of MIPS-IV present in MIPS32 + bool HasMips4_32; - // HasBitCount - Count leading '1' and '0' bits. - bool HasBitCount; + // HasMips4_32r2 - Has the subset of MIPS-IV present in MIPS32r2 + bool HasMips4_32r2; - // HasFPIdx -- Floating point indexed load/store instructions. - bool HasFPIdx; + // HasMips5_32r2 - Has the subset of MIPS-V present in MIPS32r2 + bool HasMips5_32r2; // InMips16 -- can process Mips16 instructions bool InMips16Mode; @@ -127,9 +133,9 @@ protected: Triple TargetTriple; public: - virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const override; /// Only O32 and EABI supported right now. bool isABI_EABI() const { return MipsABI == EABI; } @@ -148,16 +154,24 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + bool hasMips2() const { return MipsArchVersion >= Mips2; } + bool hasMips3() const { return MipsArchVersion >= Mips3; } + bool hasMips4_32() const { return HasMips4_32; } + bool hasMips4_32r2() const { return HasMips4_32r2; } bool hasMips32() const { return MipsArchVersion >= Mips32; } bool hasMips32r2() const { return MipsArchVersion == Mips32r2 || MipsArchVersion == Mips64r2; } + bool hasMips32r6() const { return MipsArchVersion == Mips32r6 || + MipsArchVersion == Mips64r6; } bool hasMips64() const { return MipsArchVersion >= Mips64; } bool hasMips64r2() const { return MipsArchVersion == Mips64r2; } + bool hasMips64r6() const { return MipsArchVersion == Mips64r6; } bool hasCnMips() const { return HasCnMips; } bool isLittle() const { return IsLittle; } bool isFP64bit() const { return IsFP64bit; } + bool isNaN2008() const { return IsNaN2008bit; } bool isNotFP64bit() const { return !IsFP64bit; } bool isGP64bit() const { return IsGP64bit; } bool isGP32bit() const { return !IsGP64bit; } @@ -197,11 +211,6 @@ public: } /// Features related to the presence of specific instructions. 
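  /// (The flags below were folded into the ISA-subset predicates above; e.g.
  /// the SEB/SEH availability that hasSEInReg() used to track now follows
  /// from hasMips32r2(), as hasExtractInsert() below illustrates.)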
- bool hasSEInReg() const { return HasSEInReg; } - bool hasCondMov() const { return HasCondMov; } - bool hasSwap() const { return HasSwap; } - bool hasBitCount() const { return HasBitCount; } - bool hasFPIdx() const { return HasFPIdx; } bool hasExtractInsert() const { return !inMips16Mode() && hasMips32r2(); } const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } @@ -213,10 +222,9 @@ public: bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isNotTargetNaCl() const { return !TargetTriple.isOSNaCl(); } -// for now constant islands are on for the whole compilation unit but we only -// really use them if in addition we are in mips16 mode -// -static bool useConstantIslands(); + // for now constant islands are on for the whole compilation unit but we only + // really use them if in addition we are in mips16 mode + static bool useConstantIslands(); unsigned stackAlignment() const { return hasMips64() ? 16 : 8; } @@ -226,7 +234,12 @@ static bool useConstantIslands(); /// \brief Reset the subtarget for the Mips target. void resetSubtarget(MachineFunction *MF); - + /// Does the system support unaligned memory access. + /// + /// MIPS32r6/MIPS64r6 require full unaligned access support but does not + /// specify which component of the system provides it. Hardware, software, and + /// hybrid implementations are all valid. + bool systemSupportsUnalignedAccess() const { return hasMips32r6(); } }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index e9053c8..984c58e 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -35,7 +35,7 @@ #include "llvm/Transforms/Scalar.h" using namespace llvm; - +#define DEBUG_TYPE "mips" extern "C" void LLVMInitializeMipsTarget() { // Register the target. 
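// (The elided body registers the big- and little-endian target machines,
// along the lines of:
//   RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
//   RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
// shown here only as a sketch for orientation, not as patch content.)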
@@ -171,12 +171,12 @@ public: return *getMipsTargetMachine().getSubtargetImpl(); } - virtual void addIRPasses(); - virtual bool addInstSelector(); - virtual void addMachineSSAOptimization(); - virtual bool addPreEmitPass(); + void addIRPasses() override; + bool addInstSelector() override; + void addMachineSSAOptimization() override; + bool addPreEmitPass() override; - virtual bool addPreRegAlloc(); + bool addPreRegAlloc() override; }; } // namespace diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 5a9a11d..a5aa39b 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -20,7 +20,6 @@ #include "MipsJITInfo.h" #include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/DataLayout.h" @@ -34,15 +33,15 @@ class MipsRegisterInfo; class MipsTargetMachine : public LLVMTargetMachine { MipsSubtarget Subtarget; const DataLayout DL; // Calculates type size & alignment - OwningPtr InstrInfo; - OwningPtr FrameLowering; - OwningPtr TLInfo; - OwningPtr InstrInfo16; - OwningPtr FrameLowering16; - OwningPtr TLInfo16; - OwningPtr InstrInfoSE; - OwningPtr FrameLoweringSE; - OwningPtr TLInfoSE; + std::unique_ptr InstrInfo; + std::unique_ptr FrameLowering; + std::unique_ptr TLInfo; + std::unique_ptr InstrInfo16; + std::unique_ptr FrameLowering16; + std::unique_ptr TLInfo16; + std::unique_ptr InstrInfoSE; + std::unique_ptr FrameLoweringSE; + std::unique_ptr TLInfoSE; MipsSelectionDAGInfo TSInfo; const InstrItineraryData &InstrItins; MipsJITInfo JITInfo; @@ -56,39 +55,38 @@ public: virtual ~MipsTargetMachine() {} - virtual void addAnalysisPasses(PassManagerBase &PM); + void addAnalysisPasses(PassManagerBase &PM) override; - virtual const MipsInstrInfo *getInstrInfo() const + const MipsInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } - virtual const TargetFrameLowering *getFrameLowering() const + const TargetFrameLowering *getFrameLowering() const override { return FrameLowering.get(); } - virtual const MipsSubtarget *getSubtargetImpl() const + const MipsSubtarget *getSubtargetImpl() const override { return &Subtarget; } - virtual const DataLayout *getDataLayout() const + const DataLayout *getDataLayout() const override { return &DL;} - virtual const InstrItineraryData *getInstrItineraryData() const { - return Subtarget.inMips16Mode() ? 0 : &InstrItins; + const InstrItineraryData *getInstrItineraryData() const override { + return Subtarget.inMips16Mode() ? 
nullptr : &InstrItins; } - virtual MipsJITInfo *getJITInfo() - { return &JITInfo; } + MipsJITInfo *getJITInfo() override { return &JITInfo; } - virtual const MipsRegisterInfo *getRegisterInfo() const { + const MipsRegisterInfo *getRegisterInfo() const override { return &InstrInfo->getRegisterInfo(); } - virtual const MipsTargetLowering *getTargetLowering() const { + const MipsTargetLowering *getTargetLowering() const override { return TLInfo.get(); } - virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const { + const MipsSelectionDAGInfo* getSelectionDAGInfo() const override { return &TSInfo; } // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; // Set helper classes void setHelperClassesMips16(); diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 5f4b74b..4ad37ac 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -34,6 +34,8 @@ public: virtual void emitDirectiveEnt(const MCSymbol &Symbol) = 0; virtual void emitDirectiveAbiCalls() = 0; + virtual void emitDirectiveNaN2008() = 0; + virtual void emitDirectiveNaNLegacy() = 0; virtual void emitDirectiveOptionPic0() = 0; virtual void emitDirectiveOptionPic2() = 0; virtual void emitFrame(unsigned StackReg, unsigned StackSize, @@ -45,6 +47,11 @@ public: virtual void emitDirectiveSetMips64() = 0; virtual void emitDirectiveSetMips64R2() = 0; virtual void emitDirectiveSetDsp() = 0; + + // PIC support + virtual void emitDirectiveCpload(unsigned RegNo) = 0; + virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, + const MCSymbol &Sym, bool IsReg) = 0; }; // This part is for ascii assembly output @@ -53,32 +60,39 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer { public: MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); - virtual void emitDirectiveSetMicroMips(); - virtual void emitDirectiveSetNoMicroMips(); - virtual void emitDirectiveSetMips16(); - virtual void emitDirectiveSetNoMips16(); - - virtual void emitDirectiveSetReorder(); - virtual void emitDirectiveSetNoReorder(); - virtual void emitDirectiveSetMacro(); - virtual void emitDirectiveSetNoMacro(); - virtual void emitDirectiveSetAt(); - virtual void emitDirectiveSetNoAt(); - virtual void emitDirectiveEnd(StringRef Name); - - virtual void emitDirectiveEnt(const MCSymbol &Symbol); - virtual void emitDirectiveAbiCalls(); - virtual void emitDirectiveOptionPic0(); - virtual void emitDirectiveOptionPic2(); - virtual void emitFrame(unsigned StackReg, unsigned StackSize, - unsigned ReturnReg); - virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff); - virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff); - - virtual void emitDirectiveSetMips32R2(); - virtual void emitDirectiveSetMips64(); - virtual void emitDirectiveSetMips64R2(); - virtual void emitDirectiveSetDsp(); + void emitDirectiveSetMicroMips() override; + void emitDirectiveSetNoMicroMips() override; + void emitDirectiveSetMips16() override; + void emitDirectiveSetNoMips16() override; + + void emitDirectiveSetReorder() override; + void emitDirectiveSetNoReorder() override; + void emitDirectiveSetMacro() override; + void emitDirectiveSetNoMacro() override; + void emitDirectiveSetAt() override; + void emitDirectiveSetNoAt() 
override; + void emitDirectiveEnd(StringRef Name) override; + + void emitDirectiveEnt(const MCSymbol &Symbol) override; + void emitDirectiveAbiCalls() override; + void emitDirectiveNaN2008() override; + void emitDirectiveNaNLegacy() override; + void emitDirectiveOptionPic0() override; + void emitDirectiveOptionPic2() override; + void emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) override; + void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override; + void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override; + + void emitDirectiveSetMips32R2() override; + void emitDirectiveSetMips64() override; + void emitDirectiveSetMips64R2() override; + void emitDirectiveSetDsp() override; + + // PIC support + virtual void emitDirectiveCpload(unsigned RegNo); + void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, + const MCSymbol &Sym, bool IsReg) override; }; // This part is for ELF object output @@ -92,36 +106,48 @@ public: MCELFStreamer &getStreamer(); MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI); - virtual void emitLabel(MCSymbol *Symbol) override; - virtual void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; + void emitLabel(MCSymbol *Symbol) override; + void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; void finish() override; - virtual void emitDirectiveSetMicroMips(); - virtual void emitDirectiveSetNoMicroMips(); - virtual void emitDirectiveSetMips16(); - virtual void emitDirectiveSetNoMips16(); - - virtual void emitDirectiveSetReorder(); - virtual void emitDirectiveSetNoReorder(); - virtual void emitDirectiveSetMacro(); - virtual void emitDirectiveSetNoMacro(); - virtual void emitDirectiveSetAt(); - virtual void emitDirectiveSetNoAt(); - virtual void emitDirectiveEnd(StringRef Name); - - virtual void emitDirectiveEnt(const MCSymbol &Symbol); - virtual void emitDirectiveAbiCalls(); - virtual void emitDirectiveOptionPic0(); - virtual void emitDirectiveOptionPic2(); - virtual void emitFrame(unsigned StackReg, unsigned StackSize, - unsigned ReturnReg); - virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff); - virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff); - - virtual void emitDirectiveSetMips32R2(); - virtual void emitDirectiveSetMips64(); - virtual void emitDirectiveSetMips64R2(); - virtual void emitDirectiveSetDsp(); + void emitDirectiveSetMicroMips() override; + void emitDirectiveSetNoMicroMips() override; + void emitDirectiveSetMips16() override; + void emitDirectiveSetNoMips16() override; + + void emitDirectiveSetReorder() override; + void emitDirectiveSetNoReorder() override; + void emitDirectiveSetMacro() override; + void emitDirectiveSetNoMacro() override; + void emitDirectiveSetAt() override; + void emitDirectiveSetNoAt() override; + void emitDirectiveEnd(StringRef Name) override; + + void emitDirectiveEnt(const MCSymbol &Symbol) override; + void emitDirectiveAbiCalls() override; + void emitDirectiveNaN2008() override; + void emitDirectiveNaNLegacy() override; + void emitDirectiveOptionPic0() override; + void emitDirectiveOptionPic2() override; + void emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) override; + void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override; + void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override; + + void emitDirectiveSetMips32R2() override; + void emitDirectiveSetMips64() override; + void emitDirectiveSetMips64R2() override; + void emitDirectiveSetDsp() override; + + // PIC 
support + virtual void emitDirectiveCpload(unsigned RegNo); + void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, + const MCSymbol &Sym, bool IsReg) override; + +protected: + bool isO32() const { return STI.getFeatureBits() & Mips::FeatureO32; } + bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; } + bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; } }; } #endif diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 029118a..4e35b18 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -9,6 +9,7 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(NVPTXCommonTableGen) set(NVPTXCodeGen_sources + NVPTXFavorNonGenericAddrSpaces.cpp NVPTXFrameLowering.cpp NVPTXInstrInfo.cpp NVPTXISelDAGToDAG.cpp @@ -26,6 +27,8 @@ set(NVPTXCodeGen_sources NVPTXAssignValidGlobalNames.cpp NVPTXPrologEpilogPass.cpp NVPTXMCExpr.cpp + NVPTXReplaceImageHandles.cpp + NVPTXImageOptimizer.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp index cf165be..9618896 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "InstPrinter/NVPTXInstPrinter.h" #include "MCTargetDesc/NVPTXBaseInfo.h" #include "NVPTX.h" @@ -25,6 +24,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #include "NVPTXGenAsmWriter.inc" diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h index 93029ae..1fb3c57 100644 --- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h +++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h @@ -27,8 +27,8 @@ public: NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot); + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override; // Autogenerated by tblgen. 
   void printInstruction(const MCInst *MI, raw_ostream &O);

@@ -37,15 +37,15 @@ public:
   void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
-                    const char *Modifier = 0);
+                    const char *Modifier = nullptr);
   void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
-                    const char *Modifier = 0);
+                    const char *Modifier = nullptr);
   void printLdStCode(const MCInst *MI, int OpNum,
-                     raw_ostream &O, const char *Modifier = 0);
+                     raw_ostream &O, const char *Modifier = nullptr);
   void printMemOperand(const MCInst *MI, int OpNum,
-                       raw_ostream &O, const char *Modifier = 0);
+                       raw_ostream &O, const char *Modifier = nullptr);
   void printProtoIdent(const MCInst *MI, int OpNum,
-                       raw_ostream &O, const char *Modifier = 0);
+                       raw_ostream &O, const char *Modifier = nullptr);
 };
 }
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index edf4a80..ddb122f 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -43,14 +43,16 @@ enum PropertyAnnotation {
   PROPERTY_ISSAMPLER,
   PROPERTY_ISREADONLY_IMAGE_PARAM,
   PROPERTY_ISWRITEONLY_IMAGE_PARAM,
+  PROPERTY_ISREADWRITE_IMAGE_PARAM,
   PROPERTY_ISKERNEL_FUNCTION,
   PROPERTY_ALIGN,
+  PROPERTY_MANAGED,

   // last property
   PROPERTY_LAST
 };
-const unsigned AnnotationNameLen = 8; // length of each annotation name
+const unsigned AnnotationNameLen = 9; // length of each annotation name
 const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
   "maxntidx",  // PROPERTY_MAXNTID_X
   "maxntidy",  // PROPERTY_MAXNTID_Y
@@ -64,8 +66,10 @@ const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
   "sampler",   // PROPERTY_ISSAMPLER
   "rdoimage",  // PROPERTY_ISREADONLY_IMAGE_PARAM
   "wroimage",  // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+  "rdwrimage", // PROPERTY_ISREADWRITE_IMAGE_PARAM
   "kernel",    // PROPERTY_ISKERNEL_FUNCTION
   "align",     // PROPERTY_ALIGN
+  "managed",   // PROPERTY_MANAGED

   // last property
   "proplast",  // PROPERTY_LAST
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 3cf6e4b..158ca90 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -20,6 +20,8 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/TargetRegistry.h"

+using namespace llvm;
+
 #define GET_INSTRINFO_MC_DESC
 #include "NVPTXGenInstrInfo.inc"

@@ -29,8 +31,6 @@
 #define GET_REGINFO_MC_DESC
 #include "NVPTXGenRegisterInfo.inc"

-using namespace llvm;
-
 static MCInstrInfo *createNVPTXMCInstrInfo() {
   MCInstrInfo *X = new MCInstrInfo();
   InitNVPTXMCInstrInfo(X);
@@ -66,7 +66,7 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
                                                const MCSubtargetInfo &STI) {
   if (SyntaxVariant == 0)
     return new NVPTXInstPrinter(MAI, MII, MRI, STI);
-  return 0;
+  return nullptr;
 }

 // Force static initialization.
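The AnnotationNameLen bump from 8 to 9 in the NVPTXBaseInfo.h hunk above is forced by the new "rdwrimage" entry: PropertyAnnotationNames stores fixed-width strings of AnnotationNameLen + 1 bytes each, and "rdwrimage" is nine characters plus the NUL terminator. A minimal standalone sketch of that invariant, using only names taken from the hunk above (the main() harness is illustrative and not part of the tree):

    // Standalone sketch (not part of this patch): every annotation name must
    // fit in AnnotationNameLen characters plus a NUL terminator, which is why
    // the constant grows from 8 to 9 once "rdwrimage" is added.
    #include <cassert>
    #include <cstring>

    int main() {
      const unsigned AnnotationNameLen = 9; // was 8 before this patch
      const char *Names[] = {"maxntidx", "sampler",  "rdoimage", "wroimage",
                             "rdwrimage", // new in this patch, 9 characters
                             "kernel",    "align",   "managed",  "proplast"};
      for (const char *N : Names)
        assert(std::strlen(N) <= AnnotationNameLen); // fails for "rdwrimage" at 8
      return 0;
    }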
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 8cbdd47..e74c808 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -63,9 +63,12 @@ FunctionPass * createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); ModulePass *createNVPTXAssignValidGlobalNamesPass(); ModulePass *createGenericToNVVMPass(); +FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass(); ModulePass *createNVVMReflectPass(); ModulePass *createNVVMReflectPass(const StringMap& Mapping); MachineFunctionPass *createNVPTXPrologEpilogPass(); +MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); +FunctionPass *createNVPTXImageOptimizerPass(); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h index 22404b7..5b61068 100644 --- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h +++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h @@ -30,17 +30,17 @@ public: static char ID; // Pass ID NVPTXAllocaHoisting() : FunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved("stack-protector"); AU.addPreserved(); } - virtual const char *getPassName() const { + const char *getPassName() const override { return "NVPTX specific alloca hoisting"; } - virtual bool runOnFunction(Function &function); + bool runOnFunction(Function &function) override; }; extern FunctionPass *createAllocaHoisting(); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 97e2cc6..4ec575f 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/NVPTXMCAsmInfo.h" #include "NVPTX.h" #include "NVPTXInstrInfo.h" +#include "NVPTXMachineFunctionInfo.h" #include "NVPTXMCExpr.h" #include "NVPTXRegisterInfo.h" #include "NVPTXTargetMachine.h" @@ -131,7 +132,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); const ConstantExpr *CE = dyn_cast(CV); - if (CE == 0) + if (!CE) llvm_unreachable("Unknown constant value to lower!"); switch (CE->getOpcode()) { @@ -149,9 +150,24 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) { raw_string_ostream OS(S); OS << "Unsupported expression in static initializer: "; CE->printAsOperand(OS, /*PrintType=*/ false, - !AP.MF ? 0 : AP.MF->getFunction()->getParent()); + !AP.MF ? nullptr : AP.MF->getFunction()->getParent()); report_fatal_error(OS.str()); } + case Instruction::AddrSpaceCast: { + // Strip any addrspace(1)->addrspace(0) addrspace casts. These will be + // handled by the generic() logic in the MCExpr printer + PointerType *DstTy = cast(CE->getType()); + PointerType *SrcTy = cast(CE->getOperand(0)->getType()); + if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) { + return LowerConstant(cast(CE->getOperand(0)), AP); + } + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + CE->printAsOperand(OS, /*PrintType=*/ false, + !AP.MF ? 
nullptr : AP.MF->getFunction()->getParent()); + report_fatal_error(OS.str()); + } case Instruction::GetElementPtr: { const DataLayout &TD = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address @@ -310,13 +326,279 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(OutStreamer, Inst); } +// Handle symbol backtracking for targets that do not support image handles +bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, + unsigned OpNo, MCOperand &MCOp) { + const MachineOperand &MO = MI->getOperand(OpNo); + + switch (MI->getOpcode()) { + default: return false; + case NVPTX::TEX_1D_F32_I32: + case NVPTX::TEX_1D_F32_F32: + case NVPTX::TEX_1D_F32_F32_LEVEL: + case NVPTX::TEX_1D_F32_F32_GRAD: + case NVPTX::TEX_1D_I32_I32: + case NVPTX::TEX_1D_I32_F32: + case NVPTX::TEX_1D_I32_F32_LEVEL: + case NVPTX::TEX_1D_I32_F32_GRAD: + case NVPTX::TEX_1D_ARRAY_F32_I32: + case NVPTX::TEX_1D_ARRAY_F32_F32: + case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: + case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: + case NVPTX::TEX_1D_ARRAY_I32_I32: + case NVPTX::TEX_1D_ARRAY_I32_F32: + case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: + case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: + case NVPTX::TEX_2D_F32_I32: + case NVPTX::TEX_2D_F32_F32: + case NVPTX::TEX_2D_F32_F32_LEVEL: + case NVPTX::TEX_2D_F32_F32_GRAD: + case NVPTX::TEX_2D_I32_I32: + case NVPTX::TEX_2D_I32_F32: + case NVPTX::TEX_2D_I32_F32_LEVEL: + case NVPTX::TEX_2D_I32_F32_GRAD: + case NVPTX::TEX_2D_ARRAY_F32_I32: + case NVPTX::TEX_2D_ARRAY_F32_F32: + case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: + case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: + case NVPTX::TEX_2D_ARRAY_I32_I32: + case NVPTX::TEX_2D_ARRAY_I32_F32: + case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: + case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: + case NVPTX::TEX_3D_F32_I32: + case NVPTX::TEX_3D_F32_F32: + case NVPTX::TEX_3D_F32_F32_LEVEL: + case NVPTX::TEX_3D_F32_F32_GRAD: + case NVPTX::TEX_3D_I32_I32: + case NVPTX::TEX_3D_I32_F32: + case NVPTX::TEX_3D_I32_F32_LEVEL: + case NVPTX::TEX_3D_I32_F32_GRAD: + { + // This is a texture fetch, so operand 4 is a texref and operand 5 is + // a samplerref + if (OpNo == 4) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + if (OpNo == 5) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::SULD_1D_I8_TRAP: + case NVPTX::SULD_1D_I16_TRAP: + case NVPTX::SULD_1D_I32_TRAP: + case NVPTX::SULD_1D_ARRAY_I8_TRAP: + case NVPTX::SULD_1D_ARRAY_I16_TRAP: + case NVPTX::SULD_1D_ARRAY_I32_TRAP: + case NVPTX::SULD_2D_I8_TRAP: + case NVPTX::SULD_2D_I16_TRAP: + case NVPTX::SULD_2D_I32_TRAP: + case NVPTX::SULD_2D_ARRAY_I8_TRAP: + case NVPTX::SULD_2D_ARRAY_I16_TRAP: + case NVPTX::SULD_2D_ARRAY_I32_TRAP: + case NVPTX::SULD_3D_I8_TRAP: + case NVPTX::SULD_3D_I16_TRAP: + case NVPTX::SULD_3D_I32_TRAP: { + // This is a V1 surface load, so operand 1 is a surfref + if (OpNo == 1) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::SULD_1D_V2I8_TRAP: + case NVPTX::SULD_1D_V2I16_TRAP: + case NVPTX::SULD_1D_V2I32_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: + case NVPTX::SULD_2D_V2I8_TRAP: + case NVPTX::SULD_2D_V2I16_TRAP: + case NVPTX::SULD_2D_V2I32_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: + case NVPTX::SULD_3D_V2I8_TRAP: + case NVPTX::SULD_3D_V2I16_TRAP: + case NVPTX::SULD_3D_V2I32_TRAP: { + // This is 
a V2 surface load, so operand 2 is a surfref + if (OpNo == 2) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::SULD_1D_V4I8_TRAP: + case NVPTX::SULD_1D_V4I16_TRAP: + case NVPTX::SULD_1D_V4I32_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: + case NVPTX::SULD_2D_V4I8_TRAP: + case NVPTX::SULD_2D_V4I16_TRAP: + case NVPTX::SULD_2D_V4I32_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: + case NVPTX::SULD_3D_V4I8_TRAP: + case NVPTX::SULD_3D_V4I16_TRAP: + case NVPTX::SULD_3D_V4I32_TRAP: { + // This is a V4 surface load, so operand 4 is a surfref + if (OpNo == 4) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::SUST_B_1D_B8_TRAP: + case NVPTX::SUST_B_1D_B16_TRAP: + case NVPTX::SUST_B_1D_B32_TRAP: + case NVPTX::SUST_B_1D_V2B8_TRAP: + case NVPTX::SUST_B_1D_V2B16_TRAP: + case NVPTX::SUST_B_1D_V2B32_TRAP: + case NVPTX::SUST_B_1D_V4B8_TRAP: + case NVPTX::SUST_B_1D_V4B16_TRAP: + case NVPTX::SUST_B_1D_V4B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_B_2D_B8_TRAP: + case NVPTX::SUST_B_2D_B16_TRAP: + case NVPTX::SUST_B_2D_B32_TRAP: + case NVPTX::SUST_B_2D_V2B8_TRAP: + case NVPTX::SUST_B_2D_V2B16_TRAP: + case NVPTX::SUST_B_2D_V2B32_TRAP: + case NVPTX::SUST_B_2D_V4B8_TRAP: + case NVPTX::SUST_B_2D_V4B16_TRAP: + case NVPTX::SUST_B_2D_V4B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_B_3D_B8_TRAP: + case NVPTX::SUST_B_3D_B16_TRAP: + case NVPTX::SUST_B_3D_B32_TRAP: + case NVPTX::SUST_B_3D_V2B8_TRAP: + case NVPTX::SUST_B_3D_V2B16_TRAP: + case NVPTX::SUST_B_3D_V2B32_TRAP: + case NVPTX::SUST_B_3D_V4B8_TRAP: + case NVPTX::SUST_B_3D_V4B16_TRAP: + case NVPTX::SUST_B_3D_V4B32_TRAP: + case NVPTX::SUST_P_1D_B8_TRAP: + case NVPTX::SUST_P_1D_B16_TRAP: + case NVPTX::SUST_P_1D_B32_TRAP: + case NVPTX::SUST_P_1D_V2B8_TRAP: + case NVPTX::SUST_P_1D_V2B16_TRAP: + case NVPTX::SUST_P_1D_V2B32_TRAP: + case NVPTX::SUST_P_1D_V4B8_TRAP: + case NVPTX::SUST_P_1D_V4B16_TRAP: + case NVPTX::SUST_P_1D_V4B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_P_2D_B8_TRAP: + case NVPTX::SUST_P_2D_B16_TRAP: + case NVPTX::SUST_P_2D_B32_TRAP: + case NVPTX::SUST_P_2D_V2B8_TRAP: + case NVPTX::SUST_P_2D_V2B16_TRAP: + case NVPTX::SUST_P_2D_V2B32_TRAP: + case NVPTX::SUST_P_2D_V4B8_TRAP: + case NVPTX::SUST_P_2D_V4B16_TRAP: + case NVPTX::SUST_P_2D_V4B32_TRAP: + case 
NVPTX::SUST_P_2D_ARRAY_B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: + case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_P_3D_B8_TRAP: + case NVPTX::SUST_P_3D_B16_TRAP: + case NVPTX::SUST_P_3D_B32_TRAP: + case NVPTX::SUST_P_3D_V2B8_TRAP: + case NVPTX::SUST_P_3D_V2B16_TRAP: + case NVPTX::SUST_P_3D_V2B32_TRAP: + case NVPTX::SUST_P_3D_V4B8_TRAP: + case NVPTX::SUST_P_3D_V4B16_TRAP: + case NVPTX::SUST_P_3D_V4B32_TRAP: { + // This is a surface store, so operand 0 is a surfref + if (OpNo == 0) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + case NVPTX::TXQ_CHANNEL_ORDER: + case NVPTX::TXQ_CHANNEL_DATA_TYPE: + case NVPTX::TXQ_WIDTH: + case NVPTX::TXQ_HEIGHT: + case NVPTX::TXQ_DEPTH: + case NVPTX::TXQ_ARRAY_SIZE: + case NVPTX::TXQ_NUM_SAMPLES: + case NVPTX::TXQ_NUM_MIPMAP_LEVELS: + case NVPTX::SUQ_CHANNEL_ORDER: + case NVPTX::SUQ_CHANNEL_DATA_TYPE: + case NVPTX::SUQ_WIDTH: + case NVPTX::SUQ_HEIGHT: + case NVPTX::SUQ_DEPTH: + case NVPTX::SUQ_ARRAY_SIZE: { + // This is a query, so operand 1 is a surfref/texref + if (OpNo == 1) { + lowerImageHandleSymbol(MO.getImm(), MCOp); + return true; + } + + return false; + } + } +} + +void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { + // Ewwww + TargetMachine &TM = const_cast(MF->getTarget()); + NVPTXTargetMachine &nvTM = static_cast(TM); + const NVPTXMachineFunctionInfo *MFI = MF->getInfo(); + const char *Sym = MFI->getImageHandleSymbol(Index); + std::string *SymNamePtr = + nvTM.getManagedStrPool()->getManagedString(Sym); + MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol( + StringRef(SymNamePtr->c_str()))); +} + void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { OutMI.setOpcode(MI->getOpcode()); + const NVPTXSubtarget &ST = TM.getSubtarget(); // Special: Do not mangle symbol operand of CALL_PROTOTYPE if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { const MachineOperand &MO = MI->getOperand(0); - OutMI.addOperand(GetSymbolRef(MO, + OutMI.addOperand(GetSymbolRef( OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName())))); return; } @@ -325,6 +607,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCOp; + if (!ST.hasImageHandles()) { + if (lowerImageHandleOperand(MI, i, MCOp)) { + OutMI.addOperand(MCOp); + continue; + } + } + if (lowerOperand(MO, MCOp)) OutMI.addOperand(MCOp); } @@ -345,10 +634,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, MO.getMBB()->getSymbol(), OutContext)); break; case MachineOperand::MO_ExternalSymbol: - MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName())); + MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); break; case MachineOperand::MO_GlobalAddress: - MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal())); + MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); break; case MachineOperand::MO_FPImmediate: { const ConstantFP *Cnt = MO.getFPImm(); @@ -407,8 +696,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { } } -MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO, - const MCSymbol *Symbol) { +MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { const MCExpr *Expr; Expr = MCSymbolRefExpr::Create(Symbol, 
MCSymbolRefExpr::VK_None, OutContext); @@ -750,7 +1038,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) return false; - const Function *oneFunc = 0; + const Function *oneFunc = nullptr; bool flag = usedInOneFunc(gv, oneFunc); if (flag == false) @@ -1010,6 +1298,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { for (i = 0; i < n; i++) global_list.insert(global_list.end(), gv_array[i]); + clearAnnotationCache(&M); + delete[] gv_array; return ret; @@ -1105,10 +1395,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, if (llvm::isSampler(*GVar)) { O << ".global .samplerref " << llvm::getSamplerName(*GVar); - const Constant *Initializer = NULL; + const Constant *Initializer = nullptr; if (GVar->hasInitializer()) Initializer = GVar->getInitializer(); - const ConstantInt *CI = NULL; + const ConstantInt *CI = nullptr; if (Initializer) CI = dyn_cast(Initializer); if (CI) { @@ -1175,7 +1465,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, return; } - const Function *demotedFunc = 0; + const Function *demotedFunc = nullptr; if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { O << "// " << GVar->getName().str() << " has been demoted\n"; if (localDecls.find(demotedFunc) != localDecls.end()) @@ -1347,7 +1637,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { return "u32"; } llvm_unreachable("unexpected type"); - return NULL; + return nullptr; } void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, @@ -1495,19 +1785,33 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { first = false; // Handle image/sampler parameters - if (llvm::isSampler(*I) || llvm::isImage(*I)) { - if (llvm::isImage(*I)) { - std::string sname = I->getName(); - if (llvm::isImageWriteOnly(*I)) - O << "\t.param .surfref " << *getSymbol(F) << "_param_" - << paramIndex; - else // Default image is read_only - O << "\t.param .texref " << *getSymbol(F) << "_param_" - << paramIndex; - } else // Should be llvm::isSampler(*I) - O << "\t.param .samplerref " << *getSymbol(F) << "_param_" - << paramIndex; - continue; + if (isKernelFunction(*F)) { + if (isSampler(*I) || isImage(*I)) { + if (isImage(*I)) { + std::string sname = I->getName(); + if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { + if (nvptxSubtarget.hasImageHandles()) + O << "\t.param .u64 .ptr .surfref "; + else + O << "\t.param .surfref "; + O << *CurrentFnSym << "_param_" << paramIndex; + } + else { // Default image is read_only + if (nvptxSubtarget.hasImageHandles()) + O << "\t.param .u64 .ptr .texref "; + else + O << "\t.param .texref "; + O << *CurrentFnSym << "_param_" << paramIndex; + } + } else { + if (nvptxSubtarget.hasImageHandles()) + O << "\t.param .u64 .ptr .samplerref "; + else + O << "\t.param .samplerref "; + O << *CurrentFnSym << "_param_" << paramIndex; + } + continue; + } } if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) { @@ -1752,13 +2056,35 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { return; } if (const GlobalValue *GVar = dyn_cast(CPV)) { - O << *getSymbol(GVar); + PointerType *PTy = dyn_cast(GVar->getType()); + bool IsNonGenericPointer = false; + if (PTy && PTy->getAddressSpace() != 0) { + IsNonGenericPointer = true; + } + if (EmitGeneric && !isa(CPV) && !IsNonGenericPointer) { + O << "generic("; + O << *getSymbol(GVar); + O << ")"; + } else { + O << 
*getSymbol(GVar); + } return; } if (const ConstantExpr *Cexpr = dyn_cast(CPV)) { const Value *v = Cexpr->stripPointerCasts(); + PointerType *PTy = dyn_cast(Cexpr->getType()); + bool IsNonGenericPointer = false; + if (PTy && PTy->getAddressSpace() != 0) { + IsNonGenericPointer = true; + } if (const GlobalValue *GVar = dyn_cast(v)) { - O << *getSymbol(GVar); + if (EmitGeneric && !isa(v) && !IsNonGenericPointer) { + O << "generic("; + O << *getSymbol(GVar); + O << ")"; + } else { + O << *getSymbol(GVar); + } return; } else { O << *LowerConstant(CPV, *this); @@ -2121,7 +2447,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) { } LineReader *NVPTXAsmPrinter::getReader(std::string filename) { - if (reader == NULL) { + if (!reader) { reader = new LineReader(filename); } diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 7162420..a9f9bdd 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -96,6 +96,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned curpos; raw_ostream &O; NVPTXAsmPrinter &AP; + bool EmitGeneric; public: AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP) @@ -104,6 +105,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { size = _size; curpos = 0; numSymbols = 0; + EmitGeneric = AP.EmitGeneric; } ~AggBuffer() { delete[] buffer; } unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { @@ -155,7 +157,18 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { const Value *v = Symbols[nSym]; if (const GlobalValue *GVar = dyn_cast(v)) { MCSymbol *Name = AP.getSymbol(GVar); - O << *Name; + PointerType *PTy = dyn_cast(GVar->getType()); + bool IsNonGenericPointer = false; + if (PTy && PTy->getAddressSpace() != 0) { + IsNonGenericPointer = true; + } + if (EmitGeneric && !isa(v) && !IsNonGenericPointer) { + O << "generic("; + O << *Name; + O << ")"; + } else { + O << *Name; + } } else if (const ConstantExpr *Cexpr = dyn_cast(v)) { O << *nvptx::LowerConstant(Cexpr, AP); } else @@ -176,31 +189,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { friend class AggBuffer; - virtual void emitSrcInText(StringRef filename, unsigned line); + void emitSrcInText(StringRef filename, unsigned line); private: - virtual const char *getPassName() const { return "NVPTX Assembly Printer"; } + const char *getPassName() const override { return "NVPTX Assembly Printer"; } const Function *F; std::string CurrentFnName; - void EmitFunctionEntryLabel(); - void EmitFunctionBodyStart(); - void EmitFunctionBodyEnd(); - void emitImplicitDef(const MachineInstr *MI) const; + void EmitFunctionEntryLabel() override; + void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; + void emitImplicitDef(const MachineInstr *MI) const override; - void EmitInstruction(const MachineInstr *); + void EmitInstruction(const MachineInstr *) override; void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); - MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); + MCOperand GetSymbolRef(const MCSymbol *Symbol); unsigned encodeVirtualRegister(unsigned Reg); - void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {} + void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {} void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, raw_ostream &O); void printMemOperand(const 
MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, bool = false); @@ -221,15 +234,15 @@ private: void printReturnValStr(const MachineFunction &MF, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &); + raw_ostream &) override; void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O, - const char *Modifier = 0); + const char *Modifier = nullptr); bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &); + raw_ostream &) override; protected: - bool doInitialization(Module &M); - bool doFinalization(Module &M); + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; private: std::string CurrentBankselLabelInBasicBlock; @@ -274,14 +287,33 @@ private: static const char *getRegisterName(unsigned RegNo); void emitDemotedVars(const Function *, raw_ostream &); + bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, + MCOperand &MCOp); + void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp); + LineReader *reader; LineReader *getReader(std::string); + + // Used to control the need to emit .generic() in the initializer of + // module scope variables. + // Although ptx supports the hybrid mode like the following, + // .global .u32 a; + // .global .u32 b; + // .global .u32 addr[] = {a, generic(b)} + // we have difficulty representing the difference in the NVVM IR. + // + // Since the address value should always be generic in CUDA C and always + // be specific in OpenCL, we use this simple control here. + // + bool EmitGeneric; + public: NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), nvptxSubtarget(TM.getSubtarget()) { CurrentBankselLabelInBasicBlock = ""; - reader = NULL; + reader = nullptr; + EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA); } ~NVPTXAsmPrinter() { diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp index 158c482..962b123 100644 --- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp +++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp @@ -33,7 +33,7 @@ public: static char ID; NVPTXAssignValidGlobalNames() : ModulePass(ID) {} - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; /// \brief Clean up the name to remove symbols invalid in PTX. std::string cleanUpName(StringRef Name); diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp new file mode 100644 index 0000000..f3a095d --- /dev/null +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -0,0 +1,195 @@ +//===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// When a load/store accesses the generic address space, checks whether the +// address is casted from a non-generic address space. If so, remove this +// addrspacecast because accessing non-generic address spaces is typically +// faster. 
Besides seeking addrspacecasts, this optimization also traces into +// the base pointer of a GEP. +// +// For instance, the code below loads a float from an array allocated in +// addrspace(3). +// +// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]* +// %1 = gep [10 x float]* %0, i64 0, i64 %i +// %2 = load float* %1 ; emits ld.f32 +// +// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast +// and the GEP to expose more optimization opportunities to function +// optimizeMemoryInst. The intermediate code looks like: +// +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %1 = addrspacecast float addrspace(3)* %0 to float* +// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly +// +// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed +// generic pointers, and folds the load and the addrspacecast into a load from +// the original address space. The final code looks like: +// +// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i +// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32 +// +// This pass may remove an addrspacecast in a different BB. Therefore, we +// implement it as a FunctionPass. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +// An option to disable this optimization. Enable it by default. +static cl::opt DisableFavorNonGeneric( + "disable-nvptx-favor-non-generic", + cl::init(false), + cl::desc("Do not convert generic address space usage " + "to non-generic address space usage"), + cl::Hidden); + +namespace { +/// \brief NVPTXFavorNonGenericAddrSpaces +class NVPTXFavorNonGenericAddrSpaces : public FunctionPass { +public: + static char ID; + NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override; + + /// Optimizes load/store instructions. Idx is the index of the pointer operand + /// (0 for load, and 1 for store). Returns true if it changes anything. + bool optimizeMemoryInstruction(Instruction *I, unsigned Idx); + /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X, + /// indices)". This reordering exposes to optimizeMemoryInstruction more + /// optimization opportunities on loads and stores. Returns true if it changes + /// the program. + bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP); +}; +} + +char NVPTXFavorNonGenericAddrSpaces::ID = 0; + +namespace llvm { +void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); +} +INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic", + "Remove unnecessary non-generic-to-generic addrspacecasts", + false, false) + +// Decides whether removing Cast is valid and beneficial. Cast can be an +// instruction or a constant expression. +static bool IsEliminableAddrSpaceCast(Operator *Cast) { + // Returns false if not even an addrspacecast. + if (Cast->getOpcode() != Instruction::AddrSpaceCast) + return false; + + Value *Src = Cast->getOperand(0); + PointerType *SrcTy = cast(Src->getType()); + PointerType *DestTy = cast(Cast->getType()); + // TODO: For now, we only handle the case where the addrspacecast only changes + // the address space but not the type. If the type also changes, we could + // still get rid of the addrspacecast by adding an extra bitcast, but we + // rarely see such scenarios. 
+ if (SrcTy->getElementType() != DestTy->getElementType()) + return false; + + // Checks whether the addrspacecast is from a non-generic address space to the + // generic address space. + return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC && + DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC); +} + +bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP( + GEPOperator *GEP) { + Operator *Cast = dyn_cast(GEP->getPointerOperand()); + if (!Cast) + return false; + + if (!IsEliminableAddrSpaceCast(Cast)) + return false; + + SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); + if (Instruction *GEPI = dyn_cast(GEP)) { + // %1 = gep (addrspacecast X), indices + // => + // %0 = gep X, indices + // %1 = addrspacecast %0 + GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0), + Indices, + GEP->getName(), + GEPI); + NewGEPI->setIsInBounds(GEP->isInBounds()); + GEP->replaceAllUsesWith( + new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI)); + } else { + // GEP is a constant expression. + Constant *NewGEPCE = ConstantExpr::getGetElementPtr( + cast(Cast->getOperand(0)), + Indices, + GEP->isInBounds()); + GEP->replaceAllUsesWith( + ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType())); + } + + return true; +} + +bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI, + unsigned Idx) { + // If the pointer operand is a GEP, hoist the addrspacecast if any from the + // GEP to expose more optimization opportunites. + if (GEPOperator *GEP = dyn_cast(MI->getOperand(Idx))) { + hoistAddrSpaceCastFromGEP(GEP); + } + + // load/store (addrspacecast X) => load/store X if shortcutting the + // addrspacecast is valid and can improve performance. + // + // e.g., + // %1 = addrspacecast float addrspace(3)* %0 to float* + // %2 = load float* %1 + // -> + // %2 = load float addrspace(3)* %0 + // + // Note: the addrspacecast can also be a constant expression. 
+ if (Operator *Cast = dyn_cast(MI->getOperand(Idx))) { + if (IsEliminableAddrSpaceCast(Cast)) { + MI->setOperand(Idx, Cast->getOperand(0)); + return true; + } + } + + return false; +} + +bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { + if (DisableFavorNonGeneric) + return false; + + bool Changed = false; + for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { + for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) { + if (isa(I)) { + // V = load P + Changed |= optimizeMemoryInstruction(I, 0); + } else if (isa(I)) { + // store V, P + Changed |= optimizeMemoryInstruction(I, 1); + } + } + } + return Changed; +} + +FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() { + return new NVPTXFavorNonGenericAddrSpaces(); +} diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index 819f1dd..2ae6d72 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -28,13 +28,13 @@ public: : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm), is64bit(_is64bit) {} - virtual bool hasFP(const MachineFunction &MF) const; - virtual void emitPrologue(MachineFunction &MF) const; - virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + bool hasFP(const MachineFunction &MF) const override; + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; }; } // End llvm namespace diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 45f0734..023dd5e 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -40,10 +40,9 @@ public: GenericToNVVM() : ModulePass(ID) {} - virtual bool runOnModule(Module &M); + bool runOnModule(Module &M) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - } + void getAnalysisUsage(AnalysisUsage &AU) const override {} private: Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV, @@ -88,7 +87,8 @@ bool GenericToNVVM::runOnModule(Module &M) { !GV->getName().startswith("llvm.")) { GlobalVariable *NewGV = new GlobalVariable( M, GV->getType()->getElementType(), GV->isConstant(), - GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL, + GV->getLinkage(), + GV->hasInitializer() ? GV->getInitializer() : nullptr, "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL); NewGV->copyAttributesFrom(GV); GVMap[GV] = NewGV; @@ -162,7 +162,7 @@ Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV, IRBuilder<> &Builder) { PointerType *GVType = GV->getType(); - Value *CVTA = NULL; + Value *CVTA = nullptr; // See if the address space conversion requires the operand to be bitcast // to i8 addrspace(n)* first. 
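Since the NVPTXFavorNonGenericAddrSpaces pass added above is a FunctionPass exposed through createNVPTXFavorNonGenericAddrSpacesPass() (declared in NVPTX.h earlier in this patch), it is presumably scheduled from NVPTXTargetMachine's pass pipeline, which lies outside this excerpt. A minimal sketch of how such wiring could look with the legacy pass manager of this era; the enclosing function name addNVPTXAddrSpacePasses is invented for illustration:

    // Hypothetical wiring sketch (not taken from this patch): scheduling the
    // new FunctionPass with the legacy pass manager. Only the factory function
    // comes from the tree (declared in NVPTX.h above).
    #include "NVPTX.h"
    #include "llvm/PassManager.h"

    void addNVPTXAddrSpacePasses(llvm::PassManagerBase &PM) {
      // Folds loads/stores through non-generic -> generic addrspacecasts so
      // the backend can emit ld.shared/ld.global instead of generic ld.
      PM.add(llvm::createNVPTXFavorNonGenericAddrSpacesPass());
    }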
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index bd08d2d..cd30880 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -20,11 +20,10 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#undef DEBUG_TYPE -#define DEBUG_TYPE "nvptx-isel" - using namespace llvm; +#define DEBUG_TYPE "nvptx-isel" + static cl::opt FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: FMA contraction (0: don't do it" @@ -120,10 +119,10 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. } - SDNode *ResNode = NULL; + SDNode *ResNode = nullptr; switch (N->getOpcode()) { case ISD::LOAD: ResNode = SelectLoad(N); @@ -162,6 +161,98 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::StoreParamU32: ResNode = SelectStoreParam(N); break; + case ISD::INTRINSIC_WO_CHAIN: + ResNode = SelectIntrinsicNoChain(N); + break; + case NVPTXISD::Tex1DFloatI32: + case NVPTXISD::Tex1DFloatFloat: + case NVPTXISD::Tex1DFloatFloatLevel: + case NVPTXISD::Tex1DFloatFloatGrad: + case NVPTXISD::Tex1DI32I32: + case NVPTXISD::Tex1DI32Float: + case NVPTXISD::Tex1DI32FloatLevel: + case NVPTXISD::Tex1DI32FloatGrad: + case NVPTXISD::Tex1DArrayFloatI32: + case NVPTXISD::Tex1DArrayFloatFloat: + case NVPTXISD::Tex1DArrayFloatFloatLevel: + case NVPTXISD::Tex1DArrayFloatFloatGrad: + case NVPTXISD::Tex1DArrayI32I32: + case NVPTXISD::Tex1DArrayI32Float: + case NVPTXISD::Tex1DArrayI32FloatLevel: + case NVPTXISD::Tex1DArrayI32FloatGrad: + case NVPTXISD::Tex2DFloatI32: + case NVPTXISD::Tex2DFloatFloat: + case NVPTXISD::Tex2DFloatFloatLevel: + case NVPTXISD::Tex2DFloatFloatGrad: + case NVPTXISD::Tex2DI32I32: + case NVPTXISD::Tex2DI32Float: + case NVPTXISD::Tex2DI32FloatLevel: + case NVPTXISD::Tex2DI32FloatGrad: + case NVPTXISD::Tex2DArrayFloatI32: + case NVPTXISD::Tex2DArrayFloatFloat: + case NVPTXISD::Tex2DArrayFloatFloatLevel: + case NVPTXISD::Tex2DArrayFloatFloatGrad: + case NVPTXISD::Tex2DArrayI32I32: + case NVPTXISD::Tex2DArrayI32Float: + case NVPTXISD::Tex2DArrayI32FloatLevel: + case NVPTXISD::Tex2DArrayI32FloatGrad: + case NVPTXISD::Tex3DFloatI32: + case NVPTXISD::Tex3DFloatFloat: + case NVPTXISD::Tex3DFloatFloatLevel: + case NVPTXISD::Tex3DFloatFloatGrad: + case NVPTXISD::Tex3DI32I32: + case NVPTXISD::Tex3DI32Float: + case NVPTXISD::Tex3DI32FloatLevel: + case NVPTXISD::Tex3DI32FloatGrad: + ResNode = SelectTextureIntrinsic(N); + break; + case NVPTXISD::Suld1DI8Trap: + case NVPTXISD::Suld1DI16Trap: + case NVPTXISD::Suld1DI32Trap: + case NVPTXISD::Suld1DV2I8Trap: + case NVPTXISD::Suld1DV2I16Trap: + case NVPTXISD::Suld1DV2I32Trap: + case NVPTXISD::Suld1DV4I8Trap: + case NVPTXISD::Suld1DV4I16Trap: + case NVPTXISD::Suld1DV4I32Trap: + case NVPTXISD::Suld1DArrayI8Trap: + case NVPTXISD::Suld1DArrayI16Trap: + case NVPTXISD::Suld1DArrayI32Trap: + case NVPTXISD::Suld1DArrayV2I8Trap: + case NVPTXISD::Suld1DArrayV2I16Trap: + case NVPTXISD::Suld1DArrayV2I32Trap: + case NVPTXISD::Suld1DArrayV4I8Trap: + case NVPTXISD::Suld1DArrayV4I16Trap: + case NVPTXISD::Suld1DArrayV4I32Trap: + case NVPTXISD::Suld2DI8Trap: + case NVPTXISD::Suld2DI16Trap: + case NVPTXISD::Suld2DI32Trap: + case NVPTXISD::Suld2DV2I8Trap: + case NVPTXISD::Suld2DV2I16Trap: + case NVPTXISD::Suld2DV2I32Trap: + case NVPTXISD::Suld2DV4I8Trap: + case NVPTXISD::Suld2DV4I16Trap: + case NVPTXISD::Suld2DV4I32Trap: + 
case NVPTXISD::Suld2DArrayI8Trap: + case NVPTXISD::Suld2DArrayI16Trap: + case NVPTXISD::Suld2DArrayI32Trap: + case NVPTXISD::Suld2DArrayV2I8Trap: + case NVPTXISD::Suld2DArrayV2I16Trap: + case NVPTXISD::Suld2DArrayV2I32Trap: + case NVPTXISD::Suld2DArrayV4I8Trap: + case NVPTXISD::Suld2DArrayV4I16Trap: + case NVPTXISD::Suld2DArrayV4I32Trap: + case NVPTXISD::Suld3DI8Trap: + case NVPTXISD::Suld3DI16Trap: + case NVPTXISD::Suld3DI32Trap: + case NVPTXISD::Suld3DV2I8Trap: + case NVPTXISD::Suld3DV2I16Trap: + case NVPTXISD::Suld3DV2I32Trap: + case NVPTXISD::Suld3DV4I8Trap: + case NVPTXISD::Suld3DV4I16Trap: + case NVPTXISD::Suld3DV4I32Trap: + ResNode = SelectSurfaceIntrinsic(N); + break; case ISD::ADDRSPACECAST: ResNode = SelectAddrSpaceCast(N); break; @@ -175,7 +266,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { static unsigned int getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) { - const Value *Src = N->getSrcValue(); + const Value *Src = N->getMemOperand()->getValue(); if (!Src) return NVPTX::PTXLdStInstCode::GENERIC; @@ -194,6 +285,24 @@ static unsigned int getCodeAddrSpace(MemSDNode *N, return NVPTX::PTXLdStInstCode::GENERIC; } +SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { + unsigned IID = cast(N->getOperand(0))->getZExtValue(); + switch (IID) { + default: + return nullptr; + case Intrinsic::nvvm_texsurf_handle_internal: + return SelectTexSurfHandle(N); + } +} + +SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) { + // Op 0 is the intrinsic ID + SDValue Wrapper = N->getOperand(1); + SDValue GlobalVal = Wrapper.getOperand(0); + return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64, + GlobalVal); +} + SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { SDValue Src = N->getOperand(0); AddrSpaceCastSDNode *CastN = cast(N); @@ -258,14 +367,14 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { SDLoc dl(N); LoadSDNode *LD = cast(N); EVT LoadedVT = LD->getMemoryVT(); - SDNode *NVPTXLD = NULL; + SDNode *NVPTXLD = nullptr; // do not support pre/post inc/dec if (LD->isIndexed()) - return NULL; + return nullptr; if (!LoadedVT.isSimple()) - return NULL; + return nullptr; // Address Space Setting unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget); @@ -288,7 +397,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { else if (num == 4) vecType = NVPTX::PTXLdStInstCode::V4; else - return NULL; + return nullptr; } // Type Setting: fromType + fromTypeWidth @@ -337,7 +446,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { Opcode = NVPTX::LD_f64_avar; break; default: - return NULL; + return nullptr; } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), @@ -366,7 +475,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { Opcode = NVPTX::LD_f64_asi; break; default: - return NULL; + return nullptr; } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(fromType), @@ -396,7 +505,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { Opcode = NVPTX::LD_f64_ari_64; break; default: - return NULL; + return nullptr; } } else { switch (TargetVT) { @@ -419,7 +528,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { Opcode = NVPTX::LD_f64_ari; break; default: - return NULL; + return nullptr; } } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -448,7 +557,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { Opcode = NVPTX::LD_f64_areg_64; break; default: - return NULL; + return nullptr; } } else { switch 
(TargetVT) { @@ -471,7 +580,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { Opcode = NVPTX::LD_f64_areg; break; default: - return NULL; + return nullptr; } } SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -480,7 +589,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops); } - if (NVPTXLD != NULL) { + if (NVPTXLD) { MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1); @@ -501,7 +610,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { EVT LoadedVT = MemSD->getMemoryVT(); if (!LoadedVT.isSimple()) - return NULL; + return nullptr; // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget); @@ -547,7 +656,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { VecType = NVPTX::PTXLdStInstCode::V4; break; default: - return NULL; + return nullptr; } EVT EltVT = N->getValueType(0); @@ -555,11 +664,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_avar; break; @@ -583,7 +692,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_avar; break; @@ -609,11 +718,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_asi; break; @@ -637,7 +746,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_asi; break; @@ -664,11 +773,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari_64; break; @@ -692,7 +801,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari_64; break; @@ -711,11 +820,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { } else { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_ari; break; @@ -739,7 +848,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_ari; break; @@ -766,11 +875,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LoadV2: switch 
(EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg_64; break; @@ -794,7 +903,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg_64; break; @@ -813,11 +922,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { } else { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LoadV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v2_areg; break; @@ -841,7 +950,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { case NVPTXISD::LoadV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::LDV_i8_v4_areg; break; @@ -887,11 +996,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar; break; @@ -915,7 +1024,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar; break; @@ -939,7 +1048,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar; break; @@ -957,7 +1066,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar; break; @@ -975,19 +1084,18 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { } SDValue Ops[] = { Addr, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), - ArrayRef(Ops, 2)); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else if (Subtarget.is64Bit() ? 
SelectADDRri64(Op1.getNode(), Op1, Base, Offset) : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64; break; @@ -1011,7 +1119,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64; break; @@ -1035,7 +1143,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64; break; @@ -1053,7 +1161,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64; break; @@ -1072,11 +1180,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { } else { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32; break; @@ -1100,7 +1208,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32; break; @@ -1124,7 +1232,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32; break; @@ -1142,7 +1250,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32; break; @@ -1162,17 +1270,16 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { SDValue Ops[] = { Base, Offset, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), - ArrayRef(Ops, 3)); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } else { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64; break; @@ -1196,7 +1303,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64; break; @@ -1220,7 +1327,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64; break; @@ -1238,7 +1345,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64; break; @@ -1257,11 +1364,11 @@ SDNode 
*NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { } else { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32; break; @@ -1285,7 +1392,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32; break; @@ -1309,7 +1416,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32; break; @@ -1327,7 +1434,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { case NVPTXISD::LDUV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32; break; @@ -1346,8 +1453,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { } SDValue Ops[] = { Op1, Chain }; - LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), - ArrayRef(Ops, 2)); + LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops); } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); @@ -1361,14 +1467,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { SDLoc dl(N); StoreSDNode *ST = cast(N); EVT StoreVT = ST->getMemoryVT(); - SDNode *NVPTXST = NULL; + SDNode *NVPTXST = nullptr; // do not support pre/post inc/dec if (ST->isIndexed()) - return NULL; + return nullptr; if (!StoreVT.isSimple()) - return NULL; + return nullptr; // Address Space Setting unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget); @@ -1391,7 +1497,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { else if (num == 4) vecType = NVPTX::PTXLdStInstCode::V4; else - return NULL; + return nullptr; } // Type Setting: toType + toTypeWidth @@ -1435,7 +1541,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { Opcode = NVPTX::ST_f64_avar; break; default: - return NULL; + return nullptr; } SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), @@ -1464,7 +1570,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { Opcode = NVPTX::ST_f64_asi; break; default: - return NULL; + return nullptr; } SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), getI32Imm(vecType), getI32Imm(toType), @@ -1494,7 +1600,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { Opcode = NVPTX::ST_f64_ari_64; break; default: - return NULL; + return nullptr; } } else { switch (SourceVT) { @@ -1517,7 +1623,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { Opcode = NVPTX::ST_f64_ari; break; default: - return NULL; + return nullptr; } } SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -1546,7 +1652,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { Opcode = NVPTX::ST_f64_areg_64; break; default: - return NULL; + return nullptr; } } else { switch (SourceVT) { @@ -1569,7 +1675,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { Opcode = NVPTX::ST_f64_areg; break; default: - return NULL; + return nullptr; } } SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace), @@ -1578,7 +1684,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) { NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops); } - if (NVPTXST != NULL) { + if 
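The other recurring change is dropping the explicit ArrayRef(Ops, N) wrappers now that getMachineNode and friends take ArrayRef parameters directly: a plain C array binds to ArrayRef's array constructor, which deduces the element count from the array's type. A minimal sketch of just that constructor, under the assumption that only the deduction trick matters here (llvm::ArrayRef has many more constructors):

    #include <cstddef>
    #include <cstdio>

    template <typename T>
    struct ArrayRefSketch {
      const T *Data;
      std::size_t Length;
      template <std::size_t N>
      ArrayRefSketch(const T (&Arr)[N]) : Data(Arr), Length(N) {}  // size deduced
    };

    static void emitNode(ArrayRefSketch<int> Ops) {
      std::printf("emitting node with %zu operands\n", Ops.Length);
    }

    int main() {
      int Ops[] = { 10, 20, 30 };
      emitNode(Ops);  // prints 3; a hand-written ArrayRef(Ops, 2) could silently truncate
      return 0;
    }

The same deduction is what lets later hunks pass Ops straight to getNode, getVTList, and getMemIntrinsicNode without a separate count argument.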
(NVPTXST) { MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); MemRefs0[0] = cast(N)->getMemOperand(); cast(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1); @@ -1645,7 +1751,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { N2 = N->getOperand(5); break; default: - return NULL; + return nullptr; } StOps.push_back(getI32Imm(IsVolatile)); @@ -1657,11 +1763,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { if (SelectDirectAddr(N2, Addr)) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v2_avar; break; @@ -1685,7 +1791,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v4_avar; break; @@ -1707,11 +1813,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { : SelectADDRsi(N2.getNode(), N2, Base, Offset)) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v2_asi; break; @@ -1735,7 +1841,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v4_asi; break; @@ -1759,11 +1865,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari_64; break; @@ -1787,7 +1893,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari_64; break; @@ -1806,11 +1912,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { } else { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v2_ari; break; @@ -1834,7 +1940,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v4_ari; break; @@ -1857,11 +1963,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { if (Subtarget.is64Bit()) { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v2_areg_64; break; @@ -1885,7 +1991,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg_64; break; @@ -1904,11 +2010,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { } else { switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::StoreV2: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = 
NVPTX::STV_i8_v2_areg; break; @@ -1932,7 +2038,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) { case NVPTXISD::StoreV4: switch (EltVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i8: Opcode = NVPTX::STV_i8_v4_areg; break; @@ -1973,7 +2079,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { unsigned VecSize; switch (Node->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::LoadParam: VecSize = 1; break; @@ -1992,11 +2098,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { switch (VecSize) { default: - return NULL; + return nullptr; case 1: switch (MemVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i1: Opc = NVPTX::LoadParamMemI8; break; @@ -2023,7 +2129,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { case 2: switch (MemVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i1: Opc = NVPTX::LoadParamMemV2I8; break; @@ -2050,7 +2156,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { case 4: switch (MemVT.getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i1: Opc = NVPTX::LoadParamMemV4I8; break; @@ -2077,7 +2183,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) { VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue); } else { EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue }; - VTs = CurDAG->getVTList(&EVTs[0], array_lengthof(EVTs)); + VTs = CurDAG->getVTList(EVTs); } unsigned OffsetVal = cast(Offset)->getZExtValue(); @@ -2103,7 +2209,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { unsigned NumElts = 1; switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::StoreRetval: NumElts = 1; break; @@ -2128,11 +2234,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { unsigned Opcode = 0; switch (NumElts) { default: - return NULL; + return nullptr; case 1: switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i1: Opcode = NVPTX::StoreRetvalI8; break; @@ -2159,7 +2265,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { case 2: switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i1: Opcode = NVPTX::StoreRetvalV2I8; break; @@ -2186,7 +2292,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) { case 4: switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i1: Opcode = NVPTX::StoreRetvalV4I8; break; @@ -2229,7 +2335,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { unsigned NumElts = 1; switch (N->getOpcode()) { default: - return NULL; + return nullptr; case NVPTXISD::StoreParamU32: case NVPTXISD::StoreParamS32: case NVPTXISD::StoreParam: @@ -2260,11 +2366,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { default: switch (NumElts) { default: - return NULL; + return nullptr; case 1: switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i1: Opcode = NVPTX::StoreParamI8; break; @@ -2291,7 +2397,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { case 2: switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { default: - return NULL; + return nullptr; case MVT::i1: Opcode = NVPTX::StoreParamV2I8; break; @@ -2318,7 +2424,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { case 4: switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) { default: - return NULL; + 
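These selectors all share one shape: nested switches over opcode and element type whose default: arms return nullptr, meaning "not handled here", so the caller can fall back to the TableGen-generated patterns. A toy standalone sketch of that shape, with hypothetical opcode names (the strings stand in for NVPTX instruction enumerators):

    #include <cstdio>

    enum ElemTy { I8, I16, I32, F32, Other };

    static const char *pickLoadVectorOpcode(unsigned VecWidth, ElemTy Ty) {
      switch (VecWidth) {
      default: return nullptr;        // width not handled: caller falls back
      case 2:
        switch (Ty) {
        default:  return nullptr;     // element type not handled: caller falls back
        case I8:  return "LDV_i8_v2";
        case I32: return "LDV_i32_v2";
        }
      case 4:
        switch (Ty) {
        default:  return nullptr;
        case I8:  return "LDV_i8_v4";
        case I32: return "LDV_i32_v4";
        }
      }
    }

    int main() {
      if (const char *Opc = pickLoadVectorOpcode(2, I32))
        std::printf("selected %s\n", Opc);
      if (!pickLoadVectorOpcode(4, Other))
        std::printf("no match: defer to autogenerated patterns\n");
      return 0;
    }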
return nullptr; case MVT::i1: Opcode = NVPTX::StoreParamV4I8; break; @@ -2371,6 +2477,488 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) { return Ret; } +SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue TexRef = N->getOperand(1); + SDValue SampRef = N->getOperand(2); + SDNode *Ret = nullptr; + unsigned Opc = 0; + SmallVector Ops; + + switch (N->getOpcode()) { + default: return nullptr; + case NVPTXISD::Tex1DFloatI32: + Opc = NVPTX::TEX_1D_F32_I32; + break; + case NVPTXISD::Tex1DFloatFloat: + Opc = NVPTX::TEX_1D_F32_F32; + break; + case NVPTXISD::Tex1DFloatFloatLevel: + Opc = NVPTX::TEX_1D_F32_F32_LEVEL; + break; + case NVPTXISD::Tex1DFloatFloatGrad: + Opc = NVPTX::TEX_1D_F32_F32_GRAD; + break; + case NVPTXISD::Tex1DI32I32: + Opc = NVPTX::TEX_1D_I32_I32; + break; + case NVPTXISD::Tex1DI32Float: + Opc = NVPTX::TEX_1D_I32_F32; + break; + case NVPTXISD::Tex1DI32FloatLevel: + Opc = NVPTX::TEX_1D_I32_F32_LEVEL; + break; + case NVPTXISD::Tex1DI32FloatGrad: + Opc = NVPTX::TEX_1D_I32_F32_GRAD; + break; + case NVPTXISD::Tex1DArrayFloatI32: + Opc = NVPTX::TEX_1D_ARRAY_F32_I32; + break; + case NVPTXISD::Tex1DArrayFloatFloat: + Opc = NVPTX::TEX_1D_ARRAY_F32_F32; + break; + case NVPTXISD::Tex1DArrayFloatFloatLevel: + Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::Tex1DArrayFloatFloatGrad: + Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD; + break; + case NVPTXISD::Tex1DArrayI32I32: + Opc = NVPTX::TEX_1D_ARRAY_I32_I32; + break; + case NVPTXISD::Tex1DArrayI32Float: + Opc = NVPTX::TEX_1D_ARRAY_I32_F32; + break; + case NVPTXISD::Tex1DArrayI32FloatLevel: + Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL; + break; + case NVPTXISD::Tex1DArrayI32FloatGrad: + Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD; + break; + case NVPTXISD::Tex2DFloatI32: + Opc = NVPTX::TEX_2D_F32_I32; + break; + case NVPTXISD::Tex2DFloatFloat: + Opc = NVPTX::TEX_2D_F32_F32; + break; + case NVPTXISD::Tex2DFloatFloatLevel: + Opc = NVPTX::TEX_2D_F32_F32_LEVEL; + break; + case NVPTXISD::Tex2DFloatFloatGrad: + Opc = NVPTX::TEX_2D_F32_F32_GRAD; + break; + case NVPTXISD::Tex2DI32I32: + Opc = NVPTX::TEX_2D_I32_I32; + break; + case NVPTXISD::Tex2DI32Float: + Opc = NVPTX::TEX_2D_I32_F32; + break; + case NVPTXISD::Tex2DI32FloatLevel: + Opc = NVPTX::TEX_2D_I32_F32_LEVEL; + break; + case NVPTXISD::Tex2DI32FloatGrad: + Opc = NVPTX::TEX_2D_I32_F32_GRAD; + break; + case NVPTXISD::Tex2DArrayFloatI32: + Opc = NVPTX::TEX_2D_ARRAY_F32_I32; + break; + case NVPTXISD::Tex2DArrayFloatFloat: + Opc = NVPTX::TEX_2D_ARRAY_F32_F32; + break; + case NVPTXISD::Tex2DArrayFloatFloatLevel: + Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL; + break; + case NVPTXISD::Tex2DArrayFloatFloatGrad: + Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD; + break; + case NVPTXISD::Tex2DArrayI32I32: + Opc = NVPTX::TEX_2D_ARRAY_I32_I32; + break; + case NVPTXISD::Tex2DArrayI32Float: + Opc = NVPTX::TEX_2D_ARRAY_I32_F32; + break; + case NVPTXISD::Tex2DArrayI32FloatLevel: + Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL; + break; + case NVPTXISD::Tex2DArrayI32FloatGrad: + Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD; + break; + case NVPTXISD::Tex3DFloatI32: + Opc = NVPTX::TEX_3D_F32_I32; + break; + case NVPTXISD::Tex3DFloatFloat: + Opc = NVPTX::TEX_3D_F32_F32; + break; + case NVPTXISD::Tex3DFloatFloatLevel: + Opc = NVPTX::TEX_3D_F32_F32_LEVEL; + break; + case NVPTXISD::Tex3DFloatFloatGrad: + Opc = NVPTX::TEX_3D_F32_F32_GRAD; + break; + case NVPTXISD::Tex3DI32I32: + Opc = NVPTX::TEX_3D_I32_I32; + break; + case NVPTXISD::Tex3DI32Float: + Opc = 
NVPTX::TEX_3D_I32_F32; + break; + case NVPTXISD::Tex3DI32FloatLevel: + Opc = NVPTX::TEX_3D_I32_F32_LEVEL; + break; + case NVPTXISD::Tex3DI32FloatGrad: + Opc = NVPTX::TEX_3D_I32_F32_GRAD; + break; + } + + Ops.push_back(TexRef); + Ops.push_back(SampRef); + + // Copy over indices + for (unsigned i = 3; i < N->getNumOperands(); ++i) { + Ops.push_back(N->getOperand(i)); + } + + Ops.push_back(Chain); + Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + return Ret; +} + +SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue TexHandle = N->getOperand(1); + SDNode *Ret = nullptr; + unsigned Opc = 0; + SmallVector Ops; + switch (N->getOpcode()) { + default: return nullptr; + case NVPTXISD::Suld1DI8Trap: + Opc = NVPTX::SULD_1D_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI16Trap: + Opc = NVPTX::SULD_1D_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DI32Trap: + Opc = NVPTX::SULD_1D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I8Trap: + Opc = NVPTX::SULD_1D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I16Trap: + Opc = NVPTX::SULD_1D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV2I32Trap: + Opc = NVPTX::SULD_1D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I8Trap: + Opc = NVPTX::SULD_1D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I16Trap: + Opc = NVPTX::SULD_1D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DV4I32Trap: + Opc = NVPTX::SULD_1D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI8Trap: + Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI16Trap: + Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayI32Trap: + Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I8Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I16Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV2I32Trap: + Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I8Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP; + 
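SelectTextureIntrinsic above is a large opcode table followed by a small amount of operand plumbing: fixed operands first (texture and sampler refs), then every remaining source operand copied over verbatim, then the chain last. A standalone sketch of that plumbing with toy types (a node is modeled as a bare operand list; the real code works on SDValues):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Node { std::vector<int> Operands; };

    static std::vector<int> buildTexOps(const Node &N) {
      std::vector<int> Ops;
      Ops.push_back(N.Operands[1]);                   // TexRef
      Ops.push_back(N.Operands[2]);                   // SampRef
      for (std::size_t i = 3; i < N.Operands.size(); ++i)
        Ops.push_back(N.Operands[i]);                 // indices, copied over
      Ops.push_back(N.Operands[0]);                   // Chain goes last
      return Ops;
    }

    int main() {
      Node N{ { /*chain*/0, /*tex*/7, /*samp*/8, /*x*/1, /*y*/2 } };
      std::printf("%zu machine operands\n", buildTexOps(N).size()); // prints 5
      return 0;
    }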
Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I16Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld1DArrayV4I32Trap: + Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI8Trap: + Opc = NVPTX::SULD_2D_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI16Trap: + Opc = NVPTX::SULD_2D_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DI32Trap: + Opc = NVPTX::SULD_2D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I8Trap: + Opc = NVPTX::SULD_2D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I16Trap: + Opc = NVPTX::SULD_2D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV2I32Trap: + Opc = NVPTX::SULD_2D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I8Trap: + Opc = NVPTX::SULD_2D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I16Trap: + Opc = NVPTX::SULD_2D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DV4I32Trap: + Opc = NVPTX::SULD_2D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI8Trap: + Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI16Trap: + Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayI32Trap: + Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I8Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV2I16Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case 
NVPTXISD::Suld2DArrayV2I32Trap: + Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I8Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I16Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld2DArrayV4I32Trap: + Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI8Trap: + Opc = NVPTX::SULD_3D_I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI16Trap: + Opc = NVPTX::SULD_3D_I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DI32Trap: + Opc = NVPTX::SULD_3D_I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I8Trap: + Opc = NVPTX::SULD_3D_V2I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I16Trap: + Opc = NVPTX::SULD_3D_V2I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV2I32Trap: + Opc = NVPTX::SULD_3D_V2I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I8Trap: + Opc = NVPTX::SULD_3D_V4I8_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I16Trap: + Opc = NVPTX::SULD_3D_V4I16_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + case NVPTXISD::Suld3DV4I32Trap: + Opc = NVPTX::SULD_3D_V4I32_TRAP; + Ops.push_back(TexHandle); + Ops.push_back(N->getOperand(2)); + Ops.push_back(N->getOperand(3)); + Ops.push_back(N->getOperand(4)); + Ops.push_back(Chain); + break; + } + Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + return Ret; +} + // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. 
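The surface-load cases above differ only in how many index operands they forward: one for 1D, two for 1D-array and 2D, three for 2D-array and 3D. A hypothetical refactor sketch, not what the patch does, showing how that count could drive a single helper instead of a push_back run per case:

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    struct Node { std::vector<int> Operands; };

    // Hypothetical helper: push the handle, then NumIdx indices, then the chain.
    static std::vector<int> buildSuldOps(const Node &N, unsigned NumIdx) {
      std::vector<int> Ops;
      Ops.push_back(N.Operands[1]);          // surface handle
      for (unsigned i = 0; i != NumIdx; ++i)
        Ops.push_back(N.Operands[2 + i]);    // x [, y or layer [, z or layer]]
      Ops.push_back(N.Operands[0]);          // chain last, as in the real selector
      return Ops;
    }

    int main() {
      Node N{ { /*chain*/0, /*handle*/9, /*x*/4, /*y*/5 } };
      std::printf("%zu operands for a 2D load\n", buildSuldOps(N, 2).size()); // 4
      return 0;
    }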
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { @@ -2464,14 +3052,18 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr, bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const { - const Value *Src = NULL; + const Value *Src = nullptr; // Even though MemIntrinsicSDNode is a subclass of MemSDNode, // the classof() for MemSDNode does not include MemIntrinsicSDNode // (See SelectionDAGNodes.h). So we need to check for both. if (MemSDNode *mN = dyn_cast(N)) { - Src = mN->getSrcValue(); + if (spN == 0 && mN->getMemOperand()->getPseudoValue()) + return true; + Src = mN->getMemOperand()->getValue(); } else if (MemSDNode *mN = dyn_cast(N)) { - Src = mN->getSrcValue(); + if (spN == 0 && mN->getMemOperand()->getPseudoValue()) + return true; + Src = mN->getMemOperand()->getValue(); } if (!Src) return false; diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 93ad169..11f92e7 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "nvptx-isel" - #include "NVPTX.h" #include "NVPTXISelLowering.h" #include "NVPTXRegisterInfo.h" @@ -46,19 +44,22 @@ public: CodeGenOpt::Level OptLevel); // Pass Name - virtual const char *getPassName() const { + const char *getPassName() const override { return "NVPTX DAG->DAG Pattern Instruction Selection"; } const NVPTXSubtarget &Subtarget; - virtual bool SelectInlineAsmMemoryOperand( - const SDValue &Op, char ConstraintCode, std::vector &OutOps); + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps) override; private: // Include the pieces autogenerated from the target description. #include "NVPTXGenDAGISel.inc" - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; + SDNode *SelectIntrinsicNoChain(SDNode *N); + SDNode *SelectTexSurfHandle(SDNode *N); SDNode *SelectLoad(SDNode *N); SDNode *SelectLoadVector(SDNode *N); SDNode *SelectLDGLDUVector(SDNode *N); @@ -68,6 +69,8 @@ private: SDNode *SelectStoreRetval(SDNode *N); SDNode *SelectStoreParam(SDNode *N); SDNode *SelectAddrSpaceCast(SDNode *N); + SDNode *SelectTextureIntrinsic(SDNode *N); + SDNode *SelectSurfaceIntrinsic(SDNode *N); inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index 8e25a65..b0943be 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -75,7 +75,7 @@ static bool IsPTXVectorType(MVT VT) { /// LowerCall, and LowerReturn.
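ChkMemSDNodeAddressSpace above also stops using the deprecated getSrcValue() and instead reads the MachineMemOperand, adding an early-out for pseudo source values (stack slots, GOT entries, constant pools) when the query is for the generic address space 0. A standalone sketch of the resulting decision logic with toy types (the real code repeats it for MemSDNode and MemIntrinsicSDNode because classof() does not relate them):

    #include <cstdio>

    // Toy stand-ins for IR values and machine memory operands.
    struct Value { unsigned AddrSpace; };
    struct MemOperand {
      const Value *V;   // IR-level pointer, if any
      bool IsPseudo;    // pseudo source value: stack slot, GOT, constant pool...
    };

    static bool matchesAddrSpace(const MemOperand &MO, unsigned SpN) {
      if (SpN == 0 && MO.IsPseudo)
        return true;                  // pseudo values count as generic (space 0)
      if (!MO.V)
        return false;                 // no source value: cannot prove anything
      return MO.V->AddrSpace == SpN;
    }

    int main() {
      Value GlobalPtr{1};
      MemOperand A{&GlobalPtr, false}, B{nullptr, true};
      std::printf("%d %d\n", matchesAddrSpace(A, 1), matchesAddrSpace(B, 0)); // 1 1
      return 0;
    }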
static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty, SmallVectorImpl &ValueVTs, - SmallVectorImpl *Offsets = 0, + SmallVectorImpl *Offsets = nullptr, uint64_t StartingOffset = 0) { SmallVector TempVTs; SmallVector TempOffsets; @@ -245,7 +245,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: - return 0; + return nullptr; case NVPTXISD::CALL: return "NVPTXISD::CALL"; case NVPTXISD::RET_FLAG: @@ -328,6 +328,116 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::StoreV2"; case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4"; + case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32"; + case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; + case NVPTXISD::Tex1DFloatFloatLevel: + return "NVPTXISD::Tex1DFloatFloatLevel"; + case NVPTXISD::Tex1DFloatFloatGrad: + return "NVPTXISD::Tex1DFloatFloatGrad"; + case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32"; + case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float"; + case NVPTXISD::Tex1DI32FloatLevel: + return "NVPTXISD::Tex1DI32FloatLevel"; + case NVPTXISD::Tex1DI32FloatGrad: + return "NVPTXISD::Tex1DI32FloatGrad"; + case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex1DArrayFloatI32"; + case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat"; + case NVPTXISD::Tex1DArrayFloatFloatLevel: + return "NVPTXISD::Tex1DArrayFloatFloatLevel"; + case NVPTXISD::Tex1DArrayFloatFloatGrad: + return "NVPTXISD::Tex1DArrayFloatFloatGrad"; + case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex1DArrayI32I32"; + case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex1DArrayI32Float"; + case NVPTXISD::Tex1DArrayI32FloatLevel: + return "NVPTXISD::Tex1DArrayI32FloatLevel"; + case NVPTXISD::Tex1DArrayI32FloatGrad: + return "NVPTXISD::Tex1DArrayI32FloatGrad"; + case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32"; + case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat"; + case NVPTXISD::Tex2DFloatFloatLevel: + return "NVPTXISD::Tex2DFloatFloatLevel"; + case NVPTXISD::Tex2DFloatFloatGrad: + return "NVPTXISD::Tex2DFloatFloatGrad"; + case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32"; + case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float"; + case NVPTXISD::Tex2DI32FloatLevel: + return "NVPTXISD::Tex2DI32FloatLevel"; + case NVPTXISD::Tex2DI32FloatGrad: + return "NVPTXISD::Tex2DI32FloatGrad"; + case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32"; + case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat"; + case NVPTXISD::Tex2DArrayFloatFloatLevel: + return "NVPTXISD::Tex2DArrayFloatFloatLevel"; + case NVPTXISD::Tex2DArrayFloatFloatGrad: + return "NVPTXISD::Tex2DArrayFloatFloatGrad"; + case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32"; + case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float"; + case NVPTXISD::Tex2DArrayI32FloatLevel: + return "NVPTXISD::Tex2DArrayI32FloatLevel"; + case NVPTXISD::Tex2DArrayI32FloatGrad: + return "NVPTXISD::Tex2DArrayI32FloatGrad"; + case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32"; + case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat"; + case NVPTXISD::Tex3DFloatFloatLevel: + return "NVPTXISD::Tex3DFloatFloatLevel"; + case NVPTXISD::Tex3DFloatFloatGrad: + return "NVPTXISD::Tex3DFloatFloatGrad"; + case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32"; +
case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float"; + case NVPTXISD::Tex3DI32FloatLevel: + return "NVPTXISD::Tex3DI32FloatLevel"; + case NVPTXISD::Tex3DI32FloatGrad: + return "NVPTXISD::Tex3DI32FloatGrad"; + + case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap"; + case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap"; + case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap"; + case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap"; + case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap"; + case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap"; + case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap"; + case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap"; + case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap"; + + case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap"; + case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap"; + case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap"; + case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap"; + case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap"; + case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap"; + case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap"; + case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap"; + case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap"; + + case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap"; + case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap"; + case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap"; + case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap"; + case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap"; + case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap"; + case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap"; + case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap"; + case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap"; + + case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap"; + case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap"; + case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap"; + case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap"; + case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap"; + case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap"; + case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap"; + case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap"; + case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap"; + + case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap"; + case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap"; + case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap"; + case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap"; + case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap"; + case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap"; + case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap"; + case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap"; + case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap"; } } @@ -526,7 +636,7 @@ SDValue 
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; - ArgListTy &Args = CLI.Args; + ArgListTy &Args = CLI.getArgs(); Type *retTy = CLI.RetTy; ImmutableCallSite *CS = CLI.CS; @@ -575,7 +685,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, - DeclareParamOps, 5); + DeclareParamOps); InFlag = Chain.getValue(1); unsigned curOffset = 0; for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { @@ -599,7 +709,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(curOffset, MVT::i32), StVal, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, - CopyParamVTs, &CopyParamOps[0], 5, + CopyParamVTs, CopyParamOps, elemtype, MachinePointerInfo()); InFlag = Chain.getValue(1); curOffset += sz / 8; @@ -621,7 +731,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, - DeclareParamOps, 5); + DeclareParamOps); InFlag = Chain.getValue(1); unsigned NumElts = ObjectVT.getVectorNumElements(); EVT EltVT = ObjectVT.getVectorElementType(); @@ -644,7 +754,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(0, MVT::i32), Elt, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, - CopyParamVTs, &CopyParamOps[0], 5, + CopyParamVTs, CopyParamOps, MemVT, MachinePointerInfo()); InFlag = Chain.getValue(1); } else if (NumElts == 2) { @@ -661,7 +771,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(0, MVT::i32), Elt0, Elt1, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl, - CopyParamVTs, &CopyParamOps[0], 6, + CopyParamVTs, CopyParamOps, MemVT, MachinePointerInfo()); InFlag = Chain.getValue(1); } else { @@ -735,9 +845,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(InFlag); SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0], - Ops.size(), MemVT, - MachinePointerInfo()); + Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops, + MemVT, MachinePointerInfo()); InFlag = Chain.getValue(1); curOffset += PerStoreOffset; } @@ -762,7 +871,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(sz, MVT::i32), DAG.getConstant(0, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs, - DeclareParamOps, 5); + DeclareParamOps); InFlag = Chain.getValue(1); SDValue OutV = OutVals[OIdx]; if (needExtend) { @@ -781,7 +890,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, opcode = NVPTXISD::StoreParamU32; else if (Outs[OIdx].Flags.isSExt()) opcode = NVPTXISD::StoreParamS32; - Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, 5, + Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, VT, MachinePointerInfo()); InFlag = Chain.getValue(1); @@ -806,7 +915,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, - DeclareParamOps, 
5); + DeclareParamOps); InFlag = Chain.getValue(1); unsigned curOffset = 0; for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { @@ -834,7 +943,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(curOffset, MVT::i32), theVal, InFlag }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, - CopyParamOps, 5, elemtype, + CopyParamOps, elemtype, MachinePointerInfo()); InFlag = Chain.getValue(1); @@ -865,7 +974,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(resultsz, MVT::i32), DAG.getConstant(0, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs, - DeclareRetOps, 5); + DeclareRetOps); InFlag = Chain.getValue(1); } else { retAlignment = getArgumentAlignment(Callee, CS, retTy, 0); @@ -875,7 +984,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(resultsz / 8, MVT::i32), DAG.getConstant(0, MVT::i32), InFlag }; Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs, - DeclareRetOps, 5); + DeclareRetOps); InFlag = Chain.getValue(1); } } @@ -895,7 +1004,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue ProtoOps[] = { Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag, }; - Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, &ProtoOps[0], 3); + Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps); InFlag = Chain.getValue(1); } // Op to just print "call" @@ -904,20 +1013,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag }; Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall), - dl, PrintCallVTs, PrintCallOps, 3); + dl, PrintCallVTs, PrintCallOps); InFlag = Chain.getValue(1); // Ops to print out the function name SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CallVoidOps[] = { Chain, Callee, InFlag }; - Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3); + Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps); InFlag = Chain.getValue(1); // Ops to print out the param list SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CallArgBeginOps[] = { Chain, InFlag }; Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs, - CallArgBeginOps, 2); + CallArgBeginOps); InFlag = Chain.getValue(1); for (unsigned i = 0, e = paramCount; i != e; ++i) { @@ -929,21 +1038,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32), DAG.getConstant(i, MVT::i32), InFlag }; - Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4); + Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps); InFlag = Chain.getValue(1); } SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 
1 : 0, MVT::i32), InFlag }; - Chain = - DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3); + Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps); InFlag = Chain.getValue(1); if (!Func) { SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32), InFlag }; - Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3); + Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps); InFlag = Chain.getValue(1); } @@ -962,7 +1070,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (NumElts == 1) { // Just a simple load - std::vector LoadRetVTs; + SmallVector LoadRetVTs; if (needTruncate) { // If loading i1 result, generate // load i16 @@ -972,15 +1080,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, LoadRetVTs.push_back(EltVT); LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - std::vector LoadRetOps; + SmallVector LoadRetOps; LoadRetOps.push_back(Chain); LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); LoadRetOps.push_back(InFlag); SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, - DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], - LoadRetOps.size(), EltVT, MachinePointerInfo()); + DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); Chain = retval.getValue(1); InFlag = retval.getValue(2); SDValue Ret0 = retval; @@ -989,7 +1096,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InVals.push_back(Ret0); } else if (NumElts == 2) { // LoadV2 - std::vector LoadRetVTs; + SmallVector LoadRetVTs; if (needTruncate) { // If loading i1 result, generate // load i16 @@ -1002,15 +1109,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } LoadRetVTs.push_back(MVT::Other); LoadRetVTs.push_back(MVT::Glue); - std::vector LoadRetOps; + SmallVector LoadRetOps; LoadRetOps.push_back(Chain); LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); LoadRetOps.push_back(DAG.getConstant(0, MVT::i32)); LoadRetOps.push_back(InFlag); SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParamV2, dl, - DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], - LoadRetOps.size(), EltVT, MachinePointerInfo()); + DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo()); Chain = retval.getValue(2); InFlag = retval.getValue(3); SDValue Ret0 = retval.getValue(0); @@ -1054,8 +1160,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32)); LoadRetOps.push_back(InFlag); SDValue retval = DAG.getMemIntrinsicNode( - Opc, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), - &LoadRetOps[0], LoadRetOps.size(), EltVT, MachinePointerInfo()); + Opc, dl, DAG.getVTList(LoadRetVTs), + LoadRetOps, EltVT, MachinePointerInfo()); if (VecSize == 2) { Chain = retval.getValue(2); InFlag = retval.getValue(3); @@ -1110,8 +1216,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, LoadRetOps.push_back(InFlag); SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, - DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0], - LoadRetOps.size(), TheLoadType, MachinePointerInfo()); + DAG.getVTList(LoadRetVTs), LoadRetOps, + TheLoadType, MachinePointerInfo()); Chain = retval.getValue(1); InFlag = 
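The return-value loading code here swaps std::vector for SmallVector when assembling the short VT and operand lists. The point of SmallVector is inline storage: up to N elements live inside the object itself, so the common small case never touches the heap. A toy model of just that fast path (llvm::SmallVector additionally spills to heap storage once the inline capacity N is exceeded):

    #include <cassert>
    #include <cstdio>

    template <typename T, unsigned N>
    class SmallVecSketch {
      T Storage[N];      // lives inside the object: no allocation
      unsigned Size = 0;
    public:
      void push_back(const T &V) {
        assert(Size < N && "this sketch models only the inline fast path");
        Storage[Size++] = V;
      }
      unsigned size() const { return Size; }
    };

    int main() {
      SmallVecSketch<int, 16> LoadRetVTs;  // name borrowed from the hunk above
      LoadRetVTs.push_back(1);
      LoadRetVTs.push_back(2);
      std::printf("%u VTs, zero heap allocations\n", LoadRetVTs.size());
      return 0;
    }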
retval.getValue(2); SDValue Ret0 = retval.getValue(0); @@ -1153,8 +1259,7 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { DAG.getIntPtrConstant(j))); } } - return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0], - Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); } SDValue @@ -1209,7 +1314,7 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const { // load, so we build a MergeValues node for it. See ExpandUnalignedLoad() // in LegalizeDAG.cpp which also uses MergeValues. SDValue Ops[] = { result, LD->getChain() }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -1297,7 +1402,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const { MemSDNode *MemSD = cast(N); SDValue NewSt = DAG.getMemIntrinsicNode( - Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(), + Opcode, DL, DAG.getVTList(MVT::Other), Ops, MemSD->getMemoryVT(), MemSD->getMemOperand()); //return DCI.CombineTo(N, NewSt, true); @@ -1429,7 +1534,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( if (isImageOrSamplerVal( theArgs[i], (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent() - : 0))) { + : nullptr))) { assert(isKernel && "Only kernels can have image/sampler params"); InVals.push_back(DAG.getConstant(i + 1, MVT::i32)); continue; @@ -1683,8 +1788,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( //} if (!OutChains.empty()) - DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0], - OutChains.size())); + DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains)); return Chain; } @@ -1726,7 +1830,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal); SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, - DAG.getVTList(MVT::Other), &Ops[0], 3, + DAG.getVTList(MVT::Other), Ops, EltVT, MachinePointerInfo()); } else if (NumElts == 2) { @@ -1742,7 +1846,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0, StoreVal1 }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl, - DAG.getVTList(MVT::Other), &Ops[0], 4, + DAG.getVTList(MVT::Other), Ops, EltVT, MachinePointerInfo()); } else { // V4 stores @@ -1814,8 +1918,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size()); Chain = - DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), &Ops[0], - Ops.size(), EltVT, MachinePointerInfo()); + DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops, + EltVT, MachinePointerInfo()); Offset += PerStoreOffset; } } @@ -1852,8 +1956,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, - DAG.getVTList(MVT::Other), &Ops[0], - 3, TheStoreType, + DAG.getVTList(MVT::Other), Ops, + TheStoreType, MachinePointerInfo()); if(TheValType.isVector()) SizeSoFar += @@ -1891,6 +1995,195 @@ bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { return false; } +static unsigned getOpcForTextureInstr(unsigned Intrinsic) { + switch 
(Intrinsic) { + default: + return 0; + + case Intrinsic::nvvm_tex_1d_v4f32_i32: + return NVPTXISD::Tex1DFloatI32; + case Intrinsic::nvvm_tex_1d_v4f32_f32: + return NVPTXISD::Tex1DFloatFloat; + case Intrinsic::nvvm_tex_1d_level_v4f32_f32: + return NVPTXISD::Tex1DFloatFloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: + return NVPTXISD::Tex1DFloatFloatGrad; + case Intrinsic::nvvm_tex_1d_v4i32_i32: + return NVPTXISD::Tex1DI32I32; + case Intrinsic::nvvm_tex_1d_v4i32_f32: + return NVPTXISD::Tex1DI32Float; + case Intrinsic::nvvm_tex_1d_level_v4i32_f32: + return NVPTXISD::Tex1DI32FloatLevel; + case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: + return NVPTXISD::Tex1DI32FloatGrad; + + case Intrinsic::nvvm_tex_1d_array_v4f32_i32: + return NVPTXISD::Tex1DArrayFloatI32; + case Intrinsic::nvvm_tex_1d_array_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloat; + case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: + return NVPTXISD::Tex1DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_1d_array_v4i32_i32: + return NVPTXISD::Tex1DArrayI32I32; + case Intrinsic::nvvm_tex_1d_array_v4i32_f32: + return NVPTXISD::Tex1DArrayI32Float; + case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: + return NVPTXISD::Tex1DArrayI32FloatLevel; + case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: + return NVPTXISD::Tex1DArrayI32FloatGrad; + + case Intrinsic::nvvm_tex_2d_v4f32_i32: + return NVPTXISD::Tex2DFloatI32; + case Intrinsic::nvvm_tex_2d_v4f32_f32: + return NVPTXISD::Tex2DFloatFloat; + case Intrinsic::nvvm_tex_2d_level_v4f32_f32: + return NVPTXISD::Tex2DFloatFloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: + return NVPTXISD::Tex2DFloatFloatGrad; + case Intrinsic::nvvm_tex_2d_v4i32_i32: + return NVPTXISD::Tex2DI32I32; + case Intrinsic::nvvm_tex_2d_v4i32_f32: + return NVPTXISD::Tex2DI32Float; + case Intrinsic::nvvm_tex_2d_level_v4i32_f32: + return NVPTXISD::Tex2DI32FloatLevel; + case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: + return NVPTXISD::Tex2DI32FloatGrad; + + case Intrinsic::nvvm_tex_2d_array_v4f32_i32: + return NVPTXISD::Tex2DArrayFloatI32; + case Intrinsic::nvvm_tex_2d_array_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloat; + case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: + return NVPTXISD::Tex2DArrayFloatFloatGrad; + case Intrinsic::nvvm_tex_2d_array_v4i32_i32: + return NVPTXISD::Tex2DArrayI32I32; + case Intrinsic::nvvm_tex_2d_array_v4i32_f32: + return NVPTXISD::Tex2DArrayI32Float; + case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: + return NVPTXISD::Tex2DArrayI32FloatLevel; + case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: + return NVPTXISD::Tex2DArrayI32FloatGrad; + + case Intrinsic::nvvm_tex_3d_v4f32_i32: + return NVPTXISD::Tex3DFloatI32; + case Intrinsic::nvvm_tex_3d_v4f32_f32: + return NVPTXISD::Tex3DFloatFloat; + case Intrinsic::nvvm_tex_3d_level_v4f32_f32: + return NVPTXISD::Tex3DFloatFloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: + return NVPTXISD::Tex3DFloatFloatGrad; + case Intrinsic::nvvm_tex_3d_v4i32_i32: + return NVPTXISD::Tex3DI32I32; + case Intrinsic::nvvm_tex_3d_v4i32_f32: + return NVPTXISD::Tex3DI32Float; + case Intrinsic::nvvm_tex_3d_level_v4i32_f32: + return NVPTXISD::Tex3DI32FloatLevel; + case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: + return NVPTXISD::Tex3DI32FloatGrad; + } +} + +static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) { + switch (Intrinsic) { + default: + 
return 0; + case Intrinsic::nvvm_suld_1d_i8_trap: + return NVPTXISD::Suld1DI8Trap; + case Intrinsic::nvvm_suld_1d_i16_trap: + return NVPTXISD::Suld1DI16Trap; + case Intrinsic::nvvm_suld_1d_i32_trap: + return NVPTXISD::Suld1DI32Trap; + case Intrinsic::nvvm_suld_1d_v2i8_trap: + return NVPTXISD::Suld1DV2I8Trap; + case Intrinsic::nvvm_suld_1d_v2i16_trap: + return NVPTXISD::Suld1DV2I16Trap; + case Intrinsic::nvvm_suld_1d_v2i32_trap: + return NVPTXISD::Suld1DV2I32Trap; + case Intrinsic::nvvm_suld_1d_v4i8_trap: + return NVPTXISD::Suld1DV4I8Trap; + case Intrinsic::nvvm_suld_1d_v4i16_trap: + return NVPTXISD::Suld1DV4I16Trap; + case Intrinsic::nvvm_suld_1d_v4i32_trap: + return NVPTXISD::Suld1DV4I32Trap; + case Intrinsic::nvvm_suld_1d_array_i8_trap: + return NVPTXISD::Suld1DArrayI8Trap; + case Intrinsic::nvvm_suld_1d_array_i16_trap: + return NVPTXISD::Suld1DArrayI16Trap; + case Intrinsic::nvvm_suld_1d_array_i32_trap: + return NVPTXISD::Suld1DArrayI32Trap; + case Intrinsic::nvvm_suld_1d_array_v2i8_trap: + return NVPTXISD::Suld1DArrayV2I8Trap; + case Intrinsic::nvvm_suld_1d_array_v2i16_trap: + return NVPTXISD::Suld1DArrayV2I16Trap; + case Intrinsic::nvvm_suld_1d_array_v2i32_trap: + return NVPTXISD::Suld1DArrayV2I32Trap; + case Intrinsic::nvvm_suld_1d_array_v4i8_trap: + return NVPTXISD::Suld1DArrayV4I8Trap; + case Intrinsic::nvvm_suld_1d_array_v4i16_trap: + return NVPTXISD::Suld1DArrayV4I16Trap; + case Intrinsic::nvvm_suld_1d_array_v4i32_trap: + return NVPTXISD::Suld1DArrayV4I32Trap; + case Intrinsic::nvvm_suld_2d_i8_trap: + return NVPTXISD::Suld2DI8Trap; + case Intrinsic::nvvm_suld_2d_i16_trap: + return NVPTXISD::Suld2DI16Trap; + case Intrinsic::nvvm_suld_2d_i32_trap: + return NVPTXISD::Suld2DI32Trap; + case Intrinsic::nvvm_suld_2d_v2i8_trap: + return NVPTXISD::Suld2DV2I8Trap; + case Intrinsic::nvvm_suld_2d_v2i16_trap: + return NVPTXISD::Suld2DV2I16Trap; + case Intrinsic::nvvm_suld_2d_v2i32_trap: + return NVPTXISD::Suld2DV2I32Trap; + case Intrinsic::nvvm_suld_2d_v4i8_trap: + return NVPTXISD::Suld2DV4I8Trap; + case Intrinsic::nvvm_suld_2d_v4i16_trap: + return NVPTXISD::Suld2DV4I16Trap; + case Intrinsic::nvvm_suld_2d_v4i32_trap: + return NVPTXISD::Suld2DV4I32Trap; + case Intrinsic::nvvm_suld_2d_array_i8_trap: + return NVPTXISD::Suld2DArrayI8Trap; + case Intrinsic::nvvm_suld_2d_array_i16_trap: + return NVPTXISD::Suld2DArrayI16Trap; + case Intrinsic::nvvm_suld_2d_array_i32_trap: + return NVPTXISD::Suld2DArrayI32Trap; + case Intrinsic::nvvm_suld_2d_array_v2i8_trap: + return NVPTXISD::Suld2DArrayV2I8Trap; + case Intrinsic::nvvm_suld_2d_array_v2i16_trap: + return NVPTXISD::Suld2DArrayV2I16Trap; + case Intrinsic::nvvm_suld_2d_array_v2i32_trap: + return NVPTXISD::Suld2DArrayV2I32Trap; + case Intrinsic::nvvm_suld_2d_array_v4i8_trap: + return NVPTXISD::Suld2DArrayV4I8Trap; + case Intrinsic::nvvm_suld_2d_array_v4i16_trap: + return NVPTXISD::Suld2DArrayV4I16Trap; + case Intrinsic::nvvm_suld_2d_array_v4i32_trap: + return NVPTXISD::Suld2DArrayV4I32Trap; + case Intrinsic::nvvm_suld_3d_i8_trap: + return NVPTXISD::Suld3DI8Trap; + case Intrinsic::nvvm_suld_3d_i16_trap: + return NVPTXISD::Suld3DI16Trap; + case Intrinsic::nvvm_suld_3d_i32_trap: + return NVPTXISD::Suld3DI32Trap; + case Intrinsic::nvvm_suld_3d_v2i8_trap: + return NVPTXISD::Suld3DV2I8Trap; + case Intrinsic::nvvm_suld_3d_v2i16_trap: + return NVPTXISD::Suld3DV2I16Trap; + case Intrinsic::nvvm_suld_3d_v2i32_trap: + return NVPTXISD::Suld3DV2I32Trap; + case Intrinsic::nvvm_suld_3d_v4i8_trap: + return NVPTXISD::Suld3DV4I8Trap; + case 
Intrinsic::nvvm_suld_3d_v4i16_trap: + return NVPTXISD::Suld3DV4I16Trap; + case Intrinsic::nvvm_suld_3d_v4i32_trap: + return NVPTXISD::Suld3DV4I32Trap; + } +} + // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as // TgtMemIntrinsic // because we need the information that is only available in the "Value" type @@ -1944,6 +2237,142 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( Info.align = 0; return true; + case Intrinsic::nvvm_tex_1d_v4f32_i32: + case Intrinsic::nvvm_tex_1d_v4f32_f32: + case Intrinsic::nvvm_tex_1d_level_v4f32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_v4f32_i32: + case Intrinsic::nvvm_tex_1d_array_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_2d_v4f32_i32: + case Intrinsic::nvvm_tex_2d_v4f32_f32: + case Intrinsic::nvvm_tex_2d_level_v4f32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_v4f32_i32: + case Intrinsic::nvvm_tex_2d_array_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32: + case Intrinsic::nvvm_tex_3d_v4f32_i32: + case Intrinsic::nvvm_tex_3d_v4f32_f32: + case Intrinsic::nvvm_tex_3d_level_v4f32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: { + Info.opc = getOpcForTextureInstr(Intrinsic); + Info.memVT = MVT::f32; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + case Intrinsic::nvvm_tex_1d_v4i32_i32: + case Intrinsic::nvvm_tex_1d_v4i32_f32: + case Intrinsic::nvvm_tex_1d_level_v4i32_f32: + case Intrinsic::nvvm_tex_1d_grad_v4i32_f32: + case Intrinsic::nvvm_tex_1d_array_v4i32_i32: + case Intrinsic::nvvm_tex_1d_array_v4i32_f32: + case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32: + case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32: + case Intrinsic::nvvm_tex_2d_v4i32_i32: + case Intrinsic::nvvm_tex_2d_v4i32_f32: + case Intrinsic::nvvm_tex_2d_level_v4i32_f32: + case Intrinsic::nvvm_tex_2d_grad_v4i32_f32: + case Intrinsic::nvvm_tex_2d_array_v4i32_i32: + case Intrinsic::nvvm_tex_2d_array_v4i32_f32: + case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32: + case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32: + case Intrinsic::nvvm_tex_3d_v4i32_i32: + case Intrinsic::nvvm_tex_3d_v4i32_f32: + case Intrinsic::nvvm_tex_3d_level_v4i32_f32: + case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: { + Info.opc = getOpcForTextureInstr(Intrinsic); + Info.memVT = MVT::i32; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + case Intrinsic::nvvm_suld_1d_i8_trap: + case Intrinsic::nvvm_suld_1d_v2i8_trap: + case Intrinsic::nvvm_suld_1d_v4i8_trap: + case Intrinsic::nvvm_suld_1d_array_i8_trap: + case Intrinsic::nvvm_suld_1d_array_v2i8_trap: + case Intrinsic::nvvm_suld_1d_array_v4i8_trap: + case Intrinsic::nvvm_suld_2d_i8_trap: + case Intrinsic::nvvm_suld_2d_v2i8_trap: + case Intrinsic::nvvm_suld_2d_v4i8_trap: + case Intrinsic::nvvm_suld_2d_array_i8_trap: + case Intrinsic::nvvm_suld_2d_array_v2i8_trap: + case Intrinsic::nvvm_suld_2d_array_v4i8_trap: + case Intrinsic::nvvm_suld_3d_i8_trap: + case Intrinsic::nvvm_suld_3d_v2i8_trap: + case Intrinsic::nvvm_suld_3d_v4i8_trap: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i8; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + 
Info.align = 16; + return true; + } + case Intrinsic::nvvm_suld_1d_i16_trap: + case Intrinsic::nvvm_suld_1d_v2i16_trap: + case Intrinsic::nvvm_suld_1d_v4i16_trap: + case Intrinsic::nvvm_suld_1d_array_i16_trap: + case Intrinsic::nvvm_suld_1d_array_v2i16_trap: + case Intrinsic::nvvm_suld_1d_array_v4i16_trap: + case Intrinsic::nvvm_suld_2d_i16_trap: + case Intrinsic::nvvm_suld_2d_v2i16_trap: + case Intrinsic::nvvm_suld_2d_v4i16_trap: + case Intrinsic::nvvm_suld_2d_array_i16_trap: + case Intrinsic::nvvm_suld_2d_array_v2i16_trap: + case Intrinsic::nvvm_suld_2d_array_v4i16_trap: + case Intrinsic::nvvm_suld_3d_i16_trap: + case Intrinsic::nvvm_suld_3d_v2i16_trap: + case Intrinsic::nvvm_suld_3d_v4i16_trap: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i16; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + case Intrinsic::nvvm_suld_1d_i32_trap: + case Intrinsic::nvvm_suld_1d_v2i32_trap: + case Intrinsic::nvvm_suld_1d_v4i32_trap: + case Intrinsic::nvvm_suld_1d_array_i32_trap: + case Intrinsic::nvvm_suld_1d_array_v2i32_trap: + case Intrinsic::nvvm_suld_1d_array_v4i32_trap: + case Intrinsic::nvvm_suld_2d_i32_trap: + case Intrinsic::nvvm_suld_2d_v2i32_trap: + case Intrinsic::nvvm_suld_2d_v4i32_trap: + case Intrinsic::nvvm_suld_2d_array_i32_trap: + case Intrinsic::nvvm_suld_2d_array_v2i32_trap: + case Intrinsic::nvvm_suld_2d_array_v4i32_trap: + case Intrinsic::nvvm_suld_3d_i32_trap: + case Intrinsic::nvvm_suld_3d_v2i32_trap: + case Intrinsic::nvvm_suld_3d_v4i32_trap: { + Info.opc = getOpcForSurfaceInstr(Intrinsic); + Info.memVT = MVT::i32; + Info.ptrVal = nullptr; + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + Info.align = 16; + return true; + } + } return false; } @@ -2094,7 +2523,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, case 4: { Opcode = NVPTXISD::LoadV4; EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; - LdResVTs = DAG.getVTList(ListVTs, 5); + LdResVTs = DAG.getVTList(ListVTs); break; } } @@ -2111,8 +2540,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, // pass along the extension information OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType())); - SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0], - OtherOps.size(), LD->getMemoryVT(), + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, + LD->getMemoryVT(), LD->getMemOperand()); SmallVector<SDValue, 8> ScalarRes; @@ -2126,8 +2555,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SDValue LoadChain = NewLD.getValue(NumElts); - SDValue BuildVec = - DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); Results.push_back(BuildVec); Results.push_back(LoadChain); @@ -2207,7 +2635,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, break; } EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other }; - LdResVTs = DAG.getVTList(ListVTs, 5); + LdResVTs = DAG.getVTList(ListVTs); break; } } @@ -2224,9 +2652,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N); - SDValue NewLD = DAG.getMemIntrinsicNode( - Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(), - MemSD->getMemoryVT(), MemSD->getMemOperand()); + SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps, + MemSD->getMemoryVT(), + MemSD->getMemOperand());
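// ---------------------------------------------------------------------------
// Illustrative aside (not part of the patch): the hunks above migrate
// SelectionDAG calls from explicit (pointer, count) pairs, e.g.
// getVTList(ListVTs, 5) and getMemIntrinsicNode(..., &OtherOps[0],
// OtherOps.size(), ...), to overloads taking llvm::ArrayRef, which carries
// its own length. Below is a minimal, self-contained sketch of the idiom;
// sumAll() is a hypothetical stand-in for the ArrayRef-taking APIs.
// ---------------------------------------------------------------------------
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

static int sumAll(llvm::ArrayRef<int> Vals) {
  int Sum = 0;
  for (int V : Vals) // ArrayRef knows its own size; no separate count needed
    Sum += V;
  return Sum;
}

int main() {
  int CArray[] = {1, 2, 3, 4};                       // like 'EVT ListVTs[]'
  llvm::SmallVector<int, 8> Vec(CArray, CArray + 4); // like 'OtherOps'
  // ArrayRef binds implicitly to a C array (size deduced from the array
  // type) and to a SmallVector, so the (&Vec[0], Vec.size()) pair goes away.
  return sumAll(CArray) == sumAll(Vec) ? 0 : 1;
}
// ---------------------------------------------------------------------------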
SmallVector<SDValue, 8> ScalarRes; @@ -2241,7 +2669,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, SDValue LoadChain = NewLD.getValue(NumElts); SDValue BuildVec = - DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts); + DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes); Results.push_back(BuildVec); Results.push_back(LoadChain); @@ -2263,8 +2691,8 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG, // We make sure the memory type is i8, which will be used during isel // to select the proper instruction. SDValue NewLD = - DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0], - Ops.size(), MVT::i8, MemSD->getMemOperand()); + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops, + MVT::i8, MemSD->getMemOperand()); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewLD.getValue(0))); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index c1e8c21..7bad8a2 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -70,7 +70,100 @@ enum NodeType { StoreParamU32, // to zext and store a <32bit value, not used currently StoreRetval, StoreRetvalV2, - StoreRetvalV4 + StoreRetvalV4, + + // Texture intrinsics + Tex1DFloatI32, + Tex1DFloatFloat, + Tex1DFloatFloatLevel, + Tex1DFloatFloatGrad, + Tex1DI32I32, + Tex1DI32Float, + Tex1DI32FloatLevel, + Tex1DI32FloatGrad, + Tex1DArrayFloatI32, + Tex1DArrayFloatFloat, + Tex1DArrayFloatFloatLevel, + Tex1DArrayFloatFloatGrad, + Tex1DArrayI32I32, + Tex1DArrayI32Float, + Tex1DArrayI32FloatLevel, + Tex1DArrayI32FloatGrad, + Tex2DFloatI32, + Tex2DFloatFloat, + Tex2DFloatFloatLevel, + Tex2DFloatFloatGrad, + Tex2DI32I32, + Tex2DI32Float, + Tex2DI32FloatLevel, + Tex2DI32FloatGrad, + Tex2DArrayFloatI32, + Tex2DArrayFloatFloat, + Tex2DArrayFloatFloatLevel, + Tex2DArrayFloatFloatGrad, + Tex2DArrayI32I32, + Tex2DArrayI32Float, + Tex2DArrayI32FloatLevel, + Tex2DArrayI32FloatGrad, + Tex3DFloatI32, + Tex3DFloatFloat, + Tex3DFloatFloatLevel, + Tex3DFloatFloatGrad, + Tex3DI32I32, + Tex3DI32Float, + Tex3DI32FloatLevel, + Tex3DI32FloatGrad, + + // Surface intrinsics + Suld1DI8Trap, + Suld1DI16Trap, + Suld1DI32Trap, + Suld1DV2I8Trap, + Suld1DV2I16Trap, + Suld1DV2I32Trap, + Suld1DV4I8Trap, + Suld1DV4I16Trap, + Suld1DV4I32Trap, + + Suld1DArrayI8Trap, + Suld1DArrayI16Trap, + Suld1DArrayI32Trap, + Suld1DArrayV2I8Trap, + Suld1DArrayV2I16Trap, + Suld1DArrayV2I32Trap, + Suld1DArrayV4I8Trap, + Suld1DArrayV4I16Trap, + Suld1DArrayV4I32Trap, + + Suld2DI8Trap, + Suld2DI16Trap, + Suld2DI32Trap, + Suld2DV2I8Trap, + Suld2DV2I16Trap, + Suld2DV2I32Trap, + Suld2DV4I8Trap, + Suld2DV4I16Trap, + Suld2DV4I32Trap, + + Suld2DArrayI8Trap, + Suld2DArrayI16Trap, + Suld2DArrayI32Trap, + Suld2DArrayV2I8Trap, + Suld2DArrayV2I16Trap, + Suld2DArrayV2I32Trap, + Suld2DArrayV4I8Trap, + Suld2DArrayV4I16Trap, + Suld2DArrayV4I32Trap, + + Suld3DI8Trap, + Suld3DI16Trap, + Suld3DI32Trap, + Suld3DV2I8Trap, + Suld3DV2I16Trap, + Suld3DV2I32Trap, + Suld3DV4I8Trap, + Suld3DV4I16Trap, + Suld3DV4I32Trap }; } @@ -80,68 +173,70 @@ enum NodeType { class NVPTXTargetLowering : public TargetLowering { public: explicit NVPTXTargetLowering(NVPTXTargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset, SelectionDAG &DAG) const; - virtual const
char *getTargetNodeName(unsigned Opcode) const; + const char *getTargetNodeName(unsigned Opcode) const override; bool isTypeSupportedInIntrinsic(MVT VT) const; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, - unsigned Intrinsic) const; + unsigned Intrinsic) const override; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type /// Used to guide target specific optimizations, like loop strength /// reduction (LoopStrengthReduce.cpp) and memory optimization for /// address mode (CodeGenPrepare.cpp) - virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; /// getFunctionAlignment - Return the Log2 alignment of this function. - virtual unsigned getFunctionAlignment(const Function *F) const; + unsigned getFunctionAlignment(const Function *F) const; - virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const { + EVT getSetCCResultType(LLVMContext &, EVT VT) const override { if (VT.isVector()) return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); return MVT::i1; } - ConstraintType getConstraintType(const std::string &Constraint) const; + ConstraintType + getConstraintType(const std::string &Constraint) const override; std::pair<unsigned, const TargetRegisterClass *> - getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, + MVT VT) const override; - virtual SDValue LowerFormalArguments( + SDValue LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; - virtual SDValue - LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; std::string getPrototype(Type *, const ArgListTy &, const SmallVectorImpl<ISD::OutputArg> &, unsigned retAlignment, const ImmutableCallSite *CS) const; - virtual SDValue + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, - SelectionDAG &DAG) const; + SelectionDAG &DAG) const override; - virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, - std::vector<SDValue> &Ops, - SelectionDAG &DAG) const; + void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const override; NVPTXTargetMachine *nvTM; // PTX always uses 32-bit shift amounts - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } - virtual bool shouldSplitVectorType(EVT VT) const override; + bool shouldSplitVectorType(EVT VT) const override; private: const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here @@ -160,8 +255,8 @@ private: SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const override; unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, Type *Ty, unsigned Idx) const; diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp new file mode
100644 index 0000000..397f4bc --- /dev/null +++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -0,0 +1,178 @@ +//===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements IR-level optimizations of image access code, +// including: +// +// 1. Eliminate istypep intrinsics when image access qualifier is known +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" + +using namespace llvm; + +namespace { +class NVPTXImageOptimizer : public FunctionPass { +private: + static char ID; + SmallVector<Instruction*, 4> InstrToDelete; + +public: + NVPTXImageOptimizer(); + + bool runOnFunction(Function &F) override; + +private: + bool replaceIsTypePSampler(Instruction &I); + bool replaceIsTypePSurface(Instruction &I); + bool replaceIsTypePTexture(Instruction &I); + Value *cleanupValue(Value *V); + void replaceWith(Instruction *From, ConstantInt *To); +}; +} + +char NVPTXImageOptimizer::ID = 0; + +NVPTXImageOptimizer::NVPTXImageOptimizer() + : FunctionPass(ID) {} + +bool NVPTXImageOptimizer::runOnFunction(Function &F) { + bool Changed = false; + InstrToDelete.clear(); + + // Look for call instructions in the function + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; + ++BI) { + for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end(); + I != E; ++I) { + Instruction &Instr = *I; + if (CallInst *CI = dyn_cast<CallInst>(I)) { + Function *CalledF = CI->getCalledFunction(); + if (CalledF && CalledF->isIntrinsic()) { + // This is an intrinsic function call, check if it's an istypep + switch (CalledF->getIntrinsicID()) { + default: break; + case Intrinsic::nvvm_istypep_sampler: + Changed |= replaceIsTypePSampler(Instr); + break; + case Intrinsic::nvvm_istypep_surface: + Changed |= replaceIsTypePSurface(Instr); + break; + case Intrinsic::nvvm_istypep_texture: + Changed |= replaceIsTypePTexture(Instr); + break; + } + } + } + } + } + + // Delete any istypep instances we replaced in the IR + for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i) + InstrToDelete[i]->eraseFromParent(); + + return Changed; +} + +bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) { + Value *TexHandle = cleanupValue(I.getOperand(0)); + if (isSampler(*TexHandle)) { + // This is an OpenCL sampler, so it must be a samplerref + replaceWith(&I, ConstantInt::getTrue(I.getContext())); + return true; + } else if (isImageWriteOnly(*TexHandle) || + isImageReadWrite(*TexHandle) || + isImageReadOnly(*TexHandle)) { + // This is an OpenCL image, so it cannot be a samplerref + replaceWith(&I, ConstantInt::getFalse(I.getContext())); + return true; + } else { + // The image type is unknown, so we cannot eliminate the intrinsic + return false; + } +} + +bool NVPTXImageOptimizer::replaceIsTypePSurface(Instruction &I) { + Value *TexHandle = cleanupValue(I.getOperand(0)); + if (isImageReadWrite(*TexHandle) || + isImageWriteOnly(*TexHandle)) { + // This is an OpenCL read-write/write-only image, so it must be a surfref + replaceWith(&I, ConstantInt::getTrue(I.getContext())); + return true; + } else if
(isImageReadOnly(*TexHandle) || + isSampler(*TexHandle)) { + // This is an OpenCL read-only image or sampler, so it cannot be + // a surfref + replaceWith(&I, ConstantInt::getFalse(I.getContext())); + return true; + } else { + // The image type is unknown, so we cannot eliminate the intrinsic + return false; + } +} + +bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) { + Value *TexHandle = cleanupValue(I.getOperand(0)); + if (isImageReadOnly(*TexHandle)) { + // This is an OpenCL read-only image, so it must be a texref + replaceWith(&I, ConstantInt::getTrue(I.getContext())); + return true; + } else if (isImageWriteOnly(*TexHandle) || + isImageReadWrite(*TexHandle) || + isSampler(*TexHandle)) { + // This is an OpenCL read-write/write-only image or a sampler, so it + // cannot be a texref + replaceWith(&I, ConstantInt::getFalse(I.getContext())); + return true; + } else { + // The image type is unknown, so we cannot eliminate the intrinsic + return false; + } +} + +void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) { + // We implement "poor man's DCE" here to make sure any code that is no longer + // live is actually unreachable and can be trivially eliminated by the + // unreachable block elimination pass. + for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE; ++UI) { + if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) { + if (BI->isUnconditional()) continue; + BasicBlock *Dest; + if (To->isZero()) + // Get false block + Dest = BI->getSuccessor(1); + else + // Get true block + Dest = BI->getSuccessor(0); + BranchInst::Create(Dest, BI); + InstrToDelete.push_back(BI); + } + } + From->replaceAllUsesWith(To); + InstrToDelete.push_back(From); +} + +Value *NVPTXImageOptimizer::cleanupValue(Value *V) { + if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) { + return cleanupValue(EVI->getAggregateOperand()); + } + return V; +} + +FunctionPass *llvm::createNVPTXImageOptimizerPass() { + return new NVPTXImageOptimizer(); +} diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 86ddd38..cdc8088 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -14,8 +14,6 @@ #include "NVPTX.h" #include "NVPTXInstrInfo.h" #include "NVPTXTargetMachine.h" -#define GET_INSTRINFO_CTOR_DTOR -#include "NVPTXGenInstrInfo.inc" #include "llvm/IR/Function.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFunction.h" @@ -24,6 +22,9 @@ using namespace llvm; +#define GET_INSTRINFO_CTOR_DTOR +#include "NVPTXGenInstrInfo.inc" + // Pin the vtable to this file. void NVPTXInstrInfo::anchor() {} @@ -256,7 +257,7 @@ unsigned NVPTXInstrInfo::InsertBranch( "NVPTX branch conditions have two components!"); // One-way branch. - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB); else // Conditional branch diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 600fc5c..88a9e45 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -30,7 +30,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo { public: explicit NVPTXInstrInfo(NVPTXTargetMachine &TM); - virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; } + const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; } /* The following virtual functions are used in register allocation.
* They are not implemented because the existing interface and the logic @@ -50,9 +50,9 @@ public: * const TargetRegisterClass *RC) const; */ - virtual void copyPhysReg( + void copyPhysReg( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, bool KillSrc) const; + unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DestReg) const; bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; @@ -61,13 +61,13 @@ public: virtual bool CanTailMerge(const MachineInstr *MI) const; // Branch analysis. - virtual bool AnalyzeBranch( + bool AnalyzeBranch( MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch( + SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch( MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const override; unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const { return MI.getOperand(2).getImm(); } diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 14049b1..5e228fc 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1666,6 +1666,9 @@ def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen (MoveParam texternalsym:$src)))), (nvvm_move_ptr32 texternalsym:$src)>; +def texsurf_handles + : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src), + "mov.u64 \t$result, $src;", []>; //----------------------------------- // Compiler Error Warn @@ -1686,6 +1689,1826 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), [(int_nvvm_compiler_error Int64Regs:$a)]>; +//----------------------------------- +// Texture Intrinsics +//----------------------------------- + +// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must +// also be defined in NVPTXReplaceImageHandles.cpp + + +// Texture fetch instructions using handles +def TEX_1D_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), + "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), + "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod), + "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], $lod;", + []>; +def TEX_1D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_1D_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x), + "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\},
[$t, $s, \\{$x\\}];", + []>; +def TEX_1D_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x), + "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];", + []>; +def TEX_1D_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], $lod;", + []>; +def TEX_1D_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +def TEX_1D_ARRAY_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], $lod;", + []>; +def TEX_1D_ARRAY_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; +def TEX_1D_ARRAY_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x), + "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}];", + []>; +def TEX_1D_ARRAY_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$lod), + "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], $lod;", + []>; +def TEX_1D_ARRAY_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$gradx, Float32Regs:$grady), + "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", + []>; + +def TEX_2D_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, 
Float32Regs:$y), + "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], $lod;", + []>; +def TEX_2D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_2D_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y), + "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}];", + []>; +def TEX_2D_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$lod), + "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], $lod;", + []>; +def TEX_2D_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; + +def TEX_2D_ARRAY_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_2D_ARRAY_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; +def TEX_2D_ARRAY_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$y), + "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + 
"[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y), + "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}];", + []>; +def TEX_2D_ARRAY_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, Float32Regs:$lod), + "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;", + []>; +def TEX_2D_ARRAY_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x, + Float32Regs:$y, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$grady0, Float32Regs:$grady1), + "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, " + "\\{$grady0, $grady1\\};", + []>; + +def TEX_3D_F32_I32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_F32_F32 + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_F32_F32_LEVEL + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_3D_F32_F32_GRAD + : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g, + Float32Regs:$b, Float32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; +def TEX_3D_I32_I32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$z), + "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_I32_F32 + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z), + "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}];", + []>; +def TEX_3D_I32_F32_LEVEL + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, Float32Regs:$lod), + "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;", + []>; +def TEX_3D_I32_F32_GRAD + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y, + Float32Regs:$z, + Float32Regs:$gradx0, 
Float32Regs:$gradx1, + Float32Regs:$gradx2, Float32Regs:$grady0, + Float32Regs:$grady1, Float32Regs:$grady2), + "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, " + "[$t, $s, \\{$x, $y, $z, $z\\}], " + "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, " + "\\{$grady0, $grady1, $grady2, $grady2\\};", + []>; + + +// Surface load instructions +def SULD_1D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; +def SULD_1D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x), + "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];", + []>; + +def SULD_1D_ARRAY_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; +def SULD_1D_ARRAY_V4I32_TRAP + : 
NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x), + "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x\\}];", + []>; + +def SULD_2D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; +def SULD_2D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y), + "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];", + []>; + +def SULD_2D_ARRAY_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, 
Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; +def SULD_2D_ARRAY_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y), + "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$l, $x, $y, $y\\}];", + []>; + +def SULD_3D_I8_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I16_TRAP + : NVPTXInst<(outs Int16Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_I32_TRAP + : NVPTXInst<(outs Int32Regs:$r), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V2I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I8_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I16_TRAP + : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; +def SULD_3D_V4I32_TRAP + : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z), + "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, " + "[$s, \\{$x, $y, $z, $z\\}];", + []>; + + +//----------------------------------- +// Texture Query Intrinsics +//----------------------------------- +def TXQ_CHANNEL_ORDER + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.channel_order.b32 \t$d, [$a];", + []>; +def TXQ_CHANNEL_DATA_TYPE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.channel_data_type.b32 \t$d, [$a];", + []>; +def TXQ_WIDTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.width.b32 \t$d, [$a];", + []>; +def TXQ_HEIGHT + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.height.b32 \t$d, [$a];", + []>; +def TXQ_DEPTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.depth.b32 \t$d, [$a];", + []>; +def TXQ_ARRAY_SIZE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.array_size.b32 \t$d, [$a];", + []>; +def TXQ_NUM_SAMPLES + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.num_samples.b32 \t$d, [$a];", + []>; +def TXQ_NUM_MIPMAP_LEVELS + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "txq.num_mipmap_levels.b32 \t$d, [$a];", + []>; + +def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a), + 
(TXQ_CHANNEL_ORDER Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a), + (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_width Int64Regs:$a), + (TXQ_WIDTH Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_height Int64Regs:$a), + (TXQ_HEIGHT Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_depth Int64Regs:$a), + (TXQ_DEPTH Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_array_size Int64Regs:$a), + (TXQ_ARRAY_SIZE Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a), + (TXQ_NUM_SAMPLES Int64Regs:$a)>; +def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a), + (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>; + + +//----------------------------------- +// Surface Query Intrinsics +//----------------------------------- +def SUQ_CHANNEL_ORDER + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.channel_order.b32 \t$d, [$a];", + []>; +def SUQ_CHANNEL_DATA_TYPE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.channel_data_type.b32 \t$d, [$a];", + []>; +def SUQ_WIDTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.width.b32 \t$d, [$a];", + []>; +def SUQ_HEIGHT + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.height.b32 \t$d, [$a];", + []>; +def SUQ_DEPTH + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.depth.b32 \t$d, [$a];", + []>; +def SUQ_ARRAY_SIZE + : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a), + "suq.array_size.b32 \t$d, [$a];", + []>; + +def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a), + (SUQ_CHANNEL_ORDER Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a), + (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_width Int64Regs:$a), + (SUQ_WIDTH Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_height Int64Regs:$a), + (SUQ_HEIGHT Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_depth Int64Regs:$a), + (SUQ_DEPTH Int64Regs:$a)>; +def : Pat<(int_nvvm_suq_array_size Int64Regs:$a), + (SUQ_ARRAY_SIZE Int64Regs:$a)>; + + +//===- Handle Query -------------------------------------------------------===// + +// TODO: These intrinsics are not yet finalized, pending PTX ISA design work +def ISTYPEP_SAMPLER + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "istypep.samplerref \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>; +def ISTYPEP_SURFACE + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "istypep.surfref \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>; +def ISTYPEP_TEXTURE + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "istypep.texref \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>; + +//===- Surface Stores -----------------------------------------------------===// + +// Unformatted + +def SUST_B_1D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_B_1D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def 
SUST_B_1D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_1D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), + "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_B_1D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g), + "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_B_1D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_1D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, 
Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_B_2D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_2D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r), + "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_B_2D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g), + "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_2D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_2D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_B_3D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], 
\\{$r\\};", + []>; +def SUST_B_3D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_B_3D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_B_3D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_3D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_B_3D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + +// Formatted + +def SUST_P_1D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_P_1D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_P_1D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};", + []>; +def SUST_P_1D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_P_1D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g, + Int16Regs:$b, Int16Regs:$a), + "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; +def SUST_P_1D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g, + Int32Regs:$b, Int32Regs:$a), + "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", + []>; + + +def SUST_P_1D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_P_1D_ARRAY_B16_TRAP + : 
NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r), + "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_P_1D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r), + "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};", + []>; +def SUST_P_1D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g), + "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g), + "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", + []>; +def SUST_P_1D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_1D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_1D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_P_2D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_P_2D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g), + "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_P_2D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", + []>; +def SUST_P_2D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_2D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, + Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_2D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def 
SUST_P_2D_ARRAY_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_ARRAY_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r), + "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_ARRAY_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r), + "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", + []>; +def SUST_P_2D_ARRAY_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_2D_ARRAY_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_2D_ARRAY_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g), + "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_2D_ARRAY_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_2D_ARRAY_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_2D_ARRAY_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +def SUST_P_3D_B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_P_3D_B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_P_3D_B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};", + []>; +def SUST_P_3D_V2B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_3D_V2B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_3D_V2B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g\\};", + []>; +def SUST_P_3D_V4B8_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, 
Int16Regs:$b, Int16Regs:$a), + "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_3D_V4B16_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; +def SUST_P_3D_V4B32_TRAP + : NVPTXInst<(outs), + (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], " + "\\{$r, $g, $b, $a\\};", + []>; + + +// Surface store instruction patterns +// I'm not sure why we can't just include these in the instruction definitions, +// but TableGen complains of type errors :( + +def : Pat<(int_nvvm_sust_b_1d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_1d_array_i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), + (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, 
Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), + (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_2d_array_i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s, + Int32Regs:$l, 
Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_b_3d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_B_3D_B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + (SUST_B_3D_B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_B_3D_V2B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + (SUST_B_3D_V2B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + 
(SUST_B_3D_V4B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_B_3D_V4B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_b_3d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_B_3D_V4B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + + +def : Pat<(int_nvvm_sust_p_1d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), + (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), + (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_1d_array_i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), + (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), + (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), + (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g)>; + +def : 
Pat<(int_nvvm_sust_p_1d_array_v2i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), + (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_2d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), + (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), + (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_2d_array_i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_array_i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), + (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r)>; + +def : 
Pat<(int_nvvm_sust_p_2d_array_i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), + (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g), + (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, + Int32Regs:$g), + (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s, + Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap + Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, + Int32Regs:$x, Int32Regs:$y, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + + +def : Pat<(int_nvvm_sust_p_3d_i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_P_3D_B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r), + (SUST_P_3D_B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r), + (SUST_P_3D_B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_P_3D_V2B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g), + (SUST_P_3D_V2B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v2i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g), + (SUST_P_3D_V2B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g)>; + +def : Pat<(int_nvvm_sust_p_3d_v4i8_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_3D_V4B8_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, 
Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_3d_v4i16_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), + (SUST_P_3D_V4B16_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; + +def : Pat<(int_nvvm_sust_p_3d_v4i32_trap + Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), + (SUST_P_3D_V4B32_TRAP Int64Regs:$s, + Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, + Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; + + //===-- Old PTX Back-end Intrinsics ---------------------------------------===// diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h index c9aa87d..5ec1fc9 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h @@ -27,17 +27,17 @@ struct NVPTXLowerAggrCopies : public FunctionPass { NVPTXLowerAggrCopies() : FunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved("stack-protector"); AU.addPreserved(); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; static const unsigned MaxAggrCopySize = 128; - virtual const char *getPassName() const { + const char *getPassName() const override { return "Lower aggregate copies/intrinsics into loops"; } }; diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp index ca24764..137248b 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.cpp +++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "nvptx-mcexpr" #include "NVPTXMCExpr.h" #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" using namespace llvm; +#define DEBUG_TYPE "nvptx-mcexpr" + const NVPTXFloatMCExpr* NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) { return new (Ctx) NVPTXFloatMCExpr(Kind, Flt); diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 0efb231..0ee018c 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -61,18 +61,18 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const; + void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { + const MCAsmLayout *Layout) const override { return false; } - void AddValueSymbols(MCAssembler *) const {}; - const MCSection *FindAssociatedSection() const { - return NULL; + void AddValueSymbols(MCAssembler *) const override {}; + const MCSection *FindAssociatedSection() const override { + return nullptr; } // There are no TLS NVPTXMCExprs at the moment. 
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h new file mode 100644 index 0000000..67fb390 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h @@ -0,0 +1,46 @@ +//===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class is attached to a MachineFunction instance and tracks target- +// dependent information +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { +class NVPTXMachineFunctionInfo : public MachineFunctionInfo { +private: + /// Stores a mapping from index to symbol name for removing image handles + /// on Fermi. + SmallVector ImageHandleList; + +public: + NVPTXMachineFunctionInfo(MachineFunction &MF) {} + + /// Returns the index for the symbol \p Symbol. If the symbol was previously + /// added, the same index is returned. Otherwise, the symbol is added and the + /// new index is returned. + unsigned getImageHandleSymbolIndex(const char *Symbol) { + // Is the symbol already present? + for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i) + if (ImageHandleList[i] == std::string(Symbol)) + return i; + // Nope, insert it + ImageHandleList.push_back(Symbol); + return ImageHandleList.size()-1; + } + + /// Returns the symbol name at the given index.
+ const char *getImageHandleSymbol(unsigned Idx) const { + assert(ImageHandleList.size() > Idx && "Bad index"); + return ImageHandleList[Idx].c_str(); + } +}; +} diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index d5b042a..348ab0c 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -25,13 +25,15 @@ using namespace llvm; +#define DEBUG_TYPE "nvptx-prolog-epilog" + namespace { class NVPTXPrologEpilogPass : public MachineFunctionPass { public: static char ID; NVPTXPrologEpilogPass() : MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: void calculateFrameObjectOffsets(MachineFunction &Fn); @@ -58,7 +60,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (!MI->getOperand(i).isFI()) continue; - TRI.eliminateFrameIndex(MI, 0, i, NULL); + TRI.eliminateFrameIndex(MI, 0, i, nullptr); Modified = true; } } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp index 4d3a1d9..62f288b 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "nvptx-reg-info" - #include "NVPTXRegisterInfo.h" #include "NVPTX.h" #include "NVPTXSubtarget.h" @@ -25,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "nvptx-reg-info" + namespace llvm { std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) { @@ -78,19 +78,12 @@ NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st) #include "NVPTXGenRegisterInfo.inc" /// NVPTX Callee Saved Registers -const uint16_t * +const MCPhysReg * NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - static const uint16_t CalleeSavedRegs[] = { 0 }; + static const MCPhysReg CalleeSavedRegs[] = { 0 }; return CalleeSavedRegs; } -// NVPTX Callee Saved Reg Classes -const TargetRegisterClass *const * -NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 }; - return CalleeSavedRegClasses; -} - BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); return Reserved; @@ -113,12 +106,6 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); } -int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { - return 0; -} - unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return NVPTX::VRFrame; } - -unsigned NVPTXRegisterInfo::getRARegister() const { return 0; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h index 0a20f29..a7594be 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.h +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h @@ -16,11 +16,10 @@ #include "ManagedStringPool.h" #include "llvm/Target/TargetRegisterInfo.h" +#include #define GET_REGINFO_HEADER #include "NVPTXGenRegisterInfo.inc" -#include "llvm/Target/TargetRegisterInfo.h" -#include namespace llvm { @@ -42,22 +41,16 @@ public: //------------------------------------------------------ // NVPTX callee saved registers - virtual const uint16_t * - 
getCalleeSavedRegs(const MachineFunction *MF = 0) const; - - // NVPTX callee saved register classes - virtual const TargetRegisterClass *const * - getCalleeSavedRegClasses(const MachineFunction *MF) const; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; - virtual BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; - virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, - unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; - virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; - virtual unsigned getFrameRegister(const MachineFunction &MF) const; - virtual unsigned getRARegister() const; + unsigned getFrameRegister(const MachineFunction &MF) const override; ManagedStringPool *getStrPool() const { return const_cast(&ManagedStrPool); diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp new file mode 100644 index 0000000..afd53a6 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp @@ -0,0 +1,357 @@ +//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// On Fermi, image handles are not supported. To work around this, we traverse +// the machine code and replace image handles with concrete symbols. For this +// to work reliably, inlining of all function calls must be performed. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" + +using namespace llvm; + +namespace { +class NVPTXReplaceImageHandles : public MachineFunctionPass { +private: + static char ID; + DenseSet InstrsToRemove; + +public: + NVPTXReplaceImageHandles(); + + bool runOnMachineFunction(MachineFunction &MF) override; +private: + bool processInstr(MachineInstr &MI); + void replaceImageHandle(MachineOperand &Op, MachineFunction &MF); +}; +} + +char NVPTXReplaceImageHandles::ID = 0; + +NVPTXReplaceImageHandles::NVPTXReplaceImageHandles() + : MachineFunctionPass(ID) {} + +bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + InstrsToRemove.clear(); + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE; + ++BI) { + for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end(); + I != E; ++I) { + MachineInstr &MI = *I; + Changed |= processInstr(MI); + } + } + + // Now clean up any handle-access instructions + // This is needed in debug mode when code cleanup passes are not executed, + // but we need the handle accesses to be eliminated because they are not + // valid instructions when image handles are disabled.
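// A sketch of the rewrite this pass performs (virtual-register and symbol
// names below are illustrative, not taken from this patch): a handle that
// reaches a surface store through a vreg, e.g.
//   %rd1 = LD_i64_avar ... "kernel_param_0" ; handle loaded from a kernel arg
//   SUST_B_1D_B8_TRAP %rd1, %r1, %rs1
// is folded by replaceImageHandle() into an immediate index into the
// NVPTXMachineFunctionInfo symbol table:
//   SUST_B_1D_B8_TRAP idx("kernel_param_0"), %r1, %rs1
// which leaves the defining load dead; the loop below erases those queued
// loads.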
+ for (DenseSet::iterator I = InstrsToRemove.begin(), + E = InstrsToRemove.end(); I != E; ++I) { + (*I)->eraseFromParent(); + } + + return Changed; +} + +bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) { + MachineFunction &MF = *MI.getParent()->getParent(); + // Check if we have a surface/texture instruction + switch (MI.getOpcode()) { + default: return false; + case NVPTX::TEX_1D_F32_I32: + case NVPTX::TEX_1D_F32_F32: + case NVPTX::TEX_1D_F32_F32_LEVEL: + case NVPTX::TEX_1D_F32_F32_GRAD: + case NVPTX::TEX_1D_I32_I32: + case NVPTX::TEX_1D_I32_F32: + case NVPTX::TEX_1D_I32_F32_LEVEL: + case NVPTX::TEX_1D_I32_F32_GRAD: + case NVPTX::TEX_1D_ARRAY_F32_I32: + case NVPTX::TEX_1D_ARRAY_F32_F32: + case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL: + case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD: + case NVPTX::TEX_1D_ARRAY_I32_I32: + case NVPTX::TEX_1D_ARRAY_I32_F32: + case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL: + case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD: + case NVPTX::TEX_2D_F32_I32: + case NVPTX::TEX_2D_F32_F32: + case NVPTX::TEX_2D_F32_F32_LEVEL: + case NVPTX::TEX_2D_F32_F32_GRAD: + case NVPTX::TEX_2D_I32_I32: + case NVPTX::TEX_2D_I32_F32: + case NVPTX::TEX_2D_I32_F32_LEVEL: + case NVPTX::TEX_2D_I32_F32_GRAD: + case NVPTX::TEX_2D_ARRAY_F32_I32: + case NVPTX::TEX_2D_ARRAY_F32_F32: + case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL: + case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD: + case NVPTX::TEX_2D_ARRAY_I32_I32: + case NVPTX::TEX_2D_ARRAY_I32_F32: + case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL: + case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD: + case NVPTX::TEX_3D_F32_I32: + case NVPTX::TEX_3D_F32_F32: + case NVPTX::TEX_3D_F32_F32_LEVEL: + case NVPTX::TEX_3D_F32_F32_GRAD: + case NVPTX::TEX_3D_I32_I32: + case NVPTX::TEX_3D_I32_F32: + case NVPTX::TEX_3D_I32_F32_LEVEL: + case NVPTX::TEX_3D_I32_F32_GRAD: { + // This is a texture fetch, so operand 4 is a texref and operand 5 is + // a samplerref + MachineOperand &TexHandle = MI.getOperand(4); + MachineOperand &SampHandle = MI.getOperand(5); + + replaceImageHandle(TexHandle, MF); + replaceImageHandle(SampHandle, MF); + + return true; + } + case NVPTX::SULD_1D_I8_TRAP: + case NVPTX::SULD_1D_I16_TRAP: + case NVPTX::SULD_1D_I32_TRAP: + case NVPTX::SULD_1D_ARRAY_I8_TRAP: + case NVPTX::SULD_1D_ARRAY_I16_TRAP: + case NVPTX::SULD_1D_ARRAY_I32_TRAP: + case NVPTX::SULD_2D_I8_TRAP: + case NVPTX::SULD_2D_I16_TRAP: + case NVPTX::SULD_2D_I32_TRAP: + case NVPTX::SULD_2D_ARRAY_I8_TRAP: + case NVPTX::SULD_2D_ARRAY_I16_TRAP: + case NVPTX::SULD_2D_ARRAY_I32_TRAP: + case NVPTX::SULD_3D_I8_TRAP: + case NVPTX::SULD_3D_I16_TRAP: + case NVPTX::SULD_3D_I32_TRAP: { + // This is a V1 surface load, so operand 1 is a surfref + MachineOperand &SurfHandle = MI.getOperand(1); + + replaceImageHandle(SurfHandle, MF); + + return true; + } + case NVPTX::SULD_1D_V2I8_TRAP: + case NVPTX::SULD_1D_V2I16_TRAP: + case NVPTX::SULD_1D_V2I32_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I8_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I16_TRAP: + case NVPTX::SULD_1D_ARRAY_V2I32_TRAP: + case NVPTX::SULD_2D_V2I8_TRAP: + case NVPTX::SULD_2D_V2I16_TRAP: + case NVPTX::SULD_2D_V2I32_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I8_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I16_TRAP: + case NVPTX::SULD_2D_ARRAY_V2I32_TRAP: + case NVPTX::SULD_3D_V2I8_TRAP: + case NVPTX::SULD_3D_V2I16_TRAP: + case NVPTX::SULD_3D_V2I32_TRAP: { + // This is a V2 surface load, so operand 2 is a surfref + MachineOperand &SurfHandle = MI.getOperand(2); + + replaceImageHandle(SurfHandle, MF); + + return true; + } + case NVPTX::SULD_1D_V4I8_TRAP: + case NVPTX::SULD_1D_V4I16_TRAP: + case 
NVPTX::SULD_1D_V4I32_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I8_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I16_TRAP: + case NVPTX::SULD_1D_ARRAY_V4I32_TRAP: + case NVPTX::SULD_2D_V4I8_TRAP: + case NVPTX::SULD_2D_V4I16_TRAP: + case NVPTX::SULD_2D_V4I32_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I8_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I16_TRAP: + case NVPTX::SULD_2D_ARRAY_V4I32_TRAP: + case NVPTX::SULD_3D_V4I8_TRAP: + case NVPTX::SULD_3D_V4I16_TRAP: + case NVPTX::SULD_3D_V4I32_TRAP: { + // This is a V4 surface load, so operand 4 is a surfref + MachineOperand &SurfHandle = MI.getOperand(4); + + replaceImageHandle(SurfHandle, MF); + + return true; + } + case NVPTX::SUST_B_1D_B8_TRAP: + case NVPTX::SUST_B_1D_B16_TRAP: + case NVPTX::SUST_B_1D_B32_TRAP: + case NVPTX::SUST_B_1D_V2B8_TRAP: + case NVPTX::SUST_B_1D_V2B16_TRAP: + case NVPTX::SUST_B_1D_V2B32_TRAP: + case NVPTX::SUST_B_1D_V4B8_TRAP: + case NVPTX::SUST_B_1D_V4B16_TRAP: + case NVPTX::SUST_B_1D_V4B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_B_2D_B8_TRAP: + case NVPTX::SUST_B_2D_B16_TRAP: + case NVPTX::SUST_B_2D_B32_TRAP: + case NVPTX::SUST_B_2D_V2B8_TRAP: + case NVPTX::SUST_B_2D_V2B16_TRAP: + case NVPTX::SUST_B_2D_V2B32_TRAP: + case NVPTX::SUST_B_2D_V4B8_TRAP: + case NVPTX::SUST_B_2D_V4B16_TRAP: + case NVPTX::SUST_B_2D_V4B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_B_3D_B8_TRAP: + case NVPTX::SUST_B_3D_B16_TRAP: + case NVPTX::SUST_B_3D_B32_TRAP: + case NVPTX::SUST_B_3D_V2B8_TRAP: + case NVPTX::SUST_B_3D_V2B16_TRAP: + case NVPTX::SUST_B_3D_V2B32_TRAP: + case NVPTX::SUST_B_3D_V4B8_TRAP: + case NVPTX::SUST_B_3D_V4B16_TRAP: + case NVPTX::SUST_B_3D_V4B32_TRAP: + case NVPTX::SUST_P_1D_B8_TRAP: + case NVPTX::SUST_P_1D_B16_TRAP: + case NVPTX::SUST_P_1D_B32_TRAP: + case NVPTX::SUST_P_1D_V2B8_TRAP: + case NVPTX::SUST_P_1D_V2B16_TRAP: + case NVPTX::SUST_P_1D_V2B32_TRAP: + case NVPTX::SUST_P_1D_V4B8_TRAP: + case NVPTX::SUST_P_1D_V4B16_TRAP: + case NVPTX::SUST_P_1D_V4B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_P_2D_B8_TRAP: + case NVPTX::SUST_P_2D_B16_TRAP: + case NVPTX::SUST_P_2D_B32_TRAP: + case NVPTX::SUST_P_2D_V2B8_TRAP: + case NVPTX::SUST_P_2D_V2B16_TRAP: + case NVPTX::SUST_P_2D_V2B32_TRAP: + case NVPTX::SUST_P_2D_V4B8_TRAP: + case NVPTX::SUST_P_2D_V4B16_TRAP: + case NVPTX::SUST_P_2D_V4B32_TRAP: + case NVPTX::SUST_P_2D_ARRAY_B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_B16_TRAP: + case NVPTX::SUST_P_2D_ARRAY_B32_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP: + case 
NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP: + case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP: + case NVPTX::SUST_P_3D_B8_TRAP: + case NVPTX::SUST_P_3D_B16_TRAP: + case NVPTX::SUST_P_3D_B32_TRAP: + case NVPTX::SUST_P_3D_V2B8_TRAP: + case NVPTX::SUST_P_3D_V2B16_TRAP: + case NVPTX::SUST_P_3D_V2B32_TRAP: + case NVPTX::SUST_P_3D_V4B8_TRAP: + case NVPTX::SUST_P_3D_V4B16_TRAP: + case NVPTX::SUST_P_3D_V4B32_TRAP: { + // This is a surface store, so operand 0 is a surfref + MachineOperand &SurfHandle = MI.getOperand(0); + + replaceImageHandle(SurfHandle, MF); + + return true; + } + case NVPTX::TXQ_CHANNEL_ORDER: + case NVPTX::TXQ_CHANNEL_DATA_TYPE: + case NVPTX::TXQ_WIDTH: + case NVPTX::TXQ_HEIGHT: + case NVPTX::TXQ_DEPTH: + case NVPTX::TXQ_ARRAY_SIZE: + case NVPTX::TXQ_NUM_SAMPLES: + case NVPTX::TXQ_NUM_MIPMAP_LEVELS: + case NVPTX::SUQ_CHANNEL_ORDER: + case NVPTX::SUQ_CHANNEL_DATA_TYPE: + case NVPTX::SUQ_WIDTH: + case NVPTX::SUQ_HEIGHT: + case NVPTX::SUQ_DEPTH: + case NVPTX::SUQ_ARRAY_SIZE: { + // This is a query, so operand 1 is a surfref/texref + MachineOperand &Handle = MI.getOperand(1); + + replaceImageHandle(Handle, MF); + + return true; + } + } +} + +void NVPTXReplaceImageHandles:: +replaceImageHandle(MachineOperand &Op, MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + NVPTXMachineFunctionInfo *MFI = MF.getInfo(); + // Which instruction defines the handle? + MachineInstr *MI = MRI.getVRegDef(Op.getReg()); + assert(MI && "No def for image handle vreg?"); + MachineInstr &TexHandleDef = *MI; + + switch (TexHandleDef.getOpcode()) { + case NVPTX::LD_i64_avar: { + // The handle is a parameter value being loaded, replace with the + // parameter symbol + assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!"); + StringRef Sym = TexHandleDef.getOperand(6).getSymbolName(); + std::string ParamBaseName = MF.getName(); + ParamBaseName += "_param_"; + assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference"); + unsigned Param = atoi(Sym.data()+ParamBaseName.size()); + std::string NewSym; + raw_string_ostream NewSymStr(NewSym); + NewSymStr << MF.getFunction()->getName() << "_param_" << Param; + Op.ChangeToImmediate( + MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str())); + InstrsToRemove.insert(&TexHandleDef); + break; + } + case NVPTX::texsurf_handles: { + // The handle is a global variable, replace with the global variable name + assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!"); + const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal(); + assert(GV->hasName() && "Global sampler must be named!"); + Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data())); + InstrsToRemove.insert(&TexHandleDef); + break; + } + default: + llvm_unreachable("Unknown instruction operating on handle"); + } +} + +MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() { + return new NVPTXReplaceImageHandles(); +} diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index f8a692e..aa0436b 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -31,16 +31,16 @@ public: /// Override this as NVPTX has its own way of printing switching /// to a section. 
- virtual void PrintSwitchToSection(const MCAsmInfo &MAI, - raw_ostream &OS, - const MCExpr *Subsection) const {} + void PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS, + const MCExpr *Subsection) const override {} /// Base address of PTX sections is zero. - virtual bool isBaseAddressKnownZero() const { return true; } - virtual bool UseCodeAlign() const { return false; } - virtual bool isVirtualSection() const { return false; } - virtual std::string getLabelBeginName() const { return ""; } - virtual std::string getLabelEndName() const { return ""; } + bool isBaseAddressKnownZero() const override { return true; } + bool UseCodeAlign() const override { return false; } + bool isVirtualSection() const override { return false; } + std::string getLabelBeginName() const override { return ""; } + std::string getLabelEndName() const override { return ""; } }; } // end namespace llvm diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 9771a17..8c7df52 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -12,14 +12,16 @@ //===----------------------------------------------------------------------===// #include "NVPTXSubtarget.h" + +using namespace llvm; + +#define DEBUG_TYPE "nvptx-subtarget" + #define GET_SUBTARGETINFO_ENUM #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "NVPTXGenSubtargetInfo.inc" -using namespace llvm; - - // Pin the vtable to this file. void NVPTXSubtarget::anchor() {} diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index f99bebd..581e5ed 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -16,12 +16,11 @@ #include "NVPTX.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include #define GET_SUBTARGETINFO_HEADER #include "NVPTXGenSubtargetInfo.inc" -#include - namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { @@ -65,6 +64,10 @@ public: inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); } inline bool hasROT64() const { return SmVersion >= 20; } + bool hasImageHandles() const { + // Currently disabled + return false; + } bool is64Bit() const { return Is64Bit; } unsigned int getSmVersion() const { return SmVersion; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 7d7d793..26a4f84 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -16,7 +16,6 @@ #include "NVPTX.h" #include "NVPTXAllocaHoisting.h" #include "NVPTXLowerAggrCopies.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -50,6 +49,7 @@ namespace llvm { void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); +void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -62,6 +62,8 @@ extern "C" void LLVMInitializeNVPTXTarget() { initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); + initializeNVPTXFavorNonGenericAddrSpacesPass( + *PassRegistry::getPassRegistry()); } static std::string computeDataLayout(const NVPTXSubtarget &ST) { @@ -113,14 +115,14 @@ public: return getTM(); } - virtual 
void addIRPasses(); - virtual bool addInstSelector(); - virtual bool addPreRegAlloc(); - virtual bool addPostRegAlloc(); + void addIRPasses() override; + bool addInstSelector() override; + bool addPreRegAlloc() override; + bool addPostRegAlloc() override; - virtual FunctionPass *createTargetRegisterAllocator(bool) override; - virtual void addFastRegAlloc(FunctionPass *RegAllocPass); - virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass); + FunctionPass *createTargetRegisterAllocator(bool) override; + void addFastRegAlloc(FunctionPass *RegAllocPass) override; + void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; }; } // end anonymous namespace @@ -140,15 +142,42 @@ void NVPTXPassConfig::addIRPasses() { disablePass(&BranchFolderPassID); disablePass(&TailDuplicateID); + addPass(createNVPTXImageOptimizerPass()); TargetPassConfig::addIRPasses(); addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); + addPass(createNVPTXFavorNonGenericAddrSpacesPass()); + addPass(createSeparateConstOffsetFromGEPPass()); + // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used + // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates + // significantly better code than EarlyCSE for some of our benchmarks. + if (getOptLevel() == CodeGenOpt::Aggressive) + addPass(createGVNPass()); + else + addPass(createEarlyCSEPass()); + // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave + // some dead code. We could remove dead code in an ad-hoc manner, but that + // requires manual work and might be error-prone. + // + // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, + // and leaves them unused. + // + // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the + // old index and some of its intermediate results may become unused.
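// To sketch the reuse described above (illustrative IR in LLVM 3.5 GEP
// syntax, not the pass's literal output): two GEPs such as
//   %p = getelementptr float* %a, i64 %i1   ; %i1 = %i + 1
//   %q = getelementptr float* %a, i64 %i2   ; %i2 = %i + 2
// are rewritten to share one variadic base,
//   %b = getelementptr float* %a, i64 %i
//   %p = getelementptr float* %b, i64 1
//   %q = getelementptr float* %b, i64 2
// so GVN/EarlyCSE can commonize %b, and the now-unused index arithmetic is
// what the dead code elimination added below cleans up.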
+  addPass(createDeadCodeEliminationPass());
 }
 
 bool NVPTXPassConfig::addInstSelector() {
+  const NVPTXSubtarget &ST =
+      getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+
   addPass(createLowerAggrCopies());
   addPass(createAllocaHoisting());
   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+
+  if (!ST.hasImageHandles())
+    addPass(createNVPTXReplaceImageHandlesPass());
+
   return false;
 }
 
@@ -159,7 +188,7 @@ bool NVPTXPassConfig::addPostRegAlloc() {
 }
 
 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
-  return 0; // No reg alloc
+  return nullptr; // No reg alloc
 }
 
 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 5fbcf73..2db7c18 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -51,22 +51,22 @@ public:
                     const TargetOptions &Options, Reloc::Model RM,
                     CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
 
-  virtual const TargetFrameLowering *getFrameLowering() const {
+  const TargetFrameLowering *getFrameLowering() const override {
     return &FrameLowering;
   }
-  virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
-  virtual const DataLayout *getDataLayout() const { return &DL; }
-  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+  const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+  const DataLayout *getDataLayout() const override { return &DL; }
+  const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
 
-  virtual const NVPTXRegisterInfo *getRegisterInfo() const {
+  const NVPTXRegisterInfo *getRegisterInfo() const override {
     return &(InstrInfo.getRegisterInfo());
   }
 
-  virtual NVPTXTargetLowering *getTargetLowering() const {
+  NVPTXTargetLowering *getTargetLowering() const override {
     return const_cast<NVPTXTargetLowering *>(&TLInfo);
   }
 
-  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
+  const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
     return &TSInfo;
   }
 
@@ -79,17 +79,17 @@ public:
     return const_cast<ManagedStringPool *>(&ManagedStrPool);
   }
 
-  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+  TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
 
   // Emission of machine code through JITCodeEmitter is not supported.
-  virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
-                                          bool = true) {
+  bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
+                                  bool = true) override {
     return true;
   }
 
   // Emission of machine code through MCJIT is not supported.
-  virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
-                                 bool = true) {
+  bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
+                         bool = true) override {
     return true;
   }
 
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 2a7281e..0b438c5 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -22,26 +22,26 @@ class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
 public:
   NVPTXTargetObjectFile() {
-    TextSection = 0;
-    DataSection = 0;
-    BSSSection = 0;
-    ReadOnlySection = 0;
+    TextSection = nullptr;
+    DataSection = nullptr;
+    BSSSection = nullptr;
+    ReadOnlySection = nullptr;
 
-    StaticCtorSection = 0;
-    StaticDtorSection = 0;
-    LSDASection = 0;
-    EHFrameSection = 0;
-    DwarfAbbrevSection = 0;
-    DwarfInfoSection = 0;
-    DwarfLineSection = 0;
-    DwarfFrameSection = 0;
-    DwarfPubTypesSection = 0;
-    DwarfDebugInlineSection = 0;
-    DwarfStrSection = 0;
-    DwarfLocSection = 0;
-    DwarfARangesSection = 0;
-    DwarfRangesSection = 0;
-    DwarfMacroInfoSection = 0;
+    StaticCtorSection = nullptr;
+    StaticDtorSection = nullptr;
+    LSDASection = nullptr;
+    EHFrameSection = nullptr;
+    DwarfAbbrevSection = nullptr;
+    DwarfInfoSection = nullptr;
+    DwarfLineSection = nullptr;
+    DwarfFrameSection = nullptr;
+    DwarfPubTypesSection = nullptr;
+    DwarfDebugInlineSection = nullptr;
+    DwarfStrSection = nullptr;
+    DwarfLocSection = nullptr;
+    DwarfARangesSection = nullptr;
+    DwarfRangesSection = nullptr;
+    DwarfMacroInfoSection = nullptr;
   }
 
   virtual ~NVPTXTargetObjectFile();
 
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 60a5173..a9fd190b 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -22,9 +22,9 @@
 #include <map>
 #include <string>
 #include <vector>
-//#include <sstream>
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/IR/InstIterator.h"
+#include "llvm/Support/MutexGuard.h"
 
 using namespace llvm;
 
@@ -33,8 +33,15 @@ typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
 typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
 typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
 ManagedStatic<per_module_annot_t> annotationCache;
+static sys::Mutex Lock;
+
+void llvm::clearAnnotationCache(const llvm::Module *Mod) {
+  MutexGuard Guard(Lock);
+  annotationCache->erase(Mod);
+}
 
 static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
+  MutexGuard Guard(Lock);
   assert(md && "Invalid mdnode for annotation");
   assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
   // start index = 1, to skip the global variable key
@@ -60,6 +67,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
 }
 
 static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
+  MutexGuard Guard(Lock);
   NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
   if (!NMD)
     return;
@@ -92,6 +100,7 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
 
 bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
                                  unsigned &retval) {
+  MutexGuard Guard(Lock);
   const Module *m = gv->getParent();
   if ((*annotationCache).find(m) == (*annotationCache).end())
     cacheAnnotationFromMD(m, gv);
@@ -105,6 +114,7 @@ bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
 
 bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
                                  std::vector<unsigned> &retval) {
+  MutexGuard Guard(Lock);
   const Module *m = gv->getParent();
   if ((*annotationCache).find(m) == (*annotationCache).end())
     cacheAnnotationFromMD(m, gv);
@@ -195,8 +205,37 @@ bool
 llvm::isImageWriteOnly(const llvm::Value &val) {
   return false;
 }
 
+bool llvm::isImageReadWrite(const llvm::Value &val) {
+  if (const Argument *arg = dyn_cast<Argument>(&val)) {
+    const Function *func = arg->getParent();
+    std::vector<unsigned> annot;
+    if (llvm::findAllNVVMAnnotation(func,
+                                    llvm::PropertyAnnotationNames[
+                                        llvm::PROPERTY_ISREADWRITE_IMAGE_PARAM],
+                                    annot)) {
+      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+        return true;
+    }
+  }
+  return false;
+}
+
 bool llvm::isImage(const llvm::Value &val) {
-  return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
+  return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val) ||
+         llvm::isImageReadWrite(val);
+}
+
+bool llvm::isManaged(const llvm::Value &val) {
+  if(const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+    unsigned annot;
+    if(llvm::findOneNVVMAnnotation(gv,
+                          llvm::PropertyAnnotationNames[llvm::PROPERTY_MANAGED],
+                          annot)) {
+      assert((annot == 1) && "Unexpected annotation on a managed symbol");
+      return true;
+    }
+  }
+  return false;
 }
 
 std::string llvm::getTextureName(const llvm::Value &val) {
@@ -354,12 +393,12 @@ llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
 const Value *
 llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
   if (processed.find(V) != processed.end())
-    return NULL;
+    return nullptr;
   processed.insert(V);
 
   const Value *V2 = V->stripPointerCasts();
   if (V2 != V && processed.find(V2) != processed.end())
-    return NULL;
+    return nullptr;
   processed.insert(V2);
 
   V = V2;
@@ -375,20 +414,20 @@ llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
       continue;
     } else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
       if (V != V2 && processed.find(V) != processed.end())
-        return NULL;
+        return nullptr;
       processed.insert(PN);
-      const Value *common = 0;
+      const Value *common = nullptr;
       for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
         const Value *pv = PN->getIncomingValue(i);
         const Value *base = skipPointerTransfer(pv, processed);
         if (base) {
-          if (common == 0)
+          if (!common)
             common = base;
           else if (common != base)
             return PN;
         }
       }
-      if (common == 0)
+      if (!common)
         return PN;
       V = common;
     }
@@ -406,7 +445,7 @@ BasicBlock *llvm::getParentBlock(Value *v) {
   if (Instruction *I = dyn_cast<Instruction>(v))
     return I->getParent();
 
-  return 0;
+  return nullptr;
 }
 
 Function *llvm::getParentFunction(Value *v) {
@@ -419,13 +458,13 @@ Function *llvm::getParentFunction(Value *v) {
   if (BasicBlock *B = dyn_cast<BasicBlock>(v))
     return B->getParent();
 
-  return 0;
+  return nullptr;
 }
 
 // Dump a block by name
 void llvm::dumpBlock(Value *v, char *blockName) {
   Function *F = getParentFunction(v);
-  if (F == 0)
+  if (!F)
     return;
 
   for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
@@ -440,8 +479,8 @@ void llvm::dumpBlock(Value *v, char *blockName) {
 // Find an instruction by name
 Instruction *llvm::getInst(Value *base, char *instName) {
   Function *F = getParentFunction(base);
-  if (F == 0)
-    return 0;
+  if (!F)
+    return nullptr;
 
   for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
     Instruction *I = &*it;
@@ -450,7 +489,7 @@ Instruction *llvm::getInst(Value *base, char *instName) {
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 // Dump an instruction by nane
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index a208004..446bfa1 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -28,6 +28,8 @@ namespace llvm {
 #define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
 #define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
 
+void clearAnnotationCache(const llvm::Module *);
+
 bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
 bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
                            std::vector<unsigned> &);
@@ -38,6 +40,8 @@ bool isSampler(const llvm::Value &);
 bool isImage(const llvm::Value &);
 bool isImageReadOnly(const llvm::Value &);
 bool isImageWriteOnly(const llvm::Value &);
+bool isImageReadWrite(const llvm::Value &);
+bool isManaged(const llvm::Value &);
 
 std::string getTextureName(const llvm::Value &);
 std::string getSurfaceName(const llvm::Value &);
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 8b5444a..cb8bd72 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -38,6 +38,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "nvptx-reflect"
+
 namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
 
 namespace {
@@ -49,13 +51,13 @@ private:
 
 public:
   static char ID;
-  NVVMReflect() : ModulePass(ID), ReflectFunction(0) {
+  NVVMReflect() : ModulePass(ID), ReflectFunction(nullptr) {
     initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
     VarMap.clear();
   }
 
-  NVVMReflect(const StringMap<int> &Mapping)
-      : ModulePass(ID), ReflectFunction(0) {
+  NVVMReflect(const StringMap<int> &Mapping)
+      : ModulePass(ID), ReflectFunction(nullptr) {
     initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
     for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();
          I != E; ++I) {
@@ -63,8 +65,10 @@ public:
     }
   }
 
-  void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
-  virtual bool runOnModule(Module &);
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+  bool runOnModule(Module &) override;
 
   void setVarMap();
 };
@@ -126,7 +130,7 @@ bool NVVMReflect::runOnModule(Module &M) {
 
   // If reflect function is not used, then there will be
   // no entry in the module.
- if (ReflectFunction == 0) + if (!ReflectFunction) return false; // Validate _reflect function diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt index 02ebf1d..801f27b 100644 --- a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt +++ b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt --------------*- Conf -*--===; +;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt -------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -19,5 +19,5 @@ type = Library name = PowerPCAsmParser parent = PowerPC -required_libraries = PowerPCDesc PowerPCInfo MC MCParser Support +required_libraries = MC MCParser PowerPCDesc PowerPCInfo Support add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 8bb91cf..3ac037d 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -230,7 +230,7 @@ class PPCAsmParser : public MCTargetAsmParser { bool MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal); - virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; const MCExpr *ExtractModifierFromExpr(const MCExpr *E, PPCMCExpr::VariantKind &Variant); @@ -248,7 +248,7 @@ class PPCAsmParser : public MCTargetAsmParser { bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl &Operands, MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm); + bool MatchingInlineAsm) override; void ProcessInstruction(MCInst &Inst, const SmallVectorImpl &Ops); @@ -264,7 +264,8 @@ class PPCAsmParser : public MCTargetAsmParser { public: PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &_MII) + const MCInstrInfo &_MII, + const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); @@ -275,17 +276,18 @@ public: setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } - virtual bool ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - SmallVectorImpl &Operands); + bool ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + SmallVectorImpl &Operands) override; - virtual bool ParseDirective(AsmToken DirectiveID); + bool ParseDirective(AsmToken DirectiveID) override; - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind); + unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned Kind) override; - virtual const MCExpr *applyModifierToExpr(const MCExpr *E, - MCSymbolRefExpr::VariantKind, - MCContext &Ctx); + const MCExpr *applyModifierToExpr(const MCExpr *E, + MCSymbolRefExpr::VariantKind, + MCContext &Ctx) override; }; /// PPCOperand - Instances of this class represent a parsed PowerPC machine @@ -350,10 +352,10 @@ public: } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { return StartLoc; } + SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { return EndLoc; } + SMLoc getEndLoc() const override { return EndLoc; } /// isPPC64 - True if this operand is for an instruction in 64-bit mode. 
bool isPPC64() const { return IsPPC64; } @@ -378,7 +380,7 @@ public: return TLSReg.Sym; } - unsigned getReg() const { + unsigned getReg() const override { assert(isRegNumber() && "Invalid access!"); return (unsigned) Imm.Val; } @@ -403,8 +405,8 @@ public: return 7 - countTrailingZeros(Imm.Val); } - bool isToken() const { return Kind == Token; } - bool isImm() const { return Kind == Immediate || Kind == Expression; } + bool isToken() const override { return Kind == Token; } + bool isImm() const override { return Kind == Immediate || Kind == Expression; } bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); } bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); } bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); } @@ -437,8 +439,8 @@ public: && isUInt<5>(getImm())); } bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) && isPowerOf2_32(getImm()); } - bool isMem() const { return false; } - bool isReg() const { return false; } + bool isMem() const override { return false; } + bool isReg() const override { return false; } void addRegOperands(MCInst &Inst, unsigned N) const { llvm_unreachable("addRegOperands"); @@ -544,7 +546,7 @@ public: return StringRef(Tok.Data, Tok.Length); } - virtual void print(raw_ostream &OS) const; + void print(raw_ostream &OS) const override; static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) { PPCOperand *Op = new PPCOperand(Token); @@ -1021,7 +1023,7 @@ ExtractModifierFromExpr(const MCExpr *E, switch (E->getKind()) { case MCExpr::Target: case MCExpr::Constant: - return 0; + return nullptr; case MCExpr::SymbolRef: { const MCSymbolRefExpr *SRE = cast(E); @@ -1049,7 +1051,7 @@ ExtractModifierFromExpr(const MCExpr *E, Variant = PPCMCExpr::VK_PPC_HIGHESTA; break; default: - return 0; + return nullptr; } return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context); @@ -1059,7 +1061,7 @@ ExtractModifierFromExpr(const MCExpr *E, const MCUnaryExpr *UE = cast(E); const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant); if (!Sub) - return 0; + return nullptr; return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context); } @@ -1070,7 +1072,7 @@ ExtractModifierFromExpr(const MCExpr *E, const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant); if (!LHS && !RHS) - return 0; + return nullptr; if (!LHS) LHS = BE->getLHS(); if (!RHS) RHS = BE->getRHS(); @@ -1082,7 +1084,7 @@ ExtractModifierFromExpr(const MCExpr *E, else if (LHSVariant == RHSVariant) Variant = LHSVariant; else - return 0; + return nullptr; return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context); } @@ -1593,6 +1595,6 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E, case MCSymbolRefExpr::VK_PPC_HIGHESTA: return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx); default: - return 0; + return nullptr; } } diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt index 7f29040..c1011ff 100644 --- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt +++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = PowerPCDisassembler parent = PowerPC -required_libraries = MC Support PowerPCDesc PowerPCInfo +required_libraries = MC PowerPCDesc PowerPCInfo Support add_to_library_groups = PowerPC diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index c4a7544..a2305a9 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ 
b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -17,13 +17,15 @@ using namespace llvm; +#define DEBUG_TYPE "ppc-disassembler" + typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { class PPCDisassembler : public MCDisassembler { public: - PPCDisassembler(const MCSubtargetInfo &STI) - : MCDisassembler(STI) {} + PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} virtual ~PPCDisassembler() {} // Override MCDisassembler. @@ -37,8 +39,9 @@ public: } // end anonymous namespace static MCDisassembler *createPPCDisassembler(const Target &T, - const MCSubtargetInfo &STI) { - return new PPCDisassembler(STI); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new PPCDisassembler(STI, Ctx); } extern "C" void LLVMInitializePowerPCDisassembler() { diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index dc54b52..7279b09 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "PPCInstPrinter.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" @@ -23,6 +22,8 @@ #include "llvm/Target/TargetOpcodes.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + // FIXME: Once the integrated assembler supports full register names, tie this // to the verbose-asm setting. static cl::opt diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 4d1df78..211a628 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -31,8 +31,8 @@ public: return IsDarwin; } - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; // Autogenerated by tblgen. 
void printInstruction(const MCInst *MI, raw_ostream &O); @@ -41,7 +41,7 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O, const char *Modifier = 0); + raw_ostream &O, const char *Modifier = nullptr); void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index f7309bb..12584be 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -77,9 +77,11 @@ public: PPCAsmBackend(const Target &T, bool isLittle) : MCAsmBackend(), TheTarget(T), IsLittleEndian(isLittle) {} - unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; } + unsigned getNumFixupKinds() const override { + return PPC::NumTargetFixupKinds; + } - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = { // name offset bits flags { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel }, @@ -110,7 +112,7 @@ public: } void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const { + uint64_t Value, bool IsPCRel) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. @@ -126,7 +128,7 @@ public: } } - bool mayNeedRelaxation(const MCInst &Inst) const { + bool mayNeedRelaxation(const MCInst &Inst) const override { // FIXME. return false; } @@ -134,18 +136,18 @@ public: bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { + const MCAsmLayout &Layout) const override { // FIXME. llvm_unreachable("relaxInstruction() unimplemented"); } - void relaxInstruction(const MCInst &Inst, MCInst &Res) const { + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { // FIXME. 
llvm_unreachable("relaxInstruction() unimplemented"); } - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) OW->Write32(0x60000000); @@ -180,7 +182,7 @@ namespace { public: DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCMachObjectWriter( OS, @@ -197,7 +199,7 @@ namespace { PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { bool is64 = getPointerSize() == 8; return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index d19f6a0..cd3b4f4 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -41,11 +41,12 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI) PPCELFObjectWriter::~PPCELFObjectWriter() { } -static MCSymbolRefExpr::VariantKind getAccessVariant(const MCFixup &Fixup) { +static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target, + const MCFixup &Fixup) { const MCExpr *Expr = Fixup.getValue(); if (Expr->getKind() != MCExpr::Target) - return Fixup.getAccessVariant(); + return Target.getAccessVariant(); switch (cast(Expr)->getKind()) { case PPCMCExpr::VK_PPC_None: @@ -72,7 +73,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Fixup); + MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup); // determine the type of the relocation unsigned Type; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp index 18609e1..b95a2ac 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp @@ -28,7 +28,7 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) { ExceptionsType = ExceptionHandling::DwarfCFI; if (!is64Bit) - Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. + Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode. AssemblerDialect = 1; // New-Style mnemonics. SupportsDebugInformation= true; // Debug information. @@ -71,7 +71,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) { ExceptionsType = ExceptionHandling::DwarfCFI; ZeroDirective = "\t.space\t"; - Data64bitsDirective = is64Bit ? "\t.quad\t" : 0; + Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr; AssemblerDialect = 1; // New-Style mnemonics. 
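In both PPCMCAsmInfo constructors above, a null Data64bitsDirective is how MCAsmInfo expresses "this target cannot emit a 64-bit data unit with one directive": the Darwin variant nulls it for 32-bit targets, and the Linux variant picks "\t.quad\t" only in 64-bit mode. A consumer-side sketch of how such a nullable directive is typically honored (illustrative only; emitData64 is a hypothetical helper, and byte-order handling is simplified):

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>
    using namespace llvm;

    // Emit a 64-bit datum, honoring a target that lacks a 64-bit directive.
    static void emitData64(raw_ostream &OS, const MCAsmInfo &MAI, uint64_t V) {
      if (const char *Directive = MAI.getData64bitsDirective()) {
        OS << Directive << V << '\n'; // e.g. "\t.quad\t" on 64-bit PPC Linux
      } else {
        // PPC32 case: no single directive, so split into two 32-bit units
        // (big-endian order shown).
        OS << MAI.getData32bitsDirective() << uint32_t(V >> 32) << '\n';
        OS << MAI.getData32bitsDirective() << uint32_t(V) << '\n';
      }
    }

The PPCLinuxMCAsmInfo constructor continues directly below.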
if (T.getOS() == llvm::Triple::FreeBSD || diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index cee2cb7..754330b 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -21,13 +21,13 @@ namespace llvm { class Triple; class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); + void anchor() override; public: explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&); }; class PPCLinuxMCAsmInfo : public MCAsmInfoELF { - virtual void anchor(); + void anchor() override; public: explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&); }; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index b259c5d..a4983ad 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCFixupKinds.h" #include "llvm/ADT/Statistic.h" @@ -26,6 +25,8 @@ #include "llvm/Target/TargetOpcodes.h" using namespace llvm; +#define DEBUG_TYPE "mccodeemitter" + STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { @@ -88,7 +89,7 @@ public: const MCSubtargetInfo &STI) const; void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { + const MCSubtargetInfo &STI) const override { // For fast-isel, a float COPY_TO_REGCLASS can survive this long. // It's just a nop to keep the register classes happy, so don't // generate anything. diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index c181e03..10d068d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppcmcexpr" #include "PPCMCExpr.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" @@ -15,6 +14,8 @@ using namespace llvm; +#define DEBUG_TYPE "ppcmcexpr" + const PPCMCExpr* PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr, bool isDarwin, MCContext &Ctx) { diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 5fc7918..3421b91 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -76,16 +76,16 @@ public: /// @} - void PrintImpl(raw_ostream &OS) const; + void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - void AddValueSymbols(MCAssembler *) const; - const MCSection *FindAssociatedSection() const { + const MCAsmLayout *Layout) const override; + void AddValueSymbols(MCAssembler *) const override; + const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } // There are no TLS PPCMCExprs at the moment. 
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index 105c511..7057797 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -26,6 +26,8 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "PPCGenInstrInfo.inc" @@ -35,8 +37,6 @@ #define GET_REGINFO_MC_DESC #include "PPCGenRegisterInfo.inc" -using namespace llvm; - // Pin the vtable to this file. PPCTargetStreamer::~PPCTargetStreamer() {} PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} @@ -80,7 +80,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { // Initial state of the frame pointer is R1. unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1; MCCFIInstruction Inst = - MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0); + MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0); MAI->addInitialFrameState(Inst); return MAI; @@ -115,14 +115,14 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer { public: PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) : PPCTargetStreamer(S), OS(OS) {} - virtual void emitTCEntry(const MCSymbol &S) { + void emitTCEntry(const MCSymbol &S) override { OS << "\t.tc "; OS << S.getName(); OS << "[TC],"; OS << S.getName(); OS << '\n'; } - virtual void emitMachine(StringRef CPU) { + void emitMachine(StringRef CPU) override { OS << "\t.machine " << CPU << '\n'; } }; @@ -130,11 +130,11 @@ public: class PPCTargetELFStreamer : public PPCTargetStreamer { public: PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} - virtual void emitTCEntry(const MCSymbol &S) { + void emitTCEntry(const MCSymbol &S) override { // Creates a R_PPC64_TOC relocation Streamer.EmitSymbolValue(&S, 8); } - virtual void emitMachine(StringRef CPU) { + void emitMachine(StringRef CPU) override { // FIXME: Is there anything to do in here or does this directive only // limit the parser? } @@ -143,10 +143,10 @@ public: class PPCTargetMachOStreamer : public PPCTargetStreamer { public: PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {} - virtual void emitTCEntry(const MCSymbol &S) { + void emitTCEntry(const MCSymbol &S) override { llvm_unreachable("Unknown pseudo-op: .tc"); } - virtual void emitMachine(StringRef CPU) { + void emitMachine(StringRef CPU) override { // FIXME: We should update the CPUType, CPUSubType in the Object file if // the new values are different from the defaults. 
} @@ -175,13 +175,12 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCStreamer * createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useCFI, bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = - llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory, - InstPrint, CE, TAB, ShowInst); + MCStreamer *S = llvm::createAsmStreamer( + Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); new PPCTargetAsmStreamer(*S, OS); return S; } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index bbafe2e..cff27ba 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -44,7 +44,7 @@ public: void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, - uint64_t &FixedValue) { + uint64_t &FixedValue) override { if (Writer->is64Bit()) { report_fatal_error("Relocation emission for MachO/PPC64 unimplemented."); } else @@ -206,7 +206,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); + const MCSymbolData *A_SD = &Asm.getSymbolData(*A); if (!A_SD->getFragment()) report_fatal_error("symbol '" + A->getName() + @@ -219,7 +219,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation( uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) report_fatal_error("symbol '" + B->getSymbol().getName() + @@ -324,7 +324,7 @@ void PPCMachObjectWriter::RecordPPCRelocation( // this doesn't seem right for RIT_PPC_BR24 // Get the symbol data, if any. 
- MCSymbolData *SD = 0; + const MCSymbolData *SD = nullptr; if (Target.getSymA()) SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 9ce8ea9..e89fb2d 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asmprinter" #include "PPC.h" #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCExpr.h" @@ -59,6 +58,8 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; +#define DEBUG_TYPE "asmprinter" + namespace { class PPCAsmPrinter : public AsmPrinter { protected: @@ -70,22 +71,22 @@ namespace { : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget()), TOCLabelID(0) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "PowerPC Assembly Printer"; } MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); - virtual void EmitInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) override; void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; }; /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux @@ -94,15 +95,15 @@ namespace { explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : PPCAsmPrinter(TM, Streamer) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "Linux PPC Assembly Printer"; } - bool doFinalization(Module &M); + bool doFinalization(Module &M) override; - virtual void EmitFunctionEntryLabel(); + void EmitFunctionEntryLabel() override; - void EmitFunctionBodyEnd(); + void EmitFunctionBodyEnd() override; }; /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac @@ -112,12 +113,12 @@ namespace { explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : PPCAsmPrinter(TM, Streamer) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "Darwin PPC Assembly Printer"; } - bool doFinalization(Module &M); - void EmitStartOfAsmFile(Module &M); + bool doFinalization(Module &M) override; + void EmitStartOfAsmFile(Module &M) override; void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs); }; @@ -180,7 +181,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo() .getGVStubEntry(SymToPrint); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else if (GV->isDeclaration() || GV->hasCommonLinkage() || @@ -190,7 +191,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, MachineModuleInfoImpl::StubValueTy &StubSym = MMI->getObjFileInfo(). 
getHiddenGVStubEntry(SymToPrint); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else { @@ -207,7 +208,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, } default: - O << ""; + O << ""; return; } } @@ -288,9 +289,9 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) { MCSymbol *&TOCEntry = TOC[Sym]; // To avoid name clash check if the name already exists. - while (TOCEntry == 0) { + while (!TOCEntry) { if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) + - "C" + Twine(TOCLabelID++)) == 0) { + "C" + Twine(TOCLabelID++)) == nullptr) { TOCEntry = GetTempSymbol("C", TOCLabelID); } } @@ -342,7 +343,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Map symbol -> label of TOC entry assert(MO.isGlobal() || MO.isCPI() || MO.isJTI()); - MCSymbol *MOSymbol = 0; + MCSymbol *MOSymbol = nullptr; if (MO.isGlobal()) MOSymbol = getSymbol(MO.getGlobal()); else if (MO.isCPI()) @@ -372,23 +373,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(2); assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) && "Invalid operand for ADDIStocHA!"); - MCSymbol *MOSymbol = 0; + MCSymbol *MOSymbol = nullptr; bool IsExternal = false; bool IsFunction = false; bool IsCommon = false; bool IsAvailExt = false; if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = - GAlias ? GAlias->getAliasedGlobal() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - IsExternal = GVar && !GVar->hasInitializer(); - IsCommon = GVar && RealGValue->hasCommonLinkage(); - IsFunction = !GVar; - IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage(); + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + IsExternal = GV->isDeclaration(); + IsCommon = GV->hasCommonLinkage(); + IsFunction = GV->getType()->getElementType()->isFunctionTy(); + IsAvailExt = GV->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) @@ -416,7 +413,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(1); assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) && "Invalid operand for LDtocL!"); - MCSymbol *MOSymbol = 0; + MCSymbol *MOSymbol = nullptr; if (MO.isJTI()) MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex())); @@ -427,14 +424,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { } else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = - GAlias ? 
GAlias->getAliasedGlobal() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - - if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage() || + MOSymbol = getSymbol(GValue); + if (GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } @@ -456,19 +448,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { TmpInst.setOpcode(PPC::ADDI8); const MachineOperand &MO = MI->getOperand(2); assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); - MCSymbol *MOSymbol = 0; + MCSymbol *MOSymbol = nullptr; bool IsExternal = false; bool IsFunction = false; if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = - GAlias ? GAlias->getAliasedGlobal() : GValue; - MOSymbol = getSymbol(RealGValue); - const GlobalVariable *GVar = dyn_cast(RealGValue); - IsExternal = GVar && !GVar->hasInitializer(); - IsFunction = !GVar; + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + IsExternal = GV->isDeclaration(); + IsFunction = GV->getType()->getElementType()->isFunctionTy(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 9276211..ee90671 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppc-branch-select" #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCInstrBuilder.h" @@ -26,6 +25,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "ppc-branch-select" + STATISTIC(NumExpanded, "Number of branches expanded to long format"); namespace llvm { @@ -42,9 +43,9 @@ namespace { /// BlockSizes - The sizes of the basic blocks in the function. 
std::vector BlockSizes; - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "PowerPC Branch Selector"; } }; @@ -112,7 +113,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { unsigned MBBStartOffset = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { - MachineBasicBlock *Dest = 0; + MachineBasicBlock *Dest = nullptr; if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm()) Dest = I->getOperand(2).getMBB(); else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) && diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index 9c5db50..ec1e34d 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -23,8 +23,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ctrloops" - #include "llvm/Transforms/Scalar.h" #include "PPC.h" #include "PPCTargetMachine.h" @@ -61,6 +59,8 @@ using namespace llvm; +#define DEBUG_TYPE "ctrloops" + #ifndef NDEBUG static cl::opt CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1)); #endif @@ -84,16 +84,16 @@ namespace { public: static char ID; - PPCCTRLoops() : FunctionPass(ID), TM(0) { + PPCCTRLoops() : FunctionPass(ID), TM(nullptr) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) { initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -128,12 +128,12 @@ namespace { initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry()); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; private: MachineDominatorTree *MDT; @@ -172,7 +172,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) { SE = &getAnalysis(); DT = &getAnalysis().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; LibInfo = getAnalysisIfAvailable(); bool MadeChange = false; @@ -370,6 +370,14 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { J->getOpcode() == Instruction::URem || J->getOpcode() == Instruction::SRem)) { return true; + } else if (TT.isArch32Bit() && + isLargeIntegerTy(false, J->getType()->getScalarType()) && + (J->getOpcode() == Instruction::Shl || + J->getOpcode() == Instruction::AShr || + J->getOpcode() == Instruction::LShr)) { + // Only on PPC32, for 128-bit integers (specifically not 64-bit + // integers), these might be runtime calls. + return true; } else if (isa(J) || isa(J)) { // On PowerPC, indirect jumps use the counter register. 
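The new mightUseCTR case above (the Shl/AShr/LShr check guarded by TT.isArch32Bit()) exists because a shift of an integer wider than 64 bits can be expanded to a compiler-runtime call on 32-bit PowerPC, and any real call clobbers the CTR register that a converted hardware loop relies on. A condensed restatement of that predicate (a sketch, assuming isLargeIntegerTy(false, T) means "integer type wider than 64 bits"):

    #include "llvm/ADT/Triple.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    // True when a shift might lower to a runtime-library call on PPC32.
    static bool shiftMayBecomeCall(const Triple &TT, const Instruction *J) {
      Type *T = J->getType()->getScalarType();
      bool WiderThan64 = T->isIntegerTy() && T->getPrimitiveSizeInBits() > 64;
      return TT.isArch32Bit() && WiderThan64 &&
             (J->getOpcode() == Instruction::Shl ||
              J->getOpcode() == Instruction::AShr ||
              J->getOpcode() == Instruction::LShr);
    }

The indirect-jump case that follows returns true for the same underlying reason: BCTR-style jumps consume the counter register directly.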
return true; @@ -424,9 +432,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); - BasicBlock *CountedExitBlock = 0; - const SCEV *ExitCount = 0; - BranchInst *CountedExitBranch = 0; + BasicBlock *CountedExitBlock = nullptr; + const SCEV *ExitCount = nullptr; + BranchInst *CountedExitBranch = nullptr; for (SmallVectorImpl::iterator I = ExitingBlocks.begin(), IE = ExitingBlocks.end(); I != IE; ++I) { const SCEV *EC = SE->getExitCount(L, *I); diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp index 84fc888..0875523 100644 --- a/lib/Target/PowerPC/PPCCodeEmitter.cpp +++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp @@ -32,7 +32,7 @@ namespace { JITCodeEmitter &MCE; MachineModuleInfo *MMI; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -73,11 +73,13 @@ namespace { unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const; - const char *getPassName() const { return "PowerPC Machine Code Emitter"; } + const char *getPassName() const override { + return "PowerPC Machine Code Emitter"; + } /// runOnMachineFunction - emits the given MachineFunction to memory /// - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; /// emitBasicBlock - emits the given MachineBasicBlock to memory /// @@ -102,7 +104,7 @@ bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) { MMI = &getAnalysis(); MCE.setModuleInfo(MMI); do { - MovePCtoLROffset = 0; + MovePCtoLROffset = nullptr; MCE.startFunction(MF); for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) emitBasicBlock(*BB); diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index dd45683..ed3cb4d 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppcfastisel" #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCISelLowering.h" @@ -58,6 +57,8 @@ //===----------------------------------------------------------------------===// using namespace llvm; +#define DEBUG_TYPE "ppcfastisel" + namespace { typedef struct Address { @@ -85,7 +86,7 @@ class PPCFastISel final : public FastISel { const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; - const PPCSubtarget &PPCSubTarget; + const PPCSubtarget *PPCSubTarget; LLVMContext *Context; public: @@ -95,31 +96,29 @@ class PPCFastISel final : public FastISel { TM(FuncInfo.MF->getTarget()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - PPCSubTarget( - *((static_cast(&TM))->getSubtargetImpl()) - ), + PPCSubTarget(&TM.getSubtarget()), Context(&FuncInfo.Fn->getContext()) { } // Backend specific FastISel code. 
private: - virtual bool TargetSelectInstruction(const Instruction *I); - virtual unsigned TargetMaterializeConstant(const Constant *C); - virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); - virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, - const LoadInst *LI); - virtual bool FastLowerArguments(); - virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm); - virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm); - virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill); - virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill); + bool TargetSelectInstruction(const Instruction *I) override; + unsigned TargetMaterializeConstant(const Constant *C) override; + unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; + bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) override; + bool FastLowerArguments() override; + unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override; + unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm); + unsigned FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill); + unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill); // Instruction selection routines. private: @@ -282,7 +281,7 @@ bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { // Given a value Obj, create an Address object Addr that represents its // address. Return false if we can't handle it. bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) { - const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(Obj)) { // Don't walk into other basic blocks unless the object is an alloca from @@ -556,7 +555,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) { // to constrain RA from using R0/X0 when this is not legal. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = - AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; unsigned ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC)) @@ -739,7 +738,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2, return false; MVT SrcVT = SrcEVT.getSimpleVT(); - if (SrcVT == MVT::i1 && PPCSubTarget.useCRBits()) + if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits()) return false; // See if operand 2 is an immediate encodeable in the compare. @@ -900,7 +899,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg, if (!IsSigned) { LoadOpc = PPC::LFIWZX; Addr.Offset = 4; - } else if (PPCSubTarget.hasLFIWAX()) { + } else if (PPCSubTarget->hasLFIWAX()) { LoadOpc = PPC::LFIWAX; Addr.Offset = 4; } @@ -941,7 +940,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // We can only lower an unsigned convert if we have the newer // floating-point conversion operations. 
- if (!IsSigned && !PPCSubTarget.hasFPCVT()) + if (!IsSigned && !PPCSubTarget->hasFPCVT()) return false; // FIXME: For now we require the newer floating-point conversion operations @@ -949,7 +948,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) { // to single-precision float. Otherwise we have to generate a lot of // fiddly code to avoid double rounding. If necessary, the fiddly code // can be found in PPCTargetLowering::LowerINT_TO_FP(). - if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT()) + if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT()) return false; // Extend the input if necessary. @@ -1012,7 +1011,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT, // to determine the required register class. unsigned AssignedReg = FuncInfo.ValueMap[I]; const TargetRegisterClass *RC = - AssignedReg ? MRI.getRegClass(AssignedReg) : 0; + AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr; unsigned ResultReg = 0; if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned)) @@ -1064,7 +1063,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (IsSigned) Opc = PPC::FCTIWZ; else - Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; + Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ; else Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ; @@ -1863,7 +1862,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { if (!GVar) { // If GV is an alias, use the aliasee for determining thread-locality. if (const GlobalAlias *GA = dyn_cast(GV)) - GVar = dyn_cast_or_null(GA->getAliasedGlobal()); + GVar = dyn_cast_or_null(GA->getAliasee()); } // FIXME: We don't yet handle the complexity of TLS. @@ -2001,7 +2000,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) { // If we're using CR bit registers for i1 values, handle that as a special // case first. - if (VT == MVT::i1 && PPCSubTarget.useCRBits()) { + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { const ConstantInt *CI = cast(C); unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -2149,7 +2148,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, unsigned ResultReg = MI->getOperand(0).getReg(); - if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt)) + if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt)) return false; MI->eraseFromParent(); @@ -2175,7 +2174,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) { // If we're using CR bit registers for i1 values, handle that as a special // case first. - if (VT == MVT::i1 && PPCSubTarget.useCRBits()) { + if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg); @@ -2261,6 +2260,6 @@ namespace llvm { if (Subtarget->isPPC64() && Subtarget->isSVR4ABI()) return new PPCFastISel(FuncInfo, LibInfo); - return 0; + return nullptr; } } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index d8f491f..e294156 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -222,7 +222,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, if (!DisableRedZone && (Subtarget.isPPC64() || // 32-bit SVR4, no stack- !Subtarget.isSVR4ABI() || // allocated locals. 
- FrameSize == 0) && + FrameSize == 0) && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. @@ -281,8 +281,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { // Naked functions have no stack frame pushed, so we don't have a frame // pointer. - if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::Naked)) return false; return MF.getTarget().Options.DisableFramePointerElim(MF) || @@ -426,7 +426,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { - FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); + FPOffset = + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); } } @@ -562,13 +563,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { assert(NegFrameSize); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); if (HasFP) { unsigned Reg = MRI->getDwarfRegNum(FPReg, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } @@ -576,7 +578,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = MRI->getDwarfRegNum(BPReg, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } @@ -584,7 +586,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = MRI->getDwarfRegNum(LRReg, true); CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } @@ -601,7 +603,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } @@ -629,7 +631,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(PPC::CR2, true), 8)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); continue; } @@ -637,7 +639,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); - BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } } 
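The prologue hunks above replace the backend-local PPC::CFI_INSTRUCTION opcode with the generic TargetOpcode::CFI_INSTRUCTION while keeping the two-step emission idiom: record an MCCFIInstruction with MachineModuleInfo, then plant a marker MachineInstr that carries only the returned index. A self-contained sketch of that idiom (an illustrative helper, not code from this patch):

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/CodeGen/MachineModuleInfo.h"
    #include "llvm/MC/MCDwarf.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetOpcodes.h"
    using namespace llvm;

    // Emit ".cfi_offset <DwarfReg>, <Offset>" at MBBI.
    static void emitCFIOffset(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, DebugLoc dl,
                              MachineModuleInfo &MMI,
                              const TargetInstrInfo &TII, unsigned DwarfReg,
                              int Offset) {
      // Step 1: record the unwind rule; MMI owns the table of CFI entries.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      // Step 2: plant a target-independent marker carrying only the index;
      // the AsmPrinter later expands it to the actual .cfi_* directive.
      BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

Using the TargetOpcode form lets target-independent machine passes recognize CFI markers without knowing each backend's private pseudo-opcode, which is presumably why the rebase touches every such BuildMI call. The epilogue hunk continues below.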
@@ -712,7 +714,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { - FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); + FPOffset = + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); } } @@ -930,9 +933,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } - // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the + // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the // function uses CR 2, 3, or 4. - if (!isPPC64 && !isDarwinABI && + if (!isPPC64 && !isDarwinABI && (MRI.isPhysRegUsed(PPC::CR2) || MRI.isPhysRegUsed(PPC::CR3) || MRI.isPhysRegUsed(PPC::CR4))) { @@ -1106,10 +1109,10 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, unsigned Reg = CSI[i].getReg(); if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) - // Leave Darwin logic as-is. - || (!Subtarget.isSVR4ABI() && - (PPC::CRBITRCRegClass.contains(Reg) || - PPC::CRRCRegClass.contains(Reg)))) { + // Leave Darwin logic as-is. + || (!Subtarget.isSVR4ABI() && + (PPC::CRBITRCRegClass.contains(Reg) || + PPC::CRRCRegClass.contains(Reg)))) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -1190,11 +1193,11 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, } } -bool +bool PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { // Currently, this function only handles SVR4 32- and 64-bit ABIs. // Return false otherwise to maintain pre-existing behavior. @@ -1207,7 +1210,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL; bool CRSpilled = false; MachineInstrBuilder CRMIB; - + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); // Only Darwin actually uses the VRSAVE register, but it can still appear @@ -1237,21 +1240,21 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, CRSpilled = true; FuncInfo->setSpillsCR(); - // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have - // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. - CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) + // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have + // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 
+ CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) .addReg(Reg, RegState::ImplicitKill); - MBB.insert(MI, CRMIB); - MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) - .addReg(PPC::R12, - getKillRegState(true)), - CSI[i].getFrameIdx())); + MBB.insert(MI, CRMIB); + MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) + .addReg(PPC::R12, + getKillRegState(true)), + CSI[i].getFrameIdx())); } } else { const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.storeRegToStackSlot(MBB, MI, Reg, true, - CSI[i].getFrameIdx(), RC, TRI); + CSI[i].getFrameIdx(), RC, TRI); } } return true; @@ -1260,8 +1263,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, static void restoreCRs(bool isPPC64, bool is31, bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, - MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector &CSI, unsigned CSIIndex) { + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector &CSI, unsigned CSIIndex) { MachineFunction *MF = MBB.getParent(); const PPCInstrInfo &TII = @@ -1275,12 +1278,12 @@ restoreCRs(bool isPPC64, bool is31, else { // 32-bit: FP-relative MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), - PPC::R12), - CSI[CSIIndex].getFrameIdx())); + PPC::R12), + CSI[CSIIndex].getFrameIdx())); RestoreOp = PPC::MTOCRF; MoveReg = PPC::R12; } - + if (CR2Spilled) MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); @@ -1335,11 +1338,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.erase(I); } -bool +bool PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { // Currently, this function only handles SVR4 32- and 64-bit ABIs. // Return false otherwise to maintain pre-existing behavior. @@ -1387,20 +1390,20 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // When we first encounter a non-CR register after seeing at // least one CR register, restore all spilled CRs together. if ((CR2Spilled || CR3Spilled || CR4Spilled) - && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { + && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { bool is31 = needsFP(*MF); restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, - MBB, I, CSI, CSIIndex); - CR2Spilled = CR3Spilled = CR4Spilled = false; + MBB, I, CSI, CSIIndex); + CR2Spilled = CR3Spilled = CR4Spilled = false; } // Default behavior for non-CR saves. const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), - RC, TRI); + RC, TRI); assert(I != MBB.begin() && - "loadRegFromStackSlot didn't insert any code!"); + "loadRegFromStackSlot didn't insert any code!"); } // Insert in reverse order. @@ -1409,16 +1412,15 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, else { I = BeforeI; ++I; - } + } } // If we haven't yet spilled the CRs, do so now. 
if (CR2Spilled || CR3Spilled || CR4Spilled) { - bool is31 = needsFP(*MF); + bool is31 = needsFP(*MF); restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, - MBB, I, CSI, CSIIndex); + MBB, I, CSI, CSIIndex); } return true; } - diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 7aab37e..94e9b67 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -38,37 +38,37 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - bool hasFP(const MachineFunction &MF) const; + bool hasFP(const MachineFunction &MF) const override; bool needsFP(const MachineFunction &MF) const; void replaceFPWithRealFP(MachineFunction &MF) const; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override; /// targetHandlesStackFrameRounding - Returns true if the target is /// responsible for rounding up the stack frame (probably at emitPrologue /// time). - bool targetHandlesStackFrameRounding() const { return true; } + bool targetHandlesStackFrameRounding() const override { return true; } /// getReturnSaveOffset - Return the previous frame offset to save the /// return address. @@ -141,7 +141,7 @@ public: // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. const SpillSlot * - getCalleeSavedSpillSlots(unsigned &NumEntries) const { + getCalleeSavedSpillSlots(unsigned &NumEntries) const override { if (Subtarget.isDarwinABI()) { NumEntries = 1; if (Subtarget.isPPC64()) { @@ -156,7 +156,7 @@ public: // Early exit if not using the SVR4 ABI.
if (!Subtarget.isSVR4ABI()) { NumEntries = 0; - return 0; + return nullptr; } // Note that the offsets here overlap, but this is fixed up in diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 37c85b3..7ca706b 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "pre-RA-sched" #include "PPCHazardRecognizers.h" #include "PPC.h" #include "PPCInstrInfo.h" @@ -22,6 +21,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "pre-RA-sched" + bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) { // FIXME: Move this. if (isBCTRAfterSet(SU)) @@ -226,7 +227,7 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { CurGroup.clear(); CurSlots = CurBranches = 0; } else { - CurGroup.push_back(0); + CurGroup.push_back(nullptr); ++CurSlots; } } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index 6b7fe41..cf4332c 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -37,14 +37,14 @@ public: ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_), CurSlots(0), CurBranches(0) {} - virtual HazardType getHazardType(SUnit *SU, int Stalls); - virtual bool ShouldPreferAnother(SUnit* SU); - virtual unsigned PreEmitNoops(SUnit *SU); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - virtual void RecedeCycle(); - virtual void Reset(); - virtual void EmitNoop(); + HazardType getHazardType(SUnit *SU, int Stalls) override; + bool ShouldPreferAnother(SUnit* SU) override; + unsigned PreEmitNoops(SUnit *SU) override; + void EmitInstruction(SUnit *SU) override; + void AdvanceCycle() override; + void RecedeCycle() override; + void Reset() override; + void EmitNoop() override; }; /// PPCHazardRecognizer970 - This class defines a finite state automata that @@ -76,10 +76,10 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { public: PPCHazardRecognizer970(const TargetMachine &TM); - virtual HazardType getHazardType(SUnit *SU, int Stalls); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - virtual void Reset(); + virtual HazardType getHazardType(SUnit *SU, int Stalls) override; + virtual void EmitInstruction(SUnit *SU) override; + virtual void AdvanceCycle() override; + virtual void Reset() override; private: /// EndDispatchGroup - Called when we are finishing a new dispatch group. diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 3bbc839..251e8b6 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppc-codegen" #include "PPC.h" #include "MCTargetDesc/PPCPredicates.h" #include "PPCTargetMachine.h" @@ -35,6 +34,8 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +#define DEBUG_TYPE "ppc-codegen" + // FIXME: Remove this once the bug has been fixed! 
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden); @@ -50,29 +51,31 @@ namespace { /// class PPCDAGToDAGISel : public SelectionDAGISel { const PPCTargetMachine &TM; - const PPCTargetLowering &PPCLowering; - const PPCSubtarget &PPCSubTarget; + const PPCTargetLowering *PPCLowering; + const PPCSubtarget *PPCSubTarget; unsigned GlobalBaseReg; public: explicit PPCDAGToDAGISel(PPCTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), - PPCLowering(*TM.getTargetLowering()), - PPCSubTarget(*TM.getSubtargetImpl()) { + PPCLowering(TM.getTargetLowering()), + PPCSubTarget(TM.getSubtargetImpl()) { initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { // Make sure we re-emit a set of the global base reg if necessary GlobalBaseReg = 0; + PPCLowering = TM.getTargetLowering(); + PPCSubTarget = TM.getSubtargetImpl(); SelectionDAGISel::runOnMachineFunction(MF); - if (!PPCSubTarget.isSVR4ABI()) + if (!PPCSubTarget->isSVR4ABI()) InsertVRSaveCode(MF); return true; } - virtual void PostprocessISelDAG(); + void PostprocessISelDAG() override; /// getI32Imm - Return a target constant with the specified value, of type /// i32. @@ -88,7 +91,7 @@ namespace { /// getSmallIPtrImm - Return a target constant of pointer type. inline SDValue getSmallIPtrImm(unsigned Imm) { - return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy()); + return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy()); } /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s @@ -109,7 +112,7 @@ namespace { // Select - Convert the specified operand from a target-independent to a // target-specific node if it hasn't already been changed. - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; SDNode *SelectBitfieldInsert(SDNode *N); @@ -121,7 +124,7 @@ namespace { /// a base register plus a signed 16-bit displacement [r+imm]. bool SelectAddrImm(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false); } /// SelectAddrImmOffs - Return true if the operand is valid for a preinc @@ -141,20 +144,20 @@ namespace { /// represented as an indexed [r+r] operation. Returns false if it can /// be represented by [r+imm], which are preferred. bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG); + return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG); } /// SelectAddrIdxOnly - Given the specified address, force it to be /// represented as an indexed [r+r] operation. bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) { - return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG); + return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG); } /// SelectAddrImmX4 - Returns true if the address N can be represented by /// a base register plus a signed 16-bit displacement that is a multiple of 4. /// Suitable for use by STD and friends. bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) { - return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true); + return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true); } // Select an address into a single register. @@ -168,16 +171,16 @@ namespace { /// a register.
The case of adding a (possibly relocatable) constant to a /// register can be improved, but it is wrong to substitute Reg+Reg for /// Reg in an asm, because the load or store opcode would have to change. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector<SDValue> &OutOps) { + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) override { OutOps.push_back(Op); return false; } void InsertVRSaveCode(MachineFunction &MF); - virtual const char *getPassName() const { + const char *getPassName() const override { return "PowerPC DAG->DAG Pattern Instruction Selection"; } @@ -188,7 +191,7 @@ private: SDNode *SelectSETCC(SDNode *N); void PeepholePPC64(); - void PeepholdCROps(); + void PeepholeCROps(); bool AllUsersSelectZero(SDNode *N); void SwapAllSelectUsers(SDNode *N); @@ -271,7 +274,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { MachineBasicBlock::iterator MBBI = FirstMBB.begin(); DebugLoc dl; - if (PPCLowering.getPointerTy() == MVT::i32) { + if (PPCLowering->getPointerTy() == MVT::i32) { GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR)); BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg); @@ -282,7 +285,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() { } } return CurDAG->getRegister(GlobalBaseReg, - PPCLowering.getPointerTy()).getNode(); + PPCLowering->getPointerTy()).getNode(); } /// isIntS16Immediate - This method tests to see if the node is either a 32-bit @@ -414,8 +417,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SDLoc dl(N); APInt LKZ, LKO, RKZ, RKO; - CurDAG->ComputeMaskedBits(Op0, LKZ, LKO); - CurDAG->ComputeMaskedBits(Op1, RKZ, RKO); + CurDAG->computeKnownBits(Op0, LKZ, LKO); + CurDAG->computeKnownBits(Op1, RKZ, RKO); unsigned TargetMask = LKZ.getZExtValue(); unsigned InsertMask = RKZ.getZExtValue(); @@ -458,11 +461,18 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value; } if (Op1Opc == ISD::AND) { + // The AND mask might not be a constant, and we need to make sure that + // if we're going to fold the masking with the insert, all bits not + // known to be zero in the mask are known to be one. + APInt MKZ, MKO; + CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO); + bool CanFoldMask = InsertMask == MKO.getZExtValue(); + unsigned SHOpc = Op1.getOperand(0).getOpcode(); - if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && + if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask && isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) { - // Note that Value must be in range here (less than 32) because - // otherwise there would not be any bits set in InsertMask. + // Note that Value must be in range here (less than 32) because + // otherwise there would not be any bits set in InsertMask. Op1 = Op1.getOperand(0).getOperand(0); SH = (SHOpc == ISD::SHL) ? Value : 32 - Value; } @@ -474,7 +484,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops); } } - return 0; + return nullptr; } /// SelectCC - Select a comparison of the specified values with the specified @@ -572,7 +582,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, Opc = PPC::FCMPUS; } else { assert(LHS.getValueType() == MVT::f64 && "Unknown vt!"); - Opc = PPCSubTarget.hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; + Opc = PPCSubTarget->hasVSX() ?
PPC::XSCMPUDP : PPC::FCMPUD; } return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); } @@ -738,7 +748,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy(); bool isPPC64 = (PtrVT == MVT::i64); - if (!PPCSubTarget.useCRBits() && + if (!PPCSubTarget->useCRBits() && isInt32Immediate(N->getOperand(1), Imm)) { // We can codegen setcc op, imm very efficiently compared to a brcond. // Check for those cases here. @@ -750,7 +760,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { case ISD::SETEQ: { Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETNE: { if (isPPC64) break; @@ -762,14 +772,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } case ISD::SETLT: { SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETGT: { SDValue T = SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } } } else if (Imm == ~0U) { // setcc op, -1 @@ -799,7 +809,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, Op), 0); SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } case ISD::SETGT: { SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) }; @@ -820,7 +830,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { if (LHS.getValueType().isVector()) { EVT VecVT = LHS.getValueType(); MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; - unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget.hasVSX()); + unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX()); switch (CC) { case ISD::SETEQ: @@ -831,7 +841,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { case ISD::SETONE: case ISD::SETUNE: { SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLNOR : + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR, VecVT, VCmp, VCmp); } @@ -853,9 +863,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); } else { SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX()); + unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX()); SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLOR : + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? 
PPC::XXLOR : PPC::VOR, VecVT, VCmpGT, VCmpEQ); } @@ -864,9 +874,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { case ISD::SETOLE: case ISD::SETULE: { SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); - unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX()); + unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX()); SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); - return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLOR : + return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR : PPC::VOR, VecVT, VCmpLE, VCmpEQ); } @@ -875,8 +885,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { } } - if (PPCSubTarget.useCRBits()) - return 0; + if (PPCSubTarget->useCRBits()) + return nullptr; bool Inv; unsigned Idx = getCRIdxForSetCC(CC, Inv); @@ -886,7 +896,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { // Force the ccreg into CR7. SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); - SDValue InFlag(0, 0); // Null incoming flag value. + SDValue InFlag(nullptr, 0); // Null incoming flag value. CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, InFlag).getValue(1); @@ -896,7 +906,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31), getI32Imm(31), getI32Imm(31) }; if (!Inv) - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); // Get the specified bit. SDValue Tmp = @@ -911,7 +921,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. } switch (N->getOpcode()) { @@ -1093,7 +1103,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Offset, Base, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), - PPCLowering.getPointerTy(), + PPCLowering->getPointerTy(), MVT::Other, Ops); } else { unsigned Opcode; @@ -1128,7 +1138,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0), - PPCLowering.getPointerTy(), + PPCLowering->getPointerTy(), MVT::Other, Ops); } } @@ -1143,7 +1153,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { SDValue Val = N->getOperand(0).getOperand(0); SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // If this is just a masked value where the input is not handled above, and // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm @@ -1152,7 +1162,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { N->getOperand(0).getOpcode() != ISD::ROTL) { SDValue Val = N->getOperand(0); SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // If this is a 64-bit zero-extension mask, emit rldicl. 
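As context for the comment above: rldicl ("rotate left doubleword immediate then clear left") rotates its operand left by SH bits and then clears the MB high-order bits, so with SH = 0 it performs a plain zero-extension, and a shift can be folded in by adjusting SH. A standalone model of that semantics (an illustrative sketch; the helper name is ours, not part of the patch):

#include <cassert>
#include <cstdint>

// Software model of PPC rldicl: rotate X left by SH, then keep only the
// low 64-MB bits (i.e. clear the MB most-significant bits).
static uint64_t rldicl(uint64_t X, unsigned SH, unsigned MB) {
  uint64_t Rot = SH ? (X << SH) | (X >> (64 - SH)) : X;
  return MB ? Rot & (~0ULL >> MB) : Rot;
}

int main() {
  // SH = 0, MB = 32 keeps the low 32 bits: an i32->i64 zero-extension.
  assert(rldicl(0xDEADBEEFCAFEF00DULL, 0, 32) == 0xCAFEF00DULL);
  // Folding (x >> 8) & 0xFFFF into one instruction: SH = 64 - 8, MB = 48.
  assert(rldicl(0x0123456789ABCDEFULL, 56, 48) == 0xABCDULL);
  return 0;
}

With that semantics in mind, the selection logic below derives MB from the mask and folds any SHL/SRL into SH before emitting PPC::RLDICL.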
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && @@ -1174,12 +1184,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { } SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) }; - return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3); + return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); } // AND X, 0 -> 0, not "rlwinm 32". if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { ReplaceUses(SDValue(N, 0), N->getOperand(1)); - return NULL; + return nullptr; } // ISD::OR doesn't get all the bitfield insertion fun. // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert @@ -1212,7 +1222,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // Other cases are autogenerated. @@ -1224,7 +1234,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { isRotateAndMask(N, Imm, true, SH, MB, ME)) { SDValue Ops[] = { N->getOperand(0).getOperand(0), getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; - return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); } // Other cases are autogenerated. @@ -1259,7 +1269,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { bool isPPC64 = (PtrVT == MVT::i64); // If this is a select of i1 operands, we'll pattern match it. - if (PPCSubTarget.useCRBits() && + if (PPCSubTarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1) break; @@ -1327,17 +1337,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), getI32Imm(BROpc) }; - return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4); + return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); } case ISD::VSELECT: - if (PPCSubTarget.hasVSX()) { + if (PPCSubTarget->hasVSX()) { SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; - return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops, 3); + return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); } break; case ISD::VECTOR_SHUFFLE: - if (PPCSubTarget.hasVSX() && (N->getValueType(0) == MVT::v2f64 || + if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) { ShuffleVectorSDNode *SVN = cast(N); @@ -1364,23 +1374,23 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; return CurDAG->SelectNodeTo(N, PPC::LXVDSX, - N->getValueType(0), Ops, 3); + N->getValueType(0), Ops); } } SDValue Ops[] = { Op1, Op2, DMV }; - return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops, 3); + return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); } break; case PPCISD::BDNZ: case PPCISD::BDZ: { - bool IsPPC64 = PPCSubTarget.isPPC64(); + bool IsPPC64 = PPCSubTarget->isPPC64(); SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), - MVT::Other, Ops, 2); + MVT::Other, Ops); } case PPCISD::COND_BRANCH: { // Op #0 is the Chain. 
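The mechanical change repeated through these hunks, SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4) becoming SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops), reflects the SelectionDAG node-construction APIs now taking ArrayRef operand lists, which deduce the element count from a C array at the call site; makeArrayRef covers the pointer-plus-runtime-length case seen later in PrepareCall. A minimal stand-in (our own sketch, not LLVM's actual class) showing why the explicit count becomes redundant:

#include <cstddef>
#include <iostream>

// Bare-bones imitation of llvm::ArrayRef<T>: a pointer/length pair whose
// implicit array constructor deduces the length, so callers pass "Ops"
// instead of "Ops, 4".
template <typename T> class ArrayRefSketch {
  const T *Data;
  size_t Length;
public:
  template <size_t N>
  ArrayRefSketch(const T (&Arr)[N]) : Data(Arr), Length(N) {}
  ArrayRefSketch(const T *D, size_t L) : Data(D), Length(L) {} // makeArrayRef
  size_t size() const { return Length; }
  const T &operator[](size_t I) const { return Data[I]; }
};

static size_t countOperands(ArrayRefSketch<int> Ops) { return Ops.size(); }

int main() {
  int Ops[] = {10, 20, 30, 40};
  std::cout << countOperands(Ops) << "\n";      // 4, deduced from the array
  std::cout << countOperands({Ops, 2}) << "\n"; // 2, explicit runtime length
  return 0;
}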
@@ -1393,7 +1403,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { getI32Imm(cast(N->getOperand(1))->getZExtValue()); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; - return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5); + return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); } case ISD::BR_CC: { ISD::CondCode CC = cast(N->getOperand(1))->get(); @@ -1422,7 +1432,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC), CondCode, N->getOperand(4), N->getOperand(0) }; - return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4); + return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); } case ISD::BRIND: { // FIXME: Should custom lower this. @@ -1435,7 +1445,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); } case PPCISD::TOC_ENTRY: { - assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI"); + assert (PPCSubTarget->isPPC64() && "Only supported for 64-bit ABI"); // For medium and large code model, we generate two instructions as // described below. Otherwise we allow SelectCodeCommon to handle this, @@ -1462,18 +1472,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast(GA)) { const GlobalValue *GValue = G->getGlobal(); - const GlobalAlias *GAlias = dyn_cast(GValue); - const GlobalValue *RealGValue = - GAlias ? GAlias->getAliasedGlobal() : GValue; - const GlobalVariable *GVar = dyn_cast(RealGValue); - assert((GVar || isa(RealGValue)) && - "Unexpected global value subclass!"); - - // An external variable is one without an initializer. For these, - // for variables with common linkage, and for Functions, generate - // the LDtocL form. - if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() || - RealGValue->hasAvailableExternallyLinkage()) + if (GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage()) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); } @@ -1566,7 +1566,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { return; PeepholePPC64(); - PeepholdCROps(); + PeepholeCROps(); } // Check if all users of this node will become isel where the second operand @@ -1576,7 +1576,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() { // containing zero. bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { // If we're not using isel, then this does not matter. - if (!PPCSubTarget.hasISEL()) + if (!PPCSubTarget->hasISEL()) return false; for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); @@ -1637,7 +1637,7 @@ void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { } } -void PPCDAGToDAGISel::PeepholdCROps() { +void PPCDAGToDAGISel::PeepholeCROps() { bool IsModified; do { IsModified = false; @@ -2038,7 +2038,7 @@ void PPCDAGToDAGISel::PeepholdCROps() { void PPCDAGToDAGISel::PeepholePPC64() { // These optimizations are currently supported only for 64-bit SVR4. 
- if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64()) + if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64()) return; SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); @@ -2196,8 +2196,8 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) { static void initializePassOnce(PassRegistry &Registry) { const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection"; - PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0, - false, false); + PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, + nullptr, false, false); Registry.registerPass(*PI, true); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 32ac1dc..cf4c9e6 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -18,6 +18,7 @@ #include "PPCTargetMachine.h" #include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -459,6 +460,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); @@ -758,7 +760,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case PPCISD::FSEL: return "PPCISD::FSEL"; case PPCISD::FCFID: return "PPCISD::FCFID"; case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; @@ -929,7 +931,7 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// amount, otherwise return -1. int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { if (N->getValueType(0) != MVT::v16i8) - return false; + return -1; ShuffleVectorSDNode *SVOp = cast(N); @@ -1019,7 +1021,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { /// the constant being splatted. The ByteSize field indicates the number of /// bytes of each element [124] -> [bhw]. SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { - SDValue OpVal(0, 0); + SDValue OpVal(nullptr, 0); // If ByteSize of the splat is bigger than the element size of the // build_vector, then we have a case where we are checking for a splat where @@ -1038,7 +1040,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { if (!isa(N->getOperand(i))) return SDValue(); - if (UniquedVals[i&(Multiple-1)].getNode() == 0) + if (!UniquedVals[i&(Multiple-1)].getNode()) UniquedVals[i&(Multiple-1)] = N->getOperand(i); else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) return SDValue(); // no match. @@ -1053,21 +1055,21 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { bool LeadingZero = true; bool LeadingOnes = true; for (unsigned i = 0; i != Multiple-1; ++i) { - if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs. + if (!UniquedVals[i].getNode()) continue; // Must have been undefs. LeadingZero &= cast(UniquedVals[i])->isNullValue(); LeadingOnes &= cast(UniquedVals[i])->isAllOnesValue(); } // Finally, check the least significant entry. 
if (LeadingZero) { - if (UniquedVals[Multiple-1].getNode() == 0) + if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef int Val = cast(UniquedVals[Multiple-1])->getZExtValue(); if (Val < 16) return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) } if (LeadingOnes) { - if (UniquedVals[Multiple-1].getNode() == 0) + if (!UniquedVals[Multiple-1].getNode()) return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef int Val =cast(UniquedVals[Multiple-1])->getSExtValue(); if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) @@ -1080,13 +1082,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { // Check to see if this buildvec has a single non-undef value in its elements. for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; - if (OpVal.getNode() == 0) + if (!OpVal.getNode()) OpVal = N->getOperand(i); else if (OpVal != N->getOperand(i)) return SDValue(); } - if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def. + if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def. unsigned ValSizeInBytes = EltSize; uint64_t Value = 0; @@ -1135,7 +1137,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { /// sign extension from a 16-bit value. If so, this returns true and the /// immediate. static bool isIntS16Immediate(SDNode *N, short &Imm) { - if (N->getOpcode() != ISD::Constant) + if (!isa(N)) return false; Imm = (short)cast(N)->getZExtValue(); @@ -1174,12 +1176,12 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, // disjoint. APInt LHSKnownZero, LHSKnownOne; APInt RHSKnownZero, RHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), - LHSKnownZero, LHSKnownOne); + DAG.computeKnownBits(N.getOperand(0), + LHSKnownZero, LHSKnownOne); if (LHSKnownZero.getBoolValue()) { - DAG.ComputeMaskedBits(N.getOperand(1), - RHSKnownZero, RHSKnownOne); + DAG.computeKnownBits(N.getOperand(1), + RHSKnownZero, RHSKnownOne); // If all of the bits are known zero on the LHS or RHS, the add won't // carry. if (~(LHSKnownZero | RHSKnownZero) == 0) { @@ -1279,7 +1281,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, // (for better address arithmetic) if the LHS and RHS of the OR are // provably disjoint. APInt LHSKnownZero, LHSKnownOne; - DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); + DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne); if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { // If all of the bits are known zero on the LHS or RHS, the add won't @@ -1439,7 +1441,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, /// GetLabelAccessInfo - Return true if we should reference labels using a /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags. 
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags, - unsigned &LoOpFlags, const GlobalValue *GV = 0) { + unsigned &LoOpFlags, + const GlobalValue *GV = nullptr) { HiOpFlags = PPCII::MO_HA; LoOpFlags = PPCII::MO_LO; @@ -1885,17 +1888,12 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, Entry.Node = Nest; Args.push_back(Entry); // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) - TargetLowering::CallLoweringInfo CLI(Chain, - Type::getVoidTy(*DAG.getContext()), - false, false, false, false, 0, - CallingConv::C, - /*isTailCall=*/false, - /*doesNotRet=*/false, - /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__trampoline_setup", PtrVT), - Args, DAG, dl); - std::pair CallResult = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__trampoline_setup", PtrVT), &Args, 0); + std::pair CallResult = LowerCallTo(CLI); return CallResult.second; } @@ -2016,7 +2014,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const uint16_t ArgRegs[] = { + static const MCPhysReg ArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; @@ -2043,7 +2041,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const uint16_t ArgRegs[] = { + static const MCPhysReg ArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -2067,8 +2065,8 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, /// GetFPR - Get the set of FP registers that should be allocated for arguments, /// on Darwin. -static const uint16_t *GetFPR() { - static const uint16_t FPR[] = { +static const MCPhysReg *GetFPR() { + static const MCPhysReg FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 }; @@ -2265,13 +2263,13 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { - static const uint16_t GPArgRegs[] = { + static const MCPhysReg GPArgRegs[] = { PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); - static const uint16_t FPArgRegs[] = { + static const MCPhysReg FPArgRegs[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, PPC::F8 }; @@ -2333,8 +2331,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( } if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, - MVT::Other, &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } @@ -2405,18 +2402,18 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // Area that is at least reserved in caller of this function. 
unsigned MinReservedArea = ArgOffset; - static const uint16_t GPR[] = { + static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; - static const uint16_t VSRH[] = { + static const MCPhysReg VSRH[] = { PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; @@ -2683,8 +2680,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( } if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, - MVT::Other, &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } @@ -2714,18 +2710,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; - static const uint16_t GPR_32[] = { // 32-bit registers. + static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const uint16_t GPR_64[] = { // 64-bit registers. + static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -2736,7 +2732,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; // In 32-bit non-varargs functions, the stack space for vectors is after the // stack space for non-vectors. We do not use this space unless we have @@ -3039,8 +3035,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( } if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, - MVT::Other, &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); return Chain; } @@ -3174,12 +3169,12 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, /// 32-bit value is representable in the immediate field of a BxA instruction. static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { ConstantSDNode *C = dyn_cast(Op); - if (!C) return 0; + if (!C) return nullptr; int Addr = C->getZExtValue(); if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. SignExtend32<26>(Addr) != Addr) - return 0; // Top 6 bits have to be sext of immediate. + return nullptr; // Top 6 bits have to be sext of immediate. 
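For reference, the two conditions tested above are exactly what the 26-bit LI field of an absolute branch can encode: the low two bits must be zero (they are implicit in the instruction encoding) and the 32-bit address must equal the sign-extension of its own low 26 bits. A standalone restatement with SignExtend32<26> written out (the function name here is ours):

#include <cassert>
#include <cstdint>

// Mirror of the isBLACompatibleAddress check: 4-byte aligned, and equal to
// the sign-extension of its own low 26 bits (SignExtend32<26>).
static bool fitsAbsoluteBranchField(int32_t Addr) {
  int32_t SExt = int32_t(uint32_t(Addr) << 6) >> 6; // sign-extend from bit 25
  return (Addr & 3) == 0 && SExt == Addr;
}

int main() {
  assert(fitsAbsoluteBranchField(0x01FFFFFC));  // largest positive target
  assert(fitsAbsoluteBranchField(-4));          // negative targets sext fine
  assert(!fitsAbsoluteBranchField(0x02000000)); // bit 25 used as data: reject
  assert(!fitsAbsoluteBranchField(0x00000102)); // misaligned: reject
  return 0;
}

The success path that follows returns the address shifted right by two, matching the word-granular encoding.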
return DAG.getConstant((int)C->getZExtValue() >> 2, DAG.getTargetLoweringInfo().getPointerTy()).getNode(); @@ -3315,8 +3310,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(0), - MachinePointerInfo(0)); + false, false, MachinePointerInfo(), + MachinePointerInfo()); } /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of @@ -3361,8 +3356,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, MemOpChains2, dl); if (!MemOpChains2.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains2[0], MemOpChains2.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, @@ -3476,8 +3470,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // Load the address of the function entry point from the function // descriptor. SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); - SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, - InFlag.getNode() ? 3 : 2); + SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, + makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); Chain = LoadFuncPtr.getValue(1); InFlag = LoadFuncPtr.getValue(2); @@ -3513,8 +3507,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, MTCTROps[2] = InFlag; } - Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, - 2 + (InFlag.getNode() != 0)); + Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, + makeArrayRef(MTCTROps, InFlag.getNode() ? 
3 : 2)); InFlag = Chain.getValue(1); NodeTys.clear(); @@ -3522,7 +3516,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, NodeTys.push_back(MVT::Glue); Ops.push_back(Chain); CallOpc = PPCISD::BCTRL; - Callee.setNode(0); + Callee.setNode(nullptr); // Add use of X11 (holding environment pointer) if (isSVR4ABI && isPPC64) Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); @@ -3650,7 +3644,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, isa(Callee)) && "Expecting an global address, external symbol, absolute value or register"); - return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); + return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops); } // Add a NOP immediately after the branch instruction when using the 64-bit @@ -3683,7 +3677,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, } } - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); if (needsTOCRestore) { @@ -3720,6 +3714,10 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG); + if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + report_fatal_error("failed to perform tail call elimination on a call " + "site marked musttail"); + if (PPCSubTarget.isSVR4ABI()) { if (PPCSubTarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, @@ -3800,7 +3798,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, errs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString() << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } } } else { @@ -3921,8 +3919,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. @@ -3940,7 +3937,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, SDValue Ops[] = { Chain, InFlag }; Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, - dl, VTs, Ops, InFlag.getNode() ? 2 : 1); + dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 
2 : 1)); InFlag = Chain.getValue(1); } @@ -4044,17 +4041,17 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - static const uint16_t GPR[] = { + static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; - static const uint16_t VSRH[] = { + static const MCPhysReg VSRH[] = { PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 }; @@ -4333,8 +4330,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Check if this is an indirect call (MTCTR/BCTRL). // See PrepareCall() for more information about calls through function @@ -4448,17 +4444,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; - static const uint16_t GPR_32[] = { // 32-bit registers. + static const MCPhysReg GPR_32[] = { // 32-bit registers. PPC::R3, PPC::R4, PPC::R5, PPC::R6, PPC::R7, PPC::R8, PPC::R9, PPC::R10, }; - static const uint16_t GPR_64[] = { // 64-bit registers. + static const MCPhysReg GPR_64[] = { // 64-bit registers. PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const uint16_t *FPR = GetFPR(); + static const MCPhysReg *FPR = GetFPR(); - static const uint16_t VR[] = { + static const MCPhysReg VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 }; @@ -4466,7 +4462,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, const unsigned NumFPRs = 13; const unsigned NumVRs = array_lengthof(VR); - const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; + const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; SmallVector, 8> RegsToPass; SmallVector TailCallArguments; @@ -4696,8 +4692,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // On Darwin, R12 must contain the address of an indirect callee. This does // not mean the MTCTR instruction must use R12; it's easier to model this as @@ -4785,8 +4780,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, - &RetOps[0], RetOps.size()); + return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); } SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, @@ -4889,7 +4883,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // Build a DYNALLOC node. 
SDValue Ops[3] = { Chain, NegSize, FPSIdx }; SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); - return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); + return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops); } SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, @@ -4925,7 +4919,7 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD); SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { @@ -5097,8 +5091,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, - DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), - MVT::i32, MMO); + DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); } else Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI, false, false, 0); @@ -5225,7 +5218,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::LFIWZX : PPCISD::LFIWAX, dl, DAG.getVTList(MVT::f64, MVT::Other), - Ops, 2, MVT::i32, MMO); + Ops, MVT::i32, MMO); } else { assert(PPCSubTarget.isPPC64() && "i32->FP without LFIWAX supported only on PPC64"); @@ -5279,14 +5272,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); EVT VT = Op.getValueType(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue MFFSreg, InFlag; // Save FP Control Word to register EVT NodeTys[] = { MVT::f64, // return register MVT::Glue // unused in this context }; - SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); + SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None); // Save FP register to stack slot int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); @@ -5345,7 +5337,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2, dl); + return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { @@ -5374,7 +5366,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2, dl); + return DAG.getMergeValues(OutOps, dl); } SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { @@ -5403,7 +5395,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), Tmp4, Tmp6, ISD::SETLE); SDValue OutOps[] = { OutLo, OutHi }; - return DAG.getMergeValues(OutOps, 2, dl); + return DAG.getMergeValues(OutOps, dl); } //===----------------------------------------------------------------------===// @@ -5432,8 +5424,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, SDValue Elt = DAG.getConstant(Val, MVT::i32); SmallVector Ops; Ops.assign(CanonicalVT.getVectorNumElements(), Elt); - SDValue Res = 
DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, - &Ops[0], Ops.size()); + SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops); return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); } @@ -5492,7 +5483,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); - assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); + assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); // Check if this is a splat of a constant value. APInt APSplatBits, APSplatUndef; @@ -5540,10 +5531,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // we convert to a pseudo that will be expanded later into one of // the above forms. SDValue Elt = DAG.getConstant(SextVal, MVT::i32); - EVT VT = Op.getValueType(); - int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); - SDValue EltSize = DAG.getConstant(Size, MVT::i32); - return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); + EVT VT = (SplatSize == 1 ? MVT::v16i8 : + (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); + SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32); + SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); + if (VT == Op.getValueType()) + return RetVal; + else + return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); } // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is @@ -5838,7 +5833,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, } SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, - &ResultMask[0], ResultMask.size()); + ResultMask); return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); } @@ -5913,7 +5908,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getConstant(CompareOpc, MVT::i32) }; EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; - SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Now that we have the comparison, emit a copy from the CR to a GPR. // This is flagged to the above dot comparison. @@ -7232,8 +7227,8 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const GlobalValue *GV1 = NULL; - const GlobalValue *GV2 = NULL; + const GlobalValue *GV1 = nullptr; + const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; int64_t Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); @@ -7360,8 +7355,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, // that the high bits are equal. APInt Op1Zero, Op1One; APInt Op2Zero, Op2One; - DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One); - DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One); + DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One); + DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One); // We don't really care about what is known about the first bit (if // anything), so clear it in all masks prior to comparing them. @@ -7579,8 +7574,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); DAG.ReplaceAllUsesOfValueWith(PromOp, - DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, - Ops.data(), Ops.size())); + DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); } // Now we're left with the initial truncation itself. 
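A note on the APInt mask pairs used in the combine above (and on the ComputeMaskedBits to computeKnownBits rename this patch applies throughout): a bit set in KnownZero is provably zero, a bit set in KnownOne is provably one, and a bit in neither set is unknown. The step that clears bit 0 of every mask and then compares can be restated in isolation as follows (a loose scalar model, not the actual APInt-based code):

#include <cassert>
#include <cstdint>

// KnownZero/KnownOne as computeKnownBits reports them for a 32-bit value.
struct Known {
  uint32_t Zero, One;
};

// DAGCombineTruncBoolExt only cares about the low (i1) bit of its inputs,
// so it masks bit 0 out of all four sets and then requires that whatever
// is provably known about the remaining bits coincides on both operands.
static bool sameKnownHighBits(Known A, Known B) {
  A.Zero &= ~1u; A.One &= ~1u;
  B.Zero &= ~1u; B.One &= ~1u;
  return A.Zero == B.Zero && A.One == B.One;
}

int main() {
  Known ZExtBool = {0xFFFFFFFEu, 0}; // zero-extended i1: bits 31..1 are 0
  Known Unknown = {0, 0};            // nothing known
  assert(sameKnownHighBits(ZExtBool, ZExtBool));
  assert(!sameKnownHighBits(ZExtBool, Unknown));
  return 0;
}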
@@ -7816,8 +7810,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, } DAG.ReplaceAllUsesOfValueWith(PromOp, - DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), - Ops.data(), Ops.size())); + DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); } // Now we're left with the initial extension itself. @@ -7883,7 +7876,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (N->getOperand(1).getOpcode() == ISD::FSQRT) { SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), N->getOperand(0), RV); @@ -7893,7 +7886,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)), N->getValueType(0), RV); @@ -7906,7 +7899,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)), N->getValueType(0), RV, @@ -7918,7 +7911,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), N->getOperand(0), RV); @@ -7933,10 +7926,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the // reciprocal sqrt. SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { DCI.AddToWorklist(RV.getNode()); RV = DAGCombineFastRecip(RV, DCI); - if (RV.getNode() != 0) { + if (RV.getNode()) { // Unfortunately, RV is now NaN if the input was exactly 0. Select out // this case and force the answer to 0. @@ -8014,7 +8007,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, - DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), + DAG.getVTList(MVT::Other), Ops, cast(N)->getMemoryVT(), cast(N)->getMemOperand()); DCI.AddToWorklist(Val.getNode()); @@ -8041,8 +8034,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, }; return DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), - Ops, array_lengthof(Ops), - cast(N)->getMemoryVT(), + Ops, cast(N)->getMemoryVT(), cast(N)->getMemOperand()); } break; @@ -8167,7 +8159,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, Ops.push_back(*O); } - DAG.UpdateNodeOperands(User, Ops.data(), Ops.size()); + DAG.UpdateNodeOperands(User, Ops); } return SDValue(N, 0); @@ -8220,7 +8212,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, DAG.getVTList(N->getValueType(0) == MVT::i64 ? MVT::i64 : MVT::i32, MVT::Other), - Ops, 3, LD->getMemoryVT(), LD->getMemOperand()); + Ops, LD->getMemoryVT(), LD->getMemOperand()); // If this is an i16 load, insert the truncate. SDValue ResVal = BSLoad; @@ -8250,7 +8242,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, !N->getOperand(2).hasOneUse()) { // Scan all of the users of the LHS, looking for VCMPo's that match. 
- SDNode *VCMPoNode = 0; + SDNode *VCMPoNode = nullptr; SDNode *LHSN = N->getOperand(0).getNode(); for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); @@ -8271,9 +8263,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Look at the (necessarily single) use of the flag value. If it has a // chain, this transformation is more complex. Note that multiple things // could use the value result, which we should ignore. - SDNode *FlagUser = 0; + SDNode *FlagUser = nullptr; for (SDNode::use_iterator UI = VCMPoNode->use_begin(); - FlagUser == 0; ++UI) { + FlagUser == nullptr; ++UI) { assert(UI != VCMPoNode->use_end() && "Didn't find user!"); SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { @@ -8378,7 +8370,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAG.getConstant(CompareOpc, MVT::i32) }; EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; - SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); + SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); // Unpack the result based on how the target uses it. PPC::Predicate CompOpc; @@ -8414,11 +8406,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Inline Assembly Support //===----------------------------------------------------------------------===// -void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; @@ -8493,7 +8485,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight( Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); @@ -8599,7 +8591,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector&Ops, SelectionDAG &DAG) const { - SDValue Result(0,0); + SDValue Result; // Only support length 1 constraints. if (Constraint.length() > 1) return; @@ -8766,6 +8758,30 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. +unsigned PPCTargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + bool isPPC64 = PPCSubTarget.isPPC64(); + bool isDarwinABI = PPCSubTarget.isDarwinABI(); + + if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || + (!isPPC64 && VT != MVT::i32)) + report_fatal_error("Invalid register global variable type"); + + bool is64Bit = isPPC64 && VT == MVT::i64; + unsigned Reg = StringSwitch(RegName) + .Case("r1", is64Bit ? PPC::X1 : PPC::R1) + .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2)) + .Case("r13", (!isPPC64 && isDarwinABI) ? 0 : + (is64Bit ? PPC::X13 : PPC::R13)) + .Default(0); + + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + bool PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The PowerPC target isn't yet aware of offsets. 
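The getRegisterByName hook added above is driven by llvm::StringSwitch<unsigned>, whose Case/Default chain is first-match-wins. A minimal self-contained imitation of the idiom (our own class, standard library only), mirroring the r1/r2/r13 table:

#include <cstring>
#include <iostream>

// Minimal imitation of llvm::StringSwitch<R>: each Case latches a result
// the first time the key matches; Default supplies the fallback.
template <typename R> class StringSwitchSketch {
  const char *Key;
  bool Matched;
  R Result;
public:
  explicit StringSwitchSketch(const char *K)
      : Key(K), Matched(false), Result() {}
  StringSwitchSketch &Case(const char *S, R Value) {
    if (!Matched && std::strcmp(Key, S) == 0) {
      Matched = true;
      Result = Value;
    }
    return *this;
  }
  R Default(R Value) const { return Matched ? Result : Value; }
};

int main() {
  unsigned Reg = StringSwitchSketch<unsigned>("r13")
                     .Case("r1", 1)
                     .Case("r2", 2)
                     .Case("r13", 13)
                     .Default(0); // 0 plays the "invalid register" role
  std::cout << Reg << "\n"; // prints 13
  return 0;
}

As in the patch, a zero result is then treated as an invalid register name and reported via report_fatal_error.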
@@ -8795,6 +8811,42 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, } } +/// \brief Returns true if it is beneficial to convert a load of a constant +/// to just the constant itself. +bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0 || BitSize > 64) + return false; + return true; +} + +bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); + unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); + return NumBits1 == 64 && NumBits2 == 32; +} + +bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { + if (!VT1.isInteger() || !VT2.isInteger()) + return false; + unsigned NumBits1 = VT1.getSizeInBits(); + unsigned NumBits2 = VT2.getSizeInBits(); + return NumBits1 == 64 && NumBits2 == 32; +} + +bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + return isInt<16>(Imm) || isUInt<16>(Imm); +} + +bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const { + return isInt<16>(Imm) || isUInt<16>(Imm); +} + bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned, bool *Fast) const { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index da6d4dc..080ef5d 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -351,20 +351,20 @@ namespace llvm { /// getTargetNodeName() - This method returns the name of a target specific /// DAG node. - virtual const char *getTargetNodeName(unsigned Opcode) const; + const char *getTargetNodeName(unsigned Opcode) const override; - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; + EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. - virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const; + bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it @@ -384,29 +384,31 @@ namespace llvm { bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const; - Sched::Preference getSchedulingPreference(SDNode *N) const; + Sched::Preference getSchedulingPreference(SDNode *N) const override; /// LowerOperation - Provide custom lowering hooks for some operations. /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. 
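A worked illustration, in plain C++, of the isInt<16>(Imm) || isUInt<16>(Imm) test behind the new isLegalICmpImmediate and isLegalAddImmediate hooks above: PPC compare and add immediates are 16-bit instruction fields, signed for addi/cmpwi and unsigned for cmplwi.

#include <cstdint>

static bool legalPPCImmSketch(int64_t Imm) {
  bool Signed16   = Imm >= -32768 && Imm <= 32767; // isInt<16>
  bool Unsigned16 = Imm >= 0 && Imm <= 65535;      // isUInt<16>
  return Signed16 || Unsigned16;
}
// 40000 passes only the unsigned test and is still legal; 70000 fails both,
// so that constant would have to be materialized into a register first.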
/// - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, - SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, + SelectionDAG &DAG) const override; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const; + unsigned getRegisterByName(const char* RegName, EVT VT) const override; - virtual MachineBasicBlock * + void computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB) const override; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is64Bit, unsigned BinOpcode) const; @@ -420,34 +422,58 @@ namespace llvm { MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; - ConstraintType getConstraintType(const std::string &Constraint) const; + ConstraintType + getConstraintType(const std::string &Constraint) const override; /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. ConstraintWeight getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const; + AsmOperandInfo &info, const char *constraint) const override; std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + MVT VT) const override; /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. - unsigned getByValTypeAlignment(Type *Ty) const; + unsigned getByValTypeAlignment(Type *Ty) const override; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. - virtual void LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const; + void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. - virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + bool isLegalICmpImmediate(int64_t Imm) const override; + + /// isLegalAddImmediate - Return true if the specified immediate is legal + /// add immediate, that is the target has add instructions which can + /// add a register and the immediate without having to materialize + /// the immediate into a register. + bool isLegalAddImmediate(int64_t Imm) const override; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + /// isTruncateFree - Return true if it's free to truncate a value of + /// type Ty1 to type Ty2. e.g. 
On PPC it's free to truncate a i64 value in + /// register X1 to i32 by referencing its sub-register R1. + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; + + /// \brief Returns true if it is beneficial to convert a load of a constant + /// to just the constant itself. + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const override; + + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove @@ -460,32 +486,32 @@ namespace llvm { /// source is constant so it does not need to be loaded. /// It returns EVT::Other if the type should be determined using generic /// target-independent logic. - virtual EVT + EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - MachineFunction &MF) const; + MachineFunction &MF) const override; /// Is unaligned memory access allowed for the given type, and is it fast /// relative to software emulation. - virtual bool allowsUnalignedMemoryAccesses(EVT VT, - unsigned AddrSpace, - bool *Fast = 0) const; + bool allowsUnalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + bool *Fast = nullptr) const override; /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be /// expanded to FMAs when this method returns true, otherwise fmuladd is /// expanded to fmul + fadd. - virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; + bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; // Should we expand the build vector with shuffles? - virtual bool + bool shouldExpandBuildVectorWithShuffles(EVT VT, - unsigned DefinedValues) const; + unsigned DefinedValues) const override; /// createFastISel - This method returns a target-specific FastISel object, /// or null if the target does not support "fast" instruction selection. 
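Nearly every declaration in this header trades `virtual` for `override`. A minimal, self-contained illustration (not from the patch) of what that buys: a rename such as computeMaskedBitsForTargetNode to computeKnownBitsForTargetNode becomes a hard compile error in a stale overrider instead of silently declaring a brand-new virtual.

struct BaseSketch {
  virtual void computeKnownBitsForTargetNode() const {}
  virtual ~BaseSketch() {}
};

struct DerivedSketch : BaseSketch {
  // Uncommenting the stale name now fails to compile:
  //   void computeMaskedBitsForTargetNode() const override {} // error
  void computeKnownBitsForTargetNode() const override {}       // OK
};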
- virtual FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, - const TargetLibraryInfo *LibInfo) const; + FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) const override; private: SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; @@ -559,29 +585,29 @@ namespace llvm { const SmallVectorImpl &Ins, SmallVectorImpl &InVals) const; - virtual SDValue + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals) const override; - virtual SDValue + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals) const override; - virtual bool + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl &Outs, - LLVMContext &Context) const; + LLVMContext &Context) const override; - virtual SDValue + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - SDLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const override; SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index 2fd4a3e..f3c2eab 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -223,7 +223,7 @@ class VX2_Int_Ty2 xo, string opc, Intrinsic IntID, ValueType OutTy, //===----------------------------------------------------------------------===// // Instruction Definitions. -def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">; +def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">; let Predicates = [HasAltivec] in { let isCodeGenOnly = 1 in { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 939bbdc..fd72384 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -35,12 +35,14 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "ppc-instr-info" + #define GET_INSTRMAP_INFO #define GET_INSTRINFO_CTOR_DTOR #include "PPCGenInstrInfo.inc" -using namespace llvm; - static cl:: opt DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden, cl::desc("Disable analysis for CTR loops")); @@ -230,7 +232,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Cannot commute if it has a non-zero rotate count. if (MI->getOperand(3).getImm() != 0) - return 0; + return nullptr; // If we have a zero rotate count, we have: // M = mask(MB,ME) @@ -539,7 +541,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); // One-way branch. - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) // Unconditional branch BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8) @@ -1399,10 +1401,10 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, // There are two possible candidates which can be changed to set CR[01]. // One is MI, the other is a SUB instruction. // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1). - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; if (SrcReg2 != 0) // MI is not a candidate for CMPrr. 
- MI = NULL; + MI = nullptr; // FIXME: Conservatively refuse to convert an instruction which isn't in the // same BB as the comparison. This is to allow the check below to avoid calls // (and other explicit clobbers); instead we should really check for these @@ -1810,10 +1812,15 @@ protected: } public: - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { + TM = static_cast(&MF.getTarget()); + // If we don't have VSX then go ahead and return without doing + // anything. + if (!TM->getSubtargetImpl()->hasVSX()) + return false; + LIS = &getAnalysis(); - TM = static_cast(&MF.getTarget()); TII = TM->getInstrInfo(); bool Changed = false; @@ -1830,7 +1837,7 @@ public: return Changed; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -1962,8 +1969,11 @@ protected: } public: - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { TM = static_cast(&MF.getTarget()); + // If we don't have VSX on the subtarget, don't do anything. + if (!TM->getSubtargetImpl()->hasVSX()) + return false; TII = TM->getInstrInfo(); bool Changed = false; @@ -1977,7 +1987,7 @@ public: return Changed; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -2036,8 +2046,11 @@ protected: } public: - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { TM = static_cast(&MF.getTarget()); + // If we don't have VSX don't bother doing anything here. + if (!TM->getSubtargetImpl()->hasVSX()) + return false; TII = TM->getInstrInfo(); bool Changed = false; @@ -2051,7 +2064,7 @@ public: return Changed; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -2193,7 +2206,7 @@ protected: } public: - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { TM = static_cast(&MF.getTarget()); TII = TM->getInstrInfo(); @@ -2213,7 +2226,7 @@ public: return Changed; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } }; diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 3c8117c..d9db3e1 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -86,151 +86,148 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). 
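A hedged sketch of the gating pattern the passes above switch to: they are now always scheduled, but runOnMachineFunction returns "no change" before touching any analyses when the subtarget lacks VSX. SubtargetSketch and VSXPassSketch are illustrative names, not LLVM classes.

struct SubtargetSketch {
  bool HasVSX;
  bool hasVSX() const { return HasVSX; }
};

struct VSXPassSketch {
  bool runOnMachineFunction(const SubtargetSketch &ST) {
    if (!ST.hasVSX())
      return false; // bail out cheaply; the pass is unconditionally added now
    // ... VSX copy/FMA rewriting would go here ...
    return true;
  }
};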
/// - virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } + const PPCRegisterInfo &getRegisterInfo() const { return RI; } ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const; + const ScheduleDAG *DAG) const override; ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAG *DAG) const; + const ScheduleDAG *DAG) const override; - virtual int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const; - virtual + const MachineInstr *UseMI, + unsigned UseIdx) const override; int getOperandLatency(const InstrItineraryData *ItinData, SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const { + SDNode *UseNode, unsigned UseIdx) const override { return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx, UseNode, UseIdx); } bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, - unsigned &SubIdx) const; + unsigned &SubIdx) const override; unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + int &FrameIndex) const override; unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + int &FrameIndex) const override; // commuteInstruction - We can commute rlwimi instructions, but only if the // rotate amt is zero. We also have to munge the immediates a bit. - virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const; + MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override; - virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const; + bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const override; - virtual void insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; // Branch analysis. - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const override; // Select analysis. 
- virtual bool canInsertSelect(const MachineBasicBlock&, - const SmallVectorImpl &Cond, - unsigned, unsigned, int&, int&, int&) const; - virtual void insertSelect(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DstReg, - const SmallVectorImpl &Cond, - unsigned TrueReg, unsigned FalseReg) const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - - virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, - unsigned Reg, MachineRegisterInfo *MRI) const; + bool canInsertSelect(const MachineBasicBlock&, + const SmallVectorImpl &Cond, + unsigned, unsigned, int&, int&, int&) const override; + void insertSelect(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DstReg, + const SmallVectorImpl &Cond, + unsigned TrueReg, unsigned FalseReg) const override; + + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; + + bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const override; // If conversion by predication (only supported by some branch instructions). // All of the profitability checks always return true; it is always // profitable to use the predicated branches. 
- virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const { + bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + const BranchProbability &Probability) const override { return true; } - virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumT, unsigned ExtraT, - MachineBasicBlock &FMBB, - unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const; + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + const BranchProbability &Probability) const override; - virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - const BranchProbability - &Probability) const { + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + const BranchProbability + &Probability) const override { return true; } - virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, - MachineBasicBlock &FMBB) const { + bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, + MachineBasicBlock &FMBB) const override { return false; } // Predication support. - bool isPredicated(const MachineInstr *MI) const; + bool isPredicated(const MachineInstr *MI) const override; - virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; + bool isUnpredicatedTerminator(const MachineInstr *MI) const override; - virtual bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const; + const SmallVectorImpl &Pred) const override; - virtual bool SubsumesPredicate(const SmallVectorImpl &Pred1, - const SmallVectorImpl &Pred2) const; + const SmallVectorImpl &Pred2) const override; - virtual bool DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const; + bool DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const override; - virtual bool isPredicable(MachineInstr *MI) const; + bool isPredicable(MachineInstr *MI) const override; // Comparison optimization. - virtual bool analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const; + bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const override; - virtual bool optimizeCompareInstr(MachineInstr *CmpInstr, - unsigned SrcReg, unsigned SrcReg2, - int Mask, int Value, - const MachineRegisterInfo *MRI) const; + bool optimizeCompareInstr(MachineInstr *CmpInstr, + unsigned SrcReg, unsigned SrcReg2, + int Mask, int Value, + const MachineRegisterInfo *MRI) const override; /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. /// - virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const; + unsigned GetInstSizeInBytes(const MachineInstr *MI) const; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 1d984ab..e421f8e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -610,10 +610,10 @@ def iaddroff : ComplexPattern; //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. 
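The In32BitMode/In64BitMode/IsBookE predicate strings just below switch from '.' to '->' because PPCSubTarget is now held as a pointer, and TableGen splices these strings verbatim into generated C++ selector checks. A hedged sketch of that generated shape, with illustrative names (the real emitter output differs):

struct PPCSubtargetSketch { bool isPPC64() const { return true; } };

static bool in64BitModeSketch(const PPCSubtargetSketch *PPCSubTarget) {
  return PPCSubTarget->isPPC64(); // was: PPCSubTarget.isPPC64() (a reference)
}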
-def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">; -def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">; -def IsBookE : Predicate<"PPCSubTarget.isBookE()">; -def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">; +def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">; +def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">; +def IsBookE : Predicate<"PPCSubTarget->isBookE()">; +def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">; //===----------------------------------------------------------------------===// // PowerPC Multiclass Definitions. diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 9cc919e..49bcc48 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -39,7 +39,7 @@ multiclass XX3Form_Rcr opcode, bits<7> xo, dag OOL, dag IOL, } } -def HasVSX : Predicate<"PPCSubTarget.hasVSX()">; +def HasVSX : Predicate<"PPCSubTarget->hasVSX()">; let Predicates = [HasVSX] in { let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. let neverHasSideEffects = 1 in { // VSX instructions don't have side effects. diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 227919c..7bbc71b 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "PPCJITInfo.h" #include "PPCRelocations.h" #include "PPCTargetMachine.h" @@ -22,6 +21,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "jit" + static TargetJITInfo::JITCompilerFn JITCompilerFunction; #define BUILD_ADDIS(RD,RS,IMM16) \ diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h index 46d4a08..0693e3e 100644 --- a/lib/Target/PowerPC/PPCJITInfo.h +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -30,19 +30,19 @@ namespace llvm { is64Bit = tmIs64Bit; } - virtual StubLayout getStubLayout(); - virtual void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE); - virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); - virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase); - + StubLayout getStubLayout() override; + void *emitFunctionStub(const Function* F, void *Fn, + JITCodeEmitter &JCE) override; + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) override; + /// replaceMachineCodeForFunction - Make it so that calling the function /// whose machine code is at OLD turns into a call to NEW, perhaps by /// overwriting OLD with a branch to NEW. This is used for self-modifying /// code. /// - virtual void replaceMachineCodeForFunction(void *Old, void *New); + void replaceMachineCodeForFunction(void *Old, void *New) override; }; } diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 029bb8a..f8e84a5 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -96,7 +96,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ? 
MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym); - if (StubSym.getPointer() == 0) { + if (!StubSym.getPointer()) { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: StubValueTy(AP.getSymbol(MO.getGlobal()), diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 4ff282e..e333b51 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reginfo" #include "PPCRegisterInfo.h" #include "PPC.h" #include "PPCFrameLowering.h" @@ -42,11 +41,13 @@ #include "llvm/Target/TargetOptions.h" #include +using namespace llvm; + +#define DEBUG_TYPE "reginfo" + #define GET_REGINFO_TARGET_DESC #include "PPCGenRegisterInfo.inc" -using namespace llvm; - static cl::opt EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); @@ -96,7 +97,7 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) return &PPC::GPRCRegClass; } -const uint16_t* +const MCPhysReg* PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ? diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index c3e54b4..13a35f6 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -34,36 +34,37 @@ public: /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. - virtual const TargetRegisterClass * - getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const; + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override; unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; + MachineFunction &MF) const override; const TargetRegisterClass* - getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + getLargestLegalSuperClass(const TargetRegisterClass *RC) const override; /// Code Generation virtual methods... - const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const uint32_t *getCallPreservedMask(CallingConv::ID CC) const; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override; + const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; const uint32_t *getNoPreservedMask() const; - BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; /// We require the register scavenger. 
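getCalleeSavedRegs above now returns const MCPhysReg* rather than const uint16_t*. A minimal sketch of the convention, assuming the era's typedef (MCPhysReg is a 16-bit unsigned integer) and the usual 0-terminated table shape; the entries below are placeholders, not the real PPC lists.

#include <cstdint>

typedef uint16_t MCPhysRegSketch; // stand-in for llvm::MCPhysReg

static const MCPhysRegSketch CSRSketch[] = { 14, 15, 16, /* ... */ 0 };

static const MCPhysRegSketch *getCalleeSavedSketch() {
  return CSRSketch; // callers walk the array until the 0 terminator
}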
- bool requiresRegisterScavenging(const MachineFunction &MF) const { + bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; } - bool requiresFrameIndexScavenging(const MachineFunction &MF) const { + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override { return true; } - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const { + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; } - virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const { + bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override { return true; } @@ -82,28 +83,29 @@ public: unsigned FrameIndex) const; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, - int &FrameIdx) const; + int &FrameIdx) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; // Support for virtual base registers. - bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const; + bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg, int FrameIdx, - int64_t Offset) const; + int64_t Offset) const override; void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, - int64_t Offset) const; - bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const; + int64_t Offset) const override; + bool isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const override; // Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const override; // Base pointer (stack realignment) support. unsigned getBaseRegister(const MachineFunction &MF) const; bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; - bool needsStackRealignment(const MachineFunction &MF) const; + bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index e11f7d4..b3d145b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -188,6 +188,13 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>; def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>; } +// The full condition-code register. This is not modeled fully, but defined +// here primarily, for compatibility with gcc, to allow the inline asm "cc" +// clobber specification to work. 
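The comment above introduces a CC register that exists mainly so inline asm "cc" clobbers have something to attach to. A hedged example, in plain GCC-style PPC inline asm rather than anything from this patch, of such a clobber:

static int cmpSketch(int a, int b) {
  int r;
  // "cc" tells the compiler the condition register contents are destroyed.
  asm volatile("cmpw 0, %1, %2\n\tmfcr %0"
               : "=r"(r)
               : "r"(a), "r"(b)
               : "cc");
  return r;
}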
+def CC : PPCReg<"cc">, DwarfRegAlias { + let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7]; +} + // Link register def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>; //let Aliases = [LR] in @@ -300,3 +307,8 @@ def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>; def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> { let CopyCost = -1; } + +def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> { + let isAllocatable = 0; +} + diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index d4258b4..f742f72 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "powerpc-selectiondag-info" #include "PPCTargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "powerpc-selectiondag-info" + PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM) : TargetSelectionDAGInfo(TM) { } diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index b07abe4..ea9daee 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -24,31 +24,21 @@ #include "llvm/Target/TargetMachine.h" #include +using namespace llvm; + +#define DEBUG_TYPE "ppc-subtarget" + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "PPCGenSubtargetInfo.inc" -using namespace llvm; - PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool is64Bit, CodeGenOpt::Level OptLevel) - : PPCGenSubtargetInfo(TT, CPU, FS) - , IsPPC64(is64Bit) - , TargetTriple(TT) { + : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT), + OptLevel(OptLevel) { initializeEnvironment(); - - std::string FullFS = FS; - - // At -O2 and above, track CR bits as individual registers. - if (OptLevel >= CodeGenOpt::Default) { - if (!FullFS.empty()) - FullFS = "+crbits," + FullFS; - else - FullFS = "+crbits"; - } - - resetSubtargetFeatures(CPU, FullFS); + resetSubtargetFeatures(CPU, FS); } /// SetJITMode - This is called to inform the subtarget info that we are @@ -138,6 +128,14 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+64bit"; } + // At -O2 and above, track CR bits as individual registers. + if (OptLevel >= CodeGenOpt::Default) { + if (!FullFS.empty()) + FullFS = "+crbits," + FullFS; + else + FullFS = "+crbits"; + } + // Parse features string. ParseSubtargetFeatures(CPUName, FullFS); diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 87e012e..ee43fd5 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -99,6 +99,9 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; + /// OptLevel - What default optimization level we're emitting code for. + CodeGenOpt::Level OptLevel; + public: /// This constructor initializes the data members to match that /// of the specified triple. @@ -129,7 +132,7 @@ public: const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } /// \brief Reset the features for the PowerPC target. 
- virtual void resetSubtargetFeatures(const MachineFunction *MF); + void resetSubtargetFeatures(const MachineFunction *MF) override; private: void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); @@ -200,15 +203,17 @@ public: /// enablePostRAScheduler - True at 'More' optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; + RegClassVector& CriticalPathRCs) const override; + + bool enableEarlyIfConversion() const override { return hasISEL(); } // Scheduling customization. - bool enableMachineScheduler() const; + bool enableMachineScheduler() const override; void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin, MachineInstr *end, - unsigned NumRegionInstrs) const; - bool useAA() const; + unsigned NumRegionInstrs) const override; + bool useAA() const override; }; } // End llvm namespace diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index e7438f3..2323add 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -127,12 +127,12 @@ public: return *getPPCTargetMachine().getSubtargetImpl(); } - virtual bool addPreISel(); - virtual bool addILPOpts(); - virtual bool addInstSelector(); - virtual bool addPreRegAlloc(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); + bool addPreISel() override; + bool addILPOpts() override; + bool addInstSelector() override; + bool addPreRegAlloc() override; + bool addPreSched2() override; + bool addPreEmitPass() override; }; } // namespace @@ -148,12 +148,8 @@ bool PPCPassConfig::addPreISel() { } bool PPCPassConfig::addILPOpts() { - if (getPPCSubtarget().hasISEL()) { - addPass(&EarlyIfConverterID); - return true; - } - - return false; + addPass(&EarlyIfConverterID); + return true; } bool PPCPassConfig::addInstSelector() { @@ -165,25 +161,19 @@ bool PPCPassConfig::addInstSelector() { addPass(createPPCCTRLoopsVerify()); #endif - if (getPPCSubtarget().hasVSX()) - addPass(createPPCVSXCopyPass()); - + addPass(createPPCVSXCopyPass()); return false; } bool PPCPassConfig::addPreRegAlloc() { - if (getPPCSubtarget().hasVSX()) { - initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); - insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, - &PPCVSXFMAMutateID); - } - + initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); + insertPass(VSXFMAMutateEarly ? 
&RegisterCoalescerID : &MachineSchedulerID, + &PPCVSXFMAMutateID); return false; } bool PPCPassConfig::addPreSched2() { - if (getPPCSubtarget().hasVSX()) - addPass(createPPCVSXCopyCleanupPass()); + addPass(createPPCVSXCopyCleanupPass()); if (getOptLevel() != CodeGenOpt::None) addPass(&IfConverterID); diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 606ccb3..9e92494 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -43,34 +43,34 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit); - virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const PPCFrameLowering *getFrameLowering() const { + const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const PPCFrameLowering *getFrameLowering() const override { return &FrameLowering; } - virtual PPCJITInfo *getJITInfo() { return &JITInfo; } - virtual const PPCTargetLowering *getTargetLowering() const { + PPCJITInfo *getJITInfo() override { return &JITInfo; } + const PPCTargetLowering *getTargetLowering() const override { return &TLInfo; } - virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const { + const PPCSelectionDAGInfo* getSelectionDAGInfo() const override { return &TSInfo; } - virtual const PPCRegisterInfo *getRegisterInfo() const { + const PPCRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } - virtual const DataLayout *getDataLayout() const { return &DL; } - virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const InstrItineraryData *getInstrItineraryData() const { + const DataLayout *getDataLayout() const override { return &DL; } + const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; } // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE); + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, + JITCodeEmitter &JCE) override; /// \brief Register PPC analysis passes with a pass manager. - virtual void addAnalysisPasses(PassManagerBase &PM); + void addAnalysisPasses(PassManagerBase &PM) override; }; /// PPC32TargetMachine - PowerPC 32-bit target machine. 
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 2f4d5c1..007901b 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -14,17 +14,22 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ppctti" #include "PPC.h" #include "PPCTargetMachine.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "ppctti" + +static cl::opt DisablePPCConstHoist("disable-ppc-constant-hoisting", +cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden); + // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. namespace llvm { void initializePPCTTIPass(PassRegistry &); @@ -33,21 +38,16 @@ void initializePPCTTIPass(PassRegistry &); namespace { class PPCTTI final : public ImmutablePass, public TargetTransformInfo { - const PPCTargetMachine *TM; const PPCSubtarget *ST; const PPCTargetLowering *TLI; - /// Estimate the overhead of scalarizing an instruction. Insert and Extract - /// are set if the result needs to be inserted and/or extracted from vectors. - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; - public: - PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } PPCTTI(const PPCTargetMachine *TM) - : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), + : ImmutablePass(ID), ST(TM->getSubtargetImpl()), TLI(TM->getTargetLowering()) { initializePPCTTIPass(*PassRegistry::getPassRegistry()); } @@ -72,6 +72,13 @@ public: /// \name Scalar TTI Implementations /// @{ + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; + + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) const override; + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) const override; + virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; virtual void getUnrollingPreferences( @@ -128,6 +135,142 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { return PSK_Software; } +unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + if (Imm == 0) + return TCC_Free; + + if (Imm.getBitWidth() <= 64) { + if (isInt<16>(Imm.getSExtValue())) + return TCC_Basic; + + if (isInt<32>(Imm.getSExtValue())) { + // A constant that can be materialized using lis. 
+ if ((Imm.getZExtValue() & 0xFFFF) == 0) + return TCC_Basic; + + return 2 * TCC_Basic; + } + } + + return 4 * TCC_Basic; +} + +unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + switch (IID) { + default: return TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue())) + return TCC_Free; + break; + } + return PPCTTI::getIntImmCost(Imm, Ty); +} + +unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) const { + if (DisablePPCConstHoist) + return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty); + + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0) + return ~0U; + + unsigned ImmIdx = ~0U; + bool ShiftedFree = false, RunFree = false, UnsignedFree = false, + ZeroFree = false; + switch (Opcode) { + default: return TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. This prevents the + // creation of new constants for every base constant that gets constant + // folded with the offset. + if (Idx == 0) + return 2 * TCC_Basic; + return TCC_Free; + case Instruction::And: + RunFree = true; // (for the rotate-and-mask instructions) + // Fallthrough... + case Instruction::Add: + case Instruction::Or: + case Instruction::Xor: + ShiftedFree = true; + // Fallthrough... + case Instruction::Sub: + case Instruction::Mul: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + ImmIdx = 1; + break; + case Instruction::ICmp: + UnsignedFree = true; + ImmIdx = 1; + // Fallthrough... (zero comparisons can use record-form instructions) + case Instruction::Select: + ZeroFree = true; + break; + case Instruction::PHI: + case Instruction::Call: + case Instruction::Ret: + case Instruction::Load: + case Instruction::Store: + break; + } + + if (ZeroFree && Imm == 0) + return TCC_Free; + + if (Idx == ImmIdx && Imm.getBitWidth() <= 64) { + if (isInt<16>(Imm.getSExtValue())) + return TCC_Free; + + if (RunFree) { + if (Imm.getBitWidth() <= 32 && + (isShiftedMask_32(Imm.getZExtValue()) || + isShiftedMask_32(~Imm.getZExtValue()))) + return TCC_Free; + + + if (ST->isPPC64() && + (isShiftedMask_64(Imm.getZExtValue()) || + isShiftedMask_64(~Imm.getZExtValue()))) + return TCC_Free; + } + + if (UnsignedFree && isUInt<16>(Imm.getZExtValue())) + return TCC_Free; + + if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0) + return TCC_Free; + } + + return PPCTTI::getIntImmCost(Imm, Ty); +} + void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { if (ST->getDarwinDirective() == PPC::DIR_A2) { // The A2 is in-order with a deep pipeline, and concatenation unrolling @@ -220,7 +363,9 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, // experimentally as a minimum needed to prevent unprofitable // vectorization for the paq8p benchmark. It may need to be // raised further if other unprofitable cases remain. 
- unsigned LHSPenalty = 12; + unsigned LHSPenalty = 2; + if (ISD == ISD::INSERT_VECTOR_ELT) + LHSPenalty += 7; // Vector element insert/extract with Altivec is very expensive, // because they require store and reload with the attendant @@ -244,14 +389,32 @@ unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned Cost = TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); - // FIXME: Update this for VSX loads/stores that support unaligned access. + // VSX loads/stores support unaligned access. + if (ST->hasVSX()) { + if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64) + return Cost; + } + + bool UnalignedAltivec = + Src->isVectorTy() && + Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() && + LT.second.getSizeInBits() == 128 && + Opcode == Instruction::Load; // PPC in general does not support unaligned loads and stores. They'll need // to be decomposed based on the alignment factor. unsigned SrcBytes = LT.second.getStoreSize(); - if (SrcBytes && Alignment && Alignment < SrcBytes) + if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) { Cost += LT.first*(SrcBytes/Alignment-1); + // For a vector type, there is also scalarization overhead (only for + // stores, loads are expanded using the vector-load + permutation sequence, + // which is much less expensive). + if (Src->isVectorTy() && Opcode == Instruction::Store) + for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i) + Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i); + } + return Cost; } diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 3e1848b..949fdfb 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -37,11 +37,15 @@ FunctionPass *createAMDGPUCFGStructurizerPass(); // SI Passes FunctionPass *createSITypeRewriter(); FunctionPass *createSIAnnotateControlFlowPass(); +FunctionPass *createSILowerI1CopiesPass(); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); +void initializeSILowerI1CopiesPass(PassRegistry &); +extern char &SILowerI1CopiesID; + // Passes common to R600 and SI Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); @@ -76,8 +80,8 @@ enum AddressSpaces { GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). CONSTANT_ADDRESS = 2, ///< Address space for constant memory LOCAL_ADDRESS = 3, ///< Address space for local memory. - REGION_ADDRESS = 4, ///< Address space for region memory. - ADDRESS_NONE = 5, ///< Address space for unknown memory. + FLAT_ADDRESS = 4, ///< Address space for flat memory. + REGION_ADDRESS = 5, ///< Address space for region memory. PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) @@ -102,7 +106,8 @@ enum AddressSpaces { CONSTANT_BUFFER_13 = 21, CONSTANT_BUFFER_14 = 22, CONSTANT_BUFFER_15 = 23, - LAST_ADDRESS = 24 + ADDRESS_NONE = 24, ///< Address space for unknown memory. 
+ LAST_ADDRESS = ADDRESS_NONE }; } // namespace AMDGPUAS diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index d1e2cf5..2edc115 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -120,6 +120,17 @@ def AMDGPU : Target { let InstructionSet = AMDGPUInstrInfo; } +//===----------------------------------------------------------------------===// +// Predicate helper class +//===----------------------------------------------------------------------===// + +class PredicateControl { + Predicate SubtargetPredicate; + list OtherPredicates = []; + list Predicates = !listconcat([SubtargetPredicate], + OtherPredicates); +} + // Include AMDGPU TD files include "R600Schedule.td" include "SISchedule.td" diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index b166c45..170f479 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -64,7 +64,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { const AMDGPUSubtarget &STM = TM.getSubtarget(); SIProgramInfo KernelInfo; if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR); + getSIProgramInfo(KernelInfo, MF); EmitProgramInfoSI(MF, KernelInfo); } else { EmitProgramInfoR600(MF); @@ -84,8 +84,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SectionKind::getReadOnly()); OutStreamer.SwitchSection(CommentSection); - if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { + if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { OutStreamer.emitRawComment(" Kernel info:", false); + OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen), + false); OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR), false); OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), @@ -184,9 +186,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { } } -void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF, - unsigned &NumSGPR, - unsigned &NumVGPR) const { +void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, + MachineFunction &MF) const { + uint64_t CodeSize = 0; unsigned MaxSGPR = 0; unsigned MaxVGPR = 0; bool VCCUsed = false; @@ -200,6 +202,9 @@ void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF, I != E; ++I) { MachineInstr &MI = *I; + // TODO: CodeSize should account for multiple functions. 
+ CodeSize += MI.getDesc().Size; + unsigned numOperands = MI.getNumOperands(); for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) { MachineOperand &MO = MI.getOperand(op_idx); @@ -274,13 +279,9 @@ void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF, if (VCCUsed) MaxSGPR += 2; - NumSGPR = MaxSGPR; - NumVGPR = MaxVGPR; -} - -void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out, - MachineFunction &MF) const { - findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR); + ProgInfo.CodeLen = CodeSize; + ProgInfo.NumSGPR = MaxSGPR; + ProgInfo.NumVGPR = MaxVGPR; } void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h index a2b8337..71adc9a 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/lib/Target/R600/AMDGPUAsmPrinter.h @@ -24,7 +24,12 @@ namespace llvm { class AMDGPUAsmPrinter : public AsmPrinter { private: struct SIProgramInfo { - SIProgramInfo() : NumSGPR(0), NumVGPR(0) {} + SIProgramInfo() : + CodeLen(0), + NumSGPR(0), + NumVGPR(0) {} + + uint64_t CodeLen; unsigned NumSGPR; unsigned NumVGPR; }; @@ -42,14 +47,14 @@ private: public: explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer); - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "AMDGPU Assembly Printer"; } /// Implemented in AMDGPUMCInstLower.cpp - virtual void EmitInstruction(const MachineInstr *MI); + void EmitInstruction(const MachineInstr *MI) override; protected: bool DisasmEnabled; diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 65cdb24..5f8ad8c 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -20,7 +20,7 @@ def CC_SI : CallingConv<[ CCIfInReg>>, CCIfInReg -#include +#include "llvm/IR/Function.h" using namespace llvm; @@ -43,11 +39,12 @@ public: AMDGPUDAGToDAGISel(TargetMachine &TM); virtual ~AMDGPUDAGToDAGISel(); - SDNode *Select(SDNode *N); - virtual const char *getPassName() const; - virtual void PostprocessISelDAG(); + SDNode *Select(SDNode *N) override; + const char *getPassName() const override; + void PostprocessISelDAG() override; private: + bool isInlineImmediate(SDNode *N) const; inline SDValue getSmallIPtrImm(unsigned Imm); bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, const R600InstrInfo *TII); @@ -58,11 +55,9 @@ private: bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2); bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2); bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2); - SDValue SimplifyI24(SDValue &Op); - bool SelectI24(SDValue Addr, SDValue &Op); - bool SelectU24(SDValue Addr, SDValue &Op); static bool checkType(const Value *ptr, unsigned int addrspace); + static bool checkPrivateAddress(const MachineMemOperand *Op); static bool isGlobalStore(const StoreSDNode *N); static bool isPrivateStore(const StoreSDNode *N); @@ -77,10 +72,15 @@ private: bool isLocalLoad(const LoadSDNode *N) const; bool isRegionLoad(const LoadSDNode *N) const; + /// \returns True if the current basic block being selected is at control + /// flow depth 0. Meaning that the current block dominates the + // exit block. 
+ bool isCFDepth0() const; + const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); - bool SelectGlobalValueVariableOffset(SDValue Addr, - SDValue &BaseReg, SDValue& Offset); + bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, + SDValue& Offset); bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -91,8 +91,7 @@ private: /// \brief This pass converts a legalized DAG into a AMDGPU-specific // DAG, ready for instruction scheduling. -FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM - ) { +FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) { return new AMDGPUDAGToDAGISel(TM); } @@ -103,32 +102,39 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM) AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() { } +bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const { + const SITargetLowering *TL + = static_cast(getTargetLowering()); + return TL->analyzeImmediate(N) == 0; +} + /// \brief Determine the register class for \p OpNo /// \returns The register class of the virtual register that will be used for /// the given operand number \OpNo or NULL if the register class cannot be /// determined. const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, unsigned OpNo) const { - if (!N->isMachineOpcode()) { - return NULL; - } + if (!N->isMachineOpcode()) + return nullptr; + switch (N->getMachineOpcode()) { default: { const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode()); unsigned OpIdx = Desc.getNumDefs() + OpNo; if (OpIdx >= Desc.getNumOperands()) - return NULL; + return nullptr; int RegClass = Desc.OpInfo[OpIdx].RegClass; - if (RegClass == -1) { - return NULL; - } + if (RegClass == -1) + return nullptr; + return TM.getRegisterInfo()->getRegClass(RegClass); } case AMDGPU::REG_SEQUENCE: { - const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass( - cast(N->getOperand(0))->getZExtValue()); - unsigned SubRegIdx = - dyn_cast(N->getOperand(OpNo + 1))->getZExtValue(); + unsigned RCID = cast(N->getOperand(0))->getZExtValue(); + const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(RCID); + + SDValue SubRegOp = N->getOperand(OpNo + 1); + unsigned SubRegIdx = cast(SubRegOp)->getZExtValue(); return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx); } } @@ -139,7 +145,7 @@ SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) { } bool AMDGPUDAGToDAGISel::SelectADDRParam( - SDValue Addr, SDValue& R1, SDValue& R2) { + SDValue Addr, SDValue& R1, SDValue& R2) { if (Addr.getOpcode() == ISD::FrameIndex) { if (FrameIndexSDNode *FIN = dyn_cast(Addr)) { @@ -196,15 +202,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. } + + const AMDGPUSubtarget &ST = TM.getSubtarget(); switch (Opc) { default: break; // We are selecting i64 ADD here instead of custom lower it during // DAG legalization, so we can fold some i64 ADDs used for address // calculation into the LOAD and STORE instructions. 
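A worked example, in plain C++, of the lo/hi decomposition that the ISD::ADD case below implements with S_ADD_I32 followed by S_ADDC_U32 (or their VALU forms when the block is not at control-flow depth 0):

#include <cstdint>

static uint64_t add64ViaHalvesSketch(uint64_t A, uint64_t B) {
  uint32_t ALo = (uint32_t)A, AHi = (uint32_t)(A >> 32);
  uint32_t BLo = (uint32_t)B, BHi = (uint32_t)(B >> 32);
  uint32_t Lo = ALo + BLo;         // S_ADD_I32: produces the carry-out
  uint32_t Carry = Lo < ALo;       // model of the SCC/VCC carry bit
  uint32_t Hi = AHi + BHi + Carry; // S_ADDC_U32: consumes the carry
  return ((uint64_t)Hi << 32) | Lo; // reassembled via REG_SEQUENCE
}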
case ISD::ADD: { - const AMDGPUSubtarget &ST = TM.getSubtarget(); if (N->getValueType(0) != MVT::i64 || ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) break; @@ -232,12 +239,13 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { AddLoArgs.push_back(SDValue(Lo0, 0)); AddLoArgs.push_back(SDValue(Lo1, 0)); - SDNode *AddLo = CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, - VTList, AddLoArgs); + SDNode *AddLo = CurDAG->getMachineNode( + isCFDepth0() ? AMDGPU::S_ADD_I32 : AMDGPU::V_ADD_I32_e32, + DL, VTList, AddLoArgs); SDValue Carry = SDValue(AddLo, 1); - SDNode *AddHi = CurDAG->getMachineNode(AMDGPU::S_ADDC_U32, DL, - MVT::i32, SDValue(Hi0, 0), - SDValue(Hi1, 0), Carry); + SDNode *AddHi = CurDAG->getMachineNode( + isCFDepth0() ? AMDGPU::S_ADDC_U32 : AMDGPU::V_ADDC_U32_e32, + DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); SDValue Args[5] = { CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), @@ -246,11 +254,10 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { SDValue(AddHi,0), Sub1, }; - return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5); + return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); } case ISD::BUILD_VECTOR: { unsigned RegClassID; - const AMDGPUSubtarget &ST = TM.getSubtarget(); const AMDGPURegisterInfo *TRI = static_cast(TM.getRegisterInfo()); const SIRegisterInfo *SIRI = @@ -316,7 +323,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { // 16 = Max Num Vector Elements // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) // 1 = Vector Register Class - SDValue RegSeqArgs[16 * 2 + 1]; + SmallVector RegSeqArgs(N->getNumOperands() * 2 + 1); RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32); bool IsRegSeq = true; @@ -333,11 +340,10 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { if (!IsRegSeq) break; return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), - RegSeqArgs, 2 * N->getNumOperands() + 1); + RegSeqArgs); } case ISD::BUILD_PAIR: { SDValue RC, SubReg0, SubReg1; - const AMDGPUSubtarget &ST = TM.getSubtarget(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { break; } @@ -346,7 +352,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32); } else if (N->getValueType(0) == MVT::i64) { - RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32); + RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32); SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); } else { @@ -357,8 +363,37 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), N->getValueType(0), Ops); } - case AMDGPUISD::REGISTER_LOAD: { + + case ISD::Constant: + case ISD::ConstantFP: { const AMDGPUSubtarget &ST = TM.getSubtarget(); + if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || + N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) + break; + + uint64_t Imm; + if (ConstantFPSDNode *FP = dyn_cast(N)) + Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue(); + else { + ConstantSDNode *C = cast(N); + Imm = C->getZExtValue(); + } + + SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32, + CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32)); + SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32, + CurDAG->getConstant(Imm >> 32, MVT::i32)); + const SDValue Ops[] 
= { + CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), + SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32), + SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32) + }; + + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N), + N->getValueType(0), Ops); + } + + case AMDGPUISD::REGISTER_LOAD: { if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) break; SDValue Addr, Offset; @@ -375,7 +410,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { Ops); } case AMDGPUISD::REGISTER_STORE: { - const AMDGPUSubtarget &ST = TM.getSubtarget(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) break; SDValue Addr, Offset; @@ -391,42 +425,95 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { CurDAG->getVTList(MVT::Other), Ops); } + + case AMDGPUISD::BFE_I32: + case AMDGPUISD::BFE_U32: { + if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) + break; + + // There is a scalar version available, but unlike the vector version which + // has a separate operand for the offset and width, the scalar version packs + // the width and offset into a single operand. Try to move to the scalar + // version if the offsets are constant, so that we can try to keep extended + // loads of kernel arguments in SGPRs. + + // TODO: Technically we could try to pattern match scalar bitshifts of + // dynamic values, but it's probably not useful. + ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); + if (!Offset) + break; + + ConstantSDNode *Width = dyn_cast(N->getOperand(2)); + if (!Width) + break; + + bool Signed = Opc == AMDGPUISD::BFE_I32; + + // Transformation function, pack the offset and width of a BFE into + // the format expected by the S_BFE_I32 / S_BFE_U32. In the second + // source, bits [5:0] contain the offset and bits [22:16] the width. + + uint32_t OffsetVal = Offset->getZExtValue(); + uint32_t WidthVal = Width->getZExtValue(); + + uint32_t PackedVal = OffsetVal | WidthVal << 16; + + SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32); + return CurDAG->getMachineNode(Signed ? 
AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, + SDLoc(N), + MVT::i32, + N->getOperand(0), + PackedOffsetWidth); + + } } return SelectCode(N); } -bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) { - if (!ptr) { +bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) { + assert(AS != 0 && "Use checkPrivateAddress instead."); + if (!Ptr) return false; - } - Type *ptrType = ptr->getType(); - return dyn_cast(ptrType)->getAddressSpace() == addrspace; + + return Ptr->getType()->getPointerAddressSpace() == AS; +} + +bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) { + if (Op->getPseudoValue()) + return true; + + if (PointerType *PT = dyn_cast(Op->getValue()->getType())) + return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; + + return false; } bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); + return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); } bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) { - return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)); + const Value *MemVal = N->getMemOperand()->getValue(); + return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && + !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && + !checkType(MemVal, AMDGPUAS::REGION_ADDRESS)); } bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); + return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); } bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) { - return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); + return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); } bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const { - if (CbId == -1) { - return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS); - } - return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_BUFFER_0 + CbId); + const Value *MemVal = N->getMemOperand()->getValue(); + if (CbId == -1) + return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS); + + return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId); } bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const { @@ -437,27 +524,26 @@ bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const { return true; } } - return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS); + return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS); } bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const { - return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS); + return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS); } bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const { - return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS); + return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS); } bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const { - return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS); + return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS); } bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { MachineMemOperand *MMO = N->getMemOperand(); - if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { + if (checkPrivateAddress(N->getMemOperand())) { if (MMO) { - const Value *V = MMO->getValue(); - 
const PseudoSourceValue *PSV = dyn_cast(V); + const PseudoSourceValue *PSV = MMO->getPseudoValue(); if (PSV && PSV == PseudoSourceValue::getConstantPool()) { return true; } @@ -467,24 +553,34 @@ bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { } bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const { - if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) { + if (checkPrivateAddress(N->getMemOperand())) { // Check to make sure we are not a constant pool load or a constant load // that is marked as a private load if (isCPLoad(N) || isConstantLoad(N, -1)) { return false; } } - if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS) - && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) { + + const Value *MemVal = N->getMemOperand()->getValue(); + if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) && + !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) && + !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) && + !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) && + !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) && + !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)){ return true; } return false; } +bool AMDGPUDAGToDAGISel::isCFDepth0() const { + // FIXME: Figure out a way to use DominatorTree analysis here. + const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock(); + const Function *Fn = FuncInfo->Fn; + return &Fn->front() == CurBlock || &Fn->back() == CurBlock; +} + + const char *AMDGPUDAGToDAGISel::getPassName() const { return "AMDGPU DAG->DAG Pattern Instruction Selection"; } @@ -499,7 +595,7 @@ const char *AMDGPUDAGToDAGISel::getPassName() const { //===----------------------------------------------------------------------===// bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, - SDValue& IntPtr) { + SDValue& IntPtr) { if (ConstantSDNode *Cst = dyn_cast(Addr)) { IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true); return true; @@ -509,7 +605,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, SDValue& BaseReg, SDValue &Offset) { - if (!dyn_cast(Addr)) { + if (!isa(Addr)) { BaseReg = Addr; Offset = CurDAG->getIntPtrConstant(0, true); return true; @@ -519,7 +615,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset) { - ConstantSDNode * IMMOffset; + ConstantSDNode *IMMOffset; if (Addr.getOpcode() == ISD::ADD && (IMMOffset = dyn_cast(Addr.getOperand(1))) @@ -563,52 +659,9 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } -SDValue AMDGPUDAGToDAGISel::SimplifyI24(SDValue &Op) { - APInt Demanded = APInt(32, 0x00FFFFFF); - APInt KnownZero, KnownOne; - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true); - const TargetLowering *TLI = getTargetLowering(); - if (TLI->SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) { - CurDAG->ReplaceAllUsesWith(Op, TLO.New); - CurDAG->RepositionNode(Op.getNode(), TLO.New.getNode()); - return SimplifyI24(TLO.New); - } else { - return Op; - } -} - -bool AMDGPUDAGToDAGISel::SelectI24(SDValue Op, SDValue &I24) { - - assert(Op.getValueType() == MVT::i32); - - if (CurDAG->ComputeNumSignBits(Op) == 9) { - I24 = 
SimplifyI24(Op); - return true; - } - return false; -} - -bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) { - APInt KnownZero; - APInt KnownOne; - CurDAG->ComputeMaskedBits(Op, KnownZero, KnownOne); - - assert (Op.getValueType() == MVT::i32); - - // ANY_EXTEND and EXTLOAD operations can only be done on types smaller than - // i32. These smaller types are legal to use with the i24 instructions. - if ((KnownZero & APInt(KnownZero.getBitWidth(), 0xFF000000)) == 0xFF000000 || - Op.getOpcode() == ISD::ANY_EXTEND || - ISD::isEXTLoad(Op.getNode())) { - U24 = SimplifyI24(Op); - return true; - } - return false; -} - void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = - (*(const AMDGPUTargetLowering*)getTargetLowering()); + *static_cast(getTargetLowering()); bool IsModified = false; do { IsModified = false; diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 183725c..6c443ea 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -28,8 +28,50 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" using namespace llvm; + +namespace { + +/// Diagnostic information for unimplemented or unsupported feature reporting. +class DiagnosticInfoUnsupported : public DiagnosticInfo { +private: + const Twine &Description; + const Function &Fn; + + static int KindID; + + static int getKindID() { + if (KindID == 0) + KindID = llvm::getNextAvailablePluginDiagnosticKind(); + return KindID; + } + +public: + DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(getKindID(), Severity), + Description(Desc), + Fn(Fn) { } + + const Function &getFunction() const { return Fn; } + const Twine &getDescription() const { return Description; } + + void print(DiagnosticPrinter &DP) const override { + DP << "unsupported " << getDescription() << " in " << Fn.getName(); + } + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == getKindID(); + } +}; + +int DiagnosticInfoUnsupported::KindID = 0; +} + + static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { @@ -88,6 +130,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::f64, Promote); AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); + setOperationAction(ISD::STORE, MVT::v2f64, Promote); + AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v2i64); + // Custom lowering of vector stores is required for local address space // stores. setOperationAction(ISD::STORE, MVT::v4i32, Custom); @@ -103,6 +148,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : // handle 64-bit stores. 
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i8, Expand); setTruncStoreAction(MVT::i64, MVT::i1, Expand); setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand); setTruncStoreAction(MVT::v4i64, MVT::v4i1, Expand); @@ -126,6 +173,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::f64, Promote); AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); + setOperationAction(ISD::LOAD, MVT::v2f64, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v2i64); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom); @@ -152,15 +202,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::BR_CC, MVT::i1, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::FNEG, MVT::v2f32, Expand); setOperationAction(ISD::FNEG, MVT::v4f32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::SUB, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); @@ -168,10 +222,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : static const MVT::SimpleValueType IntTypes[] = { MVT::v2i32, MVT::v4i32 }; - const size_t NumIntTypes = array_lengthof(IntTypes); - for (unsigned int x = 0; x < NumIntTypes; ++x) { - MVT::SimpleValueType VT = IntTypes[x]; + for (MVT VT : IntTypes) { //Expand the following operations for the current type by default setOperationAction(ISD::ADD, VT, Expand); setOperationAction(ISD::AND, VT, Expand); @@ -195,12 +247,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : static const MVT::SimpleValueType FloatTypes[] = { MVT::v2f32, MVT::v4f32 }; - const size_t NumFloatTypes = array_lengthof(FloatTypes); - for (unsigned int x = 0; x < NumFloatTypes; ++x) { - MVT::SimpleValueType VT = FloatTypes[x]; + for (MVT VT : FloatTypes) { setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FADD, VT, Expand); + setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); @@ -208,25 +259,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FMUL, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); + setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); } - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); - 
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); + setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SELECT_CC); } //===----------------------------------------------------------------------===// @@ -325,6 +364,25 @@ SDValue AMDGPUTargetLowering::LowerReturn( // Target specific lowering //===---------------------------------------------------------------------===// +SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SDValue Callee = CLI.Callee; + SelectionDAG &DAG = CLI.DAG; + + const Function &Fn = *DAG.getMachineFunction().getFunction(); + + StringRef FuncName(""); + + if (const ExternalSymbolSDNode *G = dyn_cast(Callee)) + FuncName = G->getSymbol(); + else if (const GlobalAddressSDNode *G = dyn_cast(Callee)) + FuncName = G->getGlobal()->getName(); + + DiagnosticInfoUnsupported NoCalls(Fn, "call to function " + FuncName); + DAG.getContext()->diagnose(NoCalls); + return SDValue(); +} + SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -361,12 +419,111 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do // nothing here and let the illegal result integer be handled normally. return; + case ISD::UDIV: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), + N->getOperand(0), N->getOperand(1)); + Results.push_back(UDIVREM); + break; + } + case ISD::UREM: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), + N->getOperand(0), N->getOperand(1)); + Results.push_back(UDIVREM.getValue(1)); + break; + } + case ISD::UDIVREM: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); + + SDValue one = DAG.getConstant(1, HalfVT); + SDValue zero = DAG.getConstant(0, HalfVT); + + //HiLo split + SDValue LHS = N->getOperand(0); + SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero); + SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one); + + SDValue RHS = N->getOperand(1); + SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero); + SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one); + + // Get Speculative values + SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); + SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); + + SDValue REM_Hi = zero; + SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ); + + SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ); + SDValue DIV_Lo = zero; + + const unsigned halfBitWidth = HalfVT.getSizeInBits(); + + for (unsigned i = 0; i < halfBitWidth; ++i) { + SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT); + // Get Value of high bit + SDValue HBit; + if (halfBitWidth == 32 && Subtarget->hasBFE()) { + HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one); + } else { + HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); + HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); + } + + SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo, + 
DAG.getConstant(halfBitWidth - 1, HalfVT)); + REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one); + REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry); + + REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one); + REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit); + + + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); + + SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT); + SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE); + + DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); + + // Update REM + + SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); + + REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE); + REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero); + REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one); + } + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); + SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi); + Results.push_back(DIV); + Results.push_back(REM); + break; + } default: return; } } +// FIXME: This implements accesses to initialized globals in the constant +// address space by copying them to private and accessing that. It does not +// properly handle illegal types or vectors. The private vector loads are not +// scalarized, and the illegal scalars hit an assertion. This technique will not +// work well with large initializers, and this should eventually be +// removed. Initialized globals should be placed into a data section that the +// runtime will load into a buffer before the kernel is executed. Uses of the +// global need to be replaced with a pointer loaded from an implicit kernel +// argument into this buffer holding the copy of the data, which will remove the +// need for any of this. 
SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, const GlobalValue *GV, const SDValue &InitPtr, @@ -380,29 +537,60 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr, MachinePointerInfo(UndefValue::get(PtrTy)), false, false, TD->getPrefTypeAlignment(CI->getType())); - } else if (const ConstantFP *CFP = dyn_cast(Init)) { + } + + if (const ConstantFP *CFP = dyn_cast(Init)) { EVT VT = EVT::getEVT(CFP->getType()); PointerType *PtrTy = PointerType::get(CFP->getType(), 0); return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr, MachinePointerInfo(UndefValue::get(PtrTy)), false, false, TD->getPrefTypeAlignment(CFP->getType())); - } else if (Init->getType()->isAggregateType()) { + } + + Type *InitTy = Init->getType(); + if (StructType *ST = dyn_cast(InitTy)) { + const StructLayout *SL = TD->getStructLayout(ST); + EVT PtrVT = InitPtr.getValueType(); - unsigned NumElements = Init->getType()->getArrayNumElements(); + SmallVector Chains; + + for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) { + SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset); + + Constant *Elt = Init->getAggregateElement(I); + Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG)); + } + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + if (SequentialType *SeqTy = dyn_cast(InitTy)) { + EVT PtrVT = InitPtr.getValueType(); + + unsigned NumElements; + if (ArrayType *AT = dyn_cast(SeqTy)) + NumElements = AT->getNumElements(); + else if (VectorType *VT = dyn_cast(SeqTy)) + NumElements = VT->getNumElements(); + else + llvm_unreachable("Unexpected type"); + + unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType()); SmallVector Chains; for (unsigned i = 0; i < NumElements; ++i) { - SDValue Offset = DAG.getConstant(i * TD->getTypeAllocSize( - Init->getType()->getArrayElementType()), PtrVT); + SDValue Offset = DAG.getConstant(i * EltSize, PtrVT); SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset); - Chains.push_back(LowerConstantInitializer(Init->getAggregateElement(i), - GV, Ptr, Chain, DAG)); + + Constant *Elt = Init->getAggregateElement(i); + Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG)); } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], - Chains.size()); - } else { - Init->dump(); - llvm_unreachable("Unhandled constant initializer"); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } + + Init->dump(); + llvm_unreachable("Unhandled constant initializer"); } SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, @@ -440,7 +628,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, unsigned Size = TD->getTypeAllocSize(EltType); unsigned Alignment = TD->getPrefTypeAlignment(EltType); - const GlobalVariable *Var = dyn_cast(GV); + const GlobalVariable *Var = cast(GV); const Constant *Init = Var->getInitializer(); int FI = FrameInfo->CreateStackObject(Size, Alignment, false); SDValue InitPtr = DAG.getFrameIndex(FI, @@ -461,7 +649,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) { Ops.push_back((*I)->getOperand(i)); } - DAG.UpdateNodeOperands(*I, &Ops[0], Ops.size()); + DAG.UpdateNodeOperands(*I, Ops); } return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), 
getPointerTy(AMDGPUAS::CONSTANT_ADDRESS)); @@ -469,44 +657,28 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, } } -void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, - SmallVectorImpl &Args, - unsigned Start, - unsigned Count) const { - EVT VT = Op.getValueType(); - for (unsigned i = Start, e = Start + Count; i != e; ++i) { - Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), - VT.getVectorElementType(), - Op, DAG.getConstant(i, MVT::i32))); - } -} - SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SmallVector Args; SDValue A = Op.getOperand(0); SDValue B = Op.getOperand(1); - ExtractVectorElements(A, DAG, Args, 0, - A.getValueType().getVectorNumElements()); - ExtractVectorElements(B, DAG, Args, 0, - B.getValueType().getVectorNumElements()); + DAG.ExtractVectorElements(A, Args); + DAG.ExtractVectorElements(B, Args); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), - &Args[0], Args.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args); } SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const { SmallVector Args; - EVT VT = Op.getValueType(); unsigned Start = cast(Op.getOperand(1))->getZExtValue(); - ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, - VT.getVectorNumElements()); + EVT VT = Op.getValueType(); + DAG.ExtractVectorElements(Op.getOperand(0), Args, Start, + VT.getVectorNumElements()); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), - &Args[0], Args.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args); } SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, @@ -560,6 +732,22 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), Op.getOperand(2)); + case AMDGPUIntrinsic::AMDGPU_umul24: + return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case AMDGPUIntrinsic::AMDGPU_imul24: + return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case AMDGPUIntrinsic::AMDGPU_umad24: + return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case AMDGPUIntrinsic::AMDGPU_imad24: + return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case AMDGPUIntrinsic::AMDGPU_bfe_i32: return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, Op.getOperand(1), @@ -590,8 +778,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, ///IABS(a) = SMAX(sub(0, a), a) SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, - SelectionDAG &DAG) const { - + SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), @@ -603,7 +790,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, /// Linear Interpolation /// LRP(a, b, c) = muladd(a, b, (1 - a) * c) SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, @@ -617,16 +804,16 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, } /// \brief Generate Min/Max node -SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, - SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT VT = Op.getValueType(); +SDValue 
AMDGPUTargetLowering::CombineMinMax(SDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + EVT VT = N->getValueType(0); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue True = Op.getOperand(2); - SDValue False = Op.getOperand(3); - SDValue CC = Op.getOperand(4); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue True = N->getOperand(2); + SDValue False = N->getOperand(3); + SDValue CC = N->getOperand(4); if (VT != MVT::f32 || !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { @@ -654,10 +841,8 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, case ISD::SETOLT: case ISD::SETLE: case ISD::SETLT: { - if (LHS == True) - return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); - else - return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); + unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX; + return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETGT: case ISD::SETGE: @@ -665,15 +850,13 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, case ISD::SETOGE: case ISD::SETUGT: case ISD::SETOGT: { - if (LHS == True) - return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); - else - return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); + unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN; + return DAG.getNode(Opc, DL, VT, LHS, RHS); } case ISD::SETCC_INVALID: llvm_unreachable("Invalid setcc condcode!"); } - return Op; + return SDValue(); } SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, @@ -695,8 +878,7 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, MemEltVT, Load->isVolatile(), Load->isNonTemporal(), Load->getAlignment())); } - return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), - Loads.data(), Loads.size()); + return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), Loads); } SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, @@ -713,32 +895,46 @@ SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, } SDLoc DL(Op); - const SDValue &Value = Store->getValue(); + SDValue Value = Store->getValue(); EVT VT = Value.getValueType(); - const SDValue &Ptr = Store->getBasePtr(); + EVT ElemVT = VT.getVectorElementType(); + SDValue Ptr = Store->getBasePtr(); EVT MemEltVT = MemVT.getVectorElementType(); unsigned MemEltBits = MemEltVT.getSizeInBits(); unsigned MemNumElements = MemVT.getVectorNumElements(); - EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); - SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, PackedVT); + unsigned PackedSize = MemVT.getStoreSizeInBits(); + SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, MVT::i32); + + assert(Value.getValueType().getScalarSizeInBits() >= 32); SDValue PackedValue; for (unsigned i = 0; i < MemNumElements; ++i) { - EVT ElemVT = VT.getVectorElementType(); SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, DAG.getConstant(i, MVT::i32)); - Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); - Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); - SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); - Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); + Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32); + Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg + + SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32); + Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift); + if (i == 0) { PackedValue = Elt; } else { - PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); + PackedValue = 
DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt); } } + + if (PackedSize < 32) { + EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize); + return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr, + Store->getMemOperand()->getPointerInfo(), + PackedVT, + Store->isNonTemporal(), Store->isVolatile(), + Store->getAlignment()); + } + return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, - MachinePointerInfo(Store->getMemOperand()->getValue()), + Store->getMemOperand()->getPointerInfo(), Store->isVolatile(), Store->isNonTemporal(), Store->getAlignment()); } @@ -766,7 +962,7 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, MemEltVT, Store->isVolatile(), Store->isNonTemporal(), Store->getAlignment())); } - return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); + return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains); } SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { @@ -788,9 +984,24 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32); } + if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) { + assert(VT == MVT::i1 && "Only i1 non-extloads expected"); + // FIXME: Copied from PPC + // First, load into 32 bits, then truncate to 1 bit. + + SDValue Chain = Load->getChain(); + SDValue BasePtr = Load->getBasePtr(); + MachineMemOperand *MMO = Load->getMemOperand(); + + SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain, + BasePtr, MVT::i8, MMO); + return DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD); + } + // Lower loads constant address space global variable loads if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && - isa(GetUnderlyingObject(Load->getPointerInfo().V))) { + isa( + GetUnderlyingObject(Load->getMemOperand()->getValue()))) { SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL, getPointerTy(AMDGPUAS::PRIVATE_ADDRESS)); @@ -887,15 +1098,13 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Num = Op.getOperand(0); SDValue Den = Op.getOperand(1); - SmallVector Results; - // RCP = URECIP(Den) = 2^32 / Den + e // e is rounding error. SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); @@ -985,10 +1194,11 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), Remainder_A_Den, Rem, ISD::SETEQ); - SDValue Ops[2]; - Ops[0] = Div; - Ops[1] = Rem; - return DAG.getMergeValues(Ops, 2, DL); + SDValue Ops[2] = { + Div, + Rem + }; + return DAG.getMergeValues(Ops, DL); } SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, @@ -1029,81 +1239,197 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, MVT VT = Op.getSimpleValueType(); MVT ScalarVT = VT.getScalarType(); - unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits(); - unsigned DestBits = ScalarVT.getSizeInBits(); - unsigned BitsDiff = DestBits - SrcBits; - - if (!Subtarget->hasBFE()) - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + if (!VT.isVector()) + return SDValue(); SDValue Src = Op.getOperand(0); - if (VT.isVector()) { - SDLoc DL(Op); - // Need to scalarize this, and revisit each of the scalars later. - // TODO: Don't scalarize on Evergreen? 
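
Returning to the MergeVectorStore packing loop above: each sub-32-bit element is masked, shifted by MemEltBits * i, and OR'd into a single 32-bit word, so a v4i8 store becomes one 32-bit store (or a truncstore when the packed size is under 32 bits). A minimal standalone sketch for the v4i8 case; the function name is illustrative.

    #include <cassert>
    #include <cstdint>

    static uint32_t packV4I8(const uint8_t Elts[4]) {
      const uint32_t Mask = (1u << 8) - 1;  // (1 << MemEltBits) - 1
      uint32_t Packed = 0;
      for (unsigned I = 0; I != 4; ++I) {
        uint32_t Elt = Elts[I] & Mask;      // AND with the element mask
        Packed |= Elt << (8 * I);           // SHL by MemEltBits * i, then OR
      }
      return Packed;
    }

    int main() {
      const uint8_t V[4] = {0x11, 0x22, 0x33, 0x44};
      assert(packV4I8(V) == 0x44332211u);   // element 0 lands in the low byte
      return 0;
    }
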
- unsigned NElts = VT.getVectorNumElements(); - SmallVector Args; - ExtractVectorElements(Src, DAG, Args, 0, NElts); + SDLoc DL(Op); - SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); - for (unsigned I = 0; I < NElts; ++I) - Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); + // TODO: Don't scalarize on Evergreen? + unsigned NElts = VT.getVectorNumElements(); + SmallVector Args; + DAG.ExtractVectorElements(Src, Args, 0, NElts); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size()); - } + SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); + for (unsigned I = 0; I < NElts; ++I) + Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); - if (SrcBits == 32) { - SDLoc DL(Op); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args); +} - // If the source is 32-bits, this is really half of a 2-register pair, and - // we need to discard the unused half of the pair. - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src); - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc); - } +//===----------------------------------------------------------------------===// +// Custom DAG optimizations +//===----------------------------------------------------------------------===// - unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1; +static bool isU24(SDValue Op, SelectionDAG &DAG) { + APInt KnownZero, KnownOne; + EVT VT = Op.getValueType(); + DAG.computeKnownBits(Op, KnownZero, KnownOne); - // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it - // might not be worth the effort, and will need to expand to shifts when - // fixing SGPR copies. - if (SrcBits < 32 && DestBits <= 32) { - SDLoc DL(Op); - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); - - if (DestBits != 32) - Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src); - - // FIXME: This should use TargetConstant, but that hits assertions for - // Evergreen. - SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT, - Op.getOperand(0), // Operand - DAG.getConstant(0, ExtVT), // Offset - DAG.getConstant(SrcBits, ExtVT)); // Width - - // Truncate to the original type if necessary. - if (ScalarVT == MVT::i32) - return Ext; - return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext); - } + return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24; +} - // For small types, extend to 32-bits first. - if (SrcBits < 32) { - SDLoc DL(Op); - MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts); +static bool isI24(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); - SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src); - SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32, - DL, - ExtVT, - TruncSrc, // Operand - DAG.getConstant(0, ExtVT), // Offset - DAG.getConstant(SrcBits, ExtVT)); // Width + // In order for this to be a signed 24-bit value, bit 23, must + // be a sign bit. + return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated + // as unsigned 24-bit values. 
+ (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24; +} + +static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) { + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = Op.getValueType(); + + APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24); + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, true, true); + if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + DCI.CommitTargetLoweringOpt(TLO); +} - return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32); +template +static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, + uint32_t Offset, uint32_t Width) { + if (Width + Offset < 32) { + IntTy Result = (Src0 << (32 - Offset - Width)) >> (32 - Width); + return DAG.getConstant(Result, MVT::i32); } - // For everything else, use the standard bitshift expansion. - return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG); + return DAG.getConstant(Src0 >> Offset, MVT::i32); +} + +SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + + switch(N->getOpcode()) { + default: break; + case ISD::MUL: { + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Mul; + + // FIXME: Add support for 24-bit multiply with 64-bit output on SI. + if (VT.isVector() || VT.getSizeInBits() > 32) + break; + + if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { + N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); + N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); + Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1); + } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { + N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); + N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); + Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1); + } else { + break; + } + + // We need to use sext even for MUL_U24, because MUL_U24 is used + // for signed multiply of 8 and 16-bit types. + SDValue Reg = DAG.getSExtOrTrunc(Mul, DL, VT); + + return Reg; + } + case AMDGPUISD::MUL_I24: + case AMDGPUISD::MUL_U24: { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + simplifyI24(N0, DCI); + simplifyI24(N1, DCI); + return SDValue(); + } + case ISD::SELECT_CC: { + return CombineMinMax(N, DAG); + } + case AMDGPUISD::BFE_I32: + case AMDGPUISD::BFE_U32: { + assert(!N->getValueType(0).isVector() && + "Vector handling of BFE not implemented"); + ConstantSDNode *Width = dyn_cast(N->getOperand(2)); + if (!Width) + break; + + uint32_t WidthVal = Width->getZExtValue() & 0x1f; + if (WidthVal == 0) + return DAG.getConstant(0, MVT::i32); + + ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); + if (!Offset) + break; + + SDValue BitsFrom = N->getOperand(0); + uint32_t OffsetVal = Offset->getZExtValue() & 0x1f; + + bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32; + + if (OffsetVal == 0) { + // This is already sign / zero extended, so try to fold away extra BFEs. + unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal); + + unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom); + if (OpSignBits >= SignBits) + return BitsFrom; + + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal); + if (Signed) { + // This is a sign_extend_inreg. Replace it to take advantage of existing + // DAG Combines. If not eliminated, we will match back to BFE during + // selection. 
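
As a standalone check of the shift identity used by constantFoldBFE above: shifting the field to the top of the register and back down extracts it, arithmetically for BFE_I32 (sign-extends) and logically for BFE_U32 (zero-extends). Names are illustrative; the sketch assumes a nonzero width and an offset below 32, as the combine above guarantees, and the signed flavor relies on two's-complement conversion and arithmetic right shift (guaranteed since C++20, the de facto behavior of mainstream compilers before that).

    #include <cassert>
    #include <cstdint>

    static uint32_t bfeU32(uint32_t Src, uint32_t Offset, uint32_t Width) {
      if (Width + Offset < 32)
        return (Src << (32 - Offset - Width)) >> (32 - Width); // logical shifts
      return Src >> Offset;
    }

    static int32_t bfeI32(int32_t Src, uint32_t Offset, uint32_t Width) {
      if (Width + Offset < 32) // shift the field to the top, arithmetic shift back
        return int32_t(uint32_t(Src) << (32 - Offset - Width)) >> (32 - Width);
      return Src >> Offset;
    }

    int main() {
      assert(bfeU32(0xABCD1234u, 8, 8) == 0x12u); // extract bits [15:8]
      assert(bfeI32(0x0000F000, 12, 4) == -1);    // 0xF sign-extends from bit 3
      assert(bfeU32(0x0000F000u, 12, 4) == 0xFu); // zero-extends
      return 0;
    }
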
+ + // TODO: The sext_inreg of extended types ends, although we could + // handle them in a single BFE. + return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom, + DAG.getValueType(SmallVT)); + } + + return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT); + } + + if (ConstantSDNode *Val = dyn_cast(N->getOperand(0))) { + if (Signed) { + return constantFoldBFE(DAG, + Val->getSExtValue(), + OffsetVal, + WidthVal); + } + + return constantFoldBFE(DAG, + Val->getZExtValue(), + OffsetVal, + WidthVal); + } + + APInt Demanded = APInt::getBitsSet(32, + OffsetVal, + OffsetVal + WidthVal); + + if ((OffsetVal + WidthVal) >= 32) { + SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32); + return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, + BitsFrom, ShiftVal); + } + + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { + DCI.CommitTargetLoweringOpt(TLO); + } + + break; + } + } + return SDValue(); } //===----------------------------------------------------------------------===// @@ -1181,7 +1507,7 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; // AMDIL DAG nodes NODE_NAME_CASE(CALL); NODE_NAME_CASE(UMUL); @@ -1202,6 +1528,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) + NODE_NAME_CASE(MUL_U24) + NODE_NAME_CASE(MUL_I24) + NODE_NAME_CASE(MAD_U24) + NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(URECIP) NODE_NAME_CASE(DOT4) NODE_NAME_CASE(EXPORT) @@ -1219,22 +1549,22 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -static void computeMaskedBitsForMinMax(const SDValue Op0, - const SDValue Op1, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) { +static void computeKnownBitsForMinMax(const SDValue Op0, + const SDValue Op1, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) { APInt Op0Zero, Op0One; APInt Op1Zero, Op1One; - DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth); - DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth); + DAG.computeKnownBits(Op0, Op0Zero, Op0One, Depth); + DAG.computeKnownBits(Op1, Op1Zero, Op1One, Depth); KnownZero = Op0Zero & Op1Zero; KnownOne = Op0One & Op1One; } -void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( +void AMDGPUTargetLowering::computeKnownBitsForTargetNode( const SDValue Op, APInt &KnownZero, APInt &KnownOne, @@ -1242,8 +1572,14 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( unsigned Depth) const { KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything. + + APInt KnownZero2; + APInt KnownOne2; unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: + break; case ISD::INTRINSIC_WO_CHAIN: { // FIXME: The intrinsic should just use the node.
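
On the computeKnownBitsForMinMax helper above: a min/max returns one of its two operands, so only bit positions known in both inputs (known zero, or known one) stay known in the result, hence the two intersections. A tiny standalone sketch; the struct and names are illustrative.

    #include <cassert>
    #include <cstdint>

    struct Known32 {
      uint32_t Zero; // bits known to be 0
      uint32_t One;  // bits known to be 1
    };

    // Whichever operand min/max picks, only facts true of both survive.
    static Known32 knownForMinMax(Known32 A, Known32 B) {
      return {A.Zero & B.Zero, A.One & B.One};
    }

    int main() {
      Known32 A = {0x00000001u, 0x80000000u}; // bit 0 clear, bit 31 set
      Known32 B = {0x00000003u, 0x80000000u}; // bits 1:0 clear, bit 31 set
      Known32 R = knownForMinMax(A, B);
      assert(R.Zero == 0x00000001u && R.One == 0x80000000u);
      return 0;
    }
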
switch (cast(Op.getOperand(0))->getZExtValue()) { @@ -1251,8 +1587,8 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( case AMDGPUIntrinsic::AMDGPU_umax: case AMDGPUIntrinsic::AMDGPU_imin: case AMDGPUIntrinsic::AMDGPU_umin: - computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2), - KnownZero, KnownOne, DAG, Depth); + computeKnownBitsForMinMax(Op.getOperand(1), Op.getOperand(2), + KnownZero, KnownOne, DAG, Depth); break; default: break; @@ -1264,10 +1600,62 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode( case AMDGPUISD::UMAX: case AMDGPUISD::SMIN: case AMDGPUISD::UMIN: - computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1), - KnownZero, KnownOne, DAG, Depth); + computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1), + KnownZero, KnownOne, DAG, Depth); break; - default: + + case AMDGPUISD::BFE_I32: + case AMDGPUISD::BFE_U32: { + ConstantSDNode *CWidth = dyn_cast(Op.getOperand(2)); + if (!CWidth) + return; + + unsigned BitWidth = 32; + uint32_t Width = CWidth->getZExtValue() & 0x1f; + if (Width == 0) { + KnownZero = APInt::getAllOnesValue(BitWidth); + KnownOne = APInt::getNullValue(BitWidth); + return; + } + + // FIXME: This could do a lot more. If offset is 0, should be the same as + // sign_extend_inreg implementation, but that involves duplicating it. + if (Opc == AMDGPUISD::BFE_I32) + KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width); + else + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width); + break; } + } +} + +unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, + const SelectionDAG &DAG, + unsigned Depth) const { + switch (Op.getOpcode()) { + case AMDGPUISD::BFE_I32: { + ConstantSDNode *Width = dyn_cast(Op.getOperand(2)); + if (!Width) + return 1; + + unsigned SignBits = 32 - Width->getZExtValue() + 1; + ConstantSDNode *Offset = dyn_cast(Op.getOperand(1)); + if (!Offset || !Offset->isNullValue()) + return SignBits; + + // TODO: Could probably figure something out with non-0 offsets. + unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); + return std::max(SignBits, Op0SignBits); + } + + case AMDGPUISD::BFE_U32: { + ConstantSDNode *Width = dyn_cast(Op.getOperand(2)); + return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1; + } + + default: + return 1; + } } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index a019616..d5d821d 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -29,9 +29,6 @@ protected: const AMDGPUSubtarget *Subtarget; private: - void ExtractVectorElements(SDValue Op, SelectionDAG &DAG, - SmallVectorImpl &Args, - unsigned Start, unsigned Count) const; SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV, const SDValue &InitPtr, SDValue Chain, @@ -44,7 +41,7 @@ private: /// of the same bitwidth. SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const; /// \brief Split a vector store into multiple scalar stores. - /// \returns The resulting chain. + /// \returns The resulting chain. 
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; @@ -83,62 +80,67 @@ protected: public: AMDGPUTargetLowering(TargetMachine &TM); - virtual bool isFAbsFree(EVT VT) const override; - virtual bool isFNegFree(EVT VT) const override; - virtual bool isTruncateFree(EVT Src, EVT Dest) const override; - virtual bool isTruncateFree(Type *Src, Type *Dest) const override; - - virtual bool isZExtFree(Type *Src, Type *Dest) const override; - virtual bool isZExtFree(EVT Src, EVT Dest) const override; - - virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; - - virtual MVT getVectorIdxTy() const override; - virtual bool isLoadBitCastBeneficial(EVT, EVT) const override; - virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - SDLoc DL, SelectionDAG &DAG) const; - virtual SDValue LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const { - CLI.Callee.dump(); - llvm_unreachable("Undefined function"); - } + bool isFAbsFree(EVT VT) const override; + bool isFNegFree(EVT VT) const override; + bool isTruncateFree(EVT Src, EVT Dest) const override; + bool isTruncateFree(Type *Src, Type *Dest) const override; + + bool isZExtFree(Type *Src, Type *Dest) const override; + bool isZExtFree(EVT Src, EVT Dest) const override; + + bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - virtual void ReplaceNodeResults(SDNode * N, - SmallVectorImpl &Results, - SelectionDAG &DAG) const override; + MVT getVectorIdxTy() const override; + bool isLoadBitCastBeneficial(EVT, EVT) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + SDLoc DL, SelectionDAG &DAG) const override; + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + void ReplaceNodeResults(SDNode * N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const override; SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const; - virtual const char* getTargetNodeName(unsigned Opcode) const; + SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const; + const char* getTargetNodeName(unsigned Opcode) const override; - virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const { + virtual SDNode *PostISelFolding(MachineSDNode *N, + SelectionDAG &DAG) const { return N; } /// \brief Determine which of the bits specified in \p Mask are known to be /// either zero or one and return them in the \p KnownZero and \p KnownOne /// bitsets. 
- virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const override; + void computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + virtual unsigned ComputeNumSignBitsForTargetNode( + SDValue Op, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; // Functions defined in AMDILISelLowering.cpp public: - virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, unsigned Intrinsic) const; + bool getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, unsigned Intrinsic) const override; /// We want to mark f32/f64 floating point values as legal. - bool isFPImmLegal(const APFloat &Imm, EVT VT) const; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// We don't want to shrink f64/f32 constants. - bool ShouldShrinkFPConstant(EVT VT) const; + bool ShouldShrinkFPConstant(EVT VT) const override; + + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; private: void InitAMDILLowering(); @@ -158,7 +160,6 @@ private: SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; }; namespace AMDGPUISD { @@ -188,6 +189,10 @@ enum { BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. + MUL_U24, + MUL_I24, + MAD_U24, + MAD_I24, TEXTURE_FETCH, EXPORT, CONST_ADDRESS, diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index e32dd9f..1c3361a 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -20,14 +20,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +using namespace llvm; + #define GET_INSTRINFO_CTOR_DTOR #define GET_INSTRINFO_NAMED_OPS #define GET_INSTRMAP_INFO #include "AMDGPUGenInstrInfo.inc" -using namespace llvm; - - // Pin the vtable to this file. void AMDGPUInstrInfo::anchor() {} @@ -85,7 +84,7 @@ AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const { // TODO: Implement this function - return NULL; + return nullptr; } bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter, MachineBasicBlock &MBB) const { @@ -176,7 +175,7 @@ AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, const SmallVectorImpl &Ops, int FrameIndex) const { // TODO: Implement this function - return 0; + return nullptr; } MachineInstr* AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, @@ -184,7 +183,7 @@ AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, const SmallVectorImpl &Ops, MachineInstr *LoadMI) const { // TODO: Implement this function - return 0; + return nullptr; } bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, @@ -356,3 +355,14 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const { case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3); } } + +// Wrapper for Tablegen'd function. enum Subtarget is not defined in any +// header files, so we need to wrap it in a function that takes unsigned +// instead. 
+namespace llvm { +namespace AMDGPU { +int getMCOpcode(uint16_t Opcode, unsigned Gen) { + return getMCOpcode(Opcode); +} +} +} diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index 426910c..74baf6b 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -52,14 +52,15 @@ public: virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, - unsigned &DstReg, unsigned &SubIdx) const; + unsigned &DstReg, unsigned &SubIdx) const override; - unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, - int &FrameIndex) const; + int &FrameIndex) const override; bool hasLoadFromStackSlot(const MachineInstr *MI, const MachineMemOperand *&MMO, - int &FrameIndex) const; + int &FrameIndex) const override; unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const; @@ -70,7 +71,7 @@ public: MachineInstr * convertToThreeAddress(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI, - LiveVariables *LV) const; + LiveVariables *LV) const override; virtual void copyPhysReg(MachineBasicBlock &MBB, @@ -78,61 +79,62 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const = 0; - virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; protected: MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, - int FrameIndex) const; + int FrameIndex) const override; MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, - MachineInstr *LoadMI) const; + MachineInstr *LoadMI) const override; /// \returns the smallest register index that will be accessed by an indirect /// read or write or -1 if indirect addressing is not used by this program. - virtual int getIndirectIndexBegin(const MachineFunction &MF) const; + int getIndirectIndexBegin(const MachineFunction &MF) const; /// \returns the largest register index that will be accessed by an indirect /// read or write or -1 if indirect addressing is not used by this program. 
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const; + int getIndirectIndexEnd(const MachineFunction &MF) const; public: bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const; + const SmallVectorImpl &Ops) const override; bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, - unsigned Reg, bool UnfoldLoad, bool UnfoldStore, - SmallVectorImpl &NewMIs) const; + unsigned Reg, bool UnfoldLoad, bool UnfoldStore, + SmallVectorImpl &NewMIs) const override; bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, - SmallVectorImpl &NewNodes) const; + SmallVectorImpl &NewNodes) const override; unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, - bool UnfoldLoad, bool UnfoldStore, - unsigned *LoadRegIndex = 0) const; + bool UnfoldLoad, bool UnfoldStore, + unsigned *LoadRegIndex = nullptr) const override; bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, int64_t Offset1, int64_t Offset2, - unsigned NumLoads) const; + unsigned NumLoads) const override; - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; void insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; - bool isPredicated(const MachineInstr *MI) const; + MachineBasicBlock::iterator MI) const override; + bool isPredicated(const MachineInstr *MI) const override; bool SubsumesPredicate(const SmallVectorImpl &Pred1, - const SmallVectorImpl &Pred2) const; + const SmallVectorImpl &Pred2) const override; bool DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const; - bool isPredicable(MachineInstr *MI) const; - bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + std::vector &Pred) const override; + bool isPredicable(MachineInstr *MI) const override; + bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; // Helper functions that check the opcode for status information bool isLoadInst(llvm::MachineInstr *MI) const; @@ -186,8 +188,7 @@ public: /// \brief Convert the AMDIL MachineInstr to a supported ISA /// MachineInstr - virtual void convertToISA(MachineInstr & MI, MachineFunction &MF, - DebugLoc DL) const; + void convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const; /// \brief Build a MOV instruction. virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB, diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index 69d8059..f96dbb4 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -92,3 +92,18 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; +// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when +// performing the multiply. The result is a 32-bit value.
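A plain-C++ sketch of those semantics, before the node definitions that follow: mask or sign-extend each operand to 24 bits, then keep the low 32 bits of the product (illustrative only, not the backend code):

  #include <cassert>
  #include <cstdint>

  // Only the low 24 bits of each operand participate; the node produces the
  // low 32 bits of the product.
  uint32_t mul_u24(uint32_t A, uint32_t B) {
    return (A & 0xFFFFFF) * (B & 0xFFFFFF);
  }

  // For the signed form, bit 23 is the sign bit of each 24-bit operand.
  int32_t sext24(uint32_t V) {
    V &= 0xFFFFFF;
    if (V & 0x800000)
      V |= 0xFF000000;
    return static_cast<int32_t>(V);
  }

  int32_t mul_i24(uint32_t A, uint32_t B) {
    // Widen to 64 bits so the multiply cannot overflow, then keep the low 32.
    return static_cast<int32_t>(int64_t(sext24(A)) * int64_t(sext24(B)));
  }

  // mad24 is the same multiply followed by a 32-bit add.
  uint32_t mad_u24(uint32_t A, uint32_t B, uint32_t C) {
    return mul_u24(A, B) + C;
  }

  int main() {
    assert(mul_u24(0xFF000002u, 3) == 6);  // high byte of A is ignored
    assert(mul_i24(0xFFFFFFu, 2) == -2);   // 0xFFFFFF is -1 as 24-bit signed
    assert(mad_u24(2, 3, 4) == 10);
  }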
+def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, + [SDNPCommutative] +>; +def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp, + [SDNPCommutative] +>; + +def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp, + [] +>; +def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp, + [] +>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 505fc81..80bdf5b 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -37,6 +37,18 @@ class AMDGPUShaderInst pattern> def InstFlag : OperandWithDefaultOps ; def ADDRIndirect : ComplexPattern; +def u32imm : Operand { + let PrintMethod = "printU32ImmOperand"; +} + +def u16imm : Operand { + let PrintMethod = "printU16ImmOperand"; +} + +def u8imm : Operand { + let PrintMethod = "printU8ImmOperand"; +} + //===----------------------------------------------------------------------===// // PatLeafs for floating-point comparisons //===----------------------------------------------------------------------===// @@ -253,9 +265,6 @@ def FP_ONE : PatLeaf < [{return N->isExactlyValue(1.0);}] >; -def U24 : ComplexPattern; -def I24 : ComplexPattern; - let isCodeGenOnly = 1, isPseudo = 1 in { let usesCustomInserter = 1 in { @@ -414,6 +423,40 @@ class UMUL24Pattern : Pat < >; */ +class IMad24Pat : Pat < + (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + +class UMad24Pat : Pat < + (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2), + (Inst $src0, $src1, $src2) +>; + +multiclass Expand24IBitOps { + def _expand_imad24 : Pat < + (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; + + def _expand_imul24 : Pat < + (AMDGPUmul_i24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} + +multiclass Expand24UBitOps { + def _expand_umad24 : Pat < + (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2), + (AddInst (MulInst $src0, $src1), $src2) + >; + + def _expand_umul24 : Pat < + (AMDGPUmul_u24 i32:$src0, i32:$src1), + (MulInst $src0, $src1) + >; +} + include "R600Instructions.td" include "R700Instructions.td" include "EvergreenInstructions.td" diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index c6521d0..9ad5e72 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -49,6 +49,10 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index 2c9909f..b759495 100644 --- 
a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -17,6 +17,7 @@ #include "AMDGPUAsmPrinter.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "R600InstrInfo.h" +#include "SIInstrInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/Constants.h" @@ -31,16 +32,30 @@ using namespace llvm; -AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx): - Ctx(ctx) +AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st): + Ctx(ctx), ST(st) { } +enum AMDGPUMCInstLower::SISubtarget +AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned) const { + return AMDGPUMCInstLower::SI; +} + +unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const { + + int MCOpcode = AMDGPU::getMCOpcode(MIOpcode, + AMDGPUSubtargetToSISubtarget(ST.getGeneration())); + if (MCOpcode == -1) + MCOpcode = MIOpcode; + + return MCOpcode; +} + void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { - OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + OutMI.setOpcode(getMCOpcode(MI->getOpcode())); + for (const MachineOperand &MO : MI->explicit_operands()) { MCOperand MCOp; switch (MO.getType()) { default: @@ -67,7 +82,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { } void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { - AMDGPUMCInstLower MCInstLowering(OutContext); + AMDGPUMCInstLower MCInstLowering(OutContext, + MF->getTarget().getSubtarget()); #ifdef _DEBUG StringRef Err; diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h index d7d538e..2b7f1e3 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.h +++ b/lib/Target/R600/AMDGPUMCInstLower.h @@ -13,16 +13,30 @@ namespace llvm { +class AMDGPUSubtarget; class MCInst; class MCContext; class MachineInstr; class AMDGPUMCInstLower { + // This must be kept in sync with the SISubtarget class in SIInstrInfo.td + enum SISubtarget { + SI = 0 + }; + MCContext &Ctx; + const AMDGPUSubtarget &ST; + + /// Convert a member of the AMDGPUSubtarget::Generation enum to the + /// SISubtarget enum. + enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) const; + + /// Get the MC opcode for this MachineInstr. + unsigned getMCOpcode(unsigned MIOpcode) const; public: - AMDGPUMCInstLower(MCContext &ctx); + AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST); /// \brief Lower a MachineInstr to an MCInst void lower(const MachineInstr *MI, MCInst &OutMI) const; diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp index 8fbec4e..19927fa 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -27,10 +27,10 @@ AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm) // they are not supported at this time. 
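The getMCOpcode helper added in the AMDGPUMCInstLower.cpp hunk above maps a codegen opcode to an MC opcode and keeps the original whenever the table has no entry (-1). The shape of that fallback, as a self-contained sketch (the map is a stand-in for the TableGen'd table):

  #include <cassert>
  #include <map>

  // Stand-in lookup; returns -1 when an opcode has no remapping.
  int lookupMCOpcode(unsigned MIOpcode) {
    static const std::map<unsigned, int> Table = {{100, 7}, {101, 9}};
    auto It = Table.find(MIOpcode);
    return It == Table.end() ? -1 : It->second;
  }

  unsigned mapToMCOpcode(unsigned MIOpcode) {
    int MCOpcode = lookupMCOpcode(MIOpcode);
    return MCOpcode == -1 ? MIOpcode : unsigned(MCOpcode);
  }

  int main() {
    assert(mapToMCOpcode(100) == 7);    // remapped
    assert(mapToMCOpcode(999) == 999);  // -1 from the table: keep the opcode
  }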
//===----------------------------------------------------------------------===// -const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister; +const MCPhysReg AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister; -const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) - const { +const MCPhysReg* +AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return &CalleeSavedReg; } @@ -54,7 +54,7 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const { AMDGPU::sub15 }; - assert (Channel < array_lengthof(SubRegs)); + assert(Channel < array_lengthof(SubRegs)); return SubRegs[Channel]; } diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index 688e1a0..a7cba0d 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -30,11 +30,11 @@ class TargetInstrInfo; struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { TargetMachine &TM; - static const uint16_t CalleeSavedReg; + static const MCPhysReg CalleeSavedReg; AMDGPURegisterInfo(TargetMachine &tm); - virtual BitVector getReservedRegs(const MachineFunction &MF) const { + BitVector getReservedRegs(const MachineFunction &MF) const override { assert(!"Unimplemented"); return BitVector(); } @@ -43,11 +43,11 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { /// \returns The ISA reg class that is equivalent to \p RC. virtual const TargetRegisterClass * getISARegClass( const TargetRegisterClass * RC) const { - assert(!"Unimplemented"); return NULL; + assert(!"Unimplemented"); return nullptr; } virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { - assert(!"Unimplemented"); return NULL; + assert(!"Unimplemented"); return nullptr; } virtual unsigned getHWRegIndex(unsigned Reg) const { @@ -58,11 +58,11 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { /// (e.g. 
getSubRegFromChannel(0) -> AMDGPU::sub0) unsigned getSubRegFromChannel(unsigned Channel) const; - const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; + const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override; void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS) const; - unsigned getFrameRegister(const MachineFunction &MF) const; + RegScavenger *RS) const override; + unsigned getFrameRegister(const MachineFunction &MF) const override; unsigned getIndirectSubReg(unsigned IndirectIndex) const; diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index e77ab5e..f3b9932 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -16,6 +16,8 @@ using namespace llvm; +#define DEBUG_TYPE "amdgpu-subtarget" + #define GET_SUBTARGETINFO_ENUM #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR @@ -28,9 +30,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : // Default card StringRef GPU = CPU; Is64bit = false; - DefaultSize[0] = 64; - DefaultSize[1] = 1; - DefaultSize[2] = 1; HasVertexCache = false; TexVTXClauseSize = 0; Gen = AMDGPUSubtarget::R600; @@ -106,14 +105,6 @@ bool AMDGPUSubtarget::isTargetELF() const { return false; } -size_t -AMDGPUSubtarget::getDefaultSize(uint32_t dim) const { - if (dim > 2) { - return 1; - } else { - return DefaultSize[dim]; - } -} std::string AMDGPUSubtarget::getDeviceName() const { diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 8874d14..1b041d6 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -38,7 +38,6 @@ public: }; private: - size_t DefaultSize[3]; std::string DevName; bool Is64bit; bool Is32on64bit; @@ -60,7 +59,7 @@ public: AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS); const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } - virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); bool is64bit() const; bool hasVertexCache() const; @@ -77,20 +76,28 @@ public: return hasBFE(); } + bool hasMulU24() const { + return (getGeneration() >= EVERGREEN); + } + + bool hasMulI24() const { + return (getGeneration() >= SOUTHERN_ISLANDS || + hasCaymanISA()); + } + bool IsIRStructurizerEnabled() const; bool isIfCvtEnabled() const; unsigned getWavefrontSize() const; unsigned getStackEntrySize() const; bool hasCFAluBug() const; - virtual bool enableMachineScheduler() const { + bool enableMachineScheduler() const override { return getGeneration() <= NORTHERN_ISLANDS; } // Helper functions to simplify if statements bool isTargetELF() const; std::string getDeviceName() const; - virtual size_t getDefaultSize(uint32_t dim) const; bool dumpCode() const { return DumpCode; } bool r600ALUEncoding() const { return R600ALUInst; } diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index b11fce3..174fdca 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -42,7 +42,7 @@ extern "C" void LLVMInitializeR600Target() { } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { - return new ScheduleDAGMILive(C, new R600SchedStrategy()); + return new ScheduleDAGMILive(C, make_unique()); } static MachineSchedRegistry @@ -54,7 +54,7 @@ static std::string computeDataLayout(const AMDGPUSubtarget 
&ST) { if (ST.is64bit()) { // 32-bit private, local, and region pointers. 64-bit global and constant. - Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64"; + Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"; } Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" @@ -103,20 +103,20 @@ public: return getTM(); } - virtual ScheduleDAGInstrs * - createMachineScheduler(MachineSchedContext *C) const { + ScheduleDAGInstrs * + createMachineScheduler(MachineSchedContext *C) const override { const AMDGPUSubtarget &ST = TM->getSubtarget(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) return createR600MachineScheduler(C); - return 0; + return nullptr; } - virtual bool addPreISel(); - virtual bool addInstSelector(); - virtual bool addPreRegAlloc(); - virtual bool addPostRegAlloc(); - virtual bool addPreSched2(); - virtual bool addPreEmitPass(); + bool addPreISel() override; + bool addInstSelector() override; + bool addPreRegAlloc() override; + bool addPostRegAlloc() override; + bool addPreSched2() override; + bool addPreEmitPass() override; }; } // End of anonymous namespace @@ -154,6 +154,7 @@ AMDGPUPassConfig::addPreISel() { bool AMDGPUPassConfig::addInstSelector() { addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); + addPass(createSILowerI1CopiesPass()); return false; } diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h index f942614..1287e13 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.h +++ b/lib/Target/R600/AMDGPUTargetMachine.h @@ -20,7 +20,6 @@ #include "AMDGPUSubtarget.h" #include "AMDILIntrinsicInfo.h" #include "R600ISelLowering.h" -#include "llvm/ADT/OwningPtr.h" #include "llvm/IR/DataLayout.h" namespace llvm { @@ -31,8 +30,8 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { const DataLayout Layout; AMDGPUFrameLowering FrameLowering; AMDGPUIntrinsicInfo IntrinsicInfo; - OwningPtr InstrInfo; - OwningPtr TLInfo; + std::unique_ptr InstrInfo; + std::unique_ptr TLInfo; const InstrItineraryData *InstrItins; public: @@ -40,30 +39,32 @@ public: StringRef CPU, TargetOptions Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~AMDGPUTargetMachine(); - virtual const AMDGPUFrameLowering *getFrameLowering() const { + const AMDGPUFrameLowering *getFrameLowering() const override { return &FrameLowering; } - virtual const AMDGPUIntrinsicInfo *getIntrinsicInfo() const { + const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override { return &IntrinsicInfo; } - virtual const AMDGPUInstrInfo *getInstrInfo() const { + const AMDGPUInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } - virtual const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const AMDGPURegisterInfo *getRegisterInfo() const { + const AMDGPUSubtarget *getSubtargetImpl() const override { + return &Subtarget; + } + const AMDGPURegisterInfo *getRegisterInfo() const override { return &InstrInfo->getRegisterInfo(); } - virtual AMDGPUTargetLowering *getTargetLowering() const { + AMDGPUTargetLowering *getTargetLowering() const override { return TLInfo.get(); } - virtual const InstrItineraryData *getInstrItineraryData() const { + const InstrItineraryData *getInstrItineraryData() const override { return InstrItins; } - virtual const DataLayout *getDataLayout() const { return &Layout; } - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + const DataLayout *getDataLayout() const override { return &Layout; } + TargetPassConfig *createPassConfig(PassManagerBase &PM) 
override; /// \brief Register R600 analysis passes with a pass manager. - virtual void addAnalysisPasses(PassManagerBase &PM); + void addAnalysisPasses(PassManagerBase &PM) override; }; } // End namespace llvm diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp index 51225eb..ea78f43 100644 --- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "AMDGPUtti" #include "AMDGPU.h" #include "AMDGPUTargetMachine.h" #include "llvm/Analysis/LoopInfo.h" @@ -26,6 +25,8 @@ #include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "AMDGPUtti" + // Declare the pass initialization routine locally as target-specific passes // don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. @@ -45,7 +46,7 @@ class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo { unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { + AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } @@ -55,9 +56,9 @@ public: initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry()); } - virtual void initializePass() override { pushTTIStack(this); } + void initializePass() override { pushTTIStack(this); } - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + void getAnalysisUsage(AnalysisUsage &AU) const override { TargetTransformInfo::getAnalysisUsage(AU); } @@ -65,15 +66,16 @@ public: static char ID; /// Provide necessary pointer adjustments for the two base classes. - virtual void *getAdjustedAnalysisPointer(const void *ID) override { + void *getAdjustedAnalysisPointer(const void *ID) override { if (ID == &TargetTransformInfo::ID) return (TargetTransformInfo *)this; return this; } - virtual bool hasBranchDivergence() const override; + bool hasBranchDivergence() const override; - virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; + void getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const override; /// @} }; @@ -109,11 +111,11 @@ void AMDGPUTTI::getUnrollingPreferences(Loop *L, // require us to use indirect addressing, which is slow and prone to // compiler bugs. If this loop does an address calculation on an // alloca ptr, then we want to use a higher than normal loop unroll - // threshold. This will give SROA a better chance to eliminate these - // allocas. - // - // Don't use the maximum allowed value here as it will make some - // programs way too big. + // threshold. This will give SROA a better chance to eliminate these + // allocas. + // + // Don't use the maximum allowed value here as it will make some + // programs way too big. 
UP.Threshold = 500; } } diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp index 21ca560..f3a0391 100644 --- a/lib/Target/R600/AMDILCFGStructurizer.cpp +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp @@ -8,8 +8,6 @@ /// \file //==-----------------------------------------------------------------------===// -#define DEBUG_TYPE "structcfg" - #include "AMDGPU.h" #include "AMDGPUInstrInfo.h" #include "R600InstrInfo.h" @@ -34,6 +32,8 @@ using namespace llvm; +#define DEBUG_TYPE "structcfg" + #define DEFAULT_VEC_SLOTS 8 // TODO: move-begin. @@ -135,15 +135,15 @@ public: static char ID; AMDGPUCFGStructurizer() : - MachineFunctionPass(ID), TII(NULL), TRI(NULL) { + MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) { initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); } - const char *getPassName() const { + const char *getPassName() const override { return "AMDGPU Control Flow Graph structurizer Pass"; } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); AU.addRequired(); AU.addRequired(); @@ -159,7 +159,7 @@ public: /// sure all loops have an exit block bool prepare(); - bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { TII = static_cast(MF.getTarget().getInstrInfo()); TRI = &TII->getRegisterInfo(); DEBUG(MF.dump();); @@ -168,7 +168,7 @@ public: MLI = &getAnalysis(); DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); MDT = &getAnalysis(); - DEBUG(MDT->print(dbgs(), (const llvm::Module*)0);); + DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr);); PDT = &getAnalysis(); DEBUG(PDT->print(dbgs());); prepare(); @@ -334,7 +334,7 @@ protected: MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); void recordSccnum(MachineBasicBlock *MBB, int SCCNum); void retireBlock(MachineBasicBlock *MBB); - void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL); + void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr); MachineBasicBlock *findNearestCommonPostDom(std::set&); /// This is work around solution for findNearestCommonDominator not avaiable @@ -361,7 +361,7 @@ MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) const { LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); if (It == LLInfoMap.end()) - return NULL; + return nullptr; return (*It).second; } @@ -632,7 +632,7 @@ MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( MachineInstr *MI = &*It; if (MI && (isCondBranch(MI) || isUncondBranch(MI))) return MI; - return NULL; + return nullptr; } MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( @@ -648,7 +648,7 @@ MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( break; } } - return NULL; + return nullptr; } MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { @@ -658,7 +658,7 @@ MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { if (instr->getOpcode() == AMDGPU::RETURN) return instr; } - return NULL; + return nullptr; } MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) { @@ -668,7 +668,7 @@ MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) { if (MI->getOpcode() == AMDGPU::CONTINUE) return MI; } - return NULL; + return nullptr; } bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { @@ -819,7 +819,7 @@ bool AMDGPUCFGStructurizer::run() { 
SmallVectorImpl::const_iterator SccBeginIter = It; - MachineBasicBlock *SccBeginMBB = NULL; + MachineBasicBlock *SccBeginMBB = nullptr; int SccNumBlk = 0; // The number of active blocks, init to a // maximum possible number. int SccNumIter; // Number of iteration in this SCC. @@ -874,7 +874,7 @@ bool AMDGPUCFGStructurizer::run() { } if (ContNextScc) - SccBeginMBB = NULL; + SccBeginMBB = nullptr; } //while, "one iteration" over the function. MachineBasicBlock *EntryMBB = @@ -933,7 +933,7 @@ void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { MachineBasicBlock *MBB; for (scc_iterator It = scc_begin(MF); !It.isAtEnd(); ++It, ++SccNum) { - std::vector &SccNext = *It; + const std::vector &SccNext = *It; for (std::vector::const_iterator blockIter = SccNext.begin(), blockEnd = SccNext.end(); blockIter != blockEnd; ++blockIter) { @@ -1026,7 +1026,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { // Triangle pattern, false is empty LandBlk = FalseMBB; - FalseMBB = NULL; + FalseMBB = nullptr; } else if (FalseMBB->succ_size() == 1 && *FalseMBB->succ_begin() == TrueMBB) { // Triangle pattern, true is empty @@ -1034,7 +1034,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { std::swap(TrueMBB, FalseMBB); reversePredicateSetter(MBB->end()); LandBlk = FalseMBB; - FalseMBB = NULL; + FalseMBB = nullptr; } else if (FalseMBB->succ_size() == 1 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { LandBlk = *FalseMBB->succ_begin(); @@ -1075,13 +1075,11 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { int AMDGPUCFGStructurizer::loopendPatternMatch() { std::vector NestedLoops; - for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); - It != E; ++It) { - df_iterator LpIt = df_begin(*It), - LpE = df_end(*It); - for (; LpIt != LpE; ++LpIt) - NestedLoops.push_back(*LpIt); - } + for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E; + ++It) + for (MachineLoop *ML : depth_first(*It)) + NestedLoops.push_back(ML); + if (NestedLoops.size() == 0) return 0; @@ -1244,7 +1242,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, DEBUG( dbgs() << " not working\n"; ); - DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL; + DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; } // walk down the postDomTree return Num; @@ -1723,11 +1721,11 @@ AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); if (!LoopHeader || !LoopLatch) - return NULL; + return nullptr; MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); // Is LoopRep an infinite loop ? 
if (!BranchMI || !isUncondBranch(BranchMI)) - return NULL; + return nullptr; MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); FuncRep->push_back(DummyExitBlk); //insert to function @@ -1860,7 +1858,7 @@ AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1, return findNearestCommonPostDom(MBB1, *MBB2->succ_begin()); if (!Node1 || !Node2) - return NULL; + return nullptr; Node1 = Node1->getIDom(); while (Node1) { @@ -1869,7 +1867,7 @@ AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1, Node1 = Node1->getIDom(); } - return NULL; + return nullptr; } MachineBasicBlock * diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp index 0761ff4..7cea803 100644 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ b/lib/Target/R600/AMDILISelLowering.cpp @@ -39,61 +39,55 @@ using namespace llvm; // TargetLowering Class Implementation Begins //===----------------------------------------------------------------------===// void AMDGPUTargetLowering::InitAMDILLowering() { - static const int types[] = { - (int)MVT::i8, - (int)MVT::i16, - (int)MVT::i32, - (int)MVT::f32, - (int)MVT::f64, - (int)MVT::i64, - (int)MVT::v2i8, - (int)MVT::v4i8, - (int)MVT::v2i16, - (int)MVT::v4i16, - (int)MVT::v4f32, - (int)MVT::v4i32, - (int)MVT::v2f32, - (int)MVT::v2i32, - (int)MVT::v2f64, - (int)MVT::v2i64 + static const MVT::SimpleValueType types[] = { + MVT::i8, + MVT::i16, + MVT::i32, + MVT::f32, + MVT::f64, + MVT::i64, + MVT::v2i8, + MVT::v4i8, + MVT::v2i16, + MVT::v4i16, + MVT::v4f32, + MVT::v4i32, + MVT::v2f32, + MVT::v2i32, + MVT::v2f64, + MVT::v2i64 }; - static const int IntTypes[] = { - (int)MVT::i8, - (int)MVT::i16, - (int)MVT::i32, - (int)MVT::i64 + static const MVT::SimpleValueType IntTypes[] = { + MVT::i8, + MVT::i16, + MVT::i32, + MVT::i64 }; - static const int FloatTypes[] = { - (int)MVT::f32, - (int)MVT::f64 + static const MVT::SimpleValueType FloatTypes[] = { + MVT::f32, + MVT::f64 }; - static const int VectorTypes[] = { - (int)MVT::v2i8, - (int)MVT::v4i8, - (int)MVT::v2i16, - (int)MVT::v4i16, - (int)MVT::v4f32, - (int)MVT::v4i32, - (int)MVT::v2f32, - (int)MVT::v2i32, - (int)MVT::v2f64, - (int)MVT::v2i64 + static const MVT::SimpleValueType VectorTypes[] = { + MVT::v2i8, + MVT::v4i8, + MVT::v2i16, + MVT::v4i16, + MVT::v4f32, + MVT::v4i32, + MVT::v2f32, + MVT::v2i32, + MVT::v2f64, + MVT::v2i64 }; - const size_t NumTypes = array_lengthof(types); - const size_t NumFloatTypes = array_lengthof(FloatTypes); - const size_t NumIntTypes = array_lengthof(IntTypes); - const size_t NumVectorTypes = array_lengthof(VectorTypes); const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget(); // These are the current register classes that are // supported - for (unsigned int x = 0; x < NumTypes; ++x) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; - + for (MVT VT : types) { setOperationAction(ISD::SUBE, VT, Expand); setOperationAction(ISD::SUBC, VT, Expand); setOperationAction(ISD::ADDE, VT, Expand); @@ -109,9 +103,7 @@ void AMDGPUTargetLowering::InitAMDILLowering() { setOperationAction(ISD::SDIV, VT, Custom); } } - for (unsigned int x = 0; x < NumFloatTypes; ++x) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; - + for (MVT VT : FloatTypes) { // IL does not have these operations for floating point types setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); setOperationAction(ISD::SETOLT, VT, Expand); @@ -124,9 +116,7 @@ void AMDGPUTargetLowering::InitAMDILLowering() { setOperationAction(ISD::SETULE, VT, 
Expand); } - for (unsigned int x = 0; x < NumIntTypes; ++x) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; - + for (MVT VT : IntTypes) { // GPU also does not have divrem function for signed or unsigned setOperationAction(ISD::SDIVREM, VT, Expand); @@ -142,9 +132,7 @@ void AMDGPUTargetLowering::InitAMDILLowering() { setOperationAction(ISD::CTLZ, VT, Expand); } - for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; - + for (MVT VT : VectorTypes) { setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDILIntrinsicInfo.cpp index 762ee39..fab4a3b 100644 --- a/lib/Target/R600/AMDILIntrinsicInfo.cpp +++ b/lib/Target/R600/AMDILIntrinsicInfo.cpp @@ -38,7 +38,7 @@ AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, }; if (IntrID < Intrinsic::num_intrinsics) { - return 0; + return nullptr; } assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics && "Invalid intrinsic ID"); diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDILIntrinsicInfo.h index 35559e2..924275a 100644 --- a/lib/Target/R600/AMDILIntrinsicInfo.h +++ b/lib/Target/R600/AMDILIntrinsicInfo.h @@ -34,13 +34,13 @@ enum ID { class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo { public: AMDGPUIntrinsicInfo(TargetMachine *tm); - std::string getName(unsigned int IntrId, Type **Tys = 0, - unsigned int numTys = 0) const; - unsigned int lookupName(const char *Name, unsigned int Len) const; - bool isOverloaded(unsigned int IID) const; + std::string getName(unsigned int IntrId, Type **Tys = nullptr, + unsigned int numTys = 0) const override; + unsigned int lookupName(const char *Name, unsigned int Len) const override; + bool isOverloaded(unsigned int IID) const override; Function *getDeclaration(Module *M, unsigned int ID, - Type **Tys = 0, - unsigned int numTys = 0) const; + Type **Tys = nullptr, + unsigned int numTys = 0) const override; }; } // end namespace llvm diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td index 658deb5..4a3e02e 100644 --- a/lib/Target/R600/AMDILIntrinsics.td +++ b/lib/Target/R600/AMDILIntrinsics.td @@ -92,10 +92,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in { BinaryIntInt; def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">, BinaryIntInt; - def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">, - BinaryIntInt; - def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">, - BinaryIntInt; def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">, BinaryIntInt; def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">, diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 93a5117..3c6fa5a 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -45,6 +45,7 @@ add_llvm_target(R600CodeGen SIInstrInfo.cpp SIISelLowering.cpp SILowerControlFlow.cpp + SILowerI1Copies.cpp SIMachineFunctionInfo.cpp SIRegisterInfo.cpp SITypeRewriter.cpp diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td index acd7bde..2630345 100644 --- a/lib/Target/R600/CaymanInstructions.td +++ b/lib/Target/R600/CaymanInstructions.td @@ -21,12 +21,14 @@ def isCayman : Predicate<"Subtarget.hasCaymanISA()">; let Predicates = [isCayman] in { def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24", - [(set i32:$dst, (add (mul I24:$src0, I24:$src1), 
i32:$src2))], VecALU + [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU >; def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24", - [(set i32:$dst, (mul I24:$src0, I24:$src1))], VecALU + [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU >; +def : IMad24Pat; + let isVector = 1 in { def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>; @@ -47,6 +49,7 @@ def COS_cm : COS_Common<0x8E>; def : POW_Common ; defm DIV_cm : DIV_Common; +defm : Expand24UBitOps; // RECIP_UINT emulation for Cayman // The multiplication scales from [0,1] to the unsigned integer range diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index 6430ca6..2065441 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -75,6 +75,8 @@ def COS_eg : COS_Common<0x8E>; def : POW_Common ; def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>; +defm : Expand24IBitOps; + //===----------------------------------------------------------------------===// // Memory read/write instructions //===----------------------------------------------------------------------===// @@ -273,7 +275,7 @@ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT", VecALU >; -def BFE_INT_eg : R600_3OP <0x4, "BFE_INT", +def BFE_INT_eg : R600_3OP <0x5, "BFE_INT", [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))], VecALU >; @@ -286,6 +288,13 @@ def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", VecALU >; +def : Pat<(i32 (sext_inreg i32:$src, i1)), + (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>; +def : Pat<(i32 (sext_inreg i32:$src, i8)), + (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>; +def : Pat<(i32 (sext_inreg i32:$src, i16)), + (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>; + defm : BFIPatterns ; def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT", @@ -294,8 +303,11 @@ def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT", >; def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24", - [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU + [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))], VecALU >; + +def : UMad24Pat; + def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>; def : ROTRPattern ; def MULADD_eg : MULADD_Common<0x14>; @@ -309,7 +321,7 @@ def CNDGE_eg : CNDGE_Common<0x1B>; def MUL_LIT_eg : MUL_LIT_Common<0x1F>; def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>; def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24", - [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU + [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))], VecALU >; def DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 7105879..11ae091 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -12,6 +12,8 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; @@ -23,6 +25,21 @@ void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, printAnnotation(OS, Annot); } +void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << formatHex(MI->getOperand(OpNo).getImm() & 0xff); +} + +void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << formatHex(MI->getOperand(OpNo).getImm() & 0xffff); +} + 
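These printU*ImmOperand helpers (the third follows below) share one shape: mask the raw 64-bit immediate down to the operand's width, then print it in hex. A dependency-free sketch with the width as a parameter (printMaskedImm is illustrative, not the printer's API):

  #include <cstdint>
  #include <cstdio>

  void printMaskedImm(int64_t Imm, unsigned Bits) {
    uint64_t Mask = Bits >= 64 ? ~0ULL : ((1ULL << Bits) - 1);
    std::printf("0x%llx\n", static_cast<unsigned long long>(Imm & Mask));
  }

  int main() {
    printMaskedImm(-1, 8);   // prints 0xff, like printU8ImmOperand
    printMaskedImm(-1, 16);  // prints 0xffff, like printU16ImmOperand
  }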
+void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff); + + void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { switch (reg) { case AMDGPU::VCC: @@ -41,43 +58,78 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { break; } - // It's seems there's no way to use SIRegisterInfo here, and dealing with the - // giant enum of all the different shifted sets of registers is pretty - // unmanagable, so parse the name and reformat it to be prettier. - StringRef Name(getRegisterName(reg)); - - std::pair Split = Name.split('_'); - StringRef SubRegName = Split.first; - StringRef Rest = Split.second; + char Type; + unsigned NumRegs; + + if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(reg)) { + Type = 'v'; + NumRegs = 1; + } else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(reg)) { + Type = 's'; + NumRegs = 1; + } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(reg)) { + Type = 'v'; + NumRegs = 2; + } else if (MRI.getRegClass(AMDGPU::SReg_64RegClassID).contains(reg)) { + Type = 's'; + NumRegs = 2; + } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) { + Type = 'v'; + NumRegs = 4; + } else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) { + Type = 's'; + NumRegs = 4; + } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) { + Type = 'v'; + NumRegs = 3; + } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(reg)) { + Type = 'v'; + NumRegs = 8; + } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(reg)) { + Type = 's'; + NumRegs = 8; + } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(reg)) { + Type = 'v'; + NumRegs = 16; + } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(reg)) { + Type = 's'; + NumRegs = 16; + } else { + O << getRegisterName(reg); + return; + } - if (SubRegName.size() <= 4) { // Must at least be as long as "SGPR"/"VGPR". - O << Name; + // The low 8 bits of the encoding value give the register index, for both + // VGPRs and SGPRs.
+ unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1); + if (NumRegs == 1) { + O << Type << RegIdx; return; } - unsigned RegIndex; - StringRef RegIndexStr = SubRegName.drop_front(4); + O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']'; +} - if (RegIndexStr.getAsInteger(10, RegIndex)) { - O << Name; +void AMDGPUInstPrinter::printImmediate(uint32_t Imm, raw_ostream &O) { + int32_t SImm = static_cast(Imm); + if (SImm >= -16 && SImm <= 64) { + O << SImm; return; } - if (SubRegName.front() == 'V') - O << 'v'; - else if (SubRegName.front() == 'S') - O << 's'; - else { - O << Name; + if (Imm == FloatToBits(1.0f) || + Imm == FloatToBits(-1.0f) || + Imm == FloatToBits(0.5f) || + Imm == FloatToBits(-0.5f) || + Imm == FloatToBits(2.0f) || + Imm == FloatToBits(-2.0f) || + Imm == FloatToBits(4.0f) || + Imm == FloatToBits(-4.0f)) { + O << BitsToFloat(Imm); return; } - if (Rest.empty()) // Only 1 32-bit register - O << RegIndex; - else { - unsigned NumReg = Rest.count('_') + 2; - O << '[' << RegIndex << ':' << (RegIndex + NumReg - 1) << ']'; - } + O << formatHex(static_cast(Imm)); } void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, @@ -95,7 +147,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, break; } } else if (Op.isImm()) { - O << Op.getImm(); + printImmediate(Op.getImm(), O); } else if (Op.isFPImm()) { O << Op.getFPImm(); } else if (Op.isExpr()) { @@ -106,6 +158,18 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } +void AMDGPUInstPrinter::printOperandAndMods(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned InputModifiers = MI->getOperand(OpNo).getImm(); + if (InputModifiers & 0x1) + O << "-"; + if (InputModifiers & 0x2) + O << "|"; + printOperand(MI, OpNo + 1, O); + if (InputModifiers & 0x2) + O << "|"; +} + void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNum).getImm(); diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 1d24680..6ca7170 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -29,13 +29,18 @@ public: void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; private: - static void printRegOperand(unsigned RegNo, raw_ostream &O); - static void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRegOperand(unsigned RegNo, raw_ostream &O); + void printImmediate(uint32_t Imm, raw_ostream &O); + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O); - static void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O, StringRef Asm, StringRef Default = ""); static void printAbs(const MCInst *MI, unsigned OpNo, 
raw_ostream &O); diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp index a6bb59f..489cec7 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -23,8 +23,8 @@ namespace { class AMDGPUMCObjectWriter : public MCObjectWriter { public: AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { } - virtual void ExecutePostLayoutBinding(MCAssembler &Asm, - const MCAsmLayout &Layout) { + void ExecutePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) override { //XXX: Implement if necessary. } void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, @@ -34,7 +34,7 @@ public: assert(!"Not implemented"); } - virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); + void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; }; @@ -43,19 +43,19 @@ public: AMDGPUAsmBackend(const Target &T) : MCAsmBackend() {} - virtual unsigned getNumFixupKinds() const { return 0; }; - virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const; - virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { + unsigned getNumFixupKinds() const override { return 0; }; + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { return false; } - virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const { + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { assert(!"Not implemented"); } - virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; } - virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { + bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { return true; } }; @@ -88,7 +88,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend { public: ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { return createAMDGPUELFObjectWriter(OS); } }; diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp index aee9bd1..78bbe0a 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -35,7 +35,7 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { Data16bitsDirective = ".short\t"; Data32bitsDirective = ".long\t"; Data64bitsDirective = ".quad\t"; - GPRel32Directive = 0; + GPRel32Directive = nullptr; SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; @@ -58,5 +58,5 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() { const MCSection* AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const { - return 0; + return nullptr; } diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h index 22afd63..59aebec 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -22,7 +22,7 @@ class StringRef; class AMDGPUMCAsmInfo : 
public MCAsmInfo { public: explicit AMDGPUMCAsmInfo(StringRef &TT); - const MCSection* getNonexecutableStackSection(MCContext &CTX) const; + const MCSection* getNonexecutableStackSection(MCContext &CTX) const override; }; } // namespace llvm #endif // AMDGPUMCASMINFO_H diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp index 6592b0e..38a2956 100644 --- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -24,6 +24,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "AMDGPUGenInstrInfo.inc" @@ -33,8 +35,6 @@ #define GET_REGINFO_MC_DESC #include "AMDGPUGenRegisterInfo.inc" -using namespace llvm; - static MCInstrInfo *createAMDGPUMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitAMDGPUMCInstrInfo(X); diff --git a/lib/Target/R600/MCTargetDesc/LLVMBuild.txt b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt index b1beab0..74b8ca0 100644 --- a/lib/Target/R600/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt -------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -19,5 +19,5 @@ type = Library name = R600Desc parent = R600 -required_libraries = R600AsmPrinter R600Info MC +required_libraries = MC R600AsmPrinter R600Info Support add_to_library_groups = R600 diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 286c7d1..5e7cefe 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -41,14 +41,14 @@ public: : MCII(mcii), MRI(mri) { } /// \brief Encode the instruction and write it to the OS. - virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const override; /// \returns the encoding for an MCOperand. - virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; private: void EmitByte(unsigned int byte, raw_ostream &OS) const; diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp index f42e978..ee02111 100644 --- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp @@ -54,14 +54,14 @@ public: ~SIMCCodeEmitter() { } /// \brief Encode the instruction and write it to the OS. - virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const override; /// \returns the encoding for an MCOperand. 
- virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; }; } // End anonymous namespace diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td index fde4481..ce17d7c 100644 --- a/lib/Target/R600/Processors.td +++ b/lib/Target/R600/Processors.td @@ -106,3 +106,5 @@ def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>; def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>; def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>; + +def : Proc<"mullins", SI_Itin, [FeatureSeaIslands]>; diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp index 3d9015c..92bf0df 100644 --- a/lib/Target/R600/R600ClauseMergePass.cpp +++ b/lib/Target/R600/R600ClauseMergePass.cpp @@ -13,7 +13,6 @@ /// It needs to be called after IfCvt for best results. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "r600mergeclause" #include "AMDGPU.h" #include "R600Defines.h" #include "R600InstrInfo.h" @@ -27,6 +26,8 @@ using namespace llvm; +#define DEBUG_TYPE "r600mergeclause" + namespace { static bool isCFAlu(const MachineInstr *MI) { @@ -62,9 +63,9 @@ private: public: R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const; + const char *getPassName() const override; }; char R600ClauseMergePass::ID = 0; diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index f74bef3..d255e96 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -12,7 +12,6 @@ /// computing their address on the fly ; it also sets STACK_SIZE info. 
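A recurring change in this patch (visible again in the hunk that follows) moves #define DEBUG_TYPE below the #include block, which keeps the macro from affecting anything the included headers themselves compile. The resulting layout, sketched with an illustrative file:

  // R600Foo.cpp -- illustrative name, not part of the patch
  #include "AMDGPU.h"
  #include "llvm/Support/Debug.h"

  using namespace llvm;

  // Defined only after every #include, so no header sees this DEBUG_TYPE.
  #define DEBUG_TYPE "r600foo"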
//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "r600cf" #include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "R600Defines.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "r600cf" + namespace { struct CFStack { @@ -468,13 +469,13 @@ private: public: R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID), - TII (0), TRI(0), + TII (nullptr), TRI(nullptr), ST(tm.getSubtarget()) { const AMDGPUSubtarget &ST = tm.getSubtarget(); MaxFetchInst = ST.getTexVTXClauseSize(); } - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { TII=static_cast(MF.getTarget().getInstrInfo()); TRI=static_cast(MF.getTarget().getRegisterInfo()); R600MachineFunctionInfo *MFI = MF.getInfo(); @@ -501,13 +502,13 @@ public: DEBUG(dbgs() << CfCount << ":"; I->dump();); FetchClauses.push_back(MakeFetchClause(MBB, I)); CfCount++; - LastAlu.back() = 0; + LastAlu.back() = nullptr; continue; } MachineBasicBlock::iterator MI = I; if (MI->getOpcode() != AMDGPU::ENDIF) - LastAlu.back() = 0; + LastAlu.back() = nullptr; if (MI->getOpcode() == AMDGPU::CF_ALU) LastAlu.back() = MI; I++; @@ -558,7 +559,7 @@ public: break; } case AMDGPU::IF_PREDICATE_SET: { - LastAlu.push_back(0); + LastAlu.push_back(nullptr); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_JUMP)) .addImm(0) @@ -665,7 +666,7 @@ public: return false; } - const char *getPassName() const { + const char *getPassName() const override { return "R600 Control Flow Finalizer Pass"; } }; diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp index 5bd793a..38afebe 100644 --- a/lib/Target/R600/R600EmitClauseMarkers.cpp +++ b/lib/Target/R600/R600EmitClauseMarkers.cpp @@ -291,12 +291,12 @@ private: public: static char ID; - R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(0), Address(0) { + R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(nullptr), Address(0) { initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry()); } - virtual bool runOnMachineFunction(MachineFunction &MF) { + bool runOnMachineFunction(MachineFunction &MF) override { TII = static_cast(MF.getTarget().getInstrInfo()); for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); @@ -315,7 +315,7 @@ public: return false; } - const char *getPassName() const { + const char *getPassName() const override { return "R600 Emit Clause Markers Pass"; } }; diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp index ca1189d..732b06d 100644 --- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp +++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp @@ -38,11 +38,11 @@ private: public: R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID), - TII(0) { } + TII(nullptr) { } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { + const char *getPassName() const override { return "R600 Expand special instructions pass"; } }; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 6405a82..d6c6830 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -82,9 +82,31 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::f32, Expand); 
setOperationAction(ISD::SELECT, MVT::v2i32, Expand); - setOperationAction(ISD::SELECT, MVT::v2f32, Expand); setOperationAction(ISD::SELECT, MVT::v4i32, Expand); - setOperationAction(ISD::SELECT, MVT::v4f32, Expand); + + // Expand sign extension of vectors + if (!Subtarget->hasBFE()) + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand); + + if (!Subtarget->hasBFE()) + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand); + + if (!Subtarget->hasBFE()) + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); + // Legalize loads and stores to the private address space. setOperationAction(ISD::LOAD, MVT::i32, Custom); @@ -117,6 +139,11 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + // These should be replaced by UDIVREM, but it does not happen automatically + // during Type Legalization + setOperationAction(ISD::UDIV, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -538,8 +565,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(2, MVT::i32), // SWZ_Z DAG.getConstant(3, MVT::i32) // SWZ_W }; - return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), - Args, 8); + return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args); } // default for switch(IntrinsicID) @@ -689,7 +715,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const Op.getOperand(9), Op.getOperand(10) }; - return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19); + return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs); } case AMDGPUIntrinsic::AMDGPU_dp4: { SDValue Args[8] = { @@ -710,7 +736,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2), DAG.getConstant(3, MVT::i32)) }; - return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8); + return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args); } case Intrinsic::r600_read_ngroups_x: @@ -960,13 +986,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode); } - - // Possible Min/Max pattern - SDValue MinMax = LowerMinMax(Op, DAG); - if (MinMax.getNode()) { - return MinMax; - } - // If we make it this far, it means we have no native instructions to handle // this SELECT_CC, so we must lower it.
SDValue HWTrue, HWFalse; @@ -1088,10 +1107,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(0, MVT::i32), Mask }; - SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4); + SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src); SDValue Args[3] = { Chain, Input, DWordAddr }; return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL, - Op->getVTList(), Args, 3, MemVT, + Op->getVTList(), Args, MemVT, StoreNode->getMemOperand()); } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && Value.getValueType().bitsGE(MVT::i32)) { @@ -1131,7 +1150,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { if (ValueVT.isVector()) { unsigned NumElemVT = ValueVT.getVectorNumElements(); EVT ElemVT = ValueVT.getVectorElementType(); - SDValue Stores[4]; + SmallVector Stores(NumElemVT); assert(NumElemVT >= StackWidth && "Stack width cannot be greater than " "vector width in load"); @@ -1148,7 +1167,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { Chain, Elem, Ptr, DAG.getTargetConstant(Channel, MVT::i32)); } - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores); } else { if (ValueVT == MVT::i8) { Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value); @@ -1212,10 +1231,11 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG); if (Ret.getNode()) { - SDValue Ops[2]; - Ops[0] = Ret; - Ops[1] = Chain; - return DAG.getMergeValues(Ops, 2, DL); + SDValue Ops[2] = { + Ret, + Chain + }; + return DAG.getMergeValues(Ops, DL); } @@ -1224,7 +1244,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SplitVectorLoad(Op, DAG), Chain }; - return DAG.getMergeValues(MergedValues, 2, DL); + return DAG.getMergeValues(MergedValues, DL); } int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); @@ -1232,8 +1252,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) || (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) { SDValue Result; - if (isa(LoadNode->getSrcValue()) || - isa(LoadNode->getSrcValue()) || + if (isa(LoadNode->getMemOperand()->getValue()) || + isa(LoadNode->getMemOperand()->getValue()) || isa(Ptr)) { SDValue Slots[4]; for (unsigned i = 0; i < 4; i++) { @@ -1252,7 +1272,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const NewVT = VT; NumElements = VT.getVectorNumElements(); } - Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements); + Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, + makeArrayRef(Slots, NumElements)); } else { // non-constant ptr can't be folded, keeps it as a v4f32 load Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32, @@ -1268,10 +1289,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const } SDValue MergedValues[2] = { - Result, - Chain + Result, + Chain }; - return DAG.getMergeValues(MergedValues, 2, DL); + return DAG.getMergeValues(MergedValues, DL); } // For most operations returning SDValue() will result in the node being @@ -1295,7 +1316,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount); SDValue MergedValues[2] = { Sra, Chain }; - return DAG.getMergeValues(MergedValues, 2, DL); + return 
DAG.getMergeValues(MergedValues, DL); } if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { @@ -1332,7 +1353,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const Loads[i] = DAG.getUNDEF(ElemVT); } EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4); - LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4); + LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads); } else { LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT, Chain, Ptr, @@ -1340,11 +1361,12 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const Op.getOperand(2)); } - SDValue Ops[2]; - Ops[0] = LoweredLoad; - Ops[1] = Chain; + SDValue Ops[2] = { + LoweredLoad, + Chain + }; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } /// XXX Only kernel functions are supported, so we can assume for now that @@ -1365,8 +1387,7 @@ SDValue R600TargetLowering::LowerFormalArguments( SmallVector LocalIns; - getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins, - LocalIns); + getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns); AnalyzeFormalArguments(CCInfo, LocalIns); @@ -1392,32 +1413,38 @@ SDValue R600TargetLowering::LowerFormalArguments( // The first 36 bytes of the input buffer contains information about // thread group and global sizes. - SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, + + // FIXME: This should really check the extload type, but the handling of + // extload vector parameters seems to be broken. + //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + ISD::LoadExtType Ext = ISD::SEXTLOAD; + SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain, DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32), MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, false, false, 4); - // 4 is the preferred alignment for - // the CONSTANT memory space. + + // 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg); } return Chain; } EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { - if (!VT.isVector()) return MVT::i32; + if (!VT.isVector()) + return MVT::i32; return VT.changeVectorElementTypeToInteger(); } -static SDValue -CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, - DenseMap &RemapSwizzle) { +static SDValue CompactSwizzlableVector( + SelectionDAG &DAG, SDValue VectorEntry, + DenseMap &RemapSwizzle) { assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR); assert(RemapSwizzle.empty()); SDValue NewBldVec[4] = { - VectorEntry.getOperand(0), - VectorEntry.getOperand(1), - VectorEntry.getOperand(2), - VectorEntry.getOperand(3) + VectorEntry.getOperand(0), + VectorEntry.getOperand(1), + VectorEntry.getOperand(2), + VectorEntry.getOperand(3) }; for (unsigned i = 0; i < 4; i++) { @@ -1448,7 +1475,7 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry), - VectorEntry.getValueType(), NewBldVec, 4); + VectorEntry.getValueType(), NewBldVec); } static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, @@ -1486,7 +1513,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry), - VectorEntry.getValueType(), NewBldVec, 4); + VectorEntry.getValueType(), NewBldVec); } @@ -1524,6 +1551,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, SelectionDAG &DAG = DCI.DAG; switch (N->getOpcode()) { + default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a) case ISD::FP_ROUND: { SDValue Arg = N->getOperand(0); @@ -1613,8 +1641,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } // Return the new vector - return DAG.getNode(ISD::BUILD_VECTOR, dl, - VT, &Ops[0], Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } // Extract_vec (Build_vector) generated by custom lowering @@ -1638,6 +1665,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, } case ISD::SELECT_CC: { + // Try common optimizations + SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI); + if (Ret.getNode()) + return Ret; + // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq -> // selectcc x, y, a, b, inv(cc) // @@ -1697,7 +1729,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, }; SDLoc DL(N); NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG); - return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8); + return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs); } case AMDGPUISD::TEXTURE_FETCH: { SDValue Arg = N->getOperand(1); @@ -1727,10 +1759,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, }; NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG); return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(), - NewArgs, 19); + NewArgs); } } - return SDValue(); + + return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); } static bool @@ -1779,8 +1812,7 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg, TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W) }; std::vector Consts; - for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) { - int OtherSrcIdx = SrcIndices[i]; + for (int OtherSrcIdx : SrcIndices) { int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx); if (OtherSrcIdx < 0 || OtherSelIdx < 0) continue; @@ -1791,14 +1823,14 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue 
&Src, SDValue &Neg, if (RegisterSDNode *Reg = dyn_cast(ParentNode->getOperand(OtherSrcIdx))) { if (Reg->getReg() == AMDGPU::ALU_CONST) { - ConstantSDNode *Cst = dyn_cast( - ParentNode->getOperand(OtherSelIdx)); + ConstantSDNode *Cst + = cast(ParentNode->getOperand(OtherSelIdx)); Consts.push_back(Cst->getZExtValue()); } } } - ConstantSDNode *Cst = dyn_cast(CstOffset); + ConstantSDNode *Cst = cast(CstOffset); Consts.push_back(Cst->getZExtValue()); if (!TII->fitsConstReadLimitations(Consts)) { return false; diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index 22ef728..a8a464f 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -24,21 +24,21 @@ class R600InstrInfo; class R600TargetLowering : public AMDGPUTargetLowering { public: R600TargetLowering(TargetMachine &TM); - virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock * BB) const; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual void ReplaceNodeResults(SDNode * N, - SmallVectorImpl &Results, - SelectionDAG &DAG) const override; - virtual SDValue LowerFormalArguments( - SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - SDLoc DL, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock * BB) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + void ReplaceNodeResults(SDNode * N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + SDValue LowerFormalArguments( + SDValue Chain, + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + SDLoc DL, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + EVT getSetCCResultType(LLVMContext &, EVT VT) const override; private: unsigned Gen; /// Each OpenCL kernel has nine implicit parameters that are stored in the @@ -66,7 +66,7 @@ private: void getStackAddress(unsigned StackWidth, unsigned ElemIdx, unsigned &Channel, unsigned &PtrIncr) const; bool isZero(SDValue Op) const; - virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; + SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override; }; } // End namespace llvm; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index 0281dd0..b0d9ae3 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -23,11 +23,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +using namespace llvm; + #define GET_INSTRINFO_CTOR_DTOR #include "AMDGPUGenDFAPacketizer.inc" -using namespace llvm; - R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) : AMDGPUInstrInfo(tm), RI(tm), @@ -677,7 +677,7 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB, return MI; } - return NULL; + return nullptr; } static @@ -797,7 +797,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB, DebugLoc DL) const { assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - if (FBB == 0) { + if (!FBB) { if (Cond.empty()) { BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB); return 1; diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index 
d5ff4de..b5304a0 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -50,13 +50,13 @@ namespace llvm { explicit R600InstrInfo(AMDGPUTargetMachine &tm); - const R600RegisterInfo &getRegisterInfo() const; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; + const R600RegisterInfo &getRegisterInfo() const override; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; bool isLegalToSplitMBBAt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI) const; + MachineBasicBlock::iterator MBBI) const override; bool isTrig(const MachineInstr &MI) const; bool isPlaceHolderOpcode(unsigned opcode) const; @@ -142,79 +142,79 @@ namespace llvm { /// instruction slots within an instruction group. bool isVector(const MachineInstr &MI) const; - virtual unsigned getIEQOpcode() const; - virtual bool isMov(unsigned Opcode) const; + unsigned getIEQOpcode() const override; + bool isMov(unsigned Opcode) const override; DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM, - const ScheduleDAG *DAG) const; + const ScheduleDAG *DAG) const override; - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; + bool ReverseBranchCondition(SmallVectorImpl &Cond) const override; bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, bool AllowModify) const; + SmallVectorImpl &Cond, bool AllowModify) const override; - unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const override; - unsigned RemoveBranch(MachineBasicBlock &MBB) const; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; - bool isPredicated(const MachineInstr *MI) const; + bool isPredicated(const MachineInstr *MI) const override; - bool isPredicable(MachineInstr *MI) const; + bool isPredicable(MachineInstr *MI) const override; bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, - const BranchProbability &Probability) const; + const BranchProbability &Probability) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const ; + const BranchProbability &Probability) const override ; bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const; + const BranchProbability &Probability) const override; bool DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const; + std::vector &Pred) const override; bool SubsumesPredicate(const SmallVectorImpl &Pred1, - const SmallVectorImpl &Pred2) const; + const SmallVectorImpl &Pred2) const override; bool isProfitableToUnpredicate(MachineBasicBlock &TMBB, - MachineBasicBlock &FMBB) const; + MachineBasicBlock &FMBB) const override; bool PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const; + const SmallVectorImpl &Pred) const override; - unsigned int getPredicationCost(const MachineInstr *) const; + unsigned int getPredicationCost(const MachineInstr *) const override; unsigned 
int getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, - unsigned *PredCost = 0) const; + unsigned *PredCost = nullptr) const override; - virtual int getInstrLatency(const InstrItineraryData *ItinData, - SDNode *Node) const { return 1;} + int getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const override { return 1;} /// \brief Reserve the registers that may be accessed using indirect addressing. void reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const; - virtual unsigned calculateIndirectAddress(unsigned RegIndex, - unsigned Channel) const; + unsigned calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const override; - virtual const TargetRegisterClass *getIndirectAddrRegClass() const; + const TargetRegisterClass *getIndirectAddrRegClass() const override; - virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, - unsigned ValueReg, unsigned Address, - unsigned OffsetReg) const; + MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg) const override; - virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, - unsigned ValueReg, unsigned Address, - unsigned OffsetReg) const; + MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg) const override; unsigned getMaxAlusPerClause() const; @@ -244,7 +244,7 @@ namespace llvm { MachineInstr *buildMovInstr(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, - unsigned DstReg, unsigned SrcReg) const; + unsigned DstReg, unsigned SrcReg) const override; /// \brief Get the index of Op in the MachineInstr.
/// diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index d2075c0..590fde2 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -1625,6 +1625,12 @@ def : DwordAddrPat ; } // End isR600toCayman Predicate +let Predicates = [isR600] in { +// Intrinsic patterns +defm : Expand24IBitOps; +defm : Expand24UBitOps; +} // End isR600 + def getLDSNoRetOp : InstrMapping { let FilterClass = "R600_LDS_1A1D"; let RowFields = ["BaseOp"]; diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h index c1bec0a..b0ae22e 100644 --- a/lib/Target/R600/R600MachineFunctionInfo.h +++ b/lib/Target/R600/R600MachineFunctionInfo.h @@ -21,7 +21,7 @@ namespace llvm { class R600MachineFunctionInfo : public AMDGPUMachineFunction { - virtual void anchor(); + void anchor() override; public: R600MachineFunctionInfo(const MachineFunction &MF); SmallVector LiveOuts; diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index d3ffb50..d1655d1 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -12,8 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "misched" - #include "R600MachineScheduler.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -23,6 +21,8 @@ using namespace llvm; +#define DEBUG_TYPE "misched" + void R600SchedStrategy::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness"); DAG = static_cast(dag); @@ -56,7 +56,7 @@ unsigned getWFCountLimitedByGPR(unsigned GPRCount) { } SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) { - SUnit *SU = 0; + SUnit *SU = nullptr; NextInstKind = IDOther; IsTopNode = false; @@ -316,7 +316,7 @@ int R600SchedStrategy::getInstKind(SUnit* SU) { SUnit *R600SchedStrategy::PopInst(std::vector &Q, bool AnyALU) { if (Q.empty()) - return NULL; + return nullptr; for (std::vector::reverse_iterator It = Q.rbegin(), E = Q.rend(); It != E; ++It) { SUnit *SU = *It; @@ -331,7 +331,7 @@ SUnit *R600SchedStrategy::PopInst(std::vector &Q, bool AnyALU) { InstructionsGroupCandidate.pop_back(); } } - return NULL; + return nullptr; } void R600SchedStrategy::LoadAlu() { @@ -448,11 +448,11 @@ SUnit* R600SchedStrategy::pickAlu() { } PrepareNextSlot(); } - return NULL; + return nullptr; } SUnit* R600SchedStrategy::pickOther(int QID) { - SUnit *SU = 0; + SUnit *SU = nullptr; std::vector &AQ = Available[QID]; if (AQ.empty()) { diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h index b909ff7..fd475af 100644 --- a/lib/Target/R600/R600MachineScheduler.h +++ b/lib/Target/R600/R600MachineScheduler.h @@ -68,17 +68,16 @@ class R600SchedStrategy : public MachineSchedStrategy { public: R600SchedStrategy() : - DAG(0), TII(0), TRI(0), MRI(0) { + DAG(nullptr), TII(nullptr), TRI(nullptr), MRI(nullptr) { } - virtual ~R600SchedStrategy() { - } + virtual ~R600SchedStrategy() {} - virtual void initialize(ScheduleDAGMI *dag); - virtual SUnit *pickNode(bool &IsTopNode); - virtual void schedNode(SUnit *SU, bool IsTopNode); - virtual void releaseTopNode(SUnit *SU); - virtual void releaseBottomNode(SUnit *SU); + void initialize(ScheduleDAGMI *dag) override; + SUnit *pickNode(bool &IsTopNode) override; + void schedNode(SUnit *SU, bool IsTopNode) override; + void releaseTopNode(SUnit *SU) override; + void 
releaseBottomNode(SUnit *SU) override; private: std::vector InstructionsGroupCandidate; diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp index 767e5e3..2314136 100644 --- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp @@ -27,7 +27,6 @@ /// to reduce MOV count. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "vec-merger" #include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "R600InstrInfo.h" @@ -42,6 +41,8 @@ using namespace llvm; +#define DEBUG_TYPE "vec-merger" + namespace { static bool @@ -107,9 +108,9 @@ private: public: static char ID; R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), - TII(0) { } + TII(nullptr) { } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); @@ -118,11 +119,11 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { + const char *getPassName() const override { return "R600 Vector Registers Merge Pass"; } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; }; char R600VectorRegMerger::ID = 0; diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index b7b7610..c2f6c03 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "packets" #include "llvm/Support/Debug.h" #include "AMDGPU.h" #include "R600InstrInfo.h" @@ -28,6 +27,8 @@ using namespace llvm; +#define DEBUG_TYPE "packets" + namespace { class R600Packetizer : public MachineFunctionPass { @@ -36,7 +37,7 @@ public: static char ID; R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {} - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); @@ -45,11 +46,11 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const { + const char *getPassName() const override { return "R600 Packetizer"; } - bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; }; char R600Packetizer::ID = 0; @@ -155,18 +156,19 @@ public: } // initPacketizerState - initialize some internal flags. - void initPacketizerState() { + void initPacketizerState() override { ConsideredInstUsesAlreadyWrittenVectorElement = false; } // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) { + bool ignorePseudoInstruction(MachineInstr *MI, + MachineBasicBlock *MBB) override { return false; } // isSoloInstruction - return true if instruction MI can not be packetized // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(MachineInstr *MI) { + bool isSoloInstruction(MachineInstr *MI) override { if (TII->isVector(*MI)) return true; if (!TII->isALUInstr(MI->getOpcode())) @@ -182,7 +184,7 @@ public: // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ // together. 
- bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { + bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override { MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); if (getSlot(MII) == getSlot(MIJ)) ConsideredInstUsesAlreadyWrittenVectorElement = true; @@ -219,7 +221,9 @@ public: // isLegalToPruneDependencies - Is it legal to prune dependence between SUI // and SUJ. - bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;} + bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override { + return false; + } void setIsLastBit(MachineInstr *MI, unsigned Bit) const { unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last); @@ -288,7 +292,7 @@ public: return true; } - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) { + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override { MachineBasicBlock::iterator FirstInBundle = CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front(); const DenseMap<unsigned, unsigned> &PV = diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h index c74c49e..52e1a4b 100644 --- a/lib/Target/R600/R600RegisterInfo.h +++ b/lib/Target/R600/R600RegisterInfo.h @@ -28,27 +28,28 @@ struct R600RegisterInfo : public AMDGPURegisterInfo { R600RegisterInfo(AMDGPUTargetMachine &tm); - virtual BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; /// \param RC is an AMDIL reg class. /// /// \returns the R600 reg class that is equivalent to \p RC. - virtual const TargetRegisterClass *getISARegClass( - const TargetRegisterClass *RC) const; + const TargetRegisterClass *getISARegClass( + const TargetRegisterClass *RC) const override; /// \brief get the HW encoding for a register's channel. unsigned getHWRegChan(unsigned reg) const; - virtual unsigned getHWRegIndex(unsigned Reg) const; + unsigned getHWRegIndex(unsigned Reg) const override; /// \brief get the register class of the specified type to use in the /// CFGStructurizer - virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; + const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override; - virtual const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const; + const RegClassWeight & + getRegClassWeight(const TargetRegisterClass *RC) const override; // \returns true if \p Reg can be defined in one ALU clause and used in another.
- virtual bool isPhysRegLiveAcrossClauses(unsigned Reg) const; + bool isPhysRegLiveAcrossClauses(unsigned Reg) const; }; } // End namespace llvm diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp index 9d24404..419ec8b 100644 --- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp +++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp @@ -209,7 +209,7 @@ public: FunctionPass(ID) { } - virtual bool doInitialization(Module &M) { + bool doInitialization(Module &M) override { LLVMContext &Ctx = M.getContext(); Mod = &M; FloatType = Type::getFloatTy(Ctx); @@ -245,16 +245,16 @@ public: return false; } - virtual bool runOnFunction(Function &F) { + bool runOnFunction(Function &F) override { visit(F); return false; } - virtual const char *getPassName() const { + const char *getPassName() const override { return "R600 Texture Intrinsics Replacer"; } - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { } void visitCallInst(CallInst &I) { diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index f9214a8..d6e4451 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -12,8 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "si-annotate-control-flow" - #include "AMDGPU.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/IR/Constants.h" @@ -26,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "si-annotate-control-flow" + namespace { // Complex types used in this pass @@ -91,15 +91,15 @@ public: SIAnnotateControlFlow(): FunctionPass(ID) { } - virtual bool doInitialization(Module &M); + bool doInitialization(Module &M) override; - virtual bool runOnFunction(Function &F); + bool runOnFunction(Function &F) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "SI annotate control flow"; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); @@ -118,7 +118,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) { Void = Type::getVoidTy(Context); Boolean = Type::getInt1Ty(Context); Int64 = Type::getInt64Ty(Context); - ReturnStruct = StructType::get(Boolean, Int64, (Type *)0); + ReturnStruct = StructType::get(Boolean, Int64, (Type *)nullptr); BoolTrue = ConstantInt::getTrue(Context); BoolFalse = ConstantInt::getFalse(Context); @@ -126,25 +126,25 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) { Int64Zero = ConstantInt::get(Int64, 0); If = M.getOrInsertFunction( - IfIntrinsic, ReturnStruct, Boolean, (Type *)0); + IfIntrinsic, ReturnStruct, Boolean, (Type *)nullptr); Else = M.getOrInsertFunction( - ElseIntrinsic, ReturnStruct, Int64, (Type *)0); + ElseIntrinsic, ReturnStruct, Int64, (Type *)nullptr); Break = M.getOrInsertFunction( - BreakIntrinsic, Int64, Int64, (Type *)0); + BreakIntrinsic, Int64, Int64, (Type *)nullptr); IfBreak = M.getOrInsertFunction( - IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0); + IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr); ElseBreak = M.getOrInsertFunction( - ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0); + ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr); Loop = M.getOrInsertFunction( - LoopIntrinsic, Boolean, Int64, (Type *)0); + LoopIntrinsic, Boolean, Int64, 
(Type *)nullptr); EndCf = M.getOrInsertFunction( - EndCfIntrinsic, Void, Int64, (Type *)0); + EndCfIntrinsic, Void, Int64, (Type *)nullptr); return false; } diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp index 402f1f4..5f71453 100644 --- a/lib/Target/R600/SIFixSGPRCopies.cpp +++ b/lib/Target/R600/SIFixSGPRCopies.cpp @@ -65,7 +65,6 @@ /// ultimately led to the creation of an illegal COPY. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sgpr-copies" #include "AMDGPU.h" #include "SIInstrInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -77,6 +76,8 @@ using namespace llvm; +#define DEBUG_TYPE "sgpr-copies" + namespace { class SIFixSGPRCopies : public MachineFunctionPass { @@ -97,9 +98,9 @@ private: public: SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { + const char *getPassName() const override { return "SI Fix SGPR copies"; } @@ -184,7 +185,8 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, const TargetRegisterClass *SrcRC; if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || - DstRC == &AMDGPU::M0RegRegClass) + DstRC == &AMDGPU::M0RegRegClass || + MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass) return false; SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg); @@ -256,6 +258,19 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { TII->moveToVALU(MI); break; } + case AMDGPU::INSERT_SUBREG: { + const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; + DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); + Src0RC = MRI.getRegClass(MI.getOperand(1).getReg()); + Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); + if (TRI->isSGPRClass(DstRC) && + (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) { + DEBUG(dbgs() << " Fixing INSERT_SUBREG:\n"); + DEBUG(MI.print(dbgs())); + TII->moveToVALU(MI); + } + break; + } } } } diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 0b55411..c9e247c 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -29,22 +29,21 @@ using namespace llvm; SITargetLowering::SITargetLowering(TargetMachine &TM) : AMDGPUTargetLowering(TM) { - addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass); - addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass); + addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); + addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); - addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass); - addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass); + addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass); + addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); - addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass); - addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass); - addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass); + addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass); + addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); - addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); - addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v8i32, 
&AMDGPU::VReg_256RegClass); addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); @@ -78,8 +77,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADDC, MVT::i32, Legal); setOperationAction(ISD::ADDE, MVT::i32, Legal); - setOperationAction(ISD::BITCAST, MVT::i128, Legal); - // We need to custom lower vector stores from local memory setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); @@ -99,10 +96,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::STORE, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); setOperationAction(ISD::STORE, MVT::i64, Custom); - setOperationAction(ISD::STORE, MVT::i128, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Promote); + AddPromotedToType(ISD::SELECT, MVT::f32, MVT::i32); setOperationAction(ISD::SELECT, MVT::i64, Custom); setOperationAction(ISD::SELECT, MVT::f64, Promote); AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64); @@ -119,6 +117,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); @@ -126,39 +140,48 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand); + + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setTruncStoreAction(MVT::i32, MVT::i8, Custom); setTruncStoreAction(MVT::i32, MVT::i16, Custom); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); - 
setTruncStoreAction(MVT::i128, MVT::i64, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + // These should use UDIVREM, so set them to expand + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. MVT VecTypes[] = { MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32 }; - const size_t NumVecTypes = array_lengthof(VecTypes); - for (unsigned Type = 0; Type < NumVecTypes; ++Type) { + for (MVT VT : VecTypes) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch(Op) { case ISD::LOAD: @@ -172,7 +195,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : case ISD::EXTRACT_SUBVECTOR: break; default: - setOperationAction(Op, VecTypes[Type], Expand); + setOperationAction(Op, VT, Expand); break; } } @@ -189,6 +212,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); } setTargetDAGCombine(ISD::SELECT_CC); @@ -204,10 +228,40 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, bool *IsFast) const { + if (IsFast) + *IsFast = false; + // XXX: This depends on the address space and also we may want to revisit // the alignment values we specify in the DataLayout. + + // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, + // which isn't a simple VT. if (!VT.isSimple() || VT == MVT::Other) return false; + + // XXX - CI changes say "Support for unaligned memory accesses" but I don't + // see what for specifically. The wording everywhere else seems to be the + // same. + + // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have + // no alignment restrictions. + if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { + // Using any pair of GPRs should be the same as any other pair. + if (IsFast) + *IsFast = true; + return VT.bitsGE(MVT::i64); + } + + // XXX - The only mention I see of this in the ISA manual is for LDS direct + // reads the "byte address and must be dword aligned". Is it also true for the + // normal loads and stores? + if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) + return false; + + // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the + // byte-address are ignored, thus forcing Dword alignment.
+ if (IsFast) + *IsFast = true; return VT.bitsGT(MVT::i32); } @@ -224,7 +278,7 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL, SDValue Chain, - unsigned Offset) const { + unsigned Offset, bool Signed) const { MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), AMDGPUAS::CONSTANT_ADDRESS); @@ -232,7 +286,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64); SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, DAG.getConstant(Offset, MVT::i64)); - return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr, + return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr, MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, false, false, MemVT.getSizeInBits() >> 3); @@ -340,7 +394,8 @@ SDValue SITargetLowering::LowerFormalArguments( // The first 36 bytes of the input buffer contains information about // thread group and global sizes. SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), - 36 + VA.getLocMemOffset()); + 36 + VA.getLocMemOffset(), + Ins[i].Flags.isSExt()); InVals.push_back(Arg); continue; } @@ -381,8 +436,7 @@ SDValue SITargetLowering::LowerFormalArguments( for (unsigned j = 0; j != NumElements; ++j) Regs.push_back(DAG.getUNDEF(VT)); - InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, - Regs.data(), Regs.size())); + InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs)); continue; } @@ -395,15 +449,15 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { MachineBasicBlock::iterator I = *MI; + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; case AMDGPU::SI_ADDR64_RSRC: { - const SIInstrInfo *TII = - static_cast(getTargetMachine().getInstrInfo()); - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned SuperReg = MI->getOperand(0).getReg(); unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); @@ -428,9 +482,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } - case AMDGPU::V_SUB_F64: { - const SIInstrInfo *TII = - static_cast(getTargetMachine().getInstrInfo()); + case AMDGPU::V_SUB_F64: BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) @@ -442,11 +494,9 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( .addImm(2); /* NEG */ MI->eraseFromParent(); break; - } + case AMDGPU::SI_RegisterStorePseudo: { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - const SIInstrInfo *TII = - static_cast(getTargetMachine().getInstrInfo()); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); MachineInstrBuilder MIB = BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore), @@ -455,6 +505,50 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MIB.addOperand(MI->getOperand(i)); MI->eraseFromParent(); + break; + } + case AMDGPU::FABS_SI: { + MachineRegisterInfo &MRI = 
BB->getParent()->getRegInfo(); + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), + Reg) + .addImm(0x7fffffff); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32), + MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(Reg); + MI->eraseFromParent(); + break; + } + case AMDGPU::FNEG_SI: { + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), + Reg) + .addImm(0x80000000); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32), + MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()) + .addReg(Reg); + MI->eraseFromParent(); + break; + } + case AMDGPU::FCLAMP_SI: { + const SIInstrInfo *TII = + static_cast(getTargetMachine().getInstrInfo()); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64), + MI->getOperand(0).getReg()) + .addImm(0) // SRC0 modifiers + .addOperand(MI->getOperand(1)) + .addImm(0) // SRC1 modifiers + .addImm(0) // SRC1 + .addImm(1) // CLAMP + .addImm(0); // OMOD + MI->eraseFromParent(); } } return BB; @@ -510,7 +604,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { SplitVectorLoad(Op, DAG), Load->getChain() }; - return DAG.getMergeValues(MergedValues, 2, SDLoc(Op)); + return DAG.getMergeValues(MergedValues, SDLoc(Op)); } else { return LowerLOAD(Op, DAG); } @@ -533,23 +627,23 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (IntrinsicID) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case Intrinsic::r600_read_ngroups_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false); case Intrinsic::r600_read_ngroups_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false); case Intrinsic::r600_read_ngroups_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false); case Intrinsic::r600_read_global_size_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false); case Intrinsic::r600_read_global_size_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false); case Intrinsic::r600_read_global_size_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false); case Intrinsic::r600_read_local_size_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false); case Intrinsic::r600_read_local_size_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false); case Intrinsic::r600_read_local_size_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32); + return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false); case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, 
&AMDGPU::SReg_32RegClass, AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT); @@ -570,7 +664,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { AMDGPU::VGPR2, VT); case AMDGPUIntrinsic::SI_load_const: { SDValue Ops [] = { - ResourceDescriptorToi128(Op.getOperand(1), DAG), + Op.getOperand(1), Op.getOperand(2) }; @@ -579,7 +673,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, VT.getSizeInBits() / 8, 4); return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL, - Op->getVTList(), Ops, 2, VT, MMO); + Op->getVTList(), Ops, VT, MMO); } case AMDGPUIntrinsic::SI_sample: return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG); @@ -591,7 +685,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG); case AMDGPUIntrinsic::SI_vs_load_input: return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT, - ResourceDescriptorToi128(Op.getOperand(1), DAG), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } @@ -606,7 +700,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); SDValue Ops [] = { Chain, - ResourceDescriptorToi128(Op.getOperand(2), DAG), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), Op.getOperand(5), @@ -627,8 +721,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { MachineMemOperand::MOStore, VT.getSizeInBits() / 8, 4); return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, - Op->getVTList(), Ops, - sizeof(Ops)/sizeof(Ops[0]), VT, MMO); + Op->getVTList(), Ops, VT, MMO); } default: break; @@ -650,7 +743,7 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) { if (I->getOpcode() == Opcode) return *I; } - return 0; + return nullptr; } /// This transforms the control flow intrinsics to get the branch destination as @@ -662,7 +755,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, SDNode *Intr = BRCOND.getOperand(1).getNode(); SDValue Target = BRCOND.getOperand(2); - SDNode *BR = 0; + SDNode *BR = nullptr; if (Intr->getOpcode() == ISD::SETCC) { // As long as we negate the condition everything is fine @@ -695,7 +788,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, // build the new intrinsic call SDNode *Result = DAG.getNode( Res.size() > 1 ? 
ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL, - DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode(); + DAG.getVTList(Res), Ops).getNode(); if (BR) { // Give the branch instruction our target @@ -703,7 +796,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, BR->getOperand(0), BRCOND.getOperand(2) }; - DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2); + DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops); } SDValue Chain = SDValue(Result, Result->getNumValues() - 1); @@ -739,7 +832,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { MergedValues[1] = Load->getChain(); if (Ret.getNode()) { MergedValues[0] = Ret; - return DAG.getMergeValues(MergedValues, 2, DL); + return DAG.getMergeValues(MergedValues, DL); } if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { @@ -770,30 +863,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { } MergedValues[0] = Ret; - return DAG.getMergeValues(MergedValues, 2, DL); + return DAG.getMergeValues(MergedValues, DL); } -SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op, - SelectionDAG &DAG) const { - - if (Op.getValueType() == MVT::i128) { - return Op; - } - - assert(Op.getOpcode() == ISD::UNDEF); - - return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128, - DAG.getConstant(0, MVT::i64), - DAG.getConstant(0, MVT::i64)); -} - SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const { return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2), - ResourceDescriptorToi128(Op.getOperand(3), DAG), + Op.getOperand(3), Op.getOperand(4)); } @@ -833,12 +912,6 @@ SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDLoc DL(Op); - // Possible Min/Max pattern - SDValue MinMax = LowerMinMax(Op, DAG); - if (MinMax.getNode()) { - return MinMax; - } - SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); } @@ -948,8 +1021,12 @@ SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op, return SDValue(); } - return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), - DAG.getConstant(0, MVT::i32)); + SDValue Src = Op.getOperand(0); + if (Src.getValueType() != MVT::i32) + Src = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src); + + SDValue Zero = DAG.getConstant(0, MVT::i32); + return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Src, Zero); } //===----------------------------------------------------------------------===// @@ -963,7 +1040,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N->getValueType(0); switch (N->getOpcode()) { - default: break; + default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); case ISD::SELECT_CC: { ConstantSDNode *True, *False; // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc) @@ -982,7 +1059,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, SDValue Arg0 = N->getOperand(0); SDValue Arg1 = N->getOperand(1); SDValue CC = N->getOperand(2); - ConstantSDNode * C = NULL; + ConstantSDNode * C = nullptr; ISD::CondCode CCOp = dyn_cast(CC)->get(); // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne) @@ -998,7 +1075,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, break; } } - return SDValue(); + + return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); } /// \brief Test if RegClass is one of the VSrc classes @@ -1029,9 +1107,11 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) 
const { return -1; } Imm.I = Node->getSExtValue(); - } else if (const ConstantFPSDNode *Node = dyn_cast(N)) + } else if (const ConstantFPSDNode *Node = dyn_cast(N)) { + if (N->getValueType(0) != MVT::f32) + return -1; Imm.F = Node->getValueAPF().convertToFloat(); - else + } else return -1; // It isn't an immediate if ((Imm.I >= -16 && Imm.I <= 64) || @@ -1051,7 +1131,7 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate, MachineSDNode *Mov = dyn_cast(Operand); const SIInstrInfo *TII = static_cast(getTargetMachine().getInstrInfo()); - if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode())) + if (!Mov || !TII->isMov(Mov->getMachineOpcode())) return false; const SDValue &Op = Mov->getOperand(0); @@ -1098,7 +1178,7 @@ const TargetRegisterClass *SITargetLowering::getRegClassForNode( } return TRI.getPhysRegClass(Reg); } - default: return NULL; + default: return nullptr; } } const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode()); @@ -1202,17 +1282,17 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, // Commuted opcode if available int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1; - const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev); + const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev); assert(!DescRev || DescRev->getNumDefs() == NumDefs); assert(!DescRev || DescRev->getNumOperands() == NumOps); // e64 version if available, -1 otherwise int OpcodeE64 = AMDGPU::getVOPe64(Opcode); - const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64); + const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64); + int InputModifiers[3] = {0}; assert(!DescE64 || DescE64->getNumDefs() == NumDefs); - assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4)); int32_t Immediate = Desc->getSize() == 4 ? 
0 : -1; bool HaveVSrc = false, HaveSSrc = false; @@ -1279,17 +1359,18 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, fitsRegClass(DAG, Ops[1], OtherRegClass))) { // Swap commutable operands - SDValue Tmp = Ops[1]; - Ops[1] = Ops[0]; - Ops[0] = Tmp; + std::swap(Ops[0], Ops[1]); Desc = DescRev; - DescRev = 0; + DescRev = nullptr; continue; } } - if (DescE64 && !Immediate) { + if (Immediate) + continue; + + if (DescE64) { // Test if it makes sense to switch to e64 encoding unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass; @@ -1305,14 +1386,46 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, Immediate = -1; Promote2e64 = true; Desc = DescE64; - DescE64 = 0; + DescE64 = nullptr; } } + + if (!DescE64 && !Promote2e64) + continue; + if (!Operand.isMachineOpcode()) + continue; + if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) { + Ops.pop_back(); + Ops.push_back(Operand.getOperand(0)); + InputModifiers[i] = 1; + Promote2e64 = true; + if (!DescE64) + continue; + Desc = DescE64; + DescE64 = 0; + } + else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) { + Ops.pop_back(); + Ops.push_back(Operand.getOperand(0)); + InputModifiers[i] = 2; + Promote2e64 = true; + if (!DescE64) + continue; + Desc = DescE64; + DescE64 = 0; + } } if (Promote2e64) { + std::vector OldOps(Ops); + Ops.clear(); + for (unsigned i = 0; i < OldOps.size(); ++i) { + // src_modifier + Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32)); + Ops.push_back(OldOps[i]); + } // Add the modifier flags while promoting - for (unsigned i = 0; i < 4; ++i) + for (unsigned i = 0; i < 2; ++i) Ops.push_back(DAG.getTargetConstant(0, MVT::i32)); } @@ -1390,7 +1503,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32)); for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) Ops.push_back(Node->getOperand(i)); - Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops); // If we only got one lane, replace it with a copy // (if NewDmask has only one bit set...) 
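For reference, the Promote2e64 path in the hunk above rebuilds the operand list so that every source operand is preceded by one src_modifier word, with two trailing zero words for clamp and omod. A minimal standalone sketch of that interleaving (plain C++, not the LLVM API; modifier values follow the code above: 0 = none, 1 = neg folded from FNEG_SI, 2 = abs folded from FABS_SI):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Rebuild [src0, src1, ...] into [mod0, src0, mod1, src1, ..., clamp, omod],
    // mirroring what the e64 promotion above does with DAG.getTargetConstant.
    std::vector<uint32_t> promoteToE64(const std::vector<uint32_t> &Srcs,
                                       const std::vector<uint32_t> &Mods) {
      std::vector<uint32_t> Ops;
      for (std::size_t i = 0; i < Srcs.size(); ++i) {
        Ops.push_back(Mods[i]); // src_modifier word: 0 = none, 1 = neg, 2 = abs
        Ops.push_back(Srcs[i]); // the source operand itself
      }
      Ops.push_back(0); // clamp
      Ops.push_back(0); // omod
      return Ops;
    }

(Assumes Mods carries one entry per source, as InputModifiers[3] does in the hunk above.)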
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index ca73f53..c6eaa81 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -22,7 +22,7 @@ namespace llvm { class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL, - SDValue Chain, unsigned Offset) const; + SDValue Chain, unsigned Offset, bool Signed) const; SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; @@ -33,7 +33,6 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const; bool foldImm(SDValue &Operand, int32_t &Immediate, bool &ScalarSlotUsed) const; const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG, @@ -49,32 +48,33 @@ class SITargetLowering : public AMDGPUTargetLowering { public: SITargetLowering(TargetMachine &tm); - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, bool *IsFast) const; - virtual bool shouldSplitVectorType(EVT VT) const override; + bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, + bool *IsFast) const override; + bool shouldSplitVectorType(EVT VT) const override; - virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const override; + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; + SmallVectorImpl &InVals) const override; - virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, - MachineBasicBlock * BB) const; - virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; - virtual MVT getScalarShiftAmountTy(EVT VT) const; - virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const; - virtual void AdjustInstrPostInstrSelection(MachineInstr *MI, - SDNode *Node) const; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI, + MachineBasicBlock * BB) const override; + EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; + MVT getScalarShiftAmountTy(EVT VT) const override; + bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override; + void AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const override; int32_t analyzeImmediate(const SDNode *N) const; SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, - unsigned Reg, EVT VT) const; + unsigned Reg, EVT VT) const override; }; } // End namespace llvm diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index 695ec40..a17fed7 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -97,13 +97,13 @@ private: public: SIInsertWaits(TargetMachine &tm) : MachineFunctionPass(ID), - TII(0), - TRI(0), + 
TII(nullptr), + TRI(nullptr), ExpInstrTypesSeen(0) { } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { + const char *getPassName() const override { return "SI insert wait instructions"; } diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index aa2c22c..168eff2 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// class InstSI pattern> : - AMDGPUInst { + AMDGPUInst, PredicateControl { field bits<1> VM_CNT = 0; field bits<1> EXP_CNT = 0; @@ -210,16 +210,19 @@ class VOP3 op, dag outs, dag ins, string asm, list pattern> : Enc64 { bits<8> dst; + bits<2> src0_modifiers; bits<9> src0; + bits<2> src1_modifiers; bits<9> src1; + bits<2> src2_modifiers; bits<9> src2; - bits<3> abs; bits<1> clamp; bits<2> omod; - bits<3> neg; let Inst{7-0} = dst; - let Inst{10-8} = abs; + let Inst{8} = src0_modifiers{1}; + let Inst{9} = src1_modifiers{1}; + let Inst{10} = src2_modifiers{1}; let Inst{11} = clamp; let Inst{25-17} = op; let Inst{31-26} = 0x34; //encoding @@ -227,7 +230,9 @@ class VOP3 op, dag outs, dag ins, string asm, list pattern> : let Inst{49-41} = src1; let Inst{58-50} = src2; let Inst{60-59} = omod; - let Inst{63-61} = neg; + let Inst{61} = src0_modifiers{0}; + let Inst{62} = src1_modifiers{0}; + let Inst{63} = src2_modifiers{0}; let mayLoad = 0; let mayStore = 0; @@ -240,12 +245,14 @@ class VOP3b op, dag outs, dag ins, string asm, list pattern> : Enc64 { bits<8> dst; + bits<2> src0_modifiers; bits<9> src0; + bits<2> src1_modifiers; bits<9> src1; + bits<2> src2_modifiers; bits<9> src2; bits<7> sdst; bits<2> omod; - bits<3> neg; let Inst{7-0} = dst; let Inst{14-8} = sdst; @@ -255,7 +262,9 @@ class VOP3b op, dag outs, dag ins, string asm, list pattern> : let Inst{49-41} = src1; let Inst{58-50} = src2; let Inst{60-59} = omod; - let Inst{63-61} = neg; + let Inst{61} = src0_modifiers{0}; + let Inst{62} = src1_modifiers{0}; + let Inst{63} = src2_modifiers{0}; let mayLoad = 0; let mayStore = 0; diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index ab2fe09..4a9e346 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -187,27 +187,45 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned KillFlag = isKill ? RegState::Kill : 0; + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) { - unsigned Lane = MFI->SpillTracker.getNextLane(MRI); - BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), - MFI->SpillTracker.LaneVGPR) + unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent()); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR) .addReg(SrcReg, KillFlag) .addImm(Lane); + MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane); + } else if (RI.isSGPRClass(RC)) { + // We are only allowed to create one new instruction when spilling + // registers, so we need to use a pseudo instruction for vector + // registers. + // + // Reserve a spot in the spill tracker for each sub-register of + // the vector register.
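// Worked example (editorial sketch): spilling an SReg_128 gives
// RC->getSize() == 16 bytes, so NumSubRegs == 16 / 4 == 4 and reserveLanes()
// claims four consecutive lanes in LaneVGPR, one per 32-bit sub-register;
// the matching RESTORE pseudo reads them back as Spill.Lane + i.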
+ unsigned NumSubRegs = RC->getSize() / 4; + unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(), + NumSubRegs); MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, - Lane); - } else { - for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { - unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg) - .addReg(SrcReg, 0, RI.getSubRegFromChannel(i)); - storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i, - &AMDGPU::SReg_32RegClass, TRI); + FirstLane); + + unsigned Opcode; + switch (RC->getSize() * 8) { + case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; + case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; + case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; + case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; + default: llvm_unreachable("Cannot spill register class"); } + + BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR) + .addReg(SrcReg) + .addImm(FrameIndex); + } else { + llvm_unreachable("VGPR spilling not supported"); } } @@ -216,30 +234,125 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo(); DebugLoc DL = MBB.findDebugLoc(MI); if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) { - SIMachineFunctionInfo::SpilledReg Spill = + SIMachineFunctionInfo::SpilledReg Spill = MFI->SpillTracker.getSpilledReg(FrameIndex); assert(Spill.VGPR); BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg) .addReg(Spill.VGPR) .addImm(Spill.Lane); + insertNOPs(MI, 3); + } else if (RI.isSGPRClass(RC)){ + unsigned Opcode; + switch(RC->getSize() * 8) { + case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; + case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; + case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; + case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; + default: llvm_unreachable("Cannot spill register class"); + } + + SIMachineFunctionInfo::SpilledReg Spill = + MFI->SpillTracker.getSpilledReg(FrameIndex); + + BuildMI(MBB, MI, DL, get(Opcode), DestReg) + .addReg(Spill.VGPR) + .addImm(FrameIndex); + insertNOPs(MI, 3); } else { - for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) { - unsigned Flags = RegState::Define; - if (i == 0) { - Flags |= RegState::Undef; - } - unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); - loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i, - &AMDGPU::SReg_32RegClass, TRI); - BuildMI(MBB, MI, DL, get(AMDGPU::COPY)) - .addReg(DestReg, Flags, RI.getSubRegFromChannel(i)) - .addReg(SubReg); + llvm_unreachable("VGPR spilling not supported"); + } +} + +static unsigned getNumSubRegsForSpillOp(unsigned Op) { + + switch (Op) { + case AMDGPU::SI_SPILL_S512_SAVE: + case AMDGPU::SI_SPILL_S512_RESTORE: + return 16; + case AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S256_RESTORE: + return 8; + case AMDGPU::SI_SPILL_S128_SAVE: + case AMDGPU::SI_SPILL_S128_RESTORE: + return 4; + case AMDGPU::SI_SPILL_S64_SAVE: + case AMDGPU::SI_SPILL_S64_RESTORE: + return 2; + default: llvm_unreachable("Invalid spill opcode"); + } +} + +void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, + int Count) const { + while (Count > 0) { + int Arg; + if (Count >= 8) + Arg = 7; + else + Arg = Count - 1; + Count -= 8; + BuildMI(*MI->getParent(), MI, 
MI->getDebugLoc(), get(AMDGPU::S_NOP)) + .addImm(Arg); + } +} + +bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + SIMachineFunctionInfo *MFI = + MI->getParent()->getParent()->getInfo(); + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MBB.findDebugLoc(MI); + switch (MI->getOpcode()) { + default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); + + // SGPR register spill + case AMDGPU::SI_SPILL_S512_SAVE: + case AMDGPU::SI_SPILL_S256_SAVE: + case AMDGPU::SI_SPILL_S128_SAVE: + case AMDGPU::SI_SPILL_S64_SAVE: { + unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); + unsigned FrameIndex = MI->getOperand(2).getImm(); + + for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { + SIMachineFunctionInfo::SpilledReg Spill; + unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(), + &AMDGPU::SGPR_32RegClass, i); + Spill = MFI->SpillTracker.getSpilledReg(FrameIndex); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), + MI->getOperand(0).getReg()) + .addReg(SubReg) + .addImm(Spill.Lane + i); + } + MI->eraseFromParent(); + break; + } + + // SGPR register restore + case AMDGPU::SI_SPILL_S512_RESTORE: + case AMDGPU::SI_SPILL_S256_RESTORE: + case AMDGPU::SI_SPILL_S128_RESTORE: + case AMDGPU::SI_SPILL_S64_RESTORE: { + unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); + + for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { + SIMachineFunctionInfo::SpilledReg Spill; + unsigned FrameIndex = MI->getOperand(2).getImm(); + unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(), + &AMDGPU::SGPR_32RegClass, i); + Spill = MFI->SpillTracker.getSpilledReg(FrameIndex); + + BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(MI->getOperand(1).getReg()) + .addImm(Spill.Lane + i); } + MI->eraseFromParent(); + break; } + } + return true; } MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, @@ -247,18 +360,18 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg()) - return 0; + return nullptr; // Cannot commute VOP2 if src0 is SGPR. if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() && RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg()))) - return 0; + return nullptr; if (!MI->getOperand(2).isReg()) { // XXX: Commute instructions with FPImm operands if (NewMI || MI->getOperand(2).isFPImm() || (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { - return 0; + return nullptr; } // XXX: Commute VOP3 instructions with abs and neg set. @@ -267,7 +380,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, AMDGPU::OpName::abs)).getImm() || MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::neg)).getImm())) - return 0; + return nullptr; unsigned Reg = MI->getOperand(1).getReg(); unsigned SubReg = MI->getOperand(1).getSubReg(); @@ -516,6 +629,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE; case AMDGPU::COPY: return AMDGPU::COPY; case AMDGPU::PHI: return AMDGPU::PHI; + case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG; case AMDGPU::S_MOV_B32: return MI.getOperand(1).isReg() ? 
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; @@ -536,6 +650,23 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64; case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32; case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64; + case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32; + case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; + case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; + case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; + case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; + case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; + case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; + case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; + case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32; + case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; + case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; + case AMDGPU::S_LOAD_DWORD_IMM: + case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; + case AMDGPU::S_LOAD_DWORDX2_IMM: + case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; + case AMDGPU::S_LOAD_DWORDX4_IMM: + case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; } } @@ -559,6 +690,8 @@ bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const { switch (MI.getOpcode()) { case AMDGPU::COPY: case AMDGPU::REG_SEQUENCE: + case AMDGPU::PHI: + case AMDGPU::INSERT_SUBREG: return RI.hasVGPRs(getOpRegClass(MI, 0)); default: return RI.hasVGPRs(getOpRegClass(MI, OpNo)); @@ -737,11 +870,12 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } } - // Legalize REG_SEQUENCE + // Legalize REG_SEQUENCE and PHI // The register class of the operands much be the same type as the register // class of the output. - if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { - const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL; + if (MI->getOpcode() == AMDGPU::REG_SEQUENCE || + MI->getOpcode() == AMDGPU::PHI) { + const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { if (!MI->getOperand(i).isReg() || !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) @@ -774,13 +908,40 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) continue; unsigned DstReg = MRI.createVirtualRegister(RC); - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + MachineBasicBlock *InsertBB; + MachineBasicBlock::iterator Insert; + if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { + InsertBB = MI->getParent(); + Insert = MI; + } else { + // MI is a PHI instruction. 
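// A COPY feeding a PHI cannot be inserted at the PHI itself, because PHIs
// must stay grouped at the top of their block, so it is emitted in the
// matching predecessor block instead, just before that block's terminator.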
+ InsertBB = MI->getOperand(i + 1).getMBB(); + Insert = InsertBB->getFirstTerminator(); + } + BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg) .addOperand(MI->getOperand(i)); MI->getOperand(i).setReg(DstReg); } } + // Legalize INSERT_SUBREG + // src0 must have the same register class as dst + if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) { + unsigned Dst = MI->getOperand(0).getReg(); + unsigned Src0 = MI->getOperand(1).getReg(); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0); + if (DstRC != Src0RC) { + MachineBasicBlock &MBB = *MI->getParent(); + unsigned NewSrc0 = MRI.createVirtualRegister(DstRC); + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0) + .addReg(Src0); + MI->getOperand(1).setReg(NewSrc0); + } + return; + } + // Legalize MUBUF* instructions // FIXME: If we start using the non-addr64 instructions for compute, we // may need to legalize them here. @@ -886,6 +1047,72 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } } +void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const { + MachineBasicBlock *MBB = MI->getParent(); + switch (MI->getOpcode()) { + case AMDGPU::S_LOAD_DWORD_IMM: + case AMDGPU::S_LOAD_DWORD_SGPR: + case AMDGPU::S_LOAD_DWORDX2_IMM: + case AMDGPU::S_LOAD_DWORDX2_SGPR: + case AMDGPU::S_LOAD_DWORDX4_IMM: + case AMDGPU::S_LOAD_DWORDX4_SGPR: + unsigned NewOpcode = getVALUOp(*MI); + unsigned RegOffset; + unsigned ImmOffset; + + if (MI->getOperand(2).isReg()) { + RegOffset = MI->getOperand(2).getReg(); + ImmOffset = 0; + } else { + assert(MI->getOperand(2).isImm()); + // SMRD instructions take a dword offset and MUBUF instructions + // take a byte offset. + ImmOffset = MI->getOperand(2).getImm() << 2; + RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + if (isUInt<12>(ImmOffset)) { + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), + RegOffset) + .addImm(0); + } else { + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), + RegOffset) + .addImm(ImmOffset); + ImmOffset = 0; + } + } + + unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass); + unsigned DWord0 = RegOffset; + unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); + + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1) + .addImm(0); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2) + .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) + .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) + .addReg(DWord0) + .addImm(AMDGPU::sub0) + .addReg(DWord1) + .addImm(AMDGPU::sub1) + .addReg(DWord2) + .addImm(AMDGPU::sub2) + .addReg(DWord3) + .addImm(AMDGPU::sub3); + MI->setDesc(get(NewOpcode)); + if (MI->getOperand(2).isReg()) { + MI->getOperand(2).setReg(MI->getOperand(1).getReg()); + } else { + MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false); + } + MI->getOperand(1).setReg(SRsrc); + MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); + } +} + void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { SmallVector Worklist; Worklist.push_back(&TopInst); @@ -895,8 +1122,16 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { MachineBasicBlock
*MBB = Inst->getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Opcode = Inst->getOpcode(); + unsigned NewOpcode = getVALUOp(*Inst); + // Handle some special cases - switch(Inst->getOpcode()) { + switch (Opcode) { + default: + if (isSMRD(Inst->getOpcode())) { + moveSMRDToVALU(Inst, MRI); + } + break; case AMDGPU::S_MOV_B64: { DebugLoc DL = Inst->getDebugLoc(); @@ -947,7 +1182,6 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { llvm_unreachable("Moving this op to VALU not implemented"); } - unsigned NewOpcode = getVALUOp(*Inst); if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) { // We cannot move this instruction to the VALU, so we should try to // legalize its operands instead. @@ -968,27 +1202,52 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { Inst->RemoveOperand(i); } - // Add the implict and explicit register definitions. - if (NewDesc.ImplicitUses) { - for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { - unsigned Reg = NewDesc.ImplicitUses[i]; - Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); - } + if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { + // We are converting these to a BFE, so we need to add the missing + // operands for the size and offset. + unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16; + Inst->addOperand(Inst->getOperand(1)); + Inst->getOperand(1).ChangeToImmediate(0); + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(Size)); + + // XXX - Other pointless operands. There are 4, but it seems you only need + // 3 to not hit an assertion later in MCInstLower. + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(0)); } - if (NewDesc.ImplicitDefs) { - for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { - unsigned Reg = NewDesc.ImplicitDefs[i]; - Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); - } + addDescImplicitUseDef(NewDesc, Inst); + + if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { + const MachineOperand &OffsetWidthOp = Inst->getOperand(2); + // If we need to move this to VGPRs, we need to unpack the second operand + // back into the 2 separate ones for bit offset and width. + assert(OffsetWidthOp.isImm() && + "Scalar BFE is only implemented for constant width and offset"); + uint32_t Imm = OffsetWidthOp.getImm(); + + uint32_t Offset = Imm & 0x3f; // Extract bits [5:0]. + uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16]. + + Inst->RemoveOperand(2); // Remove old immediate. + Inst->addOperand(Inst->getOperand(1)); + Inst->getOperand(1).ChangeToImmediate(0); + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(Offset)); + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(BitWidth)); + Inst->addOperand(MachineOperand::CreateImm(0)); + Inst->addOperand(MachineOperand::CreateImm(0)); } - legalizeOperands(Inst); - // Update the destination register class. 
+ const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); - switch (Inst->getOpcode()) { + switch (Opcode) { // For target instructions, getOpRegClass just returns the virtual // register class associated with the operand, so we need to find an // equivalent VGPR register class in order to move the instruction to the @@ -996,6 +1255,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { case AMDGPU::COPY: case AMDGPU::PHI: case AMDGPU::REG_SEQUENCE: + case AMDGPU::INSERT_SUBREG: if (RI.hasVGPRs(NewDstRC)) continue; NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); @@ -1010,6 +1270,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); MRI.replaceRegWith(DstReg, NewDstReg); + // Legalize the operands + legalizeOperands(Inst); + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg), E = MRI.use_end(); I != E; ++I) { MachineInstr &UseMI = *I->getParent(); @@ -1097,6 +1360,24 @@ void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl &Worklist, Worklist.push_back(HiHalf); } +void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, + MachineInstr *Inst) const { + // Add the implicit and explicit register definitions. + if (NewDesc.ImplicitUses) { + for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { + unsigned Reg = NewDesc.ImplicitUses[i]; + Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); + } + } + + if (NewDesc.ImplicitDefs) { + for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { + unsigned Reg = NewDesc.ImplicitDefs[i]; + Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); + } + } +} + MachineInstrBuilder SIInstrInfo::buildIndirectWrite( MachineBasicBlock *MBB, MachineBasicBlock::iterator I, diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index c537038..7b31a81 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -47,49 +47,52 @@ private: void splitScalar64BitOp(SmallVectorImpl & Worklist, MachineInstr *Inst, unsigned Opcode) const; + void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const; public: explicit SIInstrInfo(AMDGPUTargetMachine &tm); - const SIRegisterInfo &getRegisterInfo() const { + const SIRegisterInfo &getRegisterInfo() const override { return RI; } - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI) const override; void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo *TRI) const override; + + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; unsigned commuteOpcode(unsigned Opcode) const; - virtual MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI=false) const; + MachineInstr *commuteInstruction(MachineInstr *MI, + bool NewMI=false) const override; bool isTriviallyReMaterializable(const MachineInstr *MI, - AliasAnalysis *AA = 0) const; + AliasAnalysis *AA = nullptr) const; - virtual unsigned
getIEQOpcode() const { + unsigned getIEQOpcode() const override { llvm_unreachable("Unimplemented"); } MachineInstr *buildMovInstr(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, - unsigned DstReg, unsigned SrcReg) const; - virtual bool isMov(unsigned Opcode) const; + unsigned DstReg, unsigned SrcReg) const override; + bool isMov(unsigned Opcode) const override; - virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const; + bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; bool isDS(uint16_t Opcode) const; int isMIMG(uint16_t Opcode) const; int isSMRD(uint16_t Opcode) const; @@ -101,8 +104,8 @@ public: bool isInlineConstant(const MachineOperand &MO) const; bool isLiteralConstant(const MachineOperand &MO) const; - virtual bool verifyInstruction(const MachineInstr *MI, - StringRef &ErrInfo) const; + bool verifyInstruction(const MachineInstr *MI, + StringRef &ErrInfo) const override; bool isSALUInstr(const MachineInstr &MI) const; static unsigned getVALUOp(const MachineInstr &MI); @@ -136,32 +139,36 @@ public: /// create new instruction and insert them before \p MI. void legalizeOperands(MachineInstr *MI) const; + void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const; + /// \brief Replace this instruction's opcode with the equivalent VALU /// opcode. This function will also move the users of \p MI to the /// VALU if necessary. void moveToVALU(MachineInstr &MI) const; - virtual unsigned calculateIndirectAddress(unsigned RegIndex, - unsigned Channel) const; + unsigned calculateIndirectAddress(unsigned RegIndex, + unsigned Channel) const override; - virtual const TargetRegisterClass *getIndirectAddrRegClass() const; + const TargetRegisterClass *getIndirectAddrRegClass() const override; - virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, - unsigned ValueReg, - unsigned Address, - unsigned OffsetReg) const; + MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, + unsigned OffsetReg) const override; - virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, - MachineBasicBlock::iterator I, - unsigned ValueReg, - unsigned Address, - unsigned OffsetReg) const; + MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, + unsigned Address, + unsigned OffsetReg) const override; void reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const; void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I, unsigned SavReg, unsigned IndexReg) const; + + void insertNOPs(MachineBasicBlock::iterator MI, int Count) const; }; namespace AMDGPU { @@ -169,6 +176,7 @@ namespace AMDGPU { int getVOPe64(uint16_t Opcode); int getCommuteRev(uint16_t Opcode); int getCommuteOrig(uint16_t Opcode); + int getMCOpcode(uint16_t Opcode, unsigned Gen); const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index e05ab65..2242e6d 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -7,23 +7,25 @@ // //===----------------------------------------------------------------------===// +// Except for the NONE field, this must be kept in sync with the SISubtarget enum +// in AMDGPUMCInstLower.h +def SISubtarget { + int NONE = -1; + int SI = 0; +} + //===----------------------------------------------------------------------===// // SI DAG
Nodes //===----------------------------------------------------------------------===// -// SMRD takes a 64bit memory address and can only add an 32bit offset -def SIadd64bit32bit : SDNode<"ISD::ADD", - SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]> ->; - def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", - SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>, + SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>, [SDNPMayLoad, SDNPMemOperand] >; def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", SDTypeProfile<0, 13, - [SDTCisVT<0, i128>, // rsrc(SGPR) + [SDTCisVT<0, v4i32>, // rsrc(SGPR) SDTCisVT<1, iAny>, // vdata(VGPR) SDTCisVT<2, i32>, // num_channels(imm) SDTCisVT<3, i32>, // vaddr(VGPR) @@ -41,13 +43,13 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", >; def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", - SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>, + SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>, SDTCisVT<3, i32>]> >; class SDSample : SDNode , SDTCisVT<2, v32i8>, - SDTCisVT<3, i128>, SDTCisVT<4, i32>]> + SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]> >; def SIsample : SDSample<"AMDGPUISD::SAMPLE">; @@ -111,14 +113,17 @@ def IMM16bit : PatLeaf <(imm), [{return isUInt<16>(N->getZExtValue());}] >; +def IMM32bit : PatLeaf <(imm), + [{return isUInt<32>(N->getZExtValue());}] +>; + def mubuf_vaddr_offset : PatFrag< (ops node:$ptr, node:$offset, node:$imm_offset), (add (add node:$ptr, node:$offset), node:$imm_offset) >; class InlineImm : PatLeaf <(vt imm), [{ - return - (*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0; + return isInlineImmediate(N); }]>; class SGPRImm : PatLeaf : PatLeaf; def FRAMEri32 : Operand { - let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index); + let MIOperandInfo = (ops i32:$ptr, i32imm:$index); } //===----------------------------------------------------------------------===// @@ -197,15 +202,17 @@ class SOP2_SHIFT_64 op, string opName, list pattern> : SOP2 < opName#" $dst, $src0, $src1", pattern >; -class SOPC_32 op, string opName, list pattern> : SOPC < - op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1), - opName#" $dst, $src0, $src1", pattern ->; -class SOPC_64 op, string opName, list pattern> : SOPC < - op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1), - opName#" $dst, $src0, $src1", pattern ->; +class SOPC_Helper op, RegisterClass rc, ValueType vt, + string opName, PatLeaf cond> : SOPC < + op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1), + opName#" $dst, $src0, $src1", []>; + +class SOPC_32 op, string opName, PatLeaf cond = COND_NULL> + : SOPC_Helper; + +class SOPC_64 op, string opName, PatLeaf cond = COND_NULL> + : SOPC_Helper; class SOPK_32 op, string opName, list pattern> : SOPK < op, (outs SReg_32:$dst), (ins i16imm:$src0), @@ -221,7 +228,7 @@ multiclass SMRD_Helper op, string asm, RegisterClass baseClass, RegisterClass dstClass> { def _IMM : SMRD < op, 1, (outs dstClass:$dst), - (ins baseClass:$sbase, i32imm:$offset), + (ins baseClass:$sbase, u32imm:$offset), asm#" $dst, $sbase, $offset", [] >; @@ -245,6 +252,28 @@ class VOP2_REV { bit IsOrig = isOrig; } +class SIMCInstr { + string PseudoInstr = pseudo; + int Subtarget = subtarget; +} + +multiclass VOP3_m op, dag outs, dag ins, string asm, list pattern, + string opName> { + + def "" : InstSI , VOP , + SIMCInstr { + let isPseudo = 1; + } + + def _si : VOP3 , SIMCInstr; + +} + +// This 
must always be right before the operand being input modified. +def InputMods : OperandWithDefaultOps { + let PrintMethod = "printOperandAndMods"; +} + multiclass VOP1_Helper op, RegisterClass drc, RegisterClass src, string opName, list pattern> { @@ -256,10 +285,8 @@ multiclass VOP1_Helper op, RegisterClass drc, RegisterClass src, def _e64 : VOP3 < {1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, (outs drc:$dst), - (ins src:$src0, - i32imm:$abs, i32imm:$clamp, - i32imm:$omod, i32imm:$neg), - opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", [] + (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod), + opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", [] >, VOP { let src1 = SIOperand.ZERO; let src2 = SIOperand.ZERO; @@ -288,10 +315,10 @@ multiclass VOP2_Helper op, RegisterClass vrc, RegisterClass arc, def _e64 : VOP3 < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, (outs vrc:$dst), - (ins arc:$src0, arc:$src1, - i32imm:$abs, i32imm:$clamp, - i32imm:$omod, i32imm:$neg), - opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] + (ins InputMods:$src0_modifiers, arc:$src0, + InputMods:$src1_modifiers, arc:$src1, + i32imm:$clamp, i32imm:$omod), + opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", [] >, VOP , VOP2_REV { let src2 = SIOperand.ZERO; } @@ -316,10 +343,10 @@ multiclass VOP2b_32 op, string opName, list pattern, def _e64 : VOP3b < {1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, (outs VReg_32:$dst), - (ins VSrc_32:$src0, VSrc_32:$src1, - i32imm:$abs, i32imm:$clamp, - i32imm:$omod, i32imm:$neg), - opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", [] + (ins InputMods: $src0_modifiers, VSrc_32:$src0, + InputMods:$src1_modifiers, VSrc_32:$src1, + i32imm:$clamp, i32imm:$omod), + opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", [] >, VOP , VOP2_REV { let src2 = SIOperand.ZERO; /* the VOP2 variant puts the carry out into VCC, the VOP3 variant @@ -340,15 +367,16 @@ multiclass VOPC_Helper op, RegisterClass vrc, RegisterClass arc, def _e64 : VOP3 < {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, (outs SReg_64:$dst), - (ins arc:$src0, arc:$src1, - InstFlag:$abs, InstFlag:$clamp, - InstFlag:$omod, InstFlag:$neg), - opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", + (ins InputMods:$src0_modifiers, arc:$src0, + InputMods:$src1_modifiers, arc:$src1, + InstFlag:$clamp, InstFlag:$omod), + opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", !if(!eq(!cast(cond), "COND_NULL"), [], [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))] ) >, VOP { let src2 = SIOperand.ZERO; + let src2_modifiers = 0; } } @@ -360,12 +388,13 @@ multiclass VOPC_64 op, string opName, ValueType vt = untyped, PatLeaf cond = COND_NULL> : VOPC_Helper ; -class VOP3_32 op, string opName, list pattern> : VOP3 < +multiclass VOP3_32 op, string opName, list pattern> : VOP3_m < op, (outs VReg_32:$dst), - (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2, - InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), - opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern ->, VOP ; + (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers, + VSrc_32:$src1, InputMods:$src2_modifiers, VSrc_32:$src2, + InstFlag:$clamp, InstFlag:$omod), + opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName +>; class VOP3_64_Shift op, string opName, list pattern> : VOP3 < op, (outs VReg_64:$dst), @@ -374,10 +403,9 @@ class VOP3_64_Shift op, string 
opName, list pattern> : VOP3 < >, VOP { let src2 = SIOperand.ZERO; - let abs = 0; + let src0_modifiers = 0; let clamp = 0; let omod = 0; - let neg = 0; } class VOP3_64 op, string opName, list pattern> : VOP3 < @@ -403,7 +431,7 @@ class DS_1A op, dag outs, dag ins, string asm, list pat> : class DS_Load_Helper op, string asm, RegisterClass regClass> : DS_1A < op, (outs regClass:$vdst), - (ins i1imm:$gds, VReg_32:$addr, i16imm:$offset), + (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset), asm#" $vdst, $addr, $offset, [M0]", []> { let data0 = 0; @@ -415,7 +443,7 @@ class DS_Load_Helper op, string asm, RegisterClass regClass> : DS_1A < class DS_Load2_Helper op, string asm, RegisterClass regClass> : DS < op, (outs regClass:$vdst), - (ins i1imm:$gds, VReg_32:$addr, i8imm:$offset0, i8imm:$offset1), + (ins i1imm:$gds, VReg_32:$addr, u8imm:$offset0, u8imm:$offset1), asm#" $gds, $vdst, $addr, $offset0, $offset1, [M0]", []> { let data0 = 0; @@ -427,7 +455,7 @@ class DS_Load2_Helper op, string asm, RegisterClass regClass> : DS < class DS_Store_Helper op, string asm, RegisterClass regClass> : DS_1A < op, (outs), - (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, i16imm:$offset), + (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u16imm:$offset), asm#" $addr, $data0, $offset [M0]", []> { let data1 = 0; @@ -439,7 +467,7 @@ class DS_Store_Helper op, string asm, RegisterClass regClass> : DS_1A < class DS_Store2_Helper op, string asm, RegisterClass regClass> : DS_1A < op, (outs), - (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, i8imm:$offset0, i8imm:$offset1), + (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u8imm:$offset0, u8imm:$offset1), asm#" $addr, $data0, $data1, $offset0, $offset1 [M0]", []> { let mayStore = 1; @@ -450,7 +478,7 @@ class DS_Store2_Helper op, string asm, RegisterClass regClass> : DS_1A class DS_1A1D_RET op, string asm, RegisterClass rc> : DS_1A < op, (outs rc:$vdst), - (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i16imm:$offset), + (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, u16imm:$offset), asm#" $vdst, $addr, $data0, $offset, [M0]", []> { @@ -462,7 +490,7 @@ class DS_1A1D_RET op, string asm, RegisterClass rc> : DS_1A < class MTBUF_Store_Helper op, string asm, RegisterClass regClass> : MTBUF < op, (outs), - (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, + (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt," @@ -481,7 +509,7 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass> { let offen = 0, idxen = 0 in { def _OFFSET : MUBUF ; } @@ -497,7 +525,7 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass> { let offen = 0, idxen = 1 in { def _IDXEN : MUBUF ; } @@ -513,7 +541,7 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass> { let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in { def _ADDR64 : MUBUF ; } } @@ -521,7 +549,7 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass> { class MUBUF_Store_Helper op, string name, RegisterClass vdataClass> : MUBUF { @@ -542,7 +570,7 @@ class MUBUF_Store_Helper op, string name, RegisterClass vdataClass> : class MTBUF_Load_Helper op, string asm, RegisterClass regClass> : MTBUF < op, (outs regClass:$dst), - (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, + (ins 
u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset), asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt," @@ -677,4 +705,12 @@ def isDS : InstrMapping { let ValueCols = [["8"]]; } +def getMCOpcode : InstrMapping { + let FilterClass = "SIMCInstr"; + let RowFields = ["PseudoInstr"]; + let ColFields = ["Subtarget"]; + let KeyCol = [!cast(SISubtarget.NONE)]; + let ValueCols = [[!cast(SISubtarget.SI)]]; +} + include "SIInstructions.td" diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 5232139..500fa78 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -32,9 +32,56 @@ def isSI : Predicate<"Subtarget.getGeneration() " def isCI : Predicate<"Subtarget.getGeneration() " ">= AMDGPUSubtarget::SEA_ISLANDS">; +def isCFDepth0 : Predicate<"isCFDepth0()">; + def WAIT_FLAG : InstFlag<"printWaitFlag">; -let Predicates = [isSI] in { +let SubtargetPredicate = isSI in { +let OtherPredicates = [isCFDepth0] in { + +//===----------------------------------------------------------------------===// +// SMRD Instructions +//===----------------------------------------------------------------------===// + +let mayLoad = 1 in { + +// We are using the SGPR_32 and not the SReg_32 register class for 32-bit +// SMRD instructions, because the SGPR_32 register class does not include M0 +// and writing to M0 from an SMRD instruction will hang the GPU. +defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>; +defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>; +defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; +defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; + +defm S_BUFFER_LOAD_DWORD : SMRD_Helper < + 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32 +>; + +defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < + 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64 +>; + +defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < + 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128 +>; + +defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < + 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256 +>; + +defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < + 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 +>; + +} // mayLoad = 1 + +//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; +//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; + +//===----------------------------------------------------------------------===// +// SOP1 Instructions +//===----------------------------------------------------------------------===// let neverHasSideEffects = 1 in { @@ -45,7 +92,10 @@ def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>; def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>; } // End isMoveImm = 1 -def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>; +def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", + [(set i32:$dst, (not i32:$src0))] +>; + def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>; def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; @@ -65,8 +115,13 @@ def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; //def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; //def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; -//def 
S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>; -//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>; +def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", + [(set i32:$dst, (sext_inreg i32:$src0, i8))] +>; +def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", + [(set i32:$dst, (sext_inreg i32:$src0, i16))] +>; + ////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>; ////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>; ////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>; @@ -99,6 +154,150 @@ def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>; def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>; def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>; def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>; + +//===----------------------------------------------------------------------===// +// SOP2 Instructions +//===----------------------------------------------------------------------===// + +let Defs = [SCC] in { // Carry out goes to SCC +let isCommutable = 1 in { +def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; +def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", + [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))] +>; +} // End isCommutable = 1 + +def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>; +def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", + [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))] +>; + +let Uses = [SCC] in { // Carry in comes from SCC +let isCommutable = 1 in { +def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", + [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; +} // End isCommutable = 1 + +def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", + [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; +} // End Uses = [SCC] +} // End Defs = [SCC] + +def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", + [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] +>; +def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", + [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] +>; +def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", + [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] +>; +def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", + [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] +>; + +def S_CSELECT_B32 : SOP2 < + 0x0000000a, (outs SReg_32:$dst), + (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", + [] +>; + +def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; + +def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", + [(set i32:$dst, (and i32:$src0, i32:$src1))] +>; + +def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", + [(set i64:$dst, (and i64:$src0, i64:$src1))] +>; + +def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", + [(set i32:$dst, (or i32:$src0, i32:$src1))] +>; + +def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", + [(set i64:$dst, (or i64:$src0, i64:$src1))] +>; + +def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", + [(set i32:$dst, (xor i32:$src0, i32:$src1))] +>; + +def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", + [(set i64:$dst, (xor i64:$src0, i64:$src1))] +>; +def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; +def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>; +def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>; +def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>; +def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>; +def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>; +def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>; +def S_NOR_B64 : 
SOP2_64 <0x0000001b, "S_NOR_B64", []>; +def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>; +def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>; + +// Use added complexity so these patterns are preferred to the VALU patterns. +let AddedComplexity = 1 in { + +def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", + [(set i32:$dst, (shl i32:$src0, i32:$src1))] +>; +def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64", + [(set i64:$dst, (shl i64:$src0, i32:$src1))] +>; +def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", + [(set i32:$dst, (srl i32:$src0, i32:$src1))] +>; +def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64", + [(set i64:$dst, (srl i64:$src0, i32:$src1))] +>; +def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", + [(set i32:$dst, (sra i32:$src0, i32:$src1))] +>; +def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64", + [(set i64:$dst, (sra i64:$src0, i32:$src1))] +>; + +} // End AddedComplexity = 1 + +def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>; +def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>; +def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>; +def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>; +def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>; +def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>; +def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; +//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; +def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; + +//===----------------------------------------------------------------------===// +// SOPC Instructions +//===----------------------------------------------------------------------===// + +def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">; +def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">; +def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">; +def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32">; +def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32">; +def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32">; +def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32">; +def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32">; +def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32">; +def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32">; +def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32">; +def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">; +////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>; +////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>; +////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>; +////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>; +//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>; + +//===----------------------------------------------------------------------===// +// SOPK Instructions +//===----------------------------------------------------------------------===// + def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>; def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>; @@ -147,6 +346,108 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>; //def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>; //def EXP : EXP_ <0x00000000, "EXP", []>; +} // End let OtherPredicates = [isCFDepth0] + +//===----------------------------------------------------------------------===// +// SOPP Instructions +//===----------------------------------------------------------------------===// + +def S_NOP : SOPP <0x00000000, (ins i16imm:$SIMM16), "S_NOP $SIMM16", []>; + +let isTerminator 
= 1 in { + +def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", + [(IL_retflag)]> { + let SIMM16 = 0; + let isBarrier = 1; + let hasCtrlDep = 1; +} + +let isBranch = 1 in { +def S_BRANCH : SOPP < + 0x00000002, (ins brtarget:$target), "S_BRANCH $target", + [(br bb:$target)]> { + let isBarrier = 1; +} + +let DisableEncoding = "$scc" in { +def S_CBRANCH_SCC0 : SOPP < + 0x00000004, (ins brtarget:$target, SCCReg:$scc), + "S_CBRANCH_SCC0 $target", [] +>; +def S_CBRANCH_SCC1 : SOPP < + 0x00000005, (ins brtarget:$target, SCCReg:$scc), + "S_CBRANCH_SCC1 $target", + [] +>; +} // End DisableEncoding = "$scc" + +def S_CBRANCH_VCCZ : SOPP < + 0x00000006, (ins brtarget:$target, VCCReg:$vcc), + "S_CBRANCH_VCCZ $target", + [] +>; +def S_CBRANCH_VCCNZ : SOPP < + 0x00000007, (ins brtarget:$target, VCCReg:$vcc), + "S_CBRANCH_VCCNZ $target", + [] +>; + +let DisableEncoding = "$exec" in { +def S_CBRANCH_EXECZ : SOPP < + 0x00000008, (ins brtarget:$target, EXECReg:$exec), + "S_CBRANCH_EXECZ $target", + [] +>; +def S_CBRANCH_EXECNZ : SOPP < + 0x00000009, (ins brtarget:$target, EXECReg:$exec), + "S_CBRANCH_EXECNZ $target", + [] +>; +} // End DisableEncoding = "$exec" + + +} // End isBranch = 1 +} // End isTerminator = 1 + +let hasSideEffects = 1 in { +def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER", + [(int_AMDGPU_barrier_local)] +> { + let SIMM16 = 0; + let isBarrier = 1; + let hasCtrlDep = 1; + let mayLoad = 1; + let mayStore = 1; +} + +def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16", + [] +>; +//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; +//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; +//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; + +let Uses = [EXEC] in { + def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16", + [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)] + > { + let DisableEncoding = "$m0"; + } +} // End Uses = [EXEC] + +//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; +//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; +//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; +//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; +//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; +//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; +} // End hasSideEffects + +//===----------------------------------------------------------------------===// +// VOPC Instructions +//===----------------------------------------------------------------------===// + let isCompare = 1 in { defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">; @@ -403,6 +704,10 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; } // End isCompare = 1 +//===----------------------------------------------------------------------===// +// DS Instructions +//===----------------------------------------------------------------------===// + def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>; def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>; def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; @@ -427,6 +732,9 @@ def DS_READ2_B64 : DS_Load2_Helper <0x00000075, "DS_READ2_B64", VReg_128>; // TODO: DS_READ2ST64_B32, DS_READ2ST64_B64, // DS_WRITE2ST64_B32, DS_WRITE2ST64_B64 +//===----------------------------------------------------------------------===// +// MUBUF Instructions +//===----------------------------------------------------------------------===// //def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>; //def 
BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>; @@ -499,6 +807,11 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < //def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>; //def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>; //def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>; + +//===----------------------------------------------------------------------===// +// MTBUF Instructions +//===----------------------------------------------------------------------===// + //def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>; //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>; //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>; @@ -508,41 +821,10 @@ def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FOR def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>; def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>; -let mayLoad = 1 in { - -// We are using the SGPR_32 and not the SReg_32 register class for 32-bit -// SMRD instructions, because the SGPR_32 register class does not include M0 -// and writing to M0 from an SMRD instruction will hang the GPU. -defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>; -defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>; -defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>; - -defm S_BUFFER_LOAD_DWORD : SMRD_Helper < - 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32 ->; - -defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < - 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64 ->; - -defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < - 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128 ->; - -defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < - 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256 ->; - -defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < - 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512 ->; - -} // mayLoad = 1 +//===----------------------------------------------------------------------===// +// MIMG Instructions +//===----------------------------------------------------------------------===// -//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>; -//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>; defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">; defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">; //def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>; @@ -638,8 +920,12 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>; //def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>; //def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>; -//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; +//===----------------------------------------------------------------------===// +// VOP1 Instructions +//===----------------------------------------------------------------------===// + +//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>; let neverHasSideEffects = 1, isMoveImm = 1 in { defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>; @@ -691,8 +977,13 @@ defm V_CVT_F64_F32 : 
VOP1_64_32 <0x00000010, "V_CVT_F64_F32", //defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; //defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; //defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>; -//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>; -//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>; +defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64", + [(set i32:$dst, (fp_to_uint f64:$src0))] +>; +defm V_CVT_F64_U32 : VOP1_64_32 <0x00000016, "V_CVT_F64_U32", + [(set f64:$dst, (uint_to_fp i32:$src0))] +>; + defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", [(set f32:$dst, (AMDGPUfract f32:$src0))] >; @@ -752,131 +1043,48 @@ defm V_FRACT_F64 : VOP1_64 <0x0000003e, "V_FRACT_F64", []>; //defm V_FREXP_EXP_I32_F32 : VOP1_32 <0x0000003f, "V_FREXP_EXP_I32_F32", []>; defm V_FREXP_MANT_F32 : VOP1_32 <0x00000040, "V_FREXP_MANT_F32", []>; //def V_CLREXCP : VOP1_ <0x00000041, "V_CLREXCP", []>; -defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>; -defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>; -defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>; - -def V_INTERP_P1_F32 : VINTRP < - 0x00000000, - (outs VReg_32:$dst), - (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]", - []> { - let DisableEncoding = "$m0"; -} - -def V_INTERP_P2_F32 : VINTRP < - 0x00000001, - (outs VReg_32:$dst), - (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]", - []> { - - let Constraints = "$src0 = $dst"; - let DisableEncoding = "$src0,$m0"; - -} - -def V_INTERP_MOV_F32 : VINTRP < - 0x00000002, - (outs VReg_32:$dst), - (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), - "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]", - []> { - let DisableEncoding = "$m0"; -} - -//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>; - -let isTerminator = 1 in { - -def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM", - [(IL_retflag)]> { - let SIMM16 = 0; - let isBarrier = 1; - let hasCtrlDep = 1; -} - -let isBranch = 1 in { -def S_BRANCH : SOPP < - 0x00000002, (ins brtarget:$target), "S_BRANCH $target", - [(br bb:$target)]> { - let isBarrier = 1; -} - -let DisableEncoding = "$scc" in { -def S_CBRANCH_SCC0 : SOPP < - 0x00000004, (ins brtarget:$target, SCCReg:$scc), - "S_CBRANCH_SCC0 $target", [] ->; -def S_CBRANCH_SCC1 : SOPP < - 0x00000005, (ins brtarget:$target, SCCReg:$scc), - "S_CBRANCH_SCC1 $target", - [] ->; -} // End DisableEncoding = "$scc" - -def S_CBRANCH_VCCZ : SOPP < - 0x00000006, (ins brtarget:$target, VCCReg:$vcc), - "S_CBRANCH_VCCZ $target", - [] ->; -def S_CBRANCH_VCCNZ : SOPP < - 0x00000007, (ins brtarget:$target, VCCReg:$vcc), - "S_CBRANCH_VCCNZ $target", - [] ->; - -let DisableEncoding = "$exec" in { -def S_CBRANCH_EXECZ : SOPP < - 0x00000008, (ins brtarget:$target, EXECReg:$exec), - "S_CBRANCH_EXECZ $target", - [] ->; -def S_CBRANCH_EXECNZ : SOPP < - 0x00000009, (ins brtarget:$target, EXECReg:$exec), - "S_CBRANCH_EXECNZ $target", - [] ->; -} // End DisableEncoding = "$exec" +defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>; +defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>; +defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>; -} // End isBranch = 1 -} // End isTerminator = 1 +//===----------------------------------------------------------------------===// +// VINTRP 
Instructions +//===----------------------------------------------------------------------===// -let hasSideEffects = 1 in { -def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER", - [(int_AMDGPU_barrier_local)] -> { - let SIMM16 = 0; - let isBarrier = 1; - let hasCtrlDep = 1; - let mayLoad = 1; - let mayStore = 1; +def V_INTERP_P1_F32 : VINTRP < + 0x00000000, + (outs VReg_32:$dst), + (ins VReg_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), + "V_INTERP_P1_F32 $dst, $i, $attr_chan, $attr, [$m0]", + []> { + let DisableEncoding = "$m0"; } -def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16", - [] ->; -//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>; -//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>; -//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>; +def V_INTERP_P2_F32 : VINTRP < + 0x00000001, + (outs VReg_32:$dst), + (ins VReg_32:$src0, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), + "V_INTERP_P2_F32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]", + []> { -let Uses = [EXEC] in { - def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16", - [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)] - > { - let DisableEncoding = "$m0"; - } -} // End Uses = [EXEC] + let Constraints = "$src0 = $dst"; + let DisableEncoding = "$src0,$m0"; -//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>; -//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>; -//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>; -//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>; -//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>; -//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>; -} // End hasSideEffects +} + +def V_INTERP_MOV_F32 : VINTRP < + 0x00000002, + (outs VReg_32:$dst), + (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0), + "V_INTERP_MOV_F32 $dst, $src0, $attr_chan, $attr, [$m0]", + []> { + let DisableEncoding = "$m0"; +} + +//===----------------------------------------------------------------------===// +// VOP2 Instructions +//===----------------------------------------------------------------------===// def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst), (ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc), @@ -891,18 +1099,11 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst), InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), "V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", [(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))] ->; - -//f32 pattern for V_CNDMASK_B32_e64 -def : Pat < - (f32 (select i1:$src2, f32:$src1, f32:$src0)), - (V_CNDMASK_B32_e64 $src0, $src1, $src2) ->; - -def : Pat < - (i32 (trunc i64:$val)), - (EXTRACT_SUBREG $val, sub0) ->; +> { + let src0_modifiers = 0; + let src1_modifiers = 0; + let src2_modifiers = 0; +} def V_READLANE_B32 : VOP2 < 0x00000001, @@ -946,11 +1147,11 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32", defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24", - [(set i32:$dst, (mul I24:$src0, I24:$src1))] + [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))] >; //defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>; defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24", - [(set i32:$dst, (mul U24:$src0, U24:$src1))] + [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))] >; //defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>; @@ -965,27 +1166,43 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32", defm V_MIN_F32 : 
VOP2_32 <0x0000000f, "V_MIN_F32", []>; defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>; -defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>; -defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>; -defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>; -defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>; +defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", + [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>; +defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", + [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>; +defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", + [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>; +defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", + [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>; + +defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", + [(set i32:$dst, (srl i32:$src0, i32:$src1))] +>; -defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>; defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">; -defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>; +defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", + [(set i32:$dst, (sra i32:$src0, i32:$src1))] +>; defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">; let hasPostISelHook = 1 in { -defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>; +defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", + [(set i32:$dst, (shl i32:$src0, i32:$src1))] +>; } defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">; -defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>; -defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>; -defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>; +defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", + [(set i32:$dst, (and i32:$src0, i32:$src1))]>; +defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", + [(set i32:$dst, (or i32:$src0, i32:$src1))] +>; +defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", + [(set i32:$dst, (xor i32:$src0, i32:$src1))] +>; } // End isCommutable = 1 @@ -1001,14 +1218,18 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. 
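Editor's note: the carry-chain definitions in the hunk below pair V_ADD_I32 (carry-out written to VCC, per the `Defs = [VCC]` block) with V_ADDC_U32 (carry-in read from VCC, per `Uses = [VCC]`). A minimal, hedged C++ sketch of the per-lane arithmetic this models, assuming nothing beyond the Defs/Uses lists shown here:

#include <cstdint>

// Sketch: a 64-bit add built from two 32-bit halves, the way the
// V_ADD_I32 / V_ADDC_U32 pair chains a carry through VCC per lane.
inline uint64_t add64ViaCarryChain(uint64_t A, uint64_t B) {
  uint32_t Lo = uint32_t(A) + uint32_t(B);  // V_ADD_I32: carry-out to VCC
  bool VCC = Lo < uint32_t(A);              // the per-lane carry bit
  uint32_t Hi = uint32_t(A >> 32) + uint32_t(B >> 32) + VCC; // V_ADDC_U32: carry-in from VCC
  return (uint64_t(Hi) << 32) | Lo;
}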
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>; -defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>; +defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", + [(set i32:$dst, (add i32:$src0, i32:$src1))], VSrc_32>; +defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", + [(set i32:$dst, (sub i32:$src0, i32:$src1))], VSrc_32>; defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32, "V_SUB_I32">; let Uses = [VCC] in { // Carry-in comes from VCC -defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>; -defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>; +defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", + [(set i32:$dst, (adde i32:$src0, i32:$src1))], VReg_32>; +defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", + [(set i32:$dst, (sube i32:$src0, i32:$src1))], VReg_32>; defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32, "V_SUBB_U32">; } // End Uses = [VCC] @@ -1023,274 +1244,127 @@ defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", >; ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; -def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>; -def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>; -def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>; -def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>; -def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>; -def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>; -def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>; -def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>; -def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>; -def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>; -def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>; -def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>; -////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>; -////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>; -////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>; -////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>; -//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>; + +//===----------------------------------------------------------------------===// +// VOP3 Instructions +//===----------------------------------------------------------------------===// let neverHasSideEffects = 1 in { -def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; -def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>; -def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", - [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))] +defm V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>; +defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", + [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))] +>; +defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24", + [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))] >; -def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", - [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))] +defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24", + [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))] >; } // End neverHasSideEffects -def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; -def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; -def V_CUBETC_F32 : VOP3_32 <0x00000146, 
"V_CUBETC_F32", []>; -def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; + +defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>; +defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>; +defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>; +defm V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>; let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in { -def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", +defm V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>; -def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", +defm V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>; } -def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", +defm V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", [(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>; -defm : BFIPatterns ; -def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", +defm V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32", [(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))] >; -def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", - [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))] ->; -//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; -def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; -def : ROTRPattern ; - -def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>; -def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; -////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>; -////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>; -////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>; -////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>; -////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>; -////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>; -////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>; -////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>; -////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>; -//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>; -//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>; -//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; -def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; -////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; -def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; -def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; - -def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64", - [(set i64:$dst, (shl i64:$src0, i32:$src1))] ->; -def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64", - [(set i64:$dst, (srl i64:$src0, i32:$src1))] ->; -def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", - [(set i64:$dst, (sra i64:$src0, i32:$src1))] ->; - -let isCommutable = 1 in { - -def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; -def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; -def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; -def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; - -} // isCommutable = 1 - -def : Pat < - (fadd f64:$src0, f64:$src1), - (V_ADD_F64 $src0, $src1, (i64 0)) ->; - -def : Pat < - (fmul f64:$src0, f64:$src1), - (V_MUL_F64 $src0, $src1, (i64 0)) ->; - -def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; - -let isCommutable = 1 in { - -def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; -def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; -def V_MUL_LO_I32 : VOP3_32 <0x0000016b, 
"V_MUL_LO_I32", []>; -def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; - -} // isCommutable = 1 - -def : Pat < - (mul i32:$src0, i32:$src1), - (V_MUL_LO_I32 $src0, $src1, (i32 0)) ->; - -def : Pat < - (mulhu i32:$src0, i32:$src1), - (V_MUL_HI_U32 $src0, $src1, (i32 0)) ->; - -def : Pat < - (mulhs i32:$src0, i32:$src1), - (V_MUL_HI_I32 $src0, $src1, (i32 0)) ->; - -def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; -def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; -def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; -def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; -//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; -//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; -//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; -def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; - -let Defs = [SCC] in { // Carry out goes to SCC -let isCommutable = 1 in { -def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>; -def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32", - [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))] ->; -} // End isCommutable = 1 - -def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>; -def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32", - [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))] ->; - -let Uses = [SCC] in { // Carry in comes from SCC -let isCommutable = 1 in { -def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32", - [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; -} // End isCommutable = 1 - -def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32", - [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>; -} // End Uses = [SCC] -} // End Defs = [SCC] - -def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32", - [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))] ->; -def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32", - [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))] ->; -def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32", - [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))] ->; -def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32", - [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))] ->; - -def S_CSELECT_B32 : SOP2 < - 0x0000000a, (outs SReg_32:$dst), - (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32", - [] ->; - -def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>; - -def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32", - [(set i32:$dst, (and i32:$src0, i32:$src1))] ->; - -def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64", - [(set i64:$dst, (and i64:$src0, i64:$src1))] ->; - -def : Pat < - (i1 (and i1:$src0, i1:$src1)), - (S_AND_B64 $src0, $src1) ->; - -def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32", - [(set i32:$dst, (or i32:$src0, i32:$src1))] ->; - -def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64", - [(set i64:$dst, (or i64:$src0, i64:$src1))] ->; - -def : Pat < - (i1 (or i1:$src0, i1:$src1)), - (S_OR_B64 $src0, $src1) ->; - -def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32", - [(set i32:$dst, (xor i32:$src0, i32:$src1))] ->; - -def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64", - [(set i1:$dst, (xor i1:$src0, i1:$src1))] ->; -def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>; -def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>; -def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>; -def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>; -def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>; -def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>; -def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>; 
-def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>; -def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>; -def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>; +def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64", + [(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))] +>; +//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>; +defm V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>; -// Use added complexity so these patterns are preferred to the VALU patterns. -let AddedComplexity = 1 in { +defm V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>; +defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; +////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>; +////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>; +////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>; +////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "V_MAX3_F32", []>; +////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "V_MAX3_I32", []>; +////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "V_MAX3_U32", []>; +////def V_MED3_F32 : VOP3_MED3 <0x00000157, "V_MED3_F32", []>; +////def V_MED3_I32 : VOP3_MED3 <0x00000158, "V_MED3_I32", []>; +////def V_MED3_U32 : VOP3_MED3 <0x00000159, "V_MED3_U32", []>; +//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>; +//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>; +//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; +defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; +////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; +defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; +def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; -def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32", - [(set i32:$dst, (shl i32:$src0, i32:$src1))] ->; -def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64", +def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64", [(set i64:$dst, (shl i64:$src0, i32:$src1))] >; -def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32", - [(set i32:$dst, (srl i32:$src0, i32:$src1))] ->; -def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64", +def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64", [(set i64:$dst, (srl i64:$src0, i32:$src1))] >; -def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32", - [(set i32:$dst, (sra i32:$src0, i32:$src1))] ->; -def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64", +def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", [(set i64:$dst, (sra i64:$src0, i32:$src1))] >; -} // End AddedComplexity = 1 +let isCommutable = 1 in { -def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>; -def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>; -def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>; -def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>; -def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>; -def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>; -def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>; -//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>; -def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>; +def V_ADD_F64 : VOP3_64 <0x00000164, "V_ADD_F64", []>; +def V_MUL_F64 : VOP3_64 <0x00000165, "V_MUL_F64", []>; +def V_MIN_F64 : VOP3_64 <0x00000166, "V_MIN_F64", []>; +def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>; + +} // isCommutable = 1 + +def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>; + +let isCommutable = 1 in { + +defm V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>; +defm V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>; +defm V_MUL_LO_I32 : 
VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>; +defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; + +} // isCommutable = 1 + +defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; +def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; +defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; +def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; +//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; +//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; +//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; +def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; + +//===----------------------------------------------------------------------===// +// Pseudo Instructions +//===----------------------------------------------------------------------===// let isCodeGenOnly = 1, isPseudo = 1 in { -def LOAD_CONST : AMDGPUShaderInst < - (outs GPRF32:$dst), - (ins i32imm:$src), - "LOAD_CONST $dst, $src", - [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))] +def V_MOV_I1 : InstSI < + (outs VReg_1:$dst), + (ins i1imm:$src), + "", [(set i1:$dst, (imm:$src))] +>; + +def V_AND_I1 : InstSI < + (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "", + [(set i1:$dst, (and i1:$src0, i1:$src1))] +>; + +def V_OR_I1 : InstSI < + (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "", + [(set i1:$dst, (or i1:$src0, i1:$src1))] >; // SI pseudo instructions. These are used by the CFG structurizer pass @@ -1301,19 +1375,19 @@ let mayLoad = 1, mayStore = 1, hasSideEffects = 1, let isBranch = 1, isTerminator = 1 in { -def SI_IF : InstSI < +def SI_IF: InstSI < (outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target), - "SI_IF $dst, $vcc, $target", + "", [(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))] >; def SI_ELSE : InstSI < (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target), - "SI_ELSE $dst, $src, $target", - [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> { - + "", + [(set i64:$dst, (int_SI_else i64:$src, bb:$target))] +> { let Constraints = "$src = $dst"; } @@ -1370,7 +1444,7 @@ let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { let UseNamedOperandTable = 1 in { -def SI_RegisterLoad : AMDGPUShaderInst < +def SI_RegisterLoad : InstSI < (outs VReg_32:$dst, SReg_64:$temp), (ins FRAMEri32:$addr, i32imm:$chan), "", [] @@ -1379,7 +1453,7 @@ def SI_RegisterLoad : AMDGPUShaderInst < let mayLoad = 1; } -class SIRegStore : AMDGPUShaderInst < +class SIRegStore : InstSI < outs, (ins VReg_32:$val, FRAMEri32:$addr, i32imm:$chan), "", [] @@ -1439,8 +1513,33 @@ def V_SUB_F64 : InstSI < } // end usesCustomInserter +multiclass SI_SPILL_SGPR { + + def _SAVE : InstSI < + (outs VReg_32:$dst), + (ins sgpr_class:$src, i32imm:$frame_idx), + "", [] + >; + + def _RESTORE : InstSI < + (outs sgpr_class:$dst), + (ins VReg_32:$src, i32imm:$frame_idx), + "", [] + >; + +} + +defm SI_SPILL_S64 : SI_SPILL_SGPR ; +defm SI_SPILL_S128 : SI_SPILL_SGPR ; +defm SI_SPILL_S256 : SI_SPILL_SGPR ; +defm SI_SPILL_S512 : SI_SPILL_SGPR ; + } // end IsCodeGenOnly, isPseudo +} // end SubtargetPredicate = SI + +let Predicates = [isSI] in { + def : Pat< (int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2), (V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0)) @@ -1453,7 +1552,7 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< - (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), + (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) >; @@ -1470,40 
+1569,116 @@ def : Pat < (V_SUB_F64 $src0, $src1) >; +//===----------------------------------------------------------------------===// +// SMRD Patterns +//===----------------------------------------------------------------------===// + +multiclass SMRD_Pattern { + + // 1. Offset as 8bit DWORD immediate + def : Pat < + (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))), + (vt (Instr_IMM $sbase, (as_dword_i32imm $offset))) + >; + + // 2. Offset loaded in an 32bit SGPR + def : Pat < + (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))), + (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset))))) + >; + + // 3. No offset at all + def : Pat < + (constant_load i64:$sbase), + (vt (Instr_IMM $sbase, 0)) + >; +} + +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; +defm : SMRD_Pattern ; + +// 1. Offset as 8bit DWORD immediate +def : Pat < + (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset), + (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)) +>; + +// 2. Offset loaded in an 32bit SGPR +def : Pat < + (SIload_constant v4i32:$sbase, imm:$offset), + (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) +>; + +//===----------------------------------------------------------------------===// +// SOP2 Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (i1 (xor i1:$src0, i1:$src1)), + (S_XOR_B64 $src0, $src1) +>; + +//===----------------------------------------------------------------------===// +// VOP2 Patterns +//===----------------------------------------------------------------------===// + +def : Pat < + (or i64:$src0, i64:$src1), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0), + (EXTRACT_SUBREG i64:$src1, sub0)), sub0), + (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1), + (EXTRACT_SUBREG i64:$src1, sub1)), sub1) +>; + +class SextInReg : Pat < + (sext_inreg i32:$src0, vt), + (V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0)) +>; + +def : SextInReg ; +def : SextInReg ; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ /* SIsample for simple 1D texture lookup */ def : Pat < - (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm), + (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), (IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SamplePattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleRectPattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT), (opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleArrayPattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY), (opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowPattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW), (opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; class SampleShadowArrayPattern : Pat < - (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY), + (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY), 
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler) >; @@ -1692,8 +1867,6 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; -def : BitConvert ; -def : BitConvert ; def : BitConvert ; def : BitConvert ; @@ -1711,10 +1884,18 @@ def : BitConvert ; /********** Src & Dst modifiers **********/ /********** =================== **********/ +def FCLAMP_SI : AMDGPUShaderInst < + (outs VReg_32:$dst), + (ins VSrc_32:$src0), + "FCLAMP_SI $dst, $src0", + [] +> { + let usesCustomInserter = 1; +} + def : Pat < (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)), - (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */), - 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */) + (FCLAMP_SI f32:$src) >; /********** ================================ **********/ @@ -1733,14 +1914,32 @@ def : Pat < (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */ >; +def FABS_SI : AMDGPUShaderInst < + (outs VReg_32:$dst), + (ins VSrc_32:$src0), + "FABS_SI $dst, $src0", + [] +> { + let usesCustomInserter = 1; +} + def : Pat < (fabs f32:$src), - (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */ + (FABS_SI f32:$src) >; +def FNEG_SI : AMDGPUShaderInst < + (outs VReg_32:$dst), + (ins VSrc_32:$src0), + "FNEG_SI $dst, $src0", + [] +> { + let usesCustomInserter = 1; +} + def : Pat < (fneg f32:$src), - (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */ + (FNEG_SI f32:$src) >; /********** ================== **********/ @@ -1768,30 +1967,10 @@ def : Pat < >; def : Pat < - (i1 imm:$imm), - (S_MOV_B64 imm:$imm) ->; - -def : Pat < (i64 InlineImm:$imm), (S_MOV_B64 InlineImm:$imm) >; -// i64 immediates aren't supported in hardware, split it into two 32bit values -def : Pat < - (i64 imm:$imm), - (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0), - (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1) ->; - -def : Pat < - (f64 fpimm:$imm), - (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), - (V_MOV_B32_e32 (f32 (LO32f fpimm:$imm))), sub0), - (V_MOV_B32_e32 (f32 (HI32f fpimm:$imm))), sub1) ->; - /********** ===================== **********/ /********** Interpolation Paterns **********/ /********** ===================== **********/ @@ -1875,21 +2054,9 @@ class Ext32Pat : Pat < def : Ext32Pat ; def : Ext32Pat ; -// 1. Offset as 8bit DWORD immediate -def : Pat < - (SIload_constant i128:$sbase, IMM8bitDWORD:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)) ->; - -// 2. Offset loaded in an 32bit SGPR -def : Pat < - (SIload_constant i128:$sbase, imm:$offset), - (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) ->; - -// 3. 
Offset in an 32Bit VGPR +// Offset in an 32Bit VGPR def : Pat < - (SIload_constant i128:$sbase, i32:$voff), + (SIload_constant v4i32:$sbase, i32:$voff), (BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0) >; @@ -1904,18 +2071,44 @@ def : Pat < def : Pat < (int_SI_tid), (V_MBCNT_HI_U32_B32_e32 0xffffffff, - (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0, 0, 0)) + (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0)) >; -/********** ================== **********/ -/********** VOP3 Patterns **********/ -/********** ================== **********/ +//===----------------------------------------------------------------------===// +// VOP3 Patterns +//===----------------------------------------------------------------------===// + +def : IMad24Pat; +def : UMad24Pat; + +def : Pat < + (fadd f64:$src0, f64:$src1), + (V_ADD_F64 $src0, $src1, (i64 0)) +>; + +def : Pat < + (fmul f64:$src0, f64:$src1), + (V_MUL_F64 $src0, $src1, (i64 0)) +>; + +def : Pat < + (mul i32:$src0, i32:$src1), + (V_MUL_LO_I32 $src0, $src1, (i32 0)) +>; + +def : Pat < + (mulhu i32:$src0, i32:$src1), + (V_MUL_HI_U32 $src0, $src1, (i32 0)) +>; def : Pat < - (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)), - (V_MAD_F32 $src0, $src1, $src2) + (mulhs i32:$src0, i32:$src1), + (V_MUL_HI_I32 $src0, $src1, (i32 0)) >; +defm : BFIPatterns ; +def : ROTRPattern ; + /********** ======================= **********/ /********** Load/Store Patterns **********/ /********** ======================= **********/ @@ -1962,41 +2155,6 @@ def : Pat <(atomic_load_add_local i32:$ptr, i32:$val), def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val), (DS_SUB_U32_RTN 0, $ptr, $val, 0)>; -/********** ================== **********/ -/********** SMRD Patterns **********/ -/********** ================== **********/ - -multiclass SMRD_Pattern { - - // 1. Offset as 8bit DWORD immediate - def : Pat < - (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))), - (vt (Instr_IMM $sbase, (as_dword_i32imm $offset))) - >; - - // 2. Offset loaded in an 32bit SGPR - def : Pat < - (constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)), - (vt (Instr_SGPR $sbase, (S_MOV_B32 imm:$offset))) - >; - - // 3. 
No offset at all - def : Pat < - (constant_load i64:$sbase), - (vt (Instr_IMM $sbase, 0)) - >; -} - -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; - //===----------------------------------------------------------------------===// // MUBUF Patterns //===----------------------------------------------------------------------===// @@ -2083,7 +2241,7 @@ multiclass MUBUF_Load_Dword { def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 0, imm:$glc, imm:$slc, imm:$tfe)), (offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), @@ -2091,7 +2249,7 @@ multiclass MUBUF_Load_Dword ; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm, 1, 0, imm:$glc, imm:$slc, imm:$tfe)), (offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), @@ -2099,7 +2257,7 @@ multiclass MUBUF_Load_Dword ; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset, imm:$offset, 0, 1, imm:$glc, imm:$slc, imm:$tfe)), (idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc), @@ -2107,7 +2265,7 @@ multiclass MUBUF_Load_Dword ; def : Pat < - (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset, + (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset, imm, 1, 1, imm:$glc, imm:$slc, imm:$tfe)), (bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc), @@ -2128,7 +2286,7 @@ defm : MUBUF_Load_Dword : Pat< - (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr, + (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr, i32:$soffset, imm:$inst_offset, imm:$dfmt, imm:$nfmt, imm:$offen, imm:$idxen, imm:$glc, imm:$slc, imm:$tfe), @@ -2156,12 +2314,13 @@ defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64", defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64", [(set f64:$dst, (ffloor f64:$src0))] >; +defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64", + [(set f64:$dst, (frint f64:$src0))] +>; -defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64", []>; - -def V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>; -def V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>; -def V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>; +defm V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>; +defm V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>; +defm V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>; def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>; // XXX - Does this set VCC? @@ -2248,17 +2407,43 @@ def : Pat< >; //===----------------------------------------------------------------------===// -// Miscellaneous Patterns +// Conversion Patterns //===----------------------------------------------------------------------===// +def : Pat<(i32 (sext_inreg i32:$src, i1)), + (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16 + +// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it +// might not be worth the effort, and will need to expand to shifts when +// fixing SGPR copies. 
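Editor's note: the S_BFE_I32 patterns above and below encode the bitfield operand as offset | (width << 16), per the inline `// 0 | 1 << 16` comments. A minimal sketch of that packing (the helper name is hypothetical, not from the patch):

#include <cstdint>

// Sketch: pack an S_BFE offset/width pair the way these patterns do;
// 65536 == 0 | (1 << 16) selects offset 0, width 1, i.e. a sign extend
// from bit 0 for the i1 sext_inreg case.
constexpr uint32_t sbfeImm(uint32_t Offset, uint32_t Width) {
  return Offset | (Width << 16);
}
static_assert(sbfeImm(0, 1) == 65536, "matches the '0 | 1 << 16' comment");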
+ +// Handle sext_inreg in i64 +def : Pat < + (i64 (sext_inreg i64:$src, i1)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0), // 0 | 1 << 16 + (S_MOV_B32 -1), sub1) +>; + def : Pat < - (i64 (trunc i128:$x)), + (i64 (sext_inreg i64:$src, i8)), (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (i32 (EXTRACT_SUBREG $x, sub0)), sub0), - (i32 (EXTRACT_SUBREG $x, sub1)), sub1) + (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0), + (S_MOV_B32 -1), sub1) >; def : Pat < + (i64 (sext_inreg i64:$src, i16)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0), + (S_MOV_B32 -1), sub1) +>; + +//===----------------------------------------------------------------------===// +// Miscellaneous Patterns +//===----------------------------------------------------------------------===// + +def : Pat < (i32 (trunc i64:$a)), (EXTRACT_SUBREG $a, sub0) >; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index c2f8696..6601f2a 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -67,7 +67,7 @@ private: static const unsigned SkipThreshold = 12; static char ID; - const TargetRegisterInfo *TRI; + const SIRegisterInfo *TRI; const SIInstrInfo *TII; bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To); @@ -92,11 +92,11 @@ private: public: SILowerControlFlowPass(TargetMachine &tm) : - MachineFunctionPass(ID), TRI(0), TII(0) { } + MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { } - virtual bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - const char *getPassName() const { + const char *getPassName() const override { return "SI Lower control flow instructions"; } @@ -427,7 +427,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { TII = static_cast(MF.getTarget().getInstrInfo()); - TRI = MF.getTarget().getRegisterInfo(); + TRI = static_cast(MF.getTarget().getRegisterInfo()); SIMachineFunctionInfo *MFI = MF.getInfo(); bool HaveKill = false; diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp new file mode 100644 index 0000000..738c90b --- /dev/null +++ b/lib/Target/R600/SILowerI1Copies.cpp @@ -0,0 +1,148 @@ +//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// i1 values are usually inserted by the CFG Structurizer pass and they are +/// unique in that they can be copied from VALU to SALU registers. +/// This is not possible for any other value type. Since there are no +/// MOV instructions for i1, we need to use V_CMP_* and V_CNDMASK to move the i1. 
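Editor's note: a hedged per-lane model of the two copy directions this new pass lowers (see the V_CNDMASK_B32_e64 and V_CMP_NE_I32_e64 sequences in the pass body below); the helper names are illustrative only, not part of the patch:

#include <cstdint>

// SALU -> VALU: V_CNDMASK_B32_e64 dst, 0, -1, src materializes the
// lane's mask bit as all-ones or zero in a VGPR.
inline int32_t maskBitToLane(bool MaskBit) { return MaskBit ? -1 : 0; }

// VALU -> SALU: V_CMP_NE_I32_e64 dst, 0, src recovers the mask bit as
// (lane value != 0).
inline bool laneToMaskBit(int32_t LaneValue) { return LaneValue != 0; }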
+/// +//===----------------------------------------------------------------------===// +// + +#define DEBUG_TYPE "si-i1-copies" +#include "AMDGPU.h" +#include "SIInstrInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + +class SILowerI1Copies : public MachineFunctionPass { +public: + static char ID; + +public: + SILowerI1Copies() : MachineFunctionPass(ID) { + initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF) override; + + virtual const char *getPassName() const override { + return "SI Lower i1 Copies"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // End anonymous namespace. + +INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE, + "SI Lower i1 Copies", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE, + "SI Lower i1 Copies", false, false) + +char SILowerI1Copies::ID = 0; + +char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID; + +FunctionPass *llvm::createSILowerI1CopiesPass() { + return new SILowerI1Copies(); +} + +bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const SIInstrInfo *TII = static_cast( + MF.getTarget().getInstrInfo()); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + std::vector I1Defs; + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) { + + MachineBasicBlock &MBB = *BI; + MachineBasicBlock::iterator I, Next; + for (I = MBB.begin(); I != MBB.end(); I = Next) { + Next = std::next(I); + MachineInstr &MI = *I; + + if (MI.getOpcode() == AMDGPU::V_MOV_I1) { + I1Defs.push_back(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32)); + continue; + } + + if (MI.getOpcode() == AMDGPU::V_AND_I1) { + I1Defs.push_back(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(AMDGPU::V_AND_B32_e32)); + continue; + } + + if (MI.getOpcode() == AMDGPU::V_OR_I1) { + I1Defs.push_back(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(AMDGPU::V_OR_B32_e32)); + continue; + } + + if (MI.getOpcode() != AMDGPU::COPY || + !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) || + !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg())) + continue; + + + const TargetRegisterClass *DstRC = + MRI.getRegClass(MI.getOperand(0).getReg()); + const TargetRegisterClass *SrcRC = + MRI.getRegClass(MI.getOperand(1).getReg()); + + if (DstRC == &AMDGPU::VReg_1RegClass && + TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) { + I1Defs.push_back(MI.getOperand(0).getReg()); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64)) + .addOperand(MI.getOperand(0)) + .addImm(0) + .addImm(-1) + .addOperand(MI.getOperand(1)) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0); + MI.eraseFromParent(); + } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) && + SrcRC == &AMDGPU::VReg_1RegClass) { + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64)) + .addOperand(MI.getOperand(0)) + 
.addImm(0) + .addOperand(MI.getOperand(1)) + .addImm(0) + .addImm(0) + .addImm(0) + .addImm(0); + MI.eraseFromParent(); + } + } + } + + for (unsigned Reg : I1Defs) + MRI.setRegClass(Reg, &AMDGPU::VReg_32RegClass); + + return false; +} diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp index ea04346..af60995 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -10,8 +10,11 @@ #include "SIMachineFunctionInfo.h" +#include "SIInstrInfo.h" #include "SIRegisterInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #define MAX_LANES 64 @@ -26,21 +29,57 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) PSInputAddr(0), SpillTracker() { } -static unsigned createLaneVGPR(MachineRegisterInfo &MRI) { - return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); +static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) { + unsigned VGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); + + // We need to add this register as live out for the function, in order to + // have the live range calculated directly. + // + // When register spilling begins, we have already calculated the + // live intervals for all the registers. Since we are spilling SGPRs to + // VGPRs, we need to update the Lane VGPR's live interval every time we + // spill or restore a register. + // + // Unfortunately, there is no good way to update the live interval as + // the TargetInstrInfo callbacks for spilling and restoring don't give + // us access to the live interval information. + // + // We are lucky, though, because the InlineSpiller calls + // LiveRangeEdit::calculateRegClassAndHint() which iterates through + // all the new registers that have been created when restoring a register + // and calls LiveIntervals::getInterval(), which creates and computes + // the live interval for the newly created register. However, once this + // live interval is created, it doesn't change and since we usually reuse + // the Lane VGPR multiple times, this means any uses after the first aren't + // added to the live interval. + // + // To work around this, we add the Lane VGPR to the function's live out list, + // so that we can guarantee its live range will cover all of its uses. 
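// Editor's note (hedged): the loop below implements this workaround by
// appending MachineOperand::CreateReg(VGPR, /*isDef=*/false, /*isImp=*/true),
// an implicit use of the lane VGPR, to the block's S_ENDPGM terminator, so
// that LiveIntervals extends the register's live range to the end of the
// function.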
+ + for (MachineBasicBlock &MBB : *MF) { + if (MBB.back().getOpcode() == AMDGPU::S_ENDPGM) { + MBB.back().addOperand(*MF, MachineOperand::CreateReg(VGPR, false, true)); + return VGPR; + } + } + MF->getFunction()->getContext().emitError( + "Could not find the S_ENDPGM instruction."); + return VGPR; } -unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) { +unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes( + MachineRegisterInfo &MRI, MachineFunction *MF, unsigned NumRegs) { + unsigned StartLane = CurrentLane; + CurrentLane += NumRegs; if (!LaneVGPR) { - LaneVGPR = createLaneVGPR(MRI); + LaneVGPR = createLaneVGPR(MRI, MF); } else { - CurrentLane++; - if (CurrentLane == MAX_LANES) { - CurrentLane = 0; - LaneVGPR = createLaneVGPR(MRI); + if (CurrentLane >= MAX_LANES) { + StartLane = CurrentLane = 0; + LaneVGPR = createLaneVGPR(MRI, MF); } } - return CurrentLane; + return StartLane; } void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex, diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h index 8dc82a0..96e619b 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.h +++ b/lib/Target/R600/SIMachineFunctionInfo.h @@ -25,7 +25,7 @@ class MachineRegisterInfo; /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo : public AMDGPUMachineFunction { - virtual void anchor(); + void anchor() override; public: struct SpilledReg { @@ -43,7 +43,12 @@ public: public: unsigned LaneVGPR; RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { } - unsigned getNextLane(MachineRegisterInfo &MRI); + /// \p NumRegs The number of consecutive registers that need to be spilled. + /// This function will ensure that all registers are stored in + /// the same VGPR. + /// \returns The lane to be used for storing the first register. 
+ unsigned reserveLanes(MachineRegisterInfo &MRI, MachineFunction *MF, + unsigned NumRegs = 1); void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1); const SpilledReg& getSpilledReg(unsigned FrameIndex); bool programSpillsRegisters() { return !SpilledRegisters.empty(); } diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index 6cef195..c72d549 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -71,13 +71,12 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const { &AMDGPU::SReg_256RegClass }; - for (unsigned i = 0, e = sizeof(BaseClasses) / - sizeof(const TargetRegisterClass*); i != e; ++i) { - if (BaseClasses[i]->contains(Reg)) { - return BaseClasses[i]; + for (const TargetRegisterClass *BaseClass : BaseClasses) { + if (BaseClass->contains(Reg)) { + return BaseClass; } } - return NULL; + return nullptr; } bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const { @@ -113,7 +112,7 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) { return &AMDGPU::VReg_512RegClass; } - return NULL; + return nullptr; } const TargetRegisterClass *SIRegisterInfo::getSubRegClass( @@ -129,3 +128,10 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( return &AMDGPU::VGPR_32RegClass; } } + +unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg, + const TargetRegisterClass *SubRC, + unsigned Channel) const { + unsigned Index = getHWRegIndex(Reg); + return SubRC->getRegister(Index + Channel); +} diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index 8148f7f..36b4fcd 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -27,22 +27,22 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { SIRegisterInfo(AMDGPUTargetMachine &tm); - virtual BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; - virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const override; /// \param RC is an AMDIL reg class. /// /// \returns the SI register class that is equivalent to \p RC. - virtual const TargetRegisterClass * - getISARegClass(const TargetRegisterClass *RC) const; + const TargetRegisterClass * + getISARegClass(const TargetRegisterClass *RC) const override; /// \brief get the register class of the specified type to use in the /// CFGStructurizer - virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const; + const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override; - virtual unsigned getHWRegIndex(unsigned Reg) const; + unsigned getHWRegIndex(unsigned Reg) const override; /// \brief Return the 'base' register class for this register. /// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc. @@ -63,6 +63,12 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// be returned. const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const; + + /// \p Channel This is the register channel (e.g. a value from 0-16), not the + /// SubReg index. + /// \returns The sub-register of Reg that is in Channel. 
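/// (Editor's example, hedged: given the SIRegisterInfo.cpp implementation
/// above, which returns SubRC->getRegister(getHWRegIndex(Reg) + Channel),
/// Channel 1 of a 64-bit pair such as SGPR4_SGPR5 with a 32-bit SubRC would
/// resolve to SGPR5.)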
+ unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC, + unsigned Channel) const; }; } // End namespace llvm diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index 65cf311..f1f01de 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64Regs, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; @@ -183,14 +183,16 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> { let Size = 96; } -def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>; +def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>; def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>; def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>; +def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>; + //===----------------------------------------------------------------------===// -// [SV]Src_* register classes, can have either an immediate or an register +// [SV]Src_(32|64) register classes, can have either an immediate or a register //===----------------------------------------------------------------------===// def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>; @@ -201,3 +203,9 @@ def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>; def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>; +//===----------------------------------------------------------------------===// +// SGPR and VGPR register classes +//===----------------------------------------------------------------------===// + +def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, + (add VReg_128, SReg_128)>; diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp index 9bf2caf..a0b6907 100644 --- a/lib/Target/R600/SITypeRewriter.cpp +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -35,13 +35,13 @@ class SITypeRewriter : public FunctionPass, static char ID; Module *Mod; Type *v16i8; - Type *i128; + Type *v4i32; public: SITypeRewriter() : FunctionPass(ID) { } - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - virtual const char *getPassName() const { + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + const char *getPassName() const override { return "SI Type Rewriter"; } void visitLoadInst(LoadInst &I); @@ -56,7 +56,7 @@ char SITypeRewriter::ID = 0; bool SITypeRewriter::doInitialization(Module &M) { Mod = &M; v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16); - i128 = Type::getIntNTy(M.getContext(), 128); + v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4); return false; } @@ -84,7 +84,8 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) { Type *ElemTy = PtrTy->getPointerElementType(); IRBuilder<> Builder(&I); if (ElemTy == v16i8) { - Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2)); + Value *BitCast = Builder.CreateBitCast(Ptr, + PointerType::get(v4i32,PtrTy->getPointerAddressSpace())); LoadInst *Load = Builder.CreateLoad(BitCast); SmallVector , 8> MD; I.getAllMetadataOtherThanDebugLoc(MD); @@ -99,6 +100,7 @@ void 
SITypeRewriter::visitLoadInst(LoadInst &I) { void SITypeRewriter::visitCallInst(CallInst &I) { IRBuilder<> Builder(&I); + SmallVector<Value*, 8> Args; SmallVector<Type*, 8> Types; bool NeedToReplace = false; @@ -107,10 +109,10 @@ void SITypeRewriter::visitCallInst(CallInst &I) { for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { Value *Arg = I.getArgOperand(i); if (Arg->getType() == v16i8) { - Args.push_back(Builder.CreateBitCast(Arg, i128)); - Types.push_back(i128); + Args.push_back(Builder.CreateBitCast(Arg, v4i32)); + Types.push_back(v4i32); NeedToReplace = true; - Name = Name + ".i128"; + Name = Name + ".v4i32"; } else if (Arg->getType()->isVectorTy() && Arg->getType()->getVectorNumElements() == 1 && Arg->getType()->getVectorElementType() == @@ -144,12 +146,12 @@ void SITypeRewriter::visitCallInst(CallInst &I) { void SITypeRewriter::visitBitCast(BitCastInst &I) { IRBuilder<> Builder(&I); - if (I.getDestTy() != i128) { + if (I.getDestTy() != v4i32) { return; } if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) { - if (Op->getSrcTy() == i128) { + if (Op->getSrcTy() == v4i32) { I.replaceAllUsesWith(Op->getOperand(0)); I.eraseFromParent(); } diff --git a/lib/Target/Sparc/AsmParser/LLVMBuild.txt b/lib/Target/Sparc/AsmParser/LLVMBuild.txt index c3ddf5a..08fdc9d 100644 --- a/lib/Target/Sparc/AsmParser/LLVMBuild.txt +++ b/lib/Target/Sparc/AsmParser/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = SparcAsmParser parent = Sparc -required_libraries = MC MCParser Support SparcDesc SparcInfo +required_libraries = MC MCParser SparcDesc SparcInfo Support add_to_library_groups = Sparc diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 2ff6cdd..da88820 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -49,15 +49,15 @@ class SparcAsmParser : public MCTargetAsmParser { bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm); - bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); + bool MatchingInlineAsm) override; + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); - bool ParseDirective(AsmToken DirectiveID); + SmallVectorImpl<MCParsedAsmOperand*> &Operands) override; + bool ParseDirective(AsmToken DirectiveID) override; - virtual unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, - unsigned Kind); + unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned Kind) override; // Custom parse functions for Sparc specific operands. OperandMatchResultTy @@ -83,7 +83,8 @@ class SparcAsmParser : public MCTargetAsmParser { bool is64Bit() const { return STI.getTargetTriple().startswith("sparcv9"); } public: SparcAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, - const MCInstrInfo &MII) + const MCInstrInfo &MII, + const MCTargetOptions &Options) : MCTargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -181,10 +182,10 @@ private: struct MemOp Mem; }; public: - bool isToken() const { return Kind == k_Token; } - bool isReg() const { return Kind == k_Register; } - bool isImm() const { return Kind == k_Immediate; } - bool isMem() const { return isMEMrr() || isMEMri(); } + bool isToken() const override { return Kind == k_Token; } + bool isReg() const override { return Kind == k_Register; } + bool isImm() const override { return Kind == k_Immediate; } + bool isMem() const override { return isMEMrr() || isMEMri(); } bool isMEMrr() const { return Kind == k_MemoryReg; } bool isMEMri() const { return Kind == k_MemoryImm; } @@ -203,7 +204,7 @@ public: return StringRef(Tok.Data, Tok.Length); } - unsigned getReg() const { + unsigned getReg() const override { assert((Kind == k_Register) && "Invalid access!"); return Reg.RegNum; } @@ -229,22 +230,22 @@ public: } /// getStartLoc - Get the location of the first token of this operand. - SMLoc getStartLoc() const { + SMLoc getStartLoc() const override { return StartLoc; } /// getEndLoc - Get the location of the last token of this operand. - SMLoc getEndLoc() const { + SMLoc getEndLoc() const override { return EndLoc; } - virtual void print(raw_ostream &OS) const { + void print(raw_ostream &OS) const override { switch (Kind) { case k_Token: OS << "Token: " << getToken() << "\n"; break; case k_Register: OS << "Reg: #" << getReg() << "\n"; break; case k_Immediate: OS << "Imm: " << getImm() << "\n"; break; case k_MemoryReg: OS << "Mem: " << getMemBase() << "+" << getMemOffsetReg() << "\n"; break; - case k_MemoryImm: assert(getMemOff() != 0); + case k_MemoryImm: assert(getMemOff() != nullptr); OS << "Mem: " << getMemBase() << "+" << *getMemOff() << "\n"; break; @@ -264,7 +265,7 @@ public: void addExpr(MCInst &Inst, const MCExpr *Expr) const{ // Add as immediate when possible. Null MCExpr = 0. 
- if (Expr == 0) + if (!Expr) Inst.addOperand(MCOperand::CreateImm(0)); else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr)) Inst.addOperand(MCOperand::CreateImm(CE->getValue())); @@ -323,7 +324,7 @@ public: assert(Op->Reg.Kind == rk_FloatReg); unsigned regIdx = Reg - Sparc::F0; if (regIdx % 2 || regIdx > 31) - return 0; + return nullptr; Op->Reg.RegNum = DoubleRegs[regIdx / 2]; Op->Reg.Kind = rk_DoubleReg; return Op; @@ -337,13 +338,13 @@ public: case rk_FloatReg: regIdx = Reg - Sparc::F0; if (regIdx % 4 || regIdx > 31) - return 0; + return nullptr; Reg = QuadFPRegs[regIdx / 4]; break; case rk_DoubleReg: regIdx = Reg - Sparc::D0; if (regIdx % 2 || regIdx > 31) - return 0; + return nullptr; Reg = QuadFPRegs[regIdx / 2]; break; } @@ -357,7 +358,7 @@ public: Op->Kind = k_MemoryReg; Op->Mem.Base = Base; Op->Mem.OffsetReg = offsetReg; - Op->Mem.Off = 0; + Op->Mem.Off = nullptr; return Op; } @@ -564,7 +565,7 @@ parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) case AsmToken::Comma: case AsmToken::RBrac: case AsmToken::EndOfStatement: - Operands.push_back(SparcOperand::CreateMEMri(BaseReg, 0, S, E)); + Operands.push_back(SparcOperand::CreateMEMri(BaseReg, nullptr, S, E)); return MatchOperand_Success; case AsmToken::Plus: @@ -574,7 +575,7 @@ parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) break; } - SparcOperand *Offset = 0; + SparcOperand *Offset = nullptr; OperandMatchResultTy ResTy = parseSparcAsmOperand(Offset); if (ResTy != MatchOperand_Success || !Offset) return MatchOperand_NoMatch; @@ -636,7 +637,7 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return MatchOperand_Success; } - SparcOperand *Op = 0; + SparcOperand *Op = nullptr; ResTy = parseSparcAsmOperand(Op, (Mnemonic == "call")); if (ResTy != MatchOperand_Success || !Op) @@ -656,7 +657,7 @@ SparcAsmParser::parseSparcAsmOperand(SparcOperand *&Op, bool isCall) SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); const MCExpr *EVal; - Op = 0; + Op = nullptr; switch (getLexer().getKind()) { default: break; diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index 88fba39..f3441ff 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -12,7 +12,6 @@ // NOP is placed.
//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "delay-slot-filler" #include "Sparc.h" #include "SparcSubtarget.h" #include "llvm/ADT/SmallSet.h" @@ -27,6 +26,8 @@ using namespace llvm; +#define DEBUG_TYPE "delay-slot-filler" + STATISTIC(FilledSlots, "Number of delay slots filled"); static cl::opt<bool> DisableDelaySlotFiller( @@ -49,12 +50,12 @@ namespace { Subtarget(&TM.getSubtarget<SparcSubtarget>()) { } - virtual const char *getPassName() const { + const char *getPassName() const override { return "SPARC Delay Slot Filler"; } bool runOnMachineBasicBlock(MachineBasicBlock &MBB); - bool runOnMachineFunction(MachineFunction &F) { + bool runOnMachineFunction(MachineFunction &F) override { bool Changed = false; // This pass invalidates liveness information when it reorders diff --git a/lib/Target/Sparc/Disassembler/LLVMBuild.txt b/lib/Target/Sparc/Disassembler/LLVMBuild.txt index e7387cd..c27398f 100644 --- a/lib/Target/Sparc/Disassembler/LLVMBuild.txt +++ b/lib/Target/Sparc/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = SparcDisassembler parent = Sparc -required_libraries = MC Support SparcInfo +required_libraries = MC SparcInfo Support add_to_library_groups = Sparc diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp index 5cd99d6..4df0990 100644 --- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sparc-disassembler" - #include "Sparc.h" #include "SparcRegisterInfo.h" #include "SparcSubtarget.h" @@ -23,6 +21,8 @@ using namespace llvm; +#define DEBUG_TYPE "sparc-disassembler" + typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { @@ -32,22 +32,18 @@ class SparcDisassembler : public MCDisassembler { public: /// Constructor - Initializes the disassembler. /// - SparcDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) : - MCDisassembler(STI), RegInfo(Info) + SparcDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : + MCDisassembler(STI, Ctx) {} virtual ~SparcDisassembler() {} - const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); } - /// getInstruction - See MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject &region, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const; -private: - OwningPtr<const MCRegisterInfo> RegInfo; + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const override; }; } @@ -58,8 +54,9 @@ namespace llvm { static MCDisassembler *createSparcDisassembler( const Target &T, - const MCSubtargetInfo &STI) { - return new SparcDisassembler(STI, T.createMCRegInfo("")); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new SparcDisassembler(STI, Ctx); } diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index fabc125..261fb38 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -11,15 +11,17 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "SparcInstPrinter.h" #include "Sparc.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + // The generated AsmMatcher SparcGenAsmWriter uses "Sparc" as the target // namespace. But SPARC backend uses "SP" as its namespace. namespace llvm { diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h index 45ee6c0..8fe4075 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h @@ -30,19 +30,21 @@ public: const MCSubtargetInfo &sti) : MCInstPrinter(MAI, MII, MRI), STI(sti) {} - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; bool printSparcAliasInstr(const MCInst *MI, raw_ostream &OS); bool isV9() const; // Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O); bool printAliasInstr(const MCInst *MI, raw_ostream &O); + void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); void printOperand(const MCInst *MI, int opNum, raw_ostream &OS); void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); + const char *Modifier = nullptr); void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS); bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS); diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 39c9996..7d517b6 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -102,11 +102,11 @@ namespace { public: SparcAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {} - unsigned getNumFixupKinds() const { + unsigned getNumFixupKinds() const override { return Sparc::NumTargetFixupKinds; } - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { const static MCFixupKindInfo Infos[Sparc::NumTargetFixupKinds] = { // name offset bits flags { "fixup_sparc_call30", 2, 30, MCFixupKindInfo::FKF_IsPCRel }, @@ -184,7 +184,7 @@ namespace { } } - bool mayNeedRelaxation(const MCInst &Inst) const { + bool mayNeedRelaxation(const MCInst &Inst) const override { // FIXME. return false; } @@ -194,17 +194,17 @@ namespace { bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const { + const MCAsmLayout &Layout) const override { // FIXME. assert(0 && "fixupNeedsRelaxation() unimplemented"); return false; } - void relaxInstruction(const MCInst &Inst, MCInst &Res) const { + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { // FIXME. assert(0 && "relaxInstruction() unimplemented"); } - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { // Cannot emit NOP with size not multiple of 32 bits. if (Count % 4 != 0) return false; @@ -229,7 +229,7 @@ namespace { SparcAsmBackend(T), OSType(OSType) { } void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const { + uint64_t Value, bool IsPCRel) const override { Value = adjustFixupValue(Fixup.getKind(), Value); if (!Value) return; // Doesn't change encoding. @@ -244,7 +244,7 @@ namespace { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType); return createSparcELFObjectWriter(OS, is64Bit(), OSABI); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp index ef5f8ce..6875fc6 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp @@ -32,7 +32,7 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) { Data16bitsDirective = "\t.half\t"; Data32bitsDirective = "\t.word\t"; // .xword is only supported by V9. - Data64bitsDirective = (isV9) ? "\t.xword\t" : 0; + Data64bitsDirective = (isV9) ? 
"\t.xword\t" : nullptr; ZeroDirective = "\t.skip\t"; CommentString = "!"; HasLEB128 = true; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h index d53d09d..e126b68 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h @@ -20,15 +20,15 @@ namespace llvm { class StringRef; class SparcELFMCAsmInfo : public MCAsmInfoELF { - virtual void anchor(); + void anchor() override; public: explicit SparcELFMCAsmInfo(StringRef TT); - virtual const MCExpr* getExprForPersonalitySymbol(const MCSymbol *Sym, - unsigned Encoding, - MCStreamer &Streamer) const; - virtual const MCExpr* getExprForFDESymbol(const MCSymbol *Sym, - unsigned Encoding, - MCStreamer &Streamer) const; + const MCExpr* + getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; + const MCExpr* getExprForFDESymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const override; }; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index 310fbd9..b19ad7b 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mccodeemitter" #include "SparcMCExpr.h" #include "MCTargetDesc/SparcFixupKinds.h" #include "SparcMCTargetDesc.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "mccodeemitter" + STATISTIC(MCNumEmitted, "Number of MC instructions emitted"); namespace { @@ -41,7 +42,7 @@ public: void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + const MCSubtargetInfo &STI) const override; // getBinaryCodeForInstr - TableGen'erated function for getting the // binary encoding for an instruction. 
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index e6b2aca..ae57fdc 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sparcmcexpr" #include "SparcMCExpr.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -23,6 +22,8 @@ using namespace llvm; +#define DEBUG_TYPE "sparcmcexpr" + const SparcMCExpr* SparcMCExpr::Create(VariantKind Kind, const MCExpr *Expr, MCContext &Ctx) { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h index be6526e..78dd945 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h @@ -85,15 +85,15 @@ public: Sparc::Fixups getFixupKind() const { return getFixupKind(Kind); } /// @} - void PrintImpl(raw_ostream &OS) const; + void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const; - void AddValueSymbols(MCAssembler *) const; - const MCSection *FindAssociatedSection() const { + const MCAsmLayout *Layout) const override; + void AddValueSymbols(MCAssembler *) const override; + const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } - void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const; + void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp index c69af56..571017d 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp @@ -22,6 +22,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "SparcGenInstrInfo.inc" @@ -31,14 +33,11 @@ #define GET_REGINFO_MC_DESC #include "SparcGenRegisterInfo.inc" -using namespace llvm; - - static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(SP::O6, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0); MAI->addInitialFrameState(Inst); return MAI; } @@ -47,7 +46,7 @@ static MCAsmInfo *createSparcV9MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(SP::O6, true); - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 2047); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 2047); MAI->addInitialFrameState(Inst); return MAI; } @@ -136,13 +135,12 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, static MCStreamer * createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useCFI, bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = - llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory, - InstPrint, CE, TAB, ShowInst); + MCStreamer *S = llvm::createAsmStreamer( + Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, 
TAB, ShowInst); new SparcTargetAsmStreamer(*S, OS); return S; } diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 50506a6..1b7330e 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "Sparc.h" #include "InstPrinter/SparcInstPrinter.h" #include "MCTargetDesc/SparcMCExpr.h" @@ -35,6 +34,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + namespace { class SparcAsmPrinter : public AsmPrinter { SparcTargetStreamer &getTargetStreamer() { @@ -45,18 +46,18 @@ namespace { explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "Sparc Assembly Printer"; } void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, - const char *Modifier = 0); + const char *Modifier = nullptr); void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS); - virtual void EmitFunctionBodyStart(); - virtual void EmitInstruction(const MachineInstr *MI); - virtual void EmitEndOfAsmFile(Module &M); + void EmitFunctionBodyStart() override; + void EmitInstruction(const MachineInstr *MI) override; + void EmitEndOfAsmFile(Module &M) override; static const char *getRegisterName(unsigned RegNo) { return SparcInstPrinter::getRegisterName(RegNo); @@ -64,10 +65,10 @@ namespace { bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; void LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, const MCSubtargetInfo &STI); diff --git a/lib/Target/Sparc/SparcCodeEmitter.cpp b/lib/Target/Sparc/SparcCodeEmitter.cpp index 4f8d477..247da2a 100644 --- a/lib/Target/Sparc/SparcCodeEmitter.cpp +++ b/lib/Target/Sparc/SparcCodeEmitter.cpp @@ -12,7 +12,6 @@ // //===---------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "Sparc.h" #include "MCTargetDesc/SparcMCExpr.h" #include "SparcRelocations.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "jit" + STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { @@ -39,7 +40,7 @@ class SparcCodeEmitter : public MachineFunctionPass { const std::vector<MachineConstantPoolEntry> *MCPEs; bool IsPIC; - void getAnalysisUsage(AnalysisUsage &AU) const { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<MachineModuleInfo> (); MachineFunctionPass::getAnalysisUsage(AU); } @@ -48,13 +49,13 @@ class SparcCodeEmitter : public MachineFunctionPass { public: SparcCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), JTI(0), II(0), TD(0), - TM(tm), MCE(mce), MCPEs(0), + : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr), + TM(tm), MCE(mce), MCPEs(nullptr), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - bool runOnMachineFunction(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &MF) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "Sparc Machine Code Emitter"; } diff --git
a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index d96a4c0..a37da94 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -109,18 +109,21 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { // Emit ".cfi_def_cfa_register 30". unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP)); - BuildMI(MBB, MBBI, dl, TII.get(SP::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); // Emit ".cfi_window_save". CFIIndex = MMI.addFrameInst(MCCFIInstruction::createWindowSave(nullptr)); - BuildMI(MBB, MBBI, dl, TII.get(SP::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true); unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true); // Emit ".cfi_register 15, 31". CFIIndex = MMI.addFrameInst( MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA)); - BuildMI(MBB, MBBI, dl, TII.get(SP::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } void SparcFrameLowering:: diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 072fde3..bda7b7c 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -31,17 +31,18 @@ public: /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; - bool hasReservedCallFrame(const MachineFunction &MF) const; - bool hasFP(const MachineFunction &MF) const; + bool hasReservedCallFrame(const MachineFunction &MF) const override; + bool hasFP(const MachineFunction &MF) const override; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; private: // Remap input registers to output registers for leaf procedure. diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index b012bfd..2fade27 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -41,7 +41,7 @@ public: TM(tm) { } - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; // Complex Pattern Selectors. bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2); @@ -49,11 +49,11 @@ public: /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps); + bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "SPARC DAG->DAG Pattern Instruction Selection"; } @@ -143,7 +143,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. } switch (N->getOpcode()) { diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 8e720ee..ef61466 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -53,7 +53,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { - static const uint16_t RegList[] = { + static const MCPhysReg RegList[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; // Try to get first reg. @@ -235,8 +235,7 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, - &RetOps[0], RetOps.size()); + return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps); } // Lower return values for the 64-bit ABI. @@ -315,8 +314,7 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, - &RetOps[0], RetOps.size()); + return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps); } SDValue SparcTargetLowering:: @@ -357,10 +355,13 @@ LowerFormalArguments_32(SDValue Chain, const unsigned StackOffset = 92; - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + unsigned InIdx = 0; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) { CCValAssign &VA = ArgLocs[i]; - if (i == 0 && Ins[i].Flags.isSRet()) { + if (Ins[InIdx].Flags.isSRet()) { + if (InIdx != 0) + report_fatal_error("sparc only supports sret on the first parameter"); // Get SRet from [%fp+64]. int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); @@ -493,11 +494,11 @@ LowerFormalArguments_32(SDValue Chain, // Store remaining ArgRegs to the stack if this is a varargs function. 
if (isVarArg) { - static const uint16_t ArgRegs[] = { + static const MCPhysReg ArgRegs[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 }; unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6); - const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6; + const MCPhysReg *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6; unsigned ArgOffset = CCInfo.getNextStackOffset(); if (NumAllocated == 6) ArgOffset += StackOffset; @@ -528,8 +529,7 @@ LowerFormalArguments_32(SDValue Chain, if (!OutChains.empty()) { OutChains.push_back(Chain); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], OutChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } } @@ -644,8 +644,7 @@ LowerFormalArguments_64(SDValue Chain, } if (!OutChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - &OutChains[0], OutChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains); return Chain; } @@ -663,7 +662,7 @@ static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee, if (CS) return CS->hasFnAttr(Attribute::ReturnsTwice); - const Function *CalleeFn = 0; + const Function *CalleeFn = nullptr; if (GlobalAddressSDNode *G = dyn_cast(Callee)) { CalleeFn = dyn_cast(G->getGlobal()); } else if (ExternalSymbolSDNode *E = @@ -877,8 +876,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, // Emit all stores, make sure the occur before any copies into physregs. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. @@ -927,7 +925,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, if (InFlag.getNode()) Ops.push_back(InFlag); - Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), @@ -961,9 +959,9 @@ static bool isFP128ABICall(const char *CalleeName) "_Q_sqrt", "_Q_neg", "_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq", "_Q_lltoq", "_Q_ulltoq", - 0 + nullptr }; - for (const char * const *I = ABICalls; *I != 0; ++I) + for (const char * const *I = ABICalls; *I != nullptr; ++I) if (strcmp(CalleeName, *I) == 0) return true; return false; @@ -972,7 +970,7 @@ static bool isFP128ABICall(const char *CalleeName) unsigned SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const { - const Function *CalleeFn = 0; + const Function *CalleeFn = nullptr; if (GlobalAddressSDNode *G = dyn_cast(Callee)) { CalleeFn = dyn_cast(G->getGlobal()); } else if (ExternalSymbolSDNode *E = @@ -1194,8 +1192,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Emit all stores, make sure they occur before the call. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); // Build a sequence of CopyToReg nodes glued together with token chain and // glue operands which copy the outgoing args into registers. The InGlue is @@ -1245,7 +1242,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Now the call itself. 
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, Ops); InGlue = Chain.getValue(1); // Revert the stack pointer immediately after the call. @@ -1263,7 +1260,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI, // Set inreg flag manually for codegen generated library calls that // return float. - if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && CLI.CS == 0) + if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && CLI.CS == nullptr) CLI.Ins[0].Flags.setInReg(); RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_Sparc64); @@ -1677,7 +1674,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return 0; + default: return nullptr; case SPISD::CMPICC: return "SPISD::CMPICC"; case SPISD::CMPFCC: return "SPISD::CMPFCC"; case SPISD::BRICC: return "SPISD::BRICC"; @@ -1711,7 +1708,7 @@ EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { /// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to /// be zero. Op is expected to be a target specific node. Used by DAG /// combiner. -void SparcTargetLowering::computeMaskedBitsForTargetNode +void SparcTargetLowering::computeKnownBitsForTargetNode (const SDValue Op, APInt &KnownZero, APInt &KnownOne, @@ -1725,10 +1722,8 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode case SPISD::SELECT_ICC: case SPISD::SELECT_XCC: case SPISD::SELECT_FCC: - DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); - DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + DAG.computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + DAG.computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; @@ -1914,7 +1909,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); Ops.push_back(InFlag); - Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, true), DAG.getIntPtrConstant(0, true), InFlag, DL); @@ -2033,13 +2028,10 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG, for (unsigned i = 0, e = numArgs; i != e; ++i) { Chain = LowerF128_LibCallArg(Chain, Args, Op.getOperand(i), SDLoc(Op), DAG); } - TargetLowering:: - CallLoweringInfo CLI(Chain, - RetTyABI, - false, false, false, false, - 0, CallingConv::C, - false, false, true, - Callee, Args, DAG, SDLoc(Op)); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Op)).setChain(Chain) + .setCallee(CallingConv::C, RetTyABI, Callee, &Args, 0); + std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); // chain is in second result.
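The LowerF128Op hunk above carries an API migration that is easy to lose in the nullptr noise: the old CallLoweringInfo constructor took more than a dozen positional arguments, and it is replaced by a small builder. A sketch of the new call-site shape, reusing the names from the surrounding hunk (Chain, RetTyABI, Callee, Args):

// Builder-style setup replaces the old 13-argument constructor; the
// flags it took positionally (isVarArg, isTailCall, ...) now default
// inside the builder and are set individually only when needed.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Op))
   .setChain(Chain)
   .setCallee(CallingConv::C, RetTyABI, Callee, &Args, 0);

// LowerCallTo returns {call result, output chain}.
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
SDValue Result = CallInfo.first;
Chain = CallInfo.second;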
@@ -2065,7 +2057,7 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, SDLoc DL, SelectionDAG &DAG) const { - const char *LibCall = 0; + const char *LibCall = nullptr; bool is64Bit = Subtarget->is64Bit(); switch(SPCC) { default: llvm_unreachable("Unhandled conditional code!"); @@ -2092,13 +2084,9 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, Chain = LowerF128_LibCallArg(Chain, Args, LHS, DL, DAG); Chain = LowerF128_LibCallArg(Chain, Args, RHS, DL, DAG); - TargetLowering:: - CallLoweringInfo CLI(Chain, - RetTy, - false, false, false, false, - 0, CallingConv::C, - false, false, true, - Callee, Args, DAG, DL); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL).setChain(Chain) + .setCallee(CallingConv::C, RetTy, Callee, &Args, 0); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); @@ -2174,7 +2162,7 @@ LowerF128_FPEXTEND(SDValue Op, SelectionDAG &DAG, TLI.getLibcallName(RTLIB::FPEXT_F32_F128), 1); llvm_unreachable("fpextend with non-float operand!"); - return SDValue(0, 0); + return SDValue(); } static SDValue @@ -2192,7 +2180,7 @@ LowerF128_FPROUND(SDValue Op, SelectionDAG &DAG, TLI.getLibcallName(RTLIB::FPROUND_F128_F32), 1); llvm_unreachable("fpround to non-float!"); - return SDValue(0, 0); + return SDValue(); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, @@ -2213,7 +2201,7 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, // Expand if the resulting type is illegal. if (!TLI.isTypeLegal(VT)) - return SDValue(0, 0); + return SDValue(); // Otherwise, Convert the fp value to integer in an FP register. if (VT == MVT::i32) @@ -2244,7 +2232,7 @@ static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG, // Expand if the operand type is illegal. if (!TLI.isTypeLegal(OpVT)) - return SDValue(0, 0); + return SDValue(); // Otherwise, Convert the int value to FP in an FP register. SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0)); @@ -2262,7 +2250,7 @@ static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG, // quad floating point instructions and the resulting type is legal. if (Op.getOperand(0).getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(VT))) - return SDValue(0, 0); + return SDValue(); assert(VT == MVT::i32 || VT == MVT::i64); @@ -2283,7 +2271,7 @@ static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG, // Expand if it does not involve f128 or the target has support for // quad floating point instructions and the operand type is legal. if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT))) - return SDValue(0, 0); + return SDValue(); return TLI.LowerF128Op(Op, DAG, TLI.getLibcallName(OpVT == MVT::i32 @@ -2428,7 +2416,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP, DAG.getConstant(regSpillArea, VT)); SDValue Ops[2] = { NewVal, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } @@ -2597,10 +2585,9 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG) SubRegOdd); SDValue OutChains[2] = { SDValue(Hi64.getNode(), 1), SDValue(Lo64.getNode(), 1) }; - SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], 2); + SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); SDValue Ops[2] = {SDValue(InFP128,0), OutChain}; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } // Lower a f128 store into two f64 stores.
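Two idioms from the hunks above recur through the rest of SparcISelLowering.cpp. A short sketch of both, with ResultValue and OutChain as illustrative names:

// 1. "No custom lowering" is now a default-constructed SDValue rather
//    than the old SDValue(0, 0) spelling; the legalizer then falls back
//    to its default expansion.
if (!TLI.isTypeLegal(VT))
  return SDValue();

// 2. getMergeValues (like getNode above) now takes an ArrayRef and
//    deduces the element count, so the old (Ops, 2, dl) form loses its
//    explicit size argument.
SDValue Ops[2] = { ResultValue, OutChain };
return DAG.getMergeValues(Ops, dl);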
@@ -2644,8 +2631,7 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { LoPtr, MachinePointerInfo(), false, false, alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &OutChains[0], 2); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) { @@ -2726,7 +2712,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { SDValue Dst = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, Lo); SDValue Ops[2] = { Dst, Carry }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } // Custom lower UMULO/SMULO for SPARC. This code is similar to ExpandNode() @@ -2773,7 +2759,7 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG, DAG.DeleteNode(MulResult.getNode()); SDValue Ops[2] = { BottomHalf, TopHalf } ; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) { @@ -3092,7 +3078,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; // Look at the constraint type. @@ -3117,7 +3103,7 @@ LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, SelectionDAG &DAG) const { - SDValue Result(0, 0); + SDValue Result(nullptr, 0); // Only support length 1 constraints for now. if (Constraint.length() > 1) diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index f7b45d0..a24cc82 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -55,47 +55,47 @@ namespace llvm { const SparcSubtarget *Subtarget; public: SparcTargetLowering(TargetMachine &TM); - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - /// computeMaskedBitsForTargetNode - Determine which of the bits specified + /// computeKnownBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
- virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const; + void computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; - virtual MachineBasicBlock * + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB) const override; - virtual const char *getTargetNodeName(unsigned Opcode) const; + const char *getTargetNodeName(unsigned Opcode) const override; - ConstraintType getConstraintType(const std::string &Constraint) const; + ConstraintType getConstraintType(const std::string &Constraint) const override; ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, - const char *constraint) const; + const char *constraint) const override; void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops, - SelectionDAG &DAG) const; + SelectionDAG &DAG) const override; std::pair<unsigned, const TargetRegisterClass*> - getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const override; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } /// getSetCCResultType - Return the ISD::SETCC ValueType - virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; + EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - virtual SDValue + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; SDValue LowerFormalArguments_32(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -109,20 +109,20 @@ namespace llvm { SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; - virtual SDValue + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const; - virtual SDValue + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const override; SDValue LowerReturn_32(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, @@ -156,15 +156,15 @@ namespace llvm { SDLoc DL, SelectionDAG &DAG) const; - bool ShouldShrinkFPConstant(EVT VT) const { + bool ShouldShrinkFPConstant(EVT VT) const override { // Do not shrink FP constpool if VT == MVT::f128. // (ldd, call _Q_fdtoq) is more expensive than two ldds.
return VT != MVT::f128; } - virtual void ReplaceNodeResults(SDNode *N, + void ReplaceNodeResults(SDNode *N, SmallVectorImpl& Results, - SelectionDAG &DAG) const; + SelectionDAG &DAG) const override; MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB, unsigned BROpcode) const; diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td index a34ce26..54d8240 100644 --- a/lib/Target/Sparc/SparcInstr64Bit.td +++ b/lib/Target/Sparc/SparcInstr64Bit.td @@ -359,9 +359,9 @@ multiclass BranchOnReg cond, string OpcStr> { multiclass bpr_alias { def : InstAlias; + (NAPT I64Regs:$rs1, bprtarget16:$imm16), 0>; def : InstAlias; + (APT I64Regs:$rs1, bprtarget16:$imm16), 0>; } defm BPZ : BranchOnReg<0b001, "brz">; diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td index 33c2aa1..d36f67b 100644 --- a/lib/Target/Sparc/SparcInstrAliases.td +++ b/lib/Target/Sparc/SparcInstrAliases.td @@ -281,12 +281,12 @@ defm : fp_cond_alias<"o", 0b1111>; // Instruction aliases for JMPL. // jmp addr -> jmpl addr, %g0 -def : InstAlias<"jmp $addr", (JMPLrr G0, MEMrr:$addr)>; -def : InstAlias<"jmp $addr", (JMPLri G0, MEMri:$addr)>; +def : InstAlias<"jmp $addr", (JMPLrr G0, MEMrr:$addr), 0>; +def : InstAlias<"jmp $addr", (JMPLri G0, MEMri:$addr), 0>; // call addr -> jmpl addr, %o7 -def : InstAlias<"call $addr", (JMPLrr O7, MEMrr:$addr)>; -def : InstAlias<"call $addr", (JMPLri O7, MEMri:$addr)>; +def : InstAlias<"call $addr", (JMPLrr O7, MEMrr:$addr), 0>; +def : InstAlias<"call $addr", (JMPLri O7, MEMri:$addr), 0>; // retl -> RETL 8 def : InstAlias<"retl", (RETL 8)>; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index abf6c17..8b2e6bc 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -24,11 +24,10 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" -#define GET_INSTRINFO_CTOR_DTOR -#include "SparcGenInstrInfo.inc" - using namespace llvm; +#define GET_INSTRINFO_CTOR_DTOR +#include "SparcGenInstrInfo.inc" // Pin the vtable to this file. 
void SparcInstrInfo::anchor() {} @@ -162,10 +161,10 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, std::next(I)->eraseFromParent(); Cond.clear(); - FBB = 0; + FBB = nullptr; if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = 0; + TBB = nullptr; I->eraseFromParent(); I = MBB.end(); UnCondBrIter = MBB.end(); @@ -285,7 +284,7 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, bool KillSrc) const { unsigned numSubRegs = 0; unsigned movOpc = 0; - const unsigned *subRegIdx = 0; + const unsigned *subRegIdx = nullptr; const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 }; @@ -329,11 +328,11 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else llvm_unreachable("Impossible reg-to-reg copy"); - if (numSubRegs == 0 || subRegIdx == 0 || movOpc == 0) + if (numSubRegs == 0 || subRegIdx == nullptr || movOpc == 0) return; const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineInstr *MovMI = 0; + MachineInstr *MovMI = nullptr; for (unsigned i = 0; i != numSubRegs; ++i) { unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]); diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index a86cbcb..3a1472e 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -45,52 +45,52 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; } + const SparcRegisterInfo &getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. 
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify = false) const ; - - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const override ; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const override; + + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; unsigned getGlobalBaseReg(MachineFunction *MF) const; }; diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp index 959d12f..c775e9e 100644 --- a/lib/Target/Sparc/SparcJITInfo.cpp +++ b/lib/Target/Sparc/SparcJITInfo.cpp @@ -10,7 +10,6 @@ // This file implements the JIT interfaces for the Sparc target. // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "SparcJITInfo.h" #include "Sparc.h" #include "SparcRelocations.h" @@ -20,6 +19,8 @@ using namespace llvm; +#define DEBUG_TYPE "jit" + /// JITCompilerFunction - This contains the address of the JIT function used to /// compile a function lazily. static TargetJITInfo::JITCompilerFn JITCompilerFunction; diff --git a/lib/Target/Sparc/SparcJITInfo.h b/lib/Target/Sparc/SparcJITInfo.h index 9c6e488..ff1b43a 100644 --- a/lib/Target/Sparc/SparcJITInfo.h +++ b/lib/Target/Sparc/SparcJITInfo.h @@ -34,27 +34,27 @@ class SparcJITInfo : public TargetJITInfo { /// overwriting OLD with a branch to NEW. This is used for self-modifying /// code. /// - virtual void replaceMachineCodeForFunction(void *Old, void *New); + void replaceMachineCodeForFunction(void *Old, void *New) override; // getStubLayout - Returns the size and alignment of the largest call stub // on Sparc.
- virtual StubLayout getStubLayout(); + StubLayout getStubLayout() override; /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. - virtual void *emitFunctionStub(const Function *F, void *Fn, - JITCodeEmitter &JCE); + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; /// getLazyResolverFunction - Expose the lazy resolver to the JIT. - virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; /// relocate - Before the JIT can run a block of code that has been emitted, /// it must rewrite the code to contain the actual addresses of any /// referenced global symbols. - virtual void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char *GOTBase); + void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char *GOTBase) override; /// Initialize - Initialize internal stage for the function being JITted. void Initialize(const MachineFunction &MF, bool isPIC) { diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp index 737e378..9e94d2c 100644 --- a/lib/Target/Sparc/SparcMCInstLower.cpp +++ b/lib/Target/Sparc/SparcMCInstLower.cpp @@ -34,7 +34,7 @@ static MCOperand LowerSymbolOperand(const MachineInstr *MI, SparcMCExpr::VariantKind Kind = (SparcMCExpr::VariantKind)MO.getTargetFlags(); - const MCSymbol *Symbol = 0; + const MCSymbol *Symbol = nullptr; switch(MO.getType()) { default: llvm_unreachable("Unknown type in LowerSymbolOperand"); diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index f222382..dc1ec7c 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -25,11 +25,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + #define GET_REGINFO_TARGET_DESC #include "SparcGenRegisterInfo.inc" -using namespace llvm; - static cl::opt<bool> ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false), cl::desc("Reserve application registers (%g2-%g4)")); @@ -38,8 +38,8 @@ SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st) : SparcGenRegisterInfo(SP::O7), Subtarget(st) { } -const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) - const { +const MCPhysReg* +SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_SaveList; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 00b5a98..77f879a 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -31,25 +31,26 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { SparcRegisterInfo(SparcSubtarget &st); /// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const uint32_t* getCallPreservedMask(CallingConv::ID CC) const; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override; + const uint32_t* getCallPreservedMask(CallingConv::ID CC) const override; const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const; - BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, - unsigned Kind) const; + unsigned Kind) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const; // Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp index 190c575..eb36d29 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sparc-selectiondag-info" #include "SparcTargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "sparc-selectiondag-info" + SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM) : TargetSelectionDAGInfo(TM) { } diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index 6fc9d56..e38fb02 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -16,12 +16,14 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "sparc-subtarget" + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "SparcGenSubtargetInfo.inc" -using namespace llvm; - void SparcSubtarget::anchor() { } SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 83f3474..2469d93 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -77,8 +77,8 @@ public: return getTM<SparcTargetMachine>(); } - virtual bool addInstSelector(); - virtual bool addPreEmitPass(); + bool addInstSelector() override; + bool addPreEmitPass() override; }; } // namespace diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 8c9bcd3..7d04338 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -40,28 +40,28 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit); - virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameLowering *getFrameLowering() const { + const SparcInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } - virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; } - virtual const SparcRegisterInfo *getRegisterInfo() const { + const SparcSubtarget *getSubtargetImpl() const
override { return &Subtarget; } + const SparcRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } - virtual const SparcTargetLowering* getTargetLowering() const { + const SparcTargetLowering* getTargetLowering() const override { return &TLInfo; } - virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const { + const SparcSelectionDAGInfo* getSelectionDAGInfo() const override { return &TSInfo; } - virtual SparcJITInfo *getJITInfo() { + SparcJITInfo *getJITInfo() override { return &JITInfo; } - virtual const DataLayout *getDataLayout() const { return &DL; } + const DataLayout *getDataLayout() const override { return &DL; } // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); - virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE); + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; }; /// SparcV8TargetMachine - Sparc 32-bit target machine diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp index f1630e0..32b2240 100644 --- a/lib/Target/Sparc/SparcTargetObjectFile.cpp +++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp @@ -28,7 +28,7 @@ const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference( // Add information about the stub reference to ELFMMI so that the stub // gets emitted by the asmprinter. MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); - if (StubSym.getPointer() == 0) { + if (!StubSym.getPointer()) { MCSymbol *Sym = TM.getSymbol(GV, Mang); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); } diff --git a/lib/Target/Sparc/SparcTargetStreamer.h b/lib/Target/Sparc/SparcTargetStreamer.h index 503ebd9..3767d8e 100644 --- a/lib/Target/Sparc/SparcTargetStreamer.h +++ b/lib/Target/Sparc/SparcTargetStreamer.h @@ -31,8 +31,8 @@ class SparcTargetAsmStreamer : public SparcTargetStreamer { public: SparcTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); - virtual void emitSparcRegisterIgnore(unsigned reg); - virtual void emitSparcRegisterScratch(unsigned reg); + void emitSparcRegisterIgnore(unsigned reg) override; + void emitSparcRegisterScratch(unsigned reg) override; }; @@ -41,8 +41,8 @@ class SparcTargetELFStreamer : public SparcTargetStreamer { public: SparcTargetELFStreamer(MCStreamer &S); MCELFStreamer &getStreamer(); - virtual void emitSparcRegisterIgnore(unsigned reg) {} - virtual void emitSparcRegisterScratch(unsigned reg) {} + void emitSparcRegisterIgnore(unsigned reg) override {} + void emitSparcRegisterScratch(unsigned reg) override {} }; } // end namespace llvm diff --git a/lib/Target/SystemZ/AsmParser/LLVMBuild.txt b/lib/Target/SystemZ/AsmParser/LLVMBuild.txt index 0b97e71..602898e 100644 --- a/lib/Target/SystemZ/AsmParser/LLVMBuild.txt +++ b/lib/Target/SystemZ/AsmParser/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = SystemZAsmParser parent = SystemZ -required_libraries = SystemZDesc SystemZInfo MC MCParser Support +required_libraries = MC MCParser Support SystemZDesc SystemZInfo add_to_library_groups = SystemZ diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index a3dd4b6..71de64f 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -110,7 +110,7 @@ private: void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when 
possible. Null MCExpr = 0. - if (Expr == 0) + if (!Expr) Inst.addOperand(MCOperand::CreateImm(0)); else if (auto *CE = dyn_cast(Expr)) Inst.addOperand(MCOperand::CreateImm(CE->getValue())); @@ -208,7 +208,7 @@ public: return (Kind == KindMem && Mem.RegKind == RegKind && (MemKind == BDXMem || !Mem.Index) && - (MemKind == BDLMem) == (Mem.Length != 0)); + (MemKind == BDLMem) == (Mem.Length != nullptr)); } bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const { return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff); @@ -331,7 +331,8 @@ private: public: SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, - const MCInstrInfo &MII) + const MCInstrInfo &MII, + const MCTargetOptions &Options) : MCTargetAsmParser(), STI(sti), Parser(parser) { MCAsmParserExtension::Initialize(Parser); @@ -526,7 +527,7 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, // Parse the optional base and index. Index = 0; Base = 0; - Length = 0; + Length = nullptr; if (getLexer().is(AsmToken::LParen)) { Parser.Lex(); @@ -758,7 +759,7 @@ parseAccessReg(SmallVectorImpl &Operands) { return MatchOperand_NoMatch; Register Reg; - if (parseRegister(Reg, RegAccess, 0)) + if (parseRegister(Reg, RegAccess, nullptr)) return MatchOperand_ParseFail; Operands.push_back(SystemZOperand::createAccessReg(Reg.Num, diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 59a1fe9..2350776 100644 --- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -17,13 +17,15 @@ using namespace llvm; +#define DEBUG_TYPE "systemz-disassembler" + typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { class SystemZDisassembler : public MCDisassembler { public: - SystemZDisassembler(const MCSubtargetInfo &STI) - : MCDisassembler(STI) {} + SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) + : MCDisassembler(STI, Ctx) {} virtual ~SystemZDisassembler() {} // Override MCDisassembler. 
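[Annotation, not part of the patch] Interleaved with the override cleanup is the nullptr migration: literal 0 and NULL pointer constants become nullptr, and comparisons against null become boolean tests, as in `if (!Expr)` above. The payoff is overload safety: nullptr has its own type and can never be mistaken for an integer. A small self-contained illustration:

#include <cassert>

static int classify(int)          { return 1; } // integer overload
static int classify(const char *) { return 2; } // pointer overload

int main() {
  // classify(0) picks the int overload even when a null pointer was meant;
  // classify(nullptr) can only ever pick the pointer overload.
  assert(classify(0) == 1);
  assert(classify(nullptr) == 2);

  const char *Expr = nullptr;
  if (!Expr) // idiomatic null test, matching the rewritten checks above
    return 0;
  return 1;
}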
@@ -35,8 +37,9 @@ public: } // end anonymous namespace static MCDisassembler *createSystemZDisassembler(const Target &T, - const MCSubtargetInfo &STI) { - return new SystemZDisassembler(STI); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new SystemZDisassembler(STI, Ctx); } extern "C" void LLVMInitializeSystemZDisassembler() { diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index e1e64d3..d2ba9b6 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -7,8 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" - #include "SystemZInstPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" @@ -16,6 +14,8 @@ using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #include "SystemZGenAsmWriter.inc" void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, diff --git a/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt b/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt index cbdb59c..dabd214 100644 --- a/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = SystemZDesc parent = SystemZ -required_libraries = MC SystemZAsmPrinter SystemZInfo Support +required_libraries = MC Support SystemZAsmPrinter SystemZInfo add_to_library_groups = SystemZ diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp index df50863..27b4bd8 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mccodeemitter" #include "MCTargetDesc/SystemZMCTargetDesc.h" #include "MCTargetDesc/SystemZMCFixups.h" #include "llvm/MC/MCCodeEmitter.h" @@ -21,6 +20,8 @@ using namespace llvm; +#define DEBUG_TYPE "mccodeemitter" + namespace { class SystemZMCCodeEmitter : public MCCodeEmitter { const MCInstrInfo &MCII; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp index 54c6987..c6a1816 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp @@ -82,7 +82,7 @@ static unsigned getPLTReloc(unsigned Kind) { unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { - MCSymbolRefExpr::VariantKind Modifier = Fixup.getAccessVariant(); + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); unsigned Kind = Fixup.getKind(); switch (Modifier) { case MCSymbolRefExpr::VK_None: diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 8d1bac9..cc94869 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -16,6 +16,8 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "SystemZGenInstrInfo.inc" @@ -25,8 +27,6 @@ #define GET_REGINFO_MC_DESC #include "SystemZGenRegisterInfo.inc" -using namespace llvm; - const unsigned SystemZMC::GR32Regs[16] = { SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, 
SystemZ::R3L, SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L, @@ -98,7 +98,8 @@ static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { MCAsmInfo *MAI = new SystemZMCAsmInfo(TT); MCCFIInstruction Inst = - MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(SystemZ::R15D, true), + MCCFIInstruction::createDefCfa(nullptr, + MRI.getDwarfRegNum(SystemZ::R15D, true), SystemZMC::CFAOffsetFromInitialSP); MAI->addInitialFrameState(Inst); return MAI; diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index fdf80a9..dc210d6 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -13,8 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "systemz-elim-compare" - #include "SystemZTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -28,6 +26,8 @@ using namespace llvm; +#define DEBUG_TYPE "systemz-elim-compare" + STATISTIC(BranchOnCounts, "Number of branch-on-count instructions"); STATISTIC(EliminatedComparisons, "Number of eliminated comparisons"); STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions"); @@ -64,14 +64,14 @@ class SystemZElimCompare : public MachineFunctionPass { public: static char ID; SystemZElimCompare(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(0), TRI(0) {} + : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {} const char *getPassName() const override { return "SystemZ Comparison Elimination"; } bool processBlock(MachineBasicBlock &MBB); - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; private: Reference getRegReferences(MachineInstr *MI, unsigned Reg); diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index c856955..65f3caf 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -93,7 +93,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // save and restore the stack pointer at the same time, via STMG and LMG. // This allows the deallocation to be done by the LMG, rather than needing // a separate %r15 addition. - const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); for (unsigned I = 0; CSRegs[I]; ++I) { unsigned Reg = CSRegs[I]; if (SystemZ::GR64BitRegClass.contains(Reg) && MRI.isPhysRegUsed(Reg)) { diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index f46eb16..24f7584 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -19,6 +19,8 @@ using namespace llvm; +#define DEBUG_TYPE "systemz-isel" + namespace { // Used to build addressing modes. 
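[Annotation, not part of the patch] A third recurring edit moves `#define DEBUG_TYPE "..."` from the top of each file to just after the includes, and hoists `using namespace llvm;` above the generated `.inc` includes. Defining the macro below all headers ensures no header (or TableGen-generated file) is compiled under a stray DEBUG_TYPE. A sketch of the post-rebase layout, assuming the usual llvm/Support/Debug.h machinery of this era:

#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Only after every #include, so headers never see this definition.
#define DEBUG_TYPE "my-pass"

static void noteVisit(int BlockNumber) {
  // Printed only in +Asserts builds under -debug / -debug-only=my-pass.
  DEBUG(dbgs() << DEBUG_TYPE << ": visiting block " << BlockNumber << "\n");
}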
struct SystemZAddressingMode { @@ -72,14 +74,14 @@ struct SystemZAddressingMode { errs() << "SystemZAddressingMode " << this << '\n'; errs() << " Base "; - if (Base.getNode() != 0) + if (Base.getNode()) Base.getNode()->dump(); else errs() << "null\n"; if (hasIndexField()) { errs() << " Index "; - if (Index.getNode() != 0) + if (Index.getNode()) Index.getNode()->dump(); else errs() << "null\n"; @@ -663,7 +665,7 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, uint64_t Used = allOnes(Op.getValueType().getSizeInBits()); if (Used != (AndMask | InsertMask)) { APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne); + CurDAG->computeKnownBits(Op.getOperand(0), KnownZero, KnownOne); if (Used != (AndMask | InsertMask | KnownZero.getZExtValue())) return false; } @@ -712,7 +714,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { // been removed from the mask. See if adding them back in makes the // mask suitable. APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); + CurDAG->computeKnownBits(Input, KnownZero, KnownOne); Mask |= KnownZero.getZExtValue(); if (!refineRxSBGMask(RxSBG, Mask)) return false; @@ -736,7 +738,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { // been removed from the mask. See if adding them back in makes the // mask suitable. APInt KnownZero, KnownOne; - CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne); + CurDAG->computeKnownBits(Input, KnownZero, KnownOne); Mask &= ~KnownOne.getZExtValue(); if (!refineRxSBGMask(RxSBG, Mask)) return false; @@ -867,12 +869,12 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND) Count += 1; if (Count == 0) - return 0; + return nullptr; if (Count == 1) { // Prefer to use normal shift instructions over RISBG, since they can handle // all cases and are sometimes shorter. if (N->getOpcode() != ISD::AND) - return 0; + return nullptr; // Prefer register extensions like LLC over RISBG. Also prefer to start // out with normal ANDs if one instruction would be enough. We can convert @@ -889,7 +891,7 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask); return SelectCode(N); } - return 0; + return nullptr; } } @@ -927,7 +929,7 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { // Do nothing if neither operand is suitable. if (Count[0] == 0 && Count[1] == 0) - return 0; + return nullptr; // Pick the deepest second operand. unsigned I = Count[0] > Count[1] ? 0 : 1; @@ -937,7 +939,7 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) { if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0) if (auto *Load = dyn_cast(Op0.getNode())) if (Load->getMemoryVT() == MVT::i8) - return 0; + return nullptr; // See whether we can avoid an AND in the first operand by converting // ROSBG to RISBG. @@ -986,8 +988,8 @@ bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, return true; // Otherwise we need to check whether there's an alias. 
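[Annotation, not part of the patch] The `ComputeMaskedBits` calls above become `computeKnownBits`, a rename in this release rather than a behavior change: the routine still fills one APInt with the bits proven zero and one with the bits proven one for a given SDValue, which the RxSBG code then folds into its masks. A toy, LLVM-free model of what "known bits" means:

#include <cassert>
#include <cstdint>

struct KnownBits64 {
  std::uint64_t Zero; // bits proven 0
  std::uint64_t One;  // bits proven 1
};

// For (X & Mask): every bit clear in Mask is forced to 0 by the AND, and
// bits proven 1 in X survive only where Mask is 1.
static KnownBits64 knownBitsOfAnd(KnownBits64 X, std::uint64_t Mask) {
  KnownBits64 R = { X.Zero | ~Mask, X.One & Mask };
  return R;
}

int main() {
  KnownBits64 Unknown = {0, 0};                  // nothing known about X
  KnownBits64 K = knownBitsOfAnd(Unknown, 0xFF); // model of (X & 0xFF)
  assert(K.Zero == ~UINT64_C(0xFF));             // top 56 bits proven zero
  assert(K.One == 0);
  return 0;
}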
- const Value *V1 = Load->getSrcValue(); - const Value *V2 = Store->getSrcValue(); + const Value *V1 = Load->getMemOperand()->getValue(); + const Value *V2 = Store->getMemOperand()->getValue(); if (!V1 || !V2) return false; @@ -1037,11 +1039,11 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { if (Node->isMachineOpcode()) { DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); Node->setNodeId(-1); - return 0; + return nullptr; } unsigned Opcode = Node->getOpcode(); - SDNode *ResNode = 0; + SDNode *ResNode = nullptr; switch (Opcode) { case ISD::OR: if (Node->getOperand(1).getOpcode() != ISD::Constant) @@ -1114,7 +1116,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { ResNode = SelectCode(Node); DEBUG(errs() << "=> "; - if (ResNode == NULL || ResNode == Node) + if (ResNode == nullptr || ResNode == Node) Node->dump(CurDAG); else ResNode->dump(CurDAG); diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 714b6c9..6fe1fb9 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "systemz-lower" - #include "SystemZISelLowering.h" #include "SystemZCallingConv.h" #include "SystemZConstantPoolValue.h" @@ -26,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "systemz-lower" + namespace { // Represents a sequence for extracting a 0/1 value from an IPM result: // (((X ^ XORValue) + AddValue) >> Bit) @@ -424,7 +424,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info, Value *CallOperandVal = info.CallOperandVal; // If we don't have a value, we can't do a match, // but allow it at the lowest weight. - if (CallOperandVal == NULL) + if (!CallOperandVal) return CW_Default; Type *type = CallOperandVal->getType(); // Look at the constraint type. @@ -492,7 +492,7 @@ parseRegisterNumber(const std::string &Constraint, if (Index < 16 && Map[Index]) return std::make_pair(Map[Index], RC); } - return std::make_pair(0u, static_cast(0)); + return std::make_pair(0U, nullptr); } std::pair SystemZTargetLowering:: @@ -772,8 +772,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, } // Join the stores, which are independent of one another. Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - &MemOps[NumFixedFPRs], - SystemZ::NumArgFPRs - NumFixedFPRs); + makeArrayRef(&MemOps[NumFixedFPRs], + SystemZ::NumArgFPRs-NumFixedFPRs)); } } @@ -875,8 +875,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Join the stores, which are independent of one another. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); // Accept direct calls by converting symbolic call addresses to the // associated Target* opcodes. Force %r1 to be used for indirect @@ -919,8 +918,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (IsTailCall) - return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size()); - Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); + Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); Glue = Chain.getValue(1); // Mark the end of the call, which is glued to the call itself. 
@@ -996,8 +995,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, if (Glue.getNode()) RetOps.push_back(Glue); - return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, - RetOps.data(), RetOps.size()); + return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); } SDValue SystemZTargetLowering:: @@ -1489,7 +1487,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) { // Check whether the nonconstant input is an AND with a constant mask. Comparison NewC(C); uint64_t MaskVal; - ConstantSDNode *Mask = 0; + ConstantSDNode *Mask = nullptr; if (C.Op0.getOpcode() == ISD::AND) { NewC.Op0 = C.Op0.getOperand(0); NewC.Op1 = C.Op0.getOperand(1); @@ -1779,7 +1777,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, Ops.push_back(Glue); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); + return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); } SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, @@ -1971,7 +1969,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, false, false, 0); Offset += 8; } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps, NumFields); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, @@ -2012,7 +2010,7 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); SDValue Ops[2] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, @@ -2054,7 +2052,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); } - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, @@ -2073,7 +2071,7 @@ SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, // low half first, so the results are in reverse order. lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, @@ -2100,7 +2098,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, SDValue Ops[2]; lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode, Op0, Op1, Ops[1], Ops[0]); - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, @@ -2118,7 +2116,7 @@ SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, else lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64, Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { @@ -2127,8 +2125,8 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { // Get the known-zero masks for each operand. 
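[Annotation, not part of the patch] The `getNode`, `getMergeValues`, and `getMemIntrinsicNode` call sites above shrink because SelectionDAG's variadic-operand entry points now take an ArrayRef instead of a raw pointer plus an element count; arrays and SmallVectors convert to the view implicitly, so the `&Ops[0], Ops.size()` and `array_lengthof(Ops)` idioms disappear along with their off-by-one risk. A standalone stand-in showing why the call sites get shorter:

#include <cstddef>
#include <iostream>

// Minimal stand-in for llvm::ArrayRef<T>: a non-owning (pointer, length) view.
template <typename T> class ArrayRefLike {
  const T *Data;
  std::size_t Length;
public:
  template <std::size_t N>
  ArrayRefLike(const T (&Arr)[N]) : Data(Arr), Length(N) {} // from a C array
  ArrayRefLike(const T *D, std::size_t L) : Data(D), Length(L) {}
  const T *begin() const { return Data; }
  const T *end() const { return Data + Length; }
};

static int sum(ArrayRefLike<int> Vals) {
  int S = 0;
  for (int V : Vals)
    S += V;
  return S;
}

int main() {
  int Ops[] = {1, 2, 3};
  std::cout << sum(Ops) << "\n";      // length deduced from the array: 6
  std::cout << sum({Ops, 2}) << "\n"; // explicit pointer+length still works: 3
  return 0;
}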
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) }; APInt KnownZero[2], KnownOne[2]; - DAG.ComputeMaskedBits(Ops[0], KnownZero[0], KnownOne[0]); - DAG.ComputeMaskedBits(Ops[1], KnownZero[1], KnownOne[1]); + DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]); + DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]); // See if the upper 32 bits of one operand and the lower 32 bits of the // other are known zero. They are the low and high operands respectively. @@ -2259,7 +2257,6 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, DAG.getConstant(BitSize, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, - array_lengthof(Ops), NarrowVT, MMO); // Rotate the result of the final CS so that the field is in the lower @@ -2269,7 +2266,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; - return DAG.getMergeValues(RetOps, 2, DL); + return DAG.getMergeValues(RetOps, DL); } // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations @@ -2351,8 +2348,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, NegBitShift, DAG.getConstant(BitSize, WideVT) }; SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, - VTList, Ops, array_lengthof(Ops), - NarrowVT, MMO); + VTList, Ops, NarrowVT, MMO); return AtomicOp; } @@ -2388,7 +2384,7 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, Op.getOperand(1) }; return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op), - Node->getVTList(), Ops, array_lengthof(Ops), + Node->getVTList(), Ops, Node->getMemoryVT(), Node->getMemOperand()); } @@ -2517,7 +2513,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_CMP_SWAPW); OPCODE(PREFETCH); } - return NULL; + return nullptr; #undef OPCODE } @@ -3116,7 +3112,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, // When generating more than one CLC, all but the last will need to // branch to the end when a difference is found. MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ? - splitBlockAfter(MI, MBB) : 0); + splitBlockAfter(MI, MBB) : nullptr); // Check for the loop form, in which operand 5 is the trip count. if (MI->getNumExplicitOperands() > 5) { diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 50badf8..add675a 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -516,7 +516,7 @@ class InstSS op, dag outs, dag ins, string asmstr, list pattern> // // Binary: // One register output operand and two input operands. The first -// input operand is always a register and he second may be a register, +// input operand is always a register and the second may be a register, // immediate or memory. 
// // Shift: diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index e20834c..6a18b2d 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -17,12 +17,12 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +using namespace llvm; + #define GET_INSTRINFO_CTOR_DTOR #define GET_INSTRMAP_INFO #include "SystemZGenInstrInfo.inc" -using namespace llvm; - // Return a mask with Count low bits set. static uint64_t allOnes(unsigned int Count) { return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1; @@ -284,11 +284,11 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, std::next(I)->eraseFromParent(); Cond.clear(); - FBB = 0; + FBB = nullptr; // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) { - TBB = 0; + TBB = nullptr; I->eraseFromParent(); I = MBB.end(); continue; @@ -418,7 +418,7 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI, static MachineInstr *getDef(unsigned Reg, const MachineRegisterInfo *MRI) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return 0; + return nullptr; return MRI->getUniqueVRegDef(Reg); } @@ -442,7 +442,7 @@ static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) { static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg, const MachineRegisterInfo *MRI, const TargetRegisterInfo *TRI) { - MachineInstr *LGFR = 0; + MachineInstr *LGFR = nullptr; MachineInstr *RLL = getDef(SrcReg, MRI); if (RLL && RLL->getOpcode() == SystemZ::LGFR) { LGFR = RLL; @@ -542,7 +542,7 @@ PredicateInstruction(MachineInstr *MI, MI->setDesc(get(CondOpcode)); MachineInstrBuilder(*MI->getParent()->getParent(), MI) .addImm(CCValid).addImm(CCMask) - .addReg(SystemZ::CC, RegState::Implicit);; + .addReg(SystemZ::CC, RegState::Implicit); return true; } } @@ -740,7 +740,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return finishConvertToThreeAddress(MI, MIB, LV); } } - return 0; + return nullptr; } MachineInstr * @@ -761,12 +761,12 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, .addFrameIndex(FrameIndex).addImm(0) .addImm(MI->getOperand(2).getImm()); } - return 0; + return nullptr; } // All other cases require a single operand. if (Ops.size() != 1) - return 0; + return nullptr; unsigned OpNum = Ops[0]; assert(Size == MF.getRegInfo() @@ -858,14 +858,14 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } } - return 0; + return nullptr; } MachineInstr * SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, const SmallVectorImpl &Ops, MachineInstr* LoadMI) const { - return 0; + return nullptr; } bool diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 55f80af..09aee5d 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -229,7 +229,7 @@ public: // BRANCH exists, return the opcode for the latter, otherwise return 0. // MI, if nonnull, is the compare instruction. unsigned getCompareAndBranch(unsigned Opcode, - const MachineInstr *MI = 0) const; + const MachineInstr *MI = nullptr) const; // Emit code before MBBI in MI to move immediate value Value into // physical register Reg. 
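[Annotation, not part of the patch] Across the register-info hunks (Sparc above, SystemZ below) the callee-saved arrays change element type from `uint16_t` to `MCPhysReg`. In this timeframe MCPhysReg is, to the best of my reading, still a typedef for uint16_t in llvm/MC/MCRegisterInfo.h, so the change names the intent without changing the layout; the lists stay zero-terminated, as the walk in SystemZFrameLowering above expects. A sketch with hypothetical register numbers:

#include <cstdint>

// Stand-in for the typedef in llvm/MC/MCRegisterInfo.h:
//   typedef uint16_t MCPhysReg;
typedef std::uint16_t MCPhysReg;

static const MCPhysReg CalleeSavedRegs[] = {
  6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // hypothetical register numbers
  0                                   // zero terminator, as the loops expect
};

int main() {
  unsigned Count = 0;
  for (unsigned I = 0; CalleeSavedRegs[I]; ++I) // walk until the 0 sentinel
    ++Count;
  return Count == 10 ? 0 : 1;
}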
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp index 1b88d06..8081334 100644 --- a/lib/Target/SystemZ/SystemZLongBranch.cpp +++ b/lib/Target/SystemZ/SystemZLongBranch.cpp @@ -53,8 +53,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "systemz-long-branch" - #include "SystemZTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -68,6 +66,8 @@ using namespace llvm; +#define DEBUG_TYPE "systemz-long-branch" + STATISTIC(LongBranches, "Number of long branches."); namespace { @@ -111,7 +111,8 @@ struct TerminatorInfo { // otherwise it is zero. unsigned ExtraRelaxSize; - TerminatorInfo() : Branch(0), Size(0), TargetBlock(0), ExtraRelaxSize(0) {} + TerminatorInfo() : Branch(nullptr), Size(0), TargetBlock(0), + ExtraRelaxSize(0) {} }; // Used to keep track of the current position while iterating over the blocks. @@ -131,13 +132,13 @@ class SystemZLongBranch : public MachineFunctionPass { public: static char ID; SystemZLongBranch(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(0) {} + : MachineFunctionPass(ID), TII(nullptr) {} const char *getPassName() const override { return "SystemZ Long Branch"; } - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; private: void skipNonTerminators(BlockPosition &Position, MBBInfo &Block); @@ -424,7 +425,7 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) { Terminator.Size += Terminator.ExtraRelaxSize; Terminator.ExtraRelaxSize = 0; - Terminator.Branch = 0; + Terminator.Branch = nullptr; ++LongBranches; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 1ac4e32..a04d703 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -12,17 +12,17 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +using namespace llvm; + #define GET_REGINFO_TARGET_DESC #include "SystemZGenRegisterInfo.inc" -using namespace llvm; - SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm) : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm) {} -const uint16_t* +const MCPhysReg* SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - static const uint16_t CalleeSavedRegs[] = { + static const MCPhysReg CalleeSavedRegs[] = { SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D, SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, SystemZ::R14D, SystemZ::R15D, diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 4ad8048..e236f71 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -49,7 +49,7 @@ public: bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override { return true; } - const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const + const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; void eliminateFrameIndex(MachineBasicBlock::iterator MI, diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 7635bdc..97abee3 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -11,12 +11,13 @@ // 
//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "systemz-selectiondag-info" #include "SystemZTargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; +#define DEBUG_TYPE "systemz-selectiondag-info" + SystemZSelectionDAGInfo:: SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) : TargetSelectionDAGInfo(TM) { @@ -230,7 +231,7 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain, Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32)); Ops.push_back(Glue); VTs = DAG.getVTList(PtrVT, MVT::Glue); - End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size()); + End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); return std::make_pair(End, Chain); } diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index 9350779..aad899c 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -13,13 +13,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "systemz-shorten-inst" - #include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; +#define DEBUG_TYPE "systemz-shorten-inst" + namespace { class SystemZShortenInst : public MachineFunctionPass { public: @@ -31,7 +31,7 @@ public: } bool processBlock(MachineBasicBlock &MBB); - bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineFunction(MachineFunction &F) override; private: bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther, @@ -53,7 +53,7 @@ FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) { } SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(0), LowGPRs(), HighGPRs() { + : MachineFunctionPass(ID), TII(nullptr), LowGPRs(), HighGPRs() { // Set up LowGPRs and HighGPRs. for (unsigned I = 0; I < 16; ++I) { LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I; diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index 33d7e06..a011157 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -12,12 +12,14 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/Support/Host.h" +using namespace llvm; + +#define DEBUG_TYPE "systemz-subtarget" + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "SystemZGenSubtargetInfo.inc" -using namespace llvm; - // Pin the vtabel to this file. 
void SystemZSubtarget::anchor() {} diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp index 627786d..d277f82 100644 --- a/lib/Target/Target.cpp +++ b/lib/Target/Target.cpp @@ -24,14 +24,6 @@ using namespace llvm; -inline DataLayout *unwrap(LLVMTargetDataRef P) { - return reinterpret_cast(P); -} - -inline LLVMTargetDataRef wrap(const DataLayout *P) { - return reinterpret_cast(const_cast(P)); -} - inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { return reinterpret_cast(P); } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 50b1e31..39e0459 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -46,7 +46,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx, InitMCObjectFileInfo(TM.getTargetTriple(), TM.getRelocationModel(), TM.getCodeModel(), *Ctx); } - + TargetLoweringObjectFile::~TargetLoweringObjectFile() { } @@ -62,7 +62,7 @@ static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) { return false; // If the global has an explicit section specified, don't put it in BSS. - if (!GV->getSection().empty()) + if (GV->hasSection()) return false; // If -nozero-initialized-in-bss is specified, don't ever use BSS. @@ -138,7 +138,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // Early exit - functions should be always in text sections. const GlobalVariable *GVar = dyn_cast(GV); - if (GVar == 0) + if (!GVar) return SectionKind::getText(); // Handle thread-local data first. @@ -284,10 +284,10 @@ TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV, if (Kind.isText()) return getTextSection(); - if (Kind.isBSS() && BSSSection != 0) + if (Kind.isBSS() && BSSSection != nullptr) return BSSSection; - if (Kind.isReadOnly() && ReadOnlySection != 0) + if (Kind.isReadOnly() && ReadOnlySection != nullptr) return ReadOnlySection; return getDataSection(); @@ -298,7 +298,7 @@ TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV, /// should be placed in. const MCSection * TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const { - if (Kind.isReadOnly() && ReadOnlySection != 0) + if (Kind.isReadOnly() && ReadOnlySection != nullptr) return ReadOnlySection; return DataSection; diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index fe3c870..8365f64 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" @@ -28,24 +29,6 @@ using namespace llvm; //--------------------------------------------------------------------------- -// Command-line options that tend to be useful on more than one back-end. 
-// - -namespace llvm { - bool HasDivModLibcall; - bool AsmVerbosityDefault(false); -} - -static cl::opt -DataSections("fdata-sections", - cl::desc("Emit data into separate sections"), - cl::init(false)); -static cl::opt -FunctionSections("ffunction-sections", - cl::desc("Emit functions into separate sections"), - cl::init(false)); - -//--------------------------------------------------------------------------- // TargetMachine Class // @@ -53,12 +36,7 @@ TargetMachine::TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options) : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), - CodeGenInfo(0), AsmInfo(0), - MCRelaxAll(false), - MCNoExecStack(false), - MCSaveTempLabels(false), - MCUseCFI(true), - MCUseDwarfDirectory(false), + CodeGenInfo(nullptr), AsmInfo(nullptr), RequireStructuredCFG(false), Options(Options) { } @@ -89,6 +67,8 @@ void TargetMachine::resetTargetOptions(const MachineFunction *MF) const { RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math"); RESET_OPTION(UseSoftFloat, "use-soft-float"); RESET_OPTION(DisableTailCalls, "disable-tail-calls"); + + TO.MCOptions.SanitizeAddress = F->hasFnAttribute(Attribute::SanitizeAddress); } /// getRelocationModel - Returns the code generation relocation model. The @@ -126,19 +106,13 @@ static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) { } TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast(GV)) - GV = GA->getAliasedGlobal(); - const GlobalVariable *Var = cast(GV); - - bool isLocal = Var->hasLocalLinkage(); - bool isDeclaration = Var->isDeclaration(); + bool isLocal = GV->hasLocalLinkage(); + bool isDeclaration = GV->isDeclaration(); bool isPIC = getRelocationModel() == Reloc::PIC_; bool isPIE = Options.PositionIndependentExecutable; // FIXME: what should we do for protected and internal visibility? // For variables, is internal different from hidden? - bool isHidden = Var->hasHiddenVisibility(); + bool isHidden = GV->hasHiddenVisibility(); TLSModel::Model Model; if (isPIC && !isPIE) { @@ -153,10 +127,13 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { Model = TLSModel::InitialExec; } - // If the user specified a more specific model, use that. - TLSModel::Model SelectedModel = getSelectedTLSModel(Var); - if (SelectedModel > Model) - return SelectedModel; + const GlobalVariable *Var = dyn_cast(GV); + if (Var) { + // If the user specified a more specific model, use that. 
+ TLSModel::Model SelectedModel = getSelectedTLSModel(Var); + if (SelectedModel > Model) + return SelectedModel; + } return Model; } @@ -174,28 +151,28 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const { CodeGenInfo->setOptLevel(Level); } -bool TargetMachine::getAsmVerbosityDefault() { - return AsmVerbosityDefault; +bool TargetMachine::getAsmVerbosityDefault() const { + return Options.MCOptions.AsmVerbose; } void TargetMachine::setAsmVerbosityDefault(bool V) { - AsmVerbosityDefault = V; + Options.MCOptions.AsmVerbose = V; } -bool TargetMachine::getFunctionSections() { - return FunctionSections; +bool TargetMachine::getFunctionSections() const { + return Options.FunctionSections; } -bool TargetMachine::getDataSections() { - return DataSections; +bool TargetMachine::getDataSections() const { + return Options.DataSections; } void TargetMachine::setFunctionSections(bool V) { - FunctionSections = V; + Options.FunctionSections = V; } void TargetMachine::setDataSections(bool V) { - DataSections = V; + Options.DataSections = V; } void TargetMachine::getNameWithPrefix(SmallVectorImpl &Name, diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index a2829d4..20923c9 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/Module.h" #include "llvm/PassManager.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" @@ -29,23 +30,6 @@ using namespace llvm; -inline DataLayout *unwrap(LLVMTargetDataRef P) { - return reinterpret_cast(P); -} - -inline LLVMTargetDataRef wrap(const DataLayout *P) { - return reinterpret_cast(const_cast(P)); -} - -inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { - return reinterpret_cast(P); -} - -inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { - TargetLibraryInfo *X = const_cast(P); - return reinterpret_cast(X); -} - inline TargetMachine *unwrap(LLVMTargetMachineRef P) { return reinterpret_cast(P); } @@ -62,7 +46,7 @@ inline LLVMTargetRef wrap(const Target * P) { LLVMTargetRef LLVMGetFirstTarget() { if(TargetRegistry::begin() == TargetRegistry::end()) { - return NULL; + return nullptr; } const Target* target = &*TargetRegistry::begin(); @@ -80,7 +64,7 @@ LLVMTargetRef LLVMGetTargetFromName(const char *Name) { return wrap(&*IT); } - return NULL; + return nullptr; } LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T, diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index df8948f..3ca13da 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -24,11 +24,12 @@ TargetSubtargetInfo::TargetSubtargetInfo() {} TargetSubtargetInfo::~TargetSubtargetInfo() {} // Temporary option to compare overall performance change when moving from the -// SD scheduler to the MachineScheduler pass pipeline. It should be removed -// before 3.4. The normal way to enable/disable the MachineScheduling pass -// itself is by using -enable-misched. For targets that already use MI sched -// (via MySubTarget::enableMachineScheduler()) -misched-bench=false negates the -// subtarget hook. +// SD scheduler to the MachineScheduler pass pipeline. This is convenient for +// benchmarking during the transition from SD to MI scheduling. Once armv7 makes +// the switch, it should go away. 
The normal way to enable/disable the +// MachineScheduling pass itself is by using -enable-misched. For targets that +// already use MI sched (via MySubTarget::enableMachineScheduler()) +// -misched-bench=false negates the subtarget hook. static cl::opt BenchMachineSched("misched-bench", cl::Hidden, cl::desc("Migrate from the target's default SD scheduler to MI scheduler")); diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk index 73031de..0d0a9ca 100644 --- a/lib/Target/X86/Android.mk +++ b/lib/Target/X86/Android.mk @@ -12,7 +12,6 @@ x86_codegen_TBLGEN_TABLES := \ x86_codegen_SRC_FILES := \ X86AsmPrinter.cpp \ - X86COFFMachineModuleInfo.cpp \ X86CodeEmitter.cpp \ X86FastISel.cpp \ X86FixupLEAs.cpp \ diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index db29228..f3e6b3f 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -11,21 +11,25 @@ #include "X86AsmInstrumentation.h" #include "X86Operand.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/MC/MCParser/MCParsedAsmOperand.h" namespace llvm { namespace { -static cl::opt ClAsanInstrumentInlineAssembly( - "asan-instrument-inline-assembly", cl::desc("instrument inline assembly"), - cl::Hidden, cl::init(false)); +static cl::opt ClAsanInstrumentAssembly( + "asan-instrument-assembly", + cl::desc("instrument assembly with AddressSanitizer checks"), cl::Hidden, + cl::init(false)); bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP || Reg == X86::SP; @@ -38,14 +42,14 @@ std::string FuncName(unsigned AccessSize, bool IsWrite) { class X86AddressSanitizer : public X86AsmInstrumentation { public: - X86AddressSanitizer(MCSubtargetInfo &sti) : STI(sti) {} + X86AddressSanitizer(const MCSubtargetInfo &STI) : STI(STI) {} virtual ~X86AddressSanitizer() {} // X86AsmInstrumentation implementation: virtual void InstrumentInstruction( const MCInst &Inst, SmallVectorImpl &Operands, - MCContext &Ctx, MCStreamer &Out) override { - InstrumentMOV(Inst, Operands, Ctx, Out); + MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) override { + InstrumentMOV(Inst, Operands, Ctx, MII, Out); } // Should be implemented differently in x86_32 and x86_64 subclasses. @@ -57,13 +61,13 @@ public: bool IsWrite, MCContext &Ctx, MCStreamer &Out); void InstrumentMOV(const MCInst &Inst, SmallVectorImpl &Operands, - MCContext &Ctx, MCStreamer &Out); + MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out); void EmitInstruction(MCStreamer &Out, const MCInst &Inst) { Out.EmitInstruction(Inst, STI); } protected: - MCSubtargetInfo &STI; + const MCSubtargetInfo &STI; }; void X86AddressSanitizer::InstrumentMemOperand( @@ -83,68 +87,53 @@ void X86AddressSanitizer::InstrumentMemOperand( void X86AddressSanitizer::InstrumentMOV( const MCInst &Inst, SmallVectorImpl &Operands, - MCContext &Ctx, MCStreamer &Out) { + MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) { // Access size in bytes. 
unsigned AccessSize = 0; - unsigned long OpIx = Operands.size(); + switch (Inst.getOpcode()) { case X86::MOV8mi: case X86::MOV8mr: - AccessSize = 1; - OpIx = 2; - break; case X86::MOV8rm: AccessSize = 1; - OpIx = 1; break; case X86::MOV16mi: case X86::MOV16mr: - AccessSize = 2; - OpIx = 2; - break; case X86::MOV16rm: AccessSize = 2; - OpIx = 1; break; case X86::MOV32mi: case X86::MOV32mr: - AccessSize = 4; - OpIx = 2; - break; case X86::MOV32rm: AccessSize = 4; - OpIx = 1; break; case X86::MOV64mi32: case X86::MOV64mr: - AccessSize = 8; - OpIx = 2; - break; case X86::MOV64rm: AccessSize = 8; - OpIx = 1; break; case X86::MOVAPDmr: case X86::MOVAPSmr: - AccessSize = 16; - OpIx = 2; - break; case X86::MOVAPDrm: case X86::MOVAPSrm: AccessSize = 16; - OpIx = 1; break; - } - if (OpIx >= Operands.size()) + default: return; + } - const bool IsWrite = (OpIx != 1); - InstrumentMemOperand(Operands[OpIx], AccessSize, IsWrite, Ctx, Out); + const bool IsWrite = MII.get(Inst.getOpcode()).mayStore(); + for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) { + MCParsedAsmOperand *Op = Operands[Ix]; + if (Op && Op->isMem()) + InstrumentMemOperand(Op, AccessSize, IsWrite, Ctx, Out); + } } class X86AddressSanitizer32 : public X86AddressSanitizer { public: - X86AddressSanitizer32(MCSubtargetInfo &sti) : X86AddressSanitizer(sti) {} + X86AddressSanitizer32(const MCSubtargetInfo &STI) + : X86AddressSanitizer(STI) {} virtual ~X86AddressSanitizer32() {} virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize, @@ -172,14 +161,14 @@ void X86AddressSanitizer32::InstrumentMemOperandImpl( MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx); EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FuncExpr)); } - EmitInstruction(Out, MCInstBuilder(X86::ADD32ri).addReg(X86::ESP) - .addReg(X86::ESP).addImm(4)); + EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX)); EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX)); } class X86AddressSanitizer64 : public X86AddressSanitizer { public: - X86AddressSanitizer64(MCSubtargetInfo &sti) : X86AddressSanitizer(sti) {} + X86AddressSanitizer64(const MCSubtargetInfo &STI) + : X86AddressSanitizer(STI) {} virtual ~X86AddressSanitizer64() {} virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize, @@ -187,13 +176,26 @@ public: MCStreamer &Out) override; }; -void X86AddressSanitizer64::InstrumentMemOperandImpl( - X86Operand *Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, - MCStreamer &Out) { +void X86AddressSanitizer64::InstrumentMemOperandImpl(X86Operand *Op, + unsigned AccessSize, + bool IsWrite, + MCContext &Ctx, + MCStreamer &Out) { // FIXME: emit .cfi directives for correct stack unwinding. - // Set %rsp below current red zone (128 bytes wide) - EmitInstruction(Out, MCInstBuilder(X86::SUB64ri32).addReg(X86::RSP) - .addReg(X86::RSP).addImm(128)); + + // Set %rsp below current red zone (128 bytes wide) using LEA instruction to + // preserve flags. 
+ { + MCInst Inst; + Inst.setOpcode(X86::LEA64r); + Inst.addOperand(MCOperand::CreateReg(X86::RSP)); + + const MCExpr *Disp = MCConstantExpr::Create(-128, Ctx); + std::unique_ptr Op( + X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc())); + Op->addMemOperands(Inst, 5); + EmitInstruction(Out, Inst); + } EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RDI)); { MCInst Inst; @@ -210,8 +212,19 @@ void X86AddressSanitizer64::InstrumentMemOperandImpl( EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FuncExpr)); } EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RDI)); - EmitInstruction(Out, MCInstBuilder(X86::ADD64ri32).addReg(X86::RSP) - .addReg(X86::RSP).addImm(128)); + + // Restore old %rsp value. + { + MCInst Inst; + Inst.setOpcode(X86::LEA64r); + Inst.addOperand(MCOperand::CreateReg(X86::RSP)); + + const MCExpr *Disp = MCConstantExpr::Create(128, Ctx); + std::unique_ptr Op( + X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc())); + Op->addMemOperands(Inst, 5); + EmitInstruction(Out, Inst); + } } } // End anonymous namespace @@ -221,10 +234,15 @@ X86AsmInstrumentation::~X86AsmInstrumentation() {} void X86AsmInstrumentation::InstrumentInstruction( const MCInst &Inst, SmallVectorImpl &Operands, - MCContext &Ctx, MCStreamer &Out) {} - -X86AsmInstrumentation *CreateX86AsmInstrumentation(MCSubtargetInfo &STI) { - if (ClAsanInstrumentInlineAssembly) { + MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) {} + +X86AsmInstrumentation * +CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, + const MCContext &Ctx, const MCSubtargetInfo &STI) { + Triple T(STI.getTargetTriple()); + const bool hasCompilerRTSupport = T.isOSLinux(); + if (ClAsanInstrumentAssembly && hasCompilerRTSupport && + MCOptions.SanitizeAddress) { if ((STI.getFeatureBits() & X86::Mode32Bit) != 0) return new X86AddressSanitizer32(STI); if ((STI.getFeatureBits() & X86::Mode64Bit) != 0) diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h index c783a78..0369b14 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h @@ -16,13 +16,17 @@ namespace llvm { class MCContext; class MCInst; +class MCInstrInfo; class MCParsedAsmOperand; class MCStreamer; class MCSubtargetInfo; +class MCTargetOptions; class X86AsmInstrumentation; -X86AsmInstrumentation *CreateX86AsmInstrumentation(MCSubtargetInfo &STI); +X86AsmInstrumentation * +CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, + const MCContext &Ctx, const MCSubtargetInfo &STI); class X86AsmInstrumentation { public: @@ -32,15 +36,18 @@ public: // instruction is sent to Out. 
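[Annotation, not part of the patch] The rewritten ASan prologue/epilogue above swaps SUB64ri32/ADD64ri32 for LEA64r when adjusting %rsp: LEA performs the same pointer arithmetic but leaves EFLAGS untouched, which matters because the instrumentation must be invisible to the surrounding assembly, and it still has to hop over the 128-byte x86-64 red zone before pushing anything. The patch builds the LEA through X86Operand::CreateMem; the same instruction written with MCInstBuilder, as a sketch against the 3.5-era MC API (the X86 opcode and register enums are assumed in scope, as they are in this file):

#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"

// Emits: leaq Disp(%rsp), %rsp
// An X86 memory reference is five MCOperands, in this order:
// base, scale, index, displacement, segment.
static void emitFlagSafeRSPAdjust(llvm::MCStreamer &Out,
                                  const llvm::MCSubtargetInfo &STI,
                                  int64_t Disp) {
  using namespace llvm;
  Out.EmitInstruction(MCInstBuilder(X86::LEA64r)
                          .addReg(X86::RSP) // destination
                          .addReg(X86::RSP) // memory base
                          .addImm(1)        // scale
                          .addReg(0)        // no index register
                          .addImm(Disp)     // e.g. -128, then +128 to restore
                          .addReg(0),       // no segment register
                      STI);
}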
virtual void InstrumentInstruction( const MCInst &Inst, SmallVectorImpl &Operands, - MCContext &Ctx, MCStreamer &Out); + MCContext &Ctx, + const MCInstrInfo &MII, + MCStreamer &Out); protected: friend X86AsmInstrumentation * - CreateX86AsmInstrumentation(MCSubtargetInfo &STI); + CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, + const MCContext &Ctx, const MCSubtargetInfo &STI); X86AsmInstrumentation(); }; -} // End llvm namespace +} // End llvm namespace -#endif // X86_ASM_INSTRUMENTATION_H +#endif // X86_ASM_INSTRUMENTATION_H diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 9eddc74..d3e695e 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -20,6 +20,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" @@ -55,6 +56,7 @@ static const char OpPrecedence[] = { class X86AsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; MCAsmParser &Parser; + const MCInstrInfo &MII; ParseInstructionInfo *InstInfo; std::unique_ptr Instrumentation; private: @@ -257,7 +259,7 @@ private: public: IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), - Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac), + Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac), AddImmPrefix(addimmprefix) { Info.clear(); } unsigned getBaseReg() { return BaseReg; } @@ -618,7 +620,7 @@ private: X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) { Error(Loc, Msg); - return 0; + return nullptr; } X86Operand *DefaultMemSIOperand(SMLoc Loc); @@ -710,13 +712,17 @@ private: public: X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, - const MCInstrInfo &MII) - : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) { + const MCInstrInfo &mii, + const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii), + InstInfo(nullptr) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - Instrumentation.reset(CreateX86AsmInstrumentation(STI)); + Instrumentation.reset( + CreateX86AsmInstrumentation(Options, Parser.getContext(), STI)); } + bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool @@ -1173,9 +1179,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, // expression. IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true); if (ParseIntelExpression(SM, End)) - return 0; + return nullptr; - const MCExpr *Disp = 0; + const MCExpr *Disp = nullptr; if (const MCExpr *Sym = SM.getSym()) { // A symbolic displacement. Disp = Sym; @@ -1199,7 +1205,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, if (Tok.getString().find('.') != StringRef::npos) { const MCExpr *NewDisp; if (ParseIntelDotOperator(Disp, NewDisp)) - return 0; + return nullptr; End = Tok.getEndLoc(); Parser.Lex(); // Eat the field. 
@@ -1220,7 +1226,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, StringRef ErrMsg; if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { Error(StartInBrac, ErrMsg); - return 0; + return nullptr; } return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start, End, Size); @@ -1237,7 +1243,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, InlineAsmIdentifierInfo &Info, bool IsUnevaluatedOperand, SMLoc &End) { assert (isParsingInlineAsm() && "Expected to be parsing inline assembly."); - Val = 0; + Val = nullptr; StringRef LineBuf(Identifier.data()); SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); @@ -1309,7 +1315,7 @@ X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, StringRef Identifier = Tok.getString(); if (ParseIntelIdentifier(Val, Identifier, Info, /*Unevaluated=*/false, End)) - return 0; + return nullptr; return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0, /*Scale=*/1, Start, End, Size, Identifier, Info); } @@ -1337,7 +1343,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start, StringRef Identifier = Tok.getString(); if (ParseIntelIdentifier(Val, Identifier, Info, /*Unevaluated=*/false, End)) - return 0; + return nullptr; if (!getLexer().is(AsmToken::LBrac)) return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0, @@ -1349,19 +1355,19 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start, IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true, /*AddImmPrefix=*/false); if (ParseIntelExpression(SM, End)) - return 0; + return nullptr; if (SM.getSym()) { Error(Start, "cannot use more than one symbol in memory operand"); - return 0; + return nullptr; } if (SM.getBaseReg()) { Error(Start, "cannot use base register with variable reference"); - return 0; + return nullptr; } if (SM.getIndexReg()) { Error(Start, "cannot use index register with variable reference"); - return 0; + return nullptr; } const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext()); @@ -1430,7 +1436,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() { StringRef Identifier = Tok.getString(); if (ParseIntelIdentifier(Val, Identifier, Info, /*Unevaluated=*/false, End)) - return 0; + return nullptr; // Don't emit the offset operator. InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); @@ -1461,13 +1467,13 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { SMLoc TypeLoc = Tok.getLoc(); Parser.Lex(); // Eat operator. - const MCExpr *Val = 0; + const MCExpr *Val = nullptr; InlineAsmIdentifierInfo Info; SMLoc Start = Tok.getLoc(), End; StringRef Identifier = Tok.getString(); if (ParseIntelIdentifier(Val, Identifier, Info, /*Unevaluated=*/true, End)) - return 0; + return nullptr; if (!Info.OpDecl) return ErrorOperand(Start, "unable to lookup expression"); @@ -1522,7 +1528,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, /*AddImmPrefix=*/false); if (ParseIntelExpression(SM, End)) - return 0; + return nullptr; int64_t Imm = SM.getImm(); if (isParsingInlineAsm()) { @@ -1580,11 +1586,11 @@ X86Operand *X86AsmParser::ParseATTOperand() { // Read the register. 
unsigned RegNo; SMLoc Start, End; - if (ParseRegister(RegNo, Start, End)) return 0; + if (ParseRegister(RegNo, Start, End)) return nullptr; if (RegNo == X86::EIZ || RegNo == X86::RIZ) { Error(Start, "%eiz and %riz can only be used as index registers", SMRange(Start, End)); - return 0; + return nullptr; } // If this is a segment register followed by a ':', then this is the start @@ -1601,7 +1607,7 @@ X86Operand *X86AsmParser::ParseATTOperand() { Parser.Lex(); const MCExpr *Val; if (getParser().parseExpression(Val, End)) - return 0; + return nullptr; return X86Operand::CreateImm(Val, Start, End); } } @@ -1630,7 +1636,7 @@ X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands StringSwitch<const char*>(getLexer().getTok().getIdentifier()) .Case("to8", "{1to8}") .Case("to16", "{1to16}") - .Default(0); + .Default(nullptr); if (!BroadcastPrimitive) return !ErrorAndEatStatement(getLexer().getLoc(), "Invalid memory broadcast primitive."); @@ -1685,7 +1691,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext()); if (getLexer().isNot(AsmToken::LParen)) { SMLoc ExprEnd; - if (getParser().parseExpression(Disp, ExprEnd)) return 0; + if (getParser().parseExpression(Disp, ExprEnd)) return nullptr; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. @@ -1712,7 +1718,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // It must be an parenthesized expression, parse it now. if (getParser().parseParenExpression(Disp, ExprEnd)) - return 0; + return nullptr; // After parsing the base expression we could either have a parenthesized // memory address or not. If not, return now. If so, eat the (. @@ -1736,11 +1742,11 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getLexer().is(AsmToken::Percent)) { SMLoc StartLoc, EndLoc; BaseLoc = Parser.getTok().getLoc(); - if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0; + if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr; if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) { Error(StartLoc, "eiz and riz can only be used as index registers", SMRange(StartLoc, EndLoc)); - return 0; + return nullptr; } } @@ -1756,7 +1762,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. if (getLexer().is(AsmToken::Percent)) { SMLoc L; - if (ParseRegister(IndexReg, L, L)) return 0; + if (ParseRegister(IndexReg, L, L)) return nullptr; if (getLexer().isNot(AsmToken::RParen)) { // Parse the scale amount: @@ -1764,7 +1770,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { if (getLexer().isNot(AsmToken::Comma)) { Error(Parser.getTok().getLoc(), "expected comma in scale expression"); - return 0; + return nullptr; } Parser.Lex(); // Eat the comma. @@ -1774,18 +1780,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { int64_t ScaleVal; if (getParser().parseAbsoluteExpression(ScaleVal)){ Error(Loc, "expected scale expression"); - return 0; + return nullptr; } // Validate the scale amount.
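Editorial note (not from the patch): the validation that follows encodes the x86 addressing constraint that a scale factor must be 1, 2, 4 or 8, and that 16-bit addressing admits no scaled index at all. As a standalone predicate (hypothetical helper name, for illustration only):

  static bool isValidScale(unsigned Scale, bool Is16BitAddress) {
    if (Is16BitAddress)
      return Scale == 1;  // 16-bit mode has no SIB byte, hence no scaling
    return Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8;
  }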
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && ScaleVal != 1) { Error(Loc, "scale factor in 16-bit address must be 1"); - return 0; + return nullptr; } if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); - return 0; + return nullptr; } Scale = (unsigned)ScaleVal; } @@ -1797,7 +1803,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { int64_t Value; if (getParser().parseAbsoluteExpression(Value)) - return 0; + return nullptr; if (Value != 1) Warning(Loc, "scale factor without index register is ignored"); @@ -1808,7 +1814,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. if (getLexer().isNot(AsmToken::RParen)) { Error(Parser.getTok().getLoc(), "unexpected token in memory operand"); - return 0; + return nullptr; } SMLoc MemEnd = Parser.getTok().getEndLoc(); Parser.Lex(); // Eat the ')'. @@ -1821,18 +1827,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { BaseReg != X86::SI && BaseReg != X86::DI)) && BaseReg != X86::DX) { Error(BaseLoc, "invalid 16-bit base register"); - return 0; + return nullptr; } if (BaseReg == 0 && X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) { Error(IndexLoc, "16-bit memory operand may not include only index register"); - return 0; + return nullptr; } StringRef ErrMsg; if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) { Error(BaseLoc, ErrMsg); - return 0; + return nullptr; } return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, @@ -1851,7 +1857,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, PatchedName = PatchedName.substr(0, Name.size()-1); // FIXME: Hack to recognize cmp{ss,sd,ps,pd}. 
- const MCExpr *ExtraImmOp = 0; + const MCExpr *ExtraImmOp = nullptr; if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && (PatchedName.endswith("ss") || PatchedName.endswith("sd") || PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { @@ -2070,8 +2076,10 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, (Name == "smov" || Name == "smovb" || Name == "smovw" || Name == "smovl" || Name == "smovd" || Name == "smovq"))) { if (Operands.size() == 1) { - if (Name == "movsd") + if (Name == "movsd") { + delete Operands.back(); Operands.back() = X86Operand::CreateToken("movsl", NameLoc); + } if (isParsingIntelSyntax()) { Operands.push_back(DefaultMemDIOperand(NameLoc)); Operands.push_back(DefaultMemSIOperand(NameLoc)); @@ -2253,7 +2261,8 @@ static const char *getSubtargetFeatureName(unsigned Val); void X86AsmParser::EmitInstruction( MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands, MCStreamer &Out) { - Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), Out); + Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII, + Out); Out.EmitInstruction(Inst, STI); } @@ -2291,7 +2300,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, .Case("fstsw", "fnstsw") .Case("fstsww", "fnstsw") .Case("fclex", "fnclex") - .Default(0); + .Default(nullptr); assert(Repl && "Unknown wait-prefixed instruction"); delete Operands[0]; Operands[0] = X86Operand::CreateToken(Repl, IDLoc); diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index 45fe2a9..de3be38 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -422,7 +422,7 @@ struct X86Operand : public MCParsedAsmOperand { bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc(), StringRef SymName = StringRef(), - void *OpDecl = 0) { + void *OpDecl = nullptr) { X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; Res->AddressOf = AddressOf; @@ -441,7 +441,7 @@ struct X86Operand : public MCParsedAsmOperand { /// Create an absolute memory operand. static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(), - void *OpDecl = 0) { + void *OpDecl = nullptr) { X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; @@ -461,7 +461,7 @@ struct X86Operand : public MCParsedAsmOperand { unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0, StringRef SymName = StringRef(), - void *OpDecl = 0) { + void *OpDecl = nullptr) { // We should never just have a displacement, that should be parsed as an // absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 206b651..c54fbc1 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -14,7 +14,6 @@ add_public_tablegen_target(X86CommonTableGen) set(sources X86AsmPrinter.cpp - X86COFFMachineModuleInfo.cpp X86CodeEmitter.cpp X86FastISel.cpp X86FloatingPoint.cpp diff --git a/lib/Target/X86/Disassembler/Android.mk b/lib/Target/X86/Disassembler/Android.mk index 3984266..0b3b8a5 100644 --- a/lib/Target/X86/Disassembler/Android.mk +++ b/lib/Target/X86/Disassembler/Android.mk @@ -8,7 +8,8 @@ x86_disassembler_TBLGEN_TABLES := \ x86_disassembler_SRC_FILES := \ X86Disassembler.cpp \ - X86DisassemblerDecoder.c + X86DisassemblerDecoder.cpp + # For the device # ===================================================== diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index deed115..4370282 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -1,4 +1,4 @@ add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp - X86DisassemblerDecoder.c + X86DisassemblerDecoder.cpp ) diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile index 8669fd8..51e7b82 100644 --- a/lib/Target/X86/Disassembler/Makefile +++ b/lib/Target/X86/Disassembler/Makefile @@ -10,7 +10,9 @@ LEVEL = ../../../.. LIBRARYNAME = LLVMX86Disassembler -# Hack: we need to include 'main' x86 target directory to grab private headers +# Hack: we need to include 'main' x86 target directory to grab private headers. CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. include $(LEVEL)/Makefile.common + +.PHONY: $(PROJ_SRC_DIR)/X86DisassemblerDecoder.c diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index d5759cd..c366725 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -27,6 +27,11 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace llvm::X86Disassembler; + +#define DEBUG_TYPE "x86-disassembler" + #define GET_REGINFO_ENUM #include "X86GenRegisterInfo.inc" #define GET_INSTRINFO_ENUM @@ -34,21 +39,18 @@ #define GET_SUBTARGETINFO_ENUM #include "X86GenSubtargetInfo.inc" -using namespace llvm; -using namespace llvm::X86Disassembler; - -void x86DisassemblerDebug(const char *file, - unsigned line, - const char *s) { +void llvm::X86Disassembler::Debug(const char *file, unsigned line, + const char *s) { dbgs() << file << ":" << line << ": " << s; } -const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) { +const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, + const void *mii) { const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); return MII->getName(Opcode); } -#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s)); +#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); namespace llvm { @@ -74,9 +76,11 @@ static bool translateInstruction(MCInst &target, InternalInstruction &source, const MCDisassembler *Dis); -X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, - const MCInstrInfo *MII) - : MCDisassembler(STI), MII(MII) { +X86GenericDisassembler::X86GenericDisassembler( + const MCSubtargetInfo &STI, + MCContext &Ctx, + std::unique_ptr<const MCInstrInfo> MII) + : MCDisassembler(STI, Ctx), MII(std::move(MII)) { switch
(STI.getFeatureBits() & (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) { case X86::Mode16Bit: @@ -93,10 +97,6 @@ X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, } } -X86GenericDisassembler::~X86GenericDisassembler() { - delete MII; -} - /// regionReader - a callback function that wraps the readByte method from /// MemoryObject. /// @@ -140,14 +140,14 @@ X86GenericDisassembler::getInstruction(MCInst &instr, dlog_t loggerFn = logger; if (&vStream == &nulls()) - loggerFn = 0; // Disable logging completely if it's going to nulls(). + loggerFn = nullptr; // Disable logging completely if it's going to nulls(). int ret = decodeInstruction(&internalInstr, regionReader, (const void*)&region, loggerFn, (void*)&vStream, - (const void*)MII, + (const void*)MII.get(), address, fMode); @@ -319,7 +319,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } // By default sign-extend all X86 immediates based on their encoding. else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || - type == TYPE_IMM64) { + type == TYPE_IMM64 || type == TYPE_IMMv) { uint32_t Opcode = mcInst.getOpcode(); switch (operand.encoding) { default: @@ -787,13 +787,11 @@ static bool translateInstruction(MCInst &mcInst, mcInst.setOpcode(X86::XACQUIRE_PREFIX); } - int index; - insn.numImmediatesTranslated = 0; - for (index = 0; index < X86_MAX_OPERANDS; ++index) { - if (insn.operands[index].encoding != ENCODING_NONE) { - if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { + for (const auto &Op : insn.operands) { + if (Op.encoding != ENCODING_NONE) { + if (translateOperand(mcInst, Op, insn, Dis)) { return true; } } @@ -803,9 +801,10 @@ static bool translateInstruction(MCInst &mcInst, } static MCDisassembler *createX86Disassembler(const Target &T, - const MCSubtargetInfo &STI) { - return new X86Disassembler::X86GenericDisassembler(STI, - T.createMCInstrInfo()); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); + return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); } extern "C" void LLVMInitializeX86Disassembler() { diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 4e6e297..4dc7c29 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -74,17 +74,7 @@ #ifndef X86DISASSEMBLER_H #define X86DISASSEMBLER_H -#define INSTRUCTION_SPECIFIER_FIELDS \ - uint16_t operands; - -#define INSTRUCTION_IDS \ - uint16_t instructionIDs; - #include "X86DisassemblerDecoderCommon.h" - -#undef INSTRUCTION_SPECIFIER_FIELDS -#undef INSTRUCTION_IDS - #include "llvm/MC/MCDisassembler.h" namespace llvm { @@ -101,13 +91,12 @@ namespace X86Disassembler { /// All each platform class should have to do is subclass the constructor, and /// provide a different disassemblerMode value. class X86GenericDisassembler : public MCDisassembler { - const MCInstrInfo *MII; + std::unique_ptr<const MCInstrInfo> MII; public: /// Constructor - Initializes the disassembler. /// - X86GenericDisassembler(const MCSubtargetInfo &STI, const MCInstrInfo *MII); -private: - ~X86GenericDisassembler(); + X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + std::unique_ptr<const MCInstrInfo> MII); public: /// getInstruction - See MCDisassembler.
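Editorial note (summarizing the hunks above, not additional patch content): the disassembler moves from a raw MCInstrInfo pointer plus a hand-written destructor to std::unique_ptr ownership threaded through the factory. Reduced to the essential idiom:

  // createX86Disassembler() wraps the table from createMCInstrInfo()
  // immediately and moves ownership into the disassembler, so the explicit
  // ~X86GenericDisassembler() with its `delete MII;` can be dropped.
  std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
  return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII));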
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c deleted file mode 100644 index 0801c96..0000000 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ /dev/null @@ -1,1821 +0,0 @@ -/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* - * - * The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===* - * - * This file is part of the X86 Disassembler. - * It contains the implementation of the instruction decoder. - * Documentation for the disassembler can be found in X86Disassembler.h. - * - *===----------------------------------------------------------------------===*/ - -#include <stdarg.h> /* for va_*() */ -#include <stdio.h> /* for vsnprintf() */ -#include <stdlib.h> /* for exit() */ -#include <string.h> /* for memset() */ - -#include "X86DisassemblerDecoder.h" - -#include "X86GenDisassemblerTables.inc" - -#define TRUE 1 -#define FALSE 0 - -#ifndef NDEBUG -#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) -#else -#define debug(s) do { } while (0) -#endif - - -/* - * contextForAttrs - Client for the instruction context table. Takes a set of - * attributes and returns the appropriate decode context. - * - * @param attrMask - Attributes, from the enumeration attributeBits. - * @return - The InstructionContext to use when looking up an - * an instruction with these attributes. - */ -static InstructionContext contextForAttrs(uint16_t attrMask) { - return CONTEXTS_SYM[attrMask]; } - -/* - * modRMRequired - Reads the appropriate instruction table to determine whether - * the ModR/M byte is required to decode a particular instruction. - * - * @param type - The opcode type (i.e., how many bytes it has). - * @param insnContext - The context for the instruction, as returned by - * contextForAttrs. - * @param opcode - The last byte of the instruction's opcode, not counting - * ModR/M extensions and escapes. - * @return - TRUE if the ModR/M byte is required, FALSE otherwise. - */ -static int modRMRequired(OpcodeType type, - InstructionContext insnContext, - uint16_t opcode) { - const struct ContextDecision* decision = 0; - - switch (type) { - case ONEBYTE: - decision = &ONEBYTE_SYM; - break; - case TWOBYTE: - decision = &TWOBYTE_SYM; - break; - case THREEBYTE_38: - decision = &THREEBYTE38_SYM; - break; - case THREEBYTE_3A: - decision = &THREEBYTE3A_SYM; - break; - case XOP8_MAP: - decision = &XOP8_MAP_SYM; - break; - case XOP9_MAP: - decision = &XOP9_MAP_SYM; - break; - case XOPA_MAP: - decision = &XOPA_MAP_SYM; - break; - } - - return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. - modrm_type != MODRM_ONEENTRY; -} - -/* - * decode - Reads the appropriate instruction table to obtain the unique ID of - * an instruction. - * - * @param type - See modRMRequired(). - * @param insnContext - See modRMRequired(). - * @param opcode - See modRMRequired(). - * @param modRM - The ModR/M byte if required, or any value if not. - * @return - The UID of the instruction, or 0 on failure.
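Editorial example (not in the original file): for MODRM_SPLITREG the table stores 16 candidate IDs per opcode; decode() below indexes the first 8 with (modRM >> 3) & 0x7 when mod != 0b11 (memory forms) and the second 8, at offset +8, when mod == 0b11 (register forms):

  /* e.g. modRM = 0xD9: mod = 3, reg field = 3  ->  modRMTable[IDs + 3 + 8] */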
- */ -static InstrUID decode(OpcodeType type, - InstructionContext insnContext, - uint8_t opcode, - uint8_t modRM) { - const struct ModRMDecision* dec = 0; - - switch (type) { - case ONEBYTE: - dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; - break; - case TWOBYTE: - dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; - break; - case THREEBYTE_38: - dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; - break; - case THREEBYTE_3A: - dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; - break; - case XOP8_MAP: - dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; - break; - case XOP9_MAP: - dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; - break; - case XOPA_MAP: - dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; - break; - } - - switch (dec->modrm_type) { - default: - debug("Corrupt table! Unknown modrm_type"); - return 0; - case MODRM_ONEENTRY: - return modRMTable[dec->instructionIDs]; - case MODRM_SPLITRM: - if (modFromModRM(modRM) == 0x3) - return modRMTable[dec->instructionIDs+1]; - return modRMTable[dec->instructionIDs]; - case MODRM_SPLITREG: - if (modFromModRM(modRM) == 0x3) - return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; - return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; - case MODRM_SPLITMISC: - if (modFromModRM(modRM) == 0x3) - return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; - return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; - case MODRM_FULL: - return modRMTable[dec->instructionIDs+modRM]; - } -} - -/* - * specifierForUID - Given a UID, returns the name and operand specification for - * that instruction. - * - * @param uid - The unique ID for the instruction. This should be returned by - * decode(); specifierForUID will not check bounds. - * @return - A pointer to the specification for that instruction. - */ -static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { - return &INSTRUCTIONS_SYM[uid]; -} - -/* - * consumeByte - Uses the reader function provided by the user to consume one - * byte from the instruction's memory and advance the cursor. - * - * @param insn - The instruction with the reader function to use. The cursor - * for this instruction is advanced. - * @param byte - A pointer to a pre-allocated memory buffer to be populated - * with the data read. - * @return - 0 if the read was successful; nonzero otherwise. - */ -static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { - int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); - - if (!ret) - ++(insn->readerCursor); - - return ret; -} - -/* - * lookAtByte - Like consumeByte, but does not advance the cursor. - * - * @param insn - See consumeByte(). - * @param byte - See consumeByte(). - * @return - See consumeByte(). 
- */ -static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { - return insn->reader(insn->readerArg, byte, insn->readerCursor); -} - -static void unconsumeByte(struct InternalInstruction* insn) { - insn->readerCursor--; -} - -#define CONSUME_FUNC(name, type) \ - static int name(struct InternalInstruction* insn, type* ptr) { \ - type combined = 0; \ - unsigned offset; \ - for (offset = 0; offset < sizeof(type); ++offset) { \ - uint8_t byte; \ - int ret = insn->reader(insn->readerArg, \ - &byte, \ - insn->readerCursor + offset); \ - if (ret) \ - return ret; \ - combined = combined | ((uint64_t)byte << (offset * 8)); \ - } \ - *ptr = combined; \ - insn->readerCursor += sizeof(type); \ - return 0; \ - } - -/* - * consume* - Use the reader function provided by the user to consume data - * values of various sizes from the instruction's memory and advance the - * cursor appropriately. These readers perform endian conversion. - * - * @param insn - See consumeByte(). - * @param ptr - A pointer to a pre-allocated memory of appropriate size to - * be populated with the data read. - * @return - See consumeByte(). - */ -CONSUME_FUNC(consumeInt8, int8_t) -CONSUME_FUNC(consumeInt16, int16_t) -CONSUME_FUNC(consumeInt32, int32_t) -CONSUME_FUNC(consumeUInt16, uint16_t) -CONSUME_FUNC(consumeUInt32, uint32_t) -CONSUME_FUNC(consumeUInt64, uint64_t) - -/* - * dbgprintf - Uses the logging function provided by the user to log a single - * message, typically without a carriage-return. - * - * @param insn - The instruction containing the logging function. - * @param format - See printf(). - * @param ... - See printf(). - */ -static void dbgprintf(struct InternalInstruction* insn, - const char* format, - ...) { - char buffer[256]; - va_list ap; - - if (!insn->dlog) - return; - - va_start(ap, format); - (void)vsnprintf(buffer, sizeof(buffer), format, ap); - va_end(ap); - - insn->dlog(insn->dlogArg, buffer); - - return; -} - -/* - * setPrefixPresent - Marks that a particular prefix is present at a particular - * location. - * - * @param insn - The instruction to be marked as having the prefix. - * @param prefix - The prefix that is present. - * @param location - The location where the prefix is located (in the address - * space of the instruction's reader). - */ -static void setPrefixPresent(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ - insn->prefixPresent[prefix] = 1; - insn->prefixLocations[prefix] = location; -} - -/* - * isPrefixAtLocation - Queries an instruction to determine whether a prefix is - * present at a given location. - * - * @param insn - The instruction to be queried. - * @param prefix - The prefix. - * @param location - The location to query. - * @return - Whether the prefix is at that location. - */ -static BOOL isPrefixAtLocation(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ - if (insn->prefixPresent[prefix] == 1 && - insn->prefixLocations[prefix] == location) - return TRUE; - else - return FALSE; -} - -/* - * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the - * instruction as having them. Also sets the instruction's default operand, - * address, and other relevant data sizes to report operands correctly. - * - * @param insn - The instruction whose prefixes are to be read. - * @return - 0 if the instruction could be read until the end of the prefix - * bytes, and no prefixes conflicted; nonzero otherwise. 
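Editorial note (not in the original file): readPrefixes() below recognizes the four legacy prefix groups, warning when a group repeats:

  group 1: 0xF0 (LOCK), 0xF2 (REPNE/REPNZ), 0xF3 (REP/REPE/REPZ)
  group 2: 0x2E, 0x36, 0x3E, 0x26, 0x64, 0x65 (segment overrides)
  group 3: 0x66 (operand-size override)
  group 4: 0x67 (address-size override)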
- */ -static int readPrefixes(struct InternalInstruction* insn) { - BOOL isPrefix = TRUE; - BOOL prefixGroups[4] = { FALSE }; - uint64_t prefixLocation; - uint8_t byte = 0; - uint8_t nextByte; - - BOOL hasAdSize = FALSE; - BOOL hasOpSize = FALSE; - - dbgprintf(insn, "readPrefixes()"); - - while (isPrefix) { - prefixLocation = insn->readerCursor; - - /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ - if (consumeByte(insn, &byte)) - break; - - /* - * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then - * break and let it be disassembled as a normal "instruction". - */ - if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) - break; - - if (insn->readerCursor - 1 == insn->startLocation - && (byte == 0xf2 || byte == 0xf3) - && !lookAtByte(insn, &nextByte)) - { - /* - * If the byte is 0xf2 or 0xf3, and any of the following conditions are - * met: - * - it is followed by a LOCK (0xf0) prefix - * - it is followed by an xchg instruction - * then it should be disassembled as a xacquire/xrelease not repne/rep. - */ - if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) | - ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) - insn->xAcquireRelease = TRUE; - /* - * Also if the byte is 0xf3, and the following condition is met: - * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or - * "mov mem, imm" (opcode 0xc6/0xc7) instructions. - * then it should be disassembled as an xrelease not rep. - */ - if (byte == 0xf3 && - (nextByte == 0x88 || nextByte == 0x89 || - nextByte == 0xc6 || nextByte == 0xc7)) - insn->xAcquireRelease = TRUE; - if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { - if (consumeByte(insn, &nextByte)) - return -1; - if (lookAtByte(insn, &nextByte)) - return -1; - unconsumeByte(insn); - } - if (nextByte != 0x0f && nextByte != 0x90) - break; - } - - switch (byte) { - case 0xf0: /* LOCK */ - case 0xf2: /* REPNE/REPNZ */ - case 0xf3: /* REP or REPE/REPZ */ - if (prefixGroups[0]) - dbgprintf(insn, "Redundant Group 1 prefix"); - prefixGroups[0] = TRUE; - setPrefixPresent(insn, byte, prefixLocation); - break; - case 0x2e: /* CS segment override -OR- Branch not taken */ - case 0x36: /* SS segment override -OR- Branch taken */ - case 0x3e: /* DS segment override */ - case 0x26: /* ES segment override */ - case 0x64: /* FS segment override */ - case 0x65: /* GS segment override */ - switch (byte) { - case 0x2e: - insn->segmentOverride = SEG_OVERRIDE_CS; - break; - case 0x36: - insn->segmentOverride = SEG_OVERRIDE_SS; - break; - case 0x3e: - insn->segmentOverride = SEG_OVERRIDE_DS; - break; - case 0x26: - insn->segmentOverride = SEG_OVERRIDE_ES; - break; - case 0x64: - insn->segmentOverride = SEG_OVERRIDE_FS; - break; - case 0x65: - insn->segmentOverride = SEG_OVERRIDE_GS; - break; - default: - debug("Unhandled override"); - return -1; - } - if (prefixGroups[1]) - dbgprintf(insn, "Redundant Group 2 prefix"); - prefixGroups[1] = TRUE; - setPrefixPresent(insn, byte, prefixLocation); - break; - case 0x66: /* Operand-size override */ - if (prefixGroups[2]) - dbgprintf(insn, "Redundant Group 3 prefix"); - prefixGroups[2] = TRUE; - hasOpSize = TRUE; - setPrefixPresent(insn, byte, prefixLocation); - break; - case 0x67: /* Address-size override */ - if (prefixGroups[3]) - dbgprintf(insn, "Redundant Group 4 prefix"); - prefixGroups[3] = TRUE; - hasAdSize = TRUE; - setPrefixPresent(insn, byte, prefixLocation); - break; - default: /* Not a prefix byte */ - isPrefix = FALSE; - break; - } - - if 
(isPrefix) - dbgprintf(insn, "Found prefix 0x%hhx", byte); - } - - insn->vectorExtensionType = TYPE_NO_VEX_XOP; - - if (byte == 0x62) { - uint8_t byte1, byte2; - - if (consumeByte(insn, &byte1)) { - dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); - return -1; - } - - if (lookAtByte(insn, &byte2)) { - dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); - return -1; - } - - if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && - ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { - insn->vectorExtensionType = TYPE_EVEX; - } - else { - unconsumeByte(insn); /* unconsume byte1 */ - unconsumeByte(insn); /* unconsume byte */ - insn->necessaryPrefixLocation = insn->readerCursor - 2; - } - - if (insn->vectorExtensionType == TYPE_EVEX) { - insn->vectorExtensionPrefix[0] = byte; - insn->vectorExtensionPrefix[1] = byte1; - if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { - dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); - return -1; - } - if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { - dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); - return -1; - } - - /* We simulate the REX prefix for simplicity's sake */ - if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 - | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) - | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) - | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) - | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); - } - - dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", - insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], - insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); - } - } - else if (byte == 0xc4) { - uint8_t byte1; - - if (lookAtByte(insn, &byte1)) { - dbgprintf(insn, "Couldn't read second byte of VEX"); - return -1; - } - - if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { - insn->vectorExtensionType = TYPE_VEX_3B; - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - - if (insn->vectorExtensionType == TYPE_VEX_3B) { - insn->vectorExtensionPrefix[0] = byte; - consumeByte(insn, &insn->vectorExtensionPrefix[1]); - consumeByte(insn, &insn->vectorExtensionPrefix[2]); - - /* We simulate the REX prefix for simplicity's sake */ - - if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 - | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) - | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) - | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) - | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); - } - - dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", - insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], - insn->vectorExtensionPrefix[2]); - } - } - else if (byte == 0xc5) { - uint8_t byte1; - - if (lookAtByte(insn, &byte1)) { - dbgprintf(insn, "Couldn't read second byte of VEX"); - return -1; - } - - if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { - insn->vectorExtensionType = TYPE_VEX_2B; - } - else { - unconsumeByte(insn); - } - - if (insn->vectorExtensionType == TYPE_VEX_2B) { - insn->vectorExtensionPrefix[0] = byte; - consumeByte(insn, &insn->vectorExtensionPrefix[1]); - - if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 - | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); - } - - switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) - { - default: - break; - case VEX_PREFIX_66: - hasOpSize = TRUE; - break; - } - - dbgprintf(insn, "Found 
VEX prefix 0x%hhx 0x%hhx", - insn->vectorExtensionPrefix[0], - insn->vectorExtensionPrefix[1]); - } - } - else if (byte == 0x8f) { - uint8_t byte1; - - if (lookAtByte(insn, &byte1)) { - dbgprintf(insn, "Couldn't read second byte of XOP"); - return -1; - } - - if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ - insn->vectorExtensionType = TYPE_XOP; - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - - if (insn->vectorExtensionType == TYPE_XOP) { - insn->vectorExtensionPrefix[0] = byte; - consumeByte(insn, &insn->vectorExtensionPrefix[1]); - consumeByte(insn, &insn->vectorExtensionPrefix[2]); - - /* We simulate the REX prefix for simplicity's sake */ - - if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 - | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) - | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) - | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) - | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); - } - - switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) - { - default: - break; - case VEX_PREFIX_66: - hasOpSize = TRUE; - break; - } - - dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", - insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], - insn->vectorExtensionPrefix[2]); - } - } - else { - if (insn->mode == MODE_64BIT) { - if ((byte & 0xf0) == 0x40) { - uint8_t opcodeByte; - - if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { - dbgprintf(insn, "Redundant REX prefix"); - return -1; - } - - insn->rexPrefix = byte; - insn->necessaryPrefixLocation = insn->readerCursor - 2; - - dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - } - - if (insn->mode == MODE_16BIT) { - insn->registerSize = (hasOpSize ? 4 : 2); - insn->addressSize = (hasAdSize ? 4 : 2); - insn->displacementSize = (hasAdSize ? 4 : 2); - insn->immediateSize = (hasOpSize ? 4 : 2); - } else if (insn->mode == MODE_32BIT) { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 2 : 4); - insn->displacementSize = (hasAdSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); - } else if (insn->mode == MODE_64BIT) { - if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { - insn->registerSize = 8; - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = 4; - insn->immediateSize = 4; - } else if (insn->rexPrefix) { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = (hasOpSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); - } else { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = (hasOpSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); - } - } - - return 0; -} - -/* - * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of - * extended or escape opcodes). - * - * @param insn - The instruction whose opcode is to be read. - * @return - 0 if the opcode could be read successfully; nonzero otherwise. 
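Editorial note (not in the original file): the escape-byte walk in readOpcode() below maps the leading opcode bytes onto the decode-table selectors:

  no escape   -> ONEBYTE
  0x0F        -> TWOBYTE
  0x0F 0x38   -> THREEBYTE_38
  0x0F 0x3A   -> THREEBYTE_3A
  (VEX/EVEX/XOP-prefixed instructions select the map directly from the
  prefix's mm/m-mmmm field instead.)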
- */ -static int readOpcode(struct InternalInstruction* insn) { - /* Determine the length of the primary opcode */ - - uint8_t current; - - dbgprintf(insn, "readOpcode()"); - - insn->opcodeType = ONEBYTE; - - if (insn->vectorExtensionType == TYPE_EVEX) - { - switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { - default: - dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", - mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); - return -1; - case VEX_LOB_0F: - insn->opcodeType = TWOBYTE; - return consumeByte(insn, &insn->opcode); - case VEX_LOB_0F38: - insn->opcodeType = THREEBYTE_38; - return consumeByte(insn, &insn->opcode); - case VEX_LOB_0F3A: - insn->opcodeType = THREEBYTE_3A; - return consumeByte(insn, &insn->opcode); - } - } - else if (insn->vectorExtensionType == TYPE_VEX_3B) { - switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { - default: - dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", - mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); - return -1; - case VEX_LOB_0F: - insn->opcodeType = TWOBYTE; - return consumeByte(insn, &insn->opcode); - case VEX_LOB_0F38: - insn->opcodeType = THREEBYTE_38; - return consumeByte(insn, &insn->opcode); - case VEX_LOB_0F3A: - insn->opcodeType = THREEBYTE_3A; - return consumeByte(insn, &insn->opcode); - } - } - else if (insn->vectorExtensionType == TYPE_VEX_2B) { - insn->opcodeType = TWOBYTE; - return consumeByte(insn, &insn->opcode); - } - else if (insn->vectorExtensionType == TYPE_XOP) { - switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { - default: - dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", - mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); - return -1; - case XOP_MAP_SELECT_8: - insn->opcodeType = XOP8_MAP; - return consumeByte(insn, &insn->opcode); - case XOP_MAP_SELECT_9: - insn->opcodeType = XOP9_MAP; - return consumeByte(insn, &insn->opcode); - case XOP_MAP_SELECT_A: - insn->opcodeType = XOPA_MAP; - return consumeByte(insn, &insn->opcode); - } - } - - if (consumeByte(insn, &current)) - return -1; - - if (current == 0x0f) { - dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); - - if (consumeByte(insn, &current)) - return -1; - - if (current == 0x38) { - dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - - if (consumeByte(insn, &current)) - return -1; - - insn->opcodeType = THREEBYTE_38; - } else if (current == 0x3a) { - dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - - if (consumeByte(insn, &current)) - return -1; - - insn->opcodeType = THREEBYTE_3A; - } else { - dbgprintf(insn, "Didn't find a three-byte escape prefix"); - - insn->opcodeType = TWOBYTE; - } - } - - /* - * At this point we have consumed the full opcode. - * Anything we consume from here on must be unconsumed. - */ - - insn->opcode = current; - - return 0; -} - -static int readModRM(struct InternalInstruction* insn); - -/* - * getIDWithAttrMask - Determines the ID of an instruction, consuming - * the ModR/M byte as appropriate for extended and escape opcodes, - * and using a supplied attribute mask. - * - * @param instructionID - A pointer whose target is filled in with the ID of the - * instruction. - * @param insn - The instruction whose ID is to be determined. - * @param attrMask - The attribute mask to search. - * @return - 0 if the ModR/M could be read when needed or was not - * needed; nonzero otherwise.
- */ -static int getIDWithAttrMask(uint16_t* instructionID, - struct InternalInstruction* insn, - uint16_t attrMask) { - BOOL hasModRMExtension; - - uint16_t instructionClass; - - instructionClass = contextForAttrs(attrMask); - - hasModRMExtension = modRMRequired(insn->opcodeType, - instructionClass, - insn->opcode); - - if (hasModRMExtension) { - if (readModRM(insn)) - return -1; - - *instructionID = decode(insn->opcodeType, - instructionClass, - insn->opcode, - insn->modRM); - } else { - *instructionID = decode(insn->opcodeType, - instructionClass, - insn->opcode, - 0); - } - - return 0; -} - -/* - * is16BitEquivalent - Determines whether two instruction names refer to - * equivalent instructions but one is 16-bit whereas the other is not. - * - * @param orig - The instruction that is not 16-bit - * @param equiv - The instruction that is 16-bit - */ -static BOOL is16BitEquivalent(const char* orig, const char* equiv) { - off_t i; - - for (i = 0;; i++) { - if (orig[i] == '\0' && equiv[i] == '\0') - return TRUE; - if (orig[i] == '\0' || equiv[i] == '\0') - return FALSE; - if (orig[i] != equiv[i]) { - if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') - continue; - if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') - continue; - if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') - continue; - return FALSE; - } - } -} - -/* - * getID - Determines the ID of an instruction, consuming the ModR/M byte as - * appropriate for extended and escape opcodes. Determines the attributes and - * context for the instruction before doing so. - * - * @param insn - The instruction whose ID is to be determined. - * @return - 0 if the ModR/M could be read when needed or was not needed; - * nonzero otherwise. - */ -static int getID(struct InternalInstruction* insn, const void *miiArg) { - uint16_t attrMask; - uint16_t instructionID; - - dbgprintf(insn, "getID()"); - - attrMask = ATTR_NONE; - - if (insn->mode == MODE_64BIT) - attrMask |= ATTR_64BIT; - - if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { - attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? 
ATTR_EVEX : ATTR_VEX; - - if (insn->vectorExtensionType == TYPE_EVEX) { - switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { - case VEX_PREFIX_66: - attrMask |= ATTR_OPSIZE; - break; - case VEX_PREFIX_F3: - attrMask |= ATTR_XS; - break; - case VEX_PREFIX_F2: - attrMask |= ATTR_XD; - break; - } - - if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) - attrMask |= ATTR_EVEXKZ; - if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) - attrMask |= ATTR_EVEXB; - if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) - attrMask |= ATTR_EVEXK; - if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) - attrMask |= ATTR_EVEXL; - if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) - attrMask |= ATTR_EVEXL2; - } - else if (insn->vectorExtensionType == TYPE_VEX_3B) { - switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { - case VEX_PREFIX_66: - attrMask |= ATTR_OPSIZE; - break; - case VEX_PREFIX_F3: - attrMask |= ATTR_XS; - break; - case VEX_PREFIX_F2: - attrMask |= ATTR_XD; - break; - } - - if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) - attrMask |= ATTR_VEXL; - } - else if (insn->vectorExtensionType == TYPE_VEX_2B) { - switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { - case VEX_PREFIX_66: - attrMask |= ATTR_OPSIZE; - break; - case VEX_PREFIX_F3: - attrMask |= ATTR_XS; - break; - case VEX_PREFIX_F2: - attrMask |= ATTR_XD; - break; - } - - if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) - attrMask |= ATTR_VEXL; - } - else if (insn->vectorExtensionType == TYPE_XOP) { - switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { - case VEX_PREFIX_66: - attrMask |= ATTR_OPSIZE; - break; - case VEX_PREFIX_F3: - attrMask |= ATTR_XS; - break; - case VEX_PREFIX_F2: - attrMask |= ATTR_XD; - break; - } - - if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) - attrMask |= ATTR_VEXL; - } - else { - return -1; - } - } - else { - if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) - attrMask |= ATTR_OPSIZE; - else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) - attrMask |= ATTR_ADSIZE; - else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XS; - else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XD; - } - - if (insn->rexPrefix & 0x08) - attrMask |= ATTR_REXW; - - if (getIDWithAttrMask(&instructionID, insn, attrMask)) - return -1; - - /* - * JCXZ/JECXZ need special handling for 16-bit mode because the meaning - * of the AdSize prefix is inverted w.r.t. 32-bit mode. - */ - if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) { - const struct InstructionSpecifier *spec; - spec = specifierForUID(instructionID); - - /* - * Check for Ii8PCRel instructions. We could alternatively do a - * string-compare on the names, but this is probably cheaper. - */ - if (x86OperandSets[spec->operands][0].type == TYPE_REL8) { - attrMask ^= ATTR_ADSIZE; - if (getIDWithAttrMask(&instructionID, insn, attrMask)) - return -1; - } - } - - /* The following clauses compensate for limitations of the tables. */ - - if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && - !(attrMask & ATTR_OPSIZE)) { - /* - * The instruction tables make no distinction between instructions that - * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a - * particular spot (i.e., many MMX operations). In general we're - * conservative, but in the specific case where OpSize is present but not - * in the right place we check if there's a 16-bit operation. 
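Editorial example (not in the original file): is16BitEquivalent(), defined above and used just below, accepts pairs such as orig = "MOV32rr", equiv = "MOV16rr" -- the names differ only at '3' vs '1' and '2' vs '6', both of which the matcher treats as a 32-bit vs 16-bit width change.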
- */ - - const struct InstructionSpecifier *spec; - uint16_t instructionIDWithOpsize; - const char *specName, *specWithOpSizeName; - - spec = specifierForUID(instructionID); - - if (getIDWithAttrMask(&instructionIDWithOpsize, - insn, - attrMask | ATTR_OPSIZE)) { - /* - * ModRM required with OpSize but not present; give up and return version - * without OpSize set - */ - - insn->instructionID = instructionID; - insn->spec = spec; - return 0; - } - - specName = x86DisassemblerGetInstrName(instructionID, miiArg); - specWithOpSizeName = - x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); - - if (is16BitEquivalent(specName, specWithOpSizeName) && - (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) { - insn->instructionID = instructionIDWithOpsize; - insn->spec = specifierForUID(instructionIDWithOpsize); - } else { - insn->instructionID = instructionID; - insn->spec = spec; - } - return 0; - } - - if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && - insn->rexPrefix & 0x01) { - /* - * NOOP shouldn't decode as NOOP if REX.b is set. Instead - * it should decode as XCHG %r8, %eax. - */ - - const struct InstructionSpecifier *spec; - uint16_t instructionIDWithNewOpcode; - const struct InstructionSpecifier *specWithNewOpcode; - - spec = specifierForUID(instructionID); - - /* Borrow opcode from one of the other XCHGar opcodes */ - insn->opcode = 0x91; - - if (getIDWithAttrMask(&instructionIDWithNewOpcode, - insn, - attrMask)) { - insn->opcode = 0x90; - - insn->instructionID = instructionID; - insn->spec = spec; - return 0; - } - - specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); - - /* Change back */ - insn->opcode = 0x90; - - insn->instructionID = instructionIDWithNewOpcode; - insn->spec = specWithNewOpcode; - - return 0; - } - - insn->instructionID = instructionID; - insn->spec = specifierForUID(insn->instructionID); - - return 0; -} - -/* - * readSIB - Consumes the SIB byte to determine addressing information for an - * instruction. - * - * @param insn - The instruction whose SIB byte is to be read. - * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 
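Editorial note (not in the original file): SIB field layout consumed by readSIB() below, with REX (and EVEX) supplying extra high bits:

  scale = bits 7:6  (values 0,1,2,3 -> x1,x2,x4,x8)
  index = bits 5:3  | REX.X << 3  (| EVEX V2 << 4)
  base  = bits 2:0  | REX.B << 3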
- */ -static int readSIB(struct InternalInstruction* insn) { - SIBIndex sibIndexBase = 0; - SIBBase sibBaseBase = 0; - uint8_t index, base; - - dbgprintf(insn, "readSIB()"); - - if (insn->consumedSIB) - return 0; - - insn->consumedSIB = TRUE; - - switch (insn->addressSize) { - case 2: - dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); - return -1; - case 4: - sibIndexBase = SIB_INDEX_EAX; - sibBaseBase = SIB_BASE_EAX; - break; - case 8: - sibIndexBase = SIB_INDEX_RAX; - sibBaseBase = SIB_BASE_RAX; - break; - } - - if (consumeByte(insn, &insn->sib)) - return -1; - - index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); - if (insn->vectorExtensionType == TYPE_EVEX) - index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4; - - switch (index) { - case 0x4: - insn->sibIndex = SIB_INDEX_NONE; - break; - default: - insn->sibIndex = (SIBIndex)(sibIndexBase + index); - if (insn->sibIndex == SIB_INDEX_sib || - insn->sibIndex == SIB_INDEX_sib64) - insn->sibIndex = SIB_INDEX_NONE; - break; - } - - switch (scaleFromSIB(insn->sib)) { - case 0: - insn->sibScale = 1; - break; - case 1: - insn->sibScale = 2; - break; - case 2: - insn->sibScale = 4; - break; - case 3: - insn->sibScale = 8; - break; - } - - base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); - - switch (base) { - case 0x5: - case 0xd: - switch (modFromModRM(insn->modRM)) { - case 0x0: - insn->eaDisplacement = EA_DISP_32; - insn->sibBase = SIB_BASE_NONE; - break; - case 0x1: - insn->eaDisplacement = EA_DISP_8; - insn->sibBase = (SIBBase)(sibBaseBase + base); - break; - case 0x2: - insn->eaDisplacement = EA_DISP_32; - insn->sibBase = (SIBBase)(sibBaseBase + base); - break; - case 0x3: - debug("Cannot have Mod = 0b11 and a SIB byte"); - return -1; - } - break; - default: - insn->sibBase = (SIBBase)(sibBaseBase + base); - break; - } - - return 0; -} - -/* - * readDisplacement - Consumes the displacement of an instruction. - * - * @param insn - The instruction whose displacement is to be read. - * @return - 0 if the displacement byte was successfully read; nonzero - * otherwise. - */ -static int readDisplacement(struct InternalInstruction* insn) { - int8_t d8; - int16_t d16; - int32_t d32; - - dbgprintf(insn, "readDisplacement()"); - - if (insn->consumedDisplacement) - return 0; - - insn->consumedDisplacement = TRUE; - insn->displacementOffset = insn->readerCursor - insn->startLocation; - - switch (insn->eaDisplacement) { - case EA_DISP_NONE: - insn->consumedDisplacement = FALSE; - break; - case EA_DISP_8: - if (consumeInt8(insn, &d8)) - return -1; - insn->displacement = d8; - break; - case EA_DISP_16: - if (consumeInt16(insn, &d16)) - return -1; - insn->displacement = d16; - break; - case EA_DISP_32: - if (consumeInt32(insn, &d32)) - return -1; - insn->displacement = d32; - break; - } - - insn->consumedDisplacement = TRUE; - return 0; -} - -/* - * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and - * displacement) for an instruction and interprets it. - * - * @param insn - The instruction whose addressing information is to be read. - * @return - 0 if the information was successfully read; nonzero otherwise. 
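Editorial note (not in the original file): the ModR/M byte that readModRM() consumes below packs mod = bits 7:6, reg = bits 5:3 and rm = bits 2:0; REX.R widens reg and REX.B widens rm by one bit each, matching the arithmetic in the function:

  mod = (modRM & 0xC0) >> 6;
  reg = ((modRM & 0x38) >> 3) | (rFromREX(rexPrefix) << 3);
  rm  = (modRM & 0x07)        | (bFromREX(rexPrefix) << 3);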
- */ -static int readModRM(struct InternalInstruction* insn) { - uint8_t mod, rm, reg; - - dbgprintf(insn, "readModRM()"); - - if (insn->consumedModRM) - return 0; - - if (consumeByte(insn, &insn->modRM)) - return -1; - insn->consumedModRM = TRUE; - - mod = modFromModRM(insn->modRM); - rm = rmFromModRM(insn->modRM); - reg = regFromModRM(insn->modRM); - - /* - * This goes by insn->registerSize to pick the correct register, which messes - * up if we're using (say) XMM or 8-bit register operands. That gets fixed in - * fixupReg(). - */ - switch (insn->registerSize) { - case 2: - insn->regBase = MODRM_REG_AX; - insn->eaRegBase = EA_REG_AX; - break; - case 4: - insn->regBase = MODRM_REG_EAX; - insn->eaRegBase = EA_REG_EAX; - break; - case 8: - insn->regBase = MODRM_REG_RAX; - insn->eaRegBase = EA_REG_RAX; - break; - } - - reg |= rFromREX(insn->rexPrefix) << 3; - rm |= bFromREX(insn->rexPrefix) << 3; - if (insn->vectorExtensionType == TYPE_EVEX) { - reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; - rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; - } - - insn->reg = (Reg)(insn->regBase + reg); - - switch (insn->addressSize) { - case 2: - insn->eaBaseBase = EA_BASE_BX_SI; - - switch (mod) { - case 0x0: - if (rm == 0x6) { - insn->eaBase = EA_BASE_NONE; - insn->eaDisplacement = EA_DISP_16; - if (readDisplacement(insn)) - return -1; - } else { - insn->eaBase = (EABase)(insn->eaBaseBase + rm); - insn->eaDisplacement = EA_DISP_NONE; - } - break; - case 0x1: - insn->eaBase = (EABase)(insn->eaBaseBase + rm); - insn->eaDisplacement = EA_DISP_8; - insn->displacementSize = 1; - if (readDisplacement(insn)) - return -1; - break; - case 0x2: - insn->eaBase = (EABase)(insn->eaBaseBase + rm); - insn->eaDisplacement = EA_DISP_16; - if (readDisplacement(insn)) - return -1; - break; - case 0x3: - insn->eaBase = (EABase)(insn->eaRegBase + rm); - if (readDisplacement(insn)) - return -1; - break; - } - break; - case 4: - case 8: - insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); - - switch (mod) { - case 0x0: - insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ - switch (rm) { - case 0x14: - case 0x4: - case 0xc: /* in case REXW.b is set */ - insn->eaBase = (insn->addressSize == 4 ? - EA_BASE_sib : EA_BASE_sib64); - if (readSIB(insn) || readDisplacement(insn)) - return -1; - break; - case 0x5: - insn->eaBase = EA_BASE_NONE; - insn->eaDisplacement = EA_DISP_32; - if (readDisplacement(insn)) - return -1; - break; - default: - insn->eaBase = (EABase)(insn->eaBaseBase + rm); - break; - } - break; - case 0x1: - insn->displacementSize = 1; - /* FALLTHROUGH */ - case 0x2: - insn->eaDisplacement = (mod == 0x1 ? 
EA_DISP_8 : EA_DISP_32); - switch (rm) { - case 0x14: - case 0x4: - case 0xc: /* in case REXW.b is set */ - insn->eaBase = EA_BASE_sib; - if (readSIB(insn) || readDisplacement(insn)) - return -1; - break; - default: - insn->eaBase = (EABase)(insn->eaBaseBase + rm); - if (readDisplacement(insn)) - return -1; - break; - } - break; - case 0x3: - insn->eaDisplacement = EA_DISP_NONE; - insn->eaBase = (EABase)(insn->eaRegBase + rm); - break; - } - break; - } /* switch (insn->addressSize) */ - - return 0; -} - -#define GENERIC_FIXUP_FUNC(name, base, prefix) \ - static uint8_t name(struct InternalInstruction *insn, \ - OperandType type, \ - uint8_t index, \ - uint8_t *valid) { \ - *valid = 1; \ - switch (type) { \ - default: \ - debug("Unhandled register type"); \ - *valid = 0; \ - return 0; \ - case TYPE_Rv: \ - return base + index; \ - case TYPE_R8: \ - if (insn->rexPrefix && \ - index >= 4 && index <= 7) { \ - return prefix##_SPL + (index - 4); \ - } else { \ - return prefix##_AL + index; \ - } \ - case TYPE_R16: \ - return prefix##_AX + index; \ - case TYPE_R32: \ - return prefix##_EAX + index; \ - case TYPE_R64: \ - return prefix##_RAX + index; \ - case TYPE_XMM512: \ - return prefix##_ZMM0 + index; \ - case TYPE_XMM256: \ - return prefix##_YMM0 + index; \ - case TYPE_XMM128: \ - case TYPE_XMM64: \ - case TYPE_XMM32: \ - case TYPE_XMM: \ - return prefix##_XMM0 + index; \ - case TYPE_VK1: \ - case TYPE_VK8: \ - case TYPE_VK16: \ - return prefix##_K0 + index; \ - case TYPE_MM64: \ - case TYPE_MM32: \ - case TYPE_MM: \ - if (index > 7) \ - *valid = 0; \ - return prefix##_MM0 + index; \ - case TYPE_SEGMENTREG: \ - if (index > 5) \ - *valid = 0; \ - return prefix##_ES + index; \ - case TYPE_DEBUGREG: \ - if (index > 7) \ - *valid = 0; \ - return prefix##_DR0 + index; \ - case TYPE_CONTROLREG: \ - if (index > 8) \ - *valid = 0; \ - return prefix##_CR0 + index; \ - } \ - } - -/* - * fixup*Value - Consults an operand type to determine the meaning of the - * reg or R/M field. If the operand is an XMM operand, for example, an - * operand would be XMM0 instead of AX, which readModRM() would otherwise - * misinterpret it as. - * - * @param insn - The instruction containing the operand. - * @param type - The operand type. - * @param index - The existing value of the field as reported by readModRM(). - * @param valid - The address of a uint8_t. The target is set to 1 if the - * field is valid for the register class; 0 if not. - * @return - The proper value. - */ -GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) -GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) - -/* - * fixupReg - Consults an operand specifier to determine which of the - * fixup*Value functions to use in correcting readModRM()'ss interpretation. - * - * @param insn - See fixup*Value(). - * @param op - The operand specifier. - * @return - 0 if fixup was successful; -1 if the register returned was - * invalid for its class. 
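Editorial example (not in the original file): the rebasing fixupReg() performs via fixupRegValue()/fixupRMValue(): a raw field value of 2 decodes as EDX for a TYPE_R32 operand (MODRM_REG_EAX + 2) but as XMM2 for a TYPE_XMM128 operand (MODRM_REG_XMM0 + 2); *valid is cleared when the index exceeds the register class, e.g. a segment-register index above 5.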
- */ -static int fixupReg(struct InternalInstruction *insn, - const struct OperandSpecifier *op) { - uint8_t valid; - - dbgprintf(insn, "fixupReg()"); - - switch ((OperandEncoding)op->encoding) { - default: - debug("Expected a REG or R/M encoding in fixupReg"); - return -1; - case ENCODING_VVVV: - insn->vvvv = (Reg)fixupRegValue(insn, - (OperandType)op->type, - insn->vvvv, - &valid); - if (!valid) - return -1; - break; - case ENCODING_REG: - insn->reg = (Reg)fixupRegValue(insn, - (OperandType)op->type, - insn->reg - insn->regBase, - &valid); - if (!valid) - return -1; - break; - case ENCODING_RM: - if (insn->eaBase >= insn->eaRegBase) { - insn->eaBase = (EABase)fixupRMValue(insn, - (OperandType)op->type, - insn->eaBase - insn->eaRegBase, - &valid); - if (!valid) - return -1; - } - break; - } - - return 0; -} - -/* - * readOpcodeRegister - Reads an operand from the opcode field of an - * instruction and interprets it appropriately given the operand width. - * Handles AddRegFrm instructions. - * - * @param insn - the instruction whose opcode field is to be read. - * @param size - The width (in bytes) of the register being specified. - * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means - * RAX. - * @return - 0 on success; nonzero otherwise. - */ -static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { - dbgprintf(insn, "readOpcodeRegister()"); - - if (size == 0) - size = insn->registerSize; - - switch (size) { - case 1: - insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) - | (insn->opcode & 7))); - if (insn->rexPrefix && - insn->opcodeRegister >= MODRM_REG_AL + 0x4 && - insn->opcodeRegister < MODRM_REG_AL + 0x8) { - insn->opcodeRegister = (Reg)(MODRM_REG_SPL - + (insn->opcodeRegister - MODRM_REG_AL - 4)); - } - - break; - case 2: - insn->opcodeRegister = (Reg)(MODRM_REG_AX - + ((bFromREX(insn->rexPrefix) << 3) - | (insn->opcode & 7))); - break; - case 4: - insn->opcodeRegister = (Reg)(MODRM_REG_EAX - + ((bFromREX(insn->rexPrefix) << 3) - | (insn->opcode & 7))); - break; - case 8: - insn->opcodeRegister = (Reg)(MODRM_REG_RAX - + ((bFromREX(insn->rexPrefix) << 3) - | (insn->opcode & 7))); - break; - } - - return 0; -} - -/* - * readImmediate - Consumes an immediate operand from an instruction, given the - * desired operand size. - * - * @param insn - The instruction whose operand is to be read. - * @param size - The width (in bytes) of the operand. - * @return - 0 if the immediate was successfully consumed; nonzero - * otherwise. 
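Editorial note (not in the original file): in readImmediate() below, size == 0 means "use insn->immediateSize", the operand-size-dependent default computed in readPrefixes(); at most two immediates are buffered per instruction (see the numImmediatesConsumed guard).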
- */ -static int readImmediate(struct InternalInstruction* insn, uint8_t size) { - uint8_t imm8; - uint16_t imm16; - uint32_t imm32; - uint64_t imm64; - - dbgprintf(insn, "readImmediate()"); - - if (insn->numImmediatesConsumed == 2) { - debug("Already consumed two immediates"); - return -1; - } - - if (size == 0) - size = insn->immediateSize; - else - insn->immediateSize = size; - insn->immediateOffset = insn->readerCursor - insn->startLocation; - - switch (size) { - case 1: - if (consumeByte(insn, &imm8)) - return -1; - insn->immediates[insn->numImmediatesConsumed] = imm8; - break; - case 2: - if (consumeUInt16(insn, &imm16)) - return -1; - insn->immediates[insn->numImmediatesConsumed] = imm16; - break; - case 4: - if (consumeUInt32(insn, &imm32)) - return -1; - insn->immediates[insn->numImmediatesConsumed] = imm32; - break; - case 8: - if (consumeUInt64(insn, &imm64)) - return -1; - insn->immediates[insn->numImmediatesConsumed] = imm64; - break; - } - - insn->numImmediatesConsumed++; - - return 0; -} - -/* - * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. - * - * @param insn - The instruction whose operand is to be read. - * @return - 0 if the vvvv was successfully consumed; nonzero - * otherwise. - */ -static int readVVVV(struct InternalInstruction* insn) { - dbgprintf(insn, "readVVVV()"); - - if (insn->vectorExtensionType == TYPE_EVEX) - insn->vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]); - else if (insn->vectorExtensionType == TYPE_VEX_3B) - insn->vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); - else if (insn->vectorExtensionType == TYPE_VEX_2B) - insn->vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); - else if (insn->vectorExtensionType == TYPE_XOP) - insn->vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); - else - return -1; - - if (insn->mode != MODE_64BIT) - insn->vvvv &= 0x7; - - return 0; -} - -/* - * readMaskRegister - Reads an mask register from the opcode field of an - * instruction. - * - * @param insn - The instruction whose opcode field is to be read. - * @return - 0 on success; nonzero otherwise. - */ -static int readMaskRegister(struct InternalInstruction* insn) { - dbgprintf(insn, "readMaskRegister()"); - - if (insn->vectorExtensionType != TYPE_EVEX) - return -1; - - insn->writemask = aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]); - return 0; -} - -/* - * readOperands - Consults the specifier for an instruction and consumes all - * operands for that instruction, interpreting them as it goes. - * - * @param insn - The instruction whose operands are to be read and interpreted. - * @return - 0 if all operands could be read; nonzero otherwise. - */ -static int readOperands(struct InternalInstruction* insn) { - int index; - int hasVVVV, needVVVV; - int sawRegImm = 0; - - dbgprintf(insn, "readOperands()"); - - /* If non-zero vvvv specified, need to make sure one of the operands - uses it. 
*/ - hasVVVV = !readVVVV(insn); - needVVVV = hasVVVV && (insn->vvvv != 0); - - for (index = 0; index < X86_MAX_OPERANDS; ++index) { - switch (x86OperandSets[insn->spec->operands][index].encoding) { - case ENCODING_NONE: - case ENCODING_SI: - case ENCODING_DI: - break; - case ENCODING_REG: - case ENCODING_RM: - if (readModRM(insn)) - return -1; - if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) - return -1; - break; - case ENCODING_CB: - case ENCODING_CW: - case ENCODING_CD: - case ENCODING_CP: - case ENCODING_CO: - case ENCODING_CT: - dbgprintf(insn, "We currently don't hande code-offset encodings"); - return -1; - case ENCODING_IB: - if (sawRegImm) { - /* Saw a register immediate so don't read again and instead split the - previous immediate. FIXME: This is a hack. */ - insn->immediates[insn->numImmediatesConsumed] = - insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; - ++insn->numImmediatesConsumed; - break; - } - if (readImmediate(insn, 1)) - return -1; - if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && - insn->immediates[insn->numImmediatesConsumed - 1] > 7) - return -1; - if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && - insn->immediates[insn->numImmediatesConsumed - 1] > 31) - return -1; - if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || - x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) - sawRegImm = 1; - break; - case ENCODING_IW: - if (readImmediate(insn, 2)) - return -1; - break; - case ENCODING_ID: - if (readImmediate(insn, 4)) - return -1; - break; - case ENCODING_IO: - if (readImmediate(insn, 8)) - return -1; - break; - case ENCODING_Iv: - if (readImmediate(insn, insn->immediateSize)) - return -1; - break; - case ENCODING_Ia: - if (readImmediate(insn, insn->addressSize)) - return -1; - break; - case ENCODING_RB: - if (readOpcodeRegister(insn, 1)) - return -1; - break; - case ENCODING_RW: - if (readOpcodeRegister(insn, 2)) - return -1; - break; - case ENCODING_RD: - if (readOpcodeRegister(insn, 4)) - return -1; - break; - case ENCODING_RO: - if (readOpcodeRegister(insn, 8)) - return -1; - break; - case ENCODING_Rv: - if (readOpcodeRegister(insn, 0)) - return -1; - break; - case ENCODING_FP: - break; - case ENCODING_VVVV: - needVVVV = 0; /* Mark that we have found a VVVV operand. */ - if (!hasVVVV) - return -1; - if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) - return -1; - break; - case ENCODING_WRITEMASK: - if (readMaskRegister(insn)) - return -1; - break; - case ENCODING_DUP: - break; - default: - dbgprintf(insn, "Encountered an operand with an unknown encoding."); - return -1; - } - } - - /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ - if (needVVVV) return -1; - - return 0; -} - -/* - * decodeInstruction - Reads and interprets a full instruction provided by the - * user. - * - * @param insn - A pointer to the instruction to be populated. Must be - * pre-allocated. - * @param reader - The function to be used to read the instruction's bytes. - * @param readerArg - A generic argument to be passed to the reader to store - * any internal state. - * @param logger - If non-NULL, the function to be used to write log messages - * and warnings. - * @param loggerArg - A generic argument to be passed to the logger to store - * any internal state. - * @param startLoc - The address (in the reader's address space) of the first - * byte in the instruction. 
- * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
- *                    decode the instruction in.
- * @return          - 0 if the instruction's memory could be read; nonzero if
- *                    not.
- */
-int decodeInstruction(struct InternalInstruction* insn,
-                      byteReader_t reader,
-                      const void* readerArg,
-                      dlog_t logger,
-                      void* loggerArg,
-                      const void* miiArg,
-                      uint64_t startLoc,
-                      DisassemblerMode mode) {
-  memset(insn, 0, sizeof(struct InternalInstruction));
-
-  insn->reader = reader;
-  insn->readerArg = readerArg;
-  insn->dlog = logger;
-  insn->dlogArg = loggerArg;
-  insn->startLocation = startLoc;
-  insn->readerCursor = startLoc;
-  insn->mode = mode;
-  insn->numImmediatesConsumed = 0;
-
-  if (readPrefixes(insn) ||
-      readOpcode(insn) ||
-      getID(insn, miiArg) ||
-      insn->instructionID == 0 ||
-      readOperands(insn))
-    return -1;
-
-  insn->operands = &x86OperandSets[insn->spec->operands][0];
-
-  insn->length = insn->readerCursor - insn->startLocation;
-
-  dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu",
-            startLoc, insn->readerCursor, insn->length);
-
-  if (insn->length > 15)
-    dbgprintf(insn, "Instruction exceeds 15-byte limit");
-
-  return 0;
-}
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
new file mode 100644
index 0000000..804606d
--- /dev/null
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -0,0 +1,1838 @@
+//===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains the implementation of the instruction decoder.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdarg.h>   /* for va_*() */
+#include <stdio.h>    /* for vsnprintf() */
+#include <stdlib.h>   /* for exit() */
+#include <string.h>   /* for memset() */
+
+#include "X86DisassemblerDecoder.h"
+
+using namespace llvm::X86Disassembler;
+
+/// Specifies whether a ModR/M byte is needed and (if so) which
+/// instruction each possible value of the ModR/M byte corresponds to. Once
+/// this information is known, we have narrowed down to a single instruction.
+struct ModRMDecision {
+  uint8_t modrm_type;
+  uint16_t instructionIDs;
+};
+
+/// Specifies which set of ModR/M->instruction tables to look at
+/// given a particular opcode.
+struct OpcodeDecision {
+  ModRMDecision modRMDecisions[256];
+};
+
+/// Specifies which opcode->instruction tables to look at given
+/// a particular context (set of attributes). Since there are many possible
+/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
+/// applies given a specific set of attributes. Hence there are only IC_max
+/// entries in this table, rather than 2^(ATTR_max).
+struct ContextDecision {
+  OpcodeDecision opcodeDecisions[IC_max];
+};
+
+#include "X86GenDisassemblerTables.inc"
+
+#ifndef NDEBUG
+#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
+#else
+#define debug(s) do { } while (0)
+#endif
+
+
+/*
+ * contextForAttrs - Client for the instruction context table. Takes a set of
+ *   attributes and returns the appropriate decode context.
+ *
+ * @param attrMask - Attributes, from the enumeration attributeBits.
+ * @return         - The InstructionContext to use when looking up
+ *                   an instruction with these attributes.
+ */
+static InstructionContext contextForAttrs(uint16_t attrMask) {
+  return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
+}
+
+/*
+ * modRMRequired - Reads the appropriate instruction table to determine whether
+ *   the ModR/M byte is required to decode a particular instruction.
+ *
+ * @param type        - The opcode type (i.e., how many bytes it has).
+ * @param insnContext - The context for the instruction, as returned by
+ *                      contextForAttrs.
+ * @param opcode      - The last byte of the instruction's opcode, not counting
+ *                      ModR/M extensions and escapes.
+ * @return            - true if the ModR/M byte is required, false otherwise.
+ */
+static int modRMRequired(OpcodeType type,
+                         InstructionContext insnContext,
+                         uint16_t opcode) {
+  const struct ContextDecision* decision = nullptr;
+
+  switch (type) {
+  case ONEBYTE:
+    decision = &ONEBYTE_SYM;
+    break;
+  case TWOBYTE:
+    decision = &TWOBYTE_SYM;
+    break;
+  case THREEBYTE_38:
+    decision = &THREEBYTE38_SYM;
+    break;
+  case THREEBYTE_3A:
+    decision = &THREEBYTE3A_SYM;
+    break;
+  case XOP8_MAP:
+    decision = &XOP8_MAP_SYM;
+    break;
+  case XOP9_MAP:
+    decision = &XOP9_MAP_SYM;
+    break;
+  case XOPA_MAP:
+    decision = &XOPA_MAP_SYM;
+    break;
+  }
+
+  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
+    modrm_type != MODRM_ONEENTRY;
+}
+
+/*
+ * decode - Reads the appropriate instruction table to obtain the unique ID of
+ *   an instruction.
+ *
+ * @param type        - See modRMRequired().
+ * @param insnContext - See modRMRequired().
+ * @param opcode      - See modRMRequired().
+ * @param modRM       - The ModR/M byte if required, or any value if not.
+ * @return            - The UID of the instruction, or 0 on failure.
+ */
+static InstrUID decode(OpcodeType type,
+                       InstructionContext insnContext,
+                       uint8_t opcode,
+                       uint8_t modRM) {
+  const struct ModRMDecision* dec = nullptr;
+
+  switch (type) {
+  case ONEBYTE:
+    dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case TWOBYTE:
+    dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case THREEBYTE_38:
+    dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case THREEBYTE_3A:
+    dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case XOP8_MAP:
+    dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case XOP9_MAP:
+    dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  case XOPA_MAP:
+    dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+    break;
+  }
+
+  switch (dec->modrm_type) {
+  default:
+    debug("Corrupt table! Unknown modrm_type");
+    return 0;
+  case MODRM_ONEENTRY:
+    return modRMTable[dec->instructionIDs];
+  case MODRM_SPLITRM:
+    if (modFromModRM(modRM) == 0x3)
+      return modRMTable[dec->instructionIDs+1];
+    return modRMTable[dec->instructionIDs];
+  case MODRM_SPLITREG:
+    if (modFromModRM(modRM) == 0x3)
+      return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
+    return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
+  case MODRM_SPLITMISC:
+    if (modFromModRM(modRM) == 0x3)
+      return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8];
+    return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
+  case MODRM_FULL:
+    return modRMTable[dec->instructionIDs+modRM];
+  }
+}
+
+/*
+ * specifierForUID - Given a UID, returns the name and operand specification for
+ *   that instruction.
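+ *
+ * As a refresher on the byte these tables key on, here is a minimal,
+ * standalone sketch of the ModR/M field split; the bit positions match the
+ * modFromModRM/regFromModRM/rmFromModRM accessors defined in the header:
+ *
+ *   #include <cstdint>
+ *   #include <cstdio>
+ *
+ *   int main() {
+ *     uint8_t modRM = 0xd9;                // 11 011 001
+ *     unsigned mod = (modRM & 0xc0) >> 6;  // addressing form (3 = reg-direct)
+ *     unsigned reg = (modRM & 0x38) >> 3;  // register or opcode extension
+ *     unsigned rm  = modRM & 0x7;          // base register or register operand
+ *     std::printf("mod=%u reg=%u rm=%u\n", mod, reg, rm);  // mod=3 reg=3 rm=1
+ *   }
+ *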
+ * + * @param uid - The unique ID for the instruction. This should be returned by + * decode(); specifierForUID will not check bounds. + * @return - A pointer to the specification for that instruction. + */ +static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { + return &INSTRUCTIONS_SYM[uid]; +} + +/* + * consumeByte - Uses the reader function provided by the user to consume one + * byte from the instruction's memory and advance the cursor. + * + * @param insn - The instruction with the reader function to use. The cursor + * for this instruction is advanced. + * @param byte - A pointer to a pre-allocated memory buffer to be populated + * with the data read. + * @return - 0 if the read was successful; nonzero otherwise. + */ +static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { + int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); + + if (!ret) + ++(insn->readerCursor); + + return ret; +} + +/* + * lookAtByte - Like consumeByte, but does not advance the cursor. + * + * @param insn - See consumeByte(). + * @param byte - See consumeByte(). + * @return - See consumeByte(). + */ +static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { + return insn->reader(insn->readerArg, byte, insn->readerCursor); +} + +static void unconsumeByte(struct InternalInstruction* insn) { + insn->readerCursor--; +} + +#define CONSUME_FUNC(name, type) \ + static int name(struct InternalInstruction* insn, type* ptr) { \ + type combined = 0; \ + unsigned offset; \ + for (offset = 0; offset < sizeof(type); ++offset) { \ + uint8_t byte; \ + int ret = insn->reader(insn->readerArg, \ + &byte, \ + insn->readerCursor + offset); \ + if (ret) \ + return ret; \ + combined = combined | ((uint64_t)byte << (offset * 8)); \ + } \ + *ptr = combined; \ + insn->readerCursor += sizeof(type); \ + return 0; \ + } + +/* + * consume* - Use the reader function provided by the user to consume data + * values of various sizes from the instruction's memory and advance the + * cursor appropriately. These readers perform endian conversion. + * + * @param insn - See consumeByte(). + * @param ptr - A pointer to a pre-allocated memory of appropriate size to + * be populated with the data read. + * @return - See consumeByte(). + */ +CONSUME_FUNC(consumeInt8, int8_t) +CONSUME_FUNC(consumeInt16, int16_t) +CONSUME_FUNC(consumeInt32, int32_t) +CONSUME_FUNC(consumeUInt16, uint16_t) +CONSUME_FUNC(consumeUInt32, uint32_t) +CONSUME_FUNC(consumeUInt64, uint64_t) + +/* + * dbgprintf - Uses the logging function provided by the user to log a single + * message, typically without a carriage-return. + * + * @param insn - The instruction containing the logging function. + * @param format - See printf(). + * @param ... - See printf(). + */ +static void dbgprintf(struct InternalInstruction* insn, + const char* format, + ...) { + char buffer[256]; + va_list ap; + + if (!insn->dlog) + return; + + va_start(ap, format); + (void)vsnprintf(buffer, sizeof(buffer), format, ap); + va_end(ap); + + insn->dlog(insn->dlogArg, buffer); + + return; +} + +/* + * setPrefixPresent - Marks that a particular prefix is present at a particular + * location. + * + * @param insn - The instruction to be marked as having the prefix. + * @param prefix - The prefix that is present. + * @param location - The location where the prefix is located (in the address + * space of the instruction's reader). 
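+ *
+ * A note on the CONSUME_FUNC readers defined above: they assemble multi-byte
+ * values in little-endian order, the byte at offset k contributing bits k*8
+ * and up. A self-contained sketch of the same loop for a fixed 32-bit width:
+ *
+ *   #include <cstdint>
+ *   #include <cstdio>
+ *
+ *   static uint32_t readLE32(const uint8_t *p) {
+ *     uint32_t combined = 0;
+ *     for (unsigned offset = 0; offset < sizeof(uint32_t); ++offset)
+ *       combined |= (uint32_t)p[offset] << (offset * 8);  // low byte first
+ *     return combined;
+ *   }
+ *
+ *   int main() {
+ *     const uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12};
+ *     std::printf("0x%08x\n", readLE32(bytes));  // prints 0x12345678
+ *   }
+ *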
+ */ +static void setPrefixPresent(struct InternalInstruction* insn, + uint8_t prefix, + uint64_t location) +{ + insn->prefixPresent[prefix] = 1; + insn->prefixLocations[prefix] = location; +} + +/* + * isPrefixAtLocation - Queries an instruction to determine whether a prefix is + * present at a given location. + * + * @param insn - The instruction to be queried. + * @param prefix - The prefix. + * @param location - The location to query. + * @return - Whether the prefix is at that location. + */ +static bool isPrefixAtLocation(struct InternalInstruction* insn, + uint8_t prefix, + uint64_t location) +{ + if (insn->prefixPresent[prefix] == 1 && + insn->prefixLocations[prefix] == location) + return true; + else + return false; +} + +/* + * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the + * instruction as having them. Also sets the instruction's default operand, + * address, and other relevant data sizes to report operands correctly. + * + * @param insn - The instruction whose prefixes are to be read. + * @return - 0 if the instruction could be read until the end of the prefix + * bytes, and no prefixes conflicted; nonzero otherwise. + */ +static int readPrefixes(struct InternalInstruction* insn) { + bool isPrefix = true; + bool prefixGroups[4] = { false }; + uint64_t prefixLocation; + uint8_t byte = 0; + uint8_t nextByte; + + bool hasAdSize = false; + bool hasOpSize = false; + + dbgprintf(insn, "readPrefixes()"); + + while (isPrefix) { + prefixLocation = insn->readerCursor; + + /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ + if (consumeByte(insn, &byte)) + break; + + /* + * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then + * break and let it be disassembled as a normal "instruction". + */ + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + break; + + if (insn->readerCursor - 1 == insn->startLocation + && (byte == 0xf2 || byte == 0xf3) + && !lookAtByte(insn, &nextByte)) + { + /* + * If the byte is 0xf2 or 0xf3, and any of the following conditions are + * met: + * - it is followed by a LOCK (0xf0) prefix + * - it is followed by an xchg instruction + * then it should be disassembled as a xacquire/xrelease not repne/rep. + */ + if ((byte == 0xf2 || byte == 0xf3) && + ((nextByte == 0xf0) | + ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) + insn->xAcquireRelease = true; + /* + * Also if the byte is 0xf3, and the following condition is met: + * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or + * "mov mem, imm" (opcode 0xc6/0xc7) instructions. + * then it should be disassembled as an xrelease not rep. 
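+ *
+ * The switch below sorts legacy prefix bytes into the four classic groups.
+ * A compact sketch of the same classification, testable in isolation:
+ *
+ *   #include <cstdint>
+ *
+ *   static int prefixGroup(uint8_t byte) {
+ *     switch (byte) {
+ *     case 0xf0: case 0xf2: case 0xf3:          // LOCK, REPNE, REP
+ *       return 0;
+ *     case 0x2e: case 0x36: case 0x3e:
+ *     case 0x26: case 0x64: case 0x65:          // segment overrides
+ *       return 1;
+ *     case 0x66: return 2;                      // operand-size override
+ *     case 0x67: return 3;                      // address-size override
+ *     default:   return -1;                     // not a legacy prefix
+ *     }
+ *   }
+ *
+ *   int main() { return prefixGroup(0xf3) == 0 && prefixGroup(0x90) == -1 ? 0 : 1; }
+ *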
+ */ + if (byte == 0xf3 && + (nextByte == 0x88 || nextByte == 0x89 || + nextByte == 0xc6 || nextByte == 0xc7)) + insn->xAcquireRelease = true; + if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { + if (consumeByte(insn, &nextByte)) + return -1; + if (lookAtByte(insn, &nextByte)) + return -1; + unconsumeByte(insn); + } + if (nextByte != 0x0f && nextByte != 0x90) + break; + } + + switch (byte) { + case 0xf0: /* LOCK */ + case 0xf2: /* REPNE/REPNZ */ + case 0xf3: /* REP or REPE/REPZ */ + if (prefixGroups[0]) + dbgprintf(insn, "Redundant Group 1 prefix"); + prefixGroups[0] = true; + setPrefixPresent(insn, byte, prefixLocation); + break; + case 0x2e: /* CS segment override -OR- Branch not taken */ + case 0x36: /* SS segment override -OR- Branch taken */ + case 0x3e: /* DS segment override */ + case 0x26: /* ES segment override */ + case 0x64: /* FS segment override */ + case 0x65: /* GS segment override */ + switch (byte) { + case 0x2e: + insn->segmentOverride = SEG_OVERRIDE_CS; + break; + case 0x36: + insn->segmentOverride = SEG_OVERRIDE_SS; + break; + case 0x3e: + insn->segmentOverride = SEG_OVERRIDE_DS; + break; + case 0x26: + insn->segmentOverride = SEG_OVERRIDE_ES; + break; + case 0x64: + insn->segmentOverride = SEG_OVERRIDE_FS; + break; + case 0x65: + insn->segmentOverride = SEG_OVERRIDE_GS; + break; + default: + debug("Unhandled override"); + return -1; + } + if (prefixGroups[1]) + dbgprintf(insn, "Redundant Group 2 prefix"); + prefixGroups[1] = true; + setPrefixPresent(insn, byte, prefixLocation); + break; + case 0x66: /* Operand-size override */ + if (prefixGroups[2]) + dbgprintf(insn, "Redundant Group 3 prefix"); + prefixGroups[2] = true; + hasOpSize = true; + setPrefixPresent(insn, byte, prefixLocation); + break; + case 0x67: /* Address-size override */ + if (prefixGroups[3]) + dbgprintf(insn, "Redundant Group 4 prefix"); + prefixGroups[3] = true; + hasAdSize = true; + setPrefixPresent(insn, byte, prefixLocation); + break; + default: /* Not a prefix byte */ + isPrefix = false; + break; + } + + if (isPrefix) + dbgprintf(insn, "Found prefix 0x%hhx", byte); + } + + insn->vectorExtensionType = TYPE_NO_VEX_XOP; + + if (byte == 0x62) { + uint8_t byte1, byte2; + + if (consumeByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); + return -1; + } + + if (lookAtByte(insn, &byte2)) { + dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); + return -1; + } + + if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && + ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { + insn->vectorExtensionType = TYPE_EVEX; + } + else { + unconsumeByte(insn); /* unconsume byte1 */ + unconsumeByte(insn); /* unconsume byte */ + insn->necessaryPrefixLocation = insn->readerCursor - 2; + } + + if (insn->vectorExtensionType == TYPE_EVEX) { + insn->vectorExtensionPrefix[0] = byte; + insn->vectorExtensionPrefix[1] = byte1; + if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { + dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); + return -1; + } + if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { + dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); + return -1; + } + + /* We simulate the REX prefix for simplicity's sake */ + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) + | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) + | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) + | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); + } + + dbgprintf(insn, "Found 
EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", + insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], + insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); + } + } + else if (byte == 0xc4) { + uint8_t byte1; + + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of VEX"); + return -1; + } + + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { + insn->vectorExtensionType = TYPE_VEX_3B; + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + + if (insn->vectorExtensionType == TYPE_VEX_3B) { + insn->vectorExtensionPrefix[0] = byte; + consumeByte(insn, &insn->vectorExtensionPrefix[1]); + consumeByte(insn, &insn->vectorExtensionPrefix[2]); + + /* We simulate the REX prefix for simplicity's sake */ + + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) + | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) + | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) + | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); + } + + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", + insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], + insn->vectorExtensionPrefix[2]); + } + } + else if (byte == 0xc5) { + uint8_t byte1; + + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of VEX"); + return -1; + } + + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { + insn->vectorExtensionType = TYPE_VEX_2B; + } + else { + unconsumeByte(insn); + } + + if (insn->vectorExtensionType == TYPE_VEX_2B) { + insn->vectorExtensionPrefix[0] = byte; + consumeByte(insn, &insn->vectorExtensionPrefix[1]); + + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); + } + + switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) + { + default: + break; + case VEX_PREFIX_66: + hasOpSize = true; + break; + } + + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", + insn->vectorExtensionPrefix[0], + insn->vectorExtensionPrefix[1]); + } + } + else if (byte == 0x8f) { + uint8_t byte1; + + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of XOP"); + return -1; + } + + if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. 
*/ + insn->vectorExtensionType = TYPE_XOP; + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + + if (insn->vectorExtensionType == TYPE_XOP) { + insn->vectorExtensionPrefix[0] = byte; + consumeByte(insn, &insn->vectorExtensionPrefix[1]); + consumeByte(insn, &insn->vectorExtensionPrefix[2]); + + /* We simulate the REX prefix for simplicity's sake */ + + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) + | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) + | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) + | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); + } + + switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) + { + default: + break; + case VEX_PREFIX_66: + hasOpSize = true; + break; + } + + dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", + insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], + insn->vectorExtensionPrefix[2]); + } + } + else { + if (insn->mode == MODE_64BIT) { + if ((byte & 0xf0) == 0x40) { + uint8_t opcodeByte; + + if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { + dbgprintf(insn, "Redundant REX prefix"); + return -1; + } + + insn->rexPrefix = byte; + insn->necessaryPrefixLocation = insn->readerCursor - 2; + + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); + } else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + } else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + } + + if (insn->mode == MODE_16BIT) { + insn->registerSize = (hasOpSize ? 4 : 2); + insn->addressSize = (hasAdSize ? 4 : 2); + insn->displacementSize = (hasAdSize ? 4 : 2); + insn->immediateSize = (hasOpSize ? 4 : 2); + } else if (insn->mode == MODE_32BIT) { + insn->registerSize = (hasOpSize ? 2 : 4); + insn->addressSize = (hasAdSize ? 2 : 4); + insn->displacementSize = (hasAdSize ? 2 : 4); + insn->immediateSize = (hasOpSize ? 2 : 4); + } else if (insn->mode == MODE_64BIT) { + if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { + insn->registerSize = 8; + insn->addressSize = (hasAdSize ? 4 : 8); + insn->displacementSize = 4; + insn->immediateSize = 4; + } else if (insn->rexPrefix) { + insn->registerSize = (hasOpSize ? 2 : 4); + insn->addressSize = (hasAdSize ? 4 : 8); + insn->displacementSize = (hasOpSize ? 2 : 4); + insn->immediateSize = (hasOpSize ? 2 : 4); + } else { + insn->registerSize = (hasOpSize ? 2 : 4); + insn->addressSize = (hasAdSize ? 4 : 8); + insn->displacementSize = (hasOpSize ? 2 : 4); + insn->immediateSize = (hasOpSize ? 2 : 4); + } + } + + return 0; +} + +/* + * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of + * extended or escape opcodes). + * + * @param insn - The instruction whose opcode is to be read. + * @return - 0 if the opcode could be read successfully; nonzero otherwise. 
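+ *
+ * A note on the size selection at the end of readPrefixes() above: for the
+ * register-operand width it condenses to a small pure function (address and
+ * displacement sizes follow their own rows of that ladder):
+ *
+ *   static int registerSize(int mode, bool hasOpSize, bool rexW) {
+ *     if (mode == 16) return hasOpSize ? 4 : 2;  // 0x66 flips 16 -> 32
+ *     if (mode == 32) return hasOpSize ? 2 : 4;  // 0x66 flips 32 -> 16
+ *     return rexW ? 8 : (hasOpSize ? 2 : 4);     // 64-bit: REX.W wins over 0x66
+ *   }
+ *
+ *   int main() { return registerSize(64, true, true) == 8 ? 0 : 1; }
+ *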
+ */
+static int readOpcode(struct InternalInstruction* insn) {
+  /* Determine the length of the primary opcode */
+
+  uint8_t current;
+
+  dbgprintf(insn, "readOpcode()");
+
+  insn->opcodeType = ONEBYTE;
+
+  if (insn->vectorExtensionType == TYPE_EVEX)
+  {
+    switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
+    default:
+      dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)",
+                mmFromEVEX2of4(insn->vectorExtensionPrefix[1]));
+      return -1;
+    case VEX_LOB_0F:
+      insn->opcodeType = TWOBYTE;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F38:
+      insn->opcodeType = THREEBYTE_38;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F3A:
+      insn->opcodeType = THREEBYTE_3A;
+      return consumeByte(insn, &insn->opcode);
+    }
+  }
+  else if (insn->vectorExtensionType == TYPE_VEX_3B) {
+    switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
+    default:
+      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
+                mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
+      return -1;
+    case VEX_LOB_0F:
+      insn->opcodeType = TWOBYTE;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F38:
+      insn->opcodeType = THREEBYTE_38;
+      return consumeByte(insn, &insn->opcode);
+    case VEX_LOB_0F3A:
+      insn->opcodeType = THREEBYTE_3A;
+      return consumeByte(insn, &insn->opcode);
+    }
+  }
+  else if (insn->vectorExtensionType == TYPE_VEX_2B) {
+    insn->opcodeType = TWOBYTE;
+    return consumeByte(insn, &insn->opcode);
+  }
+  else if (insn->vectorExtensionType == TYPE_XOP) {
+    switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
+    default:
+      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)",
+                mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));
+      return -1;
+    case XOP_MAP_SELECT_8:
+      insn->opcodeType = XOP8_MAP;
+      return consumeByte(insn, &insn->opcode);
+    case XOP_MAP_SELECT_9:
+      insn->opcodeType = XOP9_MAP;
+      return consumeByte(insn, &insn->opcode);
+    case XOP_MAP_SELECT_A:
+      insn->opcodeType = XOPA_MAP;
+      return consumeByte(insn, &insn->opcode);
+    }
+  }
+
+  if (consumeByte(insn, &current))
+    return -1;
+
+  if (current == 0x0f) {
+    dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current);
+
+    if (consumeByte(insn, &current))
+      return -1;
+
+    if (current == 0x38) {
+      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+      if (consumeByte(insn, &current))
+        return -1;
+
+      insn->opcodeType = THREEBYTE_38;
+    } else if (current == 0x3a) {
+      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+      if (consumeByte(insn, &current))
+        return -1;
+
+      insn->opcodeType = THREEBYTE_3A;
+    } else {
+      dbgprintf(insn, "Didn't find a three-byte escape prefix");
+
+      insn->opcodeType = TWOBYTE;
+    }
+  }
+
+  /*
+   * At this point we have consumed the full opcode.
+   * Anything we consume from here on must be unconsumed.
+   */
+
+  insn->opcode = current;
+
+  return 0;
+}
+
+static int readModRM(struct InternalInstruction* insn);
+
+/*
+ * getIDWithAttrMask - Determines the ID of an instruction, consuming
+ *   the ModR/M byte as appropriate for extended and escape opcodes,
+ *   and using a supplied attribute mask.
+ *
+ * @param instructionID - A pointer whose target is filled in with the ID of the
+ *                        instruction.
+ * @param insn          - The instruction whose ID is to be determined.
+ * @param attrMask      - The attribute mask to search.
+ * @return              - 0 if the ModR/M could be read when needed or was not
+ *                        needed; nonzero otherwise.
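+ *
+ * On the escape handling in readOpcode() above, the opcode-map choice reduces
+ * to a three-way classification, sketched standalone here:
+ *
+ *   #include <cstdint>
+ *   // No 0x0f: one-byte map; 0x0f: two-byte map; 0x0f 0x38 and 0x0f 0x3a
+ *   // select the three-byte maps. Caller supplies at least two bytes.
+ *   static const char *opcodeMap(const uint8_t *b) {
+ *     if (b[0] != 0x0f) return "ONEBYTE";
+ *     if (b[1] == 0x38) return "THREEBYTE_38";
+ *     if (b[1] == 0x3a) return "THREEBYTE_3A";
+ *     return "TWOBYTE";
+ *   }
+ *   int main() {
+ *     const uint8_t seq[] = {0x0f, 0x38};
+ *     return opcodeMap(seq)[0] == 'T' ? 0 : 1;  // THREEBYTE_38
+ *   }
+ *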
+ */ +static int getIDWithAttrMask(uint16_t* instructionID, + struct InternalInstruction* insn, + uint16_t attrMask) { + bool hasModRMExtension; + + InstructionContext instructionClass = contextForAttrs(attrMask); + + hasModRMExtension = modRMRequired(insn->opcodeType, + instructionClass, + insn->opcode); + + if (hasModRMExtension) { + if (readModRM(insn)) + return -1; + + *instructionID = decode(insn->opcodeType, + instructionClass, + insn->opcode, + insn->modRM); + } else { + *instructionID = decode(insn->opcodeType, + instructionClass, + insn->opcode, + 0); + } + + return 0; +} + +/* + * is16BitEquivalent - Determines whether two instruction names refer to + * equivalent instructions but one is 16-bit whereas the other is not. + * + * @param orig - The instruction that is not 16-bit + * @param equiv - The instruction that is 16-bit + */ +static bool is16BitEquivalent(const char* orig, const char* equiv) { + off_t i; + + for (i = 0;; i++) { + if (orig[i] == '\0' && equiv[i] == '\0') + return true; + if (orig[i] == '\0' || equiv[i] == '\0') + return false; + if (orig[i] != equiv[i]) { + if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') + continue; + if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') + continue; + if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') + continue; + return false; + } + } +} + +/* + * getID - Determines the ID of an instruction, consuming the ModR/M byte as + * appropriate for extended and escape opcodes. Determines the attributes and + * context for the instruction before doing so. + * + * @param insn - The instruction whose ID is to be determined. + * @return - 0 if the ModR/M could be read when needed or was not needed; + * nonzero otherwise. + */ +static int getID(struct InternalInstruction* insn, const void *miiArg) { + uint16_t attrMask; + uint16_t instructionID; + + dbgprintf(insn, "getID()"); + + attrMask = ATTR_NONE; + + if (insn->mode == MODE_64BIT) + attrMask |= ATTR_64BIT; + + if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { + attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? 
ATTR_EVEX : ATTR_VEX; + + if (insn->vectorExtensionType == TYPE_EVEX) { + switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXKZ; + if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXB; + if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXK; + if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXL; + if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) + attrMask |= ATTR_EVEXL2; + } + else if (insn->vectorExtensionType == TYPE_VEX_3B) { + switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) + attrMask |= ATTR_VEXL; + } + else if (insn->vectorExtensionType == TYPE_VEX_2B) { + switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) + attrMask |= ATTR_VEXL; + } + else if (insn->vectorExtensionType == TYPE_XOP) { + switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) + attrMask |= ATTR_VEXL; + } + else { + return -1; + } + } + else { + if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + attrMask |= ATTR_OPSIZE; + else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) + attrMask |= ATTR_ADSIZE; + else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) + attrMask |= ATTR_XS; + else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) + attrMask |= ATTR_XD; + } + + if (insn->rexPrefix & 0x08) + attrMask |= ATTR_REXW; + + if (getIDWithAttrMask(&instructionID, insn, attrMask)) + return -1; + + /* + * JCXZ/JECXZ need special handling for 16-bit mode because the meaning + * of the AdSize prefix is inverted w.r.t. 32-bit mode. + */ + if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) { + const struct InstructionSpecifier *spec; + spec = specifierForUID(instructionID); + + /* + * Check for Ii8PCRel instructions. We could alternatively do a + * string-compare on the names, but this is probably cheaper. + */ + if (x86OperandSets[spec->operands][0].type == TYPE_REL8) { + attrMask ^= ATTR_ADSIZE; + if (getIDWithAttrMask(&instructionID, insn, attrMask)) + return -1; + } + } + + /* The following clauses compensate for limitations of the tables. */ + + if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && + !(attrMask & ATTR_OPSIZE)) { + /* + * The instruction tables make no distinction between instructions that + * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a + * particular spot (i.e., many MMX operations). In general we're + * conservative, but in the specific case where OpSize is present but not + * in the right place we check if there's a 16-bit operation. 
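+ *
+ * For instance, is16BitEquivalent("ADD32rr", "ADD16rr") is true: the only
+ * mismatches are '3' vs. '1' and '2' vs. '6', both allowed substitutions.
+ * A standalone check, with the comparison copied from above:
+ *
+ *   static bool is16(const char *orig, const char *equiv) {
+ *     for (int i = 0;; ++i) {
+ *       if (orig[i] == 0 && equiv[i] == 0) return true;
+ *       if (orig[i] == 0 || equiv[i] == 0) return false;
+ *       if (orig[i] != equiv[i]) {
+ *         if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W') continue;
+ *         if ((orig[i] == '6' || orig[i] == '3') && equiv[i] == '1') continue;
+ *         if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6') continue;
+ *         return false;
+ *       }
+ *     }
+ *   }
+ *   int main() { return is16("ADD32rr", "ADD16rr") ? 0 : 1; }
+ *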
+ */ + + const struct InstructionSpecifier *spec; + uint16_t instructionIDWithOpsize; + const char *specName, *specWithOpSizeName; + + spec = specifierForUID(instructionID); + + if (getIDWithAttrMask(&instructionIDWithOpsize, + insn, + attrMask | ATTR_OPSIZE)) { + /* + * ModRM required with OpSize but not present; give up and return version + * without OpSize set + */ + + insn->instructionID = instructionID; + insn->spec = spec; + return 0; + } + + specName = GetInstrName(instructionID, miiArg); + specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); + + if (is16BitEquivalent(specName, specWithOpSizeName) && + (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) { + insn->instructionID = instructionIDWithOpsize; + insn->spec = specifierForUID(instructionIDWithOpsize); + } else { + insn->instructionID = instructionID; + insn->spec = spec; + } + return 0; + } + + if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && + insn->rexPrefix & 0x01) { + /* + * NOOP shouldn't decode as NOOP if REX.b is set. Instead + * it should decode as XCHG %r8, %eax. + */ + + const struct InstructionSpecifier *spec; + uint16_t instructionIDWithNewOpcode; + const struct InstructionSpecifier *specWithNewOpcode; + + spec = specifierForUID(instructionID); + + /* Borrow opcode from one of the other XCHGar opcodes */ + insn->opcode = 0x91; + + if (getIDWithAttrMask(&instructionIDWithNewOpcode, + insn, + attrMask)) { + insn->opcode = 0x90; + + insn->instructionID = instructionID; + insn->spec = spec; + return 0; + } + + specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); + + /* Change back */ + insn->opcode = 0x90; + + insn->instructionID = instructionIDWithNewOpcode; + insn->spec = specWithNewOpcode; + + return 0; + } + + insn->instructionID = instructionID; + insn->spec = specifierForUID(insn->instructionID); + + return 0; +} + +/* + * readSIB - Consumes the SIB byte to determine addressing information for an + * instruction. + * + * @param insn - The instruction whose SIB byte is to be read. + * @return - 0 if the SIB byte was successfully read; nonzero otherwise. 
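+ *
+ * The SIB byte encodes base + index*scale + displacement; the 2-bit scale
+ * field maps to 1/2/4/8 exactly as in the switch below. A worked sketch:
+ *
+ *   #include <cstdint>
+ *   #include <cstdio>
+ *   int main() {
+ *     uint8_t sib = 0x58;                          // scale=01 index=011 base=000
+ *     unsigned scale = 1u << ((sib & 0xc0) >> 6);  // 0..3 -> 1,2,4,8
+ *     unsigned index = (sib & 0x38) >> 3;          // 3 in this example
+ *     unsigned base  = sib & 0x7;                  // 0 in this example
+ *     // e.g. base register = 0x1000, index register = 0x20, disp8 = 8:
+ *     uint64_t ea = 0x1000 + 0x20 * scale + 8;
+ *     std::printf("scale=%u ea=0x%llx\n", scale, (unsigned long long)ea);
+ *     (void)index; (void)base;
+ *   }
+ *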
+ */ +static int readSIB(struct InternalInstruction* insn) { + SIBIndex sibIndexBase = SIB_INDEX_NONE; + SIBBase sibBaseBase = SIB_BASE_NONE; + uint8_t index, base; + + dbgprintf(insn, "readSIB()"); + + if (insn->consumedSIB) + return 0; + + insn->consumedSIB = true; + + switch (insn->addressSize) { + case 2: + dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); + return -1; + case 4: + sibIndexBase = SIB_INDEX_EAX; + sibBaseBase = SIB_BASE_EAX; + break; + case 8: + sibIndexBase = SIB_INDEX_RAX; + sibBaseBase = SIB_BASE_RAX; + break; + } + + if (consumeByte(insn, &insn->sib)) + return -1; + + index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); + if (insn->vectorExtensionType == TYPE_EVEX) + index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4; + + switch (index) { + case 0x4: + insn->sibIndex = SIB_INDEX_NONE; + break; + default: + insn->sibIndex = (SIBIndex)(sibIndexBase + index); + if (insn->sibIndex == SIB_INDEX_sib || + insn->sibIndex == SIB_INDEX_sib64) + insn->sibIndex = SIB_INDEX_NONE; + break; + } + + switch (scaleFromSIB(insn->sib)) { + case 0: + insn->sibScale = 1; + break; + case 1: + insn->sibScale = 2; + break; + case 2: + insn->sibScale = 4; + break; + case 3: + insn->sibScale = 8; + break; + } + + base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); + + switch (base) { + case 0x5: + case 0xd: + switch (modFromModRM(insn->modRM)) { + case 0x0: + insn->eaDisplacement = EA_DISP_32; + insn->sibBase = SIB_BASE_NONE; + break; + case 0x1: + insn->eaDisplacement = EA_DISP_8; + insn->sibBase = (SIBBase)(sibBaseBase + base); + break; + case 0x2: + insn->eaDisplacement = EA_DISP_32; + insn->sibBase = (SIBBase)(sibBaseBase + base); + break; + case 0x3: + debug("Cannot have Mod = 0b11 and a SIB byte"); + return -1; + } + break; + default: + insn->sibBase = (SIBBase)(sibBaseBase + base); + break; + } + + return 0; +} + +/* + * readDisplacement - Consumes the displacement of an instruction. + * + * @param insn - The instruction whose displacement is to be read. + * @return - 0 if the displacement byte was successfully read; nonzero + * otherwise. + */ +static int readDisplacement(struct InternalInstruction* insn) { + int8_t d8; + int16_t d16; + int32_t d32; + + dbgprintf(insn, "readDisplacement()"); + + if (insn->consumedDisplacement) + return 0; + + insn->consumedDisplacement = true; + insn->displacementOffset = insn->readerCursor - insn->startLocation; + + switch (insn->eaDisplacement) { + case EA_DISP_NONE: + insn->consumedDisplacement = false; + break; + case EA_DISP_8: + if (consumeInt8(insn, &d8)) + return -1; + insn->displacement = d8; + break; + case EA_DISP_16: + if (consumeInt16(insn, &d16)) + return -1; + insn->displacement = d16; + break; + case EA_DISP_32: + if (consumeInt32(insn, &d32)) + return -1; + insn->displacement = d32; + break; + } + + insn->consumedDisplacement = true; + return 0; +} + +/* + * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and + * displacement) for an instruction and interprets it. + * + * @param insn - The instruction whose addressing information is to be read. + * @return - 0 if the information was successfully read; nonzero otherwise. 
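+ *
+ * In the 16-bit path of readModRM() below, EA_BASE_BX_SI + rm walks the
+ * classic base-pair table. The order given here is the standard x86 encoding,
+ * stated for reference rather than quoted from this patch; rm == 6 under
+ * mod == 0 means a bare disp16 instead of BP, as the code handles specially:
+ *
+ *   #include <cstdio>
+ *   int main() {
+ *     static const char *base16[8] = {"BX+SI", "BX+DI", "BP+SI", "BP+DI",
+ *                                     "SI", "DI", "BP", "BX"};
+ *     for (int rm = 0; rm < 8; ++rm)
+ *       std::printf("rm=%d -> %s\n", rm, base16[rm]);  // rm=6: disp16 if mod==0
+ *   }
+ *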
+ */ +static int readModRM(struct InternalInstruction* insn) { + uint8_t mod, rm, reg; + + dbgprintf(insn, "readModRM()"); + + if (insn->consumedModRM) + return 0; + + if (consumeByte(insn, &insn->modRM)) + return -1; + insn->consumedModRM = true; + + mod = modFromModRM(insn->modRM); + rm = rmFromModRM(insn->modRM); + reg = regFromModRM(insn->modRM); + + /* + * This goes by insn->registerSize to pick the correct register, which messes + * up if we're using (say) XMM or 8-bit register operands. That gets fixed in + * fixupReg(). + */ + switch (insn->registerSize) { + case 2: + insn->regBase = MODRM_REG_AX; + insn->eaRegBase = EA_REG_AX; + break; + case 4: + insn->regBase = MODRM_REG_EAX; + insn->eaRegBase = EA_REG_EAX; + break; + case 8: + insn->regBase = MODRM_REG_RAX; + insn->eaRegBase = EA_REG_RAX; + break; + } + + reg |= rFromREX(insn->rexPrefix) << 3; + rm |= bFromREX(insn->rexPrefix) << 3; + if (insn->vectorExtensionType == TYPE_EVEX) { + reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; + rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; + } + + insn->reg = (Reg)(insn->regBase + reg); + + switch (insn->addressSize) { + case 2: + insn->eaBaseBase = EA_BASE_BX_SI; + + switch (mod) { + case 0x0: + if (rm == 0x6) { + insn->eaBase = EA_BASE_NONE; + insn->eaDisplacement = EA_DISP_16; + if (readDisplacement(insn)) + return -1; + } else { + insn->eaBase = (EABase)(insn->eaBaseBase + rm); + insn->eaDisplacement = EA_DISP_NONE; + } + break; + case 0x1: + insn->eaBase = (EABase)(insn->eaBaseBase + rm); + insn->eaDisplacement = EA_DISP_8; + insn->displacementSize = 1; + if (readDisplacement(insn)) + return -1; + break; + case 0x2: + insn->eaBase = (EABase)(insn->eaBaseBase + rm); + insn->eaDisplacement = EA_DISP_16; + if (readDisplacement(insn)) + return -1; + break; + case 0x3: + insn->eaBase = (EABase)(insn->eaRegBase + rm); + if (readDisplacement(insn)) + return -1; + break; + } + break; + case 4: + case 8: + insn->eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX); + + switch (mod) { + case 0x0: + insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ + switch (rm) { + case 0x14: + case 0x4: + case 0xc: /* in case REXW.b is set */ + insn->eaBase = (insn->addressSize == 4 ? + EA_BASE_sib : EA_BASE_sib64); + if (readSIB(insn) || readDisplacement(insn)) + return -1; + break; + case 0x5: + insn->eaBase = EA_BASE_NONE; + insn->eaDisplacement = EA_DISP_32; + if (readDisplacement(insn)) + return -1; + break; + default: + insn->eaBase = (EABase)(insn->eaBaseBase + rm); + break; + } + break; + case 0x1: + insn->displacementSize = 1; + /* FALLTHROUGH */ + case 0x2: + insn->eaDisplacement = (mod == 0x1 ? 
EA_DISP_8 : EA_DISP_32);
+      switch (rm) {
+      case 0x14:
+      case 0x4:
+      case 0xc:   /* in case REXW.b is set */
+        insn->eaBase = EA_BASE_sib;
+        if (readSIB(insn) || readDisplacement(insn))
+          return -1;
+        break;
+      default:
+        insn->eaBase = (EABase)(insn->eaBaseBase + rm);
+        if (readDisplacement(insn))
+          return -1;
+        break;
+      }
+      break;
+    case 0x3:
+      insn->eaDisplacement = EA_DISP_NONE;
+      insn->eaBase = (EABase)(insn->eaRegBase + rm);
+      break;
+    }
+    break;
+  } /* switch (insn->addressSize) */
+
+  return 0;
+}
+
+#define GENERIC_FIXUP_FUNC(name, base, prefix)            \
+  static uint8_t name(struct InternalInstruction *insn,   \
+                      OperandType type,                   \
+                      uint8_t index,                      \
+                      uint8_t *valid) {                   \
+    *valid = 1;                                           \
+    switch (type) {                                       \
+    default:                                              \
+      debug("Unhandled register type");                   \
+      *valid = 0;                                         \
+      return 0;                                           \
+    case TYPE_Rv:                                         \
+      return base + index;                                \
+    case TYPE_R8:                                         \
+      if (insn->rexPrefix &&                              \
+          index >= 4 && index <= 7) {                     \
+        return prefix##_SPL + (index - 4);                \
+      } else {                                            \
+        return prefix##_AL + index;                       \
+      }                                                   \
+    case TYPE_R16:                                        \
+      return prefix##_AX + index;                         \
+    case TYPE_R32:                                        \
+      return prefix##_EAX + index;                        \
+    case TYPE_R64:                                        \
+      return prefix##_RAX + index;                        \
+    case TYPE_XMM512:                                     \
+      return prefix##_ZMM0 + index;                       \
+    case TYPE_XMM256:                                     \
+      return prefix##_YMM0 + index;                       \
+    case TYPE_XMM128:                                     \
+    case TYPE_XMM64:                                      \
+    case TYPE_XMM32:                                      \
+    case TYPE_XMM:                                        \
+      return prefix##_XMM0 + index;                       \
+    case TYPE_VK1:                                        \
+    case TYPE_VK8:                                        \
+    case TYPE_VK16:                                       \
+      return prefix##_K0 + index;                         \
+    case TYPE_MM64:                                       \
+    case TYPE_MM32:                                       \
+    case TYPE_MM:                                         \
+      if (index > 7)                                      \
+        *valid = 0;                                       \
+      return prefix##_MM0 + index;                        \
+    case TYPE_SEGMENTREG:                                 \
+      if (index > 5)                                      \
+        *valid = 0;                                       \
+      return prefix##_ES + index;                         \
+    case TYPE_DEBUGREG:                                   \
+      if (index > 7)                                      \
+        *valid = 0;                                       \
+      return prefix##_DR0 + index;                        \
+    case TYPE_CONTROLREG:                                 \
+      if (index > 8)                                      \
+        *valid = 0;                                       \
+      return prefix##_CR0 + index;                        \
+    }                                                     \
+  }
+
+/*
+ * fixup*Value - Consults an operand type to determine the meaning of the
+ *   reg or R/M field. If the operand is an XMM operand, for example, the
+ *   field refers to XMM0 rather than AX, which is what readModRM() would
+ *   otherwise report.
+ *
+ * @param insn  - The instruction containing the operand.
+ * @param type  - The operand type.
+ * @param index - The existing value of the field as reported by readModRM().
+ * @param valid - The address of a uint8_t. The target is set to 1 if the
+ *                field is valid for the register class; 0 if not.
+ * @return      - The proper value.
+ */
+GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,   MODRM_REG)
+GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase, EA_REG)
+
+/*
+ * fixupReg - Consults an operand specifier to determine which of the
+ *   fixup*Value functions to use in correcting readModRM()'s interpretation.
+ *
+ * @param insn - See fixup*Value().
+ * @param op   - The operand specifier.
+ * @return     - 0 if fixup was successful; -1 if the register returned was
+ *               invalid for its class.
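+ *
+ * Example of the remapping this performs: for an operand of type TYPE_XMM128
+ * whose field value is 2, the fixup yields XMM2 rather than the general-purpose
+ * register a raw ModR/M decode would suggest. A sketch of the idea with plain
+ * enums (illustrative base values, not the decoder's):
+ *
+ *   #include <cstdio>
+ *   enum { REG_EAX = 0, REG_XMM0 = 100 };               // made-up bases
+ *   static int fixupToXmm(int index) { return REG_XMM0 + index; }
+ *   int main() { std::printf("%d\n", fixupToXmm(2)); }  // 102, i.e. "XMM2"
+ *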
+ */ +static int fixupReg(struct InternalInstruction *insn, + const struct OperandSpecifier *op) { + uint8_t valid; + + dbgprintf(insn, "fixupReg()"); + + switch ((OperandEncoding)op->encoding) { + default: + debug("Expected a REG or R/M encoding in fixupReg"); + return -1; + case ENCODING_VVVV: + insn->vvvv = (Reg)fixupRegValue(insn, + (OperandType)op->type, + insn->vvvv, + &valid); + if (!valid) + return -1; + break; + case ENCODING_REG: + insn->reg = (Reg)fixupRegValue(insn, + (OperandType)op->type, + insn->reg - insn->regBase, + &valid); + if (!valid) + return -1; + break; + case ENCODING_RM: + if (insn->eaBase >= insn->eaRegBase) { + insn->eaBase = (EABase)fixupRMValue(insn, + (OperandType)op->type, + insn->eaBase - insn->eaRegBase, + &valid); + if (!valid) + return -1; + } + break; + } + + return 0; +} + +/* + * readOpcodeRegister - Reads an operand from the opcode field of an + * instruction and interprets it appropriately given the operand width. + * Handles AddRegFrm instructions. + * + * @param insn - the instruction whose opcode field is to be read. + * @param size - The width (in bytes) of the register being specified. + * 1 means AL and friends, 2 means AX, 4 means EAX, and 8 means + * RAX. + * @return - 0 on success; nonzero otherwise. + */ +static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { + dbgprintf(insn, "readOpcodeRegister()"); + + if (size == 0) + size = insn->registerSize; + + switch (size) { + case 1: + insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) + | (insn->opcode & 7))); + if (insn->rexPrefix && + insn->opcodeRegister >= MODRM_REG_AL + 0x4 && + insn->opcodeRegister < MODRM_REG_AL + 0x8) { + insn->opcodeRegister = (Reg)(MODRM_REG_SPL + + (insn->opcodeRegister - MODRM_REG_AL - 4)); + } + + break; + case 2: + insn->opcodeRegister = (Reg)(MODRM_REG_AX + + ((bFromREX(insn->rexPrefix) << 3) + | (insn->opcode & 7))); + break; + case 4: + insn->opcodeRegister = (Reg)(MODRM_REG_EAX + + ((bFromREX(insn->rexPrefix) << 3) + | (insn->opcode & 7))); + break; + case 8: + insn->opcodeRegister = (Reg)(MODRM_REG_RAX + + ((bFromREX(insn->rexPrefix) << 3) + | (insn->opcode & 7))); + break; + } + + return 0; +} + +/* + * readImmediate - Consumes an immediate operand from an instruction, given the + * desired operand size. + * + * @param insn - The instruction whose operand is to be read. + * @param size - The width (in bytes) of the operand. + * @return - 0 if the immediate was successfully consumed; nonzero + * otherwise. 
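+ *
+ * A note on readOpcodeRegister() above: the register number embedded in the
+ * low three opcode bits gains a fourth bit from REX.b, which is how the
+ * opcode-register forms reach R8-R15. Sketch:
+ *
+ *   #include <cstdint>
+ *   int main() {
+ *     uint8_t opcode = 0x93, rexB = 1;               // a REX.B-extended form
+ *     unsigned regNum = (rexB << 3) | (opcode & 7);  // 8 | 3 = 11, i.e. "R11"
+ *     return regNum == 11 ? 0 : 1;
+ *   }
+ *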
+ */
+static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
+  uint8_t imm8;
+  uint16_t imm16;
+  uint32_t imm32;
+  uint64_t imm64;
+
+  dbgprintf(insn, "readImmediate()");
+
+  if (insn->numImmediatesConsumed == 2) {
+    debug("Already consumed two immediates");
+    return -1;
+  }
+
+  if (size == 0)
+    size = insn->immediateSize;
+  else
+    insn->immediateSize = size;
+  insn->immediateOffset = insn->readerCursor - insn->startLocation;
+
+  switch (size) {
+  case 1:
+    if (consumeByte(insn, &imm8))
+      return -1;
+    insn->immediates[insn->numImmediatesConsumed] = imm8;
+    break;
+  case 2:
+    if (consumeUInt16(insn, &imm16))
+      return -1;
+    insn->immediates[insn->numImmediatesConsumed] = imm16;
+    break;
+  case 4:
+    if (consumeUInt32(insn, &imm32))
+      return -1;
+    insn->immediates[insn->numImmediatesConsumed] = imm32;
+    break;
+  case 8:
+    if (consumeUInt64(insn, &imm64))
+      return -1;
+    insn->immediates[insn->numImmediatesConsumed] = imm64;
+    break;
+  }
+
+  insn->numImmediatesConsumed++;
+
+  return 0;
+}
+
+/*
+ * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix.
+ *
+ * @param insn - The instruction whose operand is to be read.
+ * @return     - 0 if the vvvv was successfully consumed; nonzero
+ *               otherwise.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+  dbgprintf(insn, "readVVVV()");
+
+  int vvvv;
+  if (insn->vectorExtensionType == TYPE_EVEX)
+    vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
+  else if (insn->vectorExtensionType == TYPE_VEX_3B)
+    vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
+  else if (insn->vectorExtensionType == TYPE_VEX_2B)
+    vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
+  else if (insn->vectorExtensionType == TYPE_XOP)
+    vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
+  else
+    return -1;
+
+  if (insn->mode != MODE_64BIT)
+    vvvv &= 0x7;
+
+  insn->vvvv = static_cast<Reg>(vvvv);
+  return 0;
+}
+
+/*
+ * readMaskRegister - Reads a mask register from the opcode field of an
+ *   instruction.
+ *
+ * @param insn - The instruction whose opcode field is to be read.
+ * @return     - 0 on success; nonzero otherwise.
+ */
+static int readMaskRegister(struct InternalInstruction* insn) {
+  dbgprintf(insn, "readMaskRegister()");
+
+  if (insn->vectorExtensionType != TYPE_EVEX)
+    return -1;
+
+  insn->writemask =
+      static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
+  return 0;
+}
+
+/*
+ * readOperands - Consults the specifier for an instruction and consumes all
+ *   operands for that instruction, interpreting them as it goes.
+ *
+ * @param insn - The instruction whose operands are to be read and interpreted.
+ * @return     - 0 if all operands could be read; nonzero otherwise.
+ */
+static int readOperands(struct InternalInstruction* insn) {
+  int hasVVVV, needVVVV;
+  int sawRegImm = 0;
+
+  dbgprintf(insn, "readOperands()");
+
+  /* If non-zero vvvv specified, need to make sure one of the operands
+     uses it.
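+
+     An aside on readVVVV() above: vvvv is stored complemented in the
+     VEX/EVEX payload (the standard encoding; the vvvvFrom* macro bodies are
+     not shown in this hunk), and outside 64-bit mode only three bits
+     survive, hence the &= 0x7. Sketch:
+
+       #include <cstdint>
+       int main() {
+         uint8_t payload = 0x18;                  // vvvv occupies bits 6:3
+         unsigned vvvv = (~payload & 0x78) >> 3;  // complemented -> 12
+         unsigned vvvv32 = vvvv & 0x7;            // 32-bit mode -> 4
+         return vvvv == 12 && vvvv32 == 4 ? 0 : 1;
+       }
+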
+   */
+  hasVVVV = !readVVVV(insn);
+  needVVVV = hasVVVV && (insn->vvvv != 0);
+
+  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
+    switch (Op.encoding) {
+    case ENCODING_NONE:
+    case ENCODING_SI:
+    case ENCODING_DI:
+      break;
+    case ENCODING_REG:
+    case ENCODING_RM:
+      if (readModRM(insn))
+        return -1;
+      if (fixupReg(insn, &Op))
+        return -1;
+      break;
+    case ENCODING_CB:
+    case ENCODING_CW:
+    case ENCODING_CD:
+    case ENCODING_CP:
+    case ENCODING_CO:
+    case ENCODING_CT:
+      dbgprintf(insn, "We currently don't handle code-offset encodings");
+      return -1;
+    case ENCODING_IB:
+      if (sawRegImm) {
+        /* Saw a register immediate so don't read again and instead split the
+           previous immediate. FIXME: This is a hack. */
+        insn->immediates[insn->numImmediatesConsumed] =
+          insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
+        ++insn->numImmediatesConsumed;
+        break;
+      }
+      if (readImmediate(insn, 1))
+        return -1;
+      if (Op.type == TYPE_IMM3 &&
+          insn->immediates[insn->numImmediatesConsumed - 1] > 7)
+        return -1;
+      if (Op.type == TYPE_IMM5 &&
+          insn->immediates[insn->numImmediatesConsumed - 1] > 31)
+        return -1;
+      if (Op.type == TYPE_XMM128 ||
+          Op.type == TYPE_XMM256)
+        sawRegImm = 1;
+      break;
+    case ENCODING_IW:
+      if (readImmediate(insn, 2))
+        return -1;
+      break;
+    case ENCODING_ID:
+      if (readImmediate(insn, 4))
+        return -1;
+      break;
+    case ENCODING_IO:
+      if (readImmediate(insn, 8))
+        return -1;
+      break;
+    case ENCODING_Iv:
+      if (readImmediate(insn, insn->immediateSize))
+        return -1;
+      break;
+    case ENCODING_Ia:
+      if (readImmediate(insn, insn->addressSize))
+        return -1;
+      break;
+    case ENCODING_RB:
+      if (readOpcodeRegister(insn, 1))
+        return -1;
+      break;
+    case ENCODING_RW:
+      if (readOpcodeRegister(insn, 2))
+        return -1;
+      break;
+    case ENCODING_RD:
+      if (readOpcodeRegister(insn, 4))
+        return -1;
+      break;
+    case ENCODING_RO:
+      if (readOpcodeRegister(insn, 8))
+        return -1;
+      break;
+    case ENCODING_Rv:
+      if (readOpcodeRegister(insn, 0))
+        return -1;
+      break;
+    case ENCODING_FP:
+      break;
+    case ENCODING_VVVV:
+      needVVVV = 0; /* Mark that we have found a VVVV operand. */
+      if (!hasVVVV)
+        return -1;
+      if (fixupReg(insn, &Op))
+        return -1;
+      break;
+    case ENCODING_WRITEMASK:
+      if (readMaskRegister(insn))
+        return -1;
+      break;
+    case ENCODING_DUP:
+      break;
+    default:
+      dbgprintf(insn, "Encountered an operand with an unknown encoding.");
+      return -1;
+    }
+  }
+
+  /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */
+  if (needVVVV) return -1;
+
+  return 0;
+}
+
+/*
+ * decodeInstruction - Reads and interprets a full instruction provided by the
+ *   user.
+ *
+ * @param insn      - A pointer to the instruction to be populated. Must be
+ *                    pre-allocated.
+ * @param reader    - The function to be used to read the instruction's bytes.
+ * @param readerArg - A generic argument to be passed to the reader to store
+ *                    any internal state.
+ * @param logger    - If non-NULL, the function to be used to write log messages
+ *                    and warnings.
+ * @param loggerArg - A generic argument to be passed to the logger to store
+ *                    any internal state.
+ * @param startLoc  - The address (in the reader's address space) of the first
+ *                    byte in the instruction.
+ * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to
+ *                    decode the instruction in.
+ * @return          - 0 if the instruction's memory could be read; nonzero if
+ *                    not.
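+ *
+ * A hedged sketch of the reader shape this function expects: it is invoked
+ * as reader(readerArg, &byte, absoluteAddress) and returns nonzero on
+ * failure. The Region type and names below are invented for illustration:
+ *
+ *   #include <cstdint>
+ *   #include <cstddef>
+ *   struct Region { const uint8_t *bytes; uint64_t base; size_t size; };
+ *   static int bufferReader(const void *arg, uint8_t *byte, uint64_t addr) {
+ *     const Region *r = static_cast<const Region *>(arg);
+ *     if (addr < r->base || addr - r->base >= r->size)
+ *       return -1;                                 // out of range: nonzero
+ *     *byte = r->bytes[addr - r->base];
+ *     return 0;
+ *   }
+ *   int main() {
+ *     const uint8_t code[] = {0x48, 0x89, 0xd8};   // mov rax, rbx
+ *     Region r = {code, 0x1000, sizeof(code)};
+ *     uint8_t b;
+ *     return bufferReader(&r, &b, 0x1001) == 0 && b == 0x89 ? 0 : 1;
+ *   }
+ *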
+ */ +int llvm::X86Disassembler::decodeInstruction( + struct InternalInstruction *insn, byteReader_t reader, + const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, + uint64_t startLoc, DisassemblerMode mode) { + memset(insn, 0, sizeof(struct InternalInstruction)); + + insn->reader = reader; + insn->readerArg = readerArg; + insn->dlog = logger; + insn->dlogArg = loggerArg; + insn->startLocation = startLoc; + insn->readerCursor = startLoc; + insn->mode = mode; + insn->numImmediatesConsumed = 0; + + if (readPrefixes(insn) || + readOpcode(insn) || + getID(insn, miiArg) || + insn->instructionID == 0 || + readOperands(insn)) + return -1; + + insn->operands = x86OperandSets[insn->spec->operands]; + + insn->length = insn->readerCursor - insn->startLocation; + + dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", + startLoc, insn->readerCursor, insn->length); + + if (insn->length > 15) + dbgprintf(insn, "Instruction exceeds 15-byte limit"); + + return 0; +} diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index ac3b39d..8c45402 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -1,39 +1,28 @@ -/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===* - * - * The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===* - * - * This file is part of the X86 Disassembler. - * It contains the public interface of the instruction decoder. - * Documentation for the disassembler can be found in X86Disassembler.h. - * - *===----------------------------------------------------------------------===*/ +//===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains the public interface of the instruction decoder. +// Documentation for the disassembler can be found in X86Disassembler.h. +// +//===----------------------------------------------------------------------===// #ifndef X86DISASSEMBLERDECODER_H #define X86DISASSEMBLERDECODER_H -#ifdef __cplusplus -extern "C" { -#endif - -#define INSTRUCTION_SPECIFIER_FIELDS \ - uint16_t operands; - -#define INSTRUCTION_IDS \ - uint16_t instructionIDs; - #include "X86DisassemblerDecoderCommon.h" +#include "llvm/ADT/ArrayRef.h" -#undef INSTRUCTION_SPECIFIER_FIELDS -#undef INSTRUCTION_IDS +namespace llvm { +namespace X86Disassembler { -/* - * Accessor functions for various fields of an Intel instruction - */ +// Accessor functions for various fields of an Intel instruction #define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) #define regFromModRM(modRM) (((modRM) & 0x38) >> 3) #define rmFromModRM(modRM) ((modRM) & 0x7) @@ -83,10 +72,7 @@ extern "C" { #define lFromXOP3of3(xop) (((xop) & 0x4) >> 2) #define ppFromXOP3of3(xop) ((xop) & 0x3) -/* - * These enums represent Intel registers for use by the decoder. - */ - +// These enums represent Intel registers for use by the decoder. 
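// Editor's note: a minimal sketch of driving the decodeInstruction() entry
// point defined above. The byteReader_t and dlog_t signatures and the
// parameter list are exactly those in this patch; the buffer struct, the
// callback names, and MCII (assumed here to be the const MCInstrInfo*
// consumed by getID()) are hypothetical caller-side choices.
//
//   struct BufferRegion {
//     const uint8_t *Bytes; // instruction bytes
//     uint64_t Base;        // address of Bytes[0] in the reader's space
//     uint64_t Size;
//   };
//
//   static int readFromBuffer(const void *Arg, uint8_t *Byte, uint64_t Addr) {
//     const BufferRegion *R = static_cast<const BufferRegion *>(Arg);
//     if (Addr - R->Base >= R->Size)
//       return -1;            // a failed read ends decoding
//     *Byte = R->Bytes[Addr - R->Base];
//     return 0;
//   }
//
//   InternalInstruction Insn;
//   BufferRegion Region = {Bytes, 0x1000, Len};
//   if (!decodeInstruction(&Insn, readFromBuffer, &Region, /*logger=*/nullptr,
//                          /*loggerArg=*/nullptr, MCII, Region.Base,
//                          MODE_64BIT)) {
//     // Insn.instructionID, Insn.length and Insn.operands are now valid.
//   }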
#define REGS_8BIT \ ENTRY(AL) \ ENTRY(CL) \ @@ -392,13 +378,11 @@ extern "C" { REGS_CONTROL \ ENTRY(RIP) -/* - * EABase - All possible values of the base field for effective-address - * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We - * distinguish between bases (EA_BASE_*) and registers that just happen to be - * referred to when Mod == 0b11 (EA_REG_*). - */ -typedef enum { +/// \brief All possible values of the base field for effective-address +/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte. +/// We distinguish between bases (EA_BASE_*) and registers that just happen +/// to be referred to when Mod == 0b11 (EA_REG_*). +enum EABase { EA_BASE_NONE, #define ENTRY(x) EA_BASE_##x, ALL_EA_BASES @@ -407,15 +391,13 @@ typedef enum { ALL_REGS #undef ENTRY EA_max -} EABase; - -/* - * SIBIndex - All possible values of the SIB index field. - * Borrows entries from ALL_EA_BASES with the special case that - * sib is synonymous with NONE. - * Vector SIB: index can be XMM or YMM. - */ -typedef enum { +}; + +/// \brief All possible values of the SIB index field. +/// borrows entries from ALL_EA_BASES with the special case that +/// sib is synonymous with NONE. +/// Vector SIB: index can be XMM or YMM. +enum SIBIndex { SIB_INDEX_NONE, #define ENTRY(x) SIB_INDEX_##x, ALL_EA_BASES @@ -424,23 +406,18 @@ typedef enum { REGS_ZMM #undef ENTRY SIB_INDEX_max -} SIBIndex; +}; -/* - * SIBBase - All possible values of the SIB base field. - */ -typedef enum { +/// \brief All possible values of the SIB base field. +enum SIBBase { SIB_BASE_NONE, #define ENTRY(x) SIB_BASE_##x, ALL_SIB_BASES #undef ENTRY SIB_BASE_max -} SIBBase; +}; -/* - * EADisplacement - Possible displacement types for effective-address - * computations. - */ +/// \brief Possible displacement types for effective-address computations. typedef enum { EA_DISP_NONE, EA_DISP_8, @@ -448,20 +425,16 @@ typedef enum { EA_DISP_32 } EADisplacement; -/* - * Reg - All possible values of the reg field in the ModR/M byte. - */ -typedef enum { +/// \brief All possible values of the reg field in the ModR/M byte. +enum Reg { #define ENTRY(x) MODRM_REG_##x, ALL_REGS #undef ENTRY MODRM_REG_max -} Reg; +}; -/* - * SegmentOverride - All possible segment overrides. - */ -typedef enum { +/// \brief All possible segment overrides. +enum SegmentOverride { SEG_OVERRIDE_NONE, SEG_OVERRIDE_CS, SEG_OVERRIDE_SS, @@ -470,235 +443,220 @@ typedef enum { SEG_OVERRIDE_FS, SEG_OVERRIDE_GS, SEG_OVERRIDE_max -} SegmentOverride; - -/* - * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field - */ +}; -typedef enum { +/// \brief Possible values for the VEX.m-mmmm field +enum VEXLeadingOpcodeByte { VEX_LOB_0F = 0x1, VEX_LOB_0F38 = 0x2, VEX_LOB_0F3A = 0x3 -} VEXLeadingOpcodeByte; +}; -typedef enum { +enum XOPMapSelect { XOP_MAP_SELECT_8 = 0x8, XOP_MAP_SELECT_9 = 0x9, XOP_MAP_SELECT_A = 0xA -} XOPMapSelect; - -/* - * VEXPrefixCode - Possible values for the VEX.pp/EVEX.pp field - */ +}; -typedef enum { +/// \brief Possible values for the VEX.pp/EVEX.pp field +enum VEXPrefixCode { VEX_PREFIX_NONE = 0x0, VEX_PREFIX_66 = 0x1, VEX_PREFIX_F3 = 0x2, VEX_PREFIX_F2 = 0x3 -} VEXPrefixCode; +}; -typedef enum { +enum VectorExtensionType { TYPE_NO_VEX_XOP = 0x0, TYPE_VEX_2B = 0x1, TYPE_VEX_3B = 0x2, TYPE_EVEX = 0x3, TYPE_XOP = 0x4 -} VectorExtensionType; - -typedef uint8_t BOOL; - -/* - * byteReader_t - Type for the byte reader that the consumer must provide to - * the decoder. Reads a single byte from the instruction's address space. 
- * @param arg - A baton that the consumer can associate with any internal - * state that it needs. - * @param byte - A pointer to a single byte in memory that should be set to - * contain the value at address. - * @param address - The address in the instruction's address space that should - * be read from. - * @return - -1 if the byte cannot be read for any reason; 0 otherwise. - */ -typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address); - -/* - * dlog_t - Type for the logging function that the consumer can provide to - * get debugging output from the decoder. - * @param arg - A baton that the consumer can associate with any internal - * state that it needs. - * @param log - A string that contains the message. Will be reused after - * the logger returns. - */ -typedef void (*dlog_t)(void* arg, const char *log); - -/* - * The x86 internal instruction, which is produced by the decoder. - */ +}; + +/// \brief Type for the byte reader that the consumer must provide to +/// the decoder. Reads a single byte from the instruction's address space. +/// \param arg A baton that the consumer can associate with any internal +/// state that it needs. +/// \param byte A pointer to a single byte in memory that should be set to +/// contain the value at address. +/// \param address The address in the instruction's address space that should +/// be read from. +/// \return -1 if the byte cannot be read for any reason; 0 otherwise. +typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address); + +/// \brief Type for the logging function that the consumer can provide to +/// get debugging output from the decoder. +/// \param arg A baton that the consumer can associate with any internal +/// state that it needs. +/// \param log A string that contains the message. Will be reused after +/// the logger returns. +typedef void (*dlog_t)(void *arg, const char *log); + +/// The specification for how to extract and interpret a full instruction and +/// its operands. +struct InstructionSpecifier { + uint16_t operands; +}; + +/// The x86 internal instruction, which is produced by the decoder. 
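// Editor's note: InstructionSpecifier::operands above is just an index into
// the tblgen-emitted table that readOperands() walks. The table's real
// declaration lives in the generated X86GenDisassemblerTables.inc, so the
// shape sketched here is an assumption for illustration only:
//
//   extern const OperandSpecifier
//       x86OperandSets[][X86_MAX_OPERANDS]; // one fixed-width row per set
//
//   for (const OperandSpecifier &Op : x86OperandSets[Insn.spec->operands]) {
//     // Dispatch on Op.encoding / Op.type as readOperands() does; unused
//     // trailing slots are assumed to be ENCODING_NONE, which that switch
//     // treats as a no-op.
//   }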
struct InternalInstruction { - /* Reader interface (C) */ + // Reader interface (C) byteReader_t reader; - /* Opaque value passed to the reader */ + // Opaque value passed to the reader const void* readerArg; - /* The address of the next byte to read via the reader */ + // The address of the next byte to read via the reader uint64_t readerCursor; - /* Logger interface (C) */ + // Logger interface (C) dlog_t dlog; - /* Opaque value passed to the logger */ + // Opaque value passed to the logger void* dlogArg; - /* General instruction information */ + // General instruction information - /* The mode to disassemble for (64-bit, protected, real) */ + // The mode to disassemble for (64-bit, protected, real) DisassemblerMode mode; - /* The start of the instruction, usable with the reader */ + // The start of the instruction, usable with the reader uint64_t startLocation; - /* The length of the instruction, in bytes */ + // The length of the instruction, in bytes size_t length; - /* Prefix state */ + // Prefix state - /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ + // 1 if the prefix byte corresponding to the entry is present; 0 if not uint8_t prefixPresent[0x100]; - /* contains the location (for use with the reader) of the prefix byte */ + // contains the location (for use with the reader) of the prefix byte uint64_t prefixLocations[0x100]; - /* The value of the vector extension prefix(EVEX/VEX/XOP), if present */ + // The value of the vector extension prefix(EVEX/VEX/XOP), if present uint8_t vectorExtensionPrefix[4]; - /* The type of the vector extension prefix */ + // The type of the vector extension prefix VectorExtensionType vectorExtensionType; - /* The value of the REX prefix, if present */ + // The value of the REX prefix, if present uint8_t rexPrefix; - /* The location where a mandatory prefix would have to be (i.e., right before - the opcode, or right before the REX prefix if one is present) */ + // The location where a mandatory prefix would have to be (i.e., right before + // the opcode, or right before the REX prefix if one is present). uint64_t necessaryPrefixLocation; - /* The segment override type */ + // The segment override type SegmentOverride segmentOverride; - /* 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease */ - BOOL xAcquireRelease; + // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease + bool xAcquireRelease; - /* Sizes of various critical pieces of data, in bytes */ + // Sizes of various critical pieces of data, in bytes uint8_t registerSize; uint8_t addressSize; uint8_t displacementSize; uint8_t immediateSize; - /* Offsets from the start of the instruction to the pieces of data, which is - needed to find relocation entries for adding symbolic operands */ + // Offsets from the start of the instruction to the pieces of data, which is + // needed to find relocation entries for adding symbolic operands. 
uint8_t displacementOffset; uint8_t immediateOffset; - /* opcode state */ + // opcode state - /* The last byte of the opcode, not counting any ModR/M extension */ + // The last byte of the opcode, not counting any ModR/M extension uint8_t opcode; - /* The ModR/M byte of the instruction, if it is an opcode extension */ + // The ModR/M byte of the instruction, if it is an opcode extension uint8_t modRMExtension; - /* decode state */ + // decode state - /* The type of opcode, used for indexing into the array of decode tables */ + // The type of opcode, used for indexing into the array of decode tables OpcodeType opcodeType; - /* The instruction ID, extracted from the decode table */ + // The instruction ID, extracted from the decode table uint16_t instructionID; - /* The specifier for the instruction, from the instruction info table */ - const struct InstructionSpecifier *spec; + // The specifier for the instruction, from the instruction info table + const InstructionSpecifier *spec; - /* state for additional bytes, consumed during operand decode. Pattern: - consumed___ indicates that the byte was already consumed and does not - need to be consumed again */ + // state for additional bytes, consumed during operand decode. Pattern: + // consumed___ indicates that the byte was already consumed and does not + // need to be consumed again. - /* The VEX.vvvv field, which contains a third register operand for some AVX - instructions */ + // The VEX.vvvv field, which contains a third register operand for some AVX + // instructions. Reg vvvv; - /* The writemask for AVX-512 instructions which is contained in EVEX.aaa */ + // The writemask for AVX-512 instructions which is contained in EVEX.aaa Reg writemask; - /* The ModR/M byte, which contains most register operands and some portion of - all memory operands */ - BOOL consumedModRM; + // The ModR/M byte, which contains most register operands and some portion of + // all memory operands. + bool consumedModRM; uint8_t modRM; - /* The SIB byte, used for more complex 32- or 64-bit memory operands */ - BOOL consumedSIB; + // The SIB byte, used for more complex 32- or 64-bit memory operands + bool consumedSIB; uint8_t sib; - /* The displacement, used for memory operands */ - BOOL consumedDisplacement; + // The displacement, used for memory operands + bool consumedDisplacement; int32_t displacement; - /* Immediates. There can be two in some cases */ + // Immediates. There can be two in some cases uint8_t numImmediatesConsumed; uint8_t numImmediatesTranslated; uint64_t immediates[2]; - /* A register or immediate operand encoded into the opcode */ + // A register or immediate operand encoded into the opcode Reg opcodeRegister; - /* Portions of the ModR/M byte */ + // Portions of the ModR/M byte - /* These fields determine the allowable values for the ModR/M fields, which - depend on operand and address widths */ + // These fields determine the allowable values for the ModR/M fields, which + // depend on operand and address widths. EABase eaBaseBase; EABase eaRegBase; Reg regBase; - /* The Mod and R/M fields can encode a base for an effective address, or a - register. These are separated into two fields here */ + // The Mod and R/M fields can encode a base for an effective address, or a + // register. These are separated into two fields here. 
   EABase eaBase;
   EADisplacement eaDisplacement;

-  /* The reg field always encodes a register */
+  // The reg field always encodes a register
   Reg reg;

-  /* SIB state */
+  // SIB state
   SIBIndex sibIndex;
   uint8_t sibScale;
   SIBBase sibBase;

-  const struct OperandSpecifier *operands;
+  ArrayRef<OperandSpecifier> operands;
 };

-/* decodeInstruction - Decode one instruction and store the decoding results in
- *   a buffer provided by the consumer.
- * @param insn      - The buffer to store the instruction in.  Allocated by the
- *                    consumer.
- * @param reader    - The byteReader_t for the bytes to be read.
- * @param readerArg - An argument to pass to the reader for storing context
- *                    specific to the consumer.  May be NULL.
- * @param logger    - The dlog_t to be used in printing status messages from the
- *                    disassembler.  May be NULL.
- * @param loggerArg - An argument to pass to the logger for storing context
- *                    specific to the logger.  May be NULL.
- * @param startLoc  - The address (in the reader's address space) of the first
- *                    byte in the instruction.
- * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in.
- * @return          - Nonzero if there was an error during decode, 0 otherwise.
- */
-int decodeInstruction(struct InternalInstruction* insn,
+/// \brief Decode one instruction and store the decoding results in
+/// a buffer provided by the consumer.
+/// \param insn      The buffer to store the instruction in.  Allocated by the
+///                  consumer.
+/// \param reader    The byteReader_t for the bytes to be read.
+/// \param readerArg An argument to pass to the reader for storing context
+///                  specific to the consumer.  May be NULL.
+/// \param logger    The dlog_t to be used in printing status messages from the
+///                  disassembler.  May be NULL.
+/// \param loggerArg An argument to pass to the logger for storing context
+///                  specific to the logger.  May be NULL.
+/// \param startLoc  The address (in the reader's address space) of the first
+///                  byte in the instruction.
+/// \param mode      The mode (16-bit, 32-bit, 64-bit) to decode in.
+/// \return          Nonzero if there was an error during decode, 0 otherwise.
+int decodeInstruction(InternalInstruction *insn,
                       byteReader_t reader,
-                      const void* readerArg,
+                      const void *readerArg,
                       dlog_t logger,
-                      void* loggerArg,
-                      const void* miiArg,
+                      void *loggerArg,
+                      const void *miiArg,
                       uint64_t startLoc,
                       DisassemblerMode mode);

-/* x86DisassemblerDebug - C-accessible function for printing a message to
- *   debugs()
- * @param file - The name of the file printing the debug message.
- * @param line - The line number that printed the debug message.
- * @param s    - The message to print.
- */
+/// \brief Print a message to debugs()
+/// \param file The name of the file printing the debug message.
+/// \param line The line number that printed the debug message.
+/// \param s    The message to print.
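// Editor's note: with `operands` now an ArrayRef rather than a raw pointer,
// clients get bounds and range-for iteration for free. A small sketch; the
// counting helper is hypothetical, not part of this patch:
//
//   static unsigned countOperands(const InternalInstruction &Insn) {
//     unsigned N = 0;
//     for (const OperandSpecifier &Op : Insn.operands)
//       if (Op.encoding != ENCODING_NONE)
//         ++N;
//     return N;
//   }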
+void Debug(const char *file, unsigned line, const char *s); -void x86DisassemblerDebug(const char *file, - unsigned line, - const char *s); +const char *GetInstrName(unsigned Opcode, const void *mii); -const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii); - -#ifdef __cplusplus -} -#endif +} // namespace X86Disassembler +} // namespace llvm #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 523ae99..f59e0b6 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -1,29 +1,27 @@ -/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===* - * - * The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===* - * - * This file is part of the X86 Disassembler. - * It contains common definitions used by both the disassembler and the table - * generator. - * Documentation for the disassembler can be found in X86Disassembler.h. - * - *===----------------------------------------------------------------------===*/ - -/* - * This header file provides those definitions that need to be shared between - * the decoder and the table generator in a C-friendly manner. - */ +//===-- X86DisassemblerDecoderCommon.h - Disassembler decoder ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains common definitions used by both the disassembler and the table +// generator. +// Documentation for the disassembler can be found in X86Disassembler.h. +// +//===----------------------------------------------------------------------===// #ifndef X86DISASSEMBLERDECODERCOMMON_H #define X86DISASSEMBLERDECODERCOMMON_H #include "llvm/Support/DataTypes.h" +namespace llvm { +namespace X86Disassembler { + #define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers #define CONTEXTS_SYM x86DisassemblerContexts #define ONEBYTE_SYM x86DisassemblerOneByteOpcodes @@ -44,11 +42,9 @@ #define XOP9_MAP_STR "x86DisassemblerXOP9Opcodes" #define XOPA_MAP_STR "x86DisassemblerXOPAOpcodes" -/* - * Attributes of an instruction that must be known before the opcode can be - * processed correctly. Most of these indicate the presence of particular - * prefixes, but ATTR_64BIT is simply an attribute of the decoding context. - */ +// Attributes of an instruction that must be known before the opcode can be +// processed correctly. Most of these indicate the presence of particular +// prefixes, but ATTR_64BIT is simply an attribute of the decoding context. #define ATTRIBUTE_BITS \ ENUM_ENTRY(ATTR_NONE, 0x00) \ ENUM_ENTRY(ATTR_64BIT, (0x1 << 0)) \ @@ -73,13 +69,11 @@ enum attributeBits { }; #undef ENUM_ENTRY -/* - * Combinations of the above attributes that are relevant to instruction - * decode. Although other combinations are possible, they can be reduced to - * these without affecting the ultimately decoded instruction. - */ +// Combinations of the above attributes that are relevant to instruction +// decode. 
Although other combinations are possible, they can be reduced to +// these without affecting the ultimately decoded instruction. -/* Class name Rank Rationale for rank assignment */ +// Class name Rank Rationale for rank assignment #define INSTRUCTION_CONTEXTS \ ENUM_ENTRY(IC, 0, "says nothing about the instruction") \ ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \ @@ -274,17 +268,15 @@ enum attributeBits { ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize") #define ENUM_ENTRY(n, r, d) n, -typedef enum { +enum InstructionContext { INSTRUCTION_CONTEXTS IC_max -} InstructionContext; +}; #undef ENUM_ENTRY -/* - * Opcode types, which determine which decode table to use, both in the Intel - * manual and also for the decoder. - */ -typedef enum { +// Opcode types, which determine which decode table to use, both in the Intel +// manual and also for the decoder. +enum OpcodeType { ONEBYTE = 0, TWOBYTE = 1, THREEBYTE_38 = 2, @@ -292,39 +284,33 @@ typedef enum { XOP8_MAP = 4, XOP9_MAP = 5, XOPA_MAP = 6 -} OpcodeType; - -/* - * The following structs are used for the hierarchical decode table. After - * determining the instruction's class (i.e., which IC_* constant applies to - * it), the decoder reads the opcode. Some instructions require specific - * values of the ModR/M byte, so the ModR/M byte indexes into the final table. - * - * If a ModR/M byte is not required, "required" is left unset, and the values - * for each instructionID are identical. - */ +}; +// The following structs are used for the hierarchical decode table. After +// determining the instruction's class (i.e., which IC_* constant applies to +// it), the decoder reads the opcode. Some instructions require specific +// values of the ModR/M byte, so the ModR/M byte indexes into the final table. +// +// If a ModR/M byte is not required, "required" is left unset, and the values +// for each instructionID are identical. typedef uint16_t InstrUID; -/* - * ModRMDecisionType - describes the type of ModR/M decision, allowing the - * consumer to determine the number of entries in it. - * - * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded - * instruction is the same. - * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode - * corresponds to one instruction; otherwise, it corresponds to - * a different instruction. - * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte - * divided by 8 is used to select instruction; otherwise, each - * value of the ModR/M byte could correspond to a different - * instruction. - * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This - corresponds to instructions that use reg field as opcode - * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond - * to a different instruction. - */ - +// ModRMDecisionType - describes the type of ModR/M decision, allowing the +// consumer to determine the number of entries in it. +// +// MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded +// instruction is the same. +// MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode +// corresponds to one instruction; otherwise, it corresponds to +// a different instruction. +// MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte +// divided by 8 is used to select instruction; otherwise, each +// value of the ModR/M byte could correspond to a different +// instruction. 
+// MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This +// corresponds to instructions that use reg field as opcode +// MODRM_FULL - Potentially, each value of the ModR/M byte could correspond +// to a different instruction. #define MODRMTYPES \ ENUM_ENTRY(MODRM_ONEENTRY) \ ENUM_ENTRY(MODRM_SPLITRM) \ @@ -333,47 +319,13 @@ typedef uint16_t InstrUID; ENUM_ENTRY(MODRM_FULL) #define ENUM_ENTRY(n) n, -typedef enum { +enum ModRMDecisionType { MODRMTYPES MODRM_max -} ModRMDecisionType; -#undef ENUM_ENTRY - -/* - * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which - * instruction each possible value of the ModR/M byte corresponds to. Once - * this information is known, we have narrowed down to a single instruction. - */ -struct ModRMDecision { - uint8_t modrm_type; - - /* The macro below must be defined wherever this file is included. */ - INSTRUCTION_IDS -}; - -/* - * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at - * given a particular opcode. - */ -struct OpcodeDecision { - struct ModRMDecision modRMDecisions[256]; -}; - -/* - * ContextDecision - Specifies which opcode->instruction tables to look at given - * a particular context (set of attributes). Since there are many possible - * contexts, the decoder first uses CONTEXTS_SYM to determine which context - * applies given a specific set of attributes. Hence there are only IC_max - * entries in this table, rather than 2^(ATTR_max). - */ -struct ContextDecision { - struct OpcodeDecision opcodeDecisions[IC_max]; }; +#undef ENUM_ENTRY -/* - * Physical encodings of instruction operands. - */ - +// Physical encodings of instruction operands. #define ENCODINGS \ ENUM_ENTRY(ENCODING_NONE, "") \ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \ @@ -408,16 +360,13 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_DI, "Destination index; encoded in prefixes") #define ENUM_ENTRY(n, d) n, - typedef enum { - ENCODINGS - ENCODING_max - } OperandEncoding; +enum OperandEncoding { + ENCODINGS + ENCODING_max +}; #undef ENUM_ENTRY -/* - * Semantic interpretations of instruction operands. - */ - +// Semantic interpretations of instruction operands. #define TYPES \ ENUM_ENTRY(TYPE_NONE, "") \ ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \ @@ -508,56 +457,42 @@ struct ContextDecision { ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state") #define ENUM_ENTRY(n, d) n, -typedef enum { +enum OperandType { TYPES TYPE_max -} OperandType; +}; #undef ENUM_ENTRY -/* - * OperandSpecifier - The specification for how to extract and interpret one - * operand. - */ +/// \brief The specification for how to extract and interpret one operand. struct OperandSpecifier { uint8_t encoding; uint8_t type; }; -/* - * Indicates where the opcode modifier (if any) is to be found. Extended - * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte. - */ - +// Indicates where the opcode modifier (if any) is to be found. Extended +// opcodes with AddRegFrm have the opcode modifier in the ModR/M byte. #define MODIFIER_TYPES \ ENUM_ENTRY(MODIFIER_NONE) #define ENUM_ENTRY(n) n, -typedef enum { +enum ModifierType { MODIFIER_TYPES MODIFIER_max -} ModifierType; +}; #undef ENUM_ENTRY -#define X86_MAX_OPERANDS 5 - -/* - * The specification for how to extract and interpret a full instruction and - * its operands. - */ -struct InstructionSpecifier { - /* The macro below must be defined wherever this file is included. 
*/ - INSTRUCTION_SPECIFIER_FIELDS -}; +static const unsigned X86_MAX_OPERANDS = 5; -/* - * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode - * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, - * respectively. - */ -typedef enum { +/// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode +/// are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, +/// respectively. +enum DisassemblerMode { MODE_16BIT, MODE_32BIT, MODE_64BIT -} DisassemblerMode; +}; + +} // namespace X86Disassembler +} // namespace llvm #endif diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index eea0a76..b45b118 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" @@ -28,6 +27,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "asm-printer" + // Include the auto-generated portion of the assembly writer. #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index f34e633..531183b 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -32,6 +32,8 @@ public: // Autogenerated by tblgen, returns true if we successfully printed an // alias. bool printAliasInstr(const MCInst *MI, raw_ostream &OS); + void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx, + unsigned PrintMethodIdx, raw_ostream &O); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &OS); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index db61fb0..baf6507 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -32,7 +32,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, const char *(*getRegName)(unsigned)) { // If this is a shuffle operation, the switch should fill in this state. SmallVector ShuffleMask; - const char *DestName = 0, *Src1Name = 0, *Src2Name = 0; + const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr; switch (MI->getOpcode()) { case X86::INSERTPSrr: @@ -492,7 +492,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // If this was a shuffle operation, print the shuffle mask. if (!ShuffleMask.empty()) { - if (DestName == 0) DestName = Src1Name; + if (!DestName) DestName = Src1Name; OS << (DestName ? 
DestName : "mem") << " = "; // If the two sources are the same, canonicalize the input elements to be diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 1c95d37..1c8466b 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" @@ -25,6 +24,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #include "X86GenAsmWriter1.inc" void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { diff --git a/lib/Target/X86/MCTargetDesc/Android.mk b/lib/Target/X86/MCTargetDesc/Android.mk index ee37c27..a3c9bc8 100644 --- a/lib/Target/X86/MCTargetDesc/Android.mk +++ b/lib/Target/X86/MCTargetDesc/Android.mk @@ -14,7 +14,8 @@ x86_mc_desc_SRC_FILES := \ X86MCCodeEmitter.cpp \ X86MachORelocationInfo.cpp \ X86MachObjectWriter.cpp \ - X86WinCOFFObjectWriter.cpp + X86WinCOFFObjectWriter.cpp \ + X86WinCOFFStreamer.cpp # For the host # ===================================================== diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 3f5a0e2..129c28d 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMX86Desc X86MCCodeEmitter.cpp X86MachObjectWriter.cpp X86ELFObjectWriter.cpp + X86WinCOFFStreamer.cpp X86WinCOFFObjectWriter.cpp X86MachORelocationInfo.cpp X86ELFRelocationInfo.cpp diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt index 9e1d29c..146d111 100644 --- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = X86Desc parent = X86 -required_libraries = MC Support X86AsmPrinter X86Info +required_libraries = MC Object Support X86AsmPrinter X86Info add_to_library_groups = X86 diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 23763f7..bf30a8e 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -37,23 +37,29 @@ MCDisableArithRelaxation("mc-x86-disable-arith-relaxation", static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { - default: llvm_unreachable("invalid fixup kind!"); + default: + llvm_unreachable("invalid fixup kind!"); case FK_PCRel_1: case FK_SecRel_1: - case FK_Data_1: return 0; + case FK_Data_1: + return 0; case FK_PCRel_2: case FK_SecRel_2: - case FK_Data_2: return 1; + case FK_Data_2: + return 1; case FK_PCRel_4: case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: case X86::reloc_signed_4byte: case X86::reloc_global_offset_table: case FK_SecRel_4: - case FK_Data_4: return 2; + case FK_Data_4: + return 2; case FK_PCRel_8: case FK_SecRel_8: - case FK_Data_8: return 3; + case FK_Data_8: + case X86::reloc_global_offset_table8: + return 3; } } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 38fab15..6aeb1f2 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -643,6 +643,10 @@ namespace X86II { /// counted as one operand. 
/// inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) { + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; + bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K); + switch (TSFlags & X86II::FormMask) { default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!"); case X86II::Pseudo: @@ -660,9 +664,6 @@ namespace X86II { case X86II::MRMDestMem: return 0; case X86II::MRMSrcMem: { - bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; - bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; - bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K); unsigned FirstMemOp = 1; if (HasVEX_4V) ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). @@ -690,6 +691,8 @@ namespace X86II { unsigned FirstMemOp = 0; if (HasVEX_4V) ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV). + if (HasEVEX_K) + ++FirstMemOp;// Skip the mask register return FirstMemOp; } case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2: diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index c44d88d..3fdec87 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -43,7 +43,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, bool IsPCRel) const { // determine the type of the relocation - MCSymbolRefExpr::VariantKind Modifier = Fixup.getAccessVariant(); + MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); unsigned Type; if (getEMachine() == ELF::EM_X86_64) { if (IsPCRel) { @@ -98,6 +98,12 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, } else { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_global_offset_table8: + Type = ELF::R_X86_64_GOTPC64; + break; + case X86::reloc_global_offset_table: + Type = ELF::R_X86_64_GOTPC32; + break; case FK_Data_8: switch (Modifier) { default: diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp index 4fa519c..b679316 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp @@ -39,7 +39,7 @@ public: if (Sym->isVariable() == false) Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx)); - const MCExpr *Expr = 0; + const MCExpr *Expr = nullptr; // If hasAddend is true, then we need to add Addend (r_addend) to Expr. bool hasAddend = false; diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h index f2e34cb..09396b7 100644 --- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h +++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h @@ -23,6 +23,7 @@ enum Fixups { reloc_global_offset_table, // 32-bit, relative to the start // of the instruction. Used only // for _GLOBAL_OFFSET_TABLE_. + reloc_global_offset_table8, // 64-bit variant. 
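// Editor's note: illustrative arithmetic for the getMemoryOperandNo() change
// above. The memory reference's index is found by skipping the register
// operands that precede it in the MCInst: with FirstMemOp starting at 0, a
// VEX_VVVV-encoded register bumps it to 1 and, with the new HasEVEX_K check,
// an AVX-512 writemask register bumps it to 2, so the memory operand's
// X86::AddrNumOperands-long sequence begins at index 2 when both are present.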
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 6561804..39480ea 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -51,7 +51,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
   TextAlignFillValue = 0x90;

   if (!is64Bit)
-    Data64bitsDirective = 0;       // we can't emit a 64-bit unit
+    Data64bitsDirective = nullptr; // we can't emit a 64-bit unit

   // Use ## as a comment string so that .s files generated by llvm can go
   // through the GCC preprocessor without causing an error. This is needed
@@ -115,7 +115,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
   // into two .words.
   if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) &&
       T.getArch() == Triple::x86)
-    Data64bitsDirective = 0;
+    Data64bitsDirective = nullptr;

   // Always enable the integrated assembler by default.
   // Clang also enabled it when the OS is Solaris but that is redundant here.
@@ -157,8 +157,10 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
 void X86MCAsmInfoGNUCOFF::anchor() { }

 X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
-  if (Triple.getArch() == Triple::x86_64)
+  if (Triple.getArch() == Triple::x86_64) {
     PrivateGlobalPrefix = ".L";
+    PointerSize = 8;
+  }

   AssemblerDialect = AsmWriterFlavor;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index e6fb037..2152b21 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "mccodeemitter"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86FixupKinds.h"
@@ -27,6 +26,8 @@

 using namespace llvm;

+#define DEBUG_TYPE "mccodeemitter"
+
 namespace {
 class X86MCCodeEmitter : public MCCodeEmitter {
   X86MCCodeEmitter(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
@@ -285,7 +286,7 @@ enum GlobalOffsetTableExprKind {
 };
 static GlobalOffsetTableExprKind
 StartsWithGlobalOffsetTable(const MCExpr *Expr) {
-  const MCExpr *RHS = 0;
+  const MCExpr *RHS = nullptr;
   if (Expr->getKind() == MCExpr::Binary) {
     const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
     Expr = BE->getLHS();
@@ -316,7 +317,7 @@ void X86MCCodeEmitter::
 EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
               MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
               SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
-  const MCExpr *Expr = NULL;
+  const MCExpr *Expr = nullptr;
   if (DispOp.isImm()) {
     // If this is a simple integer displacement that doesn't require a
     // relocation, emit it now.
@@ -339,7 +340,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
     if (Kind != GOT_None) {
       assert(ImmOffset == 0);

-      FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+      if (Size == 8) {
+        FixupKind = MCFixupKind(X86::reloc_global_offset_table8);
+      } else {
+        assert(Size == 4);
+        FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+      }
+
       if (Kind == GOT_Normal)
         ImmOffset = CurByte;
     } else if (Expr->getKind() == MCExpr::SymbolRef) {
@@ -1421,6 +1428,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
   case X86II::MRM6r: case X86II::MRM7r: {
     if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp; + if (HasEVEX_K) // Skip writemask + ++CurOp; EmitByte(BaseOpcode, CurByte, OS); uint64_t Form = TSFlags & X86II::FormMask; EmitRegModRMByte(MI.getOperand(CurOp++), @@ -1436,6 +1445,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM6m: case X86II::MRM7m: { if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). ++CurOp; + if (HasEVEX_K) // Skip writemask + ++CurOp; EmitByte(BaseOpcode, CurByte, OS); uint64_t Form = TSFlags & X86II::FormMask; EmitMemModRMByte(MI, CurOp, (Form == X86II::MRMXm) ? 0 : Form-X86II::MRM0m, diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 09fdb9c..e63036c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -27,6 +27,12 @@ #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" +#if _MSC_VER +#include +#endif + +using namespace llvm; + #define GET_REGINFO_MC_DESC #include "X86GenRegisterInfo.inc" @@ -36,13 +42,6 @@ #define GET_SUBTARGETINFO_MC_DESC #include "X86GenSubtargetInfo.inc" -#if _MSC_VER -#include -#endif - -using namespace llvm; - - std::string X86_MC::ParseX86Triple(StringRef TT) { Triple TheTriple(TT); std::string FS; @@ -230,14 +229,8 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU, } std::string CPUName = CPU; - if (CPUName.empty()) { -#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ - || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) - CPUName = sys::getHostCPUName(); -#else + if (CPUName.empty()) CPUName = "generic"; -#endif - } MCSubtargetInfo *X = new MCSubtargetInfo(); InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS); @@ -294,13 +287,13 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { // Initial state of the frame pointer is esp+stackGrowth. unsigned StackPtr = is64Bit ? X86::RSP : X86::ESP; MCCFIInstruction Inst = MCCFIInstruction::createDefCfa( - 0, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth); + nullptr, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth); MAI->addInitialFrameState(Inst); // Add return address to move list unsigned InstPtr = is64Bit ? 
X86::RIP : X86::EIP; MCCFIInstruction Inst2 = MCCFIInstruction::createOffset( - 0, MRI.getDwarfRegNum(InstPtr, true), stackGrowth); + nullptr, MRI.getDwarfRegNum(InstPtr, true), stackGrowth); MAI->addInitialFrameState(Inst2); return MAI; @@ -365,13 +358,16 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, bool NoExecStack) { Triple TheTriple(TT); - if (TheTriple.isOSBinFormatMachO()) + switch (TheTriple.getObjectFormat()) { + default: llvm_unreachable("unsupported object format"); + case Triple::MachO: return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll); - - if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF()) - return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll); - - return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); + case Triple::COFF: + assert(TheTriple.isOSWindows() && "only Windows COFF is supported"); + return createX86WinCOFFStreamer(Ctx, MAB, _Emitter, _OS, RelaxAll); + case Triple::ELF: + return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack); + } } static MCInstPrinter *createX86MCInstPrinter(const Target &T, @@ -384,7 +380,7 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T, return new X86ATTInstPrinter(MAI, MII, MRI); if (SyntaxVariant == 1) return new X86IntelInstPrinter(MAI, MII, MRI); - return 0; + return nullptr; } static MCRelocationInfo *createX86MCRelocationInfo(StringRef TT, diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 41ae435..8fe40fd 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -26,6 +26,7 @@ class MCObjectWriter; class MCRegisterInfo; class MCSubtargetInfo; class MCRelocationInfo; +class MCStreamer; class Target; class StringRef; class raw_ostream; @@ -84,6 +85,14 @@ MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, StringRef TT, StringRef CPU); +/// createX86WinCOFFStreamer - Construct an X86 Windows COFF machine code +/// streamer which will generate PE/COFF format object files. +/// +/// Takes ownership of \p AB and \p CE. +MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, + MCCodeEmitter *CE, raw_ostream &OS, + bool RelaxAll); + /// createX86MachObjectWriter - Construct an X86 Mach-O object writer. MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS, bool Is64Bit, diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp index f2023e3..3b81d53 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp @@ -40,7 +40,7 @@ public: // FIXME: check that the value is actually the same. 
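// Editor's note: a worked example for the addInitialFrameState() calls in
// createX86MCAsmInfo() above (arithmetic only; stackGrowth is the negative,
// pointer-sized growth amount defined earlier in that function, -4 on
// x86-32). Right after a CALL the return address is the only thing on the
// stack, so:
//   createDefCfa(nullptr, DwarfReg(ESP), -stackGrowth)  // CFA = esp + 4
//   createOffset(nullptr, DwarfReg(EIP), stackGrowth)   // RA saved at CFA - 4
// i.e. the canonical frame address sits just above the saved return address,
// which is the state an unwinder expects on function entry.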
     if (Sym->isVariable() == false)
       Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));

-    const MCExpr *Expr = 0;
+    const MCExpr *Expr = nullptr;

     switch(RelType) {
     case X86_64_RELOC_TLV:
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 1a35ced..ead3338 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -146,13 +146,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
     const MCSymbol *A = &Target.getSymA()->getSymbol();
     if (A->isTemporary())
       A = &A->AliasedSymbol();
-    MCSymbolData &A_SD = Asm.getSymbolData(*A);
+    const MCSymbolData &A_SD = Asm.getSymbolData(*A);
     const MCSymbolData *A_Base = Asm.getAtom(&A_SD);

     const MCSymbol *B = &Target.getSymB()->getSymbol();
     if (B->isTemporary())
       B = &B->AliasedSymbol();
-    MCSymbolData &B_SD = Asm.getSymbolData(*B);
+    const MCSymbolData &B_SD = Asm.getSymbolData(*B);
     const MCSymbolData *B_Base = Asm.getAtom(&B_SD);

     // Neither symbol can be modified.
@@ -186,9 +186,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
                                                   false);

     Value += Writer->getSymbolAddress(&A_SD, Layout) -
-      (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
+             (!A_Base ? 0 : Writer->getSymbolAddress(A_Base, Layout));
     Value -= Writer->getSymbolAddress(&B_SD, Layout) -
-      (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+             (!B_Base ? 0 : Writer->getSymbolAddress(B_Base, Layout));

     if (A_Base) {
       Index = A_Base->getIndex();
@@ -220,7 +220,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
       Type = MachO::X86_64_RELOC_SUBTRACTOR;
     } else {
       const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
-      MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+      const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
       const MCSymbolData *Base = Asm.getAtom(&SD);

       // Relocations inside debug sections always use local relocations when
@@ -231,7 +231,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
         const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
             Fragment->getParent()->getSection());
         if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
-          Base = 0;
+          Base = nullptr;
       }

       // x86_64 almost always uses external relocations, except when there is no
@@ -369,7 +369,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
   // See .
   const MCSymbol *A = &Target.getSymA()->getSymbol();
-  MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+  const MCSymbolData *A_SD = &Asm.getSymbolData(*A);

   if (!A_SD->getFragment())
     report_fatal_error("symbol '" + A->getName() +
@@ -382,7 +382,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
   uint32_t Value2 = 0;

   if (const MCSymbolRefExpr *B = Target.getSymB()) {
-    MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+    const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());

     if (!B_SD->getFragment())
       report_fatal_error("symbol '" + B->getSymbol().getName() +
@@ -465,7 +465,7 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
   unsigned IsPCRel = 0;

   // Get the symbol data.
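// Editor's note: background on the A - B path handled above (a property of
// the Mach-O format, not new to this patch): the writer emits a relocation
// pair, MachO::X86_64_RELOC_SUBTRACTOR naming B (the subtrahend) immediately
// followed by MachO::X86_64_RELOC_UNSIGNED naming A, and the fixed-up field
// ends up holding A - B plus the expression's constant once both apply.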
- MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + const MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); unsigned Index = SD_A->getIndex(); // We're only going to have a second symbol in pic mode and it'll be a @@ -476,7 +476,8 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer, // If this is a subtraction then we're pcrel. uint32_t FixupAddress = Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); - MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + const MCSymbolData *SD_B = + &Asm.getSymbolData(Target.getSymB()->getSymbol()); IsPCRel = 1; FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) + Target.getConstant()); @@ -524,7 +525,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer, } // Get the symbol data, if any. - MCSymbolData *SD = 0; + const MCSymbolData *SD = nullptr; if (Target.getSymA()) SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp index ffc9e8d..40af822 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp @@ -23,10 +23,8 @@ namespace llvm { namespace { class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter { - const bool Is64Bit; - public: - X86WinCOFFObjectWriter(bool Is64Bit_); + X86WinCOFFObjectWriter(bool Is64Bit); virtual ~X86WinCOFFObjectWriter(); unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, @@ -34,10 +32,9 @@ namespace { }; } -X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_) - : MCWinCOFFObjectTargetWriter(Is64Bit_ ? COFF::IMAGE_FILE_MACHINE_AMD64 : - COFF::IMAGE_FILE_MACHINE_I386), - Is64Bit(Is64Bit_) {} +X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit) + : MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64 + : COFF::IMAGE_FILE_MACHINE_I386) {} X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {} @@ -49,29 +46,46 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target, MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); - switch (FixupKind) { - case FK_PCRel_4: - case X86::reloc_riprel_4byte: - case X86::reloc_riprel_4byte_movq_load: - return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32; - case FK_Data_4: - case X86::reloc_signed_4byte: - if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32) - return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32NB : - COFF::IMAGE_REL_I386_DIR32NB; - return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32; - case FK_Data_8: - if (Is64Bit) + if (getMachine() == COFF::IMAGE_FILE_MACHINE_AMD64) { + switch (FixupKind) { + case FK_PCRel_4: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return COFF::IMAGE_REL_AMD64_REL32; + case FK_Data_4: + case X86::reloc_signed_4byte: + if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32) + return COFF::IMAGE_REL_AMD64_ADDR32NB; + return COFF::IMAGE_REL_AMD64_ADDR32; + case FK_Data_8: return COFF::IMAGE_REL_AMD64_ADDR64; - llvm_unreachable("unsupported relocation type"); - case FK_SecRel_2: - return Is64Bit ? COFF::IMAGE_REL_AMD64_SECTION - : COFF::IMAGE_REL_I386_SECTION; - case FK_SecRel_4: - return Is64Bit ? 
COFF::IMAGE_REL_AMD64_SECREL : COFF::IMAGE_REL_I386_SECREL; - default: - llvm_unreachable("unsupported relocation type"); - } + case FK_SecRel_2: + return COFF::IMAGE_REL_AMD64_SECTION; + case FK_SecRel_4: + return COFF::IMAGE_REL_AMD64_SECREL; + default: + llvm_unreachable("unsupported relocation type"); + } + } else if (getMachine() == COFF::IMAGE_FILE_MACHINE_I386) { + switch (FixupKind) { + case FK_PCRel_4: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return COFF::IMAGE_REL_I386_REL32; + case FK_Data_4: + case X86::reloc_signed_4byte: + if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32) + return COFF::IMAGE_REL_I386_DIR32NB; + return COFF::IMAGE_REL_I386_DIR32; + case FK_SecRel_2: + return COFF::IMAGE_REL_I386_SECTION; + case FK_SecRel_4: + return COFF::IMAGE_REL_I386_SECREL; + default: + llvm_unreachable("unsupported relocation type"); + } + } else + llvm_unreachable("Unsupported COFF machine type."); } MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS, diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp new file mode 100644 index 0000000..c62fd0a --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -0,0 +1,51 @@ +//===-- X86WinCOFFStreamer.cpp - X86 Target WinCOFF Streamer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86MCTargetDesc.h" +#include "llvm/MC/MCWinCOFFStreamer.h" + +using namespace llvm; + +namespace { +class X86WinCOFFStreamer : public MCWinCOFFStreamer { +public: + X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE, + raw_ostream &OS) + : MCWinCOFFStreamer(C, AB, *CE, OS) { } + + void EmitWin64EHHandlerData() override; + void FinishImpl() override; +}; + +void X86WinCOFFStreamer::EmitWin64EHHandlerData() { + MCStreamer::EmitWin64EHHandlerData(); + + // We have to emit the unwind info now, because this directive + // actually switches to the .xdata section! + MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo()); +} + +void X86WinCOFFStreamer::FinishImpl() { + EmitFrames(nullptr); + EmitW64Tables(); + + MCWinCOFFStreamer::FinishImpl(); +} +} + +namespace llvm { +MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, + MCCodeEmitter *CE, raw_ostream &OS, + bool RelaxAll) { + X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS); + S->getAssembler().setRelaxAll(RelaxAll); + return S; +} +} + diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 18e6845..64e8ea8 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -30,9 +30,9 @@ class X86TargetMachine; FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); -/// createGlobalBaseRegPass - This pass initializes a global base +/// createX86GlobalBaseRegPass - This pass initializes a global base /// register for PIC on x86-32. 
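// Editor's note: a caller-side sketch for the new createX86WinCOFFStreamer()
// defined above (the calling code is hypothetical; per the declaration's
// comment in X86MCTargetDesc.h, the streamer takes ownership of the backend
// and the code emitter):
//
//   MCStreamer *S = createX86WinCOFFStreamer(Ctx, *MAB, CE, OS,
//                                            /*RelaxAll=*/false);
//   // Emit through S; destroying S releases MAB and CE too, so they must
//   // not be freed separately.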
-FunctionPass* createGlobalBaseRegPass(); +FunctionPass* createX86GlobalBaseRegPass(); /// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses /// to local-dynamic TLS variables so that the TLS base address for the module diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 78edcf0..6912b57 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -166,6 +166,8 @@ def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", "Call register indirect">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; +def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", + "LEA instruction with certain arguments is slow">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -195,8 +197,7 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>; def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; def : Proc<"pentium4", [FeatureSSE2]>; def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; -def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem]>; + // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureSSE3, FeatureSlowBTMem]>; @@ -227,6 +228,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM, FeaturePCLMUL, FeatureAES, FeatureCallRegIndirect, FeaturePRFCHW, + FeatureSlowLEA, FeatureSlowBTMem, FeatureFastUAMem]>; // "Arrandale" along with corei3 and corei5 def : ProcessorModel<"corei7", SandyBridgeModel, @@ -329,6 +331,13 @@ def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, FeaturePOPCNT, FeatureBMI, FeatureTBM, FeatureFMA, FeatureFSGSBase]>; +// Excavator +def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, + FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, + FeaturePCLMUL, FeatureF16C, FeatureLZCNT, + FeaturePOPCNT, FeatureBMI, FeatureBMI2, + FeatureTBM, FeatureFMA, FeatureFSGSBase]>; + def : Proc<"geode", [Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureMMX]>; @@ -336,6 +345,20 @@ def : Proc<"winchip2", [Feature3DNow]>; def : Proc<"c3", [Feature3DNow]>; def : Proc<"c3-2", [FeatureSSE1]>; +// We also provide a generic 64-bit specific x86 processor model which tries to +// be good for modern chips without enabling instruction set encodings past the +// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and +// modern 64-bit x86 chip, and enables features that are generally beneficial. +// +// We currently use the Sandy Bridge model as the default scheduling model as +// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which +// covers a huge swath of x86 processors. If there are specific scheduling +// knobs which need to be tuned differently for AMD chips, we might consider +// forming a common base for them. 
+def : ProcessorModel<"x86-64", SandyBridgeModel, + [FeatureSSE2, Feature64Bit, FeatureSlowBTMem, + FeatureFastUAMem]>; + //===----------------------------------------------------------------------===// // Register File Description //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index fb66acc..1dca568 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -15,7 +15,6 @@ #include "X86AsmPrinter.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" -#include "X86COFFMachineModuleInfo.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "llvm/ADT/SmallString.h" @@ -102,7 +101,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = P.MMI->getObjFileInfo().getGVStubEntry(Sym); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage()); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){ @@ -110,14 +109,14 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, MachineModuleInfoImpl::StubValueTy &StubSym = P.MMI->getObjFileInfo().getHiddenGVStubEntry( Sym); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage()); } else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) { MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$stub"); MachineModuleInfoImpl::StubValueTy &StubSym = P.MMI->getObjFileInfo().getFnStubEntry(Sym); - if (StubSym.getPointer() == 0) + if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage()); } @@ -174,7 +173,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO, static void printOperand(X86AsmPrinter &P, const MachineInstr *MI, unsigned OpNo, raw_ostream &O, - const char *Modifier = 0, unsigned AsmVariant = 0); + const char *Modifier = nullptr, unsigned AsmVariant = 0); /// printPCRelImm - This is used to print an immediate value that ends up /// being encoded as a pc-relative value. 
These print slightly differently, for @@ -232,7 +231,7 @@ static void printOperand(X86AsmPrinter &P, const MachineInstr *MI, static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI, unsigned Op, raw_ostream &O, - const char *Modifier = NULL) { + const char *Modifier = nullptr) { const MachineOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg); const MachineOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg); const MachineOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp); @@ -284,7 +283,7 @@ static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI, static void printMemReference(X86AsmPrinter &P, const MachineInstr *MI, unsigned Op, raw_ostream &O, - const char *Modifier = NULL) { + const char *Modifier = nullptr) { assert(isMem(MI, Op) && "Invalid memory reference!"); const MachineOperand &Segment = MI->getOperand(Op+X86::AddrSegmentReg); if (Segment.getReg()) { @@ -296,7 +295,7 @@ static void printMemReference(X86AsmPrinter &P, const MachineInstr *MI, static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI, unsigned Op, raw_ostream &O, - const char *Modifier = NULL, + const char *Modifier = nullptr, unsigned AsmVariant = 1) { const MachineOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg); unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm(); @@ -464,7 +463,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } } - printOperand(*this, MI, OpNo, O, /*Modifier*/ 0, AsmVariant); + printOperand(*this, MI, OpNo, O, /*Modifier*/ nullptr, AsmVariant); return false; } @@ -527,6 +526,55 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { } } +static void +emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel, + MachineModuleInfoImpl::StubValueTy &MCSym) { + // L_foo$stub: + OutStreamer.EmitLabel(StubLabel); + // .indirect_symbol _foo + OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol); + + if (MCSym.getInt()) + // External to current translation unit. + OutStreamer.EmitIntValue(0, 4/*size*/); + else + // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info + // pointers need to be indirect and pc-rel. We accomplish this by + // using NLPs; however, sometimes the types are local to the file. + // We need to fill in the value for the NLP in those cases. 
+ OutStreamer.EmitValue( + MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()), + 4 /*size*/); +} + +void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) { + SmallString<128> Directive; + raw_svector_ostream OS(Directive); + StringRef Name = Sym->getName(); + + if (Subtarget->isTargetKnownWindowsMSVC()) + OS << " /EXPORT:"; + else + OS << " -export:"; + + if ((Subtarget->isTargetWindowsGNU() || Subtarget->isTargetWindowsCygwin()) && + (Name[0] == getDataLayout().getGlobalPrefix())) + Name = Name.drop_front(); + + OS << Name; + + if (IsData) { + if (Subtarget->isTargetKnownWindowsMSVC()) + OS << ",DATA"; + else + OS << ",data"; + } + + OS.flush(); + OutStreamer.EmitBytes(Directive); +} void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetMacho()) { @@ -547,11 +595,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { 5, SectionKind::getMetadata()); OutStreamer.SwitchSection(TheSection); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + for (const auto &Stub : Stubs) { // L_foo$stub: - OutStreamer.EmitLabel(Stubs[i].first); + OutStreamer.EmitLabel(Stub.first); // .indirect_symbol _foo - OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(), + OutStreamer.EmitSymbolAttribute(Stub.second.getPointer(), MCSA_IndirectSymbol); // hlt; hlt; hlt; hlt; hlt hlt = 0xf4. const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4"; @@ -571,44 +619,24 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { SectionKind::getMetadata()); OutStreamer.SwitchSection(TheSection); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$non_lazy_ptr: - OutStreamer.EmitLabel(Stubs[i].first); - // .indirect_symbol _foo - MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; - OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), - MCSA_IndirectSymbol); - // .long 0 - if (MCSym.getInt()) - // External to current translation unit. - OutStreamer.EmitIntValue(0, 4/*size*/); - else - // Internal to current translation unit. - // - // When we place the LSDA into the TEXT section, the type info - // pointers need to be indirect and pc-rel. We accomplish this by - // using NLPs. However, sometimes the types are local to the file. So - // we need to fill in the value for the NLP in those cases. 
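// [Editor's note — illustrative aside, not part of the patch.] The new
// emitNonLazySymbolPointer helper above captures the Mach-O non-lazy pointer
// layout that the loop below previously open-coded: a local label, an
// .indirect_symbol directive, then four bytes that are zero for symbols
// external to the translation unit (the dynamic linker fills the slot) or the
// symbol's own address for file-local ones. A small sketch rendering that
// layout as text; the symbol name is hypothetical:

#include <string>

std::string renderNonLazyPointer(const std::string &Sym, bool IsExternal) {
  std::string Out;
  Out += "L_" + Sym + "$non_lazy_ptr:\n";      // L_foo$non_lazy_ptr:
  Out += "\t.indirect_symbol _" + Sym + "\n";  // .indirect_symbol _foo
  if (IsExternal)
    Out += "\t.long 0\n";                      // dyld resolves at load time
  else
    Out += "\t.long _" + Sym + "\n";           // value known at assembly time
  return Out;
}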
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), - OutContext), 4/*size*/); - } + for (auto &Stub : Stubs) + emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); + Stubs.clear(); OutStreamer.AddBlankLine(); } Stubs = MMIMacho.GetHiddenGVStubList(); if (!Stubs.empty()) { - OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); - EmitAlignment(2); - - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - // L_foo$non_lazy_ptr: - OutStreamer.EmitLabel(Stubs[i].first); - // .long _foo - OutStreamer.EmitValue(MCSymbolRefExpr:: - Create(Stubs[i].second.getPointer(), - OutContext), 4/*size*/); - } + const MCSection *TheSection = + OutContext.getMachOSection("__IMPORT", "__pointers", + MachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + OutStreamer.SwitchSection(TheSection); + + for (auto &Stub : Stubs) + emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second); + Stubs.clear(); OutStreamer.AddBlankLine(); } @@ -630,46 +658,25 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { } if (Subtarget->isTargetCOFF()) { - X86COFFMachineModuleInfo &COFFMMI = - MMI->getObjFileInfo(); - - // Emit type information for external functions - typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator; - for (externals_iterator I = COFFMMI.externals_begin(), - E = COFFMMI.externals_end(); - I != E; ++I) { - OutStreamer.BeginCOFFSymbolDef(CurrentFnSym); - OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL); - OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION - << COFF::SCT_COMPLEX_TYPE_SHIFT); - OutStreamer.EndCOFFSymbolDef(); - } - // Necessary for dllexport support std::vector DLLExportedFns, DLLExportedGlobals; - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasDLLExportStorageClass()) - DLLExportedFns.push_back(getSymbol(I)); + for (const auto &Function : M) + if (Function.hasDLLExportStorageClass()) + DLLExportedFns.push_back(getSymbol(&Function)); - for (Module::const_global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) - if (I->hasDLLExportStorageClass()) - DLLExportedGlobals.push_back(getSymbol(I)); + for (const auto &Global : M.globals()) + if (Global.hasDLLExportStorageClass()) + DLLExportedGlobals.push_back(getSymbol(&Global)); - for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); - I != E; ++I) { - const GlobalValue *GV = I; - if (!GV->hasDLLExportStorageClass()) + for (const auto &Alias : M.aliases()) { + if (!Alias.hasDLLExportStorageClass()) continue; - while (const GlobalAlias *A = dyn_cast(GV)) - GV = A->getAliasedGlobal(); - - if (isa(GV)) - DLLExportedFns.push_back(getSymbol(I)); - else if (isa(GV)) - DLLExportedGlobals.push_back(getSymbol(I)); + if (Alias.getType()->getElementType()->isFunctionTy()) + DLLExportedFns.push_back(getSymbol(&Alias)); + else + DLLExportedGlobals.push_back(getSymbol(&Alias)); } // Output linker support code for dllexported globals on windows. 
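// [Editor's note — illustrative aside, not part of the patch.] The
// GenerateExportDirective helper introduced above replaces two copies of the
// same string building in the hunk that follows: link.exe (MSVC) takes
// " /EXPORT:name[,DATA]" in the .drectve section, GNU ld takes
// " -export:name[,data]", and on MinGW/Cygwin the data-layout global prefix
// (e.g. '_' on x86-32) is stripped from the name first. A standalone sketch
// of that logic; parameter names are stand-ins:

#include <string>

std::string exportDirective(std::string Name, bool IsMSVC,
                            bool IsMinGWOrCygwin, bool IsData,
                            char GlobalPrefix = '_') {
  if (IsMinGWOrCygwin && !Name.empty() && Name[0] == GlobalPrefix)
    Name.erase(0, 1);                       // ld wants the unprefixed name
  std::string Dir = IsMSVC ? " /EXPORT:" : " -export:";
  Dir += Name;
  if (IsData)
    Dir += IsMSVC ? ",DATA" : ",data";
  return Dir;
}
// e.g. exportDirective("_foo", /*IsMSVC=*/false, /*IsMinGWOrCygwin=*/true,
//                      /*IsData=*/true) yields " -export:foo,data".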
@@ -678,28 +685,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { static_cast(getObjFileLowering()); OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection()); - SmallString<128> name; - for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) { - if (Subtarget->isTargetKnownWindowsMSVC()) - name = " /EXPORT:"; - else - name = " -export:"; - name += DLLExportedGlobals[i]->getName(); - if (Subtarget->isTargetKnownWindowsMSVC()) - name += ",DATA"; - else - name += ",data"; - OutStreamer.EmitBytes(name); - } - for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) { - if (Subtarget->isTargetKnownWindowsMSVC()) - name = " /EXPORT:"; - else - name = " -export:"; - name += DLLExportedFns[i]->getName(); - OutStreamer.EmitBytes(name); - } + for (auto & Symbol : DLLExportedGlobals) + GenerateExportDirective(Symbol, /*IsData=*/true); + for (auto & Symbol : DLLExportedFns) + GenerateExportDirective(Symbol, /*IsData=*/false); } } @@ -715,9 +705,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) { OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); const DataLayout *TD = TM.getDataLayout(); - for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { - OutStreamer.EmitLabel(Stubs[i].first); - OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), + for (const auto &Stub : Stubs) { + OutStreamer.EmitLabel(Stub.first); + OutStreamer.EmitSymbolValue(Stub.second.getPointer(), TD->getPointerSize()); } Stubs.clear(); diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 3308cc2..e4eef5d 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -16,13 +16,15 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { - class MCStreamer; +class MCSymbol; class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { const X86Subtarget *Subtarget; StackMaps SM; + void GenerateExportDirective(const MCSymbol *Sym, bool IsData); + public: explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), SM(*this) { diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp deleted file mode 100644 index 6a6125b..0000000 --- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//===-- X86COFFMachineModuleInfo.cpp - X86 COFF MMI Impl ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is an MMI implementation for X86 COFF (windows) targets. -// -//===----------------------------------------------------------------------===// - -#include "X86COFFMachineModuleInfo.h" -using namespace llvm; - - -X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() { -} diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h deleted file mode 100644 index 0dfeb42..0000000 --- a/lib/Target/X86/X86COFFMachineModuleInfo.h +++ /dev/null @@ -1,46 +0,0 @@ -//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is an MMI implementation for X86 COFF (windows) targets. 
-// -//===----------------------------------------------------------------------===// - -#ifndef X86COFF_MACHINEMODULEINFO_H -#define X86COFF_MACHINEMODULEINFO_H - -#include "X86MachineFunctionInfo.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/CodeGen/MachineModuleInfo.h" - -namespace llvm { - class X86MachineFunctionInfo; - class DataLayout; - -/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation -/// for X86 COFF targets. -class X86COFFMachineModuleInfo : public MachineModuleInfoImpl { - DenseSet Externals; -public: - X86COFFMachineModuleInfo(const MachineModuleInfo &) {} - virtual ~X86COFFMachineModuleInfo(); - - void addExternalFunction(MCSymbol* Symbol) { - Externals.insert(Symbol); - } - - typedef DenseSet::const_iterator externals_iterator; - externals_iterator externals_begin() const { return Externals.begin(); } - externals_iterator externals_end() const { return Externals.end(); } -}; - - - -} // end namespace llvm - -#endif diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h index 040da35..e76f9fd 100644 --- a/lib/Target/X86/X86CallingConv.h +++ b/lib/Target/X86/X86CallingConv.h @@ -29,33 +29,6 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &, return false; } -inline bool CC_X86_CDeclMethod_SRet(unsigned &ValNo, MVT &ValVT, MVT &LocVT, - CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) { - // Swap the order of the first two parameters if the first parameter is sret. - if (ArgFlags.isSRet()) { - assert(ValNo == 0); - assert(ValVT == MVT::i32); - State.AllocateStack(8, 4); - State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, 4, LocVT, LocInfo)); - - // Indicate that we need to swap the order of the first and second - // parameters by "allocating" register zero. There are no register - // parameters with cdecl methods, so we can use this to communicate to the - // next call. - State.AllocateReg(1); - return true; - } else if (ValNo == 1 && State.isAllocated(1)) { - assert(ValVT == MVT::i32 && "non-i32-sized this param unsupported"); - // Stack was already allocated while processing sret. - State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, 0, LocVT, LocInfo)); - return true; - } - - // All other args use the C calling convention. - return false; -} - } // End llvm namespace #endif diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 1cfd827..0824d4e 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -485,15 +485,6 @@ def CC_X86_32_ThisCall_Win : CallingConv<[ CCDelegateTo ]>; -def CC_X86_CDeclMethod : CallingConv<[ - // Promote i8/i16 arguments to i32. 
- CCIfType<[i8, i16], CCPromoteToType>, - - CCCustom<"CC_X86_CDeclMethod_SRet">, - - CCDelegateTo -]>; - def CC_X86_32_ThisCall : CallingConv<[ CCIfSubtarget<"isTargetCygMing()", CCDelegateTo>, CCDelegateTo @@ -583,7 +574,6 @@ def CC_Intel_OCL_BI : CallingConv<[ def CC_X86_32 : CallingConv<[ CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo>, - CCIfCC<"CallingConv::X86_CDeclMethod", CCDelegateTo>, CCIfCC<"CallingConv::Fast", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index f6c4c2e..76718d0 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-emitter" #include "X86.h" #include "X86InstrInfo.h" #include "X86JITInfo.h" @@ -36,6 +35,8 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +#define DEBUG_TYPE "x86-emitter" + STATISTIC(NumEmitted, "Number of machine instructions emitted"); namespace { @@ -52,7 +53,7 @@ namespace { public: static char ID; explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce) - : MachineFunctionPass(ID), II(0), TD(0), TM(tm), + : MachineFunctionPass(ID), II(nullptr), TD(nullptr), TM(tm), MCE(mce), PICBaseOffset(0), Is64BitMode(false), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} @@ -450,7 +451,7 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, intptr_t PCAdj) { const MachineOperand &Op3 = MI.getOperand(Op+3); int DispVal = 0; - const MachineOperand *DispForReloc = 0; + const MachineOperand *DispForReloc = nullptr; // Figure out what sort of displacement we have to handle here. if (Op3.isGlobal()) { @@ -1475,7 +1476,7 @@ void Emitter::emitInstruction(MachineInstr &MI, #ifndef NDEBUG dbgs() << "Cannot encode all operands of: " << MI << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } MCE.processDebugLoc(MI.getDebugLoc(), false); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 1aab1ea..56bcfa3 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -183,7 +183,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &ResultReg) { // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; switch (VT.getSimpleVT().SimpleTy) { default: return false; case MVT::i1: @@ -363,7 +363,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { // it works...). if (const GlobalAlias *GA = dyn_cast(GV)) if (const GlobalVariable *GVar = - dyn_cast_or_null(GA->getAliasedGlobal())) + dyn_cast_or_null(GA->getAliasee())) if (GVar->isThreadLocal()) return false; @@ -406,7 +406,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { } else { // Issue load from stub. unsigned Opc = 0; - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; X86AddressMode StubAM; StubAM.Base.Reg = AM.Base.Reg; StubAM.GV = GV; @@ -441,7 +441,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { // Now construct the final address. Note that the Disp, Scale, // and Index values may already be set here. 
AM.Base.Reg = LoadReg; - AM.GV = 0; + AM.GV = nullptr; return true; } } @@ -467,7 +467,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { SmallVector GEPs; redo_gep: - const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(V)) { // Don't walk into other basic blocks; it's possible we haven't @@ -626,7 +626,7 @@ redo_gep: /// X86SelectCallAddress - Attempt to fill in an address from the given value. /// bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { - const User *U = NULL; + const User *U = nullptr; unsigned Opcode = Instruction::UserOp1; const Instruction *I = dyn_cast(V); // Record if the value is defined in the same basic block. @@ -1247,7 +1247,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned CReg = 0, OpReg = 0; - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; @@ -1487,7 +1487,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { if (!Subtarget->hasCMov()) return false; unsigned Opc = 0; - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; if (VT == MVT::i16) { Opc = X86::CMOVE16rr; RC = &X86::GR16RegClass; @@ -1821,10 +1821,10 @@ bool X86FastISel::FastLowerArguments() { } } - static const uint16_t GPR32ArgRegs[] = { + static const MCPhysReg GPR32ArgRegs[] = { X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D }; - static const uint16_t GPR64ArgRegs[] = { + static const MCPhysReg GPR64ArgRegs[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 }; @@ -1865,7 +1865,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (cast(I)->isTailCall()) return false; - return DoSelectCall(I, 0); + return DoSelectCall(I, nullptr); } static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget, @@ -1936,8 +1936,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (!X86SelectCallAddress(Callee, CalleeAM)) return false; unsigned CalleeOp = 0; - const GlobalValue *GV = 0; - if (CalleeAM.GV != 0) { + const GlobalValue *GV = nullptr; + if (CalleeAM.GV != nullptr) { GV = CalleeAM.GV; } else if (CalleeAM.Base.Reg != 0) { CalleeOp = CalleeAM.Base.Reg; @@ -2163,7 +2163,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (Subtarget->is64Bit() && isVarArg && !isWin64) { // Count the number of XMM registers allocated. - static const uint16_t XMMArgRegs[] = { + static const MCPhysReg XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; @@ -2387,7 +2387,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { // Get opcode and regclass of the output for the given load instruction. unsigned Opc = 0; - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; switch (VT.SimpleTy) { default: return 0; case MVT::i8: @@ -2437,7 +2437,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { // If the expression is just a basereg, then we're done, otherwise we need // to emit an LEA. 
if (AM.BaseType == X86AddressMode::RegBase && - AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0) + AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr) return AM.Base.Reg; Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r; @@ -2510,7 +2510,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { // Get opcode and regclass for the given zero. unsigned Opc = 0; - const TargetRegisterClass *RC = NULL; + const TargetRegisterClass *RC = nullptr; switch (VT.SimpleTy) { default: return 0; case MVT::f32: @@ -2558,7 +2558,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, MachineInstr *Result = XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment); - if (Result == 0) return false; + if (!Result) return false; FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); MI->eraseFromParent(); diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index c2c234b..6c5b86f 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-fixup-LEAs" #include "X86.h" #include "X86InstrInfo.h" #include "X86Subtarget.h" @@ -28,6 +27,8 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; +#define DEBUG_TYPE "x86-fixup-LEAs" + STATISTIC(NumLEAs, "Number of LEA instructions created"); namespace { @@ -56,6 +57,11 @@ namespace { void processInstruction(MachineBasicBlock::iterator& I, MachineFunction::iterator MFI); + /// \brief Given a LEA instruction which is unprofitable + /// on Silvermont try to replace it with an equivalent ADD instruction + void processInstructionForSLM(MachineBasicBlock::iterator& I, + MachineFunction::iterator MFI); + /// \brief Determine if an instruction references a machine register /// and, if so, whether it reads or writes the register. RegUsageState usesRegister(MachineOperand& p, @@ -85,7 +91,7 @@ namespace { private: MachineFunction *MF; const TargetMachine *TM; - const TargetInstrInfo *TII; // Machine instruction info. + const X86InstrInfo *TII; // Machine instruction info. }; char FixupLEAPass::ID = 0; @@ -97,7 +103,7 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, MachineInstr* MI = MBBI; MachineInstr* NewMI; switch (MI->getOpcode()) { - case X86::MOV32rr: + case X86::MOV32rr: case X86::MOV64rr: { const MachineOperand& Src = MI->getOperand(1); const MachineOperand& Dest = MI->getOperand(0); @@ -123,7 +129,7 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, if (!MI->getOperand(2).isImm()) { // convertToThreeAddress will call getImm() // which requires isImm() to be true - return 0; + return nullptr; } break; case X86::ADD16rr: @@ -132,10 +138,10 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, // if src1 != src2, then convertToThreeAddress will // need to create a Virtual register, which we cannot do // after register allocation. 
- return 0; + return nullptr; } } - return TII->convertToThreeAddress(MFI, MBBI, 0); + return TII->convertToThreeAddress(MFI, MBBI, nullptr); } FunctionPass *llvm::createX86FixupLEAs() { @@ -143,9 +149,12 @@ FunctionPass *llvm::createX86FixupLEAs() { } bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { - MF = &Func; - TM = &MF->getTarget(); - TII = TM->getInstrInfo(); + TM = &Func.getTarget(); + const X86Subtarget &ST = TM->getSubtarget(); + if (!ST.LEAusesAG() && !ST.slowLEA()) + return false; + + TII = static_cast(TM->getInstrInfo()); DEBUG(dbgs() << "Start X86FixupLEAs\n";); // Process all basic blocks. @@ -211,7 +220,7 @@ MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p, InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst); Found = getPreviousInstr(CurInst, MFI); } - return 0; + return nullptr; } void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I, @@ -242,9 +251,9 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p, MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI); if (NewMI) { ++NumLEAs; - DEBUG(dbgs() << "Candidate to replace:"; MBI->dump();); + DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump();); // now to replace with an equivalent LEA... - DEBUG(dbgs() << "Replaced by: "; NewMI->dump();); + DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump();); MFI->erase(MBI); MachineBasicBlock::iterator J = static_cast (NewMI); @@ -253,10 +262,80 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p, } } +void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI) { + MachineInstr *MI = I; + const int opcode = MI->getOpcode(); + if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r && + opcode != X86::LEA64_32r) + return; + if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() || + !TII->isSafeToClobberEFLAGS(*MFI, I)) + return; + const unsigned DstR = MI->getOperand(0).getReg(); + const unsigned SrcR1 = MI->getOperand(1).getReg(); + const unsigned SrcR2 = MI->getOperand(3).getReg(); + if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR)) + return; + if (MI->getOperand(2).getImm() > 1) + return; + int addrr_opcode, addri_opcode; + switch (opcode) { + case X86::LEA16r: + addrr_opcode = X86::ADD16rr; + addri_opcode = X86::ADD16ri; + break; + case X86::LEA32r: + addrr_opcode = X86::ADD32rr; + addri_opcode = X86::ADD32ri; + break; + case X86::LEA64_32r: + case X86::LEA64r: + addrr_opcode = X86::ADD64rr; + addri_opcode = X86::ADD64ri32; + break; + default: + assert(false && "Unexpected LEA instruction"); + } + DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump();); + DEBUG(dbgs() << "FixLEA: Replaced by: ";); + MachineInstr *NewMI = 0; + const MachineOperand &Dst = MI->getOperand(0); + // Make ADD instruction for two registers writing to LEA's destination + if (SrcR1 != 0 && SrcR2 != 0) { + const MachineOperand &Src1 = MI->getOperand(SrcR1 == DstR ? 1 : 3); + const MachineOperand &Src2 = MI->getOperand(SrcR1 == DstR ? 3 : 1); + NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addrr_opcode)) + .addOperand(Dst) + .addOperand(Src1) + .addOperand(Src2); + MFI->insert(I, NewMI); + DEBUG(NewMI->dump();); + } + // Make ADD instruction for immediate + if (MI->getOperand(4).getImm() != 0) { + const MachineOperand &SrcR = MI->getOperand(SrcR1 == DstR ? 
1 : 3); + NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addri_opcode)) + .addOperand(Dst) + .addOperand(SrcR) + .addImm(MI->getOperand(4).getImm()); + MFI->insert(I, NewMI); + DEBUG(NewMI->dump();); + } + if (NewMI) { + MFI->erase(I); + I = static_cast(NewMI); + } +} + bool FixupLEAPass::processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI) { - for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) - processInstruction(I, MFI); + for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) { + if (TM->getSubtarget().isSLM()) + processInstructionForSLM(I, MFI); + else + processInstruction(I, MFI); + } return false; } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 7955ade..c8a3ab3 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -23,7 +23,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-codegen" #include "X86.h" #include "X86InstrInfo.h" #include "llvm/ADT/DepthFirstIterator.h" @@ -45,6 +44,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "x86-codegen" + STATISTIC(NumFXCH, "Number of fxch instructions inserted"); STATISTIC(NumFP , "Number of floating point instructions"); @@ -430,7 +431,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { if (FPInstClass == X86II::NotFP) continue; // Efficiently ignore non-fp insts! - MachineInstr *PrevMI = 0; + MachineInstr *PrevMI = nullptr; if (I != BB.begin()) PrevMI = std::prev(I); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index f0ad4d1..4c1374f 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -182,7 +182,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } - MachineInstr *MI = NULL; + MachineInstr *MI = nullptr; if (UseLEA) { MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), @@ -204,7 +204,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, /// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. static void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = NULL) { + unsigned StackPtr, uint64_t *NumBytes = nullptr) { if (MBBI == MBB.begin()) return; MachineBasicBlock::iterator PI = std::prev(MBBI); @@ -225,11 +225,12 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, } } -/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator. +/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower +/// iterator. static void mergeSPUpdatesDown(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = NULL) { + unsigned StackPtr, uint64_t *NumBytes = nullptr) { // FIXME: THIS ISN'T RUN!!! return; @@ -257,19 +258,19 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB, } /// mergeSPUpdates - Checks the instruction before/after the passed -/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and the -/// stack adjustment is returned as a positive value for ADD/LEA and a negative for -/// SUB. +/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and +/// the stack adjustment is returned as a positive value for ADD/LEA and a +/// negative for SUB. 
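// [Editor's note — illustrative aside, not part of the patch.] The
// processInstructionForSLM routine added above implements the FeatureSlowLEA
// path introduced in X86.td: an LEA that Silvermont executes slowly is
// rewritten as up to two ADDs when that is legal — no segment operand, an
// immediate displacement, scale at most 1, EFLAGS dead at that point (ADD
// clobbers flags, LEA does not), and the destination equal to one of the
// sources so no extra register is needed. Schematically:
//
//     leal 16(%eax,%ebx), %eax     ; slow form on SLM
// becomes
//     addl %ebx, %eax              ; register + register
//     addl $16, %eax               ; then the displacement
//
// A compilable restatement of the qualifying test (register 0 means "absent",
// as in the code above; the immediate-displacement check is kept separate):

bool slmLEAQualifies(unsigned Scale, bool HasSegment, bool FlagsDead,
                     unsigned Dst, unsigned Src1, unsigned Src2) {
  if (HasSegment || !FlagsDead)
    return false;
  if (Scale > 1)                   // a scaled index still needs the LEA
    return false;
  return (Src1 != 0 && Src1 == Dst) || (Src2 != 0 && Src2 == Dst);
}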
static int mergeSPUpdates(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, - bool doMergeWithPrevious) { + MachineBasicBlock::iterator &MBBI, unsigned StackPtr, + bool doMergeWithPrevious) { if ((doMergeWithPrevious && MBBI == MBB.begin()) || (!doMergeWithPrevious && MBBI == MBB.end())) return 0; MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; - MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : std::next(MBBI); + MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr + : std::next(MBBI); unsigned Opc = PI->getOpcode(); int Offset = 0; @@ -366,8 +367,10 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createOffset(0, DwarfReg, Offset)); - BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, + Offset)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } } @@ -446,7 +449,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { !MFI->adjustsStack() && // No calls. !IsWin64 && // Win64 has no Red Zone !usesTheStack(MF) && // Don't push and pop. - !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack + !MF.shouldSplitStack()) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); @@ -511,15 +514,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Define the current CFA rule to use the provided offset. assert(StackSize); unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(0, 2 * stackGrowth)); - BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION)) + MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); // Change the rule for the FramePtr to be an "offset" rule. unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(0, DwarfFramePtr, 2 * stackGrowth)); - BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION)) + MCCFIInstruction::createOffset(nullptr, + DwarfFramePtr, 2 * stackGrowth)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } @@ -534,8 +538,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Define the current CFA to use the EBP/RBP register. unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaRegister(0, DwarfFramePtr)); - BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION)) + MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } @@ -564,7 +568,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { assert(StackSize); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset)); - BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); StackOffset += stackGrowth; } @@ -698,9 +702,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Define the current CFA rule to use the provided offset. 
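// [Editor's note — illustrative aside, not part of the patch.] The prologue
// hunks above switch the CFI pseudo from X86::CFI_INSTRUCTION to the
// target-independent TargetOpcode::CFI_INSTRUCTION and pass nullptr labels;
// the directives they describe are the usual framed-prologue sequence. On
// x86-64 (stackGrowth == -8) it assembles to roughly:
//
//     pushq %rbp
//     .cfi_def_cfa_offset 16        ; CFA is now 16 bytes above %rsp
//     .cfi_offset %rbp, -16         ; %rbp saved at CFA-16
//     movq %rsp, %rbp
//     .cfi_def_cfa_register %rbp    ; CFA now tracked through %rbp
//
// The offset arithmetic, as a tiny checked helper:

// CFA distance (bytes above %rsp) once the return address plus N saved
// 8-byte registers are on the stack — the value fed to .cfi_def_cfa_offset.
constexpr int cfaOffsetAfterPushes(int NumSavedRegs) {
  return 8 * (NumSavedRegs + 1);
}
static_assert(cfaOffsetAfterPushes(1) == 16, "push %rbp gives CFA = rsp+16");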
assert(StackSize); unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(0, -StackSize + stackGrowth)); + MCCFIInstruction::createDefCfaOffset(nullptr, + -StackSize + stackGrowth)); - BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION)) + BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); } @@ -905,7 +910,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { +int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { const X86RegisterInfo *RegInfo = static_cast(MF.getTarget().getRegisterInfo()); const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1170,6 +1176,15 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { !STI.isTargetWin32() && !STI.isTargetWin64() && !STI.isTargetFreeBSD()) report_fatal_error("Segmented stacks not supported on this platform."); + // Eventually StackSize will be calculated by a link-time pass; which will + // also decide whether checking code needs to be injected into this particular + // prologue. + StackSize = MFI->getStackSize(); + + // Do not generate a prologue for functions with a stack of size zero + if (StackSize == 0) + return; + MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); X86MachineFunctionInfo *X86FI = MF.getInfo(); @@ -1194,11 +1209,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MF.push_front(allocMBB); MF.push_front(checkMBB); - // Eventually StackSize will be calculated by a link-time pass; which will - // also decide whether checking code needs to be injected into this particular - // prologue. - StackSize = MFI->getStackSize(); - // When the frame size is less than 256 we just compare the stack // boundary directly to the value of the stack pointer, per gcc. bool CompareStackPointer = StackSize < kSplitStackAvailable; @@ -1256,22 +1266,23 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); } else if (STI.isTargetDarwin()) { - // TlsOffset doesn't fit into a mod r/m byte so we need an extra register + // TlsOffset doesn't fit into a mod r/m byte so we need an extra register. unsigned ScratchReg2; bool SaveScratch2; if (CompareStackPointer) { - // The primary scratch register is available for holding the TLS offset + // The primary scratch register is available for holding the TLS offset. ScratchReg2 = GetScratchRegister(Is64Bit, MF, true); SaveScratch2 = false; } else { // Need to use a second register to hold the TLS offset ScratchReg2 = GetScratchRegister(Is64Bit, MF, false); - // Unfortunately, with fastcc the second scratch register may hold an arg + // Unfortunately, with fastcc the second scratch register may hold an + // argument. SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2); } - // If Scratch2 is live-in then it needs to be saved + // If Scratch2 is live-in then it needs to be saved. 
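// [Editor's note — illustrative aside, not part of the patch.] The hunk above
// also hoists the StackSize computation ahead of the new early-exit, so leaf
// frames of size zero no longer get a segmented-stack check at all. The check
// compares the (possibly adjusted) stack pointer against a limit kept in
// thread-local storage; schematically, and quoting the x86-64 Linux TLS slot
// (%fs:0x70) from memory rather than from this patch:
//
//   checkMBB:
//     leaq  -StackSize(%rsp), %r11  ; skipped when StackSize < 256 —
//     cmpq  %fs:0x70, %r11          ;   then %rsp is compared directly,
//     ja    .Lcontinue              ;   per the CompareStackPointer flag above
//   allocMBB:
//     movq  $StackSize, %r10        ; arguments for the runtime
//     movq  $ArgSize,   %r11
//     callq __morestack             ; grows the stack, then returns
//     retq
//   .Lcontinue:
//     ...normal prologue...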
assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) && "Scratch register is live-in and not saved"); @@ -1348,14 +1359,14 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) /// /// CheckStack: -/// temp0 = sp - MaxStack -/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +/// temp0 = sp - MaxStack +/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart /// OldStart: -/// ... +/// ... /// IncStack: -/// call inc_stack # doubles the stack space -/// temp0 = sp - MaxStack -/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart +/// call inc_stack # doubles the stack space +/// temp0 = sp - MaxStack +/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { const X86InstrInfo &TII = *TM.getInstrInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1514,7 +1525,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; - MachineInstr *New = 0; + MachineInstr *New = nullptr; if (Opcode == TII.getCallFrameSetupOpcode()) { New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr) diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index f0db8cb..208bb8b 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -47,7 +47,7 @@ public: void adjustForHiPEPrologue(MachineFunction &MF) const override; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const override; + RegScavenger *RS = nullptr) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3e45adb..74386d3 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" @@ -36,6 +35,8 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +#define DEBUG_TYPE "x86-isel" + STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// @@ -70,17 +71,18 @@ namespace { X86ISelAddressMode() : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), - Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0), - SymbolFlags(X86II::MO_NO_FLAG) { + Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), + JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) { } bool hasSymbolicDisplacement() const { - return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0; + return GV != nullptr || CP != nullptr || ES != nullptr || + JT != -1 || BlockAddr != nullptr; } bool hasBaseOrIndexReg() const { return BaseType == FrameIndexBase || - IndexReg.getNode() != 0 || Base_Reg.getNode() != 0; + IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; } /// isRIPRelative - Return true if this addressing mode is already RIP @@ -102,14 +104,14 @@ namespace { void dump() { dbgs() << "X86ISelAddressMode " << this << '\n'; dbgs() << "Base_Reg "; - if (Base_Reg.getNode() != 0) + if 
(Base_Reg.getNode()) Base_Reg.getNode()->dump(); else dbgs() << "nul"; dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n' << " Scale" << Scale << '\n' << "IndexReg "; - if (IndexReg.getNode() != 0) + if (IndexReg.getNode()) IndexReg.getNode()->dump(); else dbgs() << "nul"; @@ -160,6 +162,13 @@ namespace { return "X86 DAG->DAG Instruction Selection"; } + bool runOnMachineFunction(MachineFunction &MF) override { + // Reset the subtarget each time through. + Subtarget = &TM.getSubtarget(); + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } + void EmitFunctionEntryCode() override; bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; @@ -374,14 +383,13 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, else Ops.push_back(Chain.getOperand(i)); SDValue NewChain = - CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), - MVT::Other, &Ops[0], Ops.size()); + CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops); Ops.clear(); Ops.push_back(NewChain); } for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i) Ops.push_back(OrigChain.getOperand(i)); - CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops); CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); @@ -390,7 +398,7 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, Ops.push_back(SDValue(Load.getNode(), 1)); for (unsigned i = 1, e = NumOps; i != e; ++i) Ops.push_back(Call.getOperand(i)); - CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps); + CurDAG->UpdateNodeOperands(Call.getNode(), Ops); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -612,7 +620,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ // gs:0 (or fs:0 on X86-64) contains its own address. // For more information see http://people.redhat.com/drepper/tls.pdf if (ConstantSDNode *C = dyn_cast(Address)) - if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 && + if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr && Subtarget->isTargetLinux()) switch (N->getPointerInfo().getAddrSpace()) { case 256: @@ -733,7 +741,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { // a smaller encoding and avoids a scaled-index. 
if (AM.Scale == 2 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0) { + AM.Base_Reg.getNode() == nullptr) { AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } @@ -745,8 +753,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { Subtarget->is64Bit() && AM.Scale == 1 && AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && - AM.IndexReg.getNode() == 0 && + AM.Base_Reg.getNode() == nullptr && + AM.IndexReg.getNode() == nullptr && AM.SymbolFlags == X86II::MO_NO_FLAG && AM.hasSymbolicDisplacement()) AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); @@ -926,7 +934,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, APInt MaskedHighBits = APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(X, KnownZero, KnownOne); + DAG.computeKnownBits(X, KnownZero, KnownOne); if (MaskedHighBits != KnownZero) return true; // We've identified a pattern that can be transformed into a single shift @@ -1009,7 +1017,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::FrameIndex: if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && + AM.Base_Reg.getNode() == nullptr && (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { AM.BaseType = X86ISelAddressMode::FrameIndexBase; AM.Base_FrameIndex = cast(N)->getIndex(); @@ -1018,7 +1026,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, break; case ISD::SHL: - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; if (ConstantSDNode @@ -1052,7 +1060,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::SRL: { // Scale must not be used already. - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; SDValue And = N.getOperand(0); if (And.getOpcode() != ISD::AND) break; @@ -1086,8 +1094,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case X86ISD::MUL_IMM: // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() == 0 && - AM.IndexReg.getNode() == 0) { + AM.Base_Reg.getNode() == nullptr && + AM.IndexReg.getNode() == nullptr) { if (ConstantSDNode *CN = dyn_cast(N.getNode()->getOperand(1))) if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || @@ -1237,7 +1245,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // with a constant to enable use of the scaled offset field. // Scale must not be used already. - if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break; + if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; SDValue Shift = N.getOperand(0); if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break; @@ -1276,7 +1284,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. 
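// [Editor's note — illustrative aside, not part of the patch.] Two of the
// address-matcher tricks touched above are worth spelling out. The
// "AM.Scale == 2" case re-associates [2*reg] into [reg+reg], which encodes
// smaller because it needs no scaled index. And the X86ISD::MUL_IMM case
// folds X*[3,5,9] into X+X*[2,4,8], i.e. a single LEA:
//
//     imull $5, %eax, %ecx       ; becomes
//     leal  (%eax,%eax,4), %ecx  ; ecx = eax + 4*eax = 5*eax
//
// The multipliers that qualify are exactly those of the form 1 + {2,4,8}:

constexpr bool foldsIntoLEAScale(unsigned M) {
  return M == 3 || M == 5 || M == 9; // X*M == X + X*(M-1), M-1 a valid scale
}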
- if (AM.IndexReg.getNode() == 0) { + if (!AM.IndexReg.getNode()) { AM.IndexReg = N; AM.Scale = 1; return false; @@ -1567,7 +1575,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return NULL; + return nullptr; MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = cast(Node)->getMemOperand(); const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain}; @@ -1756,7 +1764,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { if (Node->hasAnyUseOfValue(0)) - return 0; + return nullptr; SDLoc dl(Node); @@ -1768,13 +1776,13 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { SDValue Val = Node->getOperand(2); SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) - return 0; + return nullptr; // Which index into the table. enum AtomicOpc Op; switch (Node->getOpcode()) { default: - return 0; + return nullptr; case ISD::ATOMIC_LOAD_OR: Op = OR; break; @@ -1795,7 +1803,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { unsigned Opc = 0; switch (NVT.SimpleTy) { - default: return 0; + default: return nullptr; case MVT::i8: if (isCN) Opc = AtomicOpcTbl[Op][ConstantI8]; @@ -1847,7 +1855,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { } cast(Ret)->setMemRefs(MemOp, MemOp + 1); SDValue RetVals[] = { Undef, Ret }; - return CurDAG->getMergeValues(RetVals, 2, dl).getNode(); + return CurDAG->getMergeValues(RetVals, dl).getNode(); } /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has @@ -1990,7 +1998,7 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, // Make a new TokenFactor with all the other input chains except // for the load. InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), - MVT::Other, &ChainOps[0], ChainOps.size()); + MVT::Other, ChainOps); } if (!ChainCheck) return false; @@ -2027,7 +2035,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { SDValue VMask = Node->getOperand(5); ConstantSDNode *Scale = dyn_cast(Node->getOperand(6)); if (!Scale) - return 0; + return nullptr; SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(), MVT::Other); @@ -2058,7 +2066,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if (Node->isMachineOpcode()) { DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); Node->setNodeId(-1); - return NULL; // Already selected. + return nullptr; // Already selected. } switch (Opcode) { @@ -2108,7 +2116,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *RetVal = SelectGather(Node, Opc); if (RetVal) // We already called ReplaceUses inside SelectGather. - return NULL; + return nullptr; break; } } @@ -2259,7 +2267,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1)); ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2)); - return NULL; + return nullptr; } case ISD::SMUL_LOHI: @@ -2386,7 +2394,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the low half of the result, if it is needed. 
if (!SDValue(Node, 0).use_empty()) { - if (ResLo.getNode() == 0) { + if (!ResLo.getNode()) { assert(LoReg && "Register for low half is not defined!"); ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT, InFlag); @@ -2397,7 +2405,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - if (ResHi.getNode() == 0) { + if (!ResHi.getNode()) { assert(HiReg && "Register for high half is not defined!"); ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT, InFlag); @@ -2407,7 +2415,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n'); } - return NULL; + return nullptr; } case ISD::SDIVREM: @@ -2575,7 +2583,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } - return NULL; + return nullptr; } case X86ISD::CMP: @@ -2632,7 +2640,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), SDValue(NewNode, 0)); - return NULL; + return nullptr; } // For example, "testl %eax, $2048" to "testb %ah, $8". @@ -2669,7 +2677,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), SDValue(NewNode, 0)); - return NULL; + return nullptr; } // For example, "testl %eax, $32776" to "testw %ax, $32776". @@ -2691,7 +2699,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), SDValue(NewNode, 0)); - return NULL; + return nullptr; } // For example, "testq %rax, $268468232" to "testl %eax, $268468232". @@ -2713,7 +2721,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // one, do not call ReplaceAllUsesWith. ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)), SDValue(NewNode, 0)); - return NULL; + return nullptr; } } break; @@ -2740,7 +2748,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue StoredVal = StoreNode->getOperand(1); unsigned Opc = StoredVal->getOpcode(); - LoadSDNode *LoadNode = 0; + LoadSDNode *LoadNode = nullptr; SDValue InputChain; if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG, LoadNode, InputChain)) @@ -2772,7 +2780,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = SelectCode(Node); DEBUG(dbgs() << "=> "; - if (ResNode == NULL || ResNode == Node) + if (ResNode == nullptr || ResNode == Node) Node->dump(CurDAG); else ResNode->dump(CurDAG); @@ -2790,7 +2798,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, case 'v': // not offsetable ?? 
default: return true; case 'm': // memory - if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4)) + if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) return true; break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2a35061..cbaf44e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-isel" #include "X86ISelLowering.h" #include "Utils/X86ShuffleDecode.h" #include "X86CallingConv.h" @@ -23,6 +22,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/VariadicFunction.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -52,6 +52,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "x86-isel" + STATISTIC(NumTailCalls, "Number of tail calls"); // Forward declarations. @@ -84,7 +86,8 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal, // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, - Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk); + makeArrayRef(Vec->op_begin()+NormalizedIdxVal, + ElemsPerChunk)); SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal); SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, @@ -265,10 +268,10 @@ void X86TargetLowering::resetOperationActions() { // The _ftol2 runtime function has an unusual calling conv, which // is modeled by a special pseudo-instruction. - setLibcallName(RTLIB::FPTOUINT_F64_I64, 0); - setLibcallName(RTLIB::FPTOUINT_F32_I64, 0); - setLibcallName(RTLIB::FPTOUINT_F64_I32, 0); - setLibcallName(RTLIB::FPTOUINT_F32_I32, 0); + setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr); + setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr); + setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr); + setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr); } if (Subtarget->isTargetDarwin()) { @@ -635,15 +638,8 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (Subtarget->isOSWindows() && !Subtarget->isTargetMacho()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? - MVT::i64 : MVT::i32, Custom); - else if (TM.Options.EnableSegmentedStacks) - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? - MVT::i64 : MVT::i32, Custom); - else - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? - MVT::i64 : MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + MVT::i64 : MVT::i32, Custom); if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. 
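// [Editor's note — illustrative aside, not part of the patch.] The Select()
// cases in the X86ISelDAGToDAG.cpp hunks just above keep rewriting wide TESTs
// whose mask fits in a subregister, as their comments show: "testl %eax,
// $2048" becomes "testb %ah, $8" (0x800 occupies only bits 8-15), "testl
// %eax, $32776" becomes "testw %ax, $32776", and "testq %rax, $268468232"
// becomes "testl %eax, $268468232". A simplified width chooser — the real
// code also rejects masks whose sign bit would change the meaning of signed
// comparisons (HasNoSignedComparisonUses):

#include <cstdint>

unsigned narrowestTestWidthBits(uint64_t Mask) {
  if ((Mask & ~UINT64_C(0xFF)) == 0)
    return 8;                      // testb on the low byte
  if ((Mask & ~UINT64_C(0xFF00)) == 0)
    return 8;                      // testb on the %ah-style high byte
  if ((Mask & ~UINT64_C(0xFFFF)) == 0)
    return 16;                     // testw
  if ((Mask & ~UINT64_C(0xFFFFFFFF)) == 0)
    return 32;                     // testl
  return 64;                       // keep testq
}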
@@ -832,7 +828,9 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::FRINT, VT, Expand); setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); @@ -944,6 +942,10 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::ADD, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v2i64, Custom); + setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom); + setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom); + setOperationAction(ISD::MULHU, MVT::v8i16, Legal); + setOperationAction(ISD::MULHS, MVT::v8i16, Legal); setOperationAction(ISD::SUB, MVT::v16i8, Legal); setOperationAction(ISD::SUB, MVT::v8i16, Legal); setOperationAction(ISD::SUB, MVT::v4i32, Legal); @@ -1036,6 +1038,10 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal); + + setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); + setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); + setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) { @@ -1064,11 +1070,14 @@ void X86TargetLowering::resetOperationActions() { // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); - setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); - setOperationAction(ISD::VSELECT, MVT::v2i64, Legal); + setOperationAction(ISD::VSELECT, MVT::v2f64, Custom); + setOperationAction(ISD::VSELECT, MVT::v2i64, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i32, Custom); + setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v8i16, Custom); + // There is no BLENDI for byte vectors. We don't need to custom lower + // some vselects for now. setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); - setOperationAction(ISD::VSELECT, MVT::v4i32, Legal); - setOperationAction(ISD::VSELECT, MVT::v4f32, Legal); // i8 and i16 vectors are custom, because the source register and source memory operand types are not the same width.
f32 vectors are @@ -1111,9 +1120,6 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SRA, MVT::v4i32, Custom); - - setOperationAction(ISD::SDIV, MVT::v8i16, Custom); - setOperationAction(ISD::SDIV, MVT::v4i32, Custom); } if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) { @@ -1178,8 +1184,6 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::SRA, MVT::v16i16, Custom); setOperationAction(ISD::SRA, MVT::v32i8, Custom); - setOperationAction(ISD::SDIV, MVT::v16i16, Custom); - setOperationAction(ISD::SETCC, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i16, Custom); setOperationAction(ISD::SETCC, MVT::v8i32, Custom); @@ -1189,10 +1193,10 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); - setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); - setOperationAction(ISD::VSELECT, MVT::v4i64, Legal); - setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); - setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + setOperationAction(ISD::VSELECT, MVT::v4f64, Custom); + setOperationAction(ISD::VSELECT, MVT::v4i64, Custom); + setOperationAction(ISD::VSELECT, MVT::v8i32, Custom); + setOperationAction(ISD::VSELECT, MVT::v8f32, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); @@ -1232,9 +1236,13 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::MUL, MVT::v16i16, Legal); // Don't lower v32i8 because there is no 128-bit byte mul - setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); + setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom); + setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom); + setOperationAction(ISD::MULHU, MVT::v16i16, Legal); + setOperationAction(ISD::MULHS, MVT::v16i16, Legal); - setOperationAction(ISD::SDIV, MVT::v8i32, Custom); + setOperationAction(ISD::VSELECT, MVT::v16i16, Custom); + setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); @@ -1343,7 +1351,6 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::FNEG, MVT::v8f64, Custom); setOperationAction(ISD::FMA, MVT::v8f64, Legal); setOperationAction(ISD::FMA, MVT::v16f32, Legal); - setOperationAction(ISD::SDIV, MVT::v16i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); @@ -1358,9 +1365,11 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal); setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); @@ -1392,6 +1401,8 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, 
MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom); setOperationAction(ISD::SELECT, MVT::v8f64, Custom); @@ -1474,6 +1485,8 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + if (!Subtarget->is64Bit()) + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -1498,9 +1511,9 @@ void X86TargetLowering::resetOperationActions() { if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. - setLibcallName(RTLIB::SHL_I128, 0); - setLibcallName(RTLIB::SRL_I128, 0); - setLibcallName(RTLIB::SRA_I128, 0); + setLibcallName(RTLIB::SHL_I128, nullptr); + setLibcallName(RTLIB::SRL_I128, nullptr); + setLibcallName(RTLIB::SRA_I128, nullptr); } // Combine sin / cos into one node or libcall if possible. @@ -1516,6 +1529,15 @@ void X86TargetLowering::resetOperationActions() { } } + if (Subtarget->isTargetWin64()) { + setOperationAction(ISD::SDIV, MVT::i128, Custom); + setOperationAction(ISD::UDIV, MVT::i128, Custom); + setOperationAction(ISD::SREM, MVT::i128, Custom); + setOperationAction(ISD::UREM, MVT::i128, Custom); + setOperationAction(ISD::SDIVREM, MVT::i128, Custom); + setOperationAction(ISD::UDIVREM, MVT::i128, Custom); + } + // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); @@ -1540,6 +1562,7 @@ void X86TargetLowering::resetOperationActions() { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); @@ -1738,7 +1761,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, // FIXME: Why this routine is here? Move to RegInfo! std::pair X86TargetLowering::findRepresentativeClass(MVT VT) const{ - const TargetRegisterClass *RRC = 0; + const TargetRegisterClass *RRC = nullptr; uint8_t Cost = 1; switch (VT.SimpleTy) { default: @@ -1806,8 +1829,8 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, return CCInfo.CheckReturn(Outs, RetCC_X86); } -const uint16_t *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { - static const uint16_t ScratchRegs[] = { X86::R11, 0 }; +const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { + static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; return ScratchRegs; } @@ -1930,8 +1953,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(X86ISD::RET_FLAG, dl, - MVT::Other, &RetOps[0], RetOps.size()); + return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps); } bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2285,22 +2307,25 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(ArgValue); } - // The x86-64 ABIs require that for returning structs by value we copy - // the sret argument into %rax/%eax (depending on ABI) for the return. - // Win32 requires us to put the sret argument to %eax as well. 
- // Save the argument into a virtual register so that we can access it - // from the return points. - if (MF.getFunction()->hasStructRetAttr() && - (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) { - X86MachineFunctionInfo *FuncInfo = MF.getInfo(); - unsigned Reg = FuncInfo->getSRetReturnReg(); - if (!Reg) { - MVT PtrTy = getPointerTy(); - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); - FuncInfo->setSRetReturnReg(Reg); + if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) { + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + // The x86-64 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // Win32 requires us to put the sret argument to %eax as well. + // Save the argument into a virtual register so that we can access it + // from the return points. + if (Ins[i].Flags.isSRet()) { + unsigned Reg = FuncInfo->getSRetReturnReg(); + if (!Reg) { + MVT PtrTy = getPointerTy(); + Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); + FuncInfo->setSRetReturnReg(Reg); + } + SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); + break; + } } - SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } unsigned StackSize = CCInfo.getNextStackOffset(); @@ -2320,17 +2345,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; // FIXME: We should really autogenerate these arrays - static const uint16_t GPR64ArgRegsWin64[] = { + static const MCPhysReg GPR64ArgRegsWin64[] = { X86::RCX, X86::RDX, X86::R8, X86::R9 }; - static const uint16_t GPR64ArgRegs64Bit[] = { + static const MCPhysReg GPR64ArgRegs64Bit[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 }; - static const uint16_t XMMArgRegs64Bit[] = { + static const MCPhysReg XMMArgRegs64Bit[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; - const uint16_t *GPR64ArgRegs; + const MCPhysReg *GPR64ArgRegs; unsigned NumXMMRegs = 0; if (IsWin64) { @@ -2424,13 +2449,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SaveXMMOps.push_back(Val); } MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, - MVT::Other, - &SaveXMMOps[0], SaveXMMOps.size())); + MVT::Other, SaveXMMOps)); } if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); } } @@ -2497,10 +2520,10 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, /// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call /// optimization is performed and it is required (FPDiff!=0). -static SDValue -EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, - SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, - unsigned SlotSize, int FPDiff, SDLoc dl) { +static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, + SDValue Chain, SDValue RetAddrFrIdx, + EVT PtrVT, unsigned SlotSize, + int FPDiff, SDLoc dl) { // Store the return address to the appropriate stack slot. if (!FPDiff) return Chain; // Calculate the new stack slot for the return address. 
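// (Illustrative note, not part of the patch: the sret loop above implements
// this ABI rule. For a function that returns a large struct, e.g.
//
//   struct Big { long Field[4]; };    // hypothetical example type
//   struct Big make(void);
//
// the caller passes a hidden pointer to the return slot, and the x86-64 ABIs
// (and Win32 MSVC, using %eax) require the callee to hand that same pointer
// back in %rax on return. Scanning for Ins[i].Flags.isSRet() finds the hidden
// argument wherever the calling convention assigned it, instead of assuming
// it is always argument 0.)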
@@ -2537,7 +2560,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (MF.getTarget().Options.DisableTailCalls) isTailCall = false; - if (isTailCall) { + bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall(); + if (IsMustTail) { + // Force this to be a tail call. The verifier rules are enough to ensure + // that we can lower this successfully without moving the return address + // around. + isTailCall = true; + } else if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, SR != NotStructReturn, @@ -2578,7 +2607,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; - if (isTailCall && !IsSibcall) { + if (isTailCall && !IsSibcall && !IsMustTail) { // Lower arguments at fp - stackoffset + fpdiff. X86MachineFunctionInfo *X86Info = MF.getInfo(); unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); @@ -2683,7 +2712,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); - if (StackPtr.getNode() == 0) + if (!StackPtr.getNode()) StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, @@ -2692,8 +2721,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); if (Subtarget->isPICStyleGOT()) { // ELF / PIC requires GOT in the EBX register before function calls via PLT @@ -2730,7 +2758,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // registers used and is in the range 0 - 8 inclusive. // Count the number of XMM registers allocated. - static const uint16_t XMMArgRegs[] = { + static const MCPhysReg XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 }; @@ -2742,8 +2770,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DAG.getConstant(NumXMMRegs, MVT::i8))); } - // For tail calls lower the arguments to the 'real' stack slot. - if (isTailCall) { + // For tail calls lower the arguments to the 'real' stack slots. Sibcalls + // don't need this because the eligibility check rejects calls that require + // shuffling arguments passed in memory. + if (!IsSibcall && isTailCall) { // Force all the incoming stack arguments to be loaded from the stack // before any new outgoing arguments are stored to the stack, because the // outgoing stack slots may alias the incoming argument stack slots, and @@ -2755,45 +2785,45 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector MemOpChains2; SDValue FIN; int FI = 0; - if (getTargetMachine().Options.GuaranteedTailCallOpt) { - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - if (VA.isRegLoc()) - continue; - assert(VA.isMemLoc()); - SDValue Arg = OutVals[i]; - ISD::ArgFlagsTy Flags = Outs[i].Flags; - // Create frame index. - int32_t Offset = VA.getLocMemOffset()+FPDiff; - uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; - FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); - FIN = DAG.getFrameIndex(FI, getPointerTy()); - - if (Flags.isByVal()) { - // Copy relative to framepointer. 
- SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, - RegInfo->getStackRegister(), - getPointerTy()); - Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); - - MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, - ArgChain, - Flags, DAG, dl)); - } else { - // Store relative to framepointer. - MemOpChains2.push_back( - DAG.getStore(ArgChain, dl, Arg, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); - } + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) + continue; + assert(VA.isMemLoc()); + SDValue Arg = OutVals[i]; + ISD::ArgFlagsTy Flags = Outs[i].Flags; + // Skip inalloca arguments. They don't require any work. + if (Flags.isInAlloca()) + continue; + // Create frame index. + int32_t Offset = VA.getLocMemOffset()+FPDiff; + uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); + FIN = DAG.getFrameIndex(FI, getPointerTy()); + + if (Flags.isByVal()) { + // Copy relative to framepointer. + SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); + if (!StackPtr.getNode()) + StackPtr = DAG.getCopyFromReg(Chain, dl, + RegInfo->getStackRegister(), + getPointerTy()); + Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); + + MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, + ArgChain, + Flags, DAG, dl)); + } else { + // Store relative to framepointer. + MemOpChains2.push_back( + DAG.getStore(ArgChain, dl, Arg, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); } } if (!MemOpChains2.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains2[0], MemOpChains2.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); // Store the return address to the appropriate stack slot. Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, @@ -2930,10 +2960,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This isn't right, although it's probably harmless on x86; liveouts // should be computed from returns not tail calls. Consider a void // function making a tail call to a function returning int. - return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); } - Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. @@ -3927,6 +3957,29 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) { return true; } +/// isINSERTPSMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to INSERTPS. +/// i.e., if all but one element come from the same vector. +static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) { + // TODO: Deal with AVX's VINSERTPS + if (!VT.is128BitVector() || (VT != MVT::v4f32 && VT != MVT::v4i32)) + return false; + + unsigned CorrectPosV1 = 0; + unsigned CorrectPosV2 = 0; + for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) + if (Mask[i] == i) + ++CorrectPosV1; + else if (Mask[i] == i + 4) + ++CorrectPosV2; + + if (CorrectPosV1 == 3 || CorrectPosV2 == 3) + // We have 3 elements from one vector, and one from another. + return true; + + return false; +} + // // Some special combinations that can be optimized.
// @@ -4146,6 +4199,29 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef Mask, MVT VT, bool HasInt256) { return true; } +// Match for INSERTI64x4 INSERTF64x4 instructions (src0[0], src1[0]) or +// (src1[0], src0[1]), manipulation with 256-bit sub-vectors +static bool isINSERT64x4Mask(ArrayRef Mask, MVT VT, unsigned int *Imm) { + if (!VT.is512BitVector()) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfSize = NumElts/2; + if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) { + if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) { + *Imm = 1; + return true; + } + } + if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) { + if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) { + *Imm = 0; + return true; + } + } + return false; +} + /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. @@ -4624,11 +4700,17 @@ unsigned X86::getInsertVINSERT256Immediate(SDNode *N) { return getInsertVINSERTImmediate(N, 256); } +/// isZero - Returns true if Elt is a constant integer zero +static bool isZero(SDValue V) { + ConstantSDNode *C = dyn_cast(V); + return C && C->isNullValue(); +} + /// isZeroNode - Returns true if Elt is a constant zero or a floating point /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { - if (ConstantSDNode *CN = dyn_cast(Elt)) - return CN->isNullValue(); + if (isZero(Elt)) + return true; if (ConstantFPSDNode *CFP = dyn_cast(Elt)) return CFP->getValueAPF().isPosZero(); return false; @@ -4677,7 +4759,7 @@ static bool ShouldXformToMOVHLPS(ArrayRef Mask, MVT VT) { /// isScalarLoadToVector - Returns true if the node is a scalar load that /// is promoted to a vector. It also returns the LoadSDNode by reference if /// required. -static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { +static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = nullptr) { if (N->getOpcode() != ISD::SCALAR_TO_VECTOR) return false; N = N->getOperand(0).getNode(); @@ -4803,28 +4885,24 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget, if (Subtarget->hasInt256()) { // AVX2 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, - array_lengthof(Ops)); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops); } else { // 256-bit logic and arithmetic instructions in AVX are all // floating-point, no support for integer ops. Emit fp zeroed vectors. 
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, - array_lengthof(Ops)); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops); } } else if (VT.is512BitVector()) { // AVX-512 SDValue Cst = DAG.getTargetConstant(0, MVT::i32); SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops); } else if (VT.getScalarType() == MVT::i1) { assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type"); SDValue Cst = DAG.getTargetConstant(0, MVT::i1); - SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, - Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, - Ops, VT.getVectorNumElements()); + SmallVector Ops(VT.getVectorNumElements(), Cst); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } else llvm_unreachable("Unexpected vector type"); @@ -4844,8 +4922,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG, if (VT.is256BitVector()) { if (HasInt256) { // AVX2 SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, - array_lengthof(Ops)); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops); } else { // AVX Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst); Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl); @@ -5307,7 +5384,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, return SDValue(); SDLoc dl(Op); - SDValue V(0, 0); + SDValue V; bool First = true; for (unsigned i = 0; i < 16; ++i) { bool ThisIsNonZero = (NonZeros & (1 << i)) != 0; @@ -5320,7 +5397,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, } if ((i & 1) != 0) { - SDValue ThisElt(0, 0), LastElt(0, 0); + SDValue ThisElt, LastElt; bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0; if (LastIsNonZero) { LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl, @@ -5355,7 +5432,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, return SDValue(); SDLoc dl(Op); - SDValue V(0, 0); + SDValue V; bool First = true; for (unsigned i = 0; i < 8; ++i) { bool isNonZero = (NonZeros & (1 << i)) != 0; @@ -5376,6 +5453,79 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, return V; } +/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32. 
+static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems, + unsigned NonZeros, unsigned NumNonZero, + unsigned NumZero, SelectionDAG &DAG, + const X86Subtarget *Subtarget, + const TargetLowering &TLI) { + // We know there's at least one non-zero element + unsigned FirstNonZeroIdx = 0; + SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx); + while (FirstNonZero.getOpcode() == ISD::UNDEF || + X86::isZeroNode(FirstNonZero)) { + ++FirstNonZeroIdx; + FirstNonZero = Op->getOperand(FirstNonZeroIdx); + } + + if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa(FirstNonZero.getOperand(1))) + return SDValue(); + + SDValue V = FirstNonZero.getOperand(0); + MVT VVT = V.getSimpleValueType(); + if (!Subtarget->hasSSE41() || (VVT != MVT::v4f32 && VVT != MVT::v4i32)) + return SDValue(); + + unsigned FirstNonZeroDst = + cast(FirstNonZero.getOperand(1))->getZExtValue(); + unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx; + unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx; + unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst; + + for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) { + SDValue Elem = Op.getOperand(Idx); + if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem)) + continue; + + // TODO: What else can be here? Deal with it. + if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return SDValue(); + + // TODO: Some optimizations are still possible here + // ex: Getting one element from a vector, and the rest from another. + if (Elem.getOperand(0) != V) + return SDValue(); + + unsigned Dst = cast(Elem.getOperand(1))->getZExtValue(); + if (Dst == Idx) + ++CorrectIdx; + else if (IncorrectIdx == -1U) { + IncorrectIdx = Idx; + IncorrectDst = Dst; + } else + // There was already one element with an incorrect index. + // We can't optimize this case to an insertps. + return SDValue(); + } + + if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) { + SDLoc dl(Op); + EVT VT = Op.getSimpleValueType(); + unsigned ElementMoveMask = 0; + if (IncorrectIdx == -1U) + ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4; + else + ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4; + + SDValue InsertpsMask = + DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf)); + return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask); + } + + return SDValue(); +} + /// getVShift - Return a vector logical shift node. /// static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, @@ -5480,7 +5630,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, EVT EltVT = VT.getVectorElementType(); unsigned NumElems = Elts.size(); - LoadSDNode *LDBase = NULL; + LoadSDNode *LDBase = nullptr; unsigned LastLoadedElt = -1U; // For each element in the initializer, see if we've found a load or an undef. 
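// (Illustrative sketch, not part of the patch: the masks LowerBuildVectorv4x32
// computes above follow the SSE4.1 INSERTPS imm8 layout:
//   imm[7:6] = element read from the source operand
//   imm[5:4] = slot written in the destination operand
//   imm[3:0] = result elements forced to zero
// A hypothetical helper with that shape, for orientation only:
//
//   static unsigned makeInsertPSImm(unsigned SrcElt, unsigned DstElt,
//                                   unsigned ZeroMask) {
//     return (SrcElt << 6) | (DstElt << 4) | (ZeroMask & 0xf);
//   }
//
// The same encoding reappears in getINSERTPS further down in this patch.)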
@@ -5545,8 +5695,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; SDValue ResNode = - DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, - array_lengthof(Ops), MVT::i64, + DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64, LDBase->getPointerInfo(), LDBase->getAlignment(), false/*isVolatile*/, true/*ReadMem*/, @@ -5661,7 +5810,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, unsigned ScalarSize = CVT.getSizeInBits(); if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) { - const Constant *C = 0; + const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast(Ld)) C = CI->getConstantIntValue(); else if (ConstantFPSDNode *CF = dyn_cast(Ld)) @@ -5706,6 +5855,41 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, return SDValue(); } +/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real +/// underlying vector and index. +/// +/// Modifies \p ExtractedFromVec to the real vector and returns the real +/// index. +static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, + SDValue ExtIdx) { + int Idx = cast(ExtIdx)->getZExtValue(); + if (!isa(ExtractedFromVec)) + return Idx; + + // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already + // lowered this: + // (extract_vector_elt (v8f32 %vreg1), Constant<6>) + // to: + // (extract_vector_elt (vector_shuffle<2,u,u,u> + // (extract_subvector (v8f32 %vreg0), Constant<4>), + // undef) + // Constant<0>) + // In this case the vector is the extract_subvector expression and the index + // is 2, as specified by the shuffle. + ShuffleVectorSDNode *SVOp = cast(ExtractedFromVec); + SDValue ShuffleVec = SVOp->getOperand(0); + MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); + assert(ShuffleVecVT.getVectorElementType() == + ExtractedFromVec.getSimpleValueType().getVectorElementType()); + + int ShuffleIdx = SVOp->getMaskElt(Idx); + if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { + ExtractedFromVec = ShuffleVec; + return ShuffleIdx; + } + return Idx; +} + static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); @@ -5739,34 +5923,32 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); SDValue ExtIdx = Op.getOperand(i).getOperand(1); + // Quit if non-constant index. + if (!isa(ExtIdx)) + return SDValue(); + int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx); // Quit if extracted from vector of different type. if (ExtractedFromVec.getValueType() != VT) return SDValue(); - // Quit if non-constant index. - if (!isa(ExtIdx)) - return SDValue(); - - if (VecIn1.getNode() == 0) + if (!VecIn1.getNode()) VecIn1 = ExtractedFromVec; else if (VecIn1 != ExtractedFromVec) { - if (VecIn2.getNode() == 0) + if (!VecIn2.getNode()) VecIn2 = ExtractedFromVec; else if (VecIn2 != ExtractedFromVec) // Quit if more than 2 vectors to shuffle return SDValue(); } - unsigned Idx = cast(ExtIdx)->getZExtValue(); - if (ExtractedFromVec == VecIn1) Mask[i] = Idx; else if (ExtractedFromVec == VecIn2) Mask[i] = Idx + NumElems; } - if (VecIn1.getNode() == 0) + if (!VecIn1.getNode()) return SDValue(); VecIn2 = VecIn2.getNode() ? 
VecIn2 : DAG.getUNDEF(VT); @@ -5791,24 +5973,22 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); if (ISD::isBuildVectorAllZeros(Op.getNode())) { SDValue Cst = DAG.getTargetConstant(0, MVT::i1); - SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, - Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, - Ops, VT.getVectorNumElements()); + SmallVector Ops(VT.getVectorNumElements(), Cst); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } if (ISD::isBuildVectorAllOnes(Op.getNode())) { SDValue Cst = DAG.getTargetConstant(1, MVT::i1); - SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst, - Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst }; - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, - Ops, VT.getVectorNumElements()); + SmallVector Ops(VT.getVectorNumElements(), Cst); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } bool AllContants = true; uint64_t Immediate = 0; int NonConstIdx = -1; bool IsSplat = true; + unsigned NumNonConsts = 0; + unsigned NumConsts = 0; for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { SDValue In = Op.getOperand(idx); if (In.getOpcode() == ISD::UNDEF) @@ -5816,9 +5996,13 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { if (!isa(In)) { AllContants = false; NonConstIdx = idx; + NumNonConsts++; } - else if (cast(In)->getZExtValue()) + else { + NumConsts++; + if (cast(In)->getZExtValue()) Immediate |= (1ULL << idx); + } if (In != Op.getOperand(0)) IsSplat = false; } @@ -5830,6 +6014,19 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { DAG.getIntPtrConstant(0)); } + if (NumNonConsts == 1 && NonConstIdx != 0) { + SDValue DstVec; + if (NumConsts) { + SDValue VecAsImm = DAG.getConstant(Immediate, + MVT::getIntegerVT(VT.getSizeInBits())); + DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm); + } + else + DstVec = DAG.getUNDEF(VT); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, + Op.getOperand(NonConstIdx), + DAG.getIntPtrConstant(NonConstIdx)); + } if (!IsSplat && (NonConstIdx != 0)) llvm_unreachable("Unsupported BUILD_VECTOR operation"); MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8; @@ -6043,9 +6240,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); // Build both the lower and upper subvector. - SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2); - SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2], - NumElems/2); + SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, + makeArrayRef(&V[0], NumElems/2)); + SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, + makeArrayRef(&V[NumElems / 2], NumElems/2)); // Recreate the wider vector with the lower and upper part. if (VT.is256BitVector()) @@ -6078,6 +6276,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (V.getNode()) return V; } + // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS + if (EVTBits == 32 && NumElems == 4) { + SDValue V = LowerBuildVectorv4x32(Op, NumElems, NonZeros, NumNonZero, + NumZero, DAG, Subtarget, *this); + if (V.getNode()) + return V; + } + // If element VT is == 32 bits, turn it into a number of shuffles. 
SmallVector V(NumElems); if (NumElems == 4 && NumZero > 0) { @@ -6332,8 +6538,7 @@ static SDValue getPSHUFB(ArrayRef MaskVals, SDValue V1, SDLoc &dl, if (ShufVT != VT) V1 = DAG.getNode(ISD::BITCAST, dl, ShufVT, V1); return DAG.getNode(X86ISD::PSHUFB, dl, ShufVT, V1, - DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT, - PshufbMask.data(), PshufbMask.size())); + DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT, PshufbMask)); } // v8i16 shuffles - Prefer shuffles in the following order: @@ -6516,7 +6721,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) { + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) { ShuffleVectorSDNode *SVOp = cast(NewV.getNode()); NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16, NewV.getOperand(0), @@ -6540,7 +6745,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget, NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16), &MaskV[0]); - if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) { + if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) { ShuffleVectorSDNode *SVOp = cast(NewV.getNode()); NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16, NewV.getOperand(0), @@ -6635,7 +6840,7 @@ static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v16i8, &pshufbMask[0], 16)); + MVT::v16i8, pshufbMask)); // As PSHUFB will zero elements with negative indices, it's safe to ignore // the 2nd operand if it's undefined or zero. @@ -6653,7 +6858,7 @@ static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2, DAG.getNode(ISD::BUILD_VECTOR, dl, - MVT::v16i8, &pshufbMask[0], 16)); + MVT::v16i8, pshufbMask)); return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2); } @@ -6771,6 +6976,9 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, unsigned Scale; switch (VT.SimpleTy) { default: llvm_unreachable("Unexpected!"); + case MVT::v2i64: + case MVT::v2f64: + return SDValue(SVOp, 0); case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break; case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break; case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break; @@ -6805,7 +7013,7 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT, SDValue SrcOp, SelectionDAG &DAG, const X86Subtarget *Subtarget, SDLoc dl) { if (VT == MVT::v2f64 || VT == MVT::v4f32) { - LoadSDNode *LD = NULL; + LoadSDNode *LD = nullptr; if (!isScalarLoadToVector(SrcOp.getNode(), &LD)) LD = dyn_cast(SrcOp); if (!LD) { @@ -6924,8 +7132,7 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { } // Construct the output using a BUILD_VECTOR. - Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &SVOps[0], - SVOps.size()); + Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, SVOps); } else if (InputUsed[0] < 0) { // No input vectors were used! The result is undefined. 
Output[l] = DAG.getUNDEF(NVT); @@ -7207,6 +7414,93 @@ SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) { getShuffleSHUFImmediate(SVOp), DAG); } +static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index, + SelectionDAG &DAG) { + SDLoc dl(Load); + MVT VT = Load->getSimpleValueType(0); + MVT EVT = VT.getVectorElementType(); + SDValue Addr = Load->getOperand(1); + SDValue NewAddr = DAG.getNode( + ISD::ADD, dl, Addr.getSimpleValueType(), Addr, + DAG.getConstant(Index * EVT.getStoreSize(), Addr.getSimpleValueType())); + + SDValue NewLoad = + DAG.getLoad(EVT, dl, Load->getChain(), NewAddr, + DAG.getMachineFunction().getMachineMemOperand( + Load->getMemOperand(), 0, EVT.getStoreSize())); + return NewLoad; +} + +// It is only safe to call this function if isINSERTPSMask is true for +// this shufflevector mask. +static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl, + SelectionDAG &DAG) { + // Generate an insertps instruction when inserting an f32 from memory onto a + // v4f32 or when copying a member from one v4f32 to another. + // We also use it for transferring i32 from one register to another, + // since it simply copies the same bits. + // If we're transferring an i32 from memory to a specific element in a + // register, we output a generic DAG that will match the PINSRD + // instruction. + MVT VT = SVOp->getSimpleValueType(0); + MVT EVT = VT.getVectorElementType(); + SDValue V1 = SVOp->getOperand(0); + SDValue V2 = SVOp->getOperand(1); + auto Mask = SVOp->getMask(); + assert((VT == MVT::v4f32 || VT == MVT::v4i32) && + "unsupported vector type for insertps/pinsrd"); + + int FromV1 = std::count_if(Mask.begin(), Mask.end(), + [](const int &i) { return i < 4; }); + + SDValue From; + SDValue To; + unsigned DestIndex; + if (FromV1 == 1) { + From = V1; + To = V2; + DestIndex = std::find_if(Mask.begin(), Mask.end(), + [](const int &i) { return i < 4; }) - + Mask.begin(); + } else { + From = V2; + To = V1; + DestIndex = std::find_if(Mask.begin(), Mask.end(), + [](const int &i) { return i >= 4; }) - + Mask.begin(); + } + + if (MayFoldLoad(From)) { + // Trivial case, when From comes from a load and is only used by the + // shuffle. Make it use insertps from the vector that we need from that + // load. + SDValue NewLoad = + NarrowVectorLoadToElement(cast(From), DestIndex, DAG); + if (!NewLoad.getNode()) + return SDValue(); + + if (EVT == MVT::f32) { + // Create this as a scalar to vector to match the instruction pattern. + SDValue LoadScalarToVector = + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, NewLoad); + SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4); + return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, LoadScalarToVector, + InsertpsMask); + } else { // EVT == MVT::i32 + // If we're getting an i32 from memory, use an INSERT_VECTOR_ELT + // instruction, to match the PINSRD instruction, which loads an i32 to a + // certain vector element. + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, To, NewLoad, + DAG.getConstant(DestIndex, MVT::i32)); + } + } + + // Vector-element-to-vector + unsigned SrcIndex = Mask[DestIndex] % 4; + SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4 | SrcIndex << 6); + return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, InsertpsMask); +} + // Reduce a vector shuffle to zext. 
static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { @@ -7295,9 +7589,8 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget, DAG.getNode(X86ISD::VZEXT, DL, NVT, V1)); } -static SDValue -NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { +static SDValue NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast(Op); MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); @@ -7322,31 +7615,29 @@ NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, // If the shuffle can be profitably rewritten as a narrower shuffle, then // do it! - if (VT == MVT::v8i16 || VT == MVT::v16i8 || - VT == MVT::v16i16 || VT == MVT::v32i8) { + if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 || + VT == MVT::v32i8) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) return DAG.getNode(ISD::BITCAST, dl, VT, NewOp); - } else if ((VT == MVT::v4i32 || - (VT == MVT::v4f32 && Subtarget->hasSSE2()))) { + } else if (VT.is128BitVector() && Subtarget->hasSSE2()) { // FIXME: Figure out a cleaner way to do this. - // Try to make use of movq to zero out the top part. if (ISD::isBuildVectorAllZeros(V2.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { MVT NewVT = NewOp.getSimpleValueType(); if (isCommutedMOVLMask(cast(NewOp)->getMask(), NewVT, true, false)) - return getVZextMovL(VT, NewVT, NewOp.getOperand(0), - DAG, Subtarget, dl); + return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget, + dl); } } else if (ISD::isBuildVectorAllZeros(V1.getNode())) { SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG); if (NewOp.getNode()) { MVT NewVT = NewOp.getSimpleValueType(); if (isMOVLMask(cast(NewOp)->getMask(), NewVT)) - return getVZextMovL(VT, NewVT, NewOp.getOperand(1), - DAG, Subtarget, dl); + return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget, + dl); } } } @@ -7609,6 +7900,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { getShuffleSHUFImmediate(SVOp), DAG); } + unsigned Idx; + if (VT.is512BitVector() && isINSERT64x4Mask(M, VT, &Idx)) + return Insert256BitVector(V1, Extract256BitVector(V2, 0, DAG, dl), + Idx*(NumElems/2), DAG, dl); + // Handle VPERM2F128/VPERM2I128 permutations if (isVPERM2X128Mask(M, VT, HasFp256)) return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, @@ -7618,6 +7914,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (BlendOp.getNode()) return BlendOp; + if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT)) + return getINSERTPS(SVOp, dl, DAG); + unsigned Imm8; if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8)) return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG); @@ -7631,8 +7930,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT)); } - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT, - &permclMask[0], NumElems); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT, permclMask); if (V2IsUndef) // Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32 return DAG.getNode(X86ISD::VPERMV, dl, VT, @@ -7684,6 +7982,109 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +// This function assumes its argument is a BUILD_VECTOR of constants or +// undef SDNodes. 
i.e., ISD::isBuildVectorOfConstantSDNodes(BuildVector) is +// true. +static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, + unsigned &MaskValue) { + MaskValue = 0; + unsigned NumElems = BuildVector->getNumOperands(); + // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. + unsigned NumLanes = (NumElems - 1) / 8 + 1; + unsigned NumElemsInLane = NumElems / NumLanes; + + // Blend for v16i16 should be symmetric for both lanes. + for (unsigned i = 0; i < NumElemsInLane; ++i) { + SDValue EltCond = BuildVector->getOperand(i); + SDValue SndLaneEltCond = + (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond; + + int Lane1Cond = -1, Lane2Cond = -1; + if (isa<ConstantSDNode>(EltCond)) + Lane1Cond = !isZero(EltCond); + if (isa<ConstantSDNode>(SndLaneEltCond)) + Lane2Cond = !isZero(SndLaneEltCond); + + if (Lane1Cond == Lane2Cond || Lane2Cond < 0) + // Lane1Cond != 0, means we want the first argument. + // Lane1Cond == 0, means we want the second argument. + // The encoding of this argument is 0 for the first argument, 1 + // for the second. Therefore, invert the condition. + MaskValue |= !Lane1Cond << i; + else if (Lane1Cond < 0) + MaskValue |= !Lane2Cond << i; + else + return false; + } + return true; +} + +// Try to lower a vselect node into a simple blend instruction. +static SDValue LowerVSELECTtoBlend(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDValue Cond = Op.getOperand(0); + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + SDLoc dl(Op); + MVT VT = Op.getSimpleValueType(); + MVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + + // There is no blend with immediate in AVX-512. + if (VT.is512BitVector()) + return SDValue(); + + if (!Subtarget->hasSSE41() || EltVT == MVT::i8) + return SDValue(); + if (!Subtarget->hasInt256() && VT == MVT::v16i16) + return SDValue(); + + if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) + return SDValue(); + + // Check the mask for BLEND and build the value. + unsigned MaskValue = 0; + if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue)) + return SDValue(); + + // Convert i32 vectors to floating point if it is not AVX2. + // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors. + MVT BlendVT = VT; + if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) { + BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()), + NumElems); + LHS = DAG.getNode(ISD::BITCAST, dl, VT, LHS); + RHS = DAG.getNode(ISD::BITCAST, dl, VT, RHS); + } + + SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, LHS, RHS, + DAG.getConstant(MaskValue, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, VT, Ret); +} + +SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { + SDValue BlendOp = LowerVSELECTtoBlend(Op, Subtarget, DAG); + if (BlendOp.getNode()) + return BlendOp; + + // Some types for vselect were previously set to Expand, not Legal or + // Custom. Return an empty SDValue so we fall through to Expand, after + // the Custom lowering phase. + MVT VT = Op.getSimpleValueType(); + switch (VT.SimpleTy) { + default: + break; + case MVT::v8i16: + case MVT::v16i16: + return SDValue(); + } + + // We couldn't create a "Blend with immediate" node. + // This node should still be legal, but we'll have to emit a blendv* + // instruction.
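// (Editorial aside, not part of the patch: the blendv* family named above
// takes a variable mask instead of an immediate. The SSE4.1 forms
// BLENDVPS/BLENDVPD/PBLENDVB read the mask implicitly from XMM0, and the
// AVX VBLENDV* encodings carry it as an explicit fourth operand, so a
// non-constant VSELECT condition still lowers to a single instruction.)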
+ + return Op; +} + static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); @@ -7946,10 +8347,47 @@ static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { return SDValue(); } +/// Insert one bit into a mask vector, like v16i1 or v8i1. +/// AVX-512 feature. +SDValue +X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue Elt = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + MVT VecVT = Vec.getSimpleValueType(); + + if (!isa<ConstantSDNode>(Idx)) { + // Non-constant index. Extend source and destination, + // insert element and then truncate the result. + MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32); + MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32); + SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, + DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec), + DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx); + return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp); + } + + unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt); + if (Vec.getOpcode() == ISD::UNDEF) + return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec, + DAG.getConstant(IdxVal, MVT::i8)); + const TargetRegisterClass* rc = getRegClassFor(VecVT); + unsigned MaxShift = rc->getSize()*8 - 1; + EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec, + DAG.getConstant(MaxShift, MVT::i8)); + EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec, + DAG.getConstant(MaxShift - IdxVal, MVT::i8)); + return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec); +} SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op.getSimpleValueType(); MVT EltVT = VT.getVectorElementType(); + + if (EltVT == MVT::i1) + return InsertBitToMaskVector(Op, DAG); SDLoc dl(Op); SDValue N0 = Op.getOperand(0); @@ -8294,10 +8732,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops)); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } else { SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops)); + Chain = DAG.getNode(CallType, dl, NodeTys, Ops); } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
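// (Editorial sketch, not part of the patch: the getNode/getMergeValues churn
// in the hunks above and below tracks SelectionDAG's move from
// pointer-plus-count operand lists to ArrayRef<SDValue>. A fixed-size array
// converts implicitly, so the explicit count becomes redundant:
//
//   SDValue Ops[] = { Chain, TGA };
//   // before: DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
//   // after:  DAG.getNode(CallType, dl, NodeTys, Ops);  // decays to ArrayRef
// )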
@@ -8325,7 +8763,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, static SDValue LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT) { - return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, X86II::MO_TLSGD); } @@ -8342,7 +8780,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SDValue Base; if (is64Bit) { - Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX, + Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX, X86II::MO_TLSLD, /*LocalDynamic=*/true); } else { SDValue InFlag; @@ -8481,7 +8919,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Args[] = { Chain, Offset }; - Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2); + Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); @@ -8507,10 +8945,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // Windows 64bit: gs:0x58 // Windows 32bit: fs:__tls_array - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast(GV)) - GV = GA->getAliasedGlobal(); SDLoc dl(GA); SDValue Chain = DAG.getEntryNode(); @@ -8609,15 +9043,15 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond }; if (Op.getOpcode() == ISD::SHL_PARTS) { - Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4); - Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4); + Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0); + Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1); } else { - Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4); - Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4); + Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0); + Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1); } SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); + return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, @@ -8680,8 +9114,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) }; SDValue Result = DAG.getMemIntrinsicNode(useSSE ? 
X86ISD::FILD_FLAG : X86ISD::FILD, DL, - Tys, Ops, array_lengthof(Ops), - SrcVT, MMO); + Tys, Ops, SrcVT, MMO); if (useSSE) { Chain = Result.getValue(1); @@ -8704,8 +9137,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, MachineMemOperand::MOStore, SSFISize, SSFISize); Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, - Ops, array_lengthof(Ops), - Op.getValueType(), MMO); + Ops, Op.getValueType(), MMO); Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot, MachinePointerInfo::getFixedStack(SSFI), false, false, false, 0); @@ -8900,7 +9332,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, - array_lengthof(Ops), MVT::i64, MMO); + MVT::i64, MMO); APInt FF(32, 0x5F800000ULL); @@ -8993,8 +9425,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MachineMemOperand *MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), MachineMemOperand::MOLoad, MemSize, MemSize); - Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, - array_lengthof(Ops), DstTy, MMO); + Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); @@ -9008,8 +9439,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), - Ops, array_lengthof(Ops), DstTy, - MMO); + Ops, DstTy, MMO); return std::make_pair(FIST, StackSlot); } else { SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, @@ -9021,8 +9451,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, MVT::i32, eax.getValue(2)); SDValue Ops[] = { eax, edx }; SDValue pair = IsReplace - ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops)) - : DAG.getMergeValues(Ops, array_lengthof(Ops), DL); + ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops) + : DAG.getMergeValues(Ops, DL); return std::make_pair(pair, SDValue()); } } @@ -9217,8 +9647,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { for (unsigned j = 0; j < 8; ++j) pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8)); } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, - &pshufbMask[0], 32); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask); In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV); In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In); @@ -9284,7 +9713,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, /*IsSigned=*/ true, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. - if (FIST.getNode() == 0) return Op; + if (!FIST.getNode()) return Op; if (StackSlot.getNode()) // Load the result. @@ -9581,12 +10010,29 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, VecIns.back(), VecIns.back()); } +/// \brief return true if \c Op has a use that doesn't just read flags. 
+static bool hasNonFlagsUse(SDValue Op) { + for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE; + ++UI) { + SDNode *User = *UI; + unsigned UOpNo = UI.getOperandNo(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { + // Look past the truncate. + UOpNo = User->use_begin().getOperandNo(); + User = *User->use_begin(); + } + + if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && + !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) + return true; + } + return false; +} + /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent. -SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, +SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const { - SDLoc dl(Op); - if (Op.getValueType() == MVT::i1) // KORTEST instruction should be selected return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, @@ -9687,31 +10133,35 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, Opcode = X86ISD::ADD; NumOperands = 2; break; - case ISD::AND: { - // If the primary and result isn't used, don't bother using X86ISD::AND, - // because a TEST instruction will be better. - bool NonFlagUse = false; - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - unsigned UOpNo = UI.getOperandNo(); - if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { - // Look pass truncate. - UOpNo = User->use_begin().getOperandNo(); - User = *User->use_begin(); - } - - if (User->getOpcode() != ISD::BRCOND && - User->getOpcode() != ISD::SETCC && - !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) { - NonFlagUse = true; + case ISD::SHL: + case ISD::SRL: + // If we have a constant logical shift that's only used in a comparison + // against zero, turn it into an equivalent AND. This allows turning it into + // a TEST instruction later. + if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && + isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) { + EVT VT = Op.getValueType(); + unsigned BitWidth = VT.getSizeInBits(); + unsigned ShAmt = Op->getConstantOperandVal(1); + if (ShAmt >= BitWidth) // Avoid undefined shifts. break; - } + APInt Mask = ArithOp.getOpcode() == ISD::SRL + ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt) + : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt); + if (!Mask.isSignedIntN(32)) // Avoid large immediates. + break; + SDValue New = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0), + DAG.getConstant(Mask, VT)); + DAG.ReplaceAllUsesWith(Op, New); + Op = New; } + break; - if (!NonFlagUse) + case ISD::AND: + // If the primary 'and' result isn't used, don't bother using X86ISD::AND, + // because a TEST instruction will be better. + if (!hasNonFlagsUse(Op)) break; - } // FALL THROUGH case ISD::SUB: case ISD::OR: @@ -9794,7 +10244,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, for (unsigned i = 0; i != NumOperands; ++i) Ops.push_back(Op.getOperand(i)); - SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); + SDValue New = DAG.getNode(Opcode, dl, VTs, Ops); DAG.ReplaceAllUsesWith(Op, New); return SDValue(New.getNode(), 1); } @@ -9802,11 +10252,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, - SelectionDAG &DAG) const { - SDLoc dl(Op0); + SDLoc dl, SelectionDAG &DAG) const { if (ConstantSDNode *C = dyn_cast(Op1)) { if (C->getAPIntValue() == 0) - return EmitTest(Op0, X86CC, DAG); + return EmitTest(Op0, X86CC, dl, DAG); if (Op0.getValueType() == MVT::i1) llvm_unreachable("Unexpected comparison operation for MVT::i1 operands"); @@ -9888,7 +10337,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, unsigned AndBitWidth = And.getValueSizeInBits(); if (BitWidth > AndBitWidth) { APInt Zeros, Ones; - DAG.ComputeMaskedBits(Op0, Zeros, Ones); + DAG.computeKnownBits(Op0, Zeros, Ones); if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) return SDValue(); } @@ -10054,7 +10503,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, /// \brief Try to turn a VSETULT into a VSETULE by modifying its second /// operand \p Op1. If non-trivial (for example because it's not constant) /// return an empty value. -static SDValue ChangeVSETULTtoVSETULE(SDValue Op1, SelectionDAG &DAG) +static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG) { BuildVectorSDNode *BV = dyn_cast(Op1.getNode()); if (!BV) @@ -10078,8 +10527,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDValue Op1, SelectionDAG &DAG) ULTOp1.push_back(DAG.getConstant(Val - 1, EVT)); } - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op1), VT, ULTOp1.data(), - ULTOp1.size()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1); } static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, @@ -10204,7 +10652,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Only do this pre-AVX since vpcmp* is no longer destructive. if (Subtarget->hasAVX()) break; - SDValue ULEOp1 = ChangeVSETULTtoVSETULE(Op1, DAG); + SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG); if (ULEOp1.getNode()) { Op1 = ULEOp1; Subus = true; Invert = false; Swap = false; @@ -10383,7 +10831,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (X86CC == X86::COND_INVALID) return SDValue(); - SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG); + SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG); EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), EFLAGS); @@ -10418,11 +10866,6 @@ static bool isX86LogicalCmp(SDValue Op) { return false; } -static bool isZero(SDValue V) { - ConstantSDNode *C = dyn_cast(V); - return C && C->isNullValue(); -} - static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { if (V.getOpcode() != ISD::TRUNCATE) return false; @@ -10517,7 +10960,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Res = DAG.getNOT(DL, Res, Res.getValueType()); ConstantSDNode *N2C = dyn_cast(Op2); - if (N2C == 0 || !N2C->isNullValue()) + if (!N2C || !N2C->isNullValue()) Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); return Res; } @@ -10606,7 +11049,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Cond = EmitTest(Cond, X86::COND_NE, DAG); + Cond = EmitTest(Cond, X86::COND_NE, DL, DAG); } // a < b ? -1 : 0 -> RES = ~setcc_carry @@ -10646,7 +11089,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // condition is true. 
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); SDValue Ops[] = { Op2, Op1, CC, Cond }; - return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops)); + return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops); } static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) { @@ -11027,7 +11470,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Cond = EmitTest(Cond, X86::COND_NE, DAG); + Cond = EmitTest(Cond, X86::COND_NE, dl, DAG); } Cond = ConvertCmpIfNecessary(Cond, DAG); return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), @@ -11042,13 +11485,50 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { - assert((Subtarget->isOSWindows() || - getTargetMachine().Options.EnableSegmentedStacks) && - "This should be used only on Windows targets or when segmented stacks " - "are being used"); - assert(!Subtarget->isTargetMacho() && "Not implemented"); + MachineFunction &MF = DAG.getMachineFunction(); + bool SplitStack = MF.shouldSplitStack(); + bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMacho()) || + SplitStack; SDLoc dl(Op); + if (!Lower) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDNode* Node = Op.getNode(); + + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = SDValue(Node, 0); + SDValue Tmp2 = SDValue(Node, 1); + SDValue Tmp3 = Node->getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true), + SDLoc(Node)); + + SDValue Size = Tmp2.getOperand(1); + SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); + Chain = SP.getValue(1); + unsigned Align = cast(Tmp3)->getZExtValue(); + const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering(); + unsigned StackAlign = TFI.getStackAlignment(); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + if (Align > StackAlign) + Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, + DAG.getConstant(-(uint64_t)Align, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain + + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), + DAG.getIntPtrConstant(0, true), SDValue(), + SDLoc(Node)); + + SDValue Ops[2] = { Tmp1, Tmp2 }; + return DAG.getMergeValues(Ops, dl); + } + // Get the inputs. SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); @@ -11058,8 +11538,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, bool Is64Bit = Subtarget->is64Bit(); EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32; - if (getTargetMachine().Options.EnableSegmentedStacks) { - MachineFunction &MF = DAG.getMachineFunction(); + if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); if (Is64Bit) { @@ -11081,7 +11560,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); SDValue Ops1[2] = { Value, Chain }; - return DAG.getMergeValues(Ops1, 2, dl); + return DAG.getMergeValues(Ops1, dl); } else { SDValue Flag; unsigned Reg = (Subtarget->is64Bit() ? 
X86::RAX : X86::EAX); @@ -11105,7 +11584,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, } SDValue Ops1[2] = { SP, Chain }; - return DAG.getMergeValues(Ops1, 2, dl); + return DAG.getMergeValues(Ops1, dl); } } @@ -11166,8 +11645,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo(SV, 16), false, false, 0); MemOps.push_back(Store); - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - &MemOps[0], MemOps.size()); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { @@ -11221,8 +11699,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { InstOps.push_back(DAG.getConstant(Align, MVT::i32)); SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other); SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl, - VTs, &InstOps[0], InstOps.size(), - MVT::i64, + VTs, InstOps, MVT::i64, MachinePointerInfo(SV), /*Align=*/0, /*Volatile=*/false, @@ -11262,6 +11739,10 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, SelectionDAG &DAG) { MVT ElementType = VT.getVectorElementType(); + // Fold this packed shift into its first operand if ShiftAmt is 0. + if (ShiftAmt == 0) + return SrcOp; + // Check for ShiftAmt >= element width if (ShiftAmt >= ElementType.getSizeInBits()) { if (Opc == X86ISD::VSRAI) @@ -11282,7 +11763,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, ConstantSDNode *ND; switch(Opc) { - default: llvm_unreachable(0); + default: llvm_unreachable(nullptr); case X86ISD::VSHLI: for (unsigned i=0; i!=NumElts; ++i) { SDValue CurrentOp = SrcOp->getOperand(i); @@ -11321,7 +11802,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT, break; } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElts); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts); } return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8)); @@ -11353,7 +11834,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps); // The return type has to be a 128-bit type with the same element // type as the input type. 
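As context for getTargetVShiftByConstNode above (the new ShiftAmt == 0 early-out sits next to the existing out-of-range handling), the packed-shift semantics it models are simple to state per lane: logical shifts by at least the element width produce zero, while the arithmetic VSRAI clamps the amount to ElementBits - 1. A scalar sketch, assuming the usual arithmetic behaviour of signed right shift (implementation-defined before C++20, but universal in practice):

    #include <cstdint>

    // Per-lane model of X86ISD::VSRLI / VSRAI with a too-large immediate.
    uint16_t vsrli16(uint16_t X, unsigned Amt) {
      return Amt >= 16 ? uint16_t(0) : uint16_t(X >> Amt); // lane becomes 0
    }
    int16_t vsrai16(int16_t X, unsigned Amt) {
      if (Amt >= 16)
        Amt = 15;                        // clamp: only the sign survives
      return int16_t(int32_t(X) >> Amt); // arithmetic shift assumed
    }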
@@ -11476,6 +11957,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse41_pmuldq: + case Intrinsic::x86_avx2_pmul_dq: + return DAG.getNode(X86ISD::PMULDQ, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::x86_sse2_pmulhu_w: + case Intrinsic::x86_avx2_pmulhu_w: + return DAG.getNode(ISD::MULHU, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::x86_sse2_pmulh_w: + case Intrinsic::x86_avx2_pmulh_w: + return DAG.getNode(ISD::MULHS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + // SSE2/AVX2 sub with unsigned saturation intrinsics case Intrinsic::x86_sse2_psubus_b: case Intrinsic::x86_sse2_psubus_w: @@ -11927,7 +12423,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } SmallVector NewOps(Op->op_begin()+1, Op->op_end()); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, DAG.getConstant(X86CC, MVT::i8), SDValue(PCMP.getNode(), 1)); @@ -11944,7 +12440,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SmallVector NewOps(Op->op_begin()+1, Op->op_end()); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size()); + return DAG.getNode(Opcode, dl, VTs, NewOps); } case Intrinsic::x86_fma_vfmadd_ps: case Intrinsic::x86_fma_vfmadd_pd: @@ -12042,27 +12538,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, - SDValue Base, SDValue Index, - SDValue ScaleOp, SDValue Chain, - const X86Subtarget * Subtarget) { - SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - assert(C && "Invalid scale type"); - SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); - SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); - EVT MaskVT = MVT::getVectorVT(MVT::i1, - Index.getSimpleValueType().getVectorNumElements()); - SDValue MaskInReg = DAG.getConstant(~0, MaskVT); - SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other); - SDValue Disp = DAG.getTargetConstant(0, MVT::i32); - SDValue Segment = DAG.getRegister(0, MVT::i32); - SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; - SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); - SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; - return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl); -} - -static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget * Subtarget) { @@ -12072,7 +12547,12 @@ static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8); EVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); - SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); + SDValue MaskInReg; + ConstantSDNode *MaskC = dyn_cast(Mask); + if (MaskC) + MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT); + else + MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask); SDVTList VTs = DAG.getVTList(Op.getValueType(), 
                            MaskVT, MVT::Other);
   SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
   SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -12081,12 +12561,12 @@ static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
   SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
   SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
-  return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+  return DAG.getMergeValues(RetOps, dl);
 }
 
 static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
-                              SDValue Src, SDValue Base, SDValue Index,
-                              SDValue ScaleOp, SDValue Chain) {
+                              SDValue Src, SDValue Mask, SDValue Base,
+                              SDValue Index, SDValue ScaleOp, SDValue Chain) {
   SDLoc dl(Op);
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
   assert(C && "Invalid scale type");
@@ -12095,52 +12575,218 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
   SDValue Segment = DAG.getRegister(0, MVT::i32);
   EVT MaskVT = MVT::getVectorVT(MVT::i1,
                              Index.getSimpleValueType().getVectorNumElements());
-  SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
+  SDValue MaskInReg;
+  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
+  if (MaskC)
+    MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
+  else
+    MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
   SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
   SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
   SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
   return SDValue(Res, 1);
 }
 
-static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
-                               SDValue Src, SDValue Mask, SDValue Base,
-                               SDValue Index, SDValue ScaleOp, SDValue Chain) {
+static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+                               SDValue Mask, SDValue Base, SDValue Index,
+                               SDValue ScaleOp, SDValue Chain) {
   SDLoc dl(Op);
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
   assert(C && "Invalid scale type");
   SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
   SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
   SDValue Segment = DAG.getRegister(0, MVT::i32);
-  EVT MaskVT = MVT::getVectorVT(MVT::i1,
-                             Index.getSimpleValueType().getVectorNumElements());
-  SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
-  SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
-  SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
-  SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
-  return SDValue(Res, 1);
+  EVT MaskVT =
+    MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
+  SDValue MaskInReg;
+  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
+  if (MaskC)
+    MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
+  else
+    MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+  //SDVTList VTs = DAG.getVTList(MVT::Other);
+  SDValue Ops[] = {MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
+  SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
+  return SDValue(Res, 0);
+}
+
+// getReadTimeStampCounter - Handles the lowering of builtin intrinsics that
+// read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is
+// also used to custom lower READCYCLECOUNTER nodes.
+static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
+                                    SelectionDAG &DAG, const X86Subtarget *Subtarget,
+                                    SmallVectorImpl<SDValue> &Results) {
+  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
+  SDValue LO, HI;
+
+  // The processor's time-stamp counter (a 64-bit MSR) is stored into the
+  // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
+  // and the EAX register is loaded with the low-order 32 bits.
+  if (Subtarget->is64Bit()) {
+    LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
+    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
+                            LO.getValue(2));
+  } else {
+    LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
+    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
+                            LO.getValue(2));
+  }
+  SDValue Chain = HI.getValue(1);
+
+  if (Opcode == X86ISD::RDTSCP_DAG) {
+    assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+
+    // Instruction RDTSCP loads the IA32:TSC_AUX_MSR (address C000_0103H) into
+    // the ECX register. Add 'ecx' explicitly to the chain.
+    SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
+                                     HI.getValue(2));
+    // Explicitly store the content of ECX at the location passed in input
+    // to the 'rdtscp' intrinsic.
+    Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
+                         MachinePointerInfo(), false, false, 0);
+  }
+
+  if (Subtarget->is64Bit()) {
+    // The EDX register is loaded with the high-order 32 bits of the MSR, and
+    // the EAX register is loaded with the low-order 32 bits.
+    SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+                              DAG.getConstant(32, MVT::i8));
+    Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
+    Results.push_back(Chain);
+    return;
+  }
+
+  // Use a buildpair to merge the two 32-bit values into a 64-bit one.
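Both return paths of getReadTimeStampCounter produce the same 64-bit quantity from the two halves RDTSC leaves in EDX:EAX: on 64-bit targets with the SHL/OR above, on 32-bit targets with the BUILD_PAIR that follows. The arithmetic, as a one-liner:

    #include <cstdint>

    // EDX holds the high-order 32 bits of the TSC, EAX the low-order bits.
    uint64_t combineTSC(uint32_t EAX, uint32_t EDX) {
      return (uint64_t(EDX) << 32) | uint64_t(EAX);
    }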
+ SDValue Ops[] = { LO, HI }; + SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); + Results.push_back(Pair); + Results.push_back(Chain); +} + +static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SmallVector Results; + SDLoc DL(Op); + getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget, + Results); + return DAG.getMergeValues(Results, DL); +} + +enum IntrinsicType { + GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDTSC, XTEST +}; + +struct IntrinsicData { + IntrinsicData(IntrinsicType IType, unsigned IOpc0, unsigned IOpc1) + :Type(IType), Opc0(IOpc0), Opc1(IOpc1) {} + IntrinsicType Type; + unsigned Opc0; + unsigned Opc1; +}; + +std::map < unsigned, IntrinsicData> IntrMap; +static void InitIntinsicsMap() { + static bool Initialized = false; + if (Initialized) + return; + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512, + IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512, + IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpd_512, + IntrinsicData(GATHER, X86::VGATHERQPDZrm, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpd_512, + IntrinsicData(GATHER, X86::VGATHERDPDZrm, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dps_512, + IntrinsicData(GATHER, X86::VGATHERDPSZrm, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpi_512, + IntrinsicData(GATHER, X86::VPGATHERQDZrm, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpq_512, + IntrinsicData(GATHER, X86::VPGATHERQQZrm, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpi_512, + IntrinsicData(GATHER, X86::VPGATHERDDZrm, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpq_512, + IntrinsicData(GATHER, X86::VPGATHERDQZrm, 0))); + + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qps_512, + IntrinsicData(SCATTER, X86::VSCATTERQPSZmr, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpd_512, + IntrinsicData(SCATTER, X86::VSCATTERQPDZmr, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpd_512, + IntrinsicData(SCATTER, X86::VSCATTERDPDZmr, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dps_512, + IntrinsicData(SCATTER, X86::VSCATTERDPSZmr, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpi_512, + IntrinsicData(SCATTER, X86::VPSCATTERQDZmr, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpq_512, + IntrinsicData(SCATTER, X86::VPSCATTERQQZmr, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpi_512, + IntrinsicData(SCATTER, X86::VPSCATTERDDZmr, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpq_512, + IntrinsicData(SCATTER, X86::VPSCATTERDQZmr, 0))); + + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qps_512, + IntrinsicData(PREFETCH, X86::VGATHERPF0QPSm, + X86::VGATHERPF1QPSm))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qpd_512, + IntrinsicData(PREFETCH, X86::VGATHERPF0QPDm, + X86::VGATHERPF1QPDm))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dpd_512, + IntrinsicData(PREFETCH, X86::VGATHERPF0DPDm, + X86::VGATHERPF1DPDm))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dps_512, + IntrinsicData(PREFETCH, X86::VGATHERPF0DPSm, + X86::VGATHERPF1DPSm))); + 
IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qps_512, + IntrinsicData(PREFETCH, X86::VSCATTERPF0QPSm, + X86::VSCATTERPF1QPSm))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qpd_512, + IntrinsicData(PREFETCH, X86::VSCATTERPF0QPDm, + X86::VSCATTERPF1QPDm))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dpd_512, + IntrinsicData(PREFETCH, X86::VSCATTERPF0DPDm, + X86::VSCATTERPF1DPDm))); + IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dps_512, + IntrinsicData(PREFETCH, X86::VSCATTERPF0DPSm, + X86::VSCATTERPF1DPSm))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_16, + IntrinsicData(RDRAND, X86ISD::RDRAND, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_32, + IntrinsicData(RDRAND, X86ISD::RDRAND, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_64, + IntrinsicData(RDRAND, X86ISD::RDRAND, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_16, + IntrinsicData(RDSEED, X86ISD::RDSEED, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_32, + IntrinsicData(RDSEED, X86ISD::RDSEED, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_64, + IntrinsicData(RDSEED, X86ISD::RDSEED, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_xtest, + IntrinsicData(XTEST, X86ISD::XTEST, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdtsc, + IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp, + IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0))); + Initialized = true; } static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - SDLoc dl(Op); + InitIntinsicsMap(); unsigned IntNo = cast(Op.getOperand(1))->getZExtValue(); - switch (IntNo) { - default: return SDValue(); // Don't custom lower most intrinsics. + std::map < unsigned, IntrinsicData>::const_iterator itr = IntrMap.find(IntNo); + if (itr == IntrMap.end()) + return SDValue(); - // RDRAND/RDSEED intrinsics. - case Intrinsic::x86_rdrand_16: - case Intrinsic::x86_rdrand_32: - case Intrinsic::x86_rdrand_64: - case Intrinsic::x86_rdseed_16: - case Intrinsic::x86_rdseed_32: - case Intrinsic::x86_rdseed_64: { - unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 || - IntNo == Intrinsic::x86_rdseed_32 || - IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED : - X86ISD::RDRAND; + SDLoc dl(Op); + IntrinsicData Intr = itr->second; + switch(Intr.Type) { + case RDSEED: + case RDRAND: { // Emit the node with the right value type. SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other); - SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0)); + SDValue Result = DAG.getNode(Intr.Opc0, dl, VTs, Op.getOperand(0)); // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. // Otherwise return the value from Rand, which is always 0, casted to i32. @@ -12150,152 +12796,55 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue(Result.getNode(), 1) }; SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, DAG.getVTList(Op->getValueType(1), MVT::Glue), - Ops, array_lengthof(Ops)); + Ops); // Return { result, isValid, chain }. 
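The table above replaces the long per-intrinsic switch that the rest of this hunk deletes: LowerINTRINSIC_W_CHAIN now does a single map lookup and branches on the record's type. A reduced standalone model of that shape (the IDs and opcode numbers here are made up; the real ones come from the Intrinsic and X86 enums):

    #include <map>

    enum IntrinsicType { GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDTSC, XTEST };
    struct IntrinsicData {
      IntrinsicType Type;
      unsigned Opc0, Opc1; // Opc1 is only used by PREFETCH (hint 0 vs hint 1)
    };

    static const std::map<unsigned, IntrinsicData> &intrinsicTable() {
      static const std::map<unsigned, IntrinsicData> Table = {
          {/*x86_rdrand_32*/ 1, {RDRAND, /*X86ISD::RDRAND*/ 100, 0}},
          {/*x86_xtest*/     2, {XTEST,  /*X86ISD::XTEST*/  101, 0}},
      };
      return Table;
    }

    // Returns false for intrinsics that are not custom lowered, mirroring
    // the SDValue() early return in the patch.
    bool lookupIntrinsic(unsigned IntNo, IntrinsicData &Out) {
      std::map<unsigned, IntrinsicData>::const_iterator It =
          intrinsicTable().find(IntNo);
      if (It == intrinsicTable().end())
        return false;
      Out = It->second;
      return true;
    }

One incidental benefit of the function-local static used in this sketch: C++11 guarantees thread-safe initialization, whereas the patch's file-scope map guarded by a bare Initialized flag relies on callers never racing.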
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, SDValue(Result.getNode(), 2)); } - //int_gather(index, base, scale); - case Intrinsic::x86_avx512_gather_qpd_512: - case Intrinsic::x86_avx512_gather_qps_512: - case Intrinsic::x86_avx512_gather_dpd_512: - case Intrinsic::x86_avx512_gather_qpi_512: - case Intrinsic::x86_avx512_gather_qpq_512: - case Intrinsic::x86_avx512_gather_dpq_512: - case Intrinsic::x86_avx512_gather_dps_512: - case Intrinsic::x86_avx512_gather_dpi_512: { - unsigned Opc; - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break; - case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break; - case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break; - case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break; - case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break; - case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break; - case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break; - case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break; - } - SDValue Chain = Op.getOperand(0); - SDValue Index = Op.getOperand(2); - SDValue Base = Op.getOperand(3); - SDValue Scale = Op.getOperand(4); - return getGatherNode(Opc, Op, DAG, Base, Index, Scale, Chain, Subtarget); - } - //int_gather_mask(v1, mask, index, base, scale); - case Intrinsic::x86_avx512_gather_qps_mask_512: - case Intrinsic::x86_avx512_gather_qpd_mask_512: - case Intrinsic::x86_avx512_gather_dpd_mask_512: - case Intrinsic::x86_avx512_gather_dps_mask_512: - case Intrinsic::x86_avx512_gather_qpi_mask_512: - case Intrinsic::x86_avx512_gather_qpq_mask_512: - case Intrinsic::x86_avx512_gather_dpi_mask_512: - case Intrinsic::x86_avx512_gather_dpq_mask_512: { - unsigned Opc; - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
- case Intrinsic::x86_avx512_gather_qps_mask_512: - Opc = X86::VGATHERQPSZrm; break; - case Intrinsic::x86_avx512_gather_qpd_mask_512: - Opc = X86::VGATHERQPDZrm; break; - case Intrinsic::x86_avx512_gather_dpd_mask_512: - Opc = X86::VGATHERDPDZrm; break; - case Intrinsic::x86_avx512_gather_dps_mask_512: - Opc = X86::VGATHERDPSZrm; break; - case Intrinsic::x86_avx512_gather_qpi_mask_512: - Opc = X86::VPGATHERQDZrm; break; - case Intrinsic::x86_avx512_gather_qpq_mask_512: - Opc = X86::VPGATHERQQZrm; break; - case Intrinsic::x86_avx512_gather_dpi_mask_512: - Opc = X86::VPGATHERDDZrm; break; - case Intrinsic::x86_avx512_gather_dpq_mask_512: - Opc = X86::VPGATHERDQZrm; break; - } + case GATHER: { + //gather(v1, mask, index, base, scale); SDValue Chain = Op.getOperand(0); SDValue Src = Op.getOperand(2); - SDValue Mask = Op.getOperand(3); + SDValue Base = Op.getOperand(3); SDValue Index = Op.getOperand(4); - SDValue Base = Op.getOperand(5); + SDValue Mask = Op.getOperand(5); SDValue Scale = Op.getOperand(6); - return getMGatherNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain, + return getGatherNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain, Subtarget); } - //int_scatter(base, index, v1, scale); - case Intrinsic::x86_avx512_scatter_qpd_512: - case Intrinsic::x86_avx512_scatter_qps_512: - case Intrinsic::x86_avx512_scatter_dpd_512: - case Intrinsic::x86_avx512_scatter_qpi_512: - case Intrinsic::x86_avx512_scatter_qpq_512: - case Intrinsic::x86_avx512_scatter_dpq_512: - case Intrinsic::x86_avx512_scatter_dps_512: - case Intrinsic::x86_avx512_scatter_dpi_512: { - unsigned Opc; - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - case Intrinsic::x86_avx512_scatter_qpd_512: - Opc = X86::VSCATTERQPDZmr; break; - case Intrinsic::x86_avx512_scatter_qps_512: - Opc = X86::VSCATTERQPSZmr; break; - case Intrinsic::x86_avx512_scatter_dpd_512: - Opc = X86::VSCATTERDPDZmr; break; - case Intrinsic::x86_avx512_scatter_dps_512: - Opc = X86::VSCATTERDPSZmr; break; - case Intrinsic::x86_avx512_scatter_qpi_512: - Opc = X86::VPSCATTERQDZmr; break; - case Intrinsic::x86_avx512_scatter_qpq_512: - Opc = X86::VPSCATTERQQZmr; break; - case Intrinsic::x86_avx512_scatter_dpq_512: - Opc = X86::VPSCATTERDQZmr; break; - case Intrinsic::x86_avx512_scatter_dpi_512: - Opc = X86::VPSCATTERDDZmr; break; - } - SDValue Chain = Op.getOperand(0); - SDValue Base = Op.getOperand(2); - SDValue Index = Op.getOperand(3); - SDValue Src = Op.getOperand(4); - SDValue Scale = Op.getOperand(5); - return getScatterNode(Opc, Op, DAG, Src, Base, Index, Scale, Chain); - } - //int_scatter_mask(base, mask, index, v1, scale); - case Intrinsic::x86_avx512_scatter_qps_mask_512: - case Intrinsic::x86_avx512_scatter_qpd_mask_512: - case Intrinsic::x86_avx512_scatter_dpd_mask_512: - case Intrinsic::x86_avx512_scatter_dps_mask_512: - case Intrinsic::x86_avx512_scatter_qpi_mask_512: - case Intrinsic::x86_avx512_scatter_qpq_mask_512: - case Intrinsic::x86_avx512_scatter_dpi_mask_512: - case Intrinsic::x86_avx512_scatter_dpq_mask_512: { - unsigned Opc; - switch (IntNo) { - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
- case Intrinsic::x86_avx512_scatter_qpd_mask_512: - Opc = X86::VSCATTERQPDZmr; break; - case Intrinsic::x86_avx512_scatter_qps_mask_512: - Opc = X86::VSCATTERQPSZmr; break; - case Intrinsic::x86_avx512_scatter_dpd_mask_512: - Opc = X86::VSCATTERDPDZmr; break; - case Intrinsic::x86_avx512_scatter_dps_mask_512: - Opc = X86::VSCATTERDPSZmr; break; - case Intrinsic::x86_avx512_scatter_qpi_mask_512: - Opc = X86::VPSCATTERQDZmr; break; - case Intrinsic::x86_avx512_scatter_qpq_mask_512: - Opc = X86::VPSCATTERQQZmr; break; - case Intrinsic::x86_avx512_scatter_dpq_mask_512: - Opc = X86::VPSCATTERDQZmr; break; - case Intrinsic::x86_avx512_scatter_dpi_mask_512: - Opc = X86::VPSCATTERDDZmr; break; - } + case SCATTER: { + //scatter(base, mask, index, v1, scale); SDValue Chain = Op.getOperand(0); SDValue Base = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue Index = Op.getOperand(4); SDValue Src = Op.getOperand(5); SDValue Scale = Op.getOperand(6); - return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain); + return getScatterNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain); + } + case PREFETCH: { + SDValue Hint = Op.getOperand(6); + unsigned HintVal; + if (dyn_cast (Hint) == 0 || + (HintVal = dyn_cast (Hint)->getZExtValue()) > 1) + llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1"); + unsigned Opcode = (HintVal ? Intr.Opc1 : Intr.Opc0); + SDValue Chain = Op.getOperand(0); + SDValue Mask = Op.getOperand(2); + SDValue Index = Op.getOperand(3); + SDValue Base = Op.getOperand(4); + SDValue Scale = Op.getOperand(5); + return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain); + } + // Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP). + case RDTSC: { + SmallVector Results; + getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget, Results); + return DAG.getMergeValues(Results, dl); } // XTEST intrinsics. - case Intrinsic::x86_xtest: { + case XTEST: { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0)); SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, @@ -12306,6 +12855,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, Ret, SDValue(InTrans.getNode(), 1)); } } + llvm_unreachable("Unknown Intrinsic Type"); } SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, @@ -12358,6 +12908,19 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } +// FIXME? Maybe this could be a TableGen attribute on some registers and +// this table could be generated automatically from RegInfo. 
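In the PREFETCH case just above, the final immediate operand is the locality hint, and it simply selects between the two opcodes stored for the intrinsic (the PF0 and PF1 flavours registered in the table). The selection, modelled in isolation:

    #include <cassert>

    // Hint 0 picks the first recorded opcode (e.g. VGATHERPF0QPSm), hint 1
    // the second (VGATHERPF1QPSm); anything else is rejected, as in the
    // llvm_unreachable in the patch.
    unsigned pickPrefetchOpcode(unsigned Opc0, unsigned Opc1, unsigned HintVal) {
      assert(HintVal <= 1 && "prefetch hint must be 0 or 1");
      return HintVal ? Opc1 : Opc0;
    }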
+unsigned X86TargetLowering::getRegisterByName(const char* RegName, + EVT VT) const { + unsigned Reg = StringSwitch(RegName) + .Case("esp", X86::ESP) + .Case("rsp", X86::RSP) + .Default(0); + if (Reg) + return Reg; + report_fatal_error("Invalid register name global variable"); +} + SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = @@ -12477,7 +13040,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 22), false, false, 0); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } else { const Function *Func = cast(cast(Op.getOperand(5))->getValue()); @@ -12557,7 +13120,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, MachinePointerInfo(TrmpAddr, 6), false, false, 1); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } } @@ -12600,8 +13163,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), - Ops, array_lengthof(Ops), MVT::i16, - MMO); + Ops, MVT::i16, MMO); // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, @@ -12654,7 +13216,7 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(X86::COND_E, MVT::i8), Op.getValue(1) }; - Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops)); + Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); // Finally xor with NumBits-1. Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT)); @@ -12706,7 +13268,7 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(X86::COND_E, MVT::i8), Op.getValue(1) }; - return DAG.getNode(X86ISD::CMOV, dl, VT, Ops, array_lengthof(Ops)); + return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); } // Lower256IntArith - Break a 256-bit integer operation into two new 128-bit @@ -12824,59 +13386,104 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); } -static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { - MVT VT = Op.getSimpleValueType(); - MVT EltTy = VT.getVectorElementType(); - unsigned NumElts = VT.getVectorNumElements(); - SDValue N0 = Op.getOperand(0); +SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetWin64() && "Unexpected target"); + EVT VT = Op.getValueType(); + assert(VT.isInteger() && VT.getSizeInBits() == 128 && + "Unexpected return type for lowering"); + + RTLIB::Libcall LC; + bool isSigned; + switch (Op->getOpcode()) { + default: llvm_unreachable("Unexpected request for libcall!"); + case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break; + case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break; + case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break; + case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break; + case ISD::SDIVREM: isSigned = true; LC = RTLIB::SDIVREM_I128; break; + case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break; + } + SDLoc dl(Op); + SDValue InChain = DAG.getEntryNode(); - // Lower sdiv X, pow2-const. 
- BuildVectorSDNode *C = dyn_cast(Op.getOperand(1)); - if (!C) - return SDValue(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op->getOperand(i).getValueType(); + assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 && + "Unexpected argument type for lowering"); + SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16); + Entry.Node = StackPtr; + InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MachinePointerInfo(), + false, false, 16); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Ty = PointerType::get(ArgTy,0); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + } + + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy()); - APInt SplatValue, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (!C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, - HasAnyUndefs) || - EltTy.getSizeInBits() < SplatBitSize) - return SDValue(); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(getLibcallCallingConv(LC), + static_cast(MVT::v2i64).getTypeForEVT(*DAG.getContext()), + Callee, &Args, 0) + .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); + + std::pair CallInfo = LowerCallTo(CLI); + return DAG.getNode(ISD::BITCAST, dl, VT, CallInfo.first); +} + +static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + EVT VT = Op0.getValueType(); + SDLoc dl(Op); - if ((SplatValue != 0) && - (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) { - unsigned Lg2 = SplatValue.countTrailingZeros(); - // Splat the sign bit. - SmallVector Sz(NumElts, - DAG.getConstant(EltTy.getSizeInBits() - 1, - EltTy)); - SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0], - NumElts)); - // Add (N0 < 0) ? abs2 - 1 : 0; - SmallVector Amt(NumElts, - DAG.getConstant(EltTy.getSizeInBits() - Lg2, - EltTy)); - SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0], - NumElts)); - SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL); - SmallVector Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy)); - SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0], - NumElts)); - - // If we're dividing by a positive value, we're done. Otherwise, we must - // negate the result. - if (SplatValue.isNonNegative()) - return SRA; - - SmallVector V(NumElts, DAG.getConstant(0, EltTy)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts); - return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA); + assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) || + (VT == MVT::v8i32 && Subtarget->hasInt256())); + + // Get the high parts. + const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8}; + SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask); + SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask); + + // Emit two multiplies, one for the lower 2 ints and one for the higher 2 + // ints. + MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64; + bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI; + unsigned Opcode = + (!IsSigned || !Subtarget->hasSSE41()) ? 
X86ISD::PMULUDQ : X86ISD::PMULDQ; + SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(Opcode, dl, MulVT, Op0, Op1)); + SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1)); + + // Shuffle it back into the right order. + const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15}; + SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); + const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14}; + SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + + // If we have a signed multiply but no PMULDQ fix up the high parts of a + // unsigned multiply. + if (IsSigned && !Subtarget->hasSSE41()) { + SDValue ShAmt = + DAG.getConstant(31, DAG.getTargetLoweringInfo().getShiftAmountTy(VT)); + SDValue T1 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1); + SDValue T2 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0); + + SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2); + Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup); } - return SDValue(); + + return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows); } static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, @@ -12920,7 +13527,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, DAG.getConstant(uint8_t(-1U << ShiftAmt), MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SHL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); } if (Op.getOpcode() == ISD::SRL) { // Make a large shift. @@ -12933,7 +13540,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SRL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); } if (Op.getOpcode() == ISD::SRA) { if (ShiftAmt == 7) { @@ -12946,7 +13553,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); SmallVector V(16, DAG.getConstant(128 >> ShiftAmt, MVT::i8)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V); Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; @@ -12966,7 +13573,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, DAG.getConstant(uint8_t(-1U << ShiftAmt), MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SHL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32)); + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); } if (Op.getOpcode() == ISD::SRL) { // Make a large shift. 
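Stepping back to LowerMUL_LOHI above: when PMULDQ is unavailable, the signed high halves are recovered from an unsigned widening multiply, and the two AND-of-arithmetic-shift nodes (T1, T2) are the classic correction terms. A scalar check of the identity, assuming arithmetic right shift for signed types:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    uint32_t mulhu32(uint32_t A, uint32_t B) {
      return uint32_t((uint64_t(A) * uint64_t(B)) >> 32);
    }

    // mulhs(a,b) = mulhu(a,b) - (a < 0 ? b : 0) - (b < 0 ? a : 0),
    // computed modulo 2^32; the two subtrahends are exactly T1 and T2.
    int32_t mulhs32(int32_t A, int32_t B) {
      uint32_t UA = uint32_t(A), UB = uint32_t(B);
      uint32_t T1 = (A < 0) ? UB : 0; // (Op0 >>s 31) & Op1
      uint32_t T2 = (B < 0) ? UA : 0; // (Op1 >>s 31) & Op0
      return int32_t(mulhu32(UA, UB) - (T1 + T2));
    }

    int main() {
      for (int64_t A : {-2147483647LL, -42LL, -1LL, 0LL, 7LL, 2147483647LL})
        for (int64_t B : {-2147483647LL, -1LL, 0LL, 1LL, 123456789LL})
          assert(mulhs32(int32_t(A), int32_t(B)) == int32_t((A * B) >> 32));
    }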
@@ -12979,7 +13586,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SRL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32)); + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); } if (Op.getOpcode() == ISD::SRA) { if (ShiftAmt == 7) { @@ -12992,7 +13599,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); SmallVector V(32, DAG.getConstant(128 >> ShiftAmt, MVT::i8)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V); Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; @@ -13014,7 +13621,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, uint64_t ShiftAmt = 0; for (unsigned i = 0; i != Ratio; ++i) { ConstantSDNode *C = dyn_cast(Amt.getOperand(i)); - if (C == 0) + if (!C) return SDValue(); // 6 == Log2(64) ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); @@ -13025,7 +13632,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, for (unsigned j = 0; j != Ratio; ++j) { ConstantSDNode *C = dyn_cast(Amt.getOperand(i + j)); - if (C == 0) + if (!C) return SDValue(); // 6 == Log2(64) ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); @@ -13107,7 +13714,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, BaseShAmt = InVec.getOperand(1); } } - if (BaseShAmt.getNode() == 0) + if (!BaseShAmt.getNode()) BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt, DAG.getIntPtrConstant(0)); } @@ -13260,7 +13867,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, } Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT)); } - SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElems); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts); return DAG.getNode(ISD::MUL, dl, VT, R, BV); } @@ -13274,6 +13881,79 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, return DAG.getNode(ISD::MUL, dl, VT, Op, R); } + // If possible, lower this shift as a sequence of two shifts by + // constant plus a MOVSS/MOVSD instead of scalarizing it. + // Example: + // (v4i32 (srl A, (build_vector < X, Y, Y, Y>))) + // + // Could be rewritten as: + // (v4i32 (MOVSS (srl A, ), (srl A, ))) + // + // The advantage is that the two shifts from the example would be + // lowered as X86ISD::VSRLI nodes. This would be cheaper than scalarizing + // the vector shift into four scalar shifts plus four pairs of vector + // insert/extract. + if ((VT == MVT::v8i16 || VT == MVT::v4i32) && + ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { + unsigned TargetOpcode = X86ISD::MOVSS; + bool CanBeSimplified; + // The splat value for the first packed shift (the 'X' from the example). + SDValue Amt1 = Amt->getOperand(0); + // The splat value for the second packed shift (the 'Y' from the example). + SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) : + Amt->getOperand(2); + + // See if it is possible to replace this node with a sequence of + // two shifts followed by a MOVSS/MOVSD + if (VT == MVT::v4i32) { + // Check if it is legal to use a MOVSS. + CanBeSimplified = Amt2 == Amt->getOperand(2) && + Amt2 == Amt->getOperand(3); + if (!CanBeSimplified) { + // Otherwise, check if we can still simplify this node using a MOVSD. 
+ CanBeSimplified = Amt1 == Amt->getOperand(1) && + Amt->getOperand(2) == Amt->getOperand(3); + TargetOpcode = X86ISD::MOVSD; + Amt2 = Amt->getOperand(2); + } + } else { + // Do similar checks for the case where the machine value type + // is MVT::v8i16. + CanBeSimplified = Amt1 == Amt->getOperand(1); + for (unsigned i=3; i != 8 && CanBeSimplified; ++i) + CanBeSimplified = Amt2 == Amt->getOperand(i); + + if (!CanBeSimplified) { + TargetOpcode = X86ISD::MOVSD; + CanBeSimplified = true; + Amt2 = Amt->getOperand(4); + for (unsigned i=0; i != 4 && CanBeSimplified; ++i) + CanBeSimplified = Amt1 == Amt->getOperand(i); + for (unsigned j=4; j != 8 && CanBeSimplified; ++j) + CanBeSimplified = Amt2 == Amt->getOperand(j); + } + } + + if (CanBeSimplified && isa(Amt1) && + isa(Amt2)) { + // Replace this node with two shifts followed by a MOVSS/MOVSD. + EVT CastVT = MVT::v4i32; + SDValue Splat1 = + DAG.getConstant(cast(Amt1)->getAPIntValue(), VT); + SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1); + SDValue Splat2 = + DAG.getConstant(cast(Amt2)->getAPIntValue(), VT); + SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2); + if (TargetOpcode == X86ISD::MOVSD) + CastVT = MVT::v2i64; + SDValue BitCast1 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift1); + SDValue BitCast2 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift2); + SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2, + BitCast1, DAG); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } + if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) { assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq."); @@ -13351,10 +14031,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, for (unsigned i = NumElems/2; i != NumElems; ++i) Amt2Csts.push_back(Amt->getOperand(i)); - Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, - &Amt1Csts[0], NumElems/2); - Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, - &Amt2Csts[0], NumElems/2); + Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts); + Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts); } else { // Variable shift amount Amt1 = Extract128BitVector(Amt, 0, DAG, dl); @@ -13585,35 +14263,47 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); MachineMemOperand *MMO = cast(Op)->getMemOperand(); SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, - Ops, array_lengthof(Ops), T, MMO); + Ops, T, MMO); SDValue cpOut = DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); return cpOut; } -static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - assert(Subtarget->is64Bit() && "Result not type legalized?"); - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue TheChain = Op.getOperand(0); - SDLoc dl(Op); - SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1); - SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1)); - SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64, - rax.getValue(2)); - SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx, - DAG.getConstant(32, MVT::i8)); - SDValue Ops[] = { - DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp), - rdx.getValue(1) - }; - return DAG.getMergeValues(Ops, array_lengthof(Ops), dl); -} - static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { MVT SrcVT = Op.getOperand(0).getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); + 
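The MOVSS/MOVSD rewrite completed above can be pictured per lane: both candidate shifts are performed with splat amounts, and the blend keeps lane 0 from one result and the remaining lanes from the other. A model of the v4i32/MOVSS shape, with std::array standing in for a vector register (the operand order is my reading of the getTargetShuffleNode call; treat it as illustrative):

    #include <array>
    #include <cstdint>

    typedef std::array<uint32_t, 4> V4;

    static V4 srlSplat(V4 A, unsigned Amt) {   // would select to X86ISD::VSRLI
      for (unsigned I = 0; I != 4; ++I)
        A[I] >>= Amt;
      return A;
    }

    // MOVSS semantics: lane 0 from the second source, lanes 1..3 from the first.
    static V4 movss(V4 Rest, V4 Lane0Src) {
      V4 R = { Lane0Src[0], Rest[1], Rest[2], Rest[3] };
      return R;
    }

    // (v4i32 (srl A, <X, Y, Y, Y>)) == movss(srl A by Y, srl A by X)
    V4 srlNonUniform(V4 A, unsigned X, unsigned Y) {
      return movss(srlSplat(A, Y), srlSplat(A, X));
    }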
+ if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) { + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + if (DstVT != MVT::f64) + // This conversion needs to be expanded. + return SDValue(); + + SDValue InVec = Op->getOperand(0); + SDLoc dl(Op); + unsigned NumElts = SrcVT.getVectorNumElements(); + EVT SVT = SrcVT.getVectorElementType(); + + // Widen the vector in input in the case of MVT::v2i32. + // Example: from MVT::v2i32 to MVT::v4i32. + SmallVector Elts; + for (unsigned i = 0, e = NumElts; i != e; ++i) + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec, + DAG.getIntPtrConstant(i))); + + // Explicitly mark the extra elements as Undef. + SDValue Undef = DAG.getUNDEF(SVT); + for (unsigned i = NumElts, e = NumElts * 2; i != e; ++i) + Elts.push_back(Undef); + + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts); + SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64, + DAG.getIntPtrConstant(0)); + } + assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() && Subtarget->hasMMX() && "Unexpected custom BITCAST"); assert((DstVT == MVT::i64 || @@ -13641,8 +14331,7 @@ static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) { cast(Node)->getMemoryVT(), Node->getOperand(0), Node->getOperand(1), negOp, - cast(Node)->getSrcValue(), - cast(Node)->getAlignment(), + cast(Node)->getMemOperand(), cast(Node)->getOrdering(), cast(Node)->getSynchScope()); } @@ -13730,12 +14419,11 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, Type *RetTy = isF64 ? (Type*)StructType::get(ArgTy, ArgTy, NULL) : (Type*)VectorType::get(ArgTy, 4); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, - false, false, false, false, 0, - CallingConv::C, /*isTaillCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed*/true, - Callee, Args, DAG, dl); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(CallingConv::C, RetTy, Callee, &Args, 0); + std::pair CallResult = TLI.LowerCallTo(CLI); if (isF64) @@ -13764,6 +14452,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::VSELECT: return LowerVSELECT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); @@ -13815,6 +14504,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG); case ISD::CTTZ: return LowerCTTZ(Op, DAG); case ISD::MUL: return LowerMUL(Op, Subtarget, DAG); + case ISD::UMUL_LOHI: + case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG); case ISD::SRA: case ISD::SRL: case ISD::SHL: return LowerShift(Op, Subtarget, DAG); @@ -13832,7 +14523,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); - case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); } } @@ 
-13875,10 +14565,10 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl&Results, SDValue Ops[] = { Chain, In1, In2L, In2H }; SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64, + DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64, cast(Node)->getMemOperand()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF)); Results.push_back(Result.getValue(2)); } @@ -13899,6 +14589,16 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case ISD::SUBE: // We don't want to expand or promote these. return; + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::SDIVREM: + case ISD::UDIVREM: { + SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG); + Results.push_back(V); + return; + } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: { bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; @@ -13909,10 +14609,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, std::pair Vals = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true); SDValue FIST = Vals.first, StackSlot = Vals.second; - if (FIST.getNode() != 0) { + if (FIST.getNode()) { EVT VT = N->getValueType(0); // Return a load from the stack slot. - if (StackSlot.getNode() != 0) + if (StackSlot.getNode()) Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, MachinePointerInfo(), false, false, false, 0)); @@ -13945,20 +14645,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(V); return; } + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + switch (IntNo) { + default : llvm_unreachable("Do not know how to custom type " + "legalize this intrinsic operation!"); + case Intrinsic::x86_rdtsc: + return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, + Results); + case Intrinsic::x86_rdtscp: + return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget, + Results); + } + } case ISD::READCYCLECOUNTER: { - SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue TheChain = N->getOperand(0); - SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1); - SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32, - rd.getValue(1)); - SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32, - eax.getValue(2)); - // Use a buildpair to merge the two 32-bit values into a 64-bit one. - SDValue Ops[] = { eax, edx }; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, - array_lengthof(Ops))); - Results.push_back(edx.getValue(1)); - return; + return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, + Results); } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); @@ -13994,8 +14696,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, MachineMemOperand *MMO = cast(N)->getMemOperand(); unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG : X86ISD::LCMPXCHG8_DAG; - SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, - Ops, array_lengthof(Ops), T, MMO); + SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO); SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, Regs64bit ? X86::RAX : X86::EAX, HalfT, Result.getValue(1)); @@ -14003,7 +14704,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Regs64bit ? 
X86::RDX : X86::EDX, HalfT, cpOutL.getValue(2)); SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF)); Results.push_back(cpOutH.getValue(1)); return; } @@ -14058,14 +14759,39 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc); return; } - case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_LOAD: { ReplaceATOMIC_LOAD(N, Results, DAG); + return; + } + case ISD::BITCAST: { + assert(Subtarget->hasSSE2() && "Requires at least SSE2!"); + EVT DstVT = N->getValueType(0); + EVT SrcVT = N->getOperand(0)->getValueType(0); + + if (SrcVT != MVT::f64 || + (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8)) + return; + + unsigned NumElts = DstVT.getVectorNumElements(); + EVT SVT = DstVT.getVectorElementType(); + EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2); + SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + MVT::v2f64, N->getOperand(0)); + SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded); + + SmallVector Elts; + for (unsigned i = 0, e = NumElts; i != e; ++i) + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, + ToVecInt, DAG.getIntPtrConstant(i))); + + Results.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, DstVT, Elts)); + } } } const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { - default: return NULL; + default: return nullptr; case X86ISD::BSF: return "X86ISD::BSF"; case X86ISD::BSR: return "X86ISD::BSR"; case X86ISD::SHLD: return "X86ISD::SHLD"; @@ -14176,7 +14902,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::OR: return "X86ISD::OR"; case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; - case X86ISD::BZHI: return "X86ISD::BZHI"; case X86ISD::BEXTR: return "X86ISD::BEXTR"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; @@ -14203,6 +14928,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::UNPCKH: return "X86ISD::UNPCKH"; case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST"; case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM"; + case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT"; case X86ISD::VPERMILP: return "X86ISD::VPERMILP"; case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VPERMV: return "X86ISD::VPERMV"; @@ -14210,6 +14936,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; + case X86ISD::PMULDQ: return "X86ISD::PMULDQ"; case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS"; case X86ISD::VAARG_64: return "X86ISD::VAARG_64"; case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA"; @@ -14240,7 +14967,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM, Reloc::Model R = getTargetMachine().getRelocationModel(); // X86 allows a sign-extended 32-bit immediate field as a displacement. - if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL)) + if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr)) return false; if (AM.BaseGV) { @@ -14418,7 +15145,23 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, if (VT.getSizeInBits() == 64) return false; - // FIXME: pshufb, blends, shifts. 
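The new ISD::BITCAST case above handles f64 -> v2i32/v4i16/v8i8 by widening through a 128-bit vector and extracting the low lanes; the LowerBITCAST change earlier in the patch is the mirror image. In scalar terms, on a little-endian target like x86 the destination lanes are just consecutive slices of the f64 bit pattern, which a sketch makes concrete (memcpy being the portable bit-preserving move):

    #include <array>
    #include <cstdint>
    #include <cstring>

    // f64 -> v2i32 on a little-endian target: lane 0 is the low 32 bits.
    std::array<uint32_t, 2> bitcastF64ToV2I32(double D) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof(Bits));
      std::array<uint32_t, 2> Lanes = { uint32_t(Bits), uint32_t(Bits >> 32) };
      return Lanes;
    }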
+ // If this is a single-input shuffle with no 128 bit lane crossings we can + // lower it into pshufb. + if ((SVT.is128BitVector() && Subtarget->hasSSSE3()) || + (SVT.is256BitVector() && Subtarget->hasInt256())) { + bool isLegal = true; + for (unsigned I = 0, E = M.size(); I != E; ++I) { + if (M[I] >= (int)SVT.getVectorNumElements() || + ShuffleCrosses128bitLane(SVT, I, M[I])) { + isLegal = false; + break; + } + } + if (isLegal) + return true; + } + + // FIXME: blends, shifts. return (SVT.getVectorNumElements() == 2 || ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isMOVLMask(M, SVT) || @@ -15366,7 +16109,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( OffsetDestReg = 0; // unused OverflowDestReg = DestReg; - offsetMBB = NULL; + offsetMBB = nullptr; overflowMBB = thisMBB; endMBB = thisMBB; } else { @@ -15736,7 +16479,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); - assert(getTargetMachine().Options.EnableSegmentedStacks); + assert(MF->shouldSplitStack()); unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; unsigned TlsOffset = Is64Bit ? 0x70 : 0x30; @@ -16509,11 +17252,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // X86 Optimization Hooks //===----------------------------------------------------------------------===// -void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { unsigned BitWidth = KnownZero.getBitWidth(); unsigned Opc = Op.getOpcode(); assert((Opc >= ISD::BUILTIN_OP_END || @@ -16576,8 +17319,10 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, } } -unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, - unsigned Depth) const { +unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, + const SelectionDAG &, + unsigned Depth) const { // SETCC_CARRY sets the dest to ~0 for true or 0 for false. if (Op.getOpcode() == X86ISD::SETCC_CARRY) return Op.getValueType().getScalarType().getSizeInBits(); @@ -16679,7 +17424,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() }; SDValue ResNode = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, - array_lengthof(Ops), Ld->getMemoryVT(), Ld->getPointerInfo(), Ld->getAlignment(), @@ -17036,6 +17780,51 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, return std::make_pair(Opc, NeedSplit); } +static SDValue +TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDLoc dl(N); + SDValue Cond = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + + if (Cond.getOpcode() == ISD::SIGN_EXTEND) { + SDValue CondSrc = Cond->getOperand(0); + if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG) + Cond = CondSrc->getOperand(0); + } + + MVT VT = N->getSimpleValueType(0); + MVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + // There is no blend with immediate in AVX-512. 
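+ // [Editor's worked example, not part of the upstream patch] The loop
+ // below turns blend-mask bit i into a shuffle index: lane i reads from
+ // LHS (index i) when bit i is clear and from RHS (index i + NumElems)
+ // when it is set. For a hypothetical v4i32 select with
+ // MaskValue = 0b0110, the resulting mask is {0, 5, 6, 3}.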
+ if (VT.is512BitVector())
+ return SDValue();
+
+ if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+ return SDValue();
+ if (!Subtarget->hasInt256() && VT == MVT::v16i16)
+ return SDValue();
+
+ if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
+ return SDValue();
+
+ unsigned MaskValue = 0;
+ if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
+ return SDValue();
+
+ SmallVector<int, 8> ShuffleMask(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ // Be sure we emit undef where we can.
+ if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF)
+ ShuffleMask[i] = -1;
+ else
+ ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
+ }
+
+ return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
+}
+
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
 /// nodes.
 static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
@@ -17378,7 +18167,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
 // Another special case: If C was a sign bit, the sub has been
 // canonicalized into a xor.
- // FIXME: Would it be better to use ComputeMaskedBits to determine whether
+ // FIXME: Would it be better to use computeKnownBits to determine whether
 // it's safe to decanonicalize the xor?
 // x s< 0 ? x^C : 0 --> subus x, C
 if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
@@ -17544,7 +18333,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
 // depend on the highest bit in each word. Try to use SimplifyDemandedBits
 // to simplify previous instructions.
 if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
- !DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
+ !DCI.isBeforeLegalize() &&
+ // We explicitly check against v8i16 and v16i16 because, although
+ // they're marked as Custom, they might only be legal when Cond is a
+ // build_vector of constants. This will be taken care of in a later
+ // condition.
+ (TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
+ VT != MVT::v8i16)) {
 unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
 // Don't optimize vector selects that map to mask-registers.
@@ -17571,6 +18366,23 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
 DCI.CommitTargetLoweringOpt(TLO);
 }
+ // We should generate an X86ISD::BLENDI from a vselect if its argument
+ // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
+ // constants. This specific pattern gets generated when we split a
+ // selector for a 512-bit vector in a machine without AVX512 (but with
+ // 256-bit vectors), during legalization:
+ //
+ // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
+ //
+ // Iff we find this pattern and the build_vectors are built from
+ // constants, we translate the vselect into a shuffle_vector that we
+ // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
+ if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
+ SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
+ if (Shuffle.getNode())
+ return Shuffle;
+ }
+
 return SDValue();
 }
@@ -17605,7 +18417,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
 SDValue Op2 = Cmp.getOperand(1);
 SDValue SetCC;
- const ConstantSDNode* C = 0;
+ const ConstantSDNode* C = nullptr;
 bool needOppositeCond = (CC == X86::COND_E);
 bool checkAgainstTrue = false; // Is it a comparison against 1?
@@ -17740,8 +18552,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) { SDValue Ops[] = { FalseOp, TrueOp, DAG.getConstant(CC, MVT::i8), Flags }; - return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), - Ops, array_lengthof(Ops)); + return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops); } // If this is a select between two integer constants, try to do some @@ -17856,7 +18667,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, // the DCI.xxxx conditions are provided to postpone the optimization as // late as possible. - ConstantSDNode *CmpAgainst = 0; + ConstantSDNode *CmpAgainst = nullptr; if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && (CmpAgainst = dyn_cast(Cond.getOperand(1))) && !isa(Cond.getOperand(0))) { @@ -17871,8 +18682,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, CmpAgainst == dyn_cast(TrueOp)) { SDValue Ops[] = { FalseOp, Cond.getOperand(0), DAG.getConstant(CC, MVT::i8), Cond }; - return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops, - array_lengthof(Ops)); + return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops); } } } @@ -17880,6 +18690,106 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: return SDValue(); + // SSE/AVX/AVX2 blend intrinsics. + case Intrinsic::x86_avx2_pblendvb: + case Intrinsic::x86_avx2_pblendw: + case Intrinsic::x86_avx2_pblendd_128: + case Intrinsic::x86_avx2_pblendd_256: + // Don't try to simplify this intrinsic if we don't have AVX2. + if (!Subtarget->hasAVX2()) + return SDValue(); + // FALL-THROUGH + case Intrinsic::x86_avx_blend_pd_256: + case Intrinsic::x86_avx_blend_ps_256: + case Intrinsic::x86_avx_blendv_pd_256: + case Intrinsic::x86_avx_blendv_ps_256: + // Don't try to simplify this intrinsic if we don't have AVX. + if (!Subtarget->hasAVX()) + return SDValue(); + // FALL-THROUGH + case Intrinsic::x86_sse41_pblendw: + case Intrinsic::x86_sse41_blendpd: + case Intrinsic::x86_sse41_blendps: + case Intrinsic::x86_sse41_blendvps: + case Intrinsic::x86_sse41_blendvpd: + case Intrinsic::x86_sse41_pblendvb: { + SDValue Op0 = N->getOperand(1); + SDValue Op1 = N->getOperand(2); + SDValue Mask = N->getOperand(3); + + // Don't try to simplify this intrinsic if we don't have SSE4.1. + if (!Subtarget->hasSSE41()) + return SDValue(); + + // fold (blend A, A, Mask) -> A + if (Op0 == Op1) + return Op0; + // fold (blend A, B, allZeros) -> A + if (ISD::isBuildVectorAllZeros(Mask.getNode())) + return Op0; + // fold (blend A, B, allOnes) -> B + if (ISD::isBuildVectorAllOnes(Mask.getNode())) + return Op1; + + // Simplify the case where the mask is a constant i32 value. + if (ConstantSDNode *C = dyn_cast(Mask)) { + if (C->isNullValue()) + return Op0; + if (C->isAllOnesValue()) + return Op1; + } + } + + // Packed SSE2/AVX2 arithmetic shift immediate intrinsics. 
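+ // [Editor's note, illustrative only] The cases below rewrite a
+ // constant-count packed arithmetic shift intrinsic into the generic
+ // ISD::SRA node so that ordinary DAG combines can see through it,
+ // conceptually:
+ //
+ // (int_x86_sse2_psrai_d V, 5) --> (sra V, <5, 5, 5, 5>)
+ //
+ // A count of zero folds away to the input, and counts >= the element
+ // width are left alone, since the hardware saturates them while
+ // ISD::SRA leaves them undefined.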
+ case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psra_d: { + SDValue Op0 = N->getOperand(1); + SDValue Op1 = N->getOperand(2); + EVT VT = Op0.getValueType(); + assert(VT.isVector() && "Expected a vector type!"); + + if (isa(Op1)) + Op1 = Op1.getOperand(0); + + if (!isa(Op1)) + return SDValue(); + + EVT SVT = VT.getVectorElementType(); + unsigned SVTBits = SVT.getSizeInBits(); + + ConstantSDNode *CND = cast(Op1); + const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue()); + uint64_t ShAmt = C.getZExtValue(); + + // Don't try to convert this shift into a ISD::SRA if the shift + // count is bigger than or equal to the element size. + if (ShAmt >= SVTBits) + return SDValue(); + + // Trivial case: if the shift count is zero, then fold this + // into the first operand. + if (ShAmt == 0) + return Op0; + + // Replace this packed shift intrinsic with a target independent + // shift dag node. + SDValue Splat = DAG.getConstant(C, VT); + return DAG.getNode(ISD::SRA, SDLoc(N), VT, Op0, Splat); + } + } +} + /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. /// LEA + SHL, LEA + LEA. @@ -18223,7 +19133,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(), N1->getOperand(0)); SmallVector C(WideVT.getVectorNumElements(), N1); - N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size()); + N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C); } else if (RHSTrunc) { N1 = N1->getOperand(0); } @@ -18260,40 +19170,13 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - // Create BEXTR and BZHI instructions - // BZHI is X & ((1 << Y) - 1) + // Create BEXTR instructions // BEXTR is ((X >> imm) & (2**size-1)) if (VT == MVT::i32 || VT == MVT::i64) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDLoc DL(N); - if (Subtarget->hasBMI2()) { - // Check for (and (add (shl 1, Y), -1), X) - if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::SHL) { - SDValue N001 = N00.getOperand(1); - assert(N001.getValueType() == MVT::i8 && "unexpected type"); - ConstantSDNode *C = dyn_cast(N00.getOperand(0)); - if (C && C->getZExtValue() == 1) - return DAG.getNode(X86ISD::BZHI, DL, VT, N1, N001); - } - } - - // Check for (and X, (add (shl 1, Y), -1)) - if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == ISD::SHL) { - SDValue N101 = N10.getOperand(1); - assert(N101.getValueType() == MVT::i8 && "unexpected type"); - ConstantSDNode *C = dyn_cast(N10.getOperand(0)); - if (C && C->getZExtValue() == 1) - return DAG.getNode(X86ISD::BZHI, DL, VT, N0, N101); - } - } - } - // Check for BEXTR. 
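 // [Editor's note, illustrative only] BEXTR (BMI/TBM) extracts a
 // contiguous bit field: dst = (src >> start) & ((1 << len) - 1), with
 // start in bits [7:0] and len in bits [15:8] of the control operand.
 // So a pattern such as (and (srl X, 4), 0xFF) can become one BEXTR
 // with control 0x0804 instead of a shift followed by an and.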
if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
 (N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
@@ -18533,8 +19416,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
 SDValue Ops[] = { N0.getOperand(0), Neg,
 DAG.getConstant(X86::COND_GE, MVT::i8),
 SDValue(Neg.getNode(), 1) };
- return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue),
- Ops, array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
 }
 return SDValue();
 }
@@ -18691,8 +19573,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
 Increment);
 }
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
- Chains.size());
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
 // Bitcast the loaded value to a vector of the original element type, in
 // the size of the target vector type.
@@ -18867,8 +19748,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
 Chains.push_back(Ch);
 }
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
- Chains.size());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
 }
 // Turn load->store of MMX types into GPR load/stores. This avoids clobbering
@@ -18891,7 +19771,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
 !cast<LoadSDNode>(St->getValue())->isVolatile() &&
 St->getChain().hasOneUse() && !St->isVolatile()) {
 SDNode* LdVal = St->getValue().getNode();
- LoadSDNode *Ld = 0;
+ LoadSDNode *Ld = nullptr;
 int TokenFactorIndex = -1;
 SmallVector<SDValue, 8> Ops;
 SDNode* ChainVal = St->getChain().getNode();
@@ -18934,8 +19814,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
 SDValue NewChain = NewLd.getValue(1);
 if (TokenFactorIndex != -1) {
 Ops.push_back(NewChain);
- NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
- Ops.size());
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
 }
 return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
 St->getPointerInfo(),
@@ -18962,8 +19841,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
 if (TokenFactorIndex != -1) {
 Ops.push_back(LoLd);
 Ops.push_back(HiLd);
- NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
- Ops.size());
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
 }
 LoAddr = St->getBasePtr();
@@ -19432,6 +20310,33 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
 return SDValue();
 }
+static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDLoc dl(N);
+ MVT VT = N->getOperand(1)->getSimpleValueType(0);
+ assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
+ "X86insertps is only defined for v4x32");
+
+ SDValue Ld = N->getOperand(1);
+ if (MayFoldLoad(Ld)) {
+ // Extract the countS bits from the immediate so we can get the proper
+ // address when narrowing the vector load to a specific element.
+ // When the second source op is a memory address, insertps doesn't use
+ // countS and just gets an f32 from that address.
+ unsigned DestIndex =
+ cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
+ Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
+ } else
+ return SDValue();
+
+ // Create this as a scalar to vector to match the instruction pattern.
+ SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
+ // countS bits are ignored when loading from memory on insertps, which
+ // means we don't need to explicitly set them to 0.
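+ // [Editor's note, for context; field layout per the Intel SDM] The
+ // insertps immediate packs three fields: bits [7:6] select the source
+ // lane (the ">> 6" above), bits [5:4] select the destination lane, and
+ // bits [3:0] are the zero mask. Only the latter two are honoured by
+ // the memory form, which is why countS can be dropped here.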
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
+ LoadScalarToVector, N->getOperand(2));
+}
+
 // Helper function of PerformSETCCCombine. It is to materialize "setb reg"
 // as "sbb reg,reg", since it can be extended without zext and produces
 // an all-ones bit which is more useful than 0/1 in some cases.
@@ -19711,7 +20616,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
 case ISD::ANY_EXTEND:
 case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
 case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
- case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
+ case ISD::SIGN_EXTEND_INREG:
+ return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
 case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
 case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget);
 case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
@@ -19732,6 +20638,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
 case X86ISD::VPERM2X128:
 case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
 case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
+ case X86ISD::INSERTPS:
+ return PerformINSERTPSCombine(N, DAG, Subtarget);
 }
 return SDValue();
@@ -20006,7 +20916,7 @@ TargetLowering::ConstraintWeight
 Value *CallOperandVal = info.CallOperandVal;
 // If we don't have a value, we can't do a match,
 // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
 return CW_Default;
 Type *type = CallOperandVal->getType();
 // Look at the constraint type.
@@ -20124,7 +21034,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 std::string &Constraint,
 std::vector<SDValue>&Ops,
 SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result;
 // Only support length 1 constraints for now.
 if (Constraint.length() > 1) return;
@@ -20207,7 +21117,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 // If we are in non-pic codegen mode, we allow the address of a global (with
 // an optional displacement) to be used with 'i'.
- GlobalAddressSDNode *GA = 0;
+ GlobalAddressSDNode *GA = nullptr;
 int64_t Offset = 0;
 // Match either (GA), (GA+C), (GA+C1+C2), etc.
@@ -20363,7 +21273,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 // Not found as a standard register?
- if (Res.second == 0) {
+ if (!Res.second) {
 // Map st(0) -> st(7) -> ST0
 if (Constraint.size() == 7 && Constraint[0] == '{' &&
 tolower(Constraint[1]) == 's' &&
@@ -20488,3 +21398,30 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 return Res;
 }
+
+int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
+ Type *Ty) const {
+ // Scaling factors are not free at all.
+ // An indexed folded instruction, i.e., inst (reg1, reg2, scale),
+ // will take 2 allocations in the out-of-order engine instead of 1
+ // for plain addressing mode, i.e. inst (reg1).
+ // E.g.,
+ // vaddps (%rsi,%rdx), %ymm0, %ymm1
+ // Requires two allocations (one for the load, one for the computation)
+ // whereas:
+ // vaddps (%rsi), %ymm0, %ymm1
+ // Requires just 1 allocation, i.e., freeing allocations for other operations
+ // and having fewer micro operations to execute.
+ // + // For some X86 architectures, this is even worse because for instance for + // stores, the complex addressing mode forces the instruction to use the + // "load" ports instead of the dedicated "store" port. + // E.g., on Haswell: + // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3. + // vmovaps %ymm1, (%r8) can use port 2, 3, or 7. + if (isLegalAddressingMode(AM, Ty)) + // Scale represents reg2 * scale, thus account for 1 + // as soon as we use a second register. + return AM.Scale != 0; + return -1; +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0f0d17b..9f51b53 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -83,6 +83,9 @@ namespace llvm { /// readcyclecounter RDTSC_DAG, + /// X86 Read Time-Stamp Counter and Processor ID. + RDTSCP_DAG, + /// X86 compare and logical compare instructions. CMP, COMI, UCOMI, @@ -291,7 +294,6 @@ namespace llvm { ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, - BZHI, // BZHI - Zero high bits BEXTR, // BEXTR - Bit field extract UMUL, // LOW, HI, FLAGS = umul LHS, RHS @@ -345,6 +347,8 @@ namespace llvm { // PMULUDQ - Vector multiply packed unsigned doubleword integers PMULUDQ, + // PMULUDQ - Vector multiply packed signed doubleword integers + PMULDQ, // FMA nodes FMADD, @@ -614,18 +618,19 @@ namespace llvm { /// getSetCCResultType - Return the value type to use for ISD::SETCC. EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override; - /// computeMaskedBitsForTargetNode - Determine which of the bits specified + /// computeKnownBitsForTargetNode - Determine which of the bits specified /// in Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. - void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const override; + void computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; // ComputeNumSignBitsForTargetNode - Determine the number of bits in the // operation that are sign bits. unsigned ComputeNumSignBitsForTargetNode(SDValue Op, + const SelectionDAG &DAG, unsigned Depth) const override; bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA, @@ -679,6 +684,12 @@ namespace llvm { /// the immediate into a register. bool isLegalAddImmediate(int64_t Imm) const override; + /// \brief Return the cost of the scaling factor used in the addressing + /// mode represented by AM for this target, for a load/store + /// of the specified type. + /// If the AM is supported, the return value must be >= 0. + /// If the AM is not supported, it returns a negative value. + int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override; bool isVectorShiftByScalarCheap(Type *Ty) const override; @@ -771,10 +782,12 @@ namespace llvm { Type *Ty) const override; /// Intel processors have a unified instruction and data cache - const char * getClearCacheBuiltinName() const { - return 0; // nothing to do, move along. + const char * getClearCacheBuiltinName() const override { + return nullptr; // nothing to do, move along. } + unsigned getRegisterByName(const char* RegName, EVT VT) const override; + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. 
FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -871,8 +884,11 @@ namespace llvm { SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; + SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; @@ -908,6 +924,7 @@ namespace llvm { SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFormalArguments(SDValue Chain, @@ -936,7 +953,7 @@ namespace llvm { const SmallVectorImpl &Outs, LLVMContext &Context) const override; - const uint16_t *getScratchRegisters(CallingConv::ID CC) const override; + const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; /// Utility function to emit atomic-load-arith operations (and, or, xor, /// nand, max, min, umax, umin). It takes the corresponding instruction to @@ -987,11 +1004,12 @@ namespace llvm { /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. - SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const; + SDValue EmitTest(SDValue Op0, unsigned X86CC, SDLoc dl, + SelectionDAG &DAG) const; /// Emit nodes that will be selected as "cmp Op0,Op1", or something /// equivalent, for use with the given x86 condition code. - SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, + SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const; /// Convert a comparison if required by the subtarget. 
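As a reading aid for the getScalingFactorCost hook declared above (and implemented earlier in this diff): a legal addressing mode costs one extra unit when it uses a scaled index register and nothing otherwise, while an illegal mode is reported as a negative value. Below is a minimal self-contained sketch of that contract; SimpleAddrMode and scalingFactorCost are hypothetical stand-ins for illustration, not the real llvm::TargetLowering::AddrMode API.

    #include <cassert>

    // Simplified stand-in for the AddrMode fields the X86 hook inspects.
    struct SimpleAddrMode {
      long BaseOffs = 0;     // displacement
      bool HasBaseReg = false;
      long Scale = 0;        // 0 means no scaled index register
    };

    // Mirrors the contract of getScalingFactorCost: cost >= 0 iff legal.
    int scalingFactorCost(const SimpleAddrMode &AM, bool IsLegalMode) {
      if (!IsLegalMode)
        return -1;           // unsupported addressing mode
      return AM.Scale != 0;  // the second register costs one extra uop
    }

    int main() {
      SimpleAddrMode Plain;                       // inst (reg1)
      SimpleAddrMode Indexed; Indexed.Scale = 2;  // inst (reg1, reg2, 2)
      assert(scalingFactorCost(Plain, true) == 0);
      assert(scalingFactorCost(Indexed, true) == 1);
      return 0;
    }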
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 2c5edf6..37bcc52 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -209,12 +209,12 @@ def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1), def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>, + [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>, EVEX_4V; def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), (ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3), "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - [(set VR128X:$dst, (X86insrtps VR128X:$src1, + [(set VR128X:$dst, (X86insertps VR128X:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>; @@ -621,6 +621,22 @@ defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512me X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem, X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +def : Pat<(v16f32 (int_x86_avx512_mask_vpermt_ps_512 (v16i32 VR512:$idx), + (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 -1))), + (VPERMT2PSrr VR512:$src1, VR512:$idx, VR512:$src2)>; + +def : Pat<(v16i32 (int_x86_avx512_mask_vpermt_d_512 (v16i32 VR512:$idx), + (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))), + (VPERMT2Drr VR512:$src1, VR512:$idx, VR512:$src2)>; + +def : Pat<(v8f64 (int_x86_avx512_mask_vpermt_pd_512 (v8i64 VR512:$idx), + (v8f64 VR512:$src1), (v8f64 VR512:$src2), (i8 -1))), + (VPERMT2PDrr VR512:$src1, VR512:$idx, VR512:$src2)>; + +def : Pat<(v8i64 (int_x86_avx512_mask_vpermt_q_512 (v8i64 VR512:$idx), + (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))), + (VPERMT2Qrr VR512:$src1, VR512:$idx, VR512:$src2)>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask // @@ -984,6 +1000,10 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_16bit)>; + def : Pat<(v16i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK16)>; + def : Pat<(v8i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK8)>; } // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 
let Predicates = [HasAVX512] in { @@ -1356,6 +1376,23 @@ defm VMOVDQU64: avx512_load<0x6F, VR512, VK8WM, i512mem, load, "vmovdqu64", SSEPackedInt, v8i64>, XS, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr, + (v16i32 immAllZerosV), GR16:$mask)), + (VMOVDQU32rmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>; + +def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr, + (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)), + (VMOVDQU64rmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>; + +def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src), + GR16:$mask), + (VMOVDQU32mrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), + VR512:$src)>; +def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src), + GR8:$mask), + (VMOVDQU64mrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), + VR512:$src)>; + let AddedComplexity = 20 in { def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src), (bc_v8i64 (v16i32 immAllZerosV)))), @@ -3112,6 +3149,17 @@ def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; +def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), + (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr + (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; + +def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), + (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr + (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + +def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), + (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr + (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), @@ -3715,7 +3763,7 @@ defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, EVEX_CD8<32, CD8VF>; def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), - imm:$src2, (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), + imm:$src2, (v16f32 VR512:$src1), (i16 -1), FROUND_CURRENT)), (VRNDSCALEPSZr VR512:$src1, imm:$src2)>; @@ -3725,7 +3773,7 @@ defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, VEX_W, EVEX_CD8<64, CD8VF>; def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), - imm:$src2, (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), + imm:$src2, (v8f64 VR512:$src1), (i8 -1), FROUND_CURRENT)), (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; @@ -3807,7 +3855,13 @@ multiclass avx512_trunc_sat opc, string OpcodeStr, !strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"), []>, EVEX; - def krr : AVX512XS8I, EVEX, EVEX_K; + + def rrkz : AVX512XS8I opc, string OpcodeStr, def mr : AVX512XS8I, EVEX; + + def mrk : AVX512XS8I, EVEX, EVEX_K; + } defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; @@ -3855,60 +3915,86 @@ def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDBkrr VK16WM:$mask, VR512:$src)>; + (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>; def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDWkrr VK16WM:$mask, VR512:$src)>; + (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>; def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQWkrr 
VK8WM:$mask, VR512:$src)>; + (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>; def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQDkrr VK8WM:$mask, VR512:$src)>; + (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>; -multiclass avx512_extend opc, string OpcodeStr, RegisterClass DstRC, - RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag, - X86MemOperand x86memop, ValueType OpVT, ValueType InVT> { +multiclass avx512_extend opc, string OpcodeStr, RegisterClass KRC, + RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode, + PatFrag mem_frag, X86MemOperand x86memop, + ValueType OpVT, ValueType InVT> { def rr : AVX5128I, EVEX; - def rm : AVX5128I, EVEX, EVEX_K; + + def rrkz : AVX5128I, EVEX, EVEX_KZ; + + let mayLoad = 1 in { + def rm : AVX5128I, EVEX; + + def rmk : AVX5128I, + EVEX, EVEX_K; + + def rmkz : AVX5128I, + EVEX, EVEX_KZ; + } } -defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext, +defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext, memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext, +defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext, memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext, +defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext, memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext, +defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext, memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext, +defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext, memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, EVEX_CD8<32, CD8VH>; - -defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext, + +defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext, memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext, +defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext, memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext, +defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext, memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext, +defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext, memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext, +defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext, memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512, EVEX_CD8<32, CD8VH>; @@ -3984,6 +4070,62 @@ defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>, defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; +// prefetch +multiclass avx512_gather_scatter_prefetch opc, Format F, string OpcodeStr, + RegisterClass KRC, X86MemOperand memop> { + let Predicates = [HasPFI], hasSideEffects = 1 in + def m : AVX5128I, 
EVEX, EVEX_K; +} + +defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps", + VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps", + VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; + +defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd", + VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; + +defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd", + VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; + +defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps", + VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps", + VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; + +defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd", + VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; + +defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd", + VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; + +defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps", + VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps", + VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; + +defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd", + VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; + +defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd", + VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; + +defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps", + VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>; + +defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps", + VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>; + +defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd", + VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>; + +defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", + VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; //===----------------------------------------------------------------------===// // VSHUFPS - VSHUFPD Operations @@ -4200,3 +4342,19 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, GR8:$mask), (VPCONFLICTQrrk VR512:$src1, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; + +def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; +def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; +def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; + +def : Pat<(store VK1:$src, addr:$dst), + (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>; + +def truncstorei1 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i1; +}]>; + +def : Pat<(truncstorei1 GR8:$src, addr:$dst), + (MOV8mr addr:$dst, GR8:$src)>; + diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index aaef4a4..e421f8c 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -52,7 +52,8 @@ struct X86AddressMode { unsigned GVOpFlags; X86AddressMode() - : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { + : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(nullptr), + GVOpFlags(0) { Base.Reg = 
0; } diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 401849f..34d8fb9 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -1187,9 +1187,9 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); APInt KnownZero0, KnownOne0; - CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0); + CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0); APInt KnownZero1, KnownOne1; - CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0); + CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0); return (~KnownZero0 & ~KnownZero1) == 0; }]>; diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index df6c9da..c0a6864 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -19,8 +19,9 @@ let Constraints = "$src1 = $dst" in { multiclass fma3p_rm opc, string OpcodeStr, PatFrag MemFrag128, PatFrag MemFrag256, ValueType OpVT128, ValueType OpVT256, + bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0, SDPatternOperator Op = null_frag> { - let usesCustomInserter = 1 in + let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in def r : FMA3 opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, VR128:$src3)))]>; - let mayLoad = 1 in + let mayLoad = 1, isCommutable = IsMVariantCommutable in def m : FMA3 opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, (MemFrag128 addr:$src3))))]>; - let usesCustomInserter = 1 in + let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in def rY : FMA3 opc, string OpcodeStr, [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, VR256:$src3)))]>, VEX_L; - let mayLoad = 1 in + let mayLoad = 1, isCommutable = IsMVariantCommutable in def mY : FMA3 opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { - let isCommutable = 1 in + // For 213, both the register and memory variant are commutable. + // Indeed, the commutable operands are 1 and 2 and both live in registers + // for both variants. defm r213 : fma3p_rm; + MemFrag128, MemFrag256, OpTy128, OpTy256, + /* IsRVariantCommutable */ 1, + /* IsMVariantCommutable */ 1, + Op>; let neverHasSideEffects = 1 in { defm r132 : fma3p_rm; - let isCommutable = 1 in + // For 231, only the register variant is commutable. + // For the memory variant the folded operand must be in 3. Thus, + // in that case, it cannot be swapped with 2. 
defm r231 : fma3p_rm; + MemFrag128, MemFrag256, OpTy128, OpTy256, + /* IsRVariantCommutable */ 1, + /* IsMVariantCommutable */ 0>; } // neverHasSideEffects = 1 } @@ -119,8 +129,9 @@ let ExeDomain = SSEPackedDouble in { let Constraints = "$src1 = $dst" in { multiclass fma3s_rm opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, ValueType OpVT, PatFrag mem_frag, + bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0, SDPatternOperator OpNode = null_frag> { - let usesCustomInserter = 1 in + let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in def r : FMA3 opc, string OpcodeStr, X86MemOperand x86memop, [(set RC:$dst, (OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>; - let mayLoad = 1 in + let mayLoad = 1, isCommutable = IsMVariantCommutable in def m : FMA3 opc132, bits<8> opc213, bits<8> opc231, let neverHasSideEffects = 1 in { defm r132 : fma3s_rm; - let isCommutable = 1 in + // See the other defm of r231 for the explanation regarding the + // commutable flags. defm r231 : fma3s_rm; + x86memop, RC, OpVT, mem_frag, + /* IsRVariantCommutable */ 1, + /* IsMVariantCommutable */ 0>; } -let isCommutable = 1 in +// See the other defm of r213 for the explanation regarding the +// commutable flags. defm r213 : fma3s_rm; + x86memop, RC, OpVT, mem_frag, + /* IsRVariantCommutable */ 1, + /* IsMVariantCommutable */ 1, + OpNode>; } multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 486e5a9..1582f43 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -81,7 +81,7 @@ def X86pinsrb : SDNode<"X86ISD::PINSRB", def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def X86insrtps : SDNode<"X86ISD::INSERTPS", +def X86insertps : SDNode<"X86ISD::INSERTPS", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", @@ -175,6 +175,9 @@ def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>; def X86pmuludq : SDNode<"X86ISD::PMULUDQ", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>>; +def X86pmuldq : SDNode<"X86ISD::PMULDQ", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameAs<1,2>]>>; // Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get // translated into one of the target nodes below during lowering. diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 6450f2a..6993577 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -36,11 +36,13 @@ #include "llvm/Target/TargetOptions.h" #include +using namespace llvm; + +#define DEBUG_TYPE "x86-instr-info" + #define GET_INSTRINFO_CTOR_DTOR #include "X86GenInstrInfo.inc" -using namespace llvm; - static cl::opt NoFusing("disable-spill-fusing", cl::desc("Disable fusing of spill code into instructions")); @@ -1511,12 +1513,14 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, /// operand and follow operands form a reference to the stack frame. 
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, int &FrameIndex) const { - if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && - MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && - MI->getOperand(Op+1).getImm() == 1 && - MI->getOperand(Op+2).getReg() == 0 && - MI->getOperand(Op+3).getImm() == 0) { - FrameIndex = MI->getOperand(Op).getIndex(); + if (MI->getOperand(Op+X86::AddrBaseReg).isFI() && + MI->getOperand(Op+X86::AddrScaleAmt).isImm() && + MI->getOperand(Op+X86::AddrIndexReg).isReg() && + MI->getOperand(Op+X86::AddrDisp).isImm() && + MI->getOperand(Op+X86::AddrScaleAmt).getImm() == 1 && + MI->getOperand(Op+X86::AddrIndexReg).getReg() == 0 && + MI->getOperand(Op+X86::AddrDisp).getImm() == 0) { + FrameIndex = MI->getOperand(Op+X86::AddrBaseReg).getIndex(); return true; } return false; @@ -1680,15 +1684,16 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::FsMOVAPSrm: case X86::FsMOVAPDrm: { // Loads from constant pools are trivially rematerializable. - if (MI->getOperand(1).isReg() && - MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && + if (MI->getOperand(1+X86::AddrBaseReg).isReg() && + MI->getOperand(1+X86::AddrScaleAmt).isImm() && + MI->getOperand(1+X86::AddrIndexReg).isReg() && + MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 && MI->isInvariantLoad(AA)) { - unsigned BaseReg = MI->getOperand(1).getReg(); + unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg(); if (BaseReg == 0 || BaseReg == X86::RIP) return true; // Allow re-materialization of PIC load. - if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal()) + if (!ReMatPICStubLoad && MI->getOperand(1+X86::AddrDisp).isGlobal()) return false; const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1699,13 +1704,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::LEA32r: case X86::LEA64r: { - if (MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 && - !MI->getOperand(4).isReg()) { + if (MI->getOperand(1+X86::AddrScaleAmt).isImm() && + MI->getOperand(1+X86::AddrIndexReg).isReg() && + MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 && + !MI->getOperand(1+X86::AddrDisp).isReg()) { // lea fi#, lea GV, etc. are all rematerializable. - if (!MI->getOperand(1).isReg()) + if (!MI->getOperand(1+X86::AddrBaseReg).isReg()) return true; - unsigned BaseReg = MI->getOperand(1).getReg(); + unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg(); if (BaseReg == 0) return true; // Allow re-materialization of lea PICBase + x. @@ -1722,12 +1728,8 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, return true; } -/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that -/// would clobber the EFLAGS condition register. Note the result may be -/// conservative. If it cannot definitely determine the safety after visiting -/// a few instructions in each direction it assumes it's not safe. 
-static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) { +bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { MachineBasicBlock::iterator E = MBB.end(); // For compile time consideration, if we are not able to determine the @@ -1998,7 +2000,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); unsigned leaInReg2 = 0; - MachineInstr *InsMI2 = 0; + MachineInstr *InsMI2 = nullptr; if (Src == Src2) { // ADD16rr %reg1028, %reg1028 // just a single insert_subreg. @@ -2062,14 +2064,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // convert them to equivalent lea if the condition code register def's // are dead! if (hasLiveCondCodeDef(MI)) - return 0; + return nullptr; MachineFunction &MF = *MI->getParent()->getParent(); // All instructions input are two-addr instructions. Get the known operands. const MachineOperand &Dest = MI->getOperand(0); const MachineOperand &Src = MI->getOperand(1); - MachineInstr *NewMI = NULL; + MachineInstr *NewMI = nullptr; // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When // we have better subtarget support, enable the 16-bit LEA generation here. // 16-bit LEA is also slow on Core2. @@ -2080,11 +2082,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, switch (MIOpc) { case X86::SHUFPSrri: { assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); - if (!TM.getSubtarget().hasSSE2()) return 0; + if (!TM.getSubtarget().hasSSE2()) return nullptr; unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); - if (B != C) return 0; + if (B != C) return nullptr; unsigned M = MI->getOperand(3).getImm(); NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri)) .addOperand(Dest).addOperand(Src).addImm(M); @@ -2092,11 +2094,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::SHUFPDrri: { assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!"); - if (!TM.getSubtarget().hasSSE2()) return 0; + if (!TM.getSubtarget().hasSSE2()) return nullptr; unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); - if (B != C) return 0; + if (B != C) return nullptr; unsigned M = MI->getOperand(3).getImm(); // Convert to PSHUFD mask. @@ -2109,13 +2111,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHL64ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; + if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; // LEA can't handle RSP. if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) && !MF.getRegInfo().constrainRegClass(Src.getReg(), &X86::GR64_NOSPRegClass)) - return 0; + return nullptr; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addOperand(Dest) @@ -2125,7 +2127,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHL32ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; + if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; unsigned Opc = is64Bit ? 
X86::LEA64_32r : X86::LEA32r; @@ -2135,7 +2137,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest) @@ -2151,10 +2153,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SHL16ri: { assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!"); unsigned ShAmt = getTruncatedShiftCount(MI, 2); - if (!isTruncatedShiftCountForLEA(ShAmt)) return 0; + if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr; if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : nullptr; NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addOperand(Dest) .addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0); @@ -2163,7 +2165,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, default: { switch (MIOpc) { - default: return 0; + default: return nullptr; case X86::INC64r: case X86::INC32r: case X86::INC64_32r: { @@ -2175,7 +2177,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest) @@ -2189,7 +2191,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::INC16r: case X86::INC64_16r: if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) + : nullptr; assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addOperand(Dest).addOperand(Src), 1); @@ -2206,7 +2209,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest) @@ -2221,7 +2224,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::DEC16r: case X86::DEC64_16r: if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? 
convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) + : nullptr; assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addOperand(Dest).addOperand(Src), -1); @@ -2242,7 +2246,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; const MachineOperand &Src2 = MI->getOperand(2); bool isKill2, isUndef2; @@ -2250,7 +2254,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false, SrcReg2, isKill2, isUndef2, ImplicitOp2)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest); @@ -2272,7 +2276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD16rr: case X86::ADD16rr_DB: { if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) + : nullptr; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Src2 = MI->getOperand(2).getReg(); bool isKill2 = MI->getOperand(2).isKill(); @@ -2311,7 +2316,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false); if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true, SrcReg, isKill, isUndef, ImplicitOp)) - return 0; + return nullptr; MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addOperand(Dest) @@ -2327,7 +2332,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD16ri_DB: case X86::ADD16ri8_DB: if (DisableLEA16) - return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; + return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) + : nullptr; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addOperand(Dest).addOperand(Src), @@ -2337,7 +2343,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } } - if (!NewMI) return 0; + if (!NewMI) return nullptr; if (LV) { // Update live variables if (Src.isKill()) @@ -2789,11 +2795,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, std::next(I)->eraseFromParent(); Cond.clear(); - FBB = 0; + FBB = nullptr; // Delete the JMP if it's equivalent to a fall-through. if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = 0; + TBB = nullptr; I->eraseFromParent(); I = MBB.end(); UnCondBrIter = MBB.end(); @@ -3549,6 +3555,26 @@ inline static bool isDefConvertible(MachineInstr *MI) { } } +/// isUseDefConvertible - check whether the use can be converted +/// to remove a comparison against zero. 
+static X86::CondCode isUseDefConvertible(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return X86::COND_INVALID; + case X86::LZCNT16rr: case X86::LZCNT16rm: + case X86::LZCNT32rr: case X86::LZCNT32rm: + case X86::LZCNT64rr: case X86::LZCNT64rm: + return X86::COND_B; + case X86::POPCNT16rr:case X86::POPCNT16rm: + case X86::POPCNT32rr:case X86::POPCNT32rm: + case X86::POPCNT64rr:case X86::POPCNT64rm: + return X86::COND_E; + case X86::TZCNT16rr: case X86::TZCNT16rm: + case X86::TZCNT32rr: case X86::TZCNT32rm: + case X86::TZCNT64rr: case X86::TZCNT64rm: + return X86::COND_B; + } +} + /// optimizeCompareInstr - Check if there exists an earlier instruction that /// operates on the same source operands and sets flags in the same way as /// Compare; remove Compare if possible. @@ -3615,13 +3641,38 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If we are comparing against zero, check whether we can use MI to update // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize. bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0); - if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() || - !isDefConvertible(MI))) + if (IsCmpZero && MI->getParent() != CmpInstr->getParent()) return false; + // If we have a use of the source register between the def and our compare + // instruction we can eliminate the compare iff the use sets EFLAGS in the + // right way. + bool ShouldUpdateCC = false; + X86::CondCode NewCC = X86::COND_INVALID; + if (IsCmpZero && !isDefConvertible(MI)) { + // Scan forward from the def until we hit the use we're looking for or the + // compare instruction. + for (MachineBasicBlock::iterator J = MI;; ++J) { + // Do we have a convertible instruction? + NewCC = isUseDefConvertible(J); + if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() && + J->getOperand(1).getReg() == SrcReg) { + assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!"); + ShouldUpdateCC = true; // Update CC later on. + // This is not a def of SrcReg, but still a def of EFLAGS. Keep going + // with the new def. + MI = Def = J; + break; + } + + if (J == I) + return false; + } + } + // We are searching for an earlier instruction that can make CmpInstr // redundant and that instruction will be saved in Sub. - MachineInstr *Sub = NULL; + MachineInstr *Sub = nullptr; const TargetRegisterInfo *TRI = &getRegisterInfo(); // We iterate backward, starting from the instruction before CmpInstr and @@ -3634,7 +3685,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, RE = CmpInstr->getParent() == MI->getParent() ? MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ : CmpInstr->getParent()->rend(); - MachineInstr *Movr0Inst = 0; + MachineInstr *Movr0Inst = nullptr; for (; RI != RE; ++RI) { MachineInstr *Instr = &*RI; // Check whether CmpInstr can be made redundant by the current instruction. @@ -3716,13 +3767,28 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // CF and OF are used, we can't perform this optimization. return false; } + + // If we're updating the condition code, check if we have to reverse the + // condition. + if (ShouldUpdateCC) + switch (OldCC) { + default: + return false; + case X86::COND_E: + break; + case X86::COND_NE: + NewCC = GetOppositeBranchCondition(NewCC); + break; + } } else if (IsSwapped) { // If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs // to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
// We swap the condition code and synthesize the new opcode. - X86::CondCode NewCC = getSwappedCondition(OldCC); + NewCC = getSwappedCondition(OldCC); if (NewCC == X86::COND_INVALID) return false; + } + if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) { // Synthesize the new opcode. bool HasMemoryOperand = Instr.hasOneMemOperand(); unsigned NewOpc; @@ -3809,19 +3875,19 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, unsigned &FoldAsLoadDefReg, MachineInstr *&DefMI) const { if (FoldAsLoadDefReg == 0) - return 0; + return nullptr; // To be conservative, if there exists another load, clear the load candidate. if (MI->mayLoad()) { FoldAsLoadDefReg = 0; - return 0; + return nullptr; } // Check whether we can move DefMI here. DefMI = MRI->getVRegDef(FoldAsLoadDefReg); assert(DefMI); bool SawStore = false; - if (!DefMI->isSafeToMove(this, 0, SawStore)) - return 0; + if (!DefMI->isSafeToMove(this, nullptr, SawStore)) + return nullptr; // We try to commute MI if possible. unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1; @@ -3838,12 +3904,12 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, continue; // Do not fold if we have a subreg use or a def or multiple uses. if (MO.getSubReg() || MO.isDef() || FoundSrcOperand) - return 0; + return nullptr; SrcOperandId = i; FoundSrcOperand = true; } - if (!FoundSrcOperand) return 0; + if (!FoundSrcOperand) return nullptr; // Check whether we can fold the def into SrcOperandId. SmallVector Ops; @@ -3857,22 +3923,22 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI, if (Idx == 1) { // MI was changed but it didn't help, commute it back! commuteInstruction(MI, false); - return 0; + return nullptr; } // Check whether we can commute MI and enable folding. if (MI->isCommutable()) { MachineInstr *NewMI = commuteInstruction(MI, false); // Unable to commute. - if (!NewMI) return 0; + if (!NewMI) return nullptr; if (NewMI != MI) { // New instruction. It doesn't need to be kept. NewMI->eraseFromParent(); - return 0; + return nullptr; } } } - return 0; + return nullptr; } /// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr @@ -4007,7 +4073,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned i, const SmallVectorImpl &MOs, unsigned Size, unsigned Align) const { - const DenseMap > *OpcodeTablePtr = 0; + const DenseMap > *OpcodeTablePtr = nullptr; bool isCallRegIndirect = TM.getSubtarget().callRegIndirect(); bool isTwoAddrFold = false; @@ -4015,7 +4082,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // when X86Subtarget is Atom. if (isCallRegIndirect && (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) { - return NULL; + return nullptr; } unsigned NumOps = MI->getDesc().getNumOperands(); @@ -4026,9 +4093,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. if (MI->getOpcode() == X86::ADD32ri && MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) - return NULL; + return nullptr; - MachineInstr *NewMI = NULL; + MachineInstr *NewMI = nullptr; // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. 
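To illustrate the optimizeCompareInstr change above: LZCNT and TZCNT set CF, and POPCNT sets ZF, precisely when their source operand is zero, which is why the new isUseDefConvertible maps them to COND_B and COND_E. A minimal C++ sketch of the effect (not part of the patch; assumes a compiler with the POPCNT feature enabled):

    // With the EFLAGS-reuse optimization, no separate TEST is needed here:
    // POPCNT already sets ZF exactly when its input (and thus its result) is 0,
    // so the comparison below can consume POPCNT's flags directly.
    bool anyBitsSet(unsigned Mask) {
      return __builtin_popcount(Mask) != 0; // popcntl %edi, %eax ; setne %al
    }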
@@ -4063,7 +4130,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned Opcode = I->second.first; unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT; if (Align < MinAlign) - return NULL; + return nullptr; bool NarrowToMOV32rm = false; if (Size) { unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize(); @@ -4071,12 +4138,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4) - return NULL; + return nullptr; // If this is a 64-bit load, but the spill slot is 32, then we can do // a 32-bit load which is implicitly zero-extended. This likely is due // to liveintervalanalysis remat'ing a load from stack slot. if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg()) - return NULL; + return nullptr; Opcode = X86::MOV32rm; NarrowToMOV32rm = true; } @@ -4105,7 +4172,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // No fusion if (PrintFailedFusing && !MI->isCopy()) dbgs() << "We failed to fuse operand " << i << " in " << *MI; - return NULL; + return nullptr; } /// hasPartialRegUpdate - Return true for all instructions that only update @@ -4270,14 +4337,14 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, int FrameIndex) const { // Check switch flag - if (NoFusing) return NULL; + if (NoFusing) return nullptr; // Unless optimizing for size, don't fold to avoid partial // register update stalls if (!MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) - return 0; + return nullptr; const MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Size = MFI->getObjectSize(FrameIndex); @@ -4290,7 +4357,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned NewOpc = 0; unsigned RCSize = 0; switch (MI->getOpcode()) { - default: return NULL; + default: return nullptr; case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break; case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break; case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break; @@ -4299,12 +4366,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, // Check if it's safe to fold the load. If the size of the object is // narrower than the load width, then it's not. if (Size < RCSize) - return NULL; + return nullptr; // Change to CMPXXri r, 0 first. MI->setDesc(get(NewOpc)); MI->getOperand(1).ChangeToImmediate(0); } else if (Ops.size() != 1) - return NULL; + return nullptr; SmallVector MOs; MOs.push_back(MachineOperand::CreateFI(FrameIndex)); @@ -4322,14 +4389,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); // Check switch flag - if (NoFusing) return NULL; + if (NoFusing) return nullptr; // Unless optimizing for size, don't fold to avoid partial // register update stalls if (!MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && hasPartialRegUpdate(MI->getOpcode())) - return 0; + return nullptr; // Determine the alignment of the load. 
unsigned Alignment = 0; @@ -4352,12 +4419,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, Alignment = 4; break; default: - return 0; + return nullptr; } if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; switch (MI->getOpcode()) { - default: return NULL; + default: return nullptr; case X86::TEST8rr: NewOpc = X86::CMP8ri; break; case X86::TEST16rr: NewOpc = X86::CMP16ri8; break; case X86::TEST32rr: NewOpc = X86::CMP32ri8; break; @@ -4367,12 +4434,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MI->setDesc(get(NewOpc)); MI->getOperand(1).ChangeToImmediate(0); } else if (Ops.size() != 1) - return NULL; + return nullptr; // Make sure the subregisters match. // Otherwise we risk changing the size of the load. if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg()) - return NULL; + return nullptr; SmallVector MOs; switch (LoadMI->getOpcode()) { @@ -4388,7 +4455,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Medium and large mode can't fold loads this way. if (TM.getCodeModel() != CodeModel::Small && TM.getCodeModel() != CodeModel::Kernel) - return NULL; + return nullptr; // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; @@ -4400,7 +4467,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // This doesn't work for several reasons. // 1. GlobalBaseReg may have been spilled. // 2. It may not be live at MI. - return NULL; + return nullptr; } // Create a constant-pool entry. @@ -4436,14 +4503,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, > 4) // These instructions only load 32 bits, we can't fold them if the // destination register is wider than 32 bits (4 bytes). - return NULL; + return nullptr; if ((LoadMI->getOpcode() == X86::MOVSDrm || LoadMI->getOpcode() == X86::VMOVSDrm) && MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize() > 8) // These instructions only load 64 bits, we can't fold them if the // destination register is wider than 64 bits (8 bytes). - return NULL; + return nullptr; // Folding a normal load. Just copy the load's address operands. for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) @@ -4489,7 +4556,8 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. - const DenseMap > *OpcodeTablePtr = 0; + const DenseMap > *OpcodeTablePtr = nullptr; if (isTwoAddr && NumOps >= 2 && OpNum < 2) { OpcodeTablePtr = &RegOp2MemOpTable2Addr; } else if (OpNum == 0) { // If operand 0 @@ -4671,7 +4739,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, AddrOps.push_back(Chain); // Emit the load instruction. 
- SDNode *Load = 0; + SDNode *Load = nullptr; if (FoldedLoad) { EVT VT = *RC->vt_begin(); std::pair VTs; - const TargetRegisterClass *DstRC = 0; + const TargetRegisterClass *DstRC = nullptr; if (MCID.getNumDefs() > 0) { DstRC = getRegClass(MCID, 0, &RI, MF); VTs.push_back(*DstRC->vt_begin()); @@ -5190,14 +5258,14 @@ static const uint16_t *lookup(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) if (ReplaceableInstrs[i][domain-1] == opcode) return ReplaceableInstrs[i]; - return 0; + return nullptr; } static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) if (ReplaceableInstrsAVX2[i][domain-1] == opcode) return ReplaceableInstrsAVX2[i]; - return 0; + return nullptr; } std::pair @@ -5327,8 +5395,10 @@ namespace { const X86TargetMachine *TM = static_cast<const X86TargetMachine *>(&MF.getTarget()); - assert(!TM->getSubtarget<X86Subtarget>().is64Bit() && - "X86-64 PIC uses RIP relative addressing"); + // Don't do anything if this is 64-bit as 64-bit PIC + // uses RIP relative addressing. + if (TM->getSubtarget<X86Subtarget>().is64Bit()) + return false; // Only emit a global base reg in PIC mode. if (TM->getRelocationModel() != Reloc::PIC_) @@ -5383,7 +5453,7 @@ namespace { char CGBR::ID = 0; FunctionPass* -llvm::createGlobalBaseRegPass() { return new CGBR(); } +llvm::createX86GlobalBaseRegPass() { return new CGBR(); } namespace { struct LDTLSCleanup : public MachineFunctionPass { diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 156291e..5f34915 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -325,7 +325,7 @@ public: /// value. unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, - unsigned *LoadRegIndex = 0) const override; + unsigned *LoadRegIndex = nullptr) const override; /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler /// to determine if two loads are loading from the same base address. It @@ -359,6 +359,13 @@ public: /// instruction that defines the specified register class. bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; + /// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction that + /// would clobber the EFLAGS condition register. Note the result may be + /// conservative. If it cannot definitely determine the safety after visiting + /// a few instructions in each direction it assumes it's not safe.
+ bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; + static bool isX86_64ExtendedReg(const MachineOperand &MO) { if (!MO.isReg()) return false; return X86II::isX86_64ExtendedReg(MO.getReg()); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8edf873..0d97669 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -206,6 +206,8 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; +def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; @@ -249,7 +251,6 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, [SDNPCommutative]>; -def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntShiftOp>; def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>; def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; @@ -2001,6 +2002,46 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in { (implicit EFLAGS)]>, XS; } +let Predicates = [HasLZCNT] in { + def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E), + (X86cmp GR16:$src, (i16 0))), + (LZCNT16rr GR16:$src)>; + def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E), + (X86cmp GR32:$src, (i32 0))), + (LZCNT32rr GR32:$src)>; + def : Pat<(X86cmov (ctlz GR64:$src), (i64 64), (X86_COND_E), + (X86cmp GR64:$src, (i64 0))), + (LZCNT64rr GR64:$src)>; + def : Pat<(X86cmov (i16 16), (ctlz GR16:$src), (X86_COND_E), + (X86cmp GR16:$src, (i16 0))), + (LZCNT16rr GR16:$src)>; + def : Pat<(X86cmov (i32 32), (ctlz GR32:$src), (X86_COND_E), + (X86cmp GR32:$src, (i32 0))), + (LZCNT32rr GR32:$src)>; + def : Pat<(X86cmov (i64 64), (ctlz GR64:$src), (X86_COND_E), + (X86cmp GR64:$src, (i64 0))), + (LZCNT64rr GR64:$src)>; + + def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E), + (X86cmp (loadi16 addr:$src), (i16 0))), + (LZCNT16rm addr:$src)>; + def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E), + (X86cmp (loadi32 addr:$src), (i32 0))), + (LZCNT32rm addr:$src)>; + def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E), + (X86cmp (loadi64 addr:$src), (i64 0))), + (LZCNT64rm addr:$src)>; + def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E), + (X86cmp (loadi16 addr:$src), (i16 0))), + (LZCNT16rm addr:$src)>; + def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E), + (X86cmp (loadi32 addr:$src), (i32 0))), + (LZCNT32rm addr:$src)>; + def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E), + (X86cmp (loadi64 addr:$src), (i64 0))), + (LZCNT64rm addr:$src)>; +} + //===----------------------------------------------------------------------===// // BMI Instructions // @@ -2077,6 +2118,47 @@ let Predicates = [HasBMI] in { (BLSI64rr GR64:$src)>; } +let Predicates = [HasBMI] in { + def : Pat<(X86cmov (cttz GR16:$src), (i16 16), (X86_COND_E), + (X86cmp GR16:$src, (i16 0))), + (TZCNT16rr GR16:$src)>; + def : Pat<(X86cmov (cttz GR32:$src), (i32 32), (X86_COND_E), + (X86cmp GR32:$src, (i32 0))), + (TZCNT32rr GR32:$src)>; + def : Pat<(X86cmov (cttz GR64:$src), (i64 64), (X86_COND_E), + (X86cmp GR64:$src, (i64 0))), + (TZCNT64rr GR64:$src)>; + def : Pat<(X86cmov (i16 16), (cttz GR16:$src), (X86_COND_E), + (X86cmp GR16:$src, (i16 0))), + 
(TZCNT16rr GR16:$src)>; + def : Pat<(X86cmov (i32 32), (cttz GR32:$src), (X86_COND_E), + (X86cmp GR32:$src, (i32 0))), + (TZCNT32rr GR32:$src)>; + def : Pat<(X86cmov (i64 64), (cttz GR64:$src), (X86_COND_E), + (X86cmp GR64:$src, (i64 0))), + (TZCNT64rr GR64:$src)>; + + def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E), + (X86cmp (loadi16 addr:$src), (i16 0))), + (TZCNT16rm addr:$src)>; + def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E), + (X86cmp (loadi32 addr:$src), (i32 0))), + (TZCNT32rm addr:$src)>; + def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E), + (X86cmp (loadi64 addr:$src), (i64 0))), + (TZCNT64rm addr:$src)>; + def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E), + (X86cmp (loadi16 addr:$src), (i16 0))), + (TZCNT16rm addr:$src)>; + def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E), + (X86cmp (loadi32 addr:$src), (i32 0))), + (TZCNT32rm addr:$src)>; + def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E), + (X86cmp (loadi64 addr:$src), (i64 0))), + (TZCNT64rm addr:$src)>; +} + + multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC, X86MemOperand x86memop, Intrinsic Int, PatFrag ld_frag> { @@ -2104,18 +2186,38 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in { int_x86_bmi_bzhi_64, loadi64>, VEX_W; } -def : Pat<(X86bzhi GR32:$src1, GR8:$src2), - (BZHI32rr GR32:$src1, - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; -def : Pat<(X86bzhi (loadi32 addr:$src1), GR8:$src2), - (BZHI32rm addr:$src1, - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; -def : Pat<(X86bzhi GR64:$src1, GR8:$src2), - (BZHI64rr GR64:$src1, - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; -def : Pat<(X86bzhi (loadi64 addr:$src1), GR8:$src2), - (BZHI64rm addr:$src1, - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; + +def CountTrailingOnes : SDNodeXForm<imm, [{ + return getI8Imm(CountTrailingOnes_64(N->getZExtValue())); +}]>; + +def BZHIMask : ImmLeaf<i64, [{ + return isMask_64(Imm) && (CountTrailingOnes_64(Imm) > 32); +}]>; + +let Predicates = [HasBMI2] in { + def : Pat<(and GR64:$src, BZHIMask:$mask), + (BZHI64rr GR64:$src, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>; + + def : Pat<(and GR32:$src, (add (shl 1, GR8:$lz), -1)), + (BZHI32rr GR32:$src, + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>; + + def : Pat<(and (loadi32 addr:$src), (add (shl 1, GR8:$lz), -1)), + (BZHI32rm addr:$src, + (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>; + + def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)), + (BZHI64rr GR64:$src, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>; + + def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)), + (BZHI64rm addr:$src, + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>; +} // HasBMI2 let Predicates = [HasBMI] in { def : Pat<(X86bextr GR32:$src1, GR32:$src2), @@ -2617,21 +2719,21 @@ def : InstAlias<"fnstsw" , (FNSTSW16r)>; // lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but // this is compatible with what GAS does.
-def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; -def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>; -def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>, Requires<[Not16BitMode]>; -def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>, Requires<[Not16BitMode]>; -def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; -def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>; -def : InstAlias<"lcall *$dst", (FARCALL16m opaque32mem:$dst)>, Requires<[In16BitMode]>; -def : InstAlias<"ljmp *$dst", (FARJMP16m opaque32mem:$dst)>, Requires<[In16BitMode]>; - -def : InstAlias<"call *$dst", (CALL64m i16mem:$dst)>, Requires<[In64BitMode]>; -def : InstAlias<"jmp *$dst", (JMP64m i16mem:$dst)>, Requires<[In64BitMode]>; -def : InstAlias<"call *$dst", (CALL32m i16mem:$dst)>, Requires<[In32BitMode]>; -def : InstAlias<"jmp *$dst", (JMP32m i16mem:$dst)>, Requires<[In32BitMode]>; -def : InstAlias<"call *$dst", (CALL16m i16mem:$dst)>, Requires<[In16BitMode]>; -def : InstAlias<"jmp *$dst", (JMP16m i16mem:$dst)>, Requires<[In16BitMode]>; +def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>; +def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; +def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>; +def : InstAlias<"lcall *$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; +def : InstAlias<"ljmp *$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>; + +def : InstAlias<"call *$dst", (CALL64m i16mem:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"jmp *$dst", (JMP64m i16mem:$dst), 0>, Requires<[In64BitMode]>; +def : InstAlias<"call *$dst", (CALL32m i16mem:$dst), 0>, Requires<[In32BitMode]>; +def : InstAlias<"jmp *$dst", (JMP32m i16mem:$dst), 0>, Requires<[In32BitMode]>; +def : InstAlias<"call *$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>; +def : InstAlias<"jmp *$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>; // "imul , B" is an alias for "imul , B, B". @@ -2664,11 +2766,11 @@ def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>; // Force mov without a suffix with a segment and mem to prefer the 'l' form of // the move. All segment/mem forms are equivalent, this has the shortest // encoding. -def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>; -def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>; +def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>; +def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>; // Match 'movq , ' as an alias for movabsq. -def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>; +def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm), 0>; // Match 'movq GR64, MMX' as an alias for movd. 
def : InstAlias<"movq $src, $dst", @@ -2705,7 +2807,7 @@ def : InstAlias<"outl\t$port", (OUT32ir i8imm:$port), 0>; // 'sldt ' can be encoded with either sldtw or sldtq with the same // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity // errors, since its encoding is the most compact. -def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>; +def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem), 0>; // shld/shrd op,op -> shld op, op, CL def : InstAlias<"shld{w}\t{$r2, $r1|$r1, $r2}", (SHLD16rrCL GR16:$r1, GR16:$r2), 0>; @@ -2751,19 +2853,29 @@ defm : ShiftRotateByOneAlias<"ror", "ROR">; FIXME */ // test: We accept "testX , " and "testX , " as synonyms. -def : InstAlias<"test{b}\t{$val, $mem|$mem, $val}", (TEST8rm GR8 :$val, i8mem :$mem)>; -def : InstAlias<"test{w}\t{$val, $mem|$mem, $val}", (TEST16rm GR16:$val, i16mem:$mem)>; -def : InstAlias<"test{l}\t{$val, $mem|$mem, $val}", (TEST32rm GR32:$val, i32mem:$mem)>; -def : InstAlias<"test{q}\t{$val, $mem|$mem, $val}", (TEST64rm GR64:$val, i64mem:$mem)>; +def : InstAlias<"test{b}\t{$val, $mem|$mem, $val}", + (TEST8rm GR8 :$val, i8mem :$mem), 0>; +def : InstAlias<"test{w}\t{$val, $mem|$mem, $val}", + (TEST16rm GR16:$val, i16mem:$mem), 0>; +def : InstAlias<"test{l}\t{$val, $mem|$mem, $val}", + (TEST32rm GR32:$val, i32mem:$mem), 0>; +def : InstAlias<"test{q}\t{$val, $mem|$mem, $val}", + (TEST64rm GR64:$val, i64mem:$mem), 0>; // xchg: We accept "xchgX , " and "xchgX , " as synonyms. -def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}", (XCHG8rm GR8 :$val, i8mem :$mem)>; -def : InstAlias<"xchg{w}\t{$mem, $val|$val, $mem}", (XCHG16rm GR16:$val, i16mem:$mem)>; -def : InstAlias<"xchg{l}\t{$mem, $val|$val, $mem}", (XCHG32rm GR32:$val, i32mem:$mem)>; -def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}", (XCHG64rm GR64:$val, i64mem:$mem)>; +def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}", + (XCHG8rm GR8 :$val, i8mem :$mem), 0>; +def : InstAlias<"xchg{w}\t{$mem, $val|$val, $mem}", + (XCHG16rm GR16:$val, i16mem:$mem), 0>; +def : InstAlias<"xchg{l}\t{$mem, $val|$val, $mem}", + (XCHG32rm GR32:$val, i32mem:$mem), 0>; +def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}", + (XCHG64rm GR64:$val, i64mem:$mem), 0>; // xchg: We accept "xchgX , %eax" and "xchgX %eax, " as synonyms. 
-def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src)>; -def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar GR32:$src)>, Requires<[Not64BitMode]>; -def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>; -def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src)>; +def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src), 0>; +def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", + (XCHG32ar GR32:$src), 0>, Requires<[Not64BitMode]>; +def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", + (XCHG32ar64 GR32_NOAX:$src), 0>, Requires<[In64BitMode]>; +def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 050ee39..ecf80a1 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -254,6 +254,11 @@ let neverHasSideEffects = 1 in def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", [], IIC_MMX_MOVQ_RR>; +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { +def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src), + "movq\t{$src, $dst|$dst, $src}", [], + IIC_MMX_MOVQ_RR>; +} } // SchedRW let SchedRW = [WriteLoad] in { @@ -262,11 +267,12 @@ def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (load_mmx addr:$src))], IIC_MMX_MOVQ_RM>; +} // SchedRW +let SchedRW = [WriteStore] in def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (x86mmx VR64:$src), addr:$dst)], IIC_MMX_MOVQ_RM>; -} // SchedRW let SchedRW = [WriteMove] in { def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f2f3967..1eb0485 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1561,9 +1561,9 @@ defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, let Predicates = [UseAVX] in { def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", - (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>; + (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0>; def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", - (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>; + (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0>; def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; @@ -1627,9 +1627,9 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", - (CVTSI2SSrm FR64:$dst, i32mem:$src)>; + (CVTSI2SSrm FR64:$dst, i32mem:$src), 0>; def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", - (CVTSI2SDrm FR64:$dst, i32mem:$src)>; + (CVTSI2SDrm FR64:$dst, i32mem:$src), 0>; // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). 
@@ -2005,7 +2005,7 @@ def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // XMM only def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", - (VCVTPD2DQrr VR128:$dst, VR128:$src)>; + (VCVTPD2DQrr VR128:$dst, VR128:$src), 0>; def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2024,7 +2024,7 @@ def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", - (VCVTPD2DQYrr VR128:$dst, VR256:$src)>; + (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>; } def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -2127,7 +2127,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // XMM only def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", - (VCVTTPD2DQrr VR128:$dst, VR128:$src)>; + (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>; def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq @@ -2146,7 +2146,7 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", - (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>; + (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>; let Predicates = [HasAVX] in { def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), @@ -2252,7 +2252,7 @@ def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // XMM only def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", - (VCVTPD2PSrr VR128:$dst, VR128:$src)>; + (VCVTPD2PSrr VR128:$dst, VR128:$src), 0>; def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2psx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2271,7 +2271,7 @@ def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", - (VCVTPD2PSYrr VR128:$dst, VR256:$src)>; + (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>; def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", @@ -2973,6 +2973,19 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; +// AVX1 requires type coercions in order to fold loads directly into logical +// operations. 
+let Predicates = [HasAVX1Only] in { + def : Pat<(bc_v8f32 (and VR256:$src1, (loadv4i64 addr:$src2))), + (VANDPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(bc_v8f32 (or VR256:$src1, (loadv4i64 addr:$src2))), + (VORPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(bc_v8f32 (xor VR256:$src1, (loadv4i64 addr:$src2))), + (VXORPSYrm VR256:$src1, addr:$src2)>; + def : Pat<(bc_v8f32 (X86andnp VR256:$src1, (loadv4i64 addr:$src2))), + (VANDNPSYrm VR256:$src1, addr:$src2)>; +} + //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions //===----------------------------------------------------------------------===// @@ -3144,23 +3157,23 @@ let Predicates = [UseSSE2] in { let Predicates = [UseSSE41] in { // If the subtarget has SSE4.1 but not AVX, the vector insert - // instruction is lowered into a X86insrtps rather than a X86Movss. + // instruction is lowered into a X86insertps rather than a X86Movss. // When selecting SSE scalar single-precision fp arithmetic instructions, - // make sure that we correctly match the X86insrtps. + // make sure that we correctly match the X86insertps. - def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (iPTR 0))), (ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; - def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (iPTR 0))), (SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; - def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (iPTR 0))), (MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; - def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (iPTR 0))), (DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -3186,19 +3199,19 @@ let Predicates = [HasAVX] in { (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))))), (VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; - def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (iPTR 0))), (VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; - def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (iPTR 0))), (VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; - def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (iPTR 0))), (VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; - def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector + def : Pat<(v4f32 
(X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector (fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (iPTR 0))), (VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -4068,6 +4081,10 @@ defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, SSE_INTALUQ_ITINS_P, 1>; defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, SSE_INTMUL_ITINS_P, 1>; +defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16, + SSE_INTMUL_ITINS_P, 1>; +defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16, + SSE_INTMUL_ITINS_P, 1>; defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, SSE_INTALU_ITINS_P, 0>; defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, @@ -4102,10 +4119,6 @@ defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b, int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>; defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; -defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, - int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>; -defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, - int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>; defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, @@ -6515,7 +6528,7 @@ multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1, !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>, + (X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>, Sched<[WriteFShuffle]>; def rm : SS4AIi8 opc, string asm, bit Is2Addr = 1, !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), [(set VR128:$dst, - (X86insrtps VR128:$src1, + (X86insertps VR128:$src1, (v4f32 (scalar_to_vector (loadf32 addr:$src2))), imm:$src3))], itins.rm>, Sched<[WriteFShuffleLd, ReadAfterLd]>; @@ -6537,6 +6550,29 @@ let ExeDomain = SSEPackedSingle in { defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>; } +let Predicates = [UseSSE41] in { + // If we're inserting an element from a load or a null pshuf of a load, + // fold the load into the insertps instruction. + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd (v4f32 + (scalar_to_vector (loadf32 addr:$src2))), (i8 0)), + imm:$src3)), + (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>; + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd + (loadv4f32 addr:$src2), (i8 0)), imm:$src3)), + (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>; +} + +let Predicates = [UseAVX] in { + // If we're inserting an element from a vbroadcast of a load, fold the + // load into the X86insertps instruction. 
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), + (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)), + (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>; + def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), + (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)), + (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>; +} + //===----------------------------------------------------------------------===// // SSE4.1 - Round Instructions //===----------------------------------------------------------------------===// @@ -6990,6 +7026,31 @@ multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, Sched<[itins.Sched.Folded, ReadAfterLd]>; } +/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst +/// types. +multiclass SS48I_binop_rm2 opc, string OpcodeStr, SDNode OpNode, + ValueType DstVT, ValueType SrcVT, RegisterClass RC, + PatFrag memop_frag, X86MemOperand x86memop, + OpndItins itins, + bit IsCommutable = 0, bit Is2Addr = 1> { + let isCommutable = IsCommutable in + def rr : SS48I, + Sched<[itins.Sched]>; + def rm : SS48I, + Sched<[itins.Sched.Folded, ReadAfterLd]>; +} + let Predicates = [HasAVX] in { let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, @@ -7018,8 +7079,9 @@ let Predicates = [HasAVX] in { defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128, loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, VEX_4V; - defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, - 0, DEFAULT_ITINS_VECIMULSCHED>, VEX_4V; + defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32, + VR128, loadv2i64, i128mem, + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; } let Predicates = [HasAVX2] in { @@ -7051,9 +7113,9 @@ let Predicates = [HasAVX2] in { defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256, loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>, VEX_4V, VEX_L; - defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", - int_x86_avx2_pmul_dq, WriteVecIMul>, - VEX_4V, VEX_L; + defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32, + VR256, loadv4i64, i256mem, + SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { @@ -7076,8 +7138,9 @@ let Constraints = "$src1 = $dst" in { memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128, memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>; - defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, - 1, SSE_INTMUL_ITINS_P>; + defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32, + VR128, memopv2i64, i128mem, + SSE_INTMUL_ITINS_P, 1>; } let Predicates = [HasAVX] in { @@ -7394,6 +7457,7 @@ let Predicates = [UseSSE41] in { } +let SchedRW = [WriteLoad] in { let Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", @@ -7407,6 +7471,7 @@ def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; +} // SchedRW //===----------------------------------------------------------------------===// // SSE4.2 - Compare Instructions @@ -7831,18 +7896,20 @@ def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), multiclass pclmul_alias { def : InstAlias; + (PCLMULQDQrr VR128:$dst, VR128:$src, immop), 0>; def : InstAlias; + (PCLMULQDQrm VR128:$dst, 
i128mem:$src, immop), 0>; def : InstAlias; + (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop), + 0>; def : InstAlias; + (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop), + 0>; } defm : pclmul_alias<"hqhq", 0x11>; defm : pclmul_alias<"hqlq", 0x01>; @@ -8291,6 +8358,12 @@ let Predicates = [HasF16C] in { defm VCVTPH2PSY : f16c_ph2ps, VEX_L; defm VCVTPS2PH : f16c_ps2ph; defm VCVTPS2PHY : f16c_ps2ph, VEX_L; + + // Pattern match vcvtph2ps of a scalar i64 load. + def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)), + (VCVTPH2PSrm addr:$src)>; + def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)), + (VCVTPH2PSrm addr:$src)>; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 9d3aa1c..b5595cb 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -19,7 +19,7 @@ let Defs = [RAX, RDX] in TB; let Defs = [RAX, RCX, RDX] in - def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB; + def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB; // CPU flow control instructions diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index e99f2d9..e969ef2 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jit" #include "X86JITInfo.h" #include "X86Relocations.h" #include "X86Subtarget.h" @@ -24,6 +23,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "jit" + // Determine the platform we're running on #if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64) # define X86_64_JIT @@ -427,9 +428,14 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { TsanIgnoreWritesEnd(); #if defined (X86_32_JIT) && !defined (_MSC_VER) +#if defined(__SSE__) + // SSE Callback should be called for SSE-enabled LLVM. 
+ return X86CompilationCallback_SSE; +#else if (Subtarget->hasSSE1()) return X86CompilationCallback_SSE; #endif +#endif return X86CompilationCallback; } @@ -437,7 +443,7 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) { Subtarget = &TM.getSubtarget(); useGOT = 0; - TLSOffset = 0; + TLSOffset = nullptr; } void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 6d7f3cb..0190080 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -120,7 +120,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { case X86II::MO_DARWIN_NONLAZY_PIC_BASE: { MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI().getGVStubEntry(Sym); - if (StubSym.getPointer() == 0) { + if (!StubSym.getPointer()) { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: @@ -132,7 +132,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: { MachineModuleInfoImpl::StubValueTy &StubSym = getMachOMMI().getHiddenGVStubEntry(Sym); - if (StubSym.getPointer() == 0) { + if (!StubSym.getPointer()) { assert(MO.isGlobal() && "Extern symbol not handled yet"); StubSym = MachineModuleInfoImpl:: @@ -168,7 +168,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. - const MCExpr *Expr = 0; + const MCExpr *Expr = nullptr; MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None; switch (MO.getTargetFlags()) { @@ -223,7 +223,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, break; } - if (Expr == 0) + if (!Expr) Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx); if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index 746d0d6..6639875 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -15,9 +15,9 @@ #include -#define DEBUG_TYPE "x86-pad-short-functions" #include "X86.h" #include "X86InstrInfo.h" +#include "X86Subtarget.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -30,6 +30,8 @@ using namespace llvm; +#define DEBUG_TYPE "x86-pad-short-functions" + STATISTIC(NumBBsPadded, "Number of basic blocks padded"); namespace { @@ -49,7 +51,7 @@ namespace { struct PadShortFunc : public MachineFunctionPass { static char ID; PadShortFunc() : MachineFunctionPass(ID) - , Threshold(4), TM(0), TII(0) {} + , Threshold(4), TM(nullptr), TII(nullptr) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -100,6 +102,9 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { } TM = &MF.getTarget(); + if (!TM->getSubtarget().padShortFunctions()) + return false; + TII = TM->getInstrInfo(); // Search through basic blocks and mark the ones that have early returns diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 85aa9b5..a83e1e4 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -38,11 +38,11 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +using namespace llvm; + #define GET_REGINFO_TARGET_DESC #include "X86GenRegisterInfo.inc" -using 
namespace llvm; - cl::opt ForceStackAlign("force-align-stack", cl::desc("Force align the stack to the minimum alignment" @@ -129,7 +129,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, if (!Is64Bit && SubIdx == X86::sub_8bit) { A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi); if (!A) - return 0; + return nullptr; } return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx); } @@ -231,7 +231,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, } } -const uint16_t * +const MCPhysReg * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool HasAVX = TM.getSubtarget().hasAVX(); bool HasAVX512 = TM.getSubtarget().hasAVX512(); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 6a71113..2289d91 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -100,7 +100,7 @@ public: /// getCalleeSavedRegs - Return a null-terminated list of all of the /// callee-save registers on this target. - const uint16_t * + const MCPhysReg * getCalleeSavedRegs(const MachineFunction* MF) const override; const uint32_t *getCallPreservedMask(CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const; @@ -122,7 +122,7 @@ public: void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const override; + RegScavenger *RS = nullptr) const override; // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const override; diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index f5b51ee..6966d61 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ b/lib/Target/X86/X86SchedHaswell.td @@ -20,6 +20,9 @@ def HaswellModel : SchedMachineModel { let LoadLatency = 4; let MispredictPenalty = 16; + // Based on the LSD (loop-stream detector) queue size and benchmarking data. + let LoopMicroOpBufferSize = 50; + // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow // the scheduler to assign a default model to unrecognized opcodes. let CompleteModel = 0; diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index a58859a..83f0534 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -21,6 +21,9 @@ def SandyBridgeModel : SchedMachineModel { let LoadLatency = 4; let MispredictPenalty = 16; + // Based on the LSD (loop-stream detector) queue size. + let LoopMicroOpBufferSize = 28; + // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow // the scheduler to assign a default model to unrecognized opcodes. let CompleteModel = 0; diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td index ba72f29..3256ee7 100644 --- a/lib/Target/X86/X86ScheduleAtom.td +++ b/lib/Target/X86/X86ScheduleAtom.td @@ -535,5 +535,9 @@ def AtomModel : SchedMachineModel { let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. let HighLatency = 30;// Expected, may be overriden by OperandCycles. + // On the Atom, the throughput for taken branches is 2 cycles. For small + // simple loops, expand by a small factor to hide the backedge cost. 
+ let LoopMicroOpBufferSize = 10; + let Itineraries = AtomItineraries; } diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td index 6c2a304..823d101 100644 --- a/lib/Target/X86/X86ScheduleSLM.td +++ b/lib/Target/X86/X86ScheduleSLM.td @@ -1,4 +1,4 @@ -//===- X86ScheduleSLM.td - X86 Atom Scheduling Definitions -*- tablegen -*-==// +//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,662 +7,225 @@ // //===----------------------------------------------------------------------===// // -// This file defines the itinerary class data for the Intel Atom -// (Silvermont) processor. +// This file defines the machine model for Intel Silvermont to support +// instruction scheduling and other instruction cost heuristics. // //===----------------------------------------------------------------------===// -def IEC_RSV0 : FuncUnit; -def IEC_RSV1 : FuncUnit; -def FPC_RSV0 : FuncUnit; -def FPC_RSV1 : FuncUnit; -def MEC_RSV : FuncUnit; - - - - - - - - - - - - - - -def SLMItineraries : ProcessorItineraries< - [ IEC_RSV0, IEC_RSV1, FPC_RSV0, FPC_RSV1, MEC_RSV ], - [], [ - // [InstrStage] - // [InstrStage, InstrStage] - // [InstrStage] - // [InstrStage,InstrStage] - // - // Default is 1 cycle, IEC_RSV0 or IEC_RSV1 - //InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // mul - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - // imul by al, ax, eax, rax - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - // imul reg by reg|mem - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - // imul reg = reg/mem * imm - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - // idiv - min latency - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // div - min latency - InstrItinData] >, - InstrItinData, - InstrStage<25, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // neg/not/inc/dec - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - // add/sub/and/or/xor/adc/sbc/cmp/test - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - // adc/sbb - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - // shift/rotate - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - // shift double - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - // cmov - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - // set - 
InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - // jcc - InstrItinData] >, - // jcxz/jecxz/jrcxz - InstrItinData] >, - // jmp rel - InstrItinData] >, - // jmp indirect - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - // jmp far - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - // loop/loope/loopne - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // call - all but reg/imm - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - //ret - InstrItinData] >, - InstrItinData] >, - //sign extension movs - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - //zero extension movs - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData] >, - - // SSE binary operations - // arithmetic fp scalar - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<13, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<13, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - - // arithmetic fp parallel - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<27, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<27, [MEC_RSV]>] >, - - // bitwise parallel - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - // arithmetic int parallel - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - - // multiply int parallel - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - - // shift parallel - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<26, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<13, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<26, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<13, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData, - InstrStage<9, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - 
InstrItinData, - InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >, - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<9, [MEC_RSV]>] >, - InstrItinData, - InstrStage<9, [MEC_RSV]>] >, - InstrItinData, - InstrStage<9, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - - // conversions - // to/from PD ... - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - // to/from PS except to/from PD and PS2PI - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - - // MMX MOVs - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // other MMX - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // conversions - // from/to PD - InstrItinData] >, - InstrItinData] >, - // from/to PI - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +def SLMModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and SLM can decode 2 + // instructions per cycle. + let IssueWidth = 2; + let MicroOpBufferSize = 32; // Based on the reorder buffer. + let LoadLatency = 3; + let MispredictPenalty = 10; + + // For small loops, expand by a small factor to hide the backedge cost. + let LoopMicroOpBufferSize = 10; + + // FIXME: SSE4 is unimplemented. This flag is set to allow + // the scheduler to assign a default model to unrecognized opcodes. + let CompleteModel = 0; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +let SchedModel = SLMModel in { + +// Silvermont has 5 reservation stations for micro-ops + +def IEC_RSV0 : ProcResource<1>; +def IEC_RSV1 : ProcResource<1>; +def FPC_RSV0 : ProcResource<1> { let BufferSize = 1; } +def FPC_RSV1 : ProcResource<1> { let BufferSize = 1; } +def MEC_RSV : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports. +def IEC_RSV01 : ProcResGroup<[IEC_RSV0, IEC_RSV1]>; +def FPC_RSV01 : ProcResGroup<[FPC_RSV0, FPC_RSV1]>; + +def SMDivider : ProcResource<1>; +def SMFPMultiplier : ProcResource<1>; +def SMFPDivider : ProcResource<1>; + +// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3 +// cycles after the memory operand. 
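+// For example (illustrative, using the model's own numbers): with
+// LoadLatency = 3 and a 1-cycle ALU op, the folded-load variant defined by
+// SMWriteResPair below reports latency 4, while a consumer reading the
+// register operand through ReadAfterLd may be scheduled as if that operand
+// were ready 3 cycles early.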
+def : ReadAdvance; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass SMWriteResPair { + // Register variant is using a single cycle on ExePort. + def : WriteRes { let Latency = Lat; } + + // Memory variant also uses a cycle on MEC_RSV and adds 3 cycles to the + // latency. + def : WriteRes { + let Latency = !add(Lat, 3); + } +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +// A folded store needs a cycle on MEC_RSV for the store data, but it does not +// need an extra port cycle to recompute the address. +def : WriteRes; + +def : WriteRes; +def : WriteRes { let Latency = 3; } +def : WriteRes; +def : WriteRes; + +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; + +// This is for simple LEAs with one or two input operands. +// The complex ones can only execute on port 1, and they require two cycles on +// the port to read all inputs. We don't model that. +def : WriteRes; + +// This is quite rough, latency depends on the dividend. +def : WriteRes { + let Latency = 25; + let ResourceCycles = [1, 25]; +} +def : WriteRes { + let Latency = 29; + let ResourceCycles = [1, 1, 25]; +} - // System instructions - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +// Scalar and vector floating point. +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; + +// This is quite rough, latency depends on precision +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 2]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [1, 1, 2]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +def : WriteRes { + let Latency = 34; + let ResourceCycles = [1, 34]; +} +def : WriteRes { + let Latency = 37; + let ResourceCycles = [1, 1, 34]; +} - InstrItinData] >, - InstrItinData] >, - // worst case for mov REG_CRx - InstrItinData] >, - InstrItinData] >, +// Vector integer operations. +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; + +// String instructions. 
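+// Each class below is defined twice: once for the register form and once for
+// the folded-load form, which adds MEC_RSV to the resource list. A sketch of
+// the intended shape of one such pair (write-class and port names assumed
+// from the conventions above, not taken verbatim from this patch):
+//
+//   def : WriteRes<WritePCmpIStrM, [FPC_RSV0]> {
+//     let Latency = 13;
+//     let ResourceCycles = [13];
+//   }
+//   def : WriteRes<WritePCmpIStrMLd, [FPC_RSV0, MEC_RSV]> {
+//     let Latency = 13;
+//     let ResourceCycles = [13, 1];
+//   }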
+// Packed Compare Implicit Length Strings, Return Mask +def : WriteRes { + let Latency = 13; + let ResourceCycles = [13]; +} +def : WriteRes { + let Latency = 13; + let ResourceCycles = [13, 1]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // LAR - InstrItinData] >, - InstrItinData] >, - // LSL - InstrItinData] >, - InstrItinData] >, +// Packed Compare Explicit Length Strings, Return Mask +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17]; +} +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17, 1]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // push control register, segment registers - InstrItinData] >, - InstrItinData] >, - // pop control register, segment registers - InstrItinData] >, - InstrItinData] >, - // VERR, VERW - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // WRMSR, RDMSR - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // SMSW, LMSW - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +// Packed Compare Implicit Length Strings, Return Index +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17]; +} +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17, 1]; +} - InstrItinData] >, - InstrItinData] >, +// Packed Compare Explicit Length Strings, Return Index +def : WriteRes { + let Latency = 21; + let ResourceCycles = [21]; +} +def : WriteRes { + let Latency = 21; + let ResourceCycles = [21, 1]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +// AES Instructions. +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5, 1]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5, 1]; +} - InstrItinData] >, - InstrItinData, - InstrStage<10, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData, - InstrStage<10, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - 
InstrItinData] >, - InstrItinData, - InstrStage<12, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<15, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<11, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<10, [MEC_RSV]>] >, +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5, 1]; +} - InstrItinData] > - ]>; +// Carry-less multiplication instructions. +def : WriteRes { + let Latency = 10; + let ResourceCycles = [10]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [10, 1]; +} -// Silvermont machine model. -def SLMModel : SchedMachineModel { - let IssueWidth = 2; // Allows 2 instructions per scheduling group. - let MinLatency = 1; // InstrStage cycles overrides MinLatency. - // OperandCycles may be used for expected latency. - let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. - let HighLatency = 30;// Expected, may be overriden by OperandCycles. - let Itineraries = SLMItineraries; -} +def : WriteRes { let Latency = 100; } +def : WriteRes { let Latency = 100; } +def : WriteRes; +def : WriteRes; + +// AVX is not supported on that architecture, but we should define the basic +// scheduling resources anyway. +def : WriteRes; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +} // SchedModel diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index b9c620f..744890d 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,12 +11,13 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-selectiondag-info" #include "X86TargetMachine.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" using namespace llvm; +#define DEBUG_TYPE "x86-selectiondag-info" + X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) : TargetSelectionDAGInfo(TM), Subtarget(&TM.getSubtarget()), @@ -50,7 +51,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, ConstantSDNode *V = dyn_cast(Src); if (const char *bzeroEntry = V && - V->isNullValue() ? Subtarget->getBZeroEntry() : 0) { + V->isNullValue() ? 
Subtarget->getBZeroEntry() : nullptr) {
     EVT IntPtr = TLI.getPointerTy();
     Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
     TargetLowering::ArgListTy Args;
@@ -60,15 +61,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
     Args.push_back(Entry);
     Entry.Node = Size;
     Args.push_back(Entry);
-    TargetLowering::
-    CallLoweringInfo CLI(Chain, Type::getVoidTy(*DAG.getContext()),
-                         false, false, false, false,
-                         0, CallingConv::C, /*isTailCall=*/false,
-                         /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
-                         DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
-                         DAG, dl);
-    std::pair<SDValue,SDValue> CallResult =
-      TLI.LowerCallTo(CLI);
+
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(dl).setChain(Chain)
+      .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+                 DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
+      .setDiscardResult();
+
+    std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
     return CallResult.second;
   }
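The rep;stos / rep;movs hunks that follow split a constant size into a register count plus a scalar tail. A minimal self-contained sketch of that split, under the assumption that UBytes is 8 on 64-bit targets and 4 otherwise (the helper name and sample values are illustrative only):

    #include <cstdint>

    // For SizeVal = 30, UBytes = 8: CountVal = 3 (rep;movs copies 24 bytes)
    // and BytesLeft = 6, the "last 1 - 7 bytes" handled with scalar ops.
    static void splitRepCopy(uint64_t SizeVal, unsigned UBytes,
                             uint64_t &CountVal, unsigned &BytesLeft) {
      CountVal = SizeVal / UBytes;  // element count placed in RCX/ECX
      BytesLeft = SizeVal % UBytes; // remainder emitted after the rep op
    }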
@@ -77,7 +77,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
   }
 
   uint64_t SizeVal = ConstantSize->getZExtValue();
-  SDValue InFlag(0, 0);
+  SDValue InFlag;
   EVT AVT;
   SDValue Count;
   ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
@@ -139,7 +139,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
-  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
 
   if (TwoRepStos) {
     InFlag = Chain.getValue(1);
@@ -153,7 +153,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
     InFlag = Chain.getValue(1);
     Tys = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
-    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
   } else if (BytesLeft) {
     // Handle the last 1 - 7 bytes.
     unsigned Offset = SizeVal - BytesLeft;
@@ -225,7 +225,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
   SDValue Count = DAG.getIntPtrConstant(CountVal);
   unsigned BytesLeft = SizeVal % UBytes;
-  SDValue InFlag(0, 0);
+  SDValue InFlag;
   Chain  = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ?
                                               X86::RCX : X86::ECX,
                             Count, InFlag);
@@ -241,8 +241,7 @@
   SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
   SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
-  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
-                                array_lengthof(Ops));
+  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
 
   SmallVector<SDValue, 2> Results;
   Results.push_back(RepMovs);
@@ -263,6 +262,5 @@
       SrcPtrInfo.getWithOffset(Offset)));
   }
 
-  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                     &Results[0], Results.size());
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
 }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 207d0ba..989e0d6 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "subtarget"
 #include "X86Subtarget.h"
 #include "X86InstrInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Host.h"
@@ -24,15 +24,24 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+using namespace llvm;
+
+#define DEBUG_TYPE "subtarget"
+
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
 #include "X86GenSubtargetInfo.inc"
 
-using namespace llvm;
+// Temporary option to control early if-conversion for x86 while adding machine
+// models.
+static cl::opt<bool>
+X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
+               cl::desc("Enable early if-conversion on X86"));
 
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
 
 /// ClassifyBlockAddressReference - Classify a blockaddress reference for the
 /// current subtarget according to how we should reference it in a non-pcrel
@@ -153,7 +162,7 @@ const char *X86Subtarget::getBZeroEntry() const {
       !getTargetTriple().isMacOSXVersionLT(10, 6))
     return "__bzero";
 
-  return 0;
+  return nullptr;
 }
 
 bool X86Subtarget::hasSinCos() const {
@@ -173,251 +182,16 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
   return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
 }
 
-static bool OSHasAVXSupport() {
-#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
-    || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
-#if defined(__GNUC__)
-  // Check xgetbv; this uses a .byte sequence instead of the instruction
-  // directly because older assemblers do not include support for xgetbv and
-  // there is no easy way to conditionally compile based on the assembler used.
- int rEAX, rEDX; - __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0)); -#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK) - unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK); -#else - int rEAX = 0; // Ensures we return false -#endif - return (rEAX & 6) == 6; -#else - return false; -#endif -} - -void X86Subtarget::AutoDetectSubtargetFeatures() { - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - unsigned MaxLevel; - union { - unsigned u[3]; - char c[12]; - } text; - - if (X86_MC::GetCpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) || - MaxLevel < 1) - return; - - X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX); - - if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); } - if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); } - if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); } - if ((EDX >> 26) & 1) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); } - if (ECX & 0x1) { X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); } - if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);} - if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);} - if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);} - if (((ECX >> 27) & 1) && ((ECX >> 28) & 1) && OSHasAVXSupport()) { - X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); - } - - bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0; - bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0; - - if ((ECX >> 1) & 0x1) { - HasPCLMUL = true; - ToggleFeature(X86::FeaturePCLMUL); - } - if ((ECX >> 12) & 0x1) { - HasFMA = true; - ToggleFeature(X86::FeatureFMA); - } - if (IsIntel && ((ECX >> 22) & 0x1)) { - HasMOVBE = true; - ToggleFeature(X86::FeatureMOVBE); - } - if ((ECX >> 23) & 0x1) { - HasPOPCNT = true; - ToggleFeature(X86::FeaturePOPCNT); - } - if ((ECX >> 25) & 0x1) { - HasAES = true; - ToggleFeature(X86::FeatureAES); - } - if ((ECX >> 29) & 0x1) { - HasF16C = true; - ToggleFeature(X86::FeatureF16C); - } - if (IsIntel && ((ECX >> 30) & 0x1)) { - HasRDRAND = true; - ToggleFeature(X86::FeatureRDRAND); - } - - if ((ECX >> 13) & 0x1) { - HasCmpxchg16b = true; - ToggleFeature(X86::FeatureCMPXCHG16B); - } - - if (IsIntel || IsAMD) { - // Determine if bit test memory instructions are slow. - unsigned Family = 0; - unsigned Model = 0; - X86_MC::DetectFamilyModel(EAX, Family, Model); - if (IsAMD || (Family == 6 && Model >= 13)) { - IsBTMemSlow = true; - ToggleFeature(X86::FeatureSlowBTMem); - } - - // Determine if SHLD/SHRD instructions have higher latency then the - // equivalent series of shifts/or instructions. - // FIXME: Add Intel's processors that have SHLD instructions with very - // poor latency. - if (IsAMD) { - IsSHLDSlow = true; - ToggleFeature(X86::FeatureSlowSHLD); - } - - // If it's an Intel chip since Nehalem and not an Atom chip, unaligned - // memory access is fast. We hard code model numbers here because they - // aren't strictly increasing for Intel chips it seems. 
- if (IsIntel && - ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield, - // Jasper Froest - (Family == 6 && Model == 0x1A) || // Nehalem: Bloomfield, Nehalem-EP - (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX - (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale - (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP - (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX - (Family == 6 && Model == 0x2A) || // SandyBridge - (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E* - (Family == 6 && Model == 0x3A) || // IvyBridge - (Family == 6 && Model == 0x3E) || // IvyBridge EP - (Family == 6 && Model == 0x3C) || // Haswell - (Family == 6 && Model == 0x3F) || // ... - (Family == 6 && Model == 0x45) || // ... - (Family == 6 && Model == 0x46))) { // ... - IsUAMemFast = true; - ToggleFeature(X86::FeatureFastUAMem); - } - - // Set processor type. Currently only Atom or Silvermont (SLM) is detected. - if (Family == 6 && - (Model == 28 || Model == 38 || Model == 39 || - Model == 53 || Model == 54)) { - X86ProcFamily = IntelAtom; - - UseLeaForSP = true; - ToggleFeature(X86::FeatureLeaForSP); - } - else if (Family == 6 && - (Model == 55 || Model == 74 || Model == 77)) { - X86ProcFamily = IntelSLM; - } - - unsigned MaxExtLevel; - X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); - - if (MaxExtLevel >= 0x80000001) { - X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - if ((EDX >> 29) & 0x1) { - HasX86_64 = true; - ToggleFeature(X86::Feature64Bit); - } - if ((ECX >> 5) & 0x1) { - HasLZCNT = true; - ToggleFeature(X86::FeatureLZCNT); - } - if (IsIntel && ((ECX >> 8) & 0x1)) { - HasPRFCHW = true; - ToggleFeature(X86::FeaturePRFCHW); - } - if (IsAMD) { - if ((ECX >> 6) & 0x1) { - HasSSE4A = true; - ToggleFeature(X86::FeatureSSE4A); - } - if ((ECX >> 11) & 0x1) { - HasXOP = true; - ToggleFeature(X86::FeatureXOP); - } - if ((ECX >> 16) & 0x1) { - HasFMA4 = true; - ToggleFeature(X86::FeatureFMA4); - } - } - } - } - - if (MaxLevel >= 7) { - if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) { - if (IsIntel && (EBX & 0x1)) { - HasFSGSBase = true; - ToggleFeature(X86::FeatureFSGSBase); - } - if ((EBX >> 3) & 0x1) { - HasBMI = true; - ToggleFeature(X86::FeatureBMI); - } - if ((EBX >> 4) & 0x1) { - HasHLE = true; - ToggleFeature(X86::FeatureHLE); - } - if (IsIntel && ((EBX >> 5) & 0x1)) { - X86SSELevel = AVX2; - ToggleFeature(X86::FeatureAVX2); - } - if (IsIntel && ((EBX >> 8) & 0x1)) { - HasBMI2 = true; - ToggleFeature(X86::FeatureBMI2); - } - if (IsIntel && ((EBX >> 11) & 0x1)) { - HasRTM = true; - ToggleFeature(X86::FeatureRTM); - } - if (IsIntel && ((EBX >> 16) & 0x1)) { - X86SSELevel = AVX512F; - ToggleFeature(X86::FeatureAVX512); - } - if (IsIntel && ((EBX >> 18) & 0x1)) { - HasRDSEED = true; - ToggleFeature(X86::FeatureRDSEED); - } - if (IsIntel && ((EBX >> 19) & 0x1)) { - HasADX = true; - ToggleFeature(X86::FeatureADX); - } - if (IsIntel && ((EBX >> 26) & 0x1)) { - HasPFI = true; - ToggleFeature(X86::FeaturePFI); - } - if (IsIntel && ((EBX >> 27) & 0x1)) { - HasERI = true; - ToggleFeature(X86::FeatureERI); - } - if (IsIntel && ((EBX >> 28) & 0x1)) { - HasCDI = true; - ToggleFeature(X86::FeatureCDI); - } - if (IsIntel && ((EBX >> 29) & 0x1)) { - HasSHA = true; - ToggleFeature(X86::FeatureSHA); - } - } - if (IsAMD && ((ECX >> 21) & 0x1)) { - HasTBM = true; - ToggleFeature(X86::FeatureTBM); - } - } -} - void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { AttributeSet 
FnAttrs = MF->getFunction()->getAttributes(); - Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-cpu"); - Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-features"); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); std::string CPU = - !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; + !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : ""; std::string FS = - !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; + !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; if (!FS.empty()) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); @@ -426,54 +200,23 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) { void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { std::string CPUName = CPU; - if (!FS.empty() || !CPU.empty()) { - if (CPUName.empty()) { -#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ - || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) - CPUName = sys::getHostCPUName(); -#else - CPUName = "generic"; -#endif - } - - // Make sure 64-bit features are available in 64-bit mode. (But make sure - // SSE2 can be turned off explicitly.) - std::string FullFS = FS; - if (In64BitMode) { - if (!FullFS.empty()) - FullFS = "+64bit,+sse2," + FullFS; - else - FullFS = "+64bit,+sse2"; - } - - // If feature string is not empty, parse features string. - ParseSubtargetFeatures(CPUName, FullFS); - } else { - if (CPUName.empty()) { -#if defined (__x86_64__) || defined(__i386__) - CPUName = sys::getHostCPUName(); -#else - CPUName = "generic"; -#endif - } - // Otherwise, use CPUID to auto-detect feature set. - AutoDetectSubtargetFeatures(); - - // Make sure 64-bit features are available in 64-bit mode. - if (In64BitMode) { - if (!HasX86_64) { HasX86_64 = true; ToggleFeature(X86::Feature64Bit); } - if (!HasCMov) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); } - - if (X86SSELevel < SSE2) { - X86SSELevel = SSE2; - ToggleFeature(X86::FeatureSSE1); - ToggleFeature(X86::FeatureSSE2); - } - } + if (CPUName.empty()) + CPUName = "generic"; + + // Make sure 64-bit features are available in 64-bit mode. (But make sure + // SSE2 can be turned off explicitly.) + std::string FullFS = FS; + if (In64BitMode) { + if (!FullFS.empty()) + FullFS = "+64bit,+sse2," + FullFS; + else + FullFS = "+64bit,+sse2"; } - // CPUName may have been set by the CPU detection code. Make sure the - // new MCSchedModel is used. + // If feature string is not empty, parse features string. + ParseSubtargetFeatures(CPUName, FullFS); + + // Make sure the right MCSchedModel is used. InitCPUSchedModel(CPUName); if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM) @@ -547,33 +290,36 @@ void X86Subtarget::initializeEnvironment() { PadShortFunctions = false; CallRegIndirect = false; LEAUsesAG = false; + SlowLEA = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? 
MaxInlineSizeThreshold = 128; } X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, - unsigned StackAlignOverride) - : X86GenSubtargetInfo(TT, CPU, FS) - , X86ProcFamily(Others) - , PICStyle(PICStyles::None) - , TargetTriple(TT) - , StackAlignOverride(StackAlignOverride) - , In64BitMode(TargetTriple.getArch() == Triple::x86_64) - , In32BitMode(TargetTriple.getArch() == Triple::x86 && - TargetTriple.getEnvironment() != Triple::CODE16) - , In16BitMode(TargetTriple.getArch() == Triple::x86 && - TargetTriple.getEnvironment() == Triple::CODE16) { + const std::string &FS, unsigned StackAlignOverride) + : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others), + PICStyle(PICStyles::None), TargetTriple(TT), + StackAlignOverride(StackAlignOverride), + In64BitMode(TargetTriple.getArch() == Triple::x86_64), + In32BitMode(TargetTriple.getArch() == Triple::x86 && + TargetTriple.getEnvironment() != Triple::CODE16), + In16BitMode(TargetTriple.getArch() == Triple::x86 && + TargetTriple.getEnvironment() == Triple::CODE16) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } -bool X86Subtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtargetInfo::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { +bool +X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode &Mode, + RegClassVector &CriticalPathRCs) const { Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL; CriticalPathRCs.clear(); return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } + +bool +X86Subtarget::enableEarlyIfConversion() const { + return hasCMov() && X86EarlyIfConv; +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 52986b9..703559a 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -178,6 +178,9 @@ protected: /// address generation (AG) time. bool LEAUsesAG; + /// SlowLEA - True if the LEA instruction with certain arguments is slow + bool SlowLEA; + /// Processor has AVX-512 PreFetch Instructions bool HasPFI; @@ -235,10 +238,6 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID - /// instruction. - void AutoDetectSubtargetFeatures(); - /// \brief Reset the features for the X86 target. void resetSubtargetFeatures(const MachineFunction *MF) override; private: @@ -319,11 +318,13 @@ public: bool padShortFunctions() const { return PadShortFunctions; } bool callRegIndirect() const { return CallRegIndirect; } bool LEAusesAG() const { return LEAUsesAG; } + bool slowLEA() const { return SlowLEA; } bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool isAtom() const { return X86ProcFamily == IntelAtom; } + bool isSLM() const { return X86ProcFamily == IntelSLM; } const Triple &getTargetTriple() const { return TargetTriple; } @@ -429,6 +430,8 @@ public: bool postRAScheduler() const { return PostRAScheduler; } + bool enableEarlyIfConversion() const override; + /// getInstrItins = Return the instruction itineraries based on the /// subtarget selection. 
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 6f09ccf..93760ef 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -108,6 +108,13 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, if (Options.FloatABIType == FloatABI::Default) this->Options.FloatABIType = FloatABI::Hard; + // Windows stack unwinder gets confused when execution flow "falls through" + // after a call to 'noreturn' function. + // To prevent that, we emit a trap for 'unreachable' IR instructions. + // (which on X86, happens to be the 'ud2' instruction) + if (Subtarget.isTargetWin64()) + this->Options.TrapUnreachable = true; + initAsmInfo(); } @@ -119,12 +126,6 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, cl::desc("Minimize AVX to SSE transition penalty"), cl::init(true)); -// Temporary option to control early if-conversion for x86 while adding machine -// models. -static cl::opt -X86EarlyIfConv("x86-early-ifcvt", cl::Hidden, - cl::desc("Enable early if-conversion on X86")); - //===----------------------------------------------------------------------===// // X86 Analysis Pass Setup //===----------------------------------------------------------------------===// @@ -177,19 +178,14 @@ bool X86PassConfig::addInstSelector() { if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None) addPass(createCleanupLocalDynamicTLSPass()); - // For 32-bit, prepend instructions to set the "global base reg" for PIC. - if (!getX86Subtarget().is64Bit()) - addPass(createGlobalBaseRegPass()); + addPass(createX86GlobalBaseRegPass()); return false; } bool X86PassConfig::addILPOpts() { - if (X86EarlyIfConv && getX86Subtarget().hasCMov()) { - addPass(&EarlyIfConverterID); - return true; - } - return false; + addPass(&EarlyIfConverterID); + return true; } bool X86PassConfig::addPreRegAlloc() { @@ -208,18 +204,13 @@ bool X86PassConfig::addPreEmitPass() { ShouldPrint = true; } - if (getX86Subtarget().hasAVX() && UseVZeroUpper) { + if (UseVZeroUpper) { addPass(createX86IssueVZeroUpperPass()); ShouldPrint = true; } - if (getOptLevel() != CodeGenOpt::None && - getX86Subtarget().padShortFunctions()) { + if (getOptLevel() != CodeGenOpt::None) { addPass(createX86PadShortFunctions()); - ShouldPrint = true; - } - if (getOptLevel() != CodeGenOpt::None && - getX86Subtarget().LEAusesAG()){ addPass(createX86FixupLEAs()); ShouldPrint = true; } diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 0a88e98..8157085 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -26,7 +26,7 @@ const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference( // On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which // is an indirect pc-relative reference. - if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) { + if ((Encoding & DW_EH_PE_indirect) && (Encoding & DW_EH_PE_pcrel)) { const MCSymbol *Sym = TM.getSymbol(GV, Mang); const MCExpr *Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext()); @@ -62,7 +62,7 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( // operation. const SubOperator *Sub = dyn_cast(CE); if (!Sub) - return 0; + return nullptr; // Symbols must first be numbers before we can subtract them, we need to see a // ptrtoint on both subtraction operands. 
@@ -71,13 +71,13 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( const PtrToIntOperator *SubRHS = dyn_cast(Sub->getOperand(1)); if (!SubLHS || !SubRHS) - return 0; + return nullptr; // Our symbols should exist in address space zero, cowardly no-op if // otherwise. if (SubLHS->getPointerAddressSpace() != 0 || SubRHS->getPointerAddressSpace() != 0) - return 0; + return nullptr; // Both ptrtoint instructions must wrap global variables: // - Only global variables are eligible for image relative relocations. @@ -87,7 +87,7 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( const GlobalVariable *GVRHS = dyn_cast(SubRHS->getPointerOperand()); if (!GVLHS || !GVRHS) - return 0; + return nullptr; // We expect __ImageBase to be a global variable without a section, externally // defined. @@ -96,11 +96,11 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol( if (GVRHS->isThreadLocal() || GVRHS->getName() != "__ImageBase" || !GVRHS->hasExternalLinkage() || GVRHS->hasInitializer() || GVRHS->hasSection()) - return 0; + return nullptr; // An image-relative, thread-local, symbol makes no sense. if (GVLHS->isThreadLocal()) - return 0; + return nullptr; return MCSymbolRefExpr::Create(TM.getSymbol(GVLHS, Mang), MCSymbolRefExpr::VK_COFF_IMGREL32, diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index c04964d..91b9d40 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -14,37 +14,24 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86tti" #include "X86.h" #include "X86TargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "x86tti" + // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. 
namespace llvm { void initializeX86TTIPass(PassRegistry &); } -static cl::opt -UsePartialUnrolling("x86-use-partial-unrolling", cl::init(true), - cl::desc("Use partial unrolling for some X86 targets"), cl::Hidden); -static cl::opt -PartialUnrollingThreshold("x86-partial-unrolling-threshold", cl::init(0), - cl::desc("Threshold for X86 partial unrolling"), cl::Hidden); -static cl::opt -PartialUnrollingMaxBranches("x86-partial-max-branches", cl::init(2), - cl::desc("Threshold for taken branches in X86 partial unrolling"), - cl::Hidden); - namespace { class X86TTI final : public ImmutablePass, public TargetTransformInfo { @@ -56,7 +43,7 @@ class X86TTI final : public ImmutablePass, public TargetTransformInfo { unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; public: - X86TTI() : ImmutablePass(ID), ST(0), TLI(0) { + X86TTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) { llvm_unreachable("This pass cannot be directly constructed"); } @@ -87,8 +74,6 @@ public: /// \name Scalar TTI Implementations /// @{ PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; - void getUnrollingPreferences(Loop *L, - UnrollingPreferences &UP) const override; /// @} @@ -153,93 +138,6 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software; } -void X86TTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { - if (!UsePartialUnrolling) - return; - // According to the Intel 64 and IA-32 Architectures Optimization Reference - // Manual, Intel Core models and later have a loop stream detector - // (and associated uop queue) that can benefit from partial unrolling. - // The relevant requirements are: - // - The loop must have no more than 4 (8 for Nehalem and later) branches - // taken, and none of them may be calls. - // - The loop can have no more than 18 (28 for Nehalem and later) uops. - - // According to the Software Optimization Guide for AMD Family 15h Processors, - // models 30h-4fh (Steamroller and later) have a loop predictor and loop - // buffer which can benefit from partial unrolling. - // The relevant requirements are: - // - The loop must have fewer than 16 branches - // - The loop must have less than 40 uops in all executed loop branches - - unsigned MaxBranches, MaxOps; - if (PartialUnrollingThreshold.getNumOccurrences() > 0) { - MaxBranches = PartialUnrollingMaxBranches; - MaxOps = PartialUnrollingThreshold; - } else if (ST->isAtom()) { - // On the Atom, the throughput for taken branches is 2 cycles. For small - // simple loops, expand by a small factor to hide the backedge cost. - MaxBranches = 2; - MaxOps = 10; - } else if (ST->hasFSGSBase() && ST->hasXOP() /* Steamroller and later */) { - MaxBranches = 16; - MaxOps = 40; - } else if (ST->hasFMA4() /* Any other recent AMD */) { - return; - } else if (ST->hasAVX() || ST->hasSSE42() /* Nehalem and later */) { - MaxBranches = 8; - MaxOps = 28; - } else if (ST->hasSSSE3() /* Intel Core */) { - MaxBranches = 4; - MaxOps = 18; - } else { - return; - } - - // Scan the loop: don't unroll loops with calls, and count the potential - // number of taken branches (this is somewhat conservative because we're - // counting all block transitions as potential branches while in reality some - // of these will become implicit via block placement). 
-  unsigned MaxDepth = 0;
-  for (df_iterator<BasicBlock *> DI = df_begin(L->getHeader()),
-       DE = df_end(L->getHeader()); DI != DE;) {
-    if (!L->contains(*DI)) {
-      DI.skipChildren();
-      continue;
-    }
-
-    MaxDepth = std::max(MaxDepth, DI.getPathLength());
-    if (MaxDepth > MaxBranches)
-      return;
-
-    for (BasicBlock::iterator I = DI->begin(), IE = DI->end(); I != IE; ++I)
-      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
-        ImmutableCallSite CS(I);
-        if (const Function *F = CS.getCalledFunction()) {
-          if (!isLoweredToCall(F))
-            continue;
-        }
-
-        return;
-      }
-
-    ++DI;
-  }
-
-  // Enable runtime and partial unrolling up to the specified size.
-  UP.Partial = UP.Runtime = true;
-  UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
-
-  // Set the maximum count based on the loop depth. The maximum number of
-  // branches taken in a loop (including the backedge) is equal to the maximum
-  // loop depth (the DFS path length from the loop header to any block in the
-  // loop). When the loop is unrolled, this depth (except for the backedge
-  // itself) is multiplied by the unrolling factor. This new unrolled depth
-  // must be less than the target-specific maximum branch count (which limits
-  // the number of taken branches in the uop buffer).
-  if (MaxDepth > 1)
-    UP.MaxCount = (MaxBranches-1)/(MaxDepth-1);
-}
-
 unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
   if (Vector && !ST->hasSSE1())
     return 0;
@@ -283,6 +181,21 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
   int ISD = TLI->InstructionOpcodeToISD(Opcode);
   assert(ISD && "Invalid opcode");
 
+  static const CostTblEntry<MVT::SimpleValueType>
+  AVX2UniformConstCostTable[] = {
+    { ISD::SDIV, MVT::v16i16,  6 }, // vpmulhw sequence
+    { ISD::UDIV, MVT::v16i16,  6 }, // vpmulhuw sequence
+    { ISD::SDIV, MVT::v8i32,  15 }, // vpmuldq sequence
+    { ISD::UDIV, MVT::v8i32,  15 }, // vpmuludq sequence
+  };
+
+  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+      ST->hasAVX2()) {
+    int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second);
+    if (Idx != -1)
+      return LT.first * AVX2UniformConstCostTable[Idx].Cost;
+  }
+
   static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
     // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
     // customize them to detect the cases where shift amount is a scalar one.
@@ -350,10 +263,19 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
     { ISD::SRA,  MVT::v16i8,    4 }, // psrlw, pand, pxor, psubb.
     { ISD::SRA,  MVT::v8i16,    1 }, // psraw.
     { ISD::SRA,  MVT::v4i32,    1 }, // psrad.
+
+    { ISD::SDIV, MVT::v8i16,    6 }, // pmulhw sequence
+    { ISD::UDIV, MVT::v8i16,    6 }, // pmulhuw sequence
+    { ISD::SDIV, MVT::v4i32,   19 }, // pmuludq sequence
+    { ISD::UDIV, MVT::v4i32,   15 }, // pmuludq sequence
   };
 
   if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
       ST->hasSSE2()) {
+    // pmuldq sequence.
+    if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
+      return LT.first * 15;
+
     int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
     if (Idx != -1)
       return LT.first * SSE2UniformConstCostTable[Idx].Cost;
@@ -893,6 +815,13 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
   if (BitSize == 0)
     return ~0U;
 
+  // Never hoist constants larger than 128bit, because this might lead to
+  // incorrect code generation or assertions in codegen.
+  // Fixme: Create a cost model for types larger than i128 once the codegen
+  // issues have been fixed.
+  if (BitSize > 128)
+    return TCC_Free;
+
   if (Imm == 0)
     return TCC_Free;
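The hunks below apply the same TCC_Free convention to per-operand immediate costs. A small sketch of how a consumer of these values reacts (the enum values match TargetTransformInfo::TargetCostConstants; the hoisting predicate is an illustrative simplification, not the pass's exact logic):

    // TCC_Free marks immediates that cost nothing to rematerialize at each
    // use (e.g. shift amounts, or >128-bit constants this patch declines to
    // model), so the constant hoisting pass leaves them in place.
    enum TargetCostConstants { TCC_Free = 0, TCC_Basic = 1, TCC_Expensive = 4 };

    static bool worthHoisting(unsigned Cost) {
      return Cost > TCC_Basic; // only hoist genuinely expensive immediates
    }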
@@ -908,8 +837,10 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
   assert(Ty->isIntegerTy());
 
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  // There is no cost model for constants with a bit size of 0. Return TCC_Free
+  // here, so that constant hoisting will ignore this constant.
   if (BitSize == 0)
-    return ~0U;
+    return TCC_Free;
 
   unsigned ImmIdx = ~0U;
   switch (Opcode) {
@@ -931,15 +862,19 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
   case Instruction::SDiv:
   case Instruction::URem:
   case Instruction::SRem:
-  case Instruction::Shl:
-  case Instruction::LShr:
-  case Instruction::AShr:
   case Instruction::And:
   case Instruction::Or:
   case Instruction::Xor:
   case Instruction::ICmp:
     ImmIdx = 1;
     break;
+  // Always return TCC_Free for the shift value of a shift instruction.
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+    if (Idx == 1)
+      return TCC_Free;
+    break;
   case Instruction::Trunc:
   case Instruction::ZExt:
   case Instruction::SExt:
@@ -966,8 +901,10 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
   assert(Ty->isIntegerTy());
 
   unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  // There is no cost model for constants with a bit size of 0. Return TCC_Free
+  // here, so that constant hoisting will ignore this constant.
   if (BitSize == 0)
-    return ~0U;
+    return TCC_Free;
 
   switch (IID) {
   default: return TCC_Free;
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index d4341b9..0bb5f99 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "x86-vzeroupper"
 #include "X86.h"
 #include "X86InstrInfo.h"
 #include "X86Subtarget.h"
@@ -28,6 +27,8 @@
 #include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "x86-vzeroupper"
+
 STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
 
 namespace {
@@ -246,7 +247,8 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
 /// runOnMachineFunction - Loop over all of the basic blocks, inserting
 /// vzero upper instructions before function calls.
 bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
-  if (MF.getTarget().getSubtarget<X86Subtarget>().hasAVX512())
+  const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
+  if (!ST.hasAVX() || ST.hasAVX512())
     return false;
   TII = MF.getTarget().getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 9c20abd..7fef796 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -14,6 +14,7 @@
 
 #include "XCore.h"
 #include "XCoreRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
 #include "llvm/MC/MCInst.h"
@@ -23,16 +24,17 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "xcore-disassembler"
+
 typedef MCDisassembler::DecodeStatus DecodeStatus;
 
 namespace {
 
 /// \brief A disassembler class for XCore.
class XCoreDisassembler : public MCDisassembler { - OwningPtr RegInfo; public: - XCoreDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) : - MCDisassembler(STI), RegInfo(Info) {} + XCoreDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : + MCDisassembler(STI, Ctx) {} /// \brief See MCDisassembler. virtual DecodeStatus getInstruction(MCInst &instr, @@ -40,9 +42,8 @@ public: const MemoryObject ®ion, uint64_t address, raw_ostream &vStream, - raw_ostream &cStream) const; + raw_ostream &cStream) const override; - const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); } }; } @@ -81,7 +82,8 @@ static bool readInstruction32(const MemoryObject ®ion, static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) { const XCoreDisassembler *Dis = static_cast(D); - return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo); + const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo(); + return *(RegInfo->getRegClass(RC).begin() + RegNo); } static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst, @@ -788,8 +790,9 @@ namespace llvm { } static MCDisassembler *createXCoreDisassembler(const Target &T, - const MCSubtargetInfo &STI) { - return new XCoreDisassembler(STI, T.createMCRegInfo("")); + const MCSubtargetInfo &STI, + MCContext &Ctx) { + return new XCoreDisassembler(STI, Ctx); } extern "C" void LLVMInitializeXCoreDisassembler() { diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp index 9ae8c0d..215fe89 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "XCoreInstPrinter.h" #include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCExpr.h" @@ -22,6 +21,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "asm-printer" + #include "XCoreGenAsmWriter.inc" void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h index 772c515..98e7c98 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h @@ -31,8 +31,8 @@ public: void printInstruction(const MCInst *MI, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); - virtual void printRegName(raw_ostream &OS, unsigned RegNo) const; - virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); + void printRegName(raw_ostream &OS, unsigned RegNo) const override; + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override; private: void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O); void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O); diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp index f788c59..5665911 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp @@ -17,7 +17,7 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) { SupportsDebugInformation = true; Data16bitsDirective = "\t.short\t"; Data32bitsDirective = "\t.long\t"; - Data64bitsDirective = 0; + Data64bitsDirective = nullptr; ZeroDirective = "\t.space\t"; CommentString = "#"; diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h 
b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h index e53c96b..da2689a 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h @@ -21,7 +21,7 @@ namespace llvm { class Target; class XCoreMCAsmInfo : public MCAsmInfoELF { - virtual void anchor(); + void anchor() override; public: explicit XCoreMCAsmInfo(StringRef TT); }; diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp index 439d0ab..d54e94f 100644 --- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp +++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp @@ -23,6 +23,8 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_MC_DESC #include "XCoreGenInstrInfo.inc" @@ -32,8 +34,6 @@ #define GET_REGINFO_MC_DESC #include "XCoreGenRegisterInfo.inc" -using namespace llvm; - static MCInstrInfo *createXCoreMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitXCoreMCInstrInfo(X); @@ -58,7 +58,7 @@ static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI, MCAsmInfo *MAI = new XCoreMCAsmInfo(TT); // Initial state of the frame pointer is SP. - MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, XCore::SP, 0); + MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, XCore::SP, 0); MAI->addInitialFrameState(Inst); return MAI; @@ -128,12 +128,11 @@ void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) { static MCStreamer * createXCoreMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useCFI, bool useDwarfDirectory, + bool isVerboseAsm, bool useDwarfDirectory, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) { - MCStreamer *S = - llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory, - InstPrint, CE, TAB, ShowInst); + MCStreamer *S = llvm::createAsmStreamer( + Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst); new XCoreTargetAsmStreamer(*S, OS); return S; } diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 21acedf..e98d4f9 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "asm-printer" #include "XCore.h" #include "InstPrinter/XCoreInstPrinter.h" #include "XCoreInstrInfo.h" @@ -47,6 +46,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "asm-printer" + namespace { class XCoreAsmPrinter : public AsmPrinter { const XCoreSubtarget &Subtarget; @@ -58,7 +59,7 @@ namespace { : AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget()), MCInstLowering(*this) {} - virtual const char *getPassName() const { + const char *getPassName() const override { return "XCore Assembly Printer"; } @@ -70,18 +71,18 @@ namespace { void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); + raw_ostream &O) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) override; void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV); - virtual void EmitGlobalVariable(const GlobalVariable *GV); + void EmitGlobalVariable(const GlobalVariable *GV) override; - void EmitFunctionEntryLabel(); - void EmitInstruction(const 
MachineInstr *MI); - void EmitFunctionBodyStart(); - void EmitFunctionBodyEnd(); + void EmitFunctionEntryLabel() override; + void EmitInstruction(const MachineInstr *MI) override; + void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; }; } // end of anonymous namespace diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 954fddf..5499aba 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -64,7 +64,8 @@ static void EmitDefCfaRegister(MachineBasicBlock &MBB, MachineModuleInfo *MMI, unsigned DRegNum) { unsigned CFIIndex = MMI->addFrameInst( MCCFIInstruction::createDefCfaRegister(nullptr, DRegNum)); - BuildMI(MBB, MBBI, dl, TII.get(XCore::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } static void EmitDefCfaOffset(MachineBasicBlock &MBB, @@ -73,7 +74,8 @@ static void EmitDefCfaOffset(MachineBasicBlock &MBB, MachineModuleInfo *MMI, int Offset) { unsigned CFIIndex = MMI->addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); - BuildMI(MBB, MBBI, dl, TII.get(XCore::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } static void EmitCfiOffset(MachineBasicBlock &MBB, @@ -82,7 +84,8 @@ static void EmitCfiOffset(MachineBasicBlock &MBB, unsigned DRegNum, int Offset) { unsigned CFIIndex = MMI->addFrameInst( MCCFIInstruction::createOffset(nullptr, DRegNum, Offset)); - BuildMI(MBB, MBBI, dl, TII.get(XCore::CFI_INSTRUCTION)).addCFIIndex(CFIIndex); + BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); } /// The SP register is moved in steps of 'MaxImmU16' towards the bottom of the @@ -113,7 +116,8 @@ static void IfNeededExtSP(MachineBasicBlock &MBB, /// IfNeededLDAWSP emits the necessary LDAWSP instructions to move the SP only /// as far as to make 'OffsetFromTop' reachable using an LDAWSP_lru6. /// \param OffsetFromTop the spill offset from the top of the frame. -/// \param [in,out] RemainingAdj the current SP offset from the top of the frame. +/// \param [in,out] RemainingAdj the current SP offset from the top of the +/// frame. static void IfNeededLDAWSP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc dl, const TargetInstrInfo &TII, int OffsetFromTop, @@ -346,7 +350,8 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, RemainingAdj /= 4; if (RetOpcode == XCore::EH_RETURN) { - // 'Restore' the exception info the unwinder has placed into the stack slots. + // 'Restore' the exception info the unwinder has placed into the stack + // slots. 
SmallVector<StackSlotInfo,2> SpillList; GetEHSpillList(SpillList, MFI, XFI, MF.getTarget().getTargetLowering()); RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList); @@ -495,7 +500,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, errs() << "eliminateCallFramePseudoInstr size too big: " << Amount << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } MachineInstr *New; @@ -514,7 +519,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MBB.insert(I, New); } } - + MBB.erase(I); } diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h index 6cd90c9..e4f806a 100644 --- a/lib/Target/XCore/XCoreFrameLowering.h +++ b/lib/Target/XCore/XCoreFrameLowering.h @@ -27,29 +27,30 @@ namespace llvm { /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + void emitPrologue(MachineFunction &MF) const override; + void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const; + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; - bool hasFP(const MachineFunction &MF) const; + bool hasFP(const MachineFunction &MF) const override; void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; void processFunctionBeforeFrameFinalized(MachineFunction &MF, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; //!
Stack slot size (4 bytes) static int stackSlotSize() { diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp index c18eff9..30c7b59 100644 --- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp +++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp @@ -26,9 +26,9 @@ namespace { static char ID; XCoreFTAOElim() : MachineFunctionPass(ID) {} - virtual bool runOnMachineFunction(MachineFunction &Fn); + bool runOnMachineFunction(MachineFunction &Fn) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "XCore FRAME_TO_ARGS_OFFSET Elimination"; } }; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 5b0fcfa..86bc6f2 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -44,7 +44,7 @@ namespace { : SelectionDAGISel(TM, OptLevel), Subtarget(*TM.getSubtargetImpl()) { } - SDNode *Select(SDNode *N); + SDNode *Select(SDNode *N) override; SDNode *SelectBRIND(SDNode *N); /// getI32Imm - Return a target constant with the specified value, of type @@ -70,7 +70,7 @@ bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) override; - virtual const char *getPassName() const { + const char *getPassName() const override { return "XCore DAG->DAG Pattern Instruction Selection"; } @@ -89,14 +89,14 @@ FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM, bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset) { - FrameIndexSDNode *FIN = 0; + FrameIndexSDNode *FIN = nullptr; if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) { Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } if (Addr.getOpcode() == ISD::ADD) { - ConstantSDNode *CN = 0; + ConstantSDNode *CN = nullptr; if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) && (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { @@ -227,8 +227,7 @@ replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New) } if (!found) return SDValue(); - return CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, - &Ops[0], Ops.size()); + return CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, Ops); } SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) { @@ -237,10 +236,10 @@ SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue Addr = N->getOperand(1); if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN) - return 0; + return nullptr; unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue(); if (IntNo != Intrinsic::xcore_checkevent) - return 0; + return nullptr; SDValue nextAddr = Addr->getOperand(2); SDValue CheckEventChainOut(Addr.getNode(), 1); if (!CheckEventChainOut.use_empty()) { @@ -252,7 +251,7 @@ SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) { SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut, CheckEventChainIn); if (!NewChain.getNode()) - return 0; + return nullptr; Chain = NewChain; } // Enable events on the thread using setsr 1 and then disable them immediately diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 1b74013..9d78586 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -11,8 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "xcore-lower" -
#include "XCoreISelLowering.h" #include "XCore.h" #include "XCoreMachineFunctionInfo.h" @@ -41,6 +39,8 @@ using namespace llvm; +#define DEBUG_TYPE "xcore-lower" + const char *XCoreTargetLowering:: getTargetNodeName(unsigned Opcode) const { @@ -64,7 +64,7 @@ getTargetNodeName(unsigned Opcode) const case XCoreISD::FRAME_TO_ARGS_OFFSET : return "XCoreISD::FRAME_TO_ARGS_OFFSET"; case XCoreISD::EH_RETURN : return "XCoreISD::EH_RETURN"; case XCoreISD::MEMBARRIER : return "XCoreISD::MEMBARRIER"; - default : return NULL; + default : return nullptr; } } @@ -268,21 +268,19 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const Op.getOperand(1)); } -SDValue XCoreTargetLowering:: -getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, - SelectionDAG &DAG) const -{ +SDValue XCoreTargetLowering::getGlobalAddressWrapper(SDValue GA, + const GlobalValue *GV, + SelectionDAG &DAG) const { // FIXME there is no actual debug info here SDLoc dl(GA); const GlobalValue *UnderlyingGV = GV; // If GV is an alias then use the aliasee to determine the wrapper type if (const GlobalAlias *GA = dyn_cast(GV)) - UnderlyingGV = GA->getAliasedGlobal(); + UnderlyingGV = GA->getAliasee(); if (const GlobalVariable *GVar = dyn_cast(UnderlyingGV)) { - if ( ( GVar->isConstant() && - UnderlyingGV->isLocalLinkage(GV->getLinkage()) ) - || ( GVar->hasSection() && - StringRef(GVar->getSection()).startswith(".cp.") ) ) + if ((GVar->isConstant() && GV->hasLocalLinkage()) || + (GVar->hasSection() && + StringRef(GVar->getSection()).startswith(".cp."))) return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA); } @@ -428,13 +426,13 @@ lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base, Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), High.getValue(1)); SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } static bool isWordAligned(SDValue Value, SelectionDAG &DAG) { APInt KnownZero, KnownOne; - DAG.ComputeMaskedBits(Value, KnownZero, KnownOne); + DAG.computeKnownBits(Value, KnownZero, KnownOne); return KnownZero.countTrailingOnes() >= 2; } @@ -494,7 +492,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1), High.getValue(1)); SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, DL); + return DAG.getMergeValues(Ops, DL); } // Lower to a call to __misaligned_load(BasePtr). 
@@ -506,17 +504,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { Entry.Node = BasePtr; Args.push_back(Entry); - TargetLowering::CallLoweringInfo CLI(Chain, IntPtrTy, false, false, - false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - Args, DAG, DL); - std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); - - SDValue Ops[] = - { CallResult.first, CallResult.second }; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL).setChain(Chain) + .setCallee(CallingConv::C, IntPtrTy, + DAG.getExternalSymbol("__misaligned_load", getPointerTy()), + &Args, 0); - return DAG.getMergeValues(Ops, 2, DL); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + SDValue Ops[] = { CallResult.first, CallResult.second }; + return DAG.getMergeValues(Ops, DL); } SDValue XCoreTargetLowering:: @@ -568,14 +564,13 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const Entry.Node = Value; Args.push_back(Entry); - TargetLowering::CallLoweringInfo CLI(Chain, - Type::getVoidTy(*DAG.getContext()), false, false, - false, false, 0, CallingConv::C, /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol("__misaligned_store", getPointerTy()), - Args, DAG, dl); - std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__misaligned_store", getPointerTy()), + &Args, 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; } @@ -593,7 +588,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const LHS, RHS); SDValue Lo(Hi.getNode(), 1); SDValue Ops[] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } SDValue XCoreTargetLowering:: @@ -610,7 +605,7 @@ LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const Zero, Zero); SDValue Lo(Hi.getNode(), 1); SDValue Ops[] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } /// isADDADDMUL - Return whether Op is in a form that is equivalent to @@ -741,7 +736,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const if (N->getOpcode() == ISD::ADD) { SDValue Result = TryExpandADDWithMul(N, DAG); - if (Result.getNode() != 0) + if (Result.getNode()) return Result; } @@ -886,7 +881,7 @@ LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { DAG.getCopyToReg(Chain, dl, HandlerReg, Handler) }; - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 2); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); return DAG.getNode(XCoreISD::EH_RETURN, dl, MVT::Other, Chain, DAG.getRegister(StackReg, MVT::i32), @@ -952,7 +947,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { MachinePointerInfo(TrmpAddr, 16), false, false, 0); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } SDValue XCoreTargetLowering:: @@ -967,7 +962,7 @@ LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(1), Op.getOperand(2) , Op.getOperand(3)); SDValue Crc(Data.getNode(), 1); SDValue Results[] = { Crc, Data }; - return DAG.getMergeValues(Results, 2, DL); + return DAG.getMergeValues(Results, DL); } return SDValue(); } @@ -1111,7 +1106,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, unsigned index = ResultMemLocs[i].second; SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); SDValue Ops[] =
{ Chain, DAG.getConstant(offset / 4, MVT::i32) }; - SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops, 2); + SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops); InVals[index] = load; MemOpChains.push_back(load.getValue(1)); } @@ -1119,8 +1114,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, // Transform all loads nodes into one single node because // all load nodes are independent of each other. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); return Chain; } @@ -1204,8 +1198,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Transform all store nodes into one single node because // all store nodes are independent of each other. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. @@ -1244,7 +1237,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, if (InFlag.getNode()) Ops.push_back(InFlag); - Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size()); + Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, Ops); InFlag = Chain.getValue(1); // Create the CALLSEQ_END node. @@ -1347,7 +1340,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, errs() << "LowerFormalArguments Unhandled argument type: " << RegVT.getSimpleVT().SimpleTy << "\n"; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } case MVT::i32: unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass); @@ -1384,7 +1377,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // 1b. CopyFromReg vararg registers. if (isVarArg) { // Argument registers - static const uint16_t ArgRegs[] = { + static const MCPhysReg ArgRegs[] = { XCore::R0, XCore::R1, XCore::R2, XCore::R3 }; XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); @@ -1422,8 +1415,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // 2. chain CopyFromReg nodes into a TokenFactor. if (!CFRegNode.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &CFRegNode[0], - CFRegNode.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, CFRegNode); // 3. Memcpy 'byVal' args & push final InVals. // Aggregates passed "byVal" need to be copied by the callee. @@ -1452,8 +1444,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // 4, chain mem ops nodes into a TokenFactor. if (!MemOps.empty()) { MemOps.push_back(Chain); - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0], - MemOps.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); } return Chain; @@ -1535,8 +1526,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Transform all store nodes into one single node because // all stores are independent of each other. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); // Now handle return values copied to registers.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { @@ -1558,8 +1548,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, - &RetOps[0], RetOps.size()); + return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, RetOps); } //===----------------------------------------------------------------------===// @@ -1696,7 +1685,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2, DAG.getConstant(1, VT)); SDValue Ops[] = { Result, Carry }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the @@ -1705,12 +1694,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + DAG.computeKnownBits(N2, KnownZero, KnownOne); if ((KnownZero & Mask) == Mask) { SDValue Carry = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2); SDValue Ops[] = { Result, Carry }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } } } @@ -1728,13 +1717,13 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + DAG.computeKnownBits(N2, KnownZero, KnownOne); if ((KnownZero & Mask) == Mask) { SDValue Borrow = N2; SDValue Result = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), N2); SDValue Ops[] = { Result, Borrow }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } } @@ -1744,12 +1733,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); - DAG.ComputeMaskedBits(N2, KnownZero, KnownOne); + DAG.computeKnownBits(N2, KnownZero, KnownOne); if ((KnownZero & Mask) == Mask) { SDValue Borrow = DAG.getConstant(0, VT); SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2); SDValue Ops[] = { Result, Borrow }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } } } @@ -1775,14 +1764,14 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, if (N->hasNUsesOfValue(0, 0)) { SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3); SDValue Ops[] = { Lo, Lo }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } // Otherwise fold to ladd(a, b, 0) SDValue Result = DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1); SDValue Carry(Result.getNode(), 1); SDValue Ops[] = { Carry, Result }; - return DAG.getMergeValues(Ops, 2, dl); + return DAG.getMergeValues(Ops, dl); } } break; @@ -1866,11 +1855,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } -void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth) const { +void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); switch (Op.getOpcode()) { default: break; diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 
65e2bad..d28715b 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -97,31 +97,30 @@ namespace llvm { explicit XCoreTargetLowering(XCoreTargetMachine &TM); using TargetLowering::isZExtFree; - virtual bool isZExtFree(SDValue Val, EVT VT2) const; + bool isZExtFree(SDValue Val, EVT VT2) const override; - virtual unsigned getJumpTableEncoding() const; - virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + unsigned getJumpTableEncoding() const override; + MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. /// - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const override; /// getTargetNodeName - This method returns the name of a target specific // DAG node. - virtual const char *getTargetNodeName(unsigned Opcode) const; + const char *getTargetNodeName(unsigned Opcode) const override; - virtual MachineBasicBlock * + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *MBB) const; + MachineBasicBlock *MBB) const override; - virtual bool isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; private: const XCoreTargetMachine &TM; @@ -176,44 +175,44 @@ namespace llvm { // Inline asm support std::pair<unsigned, const TargetRegisterClass*> getRegForInlineAsmConstraint(const std::string &Constraint, - MVT VT) const; + MVT VT) const override; // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - virtual void computeMaskedBitsForTargetNode(const SDValue Op, - APInt &KnownZero, - APInt &KnownOne, - const SelectionDAG &DAG, - unsigned Depth = 0) const; + void computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; - virtual SDValue + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; - virtual SDValue + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const; + SmallVectorImpl<SDValue> &InVals) const override; - virtual SDValue + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, - SDLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const override; - virtual bool + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &ArgsFlags, - LLVMContext &Context) const; + LLVMContext &Context) const override; }; } diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index cea3bbf..984f0cd 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -26,6 +26,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + #define GET_INSTRINFO_CTOR_DTOR #include "XCoreGenInstrInfo.inc" @@ -41,9 +43,6 @@ namespace XCore { } } -using namespace llvm; - - // Pin the vtable to this file. void XCoreInstrInfo::anchor() {} @@ -289,7 +288,7 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "Unexpected number of components!"); - if (FBB == 0) { // One way branch. + if (!FBB) { // One way branch. if (Cond.empty()) { // Unconditional branch BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB); @@ -428,13 +427,21 @@ static inline bool isImmU16(unsigned val) { return val < (1 << 16); } +static bool isImmMskBitp(unsigned val) { + if (!isMask_32(val)) { + return false; + } + int N = Log2_32(val) + 1; + return (N >= 1 && N <= 8) || N == 16 || N == 24 || N == 32; +} + MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned Reg, uint64_t Value) const { DebugLoc dl; if (MI != MBB.end()) dl = MI->getDebugLoc(); - if (isMask_32(Value)) { + if (isImmMskBitp(Value)) { int N = Log2_32(Value) + 1; return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg).addImm(N); } diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 48c9cb5..e0be96b 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -32,55 +32,55 @@ public: /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// - virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; } + const TargetRegisterInfo &getRegisterInfo() const { return RI; } /// isLoadFromStackSlot - If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. 
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl<MachineOperand> &Cond, - bool AllowModify) const; - - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond, - DebugLoc DL) const; - - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual bool ReverseBranchCondition( - SmallVectorImpl<MachineOperand> &Cond) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const override; + + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const override; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + bool ReverseBranchCondition( + SmallVectorImpl<MachineOperand> &Cond) const override; // Emit code before MBBI to load immediate value into physical register Reg. // Returns an iterator to the new instruction. diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp index b398c2d..ac3bae5 100644 --- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -48,7 +48,7 @@ namespace { bool lowerGlobal(GlobalVariable *GV); - bool runOnModule(Module &M); + bool runOnModule(Module &M) override; }; } @@ -189,13 +189,14 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) { // Create replacement global.
ArrayType *NewType = createLoweredType(GV->getType()->getElementType()); - Constant *NewInitializer = 0; + Constant *NewInitializer = nullptr; if (GV->hasInitializer()) NewInitializer = createLoweredInitializer(NewType, GV->getInitializer()); GlobalVariable *NewGV = new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(), - NewInitializer, "", 0, GlobalVariable::NotThreadLocal, + NewInitializer, "", nullptr, + GlobalVariable::NotThreadLocal, GV->getType()->getAddressSpace(), GV->isExternallyInitialized()); diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index d85d717..316c82c 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -33,11 +33,13 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +using namespace llvm; + +#define DEBUG_TYPE "xcore-reg-info" + #define GET_REGINFO_TARGET_DESC #include "XCoreGenRegisterInfo.inc" -using namespace llvm; - XCoreRegisterInfo::XCoreRegisterInfo() : XCoreGenRegisterInfo(XCore::LR) { } @@ -205,16 +207,16 @@ bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) { MF.getFunction()->needsUnwindTableEntry(); } -const uint16_t* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) +const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // The callee saved registers LR & FP are explicitly handled during // emitPrologue & emitEpilogue and related functions. - static const uint16_t CalleeSavedRegs[] = { + static const MCPhysReg CalleeSavedRegs[] = { XCore::R4, XCore::R5, XCore::R6, XCore::R7, XCore::R8, XCore::R9, XCore::R10, 0 }; - static const uint16_t CalleeSavedRegsFP[] = { + static const MCPhysReg CalleeSavedRegsFP[] = { XCore::R4, XCore::R5, XCore::R6, XCore::R7, XCore::R8, XCore::R9, 0 diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 36ba7b4..aa617a0 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -29,22 +29,23 @@ public: /// Code Generation virtual methods... - const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + const MCPhysReg * + getCalleeSavedRegs(const MachineFunction *MF =nullptr) const override; - BitVector getReservedRegs(const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; - bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const override; - bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const; + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; - bool useFPForScavengingIndex(const MachineFunction &MF) const; + bool useFPForScavengingIndex(const MachineFunction &MF) const override; void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, - RegScavenger *RS = NULL) const; + RegScavenger *RS = nullptr) const override; // Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const override; //! 
Return whether to emit frame moves static bool needsFrameMoves(const MachineFunction &MF); diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp index 68ede6a..5a6bbe7 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "xcore-selectiondag-info" #include "XCoreTargetMachine.h" using namespace llvm; +#define DEBUG_TYPE "xcore-selectiondag-info" + XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM) : TargetSelectionDAGInfo(TM) { } @@ -41,13 +42,15 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); - TargetLowering::CallLoweringInfo - CLI(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, - 0, TLI.getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false, - /*doesNotRet=*/false, /*isReturnValueUsed=*/false, - DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()), Args, DAG, dl); - std::pair<SDValue, SDValue> CallResult = - TLI.LowerCallTo(CLI); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()), + &Args, 0) + .setDiscardResult(); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h index 31704f3..ea6af98 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.h +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -25,14 +25,14 @@ public: explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM); ~XCoreSelectionDAGInfo(); - virtual SDValue + SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Op1, SDValue Op2, SDValue Op3, unsigned Align, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, - MachinePointerInfo SrcPtrInfo) const; + MachinePointerInfo SrcPtrInfo) const override; }; } diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp index 8cfb770..89ea03a 100644 --- a/lib/Target/XCore/XCoreSubtarget.cpp +++ b/lib/Target/XCore/XCoreSubtarget.cpp @@ -15,12 +15,14 @@ #include "XCore.h" #include "llvm/Support/TargetRegistry.h" +using namespace llvm; + +#define DEBUG_TYPE "xcore-subtarget" + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "XCoreGenSubtargetInfo.inc" -using namespace llvm; - void XCoreSubtarget::anchor() { } XCoreSubtarget::XCoreSubtarget(const std::string &TT, diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 781a87b..0fb21c5 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -46,9 +46,9 @@ public: return getTM<XCoreTargetMachine>(); } - virtual bool addPreISel(); - virtual bool addInstSelector(); - virtual bool addPreEmitPass(); + bool addPreISel() override; + bool addInstSelector() override; + bool addPreEmitPass() override; }; } // namespace diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index a19a677..a57ca55 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -37,28 +37,28 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); - virtual
const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const XCoreFrameLowering *getFrameLowering() const { + const XCoreInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const XCoreFrameLowering *getFrameLowering() const override { return &FrameLowering; } - virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const XCoreTargetLowering *getTargetLowering() const { + const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const XCoreTargetLowering *getTargetLowering() const override { return &TLInfo; } - virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const { + const XCoreSelectionDAGInfo* getSelectionDAGInfo() const override { return &TSInfo; } - virtual const TargetRegisterInfo *getRegisterInfo() const { + const TargetRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); } - virtual const DataLayout *getDataLayout() const { return &DL; } + const DataLayout *getDataLayout() const override { return &DL; } // Pass Pipeline Configuration - virtual TargetPassConfig *createPassConfig(PassManagerBase &PM); + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - virtual void addAnalysisPasses(PassManagerBase &PM); + void addAnalysisPasses(PassManagerBase &PM) override; }; } // end namespace llvm diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h index 733e6d3..34d756e 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.h +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -22,7 +22,7 @@ static const unsigned CodeModelLargeSize = 256; const MCSection *ReadOnlySectionLarge; const MCSection *DataRelROSectionLarge; public: - void Initialize(MCContext &Ctx, const TargetMachine &TM); + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; const MCSection * getExplicitSectionGlobal(const GlobalValue *GV, diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp index 313d18d..80d193d 100644 --- a/lib/Target/XCore/XCoreTargetTransformInfo.cpp +++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp @@ -14,7 +14,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "xcoretti" #include "XCore.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" @@ -22,8 +21,10 @@ #include "llvm/Target/TargetLowering.h" using namespace llvm; +#define DEBUG_TYPE "xcoretti" + // Declare the pass initialization routine locally as target-specific passes -// don't havve a target-wide initialization entry point, and so we rely on the +// don't have a target-wide initialization entry point, and so we rely on the // pass constructor initialization. 
namespace llvm { void initializeXCoreTTIPass(PassRegistry &); } diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index c514c49..29b9bb8 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -12,13 +12,14 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "hello" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "hello" + STATISTIC(HelloCounter, "Counts number of functions greeted"); namespace { diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 48d3fba..377fa15 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -29,7 +29,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "argpromotion" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" @@ -49,6 +48,8 @@ #include <set> using namespace llvm; +#define DEBUG_TYPE "argpromotion" + STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted"); STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted"); STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted"); @@ -123,14 +124,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { Function *F = CGN->getFunction(); // Make sure that it is local to this module. - if (!F || !F->hasLocalLinkage()) return 0; + if (!F || !F->hasLocalLinkage()) return nullptr; // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument*, 16> PointerArgs; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) PointerArgs.push_back(I); - if (PointerArgs.empty()) return 0; + if (PointerArgs.empty()) return nullptr; // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. Also see if the function @@ -139,7 +140,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { for (Use &U : F->uses()) { CallSite CS(U.getUser()); // Must be a direct call. - if (CS.getInstruction() == 0 || !CS.isCallee(&U)) return 0; + if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr; if (CS.getInstruction()->getParent()->getParent() == F) isSelfRecursive = true; @@ -207,7 +208,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // No promotable pointer arguments. if (ArgsToPromote.empty() && ByValArgsToTransform.empty()) - return 0; + return nullptr; return DoPromotion(F, ArgsToPromote, ByValArgsToTransform); } @@ -660,7 +661,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); Value *Idx = GetElementPtrInst::Create(*AI, Idxs, @@ -788,10 +789,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType(); - Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt); + Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt); StructType *STy = cast<StructType>(AgTy); Value *Idxs[2] = { - ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 }; + ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr }; for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i); diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 5c3acea..23be081 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "constmerge" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerIntPair.h" @@ -31,6 +30,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "constmerge" + STATISTIC(NumMerged, "Number of global constants merged"); namespace { @@ -66,7 +67,7 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); } /// Find values that are marked as llvm.used. static void FindUsedValues(GlobalVariable *LLVMUsed, SmallPtrSet<const GlobalValue*, 8> &UsedValues) { - if (LLVMUsed == 0) return; + if (!LLVMUsed) return; ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { @@ -103,7 +104,7 @@ unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const { bool ConstantMerge::runOnModule(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; // Find all the globals that are marked "used". These cannot be merged. SmallPtrSet<const GlobalValue*, 8> UsedGlobals; @@ -161,7 +162,7 @@ bool ConstantMerge::runOnModule(Module &M) { // If this is the first constant we find or if the old one is local, // replace with the current one. If the current is externally visible // it cannot be replace, but can be the canonical constant we merge with. - if (Slot == 0 || IsBetterCanonical(*GV, *Slot)) + if (!Slot || IsBetterCanonical(*GV, *Slot)) Slot = GV; } diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 1aba3df..284b896 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "deadargelim" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -38,8 +37,11 @@ #include "llvm/Support/raw_ostream.h" #include <map> #include <set> +#include <tuple> using namespace llvm; +#define DEBUG_TYPE "deadargelim" + STATISTIC(NumArgumentsEliminated, "Number of unread args removed"); STATISTIC(NumRetValsEliminated , "Number of unused return values removed"); STATISTIC(NumArgumentsReplacedWithUndef, @@ -764,7 +766,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // Find out the new return value.
Type *RetTy = FTy->getReturnType(); - Type *NRetTy = NULL; + Type *NRetTy = nullptr; unsigned RetCount = NumRetVals(F); // -1 means unused, other numbers are the new index @@ -1050,7 +1052,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Value *RetVal; if (NFTy->getReturnType()->isVoidTy()) { - RetVal = 0; + RetVal = nullptr; } else { assert (RetTy->isStructTy()); // The original return value was a struct, insert diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 4211f12..40ec9fa 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -27,11 +27,10 @@ using namespace llvm; /// the split module remain valid. static void makeVisible(GlobalValue &GV, bool Delete) { bool Local = GV.hasLocalLinkage(); - if (Local) - GV.setVisibility(GlobalValue::HiddenVisibility); - if (Local || Delete) { GV.setLinkage(GlobalValue::ExternalLinkage); + if (Local) + GV.setVisibility(GlobalValue::HiddenVisibility); return; } @@ -95,7 +94,7 @@ namespace { makeVisible(*I, Delete); if (Delete) - I->setInitializer(0); + I->setInitializer(nullptr); } // Visit the Functions. @@ -134,7 +133,7 @@ namespace { } else { Declaration = new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, - 0, CurI->getName()); + nullptr, CurI->getName()); } CurI->replaceAllUsesWith(Declaration); diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index b716718..fed8839 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "functionattrs" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" @@ -35,6 +34,8 @@ #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; +#define DEBUG_TYPE "functionattrs" + STATISTIC(NumReadNone, "Number of functions marked readnone"); STATISTIC(NumReadOnly, "Number of functions marked readonly"); STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); @@ -46,7 +47,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions"); namespace { struct FunctionAttrs : public CallGraphSCCPass { static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID), AA(0) { + FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) { initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); } @@ -160,7 +161,7 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - may write memory. Just give up. return false; @@ -319,7 +320,7 @@ namespace { ArgumentGraphNode SyntheticRoot; public: - ArgumentGraph() { SyntheticRoot.Definition = 0; } + ArgumentGraph() { SyntheticRoot.Definition = nullptr; } typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator; @@ -521,7 +522,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - only a problem for arguments that we pass to it. continue; @@ -600,7 +601,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // captures.
for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) { - std::vector<ArgumentGraphNode*> &ArgumentSCC = *I; + const std::vector<ArgumentGraphNode*> &ArgumentSCC = *I; if (ArgumentSCC.size() == 1) { if (!ArgumentSCC[0]->Definition) continue; // synthetic root node @@ -616,8 +617,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { } bool SCCCaptured = false; - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { ArgumentGraphNode *Node = *I; if (Node->Uses.empty()) { if (!Node->Definition->hasNoCaptureAttr()) @@ -629,13 +630,12 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { SmallPtrSet<Argument*, 8> ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { ArgumentSCCNodes.insert((*I)->Definition); } - for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(), - E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { + for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); + I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(), UE = N->Uses.end(); UI != UE; ++UI) { @@ -775,7 +775,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F == 0) + if (!F) // External node - skip it; return false; @@ -1668,7 +1668,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) { for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { Function *F = (*I)->getFunction(); - if (F != 0 && F->isDeclaration()) + if (F && F->isDeclaration()) MadeChange |= inferPrototypeAttributes(*F); } diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 0c081f1..9decddc 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -15,15 +15,18 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globaldce" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "globaldce" + STATISTIC(NumAliases , "Number of global aliases removed"); STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); @@ -53,6 +56,15 @@ namespace { }; } +/// Returns true if F contains only a single "ret" instruction. +static bool isEmptyFunction(Function *F) { + BasicBlock &Entry = F->getEntryBlock(); + if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front())) + return false; + ReturnInst &RI = cast<ReturnInst>(Entry.front()); + return RI.getReturnValue() == NULL; +} + char GlobalDCE::ID = 0; INITIALIZE_PASS(GlobalDCE, "globaldce", "Dead Global Elimination", false, false) @@ -61,7 +73,10 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); } bool GlobalDCE::runOnModule(Module &M) { bool Changed = false; - + + // Remove empty functions from the global ctors list.
+ Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); @@ -99,7 +114,7 @@ bool GlobalDCE::runOnModule(Module &M) { I != E; ++I) if (!AliveGlobals.count(I)) { DeadGlobalVars.push_back(I); // Keep track of dead globals - I->setInitializer(0); + I->setInitializer(nullptr); } // The second pass drops the bodies of functions which are dead... @@ -117,7 +132,7 @@ bool GlobalDCE::runOnModule(Module &M) { ++I) if (!AliveGlobals.count(I)) { DeadAliases.push_back(I); - I->setAliasee(0); + I->setAliasee(nullptr); } if (!DeadFunctions.empty()) { diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 1a510cf..ae80c43 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "globalopt" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -39,11 +38,15 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include <algorithm> +#include <deque> using namespace llvm; +#define DEBUG_TYPE "globalopt" + STATISTIC(NumMarked , "Number of globals marked constant"); STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr"); STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); @@ -74,11 +77,9 @@ namespace { bool runOnModule(Module &M) override; private: - GlobalVariable *FindGlobalCtors(Module &M); bool OptimizeFunctions(Module &M); bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); - bool OptimizeGlobalCtorsList(GlobalVariable *&GCL); bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, const GlobalStatus &GS); @@ -294,7 +295,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, Changed = true; } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) { if (CE->getOpcode() == Instruction::GetElementPtr) { - Constant *SubInit = 0; + Constant *SubInit = nullptr; if (Init) SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE); Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI); @@ -302,7 +303,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, CE->getType()->isPointerTy()) || CE->getOpcode() == Instruction::AddrSpaceCast) { // Pointer cast, delete any stores and memsets to the global. - Changed |= CleanupConstantGlobalUsers(CE, 0, DL, TLI); + Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI); } if (CE->use_empty()) { @@ -313,7 +314,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, // Do not transform "gepinst (gep constexpr (GV))" here, because forming // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold // and will invalidate our notion of what Init is. - Constant *SubInit = 0; + Constant *SubInit = nullptr; if (!isa<ConstantExpr>(GEP->getOperand(0))) { ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI)); @@ -370,7 +371,7 @@ static bool isSafeSROAElementUse(Value *V) { // Otherwise, it must be a GEP.
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I); - if (GEPI == 0) return false; + if (!GEPI) return false; if (GEPI->getNumOperands() < 3 || !isa<ConstantInt>(GEPI->getOperand(1)) || !cast<ConstantInt>(GEPI->getOperand(1))->isNullValue()) @@ -470,7 +471,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Make sure this global only has simple uses that we can SRA. if (!GlobalUsersSafeToSRA(GV)) - return 0; + return nullptr; assert(GV->hasLocalLinkage() && !GV->isConstant()); Constant *Init = GV->getInitializer(); @@ -514,7 +515,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { NumElements = cast<VectorType>(STy)->getNumElements(); if (NumElements > 16 && GV->hasNUsesOrMore(16)) - return 0; // It's not worth it. + return nullptr; // It's not worth it. NewGlobals.reserve(NumElements); uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType()); @@ -541,7 +542,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { } if (NewGlobals.empty()) - return 0; + return nullptr; DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); @@ -603,7 +604,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (FirstGlobal == i) ++FirstGlobal; } - return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0; + return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; } /// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified @@ -785,7 +786,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, Changed |= CleanupPointerRootUsers(GV, TLI); } else { Changed = true; - CleanupConstantGlobalUsers(GV, 0, DL, TLI); + CleanupConstantGlobalUsers(GV, nullptr, DL, TLI); } if (GV->use_empty()) { DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n"); @@ -847,7 +848,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update // other users to use the global as well. - BitCastInst *TheBC = 0; + BitCastInst *TheBC = nullptr; while (!CI->use_empty()) { Instruction *User = cast<Instruction>(CI->user_back()); if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { @@ -858,7 +859,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, BCI->setOperand(0, NewGV); } } else { - if (TheBC == 0) + if (!TheBC) TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI); User->replaceUsesOfWith(CI, TheBC); } @@ -1169,10 +1170,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, } else if (PHINode *PN = dyn_cast<PHINode>(V)) { // PN's type is pointer to struct. Make a new PHI of pointer to struct // field.
- StructType *ST = cast(PN->getType()->getPointerElementType()); + PointerType *PTy = cast(PN->getType()); + StructType *ST = cast(PTy->getElementType()); + + unsigned AS = PTy->getAddressSpace(); PHINode *NewPN = - PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS), PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); Result = NewPN; @@ -1284,9 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, std::vector FieldGlobals; std::vector FieldMallocs; + unsigned AS = GV->getType()->getPointerAddressSpace(); for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){ Type *FieldTy = STy->getElementType(FieldNo); - PointerType *PFieldTy = PointerType::getUnqual(FieldTy); + PointerType *PFieldTy = PointerType::get(FieldTy, AS); GlobalVariable *NGV = new GlobalVariable(*GV->getParent(), @@ -1302,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Type *IntPtrTy = DL->getIntPtrType(CI->getType()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, 0, + NElems, nullptr, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); @@ -1535,7 +1540,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, - 0, CI->getName()); + nullptr, CI->getName()); Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); @@ -1750,7 +1755,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, ->getEntryBlock().begin()); Type *ElemTy = GV->getType()->getElementType(); // FIXME: Pass Global's alignment when globals have alignment - AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI); + AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr, + GV->getName(), &FirstI); if (!isa(GV->getInitializer())) new StoreInst(GV->getInitializer(), Alloca, &FirstI); @@ -1957,116 +1963,6 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { return Changed; } -/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all -/// initializers have an init priority of 65535. -GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { - GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); - if (GV == 0) return 0; - - // Verify that the initializer is simple enough for us to handle. We are - // only allowed to optimize the initializer if it is unique. - if (!GV->hasUniqueInitializer()) return 0; - - if (isa(GV->getInitializer())) - return GV; - ConstantArray *CA = cast(GV->getInitializer()); - - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - if (isa(*i)) - continue; - ConstantStruct *CS = cast(*i); - if (isa(CS->getOperand(1))) - continue; - - // Must have a function or null ptr. - if (!isa(CS->getOperand(1))) - return 0; - - // Init priority must be standard. - ConstantInt *CI = cast(CS->getOperand(0)); - if (CI->getZExtValue() != 65535) - return 0; - } - - return GV; -} - -/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, -/// return a list of the functions and null terminator as a vector. 
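// The three helpers removed below now live behind a single entry point,
// optimizeGlobalCtorsList() from the new CtorUtils.h, which both GlobalOpt
// and GlobalDCE drive with a caller-supplied predicate. A minimal
// stand-alone sketch of that shape, under stated assumptions: Ctor,
// optimizeCtorList, and ShouldRemove are illustrative stand-ins; the real
// interface operates on llvm::Module and llvm::Function.
#include <functional>
#include <vector>

struct Ctor { bool RemovableAtCompileTime; };

// Erase every ctor the predicate accepts; report whether anything changed.
static bool optimizeCtorList(std::vector<Ctor *> &Ctors,
                             const std::function<bool(Ctor *)> &ShouldRemove) {
  bool Changed = false;
  for (auto I = Ctors.begin(); I != Ctors.end();) {
    if (*I && ShouldRemove(*I)) {
      I = Ctors.erase(I);
      Changed = true;
    } else {
      ++I;
    }
  }
  return Changed;
}
// GlobalDCE passes an "is the body empty?" predicate (isEmptyFunction);
// GlobalOpt passes a lambda that tries to evaluate the ctor, as seen later
// in this patch.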
-static std::vector ParseGlobalCtors(GlobalVariable *GV) { - if (GV->getInitializer()->isNullValue()) - return std::vector(); - ConstantArray *CA = cast(GV->getInitializer()); - std::vector Result; - Result.reserve(CA->getNumOperands()); - for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - ConstantStruct *CS = cast(*i); - Result.push_back(dyn_cast(CS->getOperand(1))); - } - return Result; -} - -/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the -/// specified array, returning the new global to use. -static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, - const std::vector &Ctors) { - // If we made a change, reassemble the initializer list. - Constant *CSVals[2]; - CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535); - CSVals[1] = 0; - - StructType *StructTy = - cast(GCL->getType()->getElementType()->getArrayElementType()); - - // Create the new init list. - std::vector CAList; - for (unsigned i = 0, e = Ctors.size(); i != e; ++i) { - if (Ctors[i]) { - CSVals[1] = Ctors[i]; - } else { - Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()), - false); - PointerType *PFTy = PointerType::getUnqual(FTy); - CSVals[1] = Constant::getNullValue(PFTy); - CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 0x7fffffff); - } - CAList.push_back(ConstantStruct::get(StructTy, CSVals)); - } - - // Create the array initializer. - Constant *CA = ConstantArray::get(ArrayType::get(StructTy, - CAList.size()), CAList); - - // If we didn't change the number of elements, don't create a new GV. - if (CA->getType() == GCL->getInitializer()->getType()) { - GCL->setInitializer(CA); - return GCL; - } - - // Create the new global and insert it next to the existing list. - GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), - GCL->getLinkage(), CA, "", - GCL->getThreadLocalMode()); - GCL->getParent()->getGlobalList().insert(GCL, NGV); - NGV->takeName(GCL); - - // Nuke the old list, replacing any uses with the new one. - if (!GCL->use_empty()) { - Constant *V = NGV; - if (V->getType() != GCL->getType()) - V = ConstantExpr::getBitCast(V, GCL->getType()); - GCL->replaceAllUsesWith(V); - } - GCL->eraseFromParent(); - - if (Ctors.size()) - return NGV; - else - return 0; -} - - static inline bool isSimpleEnoughValueToCommit(Constant *C, SmallPtrSet &SimpleConstants, @@ -2271,22 +2167,16 @@ class Evaluator { public: Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI) : DL(DL), TLI(TLI) { - ValueStack.push_back(new DenseMap); + ValueStack.emplace_back(); } ~Evaluator() { - DeleteContainerPointers(ValueStack); - while (!AllocaTmps.empty()) { - GlobalVariable *Tmp = AllocaTmps.back(); - AllocaTmps.pop_back(); - + for (auto &Tmp : AllocaTmps) // If there are still users of the alloca, the program is doing something // silly, e.g. storing the address of the alloca somewhere and using it // later. Since this is undefined, we'll just make it be null. 
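// The surrounding Evaluator hunk is a routine C++11 ownership cleanup: a
// container of raw owning pointers plus a manual delete loop becomes a
// container of std::unique_ptr, so the destructor only performs the
// semantic fixup (redirecting stray users) and never frees memory by hand.
// The pattern in isolation; Resource and Owner are placeholder names:
#include <memory>
#include <vector>

struct Resource {
  bool hasUsers() const { return false; }
  void disconnectUsers() {}
};

struct Owner {
  std::vector<std::unique_ptr<Resource>> Tmps;

  void addTmp() { Tmps.push_back(std::unique_ptr<Resource>(new Resource())); }

  ~Owner() {
    for (auto &T : Tmps)   // fixups only; unique_ptr frees each element
      if (T->hasUsers())
        T->disconnectUsers();
  }
};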
       if (!Tmp->use_empty())
         Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
-      delete Tmp;
-    }
   }
 
   /// EvaluateFunction - Evaluate a call to function F, returning true if
@@ -2302,13 +2192,13 @@ public:
   Constant *getVal(Value *V) {
     if (Constant *CV = dyn_cast<Constant>(V)) return CV;
-    Constant *R = ValueStack.back()->lookup(V);
+    Constant *R = ValueStack.back().lookup(V);
     assert(R && "Reference to an uncomputed value!");
     return R;
   }
 
   void setVal(Value *V, Constant *C) {
-    ValueStack.back()->operator[](V) = C;
+    ValueStack.back()[V] = C;
   }
 
   const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
@@ -2323,9 +2213,9 @@ private:
   Constant *ComputeLoadResult(Constant *P);
 
   /// ValueStack - As we compute SSA register values, we store their contents
-  /// here. The back of the vector contains the current function and the stack
+  /// here. The back of the deque contains the current function and the stack
   /// contains the values in the calling frames.
-  SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
+  std::deque<DenseMap<Value*, Constant*>> ValueStack;
 
   /// CallStack - This is used to detect recursion.  In pathological situations
   /// we could hit exponential behavior, but at least there is nothing
@@ -2340,7 +2230,7 @@ private:
   /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
   /// to represent its body.  This vector is needed so we can delete the
   /// temporary globals when we are done.
-  SmallVector<GlobalVariable*, 32> AllocaTmps;
+  SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
 
   /// Invariants - These global variables have been marked invariant by the
   /// static constructor.
@@ -2369,7 +2259,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
     if (GV->hasDefinitiveInitializer())
       return GV->getInitializer();
-    return 0;
+    return nullptr;
   }
 
   // Handle a constantexpr getelementptr.
@@ -2381,7 +2271,7 @@
         return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
   }
 
-  return 0;  // don't know how to evaluate.
+  return nullptr;  // don't know how to evaluate.
 }
 
 /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
@@ -2391,7 +2281,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
                               BasicBlock *&NextBB) {
   // This is the main evaluation loop.
   while (1) {
-    Constant *InstResult = 0;
+    Constant *InstResult = nullptr;
 
     DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
 
@@ -2517,7 +2407,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
                "folding: " << *Ptr << "\n");
       }
       InstResult = ComputeLoadResult(Ptr);
-      if (InstResult == 0) {
+      if (!InstResult) {
         DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
               "\n");
         return false; // Could not evaluate load.
@@ -2530,11 +2420,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         return false;  // Cannot handle array allocs.
       }
       Type *Ty = AI->getType()->getElementType();
-      AllocaTmps.push_back(new GlobalVariable(Ty, false,
-                                              GlobalValue::InternalLinkage,
-                                              UndefValue::get(Ty),
-                                              AI->getName()));
-      InstResult = AllocaTmps.back();
+      AllocaTmps.push_back(
+          make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
+                                      UndefValue::get(Ty), AI->getName()));
+      InstResult = AllocaTmps.back().get();
       DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
     } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
       CallSite CS(CurInst);
@@ -2636,17 +2525,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
         return false;
       }
 
-      Constant *RetVal = 0;
+      Constant *RetVal = nullptr;
 
       // Execute the call, if successful, use the return value.
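// Two properties make std::deque a reasonable fit for ValueStack here:
// push_back/pop_back never relocate existing elements, so references into
// an enclosing call's frame stay valid across nested calls, and the
// per-frame maps are not copied on growth (which the old code achieved by
// heap-allocating each DenseMap and storing pointers). A stand-alone
// sketch of the same call-frame discipline; Frame and evalNestedCall are
// illustrative:
#include <deque>
#include <map>
#include <string>

using Frame = std::map<std::string, int>; // stand-in for DenseMap<Value*, Constant*>

static int evalNestedCall(std::deque<Frame> &ValueStack) {
  ValueStack.emplace_back();          // push a fresh frame for the callee
  ValueStack.back()["ret"] = 42;      // bindings go into the current frame
  int R = ValueStack.back()["ret"];
  ValueStack.pop_back();              // pop when the call returns
  return R;
}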
- ValueStack.push_back(new DenseMap); + ValueStack.emplace_back(); if (!EvaluateFunction(Callee, RetVal, Formals)) { DEBUG(dbgs() << "Failed to evaluate function.\n"); return false; } - delete ValueStack.pop_back_val(); + ValueStack.pop_back(); InstResult = RetVal; - if (InstResult != NULL) { + if (InstResult) { DEBUG(dbgs() << "Successfully evaluated function. Result: " << InstResult << "\n\n"); } else { @@ -2678,7 +2567,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, else return false; // Cannot determine. } else if (isa(CurInst)) { - NextBB = 0; + NextBB = nullptr; } else { // invoke, unwind, resume, unreachable. DEBUG(dbgs() << "Can not handle terminator."); @@ -2743,13 +2632,13 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, BasicBlock::iterator CurInst = CurBB->begin(); while (1) { - BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings. + BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); if (!EvaluateBlock(CurInst, NextBB)) return false; - if (NextBB == 0) { + if (!NextBB) { // Successfully running until there's no next block means that we found // the return. Fill it the return value and pop the call stack. ReturnInst *RI = cast(CurBB->getTerminator()); @@ -2768,7 +2657,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, // Okay, we have never been in this block before. Check to see if there // are any PHI nodes. If so, evaluate them with information about where // we came from. - PHINode *PN = 0; + PHINode *PN = nullptr; for (CurInst = NextBB->begin(); (PN = dyn_cast(CurInst)); ++CurInst) setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB))); @@ -2789,6 +2678,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, SmallVector()); if (EvalSuccess) { + ++NumCtorsEvaluated; + // We succeeded at evaluation: commit the result. DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '" << F->getName() << "' to " << Eval.getMutatedMemory().size() @@ -2806,46 +2697,6 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL, return EvalSuccess; } -/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible. -/// Return true if anything changed. -bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) { - std::vector Ctors = ParseGlobalCtors(GCL); - bool MadeChange = false; - if (Ctors.empty()) return false; - - // Loop over global ctors, optimizing them when we can. - for (unsigned i = 0; i != Ctors.size(); ++i) { - Function *F = Ctors[i]; - // Found a null terminator in the middle of the list, prune off the rest of - // the list. - if (F == 0) { - if (i != Ctors.size()-1) { - Ctors.resize(i+1); - MadeChange = true; - } - break; - } - DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); - - // We cannot simplify external ctor functions. - if (F->empty()) continue; - - // If we can evaluate the ctor at compile time, do. 
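// For orientation, a source-level example (illustrative, not from this
// patch) of a constructor the Evaluator can typically fold: it can execute
// the ctor against a snapshot of the globals, commit X = 42 as X's
// initializer, and drop the llvm.global_ctors entry, so nothing runs at
// program startup.
int X;

namespace {
struct InitX {
  InitX() { X = 42; } // reduces to a constant store: statically evaluable
} TheInit;            // emits an llvm.global_ctors entry for this TU
}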
- if (EvaluateStaticConstructor(F, DL, TLI)) { - Ctors.erase(Ctors.begin()+i); - MadeChange = true; - --i; - ++NumCtorsEvaluated; - continue; - } - } - - if (!MadeChange) return false; - - GCL = InstallGlobalCtors(GCL, Ctors); - return true; -} - static int compareNames(Constant *const *A, Constant *const *B) { return (*A)->getName().compare((*B)->getName()); } @@ -3010,7 +2861,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { if (!hasUsesToReplace(*J, Used, RenameTarget)) continue; - J->replaceAllUsesWith(Aliasee); + J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType())); ++NumAliasesResolved; Changed = true; @@ -3042,12 +2893,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::cxa_atexit)) - return 0; + return nullptr; Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit)); if (!Fn) - return 0; + return nullptr; FunctionType *FTy = Fn->getFunctionType(); @@ -3058,7 +2909,7 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { !FTy->getParamType(0)->isPointerTy() || !FTy->getParamType(1)->isPointerTy() || !FTy->getParamType(2)->isPointerTy()) - return 0; + return nullptr; return Fn; } @@ -3160,12 +3011,9 @@ bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); - // Try to find the llvm.globalctors list. - GlobalVariable *GlobalCtors = FindGlobalCtors(M); - bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -3174,8 +3022,9 @@ bool GlobalOpt::runOnModule(Module &M) { LocalChange |= OptimizeFunctions(M); // Optimize global_ctors list. - if (GlobalCtors) - LocalChange |= OptimizeGlobalCtorsList(GlobalCtors); + LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) { + return EvaluateStaticConstructor(F, DL, TLI); + }); // Optimize non-address-taken globals. LocalChange |= OptimizeGlobalVars(M); diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index 8684796..af541d1 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ipconstprop" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -27,6 +26,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "ipconstprop" + STATISTIC(NumArgumentsProped, "Number of args turned into constants"); STATISTIC(NumReturnValProped, "Number of return values turned into constants"); @@ -112,7 +113,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { continue; Constant *C = dyn_cast(*AI); - if (C && ArgumentConstants[i].first == 0) { + if (C && ArgumentConstants[i].first == nullptr) { ArgumentConstants[i].first = C; // First constant seen. } else if (C && ArgumentConstants[i].first == C) { // Still the constant value we think it is. @@ -139,7 +140,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { continue; Value *V = ArgumentConstants[i].first; - if (V == 0) V = UndefValue::get(AI->getType()); + if (!V) V = UndefValue::get(AI->getType()); AI->replaceAllUsesWith(V); ++NumArgumentsProped; MadeChange = true; @@ -209,7 +210,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { } // Different or no known return value? 
Don't propagate this return // value. - RetVals[i] = 0; + RetVals[i] = nullptr; // All values non-constant? Stop looking. if (++NumNonConstant == RetVals.size()) return false; @@ -235,7 +236,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { MadeChange = true; - if (STy == 0) { + if (!STy) { Value* New = RetVals[0]; if (Argument *A = dyn_cast(New)) // Was an argument returned? Then find the corresponding argument in diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 6cf3040..624cb90 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/CallGraph.h" @@ -28,6 +27,8 @@ using namespace llvm; +#define DEBUG_TYPE "inline" + namespace { /// \brief Inliner pass which only handles "always inline" functions. @@ -36,12 +37,13 @@ class AlwaysInliner : public Inliner { public: // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) { + AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), + ICA(nullptr) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } AlwaysInliner(bool InsertLifetime) - : Inliner(ID, -2000000000, InsertLifetime), ICA(0) { + : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } @@ -93,8 +95,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { // that are viable for inlining. FIXME: We shouldn't even get here for // declarations. if (Callee && !Callee->isDeclaration() && - Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::AlwaysInline) && + CS.hasFnAttr(Attribute::AlwaysInline) && ICA->isInlineViable(*Callee)) return InlineCost::getAlways(); diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 7141064..d189756 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" @@ -26,6 +25,8 @@ using namespace llvm; +#define DEBUG_TYPE "inline" + namespace { /// \brief Actual inliner pass implementation. 
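// The DEBUG_TYPE hunks repeated in every file of this patch are one
// mechanical change: the macro used to be defined before the #includes,
// where it could collide with, or silently alter, DEBUG_TYPE uses inside
// headers; it is now defined after the last #include, scoped to the .cpp
// alone. The resulting convention, sketched with an illustrative pass
// name ("my-pass" and noteProgress are not from this patch):
#include "llvm/Support/Debug.h"       // headers first, with no DEBUG_TYPE set
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "my-pass"          // after all includes, per this patch

static void noteProgress() {
  // Printed only under -debug-only=my-pass in asserts-enabled builds.
  DEBUG(llvm::dbgs() << "my-pass: made progress\n");
}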
@@ -37,12 +38,12 @@ class SimpleInliner : public Inliner { InlineCostAnalysis *ICA; public: - SimpleInliner() : Inliner(ID), ICA(0) { + SimpleInliner() : Inliner(ID), ICA(nullptr) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } SimpleInliner(int Threshold) - : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) { + : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index e97fb83..9087ab2 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "inline" #include "llvm/Transforms/IPO/InlinerPass.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -21,6 +20,7 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" @@ -32,6 +32,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "inline" + STATISTIC(NumInlined, "Number of functions inlined"); STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); @@ -183,7 +185,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // canonicalized to be an allocation *of* an array), or allocations whose // type is not itself an array (because we're afraid of pessimizing SRoA). ArrayType *ATy = dyn_cast(AI->getAllocatedType()); - if (ATy == 0 || AI->isArrayAllocation()) + if (!ATy || AI->isArrayAllocation()) continue; // Get the list of all available allocas for this array type. @@ -239,7 +241,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, AI->eraseFromParent(); MergedAwayAlloca = true; ++NumMergedAllocas; - IFI.StaticAllocas[AllocaNo] = 0; + IFI.StaticAllocas[AllocaNo] = nullptr; break; } @@ -288,12 +290,24 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { bool ColdCallee = Callee && !Callee->isDeclaration() && Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::Cold); - if (ColdCallee && ColdThreshold < thres) + // Command line argument for InlineLimit will override the default + // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, + // do not use the default cold threshold even if it is smaller. + if ((InlineLimit.getNumOccurrences() == 0 || + ColdThreshold.getNumOccurrences() > 0) && ColdCallee && + ColdThreshold < thres) thres = ColdThreshold; return thres; } +static void emitAnalysis(CallSite CS, const Twine &Msg) { + Function *Caller = CS.getCaller(); + LLVMContext &Ctx = Caller->getContext(); + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg); +} + /// shouldInline - Return true if the inliner should attempt to inline /// at the given CallSite. 
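// The emitAnalysis() helper above is thin glue over the DiagnosticInfo
// remark API this patch starts using: every remark needs a context, the
// enclosing function, and a debug location, and the helper gathers all
// three from the call site. The same shape for a hypothetical pass
// (reportDecision and "my-pass" are illustrative names):
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"

static void reportDecision(llvm::Instruction *I, const llvm::Twine &Msg) {
  llvm::Function *F = I->getParent()->getParent();
  llvm::emitOptimizationRemarkAnalysis(F->getContext(), "my-pass", *F,
                                       I->getDebugLoc(), Msg);
}
// Users surface these with flags of this vintage such as opt's
// -pass-remarks-analysis=<pass> or clang's -Rpass-analysis=<pass>.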
bool Inliner::shouldInline(CallSite CS) { @@ -302,12 +316,16 @@ bool Inliner::shouldInline(CallSite CS) { if (IC.isAlways()) { DEBUG(dbgs() << " Inlining: cost=always" << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) + + " should always be inlined (cost=always)"); return true; } if (IC.isNever()) { DEBUG(dbgs() << " NOT Inlining: cost=never" << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " should never be inlined (cost=never)")); return false; } @@ -316,6 +334,10 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost() << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << "\n"); + emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() + + " too costly to inline (cost=") + + Twine(IC.getCost()) + ", threshold=" + + Twine(IC.getCostDelta() + IC.getCost()) + ")"); return false; } @@ -383,6 +405,11 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() << " Cost = " << IC.getCost() << ", outer Cost = " << TotalSecondaryCost << '\n'); + emitAnalysis( + CS, Twine("Not inlining. Cost of inlining " + + CS.getCalledFunction()->getName() + + " increases the cost of inlining " + + CS.getCaller()->getName() + " in other contexts")); return false; } } @@ -390,6 +417,10 @@ bool Inliner::shouldInline(CallSite CS) { DEBUG(dbgs() << " Inlining: cost=" << IC.getCost() << ", thres=" << (IC.getCostDelta() + IC.getCost()) << ", Call: " << *CS.getInstruction() << '\n'); + emitAnalysis( + CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") + + CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) + + " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")"); return true; } @@ -410,7 +441,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis().getCallGraph(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = getAnalysisIfAvailable(); SmallPtrSet SCCFunctions; @@ -499,7 +530,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { ++NumCallsDeleted; } else { // We can only inline direct calls to non-declarations. - if (Callee == 0 || Callee->isDeclaration()) continue; + if (!Callee || Callee->isDeclaration()) continue; // If this call site was obtained by inlining another function, verify // that the include path for the function did not include the callee @@ -511,18 +542,37 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) continue; - + LLVMContext &CallerCtx = Caller->getContext(); + + // Get DebugLoc to report. CS will be invalid after Inliner. + DebugLoc DLoc = CS.getInstruction()->getDebugLoc(); + // If the policy determines that we should inline this function, // try to do so. - if (!shouldInline(CS)) + if (!shouldInline(CS)) { + emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + + " will not be inlined into " + + Caller->getName())); continue; + } // Attempt to inline the function. 
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, - InlineHistoryID, InsertLifetime, DL)) + InlineHistoryID, InsertLifetime, DL)) { + emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + + " will not be inlined into " + + Caller->getName())); continue; + } ++NumInlined; - + + // Report the inline decision. + emitOptimizationRemark( + CallerCtx, DEBUG_TYPE, *Caller, DLoc, + Twine(Callee->getName() + " inlined into " + Caller->getName())); + // If inlining this function gave us any new call sites, throw them // onto our worklist to process. They are useful inline candidates. if (!InlineInfo.InlinedCalls.empty()) { diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index c1fe01c..c970a1a 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "internalize" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -35,6 +34,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "internalize" + STATISTIC(NumAliases , "Number of aliases internalized"); STATISTIC(NumFunctions, "Number of functions internalized"); STATISTIC(NumGlobals , "Number of global vars internalized"); @@ -131,8 +132,8 @@ static bool shouldInternalize(const GlobalValue &GV, bool InternalizePass::runOnModule(Module &M) { CallGraphWrapperPass *CGPass = getAnalysisIfAvailable(); - CallGraph *CG = CGPass ? &CGPass->getCallGraph() : 0; - CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0; + CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; + CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; bool Changed = false; SmallPtrSet Used; @@ -158,6 +159,7 @@ bool InternalizePass::runOnModule(Module &M) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); if (ExternalNode) @@ -194,6 +196,7 @@ bool InternalizePass::runOnModule(Module &M) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumGlobals; @@ -206,6 +209,7 @@ bool InternalizePass::runOnModule(Module &M) { if (!shouldInternalize(*I, ExternalNames)) continue; + I->setVisibility(GlobalValue::DefaultVisibility); I->setLinkage(GlobalValue::InternalLinkage); Changed = true; ++NumAliases; diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 464aa99..20414aa 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-extract" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopPass.h" @@ -30,6 +29,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "loop-extract" + STATISTIC(NumExtracted, "Number of loops extracted"); namespace { @@ -136,7 +137,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { if (NumLoops == 0) return Changed; --NumLoops; CodeExtractor Extractor(DT, *L); - if (Extractor.extractCodeRegion() != 0) { + if (Extractor.extractCodeRegion() != nullptr) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more 
loop passes on it. @@ -241,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) { if (!Split) continue; SmallVector NewBBs; - SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs); + SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs); } } diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 8555d2c..c3a2b12 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -43,7 +43,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" @@ -67,6 +66,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "mergefunc" + STATISTIC(NumFunctionsMerged, "Number of functions merged"); STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); @@ -120,12 +121,12 @@ public: void release() { assert(Func && "Attempted to release function twice, or release empty/tombstone!"); - Func = NULL; + Func = nullptr; } private: explicit ComparableFunction(unsigned Hash) - : Func(NULL), Hash(Hash), DL(NULL) {} + : Func(nullptr), Hash(Hash), DL(nullptr) {} AssertingVH Func; unsigned Hash; @@ -175,19 +176,181 @@ private: /// Test whether two basic blocks have equivalent behaviour. bool compare(const BasicBlock *BB1, const BasicBlock *BB2); + /// Constants comparison. + /// Its analog to lexicographical comparison between hypothetical numbers + /// of next format: + /// + /// + /// 1. Bitcastability. + /// Check whether L's type could be losslessly bitcasted to R's type. + /// On this stage method, in case when lossless bitcast is not possible + /// method returns -1 or 1, thus also defining which type is greater in + /// context of bitcastability. + /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight + /// to the contents comparison. + /// If types differ, remember types comparison result and check + /// whether we still can bitcast types. + /// Stage 1: Types that satisfies isFirstClassType conditions are always + /// greater then others. + /// Stage 2: Vector is greater then non-vector. + /// If both types are vectors, then vector with greater bitwidth is + /// greater. + /// If both types are vectors with the same bitwidth, then types + /// are bitcastable, and we can skip other stages, and go to contents + /// comparison. + /// Stage 3: Pointer types are greater than non-pointers. If both types are + /// pointers of the same address space - go to contents comparison. + /// Different address spaces: pointer with greater address space is + /// greater. + /// Stage 4: Types are neither vectors, nor pointers. And they differ. + /// We don't know how to bitcast them. So, we better don't do it, + /// and return types comparison result (so it determines the + /// relationship among constants we don't know how to bitcast). + /// + /// Just for clearance, let's see how the set of constants could look + /// on single dimension axis: + /// + /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors] + /// Where: NFCT - Not a FirstClassType + /// FCT - FirstClassTyp: + /// + /// 2. Compare raw contents. + /// It ignores types on this stage and only compares bits from L and R. + /// Returns 0, if L and R has equivalent contents. + /// -1 or 1 if values are different. + /// Pretty trivial: + /// 2.1. If contents are numbers, compare numbers. 
+  ///      Ints with greater bitwidth are greater. Ints with the same
+  ///      bitwidth are compared by their contents.
+  /// 2.2. "And so on". To avoid discrepancies between these comments and the
+  ///      code, the implementation itself is the authoritative reference.
+  /// 3. Back to the overall picture: the ordered set of constants looks like:
+  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+  ///
+  /// For example, [FCT, "others"] could contain:
+  /// [FCT, "others"] =
+  /// [
+  ///   [double 0.1], [double 1.23],
+  ///   [i32 1], [i32 2],
+  ///   { double 1.0 },      ; StructTyID, NumElements = 1
+  ///   { i32 1 },           ; StructTyID, NumElements = 1
+  ///   { double 1, i32 1 }, ; StructTyID, NumElements = 2
+  ///   { i32 1, double 1 }  ; StructTyID, NumElements = 2
+  /// ]
+  ///
+  /// To explain the order: floats sort below integers simply because of
+  /// cmpType's terms: FloatTyID < IntegerTyID.
+  /// Floats (with the same fltSemantics) are sorted by their value.
+  /// Integers come next and, like the floats, are easily ordered among
+  /// themselves.
+  /// Structures are grouped at the tail, again because of their TypeID:
+  /// StructTyID > IntegerTyID > FloatTyID.
+  /// Structures with more elements are greater; between structures of equal
+  /// arity, the one whose greater element comes first is greater.
+  /// The same logic applies to vectors, arrays and other complex types.
+  ///
+  /// Bitcastable constants.
+  /// Suppose some constant belonged to a group of "so-called-equal" values
+  /// with different types, and at the same time to another group of
+  /// constants with equal types and "really" equal values.
+  ///
+  /// We can prove that this is impossible:
+  ///
+  /// If constant A with type TyA is bitcastable to B with type TyB, then:
+  /// 1. All constants with types equal to TyA are bitcastable to B, since
+  ///    those types must be vectors (if TyA is a vector), pointers
+  ///    (if TyA is a pointer), or else (if TyA equals TyB) equal to TyB.
+  /// 2. All constants with types non-equal but bitcastable to TyA are
+  ///    bitcastable to B as well.
+  ///    Once again, this is because we allow it for vectors and pointers
+  ///    only, and the statement expands as follows:
+  /// 2.1. All vectors with the same bitwidth as vector A have the same
+  ///      bitwidth as vector B, and are thus bitcastable to B as well.
+  /// 2.2. All pointers of the same address space are bitcastable to each
+  ///      other, no matter what they point to. So if C is a pointer, it can
+  ///      be bitcast to both A and B.
+  /// So any constant equal or bitcastable to A is equal or bitcastable to B.
+  /// QED.
+  ///
+  /// In other words, for pointers and vectors we ignore the top-level type
+  /// and look at their particular properties (bit-width for vectors, address
+  /// space for pointers).
+  /// If these properties are equal, we compare their contents.
+  int cmpConstants(const Constant *L, const Constant *R);
+
   /// Assign or look up previously assigned numbers for the two values, and
   /// return whether the numbers are equal. Numbers are assigned in the order
   /// visited.
-  bool enumerate(const Value *V1, const Value *V2);
+  /// Comparison order:
+  /// Stage 0: A value that is the function itself is always greater than any
+  ///          other. If the left and right values are references to their own
+  ///          functions, they are equal.
+  /// Stage 1: Constants are greater than non-constants.
+ /// If both left and right are constants, then the result of + /// cmpConstants is used as cmpValues result. + /// Stage 2: InlineAsm instances are greater than others. If both left and + /// right are InlineAsm instances, InlineAsm* pointers casted to + /// integers and compared as numbers. + /// Stage 3: For all other cases we compare order we meet these values in + /// their functions. If right value was met first during scanning, + /// then left value is greater. + /// In another words, we compare serial numbers, for more details + /// see comments for sn_mapL and sn_mapR. + int cmpValues(const Value *L, const Value *R); + + bool enumerate(const Value *V1, const Value *V2) { + return cmpValues(V1, V2) == 0; + } /// Compare two Instructions for equivalence, similar to /// Instruction::isSameOperationAs but with modifications to the type /// comparison. + /// Stages are listed in "most significant stage first" order: + /// On each stage below, we do comparison between some left and right + /// operation parts. If parts are non-equal, we assign parts comparison + /// result to the operation comparison result and exit from method. + /// Otherwise we proceed to the next stage. + /// Stages: + /// 1. Operations opcodes. Compared as numbers. + /// 2. Number of operands. + /// 3. Operation types. Compared with cmpType method. + /// 4. Compare operation subclass optional data as stream of bytes: + /// just convert it to integers and call cmpNumbers. + /// 5. Compare in operation operand types with cmpType in + /// most significant operand first order. + /// 6. Last stage. Check operations for some specific attributes. + /// For example, for Load it would be: + /// 6.1.Load: volatile (as boolean flag) + /// 6.2.Load: alignment (as integer numbers) + /// 6.3.Load: synch-scope (as integer numbers) + /// On this stage its better to see the code, since its not more than 10-15 + /// strings for particular instruction, and could change sometimes. + int cmpOperation(const Instruction *L, const Instruction *R) const; + bool isEquivalentOperation(const Instruction *I1, - const Instruction *I2) const; + const Instruction *I2) const { + return cmpOperation(I1, I2) == 0; + } /// Compare two GEPs for equivalent pointer arithmetic. - bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2); + /// Parts to be compared for each comparison stage, + /// most significant stage first: + /// 1. Address space. As numbers. + /// 2. Constant offset, (if "DataLayout *DL" field is not NULL, + /// using GEPOperator::accumulateConstantOffset method). + /// 3. Pointer operand type (using cmpType method). + /// 4. Number of operands. + /// 5. Compare operands, using cmpValues method. + int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR); + int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) { + return cmpGEP(cast(GEPL), cast(GEPR)); + } + + bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) { + return cmpGEP(GEP1, GEP2) == 0; + } bool isEquivalentGEP(const GetElementPtrInst *GEP1, const GetElementPtrInst *GEP2) { return isEquivalentGEP(cast(GEP1), cast(GEP2)); @@ -241,13 +404,50 @@ private: int cmpNumbers(uint64_t L, uint64_t R) const; + int cmpAPInt(const APInt &L, const APInt &R) const; + int cmpAPFloat(const APFloat &L, const APFloat &R) const; + int cmpStrings(StringRef L, StringRef R) const; + int cmpAttrs(const AttributeSet L, const AttributeSet R) const; + // The two functions undergoing comparison. 
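// Every cmp* method declared above follows one idiom: compare the most
// significant part first, return on the first nonzero result, and fall
// through on ties, composing a total order from per-field comparisons.
// The idiom in miniature, independent of LLVM (Key and cmpKeys are
// illustrative):
#include <cstdint>

static int cmpNumbers(uint64_t L, uint64_t R) {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}

struct Key { uint64_t Major, Minor, Patch; };

// Lexicographic order over (Major, Minor, Patch); one "stage" per test.
static int cmpKeys(const Key &L, const Key &R) {
  if (int Res = cmpNumbers(L.Major, R.Major))
    return Res;
  if (int Res = cmpNumbers(L.Minor, R.Minor))
    return Res;
  return cmpNumbers(L.Patch, R.Patch);
}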
const Function *F1, *F2; const DataLayout *DL; - DenseMap id_map; - DenseSet seen_values; + /// Assign serial numbers to values from left function, and values from + /// right function. + /// Explanation: + /// Being comparing functions we need to compare values we meet at left and + /// right sides. + /// Its easy to sort things out for external values. It just should be + /// the same value at left and right. + /// But for local values (those were introduced inside function body) + /// we have to ensure they were introduced at exactly the same place, + /// and plays the same role. + /// Let's assign serial number to each value when we meet it first time. + /// Values that were met at same place will be with same serial numbers. + /// In this case it would be good to explain few points about values assigned + /// to BBs and other ways of implementation (see below). + /// + /// 1. Safety of BB reordering. + /// It's safe to change the order of BasicBlocks in function. + /// Relationship with other functions and serial numbering will not be + /// changed in this case. + /// As follows from FunctionComparator::compare(), we do CFG walk: we start + /// from the entry, and then take each terminator. So it doesn't matter how in + /// fact BBs are ordered in function. And since cmpValues are called during + /// this walk, the numbering depends only on how BBs located inside the CFG. + /// So the answer is - yes. We will get the same numbering. + /// + /// 2. Impossibility to use dominance properties of values. + /// If we compare two instruction operands: first is usage of local + /// variable AL from function FL, and second is usage of local variable AR + /// from FR, we could compare their origins and check whether they are + /// defined at the same place. + /// But, we are still not able to compare operands of PHI nodes, since those + /// could be operands from further BBs we didn't scan yet. + /// So it's impossible to use dominance properties in general. + DenseMap sn_mapL, sn_mapR; }; } @@ -258,6 +458,206 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { return 0; } +int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const { + if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) + return Res; + if (L.ugt(R)) return 1; + if (R.ugt(L)) return -1; + return 0; +} + +int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const { + if (int Res = cmpNumbers((uint64_t)&L.getSemantics(), + (uint64_t)&R.getSemantics())) + return Res; + return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt()); +} + +int FunctionComparator::cmpStrings(StringRef L, StringRef R) const { + // Prevent heavy comparison, compare sizes first. + if (int Res = cmpNumbers(L.size(), R.size())) + return Res; + + // Compare strings lexicographically only when it is necessary: only when + // strings are equal in size. + return L.compare(R); +} + +int FunctionComparator::cmpAttrs(const AttributeSet L, + const AttributeSet R) const { + if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) + return Res; + + for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { + AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), + RE = R.end(i); + for (; LI != LE && RI != RE; ++LI, ++RI) { + Attribute LA = *LI; + Attribute RA = *RI; + if (LA < RA) + return -1; + if (RA < LA) + return 1; + } + if (LI != LE) + return 1; + if (RI != RE) + return -1; + } + return 0; +} + +/// Constants comparison: +/// 1. 
Check whether type of L constant could be losslessly bitcasted to R +/// type. +/// 2. Compare constant contents. +/// For more details see declaration comments. +int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) { + + Type *TyL = L->getType(); + Type *TyR = R->getType(); + + // Check whether types are bitcastable. This part is just re-factored + // Type::canLosslesslyBitCastTo method, but instead of returning true/false, + // we also pack into result which type is "less" for us. + int TypesRes = cmpType(TyL, TyR); + if (TypesRes != 0) { + // Types are different, but check whether we can bitcast them. + if (!TyL->isFirstClassType()) { + if (TyR->isFirstClassType()) + return -1; + // Neither TyL nor TyR are values of first class type. Return the result + // of comparing the types + return TypesRes; + } + if (!TyR->isFirstClassType()) { + if (TyL->isFirstClassType()) + return 1; + return TypesRes; + } + + // Vector -> Vector conversions are always lossless if the two vector types + // have the same size, otherwise not. + unsigned TyLWidth = 0; + unsigned TyRWidth = 0; + + if (const VectorType *VecTyL = dyn_cast(TyL)) + TyLWidth = VecTyL->getBitWidth(); + if (const VectorType *VecTyR = dyn_cast(TyR)) + TyRWidth = VecTyR->getBitWidth(); + + if (TyLWidth != TyRWidth) + return cmpNumbers(TyLWidth, TyRWidth); + + // Zero bit-width means neither TyL nor TyR are vectors. + if (!TyLWidth) { + PointerType *PTyL = dyn_cast(TyL); + PointerType *PTyR = dyn_cast(TyR); + if (PTyL && PTyR) { + unsigned AddrSpaceL = PTyL->getAddressSpace(); + unsigned AddrSpaceR = PTyR->getAddressSpace(); + if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR)) + return Res; + } + if (PTyL) + return 1; + if (PTyR) + return -1; + + // TyL and TyR aren't vectors, nor pointers. We don't know how to + // bitcast them. + return TypesRes; + } + } + + // OK, types are bitcastable, now check constant contents. 
+ + if (L->isNullValue() && R->isNullValue()) + return TypesRes; + if (L->isNullValue() && !R->isNullValue()) + return 1; + if (!L->isNullValue() && R->isNullValue()) + return -1; + + if (int Res = cmpNumbers(L->getValueID(), R->getValueID())) + return Res; + + switch (L->getValueID()) { + case Value::UndefValueVal: return TypesRes; + case Value::ConstantIntVal: { + const APInt &LInt = cast(L)->getValue(); + const APInt &RInt = cast(R)->getValue(); + return cmpAPInt(LInt, RInt); + } + case Value::ConstantFPVal: { + const APFloat &LAPF = cast(L)->getValueAPF(); + const APFloat &RAPF = cast(R)->getValueAPF(); + return cmpAPFloat(LAPF, RAPF); + } + case Value::ConstantArrayVal: { + const ConstantArray *LA = cast(L); + const ConstantArray *RA = cast(R); + uint64_t NumElementsL = cast(TyL)->getNumElements(); + uint64_t NumElementsR = cast(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast(LA->getOperand(i)), + cast(RA->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantStructVal: { + const ConstantStruct *LS = cast(L); + const ConstantStruct *RS = cast(R); + unsigned NumElementsL = cast(TyL)->getNumElements(); + unsigned NumElementsR = cast(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (unsigned i = 0; i != NumElementsL; ++i) { + if (int Res = cmpConstants(cast(LS->getOperand(i)), + cast(RS->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantVectorVal: { + const ConstantVector *LV = cast(L); + const ConstantVector *RV = cast(R); + unsigned NumElementsL = cast(TyL)->getNumElements(); + unsigned NumElementsR = cast(TyR)->getNumElements(); + if (int Res = cmpNumbers(NumElementsL, NumElementsR)) + return Res; + for (uint64_t i = 0; i < NumElementsL; ++i) { + if (int Res = cmpConstants(cast(LV->getOperand(i)), + cast(RV->getOperand(i)))) + return Res; + } + return 0; + } + case Value::ConstantExprVal: { + const ConstantExpr *LE = cast(L); + const ConstantExpr *RE = cast(R); + unsigned NumOperandsL = LE->getNumOperands(); + unsigned NumOperandsR = RE->getNumOperands(); + if (int Res = cmpNumbers(NumOperandsL, NumOperandsR)) + return Res; + for (unsigned i = 0; i < NumOperandsL; ++i) { + if (int Res = cmpConstants(cast(LE->getOperand(i)), + cast(RE->getOperand(i)))) + return Res; + } + return 0; + } + case Value::FunctionVal: + case Value::GlobalVariableVal: + case Value::GlobalAliasVal: + default: // Unknown constant, cast L and R pointers to numbers and compare. + return cmpNumbers((uint64_t)L, (uint64_t)R); + } +} + /// cmpType - compares two types, /// defines total ordering among the types set. /// See method declaration comments for more details. @@ -350,143 +750,209 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const { // Determine whether the two operations are the same except that pointer-to-A // and pointer-to-B are equivalent. This should be kept in sync with // Instruction::isSameOperationAs. -bool FunctionComparator::isEquivalentOperation(const Instruction *I1, - const Instruction *I2) const { +// Read method declaration comments for more details. +int FunctionComparator::cmpOperation(const Instruction *L, + const Instruction *R) const { // Differences from Instruction::isSameOperationAs: // * replace type comparison with calls to isEquivalentType. 
// * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top // * because of the above, we don't test for the tail bit on calls later on - if (I1->getOpcode() != I2->getOpcode() || - I1->getNumOperands() != I2->getNumOperands() || - !isEquivalentType(I1->getType(), I2->getType()) || - !I1->hasSameSubclassOptionalData(I2)) - return false; + if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode())) + return Res; + + if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) + return Res; + + if (int Res = cmpType(L->getType(), R->getType())) + return Res; + + if (int Res = cmpNumbers(L->getRawSubclassOptionalData(), + R->getRawSubclassOptionalData())) + return Res; // We have two instructions of identical opcode and #operands. Check to see // if all operands are the same type - for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i) - if (!isEquivalentType(I1->getOperand(i)->getType(), - I2->getOperand(i)->getType())) - return false; + for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) { + if (int Res = + cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType())) + return Res; + } // Check special state that is a part of some instructions. - if (const LoadInst *LI = dyn_cast(I1)) - return LI->isVolatile() == cast(I2)->isVolatile() && - LI->getAlignment() == cast(I2)->getAlignment() && - LI->getOrdering() == cast(I2)->getOrdering() && - LI->getSynchScope() == cast(I2)->getSynchScope(); - if (const StoreInst *SI = dyn_cast(I1)) - return SI->isVolatile() == cast(I2)->isVolatile() && - SI->getAlignment() == cast(I2)->getAlignment() && - SI->getOrdering() == cast(I2)->getOrdering() && - SI->getSynchScope() == cast(I2)->getSynchScope(); - if (const CmpInst *CI = dyn_cast(I1)) - return CI->getPredicate() == cast(I2)->getPredicate(); - if (const CallInst *CI = dyn_cast(I1)) - return CI->getCallingConv() == cast(I2)->getCallingConv() && - CI->getAttributes() == cast(I2)->getAttributes(); - if (const InvokeInst *CI = dyn_cast(I1)) - return CI->getCallingConv() == cast(I2)->getCallingConv() && - CI->getAttributes() == cast(I2)->getAttributes(); - if (const InsertValueInst *IVI = dyn_cast(I1)) - return IVI->getIndices() == cast(I2)->getIndices(); - if (const ExtractValueInst *EVI = dyn_cast(I1)) - return EVI->getIndices() == cast(I2)->getIndices(); - if (const FenceInst *FI = dyn_cast(I1)) - return FI->getOrdering() == cast(I2)->getOrdering() && - FI->getSynchScope() == cast(I2)->getSynchScope(); - if (const AtomicCmpXchgInst *CXI = dyn_cast(I1)) - return CXI->isVolatile() == cast(I2)->isVolatile() && - CXI->getSuccessOrdering() == - cast(I2)->getSuccessOrdering() && - CXI->getFailureOrdering() == - cast(I2)->getFailureOrdering() && - CXI->getSynchScope() == cast(I2)->getSynchScope(); - if (const AtomicRMWInst *RMWI = dyn_cast(I1)) - return RMWI->getOperation() == cast(I2)->getOperation() && - RMWI->isVolatile() == cast(I2)->isVolatile() && - RMWI->getOrdering() == cast(I2)->getOrdering() && - RMWI->getSynchScope() == cast(I2)->getSynchScope(); + if (const LoadInst *LI = dyn_cast(L)) { + if (int Res = cmpNumbers(LI->isVolatile(), cast(R)->isVolatile())) + return Res; + if (int Res = + cmpNumbers(LI->getAlignment(), cast(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(LI->getOrdering(), cast(R)->getOrdering())) + return Res; + return cmpNumbers(LI->getSynchScope(), cast(R)->getSynchScope()); + } + if (const StoreInst *SI = dyn_cast(L)) { + if (int Res = + cmpNumbers(SI->isVolatile(), cast(R)->isVolatile())) + return Res; + if (int Res = + 
cmpNumbers(SI->getAlignment(), cast(R)->getAlignment())) + return Res; + if (int Res = + cmpNumbers(SI->getOrdering(), cast(R)->getOrdering())) + return Res; + return cmpNumbers(SI->getSynchScope(), cast(R)->getSynchScope()); + } + if (const CmpInst *CI = dyn_cast(L)) + return cmpNumbers(CI->getPredicate(), cast(R)->getPredicate()); + if (const CallInst *CI = dyn_cast(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast(R)->getCallingConv())) + return Res; + return cmpAttrs(CI->getAttributes(), cast(R)->getAttributes()); + } + if (const InvokeInst *CI = dyn_cast(L)) { + if (int Res = cmpNumbers(CI->getCallingConv(), + cast(R)->getCallingConv())) + return Res; + return cmpAttrs(CI->getAttributes(), cast(R)->getAttributes()); + } + if (const InsertValueInst *IVI = dyn_cast(L)) { + ArrayRef LIndices = IVI->getIndices(); + ArrayRef RIndices = cast(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const ExtractValueInst *EVI = dyn_cast(L)) { + ArrayRef LIndices = EVI->getIndices(); + ArrayRef RIndices = cast(R)->getIndices(); + if (int Res = cmpNumbers(LIndices.size(), RIndices.size())) + return Res; + for (size_t i = 0, e = LIndices.size(); i != e; ++i) { + if (int Res = cmpNumbers(LIndices[i], RIndices[i])) + return Res; + } + } + if (const FenceInst *FI = dyn_cast(L)) { + if (int Res = + cmpNumbers(FI->getOrdering(), cast(R)->getOrdering())) + return Res; + return cmpNumbers(FI->getSynchScope(), cast(R)->getSynchScope()); + } - return true; + if (const AtomicCmpXchgInst *CXI = dyn_cast(L)) { + if (int Res = cmpNumbers(CXI->isVolatile(), + cast(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(CXI->getSuccessOrdering(), + cast(R)->getSuccessOrdering())) + return Res; + if (int Res = cmpNumbers(CXI->getFailureOrdering(), + cast(R)->getFailureOrdering())) + return Res; + return cmpNumbers(CXI->getSynchScope(), + cast(R)->getSynchScope()); + } + if (const AtomicRMWInst *RMWI = dyn_cast(L)) { + if (int Res = cmpNumbers(RMWI->getOperation(), + cast(R)->getOperation())) + return Res; + if (int Res = cmpNumbers(RMWI->isVolatile(), + cast(R)->isVolatile())) + return Res; + if (int Res = cmpNumbers(RMWI->getOrdering(), + cast(R)->getOrdering())) + return Res; + return cmpNumbers(RMWI->getSynchScope(), + cast(R)->getSynchScope()); + } + return 0; } // Determine whether two GEP operations perform the same underlying arithmetic. -bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1, - const GEPOperator *GEP2) { - unsigned AS = GEP1->getPointerAddressSpace(); - if (AS != GEP2->getPointerAddressSpace()) - return false; +// Read method declaration comments for more details. +int FunctionComparator::cmpGEP(const GEPOperator *GEPL, + const GEPOperator *GEPR) { + + unsigned int ASL = GEPL->getPointerAddressSpace(); + unsigned int ASR = GEPR->getPointerAddressSpace(); + if (int Res = cmpNumbers(ASL, ASR)) + return Res; + + // When we have target data, we can reduce the GEP down to the value in bytes + // added to the address. if (DL) { - // When we have target data, we can reduce the GEP down to the value in bytes - // added to the address. - unsigned BitWidth = DL ? 
DL->getPointerSizeInBits(AS) : 1; - APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0); - if (GEP1->accumulateConstantOffset(*DL, Offset1) && - GEP2->accumulateConstantOffset(*DL, Offset2)) { - return Offset1 == Offset2; - } + unsigned BitWidth = DL->getPointerSizeInBits(ASL); + APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0); + if (GEPL->accumulateConstantOffset(*DL, OffsetL) && + GEPR->accumulateConstantOffset(*DL, OffsetR)) + return cmpAPInt(OffsetL, OffsetR); } - if (GEP1->getPointerOperand()->getType() != - GEP2->getPointerOperand()->getType()) - return false; + if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), + (uint64_t)GEPR->getPointerOperand()->getType())) + return Res; - if (GEP1->getNumOperands() != GEP2->getNumOperands()) - return false; + if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) + return Res; - for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) { - if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i))) - return false; + for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) { + if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i))) + return Res; } - return true; + return 0; } -// Compare two values used by the two functions under pair-wise comparison. If -// this is the first time the values are seen, they're added to the mapping so -// that we will detect mismatches on next use. -bool FunctionComparator::enumerate(const Value *V1, const Value *V2) { - // Check for function @f1 referring to itself and function @f2 referring to - // itself, or referring to each other, or both referring to either of them. - // They're all equivalent if the two functions are otherwise equivalent. - if (V1 == F1 && V2 == F2) - return true; - if (V1 == F2 && V2 == F1) - return true; +/// Compare two values used by the two functions under pair-wise comparison. If +/// this is the first time the values are seen, they're added to the mapping so +/// that we will detect mismatches on next use. +/// See comments in declaration for more details. +int FunctionComparator::cmpValues(const Value *L, const Value *R) { + // Catch self-reference case. + if (L == F1) { + if (R == F2) + return 0; + return -1; + } + if (R == F2) { + if (L == F1) + return 0; + return 1; + } - if (const Constant *C1 = dyn_cast(V1)) { - if (V1 == V2) return true; - const Constant *C2 = dyn_cast(V2); - if (!C2) return false; - // TODO: constant expressions with GEP or references to F1 or F2. - if (C1->isNullValue() && C2->isNullValue() && - isEquivalentType(C1->getType(), C2->getType())) - return true; - // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1 - // then they must have equal bit patterns. - return C1->getType()->canLosslesslyBitCastTo(C2->getType()) && - C1 == ConstantExpr::getBitCast(const_cast(C2), C1->getType()); - } - - if (isa(V1) || isa(V2)) - return V1 == V2; - - // Check that V1 maps to V2. If we find a value that V1 maps to then we simply - // check whether it's equal to V2. When there is no mapping then we need to - // ensure that V2 isn't already equivalent to something else. For this - // purpose, we track the V2 values in a set. 
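// The sn_mapL/sn_mapR scheme replacing id_map and seen_values below assigns
// each value a serial number the first time it is seen, in visitation
// order; two locals then compare equal exactly when they were first
// encountered at the same position on both sides, and unequal values get a
// deterministic order from their numbers. In miniature (SerialMap and the
// function names are illustrative):
#include <map>
#include <string>

using SerialMap = std::map<std::string, int>;

static int serialOf(SerialMap &M, const std::string &V) {
  // The pair is built before insert() runs, so M.size() is the pre-insert
  // size; if V was seen before, insert() is a no-op and yields its number.
  return M.insert(std::make_pair(V, static_cast<int>(M.size()))).first->second;
}

static int cmpBySerial(SerialMap &L, SerialMap &R, const std::string &VL,
                       const std::string &VR) {
  int SL = serialOf(L, VL), SR = serialOf(R, VR);
  return SL < SR ? -1 : SL > SR ? 1 : 0;
}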
-// Compare two values used by the two functions under pair-wise comparison. If
-// this is the first time the values are seen, they're added to the mapping so
-// that we will detect mismatches on next use.
-bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
-  // Check for function @f1 referring to itself and function @f2 referring to
-  // itself, or referring to each other, or both referring to either of them.
-  // They're all equivalent if the two functions are otherwise equivalent.
-  if (V1 == F1 && V2 == F2)
-    return true;
-  if (V1 == F2 && V2 == F1)
-    return true;
+/// Compare two values used by the two functions under pair-wise comparison. If
+/// this is the first time the values are seen, they're added to the mapping so
+/// that we will detect mismatches on next use.
+/// See comments in declaration for more details.
+int FunctionComparator::cmpValues(const Value *L, const Value *R) {
+  // Catch self-reference case.
+  if (L == F1) {
+    if (R == F2)
+      return 0;
+    return -1;
+  }
+  if (R == F2) {
+    if (L == F1)
+      return 0;
+    return 1;
+  }

-  if (const Constant *C1 = dyn_cast<Constant>(V1)) {
-    if (V1 == V2) return true;
-    const Constant *C2 = dyn_cast<Constant>(V2);
-    if (!C2) return false;
-    // TODO: constant expressions with GEP or references to F1 or F2.
-    if (C1->isNullValue() && C2->isNullValue() &&
-        isEquivalentType(C1->getType(), C2->getType()))
-      return true;
-    // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
-    // then they must have equal bit patterns.
-    return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
-      C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
-  }
-
-  if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
-    return V1 == V2;
-
-  // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
-  // check whether it's equal to V2. When there is no mapping then we need to
-  // ensure that V2 isn't already equivalent to something else. For this
-  // purpose, we track the V2 values in a set.
-
-  const Value *&map_elem = id_map[V1];
-  if (map_elem)
-    return map_elem == V2;
-  if (!seen_values.insert(V2).second)
-    return false;
-  map_elem = V2;
-  return true;
-}
+  const Constant *ConstL = dyn_cast<Constant>(L);
+  const Constant *ConstR = dyn_cast<Constant>(R);
+  if (ConstL && ConstR) {
+    if (L == R)
+      return 0;
+    return cmpConstants(ConstL, ConstR);
+  }
+
+  if (ConstL)
+    return 1;
+  if (ConstR)
+    return -1;
+
+  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+  if (InlineAsmL && InlineAsmR)
+    return cmpNumbers((uint64_t)L, (uint64_t)R);
+  if (InlineAsmL)
+    return 1;
+  if (InlineAsmR)
+    return -1;
+
+  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
+}

 // Test whether two basic blocks have equivalent behaviour.
 bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
   BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
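The sn_mapL/sn_mapR tail of cmpValues above is the heart of the new ordering: each side numbers values by first occurrence, so two functions match exactly when every use site sees the same occurrence number, and they order consistently when they diverge. A standalone model (plain C++; std::map stands in for the serial-number maps, const void* for llvm::Value*):

    #include <map>

    // insert() is a no-op when the key is already numbered; either way we
    // read back the value's first-occurrence index on its own side.
    static int cmpSerial(const void *L, const void *R,
                         std::map<const void *, unsigned> &MapL,
                         std::map<const void *, unsigned> &MapR) {
      auto LeftSN = MapL.insert(std::make_pair(L, unsigned(MapL.size())));
      auto RightSN = MapR.insert(std::make_pair(R, unsigned(MapR.size())));
      unsigned LN = LeftSN.first->second, RN = RightSN.first->second;
      return LN < RN ? -1 : LN > RN ? 1 : 0;
    }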
@@ -535,6 +1001,9 @@ bool FunctionComparator::compare() {
   // We need to recheck everything, but check the things that weren't included
   // in the hash first.

+  sn_mapL.clear();
+  sn_mapR.clear();
+
   if (F1->getAttributes() != F2->getAttributes())
     return false;

@@ -683,7 +1152,7 @@ ModulePass *llvm::createMergeFunctionsPass() {
 bool MergeFunctions::runOnModule(Module &M) {
   bool Changed = false;
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;

   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -783,8 +1252,23 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
 // Helper for writeThunk,
 // Selects proper bitcast operation,
 // but a bit simpler then CastInst::getCastOpcode.
-static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
   Type *SrcTy = V->getType();
+  if (SrcTy->isStructTy()) {
+    assert(DestTy->isStructTy());
+    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+    Value *Result = UndefValue::get(DestTy);
+    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+      Value *Element = createCast(
+          Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned>(I)),
+          DestTy->getStructElementType(I));
+
+      Result =
+          Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned>(I));
+    }
+    return Result;
+  }
+  assert(!DestTy->isStructTy());
   if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
     return Builder.CreateIntToPtr(V, DestTy);
   else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
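The struct case added to createCast above cannot be a single bitcast, since aggregates are not bitcastable in IR, so it converts element by element through extractvalue/insertvalue and recurses. A standalone model of that loop over a single level of nesting (plain C++; Aggregate and castOne are illustrative stand-ins):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Aggregates are modeled as vectors of raw 64-bit slots; castOne stands
    // in for the scalar inttoptr/ptrtoint/bitcast cases of the real helper.
    using Aggregate = std::vector<uint64_t>;

    static uint64_t castOne(uint64_t Bits) { return Bits; } // same bits, new type

    static Aggregate createCastModel(const Aggregate &Src, size_t DestElems) {
      assert(Src.size() == DestElems && "source/dest element counts must match");
      Aggregate Result(DestElems); // plays the role of UndefValue::get(DestTy)
      for (size_t I = 0; I != DestElems; ++I)
        Result[I] = castOne(Src[I]); // extractvalue -> cast -> insertvalue
      return Result;
    }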
@@ -843,9 +1327,9 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {

 // Replace G with an alias to F and delete G.
 void MergeFunctions::writeAlias(Function *F, Function *G) {
-  Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
-  GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
-                                    BitcastF, G->getParent());
+  PointerType *PTy = G->getType();
+  auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+                                 G->getLinkage(), "", F);
   F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
   GA->takeName(G);
   GA->setVisibility(G->getVisibility());
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index ac88aee..76d6dfa 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "partialinlining"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/CFG.h"
@@ -24,6 +23,8 @@
 #include "llvm/Transforms/Utils/CodeExtractor.h"
 using namespace llvm;

+#define DEBUG_TYPE "partialinlining"
+
 STATISTIC(NumPartialInlined, "Number of functions partially inlined");

 namespace {
@@ -52,10 +53,10 @@ Function* PartialInliner::unswitchFunction(Function* F) {
   BasicBlock* entryBlock = F->begin();
   BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
   if (!BR || BR->isUnconditional())
-    return 0;
+    return nullptr;

-  BasicBlock* returnBlock = 0;
-  BasicBlock* nonReturnBlock = 0;
+  BasicBlock* returnBlock = nullptr;
+  BasicBlock* nonReturnBlock = nullptr;
   unsigned returnCount = 0;
   for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
        SI != SE; ++SI)
@@ -66,7 +67,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
       nonReturnBlock = *SI;

   if (returnCount != 1)
-    return 0;
+    return nullptr;

   // Clone the function, so that we can hack away on it.
   ValueToValueMapTy VMap;
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4a28b34..38e1b8e 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -56,8 +56,9 @@ RunLoopRerolling("reroll-loops", cl::Hidden,
 PassManagerBuilder::PassManagerBuilder() {
     OptLevel = 2;
     SizeLevel = 0;
-    LibraryInfo = 0;
-    Inliner = 0;
+    LibraryInfo = nullptr;
+    Inliner = nullptr;
+    DisableTailCalls = false;
     DisableUnitAtATime = false;
     DisableUnrollLoops = false;
     BBVectorize = RunBBVectorization;
@@ -128,7 +129,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   if (OptLevel == 0) {
     if (Inliner) {
       MPM.add(Inliner);
-      Inliner = 0;
+      Inliner = nullptr;
     }

     // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
@@ -156,6 +157,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
     MPM.add(createDeadArgEliminationPass());  // Dead argument elimination

     MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+    addExtensionsToPM(EP_Peephole, MPM);
     MPM.add(createCFGSimplificationPass());   // Clean up after IPCP & DAE
   }

@@ -164,7 +166,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createPruneEHPass());              // Remove dead EH info
   if (Inliner) {
     MPM.add(Inliner);
-    Inliner = 0;
+    Inliner = nullptr;
   }
   if (!DisableUnitAtATime)
     MPM.add(createFunctionAttrsPass());       // Set readonly/readnone attrs
@@ -182,8 +184,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createInstructionCombiningPass());  // Combine silly seq's
+  addExtensionsToPM(EP_Peephole, MPM);

-  MPM.add(createTailCallEliminationPass());   // Eliminate tail calls
+  if (!DisableTailCalls)
+    MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createReassociatePass());           // Reassociate expressions
   MPM.add(createLoopRotatePass());            // Rotate Loop
@@ -206,6 +210,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   // Run instcombine after redundancy elimination to exploit opportunities
   // opened up by them.
   MPM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createJumpThreadingPass());         // Thread jumps
   MPM.add(createCorrelatedValuePropagationPass());
   MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
@@ -220,6 +225,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
     if (BBVectorize) {
       MPM.add(createBBVectorizePass());
       MPM.add(createInstructionCombiningPass());
+      addExtensionsToPM(EP_Peephole, MPM);
       if (OptLevel > 1 && UseGVNAfterVectorization)
         MPM.add(createGVNPass());           // Remove redundancies
       else
@@ -233,6 +239,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
   MPM.add(createInstructionCombiningPass());  // Clean up after everything.
+  addExtensionsToPM(EP_Peephole, MPM);

   // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
   // pass manager that we are specifically trying to avoid. To prevent this
@@ -245,6 +252,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
     // as function calls, so that we can only pass them when the vectorizer
     // changed the code.
     MPM.add(createInstructionCombiningPass());
+    addExtensionsToPM(EP_Peephole, MPM);
     MPM.add(createCFGSimplificationPass());

     if (!DisableUnrollLoops)
@@ -297,6 +305,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
   // function pointers.  When this happens, we often have to resolve varargs
   // calls, etc, so let instcombine do this.
   PM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, PM);

   // Inline small functions
   if (RunInliner)
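Every instcombine run scheduled above is now followed by addExtensionsToPM(EP_Peephole, ...), giving front ends and plugins a hook at each peephole point. A sketch of registering such an extension (assuming the 3.5-era PassManagerBuilder extension API and header paths; createMyPeepholePass is a hypothetical pass factory):

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    // Hypothetical factory; stands in for a real FunctionPass of your own.
    extern Pass *createMyPeepholePass();

    // Callback invoked at every EP_Peephole point, i.e. right after each
    // instcombine run that the pipeline schedules.
    static void addMyPeephole(const PassManagerBuilder &,
                              PassManagerBase &PM) {
      PM.add(createMyPeepholePass());
    }

    // Registering the callback once wires the pass into all peephole points.
    static RegisterStandardPasses
        RegisterMyPeephole(PassManagerBuilder::EP_Peephole, addMyPeephole);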
@@ -315,6 +324,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,

   // The IPO passes may leave cruft around.  Clean up after them.
   PM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, PM);
   PM.add(createJumpThreadingPass());

   // Break up allocas
@@ -334,11 +344,17 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
   // Nuke dead stores.
   PM.add(createDeadStoreEliminationPass());

-  // More loops are countable try to vectorize them.
+  // More loops are countable; try to optimize them.
+  PM.add(createIndVarSimplifyPass());
+  PM.add(createLoopDeletionPass());
   PM.add(createLoopVectorizePass(true, true));

+  // More scalar chains could be vectorized due to more alias information
+  PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
   // Cleanup and simplify the code after the scalar optimizations.
   PM.add(createInstructionCombiningPass());
+  addExtensionsToPM(EP_Peephole, PM);

   PM.add(createJumpThreadingPass());
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index c61ec5e..b2c4a09 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -14,7 +14,6 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "prune-eh"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -30,6 +29,8 @@
 #include <algorithm>
 using namespace llvm;

+#define DEBUG_TYPE "prune-eh"
+
 STATISTIC(NumRemoved, "Number of invokes removed");
 STATISTIC(NumUnreach, "Number of noreturn calls optimized");

@@ -85,7 +86,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
   for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
        (!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
     Function *F = (*I)->getFunction();
-    if (F == 0) {
+    if (!F) {
       SCCMightUnwind = true;
       SCCMightReturn = true;
     } else if (F->isDeclaration() || F->mayBeOverridden()) {
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 1c6532d..956991a 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -14,13 +14,14 @@
 //
 //===----------------------------------------------------------------------===//

-#define DEBUG_TYPE "strip-dead-prototypes"
 #include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
 using namespace llvm;

+#define DEBUG_TYPE "strip-dead-prototypes"
+
 STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");

 namespace {
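PruneEH and StripDeadPrototypes above show the DEBUG_TYPE convention this patch applies tree-wide: include headers first, then define the file's debug tag. A small sketch of the resulting pattern (illustrative; uses the DEBUG macro and dbgs() from llvm/Support/Debug.h):

    // Defining DEBUG_TYPE before the includes leaks this file's tag into every
    // included header, and conflicts once headers guard or use the macro
    // themselves. So: include everything first, then define the local tag.
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    #define DEBUG_TYPE "prune-eh"

    static void example() {
      // Printed only under -debug (or -debug-only=prune-eh in +Asserts builds).
      DEBUG(dbgs() << "pruning EH info\n");
    }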
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 6d0be8f..1abbccc 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -192,7 +192,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
 /// Find values that are marked as llvm.used.
 static void findUsedValues(GlobalVariable *LLVMUsed,
                            SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
-  if (LLVMUsed == 0) return;
+  if (!LLVMUsed) return;
   UsedValues.insert(LLVMUsed);

   ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 822e146..e04b1be 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -20,34 +20,38 @@
 #include "llvm/Pass.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"

+#define DEBUG_TYPE "instcombine"
+
 namespace llvm {
-  class CallSite;
-  class DataLayout;
-  class TargetLibraryInfo;
-  class DbgDeclareInst;
-  class MemIntrinsic;
-  class MemSetInst;
+class CallSite;
+class DataLayout;
+class TargetLibraryInfo;
+class DbgDeclareInst;
+class MemIntrinsic;
+class MemSetInst;

 /// SelectPatternFlavor - We can match a variety of different patterns for
 /// select operations.
 enum SelectPatternFlavor {
   SPF_UNKNOWN = 0,
-  SPF_SMIN, SPF_UMIN,
-  SPF_SMAX, SPF_UMAX
-  //SPF_ABS - TODO.
+  SPF_SMIN,
+  SPF_UMIN,
+  SPF_SMAX,
+  SPF_UMAX
+  // SPF_ABS - TODO.
 };

 /// getComplexity:  Assign a complexity or rank value to LLVM Values...
 /// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
 static inline unsigned getComplexity(Value *V) {
   if (isa<Instruction>(V)) {
-    if (BinaryOperator::isNeg(V) ||
-        BinaryOperator::isFNeg(V) ||
+    if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
         BinaryOperator::isNot(V))
       return 3;
     return 4;
   }
-  if (isa<Argument>(V)) return 3;
+  if (isa<Argument>(V))
+    return 3;
   return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
 }

@@ -60,18 +64,18 @@ static inline Constant *SubOne(Constant *C) {
   return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
 }

-
 /// InstCombineIRInserter - This is an IRBuilder insertion helper that works
 /// just like the normal insertion helper, but also adds any new instructions
 /// to the instcombine worklist.
 class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
     : public IRBuilderDefaultInserter<true> {
   InstCombineWorklist &Worklist;
+
 public:
   InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}

-  void InsertHelper(Instruction *I, const Twine &Name,
-                    BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+  void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+                    BasicBlock::iterator InsertPt) const {
     IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
     Worklist.Add(I);
   }
@@ -79,13 +83,14 @@ public:

 /// InstCombiner - The -instcombine pass.
 class LLVM_LIBRARY_VISIBILITY InstCombiner
-    : public FunctionPass,
-      public InstVisitor<InstCombiner, Instruction*> {
+    : public FunctionPass,
+      public InstVisitor<InstCombiner, Instruction *> {
   const DataLayout *DL;
   TargetLibraryInfo *TLI;
   bool MadeIRChange;
   LibCallSimplifier *Simplifier;
   bool MinimizeSize;
+
 public:
   /// Worklist - All of the instructions that need to be simplified.
InstCombineWorklist Worklist; @@ -96,7 +101,7 @@ public: BuilderTy *Builder; static char ID; // Pass identification, replacement for typeid - InstCombiner() : FunctionPass(ID), DL(0), Builder(0) { + InstCombiner() : FunctionPass(ID), DL(nullptr), Builder(nullptr) { MinimizeSize = false; initializeInstCombinerPass(*PassRegistry::getPassRegistry()); } @@ -144,9 +149,9 @@ public: Instruction *visitAnd(BinaryOperator &I); Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS); Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS); - Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, - Value *A, Value *B, Value *C); - Instruction *visitOr (BinaryOperator &I); + Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, + Value *B, Value *C); + Instruction *visitOr(BinaryOperator &I); Instruction *visitXor(BinaryOperator &I); Instruction *visitShl(BinaryOperator &I); Instruction *visitAShr(BinaryOperator &I); @@ -156,12 +161,11 @@ public: Constant *RHSC); Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, - ConstantInt *AndCst = 0); + ConstantInt *AndCst = nullptr); Instruction *visitFCmpInst(FCmpInst &I); Instruction *visitICmpInst(ICmpInst &I); Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI); - Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, - Instruction *LHS, + Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS, ConstantInt *RHS); Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, ConstantInt *DivRHS); @@ -171,7 +175,7 @@ public: ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); - Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1, + Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I); Instruction *commonCastTransforms(CastInst &CI); Instruction *commonPointerCastTransforms(CastInst &CI); @@ -188,9 +192,8 @@ public: Instruction *visitIntToPtr(IntToPtrInst &CI); Instruction *visitBitCast(BitCastInst &CI); Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI); - Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, - Instruction *FI); - Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*); + Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI); + Instruction *FoldSelectIntoOp(SelectInst &SI, Value *, Value *); Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, Value *A, Value *B, Instruction &Outer, SelectPatternFlavor SPF2, Value *C); @@ -209,6 +212,7 @@ public: Instruction *visitStoreInst(StoreInst &SI); Instruction *visitBranchInst(BranchInst &BI); Instruction *visitSwitchInst(SwitchInst &SI); + Instruction *visitInsertValueInst(InsertValueInst &IV); Instruction *visitInsertElementInst(InsertElementInst &IE); Instruction *visitExtractElementInst(ExtractElementInst &EI); Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI); @@ -216,21 +220,21 @@ public: Instruction *visitLandingPadInst(LandingPadInst &LI); // visitInstruction - Specify what to return for unhandled instructions... 
- Instruction *visitInstruction(Instruction &I) { return 0; } + Instruction *visitInstruction(Instruction &I) { return nullptr; } private: bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; - Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const; + Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; Type *FindElementAtOffset(Type *PtrTy, int64_t Offset, - SmallVectorImpl &NewIndices); + SmallVectorImpl &NewIndices); Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI); /// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually /// results in any code being generated and is interesting to optimize out. If /// the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. - bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V, + bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V, Type *Ty); Instruction *visitCallSite(CallSite CS); @@ -251,10 +255,10 @@ public: // in the program. Add the new instruction to the worklist. // Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) { - assert(New && New->getParent() == 0 && + assert(New && !New->getParent() && "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); - BB->getInstList().insert(&Old, New); // Insert inst + BB->getInstList().insert(&Old, New); // Insert inst Worklist.Add(New); return New; } @@ -274,7 +278,7 @@ public: // modified. // Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) { - Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. + Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist. // If we are replacing the instruction with itself, this must be in a // segment of unreachable code, so just clobber the instruction. @@ -306,12 +310,12 @@ public: Worklist.Remove(&I); I.eraseFromParent(); MadeIRChange = true; - return 0; // Don't do anything with FI + return nullptr; // Don't do anything with FI } - void ComputeMaskedBits(Value *V, APInt &KnownZero, - APInt &KnownOne, unsigned Depth = 0) const { - return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, DL, Depth); + void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, + unsigned Depth = 0) const { + return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth); } bool MaskedValueIsZero(Value *V, const APInt &Mask, @@ -323,7 +327,6 @@ public: } private: - /// SimplifyAssociativeOrCommutative - This performs a few simplifications for /// operators which are associative or commutative. bool SimplifyAssociativeOrCommutative(BinaryOperator &I); @@ -337,12 +340,10 @@ private: /// SimplifyDemandedUseBits - Attempts to replace V with a simpler value /// based on the demanded bits. - Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth); - bool SimplifyDemandedBits(Use &U, APInt DemandedMask, - APInt& KnownZero, APInt& KnownOne, - unsigned Depth=0); + Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth); + bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero, + APInt &KnownOne, unsigned Depth = 0); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. 
Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, @@ -355,7 +356,9 @@ private: bool SimplifyDemandedInstructionBits(Instruction &Inst); Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, - APInt& UndefElts, unsigned Depth = 0); + APInt &UndefElts, unsigned Depth = 0); + + Value *SimplifyVectorOp(BinaryOperator &Inst); // FoldOpIntoPhi - Given a binary operator, cast instruction, or select // which has a PHI node as operand #0, see if we can fold the instruction @@ -372,21 +375,19 @@ private: Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); - Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, ConstantInt *AndRHS, BinaryOperator &TheAnd); Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask, bool isSub, Instruction &I); - Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, - bool isSigned, bool Inside); + Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, + bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); Instruction *MatchBSwap(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Instruction *SimplifyMemTransfer(MemIntrinsic *MI); Instruction *SimplifyMemSet(MemSetInst *MI); - Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); /// Descale - Return a value X such that Val = X * Scale, or null if none. If @@ -394,8 +395,8 @@ private: Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap); }; - - } // end namespace llvm. +#undef DEBUG_TYPE + #endif diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 97910c7..c37a9cf 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -20,6 +20,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + namespace { /// Class representing coefficient of floating-point addend. 
@@ -112,12 +114,12 @@ namespace { /// class FAddend { public: - FAddend() { Val = 0; } + FAddend() { Val = nullptr; } Value *getSymVal (void) const { return Val; } const FAddendCoef &getCoef(void) const { return Coeff; } - bool isConstant() const { return Val == 0; } + bool isConstant() const { return Val == nullptr; } bool isZero() const { return Coeff.isZero(); } void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } @@ -154,7 +156,7 @@ namespace { /// class FAddCombine { public: - FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} + FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {} Value *simplify(Instruction *FAdd); private: @@ -348,8 +350,8 @@ Value *FAddendCoef::getValue(Type *Ty) const { // unsigned FAddend::drillValueDownOneStep (Value *Val, FAddend &Addend0, FAddend &Addend1) { - Instruction *I = 0; - if (Val == 0 || !(I = dyn_cast(Val))) + Instruction *I = nullptr; + if (!Val || !(I = dyn_cast(Val))) return 0; unsigned Opcode = I->getOpcode(); @@ -359,16 +361,16 @@ unsigned FAddend::drillValueDownOneStep Value *Opnd0 = I->getOperand(0); Value *Opnd1 = I->getOperand(1); if ((C0 = dyn_cast(Opnd0)) && C0->isZero()) - Opnd0 = 0; + Opnd0 = nullptr; if ((C1 = dyn_cast(Opnd1)) && C1->isZero()) - Opnd1 = 0; + Opnd1 = nullptr; if (Opnd0) { if (!C0) Addend0.set(1, Opnd0); else - Addend0.set(C0, 0); + Addend0.set(C0, nullptr); } if (Opnd1) { @@ -376,7 +378,7 @@ unsigned FAddend::drillValueDownOneStep if (!C1) Addend.set(1, Opnd1); else - Addend.set(C1, 0); + Addend.set(C1, nullptr); if (Opcode == Instruction::FSub) Addend.negate(); } @@ -385,7 +387,7 @@ unsigned FAddend::drillValueDownOneStep return Opnd0 && Opnd1 ? 2 : 1; // Both operands are zero. Weird! - Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); + Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr); return 1; } @@ -443,13 +445,13 @@ Value *FAddCombine::performFactorization(Instruction *I) { Instruction *I1 = dyn_cast(I->getOperand(1)); if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) - return 0; + return nullptr; bool isMpy = false; if (I0->getOpcode() == Instruction::FMul) isMpy = true; else if (I0->getOpcode() != Instruction::FDiv) - return 0; + return nullptr; Value *Opnd0_0 = I0->getOperand(0); Value *Opnd0_1 = I0->getOperand(1); @@ -461,8 +463,8 @@ Value *FAddCombine::performFactorization(Instruction *I) { // (x*y) +/- (x*z) x y z // (y/x) +/- (z/x) x y z // - Value *Factor = 0; - Value *AddSub0 = 0, *AddSub1 = 0; + Value *Factor = nullptr; + Value *AddSub0 = nullptr, *AddSub1 = nullptr; if (isMpy) { if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) @@ -481,7 +483,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { } if (!Factor) - return 0; + return nullptr; FastMathFlags Flags; Flags.setUnsafeAlgebra(); @@ -495,7 +497,7 @@ Value *FAddCombine::performFactorization(Instruction *I) { if (ConstantFP *CFP = dyn_cast(NewAddSub)) { const APFloat &F = CFP->getValueAPF(); if (!F.isNormal()) - return 0; + return nullptr; } else if (Instruction *II = dyn_cast(NewAddSub)) II->setFastMathFlags(Flags); @@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) { // Currently we are not able to handle vector type. if (I->getType()->isVectorTy()) - return 0; + return nullptr; assert((I->getOpcode() == Instruction::FAdd || I->getOpcode() == Instruction::FSub) && "Expect add/sub"); @@ -568,7 +570,7 @@ Value *FAddCombine::simplify(Instruction *I) { // been optimized into "I = Y - X" in the previous steps. 
// const FAddendCoef &CE = Opnd0.getCoef(); - return CE.isOne() ? Opnd0.getSymVal() : 0; + return CE.isOne() ? Opnd0.getSymVal() : nullptr; } // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] @@ -614,7 +616,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // constant close to supper-expr(s) will potentially reveal some optimization // opportunities in super-expr(s). // - const FAddend *ConstAdd = 0; + const FAddend *ConstAdd = nullptr; // Simplified addends are placed . AddendVect SimpVect; @@ -647,7 +649,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { if (T && T->getSymVal() == Val) { // Set null such that next iteration of the outer loop will not process // this addend again. - Addends[SameSymIdx] = 0; + Addends[SameSymIdx] = nullptr; SimpVect.push_back(T); } } @@ -661,7 +663,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { // Pop all addends being folded and push the resulting folded addend. SimpVect.resize(StartIdx); - if (Val != 0) { + if (Val) { if (!R.isZero()) { SimpVect.push_back(&R); } @@ -698,7 +700,7 @@ Value *FAddCombine::createNaryFAdd // unsigned InstrNeeded = calcInstrNumber(Opnds); if (InstrNeeded > InstrQuota) - return 0; + return nullptr; initCreateInstNum(); @@ -710,7 +712,7 @@ Value *FAddCombine::createNaryFAdd // N-ary addition has at most two instructions, and we don't need to worry // about tree-height when constructing the N-ary addition. - Value *LastVal = 0; + Value *LastVal = nullptr; bool LastValNeedNeg = false; // Iterate the addends, creating fadd/fsub using adjacent two addends. @@ -870,10 +872,10 @@ Value *FAddCombine::createAddendVal // static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy()) - return 0; + return nullptr; Instruction *I = dyn_cast(V); - if (I == 0) return 0; + if (!I) return nullptr; if (I->getOpcode() == Instruction::Mul) if ((CST = dyn_cast(I->getOperand(1)))) @@ -884,7 +886,7 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST); return I->getOperand(0); } - return 0; + return nullptr; } @@ -918,6 +920,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) return ReplaceInstUsesWith(I, V); @@ -942,7 +947,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (ZI->getSrcTy()->isIntegerTy(1)) return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); - Value *XorLHS = 0; ConstantInt *XorRHS = 0; + Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr; if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); const APInt &RHSVal = CI->getValue(); @@ -974,7 +979,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { IntegerType *IT = cast(I.getType()); APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne); if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); @@ -1042,11 +1047,11 @@ Instruction 
*InstCombiner::visitAdd(BinaryOperator &I) { if (IntegerType *IT = dyn_cast(I.getType())) { APInt LHSKnownOne(IT->getBitWidth(), 0); APInt LHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); if (LHSKnownZero != 0) { APInt RHSKnownOne(IT->getBitWidth(), 0); APInt RHSKnownZero(IT->getBitWidth(), 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); // No bits in common -> bitwise or. if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) @@ -1174,7 +1179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // Check for (x & y) + (x ^ y) { - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && (match(LHS, m_And(m_Specific(A), m_Specific(B))) || match(LHS, m_And(m_Specific(B), m_Specific(A))))) @@ -1186,13 +1191,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); @@ -1266,7 +1274,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) && match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) { if (C1 == C2) { - Constant *Z1=0, *Z2=0; + Constant *Z1=nullptr, *Z2=nullptr; Value *A, *B, *C=C1; if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) { Z1 = dyn_cast(A1); A = A2; @@ -1290,7 +1298,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } @@ -1305,7 +1313,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize // this. bool Swapped = false; - GEPOperator *GEP1 = 0, *GEP2 = 0; + GEPOperator *GEP1 = nullptr, *GEP2 = nullptr; // For now we require one side to be the base pointer "A" or a constant // GEP derived from it. @@ -1343,9 +1351,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, // Avoid duplicating the arithmetic if GEP2 has non-constant indices and // multiple users. - if (GEP1 == 0 || - (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) - return 0; + if (!GEP1 || + (GEP2 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) + return nullptr; // Emit the offset of the GEP and an intptr_t. 
Value *Result = EmitGEPOffset(GEP1); @@ -1368,6 +1376,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) return ReplaceInstUsesWith(I, V); @@ -1393,7 +1404,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Constant *C = dyn_cast(Op0)) { // C - ~X == X + (1+C) - Value *X = 0; + Value *X = nullptr; if (match(Op1, m_Not(m_Value(X)))) return BinaryOperator::CreateAdd(X, AddOne(C)); @@ -1451,9 +1462,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } if (Op1->hasOneUse()) { - Value *X = 0, *Y = 0, *Z = 0; - Constant *C = 0; - Constant *CI = 0; + Value *X = nullptr, *Y = nullptr, *Z = nullptr; + Constant *C = nullptr; + Constant *CI = nullptr; // (X - (Y - Z)) --> (X + (Z - Y)). if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) @@ -1532,12 +1543,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, Res); } - return 0; + return nullptr; } Instruction *InstCombiner::visitFSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL)) return ReplaceInstUsesWith(I, V); @@ -1574,5 +1588,5 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { return ReplaceInstUsesWith(I, V); } - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 2c1bfc7..4f5d65a 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -20,6 +20,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// isFreeToInvert - Return true if the specified value is free to invert (apply /// ~ to). This happens in cases where the ~ can be eliminated. static inline bool isFreeToInvert(Value *V) { @@ -50,7 +52,7 @@ static inline Value *dyn_castNotVal(Value *V) { // Constants can be considered to be not'ed values... if (ConstantInt *C = dyn_cast(V)) return ConstantInt::get(C->getType(), ~C->getValue()); - return 0; + return nullptr; } /// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp @@ -123,7 +125,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, ConstantInt *AndRHS, BinaryOperator &TheAnd) { Value *X = Op->getOperand(0); - Constant *Together = 0; + Constant *Together = nullptr; if (!Op->isShift()) Together = ConstantExpr::getAnd(AndRHS, OpRHS); @@ -250,7 +252,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, } break; } - return 0; + return nullptr; } /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise @@ -332,12 +334,12 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, Instruction &I) { Instruction *LHSI = dyn_cast(LHS); if (!LHSI || LHSI->getNumOperands() != 2 || - !isa(LHSI->getOperand(1))) return 0; + !isa(LHSI->getOperand(1))) return nullptr; ConstantInt *N = cast(LHSI->getOperand(1)); switch (LHSI->getOpcode()) { - default: return 0; + default: return nullptr; case Instruction::And: if (ConstantExpr::getAnd(N, Mask) == Mask) { // If the AndRHS is a power of two minus one (0+1+), this is simple. 
@@ -357,7 +359,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, break; } } - return 0; + return nullptr; case Instruction::Or: case Instruction::Xor: // If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0 @@ -365,7 +367,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth() && ConstantExpr::getAnd(N, Mask)->isNullValue()) break; - return 0; + return nullptr; } if (isSub) @@ -418,12 +420,12 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, ConstantInt *BCst = dyn_cast(B); ConstantInt *CCst = dyn_cast(C); bool icmp_eq = (SCC == ICmpInst::ICMP_EQ); - bool icmp_abit = (ACst != 0 && !ACst->isZero() && + bool icmp_abit = (ACst && !ACst->isZero() && ACst->getValue().isPowerOf2()); - bool icmp_bbit = (BCst != 0 && !BCst->isZero() && + bool icmp_bbit = (BCst && !BCst->isZero() && BCst->getValue().isPowerOf2()); unsigned result = 0; - if (CCst != 0 && CCst->isZero()) { + if (CCst && CCst->isZero()) { // if C is zero, then both A and B qualify as mask result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_Mask_AllZeroes | @@ -455,7 +457,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, FoldMskICmp_AMask_NotMixed) : (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_AMask_Mixed)); - } else if (ACst != 0 && CCst != 0 && + } else if (ACst && CCst && ConstantExpr::getAnd(ACst, CCst) == CCst) { result |= (icmp_eq ? FoldMskICmp_AMask_Mixed : FoldMskICmp_AMask_NotMixed); @@ -470,7 +472,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C, FoldMskICmp_BMask_NotMixed) : (FoldMskICmp_Mask_AllZeroes | FoldMskICmp_BMask_Mixed)); - } else if (BCst != 0 && CCst != 0 && + } else if (BCst && CCst && ConstantExpr::getAnd(BCst, CCst) == CCst) { result |= (icmp_eq ? FoldMskICmp_BMask_Mixed : FoldMskICmp_BMask_NotMixed); @@ -570,12 +572,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, Value *L11,*L12,*L21,*L22; // Check whether the icmp can be decomposed into a bit test. if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) { - L21 = L22 = L1 = 0; + L21 = L22 = L1 = nullptr; } else { // Look for ANDs in the LHS icmp. if (!L1->getType()->isIntegerTy()) { // You can icmp pointers, for example. They really aren't masks. - L11 = L12 = 0; + L11 = L12 = nullptr; } else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) { // Any icmp can be viewed as being trivially masked; if it allows us to // remove one, it's worth it. @@ -585,7 +587,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, if (!L2->getType()->isIntegerTy()) { // You can icmp pointers, for example. They really aren't masks. 
- L21 = L22 = 0; + L21 = L22 = nullptr; } else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) { L21 = L2; L22 = Constant::getAllOnesValue(L2->getType()); @@ -608,7 +610,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, } else { return 0; } - E = R2; R1 = 0; ok = true; + E = R2; R1 = nullptr; ok = true; } else if (R1->getType()->isIntegerTy()) { if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) { // As before, model no mask as a trivial mask if it'll let us do an @@ -665,11 +667,11 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A, /// into a single (icmp(A & X) ==/!= Y) static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, llvm::InstCombiner::BuilderTy* Builder) { - Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr; ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate(); unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS, LHSCC, RHSCC); - if (mask == 0) return 0; + if (mask == 0) return nullptr; assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) && "foldLogOpOfMaskedICmpsHelper must return an equality predicate."); @@ -722,9 +724,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // their actual values. This isn't strictly, necessary, just a "handle the // easy cases for now" decision. ConstantInt *BCst = dyn_cast(B); - if (BCst == 0) return 0; + if (!BCst) return nullptr; ConstantInt *DCst = dyn_cast(D); - if (DCst == 0) return 0; + if (!DCst) return nullptr; if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) { // (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and @@ -763,11 +765,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // (icmp ne (A & B), B) & (icmp eq (A & D), D) // with B and D, having a single bit set ConstantInt *CCst = dyn_cast(C); - if (CCst == 0) return 0; + if (!CCst) return nullptr; if (LHSCC != NEWCC) CCst = dyn_cast( ConstantExpr::getXor(BCst, CCst) ); ConstantInt *ECst = dyn_cast(E); - if (ECst == 0) return 0; + if (!ECst) return nullptr; if (RHSCC != NEWCC) ECst = dyn_cast( ConstantExpr::getXor(DCst, ECst) ); ConstantInt* MCst = dyn_cast( @@ -776,13 +778,13 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd, // if there is a conflict we should actually return a false for the // whole construct if (!MCst->isZero()) - return 0; + return nullptr; Value *newOr1 = Builder->CreateOr(B, D); Value *newOr2 = ConstantExpr::getOr(CCst, ECst); Value *newAnd = Builder->CreateAnd(A, newOr1); return Builder->CreateICmp(NEWCC, newAnd, newOr2); } - return 0; + return nullptr; } /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. 
@@ -811,7 +813,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0); ConstantInt *LHSCst = dyn_cast(LHS->getOperand(1)); ConstantInt *RHSCst = dyn_cast(RHS->getOperand(1)); - if (LHSCst == 0 || RHSCst == 0) return 0; + if (!LHSCst || !RHSCst) return nullptr; if (LHSCst == RHSCst && LHSCC == RHSCC) { // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) @@ -835,7 +837,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC && LHS->hasOneUse() && RHS->hasOneUse()) { Value *V; - ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + ConstantInt *AndCst, *SmallCst = nullptr, *BigCst = nullptr; // (trunc x) == C1 & (and x, CA) == C2 // (and x, CA) == C2 & (trunc x) == C1 @@ -866,14 +868,14 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // From here on, we only handle: // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; + if (Val != Val2) return nullptr; // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; + return nullptr; // Make a constant range that's the intersection of the two icmp ranges. // If the intersection is empty, we know that the result is false. @@ -887,7 +889,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // We can't fold (ugt x, C) & (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; + return nullptr; // Ensure that the larger constant is on the RHS. bool ShouldSwap; @@ -1016,7 +1018,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { break; } - return 0; + return nullptr; } /// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of @@ -1026,7 +1028,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (LHS->getPredicate() == FCmpInst::FCMP_ORD && RHS->getPredicate() == FCmpInst::FCMP_ORD) { if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) - return 0; + return nullptr; // (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y) if (ConstantFP *LHSC = dyn_cast(LHS->getOperand(1))) @@ -1043,7 +1045,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { if (isa(LHS->getOperand(1)) && isa(RHS->getOperand(1))) return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0)); - return 0; + return nullptr; } Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); @@ -1096,7 +1098,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { } } - return 0; + return nullptr; } @@ -1104,6 +1106,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAndInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1198,7 +1203,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { // If this is an integer truncation, and if the source is an 'and' with // immediate, transform it. This frequently occurs for bitfield accesses. 
{ - Value *X = 0; ConstantInt *YC = 0; + Value *X = nullptr; ConstantInt *YC = nullptr; if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) { // Change: and (trunc (and X, YC) to T), C2 // into : and (trunc X to T), trunc(YC) & C2 @@ -1231,7 +1236,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } { - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; // (A|B) & ~(A&B) -> A^B if (match(Op0, m_Or(m_Value(A), m_Value(B))) && match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) && @@ -1339,7 +1344,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { } { - Value *X = 0; + Value *X = nullptr; bool OpsSwapped = false; // Canonicalize SExt or Not to the LHS if (match(Op1, m_SExt(m_Value())) || @@ -1366,7 +1371,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { std::swap(Op0, Op1); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// CollectBSwapParts - Analyze the specified subexpression and see if it is @@ -1498,7 +1503,7 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { if (!ITy || ITy->getBitWidth() % 16 || // ByteMask only allows up to 32-byte values. ITy->getBitWidth() > 32*8) - return 0; // Can only bswap pairs of bytes. Can't do vectors. + return nullptr; // Can only bswap pairs of bytes. Can't do vectors. /// ByteValues - For each byte of the result, we keep track of which value /// defines each byte. @@ -1508,16 +1513,16 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { // Try to find all the pieces corresponding to the bswap. uint32_t ByteMask = ~0U >> (32-ByteValues.size()); if (CollectBSwapParts(&I, 0, ByteMask, ByteValues)) - return 0; + return nullptr; // Check to see if all of the bytes come from the same value. Value *V = ByteValues[0]; - if (V == 0) return 0; // Didn't find a byte? Must be zero. + if (!V) return nullptr; // Didn't find a byte? Must be zero. // Check to make sure that all of the bytes come from the same value. for (unsigned i = 1, e = ByteValues.size(); i != e; ++i) if (ByteValues[i] != V) - return 0; + return nullptr; Module *M = I.getParent()->getParent()->getParent(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy); return CallInst::Create(F, V); @@ -1529,10 +1534,10 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { static Instruction *MatchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D) { // If A is not a select of -1/0, this cannot match. - Value *Cond = 0; + Value *Cond = nullptr; if (!match(A, m_SExt(m_Value(Cond))) || !Cond->getType()->isIntegerTy(1)) - return 0; + return nullptr; // ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B. if (match(D, m_Not(m_SExt(m_Specific(Cond))))) @@ -1545,7 +1550,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, return SelectInst::Create(Cond, C, D); if (match(B, m_SExt(m_Not(m_Specific(Cond))))) return SelectInst::Create(Cond, C, D); - return 0; + return nullptr; } /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. 
@@ -1566,8 +1571,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { LAnd->getOpcode() == Instruction::And && RAnd->getOpcode() == Instruction::And) { - Value *Mask = 0; - Value *Masked = 0; + Value *Mask = nullptr; + Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && isKnownToBeAPowerOfTwo(LAnd->getOperand(1)) && isKnownToBeAPowerOfTwo(RAnd->getOperand(1))) { @@ -1608,7 +1613,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { if (LHS->hasOneUse() || RHS->hasOneUse()) { // (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1) // (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1) - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) { B = Val; if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1)) @@ -1632,7 +1637,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - if (LHSCst == 0 || RHSCst == 0) return 0; + if (!LHSCst || !RHSCst) return nullptr; if (LHSCst == RHSCst && LHSCC == RHSCC) { // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) @@ -1653,18 +1658,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { // From here on, we only handle: // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. - if (Val != Val2) return 0; + if (Val != Val2) return nullptr; // ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere. if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE || RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE || LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) - return 0; + return nullptr; // We can't fold (ugt x, C) | (sgt x, C2). if (!PredicatesFoldable(LHSCC, RHSCC)) - return 0; + return nullptr; // Ensure that the larger constant is on the RHS. bool ShouldSwap; @@ -1809,7 +1814,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { } break; } - return 0; + return nullptr; } /// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of @@ -1837,7 +1842,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { isa(RHS->getOperand(1))) return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0)); - return 0; + return nullptr; } Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1); @@ -1869,7 +1874,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder); } } - return 0; + return nullptr; } /// FoldOrWithConstants - This helper function folds: @@ -1884,27 +1889,30 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) { Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A, Value *B, Value *C) { ConstantInt *CI1 = dyn_cast(C); - if (!CI1) return 0; + if (!CI1) return nullptr; - Value *V1 = 0; - ConstantInt *CI2 = 0; - if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0; + Value *V1 = nullptr; + ConstantInt *CI2 = nullptr; + if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return nullptr; APInt Xor = CI1->getValue() ^ CI2->getValue(); - if (!Xor.isAllOnesValue()) return 0; + if (!Xor.isAllOnesValue()) return nullptr; if (V1 == A || V1 == B) { Value *NewOp = Builder->CreateAnd((V1 == A) ? 
B : A, CI1); return BinaryOperator::CreateOr(NewOp, V1); } - return 0; + return nullptr; } Instruction *InstCombiner::visitOr(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyOrInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1918,7 +1926,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return &I; if (ConstantInt *RHS = dyn_cast(Op1)) { - ConstantInt *C1 = 0; Value *X = 0; + ConstantInt *C1 = nullptr; Value *X = nullptr; // (X & C1) | C2 --> (X | C2) & (C1|C2) // iff (C1 & C2) == 0. if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) && @@ -1949,8 +1957,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return NV; } - Value *A = 0, *B = 0; - ConstantInt *C1 = 0, *C2 = 0; + Value *A = nullptr, *B = nullptr; + ConstantInt *C1 = nullptr, *C2 = nullptr; // (A | B) | C and A | (B | C) -> bswap if possible. // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. @@ -1981,10 +1989,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } // (A & C)|(B & D) - Value *C = 0, *D = 0; + Value *C = nullptr, *D = nullptr; if (match(Op0, m_And(m_Value(A), m_Value(C))) && match(Op1, m_And(m_Value(B), m_Value(D)))) { - Value *V1 = 0, *V2 = 0; + Value *V1 = nullptr, *V2 = nullptr; C1 = dyn_cast(C); C2 = dyn_cast(D); if (C1 && C2) { // (A & C1)|(B & C2) @@ -2028,7 +2036,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2) // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0. - ConstantInt *C3 = 0, *C4 = 0; + ConstantInt *C3 = nullptr, *C4 = nullptr; if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) && (C3->getValue() & ~C1->getValue()) == 0 && match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) && @@ -2220,7 +2228,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { // Since this OR statement hasn't been optimized further yet, we hope // that this transformation will allow the new ORs to be optimized. { - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (Op0->hasOneUse() && Op1->hasOneUse() && match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) && match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) { @@ -2230,13 +2238,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } Instruction *InstCombiner::visitXor(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyXorInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -2494,5 +2505,5 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? 
&I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0bc3ac7..d4b583b 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -22,6 +22,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumSimplified, "Number of library calls simplified"); /// getPromotedType - Return the specified type promoted as it would be to pass @@ -70,7 +72,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. ConstantInt *MemOpLength = dyn_cast(MI->getArgOperand(2)); - if (MemOpLength == 0) return 0; + if (!MemOpLength) return nullptr; // Source and destination pointer types are always "i8*" for intrinsic. See // if the size is something we can handle with a single primitive load/store. @@ -80,7 +82,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { assert(Size && "0-sized memory transferring should be removed already."); if (Size > 8 || (Size&(Size-1))) - return 0; // If not 1/2/4/8 bytes, exit. + return nullptr; // If not 1/2/4/8 bytes, exit. // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = @@ -99,7 +101,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // dest address will be promotable. See if we can find a better type than the // integer datatype. Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); - MDNode *CopyMD = 0; + MDNode *CopyMD = nullptr; if (StrippedDest != MI->getArgOperand(0)) { Type *SrcETy = cast(StrippedDest->getType()) ->getElementType(); @@ -163,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { ConstantInt *LenC = dyn_cast(MI->getLength()); ConstantInt *FillC = dyn_cast(MI->getValue()); if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) - return 0; + return nullptr; uint64_t Len = LenC->getLimitedValue(); Alignment = MI->getAlignment(); assert(Len && "0-sized memory setting should be removed already."); @@ -191,7 +193,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return MI; } - return 0; + return nullptr; } /// visitCallInst - CallInst simplification. This mostly only handles folding @@ -233,7 +235,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // No other transformations apply to volatile transfers. 
if (MI->isVolatile()) - return 0; + return nullptr; // If we have a memmove and the source operation is a constant global, // then the source and dest pointers can't alias, so we can change this @@ -276,11 +278,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint64_t Size; if (getObjectSize(II->getArgOperand(0), Size, DL, TLI)) return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size)); - return 0; + return nullptr; } case Intrinsic::bswap: { Value *IIOperand = II->getArgOperand(0); - Value *X = 0; + Value *X = nullptr; // bswap(bswap(x)) -> x if (match(IIOperand, m_BSwap(m_Value(X)))) @@ -320,7 +322,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); + computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned TrailingZeros = KnownOne.countTrailingZeros(); APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); if ((Mask & KnownZero) == Mask) @@ -338,7 +340,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne); + computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne); unsigned LeadingZeros = KnownOne.countLeadingZeros(); APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); if ((Mask & KnownZero) == Mask) @@ -353,14 +355,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { uint32_t BitWidth = IT->getBitWidth(); APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; if (LHSKnownNegative || LHSKnownPositive) { APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; if (LHSKnownNegative && RHSKnownNegative) { @@ -447,10 +449,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { APInt LHSKnownZero(BitWidth, 0); APInt LHSKnownOne(BitWidth, 0); - ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); APInt RHSKnownZero(BitWidth, 0); APInt RHSKnownOne(BitWidth, 0); - ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); // Get the largest possible values for each operand. APInt LHSMax = ~LHSKnownZero; @@ -554,6 +556,79 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + // Constant fold << Ci. + // FIXME: We don't handle _dq because it's a shift of an i128, but is + // represented in the IR as <2 x i64>. A per element shift is wrong. 
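Note: the cttz fold above becomes a constant exactly when the lowest known-one bit has only known-zero bits below it. A standalone sketch of that known-bits reasoning with plain 32-bit masks instead of APInt (illustrative; assumes KnownOne != 0 and a GCC/Clang builtin):

    #include <cassert>
    #include <cstdint>

    // If some bit is known one and every bit below it is known zero,
    // cttz of the value is a compile-time constant.
    static bool foldCttz(uint32_t KnownZero, uint32_t KnownOne, unsigned &Out) {
      unsigned TrailingZeros = __builtin_ctz(KnownOne); // lowest known-one bit
      uint32_t Mask = (1u << TrailingZeros) - 1;        // all bits below it
      if ((Mask & KnownZero) != Mask)
        return false;                                   // some low bit unknown
      Out = TrailingZeros;
      return true;
    }

    int main() {
      unsigned R;
      // Value is known to look like ...???1000: bit 3 one, bits 0-2 zero.
      assert(foldCttz(/*KnownZero=*/0x7, /*KnownOne=*/0x8, R) && R == 3);
      // Bit 0 not known zero: cannot fold.
      assert(!foldCttz(/*KnownZero=*/0x6, /*KnownOne=*/0x8, R));
      return 0;
    }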
+ case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: { + // Simplify if count is constant. To 0 if >= BitWidth, + // otherwise to shl/lshr. + auto CDV = dyn_cast(II->getArgOperand(1)); + auto CInt = dyn_cast(II->getArgOperand(1)); + if (!CDV && !CInt) + break; + ConstantInt *Count; + if (CDV) + Count = cast(CDV->getElementAsConstant(0)); + else + Count = CInt; + + auto Vec = II->getArgOperand(0); + auto VT = cast(Vec->getType()); + if (Count->getZExtValue() > + VT->getElementType()->getPrimitiveSizeInBits() - 1) + return ReplaceInstUsesWith( + CI, ConstantAggregateZero::get(Vec->getType())); + + bool isPackedShiftLeft = true; + switch (II->getIntrinsicID()) { + default : break; + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break; + } + + unsigned VWidth = VT->getNumElements(); + // Get a constant vector of the same type as the first operand. + auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); + if (isPackedShiftLeft) + return BinaryOperator::CreateShl(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); + + return BinaryOperator::CreateLShr(Vec, + Builder->CreateVectorSplat(VWidth, VTCI)); + } case Intrinsic::x86_sse41_pmovsxbw: case Intrinsic::x86_sse41_pmovsxwd: @@ -576,6 +651,153 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_sse4a_insertqi: { + // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top + // ones undef + // TODO: eventually we should lower this intrinsic to IR + if (auto CIWidth = dyn_cast(II->getArgOperand(2))) { + if (auto CIStart = dyn_cast(II->getArgOperand(3))) { + if (CIWidth->equalsInt(64) && CIStart->isZero()) { + Value *Vec = II->getArgOperand(1); + Value *Undef = UndefValue::get(Vec->getType()); + const uint32_t Mask[] = { 0, 2 }; + return ReplaceInstUsesWith( + CI, + Builder->CreateShuffleVector( + Vec, Undef, ConstantDataVector::get( + II->getContext(), ArrayRef(Mask)))); + + } else if (auto Source = + dyn_cast(II->getArgOperand(0))) { + if (Source->hasOneUse() && + Source->getArgOperand(1) == II->getArgOperand(1)) { + // If the source of the insert has only one use and it's another + // insert (and they're both inserting from the same vector), try to + // bundle both together. 
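Note: a scalar model of the packed-shift constant folding added above, instantiated for psrli.w semantics: a per-element logical shift, where a count of 16 or more yields an all-zero vector (unlike C, where such a shift is undefined). Illustrative only, not part of the patch:

    #include <array>
    #include <cassert>
    #include <cstdint>

    // With a constant count the intrinsic becomes a plain per-element shift;
    // a count >= the element bit width folds to the zero vector.
    static std::array<uint16_t, 8> psrliW(std::array<uint16_t, 8> V,
                                          unsigned Count) {
      std::array<uint16_t, 8> R{};
      if (Count > 15)          // count >= element bit width: result is zero
        return R;
      for (unsigned i = 0; i != 8; ++i)
        R[i] = V[i] >> Count;  // otherwise a per-element lshr
      return R;
    }

    int main() {
      std::array<uint16_t, 8> V{0x8000, 0x0002, 0xFFFF, 1, 2, 3, 4, 5};
      assert(psrliW(V, 1)[0] == 0x4000);
      assert((psrliW(V, 16) == std::array<uint16_t, 8>{}));
      return 0;
    }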
+ auto CISourceWidth = + dyn_cast(Source->getArgOperand(2)); + auto CISourceStart = + dyn_cast(Source->getArgOperand(3)); + if (CISourceStart && CISourceWidth) { + unsigned Start = CIStart->getZExtValue(); + unsigned Width = CIWidth->getZExtValue(); + unsigned End = Start + Width; + unsigned SourceStart = CISourceStart->getZExtValue(); + unsigned SourceWidth = CISourceWidth->getZExtValue(); + unsigned SourceEnd = SourceStart + SourceWidth; + unsigned NewStart, NewWidth; + bool ShouldReplace = false; + if (Start <= SourceStart && SourceStart <= End) { + NewStart = Start; + NewWidth = std::max(End, SourceEnd) - NewStart; + ShouldReplace = true; + } else if (SourceStart <= Start && Start <= SourceEnd) { + NewStart = SourceStart; + NewWidth = std::max(SourceEnd, End) - NewStart; + ShouldReplace = true; + } + + if (ShouldReplace) { + Constant *ConstantWidth = ConstantInt::get( + II->getArgOperand(2)->getType(), NewWidth, false); + Constant *ConstantStart = ConstantInt::get( + II->getArgOperand(3)->getType(), NewStart, false); + Value *Args[4] = { Source->getArgOperand(0), + II->getArgOperand(1), ConstantWidth, + ConstantStart }; + Module *M = CI.getParent()->getParent()->getParent(); + Value *F = + Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); + return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args)); + } + } + } + } + } + } + break; + } + + case Intrinsic::x86_sse41_pblendvb: + case Intrinsic::x86_sse41_blendvps: + case Intrinsic::x86_sse41_blendvpd: + case Intrinsic::x86_avx_blendv_ps_256: + case Intrinsic::x86_avx_blendv_pd_256: + case Intrinsic::x86_avx2_pblendvb: { + // Convert blendv* to vector selects if the mask is constant. + // This optimization is convoluted because the intrinsic is defined as + // getting a vector of floats or doubles for the ps and pd versions. + // FIXME: That should be changed. + Value *Mask = II->getArgOperand(2); + if (auto C = dyn_cast(Mask)) { + auto Tyi1 = Builder->getInt1Ty(); + auto SelectorType = cast(Mask->getType()); + auto EltTy = SelectorType->getElementType(); + unsigned Size = SelectorType->getNumElements(); + unsigned BitWidth = + EltTy->isFloatTy() + ? 32 + : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth()); + assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) && + "Wrong arguments for variable blend intrinsic"); + SmallVector Selectors; + for (unsigned I = 0; I < Size; ++I) { + // The intrinsics only read the top bit + uint64_t Selector; + if (BitWidth == 8) + Selector = C->getElementAsInteger(I); + else + Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue(); + Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1))); + } + auto NewSelector = ConstantVector::get(Selectors); + return SelectInst::Create(NewSelector, II->getArgOperand(1), + II->getArgOperand(0), "blendv"); + } else { + break; + } + } + + case Intrinsic::x86_avx_vpermilvar_ps: + case Intrinsic::x86_avx_vpermilvar_ps_256: + case Intrinsic::x86_avx_vpermilvar_pd: + case Intrinsic::x86_avx_vpermilvar_pd_256: { + // Convert vpermil* to shufflevector if the mask is constant. + Value *V = II->getArgOperand(1); + unsigned Size = cast(V->getType())->getNumElements(); + assert(Size == 8 || Size == 4 || Size == 2); + uint32_t Indexes[8]; + if (auto C = dyn_cast(V)) { + // The intrinsics only read one or two bits, clear the rest. 
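Note: the insertqi bundling above reduces to merging two half-open bit ranges when they touch or overlap. A standalone sketch of that interval arithmetic (names invented for illustration, not the patch's API):

    #include <algorithm>
    #include <cassert>

    // Two inserted bit ranges [Start, End) and [SourceStart, SourceEnd) are
    // combined into one range when they touch or overlap.
    static bool mergeRanges(unsigned Start, unsigned Width,
                            unsigned SourceStart, unsigned SourceWidth,
                            unsigned &NewStart, unsigned &NewWidth) {
      unsigned End = Start + Width, SourceEnd = SourceStart + SourceWidth;
      if (Start <= SourceStart && SourceStart <= End) {
        NewStart = Start;
        NewWidth = std::max(End, SourceEnd) - NewStart;
        return true;
      }
      if (SourceStart <= Start && Start <= SourceEnd) {
        NewStart = SourceStart;
        NewWidth = std::max(SourceEnd, End) - NewStart;
        return true;
      }
      return false; // disjoint: keep both inserts
    }

    int main() {
      unsigned S, W;
      assert(mergeRanges(0, 16, 8, 16, S, W) && S == 0 && W == 24); // overlap
      assert(!mergeRanges(0, 8, 16, 8, S, W));                      // disjoint
      return 0;
    }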
+ for (unsigned I = 0; I < Size; ++I) { + uint32_t Index = C->getElementAsInteger(I) & 0x3; + if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd || + II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) + Index >>= 1; + Indexes[I] = Index; + } + } else if (isa(V)) { + for (unsigned I = 0; I < Size; ++I) + Indexes[I] = 0; + } else { + break; + } + // The _256 variants are a bit trickier since the mask bits always index + // into the corresponding 128 half. In order to convert to a generic + // shuffle, we have to make that explicit. + if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 || + II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) { + for (unsigned I = Size / 2; I < Size; ++I) + Indexes[I] += Size / 2; + } + auto NewC = + ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size)); + auto V1 = II->getArgOperand(0); + auto V2 = UndefValue::get(V1->getType()); + auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC); + return ReplaceInstUsesWith(CI, Shuffle); + } + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. if (Constant *Mask = dyn_cast(II->getArgOperand(2))) { @@ -586,8 +808,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { bool AllEltsOk = true; for (unsigned i = 0; i != 16; ++i) { Constant *Elt = Mask->getAggregateElement(i); - if (Elt == 0 || - !(isa(Elt) || isa(Elt))) { + if (!Elt || !(isa(Elt) || isa(Elt))) { AllEltsOk = false; break; } @@ -612,7 +833,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { cast(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. - if (ExtractedElts[Idx] == 0) { + if (!ExtractedElts[Idx]) { ExtractedElts[Idx] = Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, Builder->getInt32(Idx&15)); @@ -655,8 +876,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: - case Intrinsic::arm64_neon_smull: - case Intrinsic::arm64_neon_umull: { + case Intrinsic::aarch64_neon_smull: + case Intrinsic::aarch64_neon_umull: { Value *Arg0 = II->getArgOperand(0); Value *Arg1 = II->getArgOperand(1); @@ -667,7 +888,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // Check for constant LHS & RHS - in this case we just simplify. bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu || - II->getIntrinsicID() == Intrinsic::arm64_neon_umull); + II->getIntrinsicID() == Intrinsic::aarch64_neon_umull); VectorType *NewVT = cast(II->getType()); if (Constant *CV0 = dyn_cast(Arg0)) { if (Constant *CV1 = dyn_cast(Arg1)) { @@ -776,14 +997,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS, // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk, // strcat_chk and strncat_chk. Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) { - if (CI->getCalledFunction() == 0) return 0; + if (!CI->getCalledFunction()) return nullptr; if (Value *With = Simplifier->optimizeCall(CI)) { ++NumSimplified; return CI->use_empty() ? 
CI : ReplaceInstUsesWith(*CI, With); } - return 0; + return nullptr; } static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { @@ -792,35 +1013,35 @@ static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) { Value *Underlying = TrampMem->stripPointerCasts(); if (Underlying != TrampMem && (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem)) - return 0; + return nullptr; if (!isa(Underlying)) - return 0; + return nullptr; - IntrinsicInst *InitTrampoline = 0; + IntrinsicInst *InitTrampoline = nullptr; for (User *U : TrampMem->users()) { IntrinsicInst *II = dyn_cast(U); if (!II) - return 0; + return nullptr; if (II->getIntrinsicID() == Intrinsic::init_trampoline) { if (InitTrampoline) // More than one init_trampoline writes to this value. Give up. - return 0; + return nullptr; InitTrampoline = II; continue; } if (II->getIntrinsicID() == Intrinsic::adjust_trampoline) // Allow any number of calls to adjust.trampoline. continue; - return 0; + return nullptr; } // No call to init.trampoline found. if (!InitTrampoline) - return 0; + return nullptr; // Check that the alloca is being used in the expected way. if (InitTrampoline->getOperand(0) != TrampMem) - return 0; + return nullptr; return InitTrampoline; } @@ -837,9 +1058,9 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, II->getOperand(0) == TrampMem) return II; if (Inst->mayWriteToMemory()) - return 0; + return nullptr; } - return 0; + return nullptr; } // Given a call to llvm.adjust.trampoline, find and return the corresponding @@ -851,7 +1072,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { IntrinsicInst *AdjustTramp = dyn_cast(Callee); if (!AdjustTramp || AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline) - return 0; + return nullptr; Value *TrampMem = AdjustTramp->getOperand(0); @@ -859,7 +1080,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) { return IT; if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem)) return IT; - return 0; + return nullptr; } // visitCallSite - Improvements for call and invoke instructions. @@ -874,7 +1095,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // arguments of the call/invoke. Value *Callee = CS.getCalledValue(); if (!isa(Callee) && transformConstExprCastCall(CS)) - return 0; + return nullptr; if (Function *CalleeF = dyn_cast(Callee)) // If the call and callee calling conventions don't match, this call must @@ -899,7 +1120,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // change the callee to a null pointer. cast(OldCall)->setCalledFunction( Constant::getNullValue(CalleeF->getType())); - return 0; + return nullptr; } if (isa(Callee) || isa(Callee)) { @@ -911,7 +1132,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { if (isa(CS.getInstruction())) { // Can't remove an invoke because we cannot change the CFG. - return 0; + return nullptr; } // This instruction is not reachable, just remove it. We insert a store to @@ -959,7 +1180,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { if (I) return EraseInstFromFunction(*I); } - return Changed ? CS.getInstruction() : 0; + return Changed ? 
CS.getInstruction() : nullptr; } // transformConstExprCastCall - If the callee is a constexpr cast of a function, @@ -968,7 +1189,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { bool InstCombiner::transformConstExprCastCall(CallSite CS) { Function *Callee = dyn_cast(CS.getCalledValue()->stripPointerCasts()); - if (Callee == 0) + if (!Callee) return false; Instruction *Caller = CS.getInstruction(); const AttributeSet &CallerPAL = CS.getAttributes(); @@ -1044,7 +1265,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1, Attribute::ByVal)) { PointerType *ParamPTy = dyn_cast(ParamTy); - if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || DL == 0) + if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL) return false; Type *CurElTy = ActTy->getPointerElementType(); @@ -1235,7 +1456,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, // If the call already has the 'nest' attribute somewhere then give up - // otherwise 'nest' would occur twice after splicing in the chain. if (Attrs.hasAttrSomewhere(Attribute::Nest)) - return 0; + return nullptr; assert(Tramp && "transformCallThroughTrampoline called with incorrect CallSite."); @@ -1247,7 +1468,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS, const AttributeSet &NestAttrs = NestF->getAttributes(); if (!NestAttrs.isEmpty()) { unsigned NestIdx = 1; - Type *NestTy = 0; + Type *NestTy = nullptr; AttributeSet NestAttr; // Look for a parameter marked with the 'nest' attribute. diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index c2b862a..356803a 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear /// expression. If so, decompose it, returning some value X, such that Val is /// X*Scale+Offset. @@ -79,7 +81,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { // This requires DataLayout to get the alloca alignment and size information. - if (!DL) return 0; + if (!DL) return nullptr; PointerType *PTy = cast(CI.getType()); @@ -89,26 +91,26 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // Get the type really allocated and the type casted to. Type *AllocElTy = AI.getAllocatedType(); Type *CastElTy = PTy->getElementType(); - if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0; + if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr; unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy); unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy); - if (CastElTyAlign < AllocElTyAlign) return 0; + if (CastElTyAlign < AllocElTyAlign) return nullptr; // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the // same, we open the door to infinite loops of various kinds. 
- if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr; uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy); uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy); - if (CastElTySize == 0 || AllocElTySize == 0) return 0; + if (CastElTySize == 0 || AllocElTySize == 0) return nullptr; // If the allocation has multiple uses, only promote it if we're not // shrinking the amount of memory being allocated. uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy); uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy); - if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0; + if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr; // See if we can satisfy the modulus by pulling a scale out of the array // size argument. @@ -120,10 +122,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 || - (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0; + (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr; unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize; - Value *Amt = 0; + Value *Amt = nullptr; if (Scale == 1) { Amt = NumElements; } else { @@ -141,6 +143,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt); New->setAlignment(AI.getAlignment()); New->takeName(&AI); + New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it @@ -169,7 +172,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, // Otherwise, it must be an instruction. Instruction *I = cast(V); - Instruction *Res = 0; + Instruction *Res = nullptr; unsigned Opc = I->getOpcode(); switch (Opc) { case Instruction::Add: @@ -245,11 +248,11 @@ isEliminableCastPair( Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opcode); Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(SrcTy) : 0; + DL->getIntPtrType(SrcTy) : nullptr; Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(MidTy) : 0; + DL->getIntPtrType(MidTy) : nullptr; Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ? - DL->getIntPtrType(DstTy) : 0; + DL->getIntPtrType(DstTy) : nullptr; unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, SrcIntPtrTy, MidIntPtrTy, DstIntPtrTy); @@ -318,7 +321,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return NV; } - return 0; + return nullptr; } /// CanEvaluateTruncated - Return true if we can evaluate the specified @@ -470,7 +473,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { } // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion. 
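Note: a simplified model of the alloca-promotion arithmetic in PromoteCastOfAllocation above; the real code also pulls a constant offset out of the array size, which this sketch omits. Illustrative only:

    #include <cassert>
    #include <cstdint>

    // Casting an alloca of NumElems elements of size AllocSize to an element
    // type of size CastSize is only done when the byte count divides evenly.
    static bool promotedCount(uint64_t AllocSize, uint64_t NumElems,
                              uint64_t CastSize, uint64_t &NewCount) {
      if (AllocSize == 0 || CastSize == 0)
        return false;
      if ((AllocSize * NumElems) % CastSize != 0)
        return false;          // would leave a partial trailing element
      NewCount = (AllocSize * NumElems) / CastSize;
      return true;
    }

    int main() {
      uint64_t N;
      assert(promotedCount(4, 8, 16, N) && N == 2); // 32 bytes -> 2 x 16 bytes
      assert(!promotedCount(4, 3, 8, N));           // 12 bytes don't split into 8s
      return 0;
    }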
- Value *A = 0; ConstantInt *Cst = 0; + Value *A = nullptr; ConstantInt *Cst = nullptr; if (Src->hasOneUse() && match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) { // We have three types to worry about here, the type of A, the source of @@ -502,7 +505,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) { ConstantExpr::getTrunc(Cst, CI.getType())); } - return 0; + return nullptr; } /// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations @@ -550,7 +553,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, // If Op1C some other power of two, convert: uint32_t BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne); + computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? @@ -598,8 +601,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); - ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS); - ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS); + computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS); + computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS); if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) { APInt KnownBits = KnownZeroLHS | KnownOneLHS; @@ -627,7 +630,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, } } - return 0; + return nullptr; } /// CanEvaluateZExtd - Determine if the specified value can be computed in the @@ -758,7 +761,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // If this zero extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this zext. if (CI.hasOneUse() && isa(CI.user_back())) - return 0; + return nullptr; // If one of the common conversion will work, do it. if (Instruction *Result = commonCastTransforms(CI)) @@ -883,7 +886,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1)); } - return 0; + return nullptr; } /// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations @@ -918,7 +921,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ unsigned BitWidth = Op1C->getType()->getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(Op0, KnownZero, KnownOne); + computeKnownBits(Op0, KnownZero, KnownOne); APInt KnownZeroMask(~KnownZero); if (KnownZeroMask.isPowerOf2()) { @@ -967,7 +970,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { } } - return 0; + return nullptr; } /// CanEvaluateSExtd - Return true if we can take the specified value @@ -1039,7 +1042,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // If this sign extend is only used by a truncate, let the truncate be // eliminated before we try to optimize this sext. if (CI.hasOneUse() && isa(CI.user_back())) - return 0; + return nullptr; if (Instruction *I = commonCastTransforms(CI)) return I; @@ -1107,9 +1110,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { // into: // %a = shl i32 %i, 30 // %d = ashr i32 %a, 30 - Value *A = 0; + Value *A = nullptr; // TODO: Eventually this could be subsumed by EvaluateInDifferentType. 
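Note: the transformZExtICmp logic above exploits known bits: when at most one bit of the compared value can be set, the compare-plus-zext collapses to shifts. A sketch with a fixed mask of 8 (illustrative only, not the patch's code):

    #include <cassert>
    #include <cstdint>

    // zext(icmp ne (X & 8), 0) is just bit 3 of X shifted into position 0,
    // with no compare at all.
    static uint32_t viaIcmp(uint32_t X) { return (X & 8) != 0 ? 1u : 0u; }
    static uint32_t viaShift(uint32_t X) { return (X & 8) >> 3; }

    int main() {
      for (uint32_t X = 0; X != 64; ++X)
        assert(viaIcmp(X) == viaShift(X));
      return 0;
    }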
- ConstantInt *BA = 0, *CA = 0; + ConstantInt *BA = nullptr, *CA = nullptr; if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)), m_ConstantInt(CA))) && BA == CA && A->getType() == CI.getType()) { @@ -1121,7 +1124,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { return BinaryOperator::CreateAShr(A, ShAmtV); } - return 0; + return nullptr; } @@ -1133,7 +1136,7 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) { (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo); if (!losesInfo) return ConstantFP::get(CFP->getContext(), F); - return 0; + return nullptr; } /// LookThroughFPExtensions - If this is an fp extension instruction, look @@ -1345,7 +1348,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitFPExt(CastInst &CI) { @@ -1354,7 +1357,7 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) { Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { Instruction *OpI = dyn_cast(FI.getOperand(0)); - if (OpI == 0) + if (!OpI) return commonCastTransforms(FI); // fptoui(uitofp(X)) --> X @@ -1374,7 +1377,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) { Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) { Instruction *OpI = dyn_cast(FI.getOperand(0)); - if (OpI == 0) + if (!OpI) return commonCastTransforms(FI); // fptosi(sitofp(X)) --> X @@ -1421,7 +1424,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - return 0; + return nullptr; } /// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint) @@ -1520,7 +1523,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, // there yet. if (SrcTy->getElementType()->getPrimitiveSizeInBits() != DestTy->getElementType()->getPrimitiveSizeInBits()) - return 0; + return nullptr; SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements()); InVal = IC.Builder->CreateBitCast(InVal, SrcTy); @@ -1598,7 +1601,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, ElementIndex = Elements.size() - ElementIndex - 1; // Fail if multiple elements are inserted into this slot. - if (Elements[ElementIndex] != 0) + if (Elements[ElementIndex]) return false; Elements[ElementIndex] = V; @@ -1638,7 +1641,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, if (!V->hasOneUse()) return false; Instruction *I = dyn_cast(V); - if (I == 0) return false; + if (!I) return false; switch (I->getOpcode()) { default: return false; // Unhandled case. case Instruction::BitCast: @@ -1659,7 +1662,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. ConstantInt *CI = dyn_cast(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; Shift += CI->getZExtValue(); if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; return CollectInsertionElements(I->getOperand(0), Shift, @@ -1687,7 +1690,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { // We need to know the target byte order to perform this optimization. 
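Note: FitsInFPType above accepts a constant only when conversion to the narrower format loses no information. The same round-trip test, stated for double vs. float (illustrative only):

    #include <cassert>

    // An fptrunc of a constant is safe when converting to the narrower type
    // and back reproduces the original value exactly.
    static bool fitsInFloat(double D) {
      return static_cast<double>(static_cast<float>(D)) == D;
    }

    int main() {
      assert(fitsInFloat(0.5));   // exactly representable in float
      assert(fitsInFloat(1.0));
      assert(!fitsInFloat(0.1));  // 0.1 rounds differently in float
      return 0;
    }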
- if (!IC.getDataLayout()) return 0; + if (!IC.getDataLayout()) return nullptr; VectorType *DestVecTy = cast(CI.getType()); Value *IntInput = CI.getOperand(0); @@ -1695,14 +1698,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, SmallVector Elements(DestVecTy->getNumElements()); if (!CollectInsertionElements(IntInput, 0, Elements, DestVecTy->getElementType(), IC)) - return 0; + return nullptr; // If we succeeded, we know that all of the element are specified by Elements // or are zero if Elements has a null entry. Recast this as a set of // insertions. Value *Result = Constant::getNullValue(CI.getType()); for (unsigned i = 0, e = Elements.size(); i != e; ++i) { - if (Elements[i] == 0) continue; // Unset element. + if (!Elements[i]) continue; // Unset element. Result = IC.Builder->CreateInsertElement(Result, Elements[i], IC.Builder->getInt32(i)); @@ -1716,14 +1719,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, /// bitcast. The various long double bitcasts can't get in here. static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ // We need to know the target byte order to perform this optimization. - if (!IC.getDataLayout()) return 0; + if (!IC.getDataLayout()) return nullptr; Value *Src = CI.getOperand(0); Type *DestTy = CI.getType(); // If this is a bitcast from int to float, check to see if the int is an // extraction from a vector. - Value *VecInput = 0; + Value *VecInput = nullptr; // bitcast(trunc(bitcast(somevector))) if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && isa(VecInput->getType())) { @@ -1747,7 +1750,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ } // bitcast(trunc(lshr(bitcast(somevector), cst)) - ConstantInt *ShAmt = 0; + ConstantInt *ShAmt = nullptr; if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), m_ConstantInt(ShAmt)))) && isa(VecInput->getType())) { @@ -1769,7 +1772,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); } } - return 0; + return nullptr; } Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 8c0ad52..02e8bf1 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -24,6 +24,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + static ConstantInt *getOne(Constant *C) { return ConstantInt::get(cast(C->getType()), 1); } @@ -218,15 +220,15 @@ Instruction *InstCombiner:: FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI, ConstantInt *AndCst) { // We need TD information to know the pointer size unless this is inbounds. - if (!GEP->isInBounds() && DL == 0) - return 0; + if (!GEP->isInBounds() && !DL) + return nullptr; Constant *Init = GV->getInitializer(); if (!isa(Init) && !isa(Init)) - return 0; + return nullptr; uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); - if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. + if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays. // There are many forms of this optimization we can handle, for now, just do // the simple index into a single-dimensional array. 
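Note: a host-side model of the bitcast(trunc(lshr(bitcast <4 x float>, 32*K))) fold above, assuming a little-endian layout, which is why the code insists on having DataLayout. Illustrative only:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // On a little-endian target, shifting the 128-bit image of the vector
    // right by 32*K and truncating to 32 bits just reads element K.
    int main() {
      float V[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      unsigned char Bytes[16];
      std::memcpy(Bytes, V, 16);               // the i128 image of the vector
      for (unsigned K = 0; K != 4; ++K) {
        uint32_t Bits;
        std::memcpy(&Bits, Bytes + 4 * K, 4);  // lshr by 32*K, trunc to i32
        float Elt;
        std::memcpy(&Elt, &Bits, 4);
        assert(Elt == V[K]);                   // same as extractelement V, K
      }
      return 0;
    }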
@@ -236,7 +238,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, !isa(GEP->getOperand(1)) || !cast(GEP->getOperand(1))->isZero() || isa(GEP->getOperand(2))) - return 0; + return nullptr; // Check that indices after the variable are constants and in-range for the // type they index. Collect the indices. This is typically for arrays of @@ -246,18 +248,18 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Type *EltTy = Init->getType()->getArrayElementType(); for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) { ConstantInt *Idx = dyn_cast(GEP->getOperand(i)); - if (Idx == 0) return 0; // Variable index. + if (!Idx) return nullptr; // Variable index. uint64_t IdxVal = Idx->getZExtValue(); - if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index. + if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index. if (StructType *STy = dyn_cast(EltTy)) EltTy = STy->getElementType(IdxVal); else if (ArrayType *ATy = dyn_cast(EltTy)) { - if (IdxVal >= ATy->getNumElements()) return 0; + if (IdxVal >= ATy->getNumElements()) return nullptr; EltTy = ATy->getElementType(); } else { - return 0; // Unknown type. + return nullptr; // Unknown type. } LaterIndices.push_back(IdxVal); @@ -296,7 +298,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Constant *CompareRHS = cast(ICI.getOperand(1)); for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) { Constant *Elt = Init->getAggregateElement(i); - if (Elt == 0) return 0; + if (!Elt) return nullptr; // If this is indexing an array of structures, get the structure element. if (!LaterIndices.empty()) @@ -321,7 +323,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // If we can't compute the result for any of the elements, we have to give // up evaluating the entire conditional. - if (!isa(C)) return 0; + if (!isa(C)) return nullptr; // Otherwise, we know if the comparison is true or false for this element, // update our state machines. @@ -375,7 +377,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined && SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined && FalseRangeEnd == Overdefined) - return 0; + return nullptr; } // Now that we've scanned the entire array, emit our new comparison(s). We @@ -467,7 +469,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, // of this load, replace it with computation that does: // ((magic_cst >> i) & 1) != 0 { - Type *Ty = 0; + Type *Ty = nullptr; // Look for an appropriate type: // - The type of Idx if the magic fits @@ -480,7 +482,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, else if (ArrayElementCount <= 32) Ty = Type::getInt32Ty(Init->getContext()); - if (Ty != 0) { + if (Ty) { Value *V = Builder->CreateIntCast(Idx, Ty, false); V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V); V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V); @@ -488,7 +490,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, } } - return 0; + return nullptr; } @@ -533,7 +535,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // If there are no variable indices, we must have a constant offset, just // evaluate it the general way. - if (i == e) return 0; + if (i == e) return nullptr; Value *VariableIdx = GEP->getOperand(i); // Determine the scale factor of the variable element. 
For example, this is @@ -543,7 +545,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // Verify that there are no other variable indices. If so, emit the hard way. for (++i, ++GTI; i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast(GEP->getOperand(i)); - if (!CI) return 0; + if (!CI) return nullptr; // Compute the aggregate offset of constant indices. if (CI->isZero()) continue; @@ -587,7 +589,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) { // multiple of the variable scale. int64_t NewOffs = Offset / (int64_t)VariableScale; if (Offset != NewOffs*(int64_t)VariableScale) - return 0; + return nullptr; // Okay, we can do this evaluation. Start by converting the index to intptr. if (VariableIdx->getType() != IntPtrTy) @@ -608,7 +610,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // e.g. "&foo[0] (RHS)) @@ -623,7 +625,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this); // If not, synthesize the offset the hard way. - if (Offset == 0) + if (!Offset) Offset = EmitGEPOffset(GEPLHS); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, Constant::getNullValue(Offset->getType())); @@ -661,7 +663,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // Otherwise, the base pointers are different and the indices are // different, bail out. - return 0; + return nullptr; } // If one of the GEPs has all zero indices, recurse. @@ -729,7 +731,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R); } } - return 0; + return nullptr; } /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". @@ -812,11 +814,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // if it finds it. bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv; if (!ICI.isEquality() && DivIsSigned != ICI.isSigned()) - return 0; + return nullptr; if (DivRHS->isZero()) - return 0; // The ProdOV computation fails on divide by zero. + return nullptr; // The ProdOV computation fails on divide by zero. if (DivIsSigned && DivRHS->isAllOnesValue()) - return 0; // The overflow computation also screws up here + return nullptr; // The overflow computation also screws up here if (DivRHS->isOne()) { // This eliminates some funny cases with INT_MIN. ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X. @@ -850,7 +852,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // overflow variable is set to 0 if it's corresponding bound variable is valid // -1 if overflowed off the bottom end, or +1 if overflowed off the top end. int LoOverflow = 0, HiOverflow = 0; - Constant *LoBound = 0, *HiBound = 0; + Constant *LoBound = nullptr, *HiBound = nullptr; if (!DivIsSigned) { // udiv // e.g. X/5 op 3 --> [15, 20) @@ -890,7 +892,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, HiBound = cast(ConstantExpr::getNeg(RangeSize)); if (HiBound == DivRHS) { // -INTMIN = INTMIN HiOverflow = 1; // [INTMIN+1, overflow) - HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN + HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN } } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos // e.g. 
X/-5 op 3 --> [-19, -14) @@ -964,20 +966,20 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, uint32_t TypeBits = CmpRHSV.getBitWidth(); uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits); if (ShAmtVal >= TypeBits || ShAmtVal == 0) - return 0; + return nullptr; if (!ICI.isEquality()) { // If we have an unsigned comparison and an ashr, we can't simplify this. // Similarly for signed comparisons with lshr. if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr)) - return 0; + return nullptr; // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv // by a power of 2. Since we already have logic to simplify these, // transform to div and then simplify the resultant comparison. if (Shr->getOpcode() == Instruction::AShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1)) - return 0; + return nullptr; // Revisit the shift (to delete it). Worklist.Add(Shr); @@ -994,7 +996,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, // If the builder folded the binop, just return it. BinaryOperator *TheDiv = dyn_cast(Tmp); - if (TheDiv == 0) + if (!TheDiv) return &ICI; // Otherwise, fold this div/compare. @@ -1037,7 +1039,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, Mask, Shr->getName()+".mask"); return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS); } - return 0; + return nullptr; } @@ -1056,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(), SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits(); APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0); - ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne); + computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne); // If all the high bits are known, we can do this xform. if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) { @@ -1181,10 +1183,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // access. BinaryOperator *Shift = dyn_cast(LHSI->getOperand(0)); if (Shift && !Shift->isShift()) - Shift = 0; + Shift = nullptr; ConstantInt *ShAmt; - ShAmt = Shift ? dyn_cast(Shift->getOperand(1)) : 0; + ShAmt = Shift ? dyn_cast(Shift->getOperand(1)) : nullptr; // This seemingly simple opportunity to fold away a shift turns out to // be rather complicated. See PR17827 @@ -1777,7 +1779,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } } } - return 0; + return nullptr; } /// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst). @@ -1794,7 +1796,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // integer type is the same size as the pointer type. if (DL && LHSCI->getOpcode() == Instruction::PtrToInt && DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) { - Value *RHSOp = 0; + Value *RHSOp = nullptr; if (Constant *RHSC = dyn_cast(ICI.getOperand(1))) { RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy); } else if (PtrToIntInst *RHSC = dyn_cast(ICI.getOperand(1))) { @@ -1812,7 +1814,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Enforce this. 
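Note: the FoldICmpDivCst machinery above turns a compare against a udiv result into a range test; e.g. X/5 == 3 holds exactly on the half-open range [15, 20). A standalone check (illustrative only):

    #include <cassert>
    #include <cstdint>

    // For unsigned X, "X / 5 == 3" is equivalent to the range test
    // 15 <= X < 20, so the divide disappears from the comparison.
    int main() {
      for (uint32_t X = 0; X != 100; ++X) {
        bool Div = (X / 5 == 3);
        bool Range = (X >= 15 && X < 20);
        assert(Div == Range);
      }
      return 0;
    }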
if (LHSCI->getOpcode() != Instruction::ZExt && LHSCI->getOpcode() != Instruction::SExt) - return 0; + return nullptr; bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt; bool isSignedCmp = ICI.isSigned(); @@ -1821,12 +1823,12 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // Not an extension from the same type? RHSCIOp = CI->getOperand(0); if (RHSCIOp->getType() != LHSCIOp->getType()) - return 0; + return nullptr; // If the signedness of the two casts doesn't agree (i.e. one is a sext // and the other is a zext), then we can't handle this. if (CI->getOpcode() != LHSCI->getOpcode()) - return 0; + return nullptr; // Deal with equality cases early. if (ICI.isEquality()) @@ -1844,7 +1846,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // If we aren't dealing with a constant on the RHS, exit early ConstantInt *CI = dyn_cast(ICI.getOperand(1)); if (!CI) - return 0; + return nullptr; // Compute the constant that would happen if we truncated to SrcTy then // reextended to DestTy. @@ -1873,7 +1875,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) { // by SimplifyICmpInst, so only deal with the tricky case. if (isSignedCmp || !isSignedExt) - return 0; + return nullptr; // Evaluate the comparison for LT (we invert for GT below). LE and GE cases // should have been folded away previously and not enter in here. @@ -1909,12 +1911,12 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // In order to eliminate the add-with-constant, the compare can be its only // use. Instruction *AddWithCst = cast(I.getOperand(0)); - if (!AddWithCst->hasOneUse()) return 0; + if (!AddWithCst->hasOneUse()) return nullptr; // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow. - if (!CI2->getValue().isPowerOf2()) return 0; + if (!CI2->getValue().isPowerOf2()) return nullptr; unsigned NewWidth = CI2->getValue().countTrailingZeros(); - if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0; + if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr; // The width of the new add formed is 1 more than the bias. ++NewWidth; @@ -1922,7 +1924,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // Check to see that CI1 is an all-ones value with NewWidth bits. if (CI1->getBitWidth() == NewWidth || CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth)) - return 0; + return nullptr; // This is only really a signed overflow check if the inputs have been // sign-extended; check for that condition. For example, if CI2 is 2^31 and @@ -1930,7 +1932,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1; if (IC.ComputeNumSignBits(A) < NeededSignBits || IC.ComputeNumSignBits(B) < NeededSignBits) - return 0; + return nullptr; // In order to replace the original add with a narrower // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant @@ -1946,8 +1948,8 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // original add had another add which was then immediately truncated, we // could still do the transformation. 
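Note: the cast-compare fold above computes what the RHS constant would become after a truncate/re-extend round trip; if it changes, the compare has a constant or simpler answer. A sketch for zext from i8 (illustrative only):

    #include <cassert>
    #include <cstdint>

    // For "icmp ult (zext i8 X to i32), 300": 300 does not survive a
    // trunc-to-i8 / zext-back round trip, and indeed every zext'd i8 is
    // below 300, so the compare is always true.
    int main() {
      const int32_t C = 300;
      bool RoundTrips = (int32_t(uint8_t(C)) == C); // 300 -> 44: no
      assert(!RoundTrips);
      for (int X = 0; X <= 255; ++X)
        assert(int32_t(uint8_t(X)) < C);            // icmp is always true
      return 0;
    }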
TruncInst *TI = dyn_cast(U); - if (TI == 0 || - TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0; + if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth) + return nullptr; } // If the pattern matches, truncate the inputs to the narrower type and @@ -1983,11 +1985,11 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, InstCombiner &IC) { // Don't bother doing this transformation for pointers, don't do it for // vectors. - if (!isa(OrigAddV->getType())) return 0; + if (!isa(OrigAddV->getType())) return nullptr; // If the add is a constant expr, then we don't bother transforming it. Instruction *OrigAdd = dyn_cast(OrigAddV); - if (OrigAdd == 0) return 0; + if (!OrigAdd) return nullptr; Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1); @@ -2008,6 +2010,236 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, return ExtractValueInst::Create(Call, 1, "uadd.overflow"); } +/// \brief Recognize and process idiom involving test for multiplication +/// overflow. +/// +/// The caller has matched a pattern of the form: +/// I = cmp u (mul(zext A, zext B), V +/// The function checks if this is a test for overflow and if so replaces +/// multiplication with call to 'mul.with.overflow' intrinsic. +/// +/// \param I Compare instruction. +/// \param MulVal Result of 'mult' instruction. It is one of the arguments of +/// the compare instruction. Must be of integer type. +/// \param OtherVal The other argument of compare instruction. +/// \returns Instruction which must replace the compare instruction, NULL if no +/// replacement required. +static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal, + Value *OtherVal, InstCombiner &IC) { + assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); + assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); + assert(isa(MulVal->getType())); + Instruction *MulInstr = cast(MulVal); + assert(MulInstr->getOpcode() == Instruction::Mul); + + Instruction *LHS = cast(MulInstr->getOperand(0)), + *RHS = cast(MulInstr->getOperand(1)); + assert(LHS->getOpcode() == Instruction::ZExt); + assert(RHS->getOpcode() == Instruction::ZExt); + Value *A = LHS->getOperand(0), *B = RHS->getOperand(0); + + // Calculate type and width of the result produced by mul.with.overflow. + Type *TyA = A->getType(), *TyB = B->getType(); + unsigned WidthA = TyA->getPrimitiveSizeInBits(), + WidthB = TyB->getPrimitiveSizeInBits(); + unsigned MulWidth; + Type *MulType; + if (WidthB > WidthA) { + MulWidth = WidthB; + MulType = TyB; + } else { + MulWidth = WidthA; + MulType = TyA; + } + + // In order to replace the original mul with a narrower mul.with.overflow, + // all uses must ignore upper bits of the product. The number of used low + // bits must be not greater than the width of mul.with.overflow. + if (MulVal->hasNUsesOrMore(2)) + for (User *U : MulVal->users()) { + if (U == &I) + continue; + if (TruncInst *TI = dyn_cast(U)) { + // Check if truncation ignores bits above MulWidth. + unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits(); + if (TruncWidth > MulWidth) + return nullptr; + } else if (BinaryOperator *BO = dyn_cast(U)) { + // Check if AND ignores bits above MulWidth. + if (BO->getOpcode() != Instruction::And) + return nullptr; + if (ConstantInt *CI = dyn_cast(BO->getOperand(1))) { + const APInt &CVal = CI->getValue(); + if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth) + return nullptr; + } + } else { + // Other uses prohibit this transformation. 
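Note: an exhaustive standalone check of the idiom ProcessUGT_ADDCST_ADD recognizes, instantiated for i8 (CI2 = 128 = 2^7, CI1 = 255): the unsigned compare is true exactly when the narrow signed add overflows. Illustrative only, not part of the patch:

    #include <cassert>
    #include <cstdint>

    // The source pattern "(sext a + sext b + 128) u> 255" ...
    static bool idiom(int8_t A, int8_t B) {
      uint32_t Sum = static_cast<uint32_t>(int32_t(A) + int32_t(B) + 128);
      return Sum > 255;              // the icmp ugt ..., 255
    }

    // ... is true exactly when the 8-bit signed add overflows,
    // which is what sadd.with.overflow reports.
    static bool overflows(int8_t A, int8_t B) {
      int S = int(A) + int(B);
      return S < -128 || S > 127;
    }

    int main() {
      for (int A = -128; A <= 127; ++A)
        for (int B = -128; B <= 127; ++B)
          assert(idiom(int8_t(A), int8_t(B)) ==
                 overflows(int8_t(A), int8_t(B)));
      return 0;
    }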
+ return nullptr; + } + } + + // Recognize patterns + switch (I.getPredicate()) { + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp eq/neq mulval, zext trunc mulval + if (ZExtInst *Zext = dyn_cast(OtherVal)) + if (Zext->hasOneUse()) { + Value *ZextArg = Zext->getOperand(0); + if (TruncInst *Trunc = dyn_cast(ZextArg)) + if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth) + break; //Recognized + } + + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp eq/neq mulval, and(mulval, mask), mask selects low MulWidth bits. + ConstantInt *CI; + Value *ValToMask; + if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) { + if (ValToMask != MulVal) + return nullptr; + const APInt &CVal = CI->getValue() + 1; + if (CVal.isPowerOf2()) { + unsigned MaskWidth = CVal.logBase2(); + if (MaskWidth == MulWidth) + break; // Recognized + } + } + return nullptr; + + case ICmpInst::ICMP_UGT: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ugt mulval, max + if (ConstantInt *CI = dyn_cast(OtherVal)) { + APInt MaxVal = APInt::getMaxValue(MulWidth); + MaxVal = MaxVal.zext(CI->getBitWidth()); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_UGE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp uge mulval, max+1 + if (ConstantInt *CI = dyn_cast(OtherVal)) { + APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_ULE: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ule mulval, max + if (ConstantInt *CI = dyn_cast(OtherVal)) { + APInt MaxVal = APInt::getMaxValue(MulWidth); + MaxVal = MaxVal.zext(CI->getBitWidth()); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + case ICmpInst::ICMP_ULT: + // Recognize pattern: + // mulval = mul(zext A, zext B) + // cmp ule mulval, max + 1 + if (ConstantInt *CI = dyn_cast(OtherVal)) { + APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth); + if (MaxVal.eq(CI->getValue())) + break; // Recognized + } + return nullptr; + + default: + return nullptr; + } + + InstCombiner::BuilderTy *Builder = IC.Builder; + Builder->SetInsertPoint(MulInstr); + Module *M = I.getParent()->getParent()->getParent(); + + // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B) + Value *MulA = A, *MulB = B; + if (WidthA < MulWidth) + MulA = Builder->CreateZExt(A, MulType); + if (WidthB < MulWidth) + MulB = Builder->CreateZExt(B, MulType); + Value *F = + Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType); + CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul"); + IC.Worklist.Add(MulInstr); + + // If there are uses of mul result other than the comparison, we know that + // they are truncation or binary AND. Change them to use result of + // mul.with.overflow and adjust properly mask/size. 
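Note: the overflow test ProcessUMulZExtIdiom recognizes, modelled for i32 operands widened to i64; the reference result uses a GCC/Clang builtin. Illustrative only:

    #include <cassert>
    #include <cstdint>

    // The product of two i32 values computed in i64 exceeds UINT32_MAX
    // exactly when the i32 multiply overflows ...
    static bool idiom(uint32_t A, uint32_t B) {
      uint64_t Wide = uint64_t(A) * uint64_t(B); // mul(zext A, zext B)
      return Wide > UINT32_MAX;                  // cmp ugt mulval, max
    }

    // ... which is what umul.with.overflow reports.
    static bool mulOverflows(uint32_t A, uint32_t B) {
      uint32_t P;
      return __builtin_mul_overflow(A, B, &P);   // GCC/Clang builtin
    }

    int main() {
      assert(!idiom(65536u, 65535u) && !mulOverflows(65536u, 65535u));
      assert(idiom(65536u, 65536u) && mulOverflows(65536u, 65536u));
      return 0;
    }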
+ if (MulVal->hasNUsesOrMore(2)) { + Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value"); + for (User *U : MulVal->users()) { + if (U == &I || U == OtherVal) + continue; + if (TruncInst *TI = dyn_cast(U)) { + if (TI->getType()->getPrimitiveSizeInBits() == MulWidth) + IC.ReplaceInstUsesWith(*TI, Mul); + else + TI->setOperand(0, Mul); + } else if (BinaryOperator *BO = dyn_cast(U)) { + assert(BO->getOpcode() == Instruction::And); + // Replace (mul & mask) --> zext (mul.with.overflow & short_mask) + ConstantInt *CI = cast(BO->getOperand(1)); + APInt ShortMask = CI->getValue().trunc(MulWidth); + Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask); + Instruction *Zext = + cast(Builder->CreateZExt(ShortAnd, BO->getType())); + IC.Worklist.Add(Zext); + IC.ReplaceInstUsesWith(*BO, Zext); + } else { + llvm_unreachable("Unexpected Binary operation"); + } + IC.Worklist.Add(cast(U)); + } + } + if (isa(OtherVal)) + IC.Worklist.Add(cast(OtherVal)); + + // The original icmp gets replaced with the overflow value, maybe inverted + // depending on predicate. + bool Inverse = false; + switch (I.getPredicate()) { + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_EQ: + Inverse = true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + if (I.getOperand(0) == MulVal) + break; + Inverse = true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (I.getOperand(1) == MulVal) + break; + Inverse = true; + break; + default: + llvm_unreachable("Unexpected predicate"); + } + if (Inverse) { + Value *Res = Builder->CreateExtractValue(Call, 1); + return BinaryOperator::CreateNot(Res); + } + + return ExtractValueInst::Create(Call, 1); +} + // DemandedBitsLHSMask - When performing a comparison against a constant, // it is possible that not all the bits in the LHS are demanded. This helper // method computes the mask that IS demanded. @@ -2178,7 +2410,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // See if we are doing a comparison with a constant. if (ConstantInt *CI = dyn_cast(Op1)) { - Value *A = 0, *B = 0; + Value *A = nullptr, *B = nullptr; // Match the following pattern, which is a common idiom when writing // overflow-safe integer arithmetic function. The source performs an @@ -2293,15 +2525,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { APInt Op0KnownZeroInverted = ~Op0KnownZero; if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { // If the LHS is an AND with the same constant, look through it. - Value *LHS = 0; - ConstantInt *LHSC = 0; + Value *LHS = nullptr; + ConstantInt *LHSC = nullptr; if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) == 0" into "x != 3". - Value *X = 0; + Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); return new ICmpInst(ICmpInst::ICMP_NE, X, @@ -2330,15 +2562,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { APInt Op0KnownZeroInverted = ~Op0KnownZero; if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { // If the LHS is an AND with the same constant, look through it. 
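Note: a model of why the eq/ne cases above need the inverted overflow bit: mulval == zext(trunc(mulval)) holds exactly when the product did not overflow the narrow width. Illustrative only:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint64_t A : {3u, 70000u})
        for (uint64_t B : {5u, 70000u}) {
          uint64_t MulVal = A * B;                  // mul(zext A, zext B)
          bool CmpEq = (MulVal == uint64_t(uint32_t(MulVal))); // zext trunc
          bool Overflow = MulVal > UINT32_MAX;
          assert(CmpEq == !Overflow);               // eq <=> no overflow
        }
      return 0;
    }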
- Value *LHS = 0; - ConstantInt *LHSC = 0; + Value *LHS = nullptr; + ConstantInt *LHSC = nullptr; if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) || LHSC->getValue() != Op0KnownZeroInverted) LHS = Op0; // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) != 0" into "x == 3". - Value *X = 0; + Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); return new ICmpInst(ICmpInst::ICMP_EQ, X, @@ -2470,7 +2702,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (SelectInst *SI = dyn_cast(*I.user_begin())) if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) || (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1)) - return 0; + return nullptr; // See if we are doing a comparison between a constant and an instruction that // can be folded into the comparison. @@ -2506,7 +2738,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If either operand of the select is a constant, we can fold the // comparison into the select arms, which will cause one to be // constant folded and the select turned into a bitwise or. - Value *Op1 = 0, *Op2 = 0; + Value *Op1 = nullptr, *Op2 = nullptr; if (Constant *C = dyn_cast(LHSI->getOperand(1))) Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC); if (Constant *C = dyn_cast(LHSI->getOperand(2))) @@ -2618,7 +2850,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Analyze the case when either Op0 or Op1 is an add instruction. // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null). - Value *A = 0, *B = 0, *C = 0, *D = 0; + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; if (BO0 && BO0->getOpcode() == Instruction::Add) A = BO0->getOperand(0), B = BO0->getOperand(1); if (BO1 && BO1->getOpcode() == Instruction::Add) @@ -2713,7 +2945,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // Analyze the case when either Op0 or Op1 is a sub instruction. // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null). - A = 0; B = 0; C = 0; D = 0; + A = nullptr; B = nullptr; C = nullptr; D = nullptr; if (BO0 && BO0->getOpcode() == Instruction::Sub) A = BO0->getOperand(0), B = BO0->getOperand(1); if (BO1 && BO1->getOpcode() == Instruction::Sub) @@ -2739,7 +2971,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO0->hasOneUse() && BO1->hasOneUse()) return new ICmpInst(Pred, D, B); - BinaryOperator *SRem = NULL; + // icmp (0-X) < cst --> x > -cst + if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) { + Value *X; + if (match(BO0, m_Neg(m_Value(X)))) + if (ConstantInt *RHSC = dyn_cast(Op1)) + if (!RHSC->isMinValue(/*isSigned=*/true)) + return new ICmpInst(I.getSwappedPredicate(), X, + ConstantExpr::getNeg(RHSC)); + } + + BinaryOperator *SRem = nullptr; // icmp (srem X, Y), Y if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1)) @@ -2877,6 +3119,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { (Op0 == A || Op0 == B)) if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) return R; + + // (zext a) * (zext b) --> llvm.umul.with.overflow. 
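Note: the new icmp (0-X) < cst --> X > -cst fold above is safe only when the negation cannot wrap, hence the nsw requirement and the cst != INT_MIN guard. A standalone check over i8 values away from the wrap case (illustrative only):

    #include <cassert>

    int main() {
      for (int C = -127; C <= 127; ++C)       // skip cst == INT8_MIN
        for (int X = -127; X <= 127; ++X) {   // skip X == INT8_MIN (nsw)
          bool Lhs = (-X < C);                // icmp slt (0 - X), C
          bool Rhs = (X > -C);                // icmp sgt X, -C
          assert(Lhs == Rhs);
        }
      return 0;
    }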
+ if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { + if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this)) + return R; + } + if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) { + if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this)) + return R; + } } if (I.isEquality()) { @@ -2918,7 +3170,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { - Value *X = 0, *Y = 0, *Z = 0; + Value *X = nullptr, *Y = nullptr, *Z = nullptr; if (A == C) { X = B; Y = D; Z = A; @@ -3009,7 +3261,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X) return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate()); } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible. @@ -3017,13 +3269,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI, Constant *RHSC) { - if (!isa(RHSC)) return 0; + if (!isa(RHSC)) return nullptr; const APFloat &RHS = cast(RHSC)->getValueAPF(); // Get the width of the mantissa. We don't want to hack on conversions that // might lose information from the integer, e.g. "i64 -> float" int MantissaWidth = LHSI->getType()->getFPMantissaWidth(); - if (MantissaWidth == -1) return 0; // Unknown. + if (MantissaWidth == -1) return nullptr; // Unknown. // Check to see that the input is converted from an integer type that is small // enough that preserves all bits. TODO: check here for "known" sign bits. @@ -3037,7 +3289,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I, // If the conversion would lose info, don't hack on this. if ((int)InputSize > MantissaWidth) - return 0; + return nullptr; // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is @@ -3383,5 +3635,5 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), RHSExt->getOperand(0)); - return Changed ? &I : 0; + return Changed ? 
&I : nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index dcc8b0f..66d0938 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -20,6 +20,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "instcombine" + STATISTIC(NumDeadStore, "Number of dead stores eliminated"); STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); @@ -29,10 +31,13 @@ STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); static bool pointsToConstantGlobal(Value *V) { if (GlobalVariable *GV = dyn_cast(V)) return GV->isConstant(); - if (ConstantExpr *CE = dyn_cast(V)) + + if (ConstantExpr *CE = dyn_cast(V)) { if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::AddrSpaceCast || CE->getOpcode() == Instruction::GetElementPtr) return pointsToConstantGlobal(CE->getOperand(0)); + } return false; } @@ -60,9 +65,9 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, continue; } - if (BitCastInst *BCI = dyn_cast(I)) { + if (isa(I) || isa(I)) { // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) + if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset)) return false; continue; } @@ -112,7 +117,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, // If this is isn't our memcpy/memmove, reject it as something we can't // handle. MemTransferInst *MI = dyn_cast(I); - if (MI == 0) + if (!MI) return false; // If the transfer is using the alloca as a source of the transfer, then @@ -148,10 +153,10 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, static MemTransferInst * isOnlyCopiedFromConstantGlobal(AllocaInst *AI, SmallVectorImpl &ToDelete) { - MemTransferInst *TheCopy = 0; + MemTransferInst *TheCopy = nullptr; if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) return TheCopy; - return 0; + return nullptr; } Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { @@ -172,7 +177,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); + AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName()); New->setAlignment(AI.getAlignment()); // Scan to the end of the allocation instructions, to skip over a block of @@ -295,7 +300,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, // If the address spaces don't match, don't eliminate the cast. if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) - return 0; + return nullptr; Type *SrcPTy = SrcTy->getElementType(); @@ -346,7 +351,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, } } } - return 0; + return nullptr; } Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { @@ -373,7 +378,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { // None of the following transforms are legal for volatile/atomic loads. // FIXME: Some of it is okay for atomic loads; needs refactoring. 
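// [Editor's sketch - not part of the patch.] Why the transforms below bail out
// on non-simple loads: store-to-load forwarding is only sound when the access
// carries no volatility or atomic-ordering constraints.
int forwardable(int *p) {
  *p = 42;
  return *p;   // a simple load; legal to fold to 'return 42'
}
int not_forwardable(volatile int *p) {
  *p = 42;
  return *p;   // volatile: the reload must stay, so no forwarding
}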
- if (!LI.isSimple()) return 0; + if (!LI.isSimple()) return nullptr; // Do really simple store-to-load forwarding and load CSE, to catch cases // where there are several consecutive memory accesses to the same location, @@ -455,7 +460,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { } } } - return 0; + return nullptr; } /// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P @@ -467,12 +472,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { Type *DestPTy = cast(CI->getType())->getElementType(); PointerType *SrcTy = dyn_cast(CastOp->getType()); - if (SrcTy == 0) return 0; + if (!SrcTy) return nullptr; Type *SrcPTy = SrcTy->getElementType(); if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) - return 0; + return nullptr; /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" /// to its first element. This allows us to handle things like: @@ -506,20 +511,20 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { } if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) - return 0; + return nullptr; // If the pointers point into different address spaces don't do the // transformation. if (SrcTy->getAddressSpace() != cast(CI->getType())->getAddressSpace()) - return 0; + return nullptr; // If the pointers point to values of different sizes don't do the // transformation. if (!IC.getDataLayout() || IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != IC.getDataLayout()->getTypeSizeInBits(DestPTy)) - return 0; + return nullptr; // If the pointers point to pointers to different address spaces don't do the // transformation. It is not safe to introduce an addrspacecast instruction in @@ -527,7 +532,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { // cast. if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() && SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace()) - return 0; + return nullptr; // Okay, we are casting from one integer or pointer type to another of // the same size. Instead of casting the pointer before @@ -607,7 +612,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Don't hack volatile/atomic stores. // FIXME: Some bits are legal for atomic stores; needs refactoring. - if (!SI.isSimple()) return 0; + if (!SI.isSimple()) return nullptr; // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. @@ -674,7 +679,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (Instruction *U = dyn_cast(Val)) Worklist.Add(U); // Dropped a use. } - return 0; // Do not modify these! + return nullptr; // Do not modify these! } // store undef, Ptr -> noop @@ -703,9 +708,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (BranchInst *BI = dyn_cast(BBI)) if (BI->isUnconditional()) if (SimplifyStoreAtEndOfBlock(SI)) - return 0; // xform done! + return nullptr; // xform done! - return 0; + return nullptr; } /// SimplifyStoreAtEndOfBlock - Turn things like: @@ -728,7 +733,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // the other predecessor. pred_iterator PI = pred_begin(DestBB); BasicBlock *P = *PI; - BasicBlock *OtherBB = 0; + BasicBlock *OtherBB = nullptr; if (P != StoreBB) OtherBB = P; @@ -758,7 +763,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // If the other block ends in an unconditional branch, check for the 'if then // else' case. there is an instruction before the branch. 
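// [Editor's sketch - not part of the patch.] A source-level picture of the
// if/then/else case SimplifyStoreAtEndOfBlock handles: two stores to the same
// address at the end of both branches merge into one store of a select.
void two_stores(bool c, int *p, int a, int b) {
  if (c) *p = a;
  else   *p = b;
}
void one_store(bool c, int *p, int a, int b) {
  *p = c ? a : b;  // single store; the condition only chooses the value
}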
- StoreInst *OtherStore = 0; + StoreInst *OtherStore = nullptr; if (OtherBr->isUnconditional()) { --BBI; // Skip over debugging info. diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 71fbb6c..9996ebc 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// simplifyValueKnownNonZero - The specific integer value is used in a context /// where it is known to be non-zero. If this allows us to simplify the @@ -27,13 +29,13 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // If V has multiple uses, then we would have to do more analysis to determine // if this is safe. For example, the use could be in dynamically unreached // code. - if (!V->hasOneUse()) return 0; + if (!V->hasOneUse()) return nullptr; bool MadeChange = false; // ((1 << A) >>u B) --> (1 << (A-B)) // Because V cannot be zero, we know that B is less than A. - Value *A = 0, *B = 0, *PowerOf2 = 0; + Value *A = nullptr, *B = nullptr, *PowerOf2 = nullptr; if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))), m_Value(B))) && // The "1" can be any value known to be a power of 2. @@ -68,7 +70,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) { // If V is a phi node, we can call this on each of its operands. // "select cond, X, 0" can simplify to "X". - return MadeChange ? V : 0; + return MadeChange ? V : nullptr; } @@ -107,7 +109,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) { for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) { Constant *Elt = CV->getElementAsConstant(I); if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2()) - return 0; + return nullptr; Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2())); } @@ -118,6 +120,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyMulInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -139,7 +144,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2)); if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) { - Constant *NewCst = 0; + Constant *NewCst = nullptr; if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2()) // Replace X*(2^C) with X << C, where C is either a scalar or a splat. NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2()); @@ -165,10 +170,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { const APInt & Val = CI->getValue(); const APInt &PosVal = Val.abs(); if (Val.isNegative() && PosVal.isPowerOf2()) { - Value *X = 0, *Y = 0; + Value *X = nullptr, *Y = nullptr; if (Op0->hasOneUse()) { ConstantInt *C1; - Value *Sub = 0; + Value *Sub = nullptr; if (match(Op0, m_Sub(m_Value(Y), m_Value(X)))) Sub = Builder->CreateSub(X, Y, "suba"); else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1)))) @@ -268,7 +273,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // -2 is "-1 << 1" so it is all bits set except the low one. 
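// [Editor's sketch - not part of the patch.] The -2 mask used just below
// proves a value has no bits set other than bit 0, i.e. it is 0 or 1. A
// multiply by such a "bool cast" can then become a mask instead of a multiply.
// Assuming b is known to be 0 or 1 (this sketch names the rewrite loosely;
// the exact replacement emitted by the pass is not shown in this excerpt):
unsigned mul_by_bool(unsigned x, unsigned b) { return x * b; }
unsigned masked_form(unsigned x, unsigned b) { return (0u - b) & x; }
// 0-b is 0 when b==0 and all-ones when b==1, so both functions return x*b.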
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true); - Value *BoolCast = 0, *OtherOp = 0; + Value *BoolCast = nullptr, *OtherOp = nullptr; if (MaskedValueIsZero(Op0, Negative2)) BoolCast = Op0, OtherOp = Op1; else if (MaskedValueIsZero(Op1, Negative2)) @@ -281,7 +286,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { } } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } // @@ -384,7 +389,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C, Constant *C0 = dyn_cast(Opnd0); Constant *C1 = dyn_cast(Opnd1); - BinaryOperator *R = 0; + BinaryOperator *R = nullptr; // (X * C0) * C => X * (C0*C) if (FMulOrDiv->getOpcode() == Instruction::FMul) { @@ -426,6 +431,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { bool Changed = SimplifyAssociativeOrCommutative(I); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (isa(Op0)) std::swap(Op0, Op1); @@ -483,7 +491,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Value *M1 = ConstantExpr::getFMul(C1, C); Value *M0 = isNormalFp(cast(M1)) ? foldFMulConst(cast(Opnd0), C, &I) : - 0; + nullptr; if (M0 && M1) { if (Swap && FAddSub->getOpcode() == Instruction::FSub) std::swap(M0, M1); @@ -503,8 +511,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { // Under unsafe algebra do: // X * log2(0.5*Y) = X*log2(Y) - X if (I.hasUnsafeAlgebra()) { - Value *OpX = NULL; - Value *OpY = NULL; + Value *OpX = nullptr; + Value *OpY = nullptr; IntrinsicInst *Log2; detectLog2OfHalf(Op0, OpY, Log2); if (OpY) { @@ -567,7 +575,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { Value *Opnd0_0, *Opnd0_1; if (Opnd0->hasOneUse() && match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) { - Value *Y = 0; + Value *Y = nullptr; if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1) Y = Opnd0_1; else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1) @@ -621,7 +629,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { break; } - return Changed ? &I : 0; + return Changed ? &I : nullptr; } /// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select @@ -682,12 +690,12 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { // If we past the instruction, quit looking for it. if (&*BBI == SI) - SI = 0; + SI = nullptr; if (&*BBI == SelectCond) - SelectCond = 0; + SelectCond = nullptr; // If we ran out of things to eliminate, break out of the loop. - if (SelectCond == 0 && SI == 0) + if (!SelectCond && !SI) break; } @@ -719,7 +727,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode()) if (ConstantInt *LHSRHS = dyn_cast(LHS->getOperand(1))) { if (MultiplyOverflows(RHS, LHSRHS, - I.getOpcode()==Instruction::SDiv)) + I.getOpcode() == Instruction::SDiv)) return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0), ConstantExpr::getMul(RHS, LHSRHS)); @@ -735,12 +743,31 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } + if (ConstantInt *One = dyn_cast(Op0)) { + if (One->isOne() && !I.getType()->isIntegerTy(1)) { + bool isSigned = I.getOpcode() == Instruction::SDiv; + if (isSigned) { + // If Op1 is 0 then it's undefined behaviour, if Op1 is 1 then the + // result is one, if Op1 is -1 then the result is minus one, otherwise + // it's zero. 
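// [Editor's sketch - not part of the patch.] The new code that follows folds
// '1 sdiv X' branchlessly: X+1 is unsigned-less-than 3 exactly when X is in
// {-1, 0, 1}, and for those inputs the quotient equals X itself (X == 0 is
// undefined behaviour, so any result is acceptable there).
int one_sdiv(int x) {
  unsigned inc = (unsigned)x + 1u;
  return inc < 3u ? x : 0;   // select (icmp ult (x+1), 3), x, 0
}
// Spot checks: x=1 -> 1, x=-1 -> -1, |x|>=2 -> 0, matching 1/x.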
+ Value *Inc = Builder->CreateAdd(Op1, One); + Value *Cmp = Builder->CreateICmpULT( + Inc, ConstantInt::get(I.getType(), 3)); + return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0)); + } else { + // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the + // result is one, otherwise it's zero. + return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType()); + } + } + } + // See if we can fold away this div instruction. if (SimplifyDemandedInstructionBits(I)) return &I; // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y - Value *X = 0, *Z = 0; + Value *X = nullptr, *Z = nullptr; if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 bool isSigned = I.getOpcode() == Instruction::SDiv; if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) || @@ -748,7 +775,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return BinaryOperator::Create(I.getOpcode(), X, Op1); } - return 0; + return nullptr; } /// dyn_castZExtVal - Checks if V is a zext or constant that can @@ -761,7 +788,7 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) { if (C->getValue().getActiveBits() <= cast(Ty)->getBitWidth()) return ConstantExpr::getTrunc(C, Ty); } - return 0; + return nullptr; } namespace { @@ -786,7 +813,7 @@ struct UDivFoldAction { }; UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand) - : FoldAction(FA), OperandToFold(InputOperand), FoldResult(0) {} + : FoldAction(FA), OperandToFold(InputOperand), FoldResult(nullptr) {} UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS) : FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {} }; @@ -865,7 +892,8 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, if (SelectInst *SI = dyn_cast(Op1)) if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions)) if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions)) { - Actions.push_back(UDivFoldAction((FoldUDivOperandCb)0, Op1, LHSIdx-1)); + Actions.push_back(UDivFoldAction((FoldUDivOperandCb)nullptr, Op1, + LHSIdx-1)); return Actions.size(); } @@ -875,6 +903,9 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I, Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyUDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -928,12 +959,15 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return Inst; } - return 0; + return nullptr; } Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -983,7 +1017,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } } - return 0; + return nullptr; } /// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C not a special @@ -997,7 +1031,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Constant *Divisor, bool AllowReciprocal) { if (!isa(Divisor)) // TODO: handle vectors. 
- return 0; + return nullptr; const APFloat &FpVal = cast(Divisor)->getValueAPF(); APFloat Reciprocal(FpVal.getSemantics()); @@ -1010,7 +1044,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, } if (!Cvt) - return 0; + return nullptr; ConstantFP *R; R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal); @@ -1020,6 +1054,9 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend, Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFDivInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1037,10 +1074,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return R; if (AllowReassociate) { - Constant *C1 = 0; + Constant *C1 = nullptr; Constant *C2 = Op1C; Value *X; - Instruction *Res = 0; + Instruction *Res = nullptr; if (match(Op0, m_FMul(m_Value(X), m_Constant(C1)))) { // (X*C1)/C2 => X * (C1/C2) @@ -1071,12 +1108,12 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { return T; } - return 0; + return nullptr; } if (AllowReassociate && isa(Op0)) { Constant *C1 = cast(Op0), *C2; - Constant *Fold = 0; + Constant *Fold = nullptr; Value *X; bool CreateDiv = true; @@ -1098,13 +1135,13 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { R->setFastMathFlags(I.getFastMathFlags()); return R; } - return 0; + return nullptr; } if (AllowReassociate) { Value *X, *Y; - Value *NewInst = 0; - Instruction *SimpR = 0; + Value *NewInst = nullptr; + Instruction *SimpR = nullptr; if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) { // (X/Y) / Z => X / (Y*Z) @@ -1140,7 +1177,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { } } - return 0; + return nullptr; } /// This function implements the transforms common to both integer remainder @@ -1176,12 +1213,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyURemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1208,12 +1248,15 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { return ReplaceInstUsesWith(I, Ext); } - return 0; + return nullptr; } Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifySRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1250,7 +1293,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { bool hasMissing = false; for (unsigned i = 0; i != VWidth; ++i) { Constant *Elt = C->getAggregateElement(i); - if (Elt == 0) { + if (!Elt) { hasMissing = true; break; } @@ -1279,12 +1322,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitFRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyFRemInst(Op0, Op1, DL)) return ReplaceInstUsesWith(I, V); @@ -1292,5 +1338,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) { if (isa(Op1) && SimplifyDivRemOfSelect(I)) return &I; - return 0; + return nullptr; } diff --git 
a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 0ab657a..46f7b8a 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -18,6 +18,8 @@ #include "llvm/IR/DataLayout.h" using namespace llvm; +#define DEBUG_TYPE "instcombine" + /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)] /// and if a/b/c and the add's all have a single use, turn this into a phi /// and a single binop. @@ -48,12 +50,12 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // types. I->getOperand(0)->getType() != LHSType || I->getOperand(1)->getType() != RHSType) - return 0; + return nullptr; // If they are CmpInst instructions, check their predicates if (CmpInst *CI = dyn_cast(I)) if (CI->getPredicate() != cast(FirstInst)->getPredicate()) - return 0; + return nullptr; if (isNUW) isNUW = cast(I)->hasNoUnsignedWrap(); @@ -63,8 +65,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { isExact = cast(I)->isExact(); // Keep track of which operand needs a phi node. - if (I->getOperand(0) != LHSVal) LHSVal = 0; - if (I->getOperand(1) != RHSVal) RHSVal = 0; + if (I->getOperand(0) != LHSVal) LHSVal = nullptr; + if (I->getOperand(1) != RHSVal) RHSVal = nullptr; } // If both LHS and RHS would need a PHI, don't do this transformation, @@ -72,14 +74,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { // which leads to higher register pressure. This is especially // bad when the PHIs are in the header of a loop. if (!LHSVal && !RHSVal) - return 0; + return nullptr; // Otherwise, this is safe to transform! Value *InLHS = FirstInst->getOperand(0); Value *InRHS = FirstInst->getOperand(1); - PHINode *NewLHS = 0, *NewRHS = 0; - if (LHSVal == 0) { + PHINode *NewLHS = nullptr, *NewRHS = nullptr; + if (!LHSVal) { NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), FirstInst->getOperand(0)->getName() + ".pn"); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); @@ -87,7 +89,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { LHSVal = NewLHS; } - if (RHSVal == 0) { + if (!RHSVal) { NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); @@ -148,7 +150,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { GetElementPtrInst *GEP= dyn_cast(PN.getIncomingValue(i)); if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() || GEP->getNumOperands() != FirstInst->getNumOperands()) - return 0; + return nullptr; AllInBounds &= GEP->isInBounds(); @@ -170,19 +172,19 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // for struct indices, which must always be constant. if (isa(FirstInst->getOperand(op)) || isa(GEP->getOperand(op))) - return 0; + return nullptr; if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType()) - return 0; + return nullptr; // If we already needed a PHI for an earlier operand, and another operand // also requires a PHI, we'd be introducing more PHIs than we're // eliminating, which increases register pressure on entry to the PHI's // block. if (NeededPhi) - return 0; + return nullptr; - FixedOperands[op] = 0; // Needs a PHI. + FixedOperands[op] = nullptr; // Needs a PHI. 
NeededPhi = true; } } @@ -194,7 +196,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { // load up into the predecessors so that we have a load of a gep of an alloca, // which can usually all be folded into the load. if (AllBasePointersAreAllocas) - return 0; + return nullptr; // Otherwise, this is safe to transform. Insert PHI nodes for each operand // that is variable. @@ -288,7 +290,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // FIXME: This is overconservative; this transform is allowed in some cases // for atomic operations. if (FirstLI->isAtomic()) - return 0; + return nullptr; // When processing loads, we need to propagate two bits of information to the // sunk load: whether it is volatile, and what its alignment is. We currently @@ -303,20 +305,20 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // load and the PHI. if (FirstLI->getParent() != PN.getIncomingBlock(0) || !isSafeAndProfitableToSinkLoad(FirstLI)) - return 0; + return nullptr; // If the PHI is of volatile loads and the load block has multiple // successors, sinking it would remove a load of the volatile value from // the path through the other successor. if (isVolatile && FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = dyn_cast(PN.getIncomingValue(i)); if (!LI || !LI->hasOneUse()) - return 0; + return nullptr; // We can't sink the load if the loaded value could be modified between // the load and the PHI. @@ -324,12 +326,12 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { LI->getParent() != PN.getIncomingBlock(i) || LI->getPointerAddressSpace() != LoadAddrSpace || !isSafeAndProfitableToSinkLoad(LI)) - return 0; + return nullptr; // If some of the loads have an alignment specified but not all of them, // we can't do the transformation. if ((LoadAlignment != 0) != (LI->getAlignment() != 0)) - return 0; + return nullptr; LoadAlignment = std::min(LoadAlignment, LI->getAlignment()); @@ -338,7 +340,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // the path through the other successor. if (isVolatile && LI->getParent()->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; } // Okay, they are all the same operation. Create a new PHI node of the @@ -354,7 +356,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); if (NewInVal != InVal) - InVal = 0; + InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } @@ -398,8 +400,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // If all input operands to the phi are the same instruction (e.g. a cast from // the same type or "+42") we can pull the operation through the PHI, reducing // code size and simplifying code. - Constant *ConstantOp = 0; - Type *CastSrcTy = 0; + Constant *ConstantOp = nullptr; + Type *CastSrcTy = nullptr; bool isNUW = false, isNSW = false, isExact = false; if (isa(FirstInst)) { @@ -409,13 +411,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // the code by turning an i32 into an i1293. 
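// [Editor's sketch - not part of the patch.] FoldPHIArgLoadIntoPHI above sinks
// a load past a phi: a phi of loads becomes a single load of a phi of the
// pointers. Roughly, at the source level:
int phi_of_loads(bool c, const int *p, const int *q) {
  return c ? *p : *q;    // two loads feeding a "phi"
}
int load_of_phi(bool c, const int *p, const int *q) {
  return *(c ? p : q);   // one load of the selected pointer
}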
if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) { if (!ShouldChangeType(PN.getType(), CastSrcTy)) - return 0; + return nullptr; } } else if (isa(FirstInst) || isa(FirstInst)) { // Can fold binop, compare or shift here if the RHS is a constant, // otherwise call FoldPHIArgBinOpIntoPHI. ConstantOp = dyn_cast(FirstInst->getOperand(1)); - if (ConstantOp == 0) + if (!ConstantOp) return FoldPHIArgBinOpIntoPHI(PN); if (OverflowingBinaryOperator *BO = @@ -426,19 +428,19 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { dyn_cast(FirstInst)) isExact = PEO->isExact(); } else { - return 0; // Cannot fold this operation. + return nullptr; // Cannot fold this operation. } // Check to see if all arguments are the same operation. for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Instruction *I = dyn_cast(PN.getIncomingValue(i)); - if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) - return 0; + if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst)) + return nullptr; if (CastSrcTy) { if (I->getOperand(0)->getType() != CastSrcTy) - return 0; // Cast operation must match. + return nullptr; // Cast operation must match. } else if (I->getOperand(1) != ConstantOp) { - return 0; + return nullptr; } if (isNUW) @@ -462,7 +464,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); if (NewInVal != InVal) - InVal = 0; + InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } @@ -587,10 +589,10 @@ namespace llvm { template<> struct DenseMapInfo { static inline LoweredPHIRecord getEmptyKey() { - return LoweredPHIRecord(0, 0); + return LoweredPHIRecord(nullptr, 0); } static inline LoweredPHIRecord getTombstoneKey() { - return LoweredPHIRecord(0, 1); + return LoweredPHIRecord(nullptr, 1); } static unsigned getHashValue(const LoweredPHIRecord &Val) { return DenseMapInfo::getHashValue(Val.PN) ^ (Val.Shift>>3) ^ @@ -637,14 +639,14 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // bail out. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { InvokeInst *II = dyn_cast(PN->getIncomingValue(i)); - if (II == 0) continue; + if (!II) continue; if (II->getParent() != PN->getIncomingBlock(i)) continue; // If we have a phi, and if it's directly in the predecessor, then we have // a critical edge where we need to put the truncate. Since we can't // split the edge in instcombine, we have to bail out. - return 0; + return nullptr; } for (User *U : PN->users()) { @@ -667,7 +669,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if (UserI->getOpcode() != Instruction::LShr || !UserI->hasOneUse() || !isa(UserI->user_back()) || !isa(UserI->getOperand(1))) - return 0; + return nullptr; unsigned Shift = cast(UserI->getOperand(1))->getZExtValue(); PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back())); @@ -705,7 +707,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // If we've already lowered a user like this, reuse the previously lowered // value. - if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { + if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == nullptr) { // Otherwise, Create the new PHI node for this user. 
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), @@ -894,5 +896,5 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (Instruction *Res = SliceUpIllegalIntegerPHI(PN)) return Res; - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index e74d912..9a41e4b 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -18,16 +18,18 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + /// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms, /// returning the kind and providing the out parameter results if we /// successfully match. static SelectPatternFlavor MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { SelectInst *SI = dyn_cast(V); - if (SI == 0) return SPF_UNKNOWN; + if (!SI) return SPF_UNKNOWN; ICmpInst *ICI = dyn_cast(SI->getCondition()); - if (ICI == 0) return SPF_UNKNOWN; + if (!ICI) return SPF_UNKNOWN; LHS = ICI->getOperand(0); RHS = ICI->getOperand(1); @@ -129,15 +131,15 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, if (TI->isCast()) { Type *FIOpndTy = FI->getOperand(0)->getType(); if (TI->getOperand(0)->getType() != FIOpndTy) - return 0; + return nullptr; // The select condition may be a vector. We may only change the operand // type if the vector width remains the same (and matches the condition). Type *CondTy = SI.getCondition()->getType(); if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() || CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements())) - return 0; + return nullptr; } else { - return 0; // unknown unary op. + return nullptr; // unknown unary op. } // Fold this by inserting a select from the input values. @@ -149,7 +151,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, // Only handle binary operators here. if (!isa(TI)) - return 0; + return nullptr; // Figure out if the operations have any operands in common. Value *MatchOp, *OtherOpT, *OtherOpF; @@ -165,7 +167,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, OtherOpF = FI->getOperand(0); MatchIsOpZero = false; } else if (!TI->isCommutative()) { - return 0; + return nullptr; } else if (TI->getOperand(0) == FI->getOperand(1)) { MatchOp = TI->getOperand(0); OtherOpT = TI->getOperand(1); @@ -177,7 +179,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, OtherOpF = FI->getOperand(1); MatchIsOpZero = true; } else { - return 0; + return nullptr; } // If we reach here, they do have operations in common. @@ -282,7 +284,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, } } - return 0; + return nullptr; } /// SimplifyWithOpReplaced - See if V simplifies when its operand Op is @@ -296,7 +298,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, Instruction *I = dyn_cast(V); if (!I) - return 0; + return nullptr; // If this is a binary operator, try to simplify it with the replaced op. 
if (BinaryOperator *B = dyn_cast(I)) { @@ -347,7 +349,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, } } - return 0; + return nullptr; } /// foldSelectICmpAndOr - We want to turn: @@ -368,18 +370,18 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) - return 0; + return nullptr; Value *CmpLHS = IC->getOperand(0); Value *CmpRHS = IC->getOperand(1); if (!match(CmpRHS, m_Zero())) - return 0; + return nullptr; Value *X; const APInt *C1; if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1)))) - return 0; + return nullptr; const APInt *C2; bool OrOnTrueVal = false; @@ -388,7 +390,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2))); if (!OrOnFalseVal && !OrOnTrueVal) - return 0; + return nullptr; Value *V = CmpLHS; Value *Y = OrOnFalseVal ? TrueVal : FalseVal; @@ -527,7 +529,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (IntegerType *Ty = dyn_cast(CmpLHS->getType())) { if (TrueVal->getType() == Ty) { if (ConstantInt *Cmp = dyn_cast(CmpRHS)) { - ConstantInt *C1 = NULL, *C2 = NULL; + ConstantInt *C1 = nullptr, *C2 = nullptr; if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) { C1 = dyn_cast(TrueVal); C2 = dyn_cast(FalseVal); @@ -586,7 +588,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder)) return ReplaceInstUsesWith(SI, V); - return Changed ? &SI : 0; + return Changed ? &SI : nullptr; } @@ -606,7 +608,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, // If the value is a non-instruction value like a constant or argument, it // can always be mapped. const Instruction *I = dyn_cast(V); - if (I == 0) return true; + if (!I) return true; // If V is a PHI node defined in the same block as the condition PHI, we can // map the arguments. 
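// [Editor's sketch - not part of the patch.] foldSelectICmpAndOr above turns a
// select on a single tested bit into an OR of that bit shifted into place.
// Here the tested bit is 4 and the OR'd bit is 8, so the shift amount is 1:
unsigned select_form(unsigned x, unsigned y) { return (x & 4) ? (y | 8) : y; }
unsigned or_form(unsigned x, unsigned y)     { return y | ((x & 4) << 1); }
// x&4 is either 0 or 4; shifted left by one it is 0 or 8, so the OR matches
// both select arms without a branch.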
@@ -649,11 +651,35 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, return ReplaceInstUsesWith(Outer, C); } - // TODO: MIN(MIN(A, 23), 97) - return 0; + if (SPF1 == SPF2) { + if (ConstantInt *CB = dyn_cast(B)) { + if (ConstantInt *CC = dyn_cast(C)) { + APInt ACB = CB->getValue(); + APInt ACC = CC->getValue(); + + // MIN(MIN(A, 23), 97) -> MIN(A, 23) + // MAX(MAX(A, 97), 23) -> MAX(A, 97) + if ((SPF1 == SPF_UMIN && ACB.ule(ACC)) || + (SPF1 == SPF_SMIN && ACB.sle(ACC)) || + (SPF1 == SPF_UMAX && ACB.uge(ACC)) || + (SPF1 == SPF_SMAX && ACB.sge(ACC))) + return ReplaceInstUsesWith(Outer, Inner); + + // MIN(MIN(A, 97), 23) -> MIN(A, 23) + // MAX(MAX(A, 23), 97) -> MAX(A, 97) + if ((SPF1 == SPF_UMIN && ACB.ugt(ACC)) || + (SPF1 == SPF_SMIN && ACB.sgt(ACC)) || + (SPF1 == SPF_UMAX && ACB.ult(ACC)) || + (SPF1 == SPF_SMAX && ACB.slt(ACC))) { + Outer.replaceUsesOfWith(Inner, A); + return &Outer; + } + } + } + } + return nullptr; } - /// foldSelectICmpAnd - If one of the constants is zero (we know they can't /// both be) and we have an icmp instruction with zero, and we have an 'and' /// with the non-constant value and a power of two we can turn the select @@ -663,27 +689,27 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast(SI.getCondition()); if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy()) - return 0; + return nullptr; if (!match(IC->getOperand(1), m_Zero())) - return 0; + return nullptr; ConstantInt *AndRHS; Value *LHS = IC->getOperand(0); if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) - return 0; + return nullptr; // If both select arms are non-zero see if we have a select of the form // 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic // for 'x ? 2^n : 0' and fix the thing up at the end. - ConstantInt *Offset = 0; + ConstantInt *Offset = nullptr; if (!TrueVal->isZero() && !FalseVal->isZero()) { if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2()) Offset = FalseVal; else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2()) Offset = TrueVal; else - return 0; + return nullptr; // Adjust TrueVal and FalseVal to the offset. TrueVal = ConstantInt::get(Builder->getContext(), @@ -696,7 +722,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, if (!AndRHS->getValue().isPowerOf2() || (!TrueVal->getValue().isPowerOf2() && !FalseVal->getValue().isPowerOf2())) - return 0; + return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. @@ -708,7 +734,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, // or a trunc of the 'and'. The trunc case requires that all of the truncated // bits are zero, we can figure that out by looking at the 'and' mask. 
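// [Editor's sketch - not part of the patch.] The new FoldSPFofSPF case above
// collapses nested min/max whose operands are constants, e.g.:
#include <algorithm>
unsigned nested(unsigned a)  { return std::min(std::min(a, 23u), 97u); } // MIN(MIN(A,23),97)
unsigned folded(unsigned a)  { return std::min(a, 23u); }                // == MIN(A,23): 23 <= 97
unsigned nested2(unsigned a) { return std::min(std::min(a, 97u), 23u); } // MIN(MIN(A,97),23)
unsigned folded2(unsigned a) { return std::min(a, 23u); }                // == MIN(A,23): tighter bound wins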
if (AndZeros >= ValC->getBitWidth()) - return 0; + return nullptr; Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType()); if (ValZeros > AndZeros) @@ -866,7 +892,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (Instruction *TI = dyn_cast(TrueVal)) if (Instruction *FI = dyn_cast(FalseVal)) if (TI->hasOneUse() && FI->hasOneUse()) { - Instruction *AddOp = 0, *SubOp = 0; + Instruction *AddOp = nullptr, *SubOp = nullptr; // Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z)) if (TI->getOpcode() == FI->getOpcode()) @@ -888,7 +914,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } if (AddOp) { - Value *OtherAddOp = 0; + Value *OtherAddOp = nullptr; if (SubOp->getOperand(0) == AddOp->getOperand(0)) { OtherAddOp = AddOp->getOperand(1); } else if (SubOp->getOperand(0) == AddOp->getOperand(1)) { @@ -969,7 +995,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *TrueSI = dyn_cast(TrueVal)) { if (TrueSI->getCondition() == CondVal) { if (SI.getTrueValue() == TrueSI->getTrueValue()) - return 0; + return nullptr; SI.setOperand(1, TrueSI->getTrueValue()); return &SI; } @@ -977,7 +1003,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { if (SelectInst *FalseSI = dyn_cast(FalseVal)) { if (FalseSI->getCondition() == CondVal) { if (SI.getFalseValue() == FalseSI->getFalseValue()) - return 0; + return nullptr; SI.setOperand(2, FalseSI->getFalseValue()); return &SI; } @@ -1005,5 +1031,5 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } } - return 0; + return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index 8273dfd..cc6665c 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -19,6 +19,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "instcombine" + Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { assert(I.getOperand(1)->getType() == I.getOperand(0)->getType()); Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -33,7 +35,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { if (Instruction *R = FoldOpIntoSelect(I, SI)) return R; - if (ConstantInt *CUI = dyn_cast(Op1)) + if (Constant *CUI = dyn_cast(Op1)) if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I)) return Res; @@ -50,7 +52,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return &I; } - return 0; + return nullptr; } /// CanEvaluateShifted - See if we can compute the specified value, but shifted @@ -78,7 +80,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, // if the needed bits are already zero in the input. This allows us to reuse // the value which means that we don't care if the shift has multiple uses. // TODO: Handle opposite shift by exact value. - ConstantInt *CI = 0; + ConstantInt *CI = nullptr; if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) || (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) { if (CI->getZExtValue() == NumBits) { @@ -115,7 +117,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::Shl: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; // We can always fold shl(c1)+shl(c2) -> shl(c1+c2). 
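// [Editor's sketch - not part of the patch.] The fold named in the comment
// above: two left shifts by constants compose by adding the shift amounts,
// provided the sum stays below the bit width.
unsigned two_shifts(unsigned x) { return (x << 3) << 2; }
unsigned one_shift(unsigned x)  { return x << 5; }   // same result for all x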
if (isLeftShift) return true; @@ -139,7 +141,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, case Instruction::LShr: { // We can often fold the shift into shifts-by-a-constant. CI = dyn_cast(I->getOperand(1)); - if (CI == 0) return false; + if (!CI) return false; // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2). if (!isLeftShift) return true; @@ -309,37 +311,38 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, -Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, +Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I) { bool isLeftShift = I.getOpcode() == Instruction::Shl; + ConstantInt *COp1 = nullptr; + if (ConstantDataVector *CV = dyn_cast(Op1)) + COp1 = dyn_cast_or_null(CV->getSplatValue()); + else if (ConstantVector *CV = dyn_cast(Op1)) + COp1 = dyn_cast_or_null(CV->getSplatValue()); + else + COp1 = dyn_cast(Op1); + + if (!COp1) + return nullptr; // See if we can propagate this shift into the input, this covers the trivial // cast of lshr(shl(x,c1),c2) as well as other more complex cases. if (I.getOpcode() != Instruction::AShr && - CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) { + CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) { DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression" " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n"); return ReplaceInstUsesWith(I, - GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this)); + GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this)); } - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we don't care about. uint32_t TypeBits = Op0->getType()->getScalarSizeInBits(); - // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate - // a signed shift. - // - if (Op1->uge(TypeBits)) { - if (I.getOpcode() != Instruction::AShr) - return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType())); - // ashr i32 X, 32 --> ashr i32 X, 31 - I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1)); - return &I; - } + assert(!COp1->uge(TypeBits) && + "Shift over the type width should have been removed already"); // ((X*C1) << C2) == (X * (C1 << C2)) if (BinaryOperator *BO = dyn_cast(Op0)) @@ -367,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, if (TrOp && I.isLogicalShift() && TrOp->isShift() && isa(TrOp->getOperand(1))) { // Okay, we'll do this xform. Make the shift of shift. - Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType()); + Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType()); // (shift2 (shift1 & 0x00FF), c2) Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName()); @@ -384,10 +387,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // shift. We know that it is a logical shift by a constant, so adjust the // mask as appropriate. 
if (I.getOpcode() == Instruction::Shl) - MaskV <<= Op1->getZExtValue(); + MaskV <<= COp1->getZExtValue(); else { assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift"); - MaskV = MaskV.lshr(Op1->getZExtValue()); + MaskV = MaskV.lshr(COp1->getZExtValue()); } // shift1 & 0x00FF @@ -421,9 +424,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X + (Y << C)) Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1, Op0BO->getOperand(1)->getName()); - uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), - APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + uint32_t Op1Val = COp1->getLimitedValue(TypeBits); + + APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); + Constant *Mask = ConstantInt::get(I.getContext(), Bits); + if (VectorType *VT = dyn_cast(X->getType())) + Mask = ConstantVector::getSplat(VT->getNumElements(), Mask); + return BinaryOperator::CreateAnd(X, Mask); } // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C)) @@ -453,9 +460,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // (X + (Y << C)) Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS, Op0BO->getOperand(0)->getName()); - uint32_t Op1Val = Op1->getLimitedValue(TypeBits); - return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(), - APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val))); + uint32_t Op1Val = COp1->getLimitedValue(TypeBits); + + APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val); + Constant *Mask = ConstantInt::get(I.getContext(), Bits); + if (VectorType *VT = dyn_cast(X->getType())) + Mask = ConstantVector::getSplat(VT->getNumElements(), Mask); + return BinaryOperator::CreateAnd(X, Mask); } // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C) @@ -523,7 +534,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, // Find out if this is a shift of a shift by a constant. BinaryOperator *ShiftOp = dyn_cast(Op0); if (ShiftOp && !ShiftOp->isShift()) - ShiftOp = 0; + ShiftOp = nullptr; if (ShiftOp && isa(ShiftOp->getOperand(1))) { @@ -541,9 +552,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, ConstantInt *ShiftAmt1C = cast(ShiftOp->getOperand(1)); uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits); - uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits); + uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits); assert(ShiftAmt2 != 0 && "Should have been simplified earlier"); - if (ShiftAmt1 == 0) return 0; // Will be simplified in the future. + if (ShiftAmt1 == 0) return nullptr; // Will be simplified in the future. 
Value *X = ShiftOp->getOperand(0); IntegerType *Ty = cast(I.getType()); @@ -671,10 +682,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1, } } } - return 0; + return nullptr; } Instruction *InstCombiner::visitShl(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), DL)) @@ -709,10 +723,13 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { match(I.getOperand(1), m_Constant(C2))) return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); - return 0; + return nullptr; } Instruction *InstCombiner::visitLShr(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), DL)) return ReplaceInstUsesWith(I, V); @@ -749,10 +766,13 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) { } } - return 0; + return nullptr; } Instruction *InstCombiner::visitAShr(BinaryOperator &I) { + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); + if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(), DL)) return ReplaceInstUsesWith(I, V); @@ -805,6 +825,5 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { if (NumSignBits == Op0->getType()->getScalarSizeInBits()) return ReplaceInstUsesWith(I, Op0); - return 0; + return nullptr; } - diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index a47b709..1b42d3d 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #include "InstCombine.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" @@ -21,6 +20,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "instcombine" + /// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if there /// are any bits set in the constant that are not demanded. If so, shrink the @@ -57,7 +58,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) { Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, KnownZero, KnownOne, 0); - if (V == 0) return false; + if (!V) return false; if (V == &Inst) return true; ReplaceInstUsesWith(Inst, V); return true; @@ -71,7 +72,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, unsigned Depth) { Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask, KnownZero, KnownOne, Depth); - if (NewVal == 0) return false; + if (!NewVal) return false; U = NewVal; return true; } @@ -101,7 +102,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask, Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne, unsigned Depth) { - assert(V != 0 && "Null pointer of Value???"); + assert(V != nullptr && "Null pointer of Value???"); assert(Depth <= 6 && "Limit Search Depth"); uint32_t BitWidth = DemandedMask.getBitWidth(); Type *VTy = V->getType(); @@ -118,33 +119,33 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We know all of the bits for a constant! 
KnownOne = CI->getValue() & DemandedMask; KnownZero = ~KnownOne & DemandedMask; - return 0; + return nullptr; } if (isa(V)) { // We know all of the bits for a constant! KnownOne.clearAllBits(); KnownZero = DemandedMask; - return 0; + return nullptr; } KnownZero.clearAllBits(); KnownOne.clearAllBits(); if (DemandedMask == 0) { // Not demanding any bits from V. if (isa(V)) - return 0; + return nullptr; return UndefValue::get(VTy); } if (Depth == 6) // Limit search depth. - return 0; + return nullptr; APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0); Instruction *I = dyn_cast(V); if (!I) { - ComputeMaskedBits(V, KnownZero, KnownOne, Depth); - return 0; // Only analyze instructions. + computeKnownBits(V, KnownZero, KnownOne, Depth); + return nullptr; // Only analyze instructions. } // If there are multiple uses of this value and we aren't at the root, then @@ -157,8 +158,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // this instruction has a simpler value in that context. if (I->getOpcode() == Instruction::And) { // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this @@ -179,8 +180,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // only bits from X or Y are demanded. // If either the LHS or the RHS are One, the result is One. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this @@ -204,8 +205,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // We can simplify (X^Y) -> X or Y in the user's context if we know that // only bits from X or Y are demanded. - ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1); + computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1); // If all of the demanded bits are known zero on one side, return the // other. @@ -216,8 +217,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, } // Compute the KnownZero/KnownOne bits to simplify things downstream. - ComputeMaskedBits(I, KnownZero, KnownOne, Depth); - return 0; + computeKnownBits(I, KnownZero, KnownOne, Depth); + return nullptr; } // If this is the root being simplified, allow it to have multiple uses, @@ -229,7 +230,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, switch (I->getOpcode()) { default: - ComputeMaskedBits(I, KnownZero, KnownOne, Depth); + computeKnownBits(I, KnownZero, KnownOne, Depth); break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. 
@@ -409,20 +410,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     }
   case Instruction::BitCast:
     if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
-      return 0;  // vector->int or fp->int?
+      return nullptr;  // vector->int or fp->int?
 
     if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
       if (VectorType *SrcVTy =
            dyn_cast<VectorType>(I->getOperand(0)->getType())) {
         if (DstVTy->getNumElements() != SrcVTy->getNumElements())
           // Don't touch a bitcast between vectors of different element counts.
-          return 0;
+          return nullptr;
       } else
         // Don't touch a scalar-to-vector bitcast.
-        return 0;
+        return nullptr;
     } else if (I->getOperand(0)->getType()->isVectorTy())
       // Don't touch a vector-to-scalar bitcast.
-      return 0;
+      return nullptr;
 
     if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
                              KnownZero, KnownOne, Depth+1))
@@ -578,9 +579,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         return I;
       }
 
-      // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+      // Otherwise just hand the sub off to computeKnownBits to fill in
       // the known zeros and ones.
-      ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+      computeKnownBits(V, KnownZero, KnownOne, Depth);
 
       // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
       // zero.
@@ -751,7 +752,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     // remainder is zero.
     if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
       APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
-      ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+      computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
       // If it's known zero, our sign bit is also zero.
       if (LHSKnownZero.isNegative())
         KnownZero.setBit(KnownZero.getBitWidth() - 1);
@@ -810,10 +811,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       }
     case Intrinsic::x86_sse42_crc32_64_64:
       KnownZero = APInt::getHighBitsSet(64, 32);
-      return 0;
+      return nullptr;
     }
   }
-    ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+    computeKnownBits(V, KnownZero, KnownOne, Depth);
     break;
   }
@@ -821,7 +822,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   // constant.
   if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
     return Constant::getIntegerValue(VTy, KnownOne);
-  return 0;
+  return nullptr;
 }
 
 /// Helper routine of SimplifyDemandedUseBits. It tries to simplify
@@ -847,13 +848,13 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
   const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
   const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
   if (!ShlOp1 || !ShrOp1)
-    return 0; // Noop.
+    return nullptr; // Noop.
 
   Value *VarX = Shr->getOperand(0);
   Type *Ty = VarX->getType();
   unsigned BitWidth = Ty->getIntegerBitWidth();
   if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
-    return 0; // Undef.
+    return nullptr; // Undef.
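(Aside: the KnownZero/KnownOne pairs threaded through the hunks above obey a simple algebra, which the renamed computeKnownBits() fills in. A minimal sketch of the rule for 'and', written against LLVM's APInt purely for illustration; this helper is not part of the patch:)

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Known-bits rule for 'and': a result bit is known zero if it is known
    // zero on either side, and known one only if it is known one on both.
    static void knownBitsForAnd(const APInt &LHSZero, const APInt &LHSOne,
                                const APInt &RHSZero, const APInt &RHSOne,
                                APInt &KnownZero, APInt &KnownOne) {
      KnownZero = LHSZero | RHSZero;  // zero on either side -> known zero
      KnownOne  = LHSOne  & RHSOne;   // one on both sides   -> known one
    }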
   unsigned ShlAmt = ShlOp1.getZExtValue();
   unsigned ShrAmt = ShrOp1.getZExtValue();
@@ -882,7 +883,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
       return VarX;
 
     if (!Shr->hasOneUse())
-      return 0;
+      return nullptr;
 
     BinaryOperator *New;
     if (ShrAmt < ShlAmt) {
@@ -902,7 +903,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
     return InsertNewInstWith(New, *Shl);
   }
 
-  return 0;
+  return nullptr;
 }
 
 /// SimplifyDemandedVectorElts - The specified value produces a vector with
@@ -923,7 +924,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
   if (isa<UndefValue>(V)) {
     // If the entire vector is undefined, just return this info.
     UndefElts = EltMask;
-    return 0;
+    return nullptr;
   }
 
   if (DemandedElts == 0) { // If nothing is demanded, provide undef.
@@ -938,7 +939,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     // Check if this is identity. If so, return 0 since we are not simplifying
     // anything.
     if (DemandedElts.isAllOnesValue())
-      return 0;
+      return nullptr;
 
     Type *EltTy = cast<VectorType>(V->getType())->getElementType();
     Constant *Undef = UndefValue::get(EltTy);
@@ -952,7 +953,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       }
 
       Constant *Elt = C->getAggregateElement(i);
-      if (Elt == 0) return 0;
+      if (!Elt) return nullptr;
 
       if (isa<UndefValue>(Elt)) {   // Already undef.
         Elts.push_back(Undef);
@@ -964,12 +965,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
 
     // If we changed the constant, return it.
     Constant *NewCV = ConstantVector::get(Elts);
-    return NewCV != C ? NewCV : 0;
+    return NewCV != C ? NewCV : nullptr;
   }
 
   // Limit search depth.
   if (Depth == 10)
-    return 0;
+    return nullptr;
 
   // If multiple users are using the root value, proceed with
   // simplification conservatively assuming that all elements
@@ -980,14 +981,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     // the main instcombine process.
     if (Depth != 0)
       // TODO: Just compute the UndefElts information recursively.
-      return 0;
+      return nullptr;
 
     // Conservatively assume that all elements are needed.
     DemandedElts = EltMask;
   }
 
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) return 0;        // Only analyze instructions.
+  if (!I) return nullptr;        // Only analyze instructions.
 
   bool MadeChange = false;
   APInt UndefElts2(VWidth, 0);
@@ -999,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     // If this is a variable index, we don't know which element it overwrites.
     // demand exactly the same input as we produce.
     ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
-    if (Idx == 0) {
+    if (!Idx) {
       // Note that we can't propagate undef elt info, because we don't know
       // which elt is getting updated.
       TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
@@ -1281,5 +1282,5 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       break;
     }
   }
-  return MadeChange ? I : 0;
+  return MadeChange ? I : nullptr;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 521dc9c..8c5e202 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -17,6 +17,8 @@
 using namespace llvm;
 using namespace PatternMatch;
 
+#define DEBUG_TYPE "instcombine"
+
 /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
 /// is to leave as a vector operation.
 /// isConstant indicates whether we're
 /// extracting one known element. If false we're extracting a variable index.
@@ -73,7 +75,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
   if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
     // If this is an insert to a variable element, we don't know what it is.
    if (!isa<ConstantInt>(III->getOperand(2)))
-      return 0;
+      return nullptr;
     unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
 
     // If this is an insert to the element we are looking for, return the
@@ -97,14 +99,14 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
   }
 
   // Extract a value from a vector add operation with a constant zero.
-  Value *Val = 0; Constant *Con = 0;
+  Value *Val = nullptr; Constant *Con = nullptr;
   if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
     if (Con->getAggregateElement(EltNo)->isNullValue())
       return FindScalarElement(Val, EltNo);
   }
 
   // Otherwise, we don't know.
-  return 0;
+  return nullptr;
 }
 
 // If we have a PHI node with a vector type that has only 2 uses: feed
@@ -113,7 +115,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
   // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
   if (!PN->hasNUses(2))
-    return NULL;
+    return nullptr;
 
   // If so, it's known at this point that one operand is PHI and the other is
   // an extractelement node. Find the PHI user that is not the extractelement
@@ -128,7 +130,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
   // otherwise return NULL.
   if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
       !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
-    return NULL;
+    return nullptr;
 
   // Create a scalar PHI node that will replace the vector PHI node
   // just before the current PHI node.
@@ -318,7 +320,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
       }
     }
   }
-  return 0;
+  return nullptr;
 }
 
 /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
@@ -440,10 +442,10 @@ static ShuffleOps CollectShuffleElements(Value *V,
 
       // Either the extracted from or inserted into vector must be RHSVec,
       // otherwise we'd end up with a shuffle of three inputs.
-      if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) {
+      if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
         Value *RHS = EI->getOperand(0);
         ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
-        assert(LR.second == 0 || LR.second == RHS);
+        assert(LR.second == nullptr || LR.second == RHS);
 
         if (LR.first->getType() != RHS->getType()) {
           // We tried our best, but we can't find anything compatible with RHS
@@ -488,6 +490,41 @@ static ShuffleOps CollectShuffleElements(Value *V,
   return std::make_pair(V, nullptr);
 }
 
+/// Try to find redundant insertvalue instructions, like the following ones:
+///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
+///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
+/// Here the second instruction inserts values at the same indices, as the
+/// first one, making the first one redundant.
+/// It should be transformed to:
+///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
+Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
+  bool IsRedundant = false;
+  ArrayRef<unsigned> FirstIndices = I.getIndices();
+
+  // If there is a chain of insertvalue instructions (each of them except the
+  // last one has only one use and it's another insertvalue insn from this
+  // chain), check if any of the 'children' uses the same indices as the first
+  // instruction. In this case, the first one is redundant.
+  Value *V = &I;
+  unsigned Depth = 0;
+  while (V->hasOneUse() && Depth < 10) {
+    User *U = V->user_back();
+    auto UserInsInst = dyn_cast<InsertValueInst>(U);
+    if (!UserInsInst || U->getOperand(0) != V)
+      break;
+    if (UserInsInst->getIndices() == FirstIndices) {
+      IsRedundant = true;
+      break;
+    }
+    V = UserInsInst;
+    Depth++;
+  }
+
+  if (IsRedundant)
+    return ReplaceInstUsesWith(I, I.getOperand(0));
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   Value *VecOp    = IE.getOperand(0);
   Value *ScalarOp = IE.getOperand(1);
@@ -523,13 +560,14 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
     // (and any insertelements it points to), into one big shuffle.
     if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
       SmallVector<Constant*, 16> Mask;
-      ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0);
+      ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr);
 
       // The proposed shuffle may be trivial, in which case we shouldn't
       // perform the combine.
       if (LR.first != &IE && LR.second != &IE) {
         // We now have a shuffle of LHS, RHS, Mask.
-        if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType());
+        if (LR.second == nullptr)
+          LR.second = UndefValue::get(LR.first->getType());
         return new ShuffleVectorInst(LR.first, LR.second,
                                      ConstantVector::get(Mask));
       }
@@ -546,7 +584,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
     return &IE;
   }
 
-  return 0;
+  return nullptr;
 }
 
 /// Return true if we can evaluate the specified expression tree if the vector
@@ -801,6 +839,20 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
   llvm_unreachable("failed to reorder elements of vector instruction!");
 }
 
+static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
+                                  bool &isLHSID, bool &isRHSID) {
+  isLHSID = isRHSID = true;
+
+  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] < 0) continue;  // Ignore undef values.
+    // Is this an identity shuffle of the LHS value?
+    isLHSID &= (Mask[i] == (int)i);
+
+    // Is this an identity shuffle of the RHS value?
+    isRHSID &= (Mask[i]-e == i);
+  }
+}
+
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
@@ -864,16 +916,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
 
   if (VWidth == LHSWidth) {
     // Analyze the shuffle, are the LHS or RHS and identity shuffles?
-    bool isLHSID = true, isRHSID = true;
-
-    for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
-      if (Mask[i] < 0) continue;  // Ignore undef values.
-      // Is this an identity shuffle of the LHS value?
-      isLHSID &= (Mask[i] == (int)i);
-
-      // Is this an identity shuffle of the RHS value?
-      isRHSID &= (Mask[i]-e == i);
-    }
+    bool isLHSID, isRHSID;
+    RecognizeIdentityMask(Mask, isLHSID, isRHSID);
 
     // Eliminate identity shuffles.
     if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
@@ -932,16 +976,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
   if (LHSShuffle)
     if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
-      LHSShuffle = NULL;
+      LHSShuffle = nullptr;
   if (RHSShuffle)
     if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
-      RHSShuffle = NULL;
+      RHSShuffle = nullptr;
   if (!LHSShuffle && !RHSShuffle)
-    return MadeChange ? &SVI : 0;
+    return MadeChange ? &SVI : nullptr;
 
-  Value* LHSOp0 = NULL;
-  Value* LHSOp1 = NULL;
-  Value* RHSOp0 = NULL;
+  Value* LHSOp0 = nullptr;
+  Value* LHSOp1 = nullptr;
+  Value* RHSOp0 = nullptr;
   unsigned LHSOp0Width = 0;
   unsigned RHSOp0Width = 0;
   if (LHSShuffle) {
@@ -973,11 +1017,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   // case 4
   if (LHSOp0 == RHSOp0) {
     newLHS = LHSOp0;
-    newRHS = NULL;
+    newRHS = nullptr;
   }
 
   if (newLHS == LHS && newRHS == RHS)
-    return MadeChange ? &SVI : 0;
+    return MadeChange ? &SVI : nullptr;
 
   SmallVector<int, 16> LHSMask;
   SmallVector<int, 16> RHSMask;
@@ -1037,7 +1081,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
       // If newRHS == newLHS, we want to remap any references from newRHS to
      // newLHS so that we can properly identify splats that may occur due to
       // obfuscation across the two vectors.
-      if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
+      if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
         eltMask += newLHSWidth;
     }
@@ -1063,10 +1107,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
         Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
       }
     }
-    if (newRHS == NULL)
+    if (!newRHS)
       newRHS = UndefValue::get(newLHS->getType());
     return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
   }
 
-  return MadeChange ? &SVI : 0;
+  // If the result mask is an identity, replace uses of this instruction with
+  // corresponding argument.
+  bool isLHSID, isRHSID;
+  RecognizeIdentityMask(newMask, isLHSID, isRHSID);
+  if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS);
+  if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS);
+
+  return MadeChange ? &SVI : nullptr;
 }
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 8c780b5..1ab7db3 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -10,7 +10,6 @@
 #ifndef INSTCOMBINE_WORKLIST_H
 #define INSTCOMBINE_WORKLIST_H
 
-#define DEBUG_TYPE "instcombine"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Instruction.h"
@@ -18,6 +17,8 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
+#define DEBUG_TYPE "instcombine"
+
 namespace llvm {
 
 /// InstCombineWorklist - This is the worklist management logic for
@@ -68,7 +69,7 @@ public:
     if (It == WorklistMap.end()) return; // Not in worklist.
 
     // Don't bother moving everything down, just null out the slot.
-    Worklist[It->second] = 0;
+    Worklist[It->second] = nullptr;
     WorklistMap.erase(It);
   }
 
@@ -101,4 +102,6 @@ public:
 
 } // end namespace llvm.
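(Aside: the reason DEBUG_TYPE moves below the #include block in these files, and why this header ends with an #undef as the next hunk shows, is to keep the macro from leaking across file boundaries. A schematic of the discipline, not code from the patch:)

    // Header-local DEBUG_TYPE discipline, sketched:
    #include "llvm/Support/Debug.h"    // all includes come first

    #define DEBUG_TYPE "instcombine"   // defined only after the includes

    // ... declarations whose inline code may use DEBUG(dbgs() << ...) ...

    #undef DEBUG_TYPE                  // undone before #endif: nothing leaks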
+#undef DEBUG_TYPE
+
 #endif
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0cab81b..4c36887 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,7 +33,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "instcombine"
 #include "llvm/Transforms/Scalar.h"
 #include "InstCombine.h"
 #include "llvm-c/Initialization.h"
@@ -58,6 +57,8 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+#define DEBUG_TYPE "instcombine"
+
 STATISTIC(NumCombined , "Number of insts combined");
 STATISTIC(NumConstProp, "Number of constant folds");
 STATISTIC(NumDeadInst , "Number of dead inst eliminated");
@@ -512,7 +513,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 // dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
@@ -530,7 +531,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
     if (C->getType()->getElementType()->isIntegerTy())
       return ConstantExpr::getNeg(C);
 
-  return 0;
+  return nullptr;
 }
 
 // dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
@@ -549,7 +550,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
     if (C->getType()->getElementType()->isFloatingPointTy())
       return ConstantExpr::getFNeg(C);
 
-  return 0;
+  return nullptr;
 }
 
 static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
@@ -595,13 +596,13 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
 // not have a second operand.
 Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
   // Don't modify shared select instructions
-  if (!SI->hasOneUse()) return 0;
+  if (!SI->hasOneUse()) return nullptr;
 
   Value *TV = SI->getOperand(1);
   Value *FV = SI->getOperand(2);
   if (isa<Constant>(TV) || isa<Constant>(FV)) {
     // Bool selects with constant operands can be folded to logical ops.
-    if (SI->getType()->isIntegerTy(1)) return 0;
+    if (SI->getType()->isIntegerTy(1)) return nullptr;
 
     // If it's a bitcast involving vectors, make sure it has the same number of
     // elements on both sides.
@@ -610,10 +611,10 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
       VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
 
      // Verify that either both or neither are vectors.
-      if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+      if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr;
       // If vectors, verify that they have the same number of elements.
       if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
-        return 0;
+        return nullptr;
     }
 
     Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
@@ -622,7 +623,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
       return SelectInst::Create(SI->getCondition(), SelectTrueVal,
                                 SelectFalseVal);
   }
 
-  return 0;
+  return nullptr;
 }
 
@@ -634,7 +635,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   PHINode *PN = cast<PHINode>(I.getOperand(0));
   unsigned NumPHIValues = PN->getNumIncomingValues();
   if (NumPHIValues == 0)
-    return 0;
+    return nullptr;
 
   // We normally only transform phis with a single use.
 // However, if a PHI has
   // multiple uses and they are all the same operation, we can fold *all* of the
   // uses into the PHI.
@@ -644,7 +645,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
     for (User *U : PN->users()) {
       Instruction *UI = cast<Instruction>(U);
       if (UI != &I && !I.isIdenticalTo(UI))
-        return 0;
+        return nullptr;
     }
     // Otherwise, we can replace *all* users with the new PHI we form.
   }
@@ -654,14 +655,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   // remember the BB it is in.  If there is more than one or if *it* is a PHI,
   // bail out.  We don't do arbitrary constant expressions here because moving
   // their computation can be expensive without a cost model.
-  BasicBlock *NonConstBB = 0;
+  BasicBlock *NonConstBB = nullptr;
   for (unsigned i = 0; i != NumPHIValues; ++i) {
     Value *InVal = PN->getIncomingValue(i);
     if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
       continue;
 
-    if (isa<PHINode>(InVal)) return 0;  // Itself a phi.
-    if (NonConstBB) return 0;  // More than one non-const value.
+    if (isa<PHINode>(InVal)) return nullptr;  // Itself a phi.
+    if (NonConstBB) return nullptr;  // More than one non-const value.
 
     NonConstBB = PN->getIncomingBlock(i);
 
@@ -669,22 +670,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
     // insert a computation after it without breaking the edge.
     if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
       if (II->getParent() == NonConstBB)
-        return 0;
+        return nullptr;
 
     // If the incoming non-constant value is in I's block, we will remove one
     // instruction, but insert another equivalent one, leading to infinite
     // instcombine.
     if (NonConstBB == I.getParent())
-      return 0;
+      return nullptr;
   }
 
   // If there is exactly one non-constant value, we can insert a copy of the
   // operation in that block.  However, if this is a critical edge, we would be
   // inserting the computation one some other paths (e.g. inside a loop).  Only
   // do this if the pred block is unconditionally branching into the phi block.
-  if (NonConstBB != 0) {
+  if (NonConstBB != nullptr) {
     BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
-    if (!BI || !BI->isUnconditional()) return 0;
+    if (!BI || !BI->isUnconditional()) return nullptr;
   }
 
   // Okay, we can do the transformation: create the new PHI node.
@@ -708,7 +709,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
       BasicBlock *ThisBB = PN->getIncomingBlock(i);
       Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
       Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
-      Value *InV = 0;
+      Value *InV = nullptr;
       // Beware of ConstantExpr:  it may eventually evaluate to getNullValue,
       // even if currently isNullValue gives false.
       Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
@@ -722,7 +723,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   } else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
     Constant *C = cast<Constant>(I.getOperand(1));
     for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV = 0;
+      Value *InV = nullptr;
       if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
         InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
       else if (isa<ICmpInst>(CI))
@@ -736,7 +737,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
   } else if (I.getNumOperands() == 2) {
     Constant *C = cast<Constant>(I.getOperand(1));
     for (unsigned i = 0; i != NumPHIValues; ++i) {
-      Value *InV = 0;
+      Value *InV = nullptr;
       if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
         InV = ConstantExpr::get(I.getOpcode(), InC, C);
       else
@@ -776,11 +777,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
   assert(PtrTy->isPtrOrPtrVectorTy());
 
   if (!DL)
-    return 0;
+    return nullptr;
 
   Type *Ty = PtrTy->getPointerElementType();
   if (!Ty->isSized())
-    return 0;
+    return nullptr;
 
   // Start with the index over the outer type.  Note that the type size
   // might be zero (even if the offset isn't zero) if the indexed type
@@ -806,7 +807,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
   while (Offset) {
     // Indexing into tail padding between struct/array elements.
     if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
-      return 0;
+      return nullptr;
 
     if (StructType *STy = dyn_cast<StructType>(Ty)) {
       const StructLayout *SL = DL->getStructLayout(STy);
@@ -827,7 +828,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
       Ty = AT->getElementType();
     } else {
       // Otherwise, we can't index into the middle of this atomic type, bail.
-      return 0;
+      return nullptr;
     }
   }
 
@@ -859,7 +860,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
 
   // If Scale is zero then it does not divide Val.
   if (Scale.isMinValue())
-    return 0;
+    return nullptr;
 
   // Look through chains of multiplications, searching for a constant that is
   // divisible by Scale.  For example, descaling X*(Y*(Z*4)) by a factor of 4
@@ -902,7 +903,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
       APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
       if (!Remainder.isMinValue())
         // Not divisible by Scale.
-        return 0;
+        return nullptr;
       // Replace with the quotient in the parent.
       Op = ConstantInt::get(CI->getType(), Quotient);
       NoSignedWrap = true;
@@ -915,7 +916,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
       // Multiplication.
       NoSignedWrap = BO->hasNoSignedWrap();
       if (RequireNoSignedWrap && !NoSignedWrap)
-        return 0;
+        return nullptr;
 
       // There are three cases for multiplication: multiplication by exactly
       // the scale, multiplication by a constant different to the scale, and
@@ -934,7 +935,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
 
       // Otherwise drill down into the constant.
       if (!Op->hasOneUse())
-        return 0;
+        return nullptr;
 
       Parent = std::make_pair(BO, 1);
       continue;
@@ -943,7 +944,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
      // Multiplication by something else. Drill down into the left-hand side
       // since that's where the reassociate pass puts the good stuff.
       if (!Op->hasOneUse())
-        return 0;
+        return nullptr;
 
       Parent = std::make_pair(BO, 0);
       continue;
@@ -954,7 +955,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
       // Multiplication by a power of 2.
       NoSignedWrap = BO->hasNoSignedWrap();
       if (RequireNoSignedWrap && !NoSignedWrap)
-        return 0;
+        return nullptr;
 
       Value *LHS = BO->getOperand(0);
       int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
@@ -968,7 +969,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
         break;
       }
       if (Amt < logScale || !Op->hasOneUse())
-        return 0;
+        return nullptr;
 
       // Multiplication by more than the scale.  Reduce the multiplying amount
       // by the scale in the parent.
@@ -979,7 +980,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
     }
 
     if (!Op->hasOneUse())
-      return 0;
+      return nullptr;
 
     if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
       if (Cast->getOpcode() == Instruction::SExt) {
@@ -993,7 +994,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
         // Scale and the multiplication Y * SmallScale should not overflow.
         if (SmallScale.sext(Scale.getBitWidth()) != Scale)
           // SmallScale does not sign-extend to Scale.
-          return 0;
+          return nullptr;
         assert(SmallScale.exactLogBase2() == logScale);
         // Require that Y * SmallScale must not overflow.
         RequireNoSignedWrap = true;
@@ -1012,7 +1013,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
         // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
        // from this point up in the expression (see later).
         if (RequireNoSignedWrap)
-          return 0;
+          return nullptr;
 
         // Drill down through the cast.
         unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
@@ -1026,7 +1027,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
     }
 
     // Unsupported expression, bail out.
-    return 0;
+    return nullptr;
   }
 
   // We know that we can successfully descale, so from here on we can safely
@@ -1082,6 +1083,101 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
   } while (1);
 }
 
+/// \brief Creates node of binary operation with the same attributes as the
+/// specified one but with other operands.
+static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
+                                 InstCombiner::BuilderTy *B) {
+  Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
+  if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) {
+    if (isa<OverflowingBinaryOperator>(NewBO)) {
+      NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap());
+      NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap());
+    }
+    if (isa<PossiblyExactOperator>(NewBO))
+      NewBO->setIsExact(Inst.isExact());
+  }
+  return BORes;
+}
+
+/// \brief Makes transformation of binary operation specific for vector types.
+/// \param Inst Binary operator to transform.
+/// \return Pointer to node that must replace the original binary operator, or
+///         null pointer if no transformation was made.
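(Aside: the transformation SimplifyVectorOp implements, defined next, is easiest to read off IR. Hand-written illustration, not taken from the patch: when both operands are shuffles of single vectors through the same mask, the operation is done on the un-shuffled inputs and the shuffle is applied once to the result:)

    // Before:
    //   %t0 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    //   %t1 = shufflevector <4 x i32> %v2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    //   %r  = add <4 x i32> %t0, %t1
    // After (one shuffle instead of two, applied to the combined result):
    //   %a  = add <4 x i32> %v1, %v2
    //   %r  = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>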
+Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
+  if (!Inst.getType()->isVectorTy()) return nullptr;
+
+  unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
+  Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
+  assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
+  assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
+
+  // If both arguments of binary operation are shuffles, which use the same
+  // mask and shuffle within a single vector, it is worthwhile to move the
+  // shuffle after binary operation:
+  //   Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m)
+  if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) {
+    ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS);
+    ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS);
+    if (isa<UndefValue>(LShuf->getOperand(1)) &&
+        isa<UndefValue>(RShuf->getOperand(1)) &&
+        LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType() &&
+        LShuf->getMask() == RShuf->getMask()) {
+      Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
+          RShuf->getOperand(0), Builder);
+      Value *Res = Builder->CreateShuffleVector(NewBO,
+          UndefValue::get(NewBO->getType()), LShuf->getMask());
+      return Res;
+    }
+  }
+
+  // If one argument is a shuffle within one vector, the other is a constant,
+  // try moving the shuffle after the binary operation.
+  ShuffleVectorInst *Shuffle = nullptr;
+  Constant *C1 = nullptr;
+  if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS);
+  if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS);
+  if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS);
+  if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS);
+  if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) &&
+      Shuffle->getType() == Shuffle->getOperand(0)->getType()) {
+    SmallVector<int, 16> ShMask = Shuffle->getShuffleMask();
+    // Find constant C2 that has property:
+    //   shuffle(C2, ShMask) = C1
+    // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>)
+    // reorder is not possible.
+    SmallVector<Constant*, 16> C2M(VWidth,
+                               UndefValue::get(C1->getType()->getScalarType()));
+    bool MayChange = true;
+    for (unsigned I = 0; I < VWidth; ++I) {
+      if (ShMask[I] >= 0) {
+        assert(ShMask[I] < (int)VWidth);
+        if (!isa<UndefValue>(C2M[ShMask[I]])) {
+          MayChange = false;
+          break;
+        }
+        C2M[ShMask[I]] = C1->getAggregateElement(I);
+      }
+    }
+    if (MayChange) {
+      Constant *C2 = ConstantVector::get(C2M);
+      Value *NewLHS, *NewRHS;
+      if (isa<Constant>(LHS)) {
+        NewLHS = C2;
+        NewRHS = Shuffle->getOperand(0);
+      } else {
+        NewLHS = Shuffle->getOperand(0);
+        NewRHS = C2;
+      }
+      Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
+      Value *Res = Builder->CreateShuffleVector(NewBO,
+          UndefValue::get(Inst.getType()), Shuffle->getMask());
+      return Res;
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
 
@@ -1130,7 +1226,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   //
   if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
     if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
-      return 0;
+      return nullptr;
 
     // Note that if our source is a gep chain itself then we wait for that
     // chain to be resolved before we perform this transformation.  This
@@ -1138,7 +1234,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     if (GEPOperator *SrcGEP =
          dyn_cast<GEPOperator>(Src->getOperand(0)))
       if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
-        return 0;   // Wait until our source is folded to completion.
+        return nullptr;   // Wait until our source is folded to completion.
 
     SmallVector<Value*, 8> Indices;
 
@@ -1166,7 +1262,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           // intptr_t).
 // Just avoid transforming this until the input has been normalized.
           if (SO1->getType() != GO1->getType())
-            return 0;
+            return nullptr;
           Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
         }
@@ -1216,7 +1312,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
   // We do not handle pointer-vector geps here.
   if (!StrippedPtrTy)
-    return 0;
+    return nullptr;
 
   if (StrippedPtr != PtrOp) {
     bool HasZeroPointerIndex = false;
@@ -1241,7 +1337,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           GetElementPtrInst *Res =
            GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
           Res->setIsInBounds(GEP.isInBounds());
-          return Res;
+          if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
+            return Res;
+          // Insert Res, and create an addrspacecast.
+          // e.g.,
+          //   GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
+          // ->
+          //   %0 = GEP i8 addrspace(1)* X, ...
+          //   addrspacecast i8 addrspace(1)* %0 to i8*
+          return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType());
         }
 
         if (ArrayType *XATy =
@@ -1253,8 +1357,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           // to an array of the same type as the destination pointer
           // array.  Because the array type is never stepped over (there
           // is a leading zero) we can fold the cast into this GEP.
-          GEP.setOperand(0, StrippedPtr);
-          return &GEP;
+          if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
+            GEP.setOperand(0, StrippedPtr);
+            return &GEP;
+          }
+          // Cannot replace the base pointer directly because StrippedPtr's
+          // address space is different. Instead, create a new GEP followed by
+          // an addrspacecast.
+          // e.g.,
+          //   GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
+          //     i32 0, ...
+          // ->
+          //   %0 = GEP [10 x i8] addrspace(1)* X, ...
+          //   addrspacecast i8 addrspace(1)* %0 to i8*
+          SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
+          Value *NewGEP = GEP.isInBounds() ?
+            Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+            Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+          return new AddrSpaceCastInst(NewGEP, GEP.getType());
         }
       }
     }
@@ -1360,7 +1480,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   }
 
   if (!DL)
-    return 0;
+    return nullptr;
 
   /// See if we can simplify:
   ///   X = bitcast A* to B*
@@ -1412,7 +1532,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     }
   }
 
-  return 0;
+  return nullptr;
 }
 
 static bool
@@ -1527,7 +1647,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
     }
     return EraseInstFromFunction(MI);
   }
-  return 0;
+  return nullptr;
 }
 
 /// \brief Move the call to free before a NULL test.
@@ -1556,30 +1676,30 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) {
   //   would duplicate the call to free in each predecessor and it may
   //   not be profitable even for code size.
   if (!PredBB)
-    return 0;
+    return nullptr;
 
   // Validate constraint #2: Does this block contains only the call to
   //                         free and an unconditional branch?
   // FIXME: We could check if we can speculate everything in the
   //        predecessor block
   if (FreeInstrBB->size() != 2)
-    return 0;
+    return nullptr;
   BasicBlock *SuccBB;
   if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
-    return 0;
+    return nullptr;
 
   // Validate the rest of constraint #1 by matching on the pred branch.
   TerminatorInst *TI = PredBB->getTerminator();
   BasicBlock *TrueBB, *FalseBB;
   ICmpInst::Predicate Pred;
   if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
-    return 0;
+    return nullptr;
   if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
-    return 0;
+    return nullptr;
 
   // Validate constraint #3: Ensure the null case just falls through.
   if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
-    return 0;
+    return nullptr;
   assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
          "Broken CFG: missing edge from predecessor to successor");
@@ -1614,14 +1734,14 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
   if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
     return I;
 
-  return 0;
+  return nullptr;
 }
 
 Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
   // Change br (not X), label True, label False to: br X, label False, True
-  Value *X = 0;
+  Value *X = nullptr;
   BasicBlock *TrueDest;
   BasicBlock *FalseDest;
   if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
@@ -1664,7 +1784,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
     return &BI;
   }
 
-  return 0;
+  return nullptr;
 }
 
 Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
@@ -1688,7 +1808,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
       return &SI;
     }
   }
-  return 0;
+  return nullptr;
 }
 
 Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
@@ -1705,7 +1825,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       // first index
      return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
     }
-    return 0; // Can't handle other constants
+    return nullptr; // Can't handle other constants
   }
 
   if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
@@ -1838,7 +1958,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
   // and if again single-use then via load (gep (gep)) to load (gep).
   // However, double extracts from e.g. function arguments or return values
   // aren't handled yet.
-  return 0;
+  return nullptr;
 }
 
 enum Personality_Type {
@@ -2177,7 +2297,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
     return &LI;
   }
 
-  return 0;
+  return nullptr;
 }
 
@@ -2270,7 +2390,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
       for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
            i != e; ++i) {
         ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
-        if (CE == 0) continue;
+        if (CE == nullptr) continue;
 
         Constant*& FoldRes = FoldedConstants[CE];
         if (!FoldRes)
@@ -2374,7 +2494,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
   while (!Worklist.isEmpty()) {
     Instruction *I = Worklist.RemoveOne();
-    if (I == 0) continue;  // skip null values.
+    if (I == nullptr) continue;  // skip null values.
 
     // Check to see if we can DCE the instruction.
     if (isInstructionTriviallyDead(I, TLI)) {
@@ -2516,7 +2636,7 @@ bool InstCombiner::runOnFunction(Function &F) {
     return false;
 
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
-  DL = DLP ? &DLP->getDataLayout() : 0;
+  DL = DLP ? &DLP->getDataLayout() : nullptr;
   TLI = &getAnalysis<TargetLibraryInfo>();
 
   // Minimizing size?
   MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -2543,7 +2663,7 @@ bool InstCombiner::runOnFunction(Function &F) {
   while (DoOneIteration(F, Iteration++))
     EverMadeChange = true;
 
-  Builder = 0;
+  Builder = nullptr;
   return EverMadeChange;
 }
 
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index bbfa4c5..95fca75 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -13,8 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "asan"
-
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -53,8 +51,11 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "asan"
+
 static const uint64_t kDefaultShadowScale = 3;
 static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
+static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
 static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
 static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000;  // < 2G.
 static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
@@ -79,6 +80,7 @@ static const char *const kAsanUnregisterGlobalsName =
 static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
 static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
 static const char *const kAsanInitName = "__asan_init_v3";
+static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init";
 static const char *const kAsanCovName = "__sanitizer_cov";
 static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
 static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
@@ -135,10 +137,12 @@ static cl::opt<bool> ClGlobals("asan-globals",
 static cl::opt<int> ClCoverage("asan-coverage",
        cl::desc("ASan coverage. 0: none, 1: entry block, 2: all blocks"),
        cl::Hidden, cl::init(false));
+static cl::opt<int> ClCoverageBlockThreshold("asan-coverage-block-threshold",
+       cl::desc("Add coverage instrumentation only to the entry block if there "
+                "are more than this number of blocks."),
+       cl::Hidden, cl::init(1500));
 static cl::opt<bool> ClInitializers("asan-initialization-order",
        cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
-static cl::opt<bool> ClMemIntrin("asan-memintrin",
-       cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
 static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
        cl::desc("Instrument <, <=, >, >=, - with pointer operands"),
        cl::Hidden, cl::init(false));
@@ -148,6 +152,16 @@ static cl::opt<unsigned> ClRealignStack("asan-realign-stack",
 static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
        cl::desc("File containing the list of objects to ignore "
                 "during instrumentation"), cl::Hidden);
+static cl::opt<int> ClInstrumentationWithCallsThreshold(
+    "asan-instrumentation-with-call-threshold",
+       cl::desc("If the function being instrumented contains more than "
+                "this number of memory accesses, use callbacks instead of "
+                "inline checks (-1 means never use callbacks)."),
+       cl::Hidden, cl::init(7000));
+static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
+    "asan-memory-access-callback-prefix",
+       cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+       cl::init("__asan_"));
 
 // This is an experimental feature that will allow to choose between
 // instrumented and non-instrumented code at link-time.
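(Aside: the constants above parameterize ASan's shadow mapping; every check the pass emits reduces to shadow = (addr >> Scale) + Offset, which is what memToShadow() builds with IRBuilder. A freestanding sketch, with the per-platform Scale/Offset chosen in getShadowMapping() below passed in as plain integers:)

    #include <cstdint>

    // One shadow byte describes (1 << Scale) bytes of application memory.
    static uint64_t shadowFor(uint64_t Addr, unsigned Scale, uint64_t Offset) {
      return (Addr >> Scale) + Offset;
    }

    // E.g. the 32-bit defaults above: Scale = kDefaultShadowScale (3),
    // Offset = kDefaultShadowOffset32 (1ULL << 29).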
@@ -238,7 +252,7 @@ struct ShadowMapping {
 static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
   llvm::Triple TargetTriple(M.getTargetTriple());
   bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
-  // bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
+  bool IsIOS = TargetTriple.getOS() == llvm::Triple::IOS;
   bool IsFreeBSD = TargetTriple.getOS() == llvm::Triple::FreeBSD;
   bool IsLinux = TargetTriple.getOS() == llvm::Triple::Linux;
   bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 ||
@@ -256,6 +270,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
       Mapping.Offset = kMIPS32_ShadowOffset32;
     else if (IsFreeBSD)
       Mapping.Offset = kFreeBSD_ShadowOffset32;
+    else if (IsIOS)
+      Mapping.Offset = kIOSShadowOffset32;
     else
       Mapping.Offset = kDefaultShadowOffset32;
   } else {  // LongSize == 64
@@ -303,20 +319,17 @@ struct AddressSanitizer : public FunctionPass {
   const char *getPassName() const override {
     return "AddressSanitizerFunctionPass";
   }
-  void instrumentMop(Instruction *I);
+  void instrumentMop(Instruction *I, bool UseCalls);
   void instrumentPointerComparisonOrSubtraction(Instruction *I);
   void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
                          Value *Addr, uint32_t TypeSize, bool IsWrite,
-                         Value *SizeArgument);
+                         Value *SizeArgument, bool UseCalls);
   Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
                            Value *ShadowValue, uint32_t TypeSize);
   Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
                                  bool IsWrite, size_t AccessSizeIndex,
                                  Value *SizeArgument);
-  bool instrumentMemIntrinsic(MemIntrinsic *MI);
-  void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
-                                   Value *Size,
-                                   Instruction *InsertBefore, bool IsWrite);
+  void instrumentMemIntrinsic(MemIntrinsic *MI);
   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
   bool runOnFunction(Function &F) override;
   bool maybeInsertAsanInitAtFunctionEntry(Function &F);
@@ -349,8 +362,11 @@ struct AddressSanitizer : public FunctionPass {
   std::unique_ptr<SpecialCaseList> BL;
   // This array is indexed by AccessIsWrite and log2(AccessSize).
   Function *AsanErrorCallback[2][kNumberOfAccessSizes];
+  Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes];
   // This array is indexed by AccessIsWrite.
-  Function *AsanErrorCallbackSized[2];
+  Function *AsanErrorCallbackSized[2],
+           *AsanMemoryAccessCallbackSized[2];
+  Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
   InlineAsm *EmptyAsm;
   SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
 
@@ -393,6 +409,7 @@ class AddressSanitizerModule : public ModulePass {
   Function *AsanUnpoisonGlobals;
   Function *AsanRegisterGlobals;
   Function *AsanUnregisterGlobals;
+  Function *AsanCovModuleInit;
 };
 
 // Stack poisoning does not play well with exception handling.
@@ -443,11 +460,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
   bool runOnFunction() {
     if (!ClStack) return false;
     // Collect alloca, ret, lifetime instructions etc.
-    for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
-         DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
-      BasicBlock *BB = *DI;
+    for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
       visit(*BB);
-    }
+
     if (AllocaVec.empty()) return false;
 
     initializeCallbacks(*F.getParent());
@@ -590,72 +605,54 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
   return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
 }
 
-void AddressSanitizer::instrumentMemIntrinsicParam(
-    Instruction *OrigIns,
-    Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
-  IRBuilder<> IRB(InsertBefore);
-  if (Size->getType() != IntptrTy)
-    Size = IRB.CreateIntCast(Size, IntptrTy, false);
-  // Check the first byte.
-  instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size);
-  // Check the last byte.
-  IRB.SetInsertPoint(InsertBefore);
-  Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1));
-  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
-  Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne);
-  instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size);
-}
-
 // Instrument memset/memmove/memcpy
-bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
-  Value *Dst = MI->getDest();
-  MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
-  Value *Src = MemTran ? MemTran->getSource() : 0;
-  Value *Length = MI->getLength();
-
-  Constant *ConstLength = dyn_cast<Constant>(Length);
-  Instruction *InsertBefore = MI;
-  if (ConstLength) {
-    if (ConstLength->isNullValue()) return false;
-  } else {
-    // The size is not a constant so it could be zero -- check at run-time.
-    IRBuilder<> IRB(InsertBefore);
-
-    Value *Cmp = IRB.CreateICmpNE(Length,
-                                  Constant::getNullValue(Length->getType()));
-    InsertBefore = SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
+void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+  IRBuilder<> IRB(MI);
+  if (isa<MemTransferInst>(MI)) {
+    IRB.CreateCall3(
+        isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
+        IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+        IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
+  } else if (isa<MemSetInst>(MI)) {
+    IRB.CreateCall3(
+        AsanMemset,
+        IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+        IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+        IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
   }
-
-  instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
-  if (Src)
-    instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
-  return true;
+  MI->eraseFromParent();
 }
 
 // If I is an interesting memory access, return the PointerOperand
-// and set IsWrite. Otherwise return NULL.
-static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
+// and set IsWrite/Alignment. Otherwise return NULL.
+static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+                                        unsigned *Alignment) {
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    if (!ClInstrumentReads) return NULL;
+    if (!ClInstrumentReads) return nullptr;
     *IsWrite = false;
+    *Alignment = LI->getAlignment();
     return LI->getPointerOperand();
   }
   if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-    if (!ClInstrumentWrites) return NULL;
+    if (!ClInstrumentWrites) return nullptr;
     *IsWrite = true;
+    *Alignment = SI->getAlignment();
     return SI->getPointerOperand();
   }
   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
-    if (!ClInstrumentAtomics) return NULL;
+    if (!ClInstrumentAtomics) return nullptr;
     *IsWrite = true;
+    *Alignment = 0;
     return RMW->getPointerOperand();
   }
   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
-    if (!ClInstrumentAtomics) return NULL;
+    if (!ClInstrumentAtomics) return nullptr;
     *IsWrite = true;
+    *Alignment = 0;
     return XCHG->getPointerOperand();
   }
-  return NULL;
+  return nullptr;
 }
 
 static bool isPointerOperand(Value *V) {
@@ -700,9 +697,10 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
   IRB.CreateCall2(F, Param[0], Param[1]);
 }
 
-void AddressSanitizer::instrumentMop(Instruction *I) {
+void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
   bool IsWrite = false;
-  Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
+  unsigned Alignment = 0;
+  Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment);
   assert(Addr);
   if (ClOpt && ClOptGlobals) {
     if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
@@ -737,22 +735,29 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
   else
     NumInstrumentedReads++;
 
-  // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
-  if (TypeSize == 8  || TypeSize == 16 ||
-      TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
-    return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0);
-  // Instrument unusual size (but still multiple of 8).
+  unsigned Granularity = 1 << Mapping.Scale;
+  // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
+  // if the data is properly aligned.
+  if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
+       TypeSize == 128) &&
+      (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
+    return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls);
+  // Instrument unusual size or unusual alignment.
   // We can not do it with a single check, so we do 1-byte check for the first
   // and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
   // to report the actual access size.
   IRBuilder<> IRB(I);
-  Value *LastByte = IRB.CreateIntToPtr(
-      IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy),
-                    ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
-      OrigPtrTy);
   Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
-  instrumentAddress(I, I, Addr, 8, IsWrite, Size);
-  instrumentAddress(I, I, LastByte, 8, IsWrite, Size);
+  Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  if (UseCalls) {
+    IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size);
+  } else {
+    Value *LastByte = IRB.CreateIntToPtr(
+        IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+        OrigPtrTy);
+    instrumentAddress(I, I, Addr, 8, IsWrite, Size, false);
+    instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false);
+  }
 }
 
 // Validate the result of Module::getOrInsertFunction called for an interface
@@ -800,11 +805,18 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
 }
 
 void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
-                                         Instruction *InsertBefore,
-                                         Value *Addr, uint32_t TypeSize,
-                                         bool IsWrite, Value *SizeArgument) {
+                                         Instruction *InsertBefore, Value *Addr,
+                                         uint32_t TypeSize, bool IsWrite,
+                                         Value *SizeArgument, bool UseCalls) {
   IRBuilder<> IRB(InsertBefore);
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+  size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+
+  if (UseCalls) {
+    IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex],
+                   AddrLong);
+    return;
+  }
 
   Type *ShadowTy  = IntegerType::get(
       *C, std::max(8U, TypeSize >> Mapping.Scale));
@@ -815,9 +827,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
       IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
   Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
 
-  size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
   size_t Granularity = 1 << Mapping.Scale;
-  TerminatorInst *CrashTerm = 0;
+  TerminatorInst *CrashTerm = nullptr;
 
   if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
     TerminatorInst *CheckTerm =
@@ -842,8 +853,29 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
 
 void AddressSanitizerModule::createInitializerPoisonCalls(
     Module &M, GlobalValue *ModuleName) {
-  // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
-  Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+  // We do all of our poisoning and unpoisoning within a global constructor.
+  // These are called _GLOBAL__(sub_)?I_.*.
+  // TODO: Consider looking through the functions in
+  // M.getGlobalVariable("llvm.global_ctors") instead of using this stringly
+  // typed approach.
+  Function *GlobalInit = nullptr;
+  for (auto &F : M.getFunctionList()) {
+    StringRef FName = F.getName();
+
+    const char kGlobalPrefix[] = "_GLOBAL__";
+    if (!FName.startswith(kGlobalPrefix))
+      continue;
+    FName = FName.substr(strlen(kGlobalPrefix));
+
+    const char kOptionalSub[] = "sub_";
+    if (FName.startswith(kOptionalSub))
+      FName = FName.substr(strlen(kOptionalSub));
+
+    if (FName.startswith("I_")) {
+      GlobalInit = &F;
+      break;
+    }
+  }
   // If that function is not present, this TU contains no globals, or they have
   // all been optimized away
   if (!GlobalInit)
@@ -858,7 +890,7 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
 
   // Add calls to unpoison all globals before each return instruction.
   for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
-      I != E; ++I) {
+       I != E; ++I) {
     if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
       CallInst::Create(AsanUnpoisonGlobals, "", RI);
     }
@@ -902,8 +934,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
     // Ignore the globals from the __OBJC section. The ObjC runtime assumes
     // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
    // them.
-    if ((Section.find("__OBJC,") == 0) ||
-        (Section.find("__DATA, __objc_") == 0)) {
+    if (Section.startswith("__OBJC,") ||
+        Section.startswith("__DATA, __objc_")) {
      DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n");
       return false;
     }
@@ -915,16 +947,26 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
     //     is placed into __DATA,__cfstring
     // Therefore there's no point in placing redzones into __DATA,__cfstring.
     // Moreover, it causes the linker to crash on OS X 10.7
-    if (Section.find("__DATA,__cfstring") == 0) {
+    if (Section.startswith("__DATA,__cfstring")) {
       DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n");
       return false;
     }
     // The linker merges the contents of cstring_literals and removes the
     // trailing zeroes.
-    if (Section.find("__TEXT,__cstring,cstring_literals") == 0) {
+    if (Section.startswith("__TEXT,__cstring,cstring_literals")) {
       DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n");
       return false;
     }
+
+    // Callbacks put into the CRT initializer/terminator sections
+    // should not be instrumented.
+    // See https://code.google.com/p/address-sanitizer/issues/detail?id=305
+    // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx
+    if (Section.startswith(".CRT")) {
+      DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n");
+      return false;
+    }
+
     // Globals from llvm.metadata aren't emitted, do not instrument them.
     if (Section == "llvm.metadata") return false;
   }
@@ -950,6 +992,10 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
       kAsanUnregisterGlobalsName,
       IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
   AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+  AsanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction(
+      kAsanCovModuleInitName,
+      IRB.getVoidTy(), IntptrTy, NULL));
+  AsanCovModuleInit->setLinkage(Function::ExternalLinkage);
 }
 
 // This function replaces all global variables with new variables that have
@@ -980,6 +1026,14 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
     GlobalsToChange.push_back(G);
   }
 
+  Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
+  assert(CtorFunc);
+  IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+
+  Function *CovFunc = M.getFunction(kAsanCovName);
+  int nCov = CovFunc ? CovFunc->getNumUses() : 0;
+  IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov));
+
   size_t n = GlobalsToChange.size();
   if (n == 0) return false;
 
@@ -996,10 +1050,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
       IntptrTy, IntptrTy, NULL);
   SmallVector<Constant *, 16> Initializers(n);
 
-  Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
-  assert(CtorFunc);
-  IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
-
   bool HasDynamicallyInitializedGlobals = false;
 
   // We shouldn't merge same module names, as this string serves as unique
@@ -1110,12 +1160,16 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
          AccessSizeIndex++) {
       // IsWrite and TypeSize are encoded in the function name.
-      std::string FunctionName = std::string(kAsanReportErrorTemplate) +
+      std::string Suffix =
           (AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
-      // If we are merging crash callbacks, they have two parameters.
       AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
-          checkInterfaceFunction(M.getOrInsertFunction(
-              FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
+          checkInterfaceFunction(
+              M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix,
+                                    IRB.getVoidTy(), IntptrTy, NULL));
+      AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
+          checkInterfaceFunction(
+              M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix,
+                                    IRB.getVoidTy(), IntptrTy, NULL));
     }
   }
   AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
@@ -1123,8 +1177,25 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
   AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
       kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
 
-  AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
-      kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+  AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction(
+      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN",
+                            IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+  AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction(
+      M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN",
+                            IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+
+  AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction(
+      ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
+      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL));
+  AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction(
+      ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
+      IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL));
+  AsanMemset = checkInterfaceFunction(M.getOrInsertFunction(
+      ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
+      IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, NULL));
+
+  AsanHandleNoReturnFunc = checkInterfaceFunction(
+      M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
   AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
       kAsanCovName, IRB.getVoidTy(), NULL));
   AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
@@ -1142,7 +1213,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
   // Initialize the private fields. No one has accessed them before.
   DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
   if (!DLP)
-    return false;
+    report_fatal_error("data layout missing");
   DL = &DLP->getDataLayout();
 
   BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
@@ -1241,7 +1312,8 @@ bool AddressSanitizer::InjectCoverage(Function &F,
                                       const ArrayRef<BasicBlock *> AllBlocks) {
   if (!ClCoverage) return false;
 
-  if (ClCoverage == 1) {
+  if (ClCoverage == 1 ||
+      (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) {
     InjectCoverageAtBlock(F, F.getEntryBlock());
   } else {
     for (size_t i = 0, n = AllBlocks.size(); i < n; i++)
@@ -1275,6 +1347,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
   SmallVector<Instruction*, 8> PointerComparisonsOrSubtracts;
   int NumAllocas = 0;
   bool IsWrite;
+  unsigned Alignment;
 
   // Fill the set of memory operations to instrument.
for (Function::iterator FI = F.begin(), FE = F.end(); @@ -1285,7 +1358,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { if (LooksLikeCodeInBug11395(BI)) return false; - if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite)) { + if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite, &Alignment)) { if (ClOpt && ClOptSameTemp) { if (!TempsToInstrument.insert(Addr)) continue; // We've seen this temp in the current BB. @@ -1294,7 +1367,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { isInterestingPointerComparisonOrSubtraction(BI)) { PointerComparisonsOrSubtracts.push_back(BI); continue; - } else if (isa(BI) && ClMemIntrin) { + } else if (isa(BI)) { // ok, take it. } else { if (isa(BI)) @@ -1315,7 +1388,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { } } - Function *UninstrumentedDuplicate = 0; + Function *UninstrumentedDuplicate = nullptr; bool LikelyToInstrument = !NoReturnCalls.empty() || !ToInstrument.empty() || (NumAllocas > 0); if (ClKeepUninstrumented && LikelyToInstrument) { @@ -1326,14 +1399,19 @@ bool AddressSanitizer::runOnFunction(Function &F) { F.getParent()->getFunctionList().push_back(UninstrumentedDuplicate); } + bool UseCalls = false; + if (ClInstrumentationWithCallsThreshold >= 0 && + ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold) + UseCalls = true; + // Instrument. int NumInstrumented = 0; for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { Instruction *Inst = ToInstrument[i]; if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { - if (isInterestingMemoryAccess(Inst, &IsWrite)) - instrumentMop(Inst); + if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment)) + instrumentMop(Inst, UseCalls); else instrumentMemIntrinsic(cast(Inst)); } @@ -1464,12 +1542,23 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined( } } +static DebugLoc getFunctionEntryDebugLocation(Function &F) { + BasicBlock::iterator I = F.getEntryBlock().begin(), + E = F.getEntryBlock().end(); + for (; I != E; ++I) + if (!isa(I)) + break; + return I->getDebugLoc(); +} + void FunctionStackPoisoner::poisonStack() { int StackMallocIdx = -1; + DebugLoc EntryDebugLocation = getFunctionEntryDebugLocation(F); assert(AllocaVec.size() > 0); Instruction *InsBefore = AllocaVec[0]; IRBuilder<> IRB(InsBefore); + IRB.SetCurrentDebugLocation(EntryDebugLocation); SmallVector SVD; SVD.reserve(AllocaVec.size()); @@ -1493,6 +1582,7 @@ void FunctionStackPoisoner::poisonStack() { Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize); AllocaInst *MyAlloca = new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore); + MyAlloca->setDebugLoc(EntryDebugLocation); assert((ClRealignStack & (ClRealignStack - 1)) == 0); size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack); MyAlloca->setAlignment(FrameAlignment); @@ -1513,11 +1603,13 @@ void FunctionStackPoisoner::poisonStack() { Instruction *Term = SplitBlockAndInsertIfThen(Cmp, InsBefore, false); BasicBlock *CmpBlock = cast(Cmp)->getParent(); IRBuilder<> IRBIf(Term); + IRBIf.SetCurrentDebugLocation(EntryDebugLocation); LocalStackBase = IRBIf.CreateCall2( AsanStackMallocFunc[StackMallocIdx], ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase); BasicBlock *SetBlock = cast(LocalStackBase)->getParent(); IRB.SetInsertPoint(InsBefore); + IRB.SetCurrentDebugLocation(EntryDebugLocation); PHINode *Phi = IRB.CreatePHI(IntptrTy, 2); Phi->addIncoming(OrigStackBase, 
CmpBlock);
     Phi->addIncoming(LocalStackBase, SetBlock);
@@ -1654,7 +1746,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
 AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
   if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
     // We're interested only in allocas we can handle.
-    return isInterestingAlloca(*AI) ? AI : 0;
+    return isInterestingAlloca(*AI) ? AI : nullptr;
   // See if we've already calculated (or started to calculate) alloca for a
   // given value.
   AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
@@ -1662,8 +1754,8 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
     return I->second;
   // Store 0 while we're calculating alloca for value V to avoid
   // infinite recursion if the value references itself.
-  AllocaForValue[V] = 0;
-  AllocaInst *Res = 0;
+  AllocaForValue[V] = nullptr;
+  AllocaInst *Res = nullptr;
   if (CastInst *CI = dyn_cast<CastInst>(V))
     Res = findAllocaForValue(CI->getOperand(0));
   else if (PHINode *PN = dyn_cast<PHINode>(V)) {
@@ -1673,12 +1765,12 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
       if (IncValue == PN) continue;
       AllocaInst *IncValueAI = findAllocaForValue(IncValue);
       // AI for incoming values should exist and should all be equal.
-      if (IncValueAI == 0 || (Res != 0 && IncValueAI != Res))
-        return 0;
+      if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
+        return nullptr;
       Res = IncValueAI;
     }
   }
-  if (Res != 0)
+  if (Res)
     AllocaForValue[V] = Res;
   return Res;
 }
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 505fb83..9a5cea8 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "bounds-checking"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
@@ -28,6 +27,8 @@
 #include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 
+#define DEBUG_TYPE "bounds-checking"
+
 static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
                                   cl::desc("Use one trap block per function"));
 
@@ -61,7 +62,7 @@ namespace {
     BasicBlock *TrapBB;
     BasicBlock *getTrapBB();
-    void emitBranchToTrap(Value *Cmp = 0);
+    void emitBranchToTrap(Value *Cmp = nullptr);
     bool instrument(Value *Ptr, Value *Val);
  };
 }
@@ -103,7 +104,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
     if (!C->getZExtValue())
       return;
     else
-      Cmp = 0; // unconditional branch
+      Cmp = nullptr; // unconditional branch
   }
   ++ChecksAdded;
@@ -167,7 +168,7 @@ bool BoundsChecking::runOnFunction(Function &F) {
   DL = &getAnalysis<DataLayoutPass>().getDataLayout();
   TLI = &getAnalysis<TargetLibraryInfo>();
 
-  TrapBB = 0;
+  TrapBB = nullptr;
   BuilderTy TheBuilder(F.getContext(), TargetFolder(DL));
   Builder = &TheBuilder;
   ObjectSizeOffsetEvaluator TheObjSizeEval(DL, TLI, F.getContext(),
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index df1549d..7f468f7 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -211,7 +211,8 @@ class DataFlowSanitizer : public ModulePass {
  public:
   DataFlowSanitizer(StringRef ABIListFile = StringRef(),
-                    void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0);
+                    void *(*getArgTLS)() = nullptr,
+                    void *(*getRetValTLS)() = nullptr);
   static char ID;
   bool doInitialization(Module &M) override;
   bool runOnModule(Module &M) override;
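
The AddressSanitizer::initializeCallbacks hunk further up builds the runtime entry-point names by encoding the access kind and size directly into the symbol. A minimal standalone sketch of that naming convention; the helper name makeAccessCallbackName and the example prefix are illustrative, not part of the patch:

#include <string>

// Mirrors the scheme visible in the hunk: "store"/"load" plus the access
// size in bytes (1 << AccessSizeIndex), appended to a callback prefix,
// yielding names such as "__asan_report_store4" or "__asan_load8".
std::string makeAccessCallbackName(const std::string &Prefix, bool IsWrite,
                                   size_t AccessSizeIndex) {
  return Prefix + (IsWrite ? "store" : "load") +
         std::to_string(1ULL << AccessSizeIndex);
}

The same suffix is reused for both the report callbacks and the new ClMemoryAccessCallbackPrefix callbacks, which is why the patch factors it into a single Suffix string.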
@@ -233,8 +234,8 @@ struct DFSanFunction { DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI) : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), - IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0), - LabelReturnAlloca(0) {} + IsNativeABI(IsNativeABI), ArgTLSPtr(nullptr), RetvalTLSPtr(nullptr), + LabelReturnAlloca(nullptr) {} Value *getArgTLSPtr(); Value *getArgTLS(unsigned Index, Instruction *Pos); Value *getRetvalTLS(); @@ -303,7 +304,7 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) { ArgTypes.push_back(ShadowPtrTy); Type *RetType = T->getReturnType(); if (!RetType->isVoidTy()) - RetType = StructType::get(RetType, ShadowTy, (Type *)0); + RetType = StructType::get(RetType, ShadowTy, (Type *)nullptr); return FunctionType::get(RetType, ArgTypes, T->isVarArg()); } @@ -345,7 +346,7 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) { bool DataFlowSanitizer::doInitialization(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable(); if (!DLP) - return false; + report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); Mod = &M; @@ -373,18 +374,20 @@ bool DataFlowSanitizer::doInitialization(Module &M) { if (GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); - ArgTLS = 0; + ArgTLS = nullptr; GetArgTLS = ConstantExpr::getIntToPtr( ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)), PointerType::getUnqual( - FunctionType::get(PointerType::getUnqual(ArgTLSTy), (Type *)0))); + FunctionType::get(PointerType::getUnqual(ArgTLSTy), + (Type *)nullptr))); } if (GetRetvalTLSPtr) { - RetvalTLS = 0; + RetvalTLS = nullptr; GetRetvalTLS = ConstantExpr::getIntToPtr( ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)), PointerType::getUnqual( - FunctionType::get(PointerType::getUnqual(ShadowTy), (Type *)0))); + FunctionType::get(PointerType::getUnqual(ShadowTy), + (Type *)nullptr))); } ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000); @@ -554,7 +557,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { ++i; // Don't stop on weak. We assume people aren't playing games with the // instrumentedness of overridden weak aliases. - if (Function *F = dyn_cast(GA->getAliasedGlobal())) { + if (Function *F = dyn_cast(GA->getAliasee())) { bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); if (GAInst && FInst) { addGlobalNamePrefix(GA); @@ -629,7 +632,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // function... yet. } else if (FT->isVarArg()) { UnwrappedFnMap[&F] = &F; - *i = 0; + *i = nullptr; } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) { // Build a wrapper function for F. The wrapper simply calls F, and is // added to FnsToInstrument so that any instrumentation according to its @@ -680,9 +683,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // DFSanVisitor may create new basic blocks, which confuses df_iterator. // Build a copy of the list before iterating over it. 
-  llvm::SmallVector<BasicBlock *, 4> BBList;
-  std::copy(df_begin(&(*i)->getEntryBlock()), df_end(&(*i)->getEntryBlock()),
-            std::back_inserter(BBList));
+  llvm::SmallVector<BasicBlock *, 4> BBList(
+      depth_first(&(*i)->getEntryBlock()));
 
   for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(),
                                                     e = BBList.end();
@@ -1313,7 +1315,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
     }
   }
 
-  Instruction *Next = 0;
+  Instruction *Next = nullptr;
   if (!CS.getType()->isVoidTy()) {
     if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
       if (II->getNormalDest()->getSinglePredecessor()) {
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
index 069886e..18bda1a 100644
--- a/lib/Transforms/Instrumentation/DebugIR.cpp
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -16,8 +16,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "debug-ir"
-
 #include "llvm/IR/ValueMap.h"
 #include "DebugIR.h"
 #include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -42,6 +40,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "debug-ir"
+
 namespace {
 
 /// Builds a map of Value* to line numbers on which the Value appears in a
@@ -118,7 +118,7 @@ public:
 
   void visitInstruction(Instruction &I) {
     if (I.getMetadata(LLVMContext::MD_dbg))
-      I.setMetadata(LLVMContext::MD_dbg, 0);
+      I.setMetadata(LLVMContext::MD_dbg, nullptr);
   }
 
   void run(Module *M) {
@@ -168,11 +168,11 @@ class DIUpdater : public InstVisitor<DIUpdater> {
 
 public:
   DIUpdater(Module &M, StringRef Filename = StringRef(),
-            StringRef Directory = StringRef(), const Module *DisplayM = 0,
-            const ValueToValueMapTy *VMap = 0)
+            StringRef Directory = StringRef(), const Module *DisplayM = nullptr,
+            const ValueToValueMapTy *VMap = nullptr)
       : Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap),
-        Finder(), Filename(Filename), Directory(Directory), FileNode(0),
-        LexicalBlockFileNode(0), CUNode(0) {
+        Finder(), Filename(Filename), Directory(Directory), FileNode(nullptr),
+        LexicalBlockFileNode(nullptr), CUNode(nullptr) {
     Finder.processModule(M);
     visit(&M);
   }
@@ -184,7 +184,7 @@ public:
       report_fatal_error("DebugIR pass supports only a single compile unit per "
                          "Module.");
     createCompileUnit(Finder.compile_unit_count() == 1 ?
-                      (MDNode*)*Finder.compile_units().begin() : 0);
+                      (MDNode*)*Finder.compile_units().begin() : nullptr);
   }
 
   void visitFunction(Function &F) {
@@ -232,7 +232,7 @@ public:
     /// If a ValueToValueMap is provided, use it to get the real instruction as
     /// the line table was generated on a clone of the module on which we are
     /// operating.
-    Value *RealInst = 0;
+    Value *RealInst = nullptr;
     if (VMap)
       RealInst = VMap->lookup(&I);
 
@@ -256,7 +256,7 @@ public:
       NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()),
                              Loc.getInlinedAt(RealInst->getContext()));
     else if (MDNode *scope = findScope(&I))
-      NewLoc = DebugLoc::get(Line, Col, scope, 0);
+      NewLoc = DebugLoc::get(Line, Col, scope, nullptr);
     else {
       DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I
                    << ". no DebugLoc will be present."
@@ -334,7 +334,7 @@ private:
     }
     DEBUG(dbgs() << "unable to find DISubprogram node for function "
                  << F->getName().str() << "\n");
-    return 0;
+    return nullptr;
   }
 
   /// Sets Line to the line number on which V appears and returns true. If a
@@ -366,7 +366,7 @@ private:
     TypeNodeIter i = TypeDescriptors.find(T);
     if (i != TypeDescriptors.end())
       return i->second;
-    return 0;
+    return nullptr;
   }
 
   /// Returns a DebugInfo type from an LLVM type T.
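
The DFSanVisitor change at the top of this stretch swaps a df_begin/df_end copy loop for constructing the SmallVector directly from depth_first(). The motivation, per the comment kept in the patch, is that the visitor may create new basic blocks, which confuses a live df_iterator, so the traversal order is snapshotted first. A sketch of the idiom, assuming LLVM 3.5-era headers (the function name is mine, not the pass's):

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"   // GraphTraits for BasicBlock (Support/CFG.h in 3.5)
#include "llvm/IR/Function.h"
using namespace llvm;

void instrumentBlocks(Function &F) {
  // Snapshot the depth-first order before mutating the CFG; inserting
  // blocks while a df_iterator is live would invalidate the traversal.
  SmallVector<BasicBlock *, 4> BBList(depth_first(&F.getEntryBlock()));
  for (BasicBlock *BB : BBList) {
    (void)BB; // per-block instrumentation would go here
  }
}

Range-constructing the SmallVector from depth_first() is exactly what the hunk above does; it replaces the older df_begin/df_end plus std::copy into a back_inserter.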
@@ -375,12 +375,12 @@ private: if (N) return DIDerivedType(N); else if (T->isVoidTy()) - return DIDerivedType(0); + return DIDerivedType(nullptr); else if (T->isStructTy()) { N = Builder.createStructType( DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode), 0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0, - DIType(0), DIArray(0)); // filled in later + DIType(nullptr), DIArray(nullptr)); // filled in later // N is added to the map (early) so that element search below can find it, // so as to avoid infinite recursion for structs that contain pointers to @@ -535,7 +535,7 @@ void DebugIR::writeDebugBitcode(const Module *M, int *fd) { Out.reset(new raw_fd_ostream(*fd, true)); } - M->print(*Out, 0); + M->print(*Out, nullptr); Out->close(); } diff --git a/lib/Transforms/Instrumentation/DebugIR.h b/lib/Transforms/Instrumentation/DebugIR.h index 3f57da5..02831ed 100644 --- a/lib/Transforms/Instrumentation/DebugIR.h +++ b/lib/Transforms/Instrumentation/DebugIR.h @@ -90,7 +90,7 @@ private: /// Write M to disk, optionally passing in an fd to an open file which is /// closed by this function after writing. If no fd is specified, a new file /// is opened, written, and closed. - void writeDebugBitcode(const llvm::Module *M, int *fd = 0); + void writeDebugBitcode(const llvm::Module *M, int *fd = nullptr); }; } // llvm namespace diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index bd00ec8..8330a9b 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -14,8 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "insert-gcov-profiling" - #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Hashing.h" @@ -39,10 +37,13 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include +#include #include #include using namespace llvm; +#define DEBUG_TYPE "insert-gcov-profiling" + static cl::opt DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden, cl::ValueRequired); @@ -77,9 +78,6 @@ namespace { "GCOVProfiler asked to do nothing?"); init(); } - ~GCOVProfiler() { - DeleteContainerPointers(Funcs); - } const char *getPassName() const override { return "GCOV Profiler"; } @@ -141,7 +139,7 @@ namespace { Module *M; LLVMContext *Ctx; - SmallVector Funcs; + SmallVector, 16> Funcs; }; } @@ -449,6 +447,21 @@ bool GCOVProfiler::runOnModule(Module &M) { return false; } +static bool functionHasLines(Function *F) { + // Check whether this function actually has any source lines. Not only + // do these waste space, they also can crash gcov. + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); + I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Loc.getLine() != 0) + return true; + } + } + return false; +} + void GCOVProfiler::emitProfileNotes() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; @@ -474,6 +487,7 @@ void GCOVProfiler::emitProfileNotes() { Function *F = SP.getFunction(); if (!F) continue; + if (!functionHasLines(F)) continue; // gcov expects every function to start with an entry block that has a // single successor, so split the entry block to make sure of that. 
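
The GCOVProfiler hunks above replace the raw-pointer Funcs vector, and the destructor that called DeleteContainerPointers on it, with a container of unique_ptr, so the elements are released automatically. A minimal sketch of the ownership pattern; Record and Owner are stand-in names, and std::make_unique is the C++14 equivalent of the llvm::make_unique the patch uses:

#include <memory>
#include <vector>

struct Record { /* per-function state */ };

struct Owner {
  // Owning container: elements are destroyed along with the vector, so no
  // hand-written destructor (or DeleteContainerPointers call) is needed.
  std::vector<std::unique_ptr<Record>> Funcs;

  Record &add() {
    Funcs.push_back(std::make_unique<Record>());
    return *Funcs.back(); // same shape as "GCOVFunction &Func = *Funcs.back();"
  }
};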
@@ -483,19 +497,19 @@
       ++It;
       EntryBlock.splitBasicBlock(It);
 
-      GCOVFunction *Func =
-          new GCOVFunction(SP, &out, i, Options.UseCfgChecksum);
-      Funcs.push_back(Func);
+      Funcs.push_back(
+          make_unique<GCOVFunction>(SP, &out, i, Options.UseCfgChecksum));
+      GCOVFunction &Func = *Funcs.back();
 
       for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-        GCOVBlock &Block = Func->getBlock(BB);
+        GCOVBlock &Block = Func.getBlock(BB);
         TerminatorInst *TI = BB->getTerminator();
         if (int successors = TI->getNumSuccessors()) {
           for (int i = 0; i != successors; ++i) {
-            Block.addEdge(Func->getBlock(TI->getSuccessor(i)));
+            Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
           }
         } else if (isa<ReturnInst>(TI)) {
-          Block.addEdge(Func->getReturnBlock());
+          Block.addEdge(Func.getReturnBlock());
         }
 
         uint32_t Line = 0;
@@ -511,7 +525,7 @@
           Lines.addLine(Loc.getLine());
         }
       }
-      EdgeDestinations += Func->getEdgeDestinations();
+      EdgeDestinations += Func.getEdgeDestinations();
     }
 
     FileChecksums.push_back(hash_value(EdgeDestinations));
@@ -519,9 +533,7 @@
     out.write(ReversedVersion, 4);
     out.write(reinterpret_cast<char*>(&FileChecksums.back()), 4);
 
-    for (SmallVectorImpl<GCOVFunction *>::iterator I = Funcs.begin(),
-           E = Funcs.end(); I != E; ++I) {
-      GCOVFunction *Func = *I;
+    for (auto &Func : Funcs) {
       Func->setCfgChecksum(FileChecksums.back());
       Func->writeOut();
     }
@@ -549,6 +561,7 @@ bool GCOVProfiler::emitProfileArcs() {
       continue;
     Function *F = SP.getFunction();
     if (!F) continue;
+    if (!functionHasLines(F)) continue;
     if (!Result) Result = true;
     unsigned Edges = 0;
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ec1a195..b8e632e 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -93,12 +93,11 @@
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "msan"
-
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
@@ -122,6 +121,8 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "msan"
+
 static const uint64_t kShadowMask32 = 1ULL << 31;
 static const uint64_t kShadowMask64 = 1ULL << 46;
 static const uint64_t kOriginOffset32 = 1ULL << 30;
@@ -129,6 +130,9 @@ static const uint64_t kOriginOffset64 = 1ULL << 45;
 static const unsigned kMinOriginAlignment = 4;
 static const unsigned kShadowTLSAlignment = 8;
 
+// Access sizes are powers of two: 1, 2, 4, 8.
+static const size_t kNumberOfAccessSizes = 4;
+
 /// \brief Track origins of uninitialized values.
 ///
 /// Adds a section to MemorySanitizer report that points to the allocation
@@ -178,6 +182,14 @@ static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
        cl::desc("File containing the list of functions where MemorySanitizer "
                 "should not report bugs"), cl::Hidden);
 
+static cl::opt<int> ClInstrumentationWithCallThreshold(
+    "msan-instrumentation-with-call-threshold",
+    cl::desc(
+        "If the function being instrumented requires more than "
+        "this number of checks and origin stores, use callbacks instead of "
+        "inline checks (-1 means never use callbacks)."),
+    cl::Hidden, cl::init(3500));
+
 // Experimental.
Wraps all indirect calls in the instrumented code with // a call to the given function. This is needed to assist the dynamic // helper tool (MSanDR) to regain control on transition between instrumented and @@ -203,8 +215,8 @@ class MemorySanitizer : public FunctionPass { StringRef BlacklistFile = StringRef()) : FunctionPass(ID), TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)), - DL(0), - WarningFn(0), + DL(nullptr), + WarningFn(nullptr), BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile), WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {} const char *getPassName() const override { return "MemorySanitizer"; } @@ -245,6 +257,10 @@ class MemorySanitizer : public FunctionPass { /// \brief The run-time callback to print a warning. Value *WarningFn; + // These arrays are indexed by log2(AccessSize). + Value *MaybeWarningFn[kNumberOfAccessSizes]; + Value *MaybeStoreOriginFn[kNumberOfAccessSizes]; + /// \brief Run-time helper that generates a new origin value for a stack /// allocation. Value *MsanSetAllocaOrigin4Fn; @@ -321,6 +337,20 @@ void MemorySanitizer::initializeCallbacks(Module &M) { : "__msan_warning_noreturn"; WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL); + for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes; + AccessSizeIndex++) { + unsigned AccessSize = 1 << AccessSizeIndex; + std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize); + MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction( + FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), + IRB.getInt32Ty(), NULL); + + FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize); + MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction( + FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), + IRB.getInt8PtrTy(), IRB.getInt32Ty(), NULL); + } + MsanSetAllocaOrigin4Fn = M.getOrInsertFunction( "__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy(), IntptrTy, NULL); @@ -341,31 +371,32 @@ void MemorySanitizer::initializeCallbacks(Module &M) { // Create globals. 
RetvalTLS = new GlobalVariable( M, ArrayType::get(IRB.getInt64Ty(), 8), false, - GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0, + GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr, GlobalVariable::InitialExecTLSModel); RetvalOriginTLS = new GlobalVariable( - M, OriginTy, false, GlobalVariable::ExternalLinkage, 0, - "__msan_retval_origin_tls", 0, GlobalVariable::InitialExecTLSModel); + M, OriginTy, false, GlobalVariable::ExternalLinkage, nullptr, + "__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel); ParamTLS = new GlobalVariable( M, ArrayType::get(IRB.getInt64Ty(), 1000), false, - GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0, + GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr, GlobalVariable::InitialExecTLSModel); ParamOriginTLS = new GlobalVariable( M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage, - 0, "__msan_param_origin_tls", 0, GlobalVariable::InitialExecTLSModel); + nullptr, "__msan_param_origin_tls", nullptr, + GlobalVariable::InitialExecTLSModel); VAArgTLS = new GlobalVariable( M, ArrayType::get(IRB.getInt64Ty(), 1000), false, - GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0, + GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr, GlobalVariable::InitialExecTLSModel); VAArgOverflowSizeTLS = new GlobalVariable( - M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0, - "__msan_va_arg_overflow_size_tls", 0, + M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, nullptr, + "__msan_va_arg_overflow_size_tls", nullptr, GlobalVariable::InitialExecTLSModel); OriginTLS = new GlobalVariable( - M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0, - "__msan_origin_tls", 0, GlobalVariable::InitialExecTLSModel); + M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, nullptr, + "__msan_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel); // We insert an empty inline asm after __msan_report* to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), @@ -379,14 +410,14 @@ void MemorySanitizer::initializeCallbacks(Module &M) { ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL); } - if (ClWrapIndirectCallsFast) { + if (WrapIndirectCalls && ClWrapIndirectCallsFast) { MsandrModuleStart = new GlobalVariable( M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage, - 0, "__executable_start"); + nullptr, "__executable_start"); MsandrModuleStart->setVisibility(GlobalVariable::HiddenVisibility); MsandrModuleEnd = new GlobalVariable( M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage, - 0, "_end"); + nullptr, "_end"); MsandrModuleEnd->setVisibility(GlobalVariable::HiddenVisibility); } } @@ -397,7 +428,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) { bool MemorySanitizer::doInitialization(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable(); if (!DLP) - return false; + report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); @@ -474,6 +505,11 @@ VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan, MemorySanitizerVisitor &Visitor); +unsigned TypeSizeToSizeIndex(unsigned TypeSize) { + if (TypeSize <= 8) return 0; + return Log2_32_Ceil(TypeSize / 8); +} + /// This class does all the work for a given function. Store and Load /// instructions store and load corresponding shadow and origin /// values. 
Most instructions propagate shadow from arguments to their @@ -529,9 +565,42 @@ struct MemorySanitizerVisitor : public InstVisitor { return IRB.CreateCall(MS.MsanChainOriginFn, V); } - void materializeStores() { + void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin, + unsigned Alignment, bool AsCall) { + if (isa(Shadow->getType())) { + IRB.CreateAlignedStore(updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB), + Alignment); + } else { + Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); + // TODO(eugenis): handle non-zero constant shadow by inserting an + // unconditional check (can not simply fail compilation as this could + // be in the dead code). + if (isa(ConvertedShadow)) return; + unsigned TypeSizeInBits = + MS.DL->getTypeSizeInBits(ConvertedShadow->getType()); + unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); + if (AsCall && SizeIndex < kNumberOfAccessSizes) { + Value *Fn = MS.MaybeStoreOriginFn[SizeIndex]; + Value *ConvertedShadow2 = IRB.CreateZExt( + ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); + IRB.CreateCall3(Fn, ConvertedShadow2, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + updateOrigin(Origin, IRB)); + } else { + Value *Cmp = IRB.CreateICmpNE( + ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); + Instruction *CheckTerm = SplitBlockAndInsertIfThen( + Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights); + IRBuilder<> IRBNew(CheckTerm); + IRBNew.CreateAlignedStore(updateOrigin(Origin, IRBNew), + getOriginPtr(Addr, IRBNew), Alignment); + } + } + } + + void materializeStores(bool InstrumentWithCalls) { for (size_t i = 0, n = StoreList.size(); i < n; i++) { - StoreInst& I = *dyn_cast(StoreList[i]); + StoreInst &I = *dyn_cast(StoreList[i]); IRBuilder<> IRB(&I); Value *Val = I.getValueOperand(); @@ -540,53 +609,41 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); StoreInst *NewSI = - IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); + IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; - if (ClCheckAccessAddress) - insertShadowCheck(Addr, &I); + if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); - if (I.isAtomic()) - I.setOrdering(addReleaseOrdering(I.getOrdering())); + if (I.isAtomic()) I.setOrdering(addReleaseOrdering(I.getOrdering())); if (MS.TrackOrigins) { unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); - if (isa(Shadow->getType())) { - IRB.CreateAlignedStore(updateOrigin(getOrigin(Val), IRB), - getOriginPtr(Addr, IRB), Alignment); - } else { - Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); - - // TODO(eugenis): handle non-zero constant shadow by inserting an - // unconditional check (can not simply fail compilation as this could - // be in the dead code). 
- if (isa(ConvertedShadow)) - continue; - - Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, - getCleanShadow(ConvertedShadow), "_mscmp"); - Instruction *CheckTerm = - SplitBlockAndInsertIfThen(Cmp, &I, false, MS.OriginStoreWeights); - IRBuilder<> IRBNew(CheckTerm); - IRBNew.CreateAlignedStore(updateOrigin(getOrigin(Val), IRBNew), - getOriginPtr(Addr, IRBNew), Alignment); - } + storeOrigin(IRB, Addr, Shadow, getOrigin(Val), Alignment, + InstrumentWithCalls); } } } - void materializeChecks() { - for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) { - Value *Shadow = InstrumentationList[i].Shadow; - Instruction *OrigIns = InstrumentationList[i].OrigIns; - IRBuilder<> IRB(OrigIns); - DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n"); - Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); - DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n"); - // See the comment in materializeStores(). - if (isa(ConvertedShadow)) - continue; + void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin, + bool AsCall) { + IRBuilder<> IRB(OrigIns); + DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n"); + Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB); + DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n"); + // See the comment in materializeStores(). + if (isa(ConvertedShadow)) return; + unsigned TypeSizeInBits = + MS.DL->getTypeSizeInBits(ConvertedShadow->getType()); + unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits); + if (AsCall && SizeIndex < kNumberOfAccessSizes) { + Value *Fn = MS.MaybeWarningFn[SizeIndex]; + Value *ConvertedShadow2 = + IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); + IRB.CreateCall2(Fn, ConvertedShadow2, MS.TrackOrigins && Origin + ? Origin + : (Value *)IRB.getInt32(0)); + } else { Value *Cmp = IRB.CreateICmpNE(ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( @@ -595,14 +652,22 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.SetInsertPoint(CheckTerm); if (MS.TrackOrigins) { - Value *Origin = InstrumentationList[i].Origin; - IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0), + IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0), MS.OriginTLS); } IRB.CreateCall(MS.WarningFn); IRB.CreateCall(MS.EmptyAsm); DEBUG(dbgs() << " CHECK: " << *Cmp << "\n"); } + } + + void materializeChecks(bool InstrumentWithCalls) { + for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) { + Instruction *OrigIns = InstrumentationList[i].OrigIns; + Value *Shadow = InstrumentationList[i].Shadow; + Value *Origin = InstrumentationList[i].Origin; + materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls); + } DEBUG(dbgs() << "DONE:\n" << F); } @@ -662,17 +727,15 @@ struct MemorySanitizerVisitor : public InstVisitor { // Iterate all BBs in depth-first order and create shadow instructions // for all instructions (where applicable). // For PHI nodes we create dummy shadow PHIs which will be finalized later. - for (df_iterator DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock *BB = *DI; + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) visit(*BB); - } + // Finalize PHI nodes. for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) { PHINode *PN = ShadowPHINodes[i]; PHINode *PNS = cast(getShadow(PN)); - PHINode *PNO = MS.TrackOrigins ? cast(getOrigin(PN)) : 0; + PHINode *PNO = MS.TrackOrigins ? 
cast(getOrigin(PN)) : nullptr; size_t NumValues = PN->getNumIncomingValues(); for (size_t v = 0; v < NumValues; v++) { PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v)); @@ -683,12 +746,16 @@ struct MemorySanitizerVisitor : public InstVisitor { VAHelper->finalizeInstrumentation(); + bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 && + InstrumentationList.size() + StoreList.size() > + (unsigned)ClInstrumentationWithCallThreshold; + // Delayed instrumentation of StoreInst. // This may add new checks to be inserted later. - materializeStores(); + materializeStores(InstrumentWithCalls); // Insert shadow value checks. - materializeChecks(); + materializeChecks(InstrumentWithCalls); // Wrap indirect calls. materializeIndirectCalls(); @@ -704,7 +771,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// \brief Compute the shadow type that corresponds to a given Type. Type *getShadowTy(Type *OrigTy) { if (!OrigTy->isSized()) { - return 0; + return nullptr; } // For integer type, shadow is the same as the original type. // This may return weird-sized types like i1. @@ -784,7 +851,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// \brief Compute the origin address for a given function argument. Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB, int ArgOffset) { - if (!MS.TrackOrigins) return 0; + if (!MS.TrackOrigins) return nullptr; Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy); Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset)); return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0), @@ -825,7 +892,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Constant *getCleanShadow(Value *V) { Type *ShadowTy = getShadowTy(V); if (!ShadowTy) - return 0; + return nullptr; return Constant::getNullValue(ShadowTy); } @@ -845,7 +912,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Constant *getPoisonedShadow(Value *V) { Type *ShadowTy = getShadowTy(V); if (!ShadowTy) - return 0; + return nullptr; return getPoisonedShadow(ShadowTy); } @@ -936,7 +1003,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// \brief Get the origin for a value. Value *getOrigin(Value *V) { - if (!MS.TrackOrigins) return 0; + if (!MS.TrackOrigins) return nullptr; if (isa(V) || isa(V)) { Value *Origin = OriginMap[V]; if (!Origin) { @@ -1234,7 +1301,7 @@ struct MemorySanitizerVisitor : public InstVisitor { public: Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) : - Shadow(0), Origin(0), IRB(IRB), MSV(MSV) {} + Shadow(nullptr), Origin(nullptr), IRB(IRB), MSV(MSV) {} /// \brief Add a pair of shadow and origin values to the mix. Combiner &Add(Value *OpShadow, Value *OpOrigin) { @@ -1265,7 +1332,7 @@ struct MemorySanitizerVisitor : public InstVisitor { /// \brief Add an application value to the mix. Combiner &Add(Value *V) { Value *OpShadow = MSV->getShadow(V); - Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : 0; + Value *OpOrigin = MSV->MS.TrackOrigins ? 
MSV->getOrigin(V) : nullptr; return Add(OpShadow, OpOrigin); } @@ -1480,7 +1547,7 @@ struct MemorySanitizerVisitor : public InstVisitor { void handleSignedRelationalComparison(ICmpInst &I) { Constant *constOp0 = dyn_cast(I.getOperand(0)); Constant *constOp1 = dyn_cast(I.getOperand(1)); - Value* op = NULL; + Value* op = nullptr; CmpInst::Predicate pre = I.getPredicate(); if (constOp0 && constOp0->isNullValue() && (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) { @@ -1789,7 +1856,7 @@ struct MemorySanitizerVisitor : public InstVisitor { break; case 1: ConvertOp = I.getArgOperand(0); - CopyOp = NULL; + CopyOp = nullptr; break; default: llvm_unreachable("Cvt intrinsic with unsupported number of arguments."); @@ -1803,7 +1870,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // FIXME: consider propagating shadow of ConvertOp, at least in the case of // int->any conversion. Value *ConvertShadow = getShadow(ConvertOp); - Value *AggShadow = 0; + Value *AggShadow = nullptr; if (ConvertOp->getType()->isVectorTy()) { AggShadow = IRB.CreateExtractElement( ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0)); @@ -2055,7 +2122,7 @@ struct MemorySanitizerVisitor : public InstVisitor { continue; } unsigned Size = 0; - Value *Store = 0; + Value *Store = nullptr; // Compute the Shadow for arg even if it is ByVal, because // in that case getShadow() will copy the actual arg shadow to // __msan_param_tls. @@ -2080,7 +2147,7 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.CreateStore(getOrigin(A), getOriginPtrForArgument(A, IRB, ArgOffset)); (void)Store; - assert(Size != 0 && Store != 0); + assert(Size != 0 && Store != nullptr); DEBUG(dbgs() << " Param:" << *Store << "\n"); ArgOffset += DataLayout::RoundUpAlignment(Size, 8); } @@ -2098,7 +2165,7 @@ struct MemorySanitizerVisitor : public InstVisitor { // Until we have full dynamic coverage, make sure the retval shadow is 0. Value *Base = getShadowPtrForRetval(&I, IRBBefore); IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment); - Instruction *NextInsn = 0; + Instruction *NextInsn = nullptr; if (CS.isCall()) { NextInsn = I.getNextNode(); } else { @@ -2318,7 +2385,8 @@ struct VarArgAMD64Helper : public VarArgHelper { VarArgAMD64Helper(Function &F, MemorySanitizer &MS, MemorySanitizerVisitor &MSV) - : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { } + : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr), + VAArgOverflowSize(nullptr) {} enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory }; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 5ffb17c..8fe9bca 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -19,8 +19,6 @@ // The rest is handled by the run-time library. //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tsan" - #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" @@ -46,6 +44,8 @@ using namespace llvm; +#define DEBUG_TYPE "tsan" + static cl::opt ClBlacklistFile("tsan-blacklist", cl::desc("Blacklist file"), cl::Hidden); static cl::opt ClInstrumentMemoryAccesses( @@ -78,7 +78,7 @@ namespace { struct ThreadSanitizer : public FunctionPass { ThreadSanitizer(StringRef BlacklistFile = StringRef()) : FunctionPass(ID), - DL(0), + DL(nullptr), BlacklistFile(BlacklistFile.empty() ? 
ClBlacklistFile : BlacklistFile) { } const char *getPassName() const override; @@ -174,8 +174,8 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { for (int op = AtomicRMWInst::FIRST_BINOP; op <= AtomicRMWInst::LAST_BINOP; ++op) { - TsanAtomicRMW[op][i] = NULL; - const char *NamePart = NULL; + TsanAtomicRMW[op][i] = nullptr; + const char *NamePart = nullptr; if (op == AtomicRMWInst::Xchg) NamePart = "_exchange"; else if (op == AtomicRMWInst::Add) @@ -226,7 +226,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { bool ThreadSanitizer::doInitialization(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable(); if (!DLP) - return false; + report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); @@ -518,7 +518,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { if (Idx < 0) return false; Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; - if (F == NULL) + if (!F) return false; const size_t ByteSize = 1 << Idx; const size_t BitSize = ByteSize * 8; diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h index 4eac39d..4098428 100644 --- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h +++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h @@ -43,34 +43,34 @@ public: EPT_RetainAutoreleaseRV }; - ARCRuntimeEntryPoints() : TheModule(0), - AutoreleaseRV(0), - Release(0), - Retain(0), - RetainBlock(0), - Autorelease(0), - StoreStrong(0), - RetainRV(0), - RetainAutorelease(0), - RetainAutoreleaseRV(0) { } + ARCRuntimeEntryPoints() : TheModule(nullptr), + AutoreleaseRV(nullptr), + Release(nullptr), + Retain(nullptr), + RetainBlock(nullptr), + Autorelease(nullptr), + StoreStrong(nullptr), + RetainRV(nullptr), + RetainAutorelease(nullptr), + RetainAutoreleaseRV(nullptr) { } ~ARCRuntimeEntryPoints() { } void Initialize(Module *M) { TheModule = M; - AutoreleaseRV = 0; - Release = 0; - Retain = 0; - RetainBlock = 0; - Autorelease = 0; - StoreStrong = 0; - RetainRV = 0; - RetainAutorelease = 0; - RetainAutoreleaseRV = 0; + AutoreleaseRV = nullptr; + Release = nullptr; + Retain = nullptr; + RetainBlock = nullptr; + Autorelease = nullptr; + StoreStrong = nullptr; + RetainRV = nullptr; + RetainAutorelease = nullptr; + RetainAutoreleaseRV = nullptr; } Constant *get(const EntryPointType entry) { - assert(TheModule != 0 && "Not initialized."); + assert(TheModule != nullptr && "Not initialized."); switch (entry) { case EPT_AutoreleaseRV: diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 8780359..08c8842 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -20,7 +20,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-dependency" #include "ObjCARC.h" #include "DependencyAnalysis.h" #include "ProvenanceAnalysis.h" @@ -29,6 +28,8 @@ using namespace llvm; using namespace llvm::objcarc; +#define DEBUG_TYPE "objc-arc-dependency" + /// Test whether the given instruction can result in a reference count /// modification (positive or negative) for the pointer's object. bool @@ -223,7 +224,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor, pred_iterator PI(LocalStartBB), PE(LocalStartBB, false); if (PI == PE) // If we've reached the function entry, produce a null dependence. - DependingInsts.insert(0); + DependingInsts.insert(nullptr); else // Add the predecessors to the worklist. 
do { diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp index cb7e4da..1a25391 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp @@ -24,7 +24,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-ap-elim" #include "ObjCARC.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" @@ -34,6 +33,8 @@ using namespace llvm; using namespace llvm::objcarc; +#define DEBUG_TYPE "objc-arc-ap-elim" + namespace { /// \brief Autorelease pool elimination. class ObjCARCAPElim : public ModulePass { @@ -93,7 +94,7 @@ bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { bool Changed = false; - Instruction *Push = 0; + Instruction *Push = nullptr; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; switch (GetBasicInstructionClass(Inst)) { @@ -112,11 +113,11 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { Inst->eraseFromParent(); Push->eraseFromParent(); } - Push = 0; + Push = nullptr; break; case IC_CallOrUser: if (MayAutorelease(ImmutableCallSite(Inst))) - Push = 0; + Push = nullptr; break; default: break; @@ -154,8 +155,8 @@ bool ObjCARCAPElim::runOnModule(Module &M) { for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end(); OI != OE; ++OI) { Value *Op = *OI; - // llvm.global_ctors is an array of pairs where the second members - // are constructor functions. + // llvm.global_ctors is an array of three-field structs where the second + // members are constructor functions. Function *F = dyn_cast(cast(Op)->getOperand(1)); // If the user used a constructor function with the wrong signature and // it got bitcasted or whatever, look the other way. diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp index d18667b..2c09e70 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp @@ -20,7 +20,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-aa" #include "ObjCARC.h" #include "ObjCARCAliasAnalysis.h" #include "llvm/IR/Instruction.h" @@ -28,6 +27,8 @@ #include "llvm/PassAnalysisSupport.h" #include "llvm/PassSupport.h" +#define DEBUG_TYPE "objc-arc-aa" + namespace llvm { class Function; class Value; diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp index 3da5a0e..f48d53d 100644 --- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp @@ -26,7 +26,6 @@ // TODO: ObjCARCContract could insert PHI nodes when uses aren't // dominated by single calls. -#define DEBUG_TYPE "objc-arc-contract" #include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "DependencyAnalysis.h" @@ -40,6 +39,8 @@ using namespace llvm; using namespace llvm::objcarc; +#define DEBUG_TYPE "objc-arc-contract" + STATISTIC(NumPeeps, "Number of calls peephole-optimized"); STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); @@ -157,7 +158,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, // Check that there are no instructions between the retain and the autorelease // (such as an autorelease_pop) which may change the count. 
- CallInst *Retain = 0; + CallInst *Retain = nullptr; if (Class == IC_AutoreleaseRV) FindDependencies(RetainAutoreleaseRVDep, Arg, Autorelease->getParent(), Autorelease, @@ -218,7 +219,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release, BasicBlock::iterator I = Load, End = BB->end(); ++I; AliasAnalysis::Location Loc = AA->getLocation(Load); - StoreInst *Store = 0; + StoreInst *Store = nullptr; bool SawRelease = false; for (; !Store || !SawRelease; ++I) { if (I == End) @@ -300,7 +301,7 @@ bool ObjCARCContract::doInitialization(Module &M) { EP.Initialize(&M); // Initialize RetainRVMarker. - RetainRVMarker = 0; + RetainRVMarker = nullptr; if (NamedMDNode *NMD = M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) if (NMD->getNumOperands() == 1) { diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp index 8bec699..bf9fcbb 100644 --- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp @@ -23,8 +23,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-expand" - #include "ObjCARC.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" @@ -40,6 +38,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#define DEBUG_TYPE "objc-arc-expand" + namespace llvm { class Module; } diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index eed3cb2..dd4dd50 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -24,7 +24,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "objc-arc-opts" #include "ObjCARC.h" #include "ARCRuntimeEntryPoints.h" #include "DependencyAnalysis.h" @@ -44,6 +43,8 @@ using namespace llvm; using namespace llvm::objcarc; +#define DEBUG_TYPE "objc-arc-opts" + /// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific. /// @{ @@ -156,7 +157,7 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { return FindSingleUseIdentifiedObject( cast(Arg)->getArgOperand(0)); if (!IsObjCIdentifiedObject(Arg)) - return 0; + return nullptr; return Arg; } @@ -165,12 +166,12 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { if (IsObjCIdentifiedObject(Arg)) { for (const User *U : Arg->users()) if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) - return 0; + return nullptr; return Arg; } - return 0; + return nullptr; } /// This is a wrapper around getUnderlyingObjCPtr along the lines of @@ -373,7 +374,7 @@ namespace { bool CFGHazardAfflicted; RRInfo() : - KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0), + KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr), CFGHazardAfflicted(false) {} void clear(); @@ -388,7 +389,7 @@ namespace { void RRInfo::clear() { KnownSafe = false; IsTailCallRelease = false; - ReleaseMetadata = 0; + ReleaseMetadata = nullptr; Calls.clear(); ReverseInsertPts.clear(); CFGHazardAfflicted = false; @@ -397,7 +398,7 @@ void RRInfo::clear() { bool RRInfo::Merge(const RRInfo &Other) { // Conservatively merge the ReleaseMetadata information. if (ReleaseMetadata != Other.ReleaseMetadata) - ReleaseMetadata = 0; + ReleaseMetadata = nullptr; // Conservatively merge the boolean state. 
KnownSafe &= Other.KnownSafe; @@ -456,7 +457,7 @@ namespace { } bool IsTrackingImpreciseReleases() const { - return RRI.ReleaseMetadata != 0; + return RRI.ReleaseMetadata != nullptr; } const MDNode *GetReleaseMetadata() const { @@ -818,7 +819,7 @@ ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier", /// arc annotation processor tool. If the function is an static MDString *AppendMDNodeToSourcePtr(unsigned NodeId, Value *Ptr) { - MDString *Hash = 0; + MDString *Hash = nullptr; // If pointer is a result of an instruction and it does not have a source // MDNode it, attach a new MDNode onto it. If pointer is a result of @@ -880,7 +881,7 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId, MDString *PtrSourceMDNodeID, Sequence OldSeq, Sequence NewSeq) { - MDNode *Node = 0; + MDNode *Node = nullptr; Value *tmp[3] = {PtrSourceMDNodeID, SequenceToMDString(Inst->getContext(), OldSeq), @@ -916,7 +917,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, Value *PtrName; StringRef Tmp = Ptr->getName(); - if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) { Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, Tmp + "_STR"); PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, @@ -925,7 +926,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB, Value *S; std::string SeqStr = SequenceToString(Seq); - if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) { Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, SeqStr + "_STR"); S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, @@ -959,7 +960,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, Value *PtrName; StringRef Tmp = Ptr->getName(); - if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) { + if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) { Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp, Tmp + "_STR"); PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, @@ -968,7 +969,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB, Value *S; std::string SeqStr = SequenceToString(Seq); - if (0 == (S = M->getGlobalVariable(SeqStr, true))) { + if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) { Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr, SeqStr + "_STR"); S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage, @@ -1718,7 +1719,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, BBState &MyStates) { bool NestingDetected = false; InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; + const Value *Arg = nullptr; DEBUG(dbgs() << "Class: " << Class << "\n"); @@ -1974,7 +1975,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, BBState &MyStates) { bool NestingDetected = false; InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; + const Value *Arg = nullptr; switch (Class) { case IC_RetainBlock: @@ -2026,7 +2027,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, switch (OldSeq) { case S_Retain: case S_CanRelease: - if (OldSeq == S_Retain || ReleaseMetadata != 0) + if (OldSeq == S_Retain || ReleaseMetadata != nullptr) S.ClearReverseInsertPts(); // FALL THROUGH case S_Use: @@ -2432,7 +2433,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap } else { if (ReleasesToMove.ReleaseMetadata != 
NewRetainReleaseRRI.ReleaseMetadata) - ReleasesToMove.ReleaseMetadata = 0; + ReleasesToMove.ReleaseMetadata = nullptr; if (ReleasesToMove.IsTailCallRelease != NewRetainReleaseRRI.IsTailCallRelease) ReleasesToMove.IsTailCallRelease = false; @@ -2884,7 +2885,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, FindDependencies(CanChangeRetainCount, Arg, BB, Autorelease, DepInsts, Visited, PA); if (DepInsts.size() != 1) - return 0; + return nullptr; CallInst *Retain = dyn_cast_or_null(*DepInsts.begin()); @@ -2893,7 +2894,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB, if (!Retain || !IsRetain(GetBasicInstructionClass(Retain)) || GetObjCArg(Retain) != Arg) { - return 0; + return nullptr; } return Retain; @@ -2911,17 +2912,17 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB, FindDependencies(NeedsPositiveRetainCount, Arg, BB, Ret, DepInsts, V, PA); if (DepInsts.size() != 1) - return 0; + return nullptr; CallInst *Autorelease = dyn_cast_or_null(*DepInsts.begin()); if (!Autorelease) - return 0; + return nullptr; InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease); if (!IsAutorelease(AutoreleaseClass)) - return 0; + return nullptr; if (GetObjCArg(Autorelease) != Arg) - return 0; + return nullptr; return Autorelease; } diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index fa8b598..1a3a4aa 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "adce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -28,6 +27,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "adce" + STATISTIC(NumRemoved, "Number of instructions removed"); namespace { diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index 3894f93..079cc86 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -32,6 +32,7 @@ transforms_scalar_SRC_FILES := \ Scalar.cpp \ Scalarizer.cpp \ ScalarReplAggregates.cpp \ + SeparateConstOffsetFromGEP.cpp \ SimplifyCFGPass.cpp \ Sink.cpp \ StructurizeCFG.cpp \ @@ -60,11 +61,6 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES) LOCAL_MODULE:= libLLVMScalarOpts -# Override the default optimization level to work around a SIGSEGV -# on x86 target builds for SROA.cpp. 
-# Bug: 8047767 -LOCAL_CFLAGS_x86 += -O1 - LOCAL_MODULE_TAGS := optional include $(LLVM_DEVICE_BUILD_MK) diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 27434c1..3ad1488 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -5,19 +5,19 @@ add_llvm_library(LLVMScalarOpts CorrelatedValuePropagation.cpp DCE.cpp DeadStoreElimination.cpp - Scalarizer.cpp EarlyCSE.cpp - GlobalMerge.cpp + FlattenCFGPass.cpp GVN.cpp + GlobalMerge.cpp IndVarSimplify.cpp JumpThreading.cpp LICM.cpp LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp + LoopRerollPass.cpp LoopRotation.cpp LoopStrengthReduce.cpp - LoopRerollPass.cpp LoopUnrollPass.cpp LoopUnswitch.cpp LowerAtomic.cpp @@ -25,13 +25,14 @@ add_llvm_library(LLVMScalarOpts PartiallyInlineLibCalls.cpp Reassociate.cpp Reg2Mem.cpp - SampleProfile.cpp SCCP.cpp SROA.cpp + SampleProfile.cpp Scalar.cpp ScalarReplAggregates.cpp + Scalarizer.cpp + SeparateConstOffsetFromGEP.cpp SimplifyCFGPass.cpp - FlattenCFGPass.cpp Sink.cpp StructurizeCFG.cpp TailRecursionElimination.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 57a1521..763d02b 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -33,7 +33,6 @@ // %0 = load i64* inttoptr (i64 big_constant to i64*) //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "consthoist" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -44,9 +43,12 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" +#include using namespace llvm; +#define DEBUG_TYPE "consthoist" + STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); @@ -117,7 +119,8 @@ class ConstantHoisting : public FunctionPass { SmallVector ConstantVec; public: static char ID; // Pass identification, replacement for typeid - ConstantHoisting() : FunctionPass(ID), TTI(0), DT(0), Entry(0) { + ConstantHoisting() : FunctionPass(ID), TTI(nullptr), DT(nullptr), + Entry(nullptr) { initializeConstantHoistingPass(*PassRegistry::getPassRegistry()); } @@ -206,7 +209,16 @@ bool ConstantHoisting::runOnFunction(Function &Fn) { /// \brief Find the constant materialization insertion point. Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst, unsigned Idx) const { - // The simple and common case. + // If the operand is a cast instruction, then we have to materialize the + // constant before the cast instruction. + if (Idx != ~0U) { + Value *Opnd = Inst->getOperand(Idx); + if (auto CastInst = dyn_cast(Opnd)) + if (CastInst->isCast()) + return CastInst; + } + + // The simple and common case. This also includes constant expressions. if (!isa(Inst) && !isa(Inst)) return Inst; @@ -228,7 +240,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const { SmallPtrSet BBs; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) - BBs.insert(U.Inst->getParent()); + BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); if (BBs.count(Entry)) return &Entry->front(); @@ -487,8 +499,8 @@ void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset, ClonedCastInst->insertAfter(CastInst); // Use the same debug location as the original cast instruction. 
ClonedCastInst->setDebugLoc(CastInst->getDebugLoc()); - DEBUG(dbgs() << "Clone instruction: " << *ClonedCastInst << '\n' - << "To : " << *CastInst << '\n'); + DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n' + << "To : " << *ClonedCastInst << '\n'); } DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n'); diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 7045b36..dd51ce1 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -18,7 +18,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "constprop" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" @@ -31,6 +30,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "constprop" + STATISTIC(NumInstKilled, "Number of instructions killed"); namespace { @@ -68,7 +69,7 @@ bool ConstantPropagation::runOnFunction(Function &F) { } bool Changed = false; DataLayoutPass *DLP = getAnalysisIfAvailable(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; TargetLibraryInfo *TLI = &getAnalysis(); while (!WorkList.empty()) { diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 0490767..0829462 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "correlated-value-propagation" + STATISTIC(NumPhis, "Number of phis propagated"); STATISTIC(NumSelects, "Number of selects propagated"); STATISTIC(NumMemAccess, "Number of memory access targets propagated"); @@ -138,7 +139,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { } bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { - Value *Pointer = 0; + Value *Pointer = nullptr; if (LoadInst *L = dyn_cast(I)) Pointer = L->getPointerOperand(); else diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 8377fd9..99fac75 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/InstIterator.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "dce" + STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); STATISTIC(DCEEliminated, "Number of insts removed"); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index f54c00d..3af8ee7 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "dse" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/Local.h" using 
namespace llvm; +#define DEBUG_TYPE "dse" + STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); @@ -49,7 +50,7 @@ namespace { const TargetLibraryInfo *TLI; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) { + DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) { initializeDSEPass(*PassRegistry::getPassRegistry()); } @@ -69,7 +70,7 @@ namespace { if (DT->isReachableFromEntry(I)) Changed |= runOnBasicBlock(*I); - AA = 0; MD = 0; DT = 0; + AA = nullptr; MD = nullptr; DT = nullptr; return Changed; } @@ -111,9 +112,9 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// If ValueSet is non-null, remove any deleted instructions from it as well. /// static void DeleteDeadInstruction(Instruction *I, - MemoryDependenceAnalysis &MD, - const TargetLibraryInfo *TLI, - SmallSetVector *ValueSet = 0) { + MemoryDependenceAnalysis &MD, + const TargetLibraryInfo *TLI, + SmallSetVector *ValueSet = nullptr) { SmallVector NowDeadInsts; NowDeadInsts.push_back(I); @@ -131,7 +132,7 @@ static void DeleteDeadInstruction(Instruction *I, for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); + DeadInst->setOperand(op, nullptr); // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; @@ -203,13 +204,13 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { // If we don't have target data around, an unknown size in Location means // that we should use the size of the pointee type. This isn't valid for // memset/memcpy, which writes more than an i8. - if (Loc.Size == AliasAnalysis::UnknownSize && DL == 0) + if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr) return AliasAnalysis::Location(); return Loc; } IntrinsicInst *II = dyn_cast(Inst); - if (II == 0) return AliasAnalysis::Location(); + if (!II) return AliasAnalysis::Location(); switch (II->getIntrinsicID()) { default: return AliasAnalysis::Location(); // Unhandled intrinsic. @@ -217,7 +218,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { // If we don't have target data around, an unknown size in Location means // that we should use the size of the pointee type. This isn't valid for // init.trampoline, which writes more than an i8. - if (DL == 0) return AliasAnalysis::Location(); + if (!DL) return AliasAnalysis::Location(); // FIXME: We don't know the size of the trampoline, so we can't really // handle it here. @@ -359,7 +360,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // If we have no DataLayout information around, then the size of the store // is inferrable from the pointee type. If they are the same type, then // we know that the store is safe. - if (DL == 0 && Later.Ptr->getType() == Earlier.Ptr->getType()) + if (DL == nullptr && Later.Ptr->getType() == Earlier.Ptr->getType()) return OverwriteComplete; return OverwriteUnknown; @@ -373,7 +374,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. 
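[Aside: these DeadStoreElimination changes continue the tree-wide migration from 0/NULL to nullptr. A small self-contained example of the motivation, independent of LLVM: nullptr has its own type, std::nullptr_t, so it never competes with integer overloads.]

    #include <cstddef>

    void take(int)  {}
    void take(int*) {}

    int main() {
      take(0);        // selects take(int): 0 is an int before it is a pointer
      // take(NULL); // ambiguous or take(int), depending on how NULL expands
      take(nullptr);  // unambiguously selects take(int*)
    }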
if (Later.Size == AliasAnalysis::UnknownSize || - Earlier.Size == AliasAnalysis::UnknownSize || DL == 0) + Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr) return OverwriteUnknown; // Check to see if the later store is to the entire object (either a global, @@ -461,7 +462,7 @@ static bool isPossibleSelfRead(Instruction *Inst, // Self reads can only happen for instructions that read memory. Get the // location read. AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA); - if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction. + if (!InstReadLoc.Ptr) return false; // Not a reading instruction. // If the read and written loc obviously don't alias, it isn't a read. if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false; @@ -528,7 +529,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { DeleteDeadInstruction(SI, *MD, TLI); - if (NextInst == 0) // Next instruction deleted. + if (!NextInst) // Next instruction deleted. BBI = BB.begin(); else if (BBI != BB.begin()) // Revisit this instruction if possible. --BBI; @@ -543,7 +544,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA); // If we didn't get a useful location, fail. - if (Loc.Ptr == 0) + if (!Loc.Ptr) continue; while (InstDep.isDef() || InstDep.isClobber()) { @@ -557,7 +558,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *DepWrite = InstDep.getInst(); AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); // If we didn't get a useful location, or if it isn't a size, bail out. - if (DepLoc.Ptr == 0) + if (!DepLoc.Ptr) break; // If we find a write that is a) removable (i.e., non-volatile), b) is diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index af2c3d1..735f5c1 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "early-cse" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" @@ -29,6 +28,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "early-cse" + STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd"); STATISTIC(NumCSE, "Number of instructions CSE'd"); STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); @@ -207,7 +208,7 @@ namespace { return false; CallInst *CI = dyn_cast(Inst); - if (CI == 0 || !CI->onlyReadsMemory()) + if (!CI || !CI->onlyReadsMemory()) return false; return true; } @@ -405,14 +406,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // have invalidated the live-out memory values of our parent value. For now, // just be conservative and invalidate memory if this block has multiple // predecessors. - if (BB->getSinglePredecessor() == 0) + if (!BB->getSinglePredecessor()) ++CurrentGeneration; /// LastStore - Keep track of the last non-volatile store that we saw... for /// as long as there in no instruction that reads memory. If we see a store /// to the same location, we delete the dead store. This zaps trivial dead /// stores which can occur in bitfield code among other things. - StoreInst *LastStore = 0; + StoreInst *LastStore = nullptr; bool Changed = false; @@ -462,7 +463,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (LoadInst *LI = dyn_cast(Inst)) { // Ignore volatile loads. 
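[Aside: for context on the EarlyCSE hunks above, remembered loads and calls are stamped with the generation they were recorded in, and bumping CurrentGeneration invalidates all of them at once without clearing any table. A simplified sketch of that scheme with standard containers; the real pass uses scoped hash tables keyed on LLVM values.]

    #include <unordered_map>
    #include <utility>

    struct LoadTable {
      // pointer operand -> (remembered value, generation it was recorded in)
      std::unordered_map<const void *, std::pair<void *, unsigned>> Avail;
      unsigned CurGen = 0;

      void *lookup(const void *Ptr) const {
        auto It = Avail.find(Ptr);
        // A hit only counts if memory has not changed since it was recorded.
        return (It != Avail.end() && It->second.second == CurGen)
                   ? It->second.first : nullptr;
      }
      void record(const void *Ptr, void *Val) { Avail[Ptr] = {Val, CurGen}; }
      void clobberMemory() { ++CurGen; } // a store, or a multi-pred block
    };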
if (!LI->isSimple()) { - LastStore = 0; + LastStore = nullptr; continue; } @@ -470,7 +471,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // generation, replace this instruction. std::pair InVal = AvailableLoads->lookup(Inst->getOperand(0)); - if (InVal.first != 0 && InVal.second == CurrentGeneration) { + if (InVal.first != nullptr && InVal.second == CurrentGeneration) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: " << *InVal.first << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); @@ -483,20 +484,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Otherwise, remember that we have this instruction. AvailableLoads->insert(Inst->getOperand(0), std::pair(Inst, CurrentGeneration)); - LastStore = 0; + LastStore = nullptr; continue; } // If this instruction may read from memory, forget LastStore. if (Inst->mayReadFromMemory()) - LastStore = 0; + LastStore = nullptr; // If this is a read-only call, process it. if (CallValue::canHandle(Inst)) { // If we have an available version of this call, and if it is the right // generation, replace this instruction. std::pair InVal = AvailableCalls->lookup(Inst); - if (InVal.first != 0 && InVal.second == CurrentGeneration) { + if (InVal.first != nullptr && InVal.second == CurrentGeneration) { DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: " << *InVal.first << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first); @@ -528,7 +529,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LastStore->eraseFromParent(); Changed = true; ++NumDSE; - LastStore = 0; + LastStore = nullptr; continue; } @@ -558,7 +559,7 @@ bool EarlyCSE::runOnFunction(Function &F) { std::vector nodesToProcess; DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? 
&DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); DT = &getAnalysis().getDomTree(); diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index e7f2564..0430c18 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "flattencfg" #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/CFG.h" @@ -19,6 +18,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "flattencfg" + namespace { struct FlattenCFGPass : public FunctionPass { static char ID; // Pass identification, replacement for typeid diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 33c387c..6d07ddd 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -15,11 +15,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "gvn" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -50,6 +50,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "gvn" + STATISTIC(NumGVNInstr, "Number of instructions deleted"); STATISTIC(NumGVNLoad, "Number of loads deleted"); STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); @@ -213,13 +215,13 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode, } Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) { - assert(EI != 0 && "Not an ExtractValueInst?"); + assert(EI && "Not an ExtractValueInst?"); Expression e; e.type = EI->getType(); e.opcode = 0; IntrinsicInst *I = dyn_cast(EI->getAggregateOperand()); - if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { + if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) { // EI might be an extract from one of our recognised intrinsics. If it // is we'll synthesize a semantically equivalent expression instead on // an extract value expression. @@ -327,7 +329,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { const MemoryDependenceAnalysis::NonLocalDepInfo &deps = MD->getNonLocalCallDependency(CallSite(C)); // FIXME: Move the checking logic to MemDep! - CallInst* cdep = 0; + CallInst* cdep = nullptr; // Check to see if we have a single dominating call instruction that is // identical to C. @@ -338,8 +340,8 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { // We don't handle non-definitions. If we already have a call, reject // instruction dependencies. 
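[Aside: the call value-numbering above walks non-local dependencies looking for a single dominating identical call, leaning on the casting utilities used throughout these files. A reminder of the idiom, using the standard llvm/Support/Casting.h API; the function below is purely illustrative.]

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    void classify(Value *V) {
      // dyn_cast: V must be non-null; yields nullptr on a type mismatch.
      if (auto *CI = dyn_cast<CallInst>(V))
        (void)CI;
      // dyn_cast_or_null: additionally tolerates a null input, which is why
      // code handling maybe-absent dependence results prefers it.
      Value *Maybe = nullptr;
      if (auto *CI = dyn_cast_or_null<CallInst>(Maybe))
        (void)CI;
    }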
- if (!I->getResult().isDef() || cdep != 0) { - cdep = 0; + if (!I->getResult().isDef() || cdep != nullptr) { + cdep = nullptr; break; } @@ -350,7 +352,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { continue; } - cdep = 0; + cdep = nullptr; break; } @@ -551,7 +553,7 @@ namespace { static AvailableValueInBlock getUndef(BasicBlock *BB) { AvailableValueInBlock Res; Res.BB = BB; - Res.Val.setPointer(0); + Res.Val.setPointer(nullptr); Res.Val.setInt(UndefVal); Res.Offset = 0; return Res; @@ -611,7 +613,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { + : FunctionPass(ID), NoLoads(noloads), MD(nullptr) { initializeGVNPass(*PassRegistry::getPassRegistry()); } @@ -649,7 +651,7 @@ namespace { /// removeFromLeaderTable - Scan the list of values corresponding to a given /// value number, and remove the given instruction if encountered. void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) { - LeaderTableEntry* Prev = 0; + LeaderTableEntry* Prev = nullptr; LeaderTableEntry* Curr = &LeaderTable[N]; while (Curr->Val != I || Curr->BB != BB) { @@ -661,8 +663,8 @@ namespace { Prev->Next = Curr->Next; } else { if (!Curr->Next) { - Curr->Val = 0; - Curr->BB = 0; + Curr->Val = nullptr; + Curr->BB = nullptr; } else { LeaderTableEntry* Next = Curr->Next; Curr->Val = Next->Val; @@ -855,7 +857,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Instruction *InsertPt, const DataLayout &DL) { if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL)) - return 0; + return nullptr; // If this is already the right type, just return it. Type *StoredValTy = StoredVal->getType(); @@ -1060,7 +1062,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, const DataLayout &DL) { // If the mem operation is a non-constant size, we can't handle it. ConstantInt *SizeCst = dyn_cast(MI->getLength()); - if (SizeCst == 0) return -1; + if (!SizeCst) return -1; uint64_t MemSizeInBits = SizeCst->getZExtValue()*8; // If this is memset, we just need to see if the offset is valid in the size @@ -1075,10 +1077,10 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr, MemTransferInst *MTI = cast(MI); Constant *Src = dyn_cast(MTI->getSource()); - if (Src == 0) return -1; + if (!Src) return -1; GlobalVariable *GV = dyn_cast(GetUnderlyingObject(Src, &DL)); - if (GV == 0 || !GV->isConstant()) return -1; + if (!GV || !GV->isConstant()) return -1; // See if the access is within the bounds of the transfer. int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, @@ -1420,8 +1422,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, // If this is a clobber and L is the first instruction in its block, then // we have the first instruction in the entry block. if (DepLI != LI && Address && DL) { - int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), - LI->getPointerOperand(), + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address, DepLI, *DL); if (Offset != -1) { @@ -1469,8 +1470,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (S->getValueOperand()->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. 
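[Aside: CanCoerceMustAliasedValueToLoad guards the forwarding above: a store may satisfy a narrower load of the same address by reinterpreting the stored bits instead of reloading. A rough source-level analogue, illustrative only; GVN does this through DataLayout, which also accounts for endianness, so this snippet assumes little-endian.]

    #include <cstdint>
    #include <cstring>

    // A 4-byte "store" followed by a 2-byte "load" of the same address can
    // be satisfied from the stored value without touching memory again.
    uint16_t lowHalf(uint32_t V) {
      unsigned char Buf[4];
      std::memcpy(Buf, &V, 4); // the store
      uint16_t L;
      std::memcpy(&L, Buf, 2); // the load GVN forwards (little-endian)
      return L;
    }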
- if (DL == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), - LI->getType(), *DL)) { + if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(), + LI->getType(), *DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1486,7 +1487,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, if (LD->getType() != LI->getType()) { // If the stored value is larger or equal to the loaded value, we can // reuse it. - if (DL == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)){ + if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) { UnavailableBlocks.push_back(DepBB); continue; } @@ -1539,7 +1540,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check to see how many predecessors have the loaded value fully // available. - DenseMap PredLoads; + MapVector PredLoads; DenseMap FullyAvailableBlocks; for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; @@ -1553,7 +1554,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { continue; } - PredLoads[Pred] = 0; if (Pred->getTerminator()->getNumSuccessors() != 1) { if (isa(Pred->getTerminator())) { @@ -1570,11 +1570,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, } CriticalEdgePred.push_back(Pred); + } else { + // Only add the predecessors that will not be split for now. + PredLoads[Pred] = nullptr; } } // Decide whether PRE is profitable for this load. - unsigned NumUnavailablePreds = PredLoads.size(); + unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size(); assert(NumUnavailablePreds != 0 && "Fully available value should already be eliminated!"); @@ -1586,12 +1589,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; // Split critical edges, and update the unavailable predecessors accordingly. - for (SmallVectorImpl::iterator I = CriticalEdgePred.begin(), - E = CriticalEdgePred.end(); I != E; I++) { - BasicBlock *OrigPred = *I; + for (BasicBlock *OrigPred : CriticalEdgePred) { BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB); - PredLoads.erase(OrigPred); - PredLoads[NewPred] = 0; + assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!"); + PredLoads[NewPred] = nullptr; DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->" << LoadBB->getName() << '\n'); } @@ -1599,9 +1600,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check if the load can safely be moved to all the unavailable predecessors. bool CanDoPRE = true; SmallVector NewInsts; - for (DenseMap::iterator I = PredLoads.begin(), - E = PredLoads.end(); I != E; ++I) { - BasicBlock *UnavailablePred = I->first; + for (auto &PredLoad : PredLoads) { + BasicBlock *UnavailablePred = PredLoad.first; // Do PHI translation to get its value in the predecessor if necessary. The // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. @@ -1610,20 +1610,20 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // the load on the pred (?!?), so we can insert code to materialize the // pointer if it is not available. 
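[Aside: the switch from DenseMap to MapVector for PredLoads matters because the map is iterated while the new loads are materialized; MapVector visits entries in insertion order, so the emitted IR no longer depends on pointer hashing. A minimal usage sketch.]

    #include "llvm/ADT/MapVector.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    void example(BasicBlock *A, BasicBlock *B) {
      MapVector<BasicBlock *, Value *> PredLoads;
      PredLoads[A] = nullptr; // recorded first
      PredLoads[B] = nullptr; // recorded second
      for (auto &Entry : PredLoads) {
        // Visits A then B on every run, unlike DenseMap's hash order.
        (void)Entry;
      }
    }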
PHITransAddr Address(LI->getPointerOperand(), DL); - Value *LoadPtr = 0; + Value *LoadPtr = nullptr; LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT, NewInsts); // If we couldn't find or insert a computation of this phi translated value, // we fail PRE. - if (LoadPtr == 0) { + if (!LoadPtr) { DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " << *LI->getPointerOperand() << "\n"); CanDoPRE = false; break; } - I->second = LoadPtr; + PredLoad.second = LoadPtr; } if (!CanDoPRE) { @@ -1632,8 +1632,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (MD) MD->removeInstruction(I); I->eraseFromParent(); } - // HINT:Don't revert the edge-splitting as following transformation may - // also need to split these critial edges. + // HINT: Don't revert the edge-splitting as following transformation may + // also need to split these critical edges. return !CriticalEdgePred.empty(); } @@ -1654,10 +1654,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, VN.lookup_or_add(NewInsts[i]); } - for (DenseMap::iterator I = PredLoads.begin(), - E = PredLoads.end(); I != E; ++I) { - BasicBlock *UnavailablePred = I->first; - Value *LoadPtr = I->second; + for (const auto &PredLoad : PredLoads) { + BasicBlock *UnavailablePred = PredLoad.first; + Value *LoadPtr = PredLoad.second; Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, LI->getAlignment(), @@ -1776,7 +1775,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { MDNode *ReplMD = Metadata[i].second; switch(Kind) { default: - ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata + ReplInst->setMetadata(Kind, nullptr); // Remove unknown metadata break; case LLVMContext::MD_dbg: llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg"); @@ -1832,7 +1831,7 @@ bool GVN::processLoad(LoadInst *L) { // a common base + constant offset, and if the previous store (or memset) // completely covers this load. This sort of thing can happen in bitfield // access code. - Value *AvailVal = 0; + Value *AvailVal = nullptr; if (StoreInst *DepSI = dyn_cast(Dep.getInst())) { int Offset = AnalyzeLoadFromClobberingStore(L->getType(), L->getPointerOperand(), @@ -1920,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) { if (DL) { StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(), L, *DL); - if (StoredVal == 0) + if (!StoredVal) return false; DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal @@ -1949,7 +1948,7 @@ bool GVN::processLoad(LoadInst *L) { if (DL) { AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L, *DL); - if (AvailableVal == 0) + if (!AvailableVal) return false; DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal @@ -1999,9 +1998,9 @@ bool GVN::processLoad(LoadInst *L) { // a few comparisons of DFS numbers. 
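[Aside: two hunks just below replace explicit df_iterator begin/end pairs with the depth_first range adaptor from llvm/ADT/DepthFirstIterator.h, which works for any graph type with a GraphTraits specialization. A self-contained sketch of the same idiom; the counting function is hypothetical.]

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    unsigned countReachableBlocks(Function &F) {
      unsigned N = 0;
      // Equivalent to the df_begin()/df_end() pair, but readable at a glance.
      for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
        (void)BB;
        ++N;
      }
      return N;
    }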
Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) { LeaderTableEntry Vals = LeaderTable[num]; - if (!Vals.Val) return 0; + if (!Vals.Val) return nullptr; - Value *Val = 0; + Value *Val = nullptr; if (DT->dominates(Vals.BB, BB)) { Val = Vals.Val; if (isa(Val)) return Val; @@ -2052,7 +2051,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, const BasicBlock *Src = E.getStart(); assert((!Pred || Pred == Src) && "No edge between these basic blocks!"); (void)Src; - return Pred != 0; + return Pred != nullptr; } /// propagateEquality - The given values are known to be equal in every block @@ -2296,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) { // Perform fast-path value-number based elimination of values inherited from // dominators. Value *repl = findLeader(I->getParent(), Num); - if (repl == 0) { + if (!repl) { // Failure, just remember this instance for future use. addToLeaderTable(Num, I, I->getParent()); return false; @@ -2319,7 +2318,7 @@ bool GVN::runOnFunction(Function& F) { MD = &getAnalysis(); DT = &getAnalysis().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); VN.setAliasAnalysis(&getAnalysis()); VN.setMemDep(MD); @@ -2421,10 +2420,7 @@ bool GVN::processBlock(BasicBlock *BB) { bool GVN::performPRE(Function &F) { bool Changed = false; SmallVector, 8> predMap; - for (df_iterator DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { - BasicBlock *CurrentBlock = *DI; - + for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) { // Nothing to PRE in the entry block. if (CurrentBlock == &F.getEntryBlock()) continue; @@ -2464,7 +2460,7 @@ bool GVN::performPRE(Function &F) { // more complicated to get right. unsigned NumWith = 0; unsigned NumWithout = 0; - BasicBlock *PREPred = 0; + BasicBlock *PREPred = nullptr; predMap.clear(); for (pred_iterator PI = pred_begin(CurrentBlock), @@ -2482,8 +2478,8 @@ bool GVN::performPRE(Function &F) { } Value* predV = findLeader(P, ValNo); - if (predV == 0) { - predMap.push_back(std::make_pair(static_cast(0), P)); + if (!predV) { + predMap.push_back(std::make_pair(static_cast(nullptr), P)); PREPred = P; ++NumWithout; } else if (predV == CurInst) { @@ -2637,9 +2633,8 @@ bool GVN::iterateOnFunction(Function &F) { // std::vector BBVect; BBVect.reserve(256); - for (df_iterator DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) - BBVect.push_back(DI->getBlock()); + for (DomTreeNode *x : depth_first(DT->getRootNode())) + BBVect.push_back(x->getBlock()); for (std::vector::iterator I = BBVect.begin(), E = BBVect.end(); I != E; I++) diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 8ffd64b..990d067 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -51,7 +51,6 @@ // note that we saved 2 registers here almostly "for free". 
// ===---------------------------------------------------------------------===// -#define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -70,6 +69,8 @@ #include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; +#define DEBUG_TYPE "global-merge" + cl::opt EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), @@ -107,7 +108,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - explicit GlobalMerge(const TargetMachine *TM = 0) + explicit GlobalMerge(const TargetMachine *TM = nullptr) : FunctionPass(ID), TM(TM) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -173,7 +174,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, GlobalValue::InternalLinkage, MergedInit, "_MergedGlobals", - 0, GlobalVariable::NotThreadLocal, + nullptr, + GlobalVariable::NotThreadLocal, AddrSpace); for (size_t k = i; k < j; ++k) { Constant *Idx[2] = { diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 7537632..e83a5c4 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -24,7 +24,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "indvars" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" @@ -50,6 +49,8 @@ #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; +#define DEBUG_TYPE "indvars" + STATISTIC(NumWidened , "Number of indvars widened"); STATISTIC(NumReplaced , "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); @@ -79,8 +80,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid - IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), DL(0), - Changed(false) { + IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), + DL(nullptr), Changed(false) { initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); } @@ -196,7 +197,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, if (!PHI) return User; - Instruction *InsertPt = 0; + Instruction *InsertPt = nullptr; for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) { if (PHI->getIncomingValue(i) != Def) continue; @@ -257,13 +258,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // an add or increment value can not be represented by an integer. BinaryOperator *Incr = dyn_cast(PN->getIncomingValue(BackEdge)); - if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; + if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return; // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. 
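[Aside: HandleFloatingPointIV, continued below, rewrites a floating-point induction variable as an integer one when the start, step, and exit values all convert to integers exactly. At the source level the transformation looks roughly like this; illustrative C++, with body as a placeholder.]

    void body(int);

    // Before: a float IV. Rewriting is legal here because 0.0, 1.0 and
    // 100.0 all convert to int without rounding, so the trip count is exact.
    void loopBefore() {
      for (float F = 0.0f; F != 100.0f; F += 1.0f)
        body(static_cast<int>(F));
    }

    // After: the integer IV indvars produces, which SCEV can analyze.
    void loopAfter() {
      for (int I = 0; I != 100; ++I)
        body(I);
    }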
ConstantFP *IncValueVal = dyn_cast(Incr->getOperand(1)); int64_t IncValue; - if (IncValueVal == 0 || Incr->getOperand(0) != PN || + if (IncValueVal == nullptr || Incr->getOperand(0) != PN || !ConvertToSInt(IncValueVal->getValueAPF(), IncValue)) return; @@ -280,7 +281,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { FCmpInst *Compare = dyn_cast(U1); if (!Compare) Compare = dyn_cast(U2); - if (Compare == 0 || !Compare->hasOneUse() || + if (!Compare || !Compare->hasOneUse() || !isa(Compare->user_back())) return; @@ -301,7 +302,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // transform it. ConstantFP *ExitValueVal = dyn_cast(Compare->getOperand(1)); int64_t ExitValue; - if (ExitValueVal == 0 || + if (ExitValueVal == nullptr || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; @@ -651,7 +652,8 @@ namespace { Type *WidestNativeType; // Widest integer type created [sz]ext bool IsSigned; // Was an sext user seen before a zext? - WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {} + WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr), + IsSigned(false) {} }; } @@ -693,7 +695,7 @@ struct NarrowIVDefUse { Instruction *NarrowUse; Instruction *WideDef; - NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {} + NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {} NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} @@ -736,9 +738,9 @@ public: L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), - WidePhi(0), - WideInc(0), - WideIncExpr(0), + WidePhi(nullptr), + WideInc(nullptr), + WideIncExpr(nullptr), DeadInsts(DI) { assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); } @@ -793,7 +795,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: - return 0; + return nullptr; case Instruction::Add: case Instruction::Mul: case Instruction::UDiv: @@ -838,14 +840,14 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { // Handle the common case of add if (DU.NarrowUse->getOpcode() != Instruction::Add) - return 0; + return nullptr; // One operand (NarrowDef) has already been extended to WideDef. Now determine // if extending the other will lead to a recurrence. unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0; assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU"); - const SCEV *ExtendOperExpr = 0; + const SCEV *ExtendOperExpr = nullptr; const OverflowingBinaryOperator *OBO = cast(DU.NarrowUse); if (IsSigned && OBO->hasNoSignedWrap()) @@ -855,7 +857,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { ExtendOperExpr = SE->getZeroExtendExpr( SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType); else - return 0; + return nullptr; // When creating this AddExpr, don't apply the current operations NSW or NUW // flags. This instruction may be guarded by control flow that the no-wrap @@ -866,7 +868,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr)); if (!AddRec || AddRec->getLoop() != L) - return 0; + return nullptr; return AddRec; } @@ -877,14 +879,14 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { /// recurrence. Otherwise return NULL. 
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { if (!SE->isSCEVable(NarrowUse->getType())) - return 0; + return nullptr; const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); if (SE->getTypeSizeInBits(NarrowExpr->getType()) >= SE->getTypeSizeInBits(WideType)) { // NarrowUse implicitly widens its operand. e.g. a gep with a narrow // index. So don't follow this use. - return 0; + return nullptr; } const SCEV *WideExpr = IsSigned ? @@ -892,7 +894,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { SE->getZeroExtendExpr(NarrowExpr, WideType); const SCEVAddRecExpr *AddRec = dyn_cast(WideExpr); if (!AddRec || AddRec->getLoop() != L) - return 0; + return nullptr; return AddRec; } @@ -930,7 +932,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to " << *WidePhi << "\n"); } - return 0; + return nullptr; } } // Our raison d'etre! Eliminate sign and zero extension. @@ -968,7 +970,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // push the uses of WideDef here. // No further widening is needed. The deceased [sz]ext had done it for us. - return 0; + return nullptr; } // Does this user itself evaluate to a recurrence after widening? @@ -981,7 +983,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. truncateIVUse(DU, DT); - return 0; + return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to // insert a Trunc after a terminator if there happens to be a critical edge. @@ -990,14 +992,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Reuse the IV increment that SCEVExpander created as long as it dominates // NarrowUse. - Instruction *WideUse = 0; + Instruction *WideUse = nullptr; if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) WideUse = WideInc; else { WideUse = CloneIVUser(DU); if (!WideUse) - return 0; + return nullptr; } // Evaluation of WideAddRec ensured that the narrow expression could be // extended outside the loop without overflow. This suggests that the wide use @@ -1008,7 +1010,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n"); DeadInsts.push_back(WideUse); - return 0; + return nullptr; } // Returning WideUse pushes it on the worklist. @@ -1043,7 +1045,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); if (!AddRec) - return NULL; + return nullptr; // Widen the induction variable expression. const SCEV *WideIVExpr = IsSigned ? @@ -1056,7 +1058,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Can the IV be extended outside the loop without overflow? AddRec = dyn_cast(WideIVExpr); if (!AddRec || AddRec->getLoop() != L) - return NULL; + return nullptr; // An AddRec must have loop-invariant operands. 
Since this AddRec is // materialized by a loop header phi, the expression cannot have any post-loop @@ -1282,7 +1284,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { Instruction *IncI = dyn_cast(IncV); if (!IncI) - return 0; + return nullptr; switch (IncI->getOpcode()) { case Instruction::Add: @@ -1293,17 +1295,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { if (IncI->getNumOperands() == 2) break; default: - return 0; + return nullptr; } PHINode *Phi = dyn_cast(IncI->getOperand(0)); if (Phi && Phi->getParent() == L->getHeader()) { if (isLoopInvariant(IncI->getOperand(1), L, DT)) return Phi; - return 0; + return nullptr; } if (IncI->getOpcode() == Instruction::GetElementPtr) - return 0; + return nullptr; // Allow add/sub to be commuted. Phi = dyn_cast(IncI->getOperand(1)); @@ -1311,7 +1313,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { if (isLoopInvariant(IncI->getOperand(0), L, DT)) return Phi; } - return 0; + return nullptr; } /// Return the compare guarding the loop latch, or NULL for unrecognized tests. @@ -1321,7 +1323,7 @@ static ICmpInst *getLoopTest(Loop *L) { BasicBlock *LatchBlock = L->getLoopLatch(); // Don't bother with LFTR if the loop is not properly simplified. if (!LatchBlock) - return 0; + return nullptr; BranchInst *BI = dyn_cast(L->getExitingBlock()->getTerminator()); assert(BI && "expected exit branch"); @@ -1446,8 +1448,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount, cast(L->getExitingBlock()->getTerminator())->getCondition(); // Loop over all of the PHI nodes, looking for a simple counter. - PHINode *BestPhi = 0; - const SCEV *BestInit = 0; + PHINode *BestPhi = nullptr; + const SCEV *BestInit = nullptr; BasicBlock *LatchBlock = L->getLoopLatch(); assert(LatchBlock && "needsLFTR should guarantee a loop latch"); @@ -1571,7 +1573,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, // IVInit integer and IVCount pointer would only occur if a canonical IV // were generated on top of case #2, which is not expected. - const SCEV *IVLimit = 0; + const SCEV *IVLimit = nullptr; // For unit stride, IVCount = Start + BECount with 2's complement overflow. // For non-zero Start, compute IVCount here. if (AR->getStart()->isZero()) @@ -1813,7 +1815,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis(); DT = &getAnalysis().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? 
&DLP->getDataLayout() : nullptr; TLI = getAnalysisIfAvailable(); DeadInsts.clear(); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 067deb7..230a381 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "jump-threading" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "jump-threading" + STATISTIC(NumThreads, "Number of jumps threaded"); STATISTIC(NumFolds, "Number of terminators folded"); STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi"); @@ -153,7 +154,7 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); LVI = &getAnalysis(); @@ -308,7 +309,7 @@ void JumpThreading::FindLoopHeaders(Function &F) { /// Returns null if Val is null or not an appropriate constant. static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) { if (!Val) - return 0; + return nullptr; // Undef is "known" enough. if (UndefValue *U = dyn_cast(Val)) @@ -352,7 +353,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // If V is a non-instruction value, or an instruction in a different block, // then it can't be derived from a PHI. Instruction *I = dyn_cast(V); - if (I == 0 || I->getParent() != BB) { + if (!I || I->getParent() != BB) { // Okay, if this is a live-in value, see if it has a known value at the end // of any of our predecessors. @@ -495,7 +496,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB); Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL); - if (Res == 0) { + if (!Res) { if (!isa(RHS)) continue; @@ -581,7 +582,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result, // Either operand will do, so be sure to pick the one that's a known // constant. // FIXME: Do this more cleverly if both values are known constants? - KnownCond = (TrueVal != 0); + KnownCond = (TrueVal != nullptr); } // See if the select has a known constant value for this predecessor. @@ -737,7 +738,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { Instruction *CondInst = dyn_cast(Condition); // All the rest of our checks depend on the condition being an instruction. - if (CondInst == 0) { + if (!CondInst) { // FIXME: Unify this with code below. if (ProcessThreadableEdges(Condition, BB, Preference)) return true; @@ -890,7 +891,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { SmallPtrSet PredsScanned; typedef SmallVector, 8> AvailablePredsTy; AvailablePredsTy AvailablePreds; - BasicBlock *OneUnavailablePred = 0; + BasicBlock *OneUnavailablePred = nullptr; // If we got here, the loaded value is transparent through to the start of the // block. Check to see if it is available in any of the predecessor blocks. @@ -904,16 +905,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan the predecessor to see if the value is available in the pred. 
BBIt = PredBB->end(); - MDNode *ThisTBAATag = 0; + MDNode *ThisTBAATag = nullptr; Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6, - 0, &ThisTBAATag); + nullptr, &ThisTBAATag); if (!PredAvailable) { OneUnavailablePred = PredBB; continue; } // If tbaa tags disagree or are not present, forget about them. - if (TBAATag != ThisTBAATag) TBAATag = 0; + if (TBAATag != ThisTBAATag) TBAATag = nullptr; // If so, this load is partially redundant. Remember this info so that we // can create a PHI node. @@ -929,7 +930,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // predecessor, we want to insert a merge block for those common predecessors. // This ensures that we only have to insert one reload, thus not increasing // code size. - BasicBlock *UnavailablePred = 0; + BasicBlock *UnavailablePred = nullptr; // If there is exactly one predecessor where the value is unavailable, the // already computed 'OneUnavailablePred' block is it. If it ends in an @@ -996,7 +997,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), - std::make_pair(P, (Value*)0)); + std::make_pair(P, (Value*)nullptr)); assert(I != AvailablePreds.end() && I->first == P && "Didn't find entry for predecessor!"); @@ -1103,7 +1104,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, SmallPtrSet SeenPreds; SmallVector, 16> PredToDestList; - BasicBlock *OnlyDest = 0; + BasicBlock *OnlyDest = nullptr; BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL; for (unsigned i = 0, e = PredValues.size(); i != e; ++i) { @@ -1120,7 +1121,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, BasicBlock *DestBB; if (isa(Val)) - DestBB = 0; + DestBB = nullptr; else if (BranchInst *BI = dyn_cast(BB->getTerminator())) DestBB = BI->getSuccessor(cast(Val)->isZero()); else if (SwitchInst *SI = dyn_cast(BB->getTerminator())) { @@ -1171,7 +1172,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, // If the threadable edges are branching on an undefined value, we get to pick // the destination that these predecessors should get to. - if (MostPopularDest == 0) + if (!MostPopularDest) MostPopularDest = BB->getTerminator()-> getSuccessor(GetBestDestForJumpOnUndef(BB)); @@ -1273,7 +1274,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { } // Determine which value to split on, true, false, or undef if neither. - ConstantInt *SplitVal = 0; + ConstantInt *SplitVal = nullptr; if (NumTrue > NumFalse) SplitVal = ConstantInt::getTrue(BB->getContext()); else if (NumTrue != 0 || NumFalse != 0) @@ -1294,7 +1295,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // help us. However, we can just replace the LHS or RHS with the constant. if (BlocksToFoldInto.size() == cast(BB->front()).getNumIncomingValues()) { - if (SplitVal == 0) { + if (!SplitVal) { // If all preds provide undef, just nuke the xor, because it is undef too. BO->replaceAllUsesWith(UndefValue::get(BO->getType())); BO->eraseFromParent(); @@ -1531,7 +1532,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // can just clone the bits from BB into the end of the new PredBB. 
BranchInst *OldPredBranch = dyn_cast(PredBB->getTerminator()); - if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) { + if (!OldPredBranch || !OldPredBranch->isUnconditional()) { PredBB = SplitEdge(PredBB, BB, this); OldPredBranch = cast(PredBB->getTerminator()); } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index b69f2dc..0a8d16f 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -30,7 +30,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "licm" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -60,6 +59,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "licm" + STATISTIC(NumSunk , "Number of instructions sunk out of loop"); STATISTIC(NumHoisted , "Number of instructions hoisted out of loop"); STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk"); @@ -223,7 +224,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis().getDomTree(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -315,8 +316,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { "Parent loop not left in LCSSA form after LICM!"); // Clear out loops state information for the next iteration - CurLoop = 0; - Preheader = 0; + CurLoop = nullptr; + Preheader = nullptr; // If this loop is nested inside of another one, save the alias information // for when we process the outer loop. @@ -334,7 +335,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { /// iteration. /// void LICM::SinkRegion(DomTreeNode *N) { - assert(N != 0 && "Null dominator tree node?"); + assert(N != nullptr && "Null dominator tree node?"); BasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. @@ -381,7 +382,7 @@ void LICM::SinkRegion(DomTreeNode *N) { /// before uses, allowing us to hoist a loop body in one pass without iteration. /// void LICM::HoistRegion(DomTreeNode *N) { - assert(N != 0 && "Null dominator tree node?"); + assert(N != nullptr && "Null dominator tree node?"); BasicBlock *BB = N->getBlock(); // If this subregion is not in the top level loop at all, exit. @@ -774,7 +775,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. unsigned Alignment = 1; - MDNode *TBAATag = 0; + MDNode *TBAATag = nullptr; // Check that all of the pointers in the alias set have the same type. 
We // cannot (yet) promote a memory location that is loaded and stored in diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 9a520c8..5ab686a 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-delete" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -23,6 +22,8 @@ #include "llvm/IR/Dominators.h" using namespace llvm; +#define DEBUG_TYPE "loop-delete" + STATISTIC(NumDeleted, "Number of loops deleted"); namespace { diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index e5e8b84..26a83df 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -41,7 +41,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-idiom" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -61,6 +60,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "loop-idiom" + STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); @@ -114,7 +115,7 @@ namespace { Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const; /// Return true iff the idiom is detected in the loop. and 1) \p CntInst - /// is set to the instruction counting the pupulation bit. 2) \p CntPhi + /// is set to the instruction counting the population bit. 2) \p CntPhi /// is set to the corresponding phi node. 3) \p Var is set to the value /// whose population bits are being counted. bool detectIdiom @@ -138,7 +139,7 @@ namespace { static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - DL = 0; DT = 0; SE = 0; TLI = 0; TTI = 0; + DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr; } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -182,7 +183,7 @@ namespace { if (DL) return DL; DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; return DL; } @@ -247,7 +248,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE, for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); - DeadInst->setOperand(op, 0); + DeadInst->setOperand(op, nullptr); // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; @@ -292,9 +293,9 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) { BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { if (BasicBlock *BB = PreHead->getSinglePredecessor()) { BranchInst *Br = getBranch(BB); - return Br && Br->isConditional() ? BB : 0; + return Br && Br->isConditional() ? 
BB : nullptr; } - return 0; + return nullptr; } //===----------------------------------------------------------------------===// @@ -304,7 +305,7 @@ BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { //===----------------------------------------------------------------------===// NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): - LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) { + LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) { } bool NclPopcountRecognize::preliminaryScreen() { @@ -341,25 +342,25 @@ bool NclPopcountRecognize::preliminaryScreen() { return true; } -Value *NclPopcountRecognize::matchCondition (BranchInst *Br, - BasicBlock *LoopEntry) const { +Value *NclPopcountRecognize::matchCondition(BranchInst *Br, + BasicBlock *LoopEntry) const { if (!Br || !Br->isConditional()) - return 0; + return nullptr; ICmpInst *Cond = dyn_cast(Br->getCondition()); if (!Cond) - return 0; + return nullptr; ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); if (!CmpZero || !CmpZero->isZero()) - return 0; + return nullptr; ICmpInst::Predicate Pred = Cond->getPredicate(); if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) return Cond->getOperand(0); - return 0; + return nullptr; } bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, @@ -390,9 +391,9 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, Value *VarX1, *VarX0; PHINode *PhiX, *CountPhi; - DefX2 = CountInst = 0; - VarX1 = VarX0 = 0; - PhiX = CountPhi = 0; + DefX2 = CountInst = nullptr; + VarX1 = VarX0 = nullptr; + PhiX = CountPhi = nullptr; LoopEntry = *(CurLoop->block_begin()); // step 1: Check if the loop-back branch is in desirable form. @@ -439,7 +440,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 { - CountInst = NULL; + CountInst = nullptr; for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), IterE = LoopEntry->end(); Iter != IterE; Iter++) { Instruction *Inst = Iter; @@ -744,7 +745,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, // If processing the store invalidated our iterator, start over from the // top of the block. - if (InstPtr == 0) + if (!InstPtr) I = BB->begin(); continue; } @@ -757,7 +758,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, // If processing the memset invalidated our iterator, start over from the // top of the block. - if (InstPtr == 0) + if (!InstPtr) I = BB->begin(); continue; } @@ -784,7 +785,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // random store we can't handle. const SCEVAddRecExpr *StoreEv = dyn_cast(SE->getSCEV(StorePtr)); - if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) + if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) return false; // Check to see if the stride matches the size of the store. If so, then we @@ -792,7 +793,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast(StoreEv->getOperand(1)); - if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { + if (!Stride || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. 
// Enable this to print exact negative strides. @@ -841,7 +842,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. const SCEVAddRecExpr *Ev = dyn_cast(SE->getSCEV(Pointer)); - if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine()) + if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine()) return false; // Reject memsets that are so large that they overflow an unsigned. @@ -855,7 +856,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. - if (Stride == 0 || MSI->getLength() != Stride->getValue()) + if (!Stride || MSI->getLength() != Stride->getValue()) return false; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, @@ -908,23 +909,23 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) { // array. We could theoretically do a store to an alloca or something, but // that doesn't seem worthwhile. Constant *C = dyn_cast(V); - if (C == 0) return 0; + if (!C) return nullptr; // Only handle simple values that are a power of two bytes in size. uint64_t Size = DL.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) - return 0; + return nullptr; // Don't care enough about darwin/ppc to implement this. if (DL.isBigEndian()) - return 0; + return nullptr; // Convert to size in bytes. Size /= 8; // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). - if (Size > 16) return 0; + if (Size > 16) return nullptr; // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -949,7 +950,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // are stored. A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); - Constant *PatternValue = 0; + Constant *PatternValue = nullptr; unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); @@ -960,13 +961,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // promote the memset. CurLoop->isLoopInvariant(SplatValue)) { // Keep and use SplatValue. - PatternValue = 0; + PatternValue = nullptr; } else if (DestAS == 0 && TLI->has(LibFunc::memset_pattern16) && (PatternValue = getMemSetPatternValue(StoredVal, *DL))) { // Don't create memset_pattern16s with address spaces. // It looks like we can use PatternValue! - SplatValue = 0; + SplatValue = nullptr; } else { // Otherwise, this isn't an idiom we can transform. For example, we can't // do anything with a 3-byte store. @@ -1033,7 +1034,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Int8PtrTy, Int8PtrTy, IntPtr, - (void*)0); + (void*)nullptr); // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. 
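[Aside: for reference, the shape of the rewrite processLoopStridedStore performs once a bytewise-splat store with unit stride is proven: the loop collapses to a single memset (or memset_pattern16 for non-splat constants on targets that provide it). A standalone sketch with hypothetical function names; the pass emits the call via SCEVExpander rather than rewriting source.]

    #include <cstddef>
    #include <cstring>

    // Before: a unit-stride store of a bytewise-splat value.
    void zeroBefore(char *A, size_t N) {
      for (size_t I = 0; I != N; ++I)
        A[I] = 0;
    }

    // After -loop-idiom: one library call the target can optimize.
    void zeroAfter(char *A, size_t N) {
      if (N) // defensive: skip the call when the loop would not execute
        std::memset(A, 0, N);
    }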
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 263ba93..ab1a939 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -26,6 +25,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "loop-instsimplify" + STATISTIC(NumSimplified, "Number of redundant instructions simplified"); namespace { @@ -70,10 +71,10 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis(); SmallVector ExitBlocks; @@ -126,7 +127,15 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ++NumSimplified; } } - LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + if (res) { + // RecursivelyDeleteTriviallyDeadInstruction can remove + // more than one instruction, so simply incrementing the + // iterator does not work. When instructions get deleted + // re-iterate instead. + BI = BB->begin(); BE = BB->end(); + LocalChanged |= res; + } if (IsSubloopHeader && !isa(I)) break; diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 81c1e42..8b5e036 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-reroll" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -36,6 +35,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-reroll" + STATISTIC(NumRerolledLoops, "Number of rerolled loops"); static cl::opt @@ -945,7 +946,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, bool InReduction = Reductions.isPairInSame(J1, J2); if (!(InReduction && J1->isAssociative())) { - bool Swapped = false, SomeOpMatched = false;; + bool Swapped = false, SomeOpMatched = false; for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) { Value *Op2 = J2->getOperand(j); @@ -1133,7 +1134,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { SE = &getAnalysis(); TLI = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? 
&DLP->getDataLayout() : nullptr; DT = &getAnalysis().getDomTree(); BasicBlock *Header = L->getHeader(); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index fde6bac..2ce5831 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-rotate" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CodeMetrics.h" @@ -24,6 +23,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -31,7 +31,11 @@ #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; -#define MAX_HEADER_SIZE 16 +#define DEBUG_TYPE "loop-rotate" + +static cl::opt +DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, + cl::desc("The default maximum header size for automatic loop rotation")); STATISTIC(NumRotated, "Number of loops rotated"); namespace { @@ -39,8 +43,12 @@ namespace { class LoopRotate : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopRotate() : LoopPass(ID) { + LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { initializeLoopRotatePass(*PassRegistry::getPassRegistry()); + if (SpecifiedMaxHeaderSize == -1) + MaxHeaderSize = DefaultRotationThreshold; + else + MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); } // LCSSA form makes instruction renaming easier. @@ -61,6 +69,7 @@ namespace { bool rotateLoop(Loop *L, bool SimplifiedLatch); private: + unsigned MaxHeaderSize; LoopInfo *LI; const TargetTransformInfo *TTI; }; @@ -74,7 +83,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) -Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } +Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { + return new LoopRotate(MaxHeaderSize); +} /// Rotate Loop L as many times as possible. Return true if /// the loop is rotated at least once. @@ -82,6 +93,9 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + LI = &getAnalysis(); TTI = &getAnalysis(); @@ -96,6 +110,12 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { MadeChange = true; SimplifiedLatch = false; } + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + return MadeChange; } @@ -281,7 +301,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { BasicBlock *OrigLatch = L->getLoopLatch(); BranchInst *BI = dyn_cast(OrigHeader->getTerminator()); - if (BI == 0 || BI->isUnconditional()) + if (!BI || BI->isUnconditional()) return false; // If the loop header is not one of the loop exiting blocks then @@ -292,7 +312,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop latch already contains a branch that leaves the loop then the // loop is already rotated. 
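Two notes on the hunks above. First, the LoopInstSimplify fix exists because RecursivelyDeleteTriviallyDeadInstructions can erase instructions other than the one it was handed, so any saved iterator into the block may dangle. A self-contained sketch of the restart pattern (helper name hypothetical; headers as of this 3.5-era tree):

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Transforms/Utils/Local.h"
  using namespace llvm;

  static bool deleteDeadInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI) {
    bool Changed = false;
    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
      Instruction *I = BI++;
      if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
        // More than one instruction may be gone; re-scan from the top
        // instead of trusting the already-incremented iterator.
        BI = BB->begin();
        BE = BB->end();
        Changed = true;
      }
    }
    return Changed;
  }

Second, the LoopRotation change layers the rotation threshold: the new cl::opt supplies the default (16) and the constructor argument overrides it per instance. A hypothetical driver snippet (the value 8 is illustrative):

  PM.add(createLoopRotatePass(ConstrainHeader ? 8 : -1));
  // -1 defers to -rotation-max-header-size; a value >= 0 pins MaxHeaderSize.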
- if (OrigLatch == 0) + if (!OrigLatch) return false; // Rotate if either the loop latch does *not* exit the loop, or if the loop @@ -310,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { << " instructions: "; L->dump()); return false; } - if (Metrics.NumInsts > MAX_HEADER_SIZE) + if (Metrics.NumInsts > MaxHeaderSize) return false; } @@ -319,7 +339,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (OrigPreheader == 0) + if (!OrigPreheader) return false; // Anything ScalarEvolution may know about this loop or the PHI nodes diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 272a16d..914b56a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -53,7 +53,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-reduce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" @@ -78,6 +77,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "loop-reduce" + /// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for /// bail out. This threshold is far beyond the number of users that LSR can /// conceivably solve, so it should not affect generated code, but catches the @@ -237,7 +238,15 @@ struct Formula { int64_t Scale; /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty, + /// non-empty. The canonical representation of a formula is + /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and + /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). + /// #1 enforces that the scaled register is always used when at least two + /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2. + /// #2 enforces that 1 * reg is reg. + /// This invariant can be temporarly broken while building a formula. + /// However, every formula inserted into the LSRInstance must be in canonical + /// form. SmallVector BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null @@ -250,12 +259,18 @@ struct Formula { int64_t UnfoldedOffset; Formula() - : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0), - UnfoldedOffset(0) {} + : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), + ScaledReg(nullptr), UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); - unsigned getNumRegs() const; + bool isCanonical() const; + + void Canonicalize(); + + bool Unscale(); + + size_t getNumRegs() const; Type *getType() const; void DeleteBaseReg(const SCEV *&S); @@ -345,12 +360,58 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { BaseRegs.push_back(Sum); HasBaseReg = true; } + Canonicalize(); +} + +/// \brief Check whether or not this formula statisfies the canonical +/// representation. +/// \see Formula::BaseRegs. +bool Formula::isCanonical() const { + if (ScaledReg) + return Scale != 1 || !BaseRegs.empty(); + return BaseRegs.size() <= 1; +} + +/// \brief Helper method to morph a formula into its canonical representation. +/// \see Formula::BaseRegs. +/// Every formula having more than one base register, must use the ScaledReg +/// field. Otherwise, we would have to do special cases everywhere in LSR +/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... 
+/// On the other hand, 1*reg should be canonicalized into reg. +void Formula::Canonicalize() { + if (isCanonical()) + return; + // So far we did not need this case. This is easy to implement but it is + // useless to maintain dead code. Beside it could hurt compile time. + assert(!BaseRegs.empty() && "1*reg => reg, should not be needed."); + // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg. + ScaledReg = BaseRegs.back(); + BaseRegs.pop_back(); + Scale = 1; + size_t BaseRegsSize = BaseRegs.size(); + size_t Try = 0; + // If ScaledReg is an invariant, try to find a variant expression. + while (Try < BaseRegsSize && !isa(ScaledReg)) + std::swap(ScaledReg, BaseRegs[Try++]); +} + +/// \brief Get rid of the scale in the formula. +/// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. +/// \return true if it was possible to get rid of the scale, false otherwise. +/// \note After this operation the formula may not be in the canonical form. +bool Formula::Unscale() { + if (Scale != 1) + return false; + Scale = 0; + BaseRegs.push_back(ScaledReg); + ScaledReg = nullptr; + return true; } /// getNumRegs - Return the total number of register operands used by this /// formula. This does not include register uses implied by non-constant /// addrec strides. -unsigned Formula::getNumRegs() const { +size_t Formula::getNumRegs() const { return !!ScaledReg + BaseRegs.size(); } @@ -360,7 +421,7 @@ Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : BaseGV ? BaseGV->getType() : - 0; + nullptr; } /// DeleteBaseReg - Delete the given base reg from the BaseRegs list. @@ -487,11 +548,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Check for a division of a constant by a constant. if (const SCEVConstant *C = dyn_cast(LHS)) { if (!RC) - return 0; + return nullptr; const APInt &LA = C->getValue()->getValue(); const APInt &RA = RC->getValue()->getValue(); if (LA.srem(RA) != 0) - return 0; + return nullptr; return SE.getConstant(LA.sdiv(RA)); } @@ -500,16 +561,16 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) { const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE, IgnoreSignificantBits); - if (!Step) return 0; + if (!Step) return nullptr; const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, IgnoreSignificantBits); - if (!Start) return 0; + if (!Start) return nullptr; // FlagNW is independent of the start value, step direction, and is // preserved with smaller magnitude steps. // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } - return 0; + return nullptr; } // Distribute the sdiv over add operands, if the add doesn't overflow. @@ -520,12 +581,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, I != E; ++I) { const SCEV *Op = getExactSDiv(*I, RHS, SE, IgnoreSignificantBits); - if (!Op) return 0; + if (!Op) return nullptr; Ops.push_back(Op); } return SE.getAddExpr(Ops); } - return 0; + return nullptr; } // Check for a multiply operand that we can pull RHS out of. @@ -544,13 +605,13 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, } Ops.push_back(S); } - return Found ? SE.getMulExpr(Ops) : 0; + return Found ? SE.getMulExpr(Ops) : nullptr; } - return 0; + return nullptr; } // Otherwise we don't know. 
- return 0; + return nullptr; } /// ExtractImmediate - If S involves the addition of a constant integer value, @@ -604,7 +665,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { SCEV::FlagAnyWrap); return Result; } - return 0; + return nullptr; } /// isAddressUse - Returns true if the specified instruction is using the @@ -755,12 +816,12 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl &DeadInsts) { Value *V = DeadInsts.pop_back_val(); Instruction *I = dyn_cast_or_null(V); - if (I == 0 || !isInstructionTriviallyDead(I)) + if (!I || !isInstructionTriviallyDead(I)) continue; for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) if (Instruction *U = dyn_cast(*OI)) { - *OI = 0; + *OI = nullptr; if (U->use_empty()) DeadInsts.push_back(U); } @@ -775,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl &DeadInsts) { namespace { class LSRUse; } -// Check if it is legal to fold 2 base registers. -static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, - const Formula &F); + +/// \brief Check if the addressing mode defined by \p F is completely +/// folded in \p LU at isel time. +/// This includes address-mode folding and special icmp tricks. +/// This function returns true if \p LU can accommodate what \p F +/// defines and up to 1 base + 1 scaled + offset. +/// In other words, if \p F has several base registers, this function may +/// still return true. Therefore, users still need to account for +/// additional base registers and/or unfolded offsets to derive an +/// accurate cost model. +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + const LSRUse &LU, const Formula &F); // Get the cost of the scaling factor used in F for LU. static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F); @@ -828,7 +898,7 @@ public: const SmallVectorImpl &Offsets, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, - SmallPtrSet *LoserRegs = 0); + SmallPtrSet *LoserRegs = nullptr); void print(raw_ostream &OS) const; void dump() const; @@ -921,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU, SmallPtrSet *LoserRegs) { + assert(F.isCanonical() && "Cost is accurate only for canonical formula"); // Tally up the registers. if (const SCEV *ScaledReg = F.ScaledReg) { if (VisitedRegs.count(ScaledReg)) { @@ -944,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, } // Determine how many (unfolded) adds we'll need inside the loop. - size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0); + size_t NumBaseParts = F.getNumRegs(); if (NumBaseParts > 1) // Do not count the base and a possible second register if the target // allows to fold 2 registers. - NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F)); + NumBaseAdds += + NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F))); + NumBaseAdds += (F.UnfoldedOffset != 0); // Accumulate non-free scaling amounts. ScaleCost += getScalingFactorCost(TTI, LU, F); @@ -1047,7 +1120,8 @@ struct LSRFixup { } LSRFixup::LSRFixup() - : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} + : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)), + Offset(0) {} /// isUseFullyOutsideLoop - Test whether this fixup always uses its /// value outside of the given loop. 
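Since the rest of this patch leans on the canonical-form invariant, here is a toy, self-contained model of it and of the Canonicalize()/Unscale() round trip (plain C++, not LLVM's types; small integers stand in for const SCEV * values):

  #include <cassert>
  #include <vector>

  struct ToyFormula {
    std::vector<int> BaseRegs;  // stand-ins for const SCEV *
    int ScaledReg = 0;          // 0 plays the role of nullptr
    int Scale = 0;

    bool isCanonical() const {
      if (ScaledReg)
        return Scale != 1 || !BaseRegs.empty();
      return BaseRegs.size() <= 1;
    }
    void canonicalize() {       // reg1 + reg2  =>  reg1 + 1*reg2
      if (isCanonical()) return;
      ScaledReg = BaseRegs.back();
      BaseRegs.pop_back();
      Scale = 1;
    }
    bool unscale() {            // reg1 + 1*reg2  =>  reg1 + reg2
      if (Scale != 1) return false;
      Scale = 0;
      BaseRegs.push_back(ScaledReg);
      ScaledReg = 0;
      return true;
    }
  };

  int main() {
    ToyFormula F;
    F.BaseRegs = {1, 2};        // reg1 + reg2: two base regs, not canonical
    assert(!F.isCanonical());
    F.canonicalize();           // reg1 + 1*reg2
    assert(F.isCanonical() && F.Scale == 1 && F.BaseRegs.size() == 1);
    assert(F.unscale());        // back to reg1 + reg2, possibly non-canonical
    assert(F.Scale == 0 && F.BaseRegs.size() == 2);
  }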
@@ -1183,7 +1257,7 @@ public: MaxOffset(INT64_MIN), AllFixupsOutsideLoop(true), RigidFormula(false), - WidestFixupType(0) {} + WidestFixupType(nullptr) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); @@ -1208,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. +/// The formula must be in canonical form. bool LSRUse::InsertFormula(const Formula &F) { + assert(F.isCanonical() && "Invalid canonical representation"); + if (!Formulae.empty() && RigidFormula) return false; @@ -1234,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) { // Record registers now being used by this use. Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); + if (F.ScaledReg) + Regs.insert(F.ScaledReg); return true; } @@ -1300,12 +1379,10 @@ void LSRUse::dump() const { } #endif -/// isLegalUse - Test whether the use described by AM is "legal", meaning it can -/// be completely folded into the user instruction at isel time. This includes -/// address-mode folding and special icmp tricks. -static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, - Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, - bool HasBaseReg, int64_t Scale) { +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); @@ -1356,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, llvm_unreachable("Invalid LSRUse Kind!"); } -static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) { +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { // Check for overflow. if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != (MinOffset > 0)) @@ -1370,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, return false; MaxOffset = (uint64_t)BaseOffset + MaxOffset; - return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg, - Scale) && - isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset, + HasBaseReg, Scale) && + isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset, + HasBaseReg, Scale); +} + +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + int64_t MinOffset, int64_t MaxOffset, + LSRUse::KindType Kind, Type *AccessTy, + const Formula &F) { + // For the purpose of isAMCompletelyFolded either having a canonical formula + // or a scale not equal to zero is correct. + // Problems may arise from non canonical formulae having a scale == 0. + // Strictly speaking it would best to just rely on canonical formulae. + // However, when we generate the scaled formulae, we first check that the + // scaling factor is profitable before computing the actual ScaledReg for + // compile time sake. 
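The overflow guards in the offset-range overload reward a second look; the predicate below is lifted out verbatim, with illustrative driver values:

  #include <cassert>
  #include <cstdint>

  // True when Base + Off wrapped: the sum must move in Off's direction,
  // so a sign mismatch between the movement and Off means overflow.
  static bool addWraps(int64_t Base, int64_t Off) {
    return ((int64_t)((uint64_t)Base + Off) > Base) != (Off > 0);
  }

  int main() {
    assert(!addWraps(100, 16));      // ordinary case
    assert(!addWraps(100, -200));    // negative offsets are fine too
    assert(addWraps(INT64_MAX, 1));  // wraps to INT64_MIN
    assert(addWraps(INT64_MIN, -1)); // wraps to INT64_MAX
  }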
+ assert((F.isCanonical() || F.Scale != 0)); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); +} + +/// isLegalUse - Test whether we know how to expand the current formula. +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) { + // We know how to expand completely foldable formulae. + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale) || + // Or formulae that use a base register produced by a sum of base + // registers. + (Scale == 1 && + isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, + BaseGV, BaseOffset, true, 0)); } static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, @@ -1382,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, F.BaseOffset, F.HasBaseReg, F.Scale); } -static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, - const Formula &F) { - // If F is used as an Addressing Mode, it may fold one Base plus one - // scaled register. If the scaled register is nil, do as if another - // element of the base regs is a 1-scaled register. - // This is possible if BaseRegs has at least 2 registers. - - // If this is not an address calculation, this is not an addressing mode - // use. - if (LU.Kind != LSRUse::Address) - return false; - - // F is already scaled. - if (F.Scale != 0) - return false; - - // We need to keep one register for the base and one to scale. - if (F.BaseRegs.size() < 2) - return false; - - return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, - F.BaseGV, F.BaseOffset, F.HasBaseReg, 1); - } +static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, + const LSRUse &LU, const Formula &F) { + return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, + F.Scale); +} static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, const LSRUse &LU, const Formula &F) { if (!F.Scale) return 0; - assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, F) && "Illegal formula in use."); + + // If the use is not completely folded in that instruction, we will have to + // pay an extra cost only for scale != 1. + if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, F)) + return F.Scale != 1; switch (LU.Kind) { case LSRUse::Address: { @@ -1430,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, return std::max(ScaleCostMinOffset, ScaleCostMaxOffset); } case LSRUse::ICmpZero: - // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg. - // Therefore, return 0 in case F.Scale == -1. - return F.Scale != -1; - case LSRUse::Basic: case LSRUse::Special: + // The use is completely folded, i.e., everything is folded into the + // instruction. return 0; } @@ -1460,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, HasBaseReg = true; } - return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, + HasBaseReg, Scale); } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, @@ -1485,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, // base and a scale. int64_t Scale = Kind == LSRUse::ICmpZero ? 
-1 : 1; - return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, - BaseOffset, HasBaseReg, Scale); + return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale); } namespace { @@ -1515,7 +1611,7 @@ struct IVChain { SmallVector Incs; const SCEV *ExprBase; - IVChain() : ExprBase(0) {} + IVChain() : ExprBase(nullptr) {} IVChain(const IVInc &Head, const SCEV *Base) : Incs(1, Head), ExprBase(Base) {} @@ -1642,8 +1738,19 @@ class LSRInstance { void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, unsigned Depth = 0); + + void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, unsigned Depth, + size_t Idx, bool IsScaledReg = false); void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, size_t Idx, + bool IsScaledReg = false); void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); + void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, + const SmallVectorImpl &Worklist, + size_t Idx, bool IsScaledReg = false); void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); @@ -1721,7 +1828,7 @@ void LSRInstance::OptimizeShadowIV() { IVUsers::const_iterator CandidateUI = UI; ++UI; Instruction *ShadowUse = CandidateUI->getUser(); - Type *DestTy = 0; + Type *DestTy = nullptr; bool IsSigned = false; /* If shadow use is a int->float cast then insert a second IV @@ -1783,7 +1890,7 @@ void LSRInstance::OptimizeShadowIV() { continue; /* Initialize new IV, double d = 0.0 in above example. */ - ConstantInt *C = 0; + ConstantInt *C = nullptr; if (Incr->getOperand(0) == PH) C = dyn_cast(Incr->getOperand(1)); else if (Incr->getOperand(1) == PH) @@ -1905,7 +2012,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // for ICMP_ULE here because the comparison would be with zero, which // isn't interesting. CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; - const SCEVNAryExpr *Max = 0; + const SCEVNAryExpr *Max = nullptr; if (const SCEVSMaxExpr *S = dyn_cast(BackedgeTakenCount)) { Pred = ICmpInst::ICMP_SLE; Max = S; @@ -1948,7 +2055,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { // Check the right operand of the select, and remember it, as it will // be used in the new comparison instruction. - Value *NewRHS = 0; + Value *NewRHS = nullptr; if (ICmpInst::isTrueWhenEqual(Pred)) { // Look for n+1, and grab n. if (AddOperator *BO = dyn_cast(Sel->getOperand(1))) @@ -2018,7 +2125,7 @@ LSRInstance::OptimizeLoopTermCond() { continue; // Search IVUsesByStride to find Cond's IVUse if there is one. - IVStrideUse *CondUse = 0; + IVStrideUse *CondUse = nullptr; ICmpInst *Cond = cast(TermBr->getCondition()); if (!FindIVUserForCond(Cond, CondUse)) continue; @@ -2071,12 +2178,12 @@ LSRInstance::OptimizeLoopTermCond() { // Check for possible scaled-address reuse. 
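OptimizeShadowIV is easiest to read with its source-level effect in mind (illustrative C++, not the pass's actual output):

  #include <cstdio>

  void before(int n) {
    for (int i = 0; i < n; ++i)
      std::printf("%f\n", (double)i);  // sitofp executed every iteration
  }

  void after(int n) {
    double d = 0.0;                    // the shadow IV: new PHI, init 0.0
    for (int i = 0; i < n; ++i, d += 1.0)
      std::printf("%f\n", d);          // the in-loop int->fp cast is gone
  }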
Type *AccessTy = getAccessType(UI->getUser()); int64_t Scale = C->getSExtValue(); - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, /*BaseOffset=*/ 0, /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; Scale = -Scale; - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, /*BaseOffset=*/ 0, /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; @@ -2146,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, // the uses will have all its uses outside the loop, for example. if (LU.Kind != Kind) return false; + + // Check for a mismatched access type, and fall back conservatively as needed. + // TODO: Be less conservative when the type is similar and can use the same + // addressing modes. + if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) + NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, LU.MaxOffset - NewOffset, HasBaseReg)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr, NewOffset - LU.MinOffset, HasBaseReg)) return false; NewMaxOffset = NewOffset; } - // Check for a mismatched access type, and fall back conservatively as needed. - // TODO: Be less conservative when the type is similar and can use the same - // addressing modes. - if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) - NewAccessTy = Type::getVoidTy(AccessTy->getContext()); // Update the use. LU.MinOffset = NewMinOffset; @@ -2183,7 +2292,7 @@ LSRInstance::getUse(const SCEV *&Expr, int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr, Offset, /*HasBaseReg=*/ true)) { Expr = Copy; Offset = 0; @@ -2267,7 +2376,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, } // Nothing looked good. - return 0; + return nullptr; } void LSRInstance::CollectInterestingTypesAndFactors() { @@ -2385,7 +2494,7 @@ static const SCEV *getExprBase(const SCEV *S) { default: // uncluding scUnknown. return S; case scConstant: - return 0; + return nullptr; case scTruncate: return getExprBase(cast(S)->getOperand()); case scZeroExtend: @@ -2476,7 +2585,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSet &Users, && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) { --cost; } - const SCEV *LastIncExpr = 0; + const SCEV *LastIncExpr = nullptr; unsigned NumConstIncrements = 0; unsigned NumVarIncrements = 0; unsigned NumReusedIncrements = 0; @@ -2535,7 +2644,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, // Visit all existing chains. Check if its IVOper can be computed as a // profitable loop invariant increment from the last link in the Chain. 
unsigned ChainIdx = 0, NChains = IVChainVec.size(); - const SCEV *LastIncExpr = 0; + const SCEV *LastIncExpr = nullptr; for (; ChainIdx < NChains; ++ChainIdx) { IVChain &Chain = IVChainVec[ChainIdx]; @@ -2755,7 +2864,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, int64_t IncOffset = IncConst->getValue()->getSExtValue(); if (!isAlwaysFoldable(TTI, LSRUse::Address, - getAccessType(UserInst), /*BaseGV=*/ 0, + getAccessType(UserInst), /*BaseGV=*/ nullptr, IncOffset, /*HaseBaseReg=*/ false)) return false; @@ -2773,7 +2882,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), IVOpEnd, L, SE); - Value *IVSrc = 0; + Value *IVSrc = nullptr; while (IVOpIter != IVOpEnd) { IVSrc = getWideOperand(*IVOpIter); @@ -2800,7 +2909,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); Type *IVTy = IVSrc->getType(); Type *IntTy = SE.getEffectiveSCEVType(IVTy); - const SCEV *LeftOverExpr = 0; + const SCEV *LeftOverExpr = nullptr; for (IVChain::const_iterator IncI = Chain.begin(), IncE = Chain.end(); IncI != IncE; ++IncI) { @@ -2831,7 +2940,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, TTI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); IVSrc = IVOper; - LeftOverExpr = 0; + LeftOverExpr = nullptr; } } Type *OperTy = IncI->IVOperand->getType(); @@ -2886,7 +2995,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.PostIncLoops = UI->getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; - Type *AccessTy = 0; + Type *AccessTy = nullptr; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; AccessTy = getAccessType(LF.UserInst); @@ -2917,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) { // S is normalized, so normalize N before folding it into S // to keep the result normalized. - N = TransformForPostIncUse(Normalize, N, CI, 0, + N = TransformForPostIncUse(Normalize, N, CI, nullptr, LF.PostIncLoops, SE, DT); Kind = LSRUse::ICmpZero; S = SE.getMinusSCEV(N, S); @@ -2992,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { + // Do not insert formula that we will not be able to expand. + assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && + "Formula is illegal"); if (!LU.InsertFormula(F)) return false; @@ -3068,7 +3180,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = const_cast(UserInst); LF.OperandValToReplace = U; - std::pair P = getUse(S, LSRUse::Basic, 0); + std::pair P = getUse(S, LSRUse::Basic, nullptr); LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; @@ -3107,7 +3219,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, if (Remainder) Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); } - return 0; + return nullptr; } else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { // Split a non-zero base out of an addrec. 
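For the ICmpZero use kind set up in CollectFixupsAndInitialFormulae, the rewrite compares a difference against zero so the formula machinery can absorb the right-hand side. An illustrative check (ignoring wraparound, which the real code rules out through SCEV normalization and isSafeToExpand):

  #include <cassert>

  bool exitOriginal(long i, long n) { return i == n; }        // icmp eq i, n
  bool exitICmpZero(long i, long n) { return (i - n) == 0; }  // icmp eq (i-n), 0

  int main() {
    for (long i = -3; i <= 3; ++i)
      assert(exitOriginal(i, 2) == exitICmpZero(i, 2));
  }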
if (AR->getStart()->isZero()) @@ -3119,7 +3231,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, // does not pertain to this loop. if (Remainder && (AR->getLoop() == L || !isa(Remainder))) { Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); - Remainder = 0; + Remainder = nullptr; } if (Remainder != AR->getStart()) { if (!Remainder) @@ -3141,90 +3253,110 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1); if (Remainder) Ops.push_back(SE.getMulExpr(C, Remainder)); - return 0; + return nullptr; } } return S; } -/// GenerateReassociations - Split out subexpressions from adds and the bases of -/// addrecs. -void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, - Formula Base, - unsigned Depth) { - // Arbitrarily cap recursion to protect compile time. - if (Depth >= 3) return; - - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *BaseReg = Base.BaseRegs[i]; +/// \brief Helper function for LSRInstance::GenerateReassociations. +void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, + unsigned Depth, size_t Idx, + bool IsScaledReg) { + const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + SmallVector AddOps; + const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE); + if (Remainder) + AddOps.push_back(Remainder); + + if (AddOps.size() == 1) + return; - SmallVector AddOps; - const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE); - if (Remainder) - AddOps.push_back(Remainder); + for (SmallVectorImpl::const_iterator J = AddOps.begin(), + JE = AddOps.end(); + J != JE; ++J) { - if (AddOps.size() == 1) continue; + // Loop-variant "unknown" values are uninteresting; we won't be able to + // do anything meaningful with them. + if (isa(*J) && !SE.isLoopInvariant(*J, L)) + continue; - for (SmallVectorImpl::const_iterator J = AddOps.begin(), - JE = AddOps.end(); J != JE; ++J) { + // Don't pull a constant into a register if the constant could be folded + // into an immediate field. + if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, *J, Base.getNumRegs() > 1)) + continue; - // Loop-variant "unknown" values are uninteresting; we won't be able to - // do anything meaningful with them. - if (isa(*J) && !SE.isLoopInvariant(*J, L)) - continue; + // Collect all operands except *J. + SmallVector InnerAddOps( + ((const SmallVector &)AddOps).begin(), J); + InnerAddOps.append(std::next(J), + ((const SmallVector &)AddOps).end()); + + // Don't leave just a constant behind in a register if the constant could + // be folded into an immediate field. + if (InnerAddOps.size() == 1 && + isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) + continue; - // Don't pull a constant into a register if the constant could be folded - // into an immediate field. - if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, *J, Base.getNumRegs() > 1)) - continue; + const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); + if (InnerSum->isZero()) + continue; + Formula F = Base; - // Collect all operands except *J. - SmallVector InnerAddOps( - ((const SmallVector &)AddOps).begin(), J); - InnerAddOps.append(std::next(J), - ((const SmallVector &)AddOps).end()); - - // Don't leave just a constant behind in a register if the constant could - // be folded into an immediate field. 
- if (InnerAddOps.size() == 1 && - isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, - LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) - continue; + // Add the remaining pieces of the add back into the new formula. + const SCEVConstant *InnerSumSC = dyn_cast(InnerSum); + if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { + F.UnfoldedOffset = + (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); + if (IsScaledReg) + F.ScaledReg = nullptr; + else + F.BaseRegs.erase(F.BaseRegs.begin() + Idx); + } else if (IsScaledReg) + F.ScaledReg = InnerSum; + else + F.BaseRegs[Idx] = InnerSum; + + // Add J as its own register, or an unfolded immediate. + const SCEVConstant *SC = dyn_cast(*J); + if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) + F.UnfoldedOffset = + (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); + else + F.BaseRegs.push_back(*J); + // We may have changed the number of register in base regs, adjust the + // formula accordingly. + F.Canonicalize(); + + if (InsertFormula(LU, LUIdx, F)) + // If that formula hadn't been seen before, recurse to find more like + // it. + GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1); + } +} - const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); - if (InnerSum->isZero()) - continue; - Formula F = Base; +/// GenerateReassociations - Split out subexpressions from adds and the bases of +/// addrecs. +void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, + Formula Base, unsigned Depth) { + assert(Base.isCanonical() && "Input must be in the canonical form"); + // Arbitrarily cap recursion to protect compile time. + if (Depth >= 3) + return; - // Add the remaining pieces of the add back into the new formula. - const SCEVConstant *InnerSumSC = dyn_cast(InnerSum); - if (InnerSumSC && - SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue())) { - F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue(); - F.BaseRegs.erase(F.BaseRegs.begin() + i); - } else - F.BaseRegs[i] = InnerSum; - - // Add J as its own register, or an unfolded immediate. - const SCEVConstant *SC = dyn_cast(*J); - if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue())) - F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue(); - else - F.BaseRegs.push_back(*J); + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i); - if (InsertFormula(LU, LUIdx, F)) - // If that formula hadn't been seen before, recurse to find more like - // it. - GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1); - } - } + if (Base.Scale == 1) + GenerateReassociationsImpl(LU, LUIdx, Base, Depth, + /* Idx */ -1, /* IsScaledReg */ true); } /// GenerateCombinations - Generate a formula consisting of all of the @@ -3232,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base) { // This method is only interesting on a plurality of registers. 
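To see one step of the refactored reassociation on illustrative operands: start from BaseRegs = { (a + b + 7) }. CollectSubexprs flattens the register to AddOps = { a, b, 7 }; pulling J = b out leaves InnerSum = (a + 7), so the candidate formula becomes BaseRegs = { (a + 7), b }, and a constant piece is steered into F.UnfoldedOffset rather than a register whenever TTI.isLegalAddImmediate accepts it. Because the result has two base registers, the newly added F.Canonicalize() call then promotes one variant register to ScaledReg with Scale = 1, which is exactly what lets InsertFormula assert isCanonical().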
- if (Base.BaseRegs.size() <= 1) return; + if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1) + return; + // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before + // processing the formula. + Base.Unscale(); Formula F = Base; F.BaseRegs.clear(); SmallVector Ops; @@ -3253,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // rather than proceed with zero in a register. if (!Sum->isZero()) { F.BaseRegs.push_back(Sum); + F.Canonicalize(); (void)InsertFormula(LU, LUIdx, F); } } } +/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets. +void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, + const Formula &Base, size_t Idx, + bool IsScaledReg) { + const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + GlobalValue *GV = ExtractSymbol(G, SE); + if (G->isZero() || !GV) + return; + Formula F = Base; + F.BaseGV = GV; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + return; + if (IsScaledReg) + F.ScaledReg = G; + else + F.BaseRegs[Idx] = G; + (void)InsertFormula(LU, LUIdx, F); +} + /// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets. void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. if (Base.BaseGV) return; - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *G = Base.BaseRegs[i]; - GlobalValue *GV = ExtractSymbol(G, SE); - if (G->isZero() || !GV) - continue; + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i); + if (Base.Scale == 1) + GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1, + /* IsScaledReg */ true); +} + +/// \brief Helper function for LSRInstance::GenerateConstantOffsets. +void LSRInstance::GenerateConstantOffsetsImpl( + LSRUse &LU, unsigned LUIdx, const Formula &Base, + const SmallVectorImpl &Worklist, size_t Idx, bool IsScaledReg) { + const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; + for (SmallVectorImpl::const_iterator I = Worklist.begin(), + E = Worklist.end(); + I != E; ++I) { Formula F = Base; - F.BaseGV = GV; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) - continue; - F.BaseRegs[i] = G; - (void)InsertFormula(LU, LUIdx, F); + F.BaseOffset = (uint64_t)Base.BaseOffset - *I; + if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, + LU.AccessTy, F)) { + // Add the offset to the base register. + const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); + // If it cancelled out, drop the base register, otherwise update it. + if (NewG->isZero()) { + if (IsScaledReg) { + F.Scale = 0; + F.ScaledReg = nullptr; + } else + F.DeleteBaseReg(F.BaseRegs[Idx]); + F.Canonicalize(); + } else if (IsScaledReg) + F.ScaledReg = NewG; + else + F.BaseRegs[Idx] = NewG; + + (void)InsertFormula(LU, LUIdx, F); + } } + + int64_t Imm = ExtractImmediate(G, SE); + if (G->isZero() || Imm == 0) + return; + Formula F = Base; + F.BaseOffset = (uint64_t)F.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) + return; + if (IsScaledReg) + F.ScaledReg = G; + else + F.BaseRegs[Idx] = G; + (void)InsertFormula(LU, LUIdx, F); } /// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets. 
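A small worked instance of GenerateConstantOffsetsImpl (values illustrative): with Base.BaseOffset = 0 and a Worklist entry I = 8, the new formula gets F.BaseOffset = -8 while the register is rebuilt as NewG = G + 8, so the address is unchanged: G + 0 == (G + 8) + (-8). The payoff is cross-use reuse: another use whose offset differs by 8 can now share the register G + 8. When the addition cancels the register entirely, the scaled or base slot is dropped and Canonicalize() restores the invariant.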
@@ -3288,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (LU.MaxOffset != LU.MinOffset) Worklist.push_back(LU.MaxOffset); - for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { - const SCEV *G = Base.BaseRegs[i]; - - for (SmallVectorImpl::const_iterator I = Worklist.begin(), - E = Worklist.end(); I != E; ++I) { - Formula F = Base; - F.BaseOffset = (uint64_t)Base.BaseOffset - *I; - if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, - LU.AccessTy, F)) { - // Add the offset to the base register. - const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); - // If it cancelled out, drop the base register, otherwise update it. - if (NewG->isZero()) { - std::swap(F.BaseRegs[i], F.BaseRegs.back()); - F.BaseRegs.pop_back(); - } else - F.BaseRegs[i] = NewG; - - (void)InsertFormula(LU, LUIdx, F); - } - } - - int64_t Imm = ExtractImmediate(G, SE); - if (G->isZero() || Imm == 0) - continue; - Formula F = Base; - F.BaseOffset = (uint64_t)F.BaseOffset + Imm; - if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) - continue; - F.BaseRegs[i] = G; - (void)InsertFormula(LU, LUIdx, F); - } + for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) + GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i); + if (Base.Scale == 1) + GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1, + /* IsScaledReg */ true); } /// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up @@ -3419,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. - if (Base.Scale != 0) return; + // Try to unscale the formula to generate a better scale. + if (Base.Scale != 0 && !Base.Unscale()) + return; + + assert(Base.Scale == 0 && "Unscale did not did its job!"); // Check each interesting stride. for (SmallSetVector::const_iterator @@ -3460,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { Formula F = Base; F.ScaledReg = Quotient; F.DeleteBaseReg(F.BaseRegs[i]); + // The canonical representation of 1*reg is reg, which is already in + // Base. In that case, do not try to insert the formula, it will be + // rejected anyway. + if (F.Scale == 1 && F.BaseRegs.empty()) + continue; (void)InsertFormula(LU, LUIdx, F); } } @@ -3624,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // TODO: Use a more targeted data structure. for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { - const Formula &F = LU.Formulae[L]; + Formula F = LU.Formulae[L]; + // FIXME: The code for the scaled and unscaled registers looks + // very similar but slightly different. Investigate if they + // could be merged. That way, we would not have to unscale the + // Formula. + F.Unscale(); // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; @@ -3650,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { continue; // OK, looks good. + NewF.Canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); } else { // Use the immediate in a base register. @@ -3683,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { goto skip_formula; // Ok, looks good. 
+ NewF.Canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); break; skip_formula:; @@ -3936,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { for (SmallVectorImpl::const_iterator I = LU.Formulae.begin(), E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; - if (F.BaseOffset == 0 || F.Scale != 0) + if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1)) continue; LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); @@ -4033,7 +4216,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // Pick the register which is used by the most LSRUses, which is likely // to be a good reuse register candidate. - const SCEV *Best = 0; + const SCEV *Best = nullptr; unsigned BestNum = 0; for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end(); I != E; ++I) { @@ -4130,19 +4313,22 @@ void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, E = LU.Formulae.end(); I != E; ++I) { const Formula &F = *I; - // Ignore formulae which do not use any of the required registers. - bool SatisfiedReqReg = true; + // Ignore formulae which may not be ideal in terms of register reuse of + // ReqRegs. The formula should use all required registers before + // introducing new ones. + int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size()); for (SmallSetVector::const_iterator J = ReqRegs.begin(), JE = ReqRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; - if ((!F.ScaledReg || F.ScaledReg != Reg) && - std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) == + if ((F.ScaledReg && F.ScaledReg == Reg) || + std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) != F.BaseRegs.end()) { - SatisfiedReqReg = false; - break; + --NumReqRegsToFind; + if (NumReqRegsToFind == 0) + break; } } - if (!SatisfiedReqReg) { + if (NumReqRegsToFind != 0) { // If none of the formulae satisfied the required registers, then we could // clear ReqRegs and try again. Currently, we simply give up in this case. continue; @@ -4240,7 +4426,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, } bool AllDominate = true; - Instruction *BetterPos = 0; + Instruction *BetterPos = nullptr; Instruction *Tentative = IDom->getTerminator(); for (SmallVectorImpl::const_iterator I = Inputs.begin(), E = Inputs.end(); I != E; ++I) { @@ -4379,11 +4565,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, LF.UserInst, LF.OperandValToReplace, Loops, SE, DT); - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP))); } // Expand the ScaledReg portion. - Value *ICmpScaledV = 0; + Value *ICmpScaledV = nullptr; if (F.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; @@ -4394,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF, Loops, SE, DT); if (LU.Kind == LSRUse::ICmpZero) { - // An interesting way of "folding" with an icmp is to use a negated - // scale, which we'll implement by inserting it into the other operand - // of the icmp. - assert(F.Scale == -1 && - "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); + // Expand ScaleReg as if it was part of the base regs. + if (F.Scale == 1) + Ops.push_back( + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP))); + else { + // An interesting way of "folding" with an icmp is to use a negated + // scale, which we'll implement by inserting it into the other operand + // of the icmp. 
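A worked instance of the new pruning in SolveRecurse (register names illustrative): with ReqRegs = { r1, r2, r3 } and a formula over { r1, r2 }, NumReqRegsToFind starts at min(2, 3) = 2; both r1 and r2 are found, the counter reaches zero, and the formula is kept. The old boolean test demanded that every required register appear and would have discarded it. Per the updated comment, the requirement is that a formula spend all of its registers on required ones before introducing new ones, not that it cover the whole required set.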
+ assert(F.Scale == -1 && + "The only scale supported by ICmpZero uses is -1!"); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + } } else { // Otherwise just expand the scaled register and an explicit scale, // which is expected to be matched as part of the address. // Flush the operand list to suppress SCEVExpander hoisting address modes. - if (!Ops.empty() && LU.Kind == LSRUse::Address) { + // Unless the addressing mode will not be folded. + if (!Ops.empty() && LU.Kind == LSRUse::Address && + isAMCompletelyFolded(TTI, LU, F)) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); - ScaledS = SE.getMulExpr(ScaledS, - SE.getConstant(ScaledS->getType(), F.Scale)); + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)); + if (F.Scale != 1) + ScaledS = + SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); Ops.push_back(ScaledS); } } @@ -4490,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } CI->setOperand(1, ICmpScaledV); } else { - assert(F.Scale == 0 && + // A scale of 1 means that the scale has been expanded as part of the + // base regs. + assert((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and " "a scale at the same time!"); Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), @@ -4531,7 +4728,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, Loop *PNLoop = LI.getLoopFor(Parent); if (!PNLoop || Parent != PNLoop->getHeader()) { // Split the critical edge. - BasicBlock *NewBB = 0; + BasicBlock *NewBB = nullptr; if (!Parent->isLandingPad()) { NewBB = SplitCriticalEdge(BB, Parent, P, /*MergeIdenticalEdges=*/true, @@ -4560,7 +4757,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, } std::pair::iterator, bool> Pair = - Inserted.insert(std::make_pair(BB, static_cast(0))); + Inserted.insert(std::make_pair(BB, static_cast(nullptr))); if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { @@ -4670,7 +4867,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P) DT(P->getAnalysis().getDomTree()), LI(P->getAnalysis()), TTI(P->getAnalysis()), L(L), Changed(false), - IVIncInsertPos(0) { + IVIncInsertPos(nullptr) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index ecd350b..fc28fd2 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -12,7 +12,6 @@ // counts of loops easily. 
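The Expand changes around ICmpZero are easier to follow with the negated-scale trick spelled out (illustrative):

  #include <cassert>

  // With Scale == -1, instead of materializing x + -1*y and comparing the
  // sum against zero, the scaled register moves to the other icmp operand.
  bool materialized(long x, long y) { return (x + -1 * y) == 0; }
  bool folded(long x, long y)       { return x == y; }

  int main() {
    for (long x = 0; x < 3; ++x)
      for (long y = 0; y < 3; ++y)
        assert(materialized(x, y) == folded(x, y));
  }

The new Scale == 1 branch, by contrast, simply expands the scaled register as if it were one more base register.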
//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Scalar.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" @@ -29,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + static cl::opt UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, cl::desc("The cut-off point for automatic loop unrolling")); @@ -237,9 +238,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > Threshold) { - DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << Threshold << "\n"); + if (TripCount != 1 && + (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) { + if (Size > Threshold) + DEBUG(dbgs() << " Too large to fully unroll with count: " << Count + << " because size: " << Size << ">" << Threshold << "\n"); + bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; if (!AllowPartial && !(Runtime && TripCount == 0)) { DEBUG(dbgs() << " will not try to unroll partially because " diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 5954f4a..977c53a 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -26,7 +26,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unswitch" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -53,6 +52,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "loop-unswitch" + STATISTIC(NumBranches, "Number of branches unswitched"); STATISTIC(NumSwitches, "Number of switches unswitched"); STATISTIC(NumSelects , "Number of selects unswitched"); @@ -96,7 +97,7 @@ namespace { public: LUAnalysisCache() : - CurLoopInstructions(0), CurrentLoopProperties(0), + CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr), MaxSize(Threshold) {} @@ -151,8 +152,8 @@ namespace { static char ID; // Pass ID, replacement for typeid explicit LoopUnswitch(bool Os = false) : LoopPass(ID), OptimizeForSize(Os), redoLoop(false), - currentLoop(0), DT(0), loopHeader(0), - loopPreheader(0) { + currentLoop(nullptr), DT(nullptr), loopHeader(nullptr), + loopPreheader(nullptr) { initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); } @@ -180,15 +181,6 @@ namespace { BranchesInfo.forgetLoop(currentLoop); } - /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, - /// remove it. 
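Plugging illustrative numbers into the reworked LoopUnroll guard: with LoopSize = 40 and Count = 8, Size = 320. For a full unroll (Count == TripCount) only Threshold applies, and 320 > 150 lands in the size handling as before. For a partial unroll (Count != TripCount), Size is now also measured against PartialThreshold (defined elsewhere in this file), closing the gap where an over-sized partial unroll was only ever checked against the full-unroll threshold.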
- void RemoveLoopFromWorklist(Loop *L) { - std::vector::iterator I = std::find(LoopProcessWorklist.begin(), - LoopProcessWorklist.end(), L); - if (I != LoopProcessWorklist.end()) - LoopProcessWorklist.erase(I); - } - void initLoopData() { loopHeader = currentLoop->getHeader(); loopPreheader = currentLoop->getLoopPreheader(); @@ -212,9 +204,8 @@ namespace { Instruction *InsertPt); void SimplifyCode(std::vector &Worklist, Loop *L); - void RemoveLoopFromHierarchy(Loop *L); - bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0, - BasicBlock **LoopExit = 0); + bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr, + BasicBlock **LoopExit = nullptr); }; } @@ -283,8 +274,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) { LoopsProperties.erase(LIt); } - CurrentLoopProperties = 0; - CurLoopInstructions = 0; + CurrentLoopProperties = nullptr; + CurLoopInstructions = nullptr; } // Mark case value as unswitched. @@ -355,10 +346,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { // We can never unswitch on vector conditions. if (Cond->getType()->isVectorTy()) - return 0; + return nullptr; // Constants should be folded, not unswitched on! - if (isa(Cond)) return 0; + if (isa(Cond)) return nullptr; // TODO: Handle: br (VARIANT|INVARIANT). @@ -378,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { return RHS; } - return 0; + return nullptr; } bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { @@ -389,7 +380,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { LPM = &LPM_Ref; DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); bool Changed = false; @@ -461,7 +452,7 @@ bool LoopUnswitch::processCurrentLoop() { // Find a value to unswitch on: // FIXME: this should chose the most expensive case! // FIXME: scan for a case with a non-critical edge? - Constant *UnswitchVal = 0; + Constant *UnswitchVal = nullptr; // Do not process same value again and again. // At this point we have some cases already unswitched and @@ -518,7 +509,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, if (!L->contains(BB)) { // Otherwise, this is a loop exit, this is fine so long as this is the // first exit. - if (ExitBB != 0) return false; + if (ExitBB) return false; ExitBB = BB; return true; } @@ -545,10 +536,10 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set Visited; Visited.insert(L->getHeader()); // Branches to header make infinite loops. - BasicBlock *ExitBB = 0; + BasicBlock *ExitBB = nullptr; if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) return ExitBB; - return 0; + return nullptr; } /// IsTrivialUnswitchCondition - Check to see if this unswitch condition is @@ -569,7 +560,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, TerminatorInst *HeaderTerm = Header->getTerminator(); LLVMContext &Context = Header->getContext(); - BasicBlock *LoopExitBB = 0; + BasicBlock *LoopExitBB = nullptr; if (BranchInst *BI = dyn_cast(HeaderTerm)) { // If the header block doesn't end with a conditional branch on Cond, we // can't handle it. @@ -639,8 +630,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, /// unswitch the loop, reprocess the pieces, then return true. 
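The canonical unswitching picture, for reference (illustrative C++; the pass works on IR and only unswitches loop-invariant conditions such as flag here):

  void before(int *a, int n, bool flag) {
    for (int i = 0; i < n; ++i)
      a[i] = flag ? 1 : 2;          // invariant branch inside the loop
  }

  void after(int *a, int n, bool flag) {
    if (flag)
      for (int i = 0; i < n; ++i) a[i] = 1;
    else
      for (int i = 0; i < n; ++i) a[i] = 2;
  }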
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { Function *F = loopHeader->getParent(); - Constant *CondVal = 0; - BasicBlock *ExitBlock = 0; + Constant *CondVal = nullptr; + BasicBlock *ExitBlock = nullptr; if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { // If the condition is trivial, always unswitch. There is no code growth @@ -948,17 +939,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, ++NumSimplify; } -/// RemoveLoopFromHierarchy - We have discovered that the specified loop has -/// become unwrapped, either because the backedge was deleted, or because the -/// edge into the header was removed. If the edge into the header from the -/// latch block was removed, the loop is unwrapped but subloops are still alive, -/// so they just reparent loops. If the loops are actually dead, they will be -/// removed later. -void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) { - LPM->deleteLoopFromQueue(L); - RemoveLoopFromWorklist(L); -} - // RewriteLoopBodyWithConditionConstant - We know either that the value LIC has // the value specified by Val in the specified loop, or we know it does NOT have // that value. Rewrite any uses of LIC or of properties correlated to it. @@ -1020,7 +1000,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // If we know that LIC is not Val, use this info to simplify code. SwitchInst *SI = dyn_cast(UI); - if (SI == 0 || !isa(Val)) continue; + if (!SI || !isa(Val)) continue; SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast(Val)); // Default case is live for multiple values. diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 7c0a623..4251ac4 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loweratomic" #include "llvm/Transforms/Scalar.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -20,6 +19,8 @@ #include "llvm/Pass.h" using namespace llvm; +#define DEBUG_TYPE "loweratomic" + static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { IRBuilder<> Builder(CXI->getParent(), CXI); Value *Ptr = CXI->getPointerOperand(); @@ -42,7 +43,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) { Value *Val = RMWI->getValOperand(); LoadInst *Orig = Builder.CreateLoad(Ptr); - Value *Res = NULL; + Value *Res = nullptr; switch (RMWI->getOperation()) { default: llvm_unreachable("Unexpected RMW operation"); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 2603c96..b6bc792 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "memcpyopt" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -33,6 +32,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "memcpyopt" + STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); STATISTIC(NumMemSetInfer, "Number of memsets inferred"); STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); @@ -49,7 +50,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, int64_t Offset = 0; for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { ConstantInt *OpC = dyn_cast(GEP->getOperand(i)); - if (OpC 
== 0) + if (!OpC) return VariableIdxFound = true; if (OpC->isZero()) continue; // No offset. @@ -89,12 +90,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, // If one pointer is a GEP and the other isn't, then see if the GEP is a // constant offset from the base, as in "P" and "gep P, 1". - if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { + if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); return !VariableIdxFound; } - if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { + if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); return !VariableIdxFound; } @@ -317,9 +318,9 @@ namespace { static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); - MD = 0; - TLI = 0; - DL = 0; + MD = nullptr; + TLI = nullptr; + DL = nullptr; } bool runOnFunction(Function &F) override; @@ -373,7 +374,7 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", /// attempts to merge them together into a memcpy/memset. Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, Value *StartPtr, Value *ByteVal) { - if (DL == 0) return 0; + if (!DL) return nullptr; // Okay, so we now have a single store that can be splatable. Scan to find // all subsequent stores of the same value to offset from the same pointer. @@ -426,7 +427,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // If we have no ranges, then we just had a single store with nothing that // could be merged in. This is a very common case of course. if (Ranges.empty()) - return 0; + return nullptr; // If we had at least one store that could be merged in, add the starting // store as well. We try to avoid this unless there is at least something @@ -440,7 +441,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, // Now that we have full information about ranges, loop over the ranges and // emit memset's for anything big enough to be worthwhile. - Instruction *AMemSet = 0; + Instruction *AMemSet = nullptr; for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) { const MemsetRange &Range = *I; @@ -491,7 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (!SI->isSimple()) return false; - if (DL == 0) return false; + if (!DL) return false; // Detect cases where we're performing call slot forwarding, but // happen to be using a load-store pair to implement it, rather than @@ -500,7 +501,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { if (LI->isSimple() && LI->hasOneUse() && LI->getParent() == SI->getParent()) { MemDepResult ldep = MD->getDependency(LI); - CallInst *C = 0; + CallInst *C = nullptr; if (ldep.isClobber() && !isa(ldep.getInst())) C = dyn_cast(ldep.getInst()); @@ -512,7 +513,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { for (BasicBlock::iterator I = --BasicBlock::iterator(SI), E = C; I != E; --I) { if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { - C = 0; + C = nullptr; break; } } @@ -603,7 +604,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, return false; // Check that all of src is copied to dest. 
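Stepping back from performCallSlotOptzn for a moment, the tryMergingIntoMemset hunks above are easier to read with a concrete picture. A hypothetical input/output pair (source-level analogy; the pass itself merges IR stores): adjacent stores of one splattable byte value collapse into a single inferred memset, which is what the NumMemSetInfer statistic counts:

    #include <cstring>

    struct Header { char tag[4]; unsigned len; };

    void clear_before(Header *h) {
      h->tag[0] = 0; h->tag[1] = 0;  // four adjacent byte stores of zero...
      h->tag[2] = 0; h->tag[3] = 0;
      h->len = 0;                    // ...plus an adjacent zero store
    }

    void clear_after(Header *h) {    // what the merged form amounts to
      std::memset(h, 0, sizeof(Header));
    }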
- if (DL == 0) return false; + if (!DL) return false; ConstantInt *srcArraySize = dyn_cast(srcAlloca->getArraySize()); if (!srcArraySize) return false; @@ -846,7 +847,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { // The optimizations after this point require the memcpy size. ConstantInt *CopySize = dyn_cast(M->getLength()); - if (CopySize == 0) return false; + if (!CopySize) return false; // There are three possible optimizations we can do for memcpy: // a) memcpy-memcpy xform which exposes redundancy for DSE. @@ -929,7 +930,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) { /// processByValArgument - This is called on every byval argument in call sites. bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { - if (DL == 0) return false; + if (!DL) return false; // Find out what feeds this byval argument. Value *ByValArg = CS.getArgument(ArgNo); @@ -946,13 +947,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { // a memcpy, see if we can byval from the source of the memcpy instead of the // result. MemCpyInst *MDep = dyn_cast(DepInfo.getInst()); - if (MDep == 0 || MDep->isVolatile() || + if (!MDep || MDep->isVolatile() || ByValArg->stripPointerCasts() != MDep->getDest()) return false; // The length of the memcpy must be larger than or equal to the size of the byval. ConstantInt *C1 = dyn_cast(MDep->getLength()); - if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) + if (!C1 || C1->getValue().getZExtValue() < ByValSize) return false; // Get the alignment of the byval. If the call doesn't specify the alignment, @@ -1043,7 +1044,7 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ?
&DLP->getDataLayout() : nullptr; TLI = &getAnalysis(); // If we don't have at least memset and memcpy, there is little point of doing @@ -1058,6 +1059,6 @@ bool MemCpyOpt::runOnFunction(Function &F) { MadeChange = true; } - MD = 0; + MD = nullptr; return MadeChange; } diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp index 2f19935..7cce89e 100644 --- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp +++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp @@ -13,7 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "partially-inline-libcalls" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Intrinsics.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "partially-inline-libcalls" + namespace { class PartiallyInlineLibCalls : public FunctionPass { public: diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index b6b4d97..986d6a4 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -20,7 +20,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" @@ -42,6 +41,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "reassociate" + STATISTIC(NumChanged, "Number of insts reassociated"); STATISTIC(NumAnnihil, "Number of expr tree annihilated"); STATISTIC(NumFactor , "Number of multiplies factored"); @@ -122,14 +123,14 @@ namespace { public: XorOpnd(Value *V); - bool isInvalid() const { return SymbolicPart == 0; } + bool isInvalid() const { return SymbolicPart == nullptr; } bool isOrExpr() const { return isOr; } Value *getValue() const { return OrigVal; } Value *getSymbolicPart() const { return SymbolicPart; } unsigned getSymbolicRank() const { return SymbolicRank; } const APInt &getConstPart() const { return ConstPart; } - void Invalidate() { SymbolicPart = OrigVal = 0; } + void Invalidate() { SymbolicPart = OrigVal = nullptr; } void setSymbolicRank(unsigned R) { SymbolicRank = R; } // Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank. @@ -236,7 +237,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) { if (V->hasOneUse() && isa(V) && cast(V)->getOpcode() == Opcode) return cast(V); - return 0; + return nullptr; } static bool isUnmovableInstruction(Instruction *I) { @@ -284,7 +285,7 @@ void Reassociate::BuildRankMap(Function &F) { unsigned Reassociate::getRank(Value *V) { Instruction *I = dyn_cast(V); - if (I == 0) { + if (!I) { if (isa(V)) return ValueRankMap[V]; // Function argument. return 0; // Otherwise it's a global or constant, rank 0. } @@ -705,7 +706,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I, // ExpressionChanged - Non-null if the rewritten expression differs from the // original in some non-trivial way, requiring the clearing of optional flags. // Flags are cleared from the operator in ExpressionChanged up to I inclusive. 
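Before RewriteExprTree's body below, a minimal illustration of what Reassociate's rank machinery buys (standard behavior of the pass, restated here for context rather than quoted from the patch): constants receive the lowest rank, so reassociation moves them next to each other and lets constant folding finish the job:

    int before(int x) { return (4 + x) + 5; } // constants separated by x
    int after(int x)  { return x + 9; }       // after reassociation + folding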
- BinaryOperator *ExpressionChanged = 0; + BinaryOperator *ExpressionChanged = nullptr; for (unsigned i = 0; ; ++i) { // The last operation (which comes earliest in the IR) is special as both // operands will come from Ops, rather than just one with the other being @@ -995,7 +996,7 @@ static Value *EmitAddTreeOfValues(Instruction *I, /// remove Factor from the tree and return the new tree. Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { BinaryOperator *BO = isReassociableOp(V, Instruction::Mul); - if (!BO) return 0; + if (!BO) return nullptr; SmallVector Tree; MadeChange |= LinearizeExprTree(BO, Tree); @@ -1029,7 +1030,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { if (!FoundFactor) { // Make sure to restore the operands to the expression tree. RewriteExprTree(BO, Factors); - return 0; + return nullptr; } BasicBlock::iterator InsertPt = BO; ++InsertPt; @@ -1114,7 +1115,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode, ++NumAnnihil; } } - return 0; + return nullptr; } /// Helper function of CombineXorOpnd(). It creates a bitwise-and @@ -1135,7 +1136,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd, } return Opnd; } - return 0; + return nullptr; } // Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd" @@ -1261,7 +1262,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, return V; if (Ops.size() == 1) - return 0; + return nullptr; SmallVector Opnds; SmallVector OpndPtrs; @@ -1294,7 +1295,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor()); // Step 3: Combine adjacent operands - XorOpnd *PrevOpnd = 0; + XorOpnd *PrevOpnd = nullptr; bool Changed = false; for (unsigned i = 0, e = Opnds.size(); i < e; i++) { XorOpnd *CurrOpnd = OpndPtrs[i]; @@ -1328,7 +1329,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, PrevOpnd = CurrOpnd; } else { CurrOpnd->Invalidate(); - PrevOpnd = 0; + PrevOpnd = nullptr; } Changed = true; } @@ -1358,7 +1359,7 @@ Value *Reassociate::OptimizeXor(Instruction *I, } } - return 0; + return nullptr; } /// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This @@ -1445,7 +1446,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4) // where they are actually the same multiply. unsigned MaxOcc = 0; - Value *MaxOccVal = 0; + Value *MaxOccVal = nullptr; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul); if (!BOp) @@ -1543,7 +1544,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2)); } - return 0; + return nullptr; } /// \brief Build up a vector of value/power pairs factoring a product. @@ -1688,14 +1689,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I, // We can only optimize the multiplies when there is a chain of more than // three, such that a balanced tree might require fewer total multiplies. if (Ops.size() < 4) - return 0; + return nullptr; // Try to turn linear trees of multiplies without other uses of the // intermediate stages into minimal multiply DAGs with perfect sub-expression // re-use. SmallVector Factors; if (!collectMultiplyFactors(Ops, Factors)) - return 0; // All distinct factors, so nothing left for us to do. + return nullptr; // All distinct factors, so nothing left for us to do.
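The factoring that OptimizeAdd's MaxOcc/MaxOccVal bookkeeping enables, as a source-level sketch (illustrative only): the operand occurring most often across the addends is pulled out, trading several multiplies for one:

    int before(int a, int b, int c) { return a*b + a*c + a; }
    // 'a' occurs in every addend, so it is factored out:
    int after(int a, int b, int c)  { return a * (b + c + 1); }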
IRBuilder<> Builder(I); Value *V = buildMinimalMultiplyDAG(Builder, Factors); @@ -1704,14 +1705,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I, ValueEntry NewEntry = ValueEntry(getRank(V), V); Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry); - return 0; + return nullptr; } Value *Reassociate::OptimizeExpression(BinaryOperator *I, SmallVectorImpl &Ops) { // Now that we have the linearized expression tree, try to optimize it. // Start by folding any constants that we found. - Constant *Cst = 0; + Constant *Cst = nullptr; unsigned Opcode = I->getOpcode(); while (!Ops.empty() && isa(Ops.back().Op)) { Constant *C = cast(Ops.pop_back_val().Op); @@ -1761,7 +1762,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, if (Ops.size() != NumOps) return OptimizeExpression(I, Ops); - return 0; + return nullptr; } /// EraseInst - Zap the given instruction, adding interesting operands to the diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index d9809ce..b6023e2 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "reg2mem" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" @@ -30,6 +29,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "reg2mem" + STATISTIC(NumRegsDemoted, "Number of registers demoted"); STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted"); diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index b8f10e9..feeb231 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -17,7 +17,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sccp" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -42,6 +41,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "sccp" + STATISTIC(NumInstRemoved, "Number of instructions removed"); STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable"); @@ -81,7 +82,7 @@ class LatticeVal { } public: - LatticeVal() : Val(0, undefined) {} + LatticeVal() : Val(nullptr, undefined) {} bool isUndefined() const { return getLatticeValue() == undefined; } bool isConstant() const { @@ -133,7 +134,7 @@ public: ConstantInt *getConstantInt() const { if (isConstant()) return dyn_cast(getConstant()); - return 0; + return nullptr; } void markForcedConstant(Constant *V) { @@ -403,7 +404,7 @@ private: if (Constant *C = dyn_cast(V)) { Constant *Elt = C->getAggregateElement(i); - if (Elt == 0) + if (!Elt) LV.markOverdefined(); // Unknown sort of constant. else if (isa(Elt)) ; // Undef values remain undefined. @@ -522,7 +523,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, LatticeVal BCValue = getValueState(BI->getCondition()); ConstantInt *CI = BCValue.getConstantInt(); - if (CI == 0) { + if (!CI) { // Overdefined condition variables, and branches on unfoldable constant // conditions, mean the branch could go either way. if (!BCValue.isUndefined()) @@ -549,7 +550,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI, LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); - if (CI == 0) { // Overdefined or undefined condition? + if (!CI) { // Overdefined or undefined condition? // All destinations are executable! 
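Zooming out from getFeasibleSuccessors for a moment: for readers new to SCCP, here is a compact model of the three-state lattice these hunks manipulate (a simplified sketch with assumed names, not LLVM's LatticeVal). A cell only ever moves downward, from undefined to a single constant to overdefined, which bounds how often the solver revisits each value:

    #include <cstdint>

    enum class State { Undefined, Constant, Overdefined };

    struct Cell {
      State S = State::Undefined;
      int64_t C = 0;
      // Merge a constant fact into the cell. Returns true if the cell
      // changed, which is what re-queues its users on the worklist.
      bool mergeConstant(int64_t V) {
        if (S == State::Undefined) { S = State::Constant; C = V; return true; }
        if (S == State::Constant && C == V) return false;
        if (S == State::Overdefined) return false;
        S = State::Overdefined;   // conflicting constants meet here
        return true;
      }
    };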
if (!SCValue.isUndefined()) Succs.assign(TI.getNumSuccessors(), true); @@ -594,7 +595,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { // Overdefined condition variables mean the branch could go either way, // undef conditions mean that neither edge is feasible yet. ConstantInt *CI = BCValue.getConstantInt(); - if (CI == 0) + if (!CI) return !BCValue.isUndefined(); // Constant condition variables mean the branch can only go a single way. @@ -612,7 +613,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { LatticeVal SCValue = getValueState(SI->getCondition()); ConstantInt *CI = SCValue.getConstantInt(); - if (CI == 0) + if (!CI) return !SCValue.isUndefined(); return SI->findCaseValue(CI).getCaseSuccessor() == To; @@ -626,7 +627,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { #ifndef NDEBUG dbgs() << "Unknown terminator instruction: " << *TI << '\n'; #endif - llvm_unreachable(0); + llvm_unreachable(nullptr); } // visit Implementations - Something changed in this instruction, either an @@ -667,7 +668,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) { // constant. If they are constant and don't agree, the PHI is overdefined. // If there are no executable operands, the PHI remains undefined. // - Constant *OperandVal = 0; + Constant *OperandVal = nullptr; for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { LatticeVal IV = getValueState(PN.getIncomingValue(i)); if (IV.isUndefined()) continue; // Doesn't influence PHI node. @@ -678,7 +679,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) { if (IV.isOverdefined()) // PHI node becomes overdefined! return markOverdefined(&PN); - if (OperandVal == 0) { // Grab the first value. + if (!OperandVal) { // Grab the first value. OperandVal = IV.getConstant(); continue; } @@ -774,7 +775,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) { void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) { StructType *STy = dyn_cast(IVI.getType()); - if (STy == 0) + if (!STy) return markOverdefined(&IVI); // If this has more than one index, we can't handle it, drive all results to @@ -862,7 +863,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { // If this is an AND or OR with 0 or -1, it doesn't matter that the other // operand is overdefined. if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) { - LatticeVal *NonOverdefVal = 0; + LatticeVal *NonOverdefVal = nullptr; if (!V1State.isOverdefined()) NonOverdefVal = &V1State; else if (!V2State.isOverdefined()) @@ -1081,7 +1082,7 @@ void SCCPSolver::visitCallSite(CallSite CS) { // The common case is that we aren't tracking the callee, either because we // are not doing interprocedural analysis or the callee is indirect, or is // external. Handle these cases first. - if (F == 0 || F->isDeclaration()) { + if (!F || F->isDeclaration()) { CallOverdefined: // Void return and not tracking callee, just bail. if (I->getType()->isVoidTy()) return; @@ -1555,7 +1556,7 @@ bool SCCP::runOnFunction(Function &F) { DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n"); const DataLayoutPass *DLP = getAnalysisIfAvailable(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? 
&DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis(); SCCPSolver Solver(DL, TLI); @@ -1684,7 +1685,7 @@ static bool AddressIsTaken(const GlobalValue *GV) { bool IPSCCP::runOnModule(Module &M) { DataLayoutPass *DLP = getAnalysisIfAvailable(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis(); SCCPSolver Solver(DL, TLI); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index ed5e618..04bf4f8 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -23,7 +23,6 @@ /// //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sroa" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" @@ -64,6 +63,8 @@ using namespace llvm; +#define DEBUG_TYPE "sroa" + STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed"); STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca"); @@ -159,8 +160,8 @@ public: Use *getUse() const { return UseAndIsSplittable.getPointer(); } - bool isDead() const { return getUse() == 0; } - void kill() { UseAndIsSplittable.setPointer(0); } + bool isDead() const { return getUse() == nullptr; } + void kill() { UseAndIsSplittable.setPointer(nullptr); } /// \brief Support for ordering ranges. /// @@ -320,7 +321,7 @@ static Value *foldSelectInst(SelectInst &SI) { if (SI.getOperand(1) == SI.getOperand(2)) return SI.getOperand(1); - return 0; + return nullptr; } /// \brief Builder for the alloca slices. @@ -642,7 +643,7 @@ private: Uses.push_back(std::make_pair(I, cast(U))); } while (!Uses.empty()); - return 0; + return nullptr; } void visitPHINode(PHINode &PN) { @@ -724,7 +725,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), #endif - PointerEscapingInstr(0) { + PointerEscapingInstr(nullptr) { SliceBuilder PB(DL, AI, *this); SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { @@ -873,7 +874,7 @@ public: for (SmallVectorImpl::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = 0; + Value *Arg = nullptr; if (StoreInst *SI = dyn_cast(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -969,7 +970,7 @@ class SROA : public FunctionPass { public: SROA(bool RequiresDomTree = true) : FunctionPass(ID), RequiresDomTree(RequiresDomTree), - C(0), DL(0), DT(0) { + C(nullptr), DL(nullptr), DT(nullptr) { initializeSROAPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F) override; @@ -1011,9 +1012,9 @@ INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", static Type *findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset) { - Type *Ty = 0; + Type *Ty = nullptr; bool TyIsCommon = true; - IntegerType *ITy = 0; + IntegerType *ITy = nullptr; // Note that we need to look at *every* alloca slice's Use to ensure we // always get consistent results regardless of the order of slices. 
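As orientation for the slicing machinery in these SROA hunks, a before/after of the overall transform expressed at the source level (a loose analogy; the pass of course operates on allocas in IR):

    struct Pair { int x; int y; };

    int sum_before() {
      Pair p;            // one alloca covering both fields
      p.x = 1;
      p.y = 2;
      return p.x + p.y;
    }

    int sum_after() {    // the alloca split into scalar slices, which
      int p_x = 1;       // mem2reg can then promote to SSA registers
      int p_y = 2;
      return p_x + p_y;
    }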
@@ -1024,7 +1025,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B, if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset) continue; - Type *UserTy = 0; + Type *UserTy = nullptr; if (LoadInst *LI = dyn_cast(U->getUser())) { UserTy = LI->getType(); } else if (StoreInst *SI = dyn_cast(U->getUser())) { @@ -1074,7 +1075,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B, /// FIXME: This should be hoisted into a generic utility, likely in /// Transforms/Util/Local.h static bool isSafePHIToSpeculate(PHINode &PN, - const DataLayout *DL = 0) { + const DataLayout *DL = nullptr) { // For now, we can only do this promotion if the load is in the same block // as the PHI, and if there are no stores between the phi and load. // TODO: Allow recursive phi users. @@ -1084,7 +1085,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, bool HaveLoad = false; for (User *U : PN.users()) { LoadInst *LI = dyn_cast(U); - if (LI == 0 || !LI->isSimple()) + if (!LI || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is @@ -1191,7 +1192,8 @@ static void speculatePHINodeLoads(PHINode &PN) { /// /// We can do this to a select if its only uses are loads and if the operand /// to the select can be loaded unconditionally. -static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) { +static bool isSafeSelectToSpeculate(SelectInst &SI, + const DataLayout *DL = nullptr) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); bool TDerefable = TValue->isDereferenceablePointer(); @@ -1199,7 +1201,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) { for (User *U : SI.users()) { LoadInst *LI = dyn_cast(U); - if (LI == 0 || !LI->isSimple()) + if (!LI || !LI->isSimple()) return false; // Both operands to the select need to be dereferencable, either @@ -1332,19 +1334,21 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, // We can't recurse through pointer types. if (Ty->isPointerTy()) - return 0; + return nullptr; // We try to analyze GEPs over vectors here, but note that these GEPs are // extremely poorly defined currently. The long-term goal is to remove GEPing // over a vector from the IR completely. if (VectorType *VecTy = dyn_cast(Ty)) { unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType()); - if (ElementSizeInBits % 8) - return 0; // GEPs over non-multiple of 8 size vector elements are invalid. + if (ElementSizeInBits % 8 != 0) { + // GEPs over non-multiple of 8 size vector elements are invalid. 
+ return nullptr; + } APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(VecTy->getNumElements())) - return 0; + return nullptr; Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(), @@ -1356,7 +1360,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); APInt NumSkippedElements = Offset.sdiv(ElementSize); if (NumSkippedElements.ugt(ArrTy->getNumElements())) - return 0; + return nullptr; Offset -= NumSkippedElements * ElementSize; Indices.push_back(IRB.getInt(NumSkippedElements)); @@ -1366,17 +1370,17 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL, StructType *STy = dyn_cast(Ty); if (!STy) - return 0; + return nullptr; const StructLayout *SL = DL.getStructLayout(STy); uint64_t StructOffset = Offset.getZExtValue(); if (StructOffset >= SL->getSizeInBytes()) - return 0; + return nullptr; unsigned Index = SL->getElementContainingOffset(StructOffset); Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); Type *ElementTy = STy->getElementType(Index); if (Offset.uge(DL.getTypeAllocSize(ElementTy))) - return 0; // The offset points into alignment padding. + return nullptr; // The offset points into alignment padding. Indices.push_back(IRB.getInt32(Index)); return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy, @@ -1402,14 +1406,14 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL, // Don't consider any GEPs through an i8* as natural unless the TargetTy is // an i8. if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8)) - return 0; + return nullptr; Type *ElementTy = Ty->getElementType(); if (!ElementTy->isSized()) - return 0; // We can't GEP through an unsized element. + return nullptr; // We can't GEP through an unsized element. APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy)); if (ElementSize == 0) - return 0; // Zero-length arrays can't help us build a natural GEP. + return nullptr; // Zero-length arrays can't help us build a natural GEP. APInt NumSkippedElements = Offset.sdiv(ElementSize); Offset -= NumSkippedElements * ElementSize; @@ -1445,11 +1449,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, // We may end up computing an offset pointer that has the wrong type. If we // never are able to compute one directly that has the correct type, we'll // fall back to it, so keep it around here. - Value *OffsetPtr = 0; + Value *OffsetPtr = nullptr; // Remember any i8 pointer we come across to re-use if we need to do a raw // byte offset. - Value *Int8Ptr = 0; + Value *Int8Ptr = nullptr; APInt Int8PtrOffset(Offset.getBitWidth(), 0); Type *TargetTy = PointerTy->getPointerElementType(); @@ -2043,14 +2047,14 @@ public: NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAI.getAllocatedType()), - VecTy(IsVectorPromotable ? cast(NewAllocaTy) : 0), - ElementTy(VecTy ? VecTy->getElementType() : 0), + VecTy(IsVectorPromotable ? cast(NewAllocaTy) : nullptr), + ElementTy(VecTy ? VecTy->getElementType() : nullptr), ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0), IntTy(IsIntegerPromotable ? 
Type::getIntNTy( NewAI.getContext(), DL.getTypeSizeInBits(NewAI.getAllocatedType())) - : 0), + : nullptr), BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(), OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { @@ -2144,7 +2148,7 @@ private: /// /// You can optionally pass a type to this routine and if that type's ABI /// alignment is itself suitable, this will return zero. - unsigned getSliceAlign(Type *Ty = 0) { + unsigned getSliceAlign(Type *Ty = nullptr) { unsigned NewAIAlign = NewAI.getAlignment(); if (!NewAIAlign) NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType()); @@ -2594,7 +2598,7 @@ private: unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0; unsigned NumElements = EndIndex - BeginIndex; IntegerType *SubIntTy - = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : nullptr; // Reset the other pointer type to match the register type we're going to // use, but using the address space of the original other pointer. @@ -2992,22 +2996,22 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, return stripAggregateTypeWrapping(DL, Ty); if (Offset > DL.getTypeAllocSize(Ty) || (DL.getTypeAllocSize(Ty) - Offset) < Size) - return 0; + return nullptr; if (SequentialType *SeqTy = dyn_cast(Ty)) { // We can't partition pointers... if (SeqTy->isPointerTy()) - return 0; + return nullptr; Type *ElementTy = SeqTy->getElementType(); uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); uint64_t NumSkippedElements = Offset / ElementSize; if (ArrayType *ArrTy = dyn_cast(SeqTy)) { if (NumSkippedElements >= ArrTy->getNumElements()) - return 0; + return nullptr; } else if (VectorType *VecTy = dyn_cast(SeqTy)) { if (NumSkippedElements >= VecTy->getNumElements()) - return 0; + return nullptr; } Offset -= NumSkippedElements * ElementSize; @@ -3015,7 +3019,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, if (Offset > 0 || Size < ElementSize) { // Bail if the partition ends in a different array element. if ((Offset + Size) > ElementSize) - return 0; + return nullptr; // Recurse through the element type trying to peel off offset bytes. return getTypePartition(DL, ElementTy, Offset, Size); } @@ -3026,20 +3030,20 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, assert(Size > ElementSize); uint64_t NumElements = Size / ElementSize; if (NumElements * ElementSize != Size) - return 0; + return nullptr; return ArrayType::get(ElementTy, NumElements); } StructType *STy = dyn_cast(Ty); if (!STy) - return 0; + return nullptr; const StructLayout *SL = DL.getStructLayout(STy); if (Offset >= SL->getSizeInBytes()) - return 0; + return nullptr; uint64_t EndOffset = Offset + Size; if (EndOffset > SL->getSizeInBytes()) - return 0; + return nullptr; unsigned Index = SL->getElementContainingOffset(Offset); Offset -= SL->getElementOffset(Index); @@ -3047,12 +3051,12 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, Type *ElementTy = STy->getElementType(Index); uint64_t ElementSize = DL.getTypeAllocSize(ElementTy); if (Offset >= ElementSize) - return 0; // The offset points into alignment padding. + return nullptr; // The offset points into alignment padding. // See if any partition must be contained by the element. 
if (Offset > 0 || Size < ElementSize) { if ((Offset + Size) > ElementSize) - return 0; + return nullptr; return getTypePartition(DL, ElementTy, Offset, Size); } assert(Offset == 0); @@ -3065,14 +3069,14 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, if (EndOffset < SL->getSizeInBytes()) { unsigned EndIndex = SL->getElementContainingOffset(EndOffset); if (Index == EndIndex) - return 0; // Within a single element and its padding. + return nullptr; // Within a single element and its padding. // Don't try to form "natural" types if the elements don't line up with the // expected size. // FIXME: We could potentially recurse down through the last element in the // sub-struct to find a natural end point. if (SL->getElementOffset(EndIndex) != EndOffset) - return 0; + return nullptr; assert(Index < EndIndex); EE = STy->element_begin() + EndIndex; @@ -3083,7 +3087,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, STy->isPacked()); const StructLayout *SubSL = DL.getStructLayout(SubTy); if (Size != SubSL->getSizeInBytes()) - return 0; // The sub-struct doesn't have quite the size needed. + return nullptr; // The sub-struct doesn't have quite the size needed. return SubTy; } @@ -3108,7 +3112,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S, // Try to compute a friendly type for this partition of the alloca. This // won't always succeed, in which case we fall back to a legal integer type // or an i8 array of an appropriate size. - Type *SliceTy = 0; + Type *SliceTy = nullptr; if (Type *CommonUseTy = findCommonType(B, E, EndOffset)) if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize) SliceTy = CommonUseTy; @@ -3155,7 +3159,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S, // the alloca's alignment unconstrained. if (Alignment <= DL->getABITypeAlignment(SliceTy)) Alignment = 0; - NewAI = new AllocaInst(SliceTy, 0, Alignment, + NewAI = new AllocaInst(SliceTy, nullptr, Alignment, AI.getName() + ".sroa." + Twine(B - S.begin()), &AI); ++NumNewAllocas; } @@ -3494,7 +3498,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet &DeletedAllocas) { for (Use &Operand : I->operands()) if (Instruction *U = dyn_cast(Operand)) { // Zero out the operand and see if it becomes trivially dead. - Operand = 0; + Operand = nullptr; if (isInstructionTriviallyDead(U)) DeadInsts.insert(U); } @@ -3612,7 +3616,7 @@ bool SROA::runOnFunction(Function &F) { DL = &DLP->getDataLayout(); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : 0; + DT = DTWP ? &DTWP->getDomTree() : nullptr; BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index 20d6daa..8e557aa 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -22,8 +22,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sample-profile" - #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" @@ -54,6 +52,8 @@ using namespace llvm; +#define DEBUG_TYPE "sample-profile" + // Command line option to specify the file to read samples from. This is // mainly used for debugging. 
static cl::opt SampleProfileFile( @@ -120,8 +120,8 @@ typedef DenseMap> BlockEdgeMap; class SampleFunctionProfile { public: SampleFunctionProfile() - : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(0), PDT(0), - LI(0), Ctx(0) {} + : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(nullptr), + PDT(nullptr), LI(nullptr), Ctx(nullptr) {} unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F, DominatorTree *DomTree, @@ -315,7 +315,7 @@ protected: /// \brief Name of the profile file to load. StringRef Filename; - /// \brief Flag indicating whether the profile input loaded succesfully. + /// \brief Flag indicating whether the profile input loaded successfully. bool ProfileIsValid; }; } diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index e950eba..f8f828c 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -64,6 +64,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeStructurizeCFGPass(Registry); initializeSinkingPass(Registry); initializeTailCallElimPass(Registry); + initializeSeparateConstOffsetFromGEPPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { @@ -181,6 +182,7 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); + // FIXME: should this also add createDebugInfoVerifierPass()? } void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index e7b5ab2..58192fc 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalarrepl" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" @@ -52,6 +51,8 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "scalarrepl" + STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); @@ -304,7 +305,7 @@ public: explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL, unsigned SLT) : AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false), - ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false), + ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false), HadDynamicAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); @@ -332,8 +333,8 @@ private: AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // If we can't convert this scalar, or if mem2reg can trivially do it, bail // out. - if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial) - return 0; + if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial) + return nullptr; // If an alloca has only memset / memcpy uses, it may still have an Unknown // ScalarKind. Treat it as an Integer below. @@ -361,23 +362,24 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // Do not convert to scalar integer if the alloca size exceeds the // scalar load threshold. 
if (BitWidth > ScalarLoadThreshold) - return 0; + return nullptr; if ((ScalarKind == ImplicitVector || ScalarKind == Integer) && !HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth)) - return 0; + return nullptr; // Dynamic accesses on integers aren't yet supported. They need us to shift // by a dynamic amount which could be difficult to work out as we might not // know whether to use a left or right shift. if (ScalarKind == Integer && HadDynamicAccess) - return 0; + return nullptr; DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. NewTy = IntegerType::get(AI->getContext(), BitWidth); } - AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); - ConvertUsesToScalar(AI, NewAI, 0, 0); + AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "", + AI->getParent()->begin()); + ConvertUsesToScalar(AI, NewAI, 0, nullptr); return NewAI; } @@ -508,7 +510,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset, // Compute the offset that this GEP adds to the pointer. SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - Value *GEPNonConstantIdx = 0; + Value *GEPNonConstantIdx = nullptr; if (!GEP->hasAllConstantIndices()) { if (!isa(PtrTy->getElementType())) return false; @@ -564,7 +566,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset, if (NonConstantIdx) return false; ConstantInt *Len = dyn_cast(MTI->getLength()); - if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0) + if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0) return false; IsNotTrivial = true; // Can't be mem2reg'd. @@ -608,7 +610,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, if (GetElementPtrInst *GEP = dyn_cast(User)) { // Compute the offset that this GEP adds to the pointer. 
SmallVector Indices(GEP->op_begin()+1, GEP->op_end()); - Value* GEPNonConstantIdx = 0; + Value* GEPNonConstantIdx = nullptr; if (!GEP->hasAllConstantIndices()) { assert(!NonConstantIdx && "Dynamic GEP reading from dynamic GEP unsupported"); @@ -671,7 +673,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in"); Value *New = ConvertScalar_InsertValue( ConstantInt::get(User->getContext(), APVal), - Old, Offset, 0, Builder); + Old, Offset, nullptr, Builder); Builder.CreateStore(New, NewAI); // If the load we just inserted is now dead, then the memset overwrote @@ -809,7 +811,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i), Offset+Layout.getElementOffsetInBits(i), - 0, Builder); + nullptr, Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; @@ -822,7 +824,8 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType, Value *Res = UndefValue::get(AT); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(), - Offset+i*EltSize, 0, Builder); + Offset+i*EltSize, nullptr, + Builder); Res = Builder.CreateInsertValue(Res, Elt, i); } return Res; @@ -938,7 +941,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, Value *Elt = Builder.CreateExtractValue(SV, i); Old = ConvertScalar_InsertValue(Elt, Old, Offset+Layout.getElementOffsetInBits(i), - 0, Builder); + nullptr, Builder); } return Old; } @@ -949,7 +952,8 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType()); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { Value *Elt = Builder.CreateExtractValue(SV, i); - Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder); + Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr, + Builder); } return Old; } @@ -1024,7 +1028,7 @@ bool SROA::runOnFunction(Function &F) { return false; DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; bool Changed = performPromotion(F); @@ -1054,7 +1058,7 @@ class AllocaPromoter : public LoadAndStorePromoter { public: AllocaPromoter(const SmallVectorImpl &Insts, SSAUpdater &S, DIBuilder *DB) - : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {} + : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {} void run(AllocaInst *AI, const SmallVectorImpl &Insts) { // Remember which alloca we're promoting (for isInstInList). @@ -1100,7 +1104,7 @@ public: for (SmallVectorImpl::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = NULL; + Value *Arg = nullptr; if (StoreInst *SI = dyn_cast(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1143,7 +1147,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { for (User *U : SI->users()) { LoadInst *LI = dyn_cast(U); - if (LI == 0 || !LI->isSimple()) return false; + if (!LI || !LI->isSimple()) return false; // Both operands to the select need to be dereferencable, either absolutely // (e.g. allocas) or at this point because we can see other accesses to it. 
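The rewrite that isSafeSelectToSpeculate guards, as a source-level analogy (hypothetical code, not from the patch): once both pointers are known dereferenceable, the load can be hoisted above the select, turning a select of addresses into a select of values:

    int before(bool c, int *p, int *q) {
      int *sel = c ? p : q;  // select feeding a single simple load
      return *sel;
    }

    int after(bool c, int *p, int *q) {
      int pv = *p;           // both loads now execute unconditionally,
      int qv = *q;           // legal only because p and q are known
      return c ? pv : qv;    // dereferenceable (e.g. allocas)
    }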
@@ -1183,7 +1187,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { unsigned MaxAlign = 0; for (User *U : PN->users()) { LoadInst *LI = dyn_cast(U); - if (LI == 0 || !LI->isSimple()) return false; + if (!LI || !LI->isSimple()) return false; // For now we only allow loads in the same block as the PHI. This is a // common case that happens when instcombine merges two loads through a PHI. @@ -1380,7 +1384,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *Pred = PN->getIncomingBlock(i); LoadInst *&Load = InsertedLoads[Pred]; - if (Load == 0) { + if (!Load) { Load = new LoadInst(PN->getIncomingValue(i), PN->getName() + "." + Pred->getName(), Pred->getTerminator()); @@ -1400,7 +1404,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) { bool SROA::performPromotion(Function &F) { std::vector Allocas; - DominatorTree *DT = 0; + DominatorTree *DT = nullptr; if (HasDomTree) DT = &getAnalysis().getDomTree(); @@ -1537,7 +1541,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, if (StructType *ST = dyn_cast(AI->getAllocatedType())) { ElementAllocas.reserve(ST->getNumContainedTypes()); for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, + AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr, AI->getAlignment(), AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); @@ -1548,7 +1552,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, ElementAllocas.reserve(AT->getNumElements()); Type *ElTy = AT->getElementType(); for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { - AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), + AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(), AI->getName() + "." + Twine(i), AI); ElementAllocas.push_back(NA); WorkList.push_back(NA); // Add to worklist for recursive processing @@ -1577,7 +1581,7 @@ void SROA::DeleteDeadInstructions() { // Zero out the operand and see if it becomes trivially dead. // (But, don't add allocas to the dead instruction list -- they are // already on the worklist and will be deleted separately.) - *OI = 0; + *OI = nullptr; if (isInstructionTriviallyDead(U) && !isa(U)) DeadInsts.push_back(U); } @@ -1604,12 +1608,10 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, isSafeForScalarRepl(GEPI, GEPOffset, Info); } else if (MemIntrinsic *MI = dyn_cast(User)) { ConstantInt *Length = dyn_cast(MI->getLength()); - if (Length == 0) - return MarkUnsafe(Info, User); - if (Length->isNegative()) + if (!Length || Length->isNegative()) return MarkUnsafe(Info, User); - isSafeMemAccess(Offset, Length->getZExtValue(), 0, + isSafeMemAccess(Offset, Length->getZExtValue(), nullptr, U.getOperandNo() == 0, Info, MI, true /*AllowWholeAccess*/); } else if (LoadInst *LI = dyn_cast(User)) { @@ -1744,12 +1746,12 @@ static bool isHomogeneousAggregate(Type *T, unsigned &NumElts, Type *&EltTy) { if (ArrayType *AT = dyn_cast(T)) { NumElts = AT->getNumElements(); - EltTy = (NumElts == 0 ? 0 : AT->getElementType()); + EltTy = (NumElts == 0 ? nullptr : AT->getElementType()); return true; } if (StructType *ST = dyn_cast(T)) { NumElts = ST->getNumContainedTypes(); - EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); + EltTy = (NumElts == 0 ? 
nullptr : ST->getContainedType(0)); for (unsigned n = 1; n < NumElts; ++n) { if (ST->getContainedType(n) != EltTy) return false; @@ -2038,7 +2040,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset, // In this case, it must be the last GEP operand which is dynamic so keep that // aside until we've found the constant GEP offset then add it back in at the // end. - Value* NonConstantIdx = 0; + Value* NonConstantIdx = nullptr; if (!GEPI->hasAllConstantIndices()) NonConstantIdx = Indices.pop_back_val(); Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices); @@ -2108,7 +2110,8 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, if (NewOffset) { // Splice the first element and index 'NewOffset' bytes in. SROA will // split the alloca again later. - Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy()); + unsigned AS = AI->getType()->getAddressSpace(); + Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS)); V = Builder.CreateGEP(V, Builder.getInt64(NewOffset)); IdxTy = NewElts[Idx]->getAllocatedType(); @@ -2155,7 +2158,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // appropriate type. The "Other" pointer is the pointer that goes to memory // that doesn't have anything to do with the alloca that we are promoting. For // memset, this Value* stays null. - Value *OtherPtr = 0; + Value *OtherPtr = nullptr; unsigned MemAlignment = MI->getAlignment(); if (MemTransferInst *MTI = dyn_cast(MI)) { // memmove/memcpy if (Inst == MTI->getRawDest()) @@ -2207,7 +2210,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { // If this is a memcpy/memmove, emit a GEP of the other element address. - Value *OtherElt = 0; + Value *OtherElt = nullptr; unsigned OtherEltAlign = MemAlignment; if (OtherPtr) { @@ -2449,7 +2452,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, // There are two forms here: AI could be an array or struct. Both cases // have different ways to compute the element offset. - const StructLayout *Layout = 0; + const StructLayout *Layout = nullptr; uint64_t ArrayEltBitOffset = 0; if (StructType *EltSTy = dyn_cast(AllocaEltTy)) { Layout = DL->getStructLayout(EltSTy); diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 006375c..7a73f11 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "scalarizer" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" @@ -25,6 +24,8 @@ using namespace llvm; +#define DEBUG_TYPE "scalarizer" + namespace { // Used to store the scattered form of a vector. typedef SmallVector ValueVector; @@ -48,7 +49,7 @@ public: // insert them before BBI in BB. If Cache is nonnull, use it to cache // the results. Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, - ValueVector *cachePtr = 0); + ValueVector *cachePtr = nullptr); // Return component I, creating a new Value for it if necessary. Value *operator[](unsigned I); @@ -101,7 +102,7 @@ struct BinarySplitter { // Information about a load or store that we're scalarizing.
struct VectorLayout { - VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {} + VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {} // Return the alignment of element I. uint64_t getElemAlign(unsigned I) { @@ -186,9 +187,9 @@ Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Ty = PtrTy->getElementType(); Size = Ty->getVectorNumElements(); if (!CachePtr) - Tmp.resize(Size, 0); + Tmp.resize(Size, nullptr); else if (CachePtr->empty()) - CachePtr->resize(Size, 0); + CachePtr->resize(Size, nullptr); else assert(Size == CachePtr->size() && "Inconsistent vector sizes"); } @@ -241,7 +242,7 @@ bool Scalarizer::doInitialization(Module &M) { bool Scalarizer::runOnFunction(Function &F) { DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { BasicBlock *BB = BBI; for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp new file mode 100644 index 0000000..b8529e1 --- /dev/null +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -0,0 +1,623 @@ +//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loop unrolling may create many similar GEPs for array accesses. +// e.g., a 2-level loop +// +// float a[32][32]; // global variable +// +// for (int i = 0; i < 2; ++i) { +// for (int j = 0; j < 2; ++j) { +// ... +// ... = a[x + i][y + j]; +// ... +// } +// } +// +// will probably be unrolled to: +// +// gep %a, 0, %x, %y; load +// gep %a, 0, %x, %y + 1; load +// gep %a, 0, %x + 1, %y; load +// gep %a, 0, %x + 1, %y + 1; load +// +// LLVM's GVN does not use partial redundancy elimination yet, and is thus +// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs +// significant slowdown in targets with limited addressing modes. For instance, +// because the PTX target does not support the reg+reg addressing mode, the +// NVPTX backend emits PTX code that literally computes the pointer address of +// each GEP, wasting tons of registers. It emits the following PTX for the +// first load and similar PTX for other loads. +// +// mov.u32 %r1, %x; +// mov.u32 %r2, %y; +// mul.wide.u32 %rl2, %r1, 128; +// mov.u64 %rl3, a; +// add.s64 %rl4, %rl3, %rl2; +// mul.wide.u32 %rl5, %r2, 4; +// add.s64 %rl6, %rl4, %rl5; +// ld.global.f32 %f1, [%rl6]; +// +// To reduce the register pressure, the optimization implemented in this file +// merges the common part of a group of GEPs, so we can compute each pointer +// address by adding a simple offset to the common part, saving many registers. +// +// It works by splitting each GEP into a variadic base and a constant offset. +// The variadic base can be computed once and reused by multiple GEPs, and the +// constant offsets can be nicely folded into the reg+immediate addressing mode +// (supported by most targets) without using any extra register. 
+// +// For instance, we transform the four GEPs and four loads in the above example +// into: +// +// base = gep a, 0, x, y +// load base +// load base + 1 * sizeof(float) +// load base + 32 * sizeof(float) +// load base + 33 * sizeof(float) +// +// Given the transformed IR, a backend that supports the reg+immediate +// addressing mode can easily fold the pointer arithmetic into the loads. For +// example, the NVPTX backend can easily fold the pointer arithmetic into the +// ld.global.f32 instructions, and the resultant PTX uses far fewer registers. +// +// mov.u32 %r1, %tid.x; +// mov.u32 %r2, %tid.y; +// mul.wide.u32 %rl2, %r1, 128; +// mov.u64 %rl3, a; +// add.s64 %rl4, %rl3, %rl2; +// mul.wide.u32 %rl5, %r2, 4; +// add.s64 %rl6, %rl4, %rl5; +// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX +// ld.global.f32 %f2, [%rl6+4]; // much better +// ld.global.f32 %f3, [%rl6+128]; // much better +// ld.global.f32 %f4, [%rl6+132]; // much better +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +static cl::opt<bool> DisableSeparateConstOffsetFromGEP( + "disable-separate-const-offset-from-gep", cl::init(false), + cl::desc("Do not separate the constant offset from a GEP instruction"), + cl::Hidden); + +namespace { + +/// \brief A helper class for separating a constant offset from a GEP index. +/// +/// In real programs, a GEP index may be more complicated than a simple addition +/// of something and a constant integer which can be trivially split. For +/// example, to split ((a << 3) | 5) + b, we need to search deeper for the +/// constant offset, so that we can separate the index into (a << 3) + b and 5. +/// +/// Therefore, this class looks into the expression that computes a given GEP +/// index, and tries to find a constant integer that can be hoisted to the +/// outermost level of the expression as an addition. Not every constant in an +/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a + +/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), though in this case, +/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15). +class ConstantOffsetExtractor { + public: + /// Extracts a constant offset from the given GEP index. It outputs the + /// numeric value of the extracted constant offset (0 if it fails), and a + /// new index representing the remainder (equal to the original index minus + /// the constant offset). + /// \p Idx The given GEP index + /// \p NewIdx The new index to replace + /// \p DL The data layout of the module + /// \p IP Calculating the new index requires new instructions. IP indicates + /// where to insert them (typically right before the GEP). + static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL, + Instruction *IP); + /// Looks for a constant offset without extracting it. The meaning of the + /// arguments and the return value are the same as Extract.
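A rough model of the search that Extract and Find perform, sketched over a toy expression tree rather than LLVM's Value/User graph (Expr and findConstOffset are invented for illustration):

#include <cstdint>

// Toy expression: a constant, a variable, or a binary add/sub.
struct Expr {
  enum Kind { Const, Var, Add, Sub } kind;
  int64_t val;      // meaningful for Const only
  Expr *lhs, *rhs;  // meaningful for Add/Sub only
};

// Mirrors the search order described above: prefer the left operand, fall
// back to the right, and negate an offset found in the right operand of a sub.
int64_t findConstOffset(const Expr *e) {
  switch (e->kind) {
  case Expr::Const: return e->val;
  case Expr::Add: {
    int64_t c = findConstOffset(e->lhs);
    return c != 0 ? c : findConstOffset(e->rhs);
  }
  case Expr::Sub: {
    int64_t c = findConstOffset(e->lhs);
    return c != 0 ? c : -findConstOffset(e->rhs);
  }
  default: return 0;  // a bare variable carries no extractable offset
  }
}

On (a + (b + 5)) this returns 5, and on (a - (b + 5)) it returns -5, matching the sub-negation rule in the implementation that follows.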
+ static int64_t Find(Value *Idx, const DataLayout *DL); + + private: + ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt) + : DL(Layout), IP(InsertionPt) {} + /// Searches the expression that computes V for a constant offset. If the + /// searching is successful, update UserChain as a path from V to the constant + /// offset. + int64_t find(Value *V); + /// A helper function to look into both operands of a binary operator U. + /// \p IsSub Whether U is a sub operator. If so, we need to negate the + /// constant offset at some point. + int64_t findInEitherOperand(User *U, bool IsSub); + /// After finding the constant offset and how it is reached from the GEP + /// index, we build a new index which is a clone of the old one except the + /// constant offset is removed. For example, given (a + (b + 5)) and knowing + /// the constant offset is 5, this function returns (a + b). + /// + /// We cannot simply change the constant to zero because the expression that + /// computes the index or its intermediate result may be used by others. + Value *rebuildWithoutConstantOffset(); + // A helper function for rebuildWithoutConstantOffset that rebuilds the direct + // user (U) of the constant offset (C). + Value *rebuildLeafWithoutConstantOffset(User *U, Value *C); + /// Returns a clone of U with the first occurrence of From replaced by To. + Value *cloneAndReplace(User *U, Value *From, Value *To); + + /// Returns true if LHS and RHS have no bits in common, i.e., LHS & RHS == 0. + bool NoCommonBits(Value *LHS, Value *RHS) const; + /// Computes which bits are known to be one or zero. + /// \p KnownOne Mask of all bits that are known to be one. + /// \p KnownZero Mask of all bits that are known to be zero. + void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const; + /// Finds the first use of Used in U. Returns -1 if not found. + static unsigned FindFirstUse(User *U, Value *Used); + /// Returns whether OPC (sext or zext) can be distributed to the operands of + /// BO. e.g., sext can be distributed to the operands of an "add nsw" because + /// sext (add nsw a, b) == add nsw (sext a), (sext b). + static bool Distributable(unsigned OPC, BinaryOperator *BO); + + /// The path from the constant offset to the old GEP index. e.g., if the GEP + /// index is "a * b + (c + 5)", then after running function find, UserChain[0] will + /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and + /// UserChain[2] will be the entire expression "a * b + (c + 5)". + /// + /// This path helps rebuildWithoutConstantOffset rebuild the new GEP index. + SmallVector<User *, 8> UserChain; + /// The data layout of the module. Used in ComputeKnownBits. + const DataLayout *DL; + Instruction *IP; /// Insertion position of cloned instructions. +}; + +/// \brief A pass that tries to split every GEP in the function into a variadic +/// base and a constant offset. It is a FunctionPass because searching for the +/// constant offset may inspect other basic blocks. +class SeparateConstOffsetFromGEP : public FunctionPass { + public: + static char ID; + SeparateConstOffsetFromGEP() : FunctionPass(ID) { + initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<DataLayoutPass>(); + AU.addRequired<TargetTransformInfo>(); + } + bool runOnFunction(Function &F) override; + + private: + /// Tries to split the given GEP into a variadic base and a constant offset, + /// and returns true if the splitting succeeds.
+ bool splitGEP(GetElementPtrInst *GEP); + /// Finds the constant offset within each index, and accumulates them. This + /// function only inspects the GEP without changing it. The output + /// NeedsExtraction indicates whether we can extract a non-zero constant + /// offset from any index. + int64_t accumulateByteOffset(GetElementPtrInst *GEP, const DataLayout *DL, + bool &NeedsExtraction); +}; +} // anonymous namespace + +char SeparateConstOffsetFromGEP::ID = 0; +INITIALIZE_PASS_BEGIN( + SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", + "Split GEPs to a variadic base and a constant offset for better CSE", false, + false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_DEPENDENCY(DataLayoutPass) +INITIALIZE_PASS_END( + SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", + "Split GEPs to a variadic base and a constant offset for better CSE", false, + false) + +FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() { + return new SeparateConstOffsetFromGEP(); +} + +bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) { + assert(OPC == Instruction::SExt || OPC == Instruction::ZExt); + + // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B) + // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B) + if (BO->getOpcode() == Instruction::Add || + BO->getOpcode() == Instruction::Sub) { + return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) || + (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap()); + } + + // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B) + // -instcombine also leverages this invariant to do the reverse + // transformation to reduce integer casts. + return BO->getOpcode() == Instruction::And || + BO->getOpcode() == Instruction::Or || + BO->getOpcode() == Instruction::Xor; +} + +int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) { + assert(U->getNumOperands() == 2); + int64_t ConstantOffset = find(U->getOperand(0)); + // If we found a constant offset in the left operand, stop and return that. + // This shortcut might cause us to miss opportunities to combine the + // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9. + // However, such cases are probably already handled by -instcombine, + // given this pass runs after the standard optimizations. + if (ConstantOffset != 0) return ConstantOffset; + ConstantOffset = find(U->getOperand(1)); + // If U is a sub operator, negate the constant offset found in the right + // operand. + return IsSub ? -ConstantOffset : ConstantOffset; +} + +int64_t ConstantOffsetExtractor::find(Value *V) { + // TODO(jingyue): We can even trace into integer/pointer casts, such as + // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only + // integers because it gives good enough results for our benchmarks. + assert(V->getType()->isIntegerTy()); + + User *U = dyn_cast<User>(V); + // We cannot do much with Values that are not a User, such as BasicBlock and + // MDNode. + if (U == nullptr) return 0; + + int64_t ConstantOffset = 0; + if (ConstantInt *CI = dyn_cast<ConstantInt>(U)) { + // Hooray, we found it! + ConstantOffset = CI->getSExtValue(); + } else if (Operator *O = dyn_cast<Operator>(U)) { + // The GEP index may be more complicated than a simple addition of a + // variable and a constant. Therefore, we trace into subexpressions for more + // hoisting opportunities.
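Why Distributable demands nsw before letting sext pass through an add can be checked in ordinary C++, assuming the usual two's-complement narrowing; sextOfAdd and addOfSext are invented names:

#include <cstdint>

// sext(a + b): the add is performed in 8 bits, then widened.
int16_t sextOfAdd(int8_t a, int8_t b) { return (int16_t)(int8_t)(a + b); }

// (sext a) + (sext b): the operands are widened first, then added in 16 bits.
int16_t addOfSext(int8_t a, int8_t b) { return (int16_t)((int16_t)a + (int16_t)b); }

// sextOfAdd(100, 50) == -106 because the 8-bit add wrapped, while
// addOfSext(100, 50) == 150. "add nsw" promises the wrap never happens,
// which is exactly the license Distributable needs.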
+ switch (O->getOpcode()) { + case Instruction::Add: { + ConstantOffset = findInEitherOperand(U, false); + break; + } + case Instruction::Sub: { + ConstantOffset = findInEitherOperand(U, true); + break; + } + case Instruction::Or: { + // If LHS and RHS don't have common bits, (LHS | RHS) is equivalent to + // (LHS + RHS). + if (NoCommonBits(U->getOperand(0), U->getOperand(1))) + ConstantOffset = findInEitherOperand(U, false); + break; + } + case Instruction::SExt: + case Instruction::ZExt: { + // We trace into sext/zext if the operator can be distributed to its + // operand. e.g., we can trace into "sext (add nsw a, 5)" and + // extract constant 5, because + // sext (add nsw a, 5) == add nsw (sext a), 5 + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) { + if (Distributable(O->getOpcode(), BO)) + ConstantOffset = find(U->getOperand(0)); + } + break; + } + } + } + // If we found a non-zero constant offset, add it to the path for future + // transformation (rebuildWithoutConstantOffset). Zero is a valid constant + // offset, but doesn't help this optimization. + if (ConstantOffset != 0) + UserChain.push_back(U); + return ConstantOffset; +} + +unsigned ConstantOffsetExtractor::FindFirstUse(User *U, Value *Used) { + for (unsigned I = 0, E = U->getNumOperands(); I < E; ++I) { + if (U->getOperand(I) == Used) + return I; + } + return -1; +} + +Value *ConstantOffsetExtractor::cloneAndReplace(User *U, Value *From, + Value *To) { + // Finds in U the first use of From. It is safe to ignore future occurrences + // of From, because findInEitherOperand similarly stops searching the right + // operand when the first operand has a non-zero constant offset. + unsigned OpNo = FindFirstUse(U, From); + assert(OpNo != (unsigned)-1 && "UserChain wasn't built correctly"); + + // ConstantOffsetExtractor::find only follows Operators (i.e., Instructions + // and ConstantExprs). Therefore, U is either an Instruction or a + // ConstantExpr. + if (Instruction *I = dyn_cast<Instruction>(U)) { + Instruction *Clone = I->clone(); + Clone->setOperand(OpNo, To); + Clone->insertBefore(IP); + return Clone; + } + // cast<Constant>(To) is safe because a ConstantExpr only uses Constants. + return cast<ConstantExpr>(U) + ->getWithOperandReplaced(OpNo, cast<Constant>(To)); +} + +Value *ConstantOffsetExtractor::rebuildLeafWithoutConstantOffset(User *U, + Value *C) { + assert(U->getNumOperands() <= 2 && + "We didn't trace into any operator with more than 2 operands"); + // If U has only one operand which is the constant offset, removing the + // constant offset leaves U as a null value. + if (U->getNumOperands() == 1) + return Constant::getNullValue(U->getType()); + + // U->getNumOperands() == 2 + unsigned OpNo = FindFirstUse(U, C); // U->getOperand(OpNo) == C + assert(OpNo < 2 && "UserChain wasn't built correctly"); + Value *TheOther = U->getOperand(1 - OpNo); // The other operand of U + // If U = C - X, removing C makes U = -X; otherwise U will simply be X. + if (!isa<SubOperator>(U) || OpNo == 1) + return TheOther; + if (isa<ConstantExpr>(U)) + return ConstantExpr::getNeg(cast<Constant>(TheOther)); + return BinaryOperator::CreateNeg(TheOther, "", IP); +} + +Value *ConstantOffsetExtractor::rebuildWithoutConstantOffset() { + assert(UserChain.size() > 0 && "you at least found a constant, right?"); + // Start with the constant and go up through UserChain, each time building a + // clone of the subexpression but with the constant removed. + // e.g., to build a clone of (a + (b + (c + 5))) but with the 5 removed, we + // first build c, then (b + c), and finally (a + (b + c)).
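Returning to the Or case in the switch above, the identity it relies on is easy to check (C++14 binary literals; pure arithmetic, no assumptions beyond that):

// Disjoint bits: | behaves exactly like +, so the Or may be treated as an Add.
static_assert((0b1000 | 0b0011) == 0b1000 + 0b0011, "disjoint bits: | acts as +");
// Overlapping bits break the identity, so NoCommonBits must reject this case.
static_assert((0b0110 | 0b0011) == 7 && 0b0110 + 0b0011 == 9, "overlap: | != +");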
+ // + // Fast path: if the GEP index is a constant, simply returns 0. + if (UserChain.size() == 1) + return ConstantInt::get(UserChain[0]->getType(), 0); + + Value *Remainder = + rebuildLeafWithoutConstantOffset(UserChain[1], UserChain[0]); + for (size_t I = 2; I < UserChain.size(); ++I) + Remainder = cloneAndReplace(UserChain[I], UserChain[I - 1], Remainder); + return Remainder; +} + +int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx, + const DataLayout *DL, + Instruction *IP) { + ConstantOffsetExtractor Extractor(DL, IP); + // Find a non-zero constant offset first. + int64_t ConstantOffset = Extractor.find(Idx); + if (ConstantOffset == 0) + return 0; + // Then rebuild a new index with the constant removed. + NewIdx = Extractor.rebuildWithoutConstantOffset(); + return ConstantOffset; +} + +int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL) { + return ConstantOffsetExtractor(DL, nullptr).find(Idx); +} + +void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne, + APInt &KnownZero) const { + IntegerType *IT = cast(V->getType()); + KnownOne = APInt(IT->getBitWidth(), 0); + KnownZero = APInt(IT->getBitWidth(), 0); + llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0); +} + +bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const { + assert(LHS->getType() == RHS->getType() && + "LHS and RHS should have the same type"); + APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero; + ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero); + ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero); + return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); +} + +int64_t SeparateConstOffsetFromGEP::accumulateByteOffset( + GetElementPtrInst *GEP, const DataLayout *DL, bool &NeedsExtraction) { + NeedsExtraction = false; + int64_t AccumulativeByteOffset = 0; + gep_type_iterator GTI = gep_type_begin(*GEP); + for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { + if (isa(*GTI)) { + // Tries to extract a constant offset from this GEP index. + int64_t ConstantOffset = + ConstantOffsetExtractor::Find(GEP->getOperand(I), DL); + if (ConstantOffset != 0) { + NeedsExtraction = true; + // A GEP may have multiple indices. We accumulate the extracted + // constant offset to a byte offset, and later offset the remainder of + // the original GEP with this byte offset. + AccumulativeByteOffset += + ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType()); + } + } + } + return AccumulativeByteOffset; +} + +bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { + // Skip vector GEPs. + if (GEP->getType()->isVectorTy()) + return false; + + // The backend can already nicely handle the case where all indices are + // constant. + if (GEP->hasAllConstantIndices()) + return false; + + bool Changed = false; + + // Shortcuts integer casts. Eliminating these explicit casts can make + // subsequent optimizations more obvious: ConstantOffsetExtractor needn't + // trace into these casts. + if (GEP->isInBounds()) { + // Doing this to inbounds GEPs is safe because their indices are guaranteed + // to be non-negative and in bounds. 
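A quick sanity check of accumulateByteOffset's arithmetic against the a[32][32] example from the file header (assuming a 4-byte float, as the PTX there does): the constant 1 in the row index scales by a 128-byte row, and the constant 1 in the column index scales by 4 bytes.

static_assert(1 * (32 * sizeof(float)) + 1 * sizeof(float) == 132,
              "matches the [%rl6+132] operand in the header comment");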
+ gep_type_iterator GTI = gep_type_begin(*GEP); + for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { + if (isa<SequentialType>(*GTI)) { + if (Operator *O = dyn_cast<Operator>(GEP->getOperand(I))) { + if (O->getOpcode() == Instruction::SExt || + O->getOpcode() == Instruction::ZExt) { + GEP->setOperand(I, O->getOperand(0)); + Changed = true; + } + } + } + } + } + + const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout(); + bool NeedsExtraction; + int64_t AccumulativeByteOffset = + accumulateByteOffset(GEP, DL, NeedsExtraction); + + if (!NeedsExtraction) + return Changed; + // Before really splitting the GEP, check whether the backend supports the + // addressing mode we are about to produce. If not, this splitting probably + // won't be beneficial. + TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); + if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(), + /*BaseGV=*/nullptr, AccumulativeByteOffset, + /*HasBaseReg=*/true, /*Scale=*/0)) { + return Changed; + } + + // Remove the constant offset in each GEP index. The resultant GEP computes + // the variadic base. + gep_type_iterator GTI = gep_type_begin(*GEP); + for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { + if (isa<SequentialType>(*GTI)) { + Value *NewIdx = nullptr; + // Tries to extract a constant offset from this GEP index. + int64_t ConstantOffset = + ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP); + if (ConstantOffset != 0) { + assert(NewIdx != nullptr && + "ConstantOffset != 0 implies NewIdx is set"); + GEP->setOperand(I, NewIdx); + // Clear the inbounds attribute because the new index may be off-bound. + // e.g., + // + // b = add i64 a, 5 + // addr = gep inbounds float* p, i64 b + // + // is transformed to: + // + // addr2 = gep float* p, i64 a + // addr = gep float* addr2, i64 5 + // + // If a is -4, although the old index b is in bounds, the new index a is + // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the + // inbounds keyword is not present, the offsets are added to the base + // address with silently-wrapping two's complement arithmetic". + // Therefore, the final code will be semantically equivalent. + // + // TODO(jingyue): do some range analysis to keep as many inbounds as + // possible. GEPs with inbounds are more friendly to alias analysis. + GEP->setIsInBounds(false); + Changed = true; + } + } + } + + // Offsets the base with the accumulative byte offset. + // + // %gep ; the base + // ... %gep ... + // + // => add the offset + // + // %gep2 ; clone of %gep + // %new.gep = gep %gep2, <offset> + // %gep ; will be removed + // ... %gep ... + // + // => replace all uses of %gep with %new.gep and remove %gep + // + // %gep2 ; clone of %gep + // %new.gep = gep %gep2, <offset> + // ... %new.gep ... + // + // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an + // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep): + // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the + // type of %gep. + // + // %gep2 ; clone of %gep + // %0 = bitcast %gep2 to i8* + // %uglygep = gep %0, <offset> + // %new.gep = bitcast %uglygep to <type of %gep> + // ... %new.gep ... + Instruction *NewGEP = GEP->clone(); + NewGEP->insertBefore(GEP); + + Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + uint64_t ElementTypeSizeOfGEP = + DL->getTypeAllocSize(GEP->getType()->getElementType()); + if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) { + // Very likely. As long as %gep is naturally aligned, the byte offset we + // extracted should be a multiple of sizeof(*%gep).
+ // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we + // cast ElementTypeSizeOfGEP to signed. + int64_t Index = + AccumulativeByteOffset / static_cast(ElementTypeSizeOfGEP); + NewGEP = GetElementPtrInst::Create( + NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP); + } else { + // Unlikely but possible. For example, + // #pragma pack(1) + // struct S { + // int a[3]; + // int64 b[8]; + // }; + // #pragma pack() + // + // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After + // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is + // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of + // sizeof(int64). + // + // Emit an uglygep in this case. + Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(), + GEP->getPointerAddressSpace()); + NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP); + NewGEP = GetElementPtrInst::Create( + NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), + "uglygep", GEP); + if (GEP->getType() != I8PtrTy) + NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP); + } + + GEP->replaceAllUsesWith(NewGEP); + GEP->eraseFromParent(); + + return true; +} + +bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) { + if (DisableSeparateConstOffsetFromGEP) + return false; + + bool Changed = false; + for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { + for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) { + if (GetElementPtrInst *GEP = dyn_cast(I++)) { + Changed |= splitGEP(GEP); + } + // No need to split GEP ConstantExprs because all its indices are constant + // already. + } + } + return Changed; +} diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ceae5a7..5d5606b 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -38,6 +37,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "simplifycfg" + STATISTIC(NumSimpl, "Number of blocks simplified"); namespace { @@ -71,7 +72,7 @@ FunctionPass *llvm::createCFGSimplificationPass() { static bool mergeEmptyReturnBlocks(Function &F) { bool Changed = false; - BasicBlock *RetBlock = 0; + BasicBlock *RetBlock = nullptr; // Scan all the blocks in the function, looking for empty return blocks. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) { @@ -79,7 +80,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { // Only look at return blocks. ReturnInst *Ret = dyn_cast(BB.getTerminator()); - if (Ret == 0) continue; + if (!Ret) continue; // Only look at the block if it is empty or the only other thing in it is a // single PHI node that is the operand to the return. @@ -98,7 +99,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { } // If this is the first returning block, remember it and keep going. - if (RetBlock == 0) { + if (!RetBlock) { RetBlock = &BB; continue; } @@ -119,7 +120,7 @@ static bool mergeEmptyReturnBlocks(Function &F) { // If the canonical return block has no PHI node, create one now. 
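Back to the packed-struct example in the uglygep branch above (the SimplifyCFGPass.cpp listing resumes below): its numbers can be verified directly. int64 is spelled int64_t here, and the asserts restate the comment's arithmetic.

#include <cstdint>

#pragma pack(push, 1)
struct S {
  int32_t a[3];
  int64_t b[8];
};
#pragma pack(pop)

static_assert(sizeof(S) == 76, "12 + 64 bytes, no padding under pack(1)");
// &s[i + 1].b[j + 3] rebased onto &s[i].b[j]:
static_assert(sizeof(S) + 3 * sizeof(int64_t) == 100, "the offset from the comment");
static_assert(100 % sizeof(int64_t) == 4, "not a multiple of 8, hence the uglygep");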
PHINode *RetBlockPHI = dyn_cast(RetBlock->begin()); - if (RetBlockPHI == 0) { + if (!RetBlockPHI) { Value *InVal = cast(RetBlock->getTerminator())->getOperand(0); pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), @@ -173,7 +174,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { const TargetTransformInfo &TTI = getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); EverChanged |= iterativelySimplifyCFG(F, TTI, DL); diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index 4107374..482c33a 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sink" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -25,6 +24,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "sink" + STATISTIC(NumSunk, "Number of instructions sunk"); STATISTIC(NumSinkIter, "Number of sinking iterations"); @@ -203,7 +204,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, // Don't sink instructions into a loop. Loop *succ = LI->getLoopFor(SuccToSinkTo); Loop *cur = LI->getLoopFor(Inst->getParent()); - if (succ != 0 && succ != cur) + if (succ != nullptr && succ != cur) return false; } @@ -237,14 +238,14 @@ bool Sinking::SinkInstruction(Instruction *Inst, // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. - BasicBlock *SuccToSinkTo = 0; + BasicBlock *SuccToSinkTo = nullptr; // Instructions can only be sunk if all their uses are in blocks // dominated by one of the successors. // Look at all the postdominators and see if we can sink it in one. DomTreeNode *DTN = DT->getNode(Inst->getParent()); for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end(); - I != E && SuccToSinkTo == 0; ++I) { + I != E && SuccToSinkTo == nullptr; ++I) { BasicBlock *Candidate = (*I)->getBlock(); if ((*I)->getIDom()->getBlock() == Inst->getParent() && IsAcceptableTarget(Inst, Candidate)) @@ -254,13 +255,13 @@ bool Sinking::SinkInstruction(Instruction *Inst, // If no suitable postdominator was found, look at all the successors and // decide which one we should sink to, if any. for (succ_iterator I = succ_begin(Inst->getParent()), - E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) { + E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) { if (IsAcceptableTarget(Inst, *I)) SuccToSinkTo = *I; } // If we couldn't find a block to sink to, ignore this instruction. 
- if (SuccToSinkTo == 0) + if (!SuccToSinkTo) return false; DEBUG(dbgs() << "Sink" << *Inst << " ("; diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 8fd2268..7b77ae1 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "structurizecfg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SCCIterator.h" @@ -21,6 +20,8 @@ using namespace llvm; using namespace llvm::PatternMatch; +#define DEBUG_TYPE "structurizecfg" + namespace { // Definition of the complex types used in this pass. @@ -64,14 +65,14 @@ public: /// \brief Start a new query NearestCommonDominator(DominatorTree *DomTree) { DT = DomTree; - Result = 0; + Result = nullptr; } /// \brief Add BB to the resulting dominator void addBlock(BasicBlock *BB, bool Remember = true) { DomTreeNode *Node = DT->getNode(BB); - if (Result == 0) { + if (!Result) { unsigned Numbering = 0; for (;Node;Node = Node->getIDom()) IndexMap[Node] = ++Numbering; @@ -279,7 +280,7 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { void StructurizeCFG::orderNodes() { scc_iterator I = scc_begin(ParentRegion); for (Order.clear(); !I.isAtEnd(); ++I) { - std::vector &Nodes = *I; + const std::vector &Nodes = *I; Order.append(Nodes.begin(), Nodes.end()); } } @@ -453,10 +454,7 @@ void StructurizeCFG::insertConditions(bool Loops) { Value *Default = Loops ? BoolTrue : BoolFalse; SSAUpdater PhiInserter; - for (BranchVector::iterator I = Conds.begin(), - E = Conds.end(); I != E; ++I) { - - BranchInst *Term = *I; + for (BranchInst *Term : Conds) { assert(Term->isConditional()); BasicBlock *Parent = Term->getParent(); @@ -472,7 +470,7 @@ void StructurizeCFG::insertConditions(bool Loops) { NearestCommonDominator Dominator(DT); Dominator.addBlock(Parent, false); - Value *ParentValue = 0; + Value *ParentValue = nullptr; for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); PI != PE; ++PI) { @@ -591,7 +589,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, if (Node->isSubRegion()) { Region *SubRegion = Node->getNodeAs(); BasicBlock *OldExit = SubRegion->getExit(); - BasicBlock *Dominator = 0; + BasicBlock *Dominator = nullptr; // Find all the edges from the sub region to the exit for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit); @@ -678,7 +676,8 @@ BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow, /// \brief Set the previous node void StructurizeCFG::setPrevNode(BasicBlock *BB) { - PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0; + PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) + : nullptr; } /// \brief Does BB dominate all the predicates of Node ? 
@@ -699,7 +698,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) { bool Dominated = false; // Regionentry is always true - if (PrevNode == 0) + if (!PrevNode) return true; for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); @@ -806,11 +805,11 @@ void StructurizeCFG::createFlow() { Conditions.clear(); LoopConds.clear(); - PrevNode = 0; + PrevNode = nullptr; Visited.clear(); while (!Order.empty()) { - handleLoops(EntryDominatesExit, 0); + handleLoops(EntryDominatesExit, nullptr); } if (PrevNode) diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 6d02777..05b9892 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -50,12 +50,12 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "tailcallelim" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" @@ -64,6 +64,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -76,6 +77,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "tailcallelim" + STATISTIC(NumEliminated, "Number of tail calls removed"); STATISTIC(NumRetDuped, "Number of return duplicated"); STATISTIC(NumAccumAdded, "Number of accumulators introduced"); @@ -94,6 +97,9 @@ namespace { bool runOnFunction(Function &F) override; private: + bool runTRE(Function &F); + bool markTails(Function &F, bool &AllCallsAreTailCalls); + CallInst *FindTRECandidate(Instruction *I, bool CannotTailCallElimCallsMarkedTail); bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, @@ -131,55 +137,255 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); } -/// CanTRE - Scan the specified basic block for alloca instructions. -/// If it contains any that are variable-sized or not in the entry block, -/// returns false. -static bool CanTRE(AllocaInst *AI) { - // Because of PR962, we don't TRE allocas outside the entry block. - - // If this alloca is in the body of the function, or if it is a variable - // sized allocation, we cannot tail call eliminate calls marked 'tail' - // with this mechanism. - BasicBlock *BB = AI->getParent(); - return BB == &BB->getParent()->getEntryBlock() && - isa(AI->getArraySize()); +/// \brief Scan the specified function for alloca instructions. +/// If it contains any dynamic allocas, returns false. +static bool CanTRE(Function &F) { + // Because of PR962, we don't TRE dynamic allocas. 
+ for (auto &BB : F) { + for (auto &I : BB) { + if (AllocaInst *AI = dyn_cast(&I)) { + if (!AI->isStaticAlloca()) + return false; + } + } + } + + return true; } -namespace { -struct AllocaCaptureTracker : public CaptureTracker { - AllocaCaptureTracker() : Captured(false) {} +bool TailCallElim::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; - void tooManyUses() override { Captured = true; } + bool AllCallsAreTailCalls = false; + bool Modified = markTails(F, AllCallsAreTailCalls); + if (AllCallsAreTailCalls) + Modified |= runTRE(F); + return Modified; +} - bool shouldExplore(const Use *U) override { - Value *V = U->getUser(); - if (isa(V) || isa(V)) - UsesAlloca.insert(V); - return true; +namespace { +struct AllocaDerivedValueTracker { + // Start at a root value and walk its use-def chain to mark calls that use the + // value or a derived value in AllocaUsers, and places where it may escape in + // EscapePoints. + void walk(Value *Root) { + SmallVector Worklist; + SmallPtrSet Visited; + + auto AddUsesToWorklist = [&](Value *V) { + for (auto &U : V->uses()) { + if (!Visited.insert(&U)) + continue; + Worklist.push_back(&U); + } + }; + + AddUsesToWorklist(Root); + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast(U->getUser()); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(I); + bool IsNocapture = !CS.isCallee(U) && + CS.doesNotCapture(CS.getArgumentNo(U)); + callUsesLocalStack(CS, IsNocapture); + if (IsNocapture) { + // If the alloca-derived argument is passed in as nocapture, then it + // can't propagate to the call's return. That would be capturing. + continue; + } + break; + } + case Instruction::Load: { + // The result of a load is not alloca-derived (unless an alloca has + // otherwise escaped, but this is a local analysis). + continue; + } + case Instruction::Store: { + if (U->getOperandNo() == 0) + EscapePoints.insert(I); + continue; // Stores have no users to analyze. + } + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + break; + default: + EscapePoints.insert(I); + break; + } + + AddUsesToWorklist(I); + } } - bool captured(const Use *U) override { - if (isa(U->getUser())) - return false; - Captured = true; - return true; + void callUsesLocalStack(CallSite CS, bool IsNocapture) { + // Add it to the list of alloca users. If it's already there, skip further + // processing. + if (!AllocaUsers.insert(CS.getInstruction())) + return; + + // If it's nocapture then it can't capture the alloca. + if (IsNocapture) + return; + + // If it can write to memory, it can leak the alloca value. + if (!CS.onlyReadsMemory()) + EscapePoints.insert(CS.getInstruction()); } - bool Captured; - SmallPtrSet UsesAlloca; + SmallPtrSet AllocaUsers; + SmallPtrSet EscapePoints; }; -} // end anonymous namespace +} -bool TailCallElim::runOnFunction(Function &F) { - if (skipOptnoneFunction(F)) +bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { + if (F.callsFunctionThatReturnsTwice()) return false; + AllCallsAreTailCalls = true; + + // The local stack holds all alloca instructions and all byval arguments. 
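The opcode switch in walk() amounts to the following source-level classification; g and global_ptr are hypothetical stand-ins, and the comments restate the switch's cases rather than the pass's exact behavior:

extern void g(int *p);  // a call may capture p unless the parameter is nocapture
extern int *global_ptr;

void f() {
  int buf[4] = {0};  // the local stack (a static alloca)
  int *p = buf + 1;  // GetElementPtr: p is still alloca-derived
  int x = *p;        // Load: the loaded value x is NOT alloca-derived
  (void)x;
  g(p);              // Call: an escape point unless g's parameter is nocapture
  global_ptr = p;    // Store of p itself (operand 0): an escape point
}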
+ AllocaDerivedValueTracker Tracker; + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) + Tracker.walk(&Arg); + } + for (auto &BB : F) { + for (auto &I : BB) + if (AllocaInst *AI = dyn_cast(&I)) + Tracker.walk(AI); + } + bool Modified = false; + + // Track whether a block is reachable after an alloca has escaped. Blocks that + // contain the escaping instruction will be marked as being visited without an + // escaped alloca, since that is how the block began. + enum VisitType { + UNVISITED, + UNESCAPED, + ESCAPED + }; + DenseMap Visited; + + // We propagate the fact that an alloca has escaped from block to successor. + // Visit the blocks that are propagating the escapedness first. To do this, we + // maintain two worklists. + SmallVector WorklistUnescaped, WorklistEscaped; + + // We may enter a block and visit it thinking that no alloca has escaped yet, + // then see an escape point and go back around a loop edge and come back to + // the same block twice. Because of this, we defer setting tail on calls when + // we first encounter them in a block. Every entry in this list does not + // statically use an alloca via use-def chain analysis, but may find an alloca + // through other means if the block turns out to be reachable after an escape + // point. + SmallVector DeferredTails; + + BasicBlock *BB = &F.getEntryBlock(); + VisitType Escaped = UNESCAPED; + do { + for (auto &I : *BB) { + if (Tracker.EscapePoints.count(&I)) + Escaped = ESCAPED; + + CallInst *CI = dyn_cast(&I); + if (!CI || CI->isTailCall()) + continue; + + if (CI->doesNotAccessMemory()) { + // A call to a readnone function whose arguments are all things computed + // outside this function can be marked tail. Even if you stored the + // alloca address into a global, a readnone function can't load the + // global anyhow. + // + // Note that this runs whether we know an alloca has escaped or not. If + // it has, then we can't trust Tracker.AllocaUsers to be accurate. + bool SafeToTail = true; + for (auto &Arg : CI->arg_operands()) { + if (isa(Arg.getUser())) + continue; + if (Argument *A = dyn_cast(Arg.getUser())) + if (!A->hasByValAttr()) + continue; + SafeToTail = false; + break; + } + if (SafeToTail) { + emitOptimizationRemark( + F.getContext(), "tailcallelim", F, CI->getDebugLoc(), + "marked this readnone call a tail call candidate"); + CI->setTailCall(); + Modified = true; + continue; + } + } + + if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + DeferredTails.push_back(CI); + } else { + AllCallsAreTailCalls = false; + } + } + + for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { + auto &State = Visited[SuccBB]; + if (State < Escaped) { + State = Escaped; + if (State == ESCAPED) + WorklistEscaped.push_back(SuccBB); + else + WorklistUnescaped.push_back(SuccBB); + } + } + + if (!WorklistEscaped.empty()) { + BB = WorklistEscaped.pop_back_val(); + Escaped = ESCAPED; + } else { + BB = nullptr; + while (!WorklistUnescaped.empty()) { + auto *NextBB = WorklistUnescaped.pop_back_val(); + if (Visited[NextBB] == UNESCAPED) { + BB = NextBB; + Escaped = UNESCAPED; + break; + } + } + } + } while (BB); + + for (CallInst *CI : DeferredTails) { + if (Visited[CI->getParent()] != ESCAPED) { + // If the escape point was part way through the block, calls after the + // escape point wouldn't have been put into DeferredTails. 
+ emitOptimizationRemark(F.getContext(), "tailcallelim", F, + CI->getDebugLoc(), + "marked this call a tail call candidate"); + CI->setTailCall(); + Modified = true; + } else { + AllCallsAreTailCalls = false; + } + } + + return Modified; +} + +bool TailCallElim::runTRE(Function &F) { // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return false; TTI = &getAnalysis(); - BasicBlock *OldEntry = 0; + BasicBlock *OldEntry = nullptr; bool TailCallsAreMarkedTail = false; SmallVector ArgumentPHIs; bool MadeChange = false; @@ -188,39 +394,23 @@ bool TailCallElim::runOnFunction(Function &F) { // marked with the 'tail' attribute, because doing so would cause the stack // size to increase (real TRE would deallocate variable sized allocas, TRE // doesn't). - bool CanTRETailMarkedCall = true; - - // Find calls that can be marked tail. - AllocaCaptureTracker ACT; - for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (AllocaInst *AI = dyn_cast(I)) { - CanTRETailMarkedCall &= CanTRE(AI); - PointerMayBeCaptured(AI, &ACT); - // If any allocas are captured, exit. - if (ACT.Captured) - return false; - } - } - } + bool CanTRETailMarkedCall = CanTRE(F); - // Second pass, change any tail recursive calls to loops. + // Change any tail recursive calls to loops. // // FIXME: The code generator produces really bad code when an 'escaping // alloca' is changed from being a static alloca to being a dynamic alloca. // Until this is resolved, disable this transformation if that would ever // happen. This bug is PR962. - if (ACT.UsesAlloca.empty()) { - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { - bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, - ArgumentPHIs, !CanTRETailMarkedCall); - if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, - TailCallsAreMarkedTail, ArgumentPHIs, - !CanTRETailMarkedCall); - MadeChange |= Change; - } + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { + bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, + ArgumentPHIs, !CanTRETailMarkedCall); + if (!Change && BB->getFirstNonPHIOrDbg() == Ret) + Change = FoldReturnAndProcessPred(BB, Ret, OldEntry, + TailCallsAreMarkedTail, ArgumentPHIs, + !CanTRETailMarkedCall); + MadeChange |= Change; } } @@ -229,34 +419,13 @@ bool TailCallElim::runOnFunction(Function &F) { // with themselves. Check to see if we did and clean up our mess if so. This // occurs when a function passes an argument straight through to its tail // call. - if (!ArgumentPHIs.empty()) { - for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { - PHINode *PN = ArgumentPHIs[i]; - - // If the PHI Node is a dynamic constant, replace it with the value it is. - if (Value *PNV = SimplifyInstruction(PN)) { - PN->replaceAllUsesWith(PNV); - PN->eraseFromParent(); - } - } - } + for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) { + PHINode *PN = ArgumentPHIs[i]; - // At this point, we know that the function does not have any captured - // allocas. If additionally the function does not call setjmp, mark all calls - // in the function that do not access stack memory with the tail keyword. 
This - // implies ensuring that there does not exist any path from a call that takes - // in an alloca but does not capture it and the call which we wish to mark - // with "tail". - if (!F.callsFunctionThatReturnsTwice()) { - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast(I)) { - if (!ACT.UsesAlloca.count(CI)) { - CI->setTailCall(); - MadeChange = true; - } - } - } + // If the PHI Node is a dynamic constant, replace it with the value it is. + if (Value *PNV = SimplifyInstruction(PN)) { + PN->replaceAllUsesWith(PNV); + PN->eraseFromParent(); } } @@ -343,11 +512,11 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { // static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { Function *F = CI->getParent()->getParent(); - Value *ReturnedValue = 0; + Value *ReturnedValue = nullptr; for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) { ReturnInst *RI = dyn_cast(BBI->getTerminator()); - if (RI == 0 || RI == IgnoreRI) continue; + if (RI == nullptr || RI == IgnoreRI) continue; // We can only perform this transformation if the value returned is // evaluatable at the start of the initial invocation of the function, @@ -355,10 +524,10 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { // Value *RetOp = RI->getOperand(0); if (!isDynamicConstant(RetOp, CI, RI)) - return 0; + return nullptr; if (ReturnedValue && RetOp != ReturnedValue) - return 0; // Cannot transform if differing values are returned. + return nullptr; // Cannot transform if differing values are returned. ReturnedValue = RetOp; } return ReturnedValue; @@ -370,18 +539,18 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { /// Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { - if (!I->isAssociative() || !I->isCommutative()) return 0; + if (!I->isAssociative() || !I->isCommutative()) return nullptr; assert(I->getNumOperands() == 2 && "Associative/commutative operations should have 2 args!"); // Exactly one operand should be the result of the call instruction. if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || (I->getOperand(0) != CI && I->getOperand(1) != CI)) - return 0; + return nullptr; // The only user of this instruction we allow is a single return instruction. if (!I->hasOneUse() || !isa(I->user_back())) - return 0; + return nullptr; // Ok, now we have to check all of the other return instructions in this // function. If they return non-constants or differing values, then we cannot @@ -402,11 +571,11 @@ TailCallElim::FindTRECandidate(Instruction *TI, Function *F = BB->getParent(); if (&BB->front() == TI) // Make sure there is something before the terminator. - return 0; + return nullptr; // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. - CallInst *CI = 0; + CallInst *CI = nullptr; BasicBlock::iterator BBI = TI; while (true) { CI = dyn_cast(BBI); @@ -414,14 +583,14 @@ TailCallElim::FindTRECandidate(Instruction *TI, break; if (BBI == BB->begin()) - return 0; // Didn't find a potential tail call. + return nullptr; // Didn't find a potential tail call. --BBI; } // If this call is marked as a tail call, and if there are dynamic allocas in // the function, we cannot perform this optimization. 
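For orientation, this is the shape CanTransformAccumulatorRecursion is describing, sketched at the source level; a conceptual before/after, not the pass's literal output, with fact and factTre as invented names:

// Before: 'mul' is associative and commutative, and exactly one of its
// operands is the recursive call, so it qualifies as an accumulator.
long fact(long n) { return n <= 1 ? 1 : n * fact(n - 1); }

// After, conceptually: the pending multiplications move into an accumulator
// that starts at the operation's identity, and the call becomes a loop.
long factTre(long n) {
  long acc = 1;
  while (n > 1) {
    acc *= n;
    --n;
  }
  return acc;
}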
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) - return 0; + return nullptr; // As a special case, detect code like this: // double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call @@ -441,7 +610,7 @@ TailCallElim::FindTRECandidate(Instruction *TI, for (; I != E && FI != FE; ++I, ++FI) if (*I != &*FI) break; if (I == E && FI == FE) - return 0; + return nullptr; } return CI; @@ -462,8 +631,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // which is different to the constant returned by other return instructions // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a // special case of accumulator recursion, the operation being "return C". - Value *AccumulatorRecursionEliminationInitVal = 0; - Instruction *AccumulatorRecursionInstr = 0; + Value *AccumulatorRecursionEliminationInitVal = nullptr; + Instruction *AccumulatorRecursionInstr = nullptr; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are @@ -493,8 +662,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // accumulator recursion variable eliminated. if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa(Ret->getReturnValue()) && - AccumulatorRecursionEliminationInitVal == 0 && - !getCommonReturnValue(0, CI)) { + AccumulatorRecursionEliminationInitVal == nullptr && + !getCommonReturnValue(nullptr, CI)) { // One case remains that we are able to handle: the current return // instruction returns a constant, and all other return instructions // return a different constant. @@ -510,9 +679,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, BasicBlock *BB = Ret->getParent(); Function *F = BB->getParent(); + emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(), + "transforming tail recursion to loop"); + // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. - if (OldEntry == 0) { + if (!OldEntry) { OldEntry = &F->getEntryBlock(); BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry); NewEntry->takeName(OldEntry); diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index f42635e..196ac79 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -52,8 +52,6 @@ // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "add-discriminators" - #include "llvm/Transforms/Scalar.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -69,6 +67,8 @@ using namespace llvm; +#define DEBUG_TYPE "add-discriminators" + namespace { struct AddDiscriminators : public FunctionPass { static char ID; // Pass identification, replacement for typeid @@ -99,7 +99,7 @@ FunctionPass *llvm::createAddDiscriminatorsPass() { static bool hasDebugInfo(const Function &F) { NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu"); - return CUNodes != 0; + return CUNodes != nullptr; } /// \brief Assign DWARF discriminators. @@ -154,10 +154,15 @@ static bool hasDebugInfo(const Function &F) { /// file and line location as I2. This new lexical block will have a /// different discriminator number than I1. 
bool AddDiscriminators::runOnFunction(Function &F) { - // No need to do anything if there is no debug info for this function. // If the function has debug information, but the user has disabled // discriminators, do nothing. - if (!hasDebugInfo(F) || NoDiscriminators) return false; + // Similarly, if the function has no debug info, do nothing. + // Finally, if this module is built with DWARF versions earlier than 4, + // do nothing (discriminator support is a DWARF 4 feature). + if (NoDiscriminators || + !hasDebugInfo(F) || + F.getParent()->getDwarfVersion() < 4) + return false; bool Changed = false; Module *M = F.getParent(); diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk index ab4d8a8..cbd8dd0 100644 --- a/lib/Transforms/Utils/Android.mk +++ b/lib/Transforms/Utils/Android.mk @@ -11,6 +11,7 @@ transforms_utils_SRC_FILES := \ CloneModule.cpp \ CmpInstAnalysis.cpp \ CodeExtractor.cpp \ + CtorUtils.cpp \ DemoteRegToStack.cpp \ GlobalStatus.cpp \ InlineFunction.cpp \ diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index b3cd5ce..80b7e22 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -68,8 +68,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) { if (!isa<PHINode>(BB->begin())) return; - AliasAnalysis *AA = 0; - MemoryDependenceAnalysis *MemDep = 0; + AliasAnalysis *AA = nullptr; + MemoryDependenceAnalysis *MemDep = nullptr; if (P) { AA = P->getAnalysisIfAvailable<AliasAnalysis>(); MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>(); @@ -130,7 +130,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) { BasicBlock *OnlySucc = BB; for (; SI != SE; ++SI) if (*SI != OnlySucc) { - OnlySucc = 0; // There are multiple distinct successors! + OnlySucc = nullptr; // There are multiple distinct successors! break; } @@ -217,7 +217,7 @@ void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL, /// void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL, BasicBlock::iterator &BI, Instruction *I) { - assert(I->getParent() == 0 && + assert(I->getParent() == nullptr && "ReplaceInstWithInst: Instruction already inserted into basic block!"); // Insert the new instruction into the basic block... @@ -254,7 +254,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) { // If the successor only has a single pred, split the top of the successor // block. assert(SP == BB && "CFG broken"); - SP = NULL; + SP = nullptr; return SplitBlock(Succ, Succ->begin(), P); } @@ -310,7 +310,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, if (!P) return; LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>(); - Loop *L = LI ? LI->getLoopFor(OldBB) : 0; + Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr; // If we need to preserve loop analyses, collect some information about how // this split will affect loops. @@ -351,7 +351,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, // loop). To find this, examine each of the predecessors and determine which // loops enclose them, and select the most-nested loop which contains the // loop containing the block being split.
- Loop *InnermostPredLoop = 0; + Loop *InnermostPredLoop = nullptr; for (ArrayRef::iterator i = Preds.begin(), e = Preds.end(); i != e; ++i) { BasicBlock *Pred = *i; @@ -384,51 +384,68 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef Preds, BranchInst *BI, Pass *P, bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. - AliasAnalysis *AA = P ? P->getAnalysisIfAvailable() : 0; + AliasAnalysis *AA = P ? P->getAnalysisIfAvailable() : nullptr; + SmallPtrSet PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa(I); ) { PHINode *PN = cast(I++); // Check to see if all of the values coming in are the same. If so, we // don't need to create a new PHI node, unless it's needed for LCSSA. - Value *InVal = 0; + Value *InVal = nullptr; if (!HasLoopExit) { InVal = PN->getIncomingValueForBlock(Preds[0]); - for (unsigned i = 1, e = Preds.size(); i != e; ++i) - if (InVal != PN->getIncomingValueForBlock(Preds[i])) { - InVal = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (!PredSet.count(PN->getIncomingBlock(i))) + continue; + if (!InVal) + InVal = PN->getIncomingValue(i); + else if (InVal != PN->getIncomingValue(i)) { + InVal = nullptr; break; } + } } if (InVal) { // If all incoming values for the new PHI would be the same, just don't // make a new PHI. Instead, just remove the incoming values from the old // PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - // Explicitly check the BB index here to handle duplicates in Preds. - int Idx = PN->getBasicBlockIndex(Preds[i]); - if (Idx >= 0) - PN->removeIncomingValue(Idx, false); - } - } else { - // If the values coming into the block are not the same, we need a PHI. - // Create the new PHI node, insert it into NewBB at the end of the block - PHINode *NewPHI = - PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); - if (AA) AA->copyValue(PN, NewPHI); - // Move all of the PHI values for 'Preds' to the new PHI. - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - Value *V = PN->removeIncomingValue(Preds[i], false); - NewPHI->addIncoming(V, Preds[i]); - } + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values + // aren't invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) + if (PredSet.count(PN->getIncomingBlock(i))) + PN->removeIncomingValue(i, false); + + // Add an incoming value to the PHI node in the loop for the preheader + // edge. + PN->addIncoming(InVal, NewBB); + continue; + } - InVal = NewPHI; + // If the values coming into the block are not the same, we need a new + // PHI. + // Create the new PHI node, insert it into NewBB at the end of the block + PHINode *NewPHI = + PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI); + if (AA) + AA->copyValue(PN, NewPHI); + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values aren't + // invalidated when we remove one. 
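The "walks backwards" note above is the usual remove-by-index discipline; the same point in miniature on a std::vector (removeEvens is an invented example):

#include <cstddef>
#include <vector>

// Erasing entry i shifts every index above i down by one, so a forward walk
// would skip or misindex survivors; walking backwards keeps the indices of
// the not-yet-visited entries valid throughout.
void removeEvens(std::vector<int> &v) {
  for (std::ptrdiff_t i = (std::ptrdiff_t)v.size() - 1; i >= 0; --i)
    if (v[i] % 2 == 0)
      v.erase(v.begin() + i);
}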
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) { + BasicBlock *IncomingBB = PN->getIncomingBlock(i); + if (PredSet.count(IncomingBB)) { + Value *V = PN->removeIncomingValue(i, false); + NewPHI->addIncoming(V, IncomingBB); + } } - // Add an incoming value to the PHI node in the loop for the preheader - // edge. - PN->addIncoming(InVal, NewBB); + PN->addIncoming(NewPHI, NewBB); } } @@ -542,7 +559,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, e = pred_end(OrigBB); } - BasicBlock *NewBB2 = 0; + BasicBlock *NewBB2 = nullptr; if (!NewBB2Preds.empty()) { // Create another basic block for the rest of OrigBB's predecessors. NewBB2 = BasicBlock::Create(OrigBB->getContext(), @@ -607,7 +624,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end(); i != e; ++i) { Value *V = *i; - Instruction *NewBC = 0; + Instruction *NewBC = nullptr; if (BitCastInst *BCI = dyn_cast(V)) { // Return value might be bitcasted. Clone and insert it before the // return instruction. @@ -724,32 +741,32 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse) { PHINode *SomePHI = dyn_cast(BB->begin()); - BasicBlock *Pred1 = NULL; - BasicBlock *Pred2 = NULL; + BasicBlock *Pred1 = nullptr; + BasicBlock *Pred2 = nullptr; if (SomePHI) { if (SomePHI->getNumIncomingValues() != 2) - return NULL; + return nullptr; Pred1 = SomePHI->getIncomingBlock(0); Pred2 = SomePHI->getIncomingBlock(1); } else { pred_iterator PI = pred_begin(BB), PE = pred_end(BB); if (PI == PE) // No predecessor - return NULL; + return nullptr; Pred1 = *PI++; if (PI == PE) // Only one predecessor - return NULL; + return nullptr; Pred2 = *PI++; if (PI != PE) // More than two predecessors - return NULL; + return nullptr; } // We can only handle branches. Other control flow will be lowered to // branches if possible anyway. BranchInst *Pred1Br = dyn_cast(Pred1->getTerminator()); BranchInst *Pred2Br = dyn_cast(Pred2->getTerminator()); - if (Pred1Br == 0 || Pred2Br == 0) - return 0; + if (!Pred1Br || !Pred2Br) + return nullptr; // Eliminate code duplication by ensuring that Pred1Br is conditional if // either are. @@ -759,7 +776,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // required anyway, we stand no chance of eliminating it, so the xform is // probably not profitable. if (Pred1Br->isConditional()) - return 0; + return nullptr; std::swap(Pred1, Pred2); std::swap(Pred1Br, Pred2Br); @@ -769,8 +786,8 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // The only thing we have to watch out for here is to make sure that Pred2 // doesn't have incoming edges from other blocks. If it does, the condition // doesn't dominate BB. - if (Pred2->getSinglePredecessor() == 0) - return 0; + if (!Pred2->getSinglePredecessor()) + return nullptr; // If we found a conditional branch predecessor, make sure that it branches // to BB and Pred2Br. If it doesn't, this isn't an "if statement". @@ -785,7 +802,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, } else { // We know that one arm of the conditional goes to BB, so the other must // go somewhere unrelated, and this must not be an "if statement". - return 0; + return nullptr; } return Pred1Br->getCondition(); @@ -795,12 +812,12 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, // BB. Don't panic! 
If both blocks only have a single (identical) // predecessor, and THAT is a conditional branch, then we're all ok! BasicBlock *CommonPred = Pred1->getSinglePredecessor(); - if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor()) - return 0; + if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor()) + return nullptr; // Otherwise, if this is a conditional branch, then we can use it! BranchInst *BI = dyn_cast(CommonPred->getTerminator()); - if (BI == 0) return 0; + if (!BI) return nullptr; assert(BI->isConditional() && "Two successors but not conditional?"); if (BI->getSuccessor(0) == Pred1) { diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 76ebb9f..80bd516 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "break-crit-edges" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -30,6 +29,8 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +#define DEBUG_TYPE "break-crit-edges" + STATISTIC(NumBroken, "Number of blocks inserted"); namespace { @@ -141,7 +142,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, Pass *P, bool MergeIdenticalEdges, bool DontDeleteUselessPhis, bool SplitLandingPads) { - if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0; + if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr; assert(!isa(TI) && "Cannot split critical edge from IndirectBrInst"); @@ -151,7 +152,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. - if (DestBB->isLandingPad()) return 0; + if (DestBB->isLandingPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), @@ -207,15 +208,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // If we don't have a pass object, we can't update anything... - if (P == 0) return NewBB; + if (!P) return NewBB; DominatorTreeWrapperPass *DTWP = P->getAnalysisIfAvailable(); - DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; LoopInfo *LI = P->getAnalysisIfAvailable(); // If we have nothing to update, just return. - if (DT == 0 && LI == 0) + if (!DT && !LI) return NewBB; // Now update analysis information. Since the only predecessor of NewBB is @@ -251,7 +252,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // if (TINode) { // Don't break unreachable code! DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB); - DomTreeNode *DestBBNode = 0; + DomTreeNode *DestBBNode = nullptr; // If NewBBDominatesDestBB hasn't been computed yet, do so with DT. if (!OtherPreds.empty()) { diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 82384a1..be00b69 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -27,7 +27,8 @@ using namespace llvm; /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*. 
Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { - return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr"); + unsigned AS = V->getType()->getPointerAddressSpace(); + return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr"); } /// EmitStrLen - Emit a call to the strlen function to the builder, for the @@ -35,7 +36,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) { Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strlen)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -64,7 +65,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strnlen)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -94,7 +95,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strchr)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; @@ -120,7 +121,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::strncmp)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -153,7 +154,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strcpy)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -177,7 +178,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI, StringRef Name) { if (!TLI->has(LibFunc::strncpy)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -204,7 +205,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcpy_chk)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS; @@ -232,7 +233,7 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memchr)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS; @@ -260,7 +261,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::memcmp)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -347,7 +348,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name, Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::putchar)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(), @@ -369,7 
+370,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::puts)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -393,7 +394,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD, Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputc)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[2]; @@ -426,7 +427,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B, Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fputs)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; @@ -459,7 +460,7 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B, const DataLayout *TD, const TargetLibraryInfo *TLI) { if (!TLI->has(LibFunc::fwrite)) - return 0; + return nullptr; Module *M = B.GetInsertBlock()->getParent()->getParent(); AttributeSet AS[3]; diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index 1f517d0..f2d5e07 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -15,7 +15,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "bypass-slow-division" #include "llvm/Transforms/Utils/BypassSlowDivision.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Function.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "bypass-slow-division" + namespace { struct DivOpInfo { bool SignedOp; @@ -53,11 +54,11 @@ namespace llvm { } static DivOpInfo getEmptyKey() { - return DivOpInfo(false, 0, 0); + return DivOpInfo(false, nullptr, nullptr); } static DivOpInfo getTombstoneKey() { - return DivOpInfo(true, 0, 0); + return DivOpInfo(true, nullptr, nullptr); } static unsigned getHashValue(const DivOpInfo &Val) { diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index dac2090..e10ca90 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMTransformUtils BreakCriticalEdges.cpp BuildLibCalls.cpp BypassSlowDivision.cpp + CtorUtils.cpp CloneFunction.cpp CloneModule.cpp CmpInstAnalysis.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index a199086..5c8f20d 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -159,7 +159,7 @@ static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) { for (DISubprogram Subprogram : Finder.subprograms()) { if (Subprogram.describes(F)) return Subprogram; } - return NULL; + return nullptr; } // Add an operand to an existing MDNode. The new operand will be added at the @@ -359,7 +359,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If the condition was a known constant in the callee... ConstantInt *Cond = dyn_cast(BI->getCondition()); // Or is a known constant in the caller... 
-    if (Cond == 0) {
+    if (!Cond) {
       Value *V = VMap[BI->getCondition()];
       Cond = dyn_cast_or_null<ConstantInt>(V);
     }
@@ -375,7 +375,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
   } else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
     // If switching on a value known constant in the caller.
     ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
-    if (Cond == 0) { // Or known constant after constant prop in the callee...
+    if (!Cond) { // Or known constant after constant prop in the callee...
       Value *V = VMap[SI->getCondition()];
       Cond = dyn_cast_or_null<ConstantInt>(V);
     }
@@ -454,7 +454,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
        BI != BE; ++BI) {
     Value *V = VMap[BI];
     BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
-    if (NewBB == 0) continue;  // Dead block.
+    if (!NewBB) continue;  // Dead block.

     // Add the new block to the new function.
     NewFunc->getBasicBlockList().push_back(NewBB);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 64df089..eb67db1 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -47,8 +47,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
     GlobalVariable *GV = new GlobalVariable(*New,
                                             I->getType()->getElementType(),
                                             I->isConstant(), I->getLinkage(),
-                                            (Constant*) 0, I->getName(),
-                                            (GlobalVariable*) 0,
+                                            (Constant*) nullptr, I->getName(),
+                                            (GlobalVariable*) nullptr,
                                             I->getThreadLocalMode(),
                                             I->getType()->getAddressSpace());
     GV->copyAttributesFrom(I);
@@ -67,8 +67,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
   // Loop over the aliases in the module
   for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
        I != E; ++I) {
-    GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(),
-                                      I->getName(), NULL, New);
+    auto *PTy = cast<PointerType>(I->getType());
+    auto *GA =
+        GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+                            I->getLinkage(), I->getName(), New);
     GA->copyAttributesFrom(I);
     VMap[I] = GA;
   }
@@ -105,8 +107,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
   for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
        I != E; ++I) {
     GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
-    if (const Constant *C = I->getAliasee())
-      GA->setAliasee(MapValue(C, VMap));
+    if (const GlobalObject *C = I->getAliasee())
+      GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap)));
   }

   // And named metadata....
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 8fa412a..3b15a0a 100644
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -84,7 +84,7 @@ Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
   case 7: // True.
     return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
   }
-  return NULL;
+  return nullptr;
 }

 /// PredicatesFoldable - Return true if both predicates match sign or if at
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index b814842..e70a7d6 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -38,6 +38,8 @@
 #include <set>
 using namespace llvm;

+#define DEBUG_TYPE "code-extractor"
+
 // Provide a command-line option to aggregate function arguments into a struct
 // for functions produced by the code extractor.
This is useful when converting // extracted functions to pthread-based code, as only one argument (void*) can @@ -118,7 +120,7 @@ buildExtractionBlockSet(const RegionNode &RN) { } CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs) - : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt), + : DT(nullptr), AggregateArgs(AggregateArgs||AggregateArgsOpt), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, @@ -410,7 +412,7 @@ static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) { return P->getIncomingBlock(U); } - return 0; + return nullptr; } /// emitCallAndSwitchStatement - This method sets up the caller side by adding @@ -438,14 +440,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructValues.push_back(*i); } else { AllocaInst *alloca = - new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc", + new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc", codeReplacer->getParent()->begin()->begin()); ReloadOutputs.push_back(alloca); params.push_back(alloca); } } - AllocaInst *Struct = 0; + AllocaInst *Struct = nullptr; if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { std::vector ArgTypes; for (ValueSet::iterator v = StructValues.begin(), @@ -455,7 +457,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Allocate a struct at the beginning of this function Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); Struct = - new AllocaInst(StructArgTy, 0, "structArg", + new AllocaInst(StructArgTy, nullptr, "structArg", codeReplacer->getParent()->begin()->begin()); params.push_back(Struct); @@ -484,7 +486,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Reload the outputs passed in by reference for (unsigned i = 0, e = outputs.size(); i != e; ++i) { - Value *Output = 0; + Value *Output = nullptr; if (AggregateArgs) { Value *Idx[2]; Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); @@ -537,7 +539,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, newFunction); unsigned SuccNum = switchVal++; - Value *brVal = 0; + Value *brVal = nullptr; switch (NumExitBlocks) { case 0: case 1: break; // No value needed. @@ -633,7 +635,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Check if the function should return a value if (OldFnRetTy->isVoidTy()) { - ReturnInst::Create(Context, 0, TheSwitch); // Return void + ReturnInst::Create(Context, nullptr, TheSwitch); // Return void } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { // return what we have ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); @@ -685,7 +687,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) { Function *CodeExtractor::extractCodeRegion() { if (!isEligible()) - return 0; + return nullptr; ValueSet inputs, outputs; diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp new file mode 100644 index 0000000..a359424 --- /dev/null +++ b/lib/Transforms/Utils/CtorUtils.cpp @@ -0,0 +1,183 @@ +//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that are used to process llvm.global_ctors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ctor_utils"
+
+namespace llvm {
+
+namespace {
+/// Given a specified llvm.global_ctors list, install the
+/// specified array.
+void installGlobalCtors(GlobalVariable *GCL,
+                        const std::vector<Function *> &Ctors) {
+  // If we made a change, reassemble the initializer list.
+  Constant *CSVals[3];
+
+  StructType *StructTy =
+      cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
+
+  // Create the new init list.
+  std::vector<Constant *> CAList;
+  for (Function *F : Ctors) {
+    Type *Int32Ty = Type::getInt32Ty(GCL->getContext());
+    if (F) {
+      CSVals[0] = ConstantInt::get(Int32Ty, 65535);
+      CSVals[1] = F;
+    } else {
+      CSVals[0] = ConstantInt::get(Int32Ty, 0x7fffffff);
+      CSVals[1] = Constant::getNullValue(StructTy->getElementType(1));
+    }
+    // FIXME: Only allow the 3-field form in LLVM 4.0.
+    size_t NumElts = StructTy->getNumElements();
+    if (NumElts > 2)
+      CSVals[2] = Constant::getNullValue(StructTy->getElementType(2));
+    CAList.push_back(
+        ConstantStruct::get(StructTy, makeArrayRef(CSVals, NumElts)));
+  }
+
+  // Create the array initializer.
+  Constant *CA =
+      ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList);
+
+  // If we didn't change the number of elements, don't create a new GV.
+  if (CA->getType() == GCL->getInitializer()->getType()) {
+    GCL->setInitializer(CA);
+    return;
+  }
+
+  // Create the new global and insert it next to the existing list.
+  GlobalVariable *NGV =
+      new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
+                         CA, "", GCL->getThreadLocalMode());
+  GCL->getParent()->getGlobalList().insert(GCL, NGV);
+  NGV->takeName(GCL);
+
+  // Nuke the old list, replacing any uses with the new one.
+  if (!GCL->use_empty()) {
+    Constant *V = NGV;
+    if (V->getType() != GCL->getType())
+      V = ConstantExpr::getBitCast(V, GCL->getType());
+    GCL->replaceAllUsesWith(V);
+  }
+  GCL->eraseFromParent();
+}
+
+/// Given a llvm.global_ctors list that we can understand,
+/// return a list of the functions and null terminator as a vector.
+std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
+  if (GV->getInitializer()->isNullValue())
+    return std::vector<Function *>();
+  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+  std::vector<Function *> Result;
+  Result.reserve(CA->getNumOperands());
+  for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+    ConstantStruct *CS = cast<ConstantStruct>(*i);
+    Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+  }
+  return Result;
+}
+
+/// Find the llvm.global_ctors list, verifying that all initializers have an
+/// init priority of 65535.
+GlobalVariable *findGlobalCtors(Module &M) {
+  GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+  if (!GV)
+    return nullptr;
+
+  // Verify that the initializer is simple enough for us to handle. We are
+  // only allowed to optimize the initializer if it is unique.
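// [Editor's note -- illustration, not part of the patch.] The appending
// global these helpers parse is what front ends emit for static
// constructors; in 3.5-era IR it looks like:
//
//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
//       [{ i32, void ()* } { i32 65535, void ()* @some_ctor }]
//
// Each entry pairs an init priority with a constructor (a third i8* field
// is optional, per the FIXME above); findGlobalCtors only accepts lists
// whose priorities are all the default 65535.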
+ if (!GV->hasUniqueInitializer()) + return nullptr; + + if (isa(GV->getInitializer())) + return GV; + ConstantArray *CA = cast(GV->getInitializer()); + + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { + if (isa(*i)) + continue; + ConstantStruct *CS = cast(*i); + if (isa(CS->getOperand(1))) + continue; + + // Must have a function or null ptr. + if (!isa(CS->getOperand(1))) + return nullptr; + + // Init priority must be standard. + ConstantInt *CI = cast(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) + return nullptr; + } + + return GV; +} +} // namespace + +/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the +/// entries for which it returns true. Return true if anything changed. +bool optimizeGlobalCtorsList(Module &M, + function_ref ShouldRemove) { + GlobalVariable *GlobalCtors = findGlobalCtors(M); + if (!GlobalCtors) + return false; + + std::vector Ctors = parseGlobalCtors(GlobalCtors); + if (Ctors.empty()) + return false; + + bool MadeChange = false; + + // Loop over global ctors, optimizing them when we can. + for (unsigned i = 0; i != Ctors.size(); ++i) { + Function *F = Ctors[i]; + // Found a null terminator in the middle of the list, prune off the rest of + // the list. + if (!F) { + if (i != Ctors.size() - 1) { + Ctors.resize(i + 1); + MadeChange = true; + } + break; + } + DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); + + // We cannot simplify external ctor functions. + if (F->empty()) + continue; + + // If we can evaluate the ctor at compile time, do. + if (ShouldRemove(F)) { + Ctors.erase(Ctors.begin() + i); + MadeChange = true; + --i; + continue; + } + } + + if (!MadeChange) + return false; + + installGlobalCtors(GlobalCtors, Ctors); + return true; +} + +} // End llvm namespace diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index ac6926f..9972b22 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -25,17 +25,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Instruction *AllocaPoint) { if (I.use_empty()) { I.eraseFromParent(); - return 0; + return nullptr; } // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(I.getType(), 0, + Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", + Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", F->getEntryBlock().begin()); } @@ -56,7 +56,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == &I) { Value *&V = Loads[PN->getIncomingBlock(i)]; - if (V == 0) { + if (!V) { // Insert the load into the predecessor block V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); @@ -110,17 +110,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { if (P->use_empty()) { P->eraseFromParent(); - return 0; + return nullptr; } // Create a stack slot to hold the value. 
AllocaInst *Slot; if (AllocaPoint) { - Slot = new AllocaInst(P->getType(), 0, + Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", + Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", F->getEntryBlock().begin()); } diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 39c80f8..51ead40 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "flattencfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -22,16 +21,19 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; +#define DEBUG_TYPE "flattencfg" + namespace { class FlattenCFGOpt { AliasAnalysis *AA; /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. - bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, + Pass *P = nullptr); /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. - bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0); + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr); /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -126,9 +128,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. - BasicBlock *LastCondBlock = NULL; - BasicBlock *FirstCondBlock = NULL; - BasicBlock *UnCondBlock = NULL; + BasicBlock *LastCondBlock = nullptr; + BasicBlock *FirstCondBlock = nullptr; + BasicBlock *UnCondBlock = nullptr; int Idx = -1; // Check predecessors of \param BB. 
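[Editor's aside] The transform FlattenParallelAndOr performs is easier to see
as a CFG rewrite: two blocks that each test one condition and share a common
false target collapse into a single block testing the conjunction. A minimal
sketch under stated assumptions, using hypothetical names and assuming the
second condition is safe to speculate in the first block (which the real pass
verifies); this helper is not part of the patch:

  // Requires llvm/IR/IRBuilder.h and llvm/IR/Instructions.h.
  // Before: BB1: br i1 %a, label %BB2, label %F
  //         BB2: br i1 %b, label %T,   label %F
  // After:  BB1: %both = and i1 %a, %b
  //              br i1 %both, label %T, label %F
  static void flattenParallelAnd(BranchInst *FirstBr, Value *CondB,
                                 BasicBlock *TrueBB, BasicBlock *FalseBB) {
    IRBuilder<> Builder(FirstBr);
    Value *Both = Builder.CreateAnd(FirstBr->getCondition(), CondB, "both");
    Builder.CreateCondBr(Both, TrueBB, FalseBB);
    FirstBr->eraseFromParent(); // the second test block becomes unreachable
  }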
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index e9ebc45..12057e4 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -61,7 +61,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, } else if (const Instruction *I = dyn_cast(UR)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); - if (GS.AccessingFunction == 0) + if (!GS.AccessingFunction) GS.AccessingFunction = F; else if (GS.AccessingFunction != F) GS.HasMultipleAccessingFunctions = true; @@ -176,6 +176,6 @@ bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { GlobalStatus::GlobalStatus() : IsCompared(false), IsLoaded(false), StoredType(NotStored), - StoredOnceValue(0), AccessingFunction(0), + StoredOnceValue(nullptr), AccessingFunction(nullptr), HasMultipleAccessingFunctions(false), HasNonInstructionUser(false), Ordering(NotAtomic) {} diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 86def3e..e01d0c3 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -51,8 +52,8 @@ namespace { public: InvokeInliningInfo(InvokeInst *II) - : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0), - CallerLPad(0), InnerEHValuesPHI(0) { + : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), + CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing // the edge from this block. @@ -289,13 +290,13 @@ static void UpdateCallGraphAfterInlining(CallSite CS, ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); // Only copy the edge if the call was inlined! - if (VMI == VMap.end() || VMI->second == 0) + if (VMI == VMap.end() || VMI->second == nullptr) continue; // If the call was inlined, but then constant folded, there is no edge to // add. Check for this case. Instruction *NewCall = dyn_cast(VMI->second); - if (NewCall == 0) continue; + if (!NewCall) continue; // Remember that this call site got inlined for the client of // InlineFunction. @@ -306,7 +307,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS, // happens, set the callee of the new call site to a more precise // destination. This can also happen if the call graph node of the caller // was just unnecessarily imprecise. - if (I->second->getFunction() == 0) + if (!I->second->getFunction()) if (Function *F = CallSite(NewCall).getCalledFunction()) { // Indirect call site resolved to direct call. 
CallerNode->addCalledFunction(CallSite(NewCall), CG[F]); @@ -322,13 +323,44 @@ static void UpdateCallGraphAfterInlining(CallSite CS, CallerNode->removeCallEdgeFor(CS); } +static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, + BasicBlock *InsertBlock, + InlineFunctionInfo &IFI) { + LLVMContext &Context = Src->getContext(); + Type *VoidPtrTy = Type::getInt8PtrTy(Context); + Type *AggTy = cast(Src->getType())->getElementType(); + Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) }; + Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); + IRBuilder<> builder(InsertBlock->begin()); + Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp"); + Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp"); + + Value *Size; + if (IFI.DL == nullptr) + Size = ConstantExpr::getSizeOf(AggTy); + else + Size = ConstantInt::get(Type::getInt64Ty(Context), + IFI.DL->getTypeStoreSize(AggTy)); + + // Always generate a memcpy of alignment 1 here because we don't know + // the alignment of the src pointer. Other optimizations can infer + // better alignment. + Value *CallArgs[] = { + DstCast, SrcCast, Size, + ConstantInt::get(Type::getInt32Ty(Context), 1), + ConstantInt::getFalse(Context) // isVolatile + }; + builder.CreateCall(MemCpyFn, CallArgs); +} + /// HandleByValArgument - When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, unsigned ByValAlignment) { - Type *AggTy = cast(Arg->getType())->getElementType(); + PointerType *ArgTy = cast(Arg->getType()); + Type *AggTy = ArgTy->getElementType(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and @@ -349,11 +381,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. } - - LLVMContext &Context = Arg->getContext(); - Type *VoidPtrTy = Type::getInt8PtrTy(Context); - // Create the alloca. If we have DataLayout, use nice alignment. unsigned Align = 1; if (IFI.DL) @@ -366,32 +394,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, Function *Caller = TheCall->getParent()->getParent(); - Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), + Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(), &*Caller->begin()->begin()); - // Emit a memcpy. - Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)}; - Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(), - Intrinsic::memcpy, - Tys); - Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall); - Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall); - - Value *Size; - if (IFI.DL == 0) - Size = ConstantExpr::getSizeOf(AggTy); - else - Size = ConstantInt::get(Type::getInt64Ty(Context), - IFI.DL->getTypeStoreSize(AggTy)); - - // Always generate a memcpy of alignment 1 here because we don't know - // the alignment of the src pointer. Other optimizations can infer - // better alignment. 
- Value *CallArgs[] = { - DestCast, SrcCast, Size, - ConstantInt::get(Type::getInt32Ty(Context), 1), - ConstantInt::getFalse(Context) // isVolatile - }; - IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs); + IFI.StaticAllocas.push_back(cast(NewAlloca)); // Uses of the argument in the function should use our new alloca // instead. @@ -417,8 +422,10 @@ static bool isUsedByLifetimeMarker(Value *V) { // hasLifetimeMarkers - Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { - Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext()); - if (AI->getType() == Int8PtrTy) + Type *Ty = AI->getType(); + Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), + Ty->getPointerAddressSpace()); + if (Ty == Int8PtrTy) return isUsedByLifetimeMarker(AI); // Do a scan to find all the casts to i8*. @@ -472,6 +479,33 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, } } +/// Returns a musttail call instruction if one immediately precedes the given +/// return instruction with an optional bitcast instruction between them. +static CallInst *getPrecedingMustTailCall(ReturnInst *RI) { + Instruction *Prev = RI->getPrevNode(); + if (!Prev) + return nullptr; + + if (Value *RV = RI->getReturnValue()) { + if (RV != Prev) + return nullptr; + + // Look through the optional bitcast. + if (auto *BI = dyn_cast(Prev)) { + RV = BI->getOperand(0); + Prev = BI->getPrevNode(); + if (!Prev || RV != Prev) + return nullptr; + } + } + + if (auto *CI = dyn_cast(Prev)) { + if (CI->isMustTailCall()) + return CI; + } + return nullptr; +} + /// InlineFunction - This function inlines the called function into the basic /// block of the caller. This returns false if it is not possible to inline /// this call. The program is still in a well defined state if this occurs @@ -491,15 +525,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, IFI.reset(); const Function *CalledFunc = CS.getCalledFunction(); - if (CalledFunc == 0 || // Can't inline external function or indirect + if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; - // If the call to the callee is not a tail call, we must clear the 'tail' - // flags on any calls that we inline. - bool MustClearTailCallFlags = - !(isa(TheCall) && cast(TheCall)->isTailCall()); - // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); @@ -519,7 +548,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } // Get the personality function from the callee if it contains a landing pad. - Value *CalleePersonality = 0; + Value *CalleePersonality = nullptr; for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I) if (const InvokeInst *II = dyn_cast(I->getTerminator())) { @@ -562,6 +591,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; + // Keep a list of pair (dst, src) to emit byval initializations. 
+ SmallVector, 4> ByValInit; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -581,11 +612,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, if (CS.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, CalledFunc->getParamAlignment(ArgNo+1)); - - // Calls that we inline may use the new alloca, so we need to clear - // their 'tail' flags if HandleByValArgument introduced a new alloca and - // the callee has calls. - MustClearTailCallFlags |= ActualArg != *AI; + if (ActualArg != *AI) + ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } VMap[I] = ActualArg; @@ -602,6 +630,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; + // Inject byval arguments initialization. + for (std::pair &Init : ByValInit) + HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), + FirstNewBlock, IFI); + // Update the callgraph if requested. if (IFI.CG) UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); @@ -619,7 +652,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast(I++); - if (AI == 0) continue; + if (!AI) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. @@ -651,6 +684,45 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + bool InlinedMustTailCalls = false; + if (InlinedFunctionInfo.ContainsCalls) { + CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; + if (CallInst *CI = dyn_cast(TheCall)) + CallSiteTailKind = CI->getTailCallKind(); + + for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; + ++BB) { + for (Instruction &I : *BB) { + CallInst *CI = dyn_cast(&I); + if (!CI) + continue; + + // We need to reduce the strength of any inlined tail calls. For + // musttail, we have to avoid introducing potential unbounded stack + // growth. For example, if functions 'f' and 'g' are mutually recursive + // with musttail, we can inline 'g' into 'f' so long as we preserve + // musttail on the cloned call to 'f'. If either the inlined call site + // or the cloned call site is *not* musttail, the program already has + // one frame of stack growth, so it's safe to remove musttail. Here is + // a table of example transformations: + // + // f -> musttail g -> musttail f ==> f -> musttail f + // f -> musttail g -> tail f ==> f -> tail f + // f -> g -> musttail f ==> f -> f + // f -> g -> tail f ==> f -> f + CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); + ChildTCK = std::min(CallSiteTailKind, ChildTCK); + CI->setTailCallKind(ChildTCK); + InlinedMustTailCalls |= CI->isMustTailCall(); + + // Calls inlined through a 'nounwind' call site should be marked + // 'nounwind'. + if (MarkNoUnwind) + CI->setDoesNotThrow(); + } + } + } + // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. if (InsertLifetime && !IFI.StaticAllocas.empty()) { @@ -664,7 +736,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, continue; // Try to determine the size of the allocation. 
- ConstantInt *AllocaSize = 0; + ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast(AI->getArraySize())) { if (IFI.DL) { @@ -683,9 +755,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } builder.CreateLifetimeStart(AI, AllocaSize); - for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) { - IRBuilder<> builder(Returns[ri]); - builder.CreateLifetimeEnd(AI, AllocaSize); + for (ReturnInst *RI : Returns) { + // Don't insert llvm.lifetime.end calls between a musttail call and a + // return. The return kills all local allocas. + if (InlinedMustTailCalls && getPrecedingMustTailCall(RI)) + continue; + IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); } } } @@ -704,33 +779,56 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr); + for (ReturnInst *RI : Returns) { + // Don't insert llvm.stackrestore calls between a musttail call and a + // return. The return will restore the stack pointer. + if (InlinedMustTailCalls && getPrecedingMustTailCall(RI)) + continue; + IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr); } } - // If we are inlining tail call instruction through a call site that isn't - // marked 'tail', we must remove the tail marker for any calls in the inlined - // code. Also, calls inlined through a 'nounwind' call site should be marked - // 'nounwind'. - if (InlinedFunctionInfo.ContainsCalls && - (MustClearTailCallFlags || MarkNoUnwind)) { - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); - BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (CallInst *CI = dyn_cast(I)) { - if (MustClearTailCallFlags) - CI->setTailCall(false); - if (MarkNoUnwind) - CI->setDoesNotThrow(); - } - } - // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. if (InvokeInst *II = dyn_cast(TheCall)) HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + // Handle any inlined musttail call sites. In order for a new call site to be + // musttail, the source of the clone and the inlined call site must have been + // musttail. Therefore it's safe to return without merging control into the + // phi below. + if (InlinedMustTailCalls) { + // Check if we need to bitcast the result of any musttail calls. + Type *NewRetTy = Caller->getReturnType(); + bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy; + + // Handle the returns preceded by musttail calls separately. + SmallVector NormalReturns; + for (ReturnInst *RI : Returns) { + CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI); + if (!ReturnedMustTail) { + NormalReturns.push_back(RI); + continue; + } + if (!NeedBitCast) + continue; + + // Delete the old return and any preceding bitcast. + BasicBlock *CurBB = RI->getParent(); + auto *OldCast = dyn_cast_or_null(RI->getReturnValue()); + RI->eraseFromParent(); + if (OldCast) + OldCast->eraseFromParent(); + + // Insert a new bitcast and return with the right type. + IRBuilder<> Builder(CurBB); + Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); + } + + // Leave behind the normal returns so we can merge control flow. 
+ std::swap(Returns, NormalReturns); + } + // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. @@ -774,7 +872,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; - BranchInst *CreatedBranchToNormalDest = NULL; + BranchInst *CreatedBranchToNormalDest = nullptr; if (InvokeInst *II = dyn_cast(TheCall)) { // Add an unconditional branch to make this look like the CallInst case... @@ -813,7 +911,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); - PHINode *PHI = 0; + PHINode *PHI = nullptr; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. @@ -886,6 +984,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Since we are now done with the Call/Invoke, we can delete it. TheCall->eraseFromParent(); + // If we inlined any musttail calls and the original return is now + // unreachable, delete it. It can only contain a bitcast and ret. + if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB)) + AfterCallBB->eraseFromParent(); + // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast(Br)->isUnconditional() && "splitBasicBlock broken!"); diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index e73a543..9f91eeb 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "integer-division" #include "llvm/Transforms/Utils/IntegerDivision.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -24,6 +23,8 @@ using namespace llvm; +#define DEBUG_TYPE "integer-division" + /// Generate code to compute the remainder of two signed integers. Returns the /// remainder, which will have the sign of the dividend. Builder's insert point /// should be pointing where the caller wants code generated, e.g. at the srem diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index d538175..51a3d9c 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -27,7 +27,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lcssa" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" @@ -44,6 +43,8 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; +#define DEBUG_TYPE "lcssa" + STATISTIC(NumLCSSA, "Number of live out of a loop variables"); /// Return true if the specified block is in the list. 
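// [Editor's aside -- illustration, not part of the patch.] The invariant
// LCSSA maintains: any value defined inside a loop and used outside of it
// is only used through a PHI node in an exit block, e.g.
//
//   latch:
//     %v = add i32 %iv, 1
//     br i1 %done, label %exit, label %header
//   exit:
//     %v.lcssa = phi i32 [ %v, %latch ]
//
// so loop passes can rewrite %v without scanning uses outside the loop.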
@@ -267,8 +268,6 @@ struct LCSSA : public FunctionPass { } private: - bool processLoop(Loop &L); - void verifyAnalysis() const override; }; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 9d0be8b..aedd787 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -43,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "local" + STATISTIC(NumRemoved, "Number of unreachable basic blocks removed"); //===----------------------------------------------------------------------===// @@ -159,7 +161,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Otherwise, check to see if the switch only branches to one destination. // We do this by reseting "TheOnlyDest" to null when we find two non-equal // destinations. - if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0; + if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr; } if (CI && !TheOnlyDest) { @@ -180,7 +182,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, // Found case matching a constant operand? BasicBlock *Succ = SI->getSuccessor(i); if (Succ == TheOnlyDest) - TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest + TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest else Succ->removePredecessor(BB); } @@ -233,7 +235,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { if (IBI->getDestination(i) == TheOnlyDest) - TheOnlyDest = 0; + TheOnlyDest = nullptr; else IBI->getDestination(i)->removePredecessor(IBI->getParent()); } @@ -331,7 +333,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V, // dead as we go. for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Value *OpV = I->getOperand(i); - I->setOperand(i, 0); + I->setOperand(i, nullptr); if (!OpV->use_empty()) continue; @@ -894,24 +896,26 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align, return PrefAlign; } - if (GlobalValue *GV = dyn_cast(V)) { + if (auto *GO = dyn_cast(V)) { // If there is a large requested alignment and we can, bump up the alignment // of the global. - if (GV->isDeclaration()) return Align; + if (GO->isDeclaration()) + return Align; // If the memory we set aside for the global may not be the memory used by // the final program then it is impossible for us to reliably enforce the // preferred alignment. - if (GV->isWeakForLinker()) return Align; + if (GO->isWeakForLinker()) + return Align; - if (GV->getAlignment() >= PrefAlign) - return GV->getAlignment(); + if (GO->getAlignment() >= PrefAlign) + return GO->getAlignment(); // We can only increase the alignment of the global if it has no alignment // specified or if it is not assigned a section. If it is assigned a // section, the global could be densely packed with other objects in the // section, increasing the alignment could cause padding issues. - if (!GV->hasSection() || GV->getAlignment() == 0) - GV->setAlignment(PrefAlign); - return GV->getAlignment(); + if (!GO->hasSection() || GO->getAlignment() == 0) + GO->setAlignment(PrefAlign); + return GO->getAlignment(); } return Align; @@ -928,7 +932,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, unsigned BitWidth = DL ? 
DL->getPointerTypeSizeInBits(V->getType()) : 64; APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, DL); + computeKnownBits(V, KnownZero, KnownOne, DL); unsigned TrailZ = KnownZero.countTrailingOnes(); // Avoid trouble with ridiculously large TrailZ values, such as @@ -981,10 +985,10 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (LdStHasDebugValue(DIVar, SI)) return true; - Instruction *DbgVal = NULL; + Instruction *DbgVal = nullptr; // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. - Argument *ExtendedArg = NULL; + Argument *ExtendedArg = nullptr; if (ZExtInst *ZExt = dyn_cast(SI->getOperand(0))) ExtendedArg = dyn_cast(ZExt->getOperand(0)); if (SExtInst *SExt = dyn_cast(SI->getOperand(0))) @@ -993,14 +997,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI); else DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc SIDL = SI->getDebugLoc(); - if (!SIDL.isUnknown()) - DbgVal->setDebugLoc(SIDL); - // Otherwise propagate debug metadata from dbg.declare. - else - DbgVal->setDebugLoc(DDI->getDebugLoc()); + DbgVal->setDebugLoc(DDI->getDebugLoc()); return true; } @@ -1020,17 +1017,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, Instruction *DbgVal = Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, LI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc LIDL = LI->getDebugLoc(); - if (!LIDL.isUnknown()) - DbgVal->setDebugLoc(LIDL); - // Otherwise propagate debug metadata from dbg.declare. - else - DbgVal->setDebugLoc(DDI->getDebugLoc()); + DbgVal->setDebugLoc(DDI->getDebugLoc()); return true; } +/// Determine whether this alloca is either a VLA or an array. +static bool isArray(AllocaInst *AI) { + return AI->isArrayAllocation() || + AI->getType()->getElementType()->isArrayTy(); +} + /// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set /// of llvm.dbg.value intrinsics. bool llvm::LowerDbgDeclare(Function &F) { @@ -1049,20 +1045,26 @@ bool llvm::LowerDbgDeclare(Function &F) { AllocaInst *AI = dyn_cast_or_null(DDI->getAddress()); // If this is an alloca for a scalar variable, insert a dbg.value // at each load and store to the alloca and erase the dbg.declare. - if (AI && !AI->isArrayAllocation()) { - - // We only remove the dbg.declare intrinsic if all uses are - // converted to dbg.value intrinsics. - bool RemoveDDI = true; + // The dbg.values allow tracking a variable even if it is not + // stored on the stack, while the dbg.declare can only describe + // the stack slot (and at a lexical-scope granularity). Later + // passes will attempt to elide the stack slot. + if (AI && !isArray(AI)) { for (User *U : AI->users()) if (StoreInst *SI = dyn_cast(U)) ConvertDebugDeclareToDebugValue(DDI, SI, DIB); else if (LoadInst *LI = dyn_cast(U)) ConvertDebugDeclareToDebugValue(DDI, LI, DIB); - else - RemoveDDI = false; - if (RemoveDDI) - DDI->eraseFromParent(); + else if (CallInst *CI = dyn_cast(U)) { + // This is a call by-value or some other instruction that + // takes a pointer to the variable. Insert a *value* + // intrinsic that describes the alloca. 
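// [Editor's illustration -- not part of the patch; intrinsic forms are the
// 3.5-era ones.] For a variable whose alloca escapes into a call, the block
// below rewrites
//   call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !var)
// into a value intrinsic at the call site that still refers to the alloca:
//   call void @llvm.dbg.value(metadata !{i32* %x.addr}, i64 0, metadata !var)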
+ auto DbgVal = + DIB.insertDbgValueIntrinsic(AI, 0, + DIVariable(DDI->getVariable()), CI); + DbgVal->setDebugLoc(DDI->getDebugLoc()); + } + DDI->eraseFromParent(); } } return true; @@ -1076,7 +1078,7 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { if (DbgDeclareInst *DDI = dyn_cast(U)) return DDI; - return 0; + return nullptr; } bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 47083ea..f7787da 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -37,7 +37,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-simplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" @@ -63,6 +62,8 @@ #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; +#define DEBUG_TYPE "loop-simplify" + STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted"); STATISTIC(NumNested , "Number of nested loops split out"); @@ -85,7 +86,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, // Figure out *which* outside block to put this after. Prefer an outside // block that neighbors a BB actually in the loop. - BasicBlock *FoundBB = 0; + BasicBlock *FoundBB = nullptr; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { Function::iterator BBI = SplitPreds[i]; if (++BBI != NewBB->getParent()->end() && @@ -119,7 +120,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // If the loop is branched to from an indirect branch, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (isa(P->getTerminator())) return 0; + if (isa(P->getTerminator())) return nullptr; // Keep track of it. OutsideBlocks.push_back(P); @@ -160,14 +161,14 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) { BasicBlock *P = *I; if (L->contains(P)) { // Don't do this if the loop is exited via an indirect branch. - if (isa(P->getTerminator())) return 0; + if (isa(P->getTerminator())) return nullptr; LoopBlocks.push_back(P); } } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); - BasicBlock *NewExitBB = 0; + BasicBlock *NewExitBB = nullptr; if (Exit->isLandingPad()) { SmallVector NewBBs; @@ -211,7 +212,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { PHINode *PN = cast(I); ++I; - if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); if (AA) AA->deleteValue(PN); @@ -226,7 +227,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, // We found something tasty to remove. return PN; } - return 0; + return nullptr; } /// \brief If this loop has multiple backedges, try to pull one of them out into @@ -253,14 +254,14 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, LoopInfo *LI, ScalarEvolution *SE, Pass *PP) { // Don't try to separate loops without a preheader. if (!Preheader) - return 0; + return nullptr; // The header is not a landing pad; preheader insertion should ensure this. 
assert(!L->getHeader()->isLandingPad() && "Can't insert backedge to landing pad"); PHINode *PN = findPHIToPartitionLoops(L, AA, DT); - if (PN == 0) return 0; // No known way to partition. + if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This // handles the case when a PHI node has multiple instances of itself as @@ -271,7 +272,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, !L->contains(PN->getIncomingBlock(i))) { // We can't split indirectbr edges. if (isa(PN->getIncomingBlock(i)->getTerminator())) - return 0; + return nullptr; OuterLoopPreds.push_back(PN->getIncomingBlock(i)); } } @@ -362,7 +363,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // Unique backedge insertion currently depends on having a preheader. if (!Preheader) - return 0; + return nullptr; // The header is not a landing pad; preheader insertion should ensure this. assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); @@ -374,7 +375,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // Indirectbr edges cannot be split, so we must fail if we find one. if (isa(P->getTerminator())) - return 0; + return nullptr; if (P != Preheader) BackedgeBlocks.push_back(P); } @@ -403,7 +404,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // preheader over to the new PHI node. unsigned PreheaderIdx = ~0U; bool HasUniqueIncomingValue = true; - Value *UniqueValue = 0; + Value *UniqueValue = nullptr; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { BasicBlock *IBB = PN->getIncomingBlock(i); Value *IV = PN->getIncomingValue(i); @@ -412,7 +413,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, } else { NewPN->addIncoming(IV, IBB); if (HasUniqueIncomingValue) { - if (UniqueValue == 0) + if (!UniqueValue) UniqueValue = IV; else if (UniqueValue != IV) HasUniqueIncomingValue = false; @@ -609,7 +610,7 @@ ReprocessLoop: PHINode *PN; for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast(I++)); ) - if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) { if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); @@ -653,7 +654,8 @@ ReprocessLoop: if (Inst == CI) continue; if (!L->makeLoopInvariant(Inst, AnyInvariant, - Preheader ? Preheader->getTerminator() : 0)) { + Preheader ? Preheader->getTerminator() + : nullptr)) { AllInvariant = false; break; } @@ -761,12 +763,6 @@ namespace { /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. void verifyAnalysis() const override; - - private: - bool ProcessLoop(Loop *L); - BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit); - Loop *SeparateNestedLoop(Loop *L, BasicBlock *Preheader); - BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader); }; } @@ -782,7 +778,7 @@ INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } -/// runOnLoop - Run down all loops in the CFG (recursively, but we could do +/// runOnFunction - Run down all loops in the CFG (recursively, but we could do /// it in any convenient order) inserting preheaders... 
/// bool LoopSimplify::runOnFunction(Function &F) { diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index d2dfc20..d953e30 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -16,7 +16,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -25,6 +24,8 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -34,6 +35,8 @@ #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + // TODO: Should these be here or in LoopUnroll? STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); @@ -68,10 +71,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. BasicBlock *OnlyPred = BB->getSinglePredecessor(); - if (!OnlyPred) return 0; + if (!OnlyPred) return nullptr; if (OnlyPred->getTerminator()->getNumSuccessors() != 1) - return 0; + return nullptr; DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); @@ -227,20 +230,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, (unsigned)GreatestCommonDivisor64(Count, TripMultiple); } + // Report the unrolling decision. + DebugLoc LoopLoc = L->getStartLoc(); + Function *F = Header->getParent(); + LLVMContext &Ctx = F->getContext(); + if (CompletelyUnroll) { DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() << " with trip count " << TripCount << "!\n"); + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + Twine("completely unrolled loop with ") + + Twine(TripCount) + " iterations"); } else { DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); + Twine DiagMsg("unrolled loop by a factor of " + Twine(Count)); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); + DiagMsg.concat(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); + DiagMsg.concat(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); + DiagMsg.concat(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg); } bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); @@ -411,7 +427,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, } } - DominatorTree *DT = 0; + DominatorTree *DT = nullptr; if (PP) { // FIXME: Reconstruct dom info, because it is not preserved properly. // Incrementally updating domtree after loop unrolling would be easy. @@ -458,7 +474,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, Loop *OuterL = L->getParentLoop(); // Remove the loop from the LoopPassManager if it's completely removed. 
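
The remark emission added to UnrollLoop above uses the emitOptimizationRemark helper from llvm/IR/DiagnosticInfo.h with the signature shown in the hunk. One caveat about the surrounding code: a named Twine such as DiagMsg that concatenates temporaries is unsafe to store, and Twine::concat returns a new Twine rather than appending in place, so the sketch below builds the whole message inside the call (the helper name is hypothetical; the -Rpass flag assumes a 3.5-era clang):

#include "llvm/ADT/Twine.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Report an unroll decision, attributed to a pass name and source location.
static void reportUnroll(Function &F, DebugLoc Loc, unsigned Count) {
  emitOptimizationRemark(F.getContext(), "loop-unroll", F, Loc,
                         Twine("unrolled loop by a factor of ") +
                             Twine(Count));
}

// Users surface such remarks from the front end with, for example:
//   clang -O2 -Rpass=loop-unroll file.c
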
- if (CompletelyUnroll && LPM != NULL) + if (CompletelyUnroll && LPM != nullptr) LPM->deleteLoopFromQueue(L); // If we have a pass and a DominatorTree we should re-simplify impacted loops @@ -470,7 +486,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, OuterL = L; if (OuterL) { ScalarEvolution *SE = PP->getAnalysisIfAvailable(); - simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ 0, SE); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE); formLCSSARecursively(*OuterL, *DT, SE); } } diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index d801d5f..5bef091 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,7 +21,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "loop-unroll" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopIterator.h" @@ -37,6 +36,8 @@ using namespace llvm; +#define DEBUG_TYPE "loop-unroll" + STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); @@ -58,7 +59,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, BasicBlock *OrigPH, BasicBlock *NewPH, ValueToValueMapTy &LVMap, Pass *P) { BasicBlock *Latch = L->getLoopLatch(); - assert(Latch != 0 && "Loop must have a latch"); + assert(Latch && "Loop must have a latch"); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). @@ -110,7 +111,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count, new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount, ConstantInt::get(TripCount->getType(), Count)); BasicBlock *Exit = L->getUniqueExitBlock(); - assert(Exit != 0 && "Loop must have a single exit block only"); + assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector Preds(pred_begin(Exit), pred_end(Exit)); if (!Exit->isLandingPad()) { @@ -232,7 +233,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, // Make sure the loop is in canonical form, and there is a single // exit block only. - if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0) + if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock()) return false; // Use Scalar Evolution to compute the trip count. 
This allows more @@ -240,7 +241,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, if (!LPM) return false; ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); - if (SE == 0) + if (!SE) return false; // Only unroll loops with a computable trip count and the trip count needs @@ -301,7 +302,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, ValueToValueMapTy LVMap; Function *F = Header->getParent(); // These variables are used to update the CFG links in each iteration - BasicBlock *CompareBB = 0; + BasicBlock *CompareBB = nullptr; BasicBlock *LastLoopBB = PH; // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog code diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp index 3e61289..ff89e74 100644 --- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lower-expect-intrinsic" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/BasicBlock.h" @@ -29,6 +28,8 @@ using namespace llvm; +#define DEBUG_TYPE "lower-expect-intrinsic" + STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled"); static cl::opt diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index b1f758e..66d57b0 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "lowerinvoke" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -25,6 +24,8 @@ #include "llvm/Support/CommandLine.h" using namespace llvm; +#define DEBUG_TYPE "lowerinvoke" + STATISTIC(NumInvokes, "Number of invokes replaced"); namespace { diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 6fb7410..9ef694c 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -27,6 +27,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "lower-switch" + namespace { /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch /// instructions. @@ -51,7 +53,8 @@ namespace { Constant* High; BasicBlock* BB; - CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) : + CaseRange(Constant *low = nullptr, Constant *high = nullptr, + BasicBlock *bb = nullptr) : Low(low), High(high), BB(bb) { } }; @@ -182,7 +185,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val, F->getBasicBlockList().insert(++FI, NewLeaf); // Emit comparison - ICmpInst* Comp = NULL; + ICmpInst* Comp = nullptr; if (Leaf.Low == Leaf.High) { // Make the seteq instruction... 
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val, diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index a188ac5..189caa7 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Dominators.h" @@ -22,6 +21,8 @@ #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" using namespace llvm; +#define DEBUG_TYPE "mem2reg" + STATISTIC(NumPromoted, "Number of alloca's promoted"); namespace { diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index ff6e6f9..d9dbbca 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -24,16 +24,16 @@ static void appendToGlobalArray(const char *Array, Module &M, Function *F, int Priority) { IRBuilder<> IRB(M.getContext()); FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); - StructType *Ty = StructType::get( - IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL); - - Constant *RuntimeCtorInit = ConstantStruct::get( - Ty, IRB.getInt32(Priority), F, NULL); // Get the current set of static global constructors and add the new ctor // to the list. SmallVector CurrentCtors; - if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) { + StructType *EltTy; + if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) { + // If there is a global_ctors array, use the existing struct type, which can + // have 2 or 3 fields. + ArrayType *ATy = cast(GVCtor->getType()->getElementType()); + EltTy = cast(ATy->getElementType()); if (Constant *Init = GVCtor->getInitializer()) { unsigned n = Init->getNumOperands(); CurrentCtors.reserve(n + 1); @@ -41,13 +41,26 @@ static void appendToGlobalArray(const char *Array, CurrentCtors.push_back(cast(Init->getOperand(i))); } GVCtor->eraseFromParent(); + } else { + // Use a simple two-field struct if there isn't one already. + EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), + nullptr); } + // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. + Constant *CSVals[3]; + CSVals[0] = IRB.getInt32(Priority); + CSVals[1] = F; + // FIXME: Drop support for the two element form in LLVM 4.0. + if (EltTy->getNumElements() >= 3) + CSVals[2] = llvm::Constant::getNullValue(IRB.getInt8PtrTy()); + Constant *RuntimeCtorInit = + ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements())); + CurrentCtors.push_back(RuntimeCtorInit); // Create a new initializer. 
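
For reference on the ModuleUtils hunks above: appendToGlobalArray now accepts both the legacy two-field llvm.global_ctors entry, { i32, void ()* }, and the newer three-field form whose trailing i8* slot is reserved for the comdat key. A condensed sketch of building one three-field entry with the same 3.5-era APIs the hunk itself uses (the helper name and the 65535 default priority are illustrative):

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Build one { i32, void ()*, i8* } entry for llvm.global_ctors.
static Constant *makeCtorEntry(Module &M, Function *Ctor,
                               int Priority = 65535) {
  IRBuilder<> IRB(M.getContext());
  FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
  // Variadic StructType::get terminated by nullptr, as in the hunk above.
  StructType *EltTy =
      StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
                      IRB.getInt8PtrTy(), nullptr);
  Constant *Fields[] = {IRB.getInt32(Priority), Ctor,
                        Constant::getNullValue(IRB.getInt8PtrTy())};
  return ConstantStruct::get(EltTy, Fields);
}
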
- ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(), - CurrentCtors.size()); + ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); Constant *NewInit = ConstantArray::get(AT, CurrentCtors); // Create the new global variable and replace all uses of diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 25fab89..06d73fe 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "mem2reg" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -51,6 +50,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "mem2reg" + STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); @@ -59,6 +60,7 @@ STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); bool llvm::isAllocaPromotable(const AllocaInst *AI) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. + unsigned AS = AI->getType()->getAddressSpace(); // Only allow direct and non-volatile loads and stores... for (const User *U : AI->users()) { @@ -79,12 +81,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { II->getIntrinsicID() != Intrinsic::lifetime_end) return false; } else if (const BitCastInst *BCI = dyn_cast(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext())) + if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) return false; if (!onlyUsedByLifetimeMarkers(BCI)) return false; } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext())) + if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) return false; if (!GEPI->hasAllZeroIndices()) return false; @@ -114,11 +116,11 @@ struct AllocaInfo { void clear() { DefiningBlocks.clear(); UsingBlocks.clear(); - OnlyStore = 0; - OnlyBlock = 0; + OnlyStore = nullptr; + OnlyBlock = nullptr; OnlyUsedInOneBlock = true; - AllocaPointerVal = 0; - DbgDeclare = 0; + AllocaPointerVal = nullptr; + DbgDeclare = nullptr; } /// Scan the uses of the specified alloca, filling in the AllocaInfo used @@ -146,7 +148,7 @@ struct AllocaInfo { } if (OnlyUsedInOneBlock) { - if (OnlyBlock == 0) + if (!OnlyBlock) OnlyBlock = User->getParent(); else if (OnlyBlock != User->getParent()) OnlyUsedInOneBlock = false; @@ -162,7 +164,7 @@ class RenamePassData { public: typedef std::vector ValVector; - RenamePassData() : BB(NULL), Pred(NULL), Values() {} + RenamePassData() : BB(nullptr), Pred(nullptr), Values() {} RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V) : BB(B), Pred(P), Values(V) {} BasicBlock *BB; @@ -471,7 +473,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, // Find the nearest store that has a lower index than this load. 
StoresByIndexTy::iterator I = std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), - std::make_pair(LoadIdx, static_cast(0)), + std::make_pair(LoadIdx, + static_cast(nullptr)), less_first()); if (I == StoresByIndex.begin()) @@ -632,7 +635,7 @@ void PromoteMem2Reg::run() { // and inserting the phi nodes we marked as necessary // std::vector RenamePassWorkList; - RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); + RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); @@ -682,7 +685,7 @@ void PromoteMem2Reg::run() { PHINode *PN = I->second; // If this PHI node merges one value and/or undefs, get the value. - if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) { + if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT)) { if (AST && PN->getType()->isPointerTy()) AST->deleteValue(PN); PN->replaceAllUsesWith(V); @@ -990,7 +993,7 @@ NextIteration: // Get the next phi node. ++PNI; APN = dyn_cast(PNI); - if (APN == 0) + if (!APN) break; // Verify that it is missing entries. If not, it is not being inserted diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 28f5c44..3fcb789 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ssaupdater" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/TinyPtrVector.h" @@ -28,20 +27,22 @@ using namespace llvm; +#define DEBUG_TYPE "ssaupdater" + typedef DenseMap AvailableValsTy; static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast(AV); } SSAUpdater::SSAUpdater(SmallVectorImpl *NewPHI) - : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {} + : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete static_cast(AV); } void SSAUpdater::Initialize(Type *Ty, StringRef Name) { - if (AV == 0) + if (!AV) AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); @@ -54,7 +55,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const { } void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) { - assert(ProtoType != 0 && "Need to initialize SSAUpdater"); + assert(ProtoType && "Need to initialize SSAUpdater"); assert(ProtoType == V->getType() && "All rewritten values must have the same type"); getAvailableVals(AV)[BB] = V; @@ -90,7 +91,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // Otherwise, we have the hard case. Get the live-in values for each // predecessor. SmallVector, 8> PredValues; - Value *SingularValue = 0; + Value *SingularValue = nullptr; // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. 
If we already have PHI nodes in this block, walk one @@ -105,7 +106,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { if (i == 0) SingularValue = PredVal; else if (PredVal != SingularValue) - SingularValue = 0; + SingularValue = nullptr; } } else { bool isFirstPred = true; @@ -119,7 +120,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { SingularValue = PredVal; isFirstPred = false; } else if (PredVal != SingularValue) - SingularValue = 0; + SingularValue = nullptr; } } @@ -128,7 +129,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return UndefValue::get(ProtoType); // Otherwise, if all the merged values are the same, just use it. - if (SingularValue != 0) + if (SingularValue) return SingularValue; // Otherwise, we do need a PHI: check to see if we already have one available @@ -291,7 +292,7 @@ public: PHINode *PHI = ValueIsPHI(Val, Updater); if (PHI && PHI->getNumIncomingValues() == 0) return PHI; - return 0; + return nullptr; } /// GetPHIValue - For the specified PHI instruction, return the value @@ -401,7 +402,7 @@ run(const SmallVectorImpl &Insts) const { // the order of these instructions in the block. If the first use in the // block is a load, then it uses the live in value. The last store defines // the live out value. We handle this by doing a linear scan of the block. - Value *StoredValue = 0; + Value *StoredValue = nullptr; for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { if (LoadInst *L = dyn_cast(II)) { // If this is a load from an unrelated pointer, ignore it. diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 1e88587..150dbdd 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -11,7 +11,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "simplifycfg" #include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -50,6 +49,8 @@ using namespace llvm; using namespace PatternMatch; +#define DEBUG_TYPE "simplifycfg" + static cl::opt PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), cl::desc("Control the amount of phi node folding to perform (default = 1)")); @@ -212,6 +213,7 @@ static unsigned ComputeSpeculationCost(const User *I) { if (!cast(I)->hasAllConstantIndices()) return UINT_MAX; return 1; + case Instruction::ExtractValue: case Instruction::Load: case Instruction::Add: case Instruction::Sub: @@ -272,12 +274,12 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // branch to BB, then it must be in the 'conditional' part of the "if // statement". If not, it definitely dominates the region. BranchInst *BI = dyn_cast(PBB->getTerminator()); - if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB) + if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB) return true; // If we aren't allowing aggressive promotion anymore, then don't consider // instructions in the 'if region'. - if (AggressiveInsts == 0) return false; + if (!AggressiveInsts) return false; // If we have seen this instruction before, don't count it again. 
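
Stepping back from the SSAUpdater hunks that end above: the class is driven by a small protocol, and the nullptr changes touch its sentinel-heavy internals. A minimal sketch of the public usage pattern (hypothetical helper, assuming the 3.5 public API):

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

// Rewrite one use of a value that has two reaching definitions. SSAUpdater
// inserts whatever PHIs are needed and picks the value live at U's block.
static void rewriteUse(Instruction *DefA, Instruction *DefB, Use &U,
                       StringRef Name) {
  SSAUpdater Updater;
  Updater.Initialize(DefA->getType(), Name);
  Updater.AddAvailableValue(DefA->getParent(), DefA);
  Updater.AddAvailableValue(DefB->getParent(), DefB);
  Updater.RewriteUse(U);
}
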
if (AggressiveInsts->count(I)) return true; @@ -332,7 +334,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) { return cast (ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false)); } - return 0; + return nullptr; } /// GatherConstantCompares - Given a potentially 'or'd or 'and'd together @@ -343,7 +345,7 @@ static Value * GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, const DataLayout *DL, bool isEQ, unsigned &UsedICmps) { Instruction *I = dyn_cast(V); - if (I == 0) return 0; + if (!I) return nullptr; // If this is an icmp against a constant, handle this as one of the cases. if (ICmpInst *ICI = dyn_cast(I)) { @@ -390,19 +392,19 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, // If there are a ton of values, we don't want to make a ginormous switch. if (Span.getSetSize().ugt(8) || Span.isEmptySet()) - return 0; + return nullptr; for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) Vals.push_back(ConstantInt::get(V->getContext(), Tmp)); UsedICmps++; return hasAdd ? RHSVal : I->getOperand(0); } - return 0; + return nullptr; } // Otherwise, we can only handle an | or &, depending on isEQ. if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And)) - return 0; + return nullptr; unsigned NumValsBeforeLHS = Vals.size(); unsigned UsedICmpsBeforeLHS = UsedICmps; @@ -420,19 +422,19 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, // The RHS of the or/and can't be folded in and we haven't used "Extra" yet, // set it and return success. - if (Extra == 0 || Extra == I->getOperand(1)) { + if (Extra == nullptr || Extra == I->getOperand(1)) { Extra = I->getOperand(1); return LHS; } Vals.resize(NumValsBeforeLHS); UsedICmps = UsedICmpsBeforeLHS; - return 0; + return nullptr; } // If the LHS can't be folded in, but Extra is available and RHS can, try to // use LHS as Extra. - if (Extra == 0 || Extra == I->getOperand(0)) { + if (Extra == nullptr || Extra == I->getOperand(0)) { Value *OldExtra = Extra; Extra = I->getOperand(0); if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL, @@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector &Vals, Value *&Extra, Extra = OldExtra; } - return 0; + return nullptr; } static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { - Instruction *Cond = 0; + Instruction *Cond = nullptr; if (SwitchInst *SI = dyn_cast(TI)) { Cond = dyn_cast(SI->getCondition()); } else if (BranchInst *BI = dyn_cast(TI)) { @@ -463,7 +465,7 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) { /// isValueEqualityComparison - Return true if the specified terminator checks /// to see if a value is equal to constant integer value. Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) { - Value *CV = 0; + Value *CV = nullptr; if (SwitchInst *SI = dyn_cast(TI)) { // Do not permit merging of large switch instructions into their // predecessors unless there is only one predecessor. @@ -653,11 +655,11 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Otherwise, TI's block must correspond to some matched value. Find out // which value (or set of values) this is. - ConstantInt *TIV = 0; + ConstantInt *TIV = nullptr; BasicBlock *TIBB = TI->getParent(); for (unsigned i = 0, e = PredCases.size(); i != e; ++i) if (PredCases[i].Dest == TIBB) { - if (TIV != 0) + if (TIV) return false; // Cannot handle multiple values coming to this block. 
TIV = PredCases[i].Value; } @@ -665,7 +667,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, // Okay, we found the one constant that our value can be if we get into TI's // BB. Find out which successor will unconditionally be branched to. - BasicBlock *TheRealDest = 0; + BasicBlock *TheRealDest = nullptr; for (unsigned i = 0, e = ThisCases.size(); i != e; ++i) if (ThisCases[i].Value == TIV) { TheRealDest = ThisCases[i].Dest; @@ -673,7 +675,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, } // If not handled by any explicit cases, it is handled by the default case. - if (TheRealDest == 0) TheRealDest = ThisDef; + if (!TheRealDest) TheRealDest = ThisDef; // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; @@ -681,7 +683,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI, if (*SI != CheckEdge) (*SI)->removePredecessor(TIBB); else - CheckEdge = 0; + CheckEdge = nullptr; // Insert the new branch. Instruction *NI = Builder.CreateBr(TheRealDest); @@ -950,10 +952,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, last check. If BB is still a successor of PSI, then we must // have an infinite loop case. If so, add an infinitely looping block // to handle the case to preserve the behavior of the code. - BasicBlock *InfLoopBlock = 0; + BasicBlock *InfLoopBlock = nullptr; for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i) if (NewSI->getSuccessor(i) == BB) { - if (InfLoopBlock == 0) { + if (!InfLoopBlock) { // Insert it at the end of the function, because it's either code, // or it won't matter if it's hot. :) InfLoopBlock = BasicBlock::Create(BB->getContext(), @@ -1099,7 +1101,7 @@ HoistTerminator: // These values do not agree. Insert a select instruction before NT // that determines the right value. SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)]; - if (SI == 0) + if (!SI) SI = cast (Builder.CreateSelect(BI->getCondition(), BB1V, BB2V, BB1V->getName()+"."+BB2V->getName())); @@ -1144,7 +1146,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // Gather the PHI nodes in BBEnd. std::map > MapValueFromBB1ToBB2; - Instruction *FirstNonPhiInBBEnd = 0; + Instruction *FirstNonPhiInBBEnd = nullptr; for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) { if (PHINode *PN = dyn_cast(I)) { @@ -1222,7 +1224,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // The operands should be either the same or they need to be generated // with a PHI node after sinking. We only handle the case where there is // a single pair of different operands. - Value *DifferentOp1 = 0, *DifferentOp2 = 0; + Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr; unsigned Op1Idx = 0; for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) { if (I1->getOperand(I) == I2->getOperand(I)) @@ -1318,11 +1320,11 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB) { StoreInst *StoreToHoist = dyn_cast(I); if (!StoreToHoist) - return 0; + return nullptr; // Volatile or atomic. if (!StoreToHoist->isSimple()) - return 0; + return nullptr; Value *StorePtr = StoreToHoist->getPointerOperand(); @@ -1334,7 +1336,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Could be calling an instruction that effects memory like free(). 
if (CurI->mayHaveSideEffects() && !isa(CurI)) - return 0; + return nullptr; StoreInst *SI = dyn_cast(CurI); // Found the previous store make sure it stores to the same location. @@ -1342,10 +1344,10 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // Found the previous store, return its value operand. return SI->getValueOperand(); else if (SI) - return 0; // Unknown store. + return nullptr; // Unknown store. } - return 0; + return nullptr; } /// \brief Speculate a conditional basic block flattening the CFG. @@ -1411,8 +1413,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { SmallDenseMap SinkCandidateUseCounts; unsigned SpeculationCost = 0; - Value *SpeculatedStoreValue = 0; - StoreInst *SpeculatedStore = 0; + Value *SpeculatedStoreValue = nullptr; + StoreInst *SpeculatedStore = nullptr; for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = std::prev(ThenBB->end()); BBI != BBE; ++BBI) { @@ -1620,7 +1622,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) { // constants. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantInt *CB = dyn_cast(PN->getIncomingValue(i)); - if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue; + if (!CB || !CB->getType()->isIntegerTy(1)) continue; // Okay, we now know that all edges from PredBB should be revectored to // branch to RealDest. @@ -1745,7 +1747,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { // If we folded the first phi, PN dangles at this point. Refresh it. If // we ran out of PHIs then we simplified them all. PN = dyn_cast(BB->begin()); - if (PN == 0) return true; + if (!PN) return true; // Don't fold i1 branches on PHIs which contain binary operators. These can // often be turned into switches and other things. @@ -1759,11 +1761,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { // instructions in the predecessor blocks can be promoted as well. If // not, we won't be able to get rid of the control flow, so it's not // worth promoting to select instructions. - BasicBlock *DomBlock = 0; + BasicBlock *DomBlock = nullptr; BasicBlock *IfBlock1 = PN->getIncomingBlock(0); BasicBlock *IfBlock2 = PN->getIncomingBlock(1); if (cast(IfBlock1->getTerminator())->isConditional()) { - IfBlock1 = 0; + IfBlock1 = nullptr; } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin();!isa(I);++I) @@ -1776,7 +1778,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { } if (cast(IfBlock2->getTerminator())->isConditional()) { - IfBlock2 = 0; + IfBlock2 = nullptr; } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin();!isa(I);++I) @@ -1959,7 +1961,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); - Instruction *Cond = 0; + Instruction *Cond = nullptr; if (BI->isConditional()) Cond = dyn_cast(BI->getCondition()); else { @@ -1985,12 +1987,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } } - if (Cond == 0) + if (!Cond) return false; } - if (Cond == 0 || (!isa(Cond) && !isa(Cond)) || - Cond->getParent() != BB || !Cond->hasOneUse()) + if (!Cond || (!isa(Cond) && !isa(Cond)) || + Cond->getParent() != BB || !Cond->hasOneUse()) return false; // Only allow this if the condition is a simple instruction that can be @@ -2005,7 +2007,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // that feeds the branch. 
We later ensure that any values that _it_ uses // were also live in the predecessor, so that we don't unnecessarily create // register pressure or inhibit out-of-order execution. - Instruction *BonusInst = 0; + Instruction *BonusInst = nullptr; if (&*FrontIt != Cond && FrontIt->hasOneUse() && FrontIt->user_back() == Cond && isSafeToSpeculativelyExecute(FrontIt)) { @@ -2040,7 +2042,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); - BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0; + BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr; if (TrueDest == BB || FalseDest == BB) return false; @@ -2052,7 +2054,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // the common successor, verify that the same value flows in from both // blocks. SmallVector PHIs; - if (PBI == 0 || PBI->isUnconditional() || + if (!PBI || PBI->isUnconditional() || (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) || (!BI->isConditional() && @@ -2142,7 +2144,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { } // If we have a bonus inst, clone it into the predecessor block. - Instruction *NewBonus = 0; + Instruction *NewBonus = nullptr; if (BonusInst) { NewBonus = BonusInst->clone(); @@ -2218,14 +2220,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { MDBuilder(BI->getContext()). createBranchWeights(MDWeights)); } else - PBI->setMetadata(LLVMContext::MD_prof, NULL); + PBI->setMetadata(LLVMContext::MD_prof, nullptr); } else { // Update PHI nodes in the common successors. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { ConstantInt *PBI_C = cast( PHIs[i]->getIncomingValueForBlock(PBI->getParent())); assert(PBI_C->getType()->isIntegerTy(1)); - Instruction *MergedCond = 0; + Instruction *MergedCond = nullptr; if (PBI->getSuccessor(0) == TrueDest) { // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value) // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value) @@ -2498,16 +2500,16 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // If TrueBB and FalseBB are equal, only try to preserve one copy of that // successor. BasicBlock *KeepEdge1 = TrueBB; - BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0; + BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; // Then remove the rest. for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { BasicBlock *Succ = OldTerm->getSuccessor(I); // Make sure only to keep exactly one copy of each edge. if (Succ == KeepEdge1) - KeepEdge1 = 0; + KeepEdge1 = nullptr; else if (Succ == KeepEdge2) - KeepEdge2 = 0; + KeepEdge2 = nullptr; else Succ->removePredecessor(OldTerm->getParent()); } @@ -2516,7 +2518,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc()); // Insert an appropriate new terminator. - if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) { + if (!KeepEdge1 && !KeepEdge2) { if (TrueBB == FalseBB) // We were only looking for one successor, and it was present. // Create an unconditional branch to it. @@ -2538,7 +2540,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, // One of the selected values was a successor, but the other wasn't. // Insert an unconditional branch to the one that was found; // the edge to the one that wasn't must be unreachable. - if (KeepEdge1 == 0) + if (!KeepEdge1) // Only TrueBB was found. 
Builder.CreateBr(TrueBB); else @@ -2639,7 +2641,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // 'V' and this block is the default case for the switch. In this case we can // fold the compared value into the switch to simplify things. BasicBlock *Pred = BB->getSinglePredecessor(); - if (Pred == 0 || !isa(Pred->getTerminator())) return false; + if (!Pred || !isa(Pred->getTerminator())) return false; SwitchInst *SI = cast(Pred->getTerminator()); if (SI->getCondition() != V) @@ -2681,7 +2683,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( // the block. BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0); PHINode *PHIUse = dyn_cast(ICI->user_back()); - if (PHIUse == 0 || PHIUse != &SuccBlock->front() || + if (PHIUse == nullptr || PHIUse != &SuccBlock->front() || isa(++BasicBlock::iterator(PHIUse))) return false; @@ -2733,16 +2735,16 @@ static bool TryToSimplifyUncondBranchWithICmpInIt( static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, IRBuilder<> &Builder) { Instruction *Cond = dyn_cast(BI->getCondition()); - if (Cond == 0) return false; + if (!Cond) return false; // Change br (X == 0 | X == 1), T, F into a switch instruction. // If this is a bunch of seteq's or'd together, or if it's a bunch of // 'setne's and'ed together, collect them. - Value *CompVal = 0; + Value *CompVal = nullptr; std::vector Values; bool TrueWhenEqual = true; - Value *ExtraCase = 0; + Value *ExtraCase = nullptr; unsigned UsedICmps = 0; if (Cond->getOpcode() == Instruction::Or) { @@ -2755,7 +2757,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL, } // If we didn't have a multiply compared value, fail. - if (CompVal == 0) return false; + if (!CompVal) return false; // Avoid turning single icmps into a switch. if (UsedICmps <= 1) @@ -3050,7 +3052,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // Find the most popular block. unsigned MaxPop = 0; unsigned MaxIndex = 0; - BasicBlock *MaxBlock = 0; + BasicBlock *MaxBlock = nullptr; for (std::map >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { if (I->second.first > MaxPop || @@ -3188,7 +3190,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); APInt KnownZero(Bits, 0), KnownOne(Bits, 0); - ComputeMaskedBits(Cond, KnownZero, KnownOne); + computeKnownBits(Cond, KnownZero, KnownOne); // Gather dead cases. SmallVector DeadCases; @@ -3241,13 +3243,13 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex) { if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) - return NULL; // BB must be empty to be a candidate for simplification. + return nullptr; // BB must be empty to be a candidate for simplification. if (!BB->getSinglePredecessor()) - return NULL; // BB must be dominated by the switch. + return nullptr; // BB must be dominated by the switch. BranchInst *Branch = dyn_cast(BB->getTerminator()); if (!Branch || !Branch->isUnconditional()) - return NULL; // Terminator must be unconditional branch. + return nullptr; // Terminator must be unconditional branch. 
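
The EliminateDeadSwitchCases hunk above also picks up the ValueTracking rename from ComputeMaskedBits to computeKnownBits. A simplified sketch of the underlying test (not the pass's exact predicate): a case constant is unreachable whenever it disagrees with a bit already known about the switch condition.

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// True if the known bits of the switch condition rule this case value out.
static bool caseIsDead(SwitchInst *SI, const APInt &CaseVal) {
  Value *Cond = SI->getCondition();
  unsigned Bits = Cond->getType()->getIntegerBitWidth();
  APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
  computeKnownBits(Cond, KnownZero, KnownOne);
  // A 1 where a 0 is known, or a 0 where a 1 is known, is a contradiction.
  return (CaseVal & KnownZero) != 0 || (~CaseVal & KnownOne) != 0;
}
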
BasicBlock *Succ = Branch->getSuccessor(0); @@ -3263,7 +3265,7 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, return PHI; } - return NULL; + return nullptr; } /// ForwardSwitchConditionToPHI - Try to forward the condition of a switch @@ -3336,12 +3338,12 @@ ConstantFold(Instruction *I, if (SelectInst *Select = dyn_cast(I)) { Constant *A = LookupConstant(Select->getCondition(), ConstantPool); if (!A) - return 0; + return nullptr; if (A->isAllOnesValue()) return LookupConstant(Select->getTrueValue(), ConstantPool); if (A->isNullValue()) return LookupConstant(Select->getFalseValue(), ConstantPool); - return 0; + return nullptr; } SmallVector COps; @@ -3349,7 +3351,7 @@ ConstantFold(Instruction *I, if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool)) COps.push_back(A); else - return 0; + return nullptr; } if (CmpInst *Cmp = dyn_cast(I)) @@ -3492,7 +3494,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, const SmallVectorImpl >& Values, Constant *DefaultValue, const DataLayout *DL) - : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) { + : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr), + Array(nullptr) { assert(Values.size() && "Can't build lookup table without values!"); assert(TableSize >= Values.size() && "Can't fit values in table!"); @@ -3513,7 +3516,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, TableContents[Idx] = CaseRes; if (CaseRes != SingleValue) - SingleValue = 0; + SingleValue = nullptr; } // Fill in any holes in the table with the default result. @@ -3526,7 +3529,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M, } if (DefaultValue != SingleValue) - SingleValue = 0; + SingleValue = nullptr; } // If each element in the table contains the same value, we only need to store @@ -3696,7 +3699,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, ConstantInt *MinCaseVal = CI.getCaseValue(); ConstantInt *MaxCaseVal = CI.getCaseValue(); - BasicBlock *CommonDest = 0; + BasicBlock *CommonDest = nullptr; typedef SmallVector, 4> ResultListTy; SmallDenseMap ResultLists; SmallDenseMap DefaultResults; @@ -3741,8 +3744,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, SmallVector, 4> DefaultResultsList; bool HasDefaultResults = false; if (TableHasHoles) { - HasDefaultResults = GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest, - DefaultResultsList, DL); + HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(), + &CommonDest, DefaultResultsList, DL); } bool NeedMask = (TableHasHoles && !HasDefaultResults); if (NeedMask) { @@ -4038,8 +4041,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // from BI. We know that the condbr dominates the two blocks, so see if // there is any identical code in the "then" and "else" blocks. If so, we // can hoist it up to the branching block. 
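
As a source-level picture of the hoisting described in the comment above, including the select that gets inserted when the two arms agree on a computation but not on a value (illustration only, not from the patch):

// Identical leading code in both successors of the branch...
int before(bool C, int A, int B) {
  if (C) { int T = A + B; return T * 2; }
  else   { int T = A + B; return T * 3; }
}

// ...is hoisted above the branch; the remaining difference becomes a select.
int after(bool C, int A, int B) {
  int T = A + B;
  return T * (C ? 2 : 3);
}
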
- if (BI->getSuccessor(0)->getSinglePredecessor() != 0) { - if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + if (BI->getSuccessor(0)->getSinglePredecessor()) { + if (BI->getSuccessor(1)->getSinglePredecessor()) { if (HoistThenElseCodeToIf(BI)) return SimplifyCFG(BB, TTI, DL) | true; } else { @@ -4051,7 +4054,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) return SimplifyCFG(BB, TTI, DL) | true; } - } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) { + } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally // execute Successor #1 if it branches to successor #0. TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index 30f56be..b284e6f 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -13,8 +13,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "indvars" - #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -34,6 +32,8 @@ using namespace llvm; +#define DEBUG_TYPE "indvars" + STATISTIC(NumElimIdentity, "Number of IV identities eliminated"); STATISTIC(NumElimOperand, "Number of IV operands folded into a use"); STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); @@ -56,14 +56,14 @@ namespace { public: SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl &Dead, IVUsers *IVU = NULL) : + SmallVectorImpl &Dead, IVUsers *IVU = nullptr) : L(Loop), LI(LPM->getAnalysisIfAvailable()), SE(SE), DeadInsts(Dead), Changed(false) { DataLayoutPass *DLP = LPM->getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; assert(LI && "IV simplification requires LoopInfo"); } @@ -72,7 +72,7 @@ namespace { /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies /// all simplicitions to users of an IV. - void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL); + void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); @@ -95,25 +95,25 @@ namespace { /// be folded (in case more folding opportunities have been exposed). /// Otherwise return null. Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) { - Value *IVSrc = 0; + Value *IVSrc = nullptr; unsigned OperIdx = 0; - const SCEV *FoldedExpr = 0; + const SCEV *FoldedExpr = nullptr; switch (UseInst->getOpcode()) { default: - return 0; + return nullptr; case Instruction::UDiv: case Instruction::LShr: // We're only interested in the case where we know something about // the numerator and have a constant denominator. if (IVOperand != UseInst->getOperand(OperIdx) || !isa(UseInst->getOperand(1))) - return 0; + return nullptr; // Attempt to fold a binary operator with constant operand. // e.g. ((I + 1) >> 2) => I >> 2 if (!isa(IVOperand) || !isa(IVOperand->getOperand(1))) - return 0; + return nullptr; IVSrc = IVOperand->getOperand(0); // IVSrc must be the (SCEVable) IV, since the other operand is const. 
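
A worked example of when the fold foldIVUser is checking here, ((I + 1) >> 2) to (I >> 2), is actually sound: the rewrite only happens when ScalarEvolution proves the folded expression equal to the original, for instance when the IV is known to step in multiples of 4 (plain C++ check, illustration only):

#include <cassert>
#include <cstdint>

int main() {
  // With I a multiple of 4, adding 1 never crosses a multiple-of-4
  // boundary, so the +1 has no effect on the shifted result.
  for (uint64_t K = 0; K < 1000; ++K) {
    uint64_t I = 4 * K; // an IV with stride 4, as SCEV might prove
    assert(((I + 1) >> 2) == (I >> 2));
  }
  return 0;
}
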
@@ -124,7 +124,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) // Get a constant for the divisor. See createSCEV. uint32_t BitWidth = cast(UseInst->getType())->getBitWidth(); if (D->getValue().uge(BitWidth)) - return 0; + return nullptr; D = ConstantInt::get(UseInst->getContext(), APInt::getOneBitSet(BitWidth, D->getZExtValue())); @@ -133,11 +133,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) } // We have something that might fold it's operand. Compare SCEVs. if (!SE->isSCEVable(UseInst->getType())) - return 0; + return nullptr; // Bypass the operand if SCEV can prove it has no effect. if (SE->getSCEV(UseInst) != FoldedExpr) - return 0; + return nullptr; DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand << " -> " << *UseInst << '\n'); @@ -283,8 +283,8 @@ Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser, return IVUser; // Find a branch guarded by the overflow check. - BranchInst *Branch = 0; - Instruction *AddVal = 0; + BranchInst *Branch = nullptr; + Instruction *AddVal = nullptr; for (User *U : II->users()) { if (ExtractValueInst *ExtractInst = dyn_cast(U)) { if (ExtractInst->getNumIndices() != 1) diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index bbd65f1..33b3637 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -14,7 +14,6 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" @@ -29,6 +28,8 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; +#define DEBUG_TYPE "instsimplify" + STATISTIC(NumSimplified, "Number of redundant instructions removed"); namespace { @@ -47,17 +48,18 @@ namespace { bool runOnFunction(Function &F) override { const DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable(); - const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0; + const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; DataLayoutPass *DLP = getAnalysisIfAvailable(); - const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0; + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; const TargetLibraryInfo *TLI = &getAnalysis(); SmallPtrSet S1, S2, *ToSimplify = &S1, *Next = &S2; bool Changed = false; do { - for (df_iterator DI = df_begin(&F.getEntryBlock()), - DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) - for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) + // Here be subtlety: the iterator must be incremented before the loop + // body (not sure why), so a range-for loop won't work here. + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { Instruction *I = BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not @@ -74,7 +76,15 @@ namespace { ++NumSimplified; Changed = true; } - Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI); + if (res) { + // RecursivelyDeleteTriviallyDeadInstruction can remove + // more than one instruction, so simply incrementing the + // iterator does not work. When instructions get deleted + // re-iterate instead. 
+ BI = BB->begin(); BE = BB->end(); + Changed |= res; + } } // Place the list of instructions to simplify on the next loop iteration diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index b5bc391..3b61bb5 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" @@ -75,7 +76,7 @@ public: // We never change the calling convention. if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C) - return NULL; + return nullptr; return callOptimizer(CI->getCalledFunction(), CI, B); } @@ -186,14 +187,14 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization { !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != DL->getIntPtrType(Context) || FT->getParamType(3) != DL->getIntPtrType(Context)) - return 0; + return nullptr; if (isFoldable(3, 2, false)) { B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; @@ -210,14 +211,14 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization { !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != DL->getIntPtrType(Context) || FT->getParamType(3) != DL->getIntPtrType(Context)) - return 0; + return nullptr; if (isFoldable(3, 2, false)) { B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; @@ -234,7 +235,7 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != DL->getIntPtrType(Context) || FT->getParamType(3) != DL->getIntPtrType(Context)) - return 0; + return nullptr; if (isFoldable(3, 2, false)) { Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), @@ -242,7 +243,7 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization { B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1); return CI->getArgOperand(0); } - return 0; + return nullptr; } }; @@ -260,7 +261,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || FT->getParamType(2) != DL->getIntPtrType(Context)) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // __strcpy_chk(x,x) -> x @@ -277,10 +278,10 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { } else { // Maybe we can stil fold __strcpy_chk to __memcpy_chk. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // This optimization require DataLayout. 
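
Backing up to the SimplifyInstructions hunk just above: the restart-on-delete pattern it adopts recurs whenever deletion can reach beyond the current instruction, so it is worth a standalone statement (self-contained sketch; only RecursivelyDeleteTriviallyDeadInstructions from Local.h is assumed):

#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Deleting I may also erase operands of I elsewhere in the block, so a saved
// iterator can dangle; restarting the scan after any deletion is the safe,
// if conservative, choice.
static void sweepBlock(BasicBlock *BB, const TargetLibraryInfo *TLI) {
  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
    Instruction *I = BI++; // step past I before it can be erased
    if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
      BI = BB->begin();
      BE = BB->end();
    }
  }
}
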
- if (!DL) return 0; + if (!DL) return nullptr; Value *Ret = EmitMemCpyChk(Dst, Src, @@ -288,7 +289,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization { CI->getArgOperand(2), B, DL, TLI); return Ret; } - return 0; + return nullptr; } }; @@ -306,12 +307,12 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(Context) || FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) Value *StrLen = EmitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } // If a) we don't have any length information, or b) we know this will @@ -325,10 +326,10 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { } else { // Maybe we can stil fold __stpcpy_chk to __memcpy_chk. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // This optimization require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; Type *PT = FT->getParamType(0); Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); @@ -336,10 +337,10 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization { ConstantInt::get(DL->getIntPtrType(PT), Len - 1)); if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI)) - return 0; + return nullptr; return DstEnd; } - return 0; + return nullptr; } }; @@ -357,7 +358,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { FT->getParamType(0) != Type::getInt8PtrTy(Context) || !FT->getParamType(2)->isIntegerTy() || FT->getParamType(3) != DL->getIntPtrType(Context)) - return 0; + return nullptr; if (isFoldable(3, 2, false)) { Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), @@ -365,7 +366,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization { Name.substr(2, 7)); return Ret; } - return 0; + return nullptr; } }; @@ -382,7 +383,7 @@ struct StrCatOpt : public LibCallOptimization { FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType()) - return 0; + return nullptr; // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); @@ -390,7 +391,7 @@ struct StrCatOpt : public LibCallOptimization { // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; --Len; // Unbias length. // Handle the simple, do-nothing case: strcat(x, "") -> x @@ -398,7 +399,7 @@ struct StrCatOpt : public LibCallOptimization { return Dst; // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; return emitStrLenMemCpy(Src, Dst, Len, B); } @@ -409,7 +410,7 @@ struct StrCatOpt : public LibCallOptimization { // memory is to be moved to. We just generate a call to strlen. 
Value *DstLen = EmitStrLen(Dst, B, DL, TLI); if (!DstLen) - return 0; + return nullptr; // Now that we have the destination's length, we must index into the // destination's pointer to get the actual memcpy destination (end of @@ -434,7 +435,7 @@ struct StrNCatOpt : public StrCatOpt { FT->getParamType(0) != FT->getReturnType() || FT->getParamType(1) != FT->getReturnType() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; // Extract some information from the instruction Value *Dst = CI->getArgOperand(0); @@ -445,11 +446,11 @@ struct StrNCatOpt : public StrCatOpt { if (ConstantInt *LengthArg = dyn_cast(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else - return 0; + return nullptr; // See if we can get the length of the input string. uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; + if (SrcLen == 0) return nullptr; --SrcLen; // Unbias length. // Handle the simple, do-nothing cases: @@ -458,10 +459,10 @@ struct StrNCatOpt : public StrCatOpt { if (SrcLen == 0 || Len == 0) return Dst; // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // We don't optimize this case - if (Len < SrcLen) return 0; + if (Len < SrcLen) return nullptr; // strncat(x, s, c) -> strcat(x, s) // s is constant so the strcat can be optimized further @@ -478,20 +479,20 @@ struct StrChrOpt : public LibCallOptimization { FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || !FT->getParamType(1)->isIntegerTy(32)) - return 0; + return nullptr; Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); - if (CharC == 0) { + if (!CharC) { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; uint64_t Len = GetStringLength(SrcStr); if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. - return 0; + return nullptr; return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. ConstantInt::get(DL->getIntPtrType(*Context), Len), @@ -504,7 +505,7 @@ struct StrChrOpt : public LibCallOptimization { if (!getConstantStringInfo(SrcStr, Str)) { if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p) return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr"); - return 0; + return nullptr; } // Compute the offset, make sure to handle the case when we're searching for @@ -528,21 +529,21 @@ struct StrRChrOpt : public LibCallOptimization { FT->getReturnType() != B.getInt8PtrTy() || FT->getParamType(0) != FT->getReturnType() || !FT->getParamType(1)->isIntegerTy(32)) - return 0; + return nullptr; Value *SrcStr = CI->getArgOperand(0); ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); // Cannot fold anything if we're not looking for a constant. if (!CharC) - return 0; + return nullptr; StringRef Str; if (!getConstantStringInfo(SrcStr, Str)) { // strrchr(s, 0) -> strchr(s, 0) if (DL && CharC->isZero()) return EmitStrChr(SrcStr, '\0', B, DL, TLI); - return 0; + return nullptr; } // Compute the offset. 
@@ -565,7 +566,7 @@ struct StrCmpOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 @@ -591,14 +592,14 @@ struct StrCmpOpt : public LibCallOptimization { uint64_t Len2 = GetStringLength(Str2P); if (Len1 && Len2) { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; return EmitMemCmp(Str1P, Str2P, ConstantInt::get(DL->getIntPtrType(*Context), std::min(Len1, Len2)), B, DL, TLI); } - return 0; + return nullptr; } }; @@ -612,7 +613,7 @@ struct StrNCmpOpt : public LibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 @@ -623,7 +624,7 @@ struct StrNCmpOpt : public LibCallOptimization { if (ConstantInt *LengthArg = dyn_cast(CI->getArgOperand(2))) Length = LengthArg->getZExtValue(); else - return 0; + return nullptr; if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -649,7 +650,7 @@ struct StrNCmpOpt : public LibCallOptimization { if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType()); - return 0; + return nullptr; } }; @@ -662,18 +663,18 @@ struct StrCpyOpt : public LibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. @@ -692,20 +693,20 @@ struct StpCpyOpt: public LibCallOptimization { FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy()) - return 0; + return nullptr; // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x) Value *StrLen = EmitStrLen(Src, B, DL, TLI); - return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0; + return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr; } // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); - if (Len == 0) return 0; + if (Len == 0) return nullptr; Type *PT = FT->getParamType(0); Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len); @@ -728,7 +729,7 @@ struct StrNCpyOpt : public LibCallOptimization { FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getParamType(2)->isIntegerTy()) - return 0; + return nullptr; Value *Dst = CI->getArgOperand(0); Value *Src = CI->getArgOperand(1); @@ -736,7 +737,7 @@ struct StrNCpyOpt : public LibCallOptimization { // See if we can get the length of the input string. 
uint64_t SrcLen = GetStringLength(Src); - if (SrcLen == 0) return 0; + if (SrcLen == 0) return nullptr; --SrcLen; if (SrcLen == 0) { @@ -749,15 +750,15 @@ struct StrNCpyOpt : public LibCallOptimization { if (ConstantInt *LengthArg = dyn_cast(LenOp)) Len = LengthArg->getZExtValue(); else - return 0; + return nullptr; if (Len == 0) return Dst; // strncpy(x, y, 0) -> x // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // Let strncpy handle the zero padding - if (Len > SrcLen+1) return 0; + if (Len > SrcLen+1) return nullptr; Type *PT = FT->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] @@ -776,7 +777,7 @@ struct StrLenOpt : public LibCallOptimization { if (FT->getNumParams() != 1 || FT->getParamType(0) != B.getInt8PtrTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; Value *Src = CI->getArgOperand(0); @@ -784,11 +785,26 @@ struct StrLenOpt : public LibCallOptimization { if (uint64_t Len = GetStringLength(Src)) return ConstantInt::get(CI->getType(), Len-1); + // strlen(x?"foo":"bars") --> x ? 3 : 4 + if (SelectInst *SI = dyn_cast(Src)) { + uint64_t LenTrue = GetStringLength(SI->getTrueValue()); + uint64_t LenFalse = GetStringLength(SI->getFalseValue()); + if (LenTrue && LenFalse) { + emitOptimizationRemark(*Context, "simplify-libcalls", *Caller, + SI->getDebugLoc(), + "folded strlen(select) to select of constants"); + return B.CreateSelect(SI->getCondition(), + ConstantInt::get(CI->getType(), LenTrue-1), + ConstantInt::get(CI->getType(), LenFalse-1)); + } + } + // strlen(x) != 0 --> *x != 0 // strlen(x) == 0 --> *x == 0 if (isOnlyUsedInZeroEqualityComparison(CI)) return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType()); - return 0; + + return nullptr; } }; @@ -800,7 +816,7 @@ struct StrPBrkOpt : public LibCallOptimization { FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || FT->getReturnType() != FT->getParamType(0)) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -824,7 +840,7 @@ struct StrPBrkOpt : public LibCallOptimization { if (DL && HasS2 && S2.size() == 1) return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI); - return 0; + return nullptr; } }; @@ -835,7 +851,7 @@ struct StrToOpt : public LibCallOptimization { if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy()) - return 0; + return nullptr; Value *EndPtr = CI->getArgOperand(1); if (isa(EndPtr)) { @@ -844,7 +860,7 @@ struct StrToOpt : public LibCallOptimization { CI->addAttribute(1, Attribute::NoCapture); } - return 0; + return nullptr; } }; @@ -856,7 +872,7 @@ struct StrSpnOpt : public LibCallOptimization { FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -874,7 +890,7 @@ struct StrSpnOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), Pos); } - return 0; + return nullptr; } }; @@ -886,7 +902,7 @@ struct StrCSpnOpt : public LibCallOptimization { FT->getParamType(0) != B.getInt8PtrTy() || FT->getParamType(1) != FT->getParamType(0) || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; StringRef S1, S2; bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1); @@ -907,7 +923,7 @@ struct StrCSpnOpt : public 
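Illustration (not part of the patch): the new StrLenOpt fold above rewrites strlen of a select between two known strings as a select of two constants, and emits an optimization remark when it fires. In source terms:

#include <cassert>
#include <cstring>

static size_t original(bool x) {
  return strlen(x ? "foo" : "bars");  // what the unoptimized IR computes
}

static size_t folded(bool x) {
  return x ? 3u : 4u;                 // strlen(x?"foo":"bars") --> x ? 3 : 4
}

int main() {
  for (bool x : {false, true})
    assert(original(x) == folded(x));
  return 0;
}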
LibCallOptimization { if (DL && HasS2 && S2.empty()) return EmitStrLen(CI->getArgOperand(0), B, DL, TLI); - return 0; + return nullptr; } }; @@ -919,7 +935,7 @@ struct StrStrOpt : public LibCallOptimization { !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isPointerTy()) - return 0; + return nullptr; // fold strstr(x, x) -> x. if (CI->getArgOperand(0) == CI->getArgOperand(1)) @@ -929,11 +945,11 @@ struct StrStrOpt : public LibCallOptimization { if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI); if (!StrLen) - return 0; + return nullptr; Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), StrLen, B, DL, TLI); if (!StrNCmp) - return 0; + return nullptr; for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) { ICmpInst *Old = cast(*UI++); Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, @@ -969,9 +985,9 @@ struct StrStrOpt : public LibCallOptimization { // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) { Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI); - return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0; + return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr; } - return 0; + return nullptr; } }; @@ -982,7 +998,7 @@ struct MemCmpOpt : public LibCallOptimization { if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy(32)) - return 0; + return nullptr; Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); @@ -991,7 +1007,7 @@ struct MemCmpOpt : public LibCallOptimization { // Make sure we have a constant length. ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); - if (!LenC) return 0; + if (!LenC) return nullptr; uint64_t Len = LenC->getZExtValue(); if (Len == 0) // memcmp(s1,s2,0) -> 0 @@ -1012,7 +1028,7 @@ struct MemCmpOpt : public LibCallOptimization { getConstantStringInfo(RHS, RHSStr)) { // Make sure we're not reading out-of-bounds memory. if (Len > LHSStr.size() || Len > RHSStr.size()) - return 0; + return nullptr; // Fold the memcmp and normalize the result. This way we get consistent // results across multiple platforms. uint64_t Ret = 0; @@ -1024,7 +1040,7 @@ struct MemCmpOpt : public LibCallOptimization { return ConstantInt::get(CI->getType(), Ret); } - return 0; + return nullptr; } }; @@ -1032,14 +1048,14 @@ struct MemCpyOpt : public LibCallOptimization { Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != DL->getIntPtrType(*Context)) - return 0; + return nullptr; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1052,14 +1068,14 @@ struct MemMoveOpt : public LibCallOptimization { Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { // These optimizations require DataLayout. 
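Illustration (not part of the patch): the MemCmpOpt hunk above folds memcmp of two known strings only when Len stays within both strings' bounds, then normalizes the result so the constant fold is identical across hosts. A standalone sketch of the normalization:

#include <cassert>
#include <cstring>

static int folded_memcmp(const char *l, const char *r, size_t n) {
  int raw = memcmp(l, r, n);              // host result, magnitude unspecified
  return raw < 0 ? -1 : raw > 0 ? 1 : 0;  // normalized to -1/0/1
}

int main() {
  assert(folded_memcmp("hello", "help", 3) == 0);   // equal prefix
  assert(folded_memcmp("hello", "help", 4) == -1);  // 'l' < 'p'
  return 0;
}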
- if (!DL) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || FT->getParamType(2) != DL->getIntPtrType(*Context)) - return 0; + return nullptr; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1072,14 +1088,14 @@ struct MemSetOpt : public LibCallOptimization { Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isIntegerTy() || FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0))) - return 0; + return nullptr; // memset(p, v, n) -> llvm.memset(p, v, n, 1) Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); @@ -1103,21 +1119,21 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || !FT->getParamType(0)->isDoubleTy()) - return 0; + return nullptr; if (CheckRetType) { // Check if all the uses for function like 'sin' are converted to float. for (User *U : CI->users()) { FPTruncInst *Cast = dyn_cast(U); - if (Cast == 0 || !Cast->getType()->isFloatTy()) - return 0; + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; } } // If this is something like 'floor((double)floatval)', convert to floorf. FPExtInst *Cast = dyn_cast(CI->getArgOperand(0)); - if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) - return 0; + if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy()) + return nullptr; // floor((double)floatval) -> (double)floorf(floatval) Value *V = Cast->getOperand(0); @@ -1138,15 +1154,15 @@ struct BinaryDoubleFPOpt : public LibCallOptimization { if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || FT->getParamType(0) != FT->getParamType(1) || !FT->getParamType(0)->isFloatingPointTy()) - return 0; + return nullptr; if (CheckRetType) { // Check if all the uses for function like 'fmin/fmax' are converted to // float. for (User *U : CI->users()) { FPTruncInst *Cast = dyn_cast(U); - if (Cast == 0 || !Cast->getType()->isFloatTy()) - return 0; + if (!Cast || !Cast->getType()->isFloatTy()) + return nullptr; } } @@ -1154,13 +1170,13 @@ struct BinaryDoubleFPOpt : public LibCallOptimization { // we convert it to fminf. 
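Illustration (not part of the patch): UnaryDoubleFPOpt above rewrites floor((double)floatval) to (double)floorf(floatval), and with CheckRetType set it requires every use to be an FPTrunc back to float. The shrink is exact because floor of any float value is an integer that float can still represent:

#include <cassert>
#include <cmath>

int main() {
  float f = 2.75f;
  double wide   = floor(static_cast<double>(f));   // original form
  double narrow = static_cast<double>(floorf(f));  // shrunk form
  assert(wide == narrow);
  return 0;
}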
FPExtInst *Cast1 = dyn_cast(CI->getArgOperand(0)); FPExtInst *Cast2 = dyn_cast(CI->getArgOperand(1)); - if (Cast1 == 0 || !Cast1->getOperand(0)->getType()->isFloatTy() || - Cast2 == 0 || !Cast2->getOperand(0)->getType()->isFloatTy()) - return 0; + if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() || + !Cast2 || !Cast2->getOperand(0)->getType()->isFloatTy()) + return nullptr; // fmin((double)floatval1, (double)floatval2) // -> (double)fmin(floatval1, floatval2) - Value *V = NULL; + Value *V = nullptr; Value *V1 = Cast1->getOperand(0); Value *V2 = Cast2->getOperand(0); V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, @@ -1180,7 +1196,7 @@ struct CosOpt : public UnsafeFPLibCallOptimization { CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { - Value *Ret = NULL; + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1208,7 +1224,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization { PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { - Value *Ret = NULL; + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1242,7 +1258,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization { } ConstantFP *Op2C = dyn_cast(Op2); - if (Op2C == 0) return Ret; + if (!Op2C) return Ret; if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0 return ConstantFP::get(CI->getType(), 1.0); @@ -1275,7 +1291,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization { if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); - return 0; + return nullptr; } }; @@ -1283,7 +1299,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {} Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) override { - Value *Ret = NULL; + Value *Ret = nullptr; if (UnsafeFPShrink && Callee->getName() == "exp2" && TLI->has(LibFunc::exp2f)) { UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true); @@ -1307,7 +1323,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization { LdExp = LibFunc::ldexp; if (TLI->has(LdExp)) { - Value *LdExpArg = 0; + Value *LdExpArg = nullptr; if (SIToFPInst *OpC = dyn_cast(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty()); @@ -1344,7 +1360,7 @@ struct SinCosPiOpt : public LibCallOptimization { // Make sure the prototype is as expected, otherwise the rest of the // function is probably invalid and likely to abort. if (!isTrigLibCall(CI)) - return 0; + return nullptr; Value *Arg = CI->getArgOperand(0); SmallVector SinCalls; @@ -1362,7 +1378,7 @@ struct SinCosPiOpt : public LibCallOptimization { // It's only worthwhile if both sinpi and cospi are actually used. 
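Illustration (not part of the patch; the second assert assumes a correctly rounded pow such as glibc's): two of the PowOpt folds visible above, checked in source terms:

#include <cassert>
#include <cmath>

int main() {
  double x = 3.0;
  assert(pow(x, 0.0)  == 1.0);      // pow(x, 0.0) -> 1.0 (guaranteed by C)
  assert(pow(x, -1.0) == 1.0 / x);  // pow(x, -1.0) -> 1.0/x
  return 0;
}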
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty())) - return 0; + return nullptr; Value *Sin, *Cos, *SinCos; insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, @@ -1372,7 +1388,7 @@ struct SinCosPiOpt : public LibCallOptimization { replaceTrigInsts(CosCalls, Cos); replaceTrigInsts(SinCosCalls, SinCos); - return 0; + return nullptr; } bool isTrigLibCall(CallInst *CI) { @@ -1498,7 +1514,7 @@ struct FFSOpt : public LibCallOptimization { if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) || !FT->getParamType(0)->isIntegerTy()) - return 0; + return nullptr; Value *Op = CI->getArgOperand(0); @@ -1531,7 +1547,7 @@ struct AbsOpt : public LibCallOptimization { // We require integer(integer) where the types agree. if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || FT->getParamType(0) != FT->getReturnType()) - return 0; + return nullptr; // abs(x) -> x >s -1 ? x : -x Value *Op = CI->getArgOperand(0); @@ -1549,7 +1565,7 @@ struct IsDigitOpt : public LibCallOptimization { // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // isdigit(c) -> (c-'0') getArgOperand(0); @@ -1566,7 +1582,7 @@ struct IsAsciiOpt : public LibCallOptimization { // We require integer(i32) if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // isascii(c) -> c getArgOperand(0); @@ -1582,7 +1598,7 @@ struct ToAsciiOpt : public LibCallOptimization { // We require i32(i32) if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || !FT->getParamType(0)->isIntegerTy(32)) - return 0; + return nullptr; // toascii(c) -> c & 0x7f return B.CreateAnd(CI->getArgOperand(0), @@ -1612,7 +1628,7 @@ struct ErrorReportingOpt : public LibCallOptimization { CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold); } - return 0; + return nullptr; } protected: @@ -1649,7 +1665,7 @@ struct PrintFOpt : public LibCallOptimization { // Check for a fixed format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr)) - return 0; + return nullptr; // Empty format string -> noop. if (FormatStr.empty()) // Tolerate printf's declared void. @@ -1660,7 +1676,7 @@ struct PrintFOpt : public LibCallOptimization { // is used, in general the printf return value is not compatible with either // putchar() or puts(). if (!CI->use_empty()) - return 0; + return nullptr; // printf("x") -> putchar('x'), even for '%'. if (FormatStr.size() == 1) { @@ -1697,7 +1713,7 @@ struct PrintFOpt : public LibCallOptimization { CI->getArgOperand(1)->getType()->isPointerTy()) { return EmitPutS(CI->getArgOperand(1), B, DL, TLI); } - return 0; + return nullptr; } Value *callOptimizer(Function *Callee, CallInst *CI, @@ -1707,7 +1723,7 @@ struct PrintFOpt : public LibCallOptimization { if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return 0; + return nullptr; if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1724,7 +1740,7 @@ struct PrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1734,7 +1750,7 @@ struct SPrintFOpt : public LibCallOptimization { // Check for a fixed format string. 
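Note: angle-bracket stripping ate the comparisons in two comments above; the intended folds are isdigit(c) -> (c - '0') <u 10 and isascii(c) -> c <u 128, where <u is an unsigned compare that turns the two-sided range test into a single comparison. Illustration (not part of the patch):

#include <cassert>

static bool folded_isdigit(int c) { return static_cast<unsigned>(c - '0') < 10u; }
static bool folded_isascii(int c) { return static_cast<unsigned>(c) < 128u; }
static int  folded_toascii(int c) { return c & 0x7f; }  // toascii(c) -> c & 0x7f

int main() {
  assert(folded_isdigit('7') && !folded_isdigit('a'));
  assert(folded_isascii('A') && !folded_isascii(200));
  assert(folded_toascii(0xC1) == 0x41);
  return 0;
}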
StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; + return nullptr; // If we just have a format string (nothing else crazy) transform it. if (CI->getNumArgOperands() == 2) { @@ -1742,10 +1758,10 @@ struct SPrintFOpt : public LibCallOptimization { // %% -> % in the future if we cared. for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') - return 0; // we found a format specifier, bail out. + return nullptr; // we found a format specifier, bail out. // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), @@ -1758,12 +1774,12 @@ struct SPrintFOpt : public LibCallOptimization { // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumArgOperands() < 3) - return 0; + return nullptr; // Decode the second character of the format string. if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char"); Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); @@ -1775,14 +1791,14 @@ struct SPrintFOpt : public LibCallOptimization { if (FormatStr[1] == 's') { // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr; Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI); if (!Len) - return 0; + return nullptr; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); @@ -1791,7 +1807,7 @@ struct SPrintFOpt : public LibCallOptimization { // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); } - return 0; + return nullptr; } Value *callOptimizer(Function *Callee, CallInst *CI, @@ -1801,7 +1817,7 @@ struct SPrintFOpt : public LibCallOptimization { if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1818,7 +1834,7 @@ struct SPrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1831,22 +1847,22 @@ struct FPrintFOpt : public LibCallOptimization { // All the optimizations depend on the format string. StringRef FormatStr; if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr)) - return 0; + return nullptr; // Do not do any of the following transformations if the fprintf return // value is used, in general the fprintf return value is not compatible // with fwrite(), fputc() or fputs(). if (!CI->use_empty()) - return 0; + return nullptr; // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. - return 0; // We found a format specifier. + return nullptr; // We found a format specifier. // These optimizations require DataLayout. 
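Illustration (not part of the patch): the sprintf(dst, "%c", chr) lowering above stores the character, stores a terminating NUL one byte later, and returns 1, the number of bytes written:

#include <cassert>
#include <cstdio>
#include <cstring>

static int folded_sprintf_c(char *dst, int chr) {
  dst[0] = static_cast<char>(chr);  // *(i8*)dst = chr
  dst[1] = '\0';                    // *((i8*)dst+1) = 0
  return 1;                         // sprintf returns the byte count
}

int main() {
  char a[8], b[8];
  int ra = sprintf(a, "%c", 'Q');
  int rb = folded_sprintf_c(b, 'Q');
  assert(ra == rb && strcmp(a, b) == 0);
  return 0;
}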
- if (!DL) return 0; + if (!DL) return nullptr; return EmitFWrite(CI->getArgOperand(1), ConstantInt::get(DL->getIntPtrType(*Context), @@ -1858,22 +1874,22 @@ struct FPrintFOpt : public LibCallOptimization { // and have an extra operand. if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumArgOperands() < 3) - return 0; + return nullptr; // Decode the second character of the format string. if (FormatStr[1] == 'c') { // fprintf(F, "%c", chr) --> fputc(chr, F) - if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr; return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); } if (FormatStr[1] == 's') { // fprintf(F, "%s", str) --> fputs(str, F) if (!CI->getArgOperand(2)->getType()->isPointerTy()) - return 0; + return nullptr; return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI); } - return 0; + return nullptr; } Value *callOptimizer(Function *Callee, CallInst *CI, @@ -1883,7 +1899,7 @@ struct FPrintFOpt : public LibCallOptimization { if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; if (Value *V = optimizeFixedFormatString(Callee, CI, B)) { return V; @@ -1900,7 +1916,7 @@ struct FPrintFOpt : public LibCallOptimization { B.Insert(New); return New; } - return 0; + return nullptr; } }; @@ -1917,12 +1933,12 @@ struct FWriteOpt : public LibCallOptimization { !FT->getParamType(2)->isIntegerTy() || !FT->getParamType(3)->isPointerTy() || !FT->getReturnType()->isIntegerTy()) - return 0; + return nullptr; // Get the element size and count. ConstantInt *SizeC = dyn_cast(CI->getArgOperand(1)); ConstantInt *CountC = dyn_cast(CI->getArgOperand(2)); - if (!SizeC || !CountC) return 0; + if (!SizeC || !CountC) return nullptr; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); // If this is writing zero records, remove the call (it's a noop). @@ -1934,10 +1950,10 @@ struct FWriteOpt : public LibCallOptimization { if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI); - return NewCI ? ConstantInt::get(CI->getType(), 1) : 0; + return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; } - return 0; + return nullptr; } }; @@ -1948,18 +1964,18 @@ struct FPutsOpt : public LibCallOptimization { (void) ER.callOptimizer(Callee, CI, B); // These optimizations require DataLayout. - if (!DL) return 0; + if (!DL) return nullptr; // Require two pointers. Also, we can't optimize if return value is used. FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || !FT->getParamType(1)->isPointerTy() || !CI->use_empty()) - return 0; + return nullptr; // fputs(s,F) --> fwrite(s,1,strlen(s),F) uint64_t Len = GetStringLength(CI->getArgOperand(0)); - if (!Len) return 0; + if (!Len) return nullptr; // Known to have no uses (see above). return EmitFWrite(CI->getArgOperand(0), ConstantInt::get(DL->getIntPtrType(*Context), Len-1), @@ -1975,12 +1991,12 @@ struct PutsOpt : public LibCallOptimization { if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || !(FT->getReturnType()->isIntegerTy() || FT->getReturnType()->isVoidTy())) - return 0; + return nullptr; // Check for a constant string. 
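Illustration (not part of the patch): FPutsOpt above rewrites fputs(s, F) as fwrite(s, 1, strlen(s), F) when the string length is a known constant and the return value is unused, trading the runtime strlen for a constant:

#include <cstdio>
#include <cstring>

int main() {
  const char *s = "hello\n";
  // fputs(s, stdout) becomes the following; strlen(s) folds to 6 here.
  fwrite(s, 1, strlen(s), stdout);
  return 0;
}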
StringRef Str; if (!getConstantStringInfo(CI->getArgOperand(0), Str)) - return 0; + return nullptr; if (Str.empty() && CI->use_empty()) { // puts("") -> putchar('\n') @@ -1989,7 +2005,7 @@ struct PutsOpt : public LibCallOptimization { return B.CreateIntCast(Res, CI->getType(), true); } - return 0; + return nullptr; } }; @@ -2100,7 +2116,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case Intrinsic::exp2: return &Exp2; default: - return 0; + return nullptr; } } @@ -2210,7 +2226,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::trunc: if (hasFloatVersion(FuncName)) return &UnaryDoubleFP; - return 0; + return nullptr; case LibFunc::acos: case LibFunc::acosh: case LibFunc::asin: @@ -2234,16 +2250,16 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { case LibFunc::tanh: if (UnsafeFPShrink && hasFloatVersion(FuncName)) return &UnsafeUnaryDoubleFP; - return 0; + return nullptr; case LibFunc::fmin: case LibFunc::fmax: if (hasFloatVersion(FuncName)) return &BinaryDoubleFP; - return 0; + return nullptr; case LibFunc::memcpy_chk: return &MemCpyChk; default: - return 0; + return nullptr; } } @@ -2263,7 +2279,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { return &StrNCpyChk; } - return 0; + return nullptr; } @@ -2273,7 +2289,7 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { IRBuilder<> Builder(CI); return LCO->optimizeCall(CI, DL, TLI, LCS, Builder); } - return 0; + return nullptr; } LibCallSimplifier::LibCallSimplifier(const DataLayout *DL, @@ -2287,7 +2303,7 @@ LibCallSimplifier::~LibCallSimplifier() { } Value *LibCallSimplifier::optimizeCall(CallInst *CI) { - if (CI->isNoBuiltin()) return 0; + if (CI->isNoBuiltin()) return nullptr; return Impl->optimizeCall(CI); } diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp index c318560..2c6fcd1 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Transforms/Utils/SpecialCaseList.cpp @@ -41,7 +41,7 @@ struct SpecialCaseList::Entry { StringSet<> Strings; Regex *RegEx; - Entry() : RegEx(0) {} + Entry() : RegEx(nullptr) {} bool match(StringRef Query) const { return Strings.count(Query) || (RegEx && RegEx->match(Query)); @@ -57,7 +57,7 @@ SpecialCaseList *SpecialCaseList::create( std::unique_ptr File; if (error_code EC = MemoryBuffer::getFile(Path, File)) { Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); - return 0; + return nullptr; } return create(File.get(), Error); } @@ -66,7 +66,7 @@ SpecialCaseList *SpecialCaseList::create( const MemoryBuffer *MB, std::string &Error) { std::unique_ptr SCL(new SpecialCaseList()); if (!SCL->parse(MB, Error)) - return 0; + return nullptr; return SCL.release(); } diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 560f581..0c2fc0a 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -59,7 +59,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Then unreachable blocks. if (UnreachableBlocks.empty()) { - UnreachableBlock = 0; + UnreachableBlock = nullptr; } else if (UnreachableBlocks.size() == 1) { UnreachableBlock = UnreachableBlocks.front(); } else { @@ -77,7 +77,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { // Now handle return blocks. 
if (ReturningBlocks.empty()) { - ReturnBlock = 0; + ReturnBlock = nullptr; return false; // No blocks return } else if (ReturningBlocks.size() == 1) { ReturnBlock = ReturningBlocks.front(); // Already has a single return block @@ -91,9 +91,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "UnifiedReturnBlock", &F); - PHINode *PN = 0; + PHINode *PN = nullptr; if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), NULL, NewRetBlock); + ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 457fc80..0f20e6d 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -71,12 +71,12 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Check all operands to see if any need to be remapped. for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) { Value *OP = MD->getOperand(i); - if (OP == 0) continue; + if (!OP) continue; Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. if (Mapped_OP == OP || - (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries))) + (Mapped_OP == nullptr && (Flags & RF_IgnoreMissingEntries))) continue; // Ok, at least one operand needs remapping. @@ -84,13 +84,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Elts.reserve(MD->getNumOperands()); for (i = 0; i != e; ++i) { Value *Op = MD->getOperand(i); - if (Op == 0) - Elts.push_back(0); + if (!Op) + Elts.push_back(nullptr); else { Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer); // Use identity map if Mapped_Op is null and we can ignore missing // entries. - if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries)) + if (Mapped_Op == nullptr && (Flags & RF_IgnoreMissingEntries)) Mapped_Op = Op; Elts.push_back(Mapped_Op); } @@ -112,8 +112,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Okay, this either must be a constant (which may or may not be mappable) or // is something that is not in the mapping table. Constant *C = const_cast(dyn_cast(V)); - if (C == 0) - return 0; + if (!C) + return nullptr; if (BlockAddress *BA = dyn_cast(C)) { Function *F = @@ -126,7 +126,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, // Otherwise, we have some other constant to remap. Start by checking to see // if all operands have an identity remapping. unsigned OpNo = 0, NumOperands = C->getNumOperands(); - Value *Mapped = 0; + Value *Mapped = nullptr; for (; OpNo != NumOperands; ++OpNo) { Value *Op = C->getOperand(OpNo); Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer); @@ -187,7 +187,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) { Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer); // If we aren't ignoring missing entries, assert that something happened. 
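Illustration (not part of the patch): UnifyFunctionExitNodes above funnels all returning blocks into one UnifiedReturnBlock whose PHI node merges the returned values. Roughly, in source terms:

#include <cassert>

static int unified(int x) {
  int ret;                          // plays the role of the PHI node
  if (x > 0) { ret = 1; goto UnifiedReturnBlock; }
  ret = -1;
UnifiedReturnBlock:
  return ret;                       // the function's single ReturnInst
}

int main() {
  assert(unified(2) == 1 && unified(-2) == -1);
  return 0;
}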
- if (V != 0) + if (V) *op = V; else assert((Flags & RF_IgnoreMissingEntries) && @@ -199,7 +199,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags); // If we aren't ignoring missing entries, assert that something happened. - if (V != 0) + if (V) PN->setIncomingBlock(i, cast(V)); else assert((Flags & RF_IgnoreMissingEntries) && diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 71350e7..28ec83b 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -15,7 +15,6 @@ //===----------------------------------------------------------------------===// #define BBV_NAME "bb-vectorize" -#define DEBUG_TYPE BBV_NAME #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -50,6 +49,8 @@ #include using namespace llvm; +#define DEBUG_TYPE BBV_NAME + static cl::opt IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false), cl::Hidden, cl::desc("Ignore target information")); @@ -122,6 +123,10 @@ NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize floating-point math intrinsics")); static cl::opt + NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden, + cl::desc("Don't try to vectorize BitManipulation intrinsics")); + +static cl::opt NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden, cl::desc("Don't try to vectorize the fused-multiply-add intrinsic")); @@ -202,8 +207,8 @@ namespace { DT = &P->getAnalysis().getDomTree(); SE = &P->getAnalysis(); DataLayoutPass *DLP = P->getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; - TTI = IgnoreTargetInfo ? 0 : &P->getAnalysis(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis(); } typedef std::pair ValuePair; @@ -279,7 +284,7 @@ namespace { bool trackUsesOfI(DenseSet &Users, AliasSetTracker &WriteSet, Instruction *I, Instruction *J, bool UpdateUsers = true, - DenseSet *LoadMoveSetPairs = 0); + DenseSet *LoadMoveSetPairs = nullptr); void computePairsConnectedTo( DenseMap > &CandidatePairs, @@ -292,8 +297,8 @@ namespace { bool pairsConflict(ValuePair P, ValuePair Q, DenseSet &PairableInstUsers, DenseMap > - *PairableInstUserMap = 0, - DenseSet *PairableInstUserPairSet = 0); + *PairableInstUserMap = nullptr, + DenseSet *PairableInstUserPairSet = nullptr); bool pairWillFormCycle(ValuePair P, DenseMap > &PairableInstUsers, @@ -438,8 +443,8 @@ namespace { DT = &getAnalysis().getDomTree(); SE = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; - TTI = IgnoreTargetInfo ? 0 : &getAnalysis(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + TTI = IgnoreTargetInfo ? 
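Illustration (not part of the patch): the BBVectorize hunk above moves #define DEBUG_TYPE from before the #include block to after it (LoopVectorize.cpp gets the same treatment below). DEBUG_TYPE is an ordinary macro, so defining it first would make it visible inside every included header; the new ordering keeps it effective only for this file's own code:

#include "llvm/Support/Debug.h"    // headers are parsed with no DEBUG_TYPE

using namespace llvm;

#define DEBUG_TYPE "bb-vectorize"  // takes effect only after all includes

static void trace() {
  DEBUG(dbgs() << "printed under -debug-only=" << DEBUG_TYPE << "\n");
}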
nullptr : &getAnalysis(); return vectorizeBB(BB); } @@ -674,7 +679,20 @@ namespace { case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::pow: + case Intrinsic::round: + case Intrinsic::copysign: + case Intrinsic::ceil: + case Intrinsic::nearbyint: + case Intrinsic::rint: + case Intrinsic::trunc: + case Intrinsic::floor: + case Intrinsic::fabs: return Config.VectorizeMath; + case Intrinsic::bswap: + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + return Config.VectorizeBitManipulations; case Intrinsic::fma: case Intrinsic::fmuladd: return Config.VectorizeFMA; @@ -878,7 +896,7 @@ namespace { } // We can't vectorize memory operations without target data - if (DL == 0 && IsSimpleLoadStore) + if (!DL && IsSimpleLoadStore) return false; Type *T1, *T2; @@ -915,7 +933,7 @@ namespace { if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy()) return false; - if ((!Config.VectorizePointers || DL == 0) && + if ((!Config.VectorizePointers || !DL) && (T1->getScalarType()->isPointerTy() || T2->getScalarType()->isPointerTy())) return false; @@ -1049,7 +1067,7 @@ namespace { (isa(JOp) || isa(JOp))) { Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; Constant *SplatValue = cast(IOp)->getSplatValue(); - if (SplatValue != NULL && + if (SplatValue != nullptr && SplatValue == cast(JOp)->getSplatValue()) Op2VK = TargetTransformInfo::OK_UniformConstantValue; } @@ -1079,13 +1097,14 @@ namespace { CostSavings = ICost + JCost - VCost; } - // The powi intrinsic is special because only the first argument is - // vectorized, the second arguments must be equal. + // The powi,ctlz,cttz intrinsics are special because only the first + // argument is vectorized, the second arguments must be equal. CallInst *CI = dyn_cast(I); Function *FI; if (CI && (FI = CI->getCalledFunction())) { Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID(); - if (IID == Intrinsic::powi) { + if (IID == Intrinsic::powi || IID == Intrinsic::ctlz || + IID == Intrinsic::cttz) { Value *A1I = CI->getArgOperand(1), *A1J = cast(J)->getArgOperand(1); const SCEV *A1ISCEV = SE->getSCEV(A1I), @@ -1109,7 +1128,8 @@ namespace { assert(CI->getNumArgOperands() == CJ->getNumArgOperands() && "Intrinsic argument counts differ"); for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { - if (IID == Intrinsic::powi && i == 1) + if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz || + IID == Intrinsic::cttz) && i == 1) Tys.push_back(CI->getArgOperand(i)->getType()); else Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(), @@ -1665,8 +1685,9 @@ namespace { C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet + : nullptr)) { if (C2->second >= C->second) { CanAdd = false; break; @@ -1686,8 +1707,9 @@ namespace { T->second == C->first.first || T->second == C->first.second || pairsConflict(*T, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet + : nullptr)) { CanAdd = false; break; } @@ -1704,8 +1726,9 @@ namespace { C2->first.second == C->first.first || C2->first.second == C->first.second || pairsConflict(C2->first, C->first, PairableInstUsers, - UseCycleCheck ? 
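Illustration (not part of the patch; the struct is a hypothetical stand-in, not an LLVM type, and plain equality stands in for the SCEV comparison the patch performs): the rule added above says powi, ctlz and cttz may only be paired when their second arguments are identical, because that operand stays scalar in the fused call:

#include <cassert>

struct Call { int intrinsicID; long secondArg; };  // hypothetical stand-ins

static bool canPair(const Call &I, const Call &J,
                    int powi, int ctlz, int cttz) {
  if (I.intrinsicID != J.intrinsicID)
    return false;
  if (I.intrinsicID == powi || I.intrinsicID == ctlz ||
      I.intrinsicID == cttz)
    return I.secondArg == J.secondArg;  // scalar operand must match
  return true;
}

int main() {
  Call a{2, 1}, b{2, 1}, c{2, 0};
  assert(canPair(a, b, 1, 2, 3));
  assert(!canPair(a, c, 1, 2, 3));  // differing scalar args: no pairing
  return 0;
}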
&PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet + : nullptr)) { CanAdd = false; break; } @@ -1720,8 +1743,9 @@ namespace { ChosenPairs.begin(), E2 = ChosenPairs.end(); C2 != E2; ++C2) { if (pairsConflict(*C2, C->first, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet + : nullptr)) { CanAdd = false; break; } @@ -1802,8 +1826,8 @@ namespace { for (DenseMap::iterator C = ChosenPairs.begin(), E = ChosenPairs.end(); C != E; ++C) { if (pairsConflict(*C, IJ, PairableInstUsers, - UseCycleCheck ? &PairableInstUserMap : 0, - UseCycleCheck ? &PairableInstUserPairSet : 0)) { + UseCycleCheck ? &PairableInstUserMap : nullptr, + UseCycleCheck ? &PairableInstUserPairSet : nullptr)) { DoesConflict = true; break; } @@ -2373,7 +2397,7 @@ namespace { } while ((LIENext = dyn_cast(LIENext->getOperand(0)))); - LIENext = 0; + LIENext = nullptr; Value *LIEPrev = UndefValue::get(ArgTypeH); for (unsigned i = 0; i < numElemL; ++i) { if (isa(VectElemts[i])) continue; @@ -2441,14 +2465,14 @@ namespace { if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) { // We can have at most two unique vector inputs. bool CanUseInputs = true; - Value *I1, *I2 = 0; + Value *I1, *I2 = nullptr; if (LEE) { I1 = LEE->getOperand(0); } else { I1 = LSV->getOperand(0); I2 = LSV->getOperand(1); if (I2 == I1 || isa(I2)) - I2 = 0; + I2 = nullptr; } if (HEE) { @@ -2764,10 +2788,11 @@ namespace { ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType); continue; - } else if (IID == Intrinsic::powi && o == 1) { - // The second argument of powi is a single integer and we've already - // checked that both arguments are equal. As a result, we just keep - // I's second argument. + } else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz || + IID == Intrinsic::cttz) && o == 1) { + // The second argument of powi/ctlz/cttz is a single integer/constant + // and we've already checked that both arguments are equal. + // As a result, we just keep I's second argument. 
ReplacedOperands[o] = I->getOperand(o); continue; } @@ -2952,7 +2977,7 @@ namespace { switch (Kind) { default: - K->setMetadata(Kind, 0); // Remove unknown metadata + K->setMetadata(Kind, nullptr); // Remove unknown metadata break; case LLVMContext::MD_tbaa: K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD)); @@ -3123,7 +3148,7 @@ namespace { // Instruction insertion point: Instruction *InsertionPt = K; - Instruction *K1 = 0, *K2 = 0; + Instruction *K1 = nullptr, *K2 = nullptr; replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2); // The use dag of the first original instruction must be moved to after @@ -3213,6 +3238,7 @@ VectorizeConfig::VectorizeConfig() { VectorizePointers = !::NoPointers; VectorizeCasts = !::NoCasts; VectorizeMath = !::NoMath; + VectorizeBitManipulations = !::NoBitManipulation; VectorizeFMA = !::NoFMA; VectorizeSelect = !::NoSelect; VectorizeCmp = !::NoCmp; diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 9a98c44..34d8a10 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -42,9 +42,6 @@ // //===----------------------------------------------------------------------===// -#define LV_NAME "loop-vectorize" -#define DEBUG_TYPE LV_NAME - #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/EquivalenceClasses.h" @@ -54,6 +51,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -67,7 +65,9 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -85,16 +85,23 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/VectorUtils.h" #include #include +#include using namespace llvm; using namespace llvm::PatternMatch; +#define LV_NAME "loop-vectorize" +#define DEBUG_TYPE LV_NAME + +STATISTIC(LoopsVectorized, "Number of loops vectorized"); +STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); + static cl::opt VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect.")); @@ -223,8 +230,9 @@ public: const TargetLibraryInfo *TLI, unsigned VecWidth, unsigned UnrollFactor) : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI), - VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0), - OldInduction(0), WidenMap(UnrollFactor), Legal(0) {} + VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), + Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), + Legal(nullptr) {} // Perform the actual loop widening (vectorization). void vectorize(LoopVectorizationLegality *L) { @@ -469,6 +477,24 @@ static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { B.SetCurrentDebugLocation(DebugLoc()); } +#ifndef NDEBUG +/// \return string containing a file name and a line # for the given loop. 
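Illustration (not part of the patch): the two STATISTIC counters added above expand to named counters that opt prints under -stats; the macro picks up DEBUG_TYPE at its expansion point, which is why it follows the #define:

#include "llvm/ADT/Statistic.h"

using namespace llvm;

#define DEBUG_TYPE "loop-vectorize"

STATISTIC(LoopsVectorized, "Number of loops vectorized");

static void noteVectorized() {
  ++LoopsVectorized;  // bumped once per vectorized loop, as in processLoop()
}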
+static std::string getDebugLocString(const Loop *L) { + std::string Result; + if (L) { + raw_string_ostream OS(Result); + const DebugLoc LoopDbgLoc = L->getStartLoc(); + if (!LoopDbgLoc.isUnknown()) + LoopDbgLoc.print(L->getHeader()->getContext(), OS); + else + // Just print the module name. + OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier(); + OS.flush(); + } + return Result; +} +#endif + /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and /// to what vectorization factor. /// This class does not look at the profitability of vectorization, only the @@ -491,8 +517,8 @@ public: LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, DominatorTree *DT, TargetLibraryInfo *TLI) : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL), - DT(DT), TLI(TLI), Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false), - MaxSafeDepDistBytes(-1U) {} + DT(DT), TLI(TLI), Induction(nullptr), WidestIndTy(nullptr), + HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -530,7 +556,7 @@ public: /// This struct holds information about reduction variables. struct ReductionDescriptor { - ReductionDescriptor() : StartValue(0), LoopExitInstr(0), + ReductionDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {} ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K, @@ -602,7 +628,7 @@ public: /// A struct for saving information about induction variables. struct InductionInfo { InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {} - InductionInfo() : StartValue(0), IK(IK_NoInduction) {} + InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {} /// Start value. TrackingVH StartValue; /// Induction kind. @@ -789,7 +815,8 @@ public: /// then this vectorization factor will be selected if vectorization is /// possible. VectorizationFactor selectVectorizationFactor(bool OptForSize, - unsigned UserVF); + unsigned UserVF, + bool ForceVectorization); /// \return The size (in bits) of the widest type in the code that /// needs to be vectorized. We ignore values that remain scalar such as @@ -856,35 +883,32 @@ private: /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. -struct LoopVectorizeHints { - /// Vectorization width. - unsigned Width; - /// Vectorization unroll factor. - unsigned Unroll; - /// Vectorization forced (-1 not selected, 0 force disabled, 1 force enabled) - int Force; +class LoopVectorizeHints { +public: + enum ForceKind { + FK_Undefined = -1, ///< Not selected. + FK_Disabled = 0, ///< Forcing disabled. + FK_Enabled = 1, ///< Forcing enabled. + }; LoopVectorizeHints(const Loop *L, bool DisableUnrolling) - : Width(VectorizationFactor) - , Unroll(DisableUnrolling ? 1 : VectorizationUnroll) - , Force(-1) - , LoopID(L->getLoopID()) { + : Width(VectorizationFactor), + Unroll(DisableUnrolling), + Force(FK_Undefined), + LoopID(L->getLoopID()) { getHints(L); - // The command line options override any loop metadata except for when - // width == 1 which is used to indicate the loop is already vectorized. - if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1) - Width = VectorizationFactor; + // force-vector-unroll overrides DisableUnrolling. 
if (VectorizationUnroll.getNumOccurrences() > 0) Unroll = VectorizationUnroll; - DEBUG(if (DisableUnrolling && Unroll == 1) - dbgs() << "LV: Unrolling disabled by the pass manager\n"); + DEBUG(if (DisableUnrolling && Unroll == 1) dbgs() + << "LV: Unrolling disabled by the pass manager\n"); } /// Return the loop vectorizer metadata prefix. static StringRef Prefix() { return "llvm.vectorizer."; } - MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) { + MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) const { SmallVector Vals; Vals.push_back(MDString::get(Context, Name)); Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V)); @@ -918,9 +942,12 @@ struct LoopVectorizeHints { LoopID = NewLoopID; } -private: - MDNode *LoopID; + unsigned getWidth() const { return Width; } + unsigned getUnroll() const { return Unroll; } + enum ForceKind getForce() const { return Force; } + MDNode *getLoopID() const { return LoopID; } +private: /// Find hints specified in the loop metadata. void getHints(const Loop *L) { if (!LoopID) @@ -931,7 +958,7 @@ private: assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - const MDString *S = 0; + const MDString *S = nullptr; SmallVector Args; // The expected hint is either a MDString or a MDNode with the first @@ -980,13 +1007,23 @@ private: DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n"); } else if (Hint == "enable") { if (C->getBitWidth() == 1) - Force = Val; + Force = Val == 1 ? LoopVectorizeHints::FK_Enabled + : LoopVectorizeHints::FK_Disabled; else DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n"); } else { DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n'); } } + + /// Vectorization width. + unsigned Width; + /// Vectorization unroll factor. + unsigned Unroll; + /// Vectorization forced + enum ForceKind Force; + + MDNode *LoopID; }; static void addInnerLoop(Loop &L, SmallVectorImpl &V) { @@ -1024,7 +1061,7 @@ struct LoopVectorize : public FunctionPass { bool runOnFunction(Function &F) override { SE = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; LI = &getAnalysis(); TTI = &getAnalysis(); DT = &getAnalysis().getDomTree(); @@ -1041,8 +1078,9 @@ struct LoopVectorize : public FunctionPass { if (!TTI->getNumberOfRegisters(true)) return false; - if (DL == NULL) { - DEBUG(dbgs() << "LV: Not vectorizing: Missing data layout\n"); + if (!DL) { + DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName() + << ": Missing data layout\n"); return false; } @@ -1054,6 +1092,8 @@ struct LoopVectorize : public FunctionPass { for (Loop *L : *LI) addInnerLoop(*L, Worklist); + LoopsAnalyzed += Worklist.size(); + // Now walk the identified inner loops. 
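Illustration (not part of the patch): the LoopVectorizeHints rework above hides the fields behind getters and replaces the raw int force flag with the FK_Undefined/FK_Disabled/FK_Enabled enum; hints still live in loop metadata under the "llvm.vectorizer." prefix. Using the same calls as createHint(), a width hint node is built like this:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Builds !{!"llvm.vectorizer.width", i32 V}, the shape getHints() parses.
static MDNode *makeWidthHint(LLVMContext &Context, unsigned V) {
  SmallVector<Value *, 2> Vals;
  Vals.push_back(MDString::get(Context, "llvm.vectorizer.width"));
  Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V));
  return MDNode::get(Context, Vals);
}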
bool Changed = false; while (!Worklist.empty()) @@ -1065,26 +1105,56 @@ struct LoopVectorize : public FunctionPass { bool processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); - DEBUG(dbgs() << "LV: Checking a loop in \"" << - L->getHeader()->getParent()->getName() << "\"\n"); + +#ifndef NDEBUG + const std::string DebugLocStr = getDebugLocString(L); +#endif /* NDEBUG */ + + DEBUG(dbgs() << "\nLV: Checking a loop in \"" + << L->getHeader()->getParent()->getName() << "\" from " + << DebugLocStr << "\n"); LoopVectorizeHints Hints(L, DisableUnrolling); - if (Hints.Force == 0) { + DEBUG(dbgs() << "LV: Loop hints:" + << " force=" + << (Hints.getForce() == LoopVectorizeHints::FK_Disabled + ? "disabled" + : (Hints.getForce() == LoopVectorizeHints::FK_Enabled + ? "enabled" + : "?")) << " width=" << Hints.getWidth() + << " unroll=" << Hints.getUnroll() << "\n"); + + if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) { DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); return false; } - if (!AlwaysVectorize && Hints.Force != 1) { + if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) { DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); return false; } - if (Hints.Width == 1 && Hints.Unroll == 1) { + if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) { DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); return false; } + // Check the loop for a trip count threshold: + // do not vectorize loops with a tiny trip count. + BasicBlock *Latch = L->getLoopLatch(); + const unsigned TC = SE->getSmallConstantTripCount(L, Latch); + if (TC > 0u && TC < TinyTripCountVectorThreshold) { + DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " + << "This loop is not worth vectorizing."); + if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) + DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); + else { + DEBUG(dbgs() << "\n"); + return false; + } + } + // Check if it is legal to vectorize the loop. LoopVectorizationLegality LVL(L, SE, DL, DT, TLI); if (!LVL.canVectorize()) { @@ -1098,8 +1168,8 @@ struct LoopVectorize : public FunctionPass { // Check the function attributes to find out if this function should be // optimized for size. Function *F = L->getHeader()->getParent(); - bool OptForSize = - Hints.Force != 1 && F->hasFnAttribute(Attribute::OptimizeForSize); + bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && + F->hasFnAttribute(Attribute::OptimizeForSize); // Compute the weighted frequency of this loop being executed and see if it // is less than 20% of the function entry baseline frequency. Note that we @@ -1108,7 +1178,8 @@ struct LoopVectorize : public FunctionPass { // exactly what block frequency models. if (LoopVectorizeWithBlockFrequency) { BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); - if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq) + if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && + LoopEntryFreq < ColdEntryFreq) OptForSize = true; } @@ -1123,14 +1194,17 @@ struct LoopVectorize : public FunctionPass { } // Select the optimal vectorization factor. - LoopVectorizationCostModel::VectorizationFactor VF; - VF = CM.selectVectorizationFactor(OptForSize, Hints.Width); + const LoopVectorizationCostModel::VectorizationFactor VF = + CM.selectVectorizationFactor(OptForSize, Hints.getWidth(), + Hints.getForce() == + LoopVectorizeHints::FK_Enabled); + // Select the unroll factor. 
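Illustration (not part of the patch; the threshold is left as a parameter rather than restating the cl::opt default): the trip-count check added above now lives in processLoop() and can be overridden by a forced-enable hint, instead of unconditionally rejecting tiny loops inside canVectorize() (whose copy is deleted at the end of this patch):

#include <cassert>

enum ForceKind { FK_Undefined = -1, FK_Disabled = 0, FK_Enabled = 1 };

static bool passesTripCountGate(unsigned TC, unsigned Threshold, ForceKind F) {
  if (TC > 0u && TC < Threshold)
    return F == FK_Enabled;  // tiny known trip count: only if forced
  return true;               // unknown (TC == 0) or large trip count
}

int main() {
  assert(!passesTripCountGate(3, 16, FK_Undefined));
  assert(passesTripCountGate(3, 16, FK_Enabled));    // pragma wins
  assert(passesTripCountGate(0, 16, FK_Undefined));  // unknown trip count
  return 0;
}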
- unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width, - VF.Cost); + const unsigned UF = + CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost); - DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<< - F->getParent()->getModuleIdentifier() << '\n'); + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n'); if (VF.Width == 1) { @@ -1138,6 +1212,13 @@ struct LoopVectorize : public FunctionPass { if (UF == 1) return false; DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n"); + + // Report the unrolling decision. + emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + Twine("unrolled with interleaving factor " + + Twine(UF) + + " (vectorization not beneficial)")); + // We decided not to vectorize, but we may want to unroll. InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF); Unroller.vectorize(&LVL); @@ -1145,6 +1226,13 @@ struct LoopVectorize : public FunctionPass { // If we decided that it is *legal* to vectorize the loop then do it. InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF); LB.vectorize(&LVL); + ++LoopsVectorized; + + // Report the vectorization decision. + emitOptimizationRemark( + F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) + + ", unrolling interleave factor: " + Twine(UF) + ")"); } // Mark the loop as already vectorized to avoid vectorizing again. @@ -1188,7 +1276,7 @@ static Value *stripIntegerCast(Value *V) { /// \p Ptr. static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, ValueToValueMap &PtrToStride, - Value *Ptr, Value *OrigPtr = 0) { + Value *Ptr, Value *OrigPtr = nullptr) { const SCEV *OrigSCEV = SE->getSCEV(Ptr); @@ -1355,7 +1443,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // We can emit wide load/stores only if the last non-zero index is the // induction variable. - const SCEV *Last = 0; + const SCEV *Last = nullptr; if (!Strides.count(Gep)) Last = SE->getSCEV(Gep->getOperand(InductionOperand)); else { @@ -1604,17 +1692,17 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic // Does this instruction return a value ? bool IsVoidRetTy = Instr->getType()->isVoidTy(); - Value *UndefVec = IsVoidRetTy ? 0 : + Value *UndefVec = IsVoidRetTy ? nullptr : UndefValue::get(VectorType::get(Instr->getType(), VF)); // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); Instruction *InsertPt = Builder.GetInsertPoint(); BasicBlock *IfBlock = Builder.GetInsertBlock(); - BasicBlock *CondBlock = 0; + BasicBlock *CondBlock = nullptr; VectorParts Cond; - Loop *VectorLp = 0; + Loop *VectorLp = nullptr; if (IfPredicateStore) { assert(Instr->getParent()->getSinglePredecessor() && "Only support single predecessor blocks"); @@ -1630,7 +1718,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic for (unsigned Width = 0; Width < VF; ++Width) { // Start if-block. 
- Value *Cmp = 0; + Value *Cmp = nullptr; if (IfPredicateStore) { Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width)); Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1)); @@ -1681,21 +1769,21 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, if (FirstInst) return FirstInst; if (Instruction *I = dyn_cast(V)) - return I->getParent() == Loc->getParent() ? I : 0; - return 0; + return I->getParent() == Loc->getParent() ? I : nullptr; + return nullptr; } std::pair InnerLoopVectorizer::addStrideCheck(Instruction *Loc) { - Instruction *tnullptr = 0; + Instruction *tnullptr = nullptr; if (!Legal->mustCheckStrides()) return std::pair(tnullptr, tnullptr); IRBuilder<> ChkBuilder(Loc); // Emit checks. - Value *Check = 0; - Instruction *FirstInst = 0; + Value *Check = nullptr; + Instruction *FirstInst = nullptr; for (SmallPtrSet::iterator SI = Legal->strides_begin(), SE = Legal->strides_end(); SI != SE; ++SI) { @@ -1727,7 +1815,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) { LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck = Legal->getRuntimePointerCheck(); - Instruction *tnullptr = 0; + Instruction *tnullptr = nullptr; if (!PtrRtCheck->Need) return std::pair(tnullptr, tnullptr); @@ -1737,7 +1825,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) { LLVMContext &Ctx = Loc->getContext(); SCEVExpander Exp(*SE, "induction"); - Instruction *FirstInst = 0; + Instruction *FirstInst = nullptr; for (unsigned i = 0; i < NumPointers; ++i) { Value *Ptr = PtrRtCheck->Pointers[i]; @@ -1764,7 +1852,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) { IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. - Value *MemoryRuntimeCheck = 0; + Value *MemoryRuntimeCheck = nullptr; for (unsigned i = 0; i < NumPointers; ++i) { for (unsigned j = i+1; j < NumPointers; ++j) { // No need to check if two readonly pointers intersect. @@ -2028,7 +2116,7 @@ void InnerLoopVectorizer::createEmptyLoop() { // start value. // This variable saves the new starting index for the scalar loop. - PHINode *ResumeIndex = 0; + PHINode *ResumeIndex = nullptr; LoopVectorizationLegality::InductionList::iterator I, E; LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); // Set builder to point to last bypass block. @@ -2044,9 +2132,9 @@ void InnerLoopVectorizer::createEmptyLoop() { // truncated version for the scalar loop. PHINode *TruncResumeVal = (OrigPhi == OldInduction) ? 
PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val", - MiddleBlock->getTerminator()) : 0; + MiddleBlock->getTerminator()) : nullptr; - Value *EndValue = 0; + Value *EndValue = nullptr; switch (II.IK) { case LoopVectorizationLegality::IK_NoInduction: llvm_unreachable("Unknown induction"); @@ -2209,148 +2297,6 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) { } } -static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I, - Intrinsic::ID ValidIntrinsicID) { - if (I.getNumArgOperands() != 1 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - !I.onlyReadsMemory()) - return Intrinsic::not_intrinsic; - - return ValidIntrinsicID; -} - -static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I, - Intrinsic::ID ValidIntrinsicID) { - if (I.getNumArgOperands() != 2 || - !I.getArgOperand(0)->getType()->isFloatingPointTy() || - !I.getArgOperand(1)->getType()->isFloatingPointTy() || - I.getType() != I.getArgOperand(0)->getType() || - I.getType() != I.getArgOperand(1)->getType() || - !I.onlyReadsMemory()) - return Intrinsic::not_intrinsic; - - return ValidIntrinsicID; -} - - -static Intrinsic::ID -getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) { - // If we have an intrinsic call, check if it is trivially vectorizable. - if (IntrinsicInst *II = dyn_cast(CI)) { - switch (II->getIntrinsicID()) { - case Intrinsic::sqrt: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::log: - case Intrinsic::log10: - case Intrinsic::log2: - case Intrinsic::fabs: - case Intrinsic::copysign: - case Intrinsic::floor: - case Intrinsic::ceil: - case Intrinsic::trunc: - case Intrinsic::rint: - case Intrinsic::nearbyint: - case Intrinsic::round: - case Intrinsic::pow: - case Intrinsic::fma: - case Intrinsic::fmuladd: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - return II->getIntrinsicID(); - default: - return Intrinsic::not_intrinsic; - } - } - - if (!TLI) - return Intrinsic::not_intrinsic; - - LibFunc::Func Func; - Function *F = CI->getCalledFunction(); - // We're going to make assumptions on the semantics of the functions, check - // that the target knows that it's available in this environment and it does - // not have local linkage. - if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func)) - return Intrinsic::not_intrinsic; - - // Otherwise check if we have a call to a function that can be turned into a - // vector intrinsic. 
- switch (Func) { - default: - break; - case LibFunc::sin: - case LibFunc::sinf: - case LibFunc::sinl: - return checkUnaryFloatSignature(*CI, Intrinsic::sin); - case LibFunc::cos: - case LibFunc::cosf: - case LibFunc::cosl: - return checkUnaryFloatSignature(*CI, Intrinsic::cos); - case LibFunc::exp: - case LibFunc::expf: - case LibFunc::expl: - return checkUnaryFloatSignature(*CI, Intrinsic::exp); - case LibFunc::exp2: - case LibFunc::exp2f: - case LibFunc::exp2l: - return checkUnaryFloatSignature(*CI, Intrinsic::exp2); - case LibFunc::log: - case LibFunc::logf: - case LibFunc::logl: - return checkUnaryFloatSignature(*CI, Intrinsic::log); - case LibFunc::log10: - case LibFunc::log10f: - case LibFunc::log10l: - return checkUnaryFloatSignature(*CI, Intrinsic::log10); - case LibFunc::log2: - case LibFunc::log2f: - case LibFunc::log2l: - return checkUnaryFloatSignature(*CI, Intrinsic::log2); - case LibFunc::fabs: - case LibFunc::fabsf: - case LibFunc::fabsl: - return checkUnaryFloatSignature(*CI, Intrinsic::fabs); - case LibFunc::copysign: - case LibFunc::copysignf: - case LibFunc::copysignl: - return checkBinaryFloatSignature(*CI, Intrinsic::copysign); - case LibFunc::floor: - case LibFunc::floorf: - case LibFunc::floorl: - return checkUnaryFloatSignature(*CI, Intrinsic::floor); - case LibFunc::ceil: - case LibFunc::ceilf: - case LibFunc::ceill: - return checkUnaryFloatSignature(*CI, Intrinsic::ceil); - case LibFunc::trunc: - case LibFunc::truncf: - case LibFunc::truncl: - return checkUnaryFloatSignature(*CI, Intrinsic::trunc); - case LibFunc::rint: - case LibFunc::rintf: - case LibFunc::rintl: - return checkUnaryFloatSignature(*CI, Intrinsic::rint); - case LibFunc::nearbyint: - case LibFunc::nearbyintf: - case LibFunc::nearbyintl: - return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint); - case LibFunc::round: - case LibFunc::roundf: - case LibFunc::roundl: - return checkUnaryFloatSignature(*CI, Intrinsic::round); - case LibFunc::pow: - case LibFunc::powf: - case LibFunc::powl: - return checkBinaryFloatSignature(*CI, Intrinsic::pow); - } - - return Intrinsic::not_intrinsic; -} - /// This function translates the reduction kind to an LLVM binary operator. static unsigned getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { @@ -2651,7 +2597,7 @@ void InnerLoopVectorizer::vectorizeLoop() { assert(isPowerOf2_32(VF) && "Reduction emission only supported for pow2 vectors!"); Value *TmpVec = ReducedPartRdx; - SmallVector ShuffleMask(VF, 0); + SmallVector ShuffleMask(VF, nullptr); for (unsigned i = VF; i != 1; i >>= 1) { // Move the upper half of the vector to the lower half. for (unsigned j = 0; j != i/2; ++j) @@ -3049,7 +2995,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { VectorParts &A = getVectorValue(it->getOperand(0)); VectorParts &B = getVectorValue(it->getOperand(1)); for (unsigned Part = 0; Part < UF; ++Part) { - Value *C = 0; + Value *C = nullptr; if (FCmp) C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]); else @@ -3275,15 +3221,6 @@ bool LoopVectorizationLegality::canVectorize() { return false; } - // Do not loop-vectorize loops with a tiny trip count. - BasicBlock *Latch = TheLoop->getLoopLatch(); - unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch); - if (TC > 0u && TC < TinyTripCountVectorThreshold) { - DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " << - "This loop is not worth vectorizing.\n"); - return false; - } - // Check if we can vectorize the instructions and CFG in this loop. 
   if (!canVectorizeInstrs()) {
     DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
@@ -3536,14 +3473,14 @@ static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE,
 ///\brief Look for a cast use of the passed value.
 static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
-  Value *UniqueCast = 0;
+  Value *UniqueCast = nullptr;
   for (User *U : Ptr->users()) {
     CastInst *CI = dyn_cast<CastInst>(U);
     if (CI && CI->getType() == Ty) {
       if (!UniqueCast)
         UniqueCast = CI;
       else
-        return 0;
+        return nullptr;
     }
   }
   return UniqueCast;
@@ -3556,7 +3493,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
                                    const DataLayout *DL, Loop *Lp) {
   const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
   if (!PtrTy || PtrTy->isAggregateType())
-    return 0;
+    return nullptr;
 
   // Try to remove a gep instruction to make the pointer (actually index at this
   // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
@@ -3576,11 +3513,11 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
   const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
   if (!S)
-    return 0;
+    return nullptr;
 
   V = S->getStepRecurrence(*SE);
   if (!V)
-    return 0;
+    return nullptr;
 
   // Strip off the size of access multiplication if we are still analyzing the
   // pointer.
@@ -3588,24 +3525,24 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
       DL->getTypeAllocSize(PtrTy->getElementType());
     if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
       if (M->getOperand(0)->getSCEVType() != scConstant)
-        return 0;
+        return nullptr;
 
       const APInt &APStepVal =
          cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
 
       // Huge step value - give up.
       if (APStepVal.getBitWidth() > 64)
-        return 0;
+        return nullptr;
 
       int64_t StepVal = APStepVal.getSExtValue();
       if (PtrAccessSize != StepVal)
-        return 0;
+        return nullptr;
       V = M->getOperand(1);
     }
   }
 
   // Strip off casts.
-  Type *StripedOffRecurrenceCast = 0;
+  Type *StripedOffRecurrenceCast = nullptr;
   if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
     StripedOffRecurrenceCast = C->getType();
     V = C->getOperand();
@@ -3614,11 +3551,11 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
   // Look for the loop invariant symbolic value.
   const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
   if (!U)
-    return 0;
+    return nullptr;
 
   Value *Stride = U->getValue();
   if (!Lp->isLoopInvariant(Stride))
-    return 0;
+    return nullptr;
 
   // If we have stripped off the recurrence cast we have to make sure that we
   // return the value that is used in this loop so that we can replace it later.
@@ -3629,7 +3566,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
 }
 
 void LoopVectorizationLegality::collectStridedAcccess(Value *MemAccess) {
-  Value *Ptr = 0;
+  Value *Ptr = nullptr;
   if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
     Ptr = LI->getPointerOperand();
   else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
     Ptr = SI->getPointerOperand();
@@ -4628,7 +4565,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
   // We only allow for a single reduction value to be used outside the loop.
   // This includes users of the reduction, variables (which form a cycle
   // which ends in the phi node).
-  Instruction *ExitInstruction = 0;
+  Instruction *ExitInstruction = nullptr;
   // Indicates that we found a reduction operation in our scan.
   bool FoundReduxOp = false;
 
@@ -4642,7 +4579,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
   // the number of instructions we saw from the recognized min/max pattern,
   // to make sure we only see exactly the two instructions.
   unsigned NumCmpSelectPatternInst = 0;
-  ReductionInstDesc ReduxDesc(false, 0);
+  ReductionInstDesc ReduxDesc(false, nullptr);
 
   SmallPtrSet VisitedInsts;
   SmallVector Worklist;
@@ -4725,7 +4662,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
       // being used. In this case the user uses the value of the previous
      // iteration, in which case we would lose "VF-1" iterations of the
      // reduction operation if we vectorize.
-      if (ExitInstruction != 0 || Cur == Phi)
+      if (ExitInstruction != nullptr || Cur == Phi)
        return false;
 
      // The instruction used by an outside user must be the last instruction
@@ -4741,7 +4678,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
     // Process instructions only once (termination). Each reduction cycle
     // value must only be used once, except by phi nodes and min/max
     // reductions which are represented as a cmp followed by a select.
-    ReductionInstDesc IgnoredVal(false, 0);
+    ReductionInstDesc IgnoredVal(false, nullptr);
     if (VisitedInsts.insert(UI)) {
       if (isa<PHINode>(UI))
         PHIs.push_back(UI);
@@ -4795,8 +4732,8 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
   assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
          "Expect a select instruction");
-  Instruction *Cmp = 0;
-  SelectInst *Select = 0;
+  Instruction *Cmp = nullptr;
+  SelectInst *Select = nullptr;
 
   // We must handle the select(cmp()) as a single instruction. Advance to the
   // select.
@@ -4982,7 +4919,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
 LoopVectorizationCostModel::VectorizationFactor
 LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
-                                                      unsigned UserVF) {
+                                                      unsigned UserVF,
+                                                      bool ForceVectorization) {
   // Width 1 means no vectorize
   VectorizationFactor Factor = { 1U, 0U };
   if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
@@ -5052,8 +4990,18 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
   }
 
   float Cost = expectedCost(1);
+#ifndef NDEBUG
+  const float ScalarCost = Cost;
+#endif /* NDEBUG */
   unsigned Width = 1;
-  DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
+  DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
+
+  // Ignore scalar width, because the user explicitly wants vectorization.
+  if (ForceVectorization && VF > 1) {
+    Width = 2;
+    Cost = expectedCost(Width) / (float)Width;
+  }
+
   for (unsigned i=2; i <= VF; i*=2) {
     // Notice that the vector loop needs to be executed less times, so
     // we need to divide the cost of the vector loops by the width of
@@ -5067,7 +5015,10 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
     }
   }
 
-  DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+  DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs()
+        << "LV: Vectorization seems to be not beneficial, "
+        << "but was forced by a user.\n");
+  DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n");
   Factor.Width = Width;
   Factor.Cost = Width * Cost;
   return Factor;
@@ -5516,7 +5467,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
       Op2VK = TargetTransformInfo::OK_UniformConstantValue;
     else if (isa(Op2) || isa(Op2)) {
       Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
-      if (cast(Op2)->getSplatValue() != NULL)
+      if (cast(Op2)->getSplatValue() != nullptr)
         Op2VK = TargetTransformInfo::OK_UniformConstantValue;
     }
 
@@ -5730,17 +5681,17 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
   // Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy(); - Value *UndefVec = IsVoidRetTy ? 0 : + Value *UndefVec = IsVoidRetTy ? nullptr : UndefValue::get(Instr->getType()); // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); Instruction *InsertPt = Builder.GetInsertPoint(); BasicBlock *IfBlock = Builder.GetInsertBlock(); - BasicBlock *CondBlock = 0; + BasicBlock *CondBlock = nullptr; VectorParts Cond; - Loop *VectorLp = 0; + Loop *VectorLp = nullptr; if (IfPredicateStore) { assert(Instr->getParent()->getSinglePredecessor() && "Only support single predecessor blocks"); @@ -5755,7 +5706,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // For each scalar that we create: // Start an "if (pred) a[i] = ..." block. - Value *Cmp = 0; + Value *Cmp = nullptr; if (IfPredicateStore) { if (Cond[Part]->getType()->isVectorTy()) Cond[Part] = diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index ee32227..e13ba95 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15,9 +15,6 @@ // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. // //===----------------------------------------------------------------------===// -#define SV_NAME "slp-vectorizer" -#define DEBUG_TYPE "SLP" - #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" @@ -34,6 +31,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/NoFolder.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" @@ -41,11 +39,15 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/VectorUtils.h" #include #include using namespace llvm; +#define SV_NAME "slp-vectorizer" +#define DEBUG_TYPE "SLP" + static cl::opt SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden, cl::desc("Only vectorize if you gain more than this " @@ -72,8 +74,6 @@ struct BlockNumbering { BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {} - BlockNumbering() : BB(0), Valid(false) {} - void numberInstructions() { unsigned Loc = 0; InstrIdx.clear(); @@ -120,15 +120,15 @@ private: static BasicBlock *getSameBlock(ArrayRef VL) { Instruction *I0 = dyn_cast(VL[0]); if (!I0) - return 0; + return nullptr; BasicBlock *BB = I0->getParent(); for (int i = 1, e = VL.size(); i < e; i++) { Instruction *I = dyn_cast(VL[i]); if (!I) - return 0; + return nullptr; if (BB != I->getParent()) - return 0; + return nullptr; } return BB; } @@ -180,7 +180,7 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef VL) { switch (Kind) { default: - MD = 0; // Remove unknown metadata + MD = nullptr; // Remove unknown metadata break; case LLVMContext::MD_tbaa: MD = MDNode::getMostGenericTBAA(MD, IMD); @@ -201,7 +201,7 @@ static Type* getSameType(ArrayRef VL) { Type *Ty = VL[0]->getType(); for (int i = 1, e = VL.size(); i < e; i++) if (VL[i]->getType() != Ty) - return 0; + return nullptr; return Ty; } @@ -345,17 +345,10 @@ public: typedef SmallVector StoreList; BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl, - TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li, - DominatorTree *Dt) : - F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt), - Builder(Se->getContext()) { - // Setup the block numbering utility for all of the 
blocks in the - // function. - for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { - BasicBlock *BB = it; - BlocksNumbers[BB] = BlockNumbering(BB); - } - } + TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, + LoopInfo *Li, DominatorTree *Dt) + : F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), + Builder(Se->getContext()) {} /// \brief Vectorize the tree that starts with the elements in \p VL. /// Returns the vectorized root. @@ -365,13 +358,13 @@ public: /// A negative number means that this is profitable. int getTreeCost(); - /// Construct a vectorizable tree that starts at \p Roots and is possibly - /// used by a reduction of \p RdxOps. - void buildTree(ArrayRef Roots, ValueSet *RdxOps = 0); + /// Construct a vectorizable tree that starts at \p Roots, ignoring users for + /// the purpose of scheduling and extraction in the \p UserIgnoreLst. + void buildTree(ArrayRef Roots, + ArrayRef UserIgnoreLst = None); /// Clear the internal data structures that are created by 'buildTree'. void deleteTree() { - RdxOps = 0; VectorizableTree.clear(); ScalarToTreeEntry.clear(); MustGather.clear(); @@ -446,7 +439,7 @@ private: bool isFullyVectorizableTinyTree(); struct TreeEntry { - TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0), + TreeEntry() : Scalars(), VectorizedValue(nullptr), LastScalarIndex(0), NeedToGather(0) {} /// \returns true if the scalars in VL are equal to this entry. @@ -527,14 +520,22 @@ private: /// Numbers instructions in different blocks. DenseMap BlocksNumbers; - /// Reduction operators. - ValueSet *RdxOps; + /// \brief Get the corresponding instruction numbering list for a given + /// BasicBlock. The list is allocated lazily. + BlockNumbering &getBlockNumbering(BasicBlock *BB) { + auto I = BlocksNumbers.insert(std::make_pair(BB, BlockNumbering(BB))); + return I.first->second; + } + + /// List of users to ignore during scheduling and that don't need extracting. + ArrayRef UserIgnoreList; // Analysis and block reference. Function *F; ScalarEvolution *SE; const DataLayout *DL; TargetTransformInfo *TTI; + TargetLibraryInfo *TLI; AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; @@ -542,9 +543,10 @@ private: IRBuilder<> Builder; }; -void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { +void BoUpSLP::buildTree(ArrayRef Roots, + ArrayRef UserIgnoreLst) { deleteTree(); - RdxOps = Rdx; + UserIgnoreList = UserIgnoreLst; if (!getSameType(Roots)) return; buildTree_rec(Roots, 0); @@ -576,8 +578,9 @@ void BoUpSLP::buildTree(ArrayRef Roots, ValueSet *Rdx) { if (!UserInst) continue; - // Ignore uses that are part of the reduction. - if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end()) + // Ignore users in the user ignore list. + if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) != + UserIgnoreList.end()) continue; DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " << @@ -708,12 +711,13 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { continue; } - // This user is part of the reduction. - if (RdxOps && RdxOps->count(UI)) + // Ignore users in the user ignore list. + if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) != + UserIgnoreList.end()) continue; // Make sure that we can schedule this unknown user. 
- BlockNumbering &BN = BlocksNumbers[BB]; + BlockNumbering &BN = getBlockNumbering(BB); int UserIndex = BN.getIndex(UI); if (UserIndex < MyLastIndex) { @@ -948,32 +952,36 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { } case Instruction::Call: { // Check if the calls are all to the same vectorizable intrinsic. - IntrinsicInst *II = dyn_cast(VL[0]); - if (II==NULL) { + CallInst *CI = cast(VL[0]); + // Check if this is an Intrinsic call or something that can be + // represented by an intrinsic call + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); + if (!isTriviallyVectorizable(ID)) { newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } - Function *Int = II->getCalledFunction(); + Function *Int = CI->getCalledFunction(); for (unsigned i = 1, e = VL.size(); i != e; ++i) { - IntrinsicInst *II2 = dyn_cast(VL[i]); - if (!II2 || II2->getCalledFunction() != Int) { + CallInst *CI2 = dyn_cast(VL[i]); + if (!CI2 || CI2->getCalledFunction() != Int || + getIntrinsicIDForCall(CI2, TLI) != ID) { newTreeEntry(VL, false); - DEBUG(dbgs() << "SLP: mismatched calls:" << *II << "!=" << *VL[i] + DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] << "\n"); return; } } newTreeEntry(VL, true); - for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) { + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { ValueList Operands; // Prepare the operand vector. for (unsigned j = 0; j < VL.size(); ++j) { - IntrinsicInst *II2 = dyn_cast(VL[j]); - Operands.push_back(II2->getArgOperand(i)); + CallInst *CI2 = dyn_cast(VL[j]); + Operands.push_back(CI2->getArgOperand(i)); } buildTree_rec(Operands, Depth + 1); } @@ -1090,7 +1098,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { // If instead not all operands are constants, then set the operand kind // to OK_AnyValue. If all operands are constants but not the same, // then set the operand kind to OK_NonUniformConstantValue. - ConstantInt *CInt = NULL; + ConstantInt *CInt = nullptr; for (unsigned i = 0; i < VL.size(); ++i) { const Instruction *I = cast(VL[i]); if (!isa(I->getOperand(1))) { @@ -1129,12 +1137,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { } case Instruction::Call: { CallInst *CI = cast(VL0); - IntrinsicInst *II = cast(CI); - Intrinsic::ID ID = II->getIntrinsicID(); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); // Calculate the cost of the scalar and vector calls. 
SmallVector ScalarTys, VecTys; - for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) { + for (unsigned op = 0, opc = CI->getNumArgOperands(); op!= opc; ++op) { ScalarTys.push_back(CI->getArgOperand(op)->getType()); VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(), VecTy->getNumElements())); @@ -1147,7 +1154,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost << " (" << VecCallCost << "-" << ScalarCallCost << ")" - << " for " << *II << "\n"); + << " for " << *CI << "\n"); return VecCallCost - ScalarCallCost; } @@ -1244,7 +1251,7 @@ Value *BoUpSLP::getPointerOperand(Value *I) { return LI->getPointerOperand(); if (StoreInst *SI = dyn_cast(I)) return SI->getPointerOperand(); - return 0; + return nullptr; } unsigned BoUpSLP::getAddressSpaceOperand(Value *I) { @@ -1318,13 +1325,13 @@ Value *BoUpSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) { if (!A.Ptr || !B.Ptr || AA->alias(A, B)) return I; } - return 0; + return nullptr; } int BoUpSLP::getLastIndex(ArrayRef VL) { BasicBlock *BB = cast(VL[0])->getParent(); - assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block"); - BlockNumbering &BN = BlocksNumbers[BB]; + assert(BB == getSameBlock(VL) && "Invalid block"); + BlockNumbering &BN = getBlockNumbering(BB); int MaxIdx = BN.getIndex(BB->getFirstNonPHI()); for (unsigned i = 0, e = VL.size(); i < e; ++i) @@ -1334,8 +1341,8 @@ int BoUpSLP::getLastIndex(ArrayRef VL) { Instruction *BoUpSLP::getLastInstruction(ArrayRef VL) { BasicBlock *BB = cast(VL[0])->getParent(); - assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block"); - BlockNumbering &BN = BlocksNumbers[BB]; + assert(BB == getSameBlock(VL) && "Invalid block"); + BlockNumbering &BN = getBlockNumbering(BB); int MaxIdx = BN.getIndex(cast(VL[0])); for (unsigned i = 1, e = VL.size(); i < e; ++i) @@ -1394,7 +1401,7 @@ Value *BoUpSLP::alreadyVectorized(ArrayRef VL) const { if (En->isSame(VL) && En->VectorizedValue) return En->VectorizedValue; } - return 0; + return nullptr; } Value *BoUpSLP::vectorizeTree(ArrayRef VL) { @@ -1615,6 +1622,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { VecTy->getPointerTo(AS)); unsigned Alignment = LI->getAlignment(); LI = Builder.CreateLoad(VecPtr); + if (!Alignment) + Alignment = DL->getABITypeAlignment(LI->getPointerOperand()->getType()); LI->setAlignment(Alignment); E->VectorizedValue = LI; return propagateMetadata(LI, E->Scalars); @@ -1634,13 +1643,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), VecTy->getPointerTo(AS)); StoreInst *S = Builder.CreateStore(VecValue, VecPtr); + if (!Alignment) + Alignment = DL->getABITypeAlignment(SI->getPointerOperand()->getType()); S->setAlignment(Alignment); E->VectorizedValue = S; return propagateMetadata(S, E->Scalars); } case Instruction::Call: { CallInst *CI = cast(VL0); - setInsertPointAfterBundle(E->Scalars); std::vector OpVecs; for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) { @@ -1656,8 +1666,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { } Module *M = F->getParent(); - IntrinsicInst *II = cast(CI); - Intrinsic::ID ID = II->getIntrinsicID(); + Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) }; Function *CF = Intrinsic::getDeclaration(M, ID, Tys); Value *V = Builder.CreateCall(CF, OpVecs); @@ -1667,7 +1676,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { default: 
llvm_unreachable("unknown inst"); } - return 0; + return nullptr; } Value *BoUpSLP::vectorizeTree() { @@ -1746,8 +1755,9 @@ Value *BoUpSLP::vectorizeTree() { DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n"); assert((ScalarToTreeEntry.count(U) || - // It is legal to replace the reduction users by undef. - (RdxOps && RdxOps->count(U))) && + // It is legal to replace users in the ignorelist by undef. + (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) != + UserIgnoreList.end())) && "Replacing out-of-tree value with undef"); } #endif @@ -1759,9 +1769,9 @@ Value *BoUpSLP::vectorizeTree() { } } - for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) { - BlocksNumbers[it].forget(); - } + for (auto &BN : BlocksNumbers) + BN.second.forget(); + Builder.ClearInsertionPoint(); return VectorizableTree[0].VectorizedValue; @@ -1802,11 +1812,19 @@ void BoUpSLP::optimizeGatherSequence() { Insert->moveBefore(PreHeader->getTerminator()); } + // Make a list of all reachable blocks in our CSE queue. + SmallVector CSEWorkList; + CSEWorkList.reserve(CSEBlocks.size()); + for (BasicBlock *BB : CSEBlocks) + if (DomTreeNode *N = DT->getNode(BB)) { + assert(DT->isReachableFromEntry(N)); + CSEWorkList.push_back(N); + } + // Sort blocks by domination. This ensures we visit a block after all blocks // dominating it are visited. - SmallVector CSEWorkList(CSEBlocks.begin(), CSEBlocks.end()); std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(), - [this](const BasicBlock *A, const BasicBlock *B) { + [this](const DomTreeNode *A, const DomTreeNode *B) { return DT->properlyDominates(A, B); }); @@ -1814,12 +1832,10 @@ void BoUpSLP::optimizeGatherSequence() { // instructions. TODO: We can further optimize this scan if we split the // instructions into different buckets based on the insert lane. SmallVector Visited; - for (SmallVectorImpl::iterator I = CSEWorkList.begin(), - E = CSEWorkList.end(); - I != E; ++I) { + for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) { assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) && "Worklist not sorted properly!"); - BasicBlock *BB = *I; + BasicBlock *BB = (*I)->getBlock(); // For all instructions in blocks containing gather sequences: for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) { Instruction *In = it++; @@ -1835,7 +1851,7 @@ void BoUpSLP::optimizeGatherSequence() { DT->dominates((*v)->getParent(), In->getParent())) { In->replaceAllUsesWith(*v); In->eraseFromParent(); - In = 0; + In = nullptr; break; } } @@ -1864,6 +1880,7 @@ struct SLPVectorizer : public FunctionPass { ScalarEvolution *SE; const DataLayout *DL; TargetTransformInfo *TTI; + TargetLibraryInfo *TLI; AliasAnalysis *AA; LoopInfo *LI; DominatorTree *DT; @@ -1874,8 +1891,9 @@ struct SLPVectorizer : public FunctionPass { SE = &getAnalysis(); DataLayoutPass *DLP = getAnalysisIfAvailable(); - DL = DLP ? &DLP->getDataLayout() : 0; + DL = DLP ? &DLP->getDataLayout() : nullptr; TTI = &getAnalysis(); + TLI = getAnalysisIfAvailable(); AA = &getAnalysis(); LI = &getAnalysis(); DT = &getAnalysis().getDomTree(); @@ -1900,8 +1918,8 @@ struct SLPVectorizer : public FunctionPass { DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n"); // Use the bottom up slp vectorizer to construct chains that start with - // he store instructions. - BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT); + // store instructions. + BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT); // Scan the blocks in the function in post order. 
   for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
@@ -1951,8 +1969,11 @@ private:
   bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
 
   /// \brief Try to vectorize a list of operands.
+  /// \param BuildVector A list of users to ignore for the purpose of
+  ///                    scheduling and that don't need extracting.
   /// \returns true if a value was vectorized.
-  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+  bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+                          ArrayRef<Value *> BuildVector = None);
 
   /// \brief Try to vectorize a chain that may start at the operands of \V;
   bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
@@ -2106,7 +2127,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
     // Check that the pointer points to scalars.
     Type *Ty = SI->getValueOperand()->getType();
     if (Ty->isAggregateType() || Ty->isVectorTy())
-      return 0;
+      continue;
 
     // Find the base pointer.
     Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
@@ -2125,7 +2146,8 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
   return tryToVectorizeList(VL, R);
 }
 
-bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+                                       ArrayRef<Value *> BuildVector) {
   if (VL.size() < 2)
     return false;
 
@@ -2153,7 +2175,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
 
   bool Changed = false;
 
-  // Keep track of values that were delete by vectorizing in the loop below.
+  // Keep track of values that were deleted by vectorizing in the loop below.
   SmallVector TrackValues(VL.begin(), VL.end());
 
   for (unsigned i = 0, e = VL.size(); i < e; ++i) {
@@ -2175,13 +2197,38 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
                    << "\n");
       ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
 
-      R.buildTree(Ops);
+      ArrayRef<Value *> BuildVectorSlice;
+      if (!BuildVector.empty())
+        BuildVectorSlice = BuildVector.slice(i, OpsWidth);
+
+      R.buildTree(Ops, BuildVectorSlice);
       int Cost = R.getTreeCost();
 
       if (Cost < -SLPCostThreshold) {
         DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
-        R.vectorizeTree();
-
+        Value *VectorizedRoot = R.vectorizeTree();
+
+        // Reconstruct the build vector by extracting the vectorized root. This
+        // way we handle the case where some elements of the vector are undefined.
+        // (return (insertelt <4 x i32> (insertelt undef (opd0) 0) (opd1) 2))
+        if (!BuildVectorSlice.empty()) {
+          // The insert point is the last build vector instruction. The vectorized
+          // root will precede it. This guarantees that we get an instruction. The
+          // vectorized tree could have been constant folded.
+          Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+          unsigned VecIdx = 0;
+          for (auto &V : BuildVectorSlice) {
+            IRBuilder<true, NoFolder> Builder(
+                ++BasicBlock::iterator(InsertAfter));
+            InsertElementInst *IE = cast<InsertElementInst>(V);
+            Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
+                VectorizedRoot, Builder.getInt32(VecIdx++)));
+            IE->setOperand(1, Extract);
+            IE->removeFromParent();
+            IE->insertAfter(Extract);
+            InsertAfter = IE;
+          }
+        }
        // Move to the next bundle.
i += VF - 1; Changed = true; @@ -2290,7 +2337,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, /// *p = /// class HorizontalReduction { - SmallPtrSet ReductionOps; + SmallVector ReductionOps; SmallVector ReducedVals; BinaryOperator *ReductionRoot; @@ -2308,7 +2355,7 @@ class HorizontalReduction { public: HorizontalReduction() - : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0), + : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0), ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {} /// \brief Try to find a reduction tree. @@ -2323,10 +2370,10 @@ public: // In such a case start looking for a tree rooted in the first '+'. if (Phi) { if (B->getOperand(0) == Phi) { - Phi = 0; + Phi = nullptr; B = dyn_cast(B->getOperand(1)); } else if (B->getOperand(1) == Phi) { - Phi = 0; + Phi = nullptr; B = dyn_cast(B->getOperand(0)); } } @@ -2384,7 +2431,7 @@ public: // We need to be able to reassociate the adds. if (!TreeN->isAssociative()) return false; - ReductionOps.insert(TreeN); + ReductionOps.push_back(TreeN); } // Retract. Stack.pop_back(); @@ -2412,7 +2459,7 @@ public: if (NumReducedVals < ReduxWidth) return false; - Value *VectorizedTree = 0; + Value *VectorizedTree = nullptr; IRBuilder<> Builder(ReductionRoot); FastMathFlags Unsafe; Unsafe.setUnsafeAlgebra(); @@ -2421,7 +2468,7 @@ public: for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) { ArrayRef ValsToReduce(&ReducedVals[i], ReduxWidth); - V.buildTree(ValsToReduce, &ReductionOps); + V.buildTree(ValsToReduce, ReductionOps); // Estimate cost. int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]); @@ -2455,13 +2502,13 @@ public: } // Update users. if (ReductionPHI) { - assert(ReductionRoot != NULL && "Need a reduction operation"); + assert(ReductionRoot && "Need a reduction operation"); ReductionRoot->setOperand(0, VectorizedTree); ReductionRoot->setOperand(1, ReductionPHI); } else ReductionRoot->replaceAllUsesWith(VectorizedTree); } - return VectorizedTree != 0; + return VectorizedTree != nullptr; } private: @@ -2540,13 +2587,16 @@ private: /// /// Returns true if it matches /// -static bool findBuildVector(InsertElementInst *IE, - SmallVectorImpl &Ops) { - if (!isa(IE->getOperand(0))) +static bool findBuildVector(InsertElementInst *FirstInsertElem, + SmallVectorImpl &BuildVector, + SmallVectorImpl &BuildVectorOpds) { + if (!isa(FirstInsertElem->getOperand(0))) return false; + InsertElementInst *IE = FirstInsertElem; while (true) { - Ops.push_back(IE->getOperand(1)); + BuildVector.push_back(IE); + BuildVectorOpds.push_back(IE->getOperand(1)); if (IE->use_empty()) return false; @@ -2641,7 +2691,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { Value *Rdx = (P->getIncomingBlock(0) == BB ? (P->getIncomingValue(0)) - : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0)); + : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) + : nullptr)); // Check if this is a Binary Operator. 
       BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
       if (!BI)
@@ -2680,7 +2731,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       if (BinaryOperator *BinOp =
              dyn_cast<BinaryOperator>(SI->getValueOperand())) {
         HorizontalReduction HorRdx;
-        if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+        if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
               HorRdx.tryToReduce(R, TTI)) ||
              tryToVectorize(BinOp, R))) {
           Changed = true;
@@ -2716,12 +2767,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
     }
 
     // Try to vectorize trees that start at insertelement instructions.
-    if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
-      SmallVector Ops;
-      if (!findBuildVector(IE, Ops))
+    if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
+      SmallVector BuildVector;
+      SmallVector BuildVectorOpds;
+      if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
         continue;
 
-      if (tryToVectorizeList(Ops, R)) {
+      // Vectorize starting with the build vector operands ignoring the
+      // BuildVector instructions for the purpose of scheduling and user
+      // extraction.
+      if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
         Changed = true;
         it = BB->begin();
         e = BB->end();
--
cgit v1.1
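
Two of the changes above alter behavior rather than just spelling, and are easy to miss among the mechanical 0 -> nullptr cleanups. The sketches below illustrate them; they are reconstructions against the LLVM 3.5-era C++ API, not part of the patch, and the helper names reportVectorized and isVectorizableCall (and the pass name "my-pass") are invented for illustration.

First, the loop vectorizer now announces its unroll/vectorize decisions through the optimization-remark diagnostics (emitOptimizationRemark, declared in llvm/IR/DiagnosticInfo.h), which the driver surfaces under -Rpass=<pass-name>. A minimal sketch of the calling pattern used in the hunks above, assuming a pass that has the enclosing Function and a source location for the loop:

  #include "llvm/ADT/Twine.h"
  #include "llvm/IR/DiagnosticInfo.h"
  #include "llvm/IR/Function.h"
  using namespace llvm;

  // Hypothetical helper: attributes the remark to DLoc and prints it when
  // the user compiles with -Rpass=my-pass.
  static void reportVectorized(Function &F, const DebugLoc &DLoc,
                               unsigned Width) {
    emitOptimizationRemark(F.getContext(), "my-pass", F, DLoc,
                           Twine("vectorized loop (width: ") + Twine(Width) +
                               ")");
  }

Second, the large block of LibFunc-to-Intrinsic mappings deleted from LoopVectorize.cpp does not disappear: judging from the new include of llvm/Transforms/Utils/VectorUtils.h and the calls to getIntrinsicIDForCall and isTriviallyVectorizable in the SLPVectorizer hunks, those helpers now live in a shared header so the loop and SLP vectorizers use one copy. A sketch of the consuming pattern, with isVectorizableCall as a made-up wrapper:

  #include "llvm/IR/Instructions.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  #include "llvm/Transforms/Utils/VectorUtils.h"
  using namespace llvm;

  // Returns true if the call is an intrinsic, or a library call (sinf,
  // floor, ...) mappable to one, that is trivially vectorizable; TLI may
  // be null, in which case only real intrinsic calls are recognized.
  static bool isVectorizableCall(CallInst *CI, const TargetLibraryInfo *TLI) {
    Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
    return isTriviallyVectorizable(ID);
  }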